=================================================================== RCS file: /cvs/mandoc/tag.c,v retrieving revision 1.35 retrieving revision 1.38 diff -u -p -r1.35 -r1.38 --- mandoc/tag.c 2020/04/18 20:40:10 1.35 +++ mandoc/tag.c 2023/11/24 05:02:18 1.38 @@ -1,6 +1,7 @@ -/* $Id: tag.c,v 1.35 2020/04/18 20:40:10 schwarze Exp $ */ +/* $Id: tag.c,v 1.38 2023/11/24 05:02:18 schwarze Exp $ */ /* - * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze + * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023 + * Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -25,11 +26,13 @@ #include #include #include +#include #include #include #include "mandoc_aux.h" #include "mandoc_ohash.h" +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "roff_int.h" @@ -43,6 +46,8 @@ struct tag_entry { char s[]; }; +static void tag_move_href(struct roff_man *, + struct roff_node *, const char *); static void tag_move_id(struct roff_node *); static struct ohash tag_data; @@ -78,19 +83,34 @@ tag_free(void) /* * Set a node where a term is defined, - * unless it is already defined at a lower priority. + * unless the term is already defined at a lower priority. */ void tag_put(const char *s, int prio, struct roff_node *n) { struct tag_entry *entry; struct roff_node *nold; - const char *se; + const char *se, *src; + char *cpy; size_t len; unsigned int slot; + int changed; assert(prio <= TAG_FALLBACK); + /* + * If the node is already tagged, the existing tag is + * explicit and we are now about to add an implicit tag. + * Don't do that; just skip implicit tagging if the author + * specified an explicit tag. + */ + + if (n->flags & NODE_ID) + return; + + /* Determine the implicit tag. */ + + changed = 1; if (s == NULL) { if (n->child == NULL || n->child->type != ROFFT_TEXT) return; @@ -107,27 +127,53 @@ tag_put(const char *s, int prio, struct roff_node *n) s += 2; break; default: - break; + return; } break; default: + changed = 0; break; } } /* + * Translate \- and ASCII_HYPH to plain '-'. * Skip whitespace and escapes and whatever follows, * and if there is any, downgrade the priority. */ - len = strcspn(s, " \t\\"); + cpy = mandoc_malloc(strlen(s) + 1); + for (src = s, len = 0; *src != '\0'; src++, len++) { + switch (*src) { + case '\t': + case ' ': + changed = 1; + break; + case ASCII_HYPH: + cpy[len] = '-'; + changed = 1; + continue; + case '\\': + if (src[1] != '-') + break; + src++; + changed = 1; + /* FALLTHROUGH */ + default: + cpy[len] = *src; + continue; + } + break; + } if (len == 0) - return; + goto out; + cpy[len] = '\0'; - se = s + len; - if (*se != '\0' && prio < TAG_WEAK) + if (*src != '\0' && prio < TAG_WEAK) prio = TAG_WEAK; + s = cpy; + se = cpy + len; slot = ohash_qlookupi(&tag_data, s, &se); entry = ohash_find(&tag_data, slot); @@ -135,8 +181,7 @@ tag_put(const char *s, int prio, struct roff_node *n) if (entry == NULL) { entry = mandoc_malloc(sizeof(*entry) + len + 1); - memcpy(entry->s, s, len); - entry->s[len] = '\0'; + memcpy(entry->s, s, len + 1); entry->nodes = NULL; entry->maxnodes = entry->nnodes = 0; ohash_insert(&tag_data, slot, entry); @@ -148,7 +193,7 @@ tag_put(const char *s, int prio, struct roff_node *n) */ else if (entry->prio < prio) - return; + goto out; /* * If the existing entry is worse, clear it. @@ -165,7 +210,7 @@ tag_put(const char *s, int prio, struct roff_node *n) } if (prio == TAG_FALLBACK) { entry->prio = TAG_DELETE; - return; + goto out; } } @@ -179,10 +224,13 @@ tag_put(const char *s, int prio, struct roff_node *n) entry->nodes[entry->nnodes++] = n; entry->prio = prio; n->flags |= NODE_ID; - if (n->child == NULL || n->child->string != s || *se != '\0') { + if (changed) { assert(n->tag == NULL); n->tag = mandoc_strndup(s, len); } + + out: + free(cpy); } int @@ -254,6 +302,39 @@ tag_move_id(struct roff_node *n) } /* + * When a paragraph is tagged and starts with text, + * move the permalink to the first few words. + */ +static void +tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) +{ + char *cp; + + if (n == NULL || n->type != ROFFT_TEXT || + *n->string == '\0' || *n->string == ' ') + return; + + cp = n->string; + while (cp != NULL && cp - n->string < 5) + cp = strchr(cp + 1, ' '); + + /* If the first text node is longer, split it. */ + + if (cp != NULL && cp[1] != '\0') { + man->last = n; + man->next = ROFF_NEXT_SIBLING; + roff_word_alloc(man, n->line, + n->pos + (cp - n->string), cp + 1); + man->last->flags = n->flags & ~NODE_LINE; + *cp = '\0'; + } + + assert(n->tag == NULL); + n->tag = mandoc_strdup(tag); + n->flags |= NODE_HREF; +} + +/* * When all tags have been set, decide where to put * the associated permalinks, and maybe move some tags * to the beginning of the respective paragraphs. @@ -261,34 +342,16 @@ tag_move_id(struct roff_node *n) void tag_postprocess(struct roff_man *man, struct roff_node *n) { - struct roff_node *nn; - char *cp; - if (n->flags & NODE_ID) { switch (n->tok) { case MDOC_Pp: - nn = n->next; - if (nn == NULL || nn->type != ROFFT_TEXT || - *nn->string == '\0' || *nn->string == ' ') - break; - /* Use the first few letters for the permalink. */ - cp = nn->string; - while (cp != NULL && cp - nn->string < 5) - cp = strchr(cp + 1, ' '); - if (cp != NULL && cp[1] != '\0') { - /* Split a longer text node. */ - man->last = nn; - man->next = ROFF_NEXT_SIBLING; - roff_word_alloc(man, nn->line, - nn->pos + (cp - nn->string), cp + 1); - man->last->flags = nn->flags; - *cp = '\0'; - } - assert(nn->tag == NULL); - nn->tag = mandoc_strdup(n->tag); - nn->flags |= NODE_HREF; + tag_move_href(man, n->next, n->tag); break; case MDOC_Bd: + case MDOC_D1: + case MDOC_Dl: + tag_move_href(man, n->child, n->tag); + break; case MDOC_Bl: /* XXX No permalink for now. */ break;