Annotation of mandoc/tag.c, Revision 1.38
1.38 ! schwarze 1: /* $Id: tag.c,v 1.37 2022/04/26 11:38:38 schwarze Exp $ */
1.1 schwarze 2: /*
1.38 ! schwarze 3: * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
1.37 schwarze 4: * Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.28 schwarze 17: *
18: * Functions to tag syntax tree nodes.
19: * For internal use by mandoc(1) validation modules only.
1.1 schwarze 20: */
1.7 schwarze 21: #include "config.h"
22:
1.1 schwarze 23: #include <sys/types.h>
24:
1.27 schwarze 25: #include <assert.h>
1.20 schwarze 26: #include <limits.h>
1.1 schwarze 27: #include <stddef.h>
1.32 schwarze 28: #include <stdint.h>
1.38 ! schwarze 29: #include <stdio.h>
1.1 schwarze 30: #include <stdlib.h>
31: #include <string.h>
32:
33: #include "mandoc_aux.h"
1.10 schwarze 34: #include "mandoc_ohash.h"
1.38 ! schwarze 35: #include "mandoc.h"
1.28 schwarze 36: #include "roff.h"
1.33 schwarze 37: #include "mdoc.h"
1.35 schwarze 38: #include "roff_int.h"
1.1 schwarze 39: #include "tag.h"
40:
/*
 * One record of the tag hash table.
 * The trailing flexible array holds the NUL-terminated tag text;
 * its offset is what tag_alloc() hands to mandoc_ohash_init().
 */
struct tag_entry {
	struct roff_node **nodes;	/* nodes currently carrying this tag */
	size_t		  maxnodes;	/* slots allocated in nodes[] */
	size_t		  nnodes;	/* slots in use in nodes[] */
	int		  prio;		/* lower numbers take precedence */
	char		  s[];		/* the tag text itself */
};
48:
1.36 schwarze 49: static void tag_move_href(struct roff_man *,
50: struct roff_node *, const char *);
1.33 schwarze 51: static void tag_move_id(struct roff_node *);
52:
1.1 schwarze 53: static struct ohash tag_data;
54:
55:
56: /*
1.28 schwarze 57: * Set up the ohash table to collect nodes
58: * where various marked-up terms are documented.
1.1 schwarze 59: */
1.28 schwarze 60: void
61: tag_alloc(void)
1.1 schwarze 62: {
1.28 schwarze 63: mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
64: }
1.1 schwarze 65:
1.28 schwarze 66: void
67: tag_free(void)
68: {
69: struct tag_entry *entry;
70: unsigned int slot;
1.12 schwarze 71:
1.29 schwarze 72: if (tag_data.info.free == NULL)
73: return;
1.28 schwarze 74: entry = ohash_first(&tag_data, &slot);
75: while (entry != NULL) {
76: free(entry->nodes);
77: free(entry);
78: entry = ohash_next(&tag_data, &slot);
1.22 schwarze 79: }
1.28 schwarze 80: ohash_delete(&tag_data);
1.29 schwarze 81: tag_data.info.free = NULL;
1.1 schwarze 82: }
83:
84: /*
1.28 schwarze 85: * Set a node where a term is defined,
1.37 schwarze 86: * unless the term is already defined at a lower priority.
1.1 schwarze 87: */
88: void
1.28 schwarze 89: tag_put(const char *s, int prio, struct roff_node *n)
1.1 schwarze 90: {
91: struct tag_entry *entry;
1.34 schwarze 92: struct roff_node *nold;
1.38 ! schwarze 93: const char *se, *src;
! 94: char *cpy;
1.5 schwarze 95: size_t len;
1.1 schwarze 96: unsigned int slot;
1.38 ! schwarze 97: int changed;
1.1 schwarze 98:
1.27 schwarze 99: assert(prio <= TAG_FALLBACK);
1.20 schwarze 100:
1.37 schwarze 101: /*
102: * If the node is already tagged, the existing tag is
103: * explicit and we are now about to add an implicit tag.
104: * Don't do that; just skip implicit tagging if the author
105: * specified an explicit tag.
106: */
107:
108: if (n->flags & NODE_ID)
109: return;
110:
111: /* Determine the implicit tag. */
112:
1.38 ! schwarze 113: changed = 1;
1.28 schwarze 114: if (s == NULL) {
115: if (n->child == NULL || n->child->type != ROFFT_TEXT)
116: return;
117: s = n->child->string;
1.30 schwarze 118: switch (s[0]) {
119: case '-':
120: s++;
121: break;
122: case '\\':
123: switch (s[1]) {
124: case '&':
125: case '-':
126: case 'e':
127: s += 2;
128: break;
129: default:
1.38 ! schwarze 130: return;
1.30 schwarze 131: }
132: break;
133: default:
1.38 ! schwarze 134: changed = 0;
1.30 schwarze 135: break;
136: }
1.28 schwarze 137: }
1.20 schwarze 138:
139: /*
1.38 ! schwarze 140: * Translate \- and ASCII_HYPH to plain '-'.
1.24 schwarze 141: * Skip whitespace and escapes and whatever follows,
1.20 schwarze 142: * and if there is any, downgrade the priority.
143: */
144:
1.38 ! schwarze 145: cpy = mandoc_malloc(strlen(s) + 1);
! 146: for (src = s, len = 0; *src != '\0'; src++, len++) {
! 147: switch (*src) {
! 148: case '\t':
! 149: case ' ':
! 150: changed = 1;
! 151: break;
! 152: case ASCII_HYPH:
! 153: cpy[len] = '-';
! 154: changed = 1;
! 155: continue;
! 156: case '\\':
! 157: if (src[1] != '-')
! 158: break;
! 159: src++;
! 160: changed = 1;
! 161: /* FALLTHROUGH */
! 162: default:
! 163: cpy[len] = *src;
! 164: continue;
! 165: }
! 166: break;
! 167: }
1.20 schwarze 168: if (len == 0)
1.38 ! schwarze 169: goto out;
! 170: cpy[len] = '\0';
1.14 schwarze 171:
1.38 ! schwarze 172: if (*src != '\0' && prio < TAG_WEAK)
1.27 schwarze 173: prio = TAG_WEAK;
1.20 schwarze 174:
1.38 ! schwarze 175: s = cpy;
! 176: se = cpy + len;
1.20 schwarze 177: slot = ohash_qlookupi(&tag_data, s, &se);
1.1 schwarze 178: entry = ohash_find(&tag_data, slot);
1.14 schwarze 179:
1.28 schwarze 180: /* Build a new entry. */
181:
1.1 schwarze 182: if (entry == NULL) {
1.20 schwarze 183: entry = mandoc_malloc(sizeof(*entry) + len + 1);
1.38 ! schwarze 184: memcpy(entry->s, s, len + 1);
1.28 schwarze 185: entry->nodes = NULL;
186: entry->maxnodes = entry->nnodes = 0;
1.1 schwarze 187: ohash_insert(&tag_data, slot, entry);
1.28 schwarze 188: }
1.14 schwarze 189:
1.28 schwarze 190: /*
191: * Lower priority numbers take precedence.
192: * If a better entry is already present, ignore the new one.
193: */
194:
195: else if (entry->prio < prio)
1.38 ! schwarze 196: goto out;
1.28 schwarze 197:
198: /*
199: * If the existing entry is worse, clear it.
200: * In addition, a tag with priority TAG_FALLBACK
201: * is only used if the tag occurs exactly once.
202: */
1.14 schwarze 203:
1.28 schwarze 204: else if (entry->prio > prio || prio == TAG_FALLBACK) {
1.34 schwarze 205: while (entry->nnodes > 0) {
206: nold = entry->nodes[--entry->nnodes];
207: nold->flags &= ~NODE_ID;
208: free(nold->tag);
209: nold->tag = NULL;
210: }
1.27 schwarze 211: if (prio == TAG_FALLBACK) {
1.28 schwarze 212: entry->prio = TAG_DELETE;
1.38 ! schwarze 213: goto out;
1.16 schwarze 214: }
1.14 schwarze 215: }
216:
1.28 schwarze 217: /* Remember the new node. */
1.14 schwarze 218:
1.28 schwarze 219: if (entry->maxnodes == entry->nnodes) {
220: entry->maxnodes += 4;
221: entry->nodes = mandoc_reallocarray(entry->nodes,
222: entry->maxnodes, sizeof(*entry->nodes));
1.14 schwarze 223: }
1.28 schwarze 224: entry->nodes[entry->nnodes++] = n;
1.4 schwarze 225: entry->prio = prio;
1.28 schwarze 226: n->flags |= NODE_ID;
1.38 ! schwarze 227: if (changed) {
1.34 schwarze 228: assert(n->tag == NULL);
229: n->tag = mandoc_strndup(s, len);
1.28 schwarze 230: }
1.38 ! schwarze 231:
! 232: out:
! 233: free(cpy);
1.1 schwarze 234: }
235:
1.31 schwarze 236: int
237: tag_exists(const char *tag)
1.1 schwarze 238: {
1.31 schwarze 239: return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
1.33 schwarze 240: }
241:
242: /*
243: * For in-line elements, move the link target
244: * to the enclosing paragraph when appropriate.
245: */
246: static void
247: tag_move_id(struct roff_node *n)
248: {
249: struct roff_node *np;
250:
251: np = n;
252: for (;;) {
253: if (np->prev != NULL)
254: np = np->prev;
255: else if ((np = np->parent) == NULL)
256: return;
257: switch (np->tok) {
258: case MDOC_It:
259: switch (np->parent->parent->norm->Bl.type) {
260: case LIST_column:
261: /* Target the ROFFT_BLOCK = <tr>. */
262: np = np->parent;
263: break;
264: case LIST_diag:
265: case LIST_hang:
266: case LIST_inset:
267: case LIST_ohang:
268: case LIST_tag:
269: /* Target the ROFFT_HEAD = <dt>. */
270: np = np->parent->head;
271: break;
272: default:
273: /* Target the ROFF_BODY = <li>. */
274: break;
275: }
276: /* FALLTHROUGH */
277: case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
1.34 schwarze 278: if (np->tag == NULL) {
279: np->tag = mandoc_strdup(n->tag == NULL ?
280: n->child->string : n->tag);
1.33 schwarze 281: np->flags |= NODE_ID;
282: n->flags &= ~NODE_ID;
283: }
284: return;
285: case MDOC_Sh:
286: case MDOC_Ss:
287: case MDOC_Bd:
288: case MDOC_Bl:
289: case MDOC_D1:
290: case MDOC_Dl:
291: case MDOC_Rs:
292: /* Do not move past major blocks. */
293: return;
294: default:
295: /*
296: * Move past in-line content and partial
297: * blocks, for example .It Xo or .It Bq Er.
298: */
299: break;
300: }
301: }
302: }
303:
304: /*
1.36 schwarze 305: * When a paragraph is tagged and starts with text,
306: * move the permalink to the first few words.
307: */
308: static void
309: tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
310: {
311: char *cp;
312:
313: if (n == NULL || n->type != ROFFT_TEXT ||
314: *n->string == '\0' || *n->string == ' ')
315: return;
316:
317: cp = n->string;
318: while (cp != NULL && cp - n->string < 5)
319: cp = strchr(cp + 1, ' ');
320:
321: /* If the first text node is longer, split it. */
322:
323: if (cp != NULL && cp[1] != '\0') {
324: man->last = n;
325: man->next = ROFF_NEXT_SIBLING;
326: roff_word_alloc(man, n->line,
327: n->pos + (cp - n->string), cp + 1);
328: man->last->flags = n->flags & ~NODE_LINE;
329: *cp = '\0';
330: }
331:
332: assert(n->tag == NULL);
333: n->tag = mandoc_strdup(tag);
334: n->flags |= NODE_HREF;
335: }
336:
337: /*
1.33 schwarze 338: * When all tags have been set, decide where to put
339: * the associated permalinks, and maybe move some tags
340: * to the beginning of the respective paragraphs.
341: */
342: void
1.35 schwarze 343: tag_postprocess(struct roff_man *man, struct roff_node *n)
1.33 schwarze 344: {
345: if (n->flags & NODE_ID) {
346: switch (n->tok) {
1.35 schwarze 347: case MDOC_Pp:
1.36 schwarze 348: tag_move_href(man, n->next, n->tag);
1.35 schwarze 349: break;
1.33 schwarze 350: case MDOC_Bd:
1.36 schwarze 351: case MDOC_D1:
352: case MDOC_Dl:
353: tag_move_href(man, n->child, n->tag);
354: break;
1.33 schwarze 355: case MDOC_Bl:
356: /* XXX No permalink for now. */
357: break;
358: default:
359: if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
360: tag_move_id(n);
361: if (n->tok != MDOC_Tg)
362: n->flags |= NODE_HREF;
1.34 schwarze 363: else if ((n->flags & NODE_ID) == 0) {
1.33 schwarze 364: n->flags |= NODE_NOPRT;
1.34 schwarze 365: free(n->tag);
366: n->tag = NULL;
367: }
1.33 schwarze 368: break;
369: }
370: }
371: for (n = n->child; n != NULL; n = n->next)
1.35 schwarze 372: tag_postprocess(man, n);
1.1 schwarze 373: }
CVSweb