[BACK]Return to tag.c CVS log [TXT][DIR] Up to [cvsweb.bsd.lv] / mandoc

Annotation of mandoc/tag.c, Revision 1.38

1.38    ! schwarze    1: /* $Id: tag.c,v 1.37 2022/04/26 11:38:38 schwarze Exp $ */
1.1       schwarze    2: /*
1.38    ! schwarze    3:  * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
1.37      schwarze    4:  *               Ingo Schwarze <schwarze@openbsd.org>
1.1       schwarze    5:  *
                      6:  * Permission to use, copy, modify, and distribute this software for any
                      7:  * purpose with or without fee is hereby granted, provided that the above
                      8:  * copyright notice and this permission notice appear in all copies.
                      9:  *
                     10:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
                     11:  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
                     12:  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
                     13:  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     14:  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
                     15:  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
                     16:  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1.28      schwarze   17:  *
                     18:  * Functions to tag syntax tree nodes.
                     19:  * For internal use by mandoc(1) validation modules only.
1.1       schwarze   20:  */
1.7       schwarze   21: #include "config.h"
                     22:
1.1       schwarze   23: #include <sys/types.h>
                     24:
1.27      schwarze   25: #include <assert.h>
1.20      schwarze   26: #include <limits.h>
1.1       schwarze   27: #include <stddef.h>
1.32      schwarze   28: #include <stdint.h>
1.38    ! schwarze   29: #include <stdio.h>
1.1       schwarze   30: #include <stdlib.h>
                     31: #include <string.h>
                     32:
                     33: #include "mandoc_aux.h"
1.10      schwarze   34: #include "mandoc_ohash.h"
1.38    ! schwarze   35: #include "mandoc.h"
1.28      schwarze   36: #include "roff.h"
1.33      schwarze   37: #include "mdoc.h"
1.35      schwarze   38: #include "roff_int.h"
1.1       schwarze   39: #include "tag.h"
                     40:
                     41: struct tag_entry {
1.28      schwarze   42:        struct roff_node **nodes;
                     43:        size_t   maxnodes;
                     44:        size_t   nnodes;
1.4       schwarze   45:        int      prio;
1.1       schwarze   46:        char     s[];
                     47: };
                     48:
1.36      schwarze   49: static void             tag_move_href(struct roff_man *,
                     50:                                struct roff_node *, const char *);
1.33      schwarze   51: static void             tag_move_id(struct roff_node *);
                     52:
1.1       schwarze   53: static struct ohash     tag_data;
                     54:
                     55:
                     56: /*
1.28      schwarze   57:  * Set up the ohash table to collect nodes
                     58:  * where various marked-up terms are documented.
1.1       schwarze   59:  */
1.28      schwarze   60: void
                     61: tag_alloc(void)
1.1       schwarze   62: {
1.28      schwarze   63:        mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
                     64: }
1.1       schwarze   65:
1.28      schwarze   66: void
                     67: tag_free(void)
                     68: {
                     69:        struct tag_entry        *entry;
                     70:        unsigned int             slot;
1.12      schwarze   71:
1.29      schwarze   72:        if (tag_data.info.free == NULL)
                     73:                return;
1.28      schwarze   74:        entry = ohash_first(&tag_data, &slot);
                     75:        while (entry != NULL) {
                     76:                free(entry->nodes);
                     77:                free(entry);
                     78:                entry = ohash_next(&tag_data, &slot);
1.22      schwarze   79:        }
1.28      schwarze   80:        ohash_delete(&tag_data);
1.29      schwarze   81:        tag_data.info.free = NULL;
1.1       schwarze   82: }
                     83:
                     84: /*
1.28      schwarze   85:  * Set a node where a term is defined,
1.37      schwarze   86:  * unless the term is already defined at a lower priority.
1.1       schwarze   87:  */
                     88: void
1.28      schwarze   89: tag_put(const char *s, int prio, struct roff_node *n)
1.1       schwarze   90: {
                     91:        struct tag_entry        *entry;
1.34      schwarze   92:        struct roff_node        *nold;
1.38    ! schwarze   93:        const char              *se, *src;
        !            94:        char                    *cpy;
1.5       schwarze   95:        size_t                   len;
1.1       schwarze   96:        unsigned int             slot;
1.38    ! schwarze   97:        int                      changed;
1.1       schwarze   98:
1.27      schwarze   99:        assert(prio <= TAG_FALLBACK);
1.20      schwarze  100:
1.37      schwarze  101:        /*
                    102:         * If the node is already tagged, the existing tag is
                    103:         * explicit and we are now about to add an implicit tag.
                    104:         * Don't do that; just skip implicit tagging if the author
                    105:         * specified an explicit tag.
                    106:         */
                    107:
                    108:        if (n->flags & NODE_ID)
                    109:                return;
                    110:
                    111:        /* Determine the implicit tag. */
                    112:
1.38    ! schwarze  113:        changed = 1;
1.28      schwarze  114:        if (s == NULL) {
                    115:                if (n->child == NULL || n->child->type != ROFFT_TEXT)
                    116:                        return;
                    117:                s = n->child->string;
1.30      schwarze  118:                switch (s[0]) {
                    119:                case '-':
                    120:                        s++;
                    121:                        break;
                    122:                case '\\':
                    123:                        switch (s[1]) {
                    124:                        case '&':
                    125:                        case '-':
                    126:                        case 'e':
                    127:                                s += 2;
                    128:                                break;
                    129:                        default:
1.38    ! schwarze  130:                                return;
1.30      schwarze  131:                        }
                    132:                        break;
                    133:                default:
1.38    ! schwarze  134:                        changed = 0;
1.30      schwarze  135:                        break;
                    136:                }
1.28      schwarze  137:        }
1.20      schwarze  138:
                    139:        /*
1.38    ! schwarze  140:         * Translate \- and ASCII_HYPH to plain '-'.
1.24      schwarze  141:         * Skip whitespace and escapes and whatever follows,
1.20      schwarze  142:         * and if there is any, downgrade the priority.
                    143:         */
                    144:
1.38    ! schwarze  145:        cpy = mandoc_malloc(strlen(s) + 1);
        !           146:        for (src = s, len = 0; *src != '\0'; src++, len++) {
        !           147:                switch (*src) {
        !           148:                case '\t':
        !           149:                case ' ':
        !           150:                        changed = 1;
        !           151:                        break;
        !           152:                case ASCII_HYPH:
        !           153:                        cpy[len] = '-';
        !           154:                        changed = 1;
        !           155:                        continue;
        !           156:                case '\\':
        !           157:                        if (src[1] != '-')
        !           158:                                break;
        !           159:                        src++;
        !           160:                        changed = 1;
        !           161:                        /* FALLTHROUGH */
        !           162:                default:
        !           163:                        cpy[len] = *src;
        !           164:                        continue;
        !           165:                }
        !           166:                break;
        !           167:        }
1.20      schwarze  168:        if (len == 0)
1.38    ! schwarze  169:                goto out;
        !           170:        cpy[len] = '\0';
1.14      schwarze  171:
1.38    ! schwarze  172:        if (*src != '\0' && prio < TAG_WEAK)
1.27      schwarze  173:                prio = TAG_WEAK;
1.20      schwarze  174:
1.38    ! schwarze  175:        s = cpy;
        !           176:        se = cpy + len;
1.20      schwarze  177:        slot = ohash_qlookupi(&tag_data, s, &se);
1.1       schwarze  178:        entry = ohash_find(&tag_data, slot);
1.14      schwarze  179:
1.28      schwarze  180:        /* Build a new entry. */
                    181:
1.1       schwarze  182:        if (entry == NULL) {
1.20      schwarze  183:                entry = mandoc_malloc(sizeof(*entry) + len + 1);
1.38    ! schwarze  184:                memcpy(entry->s, s, len + 1);
1.28      schwarze  185:                entry->nodes = NULL;
                    186:                entry->maxnodes = entry->nnodes = 0;
1.1       schwarze  187:                ohash_insert(&tag_data, slot, entry);
1.28      schwarze  188:        }
1.14      schwarze  189:
1.28      schwarze  190:        /*
                    191:         * Lower priority numbers take precedence.
                    192:         * If a better entry is already present, ignore the new one.
                    193:         */
                    194:
                    195:        else if (entry->prio < prio)
1.38    ! schwarze  196:                goto out;
1.28      schwarze  197:
                    198:        /*
                    199:         * If the existing entry is worse, clear it.
                    200:         * In addition, a tag with priority TAG_FALLBACK
                    201:         * is only used if the tag occurs exactly once.
                    202:         */
1.14      schwarze  203:
1.28      schwarze  204:        else if (entry->prio > prio || prio == TAG_FALLBACK) {
1.34      schwarze  205:                while (entry->nnodes > 0) {
                    206:                        nold = entry->nodes[--entry->nnodes];
                    207:                        nold->flags &= ~NODE_ID;
                    208:                        free(nold->tag);
                    209:                        nold->tag = NULL;
                    210:                }
1.27      schwarze  211:                if (prio == TAG_FALLBACK) {
1.28      schwarze  212:                        entry->prio = TAG_DELETE;
1.38    ! schwarze  213:                        goto out;
1.16      schwarze  214:                }
1.14      schwarze  215:        }
                    216:
1.28      schwarze  217:        /* Remember the new node. */
1.14      schwarze  218:
1.28      schwarze  219:        if (entry->maxnodes == entry->nnodes) {
                    220:                entry->maxnodes += 4;
                    221:                entry->nodes = mandoc_reallocarray(entry->nodes,
                    222:                    entry->maxnodes, sizeof(*entry->nodes));
1.14      schwarze  223:        }
1.28      schwarze  224:        entry->nodes[entry->nnodes++] = n;
1.4       schwarze  225:        entry->prio = prio;
1.28      schwarze  226:        n->flags |= NODE_ID;
1.38    ! schwarze  227:        if (changed) {
1.34      schwarze  228:                assert(n->tag == NULL);
                    229:                n->tag = mandoc_strndup(s, len);
1.28      schwarze  230:        }
1.38    ! schwarze  231:
        !           232:  out:
        !           233:        free(cpy);
1.1       schwarze  234: }
                    235:
1.31      schwarze  236: int
                    237: tag_exists(const char *tag)
1.1       schwarze  238: {
1.31      schwarze  239:        return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
1.33      schwarze  240: }
                    241:
                    242: /*
                    243:  * For in-line elements, move the link target
                    244:  * to the enclosing paragraph when appropriate.
                    245:  */
                    246: static void
                    247: tag_move_id(struct roff_node *n)
                    248: {
                    249:        struct roff_node *np;
                    250:
                    251:        np = n;
                    252:        for (;;) {
                    253:                if (np->prev != NULL)
                    254:                        np = np->prev;
                    255:                else if ((np = np->parent) == NULL)
                    256:                        return;
                    257:                switch (np->tok) {
                    258:                case MDOC_It:
                    259:                        switch (np->parent->parent->norm->Bl.type) {
                    260:                        case LIST_column:
                    261:                                /* Target the ROFFT_BLOCK = <tr>. */
                    262:                                np = np->parent;
                    263:                                break;
                    264:                        case LIST_diag:
                    265:                        case LIST_hang:
                    266:                        case LIST_inset:
                    267:                        case LIST_ohang:
                    268:                        case LIST_tag:
                    269:                                /* Target the ROFFT_HEAD = <dt>. */
                    270:                                np = np->parent->head;
                    271:                                break;
                    272:                        default:
                    273:                                /* Target the ROFF_BODY = <li>. */
                    274:                                break;
                    275:                        }
                    276:                        /* FALLTHROUGH */
                    277:                case MDOC_Pp:   /* Target the ROFFT_ELEM = <p>. */
1.34      schwarze  278:                        if (np->tag == NULL) {
                    279:                                np->tag = mandoc_strdup(n->tag == NULL ?
                    280:                                    n->child->string : n->tag);
1.33      schwarze  281:                                np->flags |= NODE_ID;
                    282:                                n->flags &= ~NODE_ID;
                    283:                        }
                    284:                        return;
                    285:                case MDOC_Sh:
                    286:                case MDOC_Ss:
                    287:                case MDOC_Bd:
                    288:                case MDOC_Bl:
                    289:                case MDOC_D1:
                    290:                case MDOC_Dl:
                    291:                case MDOC_Rs:
                    292:                        /* Do not move past major blocks. */
                    293:                        return;
                    294:                default:
                    295:                        /*
                    296:                         * Move past in-line content and partial
                    297:                         * blocks, for example .It Xo or .It Bq Er.
                    298:                         */
                    299:                        break;
                    300:                }
                    301:        }
                    302: }
                    303:
                    304: /*
1.36      schwarze  305:  * When a paragraph is tagged and starts with text,
                    306:  * move the permalink to the first few words.
                    307:  */
                    308: static void
                    309: tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
                    310: {
                    311:        char    *cp;
                    312:
                    313:        if (n == NULL || n->type != ROFFT_TEXT ||
                    314:            *n->string == '\0' || *n->string == ' ')
                    315:                return;
                    316:
                    317:        cp = n->string;
                    318:        while (cp != NULL && cp - n->string < 5)
                    319:                cp = strchr(cp + 1, ' ');
                    320:
                    321:        /* If the first text node is longer, split it. */
                    322:
                    323:        if (cp != NULL && cp[1] != '\0') {
                    324:                man->last = n;
                    325:                man->next = ROFF_NEXT_SIBLING;
                    326:                roff_word_alloc(man, n->line,
                    327:                    n->pos + (cp - n->string), cp + 1);
                    328:                man->last->flags = n->flags & ~NODE_LINE;
                    329:                *cp = '\0';
                    330:        }
                    331:
                    332:        assert(n->tag == NULL);
                    333:        n->tag = mandoc_strdup(tag);
                    334:        n->flags |= NODE_HREF;
                    335: }
                    336:
                    337: /*
1.33      schwarze  338:  * When all tags have been set, decide where to put
                    339:  * the associated permalinks, and maybe move some tags
                    340:  * to the beginning of the respective paragraphs.
                    341:  */
                    342: void
1.35      schwarze  343: tag_postprocess(struct roff_man *man, struct roff_node *n)
1.33      schwarze  344: {
                    345:        if (n->flags & NODE_ID) {
                    346:                switch (n->tok) {
1.35      schwarze  347:                case MDOC_Pp:
1.36      schwarze  348:                        tag_move_href(man, n->next, n->tag);
1.35      schwarze  349:                        break;
1.33      schwarze  350:                case MDOC_Bd:
1.36      schwarze  351:                case MDOC_D1:
                    352:                case MDOC_Dl:
                    353:                        tag_move_href(man, n->child, n->tag);
                    354:                        break;
1.33      schwarze  355:                case MDOC_Bl:
                    356:                        /* XXX No permalink for now. */
                    357:                        break;
                    358:                default:
                    359:                        if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
                    360:                                tag_move_id(n);
                    361:                        if (n->tok != MDOC_Tg)
                    362:                                n->flags |= NODE_HREF;
1.34      schwarze  363:                        else if ((n->flags & NODE_ID) == 0) {
1.33      schwarze  364:                                n->flags |= NODE_NOPRT;
1.34      schwarze  365:                                free(n->tag);
                    366:                                n->tag = NULL;
                    367:                        }
1.33      schwarze  368:                        break;
                    369:                }
                    370:        }
                    371:        for (n = n->child; n != NULL; n = n->next)
1.35      schwarze  372:                tag_postprocess(man, n);
1.1       schwarze  373: }

CVSweb