Annotation of mandoc/roff.c, Revision 1.1
1.1 ! kristaps 1: /* $Id: html4_strict.c,v 1.6 2008/11/24 08:50:33 kristaps Exp $ */
! 2: /*
! 3: * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
! 4: *
! 5: * Permission to use, copy, modify, and distribute this software for any
! 6: * purpose with or without fee is hereby granted, provided that the
! 7: * above copyright notice and this permission notice appear in all
! 8: * copies.
! 9: *
! 10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
! 11: * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
! 12: * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
! 13: * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
! 14: * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
! 15: * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
! 16: * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
! 17: * PERFORMANCE OF THIS SOFTWARE.
! 18: */
! 19: #include <assert.h>
! 20: #include <ctype.h>
! 21: #include <err.h>
! 22: #include <stdlib.h>
! 23: #include <stdio.h>
! 24: #include <string.h>
! 25: #include <time.h>
! 26:
! 27: #include "libmdocml.h"
! 28: #include "private.h"
! 29:
/*
 * Capacity of the argument vector built for one macro line.  The
 * vector holds the macro word itself, its arguments and a terminating
 * NULL, so a line may carry at most ROFF_MAXARG - 1 words.
 */
#define	ROFF_MAXARG	  (10)
! 31:
/*
 * Direction handed to a token callback: ROFF_ENTER when the token is
 * being processed, ROFF_EXIT when its open scope is being closed.
 */
enum	roffd {
	ROFF_ENTER = 0,
	ROFF_EXIT = 1
};
! 36:
/*
 * Classification of a token.  Title tokens are only accepted while the
 * prelude is open, text and layout tokens only in the body, and comment
 * tokens are ignored.  Layout tokens additionally take part in the
 * scope-closing logic of the parser.
 */
enum	rofftype {
	ROFF_TITLE = 0,
	ROFF_COMMENT = 1,
	ROFF_TEXT = 2,
	ROFF_LAYOUT = 3
};
! 43:
/*
 * Parameter list shared by every token callback: the parse tree, the
 * NULL-terminated arguments following the token (NULL on ROFF_EXIT),
 * and whether the token is being entered or exited.
 */
#define	ROFFCALL_ARGS							\
	struct rofftree *tree,						\
	const char *argv[],						\
	enum roffd type
! 46:
! 47: struct rofftree;
! 48:
! 49: struct rofftok {
! 50: char *name;
! 51: int (*cb)(ROFFCALL_ARGS);
! 52: enum rofftype type;
! 53: int flags;
! 54: #define ROFF_NESTED (1 << 0)
! 55: #define ROFF_PARSED (1 << 1)
! 56: #define ROFF_CALLABLE (1 << 2)
! 57: #define ROFF_QUOTES (1 << 3)
! 58: };
! 59:
/*
 * Static description of one `-name' style token argument.
 */
struct	roffarg {
	char		 *name;		/* Argument text without the `-'. */
	int		  flags;	/* Bitwise OR of the values below. */
/* The argument is followed by a value word. */
#define	ROFF_VALUE	 (1 << 0)
};
! 65:
/*
 * One open scope.  Nodes form a singly linked stack through `parent',
 * with the innermost scope referenced by the tree's `last' pointer.
 */
struct	roffnode {
	int		  tok;		/* Token id that opened the scope. */
	struct roffnode	 *parent;	/* Enclosing scope, or NULL. */
	size_t		  line;		/* Input line that opened it. */
};
! 71:
/*
 * Parser state for one input document.
 */
struct	rofftree {
	struct roffnode	 *last;		/* Innermost open scope, or NULL. */
	/*
	 * NOTE(review): the next four fields are not read or written by
	 * the code in this file yet; presumably prelude metadata.
	 */
	time_t		  date;
	char		  title[256];
	char		  section[256];
	char		  volume[256];
	int		  state;	/* Bitwise OR of the values below. */
/* The prelude (Dd, Dt, Os) has not been completed yet. */
#define	ROFF_PRELUDE	 (1 << 1)
/* Individual prelude tokens that have been seen. */
#define	ROFF_PRELUDE_Os	 (1 << 2)
#define	ROFF_PRELUDE_Dt	 (1 << 3)
#define	ROFF_PRELUDE_Dd	 (1 << 4)
/* The prelude is complete; body tokens are now accepted. */
#define	ROFF_BODY	 (1 << 5)
	struct md_mbuf	 *mbuf;		/* NULL if ROFF_EXIT and error. */

	const struct md_args	*args;	/* Caller's arguments (debug flags). */
	const struct md_rbuf	*rbuf;	/* Input: file name and line number. */
};
! 89:
/*
 * Token identifiers.  Each value is the index of the token's entry in
 * the token table; ROFF_MAX is the table size and doubles as the
 * "no such token" result of a lookup.
 */
#define	ROFF___		 (0)	/* Comment. */
#define	ROFF_Dd		 (1)
#define	ROFF_Dt		 (2)
#define	ROFF_Os		 (3)
#define	ROFF_Sh		 (4)
#define	ROFF_An		 (5)
#define	ROFF_Li		 (6)
#define	ROFF_MAX	 (7)
! 98:
/* Token callbacks, one per non-comment entry of the token table. */
static	int		  roff_Dd(ROFFCALL_ARGS);
static	int		  roff_Dt(ROFFCALL_ARGS);
static	int		  roff_Os(ROFFCALL_ARGS);
static	int		  roff_Sh(ROFFCALL_ARGS);
static	int		  roff_An(ROFFCALL_ARGS);
static	int		  roff_Li(ROFFCALL_ARGS);

/* Scope stack maintenance. */
static	struct roffnode	 *roffnode_new(int tokid, size_t line,
				struct rofftree *tree);
static	void		  roffnode_free(int tokid, struct rofftree *tree);

/* Lookup and line parsing. */
static	int		  rofffindtok(const char *name);
static	int		  rofffindarg(const char *name);
static	int		  roffargs(int tok, char *buf, char **argv);
static	int		  roffparse(struct rofftree *tree, char *buf,
				size_t sz);
static	int		  textparse(const struct rofftree *tree,
				const char *buf, size_t sz);

/* Debug tracing of token nesting. */
static	void		  dbg_enter(const struct md_args *args, int tokid);
static	void		  dbg_leave(const struct md_args *args, int tokid);
! 119:
! 120:
! 121: static const struct rofftok tokens[ROFF_MAX] = {
! 122: { "\\\"", NULL, ROFF_COMMENT, 0 },
! 123: { "Dd", roff_Dd, ROFF_TITLE, 0 },
! 124: { "Dt", roff_Dt, ROFF_TITLE, 0 },
! 125: { "Os", roff_Os, ROFF_TITLE, 0 },
! 126: { "Sh", roff_Sh, ROFF_LAYOUT, 0 },
! 127: { "An", roff_An, ROFF_TEXT, ROFF_PARSED },
! 128: { "Li", roff_Li, ROFF_TEXT, ROFF_PARSED | ROFF_CALLABLE },
! 129: };
! 130:
! 131: #define ROFF_Split 0
! 132: #define ROFF_Nosplit 1
! 133: #define ROFF_ARGMAX 2
! 134:
! 135: static const struct roffarg tokenargs[ROFF_ARGMAX] = {
! 136: { "split", 0 },
! 137: { "nosplit", 0 },
! 138: };
! 139:
! 140:
! 141: int
! 142: roff_free(struct rofftree *tree, int flush)
! 143: {
! 144: int error;
! 145:
! 146: assert(tree->mbuf);
! 147: if ( ! flush)
! 148: tree->mbuf = NULL;
! 149:
! 150: /* LINTED */
! 151: while (tree->last)
! 152: if ( ! (*tokens[tree->last->tok].cb)
! 153: (tree, NULL, ROFF_EXIT))
! 154: /* Disallow flushing. */
! 155: tree->mbuf = NULL;
! 156:
! 157: error = tree->mbuf ? 0 : 1;
! 158:
! 159: if (tree->mbuf && (ROFF_PRELUDE & tree->state)) {
! 160: warnx("%s: prelude never finished",
! 161: tree->rbuf->name);
! 162: error = 1;
! 163: }
! 164:
! 165: free(tree);
! 166: return(error ? 0 : 1);
! 167: }
! 168:
! 169:
! 170: struct rofftree *
! 171: roff_alloc(const struct md_args *args, struct md_mbuf *out,
! 172: const struct md_rbuf *in)
! 173: {
! 174: struct rofftree *tree;
! 175:
! 176: if (NULL == (tree = calloc(1, sizeof(struct rofftree)))) {
! 177: warn("malloc");
! 178: return(NULL);
! 179: }
! 180:
! 181: tree->state = ROFF_PRELUDE;
! 182: tree->args = args;
! 183: tree->mbuf = out;
! 184: tree->rbuf = in;
! 185:
! 186: return(tree);
! 187: }
! 188:
! 189:
! 190: int
! 191: roff_engine(struct rofftree *tree, char *buf, size_t sz)
! 192: {
! 193:
! 194: if (0 == sz) {
! 195: warnx("%s: blank line (line %zu)",
! 196: tree->rbuf->name,
! 197: tree->rbuf->line);
! 198: return(0);
! 199: } else if ('.' != *buf)
! 200: return(textparse(tree, buf, sz));
! 201:
! 202: return(roffparse(tree, buf, sz));
! 203: }
! 204:
! 205:
! 206: static int
! 207: textparse(const struct rofftree *tree, const char *buf, size_t sz)
! 208: {
! 209:
! 210: if (NULL == tree->last) {
! 211: warnx("%s: unexpected text (line %zu)",
! 212: tree->rbuf->name,
! 213: tree->rbuf->line);
! 214: return(0);
! 215: } else if (NULL == tree->last->parent) {
! 216: warnx("%s: disallowed text (line %zu)",
! 217: tree->rbuf->name,
! 218: tree->rbuf->line);
! 219: return(0);
! 220: }
! 221:
! 222: /* Print text. */
! 223:
! 224: return(1);
! 225: }
! 226:
! 227:
! 228: static int
! 229: roffargs(int tok, char *buf, char **argv)
! 230: {
! 231: int i;
! 232:
! 233: (void)tok;/* FIXME: quotable strings? */
! 234:
! 235: assert(tok >= 0 && tok < ROFF_MAX);
! 236: assert('.' == *buf);
! 237:
! 238: /* LINTED */
! 239: for (i = 0; *buf && i < ROFF_MAXARG; i++) {
! 240: argv[i] = buf++;
! 241: while (*buf && ! isspace(*buf))
! 242: buf++;
! 243: if (0 == *buf) {
! 244: continue;
! 245: }
! 246: *buf++ = 0;
! 247: while (*buf && isspace(*buf))
! 248: buf++;
! 249: }
! 250:
! 251: assert(i > 0);
! 252: if (i < ROFF_MAXARG)
! 253: argv[i] = NULL;
! 254:
! 255: return(ROFF_MAXARG > i);
! 256: }
! 257:
! 258:
! 259: static int
! 260: roffparse(struct rofftree *tree, char *buf, size_t sz)
! 261: {
! 262: int tok, t;
! 263: struct roffnode *node;
! 264: char *argv[ROFF_MAXARG];
! 265: const char **argvp;
! 266:
! 267: assert(sz > 0);
! 268:
! 269: /*
! 270: * Extract the token identifier from the buffer. If there's no
! 271: * callback for the token (comment, etc.) then exit immediately.
! 272: * We don't do any error handling (yet), so if the token doesn't
! 273: * exist, die.
! 274: */
! 275:
! 276: if (3 > sz) {
! 277: warnx("%s: malformed line (line %zu)",
! 278: tree->rbuf->name,
! 279: tree->rbuf->line);
! 280: return(0);
! 281: } else if (ROFF_MAX == (tok = rofffindtok(buf + 1))) {
! 282: warnx("%s: unknown line token `%c%c' (line %zu)",
! 283: tree->rbuf->name,
! 284: *(buf + 1), *(buf + 2),
! 285: tree->rbuf->line);
! 286: return(0);
! 287: } else if (ROFF_COMMENT == tokens[tok].type)
! 288: /* Ignore comment tokens. */
! 289: return(1);
! 290:
! 291: if ( ! roffargs(tok, buf, argv)) {
! 292: warnx("%s: too many arguments to `%s' (line %zu)",
! 293: tree->rbuf->name, tokens[tok].name,
! 294: tree->rbuf->line);
! 295: return(0);
! 296: }
! 297:
! 298: /* Domain cross-contamination (and sanity) checks. */
! 299:
! 300: switch (tokens[tok].type) {
! 301: case (ROFF_TITLE):
! 302: if (ROFF_PRELUDE & tree->state) {
! 303: assert( ! (ROFF_BODY & tree->state));
! 304: break;
! 305: }
! 306: assert(ROFF_BODY & tree->state);
! 307: warnx("%s: prelude token `%s' in body (line %zu)",
! 308: tree->rbuf->name, tokens[tok].name,
! 309: tree->rbuf->line);
! 310: return(0);
! 311: case (ROFF_LAYOUT):
! 312: /* FALLTHROUGH */
! 313: case (ROFF_TEXT):
! 314: if (ROFF_BODY & tree->state) {
! 315: assert( ! (ROFF_PRELUDE & tree->state));
! 316: break;
! 317: }
! 318: assert(ROFF_PRELUDE & tree->state);
! 319: warnx("%s: body token `%s' in prelude (line %zu)",
! 320: tree->rbuf->name, tokens[tok].name,
! 321: tree->rbuf->line);
! 322: return(0);
! 323: case (ROFF_COMMENT):
! 324: return(1);
! 325: default:
! 326: abort();
! 327: }
! 328:
! 329: /*
! 330: * If this is a non-nestable layout token and we're below a
! 331: * token of the same type, then recurse upward to the token,
! 332: * closing out the interim scopes.
! 333: *
! 334: * If there's a nested token on the chain, then raise an error
! 335: * as nested tokens have corresponding "ending" tokens and we're
! 336: * breaking their scope.
! 337: */
! 338:
! 339: node = NULL;
! 340:
! 341: if (ROFF_LAYOUT == tokens[tok].type &&
! 342: ! (ROFF_NESTED & tokens[tok].flags)) {
! 343: for (node = tree->last; node; node = node->parent) {
! 344: if (node->tok == tok)
! 345: break;
! 346:
! 347: /* Don't break nested scope. */
! 348:
! 349: if ( ! (ROFF_NESTED & tokens[node->tok].flags))
! 350: continue;
! 351: warnx("%s: scope of %s (line %zu) broken by "
! 352: "%s (line %zu)",
! 353: tree->rbuf->name,
! 354: tokens[tok].name,
! 355: node->line,
! 356: tokens[node->tok].name,
! 357: tree->rbuf->line);
! 358: return(0);
! 359: }
! 360: }
! 361:
! 362: if (node) {
! 363: assert(ROFF_LAYOUT == tokens[tok].type);
! 364: assert( ! (ROFF_NESTED & tokens[tok].flags));
! 365: assert(node->tok == tok);
! 366:
! 367: /* Clear up to last scoped token. */
! 368:
! 369: /* LINTED */
! 370: do {
! 371: t = tree->last->tok;
! 372: if ( ! (*tokens[tree->last->tok].cb)
! 373: (tree, NULL, ROFF_EXIT))
! 374: return(0);
! 375: } while (t != tok);
! 376: }
! 377:
! 378: /* Proceed with actual token processing. */
! 379:
! 380: argvp = (const char **)&argv[1];
! 381: return((*tokens[tok].cb)(tree, argvp, ROFF_ENTER));
! 382: }
! 383:
! 384:
! 385: static int
! 386: rofffindarg(const char *name)
! 387: {
! 388: size_t i;
! 389:
! 390: /* FIXME: use a table, this is slow but ok for now. */
! 391:
! 392: /* LINTED */
! 393: for (i = 0; i < ROFF_ARGMAX; i++)
! 394: /* LINTED */
! 395: if (0 == strcmp(name, tokenargs[i].name))
! 396: return((int)i);
! 397:
! 398: return(ROFF_ARGMAX);
! 399: }
! 400:
! 401:
! 402: static int
! 403: rofffindtok(const char *name)
! 404: {
! 405: size_t i;
! 406:
! 407: /* FIXME: use a table, this is slow but ok for now. */
! 408:
! 409: /* LINTED */
! 410: for (i = 0; i < ROFF_MAX; i++)
! 411: /* LINTED */
! 412: if (0 == strncmp(name, tokens[i].name, 2))
! 413: return((int)i);
! 414:
! 415: return(ROFF_MAX);
! 416: }
! 417:
! 418:
! 419: /* FIXME: accept only struct rofftree *. */
! 420: static struct roffnode *
! 421: roffnode_new(int tokid, size_t line, struct rofftree *tree)
! 422: {
! 423: struct roffnode *p;
! 424:
! 425: if (NULL == (p = malloc(sizeof(struct roffnode)))) {
! 426: warn("malloc");
! 427: return(NULL);
! 428: }
! 429:
! 430: p->line = line;
! 431: p->tok = tokid;
! 432: p->parent = tree->last;
! 433: tree->last = p;
! 434: return(p);
! 435: }
! 436:
! 437:
! 438: static void
! 439: roffnode_free(int tokid, struct rofftree *tree)
! 440: {
! 441: struct roffnode *p;
! 442:
! 443: assert(tree->last);
! 444: assert(tree->last->tok == tokid);
! 445:
! 446: p = tree->last;
! 447: tree->last = tree->last->parent;
! 448: free(p);
! 449: }
! 450:
! 451:
/* Current nesting depth of the debug trace; starts at zero. */
static	int	 dbg_lvl;
! 453:
! 454:
! 455: static void
! 456: dbg_enter(const struct md_args *args, int tokid)
! 457: {
! 458: int i;
! 459: static char buf[72];
! 460:
! 461: assert(args);
! 462: if ( ! (args->dbg & MD_DBG_TREE))
! 463: return;
! 464: assert(tokid >= 0 && tokid <= ROFF_MAX);
! 465:
! 466: buf[0] = buf[71] = 0;
! 467:
! 468: switch (tokens[tokid].type) {
! 469: case (ROFF_LAYOUT):
! 470: (void)strncat(buf, "[body-layout] ", sizeof(buf) - 1);
! 471: break;
! 472: case (ROFF_TEXT):
! 473: (void)strncat(buf, "[ body-text] ", sizeof(buf) - 1);
! 474: break;
! 475: case (ROFF_TITLE):
! 476: (void)strncat(buf, "[ prelude] ", sizeof(buf) - 1);
! 477: break;
! 478: default:
! 479: abort();
! 480: }
! 481:
! 482: /* LINTED */
! 483: for (i = 0; i < dbg_lvl; i++)
! 484: (void)strncat(buf, " ", sizeof(buf) - 1);
! 485:
! 486: (void)strncat(buf, tokens[tokid].name, sizeof(buf) - 1);
! 487:
! 488: (void)printf("%s\n", buf);
! 489:
! 490: dbg_lvl++;
! 491: }
! 492:
! 493:
! 494: /* FIXME: accept only struct rofftree *. */
! 495: static void
! 496: dbg_leave(const struct md_args *args, int tokid)
! 497: {
! 498: assert(args);
! 499: if ( ! (args->dbg & MD_DBG_TREE))
! 500: return;
! 501:
! 502: assert(tokid >= 0 && tokid <= ROFF_MAX);
! 503: assert(dbg_lvl > 0);
! 504: dbg_lvl--;
! 505: }
! 506:
! 507:
! 508: /* FIXME: accept only struct rofftree *. */
! 509: /* ARGSUSED */
! 510: static int
! 511: roff_Dd(ROFFCALL_ARGS)
! 512: {
! 513:
! 514: dbg_enter(tree->args, ROFF_Dd);
! 515:
! 516: assert(ROFF_PRELUDE & tree->state);
! 517: if (ROFF_PRELUDE_Dt & tree->state ||
! 518: ROFF_PRELUDE_Dd & tree->state) {
! 519: warnx("%s: prelude `Dd' out-of-order (line %zu)",
! 520: tree->rbuf->name, tree->rbuf->line);
! 521: return(0);
! 522: }
! 523:
! 524: assert(NULL == tree->last);
! 525: tree->state |= ROFF_PRELUDE_Dd;
! 526:
! 527: dbg_leave(tree->args, ROFF_Dd);
! 528:
! 529: return(1);
! 530: }
! 531:
! 532:
! 533: /* ARGSUSED */
! 534: static int
! 535: roff_Dt(ROFFCALL_ARGS)
! 536: {
! 537:
! 538: dbg_enter(tree->args, ROFF_Dt);
! 539:
! 540: assert(ROFF_PRELUDE & tree->state);
! 541: if ( ! (ROFF_PRELUDE_Dd & tree->state) ||
! 542: (ROFF_PRELUDE_Dt & tree->state)) {
! 543: warnx("%s: prelude `Dt' out-of-order (line %zu)",
! 544: tree->rbuf->name, tree->rbuf->line);
! 545: return(0);
! 546: }
! 547:
! 548: assert(NULL == tree->last);
! 549: tree->state |= ROFF_PRELUDE_Dt;
! 550:
! 551: dbg_leave(tree->args, ROFF_Dt);
! 552:
! 553: return(1);
! 554: }
! 555:
! 556:
! 557: /* ARGSUSED */
! 558: static int
! 559: roff_Os(ROFFCALL_ARGS)
! 560: {
! 561:
! 562: if (ROFF_EXIT == type) {
! 563: roffnode_free(ROFF_Os, tree);
! 564: dbg_leave(tree->args, ROFF_Os);
! 565: return(1);
! 566: }
! 567:
! 568: dbg_enter(tree->args, ROFF_Os);
! 569:
! 570: assert(ROFF_PRELUDE & tree->state);
! 571: if ( ! (ROFF_PRELUDE_Dt & tree->state) ||
! 572: ! (ROFF_PRELUDE_Dd & tree->state)) {
! 573: warnx("%s: prelude `Os' out-of-order (line %zu)",
! 574: tree->rbuf->name, tree->rbuf->line);
! 575: return(0);
! 576: }
! 577:
! 578: assert(NULL == tree->last);
! 579: if (NULL == roffnode_new(ROFF_Os, tree->rbuf->line, tree))
! 580: return(0);
! 581:
! 582: tree->state |= ROFF_PRELUDE_Os;
! 583: tree->state &= ~ROFF_PRELUDE;
! 584: tree->state |= ROFF_BODY;
! 585:
! 586: return(1);
! 587: }
! 588:
! 589:
! 590: /* ARGSUSED */
! 591: static int
! 592: roff_Sh(ROFFCALL_ARGS)
! 593: {
! 594:
! 595: if (ROFF_EXIT == type) {
! 596: roffnode_free(ROFF_Sh, tree);
! 597: dbg_leave(tree->args, ROFF_Sh);
! 598: return(1);
! 599: }
! 600:
! 601: dbg_enter(tree->args, ROFF_Sh);
! 602:
! 603: if (NULL == roffnode_new(ROFF_Sh, tree->rbuf->line, tree))
! 604: return(0);
! 605:
! 606: return(1);
! 607: }
! 608:
! 609:
! 610: /* ARGSUSED */
! 611: static int
! 612: roff_Li(ROFFCALL_ARGS)
! 613: {
! 614:
! 615: dbg_enter(tree->args, ROFF_Li);
! 616: dbg_leave(tree->args, ROFF_Li);
! 617:
! 618: return(1);
! 619: }
! 620:
! 621:
! 622: static int
! 623: roffnextopt(const char ***in, char **val)
! 624: {
! 625: const char *arg, **argv;
! 626: int v;
! 627:
! 628: *val = NULL;
! 629: argv = *in;
! 630: assert(argv);
! 631:
! 632: if (NULL == (arg = *argv))
! 633: return(-1);
! 634: if ('-' != *arg)
! 635: return(-1);
! 636: if (ROFF_ARGMAX == (v = rofffindarg(&arg[1])))
! 637: return(-1);
! 638: if ( ! (ROFF_VALUE & tokenargs[v].flags))
! 639: return(v);
! 640:
! 641: *in = ++argv;
! 642:
! 643: /* FIXME: what if this looks like a roff token or argument? */
! 644:
! 645: return(*argv ? v : ROFF_ARGMAX);
! 646: }
! 647:
! 648:
! 649: /* ARGSUSED */
! 650: static int
! 651: roff_An(ROFFCALL_ARGS)
! 652: {
! 653: int c;
! 654: char *val;
! 655:
! 656: dbg_enter(tree->args, ROFF_An);
! 657:
! 658: while (-1 != (c = roffnextopt(&argv, &val))) {
! 659: switch (c) {
! 660: case (ROFF_Split):
! 661: /* Process argument. */
! 662: break;
! 663: case (ROFF_Nosplit):
! 664: /* Process argument. */
! 665: break;
! 666: default:
! 667: warnx("%s: error parsing `An' args (line %zu)",
! 668: tree->rbuf->name,
! 669: tree->rbuf->line);
! 670: return(0);
! 671: }
! 672: argv++;
! 673: }
! 674:
! 675: /* Print header. */
! 676:
! 677: while (*argv) {
! 678: if (/* is_parsable && */ 2 >= strlen(*argv)) {
! 679: if (ROFF_MAX != (c = rofffindtok(*argv))) {
! 680: if (ROFF_CALLABLE & tokens[c].flags) {
! 681: /* Call to token. */
! 682: if ( ! (*tokens[c].cb)(tree, (const char **)argv + 1, ROFF_ENTER))
! 683: return(0);
! 684: }
! 685: /* Print token. */
! 686: } else {
! 687: /* Print token. */
! 688: }
! 689: } else {
! 690: /* Print token. */
! 691: }
! 692: argv++;
! 693: }
! 694:
! 695: /* Print footer. */
! 696:
! 697: dbg_leave(tree->args, ROFF_An);
! 698:
! 699: return(1);
! 700: }
! 701:
CVSweb