Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.23
1.23 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.22 2014/03/30 17:22:01 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/*
 * Static description of one element type, as stored in the nodes[] table.
 */
struct node {
	const char	*name;	/* docbook element name */
	unsigned int	 flags;	/* bit-mask of NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.21 kristaps 90: { "acronym", 0 },
1.4 kristaps 91: { "arg", 0 },
1.1 kristaps 92: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 93: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 94: { "code", 0 },
1.4 kristaps 95: { "command", 0 },
1.15 kristaps 96: { "date", 0 },
1.13 kristaps 97: { "emphasis", 0 },
1.21 kristaps 98: { "envar", 0 },
1.17 kristaps 99: { "filename", 0 },
1.3 kristaps 100: { "funcdef", 0 },
101: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 102: { "funcsynopsis", NODE_IGNTEXT },
103: { "funcsynopsisinfo", 0 },
1.3 kristaps 104: { "function", 0 },
1.16 kristaps 105: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 106: { "link", 0 },
1.13 kristaps 107: { "listitem", NODE_IGNTEXT },
1.19 kristaps 108: { "literal", 0 },
1.1 kristaps 109: { "manvolnum", 0 },
1.4 kristaps 110: { "option", 0 },
1.21 kristaps 111: { "orderedlist", NODE_IGNTEXT },
1.1 kristaps 112: { "para", 0 },
1.3 kristaps 113: { "paramdef", 0 },
114: { "parameter", 0 },
1.1 kristaps 115: { "programlisting", 0 },
1.22 kristaps 116: { "prompt", 0 },
1.1 kristaps 117: { "refclass", NODE_IGNTEXT },
118: { "refdescriptor", NODE_IGNTEXT },
119: { "refentry", NODE_IGNTEXT },
1.15 kristaps 120: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 121: { "refentrytitle", 0 },
122: { "refmeta", NODE_IGNTEXT },
123: { "refmiscinfo", NODE_IGNTEXT },
124: { "refname", 0 },
125: { "refnamediv", NODE_IGNTEXT },
126: { "refpurpose", 0 },
1.20 kristaps 127: { "refsect1", NODE_IGNTEXT },
128: { "refsect2", NODE_IGNTEXT },
1.1 kristaps 129: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 130: { "replaceable", 0 },
1.19 kristaps 131: { "sbr", NODE_IGNTEXT },
1.22 kristaps 132: { "screen", NODE_IGNTEXT },
1.8 kristaps 133: { "structname", 0 },
1.1 kristaps 134: { "synopsis", 0 },
1.13 kristaps 135: { "term", 0 },
1.1 kristaps 136: { NULL, 0 },
137: { "title", 0 },
1.14 kristaps 138: { "ulink", 0 },
1.23 ! kristaps 139: { "userinput", 0 },
1.13 kristaps 140: { "variablelist", NODE_IGNTEXT },
141: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 142: };
143:
/* Forward declaration: the per-element printers recurse into it. */
static void	 pnode_print(struct parse *p, struct pnode *pn);
146:
1.8 kristaps 147: /*
148: * Process a stream of characters.
149: * We store text as nodes in and of themselves.
150: * If a text node is already open, append to it.
151: * If it's not open, open one under the current context.
152: */
1.1 kristaps 153: static void
154: xml_char(void *arg, const XML_Char *p, int sz)
155: {
156: struct parse *ps = arg;
157: struct pnode *dat;
1.4 kristaps 158: int i;
1.1 kristaps 159:
160: /* Stopped or no tree yet. */
161: if (ps->stop || NODE_ROOT == ps->node)
162: return;
163:
164: /* Not supposed to be collecting text. */
165: assert(NULL != ps->cur);
166: if (NODE_IGNTEXT & nodes[ps->node].flags)
167: return;
168:
169: /*
170: * Are we in the midst of processing text?
171: * If we're not processing text right now, then create a text
172: * node for doing so.
1.4 kristaps 173: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 174: * process: strip out all leading whitespace to be sure.
1.1 kristaps 175: */
176: if (NODE_TEXT != ps->node) {
1.4 kristaps 177: for (i = 0; i < sz; i++)
178: if ( ! isspace((int)p[i]))
179: break;
180: if (i == sz)
181: return;
1.10 kristaps 182: p += i;
183: sz -= i;
1.1 kristaps 184: dat = calloc(1, sizeof(struct pnode));
185: if (NULL == dat) {
186: perror(NULL);
187: exit(EXIT_FAILURE);
188: }
189:
190: dat->node = ps->node = NODE_TEXT;
191: dat->parent = ps->cur;
192: TAILQ_INIT(&dat->childq);
1.12 kristaps 193: TAILQ_INIT(&dat->attrq);
1.1 kristaps 194: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
195: ps->cur = dat;
196: assert(NULL != ps->root);
197: }
198:
199: /* Append to current buffer. */
200: assert(sz >= 0);
201: ps->cur->b = realloc(ps->cur->b,
202: ps->cur->bsz + (size_t)sz);
203: if (NULL == ps->cur->b) {
204: perror(NULL);
205: exit(EXIT_FAILURE);
206: }
207: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
208: ps->cur->bsz += (size_t)sz;
209: }
210:
1.10 kristaps 211: static void
212: pnode_trim(struct pnode *pn)
213: {
214:
215: assert(NODE_TEXT == pn->node);
216: for ( ; pn->bsz > 0; pn->bsz--)
217: if ( ! isspace((int)pn->b[pn->bsz - 1]))
218: break;
219: }
220:
1.1 kristaps 221: /*
222: * Begin an element.
223: * First, look for the element.
224: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 225: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 226: * If we find it but we're not parsing yet (i.e., it's not a refentry
227: * and thus out of context), keep going.
1.8 kristaps 228: * If we find it and we're at the root and already have a tree, puke and
229: * exit (FIXME: I don't think this is right?).
230: * If we find it but we're parsing a text node, close out the text node,
231: * return to its parent, and keep going.
1.1 kristaps 232: * Make sure that the element is in the right context.
233: * Lastly, put the node onto our parse tree and continue.
234: */
235: static void
236: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
237: {
1.12 kristaps 238: struct parse *ps = arg;
239: enum nodeid node;
240: enum attrkey key;
241: enum attrval val;
242: struct pnode *dat;
243: struct pattr *pattr;
244: const XML_Char **att;
1.1 kristaps 245:
246: if (ps->stop)
247: return;
248:
249: /* Close out text node, if applicable... */
250: if (NODE_TEXT == ps->node) {
251: assert(NULL != ps->cur);
1.10 kristaps 252: pnode_trim(ps->cur);
1.1 kristaps 253: ps->cur = ps->cur->parent;
254: assert(NULL != ps->cur);
255: ps->node = ps->cur->node;
256: }
257:
258: for (node = 0; node < NODE__MAX; node++)
259: if (NULL == nodes[node].name)
260: continue;
261: else if (0 == strcmp(nodes[node].name, name))
262: break;
263:
264: if (NODE__MAX == node && NODE_ROOT == ps->node) {
265: return;
266: } else if (NODE__MAX == node) {
1.12 kristaps 267: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
268: ps->fname, XML_GetCurrentLineNumber(ps->xml),
269: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 270: ps->stop = 1;
271: return;
272: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 273: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
274: ps->fname, XML_GetCurrentLineNumber(ps->xml),
275: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 276: ps->stop = 1;
277: return;
278: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
279: return;
280: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 281: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
282: "of node \"%s\"\n",
1.12 kristaps 283: ps->fname, XML_GetCurrentLineNumber(ps->xml),
284: XML_GetCurrentColumnNumber(ps->xml),
285: NULL == nodes[ps->node].name ?
1.13 kristaps 286: "(none)" : nodes[ps->node].name,
287: NULL == nodes[node].name ?
288: "(none)" : nodes[node].name);
1.1 kristaps 289: ps->stop = 1;
290: return;
291: }
292:
293: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
294: perror(NULL);
295: exit(EXIT_FAILURE);
296: }
297:
298: dat->node = ps->node = node;
299: dat->parent = ps->cur;
300: TAILQ_INIT(&dat->childq);
1.12 kristaps 301: TAILQ_INIT(&dat->attrq);
1.1 kristaps 302:
303: if (NULL != ps->cur)
304: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
305:
306: ps->cur = dat;
307: if (NULL == ps->root)
308: ps->root = dat;
1.12 kristaps 309:
310: /*
311: * Process attributes.
312: */
313: for (att = atts; NULL != *att; att += 2) {
314: for (key = 0; key < ATTRKEY__MAX; key++)
315: if (0 == strcmp(*att, attrkeys[key]))
316: break;
317: if (ATTRKEY__MAX == key) {
318: fprintf(stderr, "%s:%zu:%zu: unknown "
319: "attribute \"%s\"\n", ps->fname,
320: XML_GetCurrentLineNumber(ps->xml),
321: XML_GetCurrentColumnNumber(ps->xml),
322: *att);
323: continue;
324: } else if ( ! isattrkey(node, key)) {
325: fprintf(stderr, "%s:%zu:%zu: bad "
326: "attribute \"%s\"\n", ps->fname,
327: XML_GetCurrentLineNumber(ps->xml),
328: XML_GetCurrentColumnNumber(ps->xml),
329: *att);
330: continue;
331: }
332: for (val = 0; val < ATTRVAL__MAX; val++)
333: if (0 == strcmp(*(att + 1), attrvals[val]))
334: break;
335: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
336: fprintf(stderr, "%s:%zu:%zu: bad "
337: "value \"%s\"\n", ps->fname,
338: XML_GetCurrentLineNumber(ps->xml),
339: XML_GetCurrentColumnNumber(ps->xml),
340: *(att + 1));
341: continue;
342: }
343: pattr = calloc(1, sizeof(struct pattr));
344: pattr->key = key;
345: pattr->val = val;
346: if (ATTRVAL__MAX == val)
347: pattr->rawval = strdup(*(att + 1));
348: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
349: }
350:
1.1 kristaps 351: }
352:
353: /*
354: * Roll up the parse tree.
1.8 kristaps 355: * If we're at a text node, roll that one up first.
1.1 kristaps 356: * If we hit the root, then assign ourselves as the NODE_ROOT.
357: */
358: static void
359: xml_elem_end(void *arg, const XML_Char *name)
360: {
361: struct parse *ps = arg;
362:
363: if (ps->stop || NODE_ROOT == ps->node)
364: return;
365:
366: /* Close out text node, if applicable... */
367: if (NODE_TEXT == ps->node) {
368: assert(NULL != ps->cur);
1.10 kristaps 369: pnode_trim(ps->cur);
1.1 kristaps 370: ps->cur = ps->cur->parent;
371: assert(NULL != ps->cur);
372: ps->node = ps->cur->node;
373: }
374:
375: if (NULL == (ps->cur = ps->cur->parent))
376: ps->node = NODE_ROOT;
377: else
378: ps->node = ps->cur->node;
379: }
380:
1.8 kristaps 381: /*
382: * Recursively free a node (NULL is ok).
383: */
1.1 kristaps 384: static void
385: pnode_free(struct pnode *pn)
386: {
387: struct pnode *pp;
1.12 kristaps 388: struct pattr *ap;
1.1 kristaps 389:
390: if (NULL == pn)
391: return;
392:
393: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
394: TAILQ_REMOVE(&pn->childq, pp, child);
395: pnode_free(pp);
396: }
397:
1.12 kristaps 398: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
399: TAILQ_REMOVE(&pn->attrq, ap, child);
400: free(ap->rawval);
401: free(ap);
402: }
403:
1.1 kristaps 404: free(pn->b);
405: free(pn);
406: }
407:
1.8 kristaps 408: /*
409: * Unlink a node from its parent and pnode_free() it.
410: */
1.1 kristaps 411: static void
412: pnode_unlink(struct pnode *pn)
413: {
414:
415: if (NULL != pn->parent)
416: TAILQ_REMOVE(&pn->parent->childq, pn, child);
417: pnode_free(pn);
418: }
419:
1.8 kristaps 420: /*
421: * Unlink all children of a node and pnode_free() them.
422: */
1.1 kristaps 423: static void
1.4 kristaps 424: pnode_unlinksub(struct pnode *pn)
425: {
426:
427: while ( ! TAILQ_EMPTY(&pn->childq))
428: pnode_unlink(TAILQ_FIRST(&pn->childq));
429: }
430:
1.8 kristaps 431: /*
432: * Reset the lookaside buffer.
433: */
1.4 kristaps 434: static void
1.1 kristaps 435: bufclear(struct parse *p)
436: {
437:
438: p->b[p->bsz = 0] = '\0';
439: }
440:
1.8 kristaps 441: /*
442: * Append NODE_TEXT contents to the current buffer, reallocating its
443: * size if necessary.
444: * The buffer is ALWAYS nil-terminated.
445: */
1.1 kristaps 446: static void
447: bufappend(struct parse *p, struct pnode *pn)
448: {
449:
450: assert(NODE_TEXT == pn->node);
451: if (p->bsz + pn->bsz + 1 > p->mbsz) {
452: p->mbsz = p->bsz + pn->bsz + 1;
453: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
454: perror(NULL);
455: exit(EXIT_FAILURE);
456: }
457: }
458: memcpy(p->b + p->bsz, pn->b, pn->bsz);
459: p->bsz += pn->bsz;
460: p->b[p->bsz] = '\0';
461: }
462:
1.8 kristaps 463: /*
464: * Recursively append all NODE_TEXT nodes to the buffer.
465: * This descends into non-text nodes, but doesn't do anything beyond
466: * them.
467: * In other words, this is a recursive text grok.
468: */
1.3 kristaps 469: static void
470: bufappend_r(struct parse *p, struct pnode *pn)
471: {
472: struct pnode *pp;
473:
474: if (NODE_TEXT == pn->node)
475: bufappend(p, pn);
476: TAILQ_FOREACH(pp, &pn->childq, child)
477: bufappend_r(p, pp);
478: }
479:
1.12 kristaps 480: #define MACROLINE_NORM 0
481: #define MACROLINE_UPPER 1
1.1 kristaps 482: /*
1.8 kristaps 483: * Recursively print text presumably on a macro line.
1.1 kristaps 484: * Convert all whitespace to regular spaces.
485: */
486: static void
1.12 kristaps 487: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 488: {
489: char *cp;
490:
1.13 kristaps 491: if (0 == p->newln)
492: putchar(' ');
493:
1.1 kristaps 494: bufclear(p);
1.3 kristaps 495: bufappend_r(p, pn);
1.1 kristaps 496:
497: /* Convert all space to spaces. */
498: for (cp = p->b; '\0' != *cp; cp++)
499: if (isspace((int)*cp))
500: *cp = ' ';
501:
502: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 503: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 504: for ( ; '\0' != *cp; cp++) {
505: /* Escape us if we look like a macro. */
506: if ((cp == p->b || ' ' == *(cp - 1)) &&
507: isupper((int)*cp) &&
508: '\0' != *(cp + 1) &&
509: islower((int)*(cp + 1)) &&
510: ('\0' == *(cp + 2) ||
511: ' ' == *(cp + 2) ||
512: (islower((int)*(cp + 2)) &&
513: ('\0' == *(cp + 3) ||
514: ' ' == *(cp + 3)))))
515: fputs("\\&", stdout);
1.12 kristaps 516: if (MACROLINE_UPPER & fl)
517: putchar(toupper((int)*cp));
518: else
519: putchar((int)*cp);
1.1 kristaps 520: /* If we're a character escape, escape us. */
521: if ('\\' == *cp)
522: putchar('e');
523: }
524: }
525:
1.12 kristaps 526: static void
527: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
528: {
529:
530: pnode_printmacrolinetext(p, pn, 0);
531: }
532:
1.1 kristaps 533: /*
534: * Just pnode_printmacrolinepart() but with a newline.
535: * If no text, just the newline.
536: */
537: static void
538: pnode_printmacroline(struct parse *p, struct pnode *pn)
539: {
540:
1.13 kristaps 541: assert(0 == p->newln);
1.12 kristaps 542: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 543: putchar('\n');
1.13 kristaps 544: p->newln = 1;
1.1 kristaps 545: }
546:
1.10 kristaps 547: static void
548: pnode_printmopen(struct parse *p)
549: {
550: if (p->newln) {
551: putchar('.');
552: p->newln = 0;
553: } else
554: putchar(' ');
555: }
556:
557: static void
558: pnode_printmclose(struct parse *p, int sv)
559: {
560:
561: if (sv && ! p->newln) {
562: putchar('\n');
563: p->newln = 1;
564: }
565: }
566:
1.8 kristaps 567: /*
1.10 kristaps 568: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 569: */
1.1 kristaps 570: static void
1.6 kristaps 571: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
572: {
573: struct pnode *pp;
574:
1.10 kristaps 575: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 576: if (NODE_TITLE == pp->node) {
577: pnode_unlink(pp);
1.10 kristaps 578: return;
1.6 kristaps 579: }
580: }
581:
1.8 kristaps 582: /*
583: * Start a hopefully-named `Sh' section.
584: */
1.6 kristaps 585: static void
1.1 kristaps 586: pnode_printrefsect(struct parse *p, struct pnode *pn)
587: {
588: struct pnode *pp;
589:
590: TAILQ_FOREACH(pp, &pn->childq, child)
591: if (NODE_TITLE == pp->node)
592: break;
593:
1.20 kristaps 594: if (NODE_REFSECT1 == pn->node)
595: fputs(".Sh", stdout);
596: else
597: fputs(".Ss", stdout);
598:
1.13 kristaps 599: p->newln = 0;
1.4 kristaps 600:
1.5 kristaps 601: if (NULL != pp) {
1.20 kristaps 602: pnode_printmacrolinetext(p, pp,
603: NODE_REFSECT1 == pn->node ?
604: MACROLINE_UPPER : 0);
1.18 kristaps 605: pnode_printmclose(p, 1);
1.5 kristaps 606: pnode_unlink(pp);
1.13 kristaps 607: } else {
1.4 kristaps 608: puts("UNKNOWN");
1.13 kristaps 609: p->newln = 1;
610: }
1.1 kristaps 611: }
612:
1.8 kristaps 613: /*
614: * Start a reference, extracting the title and volume.
615: */
1.1 kristaps 616: static void
617: pnode_printciterefentry(struct parse *p, struct pnode *pn)
618: {
619: struct pnode *pp, *title, *manvol;
620:
621: title = manvol = NULL;
1.13 kristaps 622: assert(p->newln);
1.1 kristaps 623: TAILQ_FOREACH(pp, &pn->childq, child)
624: if (NODE_MANVOLNUM == pp->node)
625: manvol = pp;
626: else if (NODE_REFENTRYTITLE == pp->node)
627: title = pp;
628:
1.13 kristaps 629: fputs(".Xr", stdout);
630: p->newln = 0;
1.4 kristaps 631:
1.1 kristaps 632: if (NULL != title) {
633: pnode_printmacrolinepart(p, title);
634: } else
1.13 kristaps 635: fputs(" unknown ", stdout);
1.4 kristaps 636:
1.13 kristaps 637: if (NULL == manvol) {
638: puts(" 1");
639: p->newln = 1;
640: } else
1.1 kristaps 641: pnode_printmacroline(p, manvol);
642: }
643:
644: static void
645: pnode_printrefmeta(struct parse *p, struct pnode *pn)
646: {
647: struct pnode *pp, *title, *manvol;
648:
649: title = manvol = NULL;
1.13 kristaps 650: assert(p->newln);
1.1 kristaps 651: TAILQ_FOREACH(pp, &pn->childq, child)
652: if (NODE_MANVOLNUM == pp->node)
653: manvol = pp;
654: else if (NODE_REFENTRYTITLE == pp->node)
655: title = pp;
656:
1.2 kristaps 657: puts(".Dd $Mdocdate" "$");
1.13 kristaps 658: fputs(".Dt", stdout);
659: p->newln = 0;
1.1 kristaps 660:
1.13 kristaps 661: if (NULL != title)
1.12 kristaps 662: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 663: else
664: fputs(" UNKNOWN ", stdout);
665:
666: if (NULL == manvol) {
667: puts(" 1");
668: p->newln = 1;
1.1 kristaps 669: } else
670: pnode_printmacroline(p, manvol);
671:
672: puts(".Os");
673: }
674:
1.3 kristaps 675: static void
676: pnode_printfuncdef(struct parse *p, struct pnode *pn)
677: {
678: struct pnode *pp, *ftype, *func;
679:
1.13 kristaps 680: assert(p->newln);
1.3 kristaps 681: ftype = func = NULL;
682: TAILQ_FOREACH(pp, &pn->childq, child)
683: if (NODE_TEXT == pp->node)
684: ftype = pp;
685: else if (NODE_FUNCTION == pp->node)
686: func = pp;
687:
688: if (NULL != ftype) {
1.13 kristaps 689: fputs(".Ft", stdout);
690: p->newln = 0;
1.3 kristaps 691: pnode_printmacroline(p, ftype);
692: }
693:
694: if (NULL != func) {
1.13 kristaps 695: fputs(".Fo", stdout);
696: p->newln = 0;
1.3 kristaps 697: pnode_printmacroline(p, func);
1.13 kristaps 698: } else {
1.3 kristaps 699: puts(".Fo UNKNOWN");
1.13 kristaps 700: p->newln = 1;
701: }
1.3 kristaps 702: }
703:
704: static void
705: pnode_printparamdef(struct parse *p, struct pnode *pn)
706: {
707: struct pnode *pp, *ptype, *param;
708:
1.13 kristaps 709: assert(p->newln);
1.3 kristaps 710: ptype = param = NULL;
711: TAILQ_FOREACH(pp, &pn->childq, child)
712: if (NODE_TEXT == pp->node)
713: ptype = pp;
714: else if (NODE_PARAMETER == pp->node)
715: param = pp;
716:
717: fputs(".Fa \"", stdout);
1.13 kristaps 718: p->newln = 0;
1.3 kristaps 719: if (NULL != ptype) {
720: pnode_printmacrolinepart(p, ptype);
721: putchar(' ');
722: }
723:
724: if (NULL != param)
725: pnode_printmacrolinepart(p, param);
726:
727: puts("\"");
1.13 kristaps 728: p->newln = 1;
1.3 kristaps 729: }
730:
731: static void
732: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
733: {
734: struct pnode *pp, *fdef;
735:
1.13 kristaps 736: assert(p->newln);
1.3 kristaps 737: TAILQ_FOREACH(fdef, &pn->childq, child)
738: if (NODE_FUNCDEF == fdef->node)
739: break;
740:
1.4 kristaps 741: if (NULL != fdef)
1.3 kristaps 742: pnode_printfuncdef(p, fdef);
1.4 kristaps 743: else
1.3 kristaps 744: puts(".Fo UNKNOWN");
745:
1.4 kristaps 746: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 747: if (NODE_PARAMDEF == pp->node)
748: pnode_printparamdef(p, pp);
749:
750: puts(".Fc");
1.13 kristaps 751: p->newln = 1;
1.3 kristaps 752: }
753:
1.10 kristaps 754: /*
755: * The <arg> element is more complicated than it should be because text
756: * nodes are treated like ".Ar foo", but non-text nodes need to be
757: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 758: * This also handles the case of "repetition" (or in other words, the
759: * ellipsis following an argument) and optionality.
1.10 kristaps 760: */
1.4 kristaps 761: static void
1.10 kristaps 762: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 763: {
764: struct pnode *pp;
1.12 kristaps 765: struct pattr *ap;
766: int isop, isrep;
767:
768: isop = 1;
769: isrep = 0;
770: TAILQ_FOREACH(ap, &pn->attrq, child)
771: if (ATTRKEY_CHOICE == ap->key &&
772: (ATTRVAL_PLAIN == ap->val ||
773: ATTRVAL_REQ == ap->val))
774: isop = 0;
775: else if (ATTRKEY_REP == ap->key &&
776: (ATTRVAL_REPEAT == ap->val))
777: isrep = 1;
778:
779: if (isop) {
780: pnode_printmopen(p);
1.13 kristaps 781: fputs("Op", stdout);
1.12 kristaps 782: }
1.4 kristaps 783:
1.10 kristaps 784: TAILQ_FOREACH(pp, &pn->childq, child) {
785: if (NODE_TEXT == pp->node) {
786: pnode_printmopen(p);
1.13 kristaps 787: fputs("Ar", stdout);
1.10 kristaps 788: }
789: pnode_print(p, pp);
1.12 kristaps 790: if (NODE_TEXT == pp->node && isrep)
791: fputs("...", stdout);
1.10 kristaps 792: }
1.4 kristaps 793: }
794:
1.7 kristaps 795: /*
796: * Recursively search and return the first instance of "node".
797: */
798: static struct pnode *
799: pnode_findfirst(struct pnode *pn, enum nodeid node)
800: {
801: struct pnode *pp, *res;
802:
803: res = NULL;
804: TAILQ_FOREACH(pp, &pn->childq, child) {
805: res = pp->node == node ? pp :
806: pnode_findfirst(pp, node);
807: if (NULL != res)
808: break;
809: }
810:
811: return(res);
812: }
813:
814: static void
815: pnode_printprologue(struct parse *p, struct pnode *pn)
816: {
817: struct pnode *pp;
818:
1.9 kristaps 819: pp = NULL == p->root ? NULL :
820: pnode_findfirst(p->root, NODE_REFMETA);
821:
822: if (NULL != pp) {
1.7 kristaps 823: pnode_printrefmeta(p, pp);
824: pnode_unlink(pp);
825: } else {
826: puts(".\\\" Supplying bogus prologue...");
827: puts(".Dd $Mdocdate" "$");
828: puts(".Dt UNKNOWN 1");
829: puts(".Os");
830: }
831: }
832:
1.13 kristaps 833: static void
834: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
835: {
836: struct pnode *pp;
837:
838: assert(p->newln);
839: TAILQ_FOREACH(pp, &pn->childq, child)
840: if (NODE_TERM == pp->node) {
841: fputs(".It", stdout);
842: p->newln = 0;
843: pnode_print(p, pp);
844: pnode_unlink(pp);
1.16 kristaps 845: pnode_printmclose(p, 1);
1.13 kristaps 846: return;
847: }
848:
849: puts(".It");
850: p->newln = 1;
851: }
852:
853: static void
1.16 kristaps 854: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
855: {
856: struct pnode *pp;
857:
858: assert(p->newln);
859: TAILQ_FOREACH(pp, &pn->childq, child)
860: if (NODE_TITLE == pp->node) {
861: puts(".Pp");
862: pnode_print(p, pp);
863: pnode_unlink(pp);
864: }
865:
866: assert(p->newln);
1.21 kristaps 867:
868: if (NODE_ORDEREDLIST == pn->node)
869: puts(".Bl -enum");
870: else
871: puts(".Bl -item");
872:
1.16 kristaps 873: TAILQ_FOREACH(pp, &pn->childq, child) {
874: assert(p->newln);
875: puts(".It");
876: pnode_print(p, pp);
877: pnode_printmclose(p, 1);
878: }
879: assert(p->newln);
880: puts(".El");
881: }
882:
883: static void
1.13 kristaps 884: pnode_printvariablelist(struct parse *p, struct pnode *pn)
885: {
886: struct pnode *pp;
887:
888: assert(p->newln);
889: TAILQ_FOREACH(pp, &pn->childq, child)
890: if (NODE_TITLE == pp->node) {
891: puts(".Pp");
892: pnode_print(p, pp);
893: pnode_unlink(pp);
894: }
895:
896: assert(p->newln);
897: puts(".Bl -tag -width Ds");
898: TAILQ_FOREACH(pp, &pn->childq, child)
899: if (NODE_VARLISTENTRY != pp->node) {
900: assert(p->newln);
901: fputs(".It", stdout);
902: pnode_printmacroline(p, pp);
903: } else {
904: assert(p->newln);
905: pnode_print(p, pp);
906: }
907: assert(p->newln);
908: puts(".El");
909: }
910:
1.1 kristaps 911: /*
912: * Print a parsed node (or ignore it--whatever).
913: * This is a recursive function.
1.23 ! kristaps 914: * FIXME: if we're in a literal context (<screen> or <programlisting> or
! 915: * whatever), don't print inline macros.
1.1 kristaps 916: */
917: static void
918: pnode_print(struct parse *p, struct pnode *pn)
919: {
920: struct pnode *pp;
921: char *cp;
1.10 kristaps 922: int last, sv;
1.1 kristaps 923:
924: if (NULL == pn)
925: return;
926:
1.10 kristaps 927: sv = p->newln;
1.1 kristaps 928:
929: switch (pn->node) {
1.4 kristaps 930: case (NODE_ARG):
1.10 kristaps 931: pnode_printarg(p, pn);
1.4 kristaps 932: pnode_unlinksub(pn);
933: break;
1.1 kristaps 934: case (NODE_CITEREFENTRY):
1.10 kristaps 935: assert(p->newln);
1.1 kristaps 936: pnode_printciterefentry(p, pn);
1.4 kristaps 937: pnode_unlinksub(pn);
1.1 kristaps 938: break;
939: case (NODE_CODE):
1.10 kristaps 940: pnode_printmopen(p);
1.13 kristaps 941: fputs("Li", stdout);
1.4 kristaps 942: break;
943: case (NODE_COMMAND):
1.10 kristaps 944: pnode_printmopen(p);
1.13 kristaps 945: fputs("Nm", stdout);
946: break;
947: case (NODE_EMPHASIS):
948: pnode_printmopen(p);
949: fputs("Em", stdout);
1.1 kristaps 950: break;
1.21 kristaps 951: case (NODE_ENVAR):
952: pnode_printmopen(p);
953: fputs("Ev", stdout);
954: break;
1.17 kristaps 955: case (NODE_FILENAME):
956: pnode_printmopen(p);
957: fputs("Pa", stdout);
958: break;
1.3 kristaps 959: case (NODE_FUNCTION):
1.10 kristaps 960: pnode_printmopen(p);
1.13 kristaps 961: fputs("Fn", stdout);
1.3 kristaps 962: break;
963: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 964: assert(p->newln);
1.3 kristaps 965: pnode_printfuncprototype(p, pn);
1.4 kristaps 966: pnode_unlinksub(pn);
1.3 kristaps 967: break;
1.1 kristaps 968: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 969: pnode_printmopen(p);
1.13 kristaps 970: fputs("Fd", stdout);
1.16 kristaps 971: break;
972: case (NODE_ITEMIZEDLIST):
1.21 kristaps 973: /* FALLTHROUGH */
974: case (NODE_ORDEREDLIST):
1.16 kristaps 975: assert(p->newln);
976: pnode_printitemizedlist(p, pn);
1.10 kristaps 977: break;
1.19 kristaps 978: case (NODE_LITERAL):
979: pnode_printmopen(p);
980: fputs("Li", stdout);
981: break;
1.10 kristaps 982: case (NODE_OPTION):
983: pnode_printmopen(p);
1.13 kristaps 984: fputs("Fl", stdout);
1.1 kristaps 985: break;
986: case (NODE_PARA):
1.10 kristaps 987: assert(p->newln);
1.13 kristaps 988: if (NULL != pn->parent &&
989: NODE_LISTITEM == pn->parent->node)
990: break;
1.1 kristaps 991: puts(".Pp");
1.3 kristaps 992: break;
993: case (NODE_PARAMETER):
1.10 kristaps 994: /* Suppress non-text children... */
995: pnode_printmopen(p);
996: fputs("Fa \"", stdout);
1.3 kristaps 997: pnode_printmacrolinepart(p, pn);
998: puts("\"");
1.4 kristaps 999: pnode_unlinksub(pn);
1.1 kristaps 1000: break;
1001: case (NODE_PROGRAMLISTING):
1.22 kristaps 1002: /* FALLTHROUGH */
1003: case (NODE_SCREEN):
1.10 kristaps 1004: assert(p->newln);
1.1 kristaps 1005: puts(".Bd -literal");
1.15 kristaps 1006: break;
1007: case (NODE_REFENTRYINFO):
1008: /* Suppress. */
1009: pnode_unlinksub(pn);
1.1 kristaps 1010: break;
1011: case (NODE_REFMETA):
1.7 kristaps 1012: abort();
1.1 kristaps 1013: break;
1014: case (NODE_REFNAME):
1.10 kristaps 1015: /* Suppress non-text children... */
1016: pnode_printmopen(p);
1.13 kristaps 1017: fputs("Nm", stdout);
1018: p->newln = 0;
1.10 kristaps 1019: pnode_printmacrolinepart(p, pn);
1.4 kristaps 1020: pnode_unlinksub(pn);
1.10 kristaps 1021: break;
1.1 kristaps 1022: case (NODE_REFNAMEDIV):
1.10 kristaps 1023: assert(p->newln);
1.1 kristaps 1024: puts(".Sh NAME");
1025: break;
1026: case (NODE_REFPURPOSE):
1.10 kristaps 1027: assert(p->newln);
1.13 kristaps 1028: pnode_printmopen(p);
1029: fputs("Nd", stdout);
1.10 kristaps 1030: break;
1.1 kristaps 1031: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1032: assert(p->newln);
1.6 kristaps 1033: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1034: puts(".Sh SYNOPSIS");
1.1 kristaps 1035: break;
1036: case (NODE_REFSECT1):
1.20 kristaps 1037: /* FALLTHROUGH */
1038: case (NODE_REFSECT2):
1.10 kristaps 1039: assert(p->newln);
1.1 kristaps 1040: pnode_printrefsect(p, pn);
1041: break;
1.13 kristaps 1042: case (NODE_REPLACEABLE):
1043: pnode_printmopen(p);
1044: fputs("Ar", stdout);
1045: break;
1.19 kristaps 1046: case (NODE_SBR):
1047: assert(p->newln);
1048: puts(".br");
1049: break;
1.8 kristaps 1050: case (NODE_STRUCTNAME):
1.10 kristaps 1051: pnode_printmopen(p);
1.13 kristaps 1052: fputs("Vt", stdout);
1.10 kristaps 1053: break;
1.1 kristaps 1054: case (NODE_TEXT):
1.13 kristaps 1055: if (0 == p->newln)
1056: putchar(' ');
1.1 kristaps 1057: bufclear(p);
1058: bufappend(p, pn);
1059: /*
1060: * Output all characters, squeezing out whitespace
1061: * between newlines.
1062: * XXX: all whitespace, including tabs (?).
1063: * Remember to escape control characters and escapes.
1064: */
1.10 kristaps 1065: assert(p->bsz);
1.20 kristaps 1066: cp = p->b;
1067: /*
1068: * There's often a superfluous "-" in its <option> tags
1069: * before the actual flags themselves.
1070: * "Fl" does this for us, so remove it.
1071: */
1072: if (NULL != pn->parent &&
1073: NODE_OPTION == pn->parent->node &&
1074: '-' == *cp)
1075: cp++;
1076: for (last = '\n'; '\0' != *cp; ) {
1.1 kristaps 1077: if ('\n' == last) {
1078: /* Consume all whitespace. */
1079: if (isspace((int)*cp)) {
1080: while (isspace((int)*cp))
1081: cp++;
1082: continue;
1083: } else if ('\'' == *cp || '.' == *cp)
1084: fputs("\\&", stdout);
1085: }
1086: putchar(last = *cp++);
1087: /* If we're a character escape, escape us. */
1088: if ('\\' == last)
1089: putchar('e');
1090: }
1.10 kristaps 1091: p->newln = 0;
1.1 kristaps 1092: break;
1.13 kristaps 1093: case (NODE_VARIABLELIST):
1094: assert(p->newln);
1095: pnode_printvariablelist(p, pn);
1096: pnode_unlinksub(pn);
1097: break;
1098: case (NODE_VARLISTENTRY):
1099: assert(p->newln);
1100: pnode_printvarlistentry(p, pn);
1101: break;
1.23 ! kristaps 1102: case (NODE_USERINPUT):
! 1103: pnode_printmopen(p);
! 1104: fputs("Li", stdout);
! 1105: break;
1.1 kristaps 1106: default:
1107: break;
1108: }
1109:
1110: TAILQ_FOREACH(pp, &pn->childq, child)
1111: pnode_print(p, pp);
1112:
1113: switch (pn->node) {
1.10 kristaps 1114: case (NODE_ARG):
1115: case (NODE_CODE):
1116: case (NODE_COMMAND):
1.13 kristaps 1117: case (NODE_EMPHASIS):
1.21 kristaps 1118: case (NODE_ENVAR):
1.17 kristaps 1119: case (NODE_FILENAME):
1.10 kristaps 1120: case (NODE_FUNCTION):
1121: case (NODE_FUNCSYNOPSISINFO):
1.19 kristaps 1122: case (NODE_LITERAL):
1.10 kristaps 1123: case (NODE_OPTION):
1124: case (NODE_PARAMETER):
1.13 kristaps 1125: case (NODE_REPLACEABLE):
1126: case (NODE_REFPURPOSE):
1.10 kristaps 1127: case (NODE_STRUCTNAME):
1128: case (NODE_TEXT):
1.23 ! kristaps 1129: case (NODE_USERINPUT):
1.10 kristaps 1130: pnode_printmclose(p, sv);
1131: break;
1.12 kristaps 1132: case (NODE_REFNAME):
1133: /*
1134: * If we're in the NAME macro and we have multiple
1135: * <refname> macros in sequence, then print out a
1136: * trailing comma before the newline.
1137: */
1138: if (NULL != pn->parent &&
1139: NODE_REFNAMEDIV == pn->parent->node &&
1140: NULL != TAILQ_NEXT(pn, child) &&
1141: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1142: fputs(" ,", stdout);
1143: pnode_printmclose(p, sv);
1144: break;
1.1 kristaps 1145: case (NODE_PROGRAMLISTING):
1.22 kristaps 1146: /* FALLTHROUGH */
1147: case (NODE_SCREEN):
1.10 kristaps 1148: assert(p->newln);
1.1 kristaps 1149: puts(".Ed");
1.10 kristaps 1150: p->newln = 1;
1.1 kristaps 1151: break;
1152: default:
1153: break;
1154: }
1155: }
1156:
1157: /*
1158: * Loop around the read buffer until we've drained it of all data.
1159: * Invoke the parser context with each buffer fill.
1160: */
1161: static int
1162: readfile(XML_Parser xp, int fd,
1163: char *b, size_t bsz, const char *fn)
1164: {
1165: struct parse p;
1166: int rc;
1167: ssize_t ssz;
1168:
1169: memset(&p, 0, sizeof(struct parse));
1170:
1171: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1172: p.fname = fn;
1173: p.xml = xp;
1.1 kristaps 1174:
1175: XML_SetCharacterDataHandler(xp, xml_char);
1176: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1177: XML_SetUserData(xp, &p);
1178:
1179: while ((ssz = read(fd, b, bsz)) >= 0) {
1180: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1181: fprintf(stderr, "%s: %s\n", fn,
1182: XML_ErrorString
1183: (XML_GetErrorCode(xp)));
1184: else if ( ! p.stop && ssz > 0)
1185: continue;
1186: /*
1187: * Exit when we've read all or errors have occured
1188: * during the parse sequence.
1189: */
1.10 kristaps 1190: p.newln = 1;
1.7 kristaps 1191: pnode_printprologue(&p, p.root);
1.1 kristaps 1192: pnode_print(&p, p.root);
1193: pnode_free(p.root);
1194: free(p.b);
1195: return(0 != rc && ! p.stop);
1196: }
1197:
1198: /* Read error has occured. */
1199: perror(fn);
1200: pnode_free(p.root);
1201: free(p.b);
1202: return(0);
1203: }
1204:
1205: int
1206: main(int argc, char *argv[])
1207: {
1208: XML_Parser xp;
1209: const char *fname;
1210: char *buf;
1211: int fd, rc;
1212:
1213: fname = "-";
1214: xp = NULL;
1215: buf = NULL;
1216: rc = 0;
1217:
1218: if (-1 != getopt(argc, argv, ""))
1219: return(EXIT_FAILURE);
1220:
1221: argc -= optind;
1222: argv += optind;
1223:
1224: if (argc > 1)
1225: return(EXIT_FAILURE);
1226: else if (argc > 0)
1227: fname = argv[0];
1228:
1229: /* Read from stdin or a file. */
1230: fd = 0 == strcmp(fname, "-") ?
1231: STDIN_FILENO : open(fname, O_RDONLY, 0);
1232:
1233: /*
1234: * Open file for reading.
1235: * Allocate a read buffer.
1236: * Create the parser context.
1237: * Dive directly into the parse.
1238: */
1239: if (-1 == fd)
1240: perror(fname);
1241: else if (NULL == (buf = malloc(4096)))
1242: perror(NULL);
1243: else if (NULL == (xp = XML_ParserCreate(NULL)))
1244: perror(NULL);
1245: else if ( ! readfile(xp, fd, buf, 4096, fname))
1246: rc = 1;
1247:
1248: XML_ParserFree(xp);
1249: free(buf);
1250: if (STDIN_FILENO != fd)
1251: close(fd);
1252: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1253: }
CVSweb