Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.19
1.19 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.18 2014/03/30 16:33:27 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/*
 * Static description of one element known to the parser.
 */
struct node {
	/* DocBook element name compared against incoming tags. */
	const char	*name;
	/* Bitmask of the NODE_* flags below. */
	unsigned int	 flags;
#define	NODE_IGNTEXT	 1 /* ignore all contained text */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.15 kristaps 95: { "date", 0 },
1.13 kristaps 96: { "emphasis", 0 },
1.17 kristaps 97: { "filename", 0 },
1.3 kristaps 98: { "funcdef", 0 },
99: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 100: { "funcsynopsis", NODE_IGNTEXT },
101: { "funcsynopsisinfo", 0 },
1.3 kristaps 102: { "function", 0 },
1.16 kristaps 103: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 104: { "link", 0 },
1.13 kristaps 105: { "listitem", NODE_IGNTEXT },
1.19 ! kristaps 106: { "literal", 0 },
1.1 kristaps 107: { "manvolnum", 0 },
1.4 kristaps 108: { "option", 0 },
1.1 kristaps 109: { "para", 0 },
1.3 kristaps 110: { "paramdef", 0 },
111: { "parameter", 0 },
1.1 kristaps 112: { "programlisting", 0 },
113: { "refclass", NODE_IGNTEXT },
114: { "refdescriptor", NODE_IGNTEXT },
115: { "refentry", NODE_IGNTEXT },
1.15 kristaps 116: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 117: { "refentrytitle", 0 },
118: { "refmeta", NODE_IGNTEXT },
119: { "refmiscinfo", NODE_IGNTEXT },
120: { "refname", 0 },
121: { "refnamediv", NODE_IGNTEXT },
122: { "refpurpose", 0 },
123: { "refsect1", 0 },
124: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 125: { "replaceable", 0 },
1.19 ! kristaps 126: { "sbr", NODE_IGNTEXT },
1.8 kristaps 127: { "structname", 0 },
1.1 kristaps 128: { "synopsis", 0 },
1.13 kristaps 129: { "term", 0 },
1.1 kristaps 130: { NULL, 0 },
131: { "title", 0 },
1.14 kristaps 132: { "ulink", 0 },
1.13 kristaps 133: { "variablelist", NODE_IGNTEXT },
134: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 135: };
136:
/* Forward declaration: the per-element printers recurse into it. */
static void	pnode_print(struct parse *p, struct pnode *pn);
139:
1.8 kristaps 140: /*
141: * Process a stream of characters.
142: * We store text as nodes in and of themselves.
143: * If a text node is already open, append to it.
144: * If it's not open, open one under the current context.
145: */
1.1 kristaps 146: static void
147: xml_char(void *arg, const XML_Char *p, int sz)
148: {
149: struct parse *ps = arg;
150: struct pnode *dat;
1.4 kristaps 151: int i;
1.1 kristaps 152:
153: /* Stopped or no tree yet. */
154: if (ps->stop || NODE_ROOT == ps->node)
155: return;
156:
157: /* Not supposed to be collecting text. */
158: assert(NULL != ps->cur);
159: if (NODE_IGNTEXT & nodes[ps->node].flags)
160: return;
161:
162: /*
163: * Are we in the midst of processing text?
164: * If we're not processing text right now, then create a text
165: * node for doing so.
1.4 kristaps 166: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 167: * process: strip out all leading whitespace to be sure.
1.1 kristaps 168: */
169: if (NODE_TEXT != ps->node) {
1.4 kristaps 170: for (i = 0; i < sz; i++)
171: if ( ! isspace((int)p[i]))
172: break;
173: if (i == sz)
174: return;
1.10 kristaps 175: p += i;
176: sz -= i;
1.1 kristaps 177: dat = calloc(1, sizeof(struct pnode));
178: if (NULL == dat) {
179: perror(NULL);
180: exit(EXIT_FAILURE);
181: }
182:
183: dat->node = ps->node = NODE_TEXT;
184: dat->parent = ps->cur;
185: TAILQ_INIT(&dat->childq);
1.12 kristaps 186: TAILQ_INIT(&dat->attrq);
1.1 kristaps 187: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
188: ps->cur = dat;
189: assert(NULL != ps->root);
190: }
191:
192: /* Append to current buffer. */
193: assert(sz >= 0);
194: ps->cur->b = realloc(ps->cur->b,
195: ps->cur->bsz + (size_t)sz);
196: if (NULL == ps->cur->b) {
197: perror(NULL);
198: exit(EXIT_FAILURE);
199: }
200: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
201: ps->cur->bsz += (size_t)sz;
202: }
203:
1.10 kristaps 204: static void
205: pnode_trim(struct pnode *pn)
206: {
207:
208: assert(NODE_TEXT == pn->node);
209: for ( ; pn->bsz > 0; pn->bsz--)
210: if ( ! isspace((int)pn->b[pn->bsz - 1]))
211: break;
212: }
213:
1.1 kristaps 214: /*
215: * Begin an element.
216: * First, look for the element.
217: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 218: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 219: * If we find it but we're not parsing yet (i.e., it's not a refentry
220: * and thus out of context), keep going.
1.8 kristaps 221: * If we find it and we're at the root and already have a tree, puke and
222: * exit (FIXME: I don't think this is right?).
223: * If we find it but we're parsing a text node, close out the text node,
224: * return to its parent, and keep going.
1.1 kristaps 225: * Make sure that the element is in the right context.
226: * Lastly, put the node onto our parse tree and continue.
227: */
228: static void
229: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
230: {
1.12 kristaps 231: struct parse *ps = arg;
232: enum nodeid node;
233: enum attrkey key;
234: enum attrval val;
235: struct pnode *dat;
236: struct pattr *pattr;
237: const XML_Char **att;
1.1 kristaps 238:
239: if (ps->stop)
240: return;
241:
242: /* Close out text node, if applicable... */
243: if (NODE_TEXT == ps->node) {
244: assert(NULL != ps->cur);
1.10 kristaps 245: pnode_trim(ps->cur);
1.1 kristaps 246: ps->cur = ps->cur->parent;
247: assert(NULL != ps->cur);
248: ps->node = ps->cur->node;
249: }
250:
251: for (node = 0; node < NODE__MAX; node++)
252: if (NULL == nodes[node].name)
253: continue;
254: else if (0 == strcmp(nodes[node].name, name))
255: break;
256:
257: if (NODE__MAX == node && NODE_ROOT == ps->node) {
258: return;
259: } else if (NODE__MAX == node) {
1.12 kristaps 260: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
261: ps->fname, XML_GetCurrentLineNumber(ps->xml),
262: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 263: ps->stop = 1;
264: return;
265: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 266: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
267: ps->fname, XML_GetCurrentLineNumber(ps->xml),
268: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 269: ps->stop = 1;
270: return;
271: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
272: return;
273: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 274: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
275: "of node \"%s\"\n",
1.12 kristaps 276: ps->fname, XML_GetCurrentLineNumber(ps->xml),
277: XML_GetCurrentColumnNumber(ps->xml),
278: NULL == nodes[ps->node].name ?
1.13 kristaps 279: "(none)" : nodes[ps->node].name,
280: NULL == nodes[node].name ?
281: "(none)" : nodes[node].name);
1.1 kristaps 282: ps->stop = 1;
283: return;
284: }
285:
286: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
287: perror(NULL);
288: exit(EXIT_FAILURE);
289: }
290:
291: dat->node = ps->node = node;
292: dat->parent = ps->cur;
293: TAILQ_INIT(&dat->childq);
1.12 kristaps 294: TAILQ_INIT(&dat->attrq);
1.1 kristaps 295:
296: if (NULL != ps->cur)
297: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
298:
299: ps->cur = dat;
300: if (NULL == ps->root)
301: ps->root = dat;
1.12 kristaps 302:
303: /*
304: * Process attributes.
305: */
306: for (att = atts; NULL != *att; att += 2) {
307: for (key = 0; key < ATTRKEY__MAX; key++)
308: if (0 == strcmp(*att, attrkeys[key]))
309: break;
310: if (ATTRKEY__MAX == key) {
311: fprintf(stderr, "%s:%zu:%zu: unknown "
312: "attribute \"%s\"\n", ps->fname,
313: XML_GetCurrentLineNumber(ps->xml),
314: XML_GetCurrentColumnNumber(ps->xml),
315: *att);
316: continue;
317: } else if ( ! isattrkey(node, key)) {
318: fprintf(stderr, "%s:%zu:%zu: bad "
319: "attribute \"%s\"\n", ps->fname,
320: XML_GetCurrentLineNumber(ps->xml),
321: XML_GetCurrentColumnNumber(ps->xml),
322: *att);
323: continue;
324: }
325: for (val = 0; val < ATTRVAL__MAX; val++)
326: if (0 == strcmp(*(att + 1), attrvals[val]))
327: break;
328: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
329: fprintf(stderr, "%s:%zu:%zu: bad "
330: "value \"%s\"\n", ps->fname,
331: XML_GetCurrentLineNumber(ps->xml),
332: XML_GetCurrentColumnNumber(ps->xml),
333: *(att + 1));
334: continue;
335: }
336: pattr = calloc(1, sizeof(struct pattr));
337: pattr->key = key;
338: pattr->val = val;
339: if (ATTRVAL__MAX == val)
340: pattr->rawval = strdup(*(att + 1));
341: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
342: }
343:
1.1 kristaps 344: }
345:
346: /*
347: * Roll up the parse tree.
1.8 kristaps 348: * If we're at a text node, roll that one up first.
1.1 kristaps 349: * If we hit the root, then assign ourselves as the NODE_ROOT.
350: */
351: static void
352: xml_elem_end(void *arg, const XML_Char *name)
353: {
354: struct parse *ps = arg;
355:
356: if (ps->stop || NODE_ROOT == ps->node)
357: return;
358:
359: /* Close out text node, if applicable... */
360: if (NODE_TEXT == ps->node) {
361: assert(NULL != ps->cur);
1.10 kristaps 362: pnode_trim(ps->cur);
1.1 kristaps 363: ps->cur = ps->cur->parent;
364: assert(NULL != ps->cur);
365: ps->node = ps->cur->node;
366: }
367:
368: if (NULL == (ps->cur = ps->cur->parent))
369: ps->node = NODE_ROOT;
370: else
371: ps->node = ps->cur->node;
372: }
373:
1.8 kristaps 374: /*
375: * Recursively free a node (NULL is ok).
376: */
1.1 kristaps 377: static void
378: pnode_free(struct pnode *pn)
379: {
380: struct pnode *pp;
1.12 kristaps 381: struct pattr *ap;
1.1 kristaps 382:
383: if (NULL == pn)
384: return;
385:
386: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
387: TAILQ_REMOVE(&pn->childq, pp, child);
388: pnode_free(pp);
389: }
390:
1.12 kristaps 391: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
392: TAILQ_REMOVE(&pn->attrq, ap, child);
393: free(ap->rawval);
394: free(ap);
395: }
396:
1.1 kristaps 397: free(pn->b);
398: free(pn);
399: }
400:
1.8 kristaps 401: /*
402: * Unlink a node from its parent and pnode_free() it.
403: */
1.1 kristaps 404: static void
405: pnode_unlink(struct pnode *pn)
406: {
407:
408: if (NULL != pn->parent)
409: TAILQ_REMOVE(&pn->parent->childq, pn, child);
410: pnode_free(pn);
411: }
412:
1.8 kristaps 413: /*
414: * Unlink all children of a node and pnode_free() them.
415: */
1.1 kristaps 416: static void
1.4 kristaps 417: pnode_unlinksub(struct pnode *pn)
418: {
419:
420: while ( ! TAILQ_EMPTY(&pn->childq))
421: pnode_unlink(TAILQ_FIRST(&pn->childq));
422: }
423:
1.8 kristaps 424: /*
425: * Reset the lookaside buffer.
426: */
1.4 kristaps 427: static void
1.1 kristaps 428: bufclear(struct parse *p)
429: {
430:
431: p->b[p->bsz = 0] = '\0';
432: }
433:
1.8 kristaps 434: /*
435: * Append NODE_TEXT contents to the current buffer, reallocating its
436: * size if necessary.
437: * The buffer is ALWAYS nil-terminated.
438: */
1.1 kristaps 439: static void
440: bufappend(struct parse *p, struct pnode *pn)
441: {
442:
443: assert(NODE_TEXT == pn->node);
444: if (p->bsz + pn->bsz + 1 > p->mbsz) {
445: p->mbsz = p->bsz + pn->bsz + 1;
446: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
447: perror(NULL);
448: exit(EXIT_FAILURE);
449: }
450: }
451: memcpy(p->b + p->bsz, pn->b, pn->bsz);
452: p->bsz += pn->bsz;
453: p->b[p->bsz] = '\0';
454: }
455:
1.8 kristaps 456: /*
457: * Recursively append all NODE_TEXT nodes to the buffer.
458: * This descends into non-text nodes, but doesn't do anything beyond
459: * them.
460: * In other words, this is a recursive text grok.
461: */
1.3 kristaps 462: static void
463: bufappend_r(struct parse *p, struct pnode *pn)
464: {
465: struct pnode *pp;
466:
467: if (NODE_TEXT == pn->node)
468: bufappend(p, pn);
469: TAILQ_FOREACH(pp, &pn->childq, child)
470: bufappend_r(p, pp);
471: }
472:
1.12 kristaps 473: #define MACROLINE_NORM 0
474: #define MACROLINE_UPPER 1
1.1 kristaps 475: /*
1.8 kristaps 476: * Recursively print text presumably on a macro line.
1.1 kristaps 477: * Convert all whitespace to regular spaces.
478: */
479: static void
1.12 kristaps 480: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 481: {
482: char *cp;
483:
1.13 kristaps 484: if (0 == p->newln)
485: putchar(' ');
486:
1.1 kristaps 487: bufclear(p);
1.3 kristaps 488: bufappend_r(p, pn);
1.1 kristaps 489:
490: /* Convert all space to spaces. */
491: for (cp = p->b; '\0' != *cp; cp++)
492: if (isspace((int)*cp))
493: *cp = ' ';
494:
495: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 496: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 497: for ( ; '\0' != *cp; cp++) {
498: /* Escape us if we look like a macro. */
499: if ((cp == p->b || ' ' == *(cp - 1)) &&
500: isupper((int)*cp) &&
501: '\0' != *(cp + 1) &&
502: islower((int)*(cp + 1)) &&
503: ('\0' == *(cp + 2) ||
504: ' ' == *(cp + 2) ||
505: (islower((int)*(cp + 2)) &&
506: ('\0' == *(cp + 3) ||
507: ' ' == *(cp + 3)))))
508: fputs("\\&", stdout);
1.12 kristaps 509: if (MACROLINE_UPPER & fl)
510: putchar(toupper((int)*cp));
511: else
512: putchar((int)*cp);
1.1 kristaps 513: /* If we're a character escape, escape us. */
514: if ('\\' == *cp)
515: putchar('e');
516: }
517: }
518:
/*
 * Print the text below "pn" as part of a macro line, without any
 * flags (no upper-casing) and without ending the line.
 */
static void
pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
{
	const int	 noflags = 0;

	pnode_printmacrolinetext(p, pn, noflags);
}
525:
1.1 kristaps 526: /*
527: * Just pnode_printmacrolinepart() but with a newline.
528: * If no text, just the newline.
529: */
530: static void
531: pnode_printmacroline(struct parse *p, struct pnode *pn)
532: {
533:
1.13 kristaps 534: assert(0 == p->newln);
1.12 kristaps 535: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 536: putchar('\n');
1.13 kristaps 537: p->newln = 1;
1.1 kristaps 538: }
539:
1.10 kristaps 540: static void
541: pnode_printmopen(struct parse *p)
542: {
543: if (p->newln) {
544: putchar('.');
545: p->newln = 0;
546: } else
547: putchar(' ');
548: }
549:
550: static void
551: pnode_printmclose(struct parse *p, int sv)
552: {
553:
554: if (sv && ! p->newln) {
555: putchar('\n');
556: p->newln = 1;
557: }
558: }
559:
1.8 kristaps 560: /*
1.10 kristaps 561: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 562: */
1.1 kristaps 563: static void
1.6 kristaps 564: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
565: {
566: struct pnode *pp;
567:
1.10 kristaps 568: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 569: if (NODE_TITLE == pp->node) {
570: pnode_unlink(pp);
1.10 kristaps 571: return;
1.6 kristaps 572: }
573: }
574:
1.8 kristaps 575: /*
576: * Start a hopefully-named `Sh' section.
577: */
1.6 kristaps 578: static void
1.1 kristaps 579: pnode_printrefsect(struct parse *p, struct pnode *pn)
580: {
581: struct pnode *pp;
582:
583: TAILQ_FOREACH(pp, &pn->childq, child)
584: if (NODE_TITLE == pp->node)
585: break;
586:
1.13 kristaps 587: fputs(".Sh", stdout);
588: p->newln = 0;
1.4 kristaps 589:
1.5 kristaps 590: if (NULL != pp) {
1.18 kristaps 591: pnode_printmacrolinetext(p, pp, MACROLINE_UPPER);
592: pnode_printmclose(p, 1);
1.5 kristaps 593: pnode_unlink(pp);
1.13 kristaps 594: } else {
1.4 kristaps 595: puts("UNKNOWN");
1.13 kristaps 596: p->newln = 1;
597: }
1.1 kristaps 598: }
599:
1.8 kristaps 600: /*
601: * Start a reference, extracting the title and volume.
602: */
1.1 kristaps 603: static void
604: pnode_printciterefentry(struct parse *p, struct pnode *pn)
605: {
606: struct pnode *pp, *title, *manvol;
607:
608: title = manvol = NULL;
1.13 kristaps 609: assert(p->newln);
1.1 kristaps 610: TAILQ_FOREACH(pp, &pn->childq, child)
611: if (NODE_MANVOLNUM == pp->node)
612: manvol = pp;
613: else if (NODE_REFENTRYTITLE == pp->node)
614: title = pp;
615:
1.13 kristaps 616: fputs(".Xr", stdout);
617: p->newln = 0;
1.4 kristaps 618:
1.1 kristaps 619: if (NULL != title) {
620: pnode_printmacrolinepart(p, title);
621: } else
1.13 kristaps 622: fputs(" unknown ", stdout);
1.4 kristaps 623:
1.13 kristaps 624: if (NULL == manvol) {
625: puts(" 1");
626: p->newln = 1;
627: } else
1.1 kristaps 628: pnode_printmacroline(p, manvol);
629: }
630:
631: static void
632: pnode_printrefmeta(struct parse *p, struct pnode *pn)
633: {
634: struct pnode *pp, *title, *manvol;
635:
636: title = manvol = NULL;
1.13 kristaps 637: assert(p->newln);
1.1 kristaps 638: TAILQ_FOREACH(pp, &pn->childq, child)
639: if (NODE_MANVOLNUM == pp->node)
640: manvol = pp;
641: else if (NODE_REFENTRYTITLE == pp->node)
642: title = pp;
643:
1.2 kristaps 644: puts(".Dd $Mdocdate" "$");
1.13 kristaps 645: fputs(".Dt", stdout);
646: p->newln = 0;
1.1 kristaps 647:
1.13 kristaps 648: if (NULL != title)
1.12 kristaps 649: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 650: else
651: fputs(" UNKNOWN ", stdout);
652:
653: if (NULL == manvol) {
654: puts(" 1");
655: p->newln = 1;
1.1 kristaps 656: } else
657: pnode_printmacroline(p, manvol);
658:
659: puts(".Os");
660: }
661:
1.3 kristaps 662: static void
663: pnode_printfuncdef(struct parse *p, struct pnode *pn)
664: {
665: struct pnode *pp, *ftype, *func;
666:
1.13 kristaps 667: assert(p->newln);
1.3 kristaps 668: ftype = func = NULL;
669: TAILQ_FOREACH(pp, &pn->childq, child)
670: if (NODE_TEXT == pp->node)
671: ftype = pp;
672: else if (NODE_FUNCTION == pp->node)
673: func = pp;
674:
675: if (NULL != ftype) {
1.13 kristaps 676: fputs(".Ft", stdout);
677: p->newln = 0;
1.3 kristaps 678: pnode_printmacroline(p, ftype);
679: }
680:
681: if (NULL != func) {
1.13 kristaps 682: fputs(".Fo", stdout);
683: p->newln = 0;
1.3 kristaps 684: pnode_printmacroline(p, func);
1.13 kristaps 685: } else {
1.3 kristaps 686: puts(".Fo UNKNOWN");
1.13 kristaps 687: p->newln = 1;
688: }
1.3 kristaps 689: }
690:
691: static void
692: pnode_printparamdef(struct parse *p, struct pnode *pn)
693: {
694: struct pnode *pp, *ptype, *param;
695:
1.13 kristaps 696: assert(p->newln);
1.3 kristaps 697: ptype = param = NULL;
698: TAILQ_FOREACH(pp, &pn->childq, child)
699: if (NODE_TEXT == pp->node)
700: ptype = pp;
701: else if (NODE_PARAMETER == pp->node)
702: param = pp;
703:
704: fputs(".Fa \"", stdout);
1.13 kristaps 705: p->newln = 0;
1.3 kristaps 706: if (NULL != ptype) {
707: pnode_printmacrolinepart(p, ptype);
708: putchar(' ');
709: }
710:
711: if (NULL != param)
712: pnode_printmacrolinepart(p, param);
713:
714: puts("\"");
1.13 kristaps 715: p->newln = 1;
1.3 kristaps 716: }
717:
718: static void
719: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
720: {
721: struct pnode *pp, *fdef;
722:
1.13 kristaps 723: assert(p->newln);
1.3 kristaps 724: TAILQ_FOREACH(fdef, &pn->childq, child)
725: if (NODE_FUNCDEF == fdef->node)
726: break;
727:
1.4 kristaps 728: if (NULL != fdef)
1.3 kristaps 729: pnode_printfuncdef(p, fdef);
1.4 kristaps 730: else
1.3 kristaps 731: puts(".Fo UNKNOWN");
732:
1.4 kristaps 733: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 734: if (NODE_PARAMDEF == pp->node)
735: pnode_printparamdef(p, pp);
736:
737: puts(".Fc");
1.13 kristaps 738: p->newln = 1;
1.3 kristaps 739: }
740:
1.10 kristaps 741: /*
742: * The <arg> element is more complicated than it should be because text
743: * nodes are treated like ".Ar foo", but non-text nodes need to be
744: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 745: * This also handles the case of "repetition" (or in other words, the
746: * ellipsis following an argument) and optionality.
1.10 kristaps 747: */
1.4 kristaps 748: static void
1.10 kristaps 749: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 750: {
751: struct pnode *pp;
1.12 kristaps 752: struct pattr *ap;
753: int isop, isrep;
754:
755: isop = 1;
756: isrep = 0;
757: TAILQ_FOREACH(ap, &pn->attrq, child)
758: if (ATTRKEY_CHOICE == ap->key &&
759: (ATTRVAL_PLAIN == ap->val ||
760: ATTRVAL_REQ == ap->val))
761: isop = 0;
762: else if (ATTRKEY_REP == ap->key &&
763: (ATTRVAL_REPEAT == ap->val))
764: isrep = 1;
765:
766: if (isop) {
767: pnode_printmopen(p);
1.13 kristaps 768: fputs("Op", stdout);
1.12 kristaps 769: }
1.4 kristaps 770:
1.10 kristaps 771: TAILQ_FOREACH(pp, &pn->childq, child) {
772: if (NODE_TEXT == pp->node) {
773: pnode_printmopen(p);
1.13 kristaps 774: fputs("Ar", stdout);
1.10 kristaps 775: }
776: pnode_print(p, pp);
1.12 kristaps 777: if (NODE_TEXT == pp->node && isrep)
778: fputs("...", stdout);
1.10 kristaps 779: }
1.4 kristaps 780: }
781:
1.7 kristaps 782: /*
783: * Recursively search and return the first instance of "node".
784: */
785: static struct pnode *
786: pnode_findfirst(struct pnode *pn, enum nodeid node)
787: {
788: struct pnode *pp, *res;
789:
790: res = NULL;
791: TAILQ_FOREACH(pp, &pn->childq, child) {
792: res = pp->node == node ? pp :
793: pnode_findfirst(pp, node);
794: if (NULL != res)
795: break;
796: }
797:
798: return(res);
799: }
800:
801: static void
802: pnode_printprologue(struct parse *p, struct pnode *pn)
803: {
804: struct pnode *pp;
805:
1.9 kristaps 806: pp = NULL == p->root ? NULL :
807: pnode_findfirst(p->root, NODE_REFMETA);
808:
809: if (NULL != pp) {
1.7 kristaps 810: pnode_printrefmeta(p, pp);
811: pnode_unlink(pp);
812: } else {
813: puts(".\\\" Supplying bogus prologue...");
814: puts(".Dd $Mdocdate" "$");
815: puts(".Dt UNKNOWN 1");
816: puts(".Os");
817: }
818: }
819:
1.13 kristaps 820: static void
821: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
822: {
823: struct pnode *pp;
824:
825: assert(p->newln);
826: TAILQ_FOREACH(pp, &pn->childq, child)
827: if (NODE_TERM == pp->node) {
828: fputs(".It", stdout);
829: p->newln = 0;
830: pnode_print(p, pp);
831: pnode_unlink(pp);
1.16 kristaps 832: pnode_printmclose(p, 1);
1.13 kristaps 833: return;
834: }
835:
836: puts(".It");
837: p->newln = 1;
838: }
839:
840: static void
1.16 kristaps 841: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
842: {
843: struct pnode *pp;
844:
845: assert(p->newln);
846: TAILQ_FOREACH(pp, &pn->childq, child)
847: if (NODE_TITLE == pp->node) {
848: puts(".Pp");
849: pnode_print(p, pp);
850: pnode_unlink(pp);
851: }
852:
853: assert(p->newln);
854: puts(".Bl -item");
855: TAILQ_FOREACH(pp, &pn->childq, child) {
856: assert(p->newln);
857: puts(".It");
858: pnode_print(p, pp);
859: pnode_printmclose(p, 1);
860: }
861: assert(p->newln);
862: puts(".El");
863: }
864:
865: static void
1.13 kristaps 866: pnode_printvariablelist(struct parse *p, struct pnode *pn)
867: {
868: struct pnode *pp;
869:
870: assert(p->newln);
871: TAILQ_FOREACH(pp, &pn->childq, child)
872: if (NODE_TITLE == pp->node) {
873: puts(".Pp");
874: pnode_print(p, pp);
875: pnode_unlink(pp);
876: }
877:
878: assert(p->newln);
879: puts(".Bl -tag -width Ds");
880: TAILQ_FOREACH(pp, &pn->childq, child)
881: if (NODE_VARLISTENTRY != pp->node) {
882: assert(p->newln);
883: fputs(".It", stdout);
884: pnode_printmacroline(p, pp);
885: } else {
886: assert(p->newln);
887: pnode_print(p, pp);
888: }
889: assert(p->newln);
890: puts(".El");
891: }
892:
1.1 kristaps 893: /*
894: * Print a parsed node (or ignore it--whatever).
895: * This is a recursive function.
896: * FIXME: macro line continuation?
897: */
898: static void
899: pnode_print(struct parse *p, struct pnode *pn)
900: {
901: struct pnode *pp;
902: char *cp;
1.10 kristaps 903: int last, sv;
1.1 kristaps 904:
905: if (NULL == pn)
906: return;
907:
1.10 kristaps 908: sv = p->newln;
1.1 kristaps 909:
910: switch (pn->node) {
1.4 kristaps 911: case (NODE_ARG):
1.10 kristaps 912: pnode_printarg(p, pn);
1.4 kristaps 913: pnode_unlinksub(pn);
914: break;
1.1 kristaps 915: case (NODE_CITEREFENTRY):
1.10 kristaps 916: assert(p->newln);
1.1 kristaps 917: pnode_printciterefentry(p, pn);
1.4 kristaps 918: pnode_unlinksub(pn);
1.1 kristaps 919: break;
920: case (NODE_CODE):
1.10 kristaps 921: pnode_printmopen(p);
1.13 kristaps 922: fputs("Li", stdout);
1.4 kristaps 923: break;
924: case (NODE_COMMAND):
1.10 kristaps 925: pnode_printmopen(p);
1.13 kristaps 926: fputs("Nm", stdout);
927: break;
928: case (NODE_EMPHASIS):
929: pnode_printmopen(p);
930: fputs("Em", stdout);
1.1 kristaps 931: break;
1.17 kristaps 932: case (NODE_FILENAME):
933: pnode_printmopen(p);
934: fputs("Pa", stdout);
935: break;
1.3 kristaps 936: case (NODE_FUNCTION):
1.10 kristaps 937: pnode_printmopen(p);
1.13 kristaps 938: fputs("Fn", stdout);
1.3 kristaps 939: break;
940: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 941: assert(p->newln);
1.3 kristaps 942: pnode_printfuncprototype(p, pn);
1.4 kristaps 943: pnode_unlinksub(pn);
1.3 kristaps 944: break;
1.1 kristaps 945: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 946: pnode_printmopen(p);
1.13 kristaps 947: fputs("Fd", stdout);
1.16 kristaps 948: break;
949: case (NODE_ITEMIZEDLIST):
950: assert(p->newln);
951: pnode_printitemizedlist(p, pn);
1.10 kristaps 952: break;
1.19 ! kristaps 953: case (NODE_LITERAL):
! 954: pnode_printmopen(p);
! 955: fputs("Li", stdout);
! 956: break;
1.10 kristaps 957: case (NODE_OPTION):
958: pnode_printmopen(p);
1.13 kristaps 959: fputs("Fl", stdout);
960: /* FIXME: bogus leading '-'? */
1.1 kristaps 961: break;
962: case (NODE_PARA):
1.10 kristaps 963: assert(p->newln);
1.13 kristaps 964: if (NULL != pn->parent &&
965: NODE_LISTITEM == pn->parent->node)
966: break;
1.1 kristaps 967: puts(".Pp");
1.3 kristaps 968: break;
969: case (NODE_PARAMETER):
1.10 kristaps 970: /* Suppress non-text children... */
971: pnode_printmopen(p);
972: fputs("Fa \"", stdout);
1.3 kristaps 973: pnode_printmacrolinepart(p, pn);
974: puts("\"");
1.4 kristaps 975: pnode_unlinksub(pn);
1.1 kristaps 976: break;
977: case (NODE_PROGRAMLISTING):
1.10 kristaps 978: assert(p->newln);
1.1 kristaps 979: puts(".Bd -literal");
1.15 kristaps 980: break;
981: case (NODE_REFENTRYINFO):
982: /* Suppress. */
983: pnode_unlinksub(pn);
1.1 kristaps 984: break;
985: case (NODE_REFMETA):
1.7 kristaps 986: abort();
1.1 kristaps 987: break;
988: case (NODE_REFNAME):
1.10 kristaps 989: /* Suppress non-text children... */
990: pnode_printmopen(p);
1.13 kristaps 991: fputs("Nm", stdout);
992: p->newln = 0;
1.10 kristaps 993: pnode_printmacrolinepart(p, pn);
1.4 kristaps 994: pnode_unlinksub(pn);
1.10 kristaps 995: break;
1.1 kristaps 996: case (NODE_REFNAMEDIV):
1.10 kristaps 997: assert(p->newln);
1.1 kristaps 998: puts(".Sh NAME");
999: break;
1000: case (NODE_REFPURPOSE):
1.10 kristaps 1001: assert(p->newln);
1.13 kristaps 1002: pnode_printmopen(p);
1003: fputs("Nd", stdout);
1.10 kristaps 1004: break;
1.1 kristaps 1005: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1006: assert(p->newln);
1.6 kristaps 1007: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1008: puts(".Sh SYNOPSIS");
1.1 kristaps 1009: break;
1010: case (NODE_REFSECT1):
1.10 kristaps 1011: assert(p->newln);
1.1 kristaps 1012: pnode_printrefsect(p, pn);
1013: break;
1.13 kristaps 1014: case (NODE_REPLACEABLE):
1015: pnode_printmopen(p);
1016: fputs("Ar", stdout);
1017: break;
1.19 ! kristaps 1018: case (NODE_SBR):
! 1019: assert(p->newln);
! 1020: puts(".br");
! 1021: break;
1.8 kristaps 1022: case (NODE_STRUCTNAME):
1.10 kristaps 1023: pnode_printmopen(p);
1.13 kristaps 1024: fputs("Vt", stdout);
1.10 kristaps 1025: break;
1.1 kristaps 1026: case (NODE_TEXT):
1.13 kristaps 1027: if (0 == p->newln)
1028: putchar(' ');
1.1 kristaps 1029: bufclear(p);
1030: bufappend(p, pn);
1031: /*
1032: * Output all characters, squeezing out whitespace
1033: * between newlines.
1034: * XXX: all whitespace, including tabs (?).
1035: * Remember to escape control characters and escapes.
1036: */
1.10 kristaps 1037: assert(p->bsz);
1.1 kristaps 1038: for (last = '\n', cp = p->b; '\0' != *cp; ) {
1039: if ('\n' == last) {
1040: /* Consume all whitespace. */
1041: if (isspace((int)*cp)) {
1042: while (isspace((int)*cp))
1043: cp++;
1044: continue;
1045: } else if ('\'' == *cp || '.' == *cp)
1046: fputs("\\&", stdout);
1047: }
1048: putchar(last = *cp++);
1049: /* If we're a character escape, escape us. */
1050: if ('\\' == last)
1051: putchar('e');
1052: }
1.10 kristaps 1053: p->newln = 0;
1.1 kristaps 1054: break;
1.13 kristaps 1055: case (NODE_VARIABLELIST):
1056: assert(p->newln);
1057: pnode_printvariablelist(p, pn);
1058: pnode_unlinksub(pn);
1059: break;
1060: case (NODE_VARLISTENTRY):
1061: assert(p->newln);
1062: pnode_printvarlistentry(p, pn);
1063: break;
1.1 kristaps 1064: default:
1065: break;
1066: }
1067:
1068: TAILQ_FOREACH(pp, &pn->childq, child)
1069: pnode_print(p, pp);
1070:
1071: switch (pn->node) {
1.10 kristaps 1072: case (NODE_ARG):
1073: case (NODE_CODE):
1074: case (NODE_COMMAND):
1.13 kristaps 1075: case (NODE_EMPHASIS):
1.17 kristaps 1076: case (NODE_FILENAME):
1.10 kristaps 1077: case (NODE_FUNCTION):
1078: case (NODE_FUNCSYNOPSISINFO):
1.19 ! kristaps 1079: case (NODE_LITERAL):
1.10 kristaps 1080: case (NODE_OPTION):
1081: case (NODE_PARAMETER):
1.13 kristaps 1082: case (NODE_REPLACEABLE):
1083: case (NODE_REFPURPOSE):
1.10 kristaps 1084: case (NODE_STRUCTNAME):
1085: case (NODE_TEXT):
1086: pnode_printmclose(p, sv);
1087: break;
1.12 kristaps 1088: case (NODE_REFNAME):
1089: /*
1090: * If we're in the NAME macro and we have multiple
1091: * <refname> macros in sequence, then print out a
1092: * trailing comma before the newline.
1093: */
1094: if (NULL != pn->parent &&
1095: NODE_REFNAMEDIV == pn->parent->node &&
1096: NULL != TAILQ_NEXT(pn, child) &&
1097: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1098: fputs(" ,", stdout);
1099: pnode_printmclose(p, sv);
1100: break;
1.1 kristaps 1101: case (NODE_PROGRAMLISTING):
1.10 kristaps 1102: assert(p->newln);
1.1 kristaps 1103: puts(".Ed");
1.10 kristaps 1104: p->newln = 1;
1.1 kristaps 1105: break;
1106: default:
1107: break;
1108: }
1109: }
1110:
1111: /*
1112: * Loop around the read buffer until we've drained it of all data.
1113: * Invoke the parser context with each buffer fill.
1114: */
1115: static int
1116: readfile(XML_Parser xp, int fd,
1117: char *b, size_t bsz, const char *fn)
1118: {
1119: struct parse p;
1120: int rc;
1121: ssize_t ssz;
1122:
1123: memset(&p, 0, sizeof(struct parse));
1124:
1125: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1126: p.fname = fn;
1127: p.xml = xp;
1.1 kristaps 1128:
1129: XML_SetCharacterDataHandler(xp, xml_char);
1130: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1131: XML_SetUserData(xp, &p);
1132:
1133: while ((ssz = read(fd, b, bsz)) >= 0) {
1134: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1135: fprintf(stderr, "%s: %s\n", fn,
1136: XML_ErrorString
1137: (XML_GetErrorCode(xp)));
1138: else if ( ! p.stop && ssz > 0)
1139: continue;
1140: /*
1141: * Exit when we've read all or errors have occured
1142: * during the parse sequence.
1143: */
1.10 kristaps 1144: p.newln = 1;
1.7 kristaps 1145: pnode_printprologue(&p, p.root);
1.1 kristaps 1146: pnode_print(&p, p.root);
1147: pnode_free(p.root);
1148: free(p.b);
1149: return(0 != rc && ! p.stop);
1150: }
1151:
1152: /* Read error has occured. */
1153: perror(fn);
1154: pnode_free(p.root);
1155: free(p.b);
1156: return(0);
1157: }
1158:
1159: int
1160: main(int argc, char *argv[])
1161: {
1162: XML_Parser xp;
1163: const char *fname;
1164: char *buf;
1165: int fd, rc;
1166:
1167: fname = "-";
1168: xp = NULL;
1169: buf = NULL;
1170: rc = 0;
1171:
1172: if (-1 != getopt(argc, argv, ""))
1173: return(EXIT_FAILURE);
1174:
1175: argc -= optind;
1176: argv += optind;
1177:
1178: if (argc > 1)
1179: return(EXIT_FAILURE);
1180: else if (argc > 0)
1181: fname = argv[0];
1182:
1183: /* Read from stdin or a file. */
1184: fd = 0 == strcmp(fname, "-") ?
1185: STDIN_FILENO : open(fname, O_RDONLY, 0);
1186:
1187: /*
1188: * Open file for reading.
1189: * Allocate a read buffer.
1190: * Create the parser context.
1191: * Dive directly into the parse.
1192: */
1193: if (-1 == fd)
1194: perror(fname);
1195: else if (NULL == (buf = malloc(4096)))
1196: perror(NULL);
1197: else if (NULL == (xp = XML_ParserCreate(NULL)))
1198: perror(NULL);
1199: else if ( ! readfile(xp, fd, buf, 4096, fname))
1200: rc = 1;
1201:
1202: XML_ParserFree(xp);
1203: free(buf);
1204: if (STDIN_FILENO != fd)
1205: close(fd);
1206: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1207: }
CVSweb