Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.18
1.18 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.17 2014/03/30 16:32:03 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/* Static description of one recognised DocBook element. */
struct node {
	const char	*name;	/* docbook element name */
	unsigned int	 flags;	/* bitmask of the NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.15 kristaps 95: { "date", 0 },
1.13 kristaps 96: { "emphasis", 0 },
1.17 kristaps 97: { "filename", 0 },
1.3 kristaps 98: { "funcdef", 0 },
99: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 100: { "funcsynopsis", NODE_IGNTEXT },
101: { "funcsynopsisinfo", 0 },
1.3 kristaps 102: { "function", 0 },
1.16 kristaps 103: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 104: { "link", 0 },
1.13 kristaps 105: { "listitem", NODE_IGNTEXT },
1.1 kristaps 106: { "manvolnum", 0 },
1.4 kristaps 107: { "option", 0 },
1.1 kristaps 108: { "para", 0 },
1.3 kristaps 109: { "paramdef", 0 },
110: { "parameter", 0 },
1.1 kristaps 111: { "programlisting", 0 },
112: { "refclass", NODE_IGNTEXT },
113: { "refdescriptor", NODE_IGNTEXT },
114: { "refentry", NODE_IGNTEXT },
1.15 kristaps 115: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 116: { "refentrytitle", 0 },
117: { "refmeta", NODE_IGNTEXT },
118: { "refmiscinfo", NODE_IGNTEXT },
119: { "refname", 0 },
120: { "refnamediv", NODE_IGNTEXT },
121: { "refpurpose", 0 },
122: { "refsect1", 0 },
123: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 124: { "replaceable", 0 },
1.8 kristaps 125: { "structname", 0 },
1.1 kristaps 126: { "synopsis", 0 },
1.13 kristaps 127: { "term", 0 },
1.1 kristaps 128: { NULL, 0 },
129: { "title", 0 },
1.14 kristaps 130: { "ulink", 0 },
1.13 kristaps 131: { "variablelist", NODE_IGNTEXT },
132: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 133: };
134:
/* Forward declaration: the per-element printers recurse into this. */
static void	pnode_print(struct parse *p, struct pnode *pn);
137:
1.8 kristaps 138: /*
139: * Process a stream of characters.
140: * We store text as nodes in and of themselves.
141: * If a text node is already open, append to it.
142: * If it's not open, open one under the current context.
143: */
1.1 kristaps 144: static void
145: xml_char(void *arg, const XML_Char *p, int sz)
146: {
147: struct parse *ps = arg;
148: struct pnode *dat;
1.4 kristaps 149: int i;
1.1 kristaps 150:
151: /* Stopped or no tree yet. */
152: if (ps->stop || NODE_ROOT == ps->node)
153: return;
154:
155: /* Not supposed to be collecting text. */
156: assert(NULL != ps->cur);
157: if (NODE_IGNTEXT & nodes[ps->node].flags)
158: return;
159:
160: /*
161: * Are we in the midst of processing text?
162: * If we're not processing text right now, then create a text
163: * node for doing so.
1.4 kristaps 164: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 165: * process: strip out all leading whitespace to be sure.
1.1 kristaps 166: */
167: if (NODE_TEXT != ps->node) {
1.4 kristaps 168: for (i = 0; i < sz; i++)
169: if ( ! isspace((int)p[i]))
170: break;
171: if (i == sz)
172: return;
1.10 kristaps 173: p += i;
174: sz -= i;
1.1 kristaps 175: dat = calloc(1, sizeof(struct pnode));
176: if (NULL == dat) {
177: perror(NULL);
178: exit(EXIT_FAILURE);
179: }
180:
181: dat->node = ps->node = NODE_TEXT;
182: dat->parent = ps->cur;
183: TAILQ_INIT(&dat->childq);
1.12 kristaps 184: TAILQ_INIT(&dat->attrq);
1.1 kristaps 185: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
186: ps->cur = dat;
187: assert(NULL != ps->root);
188: }
189:
190: /* Append to current buffer. */
191: assert(sz >= 0);
192: ps->cur->b = realloc(ps->cur->b,
193: ps->cur->bsz + (size_t)sz);
194: if (NULL == ps->cur->b) {
195: perror(NULL);
196: exit(EXIT_FAILURE);
197: }
198: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
199: ps->cur->bsz += (size_t)sz;
200: }
201:
1.10 kristaps 202: static void
203: pnode_trim(struct pnode *pn)
204: {
205:
206: assert(NODE_TEXT == pn->node);
207: for ( ; pn->bsz > 0; pn->bsz--)
208: if ( ! isspace((int)pn->b[pn->bsz - 1]))
209: break;
210: }
211:
1.1 kristaps 212: /*
213: * Begin an element.
214: * First, look for the element.
215: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 216: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 217: * If we find it but we're not parsing yet (i.e., it's not a refentry
218: * and thus out of context), keep going.
1.8 kristaps 219: * If we find it and we're at the root and already have a tree, puke and
220: * exit (FIXME: I don't think this is right?).
221: * If we find it but we're parsing a text node, close out the text node,
222: * return to its parent, and keep going.
1.1 kristaps 223: * Make sure that the element is in the right context.
224: * Lastly, put the node onto our parse tree and continue.
225: */
226: static void
227: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
228: {
1.12 kristaps 229: struct parse *ps = arg;
230: enum nodeid node;
231: enum attrkey key;
232: enum attrval val;
233: struct pnode *dat;
234: struct pattr *pattr;
235: const XML_Char **att;
1.1 kristaps 236:
237: if (ps->stop)
238: return;
239:
240: /* Close out text node, if applicable... */
241: if (NODE_TEXT == ps->node) {
242: assert(NULL != ps->cur);
1.10 kristaps 243: pnode_trim(ps->cur);
1.1 kristaps 244: ps->cur = ps->cur->parent;
245: assert(NULL != ps->cur);
246: ps->node = ps->cur->node;
247: }
248:
249: for (node = 0; node < NODE__MAX; node++)
250: if (NULL == nodes[node].name)
251: continue;
252: else if (0 == strcmp(nodes[node].name, name))
253: break;
254:
255: if (NODE__MAX == node && NODE_ROOT == ps->node) {
256: return;
257: } else if (NODE__MAX == node) {
1.12 kristaps 258: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
259: ps->fname, XML_GetCurrentLineNumber(ps->xml),
260: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 261: ps->stop = 1;
262: return;
263: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 264: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
265: ps->fname, XML_GetCurrentLineNumber(ps->xml),
266: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 267: ps->stop = 1;
268: return;
269: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
270: return;
271: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 272: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
273: "of node \"%s\"\n",
1.12 kristaps 274: ps->fname, XML_GetCurrentLineNumber(ps->xml),
275: XML_GetCurrentColumnNumber(ps->xml),
276: NULL == nodes[ps->node].name ?
1.13 kristaps 277: "(none)" : nodes[ps->node].name,
278: NULL == nodes[node].name ?
279: "(none)" : nodes[node].name);
1.1 kristaps 280: ps->stop = 1;
281: return;
282: }
283:
284: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
285: perror(NULL);
286: exit(EXIT_FAILURE);
287: }
288:
289: dat->node = ps->node = node;
290: dat->parent = ps->cur;
291: TAILQ_INIT(&dat->childq);
1.12 kristaps 292: TAILQ_INIT(&dat->attrq);
1.1 kristaps 293:
294: if (NULL != ps->cur)
295: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
296:
297: ps->cur = dat;
298: if (NULL == ps->root)
299: ps->root = dat;
1.12 kristaps 300:
301: /*
302: * Process attributes.
303: */
304: for (att = atts; NULL != *att; att += 2) {
305: for (key = 0; key < ATTRKEY__MAX; key++)
306: if (0 == strcmp(*att, attrkeys[key]))
307: break;
308: if (ATTRKEY__MAX == key) {
309: fprintf(stderr, "%s:%zu:%zu: unknown "
310: "attribute \"%s\"\n", ps->fname,
311: XML_GetCurrentLineNumber(ps->xml),
312: XML_GetCurrentColumnNumber(ps->xml),
313: *att);
314: continue;
315: } else if ( ! isattrkey(node, key)) {
316: fprintf(stderr, "%s:%zu:%zu: bad "
317: "attribute \"%s\"\n", ps->fname,
318: XML_GetCurrentLineNumber(ps->xml),
319: XML_GetCurrentColumnNumber(ps->xml),
320: *att);
321: continue;
322: }
323: for (val = 0; val < ATTRVAL__MAX; val++)
324: if (0 == strcmp(*(att + 1), attrvals[val]))
325: break;
326: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
327: fprintf(stderr, "%s:%zu:%zu: bad "
328: "value \"%s\"\n", ps->fname,
329: XML_GetCurrentLineNumber(ps->xml),
330: XML_GetCurrentColumnNumber(ps->xml),
331: *(att + 1));
332: continue;
333: }
334: pattr = calloc(1, sizeof(struct pattr));
335: pattr->key = key;
336: pattr->val = val;
337: if (ATTRVAL__MAX == val)
338: pattr->rawval = strdup(*(att + 1));
339: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
340: }
341:
1.1 kristaps 342: }
343:
344: /*
345: * Roll up the parse tree.
1.8 kristaps 346: * If we're at a text node, roll that one up first.
1.1 kristaps 347: * If we hit the root, then assign ourselves as the NODE_ROOT.
348: */
349: static void
350: xml_elem_end(void *arg, const XML_Char *name)
351: {
352: struct parse *ps = arg;
353:
354: if (ps->stop || NODE_ROOT == ps->node)
355: return;
356:
357: /* Close out text node, if applicable... */
358: if (NODE_TEXT == ps->node) {
359: assert(NULL != ps->cur);
1.10 kristaps 360: pnode_trim(ps->cur);
1.1 kristaps 361: ps->cur = ps->cur->parent;
362: assert(NULL != ps->cur);
363: ps->node = ps->cur->node;
364: }
365:
366: if (NULL == (ps->cur = ps->cur->parent))
367: ps->node = NODE_ROOT;
368: else
369: ps->node = ps->cur->node;
370: }
371:
1.8 kristaps 372: /*
373: * Recursively free a node (NULL is ok).
374: */
1.1 kristaps 375: static void
376: pnode_free(struct pnode *pn)
377: {
378: struct pnode *pp;
1.12 kristaps 379: struct pattr *ap;
1.1 kristaps 380:
381: if (NULL == pn)
382: return;
383:
384: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
385: TAILQ_REMOVE(&pn->childq, pp, child);
386: pnode_free(pp);
387: }
388:
1.12 kristaps 389: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
390: TAILQ_REMOVE(&pn->attrq, ap, child);
391: free(ap->rawval);
392: free(ap);
393: }
394:
1.1 kristaps 395: free(pn->b);
396: free(pn);
397: }
398:
1.8 kristaps 399: /*
400: * Unlink a node from its parent and pnode_free() it.
401: */
1.1 kristaps 402: static void
403: pnode_unlink(struct pnode *pn)
404: {
405:
406: if (NULL != pn->parent)
407: TAILQ_REMOVE(&pn->parent->childq, pn, child);
408: pnode_free(pn);
409: }
410:
1.8 kristaps 411: /*
412: * Unlink all children of a node and pnode_free() them.
413: */
1.1 kristaps 414: static void
1.4 kristaps 415: pnode_unlinksub(struct pnode *pn)
416: {
417:
418: while ( ! TAILQ_EMPTY(&pn->childq))
419: pnode_unlink(TAILQ_FIRST(&pn->childq));
420: }
421:
1.8 kristaps 422: /*
423: * Reset the lookaside buffer.
424: */
1.4 kristaps 425: static void
1.1 kristaps 426: bufclear(struct parse *p)
427: {
428:
429: p->b[p->bsz = 0] = '\0';
430: }
431:
1.8 kristaps 432: /*
433: * Append NODE_TEXT contents to the current buffer, reallocating its
434: * size if necessary.
435: * The buffer is ALWAYS nil-terminated.
436: */
1.1 kristaps 437: static void
438: bufappend(struct parse *p, struct pnode *pn)
439: {
440:
441: assert(NODE_TEXT == pn->node);
442: if (p->bsz + pn->bsz + 1 > p->mbsz) {
443: p->mbsz = p->bsz + pn->bsz + 1;
444: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
445: perror(NULL);
446: exit(EXIT_FAILURE);
447: }
448: }
449: memcpy(p->b + p->bsz, pn->b, pn->bsz);
450: p->bsz += pn->bsz;
451: p->b[p->bsz] = '\0';
452: }
453:
1.8 kristaps 454: /*
455: * Recursively append all NODE_TEXT nodes to the buffer.
456: * This descends into non-text nodes, but doesn't do anything beyond
457: * them.
458: * In other words, this is a recursive text grok.
459: */
1.3 kristaps 460: static void
461: bufappend_r(struct parse *p, struct pnode *pn)
462: {
463: struct pnode *pp;
464:
465: if (NODE_TEXT == pn->node)
466: bufappend(p, pn);
467: TAILQ_FOREACH(pp, &pn->childq, child)
468: bufappend_r(p, pp);
469: }
470:
1.12 kristaps 471: #define MACROLINE_NORM 0
472: #define MACROLINE_UPPER 1
1.1 kristaps 473: /*
1.8 kristaps 474: * Recursively print text presumably on a macro line.
1.1 kristaps 475: * Convert all whitespace to regular spaces.
476: */
477: static void
1.12 kristaps 478: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 479: {
480: char *cp;
481:
1.13 kristaps 482: if (0 == p->newln)
483: putchar(' ');
484:
1.1 kristaps 485: bufclear(p);
1.3 kristaps 486: bufappend_r(p, pn);
1.1 kristaps 487:
488: /* Convert all space to spaces. */
489: for (cp = p->b; '\0' != *cp; cp++)
490: if (isspace((int)*cp))
491: *cp = ' ';
492:
493: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 494: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 495: for ( ; '\0' != *cp; cp++) {
496: /* Escape us if we look like a macro. */
497: if ((cp == p->b || ' ' == *(cp - 1)) &&
498: isupper((int)*cp) &&
499: '\0' != *(cp + 1) &&
500: islower((int)*(cp + 1)) &&
501: ('\0' == *(cp + 2) ||
502: ' ' == *(cp + 2) ||
503: (islower((int)*(cp + 2)) &&
504: ('\0' == *(cp + 3) ||
505: ' ' == *(cp + 3)))))
506: fputs("\\&", stdout);
1.12 kristaps 507: if (MACROLINE_UPPER & fl)
508: putchar(toupper((int)*cp));
509: else
510: putchar((int)*cp);
1.1 kristaps 511: /* If we're a character escape, escape us. */
512: if ('\\' == *cp)
513: putchar('e');
514: }
515: }
516:
1.12 kristaps 517: static void
518: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
519: {
520:
521: pnode_printmacrolinetext(p, pn, 0);
522: }
523:
1.1 kristaps 524: /*
525: * Just pnode_printmacrolinepart() but with a newline.
526: * If no text, just the newline.
527: */
528: static void
529: pnode_printmacroline(struct parse *p, struct pnode *pn)
530: {
531:
1.13 kristaps 532: assert(0 == p->newln);
1.12 kristaps 533: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 534: putchar('\n');
1.13 kristaps 535: p->newln = 1;
1.1 kristaps 536: }
537:
1.10 kristaps 538: static void
539: pnode_printmopen(struct parse *p)
540: {
541: if (p->newln) {
542: putchar('.');
543: p->newln = 0;
544: } else
545: putchar(' ');
546: }
547:
548: static void
549: pnode_printmclose(struct parse *p, int sv)
550: {
551:
552: if (sv && ! p->newln) {
553: putchar('\n');
554: p->newln = 1;
555: }
556: }
557:
1.8 kristaps 558: /*
1.10 kristaps 559: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 560: */
1.1 kristaps 561: static void
1.6 kristaps 562: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
563: {
564: struct pnode *pp;
565:
1.10 kristaps 566: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 567: if (NODE_TITLE == pp->node) {
568: pnode_unlink(pp);
1.10 kristaps 569: return;
1.6 kristaps 570: }
571: }
572:
1.8 kristaps 573: /*
574: * Start a hopefully-named `Sh' section.
575: */
1.6 kristaps 576: static void
1.1 kristaps 577: pnode_printrefsect(struct parse *p, struct pnode *pn)
578: {
579: struct pnode *pp;
580:
581: TAILQ_FOREACH(pp, &pn->childq, child)
582: if (NODE_TITLE == pp->node)
583: break;
584:
1.13 kristaps 585: fputs(".Sh", stdout);
586: p->newln = 0;
1.4 kristaps 587:
1.5 kristaps 588: if (NULL != pp) {
1.18 ! kristaps 589: pnode_printmacrolinetext(p, pp, MACROLINE_UPPER);
! 590: pnode_printmclose(p, 1);
1.5 kristaps 591: pnode_unlink(pp);
1.13 kristaps 592: } else {
1.4 kristaps 593: puts("UNKNOWN");
1.13 kristaps 594: p->newln = 1;
595: }
1.1 kristaps 596: }
597:
1.8 kristaps 598: /*
599: * Start a reference, extracting the title and volume.
600: */
1.1 kristaps 601: static void
602: pnode_printciterefentry(struct parse *p, struct pnode *pn)
603: {
604: struct pnode *pp, *title, *manvol;
605:
606: title = manvol = NULL;
1.13 kristaps 607: assert(p->newln);
1.1 kristaps 608: TAILQ_FOREACH(pp, &pn->childq, child)
609: if (NODE_MANVOLNUM == pp->node)
610: manvol = pp;
611: else if (NODE_REFENTRYTITLE == pp->node)
612: title = pp;
613:
1.13 kristaps 614: fputs(".Xr", stdout);
615: p->newln = 0;
1.4 kristaps 616:
1.1 kristaps 617: if (NULL != title) {
618: pnode_printmacrolinepart(p, title);
619: } else
1.13 kristaps 620: fputs(" unknown ", stdout);
1.4 kristaps 621:
1.13 kristaps 622: if (NULL == manvol) {
623: puts(" 1");
624: p->newln = 1;
625: } else
1.1 kristaps 626: pnode_printmacroline(p, manvol);
627: }
628:
629: static void
630: pnode_printrefmeta(struct parse *p, struct pnode *pn)
631: {
632: struct pnode *pp, *title, *manvol;
633:
634: title = manvol = NULL;
1.13 kristaps 635: assert(p->newln);
1.1 kristaps 636: TAILQ_FOREACH(pp, &pn->childq, child)
637: if (NODE_MANVOLNUM == pp->node)
638: manvol = pp;
639: else if (NODE_REFENTRYTITLE == pp->node)
640: title = pp;
641:
1.2 kristaps 642: puts(".Dd $Mdocdate" "$");
1.13 kristaps 643: fputs(".Dt", stdout);
644: p->newln = 0;
1.1 kristaps 645:
1.13 kristaps 646: if (NULL != title)
1.12 kristaps 647: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 648: else
649: fputs(" UNKNOWN ", stdout);
650:
651: if (NULL == manvol) {
652: puts(" 1");
653: p->newln = 1;
1.1 kristaps 654: } else
655: pnode_printmacroline(p, manvol);
656:
657: puts(".Os");
658: }
659:
1.3 kristaps 660: static void
661: pnode_printfuncdef(struct parse *p, struct pnode *pn)
662: {
663: struct pnode *pp, *ftype, *func;
664:
1.13 kristaps 665: assert(p->newln);
1.3 kristaps 666: ftype = func = NULL;
667: TAILQ_FOREACH(pp, &pn->childq, child)
668: if (NODE_TEXT == pp->node)
669: ftype = pp;
670: else if (NODE_FUNCTION == pp->node)
671: func = pp;
672:
673: if (NULL != ftype) {
1.13 kristaps 674: fputs(".Ft", stdout);
675: p->newln = 0;
1.3 kristaps 676: pnode_printmacroline(p, ftype);
677: }
678:
679: if (NULL != func) {
1.13 kristaps 680: fputs(".Fo", stdout);
681: p->newln = 0;
1.3 kristaps 682: pnode_printmacroline(p, func);
1.13 kristaps 683: } else {
1.3 kristaps 684: puts(".Fo UNKNOWN");
1.13 kristaps 685: p->newln = 1;
686: }
1.3 kristaps 687: }
688:
689: static void
690: pnode_printparamdef(struct parse *p, struct pnode *pn)
691: {
692: struct pnode *pp, *ptype, *param;
693:
1.13 kristaps 694: assert(p->newln);
1.3 kristaps 695: ptype = param = NULL;
696: TAILQ_FOREACH(pp, &pn->childq, child)
697: if (NODE_TEXT == pp->node)
698: ptype = pp;
699: else if (NODE_PARAMETER == pp->node)
700: param = pp;
701:
702: fputs(".Fa \"", stdout);
1.13 kristaps 703: p->newln = 0;
1.3 kristaps 704: if (NULL != ptype) {
705: pnode_printmacrolinepart(p, ptype);
706: putchar(' ');
707: }
708:
709: if (NULL != param)
710: pnode_printmacrolinepart(p, param);
711:
712: puts("\"");
1.13 kristaps 713: p->newln = 1;
1.3 kristaps 714: }
715:
716: static void
717: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
718: {
719: struct pnode *pp, *fdef;
720:
1.13 kristaps 721: assert(p->newln);
1.3 kristaps 722: TAILQ_FOREACH(fdef, &pn->childq, child)
723: if (NODE_FUNCDEF == fdef->node)
724: break;
725:
1.4 kristaps 726: if (NULL != fdef)
1.3 kristaps 727: pnode_printfuncdef(p, fdef);
1.4 kristaps 728: else
1.3 kristaps 729: puts(".Fo UNKNOWN");
730:
1.4 kristaps 731: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 732: if (NODE_PARAMDEF == pp->node)
733: pnode_printparamdef(p, pp);
734:
735: puts(".Fc");
1.13 kristaps 736: p->newln = 1;
1.3 kristaps 737: }
738:
1.10 kristaps 739: /*
740: * The <arg> element is more complicated than it should be because text
741: * nodes are treated like ".Ar foo", but non-text nodes need to be
742: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 743: * This also handles the case of "repetition" (or in other words, the
744: * ellipsis following an argument) and optionality.
1.10 kristaps 745: */
1.4 kristaps 746: static void
1.10 kristaps 747: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 748: {
749: struct pnode *pp;
1.12 kristaps 750: struct pattr *ap;
751: int isop, isrep;
752:
753: isop = 1;
754: isrep = 0;
755: TAILQ_FOREACH(ap, &pn->attrq, child)
756: if (ATTRKEY_CHOICE == ap->key &&
757: (ATTRVAL_PLAIN == ap->val ||
758: ATTRVAL_REQ == ap->val))
759: isop = 0;
760: else if (ATTRKEY_REP == ap->key &&
761: (ATTRVAL_REPEAT == ap->val))
762: isrep = 1;
763:
764: if (isop) {
765: pnode_printmopen(p);
1.13 kristaps 766: fputs("Op", stdout);
1.12 kristaps 767: }
1.4 kristaps 768:
1.10 kristaps 769: TAILQ_FOREACH(pp, &pn->childq, child) {
770: if (NODE_TEXT == pp->node) {
771: pnode_printmopen(p);
1.13 kristaps 772: fputs("Ar", stdout);
1.10 kristaps 773: }
774: pnode_print(p, pp);
1.12 kristaps 775: if (NODE_TEXT == pp->node && isrep)
776: fputs("...", stdout);
1.10 kristaps 777: }
1.4 kristaps 778: }
779:
1.7 kristaps 780: /*
781: * Recursively search and return the first instance of "node".
782: */
783: static struct pnode *
784: pnode_findfirst(struct pnode *pn, enum nodeid node)
785: {
786: struct pnode *pp, *res;
787:
788: res = NULL;
789: TAILQ_FOREACH(pp, &pn->childq, child) {
790: res = pp->node == node ? pp :
791: pnode_findfirst(pp, node);
792: if (NULL != res)
793: break;
794: }
795:
796: return(res);
797: }
798:
799: static void
800: pnode_printprologue(struct parse *p, struct pnode *pn)
801: {
802: struct pnode *pp;
803:
1.9 kristaps 804: pp = NULL == p->root ? NULL :
805: pnode_findfirst(p->root, NODE_REFMETA);
806:
807: if (NULL != pp) {
1.7 kristaps 808: pnode_printrefmeta(p, pp);
809: pnode_unlink(pp);
810: } else {
811: puts(".\\\" Supplying bogus prologue...");
812: puts(".Dd $Mdocdate" "$");
813: puts(".Dt UNKNOWN 1");
814: puts(".Os");
815: }
816: }
817:
1.13 kristaps 818: static void
819: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
820: {
821: struct pnode *pp;
822:
823: assert(p->newln);
824: TAILQ_FOREACH(pp, &pn->childq, child)
825: if (NODE_TERM == pp->node) {
826: fputs(".It", stdout);
827: p->newln = 0;
828: pnode_print(p, pp);
829: pnode_unlink(pp);
1.16 kristaps 830: pnode_printmclose(p, 1);
1.13 kristaps 831: return;
832: }
833:
834: puts(".It");
835: p->newln = 1;
836: }
837:
838: static void
1.16 kristaps 839: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
840: {
841: struct pnode *pp;
842:
843: assert(p->newln);
844: TAILQ_FOREACH(pp, &pn->childq, child)
845: if (NODE_TITLE == pp->node) {
846: puts(".Pp");
847: pnode_print(p, pp);
848: pnode_unlink(pp);
849: }
850:
851: assert(p->newln);
852: puts(".Bl -item");
853: TAILQ_FOREACH(pp, &pn->childq, child) {
854: assert(p->newln);
855: puts(".It");
856: pnode_print(p, pp);
857: pnode_printmclose(p, 1);
858: }
859: assert(p->newln);
860: puts(".El");
861: }
862:
863: static void
1.13 kristaps 864: pnode_printvariablelist(struct parse *p, struct pnode *pn)
865: {
866: struct pnode *pp;
867:
868: assert(p->newln);
869: TAILQ_FOREACH(pp, &pn->childq, child)
870: if (NODE_TITLE == pp->node) {
871: puts(".Pp");
872: pnode_print(p, pp);
873: pnode_unlink(pp);
874: }
875:
876: assert(p->newln);
877: puts(".Bl -tag -width Ds");
878: TAILQ_FOREACH(pp, &pn->childq, child)
879: if (NODE_VARLISTENTRY != pp->node) {
880: assert(p->newln);
881: fputs(".It", stdout);
882: pnode_printmacroline(p, pp);
883: } else {
884: assert(p->newln);
885: pnode_print(p, pp);
886: }
887: assert(p->newln);
888: puts(".El");
889: }
890:
1.1 kristaps 891: /*
892: * Print a parsed node (or ignore it--whatever).
893: * This is a recursive function.
894: * FIXME: macro line continuation?
895: */
896: static void
897: pnode_print(struct parse *p, struct pnode *pn)
898: {
899: struct pnode *pp;
900: char *cp;
1.10 kristaps 901: int last, sv;
1.1 kristaps 902:
903: if (NULL == pn)
904: return;
905:
1.10 kristaps 906: sv = p->newln;
1.1 kristaps 907:
908: switch (pn->node) {
1.4 kristaps 909: case (NODE_ARG):
1.10 kristaps 910: pnode_printarg(p, pn);
1.4 kristaps 911: pnode_unlinksub(pn);
912: break;
1.1 kristaps 913: case (NODE_CITEREFENTRY):
1.10 kristaps 914: assert(p->newln);
1.1 kristaps 915: pnode_printciterefentry(p, pn);
1.4 kristaps 916: pnode_unlinksub(pn);
1.1 kristaps 917: break;
918: case (NODE_CODE):
1.10 kristaps 919: pnode_printmopen(p);
1.13 kristaps 920: fputs("Li", stdout);
1.4 kristaps 921: break;
922: case (NODE_COMMAND):
1.10 kristaps 923: pnode_printmopen(p);
1.13 kristaps 924: fputs("Nm", stdout);
925: break;
926: case (NODE_EMPHASIS):
927: pnode_printmopen(p);
928: fputs("Em", stdout);
1.1 kristaps 929: break;
1.17 kristaps 930: case (NODE_FILENAME):
931: pnode_printmopen(p);
932: fputs("Pa", stdout);
933: break;
1.3 kristaps 934: case (NODE_FUNCTION):
1.10 kristaps 935: pnode_printmopen(p);
1.13 kristaps 936: fputs("Fn", stdout);
1.3 kristaps 937: break;
938: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 939: assert(p->newln);
1.3 kristaps 940: pnode_printfuncprototype(p, pn);
1.4 kristaps 941: pnode_unlinksub(pn);
1.3 kristaps 942: break;
1.1 kristaps 943: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 944: pnode_printmopen(p);
1.13 kristaps 945: fputs("Fd", stdout);
1.16 kristaps 946: break;
947: case (NODE_ITEMIZEDLIST):
948: assert(p->newln);
949: pnode_printitemizedlist(p, pn);
1.10 kristaps 950: break;
951: case (NODE_OPTION):
952: pnode_printmopen(p);
1.13 kristaps 953: fputs("Fl", stdout);
954: /* FIXME: bogus leading '-'? */
1.1 kristaps 955: break;
956: case (NODE_PARA):
1.10 kristaps 957: assert(p->newln);
1.13 kristaps 958: if (NULL != pn->parent &&
959: NODE_LISTITEM == pn->parent->node)
960: break;
1.1 kristaps 961: puts(".Pp");
1.3 kristaps 962: break;
963: case (NODE_PARAMETER):
1.10 kristaps 964: /* Suppress non-text children... */
965: pnode_printmopen(p);
966: fputs("Fa \"", stdout);
1.3 kristaps 967: pnode_printmacrolinepart(p, pn);
968: puts("\"");
1.4 kristaps 969: pnode_unlinksub(pn);
1.1 kristaps 970: break;
971: case (NODE_PROGRAMLISTING):
1.10 kristaps 972: assert(p->newln);
1.1 kristaps 973: puts(".Bd -literal");
1.15 kristaps 974: break;
975: case (NODE_REFENTRYINFO):
976: /* Suppress. */
977: pnode_unlinksub(pn);
1.1 kristaps 978: break;
979: case (NODE_REFMETA):
1.7 kristaps 980: abort();
1.1 kristaps 981: break;
982: case (NODE_REFNAME):
1.10 kristaps 983: /* Suppress non-text children... */
984: pnode_printmopen(p);
1.13 kristaps 985: fputs("Nm", stdout);
986: p->newln = 0;
1.10 kristaps 987: pnode_printmacrolinepart(p, pn);
1.4 kristaps 988: pnode_unlinksub(pn);
1.10 kristaps 989: break;
1.1 kristaps 990: case (NODE_REFNAMEDIV):
1.10 kristaps 991: assert(p->newln);
1.1 kristaps 992: puts(".Sh NAME");
993: break;
994: case (NODE_REFPURPOSE):
1.10 kristaps 995: assert(p->newln);
1.13 kristaps 996: pnode_printmopen(p);
997: fputs("Nd", stdout);
1.10 kristaps 998: break;
1.1 kristaps 999: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1000: assert(p->newln);
1.6 kristaps 1001: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1002: puts(".Sh SYNOPSIS");
1.1 kristaps 1003: break;
1004: case (NODE_REFSECT1):
1.10 kristaps 1005: assert(p->newln);
1.1 kristaps 1006: pnode_printrefsect(p, pn);
1007: break;
1.13 kristaps 1008: case (NODE_REPLACEABLE):
1009: pnode_printmopen(p);
1010: fputs("Ar", stdout);
1011: break;
1.8 kristaps 1012: case (NODE_STRUCTNAME):
1.10 kristaps 1013: pnode_printmopen(p);
1.13 kristaps 1014: fputs("Vt", stdout);
1.10 kristaps 1015: break;
1.1 kristaps 1016: case (NODE_TEXT):
1.13 kristaps 1017: if (0 == p->newln)
1018: putchar(' ');
1.1 kristaps 1019: bufclear(p);
1020: bufappend(p, pn);
1021: /*
1022: * Output all characters, squeezing out whitespace
1023: * between newlines.
1024: * XXX: all whitespace, including tabs (?).
1025: * Remember to escape control characters and escapes.
1026: */
1.10 kristaps 1027: assert(p->bsz);
1.1 kristaps 1028: for (last = '\n', cp = p->b; '\0' != *cp; ) {
1029: if ('\n' == last) {
1030: /* Consume all whitespace. */
1031: if (isspace((int)*cp)) {
1032: while (isspace((int)*cp))
1033: cp++;
1034: continue;
1035: } else if ('\'' == *cp || '.' == *cp)
1036: fputs("\\&", stdout);
1037: }
1038: putchar(last = *cp++);
1039: /* If we're a character escape, escape us. */
1040: if ('\\' == last)
1041: putchar('e');
1042: }
1.10 kristaps 1043: p->newln = 0;
1.1 kristaps 1044: break;
1.13 kristaps 1045: case (NODE_VARIABLELIST):
1046: assert(p->newln);
1047: pnode_printvariablelist(p, pn);
1048: pnode_unlinksub(pn);
1049: break;
1050: case (NODE_VARLISTENTRY):
1051: assert(p->newln);
1052: pnode_printvarlistentry(p, pn);
1053: break;
1.1 kristaps 1054: default:
1055: break;
1056: }
1057:
1058: TAILQ_FOREACH(pp, &pn->childq, child)
1059: pnode_print(p, pp);
1060:
1061: switch (pn->node) {
1.10 kristaps 1062: case (NODE_ARG):
1063: case (NODE_CODE):
1064: case (NODE_COMMAND):
1.13 kristaps 1065: case (NODE_EMPHASIS):
1.17 kristaps 1066: case (NODE_FILENAME):
1.10 kristaps 1067: case (NODE_FUNCTION):
1068: case (NODE_FUNCSYNOPSISINFO):
1069: case (NODE_OPTION):
1070: case (NODE_PARAMETER):
1.13 kristaps 1071: case (NODE_REPLACEABLE):
1072: case (NODE_REFPURPOSE):
1.10 kristaps 1073: case (NODE_STRUCTNAME):
1074: case (NODE_TEXT):
1075: pnode_printmclose(p, sv);
1076: break;
1.12 kristaps 1077: case (NODE_REFNAME):
1078: /*
1079: * If we're in the NAME macro and we have multiple
1080: * <refname> macros in sequence, then print out a
1081: * trailing comma before the newline.
1082: */
1083: if (NULL != pn->parent &&
1084: NODE_REFNAMEDIV == pn->parent->node &&
1085: NULL != TAILQ_NEXT(pn, child) &&
1086: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1087: fputs(" ,", stdout);
1088: pnode_printmclose(p, sv);
1089: break;
1.1 kristaps 1090: case (NODE_PROGRAMLISTING):
1.10 kristaps 1091: assert(p->newln);
1.1 kristaps 1092: puts(".Ed");
1.10 kristaps 1093: p->newln = 1;
1.1 kristaps 1094: break;
1095: default:
1096: break;
1097: }
1098: }
1099:
1100: /*
1101: * Loop around the read buffer until we've drained it of all data.
1102: * Invoke the parser context with each buffer fill.
1103: */
1104: static int
1105: readfile(XML_Parser xp, int fd,
1106: char *b, size_t bsz, const char *fn)
1107: {
1108: struct parse p;
1109: int rc;
1110: ssize_t ssz;
1111:
1112: memset(&p, 0, sizeof(struct parse));
1113:
1114: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1115: p.fname = fn;
1116: p.xml = xp;
1.1 kristaps 1117:
1118: XML_SetCharacterDataHandler(xp, xml_char);
1119: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1120: XML_SetUserData(xp, &p);
1121:
1122: while ((ssz = read(fd, b, bsz)) >= 0) {
1123: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1124: fprintf(stderr, "%s: %s\n", fn,
1125: XML_ErrorString
1126: (XML_GetErrorCode(xp)));
1127: else if ( ! p.stop && ssz > 0)
1128: continue;
1129: /*
1130: * Exit when we've read all or errors have occured
1131: * during the parse sequence.
1132: */
1.10 kristaps 1133: p.newln = 1;
1.7 kristaps 1134: pnode_printprologue(&p, p.root);
1.1 kristaps 1135: pnode_print(&p, p.root);
1136: pnode_free(p.root);
1137: free(p.b);
1138: return(0 != rc && ! p.stop);
1139: }
1140:
1141: /* Read error has occured. */
1142: perror(fn);
1143: pnode_free(p.root);
1144: free(p.b);
1145: return(0);
1146: }
1147:
1148: int
1149: main(int argc, char *argv[])
1150: {
1151: XML_Parser xp;
1152: const char *fname;
1153: char *buf;
1154: int fd, rc;
1155:
1156: fname = "-";
1157: xp = NULL;
1158: buf = NULL;
1159: rc = 0;
1160:
1161: if (-1 != getopt(argc, argv, ""))
1162: return(EXIT_FAILURE);
1163:
1164: argc -= optind;
1165: argv += optind;
1166:
1167: if (argc > 1)
1168: return(EXIT_FAILURE);
1169: else if (argc > 0)
1170: fname = argv[0];
1171:
1172: /* Read from stdin or a file. */
1173: fd = 0 == strcmp(fname, "-") ?
1174: STDIN_FILENO : open(fname, O_RDONLY, 0);
1175:
1176: /*
1177: * Open file for reading.
1178: * Allocate a read buffer.
1179: * Create the parser context.
1180: * Dive directly into the parse.
1181: */
1182: if (-1 == fd)
1183: perror(fname);
1184: else if (NULL == (buf = malloc(4096)))
1185: perror(NULL);
1186: else if (NULL == (xp = XML_ParserCreate(NULL)))
1187: perror(NULL);
1188: else if ( ! readfile(xp, fd, buf, 4096, fname))
1189: rc = 1;
1190:
1191: XML_ParserFree(xp);
1192: free(buf);
1193: if (STDIN_FILENO != fd)
1194: close(fd);
1195: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1196: }
CVSweb