Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.13
1.13 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.12 2014/03/29 22:44:06 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 ! kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/*
 * Static description of one recognised DocBook element.
 */
struct	node {
	const char	*name;	/* docbook element name */
	unsigned int	 flags;	/* bit-mask of the NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
53:
/* Tail-queue head types: a list of parse nodes, a list of attributes. */
TAILQ_HEAD(pnodeq, pnode);
TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.13 ! kristaps 95: { "emphasis", 0 },
1.3 kristaps 96: { "funcdef", 0 },
97: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 98: { "funcsynopsis", NODE_IGNTEXT },
99: { "funcsynopsisinfo", 0 },
1.3 kristaps 100: { "function", 0 },
1.13 ! kristaps 101: { "listitem", NODE_IGNTEXT },
1.1 kristaps 102: { "manvolnum", 0 },
1.4 kristaps 103: { "option", 0 },
1.1 kristaps 104: { "para", 0 },
1.3 kristaps 105: { "paramdef", 0 },
106: { "parameter", 0 },
1.1 kristaps 107: { "programlisting", 0 },
108: { "refclass", NODE_IGNTEXT },
109: { "refdescriptor", NODE_IGNTEXT },
110: { "refentry", NODE_IGNTEXT },
111: { "refentrytitle", 0 },
112: { "refmeta", NODE_IGNTEXT },
113: { "refmiscinfo", NODE_IGNTEXT },
114: { "refname", 0 },
115: { "refnamediv", NODE_IGNTEXT },
116: { "refpurpose", 0 },
117: { "refsect1", 0 },
118: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 ! kristaps 119: { "replaceable", 0 },
1.8 kristaps 120: { "structname", 0 },
1.1 kristaps 121: { "synopsis", 0 },
1.13 ! kristaps 122: { "term", 0 },
1.1 kristaps 123: { NULL, 0 },
124: { "title", 0 },
1.13 ! kristaps 125: { "variablelist", NODE_IGNTEXT },
! 126: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 127: };
128:
/* Forward declaration: the per-element printers recurse into it. */
static	void	 pnode_print(struct parse *, struct pnode *);
131:
1.8 kristaps 132: /*
133: * Process a stream of characters.
134: * We store text as nodes in and of themselves.
135: * If a text node is already open, append to it.
136: * If it's not open, open one under the current context.
137: */
1.1 kristaps 138: static void
139: xml_char(void *arg, const XML_Char *p, int sz)
140: {
141: struct parse *ps = arg;
142: struct pnode *dat;
1.4 kristaps 143: int i;
1.1 kristaps 144:
145: /* Stopped or no tree yet. */
146: if (ps->stop || NODE_ROOT == ps->node)
147: return;
148:
149: /* Not supposed to be collecting text. */
150: assert(NULL != ps->cur);
151: if (NODE_IGNTEXT & nodes[ps->node].flags)
152: return;
153:
154: /*
155: * Are we in the midst of processing text?
156: * If we're not processing text right now, then create a text
157: * node for doing so.
1.4 kristaps 158: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 159: * process: strip out all leading whitespace to be sure.
1.1 kristaps 160: */
161: if (NODE_TEXT != ps->node) {
1.4 kristaps 162: for (i = 0; i < sz; i++)
163: if ( ! isspace((int)p[i]))
164: break;
165: if (i == sz)
166: return;
1.10 kristaps 167: p += i;
168: sz -= i;
1.1 kristaps 169: dat = calloc(1, sizeof(struct pnode));
170: if (NULL == dat) {
171: perror(NULL);
172: exit(EXIT_FAILURE);
173: }
174:
175: dat->node = ps->node = NODE_TEXT;
176: dat->parent = ps->cur;
177: TAILQ_INIT(&dat->childq);
1.12 kristaps 178: TAILQ_INIT(&dat->attrq);
1.1 kristaps 179: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
180: ps->cur = dat;
181: assert(NULL != ps->root);
182: }
183:
184: /* Append to current buffer. */
185: assert(sz >= 0);
186: ps->cur->b = realloc(ps->cur->b,
187: ps->cur->bsz + (size_t)sz);
188: if (NULL == ps->cur->b) {
189: perror(NULL);
190: exit(EXIT_FAILURE);
191: }
192: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
193: ps->cur->bsz += (size_t)sz;
194: }
195:
1.10 kristaps 196: static void
197: pnode_trim(struct pnode *pn)
198: {
199:
200: assert(NODE_TEXT == pn->node);
201: for ( ; pn->bsz > 0; pn->bsz--)
202: if ( ! isspace((int)pn->b[pn->bsz - 1]))
203: break;
204: }
205:
1.1 kristaps 206: /*
207: * Begin an element.
208: * First, look for the element.
209: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 210: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 211: * If we find it but we're not parsing yet (i.e., it's not a refentry
212: * and thus out of context), keep going.
1.8 kristaps 213: * If we find it and we're at the root and already have a tree, puke and
214: * exit (FIXME: I don't think this is right?).
215: * If we find it but we're parsing a text node, close out the text node,
216: * return to its parent, and keep going.
1.1 kristaps 217: * Make sure that the element is in the right context.
218: * Lastly, put the node onto our parse tree and continue.
219: */
220: static void
221: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
222: {
1.12 kristaps 223: struct parse *ps = arg;
224: enum nodeid node;
225: enum attrkey key;
226: enum attrval val;
227: struct pnode *dat;
228: struct pattr *pattr;
229: const XML_Char **att;
1.1 kristaps 230:
231: if (ps->stop)
232: return;
233:
234: /* Close out text node, if applicable... */
235: if (NODE_TEXT == ps->node) {
236: assert(NULL != ps->cur);
1.10 kristaps 237: pnode_trim(ps->cur);
1.1 kristaps 238: ps->cur = ps->cur->parent;
239: assert(NULL != ps->cur);
240: ps->node = ps->cur->node;
241: }
242:
243: for (node = 0; node < NODE__MAX; node++)
244: if (NULL == nodes[node].name)
245: continue;
246: else if (0 == strcmp(nodes[node].name, name))
247: break;
248:
249: if (NODE__MAX == node && NODE_ROOT == ps->node) {
250: return;
251: } else if (NODE__MAX == node) {
1.12 kristaps 252: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
253: ps->fname, XML_GetCurrentLineNumber(ps->xml),
254: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 255: ps->stop = 1;
256: return;
257: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 258: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
259: ps->fname, XML_GetCurrentLineNumber(ps->xml),
260: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 261: ps->stop = 1;
262: return;
263: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
264: return;
265: } else if ( ! isparent(node, ps->node)) {
1.13 ! kristaps 266: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
! 267: "of node \"%s\"\n",
1.12 kristaps 268: ps->fname, XML_GetCurrentLineNumber(ps->xml),
269: XML_GetCurrentColumnNumber(ps->xml),
270: NULL == nodes[ps->node].name ?
1.13 ! kristaps 271: "(none)" : nodes[ps->node].name,
! 272: NULL == nodes[node].name ?
! 273: "(none)" : nodes[node].name);
1.1 kristaps 274: ps->stop = 1;
275: return;
276: }
277:
278: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
279: perror(NULL);
280: exit(EXIT_FAILURE);
281: }
282:
283: dat->node = ps->node = node;
284: dat->parent = ps->cur;
285: TAILQ_INIT(&dat->childq);
1.12 kristaps 286: TAILQ_INIT(&dat->attrq);
1.1 kristaps 287:
288: if (NULL != ps->cur)
289: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
290:
291: ps->cur = dat;
292: if (NULL == ps->root)
293: ps->root = dat;
1.12 kristaps 294:
295: /*
296: * Process attributes.
297: */
298: for (att = atts; NULL != *att; att += 2) {
299: for (key = 0; key < ATTRKEY__MAX; key++)
300: if (0 == strcmp(*att, attrkeys[key]))
301: break;
302: if (ATTRKEY__MAX == key) {
303: fprintf(stderr, "%s:%zu:%zu: unknown "
304: "attribute \"%s\"\n", ps->fname,
305: XML_GetCurrentLineNumber(ps->xml),
306: XML_GetCurrentColumnNumber(ps->xml),
307: *att);
308: continue;
309: } else if ( ! isattrkey(node, key)) {
310: fprintf(stderr, "%s:%zu:%zu: bad "
311: "attribute \"%s\"\n", ps->fname,
312: XML_GetCurrentLineNumber(ps->xml),
313: XML_GetCurrentColumnNumber(ps->xml),
314: *att);
315: continue;
316: }
317: for (val = 0; val < ATTRVAL__MAX; val++)
318: if (0 == strcmp(*(att + 1), attrvals[val]))
319: break;
320: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
321: fprintf(stderr, "%s:%zu:%zu: bad "
322: "value \"%s\"\n", ps->fname,
323: XML_GetCurrentLineNumber(ps->xml),
324: XML_GetCurrentColumnNumber(ps->xml),
325: *(att + 1));
326: continue;
327: }
328: pattr = calloc(1, sizeof(struct pattr));
329: pattr->key = key;
330: pattr->val = val;
331: if (ATTRVAL__MAX == val)
332: pattr->rawval = strdup(*(att + 1));
333: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
334: }
335:
1.1 kristaps 336: }
337:
338: /*
339: * Roll up the parse tree.
1.8 kristaps 340: * If we're at a text node, roll that one up first.
1.1 kristaps 341: * If we hit the root, then assign ourselves as the NODE_ROOT.
342: */
343: static void
344: xml_elem_end(void *arg, const XML_Char *name)
345: {
346: struct parse *ps = arg;
347:
348: if (ps->stop || NODE_ROOT == ps->node)
349: return;
350:
351: /* Close out text node, if applicable... */
352: if (NODE_TEXT == ps->node) {
353: assert(NULL != ps->cur);
1.10 kristaps 354: pnode_trim(ps->cur);
1.1 kristaps 355: ps->cur = ps->cur->parent;
356: assert(NULL != ps->cur);
357: ps->node = ps->cur->node;
358: }
359:
360: if (NULL == (ps->cur = ps->cur->parent))
361: ps->node = NODE_ROOT;
362: else
363: ps->node = ps->cur->node;
364: }
365:
1.8 kristaps 366: /*
367: * Recursively free a node (NULL is ok).
368: */
1.1 kristaps 369: static void
370: pnode_free(struct pnode *pn)
371: {
372: struct pnode *pp;
1.12 kristaps 373: struct pattr *ap;
1.1 kristaps 374:
375: if (NULL == pn)
376: return;
377:
378: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
379: TAILQ_REMOVE(&pn->childq, pp, child);
380: pnode_free(pp);
381: }
382:
1.12 kristaps 383: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
384: TAILQ_REMOVE(&pn->attrq, ap, child);
385: free(ap->rawval);
386: free(ap);
387: }
388:
1.1 kristaps 389: free(pn->b);
390: free(pn);
391: }
392:
1.8 kristaps 393: /*
394: * Unlink a node from its parent and pnode_free() it.
395: */
1.1 kristaps 396: static void
397: pnode_unlink(struct pnode *pn)
398: {
399:
400: if (NULL != pn->parent)
401: TAILQ_REMOVE(&pn->parent->childq, pn, child);
402: pnode_free(pn);
403: }
404:
1.8 kristaps 405: /*
406: * Unlink all children of a node and pnode_free() them.
407: */
1.1 kristaps 408: static void
1.4 kristaps 409: pnode_unlinksub(struct pnode *pn)
410: {
411:
412: while ( ! TAILQ_EMPTY(&pn->childq))
413: pnode_unlink(TAILQ_FIRST(&pn->childq));
414: }
415:
1.8 kristaps 416: /*
417: * Reset the lookaside buffer.
418: */
1.4 kristaps 419: static void
1.1 kristaps 420: bufclear(struct parse *p)
421: {
422:
423: p->b[p->bsz = 0] = '\0';
424: }
425:
1.8 kristaps 426: /*
427: * Append NODE_TEXT contents to the current buffer, reallocating its
428: * size if necessary.
429: * The buffer is ALWAYS nil-terminated.
430: */
1.1 kristaps 431: static void
432: bufappend(struct parse *p, struct pnode *pn)
433: {
434:
435: assert(NODE_TEXT == pn->node);
436: if (p->bsz + pn->bsz + 1 > p->mbsz) {
437: p->mbsz = p->bsz + pn->bsz + 1;
438: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
439: perror(NULL);
440: exit(EXIT_FAILURE);
441: }
442: }
443: memcpy(p->b + p->bsz, pn->b, pn->bsz);
444: p->bsz += pn->bsz;
445: p->b[p->bsz] = '\0';
446: }
447:
1.8 kristaps 448: /*
449: * Recursively append all NODE_TEXT nodes to the buffer.
450: * This descends into non-text nodes, but doesn't do anything beyond
451: * them.
452: * In other words, this is a recursive text grok.
453: */
1.3 kristaps 454: static void
455: bufappend_r(struct parse *p, struct pnode *pn)
456: {
457: struct pnode *pp;
458:
459: if (NODE_TEXT == pn->node)
460: bufappend(p, pn);
461: TAILQ_FOREACH(pp, &pn->childq, child)
462: bufappend_r(p, pp);
463: }
464:
1.12 kristaps 465: #define MACROLINE_NORM 0
466: #define MACROLINE_UPPER 1
1.1 kristaps 467: /*
1.8 kristaps 468: * Recursively print text presumably on a macro line.
1.1 kristaps 469: * Convert all whitespace to regular spaces.
470: */
471: static void
1.12 kristaps 472: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 473: {
474: char *cp;
475:
1.13 ! kristaps 476: if (0 == p->newln)
! 477: putchar(' ');
! 478:
1.1 kristaps 479: bufclear(p);
1.3 kristaps 480: bufappend_r(p, pn);
1.1 kristaps 481:
482: /* Convert all space to spaces. */
483: for (cp = p->b; '\0' != *cp; cp++)
484: if (isspace((int)*cp))
485: *cp = ' ';
486:
487: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 488: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 489: for ( ; '\0' != *cp; cp++) {
490: /* Escape us if we look like a macro. */
491: if ((cp == p->b || ' ' == *(cp - 1)) &&
492: isupper((int)*cp) &&
493: '\0' != *(cp + 1) &&
494: islower((int)*(cp + 1)) &&
495: ('\0' == *(cp + 2) ||
496: ' ' == *(cp + 2) ||
497: (islower((int)*(cp + 2)) &&
498: ('\0' == *(cp + 3) ||
499: ' ' == *(cp + 3)))))
500: fputs("\\&", stdout);
1.12 kristaps 501: if (MACROLINE_UPPER & fl)
502: putchar(toupper((int)*cp));
503: else
504: putchar((int)*cp);
1.1 kristaps 505: /* If we're a character escape, escape us. */
506: if ('\\' == *cp)
507: putchar('e');
508: }
509: }
510:
1.12 kristaps 511: static void
512: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
513: {
514:
515: pnode_printmacrolinetext(p, pn, 0);
516: }
517:
1.1 kristaps 518: /*
519: * Just pnode_printmacrolinepart() but with a newline.
520: * If no text, just the newline.
521: */
522: static void
523: pnode_printmacroline(struct parse *p, struct pnode *pn)
524: {
525:
1.13 ! kristaps 526: assert(0 == p->newln);
1.12 kristaps 527: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 528: putchar('\n');
1.13 ! kristaps 529: p->newln = 1;
1.1 kristaps 530: }
531:
1.10 kristaps 532: static void
533: pnode_printmopen(struct parse *p)
534: {
535: if (p->newln) {
536: putchar('.');
537: p->newln = 0;
538: } else
539: putchar(' ');
540: }
541:
542: static void
543: pnode_printmclose(struct parse *p, int sv)
544: {
545:
546: if (sv && ! p->newln) {
547: putchar('\n');
548: p->newln = 1;
549: }
550: }
551:
1.8 kristaps 552: /*
1.10 kristaps 553: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 554: */
1.1 kristaps 555: static void
1.6 kristaps 556: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
557: {
558: struct pnode *pp;
559:
1.10 kristaps 560: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 561: if (NODE_TITLE == pp->node) {
562: pnode_unlink(pp);
1.10 kristaps 563: return;
1.6 kristaps 564: }
565: }
566:
1.8 kristaps 567: /*
568: * Start a hopefully-named `Sh' section.
569: */
1.6 kristaps 570: static void
1.1 kristaps 571: pnode_printrefsect(struct parse *p, struct pnode *pn)
572: {
573: struct pnode *pp;
574:
575: TAILQ_FOREACH(pp, &pn->childq, child)
576: if (NODE_TITLE == pp->node)
577: break;
578:
1.13 ! kristaps 579: fputs(".Sh", stdout);
! 580: p->newln = 0;
1.4 kristaps 581:
1.5 kristaps 582: if (NULL != pp) {
1.1 kristaps 583: pnode_printmacroline(p, pp);
1.5 kristaps 584: pnode_unlink(pp);
1.13 ! kristaps 585: } else {
1.4 kristaps 586: puts("UNKNOWN");
1.13 ! kristaps 587: p->newln = 1;
! 588: }
1.1 kristaps 589: }
590:
1.8 kristaps 591: /*
592: * Start a reference, extracting the title and volume.
593: */
1.1 kristaps 594: static void
595: pnode_printciterefentry(struct parse *p, struct pnode *pn)
596: {
597: struct pnode *pp, *title, *manvol;
598:
599: title = manvol = NULL;
1.13 ! kristaps 600: assert(p->newln);
1.1 kristaps 601: TAILQ_FOREACH(pp, &pn->childq, child)
602: if (NODE_MANVOLNUM == pp->node)
603: manvol = pp;
604: else if (NODE_REFENTRYTITLE == pp->node)
605: title = pp;
606:
1.13 ! kristaps 607: fputs(".Xr", stdout);
! 608: p->newln = 0;
1.4 kristaps 609:
1.1 kristaps 610: if (NULL != title) {
611: pnode_printmacrolinepart(p, title);
612: } else
1.13 ! kristaps 613: fputs(" unknown ", stdout);
1.4 kristaps 614:
1.13 ! kristaps 615: if (NULL == manvol) {
! 616: puts(" 1");
! 617: p->newln = 1;
! 618: } else
1.1 kristaps 619: pnode_printmacroline(p, manvol);
620: }
621:
622: static void
623: pnode_printrefmeta(struct parse *p, struct pnode *pn)
624: {
625: struct pnode *pp, *title, *manvol;
626:
627: title = manvol = NULL;
1.13 ! kristaps 628: assert(p->newln);
1.1 kristaps 629: TAILQ_FOREACH(pp, &pn->childq, child)
630: if (NODE_MANVOLNUM == pp->node)
631: manvol = pp;
632: else if (NODE_REFENTRYTITLE == pp->node)
633: title = pp;
634:
1.2 kristaps 635: puts(".Dd $Mdocdate" "$");
1.13 ! kristaps 636: fputs(".Dt", stdout);
! 637: p->newln = 0;
1.1 kristaps 638:
1.13 ! kristaps 639: if (NULL != title)
1.12 kristaps 640: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 ! kristaps 641: else
! 642: fputs(" UNKNOWN ", stdout);
! 643:
! 644: if (NULL == manvol) {
! 645: puts(" 1");
! 646: p->newln = 1;
1.1 kristaps 647: } else
648: pnode_printmacroline(p, manvol);
649:
650: puts(".Os");
651: }
652:
1.3 kristaps 653: static void
654: pnode_printfuncdef(struct parse *p, struct pnode *pn)
655: {
656: struct pnode *pp, *ftype, *func;
657:
1.13 ! kristaps 658: assert(p->newln);
1.3 kristaps 659: ftype = func = NULL;
660: TAILQ_FOREACH(pp, &pn->childq, child)
661: if (NODE_TEXT == pp->node)
662: ftype = pp;
663: else if (NODE_FUNCTION == pp->node)
664: func = pp;
665:
666: if (NULL != ftype) {
1.13 ! kristaps 667: fputs(".Ft", stdout);
! 668: p->newln = 0;
1.3 kristaps 669: pnode_printmacroline(p, ftype);
670: }
671:
672: if (NULL != func) {
1.13 ! kristaps 673: fputs(".Fo", stdout);
! 674: p->newln = 0;
1.3 kristaps 675: pnode_printmacroline(p, func);
1.13 ! kristaps 676: } else {
1.3 kristaps 677: puts(".Fo UNKNOWN");
1.13 ! kristaps 678: p->newln = 1;
! 679: }
1.3 kristaps 680: }
681:
682: static void
683: pnode_printparamdef(struct parse *p, struct pnode *pn)
684: {
685: struct pnode *pp, *ptype, *param;
686:
1.13 ! kristaps 687: assert(p->newln);
1.3 kristaps 688: ptype = param = NULL;
689: TAILQ_FOREACH(pp, &pn->childq, child)
690: if (NODE_TEXT == pp->node)
691: ptype = pp;
692: else if (NODE_PARAMETER == pp->node)
693: param = pp;
694:
695: fputs(".Fa \"", stdout);
1.13 ! kristaps 696: p->newln = 0;
1.3 kristaps 697: if (NULL != ptype) {
698: pnode_printmacrolinepart(p, ptype);
699: putchar(' ');
700: }
701:
702: if (NULL != param)
703: pnode_printmacrolinepart(p, param);
704:
705: puts("\"");
1.13 ! kristaps 706: p->newln = 1;
1.3 kristaps 707: }
708:
709: static void
710: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
711: {
712: struct pnode *pp, *fdef;
713:
1.13 ! kristaps 714: assert(p->newln);
1.3 kristaps 715: TAILQ_FOREACH(fdef, &pn->childq, child)
716: if (NODE_FUNCDEF == fdef->node)
717: break;
718:
1.4 kristaps 719: if (NULL != fdef)
1.3 kristaps 720: pnode_printfuncdef(p, fdef);
1.4 kristaps 721: else
1.3 kristaps 722: puts(".Fo UNKNOWN");
723:
1.4 kristaps 724: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 725: if (NODE_PARAMDEF == pp->node)
726: pnode_printparamdef(p, pp);
727:
728: puts(".Fc");
1.13 ! kristaps 729: p->newln = 1;
1.3 kristaps 730: }
731:
1.10 kristaps 732: /*
733: * The <arg> element is more complicated than it should be because text
734: * nodes are treated like ".Ar foo", but non-text nodes need to be
735: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 736: * This also handles the case of "repetition" (or in other words, the
737: * ellipsis following an argument) and optionality.
1.10 kristaps 738: */
1.4 kristaps 739: static void
1.10 kristaps 740: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 741: {
742: struct pnode *pp;
1.12 kristaps 743: struct pattr *ap;
744: int isop, isrep;
745:
746: isop = 1;
747: isrep = 0;
748: TAILQ_FOREACH(ap, &pn->attrq, child)
749: if (ATTRKEY_CHOICE == ap->key &&
750: (ATTRVAL_PLAIN == ap->val ||
751: ATTRVAL_REQ == ap->val))
752: isop = 0;
753: else if (ATTRKEY_REP == ap->key &&
754: (ATTRVAL_REPEAT == ap->val))
755: isrep = 1;
756:
757: if (isop) {
758: pnode_printmopen(p);
1.13 ! kristaps 759: fputs("Op", stdout);
1.12 kristaps 760: }
1.4 kristaps 761:
1.10 kristaps 762: TAILQ_FOREACH(pp, &pn->childq, child) {
763: if (NODE_TEXT == pp->node) {
764: pnode_printmopen(p);
1.13 ! kristaps 765: fputs("Ar", stdout);
1.10 kristaps 766: }
767: pnode_print(p, pp);
1.12 kristaps 768: if (NODE_TEXT == pp->node && isrep)
769: fputs("...", stdout);
1.10 kristaps 770: }
1.4 kristaps 771: }
772:
1.7 kristaps 773: /*
774: * Recursively search and return the first instance of "node".
775: */
776: static struct pnode *
777: pnode_findfirst(struct pnode *pn, enum nodeid node)
778: {
779: struct pnode *pp, *res;
780:
781: res = NULL;
782: TAILQ_FOREACH(pp, &pn->childq, child) {
783: res = pp->node == node ? pp :
784: pnode_findfirst(pp, node);
785: if (NULL != res)
786: break;
787: }
788:
789: return(res);
790: }
791:
792: static void
793: pnode_printprologue(struct parse *p, struct pnode *pn)
794: {
795: struct pnode *pp;
796:
1.9 kristaps 797: pp = NULL == p->root ? NULL :
798: pnode_findfirst(p->root, NODE_REFMETA);
799:
800: if (NULL != pp) {
1.7 kristaps 801: pnode_printrefmeta(p, pp);
802: pnode_unlink(pp);
803: } else {
804: puts(".\\\" Supplying bogus prologue...");
805: puts(".Dd $Mdocdate" "$");
806: puts(".Dt UNKNOWN 1");
807: puts(".Os");
808: }
809: }
810:
1.13 ! kristaps 811: static void
! 812: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
! 813: {
! 814: struct pnode *pp;
! 815:
! 816: assert(p->newln);
! 817: TAILQ_FOREACH(pp, &pn->childq, child)
! 818: if (NODE_TERM == pp->node) {
! 819: fputs(".It", stdout);
! 820: p->newln = 0;
! 821: pnode_print(p, pp);
! 822: pnode_unlink(pp);
! 823: putchar('\n');
! 824: p->newln = 1;
! 825: return;
! 826: }
! 827:
! 828: puts(".It");
! 829: p->newln = 1;
! 830: }
! 831:
! 832: static void
! 833: pnode_printvariablelist(struct parse *p, struct pnode *pn)
! 834: {
! 835: struct pnode *pp;
! 836:
! 837: assert(p->newln);
! 838: TAILQ_FOREACH(pp, &pn->childq, child)
! 839: if (NODE_TITLE == pp->node) {
! 840: puts(".Pp");
! 841: pnode_print(p, pp);
! 842: pnode_unlink(pp);
! 843: }
! 844:
! 845: assert(p->newln);
! 846: puts(".Bl -tag -width Ds");
! 847: TAILQ_FOREACH(pp, &pn->childq, child)
! 848: if (NODE_VARLISTENTRY != pp->node) {
! 849: assert(p->newln);
! 850: fputs(".It", stdout);
! 851: pnode_printmacroline(p, pp);
! 852: } else {
! 853: assert(p->newln);
! 854: pnode_print(p, pp);
! 855: }
! 856: assert(p->newln);
! 857: puts(".El");
! 858: }
! 859:
1.1 kristaps 860: /*
861: * Print a parsed node (or ignore it--whatever).
862: * This is a recursive function.
863: * FIXME: macro line continuation?
864: */
865: static void
866: pnode_print(struct parse *p, struct pnode *pn)
867: {
868: struct pnode *pp;
869: char *cp;
1.10 kristaps 870: int last, sv;
1.1 kristaps 871:
872: if (NULL == pn)
873: return;
874:
1.10 kristaps 875: sv = p->newln;
1.1 kristaps 876:
877: switch (pn->node) {
1.4 kristaps 878: case (NODE_ARG):
1.10 kristaps 879: pnode_printarg(p, pn);
1.4 kristaps 880: pnode_unlinksub(pn);
881: break;
1.1 kristaps 882: case (NODE_CITEREFENTRY):
1.10 kristaps 883: assert(p->newln);
1.1 kristaps 884: pnode_printciterefentry(p, pn);
1.4 kristaps 885: pnode_unlinksub(pn);
1.1 kristaps 886: break;
887: case (NODE_CODE):
1.10 kristaps 888: pnode_printmopen(p);
1.13 ! kristaps 889: fputs("Li", stdout);
1.4 kristaps 890: break;
891: case (NODE_COMMAND):
1.10 kristaps 892: pnode_printmopen(p);
1.13 ! kristaps 893: fputs("Nm", stdout);
! 894: break;
! 895: case (NODE_EMPHASIS):
! 896: pnode_printmopen(p);
! 897: fputs("Em", stdout);
1.1 kristaps 898: break;
1.3 kristaps 899: case (NODE_FUNCTION):
1.10 kristaps 900: pnode_printmopen(p);
1.13 ! kristaps 901: fputs("Fn", stdout);
1.3 kristaps 902: break;
903: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 904: assert(p->newln);
1.3 kristaps 905: pnode_printfuncprototype(p, pn);
1.4 kristaps 906: pnode_unlinksub(pn);
1.3 kristaps 907: break;
1.1 kristaps 908: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 909: pnode_printmopen(p);
1.13 ! kristaps 910: fputs("Fd", stdout);
1.10 kristaps 911: break;
912: case (NODE_OPTION):
913: pnode_printmopen(p);
1.13 ! kristaps 914: fputs("Fl", stdout);
! 915: /* FIXME: bogus leading '-'? */
1.1 kristaps 916: break;
917: case (NODE_PARA):
1.10 kristaps 918: assert(p->newln);
1.13 ! kristaps 919: if (NULL != pn->parent &&
! 920: NODE_LISTITEM == pn->parent->node)
! 921: break;
1.1 kristaps 922: puts(".Pp");
1.3 kristaps 923: break;
924: case (NODE_PARAMETER):
1.10 kristaps 925: /* Suppress non-text children... */
926: pnode_printmopen(p);
927: fputs("Fa \"", stdout);
1.3 kristaps 928: pnode_printmacrolinepart(p, pn);
929: puts("\"");
1.4 kristaps 930: pnode_unlinksub(pn);
1.1 kristaps 931: break;
932: case (NODE_PROGRAMLISTING):
1.10 kristaps 933: assert(p->newln);
1.1 kristaps 934: puts(".Bd -literal");
935: break;
936: case (NODE_REFMETA):
1.7 kristaps 937: abort();
1.1 kristaps 938: break;
939: case (NODE_REFNAME):
1.10 kristaps 940: /* Suppress non-text children... */
941: pnode_printmopen(p);
1.13 ! kristaps 942: fputs("Nm", stdout);
! 943: p->newln = 0;
1.10 kristaps 944: pnode_printmacrolinepart(p, pn);
1.4 kristaps 945: pnode_unlinksub(pn);
1.10 kristaps 946: break;
1.1 kristaps 947: case (NODE_REFNAMEDIV):
1.10 kristaps 948: assert(p->newln);
1.1 kristaps 949: puts(".Sh NAME");
950: break;
951: case (NODE_REFPURPOSE):
1.10 kristaps 952: assert(p->newln);
1.13 ! kristaps 953: pnode_printmopen(p);
! 954: fputs("Nd", stdout);
1.10 kristaps 955: break;
1.1 kristaps 956: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 957: assert(p->newln);
1.6 kristaps 958: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 959: puts(".Sh SYNOPSIS");
1.1 kristaps 960: break;
961: case (NODE_REFSECT1):
1.10 kristaps 962: assert(p->newln);
1.1 kristaps 963: pnode_printrefsect(p, pn);
964: break;
1.13 ! kristaps 965: case (NODE_REPLACEABLE):
! 966: pnode_printmopen(p);
! 967: fputs("Ar", stdout);
! 968: break;
1.8 kristaps 969: case (NODE_STRUCTNAME):
1.10 kristaps 970: pnode_printmopen(p);
1.13 ! kristaps 971: fputs("Vt", stdout);
1.10 kristaps 972: break;
1.1 kristaps 973: case (NODE_TEXT):
1.13 ! kristaps 974: if (0 == p->newln)
! 975: putchar(' ');
1.1 kristaps 976: bufclear(p);
977: bufappend(p, pn);
978: /*
979: * Output all characters, squeezing out whitespace
980: * between newlines.
981: * XXX: all whitespace, including tabs (?).
982: * Remember to escape control characters and escapes.
983: */
1.10 kristaps 984: assert(p->bsz);
1.1 kristaps 985: for (last = '\n', cp = p->b; '\0' != *cp; ) {
986: if ('\n' == last) {
987: /* Consume all whitespace. */
988: if (isspace((int)*cp)) {
989: while (isspace((int)*cp))
990: cp++;
991: continue;
992: } else if ('\'' == *cp || '.' == *cp)
993: fputs("\\&", stdout);
994: }
995: putchar(last = *cp++);
996: /* If we're a character escape, escape us. */
997: if ('\\' == last)
998: putchar('e');
999: }
1.10 kristaps 1000: p->newln = 0;
1.1 kristaps 1001: break;
1.13 ! kristaps 1002: case (NODE_VARIABLELIST):
! 1003: assert(p->newln);
! 1004: pnode_printvariablelist(p, pn);
! 1005: pnode_unlinksub(pn);
! 1006: break;
! 1007: case (NODE_VARLISTENTRY):
! 1008: assert(p->newln);
! 1009: pnode_printvarlistentry(p, pn);
! 1010: break;
1.1 kristaps 1011: default:
1012: break;
1013: }
1014:
1015: TAILQ_FOREACH(pp, &pn->childq, child)
1016: pnode_print(p, pp);
1017:
1018: switch (pn->node) {
1.10 kristaps 1019: case (NODE_ARG):
1020: case (NODE_CODE):
1021: case (NODE_COMMAND):
1.13 ! kristaps 1022: case (NODE_EMPHASIS):
1.10 kristaps 1023: case (NODE_FUNCTION):
1024: case (NODE_FUNCSYNOPSISINFO):
1025: case (NODE_OPTION):
1026: case (NODE_PARAMETER):
1.13 ! kristaps 1027: case (NODE_REPLACEABLE):
! 1028: case (NODE_REFPURPOSE):
1.10 kristaps 1029: case (NODE_STRUCTNAME):
1030: case (NODE_TEXT):
1031: pnode_printmclose(p, sv);
1032: break;
1.12 kristaps 1033: case (NODE_REFNAME):
1034: /*
1035: * If we're in the NAME macro and we have multiple
1036: * <refname> macros in sequence, then print out a
1037: * trailing comma before the newline.
1038: */
1039: if (NULL != pn->parent &&
1040: NODE_REFNAMEDIV == pn->parent->node &&
1041: NULL != TAILQ_NEXT(pn, child) &&
1042: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1043: fputs(" ,", stdout);
1044: pnode_printmclose(p, sv);
1045: break;
1.1 kristaps 1046: case (NODE_PROGRAMLISTING):
1.10 kristaps 1047: assert(p->newln);
1.1 kristaps 1048: puts(".Ed");
1.10 kristaps 1049: p->newln = 1;
1.1 kristaps 1050: break;
1051: default:
1052: break;
1053: }
1054: }
1055:
1056: /*
1057: * Loop around the read buffer until we've drained it of all data.
1058: * Invoke the parser context with each buffer fill.
1059: */
1060: static int
1061: readfile(XML_Parser xp, int fd,
1062: char *b, size_t bsz, const char *fn)
1063: {
1064: struct parse p;
1065: int rc;
1066: ssize_t ssz;
1067:
1068: memset(&p, 0, sizeof(struct parse));
1069:
1070: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1071: p.fname = fn;
1072: p.xml = xp;
1.1 kristaps 1073:
1074: XML_SetCharacterDataHandler(xp, xml_char);
1075: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1076: XML_SetUserData(xp, &p);
1077:
1078: while ((ssz = read(fd, b, bsz)) >= 0) {
1079: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1080: fprintf(stderr, "%s: %s\n", fn,
1081: XML_ErrorString
1082: (XML_GetErrorCode(xp)));
1083: else if ( ! p.stop && ssz > 0)
1084: continue;
1085: /*
1086: * Exit when we've read all or errors have occured
1087: * during the parse sequence.
1088: */
1.10 kristaps 1089: p.newln = 1;
1.7 kristaps 1090: pnode_printprologue(&p, p.root);
1.1 kristaps 1091: pnode_print(&p, p.root);
1092: pnode_free(p.root);
1093: free(p.b);
1094: return(0 != rc && ! p.stop);
1095: }
1096:
1097: /* Read error has occured. */
1098: perror(fn);
1099: pnode_free(p.root);
1100: free(p.b);
1101: return(0);
1102: }
1103:
1104: int
1105: main(int argc, char *argv[])
1106: {
1107: XML_Parser xp;
1108: const char *fname;
1109: char *buf;
1110: int fd, rc;
1111:
1112: fname = "-";
1113: xp = NULL;
1114: buf = NULL;
1115: rc = 0;
1116:
1117: if (-1 != getopt(argc, argv, ""))
1118: return(EXIT_FAILURE);
1119:
1120: argc -= optind;
1121: argv += optind;
1122:
1123: if (argc > 1)
1124: return(EXIT_FAILURE);
1125: else if (argc > 0)
1126: fname = argv[0];
1127:
1128: /* Read from stdin or a file. */
1129: fd = 0 == strcmp(fname, "-") ?
1130: STDIN_FILENO : open(fname, O_RDONLY, 0);
1131:
1132: /*
1133: * Open file for reading.
1134: * Allocate a read buffer.
1135: * Create the parser context.
1136: * Dive directly into the parse.
1137: */
1138: if (-1 == fd)
1139: perror(fname);
1140: else if (NULL == (buf = malloc(4096)))
1141: perror(NULL);
1142: else if (NULL == (xp = XML_ParserCreate(NULL)))
1143: perror(NULL);
1144: else if ( ! readfile(xp, fd, buf, 4096, fname))
1145: rc = 1;
1146:
1147: XML_ParserFree(xp);
1148: free(buf);
1149: if (STDIN_FILENO != fd)
1150: close(fd);
1151: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1152: }
CVSweb