Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.16
1.16 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.15 2014/03/30 15:08:03 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/* Flag for struct node: ignore all text contained in the element. */
#define	NODE_IGNTEXT	1

/* Static description of one recognised DocBook element. */
struct node {
	const char	*name;	/* docbook element name */
	unsigned int	 flags;	/* bitmask of NODE_IGNTEXT */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.15 kristaps 95: { "date", 0 },
1.13 kristaps 96: { "emphasis", 0 },
1.3 kristaps 97: { "funcdef", 0 },
98: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 99: { "funcsynopsis", NODE_IGNTEXT },
100: { "funcsynopsisinfo", 0 },
1.3 kristaps 101: { "function", 0 },
1.16 ! kristaps 102: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 103: { "link", 0 },
1.13 kristaps 104: { "listitem", NODE_IGNTEXT },
1.1 kristaps 105: { "manvolnum", 0 },
1.4 kristaps 106: { "option", 0 },
1.1 kristaps 107: { "para", 0 },
1.3 kristaps 108: { "paramdef", 0 },
109: { "parameter", 0 },
1.1 kristaps 110: { "programlisting", 0 },
111: { "refclass", NODE_IGNTEXT },
112: { "refdescriptor", NODE_IGNTEXT },
113: { "refentry", NODE_IGNTEXT },
1.15 kristaps 114: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 115: { "refentrytitle", 0 },
116: { "refmeta", NODE_IGNTEXT },
117: { "refmiscinfo", NODE_IGNTEXT },
118: { "refname", 0 },
119: { "refnamediv", NODE_IGNTEXT },
120: { "refpurpose", 0 },
121: { "refsect1", 0 },
122: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 123: { "replaceable", 0 },
1.8 kristaps 124: { "structname", 0 },
1.1 kristaps 125: { "synopsis", 0 },
1.13 kristaps 126: { "term", 0 },
1.1 kristaps 127: { NULL, 0 },
128: { "title", 0 },
1.14 kristaps 129: { "ulink", 0 },
1.13 kristaps 130: { "variablelist", NODE_IGNTEXT },
131: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 132: };
133:
1.10 kristaps 134: static void
135: pnode_print(struct parse *p, struct pnode *pn);
136:
1.8 kristaps 137: /*
138: * Process a stream of characters.
139: * We store text as nodes in and of themselves.
140: * If a text node is already open, append to it.
141: * If it's not open, open one under the current context.
142: */
1.1 kristaps 143: static void
144: xml_char(void *arg, const XML_Char *p, int sz)
145: {
146: struct parse *ps = arg;
147: struct pnode *dat;
1.4 kristaps 148: int i;
1.1 kristaps 149:
150: /* Stopped or no tree yet. */
151: if (ps->stop || NODE_ROOT == ps->node)
152: return;
153:
154: /* Not supposed to be collecting text. */
155: assert(NULL != ps->cur);
156: if (NODE_IGNTEXT & nodes[ps->node].flags)
157: return;
158:
159: /*
160: * Are we in the midst of processing text?
161: * If we're not processing text right now, then create a text
162: * node for doing so.
1.4 kristaps 163: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 164: * process: strip out all leading whitespace to be sure.
1.1 kristaps 165: */
166: if (NODE_TEXT != ps->node) {
1.4 kristaps 167: for (i = 0; i < sz; i++)
168: if ( ! isspace((int)p[i]))
169: break;
170: if (i == sz)
171: return;
1.10 kristaps 172: p += i;
173: sz -= i;
1.1 kristaps 174: dat = calloc(1, sizeof(struct pnode));
175: if (NULL == dat) {
176: perror(NULL);
177: exit(EXIT_FAILURE);
178: }
179:
180: dat->node = ps->node = NODE_TEXT;
181: dat->parent = ps->cur;
182: TAILQ_INIT(&dat->childq);
1.12 kristaps 183: TAILQ_INIT(&dat->attrq);
1.1 kristaps 184: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
185: ps->cur = dat;
186: assert(NULL != ps->root);
187: }
188:
189: /* Append to current buffer. */
190: assert(sz >= 0);
191: ps->cur->b = realloc(ps->cur->b,
192: ps->cur->bsz + (size_t)sz);
193: if (NULL == ps->cur->b) {
194: perror(NULL);
195: exit(EXIT_FAILURE);
196: }
197: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
198: ps->cur->bsz += (size_t)sz;
199: }
200:
1.10 kristaps 201: static void
202: pnode_trim(struct pnode *pn)
203: {
204:
205: assert(NODE_TEXT == pn->node);
206: for ( ; pn->bsz > 0; pn->bsz--)
207: if ( ! isspace((int)pn->b[pn->bsz - 1]))
208: break;
209: }
210:
1.1 kristaps 211: /*
212: * Begin an element.
213: * First, look for the element.
214: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 215: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 216: * If we find it but we're not parsing yet (i.e., it's not a refentry
217: * and thus out of context), keep going.
1.8 kristaps 218: * If we find it and we're at the root and already have a tree, puke and
219: * exit (FIXME: I don't think this is right?).
220: * If we find it but we're parsing a text node, close out the text node,
221: * return to its parent, and keep going.
1.1 kristaps 222: * Make sure that the element is in the right context.
223: * Lastly, put the node onto our parse tree and continue.
224: */
225: static void
226: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
227: {
1.12 kristaps 228: struct parse *ps = arg;
229: enum nodeid node;
230: enum attrkey key;
231: enum attrval val;
232: struct pnode *dat;
233: struct pattr *pattr;
234: const XML_Char **att;
1.1 kristaps 235:
236: if (ps->stop)
237: return;
238:
239: /* Close out text node, if applicable... */
240: if (NODE_TEXT == ps->node) {
241: assert(NULL != ps->cur);
1.10 kristaps 242: pnode_trim(ps->cur);
1.1 kristaps 243: ps->cur = ps->cur->parent;
244: assert(NULL != ps->cur);
245: ps->node = ps->cur->node;
246: }
247:
248: for (node = 0; node < NODE__MAX; node++)
249: if (NULL == nodes[node].name)
250: continue;
251: else if (0 == strcmp(nodes[node].name, name))
252: break;
253:
254: if (NODE__MAX == node && NODE_ROOT == ps->node) {
255: return;
256: } else if (NODE__MAX == node) {
1.12 kristaps 257: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
258: ps->fname, XML_GetCurrentLineNumber(ps->xml),
259: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 260: ps->stop = 1;
261: return;
262: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 263: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
264: ps->fname, XML_GetCurrentLineNumber(ps->xml),
265: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 266: ps->stop = 1;
267: return;
268: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
269: return;
270: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 271: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
272: "of node \"%s\"\n",
1.12 kristaps 273: ps->fname, XML_GetCurrentLineNumber(ps->xml),
274: XML_GetCurrentColumnNumber(ps->xml),
275: NULL == nodes[ps->node].name ?
1.13 kristaps 276: "(none)" : nodes[ps->node].name,
277: NULL == nodes[node].name ?
278: "(none)" : nodes[node].name);
1.1 kristaps 279: ps->stop = 1;
280: return;
281: }
282:
283: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
284: perror(NULL);
285: exit(EXIT_FAILURE);
286: }
287:
288: dat->node = ps->node = node;
289: dat->parent = ps->cur;
290: TAILQ_INIT(&dat->childq);
1.12 kristaps 291: TAILQ_INIT(&dat->attrq);
1.1 kristaps 292:
293: if (NULL != ps->cur)
294: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
295:
296: ps->cur = dat;
297: if (NULL == ps->root)
298: ps->root = dat;
1.12 kristaps 299:
300: /*
301: * Process attributes.
302: */
303: for (att = atts; NULL != *att; att += 2) {
304: for (key = 0; key < ATTRKEY__MAX; key++)
305: if (0 == strcmp(*att, attrkeys[key]))
306: break;
307: if (ATTRKEY__MAX == key) {
308: fprintf(stderr, "%s:%zu:%zu: unknown "
309: "attribute \"%s\"\n", ps->fname,
310: XML_GetCurrentLineNumber(ps->xml),
311: XML_GetCurrentColumnNumber(ps->xml),
312: *att);
313: continue;
314: } else if ( ! isattrkey(node, key)) {
315: fprintf(stderr, "%s:%zu:%zu: bad "
316: "attribute \"%s\"\n", ps->fname,
317: XML_GetCurrentLineNumber(ps->xml),
318: XML_GetCurrentColumnNumber(ps->xml),
319: *att);
320: continue;
321: }
322: for (val = 0; val < ATTRVAL__MAX; val++)
323: if (0 == strcmp(*(att + 1), attrvals[val]))
324: break;
325: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
326: fprintf(stderr, "%s:%zu:%zu: bad "
327: "value \"%s\"\n", ps->fname,
328: XML_GetCurrentLineNumber(ps->xml),
329: XML_GetCurrentColumnNumber(ps->xml),
330: *(att + 1));
331: continue;
332: }
333: pattr = calloc(1, sizeof(struct pattr));
334: pattr->key = key;
335: pattr->val = val;
336: if (ATTRVAL__MAX == val)
337: pattr->rawval = strdup(*(att + 1));
338: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
339: }
340:
1.1 kristaps 341: }
342:
343: /*
344: * Roll up the parse tree.
1.8 kristaps 345: * If we're at a text node, roll that one up first.
1.1 kristaps 346: * If we hit the root, then assign ourselves as the NODE_ROOT.
347: */
348: static void
349: xml_elem_end(void *arg, const XML_Char *name)
350: {
351: struct parse *ps = arg;
352:
353: if (ps->stop || NODE_ROOT == ps->node)
354: return;
355:
356: /* Close out text node, if applicable... */
357: if (NODE_TEXT == ps->node) {
358: assert(NULL != ps->cur);
1.10 kristaps 359: pnode_trim(ps->cur);
1.1 kristaps 360: ps->cur = ps->cur->parent;
361: assert(NULL != ps->cur);
362: ps->node = ps->cur->node;
363: }
364:
365: if (NULL == (ps->cur = ps->cur->parent))
366: ps->node = NODE_ROOT;
367: else
368: ps->node = ps->cur->node;
369: }
370:
1.8 kristaps 371: /*
372: * Recursively free a node (NULL is ok).
373: */
1.1 kristaps 374: static void
375: pnode_free(struct pnode *pn)
376: {
377: struct pnode *pp;
1.12 kristaps 378: struct pattr *ap;
1.1 kristaps 379:
380: if (NULL == pn)
381: return;
382:
383: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
384: TAILQ_REMOVE(&pn->childq, pp, child);
385: pnode_free(pp);
386: }
387:
1.12 kristaps 388: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
389: TAILQ_REMOVE(&pn->attrq, ap, child);
390: free(ap->rawval);
391: free(ap);
392: }
393:
1.1 kristaps 394: free(pn->b);
395: free(pn);
396: }
397:
1.8 kristaps 398: /*
399: * Unlink a node from its parent and pnode_free() it.
400: */
1.1 kristaps 401: static void
402: pnode_unlink(struct pnode *pn)
403: {
404:
405: if (NULL != pn->parent)
406: TAILQ_REMOVE(&pn->parent->childq, pn, child);
407: pnode_free(pn);
408: }
409:
1.8 kristaps 410: /*
411: * Unlink all children of a node and pnode_free() them.
412: */
1.1 kristaps 413: static void
1.4 kristaps 414: pnode_unlinksub(struct pnode *pn)
415: {
416:
417: while ( ! TAILQ_EMPTY(&pn->childq))
418: pnode_unlink(TAILQ_FIRST(&pn->childq));
419: }
420:
1.8 kristaps 421: /*
422: * Reset the lookaside buffer.
423: */
1.4 kristaps 424: static void
1.1 kristaps 425: bufclear(struct parse *p)
426: {
427:
428: p->b[p->bsz = 0] = '\0';
429: }
430:
1.8 kristaps 431: /*
432: * Append NODE_TEXT contents to the current buffer, reallocating its
433: * size if necessary.
434: * The buffer is ALWAYS nil-terminated.
435: */
1.1 kristaps 436: static void
437: bufappend(struct parse *p, struct pnode *pn)
438: {
439:
440: assert(NODE_TEXT == pn->node);
441: if (p->bsz + pn->bsz + 1 > p->mbsz) {
442: p->mbsz = p->bsz + pn->bsz + 1;
443: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
444: perror(NULL);
445: exit(EXIT_FAILURE);
446: }
447: }
448: memcpy(p->b + p->bsz, pn->b, pn->bsz);
449: p->bsz += pn->bsz;
450: p->b[p->bsz] = '\0';
451: }
452:
1.8 kristaps 453: /*
454: * Recursively append all NODE_TEXT nodes to the buffer.
455: * This descends into non-text nodes, but doesn't do anything beyond
456: * them.
457: * In other words, this is a recursive text grok.
458: */
1.3 kristaps 459: static void
460: bufappend_r(struct parse *p, struct pnode *pn)
461: {
462: struct pnode *pp;
463:
464: if (NODE_TEXT == pn->node)
465: bufappend(p, pn);
466: TAILQ_FOREACH(pp, &pn->childq, child)
467: bufappend_r(p, pp);
468: }
469:
1.12 kristaps 470: #define MACROLINE_NORM 0
471: #define MACROLINE_UPPER 1
1.1 kristaps 472: /*
1.8 kristaps 473: * Recursively print text presumably on a macro line.
1.1 kristaps 474: * Convert all whitespace to regular spaces.
475: */
476: static void
1.12 kristaps 477: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 478: {
479: char *cp;
480:
1.13 kristaps 481: if (0 == p->newln)
482: putchar(' ');
483:
1.1 kristaps 484: bufclear(p);
1.3 kristaps 485: bufappend_r(p, pn);
1.1 kristaps 486:
487: /* Convert all space to spaces. */
488: for (cp = p->b; '\0' != *cp; cp++)
489: if (isspace((int)*cp))
490: *cp = ' ';
491:
492: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 493: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 494: for ( ; '\0' != *cp; cp++) {
495: /* Escape us if we look like a macro. */
496: if ((cp == p->b || ' ' == *(cp - 1)) &&
497: isupper((int)*cp) &&
498: '\0' != *(cp + 1) &&
499: islower((int)*(cp + 1)) &&
500: ('\0' == *(cp + 2) ||
501: ' ' == *(cp + 2) ||
502: (islower((int)*(cp + 2)) &&
503: ('\0' == *(cp + 3) ||
504: ' ' == *(cp + 3)))))
505: fputs("\\&", stdout);
1.12 kristaps 506: if (MACROLINE_UPPER & fl)
507: putchar(toupper((int)*cp));
508: else
509: putchar((int)*cp);
1.1 kristaps 510: /* If we're a character escape, escape us. */
511: if ('\\' == *cp)
512: putchar('e');
513: }
514: }
515:
1.12 kristaps 516: static void
517: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
518: {
519:
520: pnode_printmacrolinetext(p, pn, 0);
521: }
522:
1.1 kristaps 523: /*
524: * Just pnode_printmacrolinepart() but with a newline.
525: * If no text, just the newline.
526: */
527: static void
528: pnode_printmacroline(struct parse *p, struct pnode *pn)
529: {
530:
1.13 kristaps 531: assert(0 == p->newln);
1.12 kristaps 532: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 533: putchar('\n');
1.13 kristaps 534: p->newln = 1;
1.1 kristaps 535: }
536:
1.10 kristaps 537: static void
538: pnode_printmopen(struct parse *p)
539: {
540: if (p->newln) {
541: putchar('.');
542: p->newln = 0;
543: } else
544: putchar(' ');
545: }
546:
547: static void
548: pnode_printmclose(struct parse *p, int sv)
549: {
550:
551: if (sv && ! p->newln) {
552: putchar('\n');
553: p->newln = 1;
554: }
555: }
556:
1.8 kristaps 557: /*
1.10 kristaps 558: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 559: */
1.1 kristaps 560: static void
1.6 kristaps 561: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
562: {
563: struct pnode *pp;
564:
1.10 kristaps 565: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 566: if (NODE_TITLE == pp->node) {
567: pnode_unlink(pp);
1.10 kristaps 568: return;
1.6 kristaps 569: }
570: }
571:
1.8 kristaps 572: /*
573: * Start a hopefully-named `Sh' section.
574: */
1.6 kristaps 575: static void
1.1 kristaps 576: pnode_printrefsect(struct parse *p, struct pnode *pn)
577: {
578: struct pnode *pp;
579:
580: TAILQ_FOREACH(pp, &pn->childq, child)
581: if (NODE_TITLE == pp->node)
582: break;
583:
1.13 kristaps 584: fputs(".Sh", stdout);
585: p->newln = 0;
1.4 kristaps 586:
1.5 kristaps 587: if (NULL != pp) {
1.1 kristaps 588: pnode_printmacroline(p, pp);
1.5 kristaps 589: pnode_unlink(pp);
1.13 kristaps 590: } else {
1.4 kristaps 591: puts("UNKNOWN");
1.13 kristaps 592: p->newln = 1;
593: }
1.1 kristaps 594: }
595:
1.8 kristaps 596: /*
597: * Start a reference, extracting the title and volume.
598: */
1.1 kristaps 599: static void
600: pnode_printciterefentry(struct parse *p, struct pnode *pn)
601: {
602: struct pnode *pp, *title, *manvol;
603:
604: title = manvol = NULL;
1.13 kristaps 605: assert(p->newln);
1.1 kristaps 606: TAILQ_FOREACH(pp, &pn->childq, child)
607: if (NODE_MANVOLNUM == pp->node)
608: manvol = pp;
609: else if (NODE_REFENTRYTITLE == pp->node)
610: title = pp;
611:
1.13 kristaps 612: fputs(".Xr", stdout);
613: p->newln = 0;
1.4 kristaps 614:
1.1 kristaps 615: if (NULL != title) {
616: pnode_printmacrolinepart(p, title);
617: } else
1.13 kristaps 618: fputs(" unknown ", stdout);
1.4 kristaps 619:
1.13 kristaps 620: if (NULL == manvol) {
621: puts(" 1");
622: p->newln = 1;
623: } else
1.1 kristaps 624: pnode_printmacroline(p, manvol);
625: }
626:
627: static void
628: pnode_printrefmeta(struct parse *p, struct pnode *pn)
629: {
630: struct pnode *pp, *title, *manvol;
631:
632: title = manvol = NULL;
1.13 kristaps 633: assert(p->newln);
1.1 kristaps 634: TAILQ_FOREACH(pp, &pn->childq, child)
635: if (NODE_MANVOLNUM == pp->node)
636: manvol = pp;
637: else if (NODE_REFENTRYTITLE == pp->node)
638: title = pp;
639:
1.2 kristaps 640: puts(".Dd $Mdocdate" "$");
1.13 kristaps 641: fputs(".Dt", stdout);
642: p->newln = 0;
1.1 kristaps 643:
1.13 kristaps 644: if (NULL != title)
1.12 kristaps 645: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 646: else
647: fputs(" UNKNOWN ", stdout);
648:
649: if (NULL == manvol) {
650: puts(" 1");
651: p->newln = 1;
1.1 kristaps 652: } else
653: pnode_printmacroline(p, manvol);
654:
655: puts(".Os");
656: }
657:
1.3 kristaps 658: static void
659: pnode_printfuncdef(struct parse *p, struct pnode *pn)
660: {
661: struct pnode *pp, *ftype, *func;
662:
1.13 kristaps 663: assert(p->newln);
1.3 kristaps 664: ftype = func = NULL;
665: TAILQ_FOREACH(pp, &pn->childq, child)
666: if (NODE_TEXT == pp->node)
667: ftype = pp;
668: else if (NODE_FUNCTION == pp->node)
669: func = pp;
670:
671: if (NULL != ftype) {
1.13 kristaps 672: fputs(".Ft", stdout);
673: p->newln = 0;
1.3 kristaps 674: pnode_printmacroline(p, ftype);
675: }
676:
677: if (NULL != func) {
1.13 kristaps 678: fputs(".Fo", stdout);
679: p->newln = 0;
1.3 kristaps 680: pnode_printmacroline(p, func);
1.13 kristaps 681: } else {
1.3 kristaps 682: puts(".Fo UNKNOWN");
1.13 kristaps 683: p->newln = 1;
684: }
1.3 kristaps 685: }
686:
687: static void
688: pnode_printparamdef(struct parse *p, struct pnode *pn)
689: {
690: struct pnode *pp, *ptype, *param;
691:
1.13 kristaps 692: assert(p->newln);
1.3 kristaps 693: ptype = param = NULL;
694: TAILQ_FOREACH(pp, &pn->childq, child)
695: if (NODE_TEXT == pp->node)
696: ptype = pp;
697: else if (NODE_PARAMETER == pp->node)
698: param = pp;
699:
700: fputs(".Fa \"", stdout);
1.13 kristaps 701: p->newln = 0;
1.3 kristaps 702: if (NULL != ptype) {
703: pnode_printmacrolinepart(p, ptype);
704: putchar(' ');
705: }
706:
707: if (NULL != param)
708: pnode_printmacrolinepart(p, param);
709:
710: puts("\"");
1.13 kristaps 711: p->newln = 1;
1.3 kristaps 712: }
713:
714: static void
715: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
716: {
717: struct pnode *pp, *fdef;
718:
1.13 kristaps 719: assert(p->newln);
1.3 kristaps 720: TAILQ_FOREACH(fdef, &pn->childq, child)
721: if (NODE_FUNCDEF == fdef->node)
722: break;
723:
1.4 kristaps 724: if (NULL != fdef)
1.3 kristaps 725: pnode_printfuncdef(p, fdef);
1.4 kristaps 726: else
1.3 kristaps 727: puts(".Fo UNKNOWN");
728:
1.4 kristaps 729: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 730: if (NODE_PARAMDEF == pp->node)
731: pnode_printparamdef(p, pp);
732:
733: puts(".Fc");
1.13 kristaps 734: p->newln = 1;
1.3 kristaps 735: }
736:
1.10 kristaps 737: /*
738: * The <arg> element is more complicated than it should be because text
739: * nodes are treated like ".Ar foo", but non-text nodes need to be
740: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 741: * This also handles the case of "repetition" (or in other words, the
742: * ellipsis following an argument) and optionality.
1.10 kristaps 743: */
1.4 kristaps 744: static void
1.10 kristaps 745: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 746: {
747: struct pnode *pp;
1.12 kristaps 748: struct pattr *ap;
749: int isop, isrep;
750:
751: isop = 1;
752: isrep = 0;
753: TAILQ_FOREACH(ap, &pn->attrq, child)
754: if (ATTRKEY_CHOICE == ap->key &&
755: (ATTRVAL_PLAIN == ap->val ||
756: ATTRVAL_REQ == ap->val))
757: isop = 0;
758: else if (ATTRKEY_REP == ap->key &&
759: (ATTRVAL_REPEAT == ap->val))
760: isrep = 1;
761:
762: if (isop) {
763: pnode_printmopen(p);
1.13 kristaps 764: fputs("Op", stdout);
1.12 kristaps 765: }
1.4 kristaps 766:
1.10 kristaps 767: TAILQ_FOREACH(pp, &pn->childq, child) {
768: if (NODE_TEXT == pp->node) {
769: pnode_printmopen(p);
1.13 kristaps 770: fputs("Ar", stdout);
1.10 kristaps 771: }
772: pnode_print(p, pp);
1.12 kristaps 773: if (NODE_TEXT == pp->node && isrep)
774: fputs("...", stdout);
1.10 kristaps 775: }
1.4 kristaps 776: }
777:
1.7 kristaps 778: /*
779: * Recursively search and return the first instance of "node".
780: */
781: static struct pnode *
782: pnode_findfirst(struct pnode *pn, enum nodeid node)
783: {
784: struct pnode *pp, *res;
785:
786: res = NULL;
787: TAILQ_FOREACH(pp, &pn->childq, child) {
788: res = pp->node == node ? pp :
789: pnode_findfirst(pp, node);
790: if (NULL != res)
791: break;
792: }
793:
794: return(res);
795: }
796:
797: static void
798: pnode_printprologue(struct parse *p, struct pnode *pn)
799: {
800: struct pnode *pp;
801:
1.9 kristaps 802: pp = NULL == p->root ? NULL :
803: pnode_findfirst(p->root, NODE_REFMETA);
804:
805: if (NULL != pp) {
1.7 kristaps 806: pnode_printrefmeta(p, pp);
807: pnode_unlink(pp);
808: } else {
809: puts(".\\\" Supplying bogus prologue...");
810: puts(".Dd $Mdocdate" "$");
811: puts(".Dt UNKNOWN 1");
812: puts(".Os");
813: }
814: }
815:
1.13 kristaps 816: static void
817: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
818: {
819: struct pnode *pp;
820:
821: assert(p->newln);
822: TAILQ_FOREACH(pp, &pn->childq, child)
823: if (NODE_TERM == pp->node) {
824: fputs(".It", stdout);
825: p->newln = 0;
826: pnode_print(p, pp);
827: pnode_unlink(pp);
1.16 ! kristaps 828: pnode_printmclose(p, 1);
1.13 kristaps 829: return;
830: }
831:
832: puts(".It");
833: p->newln = 1;
834: }
835:
836: static void
1.16 ! kristaps 837: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
! 838: {
! 839: struct pnode *pp;
! 840:
! 841: assert(p->newln);
! 842: TAILQ_FOREACH(pp, &pn->childq, child)
! 843: if (NODE_TITLE == pp->node) {
! 844: puts(".Pp");
! 845: pnode_print(p, pp);
! 846: pnode_unlink(pp);
! 847: }
! 848:
! 849: assert(p->newln);
! 850: puts(".Bl -item");
! 851: TAILQ_FOREACH(pp, &pn->childq, child) {
! 852: assert(p->newln);
! 853: puts(".It");
! 854: pnode_print(p, pp);
! 855: pnode_printmclose(p, 1);
! 856: }
! 857: assert(p->newln);
! 858: puts(".El");
! 859: }
! 860:
! 861: static void
1.13 kristaps 862: pnode_printvariablelist(struct parse *p, struct pnode *pn)
863: {
864: struct pnode *pp;
865:
866: assert(p->newln);
867: TAILQ_FOREACH(pp, &pn->childq, child)
868: if (NODE_TITLE == pp->node) {
869: puts(".Pp");
870: pnode_print(p, pp);
871: pnode_unlink(pp);
872: }
873:
874: assert(p->newln);
875: puts(".Bl -tag -width Ds");
876: TAILQ_FOREACH(pp, &pn->childq, child)
877: if (NODE_VARLISTENTRY != pp->node) {
878: assert(p->newln);
879: fputs(".It", stdout);
880: pnode_printmacroline(p, pp);
881: } else {
882: assert(p->newln);
883: pnode_print(p, pp);
884: }
885: assert(p->newln);
886: puts(".El");
887: }
888:
1.1 kristaps 889: /*
890: * Print a parsed node (or ignore it--whatever).
891: * This is a recursive function.
892: * FIXME: macro line continuation?
893: */
894: static void
895: pnode_print(struct parse *p, struct pnode *pn)
896: {
897: struct pnode *pp;
898: char *cp;
1.10 kristaps 899: int last, sv;
1.1 kristaps 900:
901: if (NULL == pn)
902: return;
903:
1.10 kristaps 904: sv = p->newln;
1.1 kristaps 905:
906: switch (pn->node) {
1.4 kristaps 907: case (NODE_ARG):
1.10 kristaps 908: pnode_printarg(p, pn);
1.4 kristaps 909: pnode_unlinksub(pn);
910: break;
1.1 kristaps 911: case (NODE_CITEREFENTRY):
1.10 kristaps 912: assert(p->newln);
1.1 kristaps 913: pnode_printciterefentry(p, pn);
1.4 kristaps 914: pnode_unlinksub(pn);
1.1 kristaps 915: break;
916: case (NODE_CODE):
1.10 kristaps 917: pnode_printmopen(p);
1.13 kristaps 918: fputs("Li", stdout);
1.4 kristaps 919: break;
920: case (NODE_COMMAND):
1.10 kristaps 921: pnode_printmopen(p);
1.13 kristaps 922: fputs("Nm", stdout);
923: break;
924: case (NODE_EMPHASIS):
925: pnode_printmopen(p);
926: fputs("Em", stdout);
1.1 kristaps 927: break;
1.3 kristaps 928: case (NODE_FUNCTION):
1.10 kristaps 929: pnode_printmopen(p);
1.13 kristaps 930: fputs("Fn", stdout);
1.3 kristaps 931: break;
932: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 933: assert(p->newln);
1.3 kristaps 934: pnode_printfuncprototype(p, pn);
1.4 kristaps 935: pnode_unlinksub(pn);
1.3 kristaps 936: break;
1.1 kristaps 937: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 938: pnode_printmopen(p);
1.13 kristaps 939: fputs("Fd", stdout);
1.16 ! kristaps 940: break;
! 941: case (NODE_ITEMIZEDLIST):
! 942: assert(p->newln);
! 943: pnode_printitemizedlist(p, pn);
1.10 kristaps 944: break;
945: case (NODE_OPTION):
946: pnode_printmopen(p);
1.13 kristaps 947: fputs("Fl", stdout);
948: /* FIXME: bogus leading '-'? */
1.1 kristaps 949: break;
950: case (NODE_PARA):
1.10 kristaps 951: assert(p->newln);
1.13 kristaps 952: if (NULL != pn->parent &&
953: NODE_LISTITEM == pn->parent->node)
954: break;
1.1 kristaps 955: puts(".Pp");
1.3 kristaps 956: break;
957: case (NODE_PARAMETER):
1.10 kristaps 958: /* Suppress non-text children... */
959: pnode_printmopen(p);
960: fputs("Fa \"", stdout);
1.3 kristaps 961: pnode_printmacrolinepart(p, pn);
962: puts("\"");
1.4 kristaps 963: pnode_unlinksub(pn);
1.1 kristaps 964: break;
965: case (NODE_PROGRAMLISTING):
1.10 kristaps 966: assert(p->newln);
1.1 kristaps 967: puts(".Bd -literal");
1.15 kristaps 968: break;
969: case (NODE_REFENTRYINFO):
970: /* Suppress. */
971: pnode_unlinksub(pn);
1.1 kristaps 972: break;
973: case (NODE_REFMETA):
1.7 kristaps 974: abort();
1.1 kristaps 975: break;
976: case (NODE_REFNAME):
1.10 kristaps 977: /* Suppress non-text children... */
978: pnode_printmopen(p);
1.13 kristaps 979: fputs("Nm", stdout);
980: p->newln = 0;
1.10 kristaps 981: pnode_printmacrolinepart(p, pn);
1.4 kristaps 982: pnode_unlinksub(pn);
1.10 kristaps 983: break;
1.1 kristaps 984: case (NODE_REFNAMEDIV):
1.10 kristaps 985: assert(p->newln);
1.1 kristaps 986: puts(".Sh NAME");
987: break;
988: case (NODE_REFPURPOSE):
1.10 kristaps 989: assert(p->newln);
1.13 kristaps 990: pnode_printmopen(p);
991: fputs("Nd", stdout);
1.10 kristaps 992: break;
1.1 kristaps 993: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 994: assert(p->newln);
1.6 kristaps 995: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 996: puts(".Sh SYNOPSIS");
1.1 kristaps 997: break;
998: case (NODE_REFSECT1):
1.10 kristaps 999: assert(p->newln);
1.1 kristaps 1000: pnode_printrefsect(p, pn);
1001: break;
1.13 kristaps 1002: case (NODE_REPLACEABLE):
1003: pnode_printmopen(p);
1004: fputs("Ar", stdout);
1005: break;
1.8 kristaps 1006: case (NODE_STRUCTNAME):
1.10 kristaps 1007: pnode_printmopen(p);
1.13 kristaps 1008: fputs("Vt", stdout);
1.10 kristaps 1009: break;
1.1 kristaps 1010: case (NODE_TEXT):
1.13 kristaps 1011: if (0 == p->newln)
1012: putchar(' ');
1.1 kristaps 1013: bufclear(p);
1014: bufappend(p, pn);
1015: /*
1016: * Output all characters, squeezing out whitespace
1017: * between newlines.
1018: * XXX: all whitespace, including tabs (?).
1019: * Remember to escape control characters and escapes.
1020: */
1.10 kristaps 1021: assert(p->bsz);
1.1 kristaps 1022: for (last = '\n', cp = p->b; '\0' != *cp; ) {
1023: if ('\n' == last) {
1024: /* Consume all whitespace. */
1025: if (isspace((int)*cp)) {
1026: while (isspace((int)*cp))
1027: cp++;
1028: continue;
1029: } else if ('\'' == *cp || '.' == *cp)
1030: fputs("\\&", stdout);
1031: }
1032: putchar(last = *cp++);
1033: /* If we're a character escape, escape us. */
1034: if ('\\' == last)
1035: putchar('e');
1036: }
1.10 kristaps 1037: p->newln = 0;
1.1 kristaps 1038: break;
1.13 kristaps 1039: case (NODE_VARIABLELIST):
1040: assert(p->newln);
1041: pnode_printvariablelist(p, pn);
1042: pnode_unlinksub(pn);
1043: break;
1044: case (NODE_VARLISTENTRY):
1045: assert(p->newln);
1046: pnode_printvarlistentry(p, pn);
1047: break;
1.1 kristaps 1048: default:
1049: break;
1050: }
1051:
1052: TAILQ_FOREACH(pp, &pn->childq, child)
1053: pnode_print(p, pp);
1054:
1055: switch (pn->node) {
1.10 kristaps 1056: case (NODE_ARG):
1057: case (NODE_CODE):
1058: case (NODE_COMMAND):
1.13 kristaps 1059: case (NODE_EMPHASIS):
1.10 kristaps 1060: case (NODE_FUNCTION):
1061: case (NODE_FUNCSYNOPSISINFO):
1062: case (NODE_OPTION):
1063: case (NODE_PARAMETER):
1.13 kristaps 1064: case (NODE_REPLACEABLE):
1065: case (NODE_REFPURPOSE):
1.10 kristaps 1066: case (NODE_STRUCTNAME):
1067: case (NODE_TEXT):
1068: pnode_printmclose(p, sv);
1069: break;
1.12 kristaps 1070: case (NODE_REFNAME):
1071: /*
1072: * If we're in the NAME macro and we have multiple
1073: * <refname> macros in sequence, then print out a
1074: * trailing comma before the newline.
1075: */
1076: if (NULL != pn->parent &&
1077: NODE_REFNAMEDIV == pn->parent->node &&
1078: NULL != TAILQ_NEXT(pn, child) &&
1079: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1080: fputs(" ,", stdout);
1081: pnode_printmclose(p, sv);
1082: break;
1.1 kristaps 1083: case (NODE_PROGRAMLISTING):
1.10 kristaps 1084: assert(p->newln);
1.1 kristaps 1085: puts(".Ed");
1.10 kristaps 1086: p->newln = 1;
1.1 kristaps 1087: break;
1088: default:
1089: break;
1090: }
1091: }
1092:
1093: /*
1094: * Loop around the read buffer until we've drained it of all data.
1095: * Invoke the parser context with each buffer fill.
1096: */
1097: static int
1098: readfile(XML_Parser xp, int fd,
1099: char *b, size_t bsz, const char *fn)
1100: {
1101: struct parse p;
1102: int rc;
1103: ssize_t ssz;
1104:
1105: memset(&p, 0, sizeof(struct parse));
1106:
1107: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1108: p.fname = fn;
1109: p.xml = xp;
1.1 kristaps 1110:
1111: XML_SetCharacterDataHandler(xp, xml_char);
1112: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1113: XML_SetUserData(xp, &p);
1114:
1115: while ((ssz = read(fd, b, bsz)) >= 0) {
1116: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1117: fprintf(stderr, "%s: %s\n", fn,
1118: XML_ErrorString
1119: (XML_GetErrorCode(xp)));
1120: else if ( ! p.stop && ssz > 0)
1121: continue;
1122: /*
1123: * Exit when we've read all or errors have occured
1124: * during the parse sequence.
1125: */
1.10 kristaps 1126: p.newln = 1;
1.7 kristaps 1127: pnode_printprologue(&p, p.root);
1.1 kristaps 1128: pnode_print(&p, p.root);
1129: pnode_free(p.root);
1130: free(p.b);
1131: return(0 != rc && ! p.stop);
1132: }
1133:
1134: /* Read error has occured. */
1135: perror(fn);
1136: pnode_free(p.root);
1137: free(p.b);
1138: return(0);
1139: }
1140:
1141: int
1142: main(int argc, char *argv[])
1143: {
1144: XML_Parser xp;
1145: const char *fname;
1146: char *buf;
1147: int fd, rc;
1148:
1149: fname = "-";
1150: xp = NULL;
1151: buf = NULL;
1152: rc = 0;
1153:
1154: if (-1 != getopt(argc, argv, ""))
1155: return(EXIT_FAILURE);
1156:
1157: argc -= optind;
1158: argv += optind;
1159:
1160: if (argc > 1)
1161: return(EXIT_FAILURE);
1162: else if (argc > 0)
1163: fname = argv[0];
1164:
1165: /* Read from stdin or a file. */
1166: fd = 0 == strcmp(fname, "-") ?
1167: STDIN_FILENO : open(fname, O_RDONLY, 0);
1168:
1169: /*
1170: * Open file for reading.
1171: * Allocate a read buffer.
1172: * Create the parser context.
1173: * Dive directly into the parse.
1174: */
1175: if (-1 == fd)
1176: perror(fname);
1177: else if (NULL == (buf = malloc(4096)))
1178: perror(NULL);
1179: else if (NULL == (xp = XML_ParserCreate(NULL)))
1180: perror(NULL);
1181: else if ( ! readfile(xp, fd, buf, 4096, fname))
1182: rc = 1;
1183:
1184: XML_ParserFree(xp);
1185: free(buf);
1186: if (STDIN_FILENO != fd)
1187: close(fd);
1188: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1189: }
CVSweb