Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.15
1.15 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.14 2014/03/30 13:18:49 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
/*
 * State shared between the expat callbacks and the printer.
 * Intentionally minimal.
 */
struct parse {
	/* expat handle, consulted for line/column in diagnostics */
	XML_Parser	 xml;
	/* type of the node being parsed (NODE_ROOT if pre-tree) */
	enum nodeid	 node;
	/* input filename, used in diagnostics */
	const char	*fname;
	/* non-zero once the callbacks should stop doing work */
	int		 stop;
	/* root of the parse tree */
	struct pnode	*root;
	/* node of the tree currently being filled in */
	struct pnode	*cur;
	/* nil-terminated scratch buffer used before printing */
	char		*b;
	/* current length of b */
	size_t		 bsz;
	/* size of the allocation behind b */
	size_t		 mbsz;
	/* output state: non-zero when positioned on a fresh line */
	int		 newln;
};
47:
/*
 * Static description of one recognised DocBook element.
 */
struct node {
	/* DocBook element name as it appears in the input */
	const char	*name;
	/* bitmask of the NODE_* flags defined below */
	unsigned int	 flags;
#define	NODE_IGNTEXT	 1 /* ignore all contained text */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.15 ! kristaps 95: { "date", 0 },
1.13 kristaps 96: { "emphasis", 0 },
1.3 kristaps 97: { "funcdef", 0 },
98: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 99: { "funcsynopsis", NODE_IGNTEXT },
100: { "funcsynopsisinfo", 0 },
1.3 kristaps 101: { "function", 0 },
1.14 kristaps 102: { "link", 0 },
1.13 kristaps 103: { "listitem", NODE_IGNTEXT },
1.1 kristaps 104: { "manvolnum", 0 },
1.4 kristaps 105: { "option", 0 },
1.1 kristaps 106: { "para", 0 },
1.3 kristaps 107: { "paramdef", 0 },
108: { "parameter", 0 },
1.1 kristaps 109: { "programlisting", 0 },
110: { "refclass", NODE_IGNTEXT },
111: { "refdescriptor", NODE_IGNTEXT },
112: { "refentry", NODE_IGNTEXT },
1.15 ! kristaps 113: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 114: { "refentrytitle", 0 },
115: { "refmeta", NODE_IGNTEXT },
116: { "refmiscinfo", NODE_IGNTEXT },
117: { "refname", 0 },
118: { "refnamediv", NODE_IGNTEXT },
119: { "refpurpose", 0 },
120: { "refsect1", 0 },
121: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 122: { "replaceable", 0 },
1.8 kristaps 123: { "structname", 0 },
1.1 kristaps 124: { "synopsis", 0 },
1.13 kristaps 125: { "term", 0 },
1.1 kristaps 126: { NULL, 0 },
127: { "title", 0 },
1.14 kristaps 128: { "ulink", 0 },
1.13 kristaps 129: { "variablelist", NODE_IGNTEXT },
130: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 131: };
132:
/* Forward declaration: the per-element printers recurse into this. */
static void	 pnode_print(struct parse *p, struct pnode *pn);
135:
/*
 * Expat character-data callback.
 * Text is stored in the tree as NODE_TEXT nodes.
 * If a text node is already open (ps->node is NODE_TEXT), the bytes
 * are appended to it; otherwise a new text node is opened under the
 * current node, provided the run contains some non-whitespace.
 * "arg" is the struct parse registered as user data; "p" points to
 * "sz" bytes of character data and is not nil-terminated.
 * Exits the program on allocation failure.
 */
static void
xml_char(void *arg, const XML_Char *p, int sz)
{
	struct parse	*ps = arg;
	struct pnode	*dat;
	int		 i;

	/* Stopped or no tree yet. */
	if (ps->stop || NODE_ROOT == ps->node)
		return;

	/* Not supposed to be collecting text. */
	assert(NULL != ps->cur);
	if (NODE_IGNTEXT & nodes[ps->node].flags)
		return;

	/*
	 * Are we in the midst of processing text?
	 * If not, create a text node, but only if there is some
	 * non-whitespace to store: leading whitespace is stripped.
	 */
	if (NODE_TEXT != ps->node) {
		/*
		 * The cast matters: the input may contain bytes with
		 * the high bit set, and passing a negative value
		 * (other than EOF) to isspace() is undefined.
		 */
		for (i = 0; i < sz; i++)
			if ( ! isspace((unsigned char)p[i]))
				break;
		if (i == sz)
			return;
		p += i;
		sz -= i;

		dat = calloc(1, sizeof(struct pnode));
		if (NULL == dat) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}

		dat->node = ps->node = NODE_TEXT;
		dat->parent = ps->cur;
		TAILQ_INIT(&dat->childq);
		TAILQ_INIT(&dat->attrq);
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
		ps->cur = dat;
		assert(NULL != ps->root);
	}

	/* Append to current buffer. */
	assert(sz >= 0);
	ps->cur->b = realloc(ps->cur->b,
		ps->cur->bsz + (size_t)sz);
	if (NULL == ps->cur->b) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	memcpy(ps->cur->b + ps->cur->bsz, p, (size_t)sz);
	ps->cur->bsz += (size_t)sz;
}
199:
1.10 kristaps 200: static void
201: pnode_trim(struct pnode *pn)
202: {
203:
204: assert(NODE_TEXT == pn->node);
205: for ( ; pn->bsz > 0; pn->bsz--)
206: if ( ! isspace((int)pn->b[pn->bsz - 1]))
207: break;
208: }
209:
/*
 * Expat start-of-element callback.
 * An open text node, if any, is closed out first.
 * The element name is then looked up in the nodes table:
 *  - unknown and we're not yet in a tree: ignore it;
 *  - unknown while in a tree: complain and stop;
 *  - known, at the root, but a tree already exists: complain and stop
 *    (FIXME: I don't think this is right?);
 *  - known, at the root, but not a refentry: ignore it;
 *  - known but not permitted under the current node: complain and
 *    stop.
 * Otherwise a new node is appended to the tree and made current, and
 * its attributes are recorded.
 * Unknown or misplaced attributes and bad values are reported and
 * skipped without stopping the parse.
 * Exits the program on allocation failure.
 */
static void
xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
{
	struct parse	 *ps = arg;
	enum nodeid	  node;
	enum attrkey	  key;
	enum attrval	  val;
	struct pnode	 *dat;
	struct pattr	 *pattr;
	const XML_Char	**att;

	if (ps->stop)
		return;

	/* Close out text node, if applicable... */
	if (NODE_TEXT == ps->node) {
		assert(NULL != ps->cur);
		pnode_trim(ps->cur);
		ps->cur = ps->cur->parent;
		assert(NULL != ps->cur);
		ps->node = ps->cur->node;
	}

	for (node = 0; node < NODE__MAX; node++)
		if (NULL == nodes[node].name)
			continue;
		else if (0 == strcmp(nodes[node].name, name))
			break;

	/*
	 * The position accessors return XML_Size, which need not be
	 * size_t: cast so the arguments match the %zu conversions.
	 */
	if (NODE__MAX == node && NODE_ROOT == ps->node) {
		return;
	} else if (NODE__MAX == node) {
		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
			ps->fname,
			(size_t)XML_GetCurrentLineNumber(ps->xml),
			(size_t)XML_GetCurrentColumnNumber(ps->xml),
			name);
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NULL != ps->root) {
		fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
			ps->fname,
			(size_t)XML_GetCurrentLineNumber(ps->xml),
			(size_t)XML_GetCurrentColumnNumber(ps->xml));
		ps->stop = 1;
		return;
	} else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
		return;
	} else if ( ! isparent(node, ps->node)) {
		fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
			"of node \"%s\"\n",
			ps->fname,
			(size_t)XML_GetCurrentLineNumber(ps->xml),
			(size_t)XML_GetCurrentColumnNumber(ps->xml),
			NULL == nodes[ps->node].name ?
			"(none)" : nodes[ps->node].name,
			NULL == nodes[node].name ?
			"(none)" : nodes[node].name);
		ps->stop = 1;
		return;
	}

	if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	dat->node = ps->node = node;
	dat->parent = ps->cur;
	TAILQ_INIT(&dat->childq);
	TAILQ_INIT(&dat->attrq);

	if (NULL != ps->cur)
		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);

	ps->cur = dat;
	if (NULL == ps->root)
		ps->root = dat;

	/*
	 * Process attributes.
	 * The array holds name/value pairs and ends with a NULL name.
	 */
	for (att = atts; NULL != *att; att += 2) {
		for (key = 0; key < ATTRKEY__MAX; key++)
			if (0 == strcmp(*att, attrkeys[key]))
				break;
		if (ATTRKEY__MAX == key) {
			fprintf(stderr, "%s:%zu:%zu: unknown "
				"attribute \"%s\"\n", ps->fname,
				(size_t)XML_GetCurrentLineNumber(ps->xml),
				(size_t)XML_GetCurrentColumnNumber(ps->xml),
				*att);
			continue;
		} else if ( ! isattrkey(node, key)) {
			fprintf(stderr, "%s:%zu:%zu: bad "
				"attribute \"%s\"\n", ps->fname,
				(size_t)XML_GetCurrentLineNumber(ps->xml),
				(size_t)XML_GetCurrentColumnNumber(ps->xml),
				*att);
			continue;
		}
		for (val = 0; val < ATTRVAL__MAX; val++)
			if (0 == strcmp(*(att + 1), attrvals[val]))
				break;
		if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
			fprintf(stderr, "%s:%zu:%zu: bad "
				"value \"%s\"\n", ps->fname,
				(size_t)XML_GetCurrentLineNumber(ps->xml),
				(size_t)XML_GetCurrentColumnNumber(ps->xml),
				*(att + 1));
			continue;
		}

		/* Allocation failure is fatal, as elsewhere in the file. */
		pattr = calloc(1, sizeof(struct pattr));
		if (NULL == pattr) {
			perror(NULL);
			exit(EXIT_FAILURE);
		}
		pattr->key = key;
		pattr->val = val;
		/* Values that are not known keywords are kept verbatim. */
		if (ATTRVAL__MAX == val) {
			pattr->rawval = strdup(*(att + 1));
			if (NULL == pattr->rawval) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
	}
}
341:
/*
 * Expat end-of-element callback: step back up the parse tree.
 * An open text node is closed out first, then the current element is
 * left for its parent.
 * Leaving the topmost element puts the parser back in NODE_ROOT state.
 */
static void
xml_elem_end(void *arg, const XML_Char *name)
{
	struct parse	*state = arg;
	struct pnode	*up;

	(void)name;

	if (state->stop || NODE_ROOT == state->node)
		return;

	/* Finish a pending text node before closing its element. */
	if (NODE_TEXT == state->node) {
		assert(NULL != state->cur);
		pnode_trim(state->cur);
		state->cur = state->cur->parent;
		assert(NULL != state->cur);
		state->node = state->cur->node;
	}

	up = state->cur->parent;
	state->cur = up;
	if (NULL != up)
		state->node = up->node;
	else
		state->node = NODE_ROOT;
}
369:
1.8 kristaps 370: /*
371: * Recursively free a node (NULL is ok).
372: */
1.1 kristaps 373: static void
374: pnode_free(struct pnode *pn)
375: {
376: struct pnode *pp;
1.12 kristaps 377: struct pattr *ap;
1.1 kristaps 378:
379: if (NULL == pn)
380: return;
381:
382: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
383: TAILQ_REMOVE(&pn->childq, pp, child);
384: pnode_free(pp);
385: }
386:
1.12 kristaps 387: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
388: TAILQ_REMOVE(&pn->attrq, ap, child);
389: free(ap->rawval);
390: free(ap);
391: }
392:
1.1 kristaps 393: free(pn->b);
394: free(pn);
395: }
396:
1.8 kristaps 397: /*
398: * Unlink a node from its parent and pnode_free() it.
399: */
1.1 kristaps 400: static void
401: pnode_unlink(struct pnode *pn)
402: {
403:
404: if (NULL != pn->parent)
405: TAILQ_REMOVE(&pn->parent->childq, pn, child);
406: pnode_free(pn);
407: }
408:
1.8 kristaps 409: /*
410: * Unlink all children of a node and pnode_free() them.
411: */
1.1 kristaps 412: static void
1.4 kristaps 413: pnode_unlinksub(struct pnode *pn)
414: {
415:
416: while ( ! TAILQ_EMPTY(&pn->childq))
417: pnode_unlink(TAILQ_FIRST(&pn->childq));
418: }
419:
1.8 kristaps 420: /*
421: * Reset the lookaside buffer.
422: */
1.4 kristaps 423: static void
1.1 kristaps 424: bufclear(struct parse *p)
425: {
426:
427: p->b[p->bsz = 0] = '\0';
428: }
429:
1.8 kristaps 430: /*
431: * Append NODE_TEXT contents to the current buffer, reallocating its
432: * size if necessary.
433: * The buffer is ALWAYS nil-terminated.
434: */
1.1 kristaps 435: static void
436: bufappend(struct parse *p, struct pnode *pn)
437: {
438:
439: assert(NODE_TEXT == pn->node);
440: if (p->bsz + pn->bsz + 1 > p->mbsz) {
441: p->mbsz = p->bsz + pn->bsz + 1;
442: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
443: perror(NULL);
444: exit(EXIT_FAILURE);
445: }
446: }
447: memcpy(p->b + p->bsz, pn->b, pn->bsz);
448: p->bsz += pn->bsz;
449: p->b[p->bsz] = '\0';
450: }
451:
1.8 kristaps 452: /*
453: * Recursively append all NODE_TEXT nodes to the buffer.
454: * This descends into non-text nodes, but doesn't do anything beyond
455: * them.
456: * In other words, this is a recursive text grok.
457: */
1.3 kristaps 458: static void
459: bufappend_r(struct parse *p, struct pnode *pn)
460: {
461: struct pnode *pp;
462:
463: if (NODE_TEXT == pn->node)
464: bufappend(p, pn);
465: TAILQ_FOREACH(pp, &pn->childq, child)
466: bufappend_r(p, pp);
467: }
468:
1.12 kristaps 469: #define MACROLINE_NORM 0
470: #define MACROLINE_UPPER 1
1.1 kristaps 471: /*
1.8 kristaps 472: * Recursively print text presumably on a macro line.
1.1 kristaps 473: * Convert all whitespace to regular spaces.
474: */
475: static void
1.12 kristaps 476: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 477: {
478: char *cp;
479:
1.13 kristaps 480: if (0 == p->newln)
481: putchar(' ');
482:
1.1 kristaps 483: bufclear(p);
1.3 kristaps 484: bufappend_r(p, pn);
1.1 kristaps 485:
486: /* Convert all space to spaces. */
487: for (cp = p->b; '\0' != *cp; cp++)
488: if (isspace((int)*cp))
489: *cp = ' ';
490:
491: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 492: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 493: for ( ; '\0' != *cp; cp++) {
494: /* Escape us if we look like a macro. */
495: if ((cp == p->b || ' ' == *(cp - 1)) &&
496: isupper((int)*cp) &&
497: '\0' != *(cp + 1) &&
498: islower((int)*(cp + 1)) &&
499: ('\0' == *(cp + 2) ||
500: ' ' == *(cp + 2) ||
501: (islower((int)*(cp + 2)) &&
502: ('\0' == *(cp + 3) ||
503: ' ' == *(cp + 3)))))
504: fputs("\\&", stdout);
1.12 kristaps 505: if (MACROLINE_UPPER & fl)
506: putchar(toupper((int)*cp));
507: else
508: putchar((int)*cp);
1.1 kristaps 509: /* If we're a character escape, escape us. */
510: if ('\\' == *cp)
511: putchar('e');
512: }
513: }
514:
/*
 * Print a subtree's text as part of a macro line, with no flags set
 * and no trailing newline.
 */
static void
pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
{
	const int	 noflags = 0;

	pnode_printmacrolinetext(p, pn, noflags);
}
521:
1.1 kristaps 522: /*
523: * Just pnode_printmacrolinepart() but with a newline.
524: * If no text, just the newline.
525: */
526: static void
527: pnode_printmacroline(struct parse *p, struct pnode *pn)
528: {
529:
1.13 kristaps 530: assert(0 == p->newln);
1.12 kristaps 531: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 532: putchar('\n');
1.13 kristaps 533: p->newln = 1;
1.1 kristaps 534: }
535:
1.10 kristaps 536: static void
537: pnode_printmopen(struct parse *p)
538: {
539: if (p->newln) {
540: putchar('.');
541: p->newln = 0;
542: } else
543: putchar(' ');
544: }
545:
546: static void
547: pnode_printmclose(struct parse *p, int sv)
548: {
549:
550: if (sv && ! p->newln) {
551: putchar('\n');
552: p->newln = 1;
553: }
554: }
555:
1.8 kristaps 556: /*
1.10 kristaps 557: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 558: */
1.1 kristaps 559: static void
1.6 kristaps 560: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
561: {
562: struct pnode *pp;
563:
1.10 kristaps 564: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 565: if (NODE_TITLE == pp->node) {
566: pnode_unlink(pp);
1.10 kristaps 567: return;
1.6 kristaps 568: }
569: }
570:
1.8 kristaps 571: /*
572: * Start a hopefully-named `Sh' section.
573: */
1.6 kristaps 574: static void
1.1 kristaps 575: pnode_printrefsect(struct parse *p, struct pnode *pn)
576: {
577: struct pnode *pp;
578:
579: TAILQ_FOREACH(pp, &pn->childq, child)
580: if (NODE_TITLE == pp->node)
581: break;
582:
1.13 kristaps 583: fputs(".Sh", stdout);
584: p->newln = 0;
1.4 kristaps 585:
1.5 kristaps 586: if (NULL != pp) {
1.1 kristaps 587: pnode_printmacroline(p, pp);
1.5 kristaps 588: pnode_unlink(pp);
1.13 kristaps 589: } else {
1.4 kristaps 590: puts("UNKNOWN");
1.13 kristaps 591: p->newln = 1;
592: }
1.1 kristaps 593: }
594:
1.8 kristaps 595: /*
596: * Start a reference, extracting the title and volume.
597: */
1.1 kristaps 598: static void
599: pnode_printciterefentry(struct parse *p, struct pnode *pn)
600: {
601: struct pnode *pp, *title, *manvol;
602:
603: title = manvol = NULL;
1.13 kristaps 604: assert(p->newln);
1.1 kristaps 605: TAILQ_FOREACH(pp, &pn->childq, child)
606: if (NODE_MANVOLNUM == pp->node)
607: manvol = pp;
608: else if (NODE_REFENTRYTITLE == pp->node)
609: title = pp;
610:
1.13 kristaps 611: fputs(".Xr", stdout);
612: p->newln = 0;
1.4 kristaps 613:
1.1 kristaps 614: if (NULL != title) {
615: pnode_printmacrolinepart(p, title);
616: } else
1.13 kristaps 617: fputs(" unknown ", stdout);
1.4 kristaps 618:
1.13 kristaps 619: if (NULL == manvol) {
620: puts(" 1");
621: p->newln = 1;
622: } else
1.1 kristaps 623: pnode_printmacroline(p, manvol);
624: }
625:
626: static void
627: pnode_printrefmeta(struct parse *p, struct pnode *pn)
628: {
629: struct pnode *pp, *title, *manvol;
630:
631: title = manvol = NULL;
1.13 kristaps 632: assert(p->newln);
1.1 kristaps 633: TAILQ_FOREACH(pp, &pn->childq, child)
634: if (NODE_MANVOLNUM == pp->node)
635: manvol = pp;
636: else if (NODE_REFENTRYTITLE == pp->node)
637: title = pp;
638:
1.2 kristaps 639: puts(".Dd $Mdocdate" "$");
1.13 kristaps 640: fputs(".Dt", stdout);
641: p->newln = 0;
1.1 kristaps 642:
1.13 kristaps 643: if (NULL != title)
1.12 kristaps 644: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 645: else
646: fputs(" UNKNOWN ", stdout);
647:
648: if (NULL == manvol) {
649: puts(" 1");
650: p->newln = 1;
1.1 kristaps 651: } else
652: pnode_printmacroline(p, manvol);
653:
654: puts(".Os");
655: }
656:
1.3 kristaps 657: static void
658: pnode_printfuncdef(struct parse *p, struct pnode *pn)
659: {
660: struct pnode *pp, *ftype, *func;
661:
1.13 kristaps 662: assert(p->newln);
1.3 kristaps 663: ftype = func = NULL;
664: TAILQ_FOREACH(pp, &pn->childq, child)
665: if (NODE_TEXT == pp->node)
666: ftype = pp;
667: else if (NODE_FUNCTION == pp->node)
668: func = pp;
669:
670: if (NULL != ftype) {
1.13 kristaps 671: fputs(".Ft", stdout);
672: p->newln = 0;
1.3 kristaps 673: pnode_printmacroline(p, ftype);
674: }
675:
676: if (NULL != func) {
1.13 kristaps 677: fputs(".Fo", stdout);
678: p->newln = 0;
1.3 kristaps 679: pnode_printmacroline(p, func);
1.13 kristaps 680: } else {
1.3 kristaps 681: puts(".Fo UNKNOWN");
1.13 kristaps 682: p->newln = 1;
683: }
1.3 kristaps 684: }
685:
686: static void
687: pnode_printparamdef(struct parse *p, struct pnode *pn)
688: {
689: struct pnode *pp, *ptype, *param;
690:
1.13 kristaps 691: assert(p->newln);
1.3 kristaps 692: ptype = param = NULL;
693: TAILQ_FOREACH(pp, &pn->childq, child)
694: if (NODE_TEXT == pp->node)
695: ptype = pp;
696: else if (NODE_PARAMETER == pp->node)
697: param = pp;
698:
699: fputs(".Fa \"", stdout);
1.13 kristaps 700: p->newln = 0;
1.3 kristaps 701: if (NULL != ptype) {
702: pnode_printmacrolinepart(p, ptype);
703: putchar(' ');
704: }
705:
706: if (NULL != param)
707: pnode_printmacrolinepart(p, param);
708:
709: puts("\"");
1.13 kristaps 710: p->newln = 1;
1.3 kristaps 711: }
712:
713: static void
714: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
715: {
716: struct pnode *pp, *fdef;
717:
1.13 kristaps 718: assert(p->newln);
1.3 kristaps 719: TAILQ_FOREACH(fdef, &pn->childq, child)
720: if (NODE_FUNCDEF == fdef->node)
721: break;
722:
1.4 kristaps 723: if (NULL != fdef)
1.3 kristaps 724: pnode_printfuncdef(p, fdef);
1.4 kristaps 725: else
1.3 kristaps 726: puts(".Fo UNKNOWN");
727:
1.4 kristaps 728: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 729: if (NODE_PARAMDEF == pp->node)
730: pnode_printparamdef(p, pp);
731:
732: puts(".Fc");
1.13 kristaps 733: p->newln = 1;
1.3 kristaps 734: }
735:
1.10 kristaps 736: /*
737: * The <arg> element is more complicated than it should be because text
738: * nodes are treated like ".Ar foo", but non-text nodes need to be
739: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 740: * This also handles the case of "repetition" (or in other words, the
741: * ellipsis following an argument) and optionality.
1.10 kristaps 742: */
1.4 kristaps 743: static void
1.10 kristaps 744: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 745: {
746: struct pnode *pp;
1.12 kristaps 747: struct pattr *ap;
748: int isop, isrep;
749:
750: isop = 1;
751: isrep = 0;
752: TAILQ_FOREACH(ap, &pn->attrq, child)
753: if (ATTRKEY_CHOICE == ap->key &&
754: (ATTRVAL_PLAIN == ap->val ||
755: ATTRVAL_REQ == ap->val))
756: isop = 0;
757: else if (ATTRKEY_REP == ap->key &&
758: (ATTRVAL_REPEAT == ap->val))
759: isrep = 1;
760:
761: if (isop) {
762: pnode_printmopen(p);
1.13 kristaps 763: fputs("Op", stdout);
1.12 kristaps 764: }
1.4 kristaps 765:
1.10 kristaps 766: TAILQ_FOREACH(pp, &pn->childq, child) {
767: if (NODE_TEXT == pp->node) {
768: pnode_printmopen(p);
1.13 kristaps 769: fputs("Ar", stdout);
1.10 kristaps 770: }
771: pnode_print(p, pp);
1.12 kristaps 772: if (NODE_TEXT == pp->node && isrep)
773: fputs("...", stdout);
1.10 kristaps 774: }
1.4 kristaps 775: }
776:
1.7 kristaps 777: /*
778: * Recursively search and return the first instance of "node".
779: */
780: static struct pnode *
781: pnode_findfirst(struct pnode *pn, enum nodeid node)
782: {
783: struct pnode *pp, *res;
784:
785: res = NULL;
786: TAILQ_FOREACH(pp, &pn->childq, child) {
787: res = pp->node == node ? pp :
788: pnode_findfirst(pp, node);
789: if (NULL != res)
790: break;
791: }
792:
793: return(res);
794: }
795:
796: static void
797: pnode_printprologue(struct parse *p, struct pnode *pn)
798: {
799: struct pnode *pp;
800:
1.9 kristaps 801: pp = NULL == p->root ? NULL :
802: pnode_findfirst(p->root, NODE_REFMETA);
803:
804: if (NULL != pp) {
1.7 kristaps 805: pnode_printrefmeta(p, pp);
806: pnode_unlink(pp);
807: } else {
808: puts(".\\\" Supplying bogus prologue...");
809: puts(".Dd $Mdocdate" "$");
810: puts(".Dt UNKNOWN 1");
811: puts(".Os");
812: }
813: }
814:
1.13 kristaps 815: static void
816: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
817: {
818: struct pnode *pp;
819:
820: assert(p->newln);
821: TAILQ_FOREACH(pp, &pn->childq, child)
822: if (NODE_TERM == pp->node) {
823: fputs(".It", stdout);
824: p->newln = 0;
825: pnode_print(p, pp);
826: pnode_unlink(pp);
827: putchar('\n');
828: p->newln = 1;
829: return;
830: }
831:
832: puts(".It");
833: p->newln = 1;
834: }
835:
836: static void
837: pnode_printvariablelist(struct parse *p, struct pnode *pn)
838: {
839: struct pnode *pp;
840:
841: assert(p->newln);
842: TAILQ_FOREACH(pp, &pn->childq, child)
843: if (NODE_TITLE == pp->node) {
844: puts(".Pp");
845: pnode_print(p, pp);
846: pnode_unlink(pp);
847: }
848:
849: assert(p->newln);
850: puts(".Bl -tag -width Ds");
851: TAILQ_FOREACH(pp, &pn->childq, child)
852: if (NODE_VARLISTENTRY != pp->node) {
853: assert(p->newln);
854: fputs(".It", stdout);
855: pnode_printmacroline(p, pp);
856: } else {
857: assert(p->newln);
858: pnode_print(p, pp);
859: }
860: assert(p->newln);
861: puts(".El");
862: }
863:
1.1 kristaps 864: /*
865: * Print a parsed node (or ignore it--whatever).
866: * This is a recursive function.
867: * FIXME: macro line continuation?
868: */
869: static void
870: pnode_print(struct parse *p, struct pnode *pn)
871: {
872: struct pnode *pp;
873: char *cp;
1.10 kristaps 874: int last, sv;
1.1 kristaps 875:
876: if (NULL == pn)
877: return;
878:
1.10 kristaps 879: sv = p->newln;
1.1 kristaps 880:
881: switch (pn->node) {
1.4 kristaps 882: case (NODE_ARG):
1.10 kristaps 883: pnode_printarg(p, pn);
1.4 kristaps 884: pnode_unlinksub(pn);
885: break;
1.1 kristaps 886: case (NODE_CITEREFENTRY):
1.10 kristaps 887: assert(p->newln);
1.1 kristaps 888: pnode_printciterefentry(p, pn);
1.4 kristaps 889: pnode_unlinksub(pn);
1.1 kristaps 890: break;
891: case (NODE_CODE):
1.10 kristaps 892: pnode_printmopen(p);
1.13 kristaps 893: fputs("Li", stdout);
1.4 kristaps 894: break;
895: case (NODE_COMMAND):
1.10 kristaps 896: pnode_printmopen(p);
1.13 kristaps 897: fputs("Nm", stdout);
898: break;
899: case (NODE_EMPHASIS):
900: pnode_printmopen(p);
901: fputs("Em", stdout);
1.1 kristaps 902: break;
1.3 kristaps 903: case (NODE_FUNCTION):
1.10 kristaps 904: pnode_printmopen(p);
1.13 kristaps 905: fputs("Fn", stdout);
1.3 kristaps 906: break;
907: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 908: assert(p->newln);
1.3 kristaps 909: pnode_printfuncprototype(p, pn);
1.4 kristaps 910: pnode_unlinksub(pn);
1.3 kristaps 911: break;
1.1 kristaps 912: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 913: pnode_printmopen(p);
1.13 kristaps 914: fputs("Fd", stdout);
1.10 kristaps 915: break;
916: case (NODE_OPTION):
917: pnode_printmopen(p);
1.13 kristaps 918: fputs("Fl", stdout);
919: /* FIXME: bogus leading '-'? */
1.1 kristaps 920: break;
921: case (NODE_PARA):
1.10 kristaps 922: assert(p->newln);
1.13 kristaps 923: if (NULL != pn->parent &&
924: NODE_LISTITEM == pn->parent->node)
925: break;
1.1 kristaps 926: puts(".Pp");
1.3 kristaps 927: break;
928: case (NODE_PARAMETER):
1.10 kristaps 929: /* Suppress non-text children... */
930: pnode_printmopen(p);
931: fputs("Fa \"", stdout);
1.3 kristaps 932: pnode_printmacrolinepart(p, pn);
933: puts("\"");
1.4 kristaps 934: pnode_unlinksub(pn);
1.1 kristaps 935: break;
936: case (NODE_PROGRAMLISTING):
1.10 kristaps 937: assert(p->newln);
1.1 kristaps 938: puts(".Bd -literal");
1.15 ! kristaps 939: break;
! 940: case (NODE_REFENTRYINFO):
! 941: /* Suppress. */
! 942: pnode_unlinksub(pn);
1.1 kristaps 943: break;
944: case (NODE_REFMETA):
1.7 kristaps 945: abort();
1.1 kristaps 946: break;
947: case (NODE_REFNAME):
1.10 kristaps 948: /* Suppress non-text children... */
949: pnode_printmopen(p);
1.13 kristaps 950: fputs("Nm", stdout);
951: p->newln = 0;
1.10 kristaps 952: pnode_printmacrolinepart(p, pn);
1.4 kristaps 953: pnode_unlinksub(pn);
1.10 kristaps 954: break;
1.1 kristaps 955: case (NODE_REFNAMEDIV):
1.10 kristaps 956: assert(p->newln);
1.1 kristaps 957: puts(".Sh NAME");
958: break;
959: case (NODE_REFPURPOSE):
1.10 kristaps 960: assert(p->newln);
1.13 kristaps 961: pnode_printmopen(p);
962: fputs("Nd", stdout);
1.10 kristaps 963: break;
1.1 kristaps 964: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 965: assert(p->newln);
1.6 kristaps 966: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 967: puts(".Sh SYNOPSIS");
1.1 kristaps 968: break;
969: case (NODE_REFSECT1):
1.10 kristaps 970: assert(p->newln);
1.1 kristaps 971: pnode_printrefsect(p, pn);
972: break;
1.13 kristaps 973: case (NODE_REPLACEABLE):
974: pnode_printmopen(p);
975: fputs("Ar", stdout);
976: break;
1.8 kristaps 977: case (NODE_STRUCTNAME):
1.10 kristaps 978: pnode_printmopen(p);
1.13 kristaps 979: fputs("Vt", stdout);
1.10 kristaps 980: break;
1.1 kristaps 981: case (NODE_TEXT):
1.13 kristaps 982: if (0 == p->newln)
983: putchar(' ');
1.1 kristaps 984: bufclear(p);
985: bufappend(p, pn);
986: /*
987: * Output all characters, squeezing out whitespace
988: * between newlines.
989: * XXX: all whitespace, including tabs (?).
990: * Remember to escape control characters and escapes.
991: */
1.10 kristaps 992: assert(p->bsz);
1.1 kristaps 993: for (last = '\n', cp = p->b; '\0' != *cp; ) {
994: if ('\n' == last) {
995: /* Consume all whitespace. */
996: if (isspace((int)*cp)) {
997: while (isspace((int)*cp))
998: cp++;
999: continue;
1000: } else if ('\'' == *cp || '.' == *cp)
1001: fputs("\\&", stdout);
1002: }
1003: putchar(last = *cp++);
1004: /* If we're a character escape, escape us. */
1005: if ('\\' == last)
1006: putchar('e');
1007: }
1.10 kristaps 1008: p->newln = 0;
1.1 kristaps 1009: break;
1.13 kristaps 1010: case (NODE_VARIABLELIST):
1011: assert(p->newln);
1012: pnode_printvariablelist(p, pn);
1013: pnode_unlinksub(pn);
1014: break;
1015: case (NODE_VARLISTENTRY):
1016: assert(p->newln);
1017: pnode_printvarlistentry(p, pn);
1018: break;
1.1 kristaps 1019: default:
1020: break;
1021: }
1022:
1023: TAILQ_FOREACH(pp, &pn->childq, child)
1024: pnode_print(p, pp);
1025:
1026: switch (pn->node) {
1.10 kristaps 1027: case (NODE_ARG):
1028: case (NODE_CODE):
1029: case (NODE_COMMAND):
1.13 kristaps 1030: case (NODE_EMPHASIS):
1.10 kristaps 1031: case (NODE_FUNCTION):
1032: case (NODE_FUNCSYNOPSISINFO):
1033: case (NODE_OPTION):
1034: case (NODE_PARAMETER):
1.13 kristaps 1035: case (NODE_REPLACEABLE):
1036: case (NODE_REFPURPOSE):
1.10 kristaps 1037: case (NODE_STRUCTNAME):
1038: case (NODE_TEXT):
1039: pnode_printmclose(p, sv);
1040: break;
1.12 kristaps 1041: case (NODE_REFNAME):
1042: /*
1043: * If we're in the NAME macro and we have multiple
1044: * <refname> macros in sequence, then print out a
1045: * trailing comma before the newline.
1046: */
1047: if (NULL != pn->parent &&
1048: NODE_REFNAMEDIV == pn->parent->node &&
1049: NULL != TAILQ_NEXT(pn, child) &&
1050: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1051: fputs(" ,", stdout);
1052: pnode_printmclose(p, sv);
1053: break;
1.1 kristaps 1054: case (NODE_PROGRAMLISTING):
1.10 kristaps 1055: assert(p->newln);
1.1 kristaps 1056: puts(".Ed");
1.10 kristaps 1057: p->newln = 1;
1.1 kristaps 1058: break;
1059: default:
1060: break;
1061: }
1062: }
1063:
/*
 * Parse a whole input file and print the result.
 * Loop around the read buffer until we've drained it of all data,
 * invoking the parser context with each buffer fill.
 * Once the input ends, or the parse fails or is stopped, the prologue
 * and whatever tree was built are printed and released.
 * "b" is a caller-supplied read buffer of "bsz" bytes and "fn" the
 * filename used in diagnostics.
 * Returns non-zero only if the input was parsed to the end without
 * any error; zero on parse, read or allocation failure.
 */
static int
readfile(XML_Parser xp, int fd,
	char *b, size_t bsz, const char *fn)
{
	struct parse	 p;
	int		 rc;
	ssize_t		 ssz;

	memset(&p, 0, sizeof(struct parse));

	/*
	 * The printer writes through this buffer unconditionally, so
	 * it must exist before anything else happens.
	 */
	p.b = malloc(p.bsz = p.mbsz = 1024);
	if (NULL == p.b) {
		perror(NULL);
		return(0);
	}
	p.fname = fn;
	p.xml = xp;

	XML_SetCharacterDataHandler(xp, xml_char);
	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
	XML_SetUserData(xp, &p);

	while ((ssz = read(fd, b, bsz)) >= 0) {
		/* A zero-length read marks the final chunk. */
		if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
			fprintf(stderr, "%s: %s\n", fn,
				XML_ErrorString
				(XML_GetErrorCode(xp)));
		else if ( ! p.stop && ssz > 0)
			continue;
		/*
		 * Exit when we've read all or errors have occurred
		 * during the parse sequence.
		 */
		p.newln = 1;
		pnode_printprologue(&p, p.root);
		pnode_print(&p, p.root);
		pnode_free(p.root);
		free(p.b);
		return(0 != rc && ! p.stop);
	}

	/* Read error has occurred. */
	perror(fn);
	pnode_free(p.root);
	free(p.b);
	return(0);
}
1111:
/*
 * Convert one DocBook refentry, read from the file named as the only
 * argument or from standard input (no argument, or "-"), and write
 * the result to standard output.
 * No options are accepted.
 * Exits with EXIT_SUCCESS only if the input was read and parsed
 * without error.
 */
int
main(int argc, char *argv[])
{
	XML_Parser	 xp;
	const char	*fname;
	char		*buf;
	int		 fd, rc;

	fname = "-";
	xp = NULL;
	buf = NULL;
	rc = 0;

	if (-1 != getopt(argc, argv, ""))
		return(EXIT_FAILURE);

	argc -= optind;
	argv += optind;

	if (argc > 1)
		return(EXIT_FAILURE);
	else if (argc > 0)
		fname = argv[0];

	/* Read from stdin or a file. */
	fd = 0 == strcmp(fname, "-") ?
		STDIN_FILENO : open(fname, O_RDONLY, 0);

	/*
	 * Open file for reading.
	 * Allocate a read buffer.
	 * Create the parser context.
	 * Dive directly into the parse.
	 * readfile() returns non-zero on success, and only then is rc
	 * set.
	 */
	if (-1 == fd)
		perror(fname);
	else if (NULL == (buf = malloc(4096)))
		perror(NULL);
	else if (NULL == (xp = XML_ParserCreate(NULL)))
		perror(NULL);
	else if (readfile(xp, fd, buf, 4096, fname))
		rc = 1;

	if (NULL != xp)
		XML_ParserFree(xp);
	free(buf);
	/* Nothing to close if the open failed or we read stdin. */
	if (-1 != fd && STDIN_FILENO != fd)
		close(fd);
	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
}
CVSweb