Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.21
1.21 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.20 2014/03/30 16:57:06 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml; /* expat handle; queried for line/column in diagnostics */
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
/* (stop is set by xml_elem_start() on a fatal input error; every handler then returns early) */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/* Static description of one DocBook element: its tag name plus parse flags. */
48: struct node {
1.8 kristaps 49: const char *name; /* docbook element name */
1.1 kristaps 50: unsigned int flags; /* bitmask; xml_char() tests NODE_IGNTEXT here */
51: #define NODE_IGNTEXT 1 /* ignore all contained text */
52: };
53:
/* Queue head types for a node's children and for a node's attributes. */
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
/* One recognised attribute attached to a parsed element. */
57: struct pattr {
58: enum attrkey key; /* which attribute this is */
59: enum attrval val; /* recognised value, or ATTRVAL__MAX if the value is free-form */
60: char *rawval; /* strdup'd value text; only set when val is ATTRVAL__MAX */
61: TAILQ_ENTRY(pattr) child; /* linkage in the owning pnode's attrq */
62: };
1.1 kristaps 63:
/*
 * A node of the parse tree: either an element or a run of text
 * (NODE_TEXT), the latter carrying its bytes in b/bsz.
 */
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
/* (b is filled by xml_char() with raw bytes and is NOT nil-terminated) */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
/*
 * Attribute names we recognise.
 * xml_elem_start() uses the index of the matching entry directly as the
 * enum attrkey value, so the order here must follow that enum's
 * declaration (declared outside this file -- verify when adding entries).
 */
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
/*
 * Attribute values we recognise.
 * As with the key table, the index of a match is used as the enum
 * attrval value, so the order must follow that enum's declaration
 * (declared outside this file -- verify when adding entries).
 * Values not listed here are kept verbatim in pattr.rawval.
 */
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
/*
 * Table of known DocBook elements, indexed by enum nodeid.
 * xml_elem_start() searches it by name and uses the index of the match
 * as the node type, so the order must follow the enum's declaration
 * (declared outside this file -- verify when adding entries).
 * Entries with a NULL name are skipped by that search; they are
 * presumably the slots for the non-element ids (NODE_ROOT, NODE_TEXT).
 */
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.21 ! kristaps 90: { "acronym", 0 },
1.4 kristaps 91: { "arg", 0 },
1.1 kristaps 92: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 93: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 94: { "code", 0 },
1.4 kristaps 95: { "command", 0 },
1.15 kristaps 96: { "date", 0 },
1.13 kristaps 97: { "emphasis", 0 },
1.21 ! kristaps 98: { "envar", 0 },
1.17 kristaps 99: { "filename", 0 },
1.3 kristaps 100: { "funcdef", 0 },
101: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 102: { "funcsynopsis", NODE_IGNTEXT },
103: { "funcsynopsisinfo", 0 },
1.3 kristaps 104: { "function", 0 },
1.16 kristaps 105: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 106: { "link", 0 },
1.13 kristaps 107: { "listitem", NODE_IGNTEXT },
1.19 kristaps 108: { "literal", 0 },
1.1 kristaps 109: { "manvolnum", 0 },
1.4 kristaps 110: { "option", 0 },
1.21 ! kristaps 111: { "orderedlist", NODE_IGNTEXT },
1.1 kristaps 112: { "para", 0 },
1.3 kristaps 113: { "paramdef", 0 },
114: { "parameter", 0 },
1.1 kristaps 115: { "programlisting", 0 },
116: { "refclass", NODE_IGNTEXT },
117: { "refdescriptor", NODE_IGNTEXT },
118: { "refentry", NODE_IGNTEXT },
1.15 kristaps 119: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 120: { "refentrytitle", 0 },
121: { "refmeta", NODE_IGNTEXT },
122: { "refmiscinfo", NODE_IGNTEXT },
123: { "refname", 0 },
124: { "refnamediv", NODE_IGNTEXT },
125: { "refpurpose", 0 },
1.20 kristaps 126: { "refsect1", NODE_IGNTEXT },
127: { "refsect2", NODE_IGNTEXT },
1.1 kristaps 128: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 129: { "replaceable", 0 },
1.19 kristaps 130: { "sbr", NODE_IGNTEXT },
1.8 kristaps 131: { "structname", 0 },
1.1 kristaps 132: { "synopsis", 0 },
1.13 kristaps 133: { "term", 0 },
1.1 kristaps 134: { NULL, 0 },
135: { "title", 0 },
1.14 kristaps 136: { "ulink", 0 },
1.13 kristaps 137: { "variablelist", NODE_IGNTEXT },
138: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 139: };
140:
1.10 kristaps 141: static void
142: pnode_print(struct parse *p, struct pnode *pn);
143:
1.8 kristaps 144: /*
145: * Process a stream of characters.
146: * We store text as nodes in and of themselves.
147: * If a text node is already open, append to it.
148: * If it's not open, open one under the current context.
149: */
1.1 kristaps 150: static void
151: xml_char(void *arg, const XML_Char *p, int sz)
152: {
153: struct parse *ps = arg;
154: struct pnode *dat;
1.4 kristaps 155: int i;
1.1 kristaps 156:
157: /* Stopped or no tree yet. */
158: if (ps->stop || NODE_ROOT == ps->node)
159: return;
160:
161: /* Not supposed to be collecting text. */
162: assert(NULL != ps->cur);
163: if (NODE_IGNTEXT & nodes[ps->node].flags)
164: return;
165:
166: /*
167: * Are we in the midst of processing text?
168: * If we're not processing text right now, then create a text
169: * node for doing so.
1.4 kristaps 170: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 171: * process: strip out all leading whitespace to be sure.
1.1 kristaps 172: */
173: if (NODE_TEXT != ps->node) {
1.4 kristaps 174: for (i = 0; i < sz; i++)
175: if ( ! isspace((int)p[i]))
176: break;
177: if (i == sz)
178: return;
1.10 kristaps 179: p += i;
180: sz -= i;
1.1 kristaps 181: dat = calloc(1, sizeof(struct pnode));
182: if (NULL == dat) {
183: perror(NULL);
184: exit(EXIT_FAILURE);
185: }
186:
187: dat->node = ps->node = NODE_TEXT;
188: dat->parent = ps->cur;
189: TAILQ_INIT(&dat->childq);
1.12 kristaps 190: TAILQ_INIT(&dat->attrq);
1.1 kristaps 191: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
192: ps->cur = dat;
193: assert(NULL != ps->root);
194: }
195:
196: /* Append to current buffer. */
197: assert(sz >= 0);
198: ps->cur->b = realloc(ps->cur->b,
199: ps->cur->bsz + (size_t)sz);
200: if (NULL == ps->cur->b) {
201: perror(NULL);
202: exit(EXIT_FAILURE);
203: }
204: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
205: ps->cur->bsz += (size_t)sz;
206: }
207:
1.10 kristaps 208: static void
209: pnode_trim(struct pnode *pn)
210: {
211:
212: assert(NODE_TEXT == pn->node);
213: for ( ; pn->bsz > 0; pn->bsz--)
214: if ( ! isspace((int)pn->b[pn->bsz - 1]))
215: break;
216: }
217:
1.1 kristaps 218: /*
219: * Begin an element.
220: * First, look for the element.
221: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 222: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 223: * If we find it but we're not parsing yet (i.e., it's not a refentry
224: * and thus out of context), keep going.
1.8 kristaps 225: * If we find it and we're at the root and already have a tree, puke and
226: * exit (FIXME: I don't think this is right?).
227: * If we find it but we're parsing a text node, close out the text node,
228: * return to its parent, and keep going.
1.1 kristaps 229: * Make sure that the element is in the right context.
230: * Lastly, put the node onto our parse tree and continue.
231: */
232: static void
233: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
234: {
1.12 kristaps 235: struct parse *ps = arg;
236: enum nodeid node;
237: enum attrkey key;
238: enum attrval val;
239: struct pnode *dat;
240: struct pattr *pattr;
241: const XML_Char **att;
1.1 kristaps 242:
243: if (ps->stop)
244: return;
245:
246: /* Close out text node, if applicable... */
247: if (NODE_TEXT == ps->node) {
248: assert(NULL != ps->cur);
1.10 kristaps 249: pnode_trim(ps->cur);
1.1 kristaps 250: ps->cur = ps->cur->parent;
251: assert(NULL != ps->cur);
252: ps->node = ps->cur->node;
253: }
254:
255: for (node = 0; node < NODE__MAX; node++)
256: if (NULL == nodes[node].name)
257: continue;
258: else if (0 == strcmp(nodes[node].name, name))
259: break;
260:
261: if (NODE__MAX == node && NODE_ROOT == ps->node) {
262: return;
263: } else if (NODE__MAX == node) {
1.12 kristaps 264: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
265: ps->fname, XML_GetCurrentLineNumber(ps->xml),
266: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 267: ps->stop = 1;
268: return;
269: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 270: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
271: ps->fname, XML_GetCurrentLineNumber(ps->xml),
272: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 273: ps->stop = 1;
274: return;
275: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
276: return;
277: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 278: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
279: "of node \"%s\"\n",
1.12 kristaps 280: ps->fname, XML_GetCurrentLineNumber(ps->xml),
281: XML_GetCurrentColumnNumber(ps->xml),
282: NULL == nodes[ps->node].name ?
1.13 kristaps 283: "(none)" : nodes[ps->node].name,
284: NULL == nodes[node].name ?
285: "(none)" : nodes[node].name);
1.1 kristaps 286: ps->stop = 1;
287: return;
288: }
289:
290: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
291: perror(NULL);
292: exit(EXIT_FAILURE);
293: }
294:
295: dat->node = ps->node = node;
296: dat->parent = ps->cur;
297: TAILQ_INIT(&dat->childq);
1.12 kristaps 298: TAILQ_INIT(&dat->attrq);
1.1 kristaps 299:
300: if (NULL != ps->cur)
301: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
302:
303: ps->cur = dat;
304: if (NULL == ps->root)
305: ps->root = dat;
1.12 kristaps 306:
307: /*
308: * Process attributes.
309: */
310: for (att = atts; NULL != *att; att += 2) {
311: for (key = 0; key < ATTRKEY__MAX; key++)
312: if (0 == strcmp(*att, attrkeys[key]))
313: break;
314: if (ATTRKEY__MAX == key) {
315: fprintf(stderr, "%s:%zu:%zu: unknown "
316: "attribute \"%s\"\n", ps->fname,
317: XML_GetCurrentLineNumber(ps->xml),
318: XML_GetCurrentColumnNumber(ps->xml),
319: *att);
320: continue;
321: } else if ( ! isattrkey(node, key)) {
322: fprintf(stderr, "%s:%zu:%zu: bad "
323: "attribute \"%s\"\n", ps->fname,
324: XML_GetCurrentLineNumber(ps->xml),
325: XML_GetCurrentColumnNumber(ps->xml),
326: *att);
327: continue;
328: }
329: for (val = 0; val < ATTRVAL__MAX; val++)
330: if (0 == strcmp(*(att + 1), attrvals[val]))
331: break;
332: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
333: fprintf(stderr, "%s:%zu:%zu: bad "
334: "value \"%s\"\n", ps->fname,
335: XML_GetCurrentLineNumber(ps->xml),
336: XML_GetCurrentColumnNumber(ps->xml),
337: *(att + 1));
338: continue;
339: }
340: pattr = calloc(1, sizeof(struct pattr));
341: pattr->key = key;
342: pattr->val = val;
343: if (ATTRVAL__MAX == val)
344: pattr->rawval = strdup(*(att + 1));
345: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
346: }
347:
1.1 kristaps 348: }
349:
350: /*
351: * Roll up the parse tree.
1.8 kristaps 352: * If we're at a text node, roll that one up first.
1.1 kristaps 353: * If we hit the root, then assign ourselves as the NODE_ROOT.
354: */
355: static void
356: xml_elem_end(void *arg, const XML_Char *name)
357: {
358: struct parse *ps = arg;
359:
360: if (ps->stop || NODE_ROOT == ps->node)
361: return;
362:
363: /* Close out text node, if applicable... */
364: if (NODE_TEXT == ps->node) {
365: assert(NULL != ps->cur);
1.10 kristaps 366: pnode_trim(ps->cur);
1.1 kristaps 367: ps->cur = ps->cur->parent;
368: assert(NULL != ps->cur);
369: ps->node = ps->cur->node;
370: }
371:
372: if (NULL == (ps->cur = ps->cur->parent))
373: ps->node = NODE_ROOT;
374: else
375: ps->node = ps->cur->node;
376: }
377:
1.8 kristaps 378: /*
379: * Recursively free a node (NULL is ok).
380: */
1.1 kristaps 381: static void
382: pnode_free(struct pnode *pn)
383: {
384: struct pnode *pp;
1.12 kristaps 385: struct pattr *ap;
1.1 kristaps 386:
387: if (NULL == pn)
388: return;
389:
390: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
391: TAILQ_REMOVE(&pn->childq, pp, child);
392: pnode_free(pp);
393: }
394:
1.12 kristaps 395: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
396: TAILQ_REMOVE(&pn->attrq, ap, child);
397: free(ap->rawval);
398: free(ap);
399: }
400:
1.1 kristaps 401: free(pn->b);
402: free(pn);
403: }
404:
1.8 kristaps 405: /*
406: * Unlink a node from its parent and pnode_free() it.
407: */
1.1 kristaps 408: static void
409: pnode_unlink(struct pnode *pn)
410: {
411:
412: if (NULL != pn->parent)
413: TAILQ_REMOVE(&pn->parent->childq, pn, child);
414: pnode_free(pn);
415: }
416:
1.8 kristaps 417: /*
418: * Unlink all children of a node and pnode_free() them.
419: */
1.1 kristaps 420: static void
1.4 kristaps 421: pnode_unlinksub(struct pnode *pn)
422: {
423:
424: while ( ! TAILQ_EMPTY(&pn->childq))
425: pnode_unlink(TAILQ_FIRST(&pn->childq));
426: }
427:
1.8 kristaps 428: /*
429: * Reset the lookaside buffer.
430: */
1.4 kristaps 431: static void
1.1 kristaps 432: bufclear(struct parse *p)
433: {
434:
435: p->b[p->bsz = 0] = '\0';
436: }
437:
1.8 kristaps 438: /*
439: * Append NODE_TEXT contents to the current buffer, reallocating its
440: * size if necessary.
441: * The buffer is ALWAYS nil-terminated.
442: */
1.1 kristaps 443: static void
444: bufappend(struct parse *p, struct pnode *pn)
445: {
446:
447: assert(NODE_TEXT == pn->node);
448: if (p->bsz + pn->bsz + 1 > p->mbsz) {
449: p->mbsz = p->bsz + pn->bsz + 1;
450: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
451: perror(NULL);
452: exit(EXIT_FAILURE);
453: }
454: }
455: memcpy(p->b + p->bsz, pn->b, pn->bsz);
456: p->bsz += pn->bsz;
457: p->b[p->bsz] = '\0';
458: }
459:
1.8 kristaps 460: /*
461: * Recursively append all NODE_TEXT nodes to the buffer.
462: * This descends into non-text nodes, but doesn't do anything beyond
463: * them.
464: * In other words, this is a recursive text grok.
465: */
1.3 kristaps 466: static void
467: bufappend_r(struct parse *p, struct pnode *pn)
468: {
469: struct pnode *pp;
470:
471: if (NODE_TEXT == pn->node)
472: bufappend(p, pn);
473: TAILQ_FOREACH(pp, &pn->childq, child)
474: bufappend_r(p, pp);
475: }
476:
1.12 kristaps 477: #define MACROLINE_NORM 0
478: #define MACROLINE_UPPER 1
1.1 kristaps 479: /*
1.8 kristaps 480: * Recursively print text presumably on a macro line.
1.1 kristaps 481: * Convert all whitespace to regular spaces.
482: */
483: static void
1.12 kristaps 484: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 485: {
486: char *cp;
487:
1.13 kristaps 488: if (0 == p->newln)
489: putchar(' ');
490:
1.1 kristaps 491: bufclear(p);
1.3 kristaps 492: bufappend_r(p, pn);
1.1 kristaps 493:
494: /* Convert all space to spaces. */
495: for (cp = p->b; '\0' != *cp; cp++)
496: if (isspace((int)*cp))
497: *cp = ' ';
498:
499: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 500: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 501: for ( ; '\0' != *cp; cp++) {
502: /* Escape us if we look like a macro. */
503: if ((cp == p->b || ' ' == *(cp - 1)) &&
504: isupper((int)*cp) &&
505: '\0' != *(cp + 1) &&
506: islower((int)*(cp + 1)) &&
507: ('\0' == *(cp + 2) ||
508: ' ' == *(cp + 2) ||
509: (islower((int)*(cp + 2)) &&
510: ('\0' == *(cp + 3) ||
511: ' ' == *(cp + 3)))))
512: fputs("\\&", stdout);
1.12 kristaps 513: if (MACROLINE_UPPER & fl)
514: putchar(toupper((int)*cp));
515: else
516: putchar((int)*cp);
1.1 kristaps 517: /* If we're a character escape, escape us. */
518: if ('\\' == *cp)
519: putchar('e');
520: }
521: }
522:
1.12 kristaps 523: static void
524: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
525: {
526:
527: pnode_printmacrolinetext(p, pn, 0);
528: }
529:
1.1 kristaps 530: /*
531: * Just pnode_printmacrolinepart() but with a newline.
532: * If no text, just the newline.
533: */
534: static void
535: pnode_printmacroline(struct parse *p, struct pnode *pn)
536: {
537:
1.13 kristaps 538: assert(0 == p->newln);
1.12 kristaps 539: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 540: putchar('\n');
1.13 kristaps 541: p->newln = 1;
1.1 kristaps 542: }
543:
1.10 kristaps 544: static void
545: pnode_printmopen(struct parse *p)
546: {
547: if (p->newln) {
548: putchar('.');
549: p->newln = 0;
550: } else
551: putchar(' ');
552: }
553:
554: static void
555: pnode_printmclose(struct parse *p, int sv)
556: {
557:
558: if (sv && ! p->newln) {
559: putchar('\n');
560: p->newln = 1;
561: }
562: }
563:
1.8 kristaps 564: /*
1.10 kristaps 565: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 566: */
1.1 kristaps 567: static void
1.6 kristaps 568: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
569: {
570: struct pnode *pp;
571:
1.10 kristaps 572: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 573: if (NODE_TITLE == pp->node) {
574: pnode_unlink(pp);
1.10 kristaps 575: return;
1.6 kristaps 576: }
577: }
578:
1.8 kristaps 579: /*
580: * Start a hopefully-named `Sh' section.
581: */
1.6 kristaps 582: static void
1.1 kristaps 583: pnode_printrefsect(struct parse *p, struct pnode *pn)
584: {
585: struct pnode *pp;
586:
587: TAILQ_FOREACH(pp, &pn->childq, child)
588: if (NODE_TITLE == pp->node)
589: break;
590:
1.20 kristaps 591: if (NODE_REFSECT1 == pn->node)
592: fputs(".Sh", stdout);
593: else
594: fputs(".Ss", stdout);
595:
1.13 kristaps 596: p->newln = 0;
1.4 kristaps 597:
1.5 kristaps 598: if (NULL != pp) {
1.20 kristaps 599: pnode_printmacrolinetext(p, pp,
600: NODE_REFSECT1 == pn->node ?
601: MACROLINE_UPPER : 0);
1.18 kristaps 602: pnode_printmclose(p, 1);
1.5 kristaps 603: pnode_unlink(pp);
1.13 kristaps 604: } else {
1.4 kristaps 605: puts("UNKNOWN");
1.13 kristaps 606: p->newln = 1;
607: }
1.1 kristaps 608: }
609:
1.8 kristaps 610: /*
611: * Start a reference, extracting the title and volume.
612: */
1.1 kristaps 613: static void
614: pnode_printciterefentry(struct parse *p, struct pnode *pn)
615: {
616: struct pnode *pp, *title, *manvol;
617:
618: title = manvol = NULL;
1.13 kristaps 619: assert(p->newln);
1.1 kristaps 620: TAILQ_FOREACH(pp, &pn->childq, child)
621: if (NODE_MANVOLNUM == pp->node)
622: manvol = pp;
623: else if (NODE_REFENTRYTITLE == pp->node)
624: title = pp;
625:
1.13 kristaps 626: fputs(".Xr", stdout);
627: p->newln = 0;
1.4 kristaps 628:
1.1 kristaps 629: if (NULL != title) {
630: pnode_printmacrolinepart(p, title);
631: } else
1.13 kristaps 632: fputs(" unknown ", stdout);
1.4 kristaps 633:
1.13 kristaps 634: if (NULL == manvol) {
635: puts(" 1");
636: p->newln = 1;
637: } else
1.1 kristaps 638: pnode_printmacroline(p, manvol);
639: }
640:
641: static void
642: pnode_printrefmeta(struct parse *p, struct pnode *pn)
643: {
644: struct pnode *pp, *title, *manvol;
645:
646: title = manvol = NULL;
1.13 kristaps 647: assert(p->newln);
1.1 kristaps 648: TAILQ_FOREACH(pp, &pn->childq, child)
649: if (NODE_MANVOLNUM == pp->node)
650: manvol = pp;
651: else if (NODE_REFENTRYTITLE == pp->node)
652: title = pp;
653:
1.2 kristaps 654: puts(".Dd $Mdocdate" "$");
1.13 kristaps 655: fputs(".Dt", stdout);
656: p->newln = 0;
1.1 kristaps 657:
1.13 kristaps 658: if (NULL != title)
1.12 kristaps 659: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 660: else
661: fputs(" UNKNOWN ", stdout);
662:
663: if (NULL == manvol) {
664: puts(" 1");
665: p->newln = 1;
1.1 kristaps 666: } else
667: pnode_printmacroline(p, manvol);
668:
669: puts(".Os");
670: }
671:
1.3 kristaps 672: static void
673: pnode_printfuncdef(struct parse *p, struct pnode *pn)
674: {
675: struct pnode *pp, *ftype, *func;
676:
1.13 kristaps 677: assert(p->newln);
1.3 kristaps 678: ftype = func = NULL;
679: TAILQ_FOREACH(pp, &pn->childq, child)
680: if (NODE_TEXT == pp->node)
681: ftype = pp;
682: else if (NODE_FUNCTION == pp->node)
683: func = pp;
684:
685: if (NULL != ftype) {
1.13 kristaps 686: fputs(".Ft", stdout);
687: p->newln = 0;
1.3 kristaps 688: pnode_printmacroline(p, ftype);
689: }
690:
691: if (NULL != func) {
1.13 kristaps 692: fputs(".Fo", stdout);
693: p->newln = 0;
1.3 kristaps 694: pnode_printmacroline(p, func);
1.13 kristaps 695: } else {
1.3 kristaps 696: puts(".Fo UNKNOWN");
1.13 kristaps 697: p->newln = 1;
698: }
1.3 kristaps 699: }
700:
701: static void
702: pnode_printparamdef(struct parse *p, struct pnode *pn)
703: {
704: struct pnode *pp, *ptype, *param;
705:
1.13 kristaps 706: assert(p->newln);
1.3 kristaps 707: ptype = param = NULL;
708: TAILQ_FOREACH(pp, &pn->childq, child)
709: if (NODE_TEXT == pp->node)
710: ptype = pp;
711: else if (NODE_PARAMETER == pp->node)
712: param = pp;
713:
714: fputs(".Fa \"", stdout);
1.13 kristaps 715: p->newln = 0;
1.3 kristaps 716: if (NULL != ptype) {
717: pnode_printmacrolinepart(p, ptype);
718: putchar(' ');
719: }
720:
721: if (NULL != param)
722: pnode_printmacrolinepart(p, param);
723:
724: puts("\"");
1.13 kristaps 725: p->newln = 1;
1.3 kristaps 726: }
727:
728: static void
729: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
730: {
731: struct pnode *pp, *fdef;
732:
1.13 kristaps 733: assert(p->newln);
1.3 kristaps 734: TAILQ_FOREACH(fdef, &pn->childq, child)
735: if (NODE_FUNCDEF == fdef->node)
736: break;
737:
1.4 kristaps 738: if (NULL != fdef)
1.3 kristaps 739: pnode_printfuncdef(p, fdef);
1.4 kristaps 740: else
1.3 kristaps 741: puts(".Fo UNKNOWN");
742:
1.4 kristaps 743: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 744: if (NODE_PARAMDEF == pp->node)
745: pnode_printparamdef(p, pp);
746:
747: puts(".Fc");
1.13 kristaps 748: p->newln = 1;
1.3 kristaps 749: }
750:
1.10 kristaps 751: /*
752: * The <arg> element is more complicated than it should be because text
753: * nodes are treated like ".Ar foo", but non-text nodes need to be
754: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 755: * This also handles the case of "repetition" (or in other words, the
756: * ellipsis following an argument) and optionality.
1.10 kristaps 757: */
1.4 kristaps 758: static void
1.10 kristaps 759: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 760: {
761: struct pnode *pp;
1.12 kristaps 762: struct pattr *ap;
763: int isop, isrep;
764:
765: isop = 1;
766: isrep = 0;
767: TAILQ_FOREACH(ap, &pn->attrq, child)
768: if (ATTRKEY_CHOICE == ap->key &&
769: (ATTRVAL_PLAIN == ap->val ||
770: ATTRVAL_REQ == ap->val))
771: isop = 0;
772: else if (ATTRKEY_REP == ap->key &&
773: (ATTRVAL_REPEAT == ap->val))
774: isrep = 1;
775:
776: if (isop) {
777: pnode_printmopen(p);
1.13 kristaps 778: fputs("Op", stdout);
1.12 kristaps 779: }
1.4 kristaps 780:
1.10 kristaps 781: TAILQ_FOREACH(pp, &pn->childq, child) {
782: if (NODE_TEXT == pp->node) {
783: pnode_printmopen(p);
1.13 kristaps 784: fputs("Ar", stdout);
1.10 kristaps 785: }
786: pnode_print(p, pp);
1.12 kristaps 787: if (NODE_TEXT == pp->node && isrep)
788: fputs("...", stdout);
1.10 kristaps 789: }
1.4 kristaps 790: }
791:
1.7 kristaps 792: /*
793: * Recursively search and return the first instance of "node".
794: */
795: static struct pnode *
796: pnode_findfirst(struct pnode *pn, enum nodeid node)
797: {
798: struct pnode *pp, *res;
799:
800: res = NULL;
801: TAILQ_FOREACH(pp, &pn->childq, child) {
802: res = pp->node == node ? pp :
803: pnode_findfirst(pp, node);
804: if (NULL != res)
805: break;
806: }
807:
808: return(res);
809: }
810:
811: static void
812: pnode_printprologue(struct parse *p, struct pnode *pn)
813: {
814: struct pnode *pp;
815:
1.9 kristaps 816: pp = NULL == p->root ? NULL :
817: pnode_findfirst(p->root, NODE_REFMETA);
818:
819: if (NULL != pp) {
1.7 kristaps 820: pnode_printrefmeta(p, pp);
821: pnode_unlink(pp);
822: } else {
823: puts(".\\\" Supplying bogus prologue...");
824: puts(".Dd $Mdocdate" "$");
825: puts(".Dt UNKNOWN 1");
826: puts(".Os");
827: }
828: }
829:
1.13 kristaps 830: static void
831: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
832: {
833: struct pnode *pp;
834:
835: assert(p->newln);
836: TAILQ_FOREACH(pp, &pn->childq, child)
837: if (NODE_TERM == pp->node) {
838: fputs(".It", stdout);
839: p->newln = 0;
840: pnode_print(p, pp);
841: pnode_unlink(pp);
1.16 kristaps 842: pnode_printmclose(p, 1);
1.13 kristaps 843: return;
844: }
845:
846: puts(".It");
847: p->newln = 1;
848: }
849:
850: static void
1.16 kristaps 851: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
852: {
853: struct pnode *pp;
854:
855: assert(p->newln);
856: TAILQ_FOREACH(pp, &pn->childq, child)
857: if (NODE_TITLE == pp->node) {
858: puts(".Pp");
859: pnode_print(p, pp);
860: pnode_unlink(pp);
861: }
862:
863: assert(p->newln);
1.21 ! kristaps 864:
! 865: if (NODE_ORDEREDLIST == pn->node)
! 866: puts(".Bl -enum");
! 867: else
! 868: puts(".Bl -item");
! 869:
1.16 kristaps 870: TAILQ_FOREACH(pp, &pn->childq, child) {
871: assert(p->newln);
872: puts(".It");
873: pnode_print(p, pp);
874: pnode_printmclose(p, 1);
875: }
876: assert(p->newln);
877: puts(".El");
878: }
879:
880: static void
1.13 kristaps 881: pnode_printvariablelist(struct parse *p, struct pnode *pn)
882: {
883: struct pnode *pp;
884:
885: assert(p->newln);
886: TAILQ_FOREACH(pp, &pn->childq, child)
887: if (NODE_TITLE == pp->node) {
888: puts(".Pp");
889: pnode_print(p, pp);
890: pnode_unlink(pp);
891: }
892:
893: assert(p->newln);
894: puts(".Bl -tag -width Ds");
895: TAILQ_FOREACH(pp, &pn->childq, child)
896: if (NODE_VARLISTENTRY != pp->node) {
897: assert(p->newln);
898: fputs(".It", stdout);
899: pnode_printmacroline(p, pp);
900: } else {
901: assert(p->newln);
902: pnode_print(p, pp);
903: }
904: assert(p->newln);
905: puts(".El");
906: }
907:
1.1 kristaps 908: /*
909: * Print a parsed node (or ignore it--whatever).
910: * This is a recursive function.
911: * FIXME: macro line continuation?
912: */
913: static void
914: pnode_print(struct parse *p, struct pnode *pn)
915: {
916: struct pnode *pp;
917: char *cp;
1.10 kristaps 918: int last, sv;
1.1 kristaps 919:
920: if (NULL == pn)
921: return;
922:
1.10 kristaps 923: sv = p->newln;
1.1 kristaps 924:
925: switch (pn->node) {
1.4 kristaps 926: case (NODE_ARG):
1.10 kristaps 927: pnode_printarg(p, pn);
1.4 kristaps 928: pnode_unlinksub(pn);
929: break;
1.1 kristaps 930: case (NODE_CITEREFENTRY):
1.10 kristaps 931: assert(p->newln);
1.1 kristaps 932: pnode_printciterefentry(p, pn);
1.4 kristaps 933: pnode_unlinksub(pn);
1.1 kristaps 934: break;
935: case (NODE_CODE):
1.10 kristaps 936: pnode_printmopen(p);
1.13 kristaps 937: fputs("Li", stdout);
1.4 kristaps 938: break;
939: case (NODE_COMMAND):
1.10 kristaps 940: pnode_printmopen(p);
1.13 kristaps 941: fputs("Nm", stdout);
942: break;
943: case (NODE_EMPHASIS):
944: pnode_printmopen(p);
945: fputs("Em", stdout);
1.1 kristaps 946: break;
1.21 ! kristaps 947: case (NODE_ENVAR):
! 948: pnode_printmopen(p);
! 949: fputs("Ev", stdout);
! 950: break;
1.17 kristaps 951: case (NODE_FILENAME):
952: pnode_printmopen(p);
953: fputs("Pa", stdout);
954: break;
1.3 kristaps 955: case (NODE_FUNCTION):
1.10 kristaps 956: pnode_printmopen(p);
1.13 kristaps 957: fputs("Fn", stdout);
1.3 kristaps 958: break;
959: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 960: assert(p->newln);
1.3 kristaps 961: pnode_printfuncprototype(p, pn);
1.4 kristaps 962: pnode_unlinksub(pn);
1.3 kristaps 963: break;
1.1 kristaps 964: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 965: pnode_printmopen(p);
1.13 kristaps 966: fputs("Fd", stdout);
1.16 kristaps 967: break;
968: case (NODE_ITEMIZEDLIST):
1.21 ! kristaps 969: /* FALLTHROUGH */
! 970: case (NODE_ORDEREDLIST):
1.16 kristaps 971: assert(p->newln);
972: pnode_printitemizedlist(p, pn);
1.10 kristaps 973: break;
1.19 kristaps 974: case (NODE_LITERAL):
975: pnode_printmopen(p);
976: fputs("Li", stdout);
977: break;
1.10 kristaps 978: case (NODE_OPTION):
979: pnode_printmopen(p);
1.13 kristaps 980: fputs("Fl", stdout);
1.1 kristaps 981: break;
982: case (NODE_PARA):
1.10 kristaps 983: assert(p->newln);
1.13 kristaps 984: if (NULL != pn->parent &&
985: NODE_LISTITEM == pn->parent->node)
986: break;
1.1 kristaps 987: puts(".Pp");
1.3 kristaps 988: break;
989: case (NODE_PARAMETER):
1.10 kristaps 990: /* Suppress non-text children... */
991: pnode_printmopen(p);
992: fputs("Fa \"", stdout);
1.3 kristaps 993: pnode_printmacrolinepart(p, pn);
994: puts("\"");
1.4 kristaps 995: pnode_unlinksub(pn);
1.1 kristaps 996: break;
997: case (NODE_PROGRAMLISTING):
1.10 kristaps 998: assert(p->newln);
1.1 kristaps 999: puts(".Bd -literal");
1.15 kristaps 1000: break;
1001: case (NODE_REFENTRYINFO):
1002: /* Suppress. */
1003: pnode_unlinksub(pn);
1.1 kristaps 1004: break;
1005: case (NODE_REFMETA):
1.7 kristaps 1006: abort();
1.1 kristaps 1007: break;
1008: case (NODE_REFNAME):
1.10 kristaps 1009: /* Suppress non-text children... */
1010: pnode_printmopen(p);
1.13 kristaps 1011: fputs("Nm", stdout);
1012: p->newln = 0;
1.10 kristaps 1013: pnode_printmacrolinepart(p, pn);
1.4 kristaps 1014: pnode_unlinksub(pn);
1.10 kristaps 1015: break;
1.1 kristaps 1016: case (NODE_REFNAMEDIV):
1.10 kristaps 1017: assert(p->newln);
1.1 kristaps 1018: puts(".Sh NAME");
1019: break;
1020: case (NODE_REFPURPOSE):
1.10 kristaps 1021: assert(p->newln);
1.13 kristaps 1022: pnode_printmopen(p);
1023: fputs("Nd", stdout);
1.10 kristaps 1024: break;
1.1 kristaps 1025: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1026: assert(p->newln);
1.6 kristaps 1027: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1028: puts(".Sh SYNOPSIS");
1.1 kristaps 1029: break;
1030: case (NODE_REFSECT1):
1.20 kristaps 1031: /* FALLTHROUGH */
1032: case (NODE_REFSECT2):
1.10 kristaps 1033: assert(p->newln);
1.1 kristaps 1034: pnode_printrefsect(p, pn);
1035: break;
1.13 kristaps 1036: case (NODE_REPLACEABLE):
1037: pnode_printmopen(p);
1038: fputs("Ar", stdout);
1039: break;
1.19 kristaps 1040: case (NODE_SBR):
1041: assert(p->newln);
1042: puts(".br");
1043: break;
1.8 kristaps 1044: case (NODE_STRUCTNAME):
1.10 kristaps 1045: pnode_printmopen(p);
1.13 kristaps 1046: fputs("Vt", stdout);
1.10 kristaps 1047: break;
1.1 kristaps 1048: case (NODE_TEXT):
1.13 kristaps 1049: if (0 == p->newln)
1050: putchar(' ');
1.1 kristaps 1051: bufclear(p);
1052: bufappend(p, pn);
1053: /*
1054: * Output all characters, squeezing out whitespace
1055: * between newlines.
1056: * XXX: all whitespace, including tabs (?).
1057: * Remember to escape control characters and escapes.
1058: */
1.10 kristaps 1059: assert(p->bsz);
1.20 kristaps 1060: cp = p->b;
1061: /*
1062: * There's often a superfluous "-" in its <option> tags
1063: * before the actual flags themselves.
1064: * "Fl" does this for us, so remove it.
1065: */
1066: if (NULL != pn->parent &&
1067: NODE_OPTION == pn->parent->node &&
1068: '-' == *cp)
1069: cp++;
1070: for (last = '\n'; '\0' != *cp; ) {
1.1 kristaps 1071: if ('\n' == last) {
1072: /* Consume all whitespace. */
1073: if (isspace((int)*cp)) {
1074: while (isspace((int)*cp))
1075: cp++;
1076: continue;
1077: } else if ('\'' == *cp || '.' == *cp)
1078: fputs("\\&", stdout);
1079: }
1080: putchar(last = *cp++);
1081: /* If we're a character escape, escape us. */
1082: if ('\\' == last)
1083: putchar('e');
1084: }
1.10 kristaps 1085: p->newln = 0;
1.1 kristaps 1086: break;
1.13 kristaps 1087: case (NODE_VARIABLELIST):
1088: assert(p->newln);
1089: pnode_printvariablelist(p, pn);
1090: pnode_unlinksub(pn);
1091: break;
1092: case (NODE_VARLISTENTRY):
1093: assert(p->newln);
1094: pnode_printvarlistentry(p, pn);
1095: break;
1.1 kristaps 1096: default:
1097: break;
1098: }
1099:
1100: TAILQ_FOREACH(pp, &pn->childq, child)
1101: pnode_print(p, pp);
1102:
1103: switch (pn->node) {
1.10 kristaps 1104: case (NODE_ARG):
1105: case (NODE_CODE):
1106: case (NODE_COMMAND):
1.13 kristaps 1107: case (NODE_EMPHASIS):
1.21 ! kristaps 1108: case (NODE_ENVAR):
1.17 kristaps 1109: case (NODE_FILENAME):
1.10 kristaps 1110: case (NODE_FUNCTION):
1111: case (NODE_FUNCSYNOPSISINFO):
1.19 kristaps 1112: case (NODE_LITERAL):
1.10 kristaps 1113: case (NODE_OPTION):
1114: case (NODE_PARAMETER):
1.13 kristaps 1115: case (NODE_REPLACEABLE):
1116: case (NODE_REFPURPOSE):
1.10 kristaps 1117: case (NODE_STRUCTNAME):
1118: case (NODE_TEXT):
1119: pnode_printmclose(p, sv);
1120: break;
1.12 kristaps 1121: case (NODE_REFNAME):
1122: /*
1123: * If we're in the NAME macro and we have multiple
1124: * <refname> macros in sequence, then print out a
1125: * trailing comma before the newline.
1126: */
1127: if (NULL != pn->parent &&
1128: NODE_REFNAMEDIV == pn->parent->node &&
1129: NULL != TAILQ_NEXT(pn, child) &&
1130: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1131: fputs(" ,", stdout);
1132: pnode_printmclose(p, sv);
1133: break;
1.1 kristaps 1134: case (NODE_PROGRAMLISTING):
1.10 kristaps 1135: assert(p->newln);
1.1 kristaps 1136: puts(".Ed");
1.10 kristaps 1137: p->newln = 1;
1.1 kristaps 1138: break;
1139: default:
1140: break;
1141: }
1142: }
1143:
1144: /*
1145: * Loop around the read buffer until we've drained it of all data.
1146: * Invoke the parser context with each buffer fill.
1147: */
1148: static int
1149: readfile(XML_Parser xp, int fd,
1150: char *b, size_t bsz, const char *fn)
1151: {
1152: struct parse p;
1153: int rc;
1154: ssize_t ssz;
1155:
1156: memset(&p, 0, sizeof(struct parse));
1157:
1158: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1159: p.fname = fn;
1160: p.xml = xp;
1.1 kristaps 1161:
1162: XML_SetCharacterDataHandler(xp, xml_char);
1163: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1164: XML_SetUserData(xp, &p);
1165:
1166: while ((ssz = read(fd, b, bsz)) >= 0) {
1167: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1168: fprintf(stderr, "%s: %s\n", fn,
1169: XML_ErrorString
1170: (XML_GetErrorCode(xp)));
1171: else if ( ! p.stop && ssz > 0)
1172: continue;
1173: /*
1174: * Exit when we've read all or errors have occured
1175: * during the parse sequence.
1176: */
1.10 kristaps 1177: p.newln = 1;
1.7 kristaps 1178: pnode_printprologue(&p, p.root);
1.1 kristaps 1179: pnode_print(&p, p.root);
1180: pnode_free(p.root);
1181: free(p.b);
1182: return(0 != rc && ! p.stop);
1183: }
1184:
1185: /* Read error has occured. */
1186: perror(fn);
1187: pnode_free(p.root);
1188: free(p.b);
1189: return(0);
1190: }
1191:
1192: int
1193: main(int argc, char *argv[])
1194: {
1195: XML_Parser xp;
1196: const char *fname;
1197: char *buf;
1198: int fd, rc;
1199:
1200: fname = "-";
1201: xp = NULL;
1202: buf = NULL;
1203: rc = 0;
1204:
1205: if (-1 != getopt(argc, argv, ""))
1206: return(EXIT_FAILURE);
1207:
1208: argc -= optind;
1209: argv += optind;
1210:
1211: if (argc > 1)
1212: return(EXIT_FAILURE);
1213: else if (argc > 0)
1214: fname = argv[0];
1215:
1216: /* Read from stdin or a file. */
1217: fd = 0 == strcmp(fname, "-") ?
1218: STDIN_FILENO : open(fname, O_RDONLY, 0);
1219:
1220: /*
1221: * Open file for reading.
1222: * Allocate a read buffer.
1223: * Create the parser context.
1224: * Dive directly into the parse.
1225: */
1226: if (-1 == fd)
1227: perror(fname);
1228: else if (NULL == (buf = malloc(4096)))
1229: perror(NULL);
1230: else if (NULL == (xp = XML_ParserCreate(NULL)))
1231: perror(NULL);
1232: else if ( ! readfile(xp, fd, buf, 4096, fname))
1233: rc = 1;
1234:
1235: XML_ParserFree(xp);
1236: free(buf);
1237: if (STDIN_FILENO != fd)
1238: close(fd);
1239: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1240: }
CVSweb