Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.22
1.22 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.21 2014/03/30 17:10:50 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/*
 * Static description of one recognised DocBook element.
 */
struct node {
	const char	*name;	/* element name as it appears in the XML */
	unsigned int	 flags;	/* bit-field of NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* drop character data found directly inside */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.21 kristaps 90: { "acronym", 0 },
1.4 kristaps 91: { "arg", 0 },
1.1 kristaps 92: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 93: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 94: { "code", 0 },
1.4 kristaps 95: { "command", 0 },
1.15 kristaps 96: { "date", 0 },
1.13 kristaps 97: { "emphasis", 0 },
1.21 kristaps 98: { "envar", 0 },
1.17 kristaps 99: { "filename", 0 },
1.3 kristaps 100: { "funcdef", 0 },
101: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 102: { "funcsynopsis", NODE_IGNTEXT },
103: { "funcsynopsisinfo", 0 },
1.3 kristaps 104: { "function", 0 },
1.16 kristaps 105: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 106: { "link", 0 },
1.13 kristaps 107: { "listitem", NODE_IGNTEXT },
1.19 kristaps 108: { "literal", 0 },
1.1 kristaps 109: { "manvolnum", 0 },
1.4 kristaps 110: { "option", 0 },
1.21 kristaps 111: { "orderedlist", NODE_IGNTEXT },
1.1 kristaps 112: { "para", 0 },
1.3 kristaps 113: { "paramdef", 0 },
114: { "parameter", 0 },
1.1 kristaps 115: { "programlisting", 0 },
1.22 ! kristaps 116: { "prompt", 0 },
1.1 kristaps 117: { "refclass", NODE_IGNTEXT },
118: { "refdescriptor", NODE_IGNTEXT },
119: { "refentry", NODE_IGNTEXT },
1.15 kristaps 120: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 121: { "refentrytitle", 0 },
122: { "refmeta", NODE_IGNTEXT },
123: { "refmiscinfo", NODE_IGNTEXT },
124: { "refname", 0 },
125: { "refnamediv", NODE_IGNTEXT },
126: { "refpurpose", 0 },
1.20 kristaps 127: { "refsect1", NODE_IGNTEXT },
128: { "refsect2", NODE_IGNTEXT },
1.1 kristaps 129: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 130: { "replaceable", 0 },
1.19 kristaps 131: { "sbr", NODE_IGNTEXT },
1.22 ! kristaps 132: { "screen", NODE_IGNTEXT },
1.8 kristaps 133: { "structname", 0 },
1.1 kristaps 134: { "synopsis", 0 },
1.13 kristaps 135: { "term", 0 },
1.1 kristaps 136: { NULL, 0 },
137: { "title", 0 },
1.14 kristaps 138: { "ulink", 0 },
1.13 kristaps 139: { "variablelist", NODE_IGNTEXT },
140: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 141: };
142:
/* Forward declaration: the per-element printers recurse into this. */
static void	pnode_print(struct parse *p, struct pnode *pn);
145:
1.8 kristaps 146: /*
147: * Process a stream of characters.
148: * We store text as nodes in and of themselves.
149: * If a text node is already open, append to it.
150: * If it's not open, open one under the current context.
151: */
1.1 kristaps 152: static void
153: xml_char(void *arg, const XML_Char *p, int sz)
154: {
155: struct parse *ps = arg;
156: struct pnode *dat;
1.4 kristaps 157: int i;
1.1 kristaps 158:
159: /* Stopped or no tree yet. */
160: if (ps->stop || NODE_ROOT == ps->node)
161: return;
162:
163: /* Not supposed to be collecting text. */
164: assert(NULL != ps->cur);
165: if (NODE_IGNTEXT & nodes[ps->node].flags)
166: return;
167:
168: /*
169: * Are we in the midst of processing text?
170: * If we're not processing text right now, then create a text
171: * node for doing so.
1.4 kristaps 172: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 173: * process: strip out all leading whitespace to be sure.
1.1 kristaps 174: */
175: if (NODE_TEXT != ps->node) {
1.4 kristaps 176: for (i = 0; i < sz; i++)
177: if ( ! isspace((int)p[i]))
178: break;
179: if (i == sz)
180: return;
1.10 kristaps 181: p += i;
182: sz -= i;
1.1 kristaps 183: dat = calloc(1, sizeof(struct pnode));
184: if (NULL == dat) {
185: perror(NULL);
186: exit(EXIT_FAILURE);
187: }
188:
189: dat->node = ps->node = NODE_TEXT;
190: dat->parent = ps->cur;
191: TAILQ_INIT(&dat->childq);
1.12 kristaps 192: TAILQ_INIT(&dat->attrq);
1.1 kristaps 193: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
194: ps->cur = dat;
195: assert(NULL != ps->root);
196: }
197:
198: /* Append to current buffer. */
199: assert(sz >= 0);
200: ps->cur->b = realloc(ps->cur->b,
201: ps->cur->bsz + (size_t)sz);
202: if (NULL == ps->cur->b) {
203: perror(NULL);
204: exit(EXIT_FAILURE);
205: }
206: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
207: ps->cur->bsz += (size_t)sz;
208: }
209:
1.10 kristaps 210: static void
211: pnode_trim(struct pnode *pn)
212: {
213:
214: assert(NODE_TEXT == pn->node);
215: for ( ; pn->bsz > 0; pn->bsz--)
216: if ( ! isspace((int)pn->b[pn->bsz - 1]))
217: break;
218: }
219:
1.1 kristaps 220: /*
221: * Begin an element.
222: * First, look for the element.
223: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 224: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 225: * If we find it but we're not parsing yet (i.e., it's not a refentry
226: * and thus out of context), keep going.
1.8 kristaps 227: * If we find it and we're at the root and already have a tree, puke and
228: * exit (FIXME: I don't think this is right?).
229: * If we find it but we're parsing a text node, close out the text node,
230: * return to its parent, and keep going.
1.1 kristaps 231: * Make sure that the element is in the right context.
232: * Lastly, put the node onto our parse tree and continue.
233: */
234: static void
235: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
236: {
1.12 kristaps 237: struct parse *ps = arg;
238: enum nodeid node;
239: enum attrkey key;
240: enum attrval val;
241: struct pnode *dat;
242: struct pattr *pattr;
243: const XML_Char **att;
1.1 kristaps 244:
245: if (ps->stop)
246: return;
247:
248: /* Close out text node, if applicable... */
249: if (NODE_TEXT == ps->node) {
250: assert(NULL != ps->cur);
1.10 kristaps 251: pnode_trim(ps->cur);
1.1 kristaps 252: ps->cur = ps->cur->parent;
253: assert(NULL != ps->cur);
254: ps->node = ps->cur->node;
255: }
256:
257: for (node = 0; node < NODE__MAX; node++)
258: if (NULL == nodes[node].name)
259: continue;
260: else if (0 == strcmp(nodes[node].name, name))
261: break;
262:
263: if (NODE__MAX == node && NODE_ROOT == ps->node) {
264: return;
265: } else if (NODE__MAX == node) {
1.12 kristaps 266: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
267: ps->fname, XML_GetCurrentLineNumber(ps->xml),
268: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 269: ps->stop = 1;
270: return;
271: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 272: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
273: ps->fname, XML_GetCurrentLineNumber(ps->xml),
274: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 275: ps->stop = 1;
276: return;
277: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
278: return;
279: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 280: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
281: "of node \"%s\"\n",
1.12 kristaps 282: ps->fname, XML_GetCurrentLineNumber(ps->xml),
283: XML_GetCurrentColumnNumber(ps->xml),
284: NULL == nodes[ps->node].name ?
1.13 kristaps 285: "(none)" : nodes[ps->node].name,
286: NULL == nodes[node].name ?
287: "(none)" : nodes[node].name);
1.1 kristaps 288: ps->stop = 1;
289: return;
290: }
291:
292: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
293: perror(NULL);
294: exit(EXIT_FAILURE);
295: }
296:
297: dat->node = ps->node = node;
298: dat->parent = ps->cur;
299: TAILQ_INIT(&dat->childq);
1.12 kristaps 300: TAILQ_INIT(&dat->attrq);
1.1 kristaps 301:
302: if (NULL != ps->cur)
303: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
304:
305: ps->cur = dat;
306: if (NULL == ps->root)
307: ps->root = dat;
1.12 kristaps 308:
309: /*
310: * Process attributes.
311: */
312: for (att = atts; NULL != *att; att += 2) {
313: for (key = 0; key < ATTRKEY__MAX; key++)
314: if (0 == strcmp(*att, attrkeys[key]))
315: break;
316: if (ATTRKEY__MAX == key) {
317: fprintf(stderr, "%s:%zu:%zu: unknown "
318: "attribute \"%s\"\n", ps->fname,
319: XML_GetCurrentLineNumber(ps->xml),
320: XML_GetCurrentColumnNumber(ps->xml),
321: *att);
322: continue;
323: } else if ( ! isattrkey(node, key)) {
324: fprintf(stderr, "%s:%zu:%zu: bad "
325: "attribute \"%s\"\n", ps->fname,
326: XML_GetCurrentLineNumber(ps->xml),
327: XML_GetCurrentColumnNumber(ps->xml),
328: *att);
329: continue;
330: }
331: for (val = 0; val < ATTRVAL__MAX; val++)
332: if (0 == strcmp(*(att + 1), attrvals[val]))
333: break;
334: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
335: fprintf(stderr, "%s:%zu:%zu: bad "
336: "value \"%s\"\n", ps->fname,
337: XML_GetCurrentLineNumber(ps->xml),
338: XML_GetCurrentColumnNumber(ps->xml),
339: *(att + 1));
340: continue;
341: }
342: pattr = calloc(1, sizeof(struct pattr));
343: pattr->key = key;
344: pattr->val = val;
345: if (ATTRVAL__MAX == val)
346: pattr->rawval = strdup(*(att + 1));
347: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
348: }
349:
1.1 kristaps 350: }
351:
352: /*
353: * Roll up the parse tree.
1.8 kristaps 354: * If we're at a text node, roll that one up first.
1.1 kristaps 355: * If we hit the root, then assign ourselves as the NODE_ROOT.
356: */
357: static void
358: xml_elem_end(void *arg, const XML_Char *name)
359: {
360: struct parse *ps = arg;
361:
362: if (ps->stop || NODE_ROOT == ps->node)
363: return;
364:
365: /* Close out text node, if applicable... */
366: if (NODE_TEXT == ps->node) {
367: assert(NULL != ps->cur);
1.10 kristaps 368: pnode_trim(ps->cur);
1.1 kristaps 369: ps->cur = ps->cur->parent;
370: assert(NULL != ps->cur);
371: ps->node = ps->cur->node;
372: }
373:
374: if (NULL == (ps->cur = ps->cur->parent))
375: ps->node = NODE_ROOT;
376: else
377: ps->node = ps->cur->node;
378: }
379:
1.8 kristaps 380: /*
381: * Recursively free a node (NULL is ok).
382: */
1.1 kristaps 383: static void
384: pnode_free(struct pnode *pn)
385: {
386: struct pnode *pp;
1.12 kristaps 387: struct pattr *ap;
1.1 kristaps 388:
389: if (NULL == pn)
390: return;
391:
392: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
393: TAILQ_REMOVE(&pn->childq, pp, child);
394: pnode_free(pp);
395: }
396:
1.12 kristaps 397: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
398: TAILQ_REMOVE(&pn->attrq, ap, child);
399: free(ap->rawval);
400: free(ap);
401: }
402:
1.1 kristaps 403: free(pn->b);
404: free(pn);
405: }
406:
1.8 kristaps 407: /*
408: * Unlink a node from its parent and pnode_free() it.
409: */
1.1 kristaps 410: static void
411: pnode_unlink(struct pnode *pn)
412: {
413:
414: if (NULL != pn->parent)
415: TAILQ_REMOVE(&pn->parent->childq, pn, child);
416: pnode_free(pn);
417: }
418:
1.8 kristaps 419: /*
420: * Unlink all children of a node and pnode_free() them.
421: */
1.1 kristaps 422: static void
1.4 kristaps 423: pnode_unlinksub(struct pnode *pn)
424: {
425:
426: while ( ! TAILQ_EMPTY(&pn->childq))
427: pnode_unlink(TAILQ_FIRST(&pn->childq));
428: }
429:
1.8 kristaps 430: /*
431: * Reset the lookaside buffer.
432: */
1.4 kristaps 433: static void
1.1 kristaps 434: bufclear(struct parse *p)
435: {
436:
437: p->b[p->bsz = 0] = '\0';
438: }
439:
1.8 kristaps 440: /*
441: * Append NODE_TEXT contents to the current buffer, reallocating its
442: * size if necessary.
443: * The buffer is ALWAYS nil-terminated.
444: */
1.1 kristaps 445: static void
446: bufappend(struct parse *p, struct pnode *pn)
447: {
448:
449: assert(NODE_TEXT == pn->node);
450: if (p->bsz + pn->bsz + 1 > p->mbsz) {
451: p->mbsz = p->bsz + pn->bsz + 1;
452: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
453: perror(NULL);
454: exit(EXIT_FAILURE);
455: }
456: }
457: memcpy(p->b + p->bsz, pn->b, pn->bsz);
458: p->bsz += pn->bsz;
459: p->b[p->bsz] = '\0';
460: }
461:
1.8 kristaps 462: /*
463: * Recursively append all NODE_TEXT nodes to the buffer.
464: * This descends into non-text nodes, but doesn't do anything beyond
465: * them.
466: * In other words, this is a recursive text grok.
467: */
1.3 kristaps 468: static void
469: bufappend_r(struct parse *p, struct pnode *pn)
470: {
471: struct pnode *pp;
472:
473: if (NODE_TEXT == pn->node)
474: bufappend(p, pn);
475: TAILQ_FOREACH(pp, &pn->childq, child)
476: bufappend_r(p, pp);
477: }
478:
1.12 kristaps 479: #define MACROLINE_NORM 0
480: #define MACROLINE_UPPER 1
1.1 kristaps 481: /*
1.8 kristaps 482: * Recursively print text presumably on a macro line.
1.1 kristaps 483: * Convert all whitespace to regular spaces.
484: */
485: static void
1.12 kristaps 486: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 487: {
488: char *cp;
489:
1.13 kristaps 490: if (0 == p->newln)
491: putchar(' ');
492:
1.1 kristaps 493: bufclear(p);
1.3 kristaps 494: bufappend_r(p, pn);
1.1 kristaps 495:
496: /* Convert all space to spaces. */
497: for (cp = p->b; '\0' != *cp; cp++)
498: if (isspace((int)*cp))
499: *cp = ' ';
500:
501: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 502: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 503: for ( ; '\0' != *cp; cp++) {
504: /* Escape us if we look like a macro. */
505: if ((cp == p->b || ' ' == *(cp - 1)) &&
506: isupper((int)*cp) &&
507: '\0' != *(cp + 1) &&
508: islower((int)*(cp + 1)) &&
509: ('\0' == *(cp + 2) ||
510: ' ' == *(cp + 2) ||
511: (islower((int)*(cp + 2)) &&
512: ('\0' == *(cp + 3) ||
513: ' ' == *(cp + 3)))))
514: fputs("\\&", stdout);
1.12 kristaps 515: if (MACROLINE_UPPER & fl)
516: putchar(toupper((int)*cp));
517: else
518: putchar((int)*cp);
1.1 kristaps 519: /* If we're a character escape, escape us. */
520: if ('\\' == *cp)
521: putchar('e');
522: }
523: }
524:
1.12 kristaps 525: static void
526: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
527: {
528:
529: pnode_printmacrolinetext(p, pn, 0);
530: }
531:
1.1 kristaps 532: /*
533: * Just pnode_printmacrolinepart() but with a newline.
534: * If no text, just the newline.
535: */
536: static void
537: pnode_printmacroline(struct parse *p, struct pnode *pn)
538: {
539:
1.13 kristaps 540: assert(0 == p->newln);
1.12 kristaps 541: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 542: putchar('\n');
1.13 kristaps 543: p->newln = 1;
1.1 kristaps 544: }
545:
1.10 kristaps 546: static void
547: pnode_printmopen(struct parse *p)
548: {
549: if (p->newln) {
550: putchar('.');
551: p->newln = 0;
552: } else
553: putchar(' ');
554: }
555:
556: static void
557: pnode_printmclose(struct parse *p, int sv)
558: {
559:
560: if (sv && ! p->newln) {
561: putchar('\n');
562: p->newln = 1;
563: }
564: }
565:
1.8 kristaps 566: /*
1.10 kristaps 567: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 568: */
1.1 kristaps 569: static void
1.6 kristaps 570: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
571: {
572: struct pnode *pp;
573:
1.10 kristaps 574: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 575: if (NODE_TITLE == pp->node) {
576: pnode_unlink(pp);
1.10 kristaps 577: return;
1.6 kristaps 578: }
579: }
580:
1.8 kristaps 581: /*
582: * Start a hopefully-named `Sh' section.
583: */
1.6 kristaps 584: static void
1.1 kristaps 585: pnode_printrefsect(struct parse *p, struct pnode *pn)
586: {
587: struct pnode *pp;
588:
589: TAILQ_FOREACH(pp, &pn->childq, child)
590: if (NODE_TITLE == pp->node)
591: break;
592:
1.20 kristaps 593: if (NODE_REFSECT1 == pn->node)
594: fputs(".Sh", stdout);
595: else
596: fputs(".Ss", stdout);
597:
1.13 kristaps 598: p->newln = 0;
1.4 kristaps 599:
1.5 kristaps 600: if (NULL != pp) {
1.20 kristaps 601: pnode_printmacrolinetext(p, pp,
602: NODE_REFSECT1 == pn->node ?
603: MACROLINE_UPPER : 0);
1.18 kristaps 604: pnode_printmclose(p, 1);
1.5 kristaps 605: pnode_unlink(pp);
1.13 kristaps 606: } else {
1.4 kristaps 607: puts("UNKNOWN");
1.13 kristaps 608: p->newln = 1;
609: }
1.1 kristaps 610: }
611:
1.8 kristaps 612: /*
613: * Start a reference, extracting the title and volume.
614: */
1.1 kristaps 615: static void
616: pnode_printciterefentry(struct parse *p, struct pnode *pn)
617: {
618: struct pnode *pp, *title, *manvol;
619:
620: title = manvol = NULL;
1.13 kristaps 621: assert(p->newln);
1.1 kristaps 622: TAILQ_FOREACH(pp, &pn->childq, child)
623: if (NODE_MANVOLNUM == pp->node)
624: manvol = pp;
625: else if (NODE_REFENTRYTITLE == pp->node)
626: title = pp;
627:
1.13 kristaps 628: fputs(".Xr", stdout);
629: p->newln = 0;
1.4 kristaps 630:
1.1 kristaps 631: if (NULL != title) {
632: pnode_printmacrolinepart(p, title);
633: } else
1.13 kristaps 634: fputs(" unknown ", stdout);
1.4 kristaps 635:
1.13 kristaps 636: if (NULL == manvol) {
637: puts(" 1");
638: p->newln = 1;
639: } else
1.1 kristaps 640: pnode_printmacroline(p, manvol);
641: }
642:
643: static void
644: pnode_printrefmeta(struct parse *p, struct pnode *pn)
645: {
646: struct pnode *pp, *title, *manvol;
647:
648: title = manvol = NULL;
1.13 kristaps 649: assert(p->newln);
1.1 kristaps 650: TAILQ_FOREACH(pp, &pn->childq, child)
651: if (NODE_MANVOLNUM == pp->node)
652: manvol = pp;
653: else if (NODE_REFENTRYTITLE == pp->node)
654: title = pp;
655:
1.2 kristaps 656: puts(".Dd $Mdocdate" "$");
1.13 kristaps 657: fputs(".Dt", stdout);
658: p->newln = 0;
1.1 kristaps 659:
1.13 kristaps 660: if (NULL != title)
1.12 kristaps 661: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 662: else
663: fputs(" UNKNOWN ", stdout);
664:
665: if (NULL == manvol) {
666: puts(" 1");
667: p->newln = 1;
1.1 kristaps 668: } else
669: pnode_printmacroline(p, manvol);
670:
671: puts(".Os");
672: }
673:
1.3 kristaps 674: static void
675: pnode_printfuncdef(struct parse *p, struct pnode *pn)
676: {
677: struct pnode *pp, *ftype, *func;
678:
1.13 kristaps 679: assert(p->newln);
1.3 kristaps 680: ftype = func = NULL;
681: TAILQ_FOREACH(pp, &pn->childq, child)
682: if (NODE_TEXT == pp->node)
683: ftype = pp;
684: else if (NODE_FUNCTION == pp->node)
685: func = pp;
686:
687: if (NULL != ftype) {
1.13 kristaps 688: fputs(".Ft", stdout);
689: p->newln = 0;
1.3 kristaps 690: pnode_printmacroline(p, ftype);
691: }
692:
693: if (NULL != func) {
1.13 kristaps 694: fputs(".Fo", stdout);
695: p->newln = 0;
1.3 kristaps 696: pnode_printmacroline(p, func);
1.13 kristaps 697: } else {
1.3 kristaps 698: puts(".Fo UNKNOWN");
1.13 kristaps 699: p->newln = 1;
700: }
1.3 kristaps 701: }
702:
703: static void
704: pnode_printparamdef(struct parse *p, struct pnode *pn)
705: {
706: struct pnode *pp, *ptype, *param;
707:
1.13 kristaps 708: assert(p->newln);
1.3 kristaps 709: ptype = param = NULL;
710: TAILQ_FOREACH(pp, &pn->childq, child)
711: if (NODE_TEXT == pp->node)
712: ptype = pp;
713: else if (NODE_PARAMETER == pp->node)
714: param = pp;
715:
716: fputs(".Fa \"", stdout);
1.13 kristaps 717: p->newln = 0;
1.3 kristaps 718: if (NULL != ptype) {
719: pnode_printmacrolinepart(p, ptype);
720: putchar(' ');
721: }
722:
723: if (NULL != param)
724: pnode_printmacrolinepart(p, param);
725:
726: puts("\"");
1.13 kristaps 727: p->newln = 1;
1.3 kristaps 728: }
729:
730: static void
731: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
732: {
733: struct pnode *pp, *fdef;
734:
1.13 kristaps 735: assert(p->newln);
1.3 kristaps 736: TAILQ_FOREACH(fdef, &pn->childq, child)
737: if (NODE_FUNCDEF == fdef->node)
738: break;
739:
1.4 kristaps 740: if (NULL != fdef)
1.3 kristaps 741: pnode_printfuncdef(p, fdef);
1.4 kristaps 742: else
1.3 kristaps 743: puts(".Fo UNKNOWN");
744:
1.4 kristaps 745: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 746: if (NODE_PARAMDEF == pp->node)
747: pnode_printparamdef(p, pp);
748:
749: puts(".Fc");
1.13 kristaps 750: p->newln = 1;
1.3 kristaps 751: }
752:
1.10 kristaps 753: /*
754: * The <arg> element is more complicated than it should be because text
755: * nodes are treated like ".Ar foo", but non-text nodes need to be
756: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 757: * This also handles the case of "repetition" (or in other words, the
758: * ellipsis following an argument) and optionality.
1.10 kristaps 759: */
1.4 kristaps 760: static void
1.10 kristaps 761: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 762: {
763: struct pnode *pp;
1.12 kristaps 764: struct pattr *ap;
765: int isop, isrep;
766:
767: isop = 1;
768: isrep = 0;
769: TAILQ_FOREACH(ap, &pn->attrq, child)
770: if (ATTRKEY_CHOICE == ap->key &&
771: (ATTRVAL_PLAIN == ap->val ||
772: ATTRVAL_REQ == ap->val))
773: isop = 0;
774: else if (ATTRKEY_REP == ap->key &&
775: (ATTRVAL_REPEAT == ap->val))
776: isrep = 1;
777:
778: if (isop) {
779: pnode_printmopen(p);
1.13 kristaps 780: fputs("Op", stdout);
1.12 kristaps 781: }
1.4 kristaps 782:
1.10 kristaps 783: TAILQ_FOREACH(pp, &pn->childq, child) {
784: if (NODE_TEXT == pp->node) {
785: pnode_printmopen(p);
1.13 kristaps 786: fputs("Ar", stdout);
1.10 kristaps 787: }
788: pnode_print(p, pp);
1.12 kristaps 789: if (NODE_TEXT == pp->node && isrep)
790: fputs("...", stdout);
1.10 kristaps 791: }
1.4 kristaps 792: }
793:
1.7 kristaps 794: /*
795: * Recursively search and return the first instance of "node".
796: */
797: static struct pnode *
798: pnode_findfirst(struct pnode *pn, enum nodeid node)
799: {
800: struct pnode *pp, *res;
801:
802: res = NULL;
803: TAILQ_FOREACH(pp, &pn->childq, child) {
804: res = pp->node == node ? pp :
805: pnode_findfirst(pp, node);
806: if (NULL != res)
807: break;
808: }
809:
810: return(res);
811: }
812:
813: static void
814: pnode_printprologue(struct parse *p, struct pnode *pn)
815: {
816: struct pnode *pp;
817:
1.9 kristaps 818: pp = NULL == p->root ? NULL :
819: pnode_findfirst(p->root, NODE_REFMETA);
820:
821: if (NULL != pp) {
1.7 kristaps 822: pnode_printrefmeta(p, pp);
823: pnode_unlink(pp);
824: } else {
825: puts(".\\\" Supplying bogus prologue...");
826: puts(".Dd $Mdocdate" "$");
827: puts(".Dt UNKNOWN 1");
828: puts(".Os");
829: }
830: }
831:
1.13 kristaps 832: static void
833: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
834: {
835: struct pnode *pp;
836:
837: assert(p->newln);
838: TAILQ_FOREACH(pp, &pn->childq, child)
839: if (NODE_TERM == pp->node) {
840: fputs(".It", stdout);
841: p->newln = 0;
842: pnode_print(p, pp);
843: pnode_unlink(pp);
1.16 kristaps 844: pnode_printmclose(p, 1);
1.13 kristaps 845: return;
846: }
847:
848: puts(".It");
849: p->newln = 1;
850: }
851:
852: static void
1.16 kristaps 853: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
854: {
855: struct pnode *pp;
856:
857: assert(p->newln);
858: TAILQ_FOREACH(pp, &pn->childq, child)
859: if (NODE_TITLE == pp->node) {
860: puts(".Pp");
861: pnode_print(p, pp);
862: pnode_unlink(pp);
863: }
864:
865: assert(p->newln);
1.21 kristaps 866:
867: if (NODE_ORDEREDLIST == pn->node)
868: puts(".Bl -enum");
869: else
870: puts(".Bl -item");
871:
1.16 kristaps 872: TAILQ_FOREACH(pp, &pn->childq, child) {
873: assert(p->newln);
874: puts(".It");
875: pnode_print(p, pp);
876: pnode_printmclose(p, 1);
877: }
878: assert(p->newln);
879: puts(".El");
880: }
881:
882: static void
1.13 kristaps 883: pnode_printvariablelist(struct parse *p, struct pnode *pn)
884: {
885: struct pnode *pp;
886:
887: assert(p->newln);
888: TAILQ_FOREACH(pp, &pn->childq, child)
889: if (NODE_TITLE == pp->node) {
890: puts(".Pp");
891: pnode_print(p, pp);
892: pnode_unlink(pp);
893: }
894:
895: assert(p->newln);
896: puts(".Bl -tag -width Ds");
897: TAILQ_FOREACH(pp, &pn->childq, child)
898: if (NODE_VARLISTENTRY != pp->node) {
899: assert(p->newln);
900: fputs(".It", stdout);
901: pnode_printmacroline(p, pp);
902: } else {
903: assert(p->newln);
904: pnode_print(p, pp);
905: }
906: assert(p->newln);
907: puts(".El");
908: }
909:
1.1 kristaps 910: /*
911: * Print a parsed node (or ignore it--whatever).
912: * This is a recursive function.
913: * FIXME: macro line continuation?
914: */
915: static void
916: pnode_print(struct parse *p, struct pnode *pn)
917: {
918: struct pnode *pp;
919: char *cp;
1.10 kristaps 920: int last, sv;
1.1 kristaps 921:
922: if (NULL == pn)
923: return;
924:
1.10 kristaps 925: sv = p->newln;
1.1 kristaps 926:
927: switch (pn->node) {
1.4 kristaps 928: case (NODE_ARG):
1.10 kristaps 929: pnode_printarg(p, pn);
1.4 kristaps 930: pnode_unlinksub(pn);
931: break;
1.1 kristaps 932: case (NODE_CITEREFENTRY):
1.10 kristaps 933: assert(p->newln);
1.1 kristaps 934: pnode_printciterefentry(p, pn);
1.4 kristaps 935: pnode_unlinksub(pn);
1.1 kristaps 936: break;
937: case (NODE_CODE):
1.10 kristaps 938: pnode_printmopen(p);
1.13 kristaps 939: fputs("Li", stdout);
1.4 kristaps 940: break;
941: case (NODE_COMMAND):
1.10 kristaps 942: pnode_printmopen(p);
1.13 kristaps 943: fputs("Nm", stdout);
944: break;
945: case (NODE_EMPHASIS):
946: pnode_printmopen(p);
947: fputs("Em", stdout);
1.1 kristaps 948: break;
1.21 kristaps 949: case (NODE_ENVAR):
950: pnode_printmopen(p);
951: fputs("Ev", stdout);
952: break;
1.17 kristaps 953: case (NODE_FILENAME):
954: pnode_printmopen(p);
955: fputs("Pa", stdout);
956: break;
1.3 kristaps 957: case (NODE_FUNCTION):
1.10 kristaps 958: pnode_printmopen(p);
1.13 kristaps 959: fputs("Fn", stdout);
1.3 kristaps 960: break;
961: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 962: assert(p->newln);
1.3 kristaps 963: pnode_printfuncprototype(p, pn);
1.4 kristaps 964: pnode_unlinksub(pn);
1.3 kristaps 965: break;
1.1 kristaps 966: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 967: pnode_printmopen(p);
1.13 kristaps 968: fputs("Fd", stdout);
1.16 kristaps 969: break;
970: case (NODE_ITEMIZEDLIST):
1.21 kristaps 971: /* FALLTHROUGH */
972: case (NODE_ORDEREDLIST):
1.16 kristaps 973: assert(p->newln);
974: pnode_printitemizedlist(p, pn);
1.10 kristaps 975: break;
1.19 kristaps 976: case (NODE_LITERAL):
977: pnode_printmopen(p);
978: fputs("Li", stdout);
979: break;
1.10 kristaps 980: case (NODE_OPTION):
981: pnode_printmopen(p);
1.13 kristaps 982: fputs("Fl", stdout);
1.1 kristaps 983: break;
984: case (NODE_PARA):
1.10 kristaps 985: assert(p->newln);
1.13 kristaps 986: if (NULL != pn->parent &&
987: NODE_LISTITEM == pn->parent->node)
988: break;
1.1 kristaps 989: puts(".Pp");
1.3 kristaps 990: break;
991: case (NODE_PARAMETER):
1.10 kristaps 992: /* Suppress non-text children... */
993: pnode_printmopen(p);
994: fputs("Fa \"", stdout);
1.3 kristaps 995: pnode_printmacrolinepart(p, pn);
996: puts("\"");
1.4 kristaps 997: pnode_unlinksub(pn);
1.1 kristaps 998: break;
999: case (NODE_PROGRAMLISTING):
1.22 ! kristaps 1000: /* FALLTHROUGH */
! 1001: case (NODE_SCREEN):
1.10 kristaps 1002: assert(p->newln);
1.1 kristaps 1003: puts(".Bd -literal");
1.15 kristaps 1004: break;
1005: case (NODE_REFENTRYINFO):
1006: /* Suppress. */
1007: pnode_unlinksub(pn);
1.1 kristaps 1008: break;
1009: case (NODE_REFMETA):
1.7 kristaps 1010: abort();
1.1 kristaps 1011: break;
1012: case (NODE_REFNAME):
1.10 kristaps 1013: /* Suppress non-text children... */
1014: pnode_printmopen(p);
1.13 kristaps 1015: fputs("Nm", stdout);
1016: p->newln = 0;
1.10 kristaps 1017: pnode_printmacrolinepart(p, pn);
1.4 kristaps 1018: pnode_unlinksub(pn);
1.10 kristaps 1019: break;
1.1 kristaps 1020: case (NODE_REFNAMEDIV):
1.10 kristaps 1021: assert(p->newln);
1.1 kristaps 1022: puts(".Sh NAME");
1023: break;
1024: case (NODE_REFPURPOSE):
1.10 kristaps 1025: assert(p->newln);
1.13 kristaps 1026: pnode_printmopen(p);
1027: fputs("Nd", stdout);
1.10 kristaps 1028: break;
1.1 kristaps 1029: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1030: assert(p->newln);
1.6 kristaps 1031: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1032: puts(".Sh SYNOPSIS");
1.1 kristaps 1033: break;
1034: case (NODE_REFSECT1):
1.20 kristaps 1035: /* FALLTHROUGH */
1036: case (NODE_REFSECT2):
1.10 kristaps 1037: assert(p->newln);
1.1 kristaps 1038: pnode_printrefsect(p, pn);
1039: break;
1.13 kristaps 1040: case (NODE_REPLACEABLE):
1041: pnode_printmopen(p);
1042: fputs("Ar", stdout);
1043: break;
1.19 kristaps 1044: case (NODE_SBR):
1045: assert(p->newln);
1046: puts(".br");
1047: break;
1.8 kristaps 1048: case (NODE_STRUCTNAME):
1.10 kristaps 1049: pnode_printmopen(p);
1.13 kristaps 1050: fputs("Vt", stdout);
1.10 kristaps 1051: break;
1.1 kristaps 1052: case (NODE_TEXT):
1.13 kristaps 1053: if (0 == p->newln)
1054: putchar(' ');
1.1 kristaps 1055: bufclear(p);
1056: bufappend(p, pn);
1057: /*
1058: * Output all characters, squeezing out whitespace
1059: * between newlines.
1060: * XXX: all whitespace, including tabs (?).
1061: * Remember to escape control characters and escapes.
1062: */
1.10 kristaps 1063: assert(p->bsz);
1.20 kristaps 1064: cp = p->b;
1065: /*
1066: * There's often a superfluous "-" in its <option> tags
1067: * before the actual flags themselves.
1068: * "Fl" does this for us, so remove it.
1069: */
1070: if (NULL != pn->parent &&
1071: NODE_OPTION == pn->parent->node &&
1072: '-' == *cp)
1073: cp++;
1074: for (last = '\n'; '\0' != *cp; ) {
1.1 kristaps 1075: if ('\n' == last) {
1076: /* Consume all whitespace. */
1077: if (isspace((int)*cp)) {
1078: while (isspace((int)*cp))
1079: cp++;
1080: continue;
1081: } else if ('\'' == *cp || '.' == *cp)
1082: fputs("\\&", stdout);
1083: }
1084: putchar(last = *cp++);
1085: /* If we're a character escape, escape us. */
1086: if ('\\' == last)
1087: putchar('e');
1088: }
1.10 kristaps 1089: p->newln = 0;
1.1 kristaps 1090: break;
1.13 kristaps 1091: case (NODE_VARIABLELIST):
1092: assert(p->newln);
1093: pnode_printvariablelist(p, pn);
1094: pnode_unlinksub(pn);
1095: break;
1096: case (NODE_VARLISTENTRY):
1097: assert(p->newln);
1098: pnode_printvarlistentry(p, pn);
1099: break;
1.1 kristaps 1100: default:
1101: break;
1102: }
1103:
1104: TAILQ_FOREACH(pp, &pn->childq, child)
1105: pnode_print(p, pp);
1106:
1107: switch (pn->node) {
1.10 kristaps 1108: case (NODE_ARG):
1109: case (NODE_CODE):
1110: case (NODE_COMMAND):
1.13 kristaps 1111: case (NODE_EMPHASIS):
1.21 kristaps 1112: case (NODE_ENVAR):
1.17 kristaps 1113: case (NODE_FILENAME):
1.10 kristaps 1114: case (NODE_FUNCTION):
1115: case (NODE_FUNCSYNOPSISINFO):
1.19 kristaps 1116: case (NODE_LITERAL):
1.10 kristaps 1117: case (NODE_OPTION):
1118: case (NODE_PARAMETER):
1.13 kristaps 1119: case (NODE_REPLACEABLE):
1120: case (NODE_REFPURPOSE):
1.10 kristaps 1121: case (NODE_STRUCTNAME):
1122: case (NODE_TEXT):
1123: pnode_printmclose(p, sv);
1124: break;
1.12 kristaps 1125: case (NODE_REFNAME):
1126: /*
1127: * If we're in the NAME macro and we have multiple
1128: * <refname> macros in sequence, then print out a
1129: * trailing comma before the newline.
1130: */
1131: if (NULL != pn->parent &&
1132: NODE_REFNAMEDIV == pn->parent->node &&
1133: NULL != TAILQ_NEXT(pn, child) &&
1134: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1135: fputs(" ,", stdout);
1136: pnode_printmclose(p, sv);
1137: break;
1.1 kristaps 1138: case (NODE_PROGRAMLISTING):
1.22 ! kristaps 1139: /* FALLTHROUGH */
! 1140: case (NODE_SCREEN):
1.10 kristaps 1141: assert(p->newln);
1.1 kristaps 1142: puts(".Ed");
1.10 kristaps 1143: p->newln = 1;
1.1 kristaps 1144: break;
1145: default:
1146: break;
1147: }
1148: }
1149:
1150: /*
1151: * Loop around the read buffer until we've drained it of all data.
1152: * Invoke the parser context with each buffer fill.
1153: */
1154: static int
1155: readfile(XML_Parser xp, int fd,
1156: char *b, size_t bsz, const char *fn)
1157: {
1158: struct parse p;
1159: int rc;
1160: ssize_t ssz;
1161:
1162: memset(&p, 0, sizeof(struct parse));
1163:
1164: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1165: p.fname = fn;
1166: p.xml = xp;
1.1 kristaps 1167:
1168: XML_SetCharacterDataHandler(xp, xml_char);
1169: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1170: XML_SetUserData(xp, &p);
1171:
1172: while ((ssz = read(fd, b, bsz)) >= 0) {
1173: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1174: fprintf(stderr, "%s: %s\n", fn,
1175: XML_ErrorString
1176: (XML_GetErrorCode(xp)));
1177: else if ( ! p.stop && ssz > 0)
1178: continue;
1179: /*
1180: * Exit when we've read all or errors have occured
1181: * during the parse sequence.
1182: */
1.10 kristaps 1183: p.newln = 1;
1.7 kristaps 1184: pnode_printprologue(&p, p.root);
1.1 kristaps 1185: pnode_print(&p, p.root);
1186: pnode_free(p.root);
1187: free(p.b);
1188: return(0 != rc && ! p.stop);
1189: }
1190:
1191: /* Read error has occured. */
1192: perror(fn);
1193: pnode_free(p.root);
1194: free(p.b);
1195: return(0);
1196: }
1197:
1198: int
1199: main(int argc, char *argv[])
1200: {
1201: XML_Parser xp;
1202: const char *fname;
1203: char *buf;
1204: int fd, rc;
1205:
1206: fname = "-";
1207: xp = NULL;
1208: buf = NULL;
1209: rc = 0;
1210:
1211: if (-1 != getopt(argc, argv, ""))
1212: return(EXIT_FAILURE);
1213:
1214: argc -= optind;
1215: argv += optind;
1216:
1217: if (argc > 1)
1218: return(EXIT_FAILURE);
1219: else if (argc > 0)
1220: fname = argv[0];
1221:
1222: /* Read from stdin or a file. */
1223: fd = 0 == strcmp(fname, "-") ?
1224: STDIN_FILENO : open(fname, O_RDONLY, 0);
1225:
1226: /*
1227: * Open file for reading.
1228: * Allocate a read buffer.
1229: * Create the parser context.
1230: * Dive directly into the parse.
1231: */
1232: if (-1 == fd)
1233: perror(fname);
1234: else if (NULL == (buf = malloc(4096)))
1235: perror(NULL);
1236: else if (NULL == (xp = XML_ParserCreate(NULL)))
1237: perror(NULL);
1238: else if ( ! readfile(xp, fd, buf, 4096, fname))
1239: rc = 1;
1240:
1241: XML_ParserFree(xp);
1242: free(buf);
1243: if (STDIN_FILENO != fd)
1244: close(fd);
1245: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1246: }
CVSweb