Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.14
1.14 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.13 2014/03/30 11:48:10 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/*
 * Static description of one DocBook element known to the converter.
 */
struct	node {
	/* DocBook element name (NULL for entries without one). */
	const char	*name;
	/* Bit-field of the NODE_* flags below. */
	unsigned int	 flags;
#define	NODE_IGNTEXT	 1 /* ignore all contained text */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.13 kristaps 95: { "emphasis", 0 },
1.3 kristaps 96: { "funcdef", 0 },
97: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 98: { "funcsynopsis", NODE_IGNTEXT },
99: { "funcsynopsisinfo", 0 },
1.3 kristaps 100: { "function", 0 },
1.14 ! kristaps 101: { "link", 0 },
1.13 kristaps 102: { "listitem", NODE_IGNTEXT },
1.1 kristaps 103: { "manvolnum", 0 },
1.4 kristaps 104: { "option", 0 },
1.1 kristaps 105: { "para", 0 },
1.3 kristaps 106: { "paramdef", 0 },
107: { "parameter", 0 },
1.1 kristaps 108: { "programlisting", 0 },
109: { "refclass", NODE_IGNTEXT },
110: { "refdescriptor", NODE_IGNTEXT },
111: { "refentry", NODE_IGNTEXT },
112: { "refentrytitle", 0 },
113: { "refmeta", NODE_IGNTEXT },
114: { "refmiscinfo", NODE_IGNTEXT },
115: { "refname", 0 },
116: { "refnamediv", NODE_IGNTEXT },
117: { "refpurpose", 0 },
118: { "refsect1", 0 },
119: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 120: { "replaceable", 0 },
1.8 kristaps 121: { "structname", 0 },
1.1 kristaps 122: { "synopsis", 0 },
1.13 kristaps 123: { "term", 0 },
1.1 kristaps 124: { NULL, 0 },
125: { "title", 0 },
1.14 ! kristaps 126: { "ulink", 0 },
1.13 kristaps 127: { "variablelist", NODE_IGNTEXT },
128: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 129: };
130:
/* Forward declaration: the per-element printers recurse into it. */
static void	 pnode_print(struct parse *p, struct pnode *pn);
133:
1.8 kristaps 134: /*
135: * Process a stream of characters.
136: * We store text as nodes in and of themselves.
137: * If a text node is already open, append to it.
138: * If it's not open, open one under the current context.
139: */
1.1 kristaps 140: static void
141: xml_char(void *arg, const XML_Char *p, int sz)
142: {
143: struct parse *ps = arg;
144: struct pnode *dat;
1.4 kristaps 145: int i;
1.1 kristaps 146:
147: /* Stopped or no tree yet. */
148: if (ps->stop || NODE_ROOT == ps->node)
149: return;
150:
151: /* Not supposed to be collecting text. */
152: assert(NULL != ps->cur);
153: if (NODE_IGNTEXT & nodes[ps->node].flags)
154: return;
155:
156: /*
157: * Are we in the midst of processing text?
158: * If we're not processing text right now, then create a text
159: * node for doing so.
1.4 kristaps 160: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 161: * process: strip out all leading whitespace to be sure.
1.1 kristaps 162: */
163: if (NODE_TEXT != ps->node) {
1.4 kristaps 164: for (i = 0; i < sz; i++)
165: if ( ! isspace((int)p[i]))
166: break;
167: if (i == sz)
168: return;
1.10 kristaps 169: p += i;
170: sz -= i;
1.1 kristaps 171: dat = calloc(1, sizeof(struct pnode));
172: if (NULL == dat) {
173: perror(NULL);
174: exit(EXIT_FAILURE);
175: }
176:
177: dat->node = ps->node = NODE_TEXT;
178: dat->parent = ps->cur;
179: TAILQ_INIT(&dat->childq);
1.12 kristaps 180: TAILQ_INIT(&dat->attrq);
1.1 kristaps 181: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
182: ps->cur = dat;
183: assert(NULL != ps->root);
184: }
185:
186: /* Append to current buffer. */
187: assert(sz >= 0);
188: ps->cur->b = realloc(ps->cur->b,
189: ps->cur->bsz + (size_t)sz);
190: if (NULL == ps->cur->b) {
191: perror(NULL);
192: exit(EXIT_FAILURE);
193: }
194: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
195: ps->cur->bsz += (size_t)sz;
196: }
197:
1.10 kristaps 198: static void
199: pnode_trim(struct pnode *pn)
200: {
201:
202: assert(NODE_TEXT == pn->node);
203: for ( ; pn->bsz > 0; pn->bsz--)
204: if ( ! isspace((int)pn->b[pn->bsz - 1]))
205: break;
206: }
207:
1.1 kristaps 208: /*
209: * Begin an element.
210: * First, look for the element.
211: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 212: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 213: * If we find it but we're not parsing yet (i.e., it's not a refentry
214: * and thus out of context), keep going.
1.8 kristaps 215: * If we find it and we're at the root and already have a tree, puke and
216: * exit (FIXME: I don't think this is right?).
217: * If we find it but we're parsing a text node, close out the text node,
218: * return to its parent, and keep going.
1.1 kristaps 219: * Make sure that the element is in the right context.
220: * Lastly, put the node onto our parse tree and continue.
221: */
222: static void
223: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
224: {
1.12 kristaps 225: struct parse *ps = arg;
226: enum nodeid node;
227: enum attrkey key;
228: enum attrval val;
229: struct pnode *dat;
230: struct pattr *pattr;
231: const XML_Char **att;
1.1 kristaps 232:
233: if (ps->stop)
234: return;
235:
236: /* Close out text node, if applicable... */
237: if (NODE_TEXT == ps->node) {
238: assert(NULL != ps->cur);
1.10 kristaps 239: pnode_trim(ps->cur);
1.1 kristaps 240: ps->cur = ps->cur->parent;
241: assert(NULL != ps->cur);
242: ps->node = ps->cur->node;
243: }
244:
245: for (node = 0; node < NODE__MAX; node++)
246: if (NULL == nodes[node].name)
247: continue;
248: else if (0 == strcmp(nodes[node].name, name))
249: break;
250:
251: if (NODE__MAX == node && NODE_ROOT == ps->node) {
252: return;
253: } else if (NODE__MAX == node) {
1.12 kristaps 254: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
255: ps->fname, XML_GetCurrentLineNumber(ps->xml),
256: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 257: ps->stop = 1;
258: return;
259: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 260: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
261: ps->fname, XML_GetCurrentLineNumber(ps->xml),
262: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 263: ps->stop = 1;
264: return;
265: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
266: return;
267: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 268: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
269: "of node \"%s\"\n",
1.12 kristaps 270: ps->fname, XML_GetCurrentLineNumber(ps->xml),
271: XML_GetCurrentColumnNumber(ps->xml),
272: NULL == nodes[ps->node].name ?
1.13 kristaps 273: "(none)" : nodes[ps->node].name,
274: NULL == nodes[node].name ?
275: "(none)" : nodes[node].name);
1.1 kristaps 276: ps->stop = 1;
277: return;
278: }
279:
280: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
281: perror(NULL);
282: exit(EXIT_FAILURE);
283: }
284:
285: dat->node = ps->node = node;
286: dat->parent = ps->cur;
287: TAILQ_INIT(&dat->childq);
1.12 kristaps 288: TAILQ_INIT(&dat->attrq);
1.1 kristaps 289:
290: if (NULL != ps->cur)
291: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
292:
293: ps->cur = dat;
294: if (NULL == ps->root)
295: ps->root = dat;
1.12 kristaps 296:
297: /*
298: * Process attributes.
299: */
300: for (att = atts; NULL != *att; att += 2) {
301: for (key = 0; key < ATTRKEY__MAX; key++)
302: if (0 == strcmp(*att, attrkeys[key]))
303: break;
304: if (ATTRKEY__MAX == key) {
305: fprintf(stderr, "%s:%zu:%zu: unknown "
306: "attribute \"%s\"\n", ps->fname,
307: XML_GetCurrentLineNumber(ps->xml),
308: XML_GetCurrentColumnNumber(ps->xml),
309: *att);
310: continue;
311: } else if ( ! isattrkey(node, key)) {
312: fprintf(stderr, "%s:%zu:%zu: bad "
313: "attribute \"%s\"\n", ps->fname,
314: XML_GetCurrentLineNumber(ps->xml),
315: XML_GetCurrentColumnNumber(ps->xml),
316: *att);
317: continue;
318: }
319: for (val = 0; val < ATTRVAL__MAX; val++)
320: if (0 == strcmp(*(att + 1), attrvals[val]))
321: break;
322: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
323: fprintf(stderr, "%s:%zu:%zu: bad "
324: "value \"%s\"\n", ps->fname,
325: XML_GetCurrentLineNumber(ps->xml),
326: XML_GetCurrentColumnNumber(ps->xml),
327: *(att + 1));
328: continue;
329: }
330: pattr = calloc(1, sizeof(struct pattr));
331: pattr->key = key;
332: pattr->val = val;
333: if (ATTRVAL__MAX == val)
334: pattr->rawval = strdup(*(att + 1));
335: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
336: }
337:
1.1 kristaps 338: }
339:
340: /*
341: * Roll up the parse tree.
1.8 kristaps 342: * If we're at a text node, roll that one up first.
1.1 kristaps 343: * If we hit the root, then assign ourselves as the NODE_ROOT.
344: */
345: static void
346: xml_elem_end(void *arg, const XML_Char *name)
347: {
348: struct parse *ps = arg;
349:
350: if (ps->stop || NODE_ROOT == ps->node)
351: return;
352:
353: /* Close out text node, if applicable... */
354: if (NODE_TEXT == ps->node) {
355: assert(NULL != ps->cur);
1.10 kristaps 356: pnode_trim(ps->cur);
1.1 kristaps 357: ps->cur = ps->cur->parent;
358: assert(NULL != ps->cur);
359: ps->node = ps->cur->node;
360: }
361:
362: if (NULL == (ps->cur = ps->cur->parent))
363: ps->node = NODE_ROOT;
364: else
365: ps->node = ps->cur->node;
366: }
367:
1.8 kristaps 368: /*
369: * Recursively free a node (NULL is ok).
370: */
1.1 kristaps 371: static void
372: pnode_free(struct pnode *pn)
373: {
374: struct pnode *pp;
1.12 kristaps 375: struct pattr *ap;
1.1 kristaps 376:
377: if (NULL == pn)
378: return;
379:
380: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
381: TAILQ_REMOVE(&pn->childq, pp, child);
382: pnode_free(pp);
383: }
384:
1.12 kristaps 385: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
386: TAILQ_REMOVE(&pn->attrq, ap, child);
387: free(ap->rawval);
388: free(ap);
389: }
390:
1.1 kristaps 391: free(pn->b);
392: free(pn);
393: }
394:
1.8 kristaps 395: /*
396: * Unlink a node from its parent and pnode_free() it.
397: */
1.1 kristaps 398: static void
399: pnode_unlink(struct pnode *pn)
400: {
401:
402: if (NULL != pn->parent)
403: TAILQ_REMOVE(&pn->parent->childq, pn, child);
404: pnode_free(pn);
405: }
406:
1.8 kristaps 407: /*
408: * Unlink all children of a node and pnode_free() them.
409: */
1.1 kristaps 410: static void
1.4 kristaps 411: pnode_unlinksub(struct pnode *pn)
412: {
413:
414: while ( ! TAILQ_EMPTY(&pn->childq))
415: pnode_unlink(TAILQ_FIRST(&pn->childq));
416: }
417:
1.8 kristaps 418: /*
419: * Reset the lookaside buffer.
420: */
1.4 kristaps 421: static void
1.1 kristaps 422: bufclear(struct parse *p)
423: {
424:
425: p->b[p->bsz = 0] = '\0';
426: }
427:
1.8 kristaps 428: /*
429: * Append NODE_TEXT contents to the current buffer, reallocating its
430: * size if necessary.
431: * The buffer is ALWAYS nil-terminated.
432: */
1.1 kristaps 433: static void
434: bufappend(struct parse *p, struct pnode *pn)
435: {
436:
437: assert(NODE_TEXT == pn->node);
438: if (p->bsz + pn->bsz + 1 > p->mbsz) {
439: p->mbsz = p->bsz + pn->bsz + 1;
440: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
441: perror(NULL);
442: exit(EXIT_FAILURE);
443: }
444: }
445: memcpy(p->b + p->bsz, pn->b, pn->bsz);
446: p->bsz += pn->bsz;
447: p->b[p->bsz] = '\0';
448: }
449:
1.8 kristaps 450: /*
451: * Recursively append all NODE_TEXT nodes to the buffer.
452: * This descends into non-text nodes, but doesn't do anything beyond
453: * them.
454: * In other words, this is a recursive text grok.
455: */
1.3 kristaps 456: static void
457: bufappend_r(struct parse *p, struct pnode *pn)
458: {
459: struct pnode *pp;
460:
461: if (NODE_TEXT == pn->node)
462: bufappend(p, pn);
463: TAILQ_FOREACH(pp, &pn->childq, child)
464: bufappend_r(p, pp);
465: }
466:
1.12 kristaps 467: #define MACROLINE_NORM 0
468: #define MACROLINE_UPPER 1
1.1 kristaps 469: /*
1.8 kristaps 470: * Recursively print text presumably on a macro line.
1.1 kristaps 471: * Convert all whitespace to regular spaces.
472: */
473: static void
1.12 kristaps 474: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 475: {
476: char *cp;
477:
1.13 kristaps 478: if (0 == p->newln)
479: putchar(' ');
480:
1.1 kristaps 481: bufclear(p);
1.3 kristaps 482: bufappend_r(p, pn);
1.1 kristaps 483:
484: /* Convert all space to spaces. */
485: for (cp = p->b; '\0' != *cp; cp++)
486: if (isspace((int)*cp))
487: *cp = ' ';
488:
489: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 490: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 491: for ( ; '\0' != *cp; cp++) {
492: /* Escape us if we look like a macro. */
493: if ((cp == p->b || ' ' == *(cp - 1)) &&
494: isupper((int)*cp) &&
495: '\0' != *(cp + 1) &&
496: islower((int)*(cp + 1)) &&
497: ('\0' == *(cp + 2) ||
498: ' ' == *(cp + 2) ||
499: (islower((int)*(cp + 2)) &&
500: ('\0' == *(cp + 3) ||
501: ' ' == *(cp + 3)))))
502: fputs("\\&", stdout);
1.12 kristaps 503: if (MACROLINE_UPPER & fl)
504: putchar(toupper((int)*cp));
505: else
506: putchar((int)*cp);
1.1 kristaps 507: /* If we're a character escape, escape us. */
508: if ('\\' == *cp)
509: putchar('e');
510: }
511: }
512:
1.12 kristaps 513: static void
514: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
515: {
516:
517: pnode_printmacrolinetext(p, pn, 0);
518: }
519:
1.1 kristaps 520: /*
521: * Just pnode_printmacrolinepart() but with a newline.
522: * If no text, just the newline.
523: */
524: static void
525: pnode_printmacroline(struct parse *p, struct pnode *pn)
526: {
527:
1.13 kristaps 528: assert(0 == p->newln);
1.12 kristaps 529: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 530: putchar('\n');
1.13 kristaps 531: p->newln = 1;
1.1 kristaps 532: }
533:
1.10 kristaps 534: static void
535: pnode_printmopen(struct parse *p)
536: {
537: if (p->newln) {
538: putchar('.');
539: p->newln = 0;
540: } else
541: putchar(' ');
542: }
543:
544: static void
545: pnode_printmclose(struct parse *p, int sv)
546: {
547:
548: if (sv && ! p->newln) {
549: putchar('\n');
550: p->newln = 1;
551: }
552: }
553:
1.8 kristaps 554: /*
1.10 kristaps 555: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 556: */
1.1 kristaps 557: static void
1.6 kristaps 558: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
559: {
560: struct pnode *pp;
561:
1.10 kristaps 562: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 563: if (NODE_TITLE == pp->node) {
564: pnode_unlink(pp);
1.10 kristaps 565: return;
1.6 kristaps 566: }
567: }
568:
1.8 kristaps 569: /*
570: * Start a hopefully-named `Sh' section.
571: */
1.6 kristaps 572: static void
1.1 kristaps 573: pnode_printrefsect(struct parse *p, struct pnode *pn)
574: {
575: struct pnode *pp;
576:
577: TAILQ_FOREACH(pp, &pn->childq, child)
578: if (NODE_TITLE == pp->node)
579: break;
580:
1.13 kristaps 581: fputs(".Sh", stdout);
582: p->newln = 0;
1.4 kristaps 583:
1.5 kristaps 584: if (NULL != pp) {
1.1 kristaps 585: pnode_printmacroline(p, pp);
1.5 kristaps 586: pnode_unlink(pp);
1.13 kristaps 587: } else {
1.4 kristaps 588: puts("UNKNOWN");
1.13 kristaps 589: p->newln = 1;
590: }
1.1 kristaps 591: }
592:
1.8 kristaps 593: /*
594: * Start a reference, extracting the title and volume.
595: */
1.1 kristaps 596: static void
597: pnode_printciterefentry(struct parse *p, struct pnode *pn)
598: {
599: struct pnode *pp, *title, *manvol;
600:
601: title = manvol = NULL;
1.13 kristaps 602: assert(p->newln);
1.1 kristaps 603: TAILQ_FOREACH(pp, &pn->childq, child)
604: if (NODE_MANVOLNUM == pp->node)
605: manvol = pp;
606: else if (NODE_REFENTRYTITLE == pp->node)
607: title = pp;
608:
1.13 kristaps 609: fputs(".Xr", stdout);
610: p->newln = 0;
1.4 kristaps 611:
1.1 kristaps 612: if (NULL != title) {
613: pnode_printmacrolinepart(p, title);
614: } else
1.13 kristaps 615: fputs(" unknown ", stdout);
1.4 kristaps 616:
1.13 kristaps 617: if (NULL == manvol) {
618: puts(" 1");
619: p->newln = 1;
620: } else
1.1 kristaps 621: pnode_printmacroline(p, manvol);
622: }
623:
624: static void
625: pnode_printrefmeta(struct parse *p, struct pnode *pn)
626: {
627: struct pnode *pp, *title, *manvol;
628:
629: title = manvol = NULL;
1.13 kristaps 630: assert(p->newln);
1.1 kristaps 631: TAILQ_FOREACH(pp, &pn->childq, child)
632: if (NODE_MANVOLNUM == pp->node)
633: manvol = pp;
634: else if (NODE_REFENTRYTITLE == pp->node)
635: title = pp;
636:
1.2 kristaps 637: puts(".Dd $Mdocdate" "$");
1.13 kristaps 638: fputs(".Dt", stdout);
639: p->newln = 0;
1.1 kristaps 640:
1.13 kristaps 641: if (NULL != title)
1.12 kristaps 642: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 643: else
644: fputs(" UNKNOWN ", stdout);
645:
646: if (NULL == manvol) {
647: puts(" 1");
648: p->newln = 1;
1.1 kristaps 649: } else
650: pnode_printmacroline(p, manvol);
651:
652: puts(".Os");
653: }
654:
1.3 kristaps 655: static void
656: pnode_printfuncdef(struct parse *p, struct pnode *pn)
657: {
658: struct pnode *pp, *ftype, *func;
659:
1.13 kristaps 660: assert(p->newln);
1.3 kristaps 661: ftype = func = NULL;
662: TAILQ_FOREACH(pp, &pn->childq, child)
663: if (NODE_TEXT == pp->node)
664: ftype = pp;
665: else if (NODE_FUNCTION == pp->node)
666: func = pp;
667:
668: if (NULL != ftype) {
1.13 kristaps 669: fputs(".Ft", stdout);
670: p->newln = 0;
1.3 kristaps 671: pnode_printmacroline(p, ftype);
672: }
673:
674: if (NULL != func) {
1.13 kristaps 675: fputs(".Fo", stdout);
676: p->newln = 0;
1.3 kristaps 677: pnode_printmacroline(p, func);
1.13 kristaps 678: } else {
1.3 kristaps 679: puts(".Fo UNKNOWN");
1.13 kristaps 680: p->newln = 1;
681: }
1.3 kristaps 682: }
683:
684: static void
685: pnode_printparamdef(struct parse *p, struct pnode *pn)
686: {
687: struct pnode *pp, *ptype, *param;
688:
1.13 kristaps 689: assert(p->newln);
1.3 kristaps 690: ptype = param = NULL;
691: TAILQ_FOREACH(pp, &pn->childq, child)
692: if (NODE_TEXT == pp->node)
693: ptype = pp;
694: else if (NODE_PARAMETER == pp->node)
695: param = pp;
696:
697: fputs(".Fa \"", stdout);
1.13 kristaps 698: p->newln = 0;
1.3 kristaps 699: if (NULL != ptype) {
700: pnode_printmacrolinepart(p, ptype);
701: putchar(' ');
702: }
703:
704: if (NULL != param)
705: pnode_printmacrolinepart(p, param);
706:
707: puts("\"");
1.13 kristaps 708: p->newln = 1;
1.3 kristaps 709: }
710:
711: static void
712: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
713: {
714: struct pnode *pp, *fdef;
715:
1.13 kristaps 716: assert(p->newln);
1.3 kristaps 717: TAILQ_FOREACH(fdef, &pn->childq, child)
718: if (NODE_FUNCDEF == fdef->node)
719: break;
720:
1.4 kristaps 721: if (NULL != fdef)
1.3 kristaps 722: pnode_printfuncdef(p, fdef);
1.4 kristaps 723: else
1.3 kristaps 724: puts(".Fo UNKNOWN");
725:
1.4 kristaps 726: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 727: if (NODE_PARAMDEF == pp->node)
728: pnode_printparamdef(p, pp);
729:
730: puts(".Fc");
1.13 kristaps 731: p->newln = 1;
1.3 kristaps 732: }
733:
1.10 kristaps 734: /*
735: * The <arg> element is more complicated than it should be because text
736: * nodes are treated like ".Ar foo", but non-text nodes need to be
737: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 738: * This also handles the case of "repetition" (or in other words, the
739: * ellipsis following an argument) and optionality.
1.10 kristaps 740: */
1.4 kristaps 741: static void
1.10 kristaps 742: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 743: {
744: struct pnode *pp;
1.12 kristaps 745: struct pattr *ap;
746: int isop, isrep;
747:
748: isop = 1;
749: isrep = 0;
750: TAILQ_FOREACH(ap, &pn->attrq, child)
751: if (ATTRKEY_CHOICE == ap->key &&
752: (ATTRVAL_PLAIN == ap->val ||
753: ATTRVAL_REQ == ap->val))
754: isop = 0;
755: else if (ATTRKEY_REP == ap->key &&
756: (ATTRVAL_REPEAT == ap->val))
757: isrep = 1;
758:
759: if (isop) {
760: pnode_printmopen(p);
1.13 kristaps 761: fputs("Op", stdout);
1.12 kristaps 762: }
1.4 kristaps 763:
1.10 kristaps 764: TAILQ_FOREACH(pp, &pn->childq, child) {
765: if (NODE_TEXT == pp->node) {
766: pnode_printmopen(p);
1.13 kristaps 767: fputs("Ar", stdout);
1.10 kristaps 768: }
769: pnode_print(p, pp);
1.12 kristaps 770: if (NODE_TEXT == pp->node && isrep)
771: fputs("...", stdout);
1.10 kristaps 772: }
1.4 kristaps 773: }
774:
1.7 kristaps 775: /*
776: * Recursively search and return the first instance of "node".
777: */
778: static struct pnode *
779: pnode_findfirst(struct pnode *pn, enum nodeid node)
780: {
781: struct pnode *pp, *res;
782:
783: res = NULL;
784: TAILQ_FOREACH(pp, &pn->childq, child) {
785: res = pp->node == node ? pp :
786: pnode_findfirst(pp, node);
787: if (NULL != res)
788: break;
789: }
790:
791: return(res);
792: }
793:
794: static void
795: pnode_printprologue(struct parse *p, struct pnode *pn)
796: {
797: struct pnode *pp;
798:
1.9 kristaps 799: pp = NULL == p->root ? NULL :
800: pnode_findfirst(p->root, NODE_REFMETA);
801:
802: if (NULL != pp) {
1.7 kristaps 803: pnode_printrefmeta(p, pp);
804: pnode_unlink(pp);
805: } else {
806: puts(".\\\" Supplying bogus prologue...");
807: puts(".Dd $Mdocdate" "$");
808: puts(".Dt UNKNOWN 1");
809: puts(".Os");
810: }
811: }
812:
1.13 kristaps 813: static void
814: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
815: {
816: struct pnode *pp;
817:
818: assert(p->newln);
819: TAILQ_FOREACH(pp, &pn->childq, child)
820: if (NODE_TERM == pp->node) {
821: fputs(".It", stdout);
822: p->newln = 0;
823: pnode_print(p, pp);
824: pnode_unlink(pp);
825: putchar('\n');
826: p->newln = 1;
827: return;
828: }
829:
830: puts(".It");
831: p->newln = 1;
832: }
833:
834: static void
835: pnode_printvariablelist(struct parse *p, struct pnode *pn)
836: {
837: struct pnode *pp;
838:
839: assert(p->newln);
840: TAILQ_FOREACH(pp, &pn->childq, child)
841: if (NODE_TITLE == pp->node) {
842: puts(".Pp");
843: pnode_print(p, pp);
844: pnode_unlink(pp);
845: }
846:
847: assert(p->newln);
848: puts(".Bl -tag -width Ds");
849: TAILQ_FOREACH(pp, &pn->childq, child)
850: if (NODE_VARLISTENTRY != pp->node) {
851: assert(p->newln);
852: fputs(".It", stdout);
853: pnode_printmacroline(p, pp);
854: } else {
855: assert(p->newln);
856: pnode_print(p, pp);
857: }
858: assert(p->newln);
859: puts(".El");
860: }
861:
1.1 kristaps 862: /*
863: * Print a parsed node (or ignore it--whatever).
864: * This is a recursive function.
865: * FIXME: macro line continuation?
866: */
867: static void
868: pnode_print(struct parse *p, struct pnode *pn)
869: {
870: struct pnode *pp;
871: char *cp;
1.10 kristaps 872: int last, sv;
1.1 kristaps 873:
874: if (NULL == pn)
875: return;
876:
1.10 kristaps 877: sv = p->newln;
1.1 kristaps 878:
879: switch (pn->node) {
1.4 kristaps 880: case (NODE_ARG):
1.10 kristaps 881: pnode_printarg(p, pn);
1.4 kristaps 882: pnode_unlinksub(pn);
883: break;
1.1 kristaps 884: case (NODE_CITEREFENTRY):
1.10 kristaps 885: assert(p->newln);
1.1 kristaps 886: pnode_printciterefentry(p, pn);
1.4 kristaps 887: pnode_unlinksub(pn);
1.1 kristaps 888: break;
889: case (NODE_CODE):
1.10 kristaps 890: pnode_printmopen(p);
1.13 kristaps 891: fputs("Li", stdout);
1.4 kristaps 892: break;
893: case (NODE_COMMAND):
1.10 kristaps 894: pnode_printmopen(p);
1.13 kristaps 895: fputs("Nm", stdout);
896: break;
897: case (NODE_EMPHASIS):
898: pnode_printmopen(p);
899: fputs("Em", stdout);
1.1 kristaps 900: break;
1.3 kristaps 901: case (NODE_FUNCTION):
1.10 kristaps 902: pnode_printmopen(p);
1.13 kristaps 903: fputs("Fn", stdout);
1.3 kristaps 904: break;
905: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 906: assert(p->newln);
1.3 kristaps 907: pnode_printfuncprototype(p, pn);
1.4 kristaps 908: pnode_unlinksub(pn);
1.3 kristaps 909: break;
1.1 kristaps 910: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 911: pnode_printmopen(p);
1.13 kristaps 912: fputs("Fd", stdout);
1.10 kristaps 913: break;
914: case (NODE_OPTION):
915: pnode_printmopen(p);
1.13 kristaps 916: fputs("Fl", stdout);
917: /* FIXME: bogus leading '-'? */
1.1 kristaps 918: break;
919: case (NODE_PARA):
1.10 kristaps 920: assert(p->newln);
1.13 kristaps 921: if (NULL != pn->parent &&
922: NODE_LISTITEM == pn->parent->node)
923: break;
1.1 kristaps 924: puts(".Pp");
1.3 kristaps 925: break;
926: case (NODE_PARAMETER):
1.10 kristaps 927: /* Suppress non-text children... */
928: pnode_printmopen(p);
929: fputs("Fa \"", stdout);
1.3 kristaps 930: pnode_printmacrolinepart(p, pn);
931: puts("\"");
1.4 kristaps 932: pnode_unlinksub(pn);
1.1 kristaps 933: break;
934: case (NODE_PROGRAMLISTING):
1.10 kristaps 935: assert(p->newln);
1.1 kristaps 936: puts(".Bd -literal");
937: break;
938: case (NODE_REFMETA):
1.7 kristaps 939: abort();
1.1 kristaps 940: break;
941: case (NODE_REFNAME):
1.10 kristaps 942: /* Suppress non-text children... */
943: pnode_printmopen(p);
1.13 kristaps 944: fputs("Nm", stdout);
945: p->newln = 0;
1.10 kristaps 946: pnode_printmacrolinepart(p, pn);
1.4 kristaps 947: pnode_unlinksub(pn);
1.10 kristaps 948: break;
1.1 kristaps 949: case (NODE_REFNAMEDIV):
1.10 kristaps 950: assert(p->newln);
1.1 kristaps 951: puts(".Sh NAME");
952: break;
953: case (NODE_REFPURPOSE):
1.10 kristaps 954: assert(p->newln);
1.13 kristaps 955: pnode_printmopen(p);
956: fputs("Nd", stdout);
1.10 kristaps 957: break;
1.1 kristaps 958: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 959: assert(p->newln);
1.6 kristaps 960: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 961: puts(".Sh SYNOPSIS");
1.1 kristaps 962: break;
963: case (NODE_REFSECT1):
1.10 kristaps 964: assert(p->newln);
1.1 kristaps 965: pnode_printrefsect(p, pn);
966: break;
1.13 kristaps 967: case (NODE_REPLACEABLE):
968: pnode_printmopen(p);
969: fputs("Ar", stdout);
970: break;
1.8 kristaps 971: case (NODE_STRUCTNAME):
1.10 kristaps 972: pnode_printmopen(p);
1.13 kristaps 973: fputs("Vt", stdout);
1.10 kristaps 974: break;
1.1 kristaps 975: case (NODE_TEXT):
1.13 kristaps 976: if (0 == p->newln)
977: putchar(' ');
1.1 kristaps 978: bufclear(p);
979: bufappend(p, pn);
980: /*
981: * Output all characters, squeezing out whitespace
982: * between newlines.
983: * XXX: all whitespace, including tabs (?).
984: * Remember to escape control characters and escapes.
985: */
1.10 kristaps 986: assert(p->bsz);
1.1 kristaps 987: for (last = '\n', cp = p->b; '\0' != *cp; ) {
988: if ('\n' == last) {
989: /* Consume all whitespace. */
990: if (isspace((int)*cp)) {
991: while (isspace((int)*cp))
992: cp++;
993: continue;
994: } else if ('\'' == *cp || '.' == *cp)
995: fputs("\\&", stdout);
996: }
997: putchar(last = *cp++);
998: /* If we're a character escape, escape us. */
999: if ('\\' == last)
1000: putchar('e');
1001: }
1.10 kristaps 1002: p->newln = 0;
1.1 kristaps 1003: break;
1.13 kristaps 1004: case (NODE_VARIABLELIST):
1005: assert(p->newln);
1006: pnode_printvariablelist(p, pn);
1007: pnode_unlinksub(pn);
1008: break;
1009: case (NODE_VARLISTENTRY):
1010: assert(p->newln);
1011: pnode_printvarlistentry(p, pn);
1012: break;
1.1 kristaps 1013: default:
1014: break;
1015: }
1016:
1017: TAILQ_FOREACH(pp, &pn->childq, child)
1018: pnode_print(p, pp);
1019:
1020: switch (pn->node) {
1.10 kristaps 1021: case (NODE_ARG):
1022: case (NODE_CODE):
1023: case (NODE_COMMAND):
1.13 kristaps 1024: case (NODE_EMPHASIS):
1.10 kristaps 1025: case (NODE_FUNCTION):
1026: case (NODE_FUNCSYNOPSISINFO):
1027: case (NODE_OPTION):
1028: case (NODE_PARAMETER):
1.13 kristaps 1029: case (NODE_REPLACEABLE):
1030: case (NODE_REFPURPOSE):
1.10 kristaps 1031: case (NODE_STRUCTNAME):
1032: case (NODE_TEXT):
1033: pnode_printmclose(p, sv);
1034: break;
1.12 kristaps 1035: case (NODE_REFNAME):
1036: /*
1037: * If we're in the NAME macro and we have multiple
1038: * <refname> macros in sequence, then print out a
1039: * trailing comma before the newline.
1040: */
1041: if (NULL != pn->parent &&
1042: NODE_REFNAMEDIV == pn->parent->node &&
1043: NULL != TAILQ_NEXT(pn, child) &&
1044: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1045: fputs(" ,", stdout);
1046: pnode_printmclose(p, sv);
1047: break;
1.1 kristaps 1048: case (NODE_PROGRAMLISTING):
1.10 kristaps 1049: assert(p->newln);
1.1 kristaps 1050: puts(".Ed");
1.10 kristaps 1051: p->newln = 1;
1.1 kristaps 1052: break;
1053: default:
1054: break;
1055: }
1056: }
1057:
1058: /*
1059: * Loop around the read buffer until we've drained it of all data.
1060: * Invoke the parser context with each buffer fill.
1061: */
1062: static int
1063: readfile(XML_Parser xp, int fd,
1064: char *b, size_t bsz, const char *fn)
1065: {
1066: struct parse p;
1067: int rc;
1068: ssize_t ssz;
1069:
1070: memset(&p, 0, sizeof(struct parse));
1071:
1072: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1073: p.fname = fn;
1074: p.xml = xp;
1.1 kristaps 1075:
1076: XML_SetCharacterDataHandler(xp, xml_char);
1077: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1078: XML_SetUserData(xp, &p);
1079:
1080: while ((ssz = read(fd, b, bsz)) >= 0) {
1081: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1082: fprintf(stderr, "%s: %s\n", fn,
1083: XML_ErrorString
1084: (XML_GetErrorCode(xp)));
1085: else if ( ! p.stop && ssz > 0)
1086: continue;
1087: /*
1088: * Exit when we've read all or errors have occured
1089: * during the parse sequence.
1090: */
1.10 kristaps 1091: p.newln = 1;
1.7 kristaps 1092: pnode_printprologue(&p, p.root);
1.1 kristaps 1093: pnode_print(&p, p.root);
1094: pnode_free(p.root);
1095: free(p.b);
1096: return(0 != rc && ! p.stop);
1097: }
1098:
1099: /* Read error has occured. */
1100: perror(fn);
1101: pnode_free(p.root);
1102: free(p.b);
1103: return(0);
1104: }
1105:
1106: int
1107: main(int argc, char *argv[])
1108: {
1109: XML_Parser xp;
1110: const char *fname;
1111: char *buf;
1112: int fd, rc;
1113:
1114: fname = "-";
1115: xp = NULL;
1116: buf = NULL;
1117: rc = 0;
1118:
1119: if (-1 != getopt(argc, argv, ""))
1120: return(EXIT_FAILURE);
1121:
1122: argc -= optind;
1123: argv += optind;
1124:
1125: if (argc > 1)
1126: return(EXIT_FAILURE);
1127: else if (argc > 0)
1128: fname = argv[0];
1129:
1130: /* Read from stdin or a file. */
1131: fd = 0 == strcmp(fname, "-") ?
1132: STDIN_FILENO : open(fname, O_RDONLY, 0);
1133:
1134: /*
1135: * Open file for reading.
1136: * Allocate a read buffer.
1137: * Create the parser context.
1138: * Dive directly into the parse.
1139: */
1140: if (-1 == fd)
1141: perror(fname);
1142: else if (NULL == (buf = malloc(4096)))
1143: perror(NULL);
1144: else if (NULL == (xp = XML_ParserCreate(NULL)))
1145: perror(NULL);
1146: else if ( ! readfile(xp, fd, buf, 4096, fname))
1147: rc = 1;
1148:
1149: XML_ParserFree(xp);
1150: free(buf);
1151: if (STDIN_FILENO != fd)
1152: close(fd);
1153: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1154: }
CVSweb