Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.20
1.20 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.19 2014/03/30 16:44:13 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
1.13 kristaps 29: #include "extern.h"
1.12 kristaps 30:
31: /*
1.1 kristaps 32: * Global parse state.
33: * Keep this as simple and small as possible.
34: */
35: struct parse {
1.12 kristaps 36: XML_Parser xml;
1.1 kristaps 37: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 kristaps 38: const char *fname; /* filename */
1.1 kristaps 39: int stop; /* should we stop now? */
40: struct pnode *root; /* root of parse tree */
41: struct pnode *cur; /* current node in tree */
1.8 kristaps 42: char *b; /* nil-terminated buffer for pre-print */
43: size_t bsz; /* current length of b */
44: size_t mbsz; /* max bsz allocation */
1.10 kristaps 45: int newln; /* output: are we on a fresh line */
1.1 kristaps 46: };
47:
/*
 * Static description of one known DocBook element.
 */
struct node {
	const char	*name;	/* docbook element name (NULL if unnamed) */
	unsigned int	 flags;	/* bit-mask of NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
53:
54: TAILQ_HEAD(pnodeq, pnode);
1.12 kristaps 55: TAILQ_HEAD(pattrq, pattr);
56:
57: struct pattr {
58: enum attrkey key;
59: enum attrval val;
60: char *rawval;
61: TAILQ_ENTRY(pattr) child;
62: };
1.1 kristaps 63:
64: struct pnode {
65: enum nodeid node; /* node type */
66: char *b; /* binary data buffer */
67: size_t bsz; /* data buffer size */
68: struct pnode *parent; /* parent (or NULL if top) */
69: struct pnodeq childq; /* queue of children */
1.12 kristaps 70: struct pattrq attrq; /* attributes of node */
1.1 kristaps 71: TAILQ_ENTRY(pnode) child;
72: };
73:
1.12 kristaps 74: static const char *attrkeys[ATTRKEY__MAX] = {
75: "choice",
76: "id",
77: "rep"
78: };
79:
80: static const char *attrvals[ATTRVAL__MAX] = {
81: "norepeat",
82: "opt",
83: "plain",
84: "repeat",
85: "req"
86: };
87:
1.1 kristaps 88: static const struct node nodes[NODE__MAX] = {
89: { NULL, 0 },
1.4 kristaps 90: { "arg", 0 },
1.1 kristaps 91: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 92: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 93: { "code", 0 },
1.4 kristaps 94: { "command", 0 },
1.15 kristaps 95: { "date", 0 },
1.13 kristaps 96: { "emphasis", 0 },
1.17 kristaps 97: { "filename", 0 },
1.3 kristaps 98: { "funcdef", 0 },
99: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 100: { "funcsynopsis", NODE_IGNTEXT },
101: { "funcsynopsisinfo", 0 },
1.3 kristaps 102: { "function", 0 },
1.16 kristaps 103: { "itemizedlist", NODE_IGNTEXT },
1.14 kristaps 104: { "link", 0 },
1.13 kristaps 105: { "listitem", NODE_IGNTEXT },
1.19 kristaps 106: { "literal", 0 },
1.1 kristaps 107: { "manvolnum", 0 },
1.4 kristaps 108: { "option", 0 },
1.1 kristaps 109: { "para", 0 },
1.3 kristaps 110: { "paramdef", 0 },
111: { "parameter", 0 },
1.1 kristaps 112: { "programlisting", 0 },
113: { "refclass", NODE_IGNTEXT },
114: { "refdescriptor", NODE_IGNTEXT },
115: { "refentry", NODE_IGNTEXT },
1.15 kristaps 116: { "refentryinfo", NODE_IGNTEXT },
1.1 kristaps 117: { "refentrytitle", 0 },
118: { "refmeta", NODE_IGNTEXT },
119: { "refmiscinfo", NODE_IGNTEXT },
120: { "refname", 0 },
121: { "refnamediv", NODE_IGNTEXT },
122: { "refpurpose", 0 },
1.20 ! kristaps 123: { "refsect1", NODE_IGNTEXT },
! 124: { "refsect2", NODE_IGNTEXT },
1.1 kristaps 125: { "refsynopsisdiv", NODE_IGNTEXT },
1.13 kristaps 126: { "replaceable", 0 },
1.19 kristaps 127: { "sbr", NODE_IGNTEXT },
1.8 kristaps 128: { "structname", 0 },
1.1 kristaps 129: { "synopsis", 0 },
1.13 kristaps 130: { "term", 0 },
1.1 kristaps 131: { NULL, 0 },
132: { "title", 0 },
1.14 kristaps 133: { "ulink", 0 },
1.13 kristaps 134: { "variablelist", NODE_IGNTEXT },
135: { "varlistentry", NODE_IGNTEXT },
1.1 kristaps 136: };
137:
/* Forward declaration: the per-node printers recurse into pnode_print(). */
static	void	 pnode_print(struct parse *p, struct pnode *pn);
140:
1.8 kristaps 141: /*
142: * Process a stream of characters.
143: * We store text as nodes in and of themselves.
144: * If a text node is already open, append to it.
145: * If it's not open, open one under the current context.
146: */
1.1 kristaps 147: static void
148: xml_char(void *arg, const XML_Char *p, int sz)
149: {
150: struct parse *ps = arg;
151: struct pnode *dat;
1.4 kristaps 152: int i;
1.1 kristaps 153:
154: /* Stopped or no tree yet. */
155: if (ps->stop || NODE_ROOT == ps->node)
156: return;
157:
158: /* Not supposed to be collecting text. */
159: assert(NULL != ps->cur);
160: if (NODE_IGNTEXT & nodes[ps->node].flags)
161: return;
162:
163: /*
164: * Are we in the midst of processing text?
165: * If we're not processing text right now, then create a text
166: * node for doing so.
1.4 kristaps 167: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 168: * process: strip out all leading whitespace to be sure.
1.1 kristaps 169: */
170: if (NODE_TEXT != ps->node) {
1.4 kristaps 171: for (i = 0; i < sz; i++)
172: if ( ! isspace((int)p[i]))
173: break;
174: if (i == sz)
175: return;
1.10 kristaps 176: p += i;
177: sz -= i;
1.1 kristaps 178: dat = calloc(1, sizeof(struct pnode));
179: if (NULL == dat) {
180: perror(NULL);
181: exit(EXIT_FAILURE);
182: }
183:
184: dat->node = ps->node = NODE_TEXT;
185: dat->parent = ps->cur;
186: TAILQ_INIT(&dat->childq);
1.12 kristaps 187: TAILQ_INIT(&dat->attrq);
1.1 kristaps 188: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
189: ps->cur = dat;
190: assert(NULL != ps->root);
191: }
192:
193: /* Append to current buffer. */
194: assert(sz >= 0);
195: ps->cur->b = realloc(ps->cur->b,
196: ps->cur->bsz + (size_t)sz);
197: if (NULL == ps->cur->b) {
198: perror(NULL);
199: exit(EXIT_FAILURE);
200: }
201: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
202: ps->cur->bsz += (size_t)sz;
203: }
204:
1.10 kristaps 205: static void
206: pnode_trim(struct pnode *pn)
207: {
208:
209: assert(NODE_TEXT == pn->node);
210: for ( ; pn->bsz > 0; pn->bsz--)
211: if ( ! isspace((int)pn->b[pn->bsz - 1]))
212: break;
213: }
214:
1.1 kristaps 215: /*
216: * Begin an element.
217: * First, look for the element.
218: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 219: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 220: * If we find it but we're not parsing yet (i.e., it's not a refentry
221: * and thus out of context), keep going.
1.8 kristaps 222: * If we find it and we're at the root and already have a tree, puke and
223: * exit (FIXME: I don't think this is right?).
224: * If we find it but we're parsing a text node, close out the text node,
225: * return to its parent, and keep going.
1.1 kristaps 226: * Make sure that the element is in the right context.
227: * Lastly, put the node onto our parse tree and continue.
228: */
229: static void
230: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
231: {
1.12 kristaps 232: struct parse *ps = arg;
233: enum nodeid node;
234: enum attrkey key;
235: enum attrval val;
236: struct pnode *dat;
237: struct pattr *pattr;
238: const XML_Char **att;
1.1 kristaps 239:
240: if (ps->stop)
241: return;
242:
243: /* Close out text node, if applicable... */
244: if (NODE_TEXT == ps->node) {
245: assert(NULL != ps->cur);
1.10 kristaps 246: pnode_trim(ps->cur);
1.1 kristaps 247: ps->cur = ps->cur->parent;
248: assert(NULL != ps->cur);
249: ps->node = ps->cur->node;
250: }
251:
252: for (node = 0; node < NODE__MAX; node++)
253: if (NULL == nodes[node].name)
254: continue;
255: else if (0 == strcmp(nodes[node].name, name))
256: break;
257:
258: if (NODE__MAX == node && NODE_ROOT == ps->node) {
259: return;
260: } else if (NODE__MAX == node) {
1.12 kristaps 261: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
262: ps->fname, XML_GetCurrentLineNumber(ps->xml),
263: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 264: ps->stop = 1;
265: return;
266: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 kristaps 267: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
268: ps->fname, XML_GetCurrentLineNumber(ps->xml),
269: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 270: ps->stop = 1;
271: return;
272: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
273: return;
274: } else if ( ! isparent(node, ps->node)) {
1.13 kristaps 275: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\" "
276: "of node \"%s\"\n",
1.12 kristaps 277: ps->fname, XML_GetCurrentLineNumber(ps->xml),
278: XML_GetCurrentColumnNumber(ps->xml),
279: NULL == nodes[ps->node].name ?
1.13 kristaps 280: "(none)" : nodes[ps->node].name,
281: NULL == nodes[node].name ?
282: "(none)" : nodes[node].name);
1.1 kristaps 283: ps->stop = 1;
284: return;
285: }
286:
287: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
288: perror(NULL);
289: exit(EXIT_FAILURE);
290: }
291:
292: dat->node = ps->node = node;
293: dat->parent = ps->cur;
294: TAILQ_INIT(&dat->childq);
1.12 kristaps 295: TAILQ_INIT(&dat->attrq);
1.1 kristaps 296:
297: if (NULL != ps->cur)
298: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
299:
300: ps->cur = dat;
301: if (NULL == ps->root)
302: ps->root = dat;
1.12 kristaps 303:
304: /*
305: * Process attributes.
306: */
307: for (att = atts; NULL != *att; att += 2) {
308: for (key = 0; key < ATTRKEY__MAX; key++)
309: if (0 == strcmp(*att, attrkeys[key]))
310: break;
311: if (ATTRKEY__MAX == key) {
312: fprintf(stderr, "%s:%zu:%zu: unknown "
313: "attribute \"%s\"\n", ps->fname,
314: XML_GetCurrentLineNumber(ps->xml),
315: XML_GetCurrentColumnNumber(ps->xml),
316: *att);
317: continue;
318: } else if ( ! isattrkey(node, key)) {
319: fprintf(stderr, "%s:%zu:%zu: bad "
320: "attribute \"%s\"\n", ps->fname,
321: XML_GetCurrentLineNumber(ps->xml),
322: XML_GetCurrentColumnNumber(ps->xml),
323: *att);
324: continue;
325: }
326: for (val = 0; val < ATTRVAL__MAX; val++)
327: if (0 == strcmp(*(att + 1), attrvals[val]))
328: break;
329: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
330: fprintf(stderr, "%s:%zu:%zu: bad "
331: "value \"%s\"\n", ps->fname,
332: XML_GetCurrentLineNumber(ps->xml),
333: XML_GetCurrentColumnNumber(ps->xml),
334: *(att + 1));
335: continue;
336: }
337: pattr = calloc(1, sizeof(struct pattr));
338: pattr->key = key;
339: pattr->val = val;
340: if (ATTRVAL__MAX == val)
341: pattr->rawval = strdup(*(att + 1));
342: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
343: }
344:
1.1 kristaps 345: }
346:
347: /*
348: * Roll up the parse tree.
1.8 kristaps 349: * If we're at a text node, roll that one up first.
1.1 kristaps 350: * If we hit the root, then assign ourselves as the NODE_ROOT.
351: */
352: static void
353: xml_elem_end(void *arg, const XML_Char *name)
354: {
355: struct parse *ps = arg;
356:
357: if (ps->stop || NODE_ROOT == ps->node)
358: return;
359:
360: /* Close out text node, if applicable... */
361: if (NODE_TEXT == ps->node) {
362: assert(NULL != ps->cur);
1.10 kristaps 363: pnode_trim(ps->cur);
1.1 kristaps 364: ps->cur = ps->cur->parent;
365: assert(NULL != ps->cur);
366: ps->node = ps->cur->node;
367: }
368:
369: if (NULL == (ps->cur = ps->cur->parent))
370: ps->node = NODE_ROOT;
371: else
372: ps->node = ps->cur->node;
373: }
374:
1.8 kristaps 375: /*
376: * Recursively free a node (NULL is ok).
377: */
1.1 kristaps 378: static void
379: pnode_free(struct pnode *pn)
380: {
381: struct pnode *pp;
1.12 kristaps 382: struct pattr *ap;
1.1 kristaps 383:
384: if (NULL == pn)
385: return;
386:
387: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
388: TAILQ_REMOVE(&pn->childq, pp, child);
389: pnode_free(pp);
390: }
391:
1.12 kristaps 392: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
393: TAILQ_REMOVE(&pn->attrq, ap, child);
394: free(ap->rawval);
395: free(ap);
396: }
397:
1.1 kristaps 398: free(pn->b);
399: free(pn);
400: }
401:
1.8 kristaps 402: /*
403: * Unlink a node from its parent and pnode_free() it.
404: */
1.1 kristaps 405: static void
406: pnode_unlink(struct pnode *pn)
407: {
408:
409: if (NULL != pn->parent)
410: TAILQ_REMOVE(&pn->parent->childq, pn, child);
411: pnode_free(pn);
412: }
413:
1.8 kristaps 414: /*
415: * Unlink all children of a node and pnode_free() them.
416: */
1.1 kristaps 417: static void
1.4 kristaps 418: pnode_unlinksub(struct pnode *pn)
419: {
420:
421: while ( ! TAILQ_EMPTY(&pn->childq))
422: pnode_unlink(TAILQ_FIRST(&pn->childq));
423: }
424:
1.8 kristaps 425: /*
426: * Reset the lookaside buffer.
427: */
1.4 kristaps 428: static void
1.1 kristaps 429: bufclear(struct parse *p)
430: {
431:
432: p->b[p->bsz = 0] = '\0';
433: }
434:
1.8 kristaps 435: /*
436: * Append NODE_TEXT contents to the current buffer, reallocating its
437: * size if necessary.
438: * The buffer is ALWAYS nil-terminated.
439: */
1.1 kristaps 440: static void
441: bufappend(struct parse *p, struct pnode *pn)
442: {
443:
444: assert(NODE_TEXT == pn->node);
445: if (p->bsz + pn->bsz + 1 > p->mbsz) {
446: p->mbsz = p->bsz + pn->bsz + 1;
447: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
448: perror(NULL);
449: exit(EXIT_FAILURE);
450: }
451: }
452: memcpy(p->b + p->bsz, pn->b, pn->bsz);
453: p->bsz += pn->bsz;
454: p->b[p->bsz] = '\0';
455: }
456:
1.8 kristaps 457: /*
458: * Recursively append all NODE_TEXT nodes to the buffer.
459: * This descends into non-text nodes, but doesn't do anything beyond
460: * them.
461: * In other words, this is a recursive text grok.
462: */
1.3 kristaps 463: static void
464: bufappend_r(struct parse *p, struct pnode *pn)
465: {
466: struct pnode *pp;
467:
468: if (NODE_TEXT == pn->node)
469: bufappend(p, pn);
470: TAILQ_FOREACH(pp, &pn->childq, child)
471: bufappend_r(p, pp);
472: }
473:
1.12 kristaps 474: #define MACROLINE_NORM 0
475: #define MACROLINE_UPPER 1
1.1 kristaps 476: /*
1.8 kristaps 477: * Recursively print text presumably on a macro line.
1.1 kristaps 478: * Convert all whitespace to regular spaces.
479: */
480: static void
1.12 kristaps 481: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 482: {
483: char *cp;
484:
1.13 kristaps 485: if (0 == p->newln)
486: putchar(' ');
487:
1.1 kristaps 488: bufclear(p);
1.3 kristaps 489: bufappend_r(p, pn);
1.1 kristaps 490:
491: /* Convert all space to spaces. */
492: for (cp = p->b; '\0' != *cp; cp++)
493: if (isspace((int)*cp))
494: *cp = ' ';
495:
496: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 497: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 498: for ( ; '\0' != *cp; cp++) {
499: /* Escape us if we look like a macro. */
500: if ((cp == p->b || ' ' == *(cp - 1)) &&
501: isupper((int)*cp) &&
502: '\0' != *(cp + 1) &&
503: islower((int)*(cp + 1)) &&
504: ('\0' == *(cp + 2) ||
505: ' ' == *(cp + 2) ||
506: (islower((int)*(cp + 2)) &&
507: ('\0' == *(cp + 3) ||
508: ' ' == *(cp + 3)))))
509: fputs("\\&", stdout);
1.12 kristaps 510: if (MACROLINE_UPPER & fl)
511: putchar(toupper((int)*cp));
512: else
513: putchar((int)*cp);
1.1 kristaps 514: /* If we're a character escape, escape us. */
515: if ('\\' == *cp)
516: putchar('e');
517: }
518: }
519:
1.12 kristaps 520: static void
521: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
522: {
523:
524: pnode_printmacrolinetext(p, pn, 0);
525: }
526:
1.1 kristaps 527: /*
528: * Just pnode_printmacrolinepart() but with a newline.
529: * If no text, just the newline.
530: */
531: static void
532: pnode_printmacroline(struct parse *p, struct pnode *pn)
533: {
534:
1.13 kristaps 535: assert(0 == p->newln);
1.12 kristaps 536: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 537: putchar('\n');
1.13 kristaps 538: p->newln = 1;
1.1 kristaps 539: }
540:
1.10 kristaps 541: static void
542: pnode_printmopen(struct parse *p)
543: {
544: if (p->newln) {
545: putchar('.');
546: p->newln = 0;
547: } else
548: putchar(' ');
549: }
550:
551: static void
552: pnode_printmclose(struct parse *p, int sv)
553: {
554:
555: if (sv && ! p->newln) {
556: putchar('\n');
557: p->newln = 1;
558: }
559: }
560:
1.8 kristaps 561: /*
1.10 kristaps 562: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 563: */
1.1 kristaps 564: static void
1.6 kristaps 565: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
566: {
567: struct pnode *pp;
568:
1.10 kristaps 569: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 570: if (NODE_TITLE == pp->node) {
571: pnode_unlink(pp);
1.10 kristaps 572: return;
1.6 kristaps 573: }
574: }
575:
1.8 kristaps 576: /*
577: * Start a hopefully-named `Sh' section.
578: */
1.6 kristaps 579: static void
1.1 kristaps 580: pnode_printrefsect(struct parse *p, struct pnode *pn)
581: {
582: struct pnode *pp;
583:
584: TAILQ_FOREACH(pp, &pn->childq, child)
585: if (NODE_TITLE == pp->node)
586: break;
587:
1.20 ! kristaps 588: if (NODE_REFSECT1 == pn->node)
! 589: fputs(".Sh", stdout);
! 590: else
! 591: fputs(".Ss", stdout);
! 592:
1.13 kristaps 593: p->newln = 0;
1.4 kristaps 594:
1.5 kristaps 595: if (NULL != pp) {
1.20 ! kristaps 596: pnode_printmacrolinetext(p, pp,
! 597: NODE_REFSECT1 == pn->node ?
! 598: MACROLINE_UPPER : 0);
1.18 kristaps 599: pnode_printmclose(p, 1);
1.5 kristaps 600: pnode_unlink(pp);
1.13 kristaps 601: } else {
1.4 kristaps 602: puts("UNKNOWN");
1.13 kristaps 603: p->newln = 1;
604: }
1.1 kristaps 605: }
606:
1.8 kristaps 607: /*
608: * Start a reference, extracting the title and volume.
609: */
1.1 kristaps 610: static void
611: pnode_printciterefentry(struct parse *p, struct pnode *pn)
612: {
613: struct pnode *pp, *title, *manvol;
614:
615: title = manvol = NULL;
1.13 kristaps 616: assert(p->newln);
1.1 kristaps 617: TAILQ_FOREACH(pp, &pn->childq, child)
618: if (NODE_MANVOLNUM == pp->node)
619: manvol = pp;
620: else if (NODE_REFENTRYTITLE == pp->node)
621: title = pp;
622:
1.13 kristaps 623: fputs(".Xr", stdout);
624: p->newln = 0;
1.4 kristaps 625:
1.1 kristaps 626: if (NULL != title) {
627: pnode_printmacrolinepart(p, title);
628: } else
1.13 kristaps 629: fputs(" unknown ", stdout);
1.4 kristaps 630:
1.13 kristaps 631: if (NULL == manvol) {
632: puts(" 1");
633: p->newln = 1;
634: } else
1.1 kristaps 635: pnode_printmacroline(p, manvol);
636: }
637:
638: static void
639: pnode_printrefmeta(struct parse *p, struct pnode *pn)
640: {
641: struct pnode *pp, *title, *manvol;
642:
643: title = manvol = NULL;
1.13 kristaps 644: assert(p->newln);
1.1 kristaps 645: TAILQ_FOREACH(pp, &pn->childq, child)
646: if (NODE_MANVOLNUM == pp->node)
647: manvol = pp;
648: else if (NODE_REFENTRYTITLE == pp->node)
649: title = pp;
650:
1.2 kristaps 651: puts(".Dd $Mdocdate" "$");
1.13 kristaps 652: fputs(".Dt", stdout);
653: p->newln = 0;
1.1 kristaps 654:
1.13 kristaps 655: if (NULL != title)
1.12 kristaps 656: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.13 kristaps 657: else
658: fputs(" UNKNOWN ", stdout);
659:
660: if (NULL == manvol) {
661: puts(" 1");
662: p->newln = 1;
1.1 kristaps 663: } else
664: pnode_printmacroline(p, manvol);
665:
666: puts(".Os");
667: }
668:
1.3 kristaps 669: static void
670: pnode_printfuncdef(struct parse *p, struct pnode *pn)
671: {
672: struct pnode *pp, *ftype, *func;
673:
1.13 kristaps 674: assert(p->newln);
1.3 kristaps 675: ftype = func = NULL;
676: TAILQ_FOREACH(pp, &pn->childq, child)
677: if (NODE_TEXT == pp->node)
678: ftype = pp;
679: else if (NODE_FUNCTION == pp->node)
680: func = pp;
681:
682: if (NULL != ftype) {
1.13 kristaps 683: fputs(".Ft", stdout);
684: p->newln = 0;
1.3 kristaps 685: pnode_printmacroline(p, ftype);
686: }
687:
688: if (NULL != func) {
1.13 kristaps 689: fputs(".Fo", stdout);
690: p->newln = 0;
1.3 kristaps 691: pnode_printmacroline(p, func);
1.13 kristaps 692: } else {
1.3 kristaps 693: puts(".Fo UNKNOWN");
1.13 kristaps 694: p->newln = 1;
695: }
1.3 kristaps 696: }
697:
698: static void
699: pnode_printparamdef(struct parse *p, struct pnode *pn)
700: {
701: struct pnode *pp, *ptype, *param;
702:
1.13 kristaps 703: assert(p->newln);
1.3 kristaps 704: ptype = param = NULL;
705: TAILQ_FOREACH(pp, &pn->childq, child)
706: if (NODE_TEXT == pp->node)
707: ptype = pp;
708: else if (NODE_PARAMETER == pp->node)
709: param = pp;
710:
711: fputs(".Fa \"", stdout);
1.13 kristaps 712: p->newln = 0;
1.3 kristaps 713: if (NULL != ptype) {
714: pnode_printmacrolinepart(p, ptype);
715: putchar(' ');
716: }
717:
718: if (NULL != param)
719: pnode_printmacrolinepart(p, param);
720:
721: puts("\"");
1.13 kristaps 722: p->newln = 1;
1.3 kristaps 723: }
724:
725: static void
726: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
727: {
728: struct pnode *pp, *fdef;
729:
1.13 kristaps 730: assert(p->newln);
1.3 kristaps 731: TAILQ_FOREACH(fdef, &pn->childq, child)
732: if (NODE_FUNCDEF == fdef->node)
733: break;
734:
1.4 kristaps 735: if (NULL != fdef)
1.3 kristaps 736: pnode_printfuncdef(p, fdef);
1.4 kristaps 737: else
1.3 kristaps 738: puts(".Fo UNKNOWN");
739:
1.4 kristaps 740: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 741: if (NODE_PARAMDEF == pp->node)
742: pnode_printparamdef(p, pp);
743:
744: puts(".Fc");
1.13 kristaps 745: p->newln = 1;
1.3 kristaps 746: }
747:
1.10 kristaps 748: /*
749: * The <arg> element is more complicated than it should be because text
750: * nodes are treated like ".Ar foo", but non-text nodes need to be
751: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 kristaps 752: * This also handles the case of "repetition" (or in other words, the
753: * ellipsis following an argument) and optionality.
1.10 kristaps 754: */
1.4 kristaps 755: static void
1.10 kristaps 756: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 757: {
758: struct pnode *pp;
1.12 kristaps 759: struct pattr *ap;
760: int isop, isrep;
761:
762: isop = 1;
763: isrep = 0;
764: TAILQ_FOREACH(ap, &pn->attrq, child)
765: if (ATTRKEY_CHOICE == ap->key &&
766: (ATTRVAL_PLAIN == ap->val ||
767: ATTRVAL_REQ == ap->val))
768: isop = 0;
769: else if (ATTRKEY_REP == ap->key &&
770: (ATTRVAL_REPEAT == ap->val))
771: isrep = 1;
772:
773: if (isop) {
774: pnode_printmopen(p);
1.13 kristaps 775: fputs("Op", stdout);
1.12 kristaps 776: }
1.4 kristaps 777:
1.10 kristaps 778: TAILQ_FOREACH(pp, &pn->childq, child) {
779: if (NODE_TEXT == pp->node) {
780: pnode_printmopen(p);
1.13 kristaps 781: fputs("Ar", stdout);
1.10 kristaps 782: }
783: pnode_print(p, pp);
1.12 kristaps 784: if (NODE_TEXT == pp->node && isrep)
785: fputs("...", stdout);
1.10 kristaps 786: }
1.4 kristaps 787: }
788:
1.7 kristaps 789: /*
790: * Recursively search and return the first instance of "node".
791: */
792: static struct pnode *
793: pnode_findfirst(struct pnode *pn, enum nodeid node)
794: {
795: struct pnode *pp, *res;
796:
797: res = NULL;
798: TAILQ_FOREACH(pp, &pn->childq, child) {
799: res = pp->node == node ? pp :
800: pnode_findfirst(pp, node);
801: if (NULL != res)
802: break;
803: }
804:
805: return(res);
806: }
807:
808: static void
809: pnode_printprologue(struct parse *p, struct pnode *pn)
810: {
811: struct pnode *pp;
812:
1.9 kristaps 813: pp = NULL == p->root ? NULL :
814: pnode_findfirst(p->root, NODE_REFMETA);
815:
816: if (NULL != pp) {
1.7 kristaps 817: pnode_printrefmeta(p, pp);
818: pnode_unlink(pp);
819: } else {
820: puts(".\\\" Supplying bogus prologue...");
821: puts(".Dd $Mdocdate" "$");
822: puts(".Dt UNKNOWN 1");
823: puts(".Os");
824: }
825: }
826:
1.13 kristaps 827: static void
828: pnode_printvarlistentry(struct parse *p, struct pnode *pn)
829: {
830: struct pnode *pp;
831:
832: assert(p->newln);
833: TAILQ_FOREACH(pp, &pn->childq, child)
834: if (NODE_TERM == pp->node) {
835: fputs(".It", stdout);
836: p->newln = 0;
837: pnode_print(p, pp);
838: pnode_unlink(pp);
1.16 kristaps 839: pnode_printmclose(p, 1);
1.13 kristaps 840: return;
841: }
842:
843: puts(".It");
844: p->newln = 1;
845: }
846:
847: static void
1.16 kristaps 848: pnode_printitemizedlist(struct parse *p, struct pnode *pn)
849: {
850: struct pnode *pp;
851:
852: assert(p->newln);
853: TAILQ_FOREACH(pp, &pn->childq, child)
854: if (NODE_TITLE == pp->node) {
855: puts(".Pp");
856: pnode_print(p, pp);
857: pnode_unlink(pp);
858: }
859:
860: assert(p->newln);
861: puts(".Bl -item");
862: TAILQ_FOREACH(pp, &pn->childq, child) {
863: assert(p->newln);
864: puts(".It");
865: pnode_print(p, pp);
866: pnode_printmclose(p, 1);
867: }
868: assert(p->newln);
869: puts(".El");
870: }
871:
872: static void
1.13 kristaps 873: pnode_printvariablelist(struct parse *p, struct pnode *pn)
874: {
875: struct pnode *pp;
876:
877: assert(p->newln);
878: TAILQ_FOREACH(pp, &pn->childq, child)
879: if (NODE_TITLE == pp->node) {
880: puts(".Pp");
881: pnode_print(p, pp);
882: pnode_unlink(pp);
883: }
884:
885: assert(p->newln);
886: puts(".Bl -tag -width Ds");
887: TAILQ_FOREACH(pp, &pn->childq, child)
888: if (NODE_VARLISTENTRY != pp->node) {
889: assert(p->newln);
890: fputs(".It", stdout);
891: pnode_printmacroline(p, pp);
892: } else {
893: assert(p->newln);
894: pnode_print(p, pp);
895: }
896: assert(p->newln);
897: puts(".El");
898: }
899:
1.1 kristaps 900: /*
901: * Print a parsed node (or ignore it--whatever).
902: * This is a recursive function.
903: * FIXME: macro line continuation?
904: */
905: static void
906: pnode_print(struct parse *p, struct pnode *pn)
907: {
908: struct pnode *pp;
909: char *cp;
1.10 kristaps 910: int last, sv;
1.1 kristaps 911:
912: if (NULL == pn)
913: return;
914:
1.10 kristaps 915: sv = p->newln;
1.1 kristaps 916:
917: switch (pn->node) {
1.4 kristaps 918: case (NODE_ARG):
1.10 kristaps 919: pnode_printarg(p, pn);
1.4 kristaps 920: pnode_unlinksub(pn);
921: break;
1.1 kristaps 922: case (NODE_CITEREFENTRY):
1.10 kristaps 923: assert(p->newln);
1.1 kristaps 924: pnode_printciterefentry(p, pn);
1.4 kristaps 925: pnode_unlinksub(pn);
1.1 kristaps 926: break;
927: case (NODE_CODE):
1.10 kristaps 928: pnode_printmopen(p);
1.13 kristaps 929: fputs("Li", stdout);
1.4 kristaps 930: break;
931: case (NODE_COMMAND):
1.10 kristaps 932: pnode_printmopen(p);
1.13 kristaps 933: fputs("Nm", stdout);
934: break;
935: case (NODE_EMPHASIS):
936: pnode_printmopen(p);
937: fputs("Em", stdout);
1.1 kristaps 938: break;
1.17 kristaps 939: case (NODE_FILENAME):
940: pnode_printmopen(p);
941: fputs("Pa", stdout);
942: break;
1.3 kristaps 943: case (NODE_FUNCTION):
1.10 kristaps 944: pnode_printmopen(p);
1.13 kristaps 945: fputs("Fn", stdout);
1.3 kristaps 946: break;
947: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 948: assert(p->newln);
1.3 kristaps 949: pnode_printfuncprototype(p, pn);
1.4 kristaps 950: pnode_unlinksub(pn);
1.3 kristaps 951: break;
1.1 kristaps 952: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 953: pnode_printmopen(p);
1.13 kristaps 954: fputs("Fd", stdout);
1.16 kristaps 955: break;
956: case (NODE_ITEMIZEDLIST):
957: assert(p->newln);
958: pnode_printitemizedlist(p, pn);
1.10 kristaps 959: break;
1.19 kristaps 960: case (NODE_LITERAL):
961: pnode_printmopen(p);
962: fputs("Li", stdout);
963: break;
1.10 kristaps 964: case (NODE_OPTION):
965: pnode_printmopen(p);
1.13 kristaps 966: fputs("Fl", stdout);
1.1 kristaps 967: break;
968: case (NODE_PARA):
1.10 kristaps 969: assert(p->newln);
1.13 kristaps 970: if (NULL != pn->parent &&
971: NODE_LISTITEM == pn->parent->node)
972: break;
1.1 kristaps 973: puts(".Pp");
1.3 kristaps 974: break;
975: case (NODE_PARAMETER):
1.10 kristaps 976: /* Suppress non-text children... */
977: pnode_printmopen(p);
978: fputs("Fa \"", stdout);
1.3 kristaps 979: pnode_printmacrolinepart(p, pn);
980: puts("\"");
1.4 kristaps 981: pnode_unlinksub(pn);
1.1 kristaps 982: break;
983: case (NODE_PROGRAMLISTING):
1.10 kristaps 984: assert(p->newln);
1.1 kristaps 985: puts(".Bd -literal");
1.15 kristaps 986: break;
987: case (NODE_REFENTRYINFO):
988: /* Suppress. */
989: pnode_unlinksub(pn);
1.1 kristaps 990: break;
991: case (NODE_REFMETA):
1.7 kristaps 992: abort();
1.1 kristaps 993: break;
994: case (NODE_REFNAME):
1.10 kristaps 995: /* Suppress non-text children... */
996: pnode_printmopen(p);
1.13 kristaps 997: fputs("Nm", stdout);
998: p->newln = 0;
1.10 kristaps 999: pnode_printmacrolinepart(p, pn);
1.4 kristaps 1000: pnode_unlinksub(pn);
1.10 kristaps 1001: break;
1.1 kristaps 1002: case (NODE_REFNAMEDIV):
1.10 kristaps 1003: assert(p->newln);
1.1 kristaps 1004: puts(".Sh NAME");
1005: break;
1006: case (NODE_REFPURPOSE):
1.10 kristaps 1007: assert(p->newln);
1.13 kristaps 1008: pnode_printmopen(p);
1009: fputs("Nd", stdout);
1.10 kristaps 1010: break;
1.1 kristaps 1011: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1012: assert(p->newln);
1.6 kristaps 1013: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1014: puts(".Sh SYNOPSIS");
1.1 kristaps 1015: break;
1016: case (NODE_REFSECT1):
1.20 ! kristaps 1017: /* FALLTHROUGH */
! 1018: case (NODE_REFSECT2):
1.10 kristaps 1019: assert(p->newln);
1.1 kristaps 1020: pnode_printrefsect(p, pn);
1021: break;
1.13 kristaps 1022: case (NODE_REPLACEABLE):
1023: pnode_printmopen(p);
1024: fputs("Ar", stdout);
1025: break;
1.19 kristaps 1026: case (NODE_SBR):
1027: assert(p->newln);
1028: puts(".br");
1029: break;
1.8 kristaps 1030: case (NODE_STRUCTNAME):
1.10 kristaps 1031: pnode_printmopen(p);
1.13 kristaps 1032: fputs("Vt", stdout);
1.10 kristaps 1033: break;
1.1 kristaps 1034: case (NODE_TEXT):
1.13 kristaps 1035: if (0 == p->newln)
1036: putchar(' ');
1.1 kristaps 1037: bufclear(p);
1038: bufappend(p, pn);
1039: /*
1040: * Output all characters, squeezing out whitespace
1041: * between newlines.
1042: * XXX: all whitespace, including tabs (?).
1043: * Remember to escape control characters and escapes.
1044: */
1.10 kristaps 1045: assert(p->bsz);
1.20 ! kristaps 1046: cp = p->b;
! 1047: /*
! 1048: * There's often a superfluous "-" in its <option> tags
! 1049: * before the actual flags themselves.
! 1050: * "Fl" does this for us, so remove it.
! 1051: */
! 1052: if (NULL != pn->parent &&
! 1053: NODE_OPTION == pn->parent->node &&
! 1054: '-' == *cp)
! 1055: cp++;
! 1056: for (last = '\n'; '\0' != *cp; ) {
1.1 kristaps 1057: if ('\n' == last) {
1058: /* Consume all whitespace. */
1059: if (isspace((int)*cp)) {
1060: while (isspace((int)*cp))
1061: cp++;
1062: continue;
1063: } else if ('\'' == *cp || '.' == *cp)
1064: fputs("\\&", stdout);
1065: }
1066: putchar(last = *cp++);
1067: /* If we're a character escape, escape us. */
1068: if ('\\' == last)
1069: putchar('e');
1070: }
1.10 kristaps 1071: p->newln = 0;
1.1 kristaps 1072: break;
1.13 kristaps 1073: case (NODE_VARIABLELIST):
1074: assert(p->newln);
1075: pnode_printvariablelist(p, pn);
1076: pnode_unlinksub(pn);
1077: break;
1078: case (NODE_VARLISTENTRY):
1079: assert(p->newln);
1080: pnode_printvarlistentry(p, pn);
1081: break;
1.1 kristaps 1082: default:
1083: break;
1084: }
1085:
1086: TAILQ_FOREACH(pp, &pn->childq, child)
1087: pnode_print(p, pp);
1088:
1089: switch (pn->node) {
1.10 kristaps 1090: case (NODE_ARG):
1091: case (NODE_CODE):
1092: case (NODE_COMMAND):
1.13 kristaps 1093: case (NODE_EMPHASIS):
1.17 kristaps 1094: case (NODE_FILENAME):
1.10 kristaps 1095: case (NODE_FUNCTION):
1096: case (NODE_FUNCSYNOPSISINFO):
1.19 kristaps 1097: case (NODE_LITERAL):
1.10 kristaps 1098: case (NODE_OPTION):
1099: case (NODE_PARAMETER):
1.13 kristaps 1100: case (NODE_REPLACEABLE):
1101: case (NODE_REFPURPOSE):
1.10 kristaps 1102: case (NODE_STRUCTNAME):
1103: case (NODE_TEXT):
1104: pnode_printmclose(p, sv);
1105: break;
1.12 kristaps 1106: case (NODE_REFNAME):
1107: /*
1108: * If we're in the NAME macro and we have multiple
1109: * <refname> macros in sequence, then print out a
1110: * trailing comma before the newline.
1111: */
1112: if (NULL != pn->parent &&
1113: NODE_REFNAMEDIV == pn->parent->node &&
1114: NULL != TAILQ_NEXT(pn, child) &&
1115: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
1116: fputs(" ,", stdout);
1117: pnode_printmclose(p, sv);
1118: break;
1.1 kristaps 1119: case (NODE_PROGRAMLISTING):
1.10 kristaps 1120: assert(p->newln);
1.1 kristaps 1121: puts(".Ed");
1.10 kristaps 1122: p->newln = 1;
1.1 kristaps 1123: break;
1124: default:
1125: break;
1126: }
1127: }
1128:
1129: /*
1130: * Loop around the read buffer until we've drained it of all data.
1131: * Invoke the parser context with each buffer fill.
1132: */
1133: static int
1134: readfile(XML_Parser xp, int fd,
1135: char *b, size_t bsz, const char *fn)
1136: {
1137: struct parse p;
1138: int rc;
1139: ssize_t ssz;
1140:
1141: memset(&p, 0, sizeof(struct parse));
1142:
1143: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 kristaps 1144: p.fname = fn;
1145: p.xml = xp;
1.1 kristaps 1146:
1147: XML_SetCharacterDataHandler(xp, xml_char);
1148: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1149: XML_SetUserData(xp, &p);
1150:
1151: while ((ssz = read(fd, b, bsz)) >= 0) {
1152: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1153: fprintf(stderr, "%s: %s\n", fn,
1154: XML_ErrorString
1155: (XML_GetErrorCode(xp)));
1156: else if ( ! p.stop && ssz > 0)
1157: continue;
1158: /*
1159: * Exit when we've read all or errors have occured
1160: * during the parse sequence.
1161: */
1.10 kristaps 1162: p.newln = 1;
1.7 kristaps 1163: pnode_printprologue(&p, p.root);
1.1 kristaps 1164: pnode_print(&p, p.root);
1165: pnode_free(p.root);
1166: free(p.b);
1167: return(0 != rc && ! p.stop);
1168: }
1169:
1170: /* Read error has occured. */
1171: perror(fn);
1172: pnode_free(p.root);
1173: free(p.b);
1174: return(0);
1175: }
1176:
1177: int
1178: main(int argc, char *argv[])
1179: {
1180: XML_Parser xp;
1181: const char *fname;
1182: char *buf;
1183: int fd, rc;
1184:
1185: fname = "-";
1186: xp = NULL;
1187: buf = NULL;
1188: rc = 0;
1189:
1190: if (-1 != getopt(argc, argv, ""))
1191: return(EXIT_FAILURE);
1192:
1193: argc -= optind;
1194: argv += optind;
1195:
1196: if (argc > 1)
1197: return(EXIT_FAILURE);
1198: else if (argc > 0)
1199: fname = argv[0];
1200:
1201: /* Read from stdin or a file. */
1202: fd = 0 == strcmp(fname, "-") ?
1203: STDIN_FILENO : open(fname, O_RDONLY, 0);
1204:
1205: /*
1206: * Open file for reading.
1207: * Allocate a read buffer.
1208: * Create the parser context.
1209: * Dive directly into the parse.
1210: */
1211: if (-1 == fd)
1212: perror(fname);
1213: else if (NULL == (buf = malloc(4096)))
1214: perror(NULL);
1215: else if (NULL == (xp = XML_ParserCreate(NULL)))
1216: perror(NULL);
1217: else if ( ! readfile(xp, fd, buf, 4096, fname))
1218: rc = 1;
1219:
1220: XML_ParserFree(xp);
1221: free(buf);
1222: if (STDIN_FILENO != fd)
1223: close(fd);
1224: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1225: }
CVSweb