Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.8
1.8 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.7 2014/03/28 10:37:50 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
/*
 * Every node type the parser recognises.
 * NODE_ROOT must come first (it is the zero value and doubles as the
 * "not inside a tree yet" state); the remaining identifiers are kept in
 * alphabetical order.
 * NODE__MAX is the number of node types and sizes the nodes[] table,
 * which is indexed by these values.
 */
enum	nodeid {
	NODE_ROOT = 0,
	NODE_ARG,
	NODE_CITEREFENTRY,
	NODE_CMDSYNOPSIS,
	NODE_CODE,
	NODE_COMMAND,
	NODE_FUNCDEF,
	NODE_FUNCPROTOTYPE,
	NODE_FUNCSYNOPSIS,
	NODE_FUNCSYNOPSISINFO,
	NODE_FUNCTION,
	NODE_MANVOLNUM,
	NODE_OPTION,
	NODE_PARA,
	NODE_PARAMDEF,
	NODE_PARAMETER,
	NODE_PROGRAMLISTING,
	NODE_REFCLASS,
	NODE_REFDESCRIPTOR,
	NODE_REFENTRY,
	NODE_REFENTRYTITLE,
	NODE_REFMETA,
	NODE_REFMISCINFO,
	NODE_REFNAME,
	NODE_REFNAMEDIV,
	NODE_REFPURPOSE,
	NODE_REFSECT1,
	NODE_REFSYNOPSISDIV,
	NODE_STRUCTNAME,
	NODE_SYNOPSIS,
	NODE_TEXT,
	NODE_TITLE,
	NODE__MAX
};
68:
/*
 * Global parse state.
 * Keep this as simple and small as possible.
 * One instance is registered as the expat user data (so the XML
 * callbacks receive it as their "arg") and is afterwards passed to the
 * printing functions, which use "b" as a scratch buffer.
 */
struct	parse {
	enum nodeid	 node; /* current (NODE_ROOT if pre-tree) */
	int		 stop; /* should we stop now? */
	struct pnode	*root; /* root of parse tree */
	struct pnode	*cur; /* current node in tree */
	char		*b; /* nil-terminated buffer for pre-print */
	size_t		 bsz; /* current length of b (terminator excluded) */
	size_t		 mbsz; /* number of bytes allocated for b */
};
82:
/*
 * Flag bits for struct node.
 */
#define	NODE_IGNTEXT	1 /* ignore all contained text */

/*
 * Static description of one node type: how the element is spelled in
 * DocBook and how the parser treats character data found inside it.
 */
struct	node {
	const char	*name; /* docbook element name */
	unsigned int	 flags; /* zero or NODE_IGNTEXT */
};
88:
/* Queue head type used for a node's list of children. */
TAILQ_HEAD(pnodeq, pnode);

/*
 * A node of the parse tree.
 * Element nodes carry children in "childq"; NODE_TEXT nodes carry the
 * raw character data in "b".
 * The data in "b" is NOT nil-terminated: xml_char() appends bytes and
 * tracks the length in "bsz" only.
 */
struct	pnode {
	enum nodeid	 node; /* node type */
	char		*b; /* binary data buffer */
	size_t		 bsz; /* data buffer size */
	struct pnode	*parent; /* parent (or NULL if top) */
	struct pnodeq	 childq; /* queue of children */
	TAILQ_ENTRY(pnode) child; /* linkage in the parent's childq */
};
99:
100: static const struct node nodes[NODE__MAX] = {
101: { NULL, 0 },
1.4 kristaps 102: { "arg", 0 },
1.1 kristaps 103: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 104: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 105: { "code", 0 },
1.4 kristaps 106: { "command", 0 },
1.3 kristaps 107: { "funcdef", 0 },
108: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 109: { "funcsynopsis", NODE_IGNTEXT },
110: { "funcsynopsisinfo", 0 },
1.3 kristaps 111: { "function", 0 },
1.1 kristaps 112: { "manvolnum", 0 },
1.4 kristaps 113: { "option", 0 },
1.1 kristaps 114: { "para", 0 },
1.3 kristaps 115: { "paramdef", 0 },
116: { "parameter", 0 },
1.1 kristaps 117: { "programlisting", 0 },
118: { "refclass", NODE_IGNTEXT },
119: { "refdescriptor", NODE_IGNTEXT },
120: { "refentry", NODE_IGNTEXT },
121: { "refentrytitle", 0 },
122: { "refmeta", NODE_IGNTEXT },
123: { "refmiscinfo", NODE_IGNTEXT },
124: { "refname", 0 },
125: { "refnamediv", NODE_IGNTEXT },
126: { "refpurpose", 0 },
127: { "refsect1", 0 },
128: { "refsynopsisdiv", NODE_IGNTEXT },
1.8 ! kristaps 129: { "structname", 0 },
1.1 kristaps 130: { "synopsis", 0 },
131: { NULL, 0 },
132: { "title", 0 },
133: };
134:
135: /*
136: * Look up whether "parent" is a valid parent for "node".
1.8 ! kristaps 137: * This is sucked directly from the DocBook specification: look at the
! 138: * "children" and "parent" sections of each node.
1.1 kristaps 139: */
140: static int
141: isparent(enum nodeid node, enum nodeid parent)
142: {
143:
144: switch (node) {
145: case (NODE_ROOT):
146: return(0);
1.4 kristaps 147: case (NODE_ARG):
148: switch (parent) {
149: case (NODE_ARG):
150: case (NODE_CMDSYNOPSIS):
151: return(1);
152: default:
153: break;
154: }
155: return(0);
1.1 kristaps 156: case (NODE_CITEREFENTRY):
157: switch (parent) {
158: case (NODE_FUNCSYNOPSISINFO):
159: case (NODE_PARA):
160: case (NODE_PROGRAMLISTING):
161: case (NODE_REFDESCRIPTOR):
162: case (NODE_REFENTRYTITLE):
163: case (NODE_REFNAME):
164: case (NODE_REFPURPOSE):
165: case (NODE_SYNOPSIS):
166: case (NODE_TITLE):
167: return(1);
168: default:
169: break;
170: }
171: return(0);
1.4 kristaps 172: case (NODE_CMDSYNOPSIS):
173: switch (parent) {
174: case (NODE_PARA):
175: case (NODE_REFSECT1):
176: case (NODE_REFSYNOPSISDIV):
177: return(1);
178: default:
179: break;
180: }
181: return(0);
1.1 kristaps 182: case (NODE_CODE):
183: switch (parent) {
184: case (NODE_FUNCSYNOPSISINFO):
185: case (NODE_PARA):
186: case (NODE_PROGRAMLISTING):
187: case (NODE_REFDESCRIPTOR):
188: case (NODE_REFENTRYTITLE):
189: case (NODE_REFNAME):
190: case (NODE_REFPURPOSE):
191: case (NODE_SYNOPSIS):
192: case (NODE_TITLE):
193: return(1);
194: default:
195: break;
196: }
197: return(0);
1.4 kristaps 198: case (NODE_COMMAND):
199: switch (parent) {
200: case (NODE_CMDSYNOPSIS):
201: case (NODE_FUNCSYNOPSISINFO):
202: case (NODE_PARA):
203: case (NODE_PROGRAMLISTING):
204: case (NODE_REFDESCRIPTOR):
205: case (NODE_REFENTRYTITLE):
206: case (NODE_REFNAME):
207: case (NODE_REFPURPOSE):
208: case (NODE_SYNOPSIS):
209: case (NODE_TITLE):
210: return(1);
211: default:
212: break;
213: }
214: return(0);
1.3 kristaps 215: case (NODE_FUNCDEF):
216: return(NODE_FUNCPROTOTYPE == parent);
217: case (NODE_FUNCPROTOTYPE):
218: return(NODE_FUNCSYNOPSIS == parent);
219: case (NODE_FUNCSYNOPSIS):
220: switch (parent) {
221: case (NODE_PARA):
222: case (NODE_REFSECT1):
223: case (NODE_REFSYNOPSISDIV):
224: return(1);
225: default:
226: break;
227: }
228: return(0);
229: case (NODE_FUNCSYNOPSISINFO):
230: return(NODE_FUNCSYNOPSIS == parent);
231: case (NODE_FUNCTION):
232: switch (parent) {
233: case (NODE_CODE):
234: case (NODE_FUNCDEF):
235: case (NODE_FUNCSYNOPSISINFO):
236: case (NODE_PARA):
1.4 kristaps 237: case (NODE_PROGRAMLISTING):
1.3 kristaps 238: case (NODE_REFDESCRIPTOR):
239: case (NODE_REFENTRYTITLE):
240: case (NODE_REFNAME):
241: case (NODE_REFPURPOSE):
242: case (NODE_SYNOPSIS):
243: case (NODE_TITLE):
244: return(1);
245: default:
246: break;
247: }
248: return(0);
1.1 kristaps 249: case (NODE_MANVOLNUM):
250: switch (parent) {
251: case (NODE_CITEREFENTRY):
252: case (NODE_REFMETA):
253: return(1);
254: default:
255: break;
256: }
257: return(0);
1.4 kristaps 258: case (NODE_OPTION):
259: switch (parent) {
260: case (NODE_ARG):
261: case (NODE_FUNCSYNOPSISINFO):
262: case (NODE_PARA):
263: case (NODE_PROGRAMLISTING):
264: case (NODE_REFDESCRIPTOR):
265: case (NODE_REFENTRYTITLE):
266: case (NODE_REFNAME):
267: case (NODE_REFPURPOSE):
268: case (NODE_SYNOPSIS):
269: case (NODE_TITLE):
270: return(1);
271: default:
272: break;
273: }
274: return(0);
1.3 kristaps 275: case (NODE_PARA):
1.1 kristaps 276: switch (parent) {
277: case (NODE_REFSECT1):
278: case (NODE_REFSYNOPSISDIV):
279: return(1);
280: default:
281: break;
282: }
283: return(0);
1.3 kristaps 284: case (NODE_PARAMDEF):
285: return(NODE_FUNCPROTOTYPE == parent);
286: case (NODE_PARAMETER):
1.1 kristaps 287: switch (parent) {
1.3 kristaps 288: case (NODE_CODE):
289: case (NODE_FUNCSYNOPSISINFO):
290: case (NODE_PARA):
291: case (NODE_PARAMDEF):
1.4 kristaps 292: case (NODE_PROGRAMLISTING):
1.3 kristaps 293: case (NODE_REFDESCRIPTOR):
294: case (NODE_REFENTRYTITLE):
295: case (NODE_REFNAME):
296: case (NODE_REFPURPOSE):
297: case (NODE_SYNOPSIS):
298: case (NODE_TITLE):
1.1 kristaps 299: return(1);
300: default:
301: break;
302: }
303: return(0);
304: case (NODE_PROGRAMLISTING):
305: switch (parent) {
306: case (NODE_PARA):
307: case (NODE_REFSECT1):
308: case (NODE_REFSYNOPSISDIV):
309: return(1);
310: default:
311: break;
312: }
313: return(0);
314: case (NODE_REFCLASS):
315: return(parent == NODE_REFNAMEDIV);
316: case (NODE_REFDESCRIPTOR):
317: return(parent == NODE_REFNAMEDIV);
318: case (NODE_REFENTRY):
319: return(parent == NODE_ROOT);
320: case (NODE_REFENTRYTITLE):
321: switch (parent) {
322: case (NODE_CITEREFENTRY):
323: case (NODE_REFMETA):
324: return(1);
325: default:
326: break;
327: }
328: case (NODE_REFMETA):
329: return(parent == NODE_REFENTRY);
330: case (NODE_REFMISCINFO):
331: return(parent == NODE_REFMETA);
332: case (NODE_REFNAME):
333: return(parent == NODE_REFNAMEDIV);
334: case (NODE_REFNAMEDIV):
335: return(parent == NODE_REFENTRY);
336: case (NODE_REFPURPOSE):
337: return(parent == NODE_REFNAMEDIV);
338: case (NODE_REFSECT1):
339: return(parent == NODE_REFENTRY);
340: case (NODE_REFSYNOPSISDIV):
341: return(parent == NODE_REFENTRY);
1.8 ! kristaps 342: case (NODE_STRUCTNAME):
! 343: switch (parent) {
! 344: case (NODE_CODE):
! 345: case (NODE_FUNCSYNOPSISINFO):
! 346: case (NODE_FUNCTION):
! 347: case (NODE_OPTION):
! 348: case (NODE_PARA):
! 349: case (NODE_PARAMETER):
! 350: case (NODE_PROGRAMLISTING):
! 351: case (NODE_REFDESCRIPTOR):
! 352: case (NODE_REFENTRYTITLE):
! 353: case (NODE_REFNAME):
! 354: case (NODE_REFPURPOSE):
! 355: case (NODE_SYNOPSIS):
! 356: case (NODE_TITLE):
! 357: return(1);
! 358: default:
! 359: break;
! 360: }
! 361: return(0);
1.1 kristaps 362: case (NODE_SYNOPSIS):
363: switch (parent) {
364: case (NODE_REFSYNOPSISDIV):
365: case (NODE_REFSECT1):
366: return(1);
367: default:
368: break;
369: }
370: return(0);
371: case (NODE_TITLE):
372: switch (parent) {
373: case (NODE_REFSECT1):
374: case (NODE_REFSYNOPSISDIV):
375: return(1);
376: default:
377: break;
378: }
379: return(0);
380: case (NODE_TEXT):
381: return(1);
382: case (NODE__MAX):
383: break;
384: }
385:
386: abort();
387: return(0);
388: }
389:
1.8 ! kristaps 390: /*
! 391: * Process a stream of characters.
! 392: * We store text as nodes in and of themselves.
! 393: * If a text node is already open, append to it.
! 394: * If it's not open, open one under the current context.
! 395: */
1.1 kristaps 396: static void
397: xml_char(void *arg, const XML_Char *p, int sz)
398: {
399: struct parse *ps = arg;
400: struct pnode *dat;
1.4 kristaps 401: int i;
1.1 kristaps 402:
403: /* Stopped or no tree yet. */
404: if (ps->stop || NODE_ROOT == ps->node)
405: return;
406:
407: /* Not supposed to be collecting text. */
408: assert(NULL != ps->cur);
409: if (NODE_IGNTEXT & nodes[ps->node].flags)
410: return;
411:
412: /*
413: * Are we in the midst of processing text?
414: * If we're not processing text right now, then create a text
415: * node for doing so.
1.4 kristaps 416: * However, don't do so unless we have some non-whitespace to
417: * process!
1.1 kristaps 418: */
419: if (NODE_TEXT != ps->node) {
1.4 kristaps 420: for (i = 0; i < sz; i++)
421: if ( ! isspace((int)p[i]))
422: break;
423: if (i == sz)
424: return;
1.1 kristaps 425: dat = calloc(1, sizeof(struct pnode));
426: if (NULL == dat) {
427: perror(NULL);
428: exit(EXIT_FAILURE);
429: }
430:
431: dat->node = ps->node = NODE_TEXT;
432: dat->parent = ps->cur;
433: TAILQ_INIT(&dat->childq);
434: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
435: ps->cur = dat;
436: assert(NULL != ps->root);
437:
438: }
439:
440: /* Append to current buffer. */
441: assert(sz >= 0);
442: ps->cur->b = realloc(ps->cur->b,
443: ps->cur->bsz + (size_t)sz);
444: if (NULL == ps->cur->b) {
445: perror(NULL);
446: exit(EXIT_FAILURE);
447: }
448: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
449: ps->cur->bsz += (size_t)sz;
450: }
451:
452: /*
453: * Begin an element.
454: * First, look for the element.
455: * If we don't find it and we're not parsing, keep going.
1.8 ! kristaps 456: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 457: * If we find it but we're not parsing yet (i.e., it's not a refentry
458: * and thus out of context), keep going.
1.8 ! kristaps 459: * If we find it and we're at the root and already have a tree, puke and
! 460: * exit (FIXME: I don't think this is right?).
! 461: * If we find it but we're parsing a text node, close out the text node,
! 462: * return to its parent, and keep going.
1.1 kristaps 463: * Make sure that the element is in the right context.
464: * Lastly, put the node onto our parse tree and continue.
465: */
466: static void
467: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
468: {
469: struct parse *ps = arg;
470: enum nodeid node;
471: struct pnode *dat;
472:
473: if (ps->stop)
474: return;
475:
476: /* Close out text node, if applicable... */
477: if (NODE_TEXT == ps->node) {
478: assert(NULL != ps->cur);
479: ps->cur = ps->cur->parent;
480: assert(NULL != ps->cur);
481: ps->node = ps->cur->node;
482: }
483:
484: for (node = 0; node < NODE__MAX; node++)
485: if (NULL == nodes[node].name)
486: continue;
487: else if (0 == strcmp(nodes[node].name, name))
488: break;
489:
1.8 ! kristaps 490: /* FIXME: do more with these error messages... */
1.1 kristaps 491: if (NODE__MAX == node && NODE_ROOT == ps->node) {
492: fprintf(stderr, "%s: ignoring node\n", name);
493: return;
494: } else if (NODE__MAX == node) {
495: fprintf(stderr, "%s: unknown node\n", name);
496: ps->stop = 1;
497: return;
498: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
499: fprintf(stderr, "%s: reentering?\n", name);
500: ps->stop = 1;
501: return;
502: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
503: fprintf(stderr, "%s: known node w/o context\n", name);
504: return;
505: } else if ( ! isparent(node, ps->node)) {
506: fprintf(stderr, "%s: bad parent\n", name);
507: ps->stop = 1;
508: return;
509: }
510:
511: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
512: perror(NULL);
513: exit(EXIT_FAILURE);
514: }
515:
516: dat->node = ps->node = node;
517: dat->parent = ps->cur;
518: TAILQ_INIT(&dat->childq);
519:
520: if (NULL != ps->cur)
521: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
522:
523: ps->cur = dat;
524: if (NULL == ps->root)
525: ps->root = dat;
526: }
527:
528: /*
529: * Roll up the parse tree.
1.8 ! kristaps 530: * If we're at a text node, roll that one up first.
1.1 kristaps 531: * If we hit the root, then assign ourselves as the NODE_ROOT.
532: */
533: static void
534: xml_elem_end(void *arg, const XML_Char *name)
535: {
536: struct parse *ps = arg;
537:
538: if (ps->stop || NODE_ROOT == ps->node)
539: return;
540:
541: /* Close out text node, if applicable... */
542: if (NODE_TEXT == ps->node) {
543: assert(NULL != ps->cur);
544: ps->cur = ps->cur->parent;
545: assert(NULL != ps->cur);
546: ps->node = ps->cur->node;
547: }
548:
549: if (NULL == (ps->cur = ps->cur->parent))
550: ps->node = NODE_ROOT;
551: else
552: ps->node = ps->cur->node;
553: }
554:
1.8 ! kristaps 555: /*
! 556: * Recursively free a node (NULL is ok).
! 557: */
1.1 kristaps 558: static void
559: pnode_free(struct pnode *pn)
560: {
561: struct pnode *pp;
562:
563: if (NULL == pn)
564: return;
565:
566: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
567: TAILQ_REMOVE(&pn->childq, pp, child);
568: pnode_free(pp);
569: }
570:
571: free(pn->b);
572: free(pn);
573: }
574:
1.8 ! kristaps 575: /*
! 576: * Unlink a node from its parent and pnode_free() it.
! 577: */
1.1 kristaps 578: static void
579: pnode_unlink(struct pnode *pn)
580: {
581:
582: if (NULL != pn->parent)
583: TAILQ_REMOVE(&pn->parent->childq, pn, child);
584: pnode_free(pn);
585: }
586:
1.8 ! kristaps 587: /*
! 588: * Unlink all children of a node and pnode_free() them.
! 589: */
1.1 kristaps 590: static void
1.4 kristaps 591: pnode_unlinksub(struct pnode *pn)
592: {
593:
594: while ( ! TAILQ_EMPTY(&pn->childq))
595: pnode_unlink(TAILQ_FIRST(&pn->childq));
596: }
597:
1.8 ! kristaps 598: /*
! 599: * Reset the lookaside buffer.
! 600: */
1.4 kristaps 601: static void
1.1 kristaps 602: bufclear(struct parse *p)
603: {
604:
605: p->b[p->bsz = 0] = '\0';
606: }
607:
1.8 ! kristaps 608: /*
! 609: * Append NODE_TEXT contents to the current buffer, reallocating its
! 610: * size if necessary.
! 611: * The buffer is ALWAYS nil-terminated.
! 612: */
1.1 kristaps 613: static void
614: bufappend(struct parse *p, struct pnode *pn)
615: {
616:
617: assert(NODE_TEXT == pn->node);
618: if (p->bsz + pn->bsz + 1 > p->mbsz) {
619: p->mbsz = p->bsz + pn->bsz + 1;
620: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
621: perror(NULL);
622: exit(EXIT_FAILURE);
623: }
624: }
625: memcpy(p->b + p->bsz, pn->b, pn->bsz);
626: p->bsz += pn->bsz;
627: p->b[p->bsz] = '\0';
628: }
629:
1.8 ! kristaps 630: /*
! 631: * Recursively append all NODE_TEXT nodes to the buffer.
! 632: * This descends into non-text nodes, but doesn't do anything beyond
! 633: * them.
! 634: * In other words, this is a recursive text grok.
! 635: */
1.3 kristaps 636: static void
637: bufappend_r(struct parse *p, struct pnode *pn)
638: {
639: struct pnode *pp;
640:
641: if (NODE_TEXT == pn->node)
642: bufappend(p, pn);
643: TAILQ_FOREACH(pp, &pn->childq, child)
644: bufappend_r(p, pp);
645: }
646:
1.1 kristaps 647: /*
1.8 ! kristaps 648: * Recursively print text presumably on a macro line.
1.1 kristaps 649: * Convert all whitespace to regular spaces.
650: */
651: static void
652: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
653: {
654: char *cp;
655:
656: bufclear(p);
1.3 kristaps 657: bufappend_r(p, pn);
1.1 kristaps 658:
659: /* Convert all space to spaces. */
660: for (cp = p->b; '\0' != *cp; cp++)
661: if (isspace((int)*cp))
662: *cp = ' ';
663:
664: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 665: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 666: for ( ; '\0' != *cp; cp++) {
667: /* Escape us if we look like a macro. */
668: if ((cp == p->b || ' ' == *(cp - 1)) &&
669: isupper((int)*cp) &&
670: '\0' != *(cp + 1) &&
671: islower((int)*(cp + 1)) &&
672: ('\0' == *(cp + 2) ||
673: ' ' == *(cp + 2) ||
674: (islower((int)*(cp + 2)) &&
675: ('\0' == *(cp + 3) ||
676: ' ' == *(cp + 3)))))
677: fputs("\\&", stdout);
678: putchar(*cp);
679: /* If we're a character escape, escape us. */
680: if ('\\' == *cp)
681: putchar('e');
682: }
683: }
684:
/*
 * Same as pnode_printmacrolinepart(), but ends the output line.
 * Without any text, only the newline is printed.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinepart(p, pn);
	fputc('\n', stdout);
}
696:
1.8 ! kristaps 697: /*
! 698: * Start the SYNOPSIS macro, unlinking its [superfluous] title.
! 699: */
1.1 kristaps 700: static void
1.6 kristaps 701: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
702: {
703: struct pnode *pp;
704:
705: TAILQ_FOREACH(pp, &pn->childq, child)
706: if (NODE_TITLE == pp->node) {
707: pnode_unlink(pp);
708: break;
709: }
710:
711: puts(".Sh SYNOPSIS");
712: }
713:
1.8 ! kristaps 714: /*
! 715: * Start a hopefully-named `Sh' section.
! 716: */
1.6 kristaps 717: static void
1.1 kristaps 718: pnode_printrefsect(struct parse *p, struct pnode *pn)
719: {
720: struct pnode *pp;
721:
722: TAILQ_FOREACH(pp, &pn->childq, child)
723: if (NODE_TITLE == pp->node)
724: break;
725:
1.4 kristaps 726: fputs(".Sh ", stdout);
727:
1.5 kristaps 728: if (NULL != pp) {
1.1 kristaps 729: pnode_printmacroline(p, pp);
1.5 kristaps 730: pnode_unlink(pp);
731: } else
1.4 kristaps 732: puts("UNKNOWN");
1.1 kristaps 733: }
734:
1.8 ! kristaps 735: /*
! 736: * Start a reference, extracting the title and volume.
! 737: */
1.1 kristaps 738: static void
739: pnode_printciterefentry(struct parse *p, struct pnode *pn)
740: {
741: struct pnode *pp, *title, *manvol;
742:
743: title = manvol = NULL;
744: TAILQ_FOREACH(pp, &pn->childq, child)
745: if (NODE_MANVOLNUM == pp->node)
746: manvol = pp;
747: else if (NODE_REFENTRYTITLE == pp->node)
748: title = pp;
749:
750: fputs(".Xr ", stdout);
1.4 kristaps 751:
1.1 kristaps 752: if (NULL != title) {
753: pnode_printmacrolinepart(p, title);
1.4 kristaps 754: putchar(' ');
1.1 kristaps 755: } else
1.4 kristaps 756: fputs("unknown ", stdout);
757:
758: if (NULL != manvol)
1.1 kristaps 759: pnode_printmacroline(p, manvol);
1.4 kristaps 760: else
1.1 kristaps 761: puts("1");
762: }
763:
764: static void
765: pnode_printrefmeta(struct parse *p, struct pnode *pn)
766: {
767: struct pnode *pp, *title, *manvol;
768:
769: title = manvol = NULL;
770: TAILQ_FOREACH(pp, &pn->childq, child)
771: if (NODE_MANVOLNUM == pp->node)
772: manvol = pp;
773: else if (NODE_REFENTRYTITLE == pp->node)
774: title = pp;
775:
1.2 kristaps 776: puts(".Dd $Mdocdate" "$");
1.1 kristaps 777: fputs(".Dt ", stdout);
778:
779: if (NULL != title) {
1.7 kristaps 780: /* FIXME: uppercase. */
1.1 kristaps 781: pnode_printmacrolinepart(p, title);
1.4 kristaps 782: putchar(' ');
1.1 kristaps 783: } else
1.4 kristaps 784: fputs("UNKNOWN ", stdout);
785:
786: if (NULL != manvol)
1.1 kristaps 787: pnode_printmacroline(p, manvol);
1.4 kristaps 788: else
1.1 kristaps 789: puts("1");
790:
791: puts(".Os");
792: }
793:
1.3 kristaps 794: static void
795: pnode_printfuncdef(struct parse *p, struct pnode *pn)
796: {
797: struct pnode *pp, *ftype, *func;
798:
799: ftype = func = NULL;
800: TAILQ_FOREACH(pp, &pn->childq, child)
801: if (NODE_TEXT == pp->node)
802: ftype = pp;
803: else if (NODE_FUNCTION == pp->node)
804: func = pp;
805:
806: if (NULL != ftype) {
807: fputs(".Ft ", stdout);
808: pnode_printmacroline(p, ftype);
809: }
810:
811: if (NULL != func) {
812: fputs(".Fo ", stdout);
813: pnode_printmacroline(p, func);
814: } else
815: puts(".Fo UNKNOWN");
816: }
817:
818: static void
819: pnode_printparamdef(struct parse *p, struct pnode *pn)
820: {
821: struct pnode *pp, *ptype, *param;
822:
823: ptype = param = NULL;
824: TAILQ_FOREACH(pp, &pn->childq, child)
825: if (NODE_TEXT == pp->node)
826: ptype = pp;
827: else if (NODE_PARAMETER == pp->node)
828: param = pp;
829:
830: fputs(".Fa \"", stdout);
831: if (NULL != ptype) {
832: pnode_printmacrolinepart(p, ptype);
833: putchar(' ');
834: }
835:
836: if (NULL != param)
837: pnode_printmacrolinepart(p, param);
838: else
839: fputs("UNKNOWN", stdout);
840:
841: puts("\"");
842: }
843:
844: static void
845: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
846: {
847: struct pnode *pp, *fdef;
848:
849: TAILQ_FOREACH(fdef, &pn->childq, child)
850: if (NODE_FUNCDEF == fdef->node)
851: break;
852:
1.4 kristaps 853: if (NULL != fdef)
1.3 kristaps 854: pnode_printfuncdef(p, fdef);
1.4 kristaps 855: else
1.3 kristaps 856: puts(".Fo UNKNOWN");
857:
1.4 kristaps 858: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 859: if (NODE_PARAMDEF == pp->node)
860: pnode_printparamdef(p, pp);
861:
862: puts(".Fc");
863: }
864:
1.4 kristaps 865: /* TODO: handle "optional" values. */
866: static void
867: pnode_printarg(struct parse *p, struct pnode *pn, int nested)
868: {
869: struct pnode *pp;
870: int sv = nested;
871:
872: if ( ! nested)
873: fputs(".", stdout);
874: nested = 1;
875: TAILQ_FOREACH(pp, &pn->childq, child)
876: if (NODE_OPTION == pp->node) {
877: fputs("Fl ", stdout);
878: pnode_printmacrolinepart(p, pp);
879: } else if (NODE_TEXT == pp->node) {
880: fputs("Ar ", stdout);
881: pnode_printmacrolinepart(p, pp);
882: } else if (NODE_ARG == pp->node)
883: pnode_printarg(p, pp, nested);
884:
885: if ( ! sv)
886: puts("");
887: }
888:
1.7 kristaps 889: /*
890: * Recursively search and return the first instance of "node".
891: */
892: static struct pnode *
893: pnode_findfirst(struct pnode *pn, enum nodeid node)
894: {
895: struct pnode *pp, *res;
896:
897: res = NULL;
898: TAILQ_FOREACH(pp, &pn->childq, child) {
899: res = pp->node == node ? pp :
900: pnode_findfirst(pp, node);
901: if (NULL != res)
902: break;
903: }
904:
905: return(res);
906: }
907:
908: static void
909: pnode_printprologue(struct parse *p, struct pnode *pn)
910: {
911: struct pnode *pp;
912:
913: if (NULL != (pp = pnode_findfirst(p->root, NODE_REFMETA))) {
914: pnode_printrefmeta(p, pp);
915: pnode_unlink(pp);
916: } else {
917: puts(".\\\" Supplying bogus prologue...");
918: puts(".Dd $Mdocdate" "$");
919: puts(".Dt UNKNOWN 1");
920: puts(".Os");
921: }
922: }
923:
1.1 kristaps 924: /*
925: * Print a parsed node (or ignore it--whatever).
926: * This is a recursive function.
927: * FIXME: macro line continuation?
928: */
929: static void
930: pnode_print(struct parse *p, struct pnode *pn)
931: {
932: struct pnode *pp;
933: char *cp;
934: int last;
935:
936: if (NULL == pn)
937: return;
938:
939: if (NODE_TEXT != pn->node && NODE_ROOT != pn->node)
940: printf(".\\\" %s\n", nodes[pn->node].name);
941:
942: switch (pn->node) {
1.4 kristaps 943: case (NODE_ARG):
944: pnode_printarg(p, pn, 0);
945: pnode_unlinksub(pn);
946: break;
1.1 kristaps 947: case (NODE_CITEREFENTRY):
948: pnode_printciterefentry(p, pn);
1.4 kristaps 949: pnode_unlinksub(pn);
1.1 kristaps 950: break;
951: case (NODE_CODE):
952: fputs(".Li ", stdout);
953: pnode_printmacroline(p, pn);
1.4 kristaps 954: pnode_unlinksub(pn);
955: break;
956: case (NODE_COMMAND):
957: fputs(".Nm ", stdout);
958: pnode_printmacroline(p, pn);
959: pnode_unlinksub(pn);
1.1 kristaps 960: break;
1.3 kristaps 961: case (NODE_FUNCTION):
962: fputs(".Fn ", stdout);
963: pnode_printmacroline(p, pn);
1.4 kristaps 964: pnode_unlinksub(pn);
1.3 kristaps 965: break;
966: case (NODE_FUNCPROTOTYPE):
967: pnode_printfuncprototype(p, pn);
1.4 kristaps 968: pnode_unlinksub(pn);
1.3 kristaps 969: break;
1.1 kristaps 970: case (NODE_FUNCSYNOPSISINFO):
971: fputs(".Fd ", stdout);
972: pnode_printmacroline(p, pn);
1.4 kristaps 973: pnode_unlinksub(pn);
1.1 kristaps 974: break;
975: case (NODE_PARA):
976: /* FIXME: not always. */
977: puts(".Pp");
1.3 kristaps 978: break;
979: case (NODE_PARAMETER):
980: fputs(".Fa \"", stdout);
981: pnode_printmacrolinepart(p, pn);
982: puts("\"");
1.4 kristaps 983: pnode_unlinksub(pn);
1.1 kristaps 984: break;
985: case (NODE_PROGRAMLISTING):
986: puts(".Bd -literal");
987: break;
988: case (NODE_REFMETA):
1.7 kristaps 989: abort();
1.1 kristaps 990: break;
991: case (NODE_REFNAME):
992: fputs(".Nm ", stdout);
993: pnode_printmacroline(p, pn);
1.4 kristaps 994: pnode_unlinksub(pn);
1.1 kristaps 995: return;
996: case (NODE_REFNAMEDIV):
997: puts(".Sh NAME");
998: break;
999: case (NODE_REFPURPOSE):
1000: fputs(".Nd ", stdout);
1001: pnode_printmacroline(p, pn);
1.4 kristaps 1002: pnode_unlinksub(pn);
1.1 kristaps 1003: return;
1004: case (NODE_REFSYNOPSISDIV):
1.6 kristaps 1005: pnode_printrefsynopsisdiv(p, pn);
1.1 kristaps 1006: break;
1007: case (NODE_REFSECT1):
1008: pnode_printrefsect(p, pn);
1009: break;
1.8 ! kristaps 1010: case (NODE_STRUCTNAME):
! 1011: fputs(".Vt ", stdout);
! 1012: pnode_printmacroline(p, pn);
! 1013: pnode_unlinksub(pn);
! 1014: return;
1.1 kristaps 1015: case (NODE_TEXT):
1016: bufclear(p);
1017: bufappend(p, pn);
1018: /*
1019: * Output all characters, squeezing out whitespace
1020: * between newlines.
1021: * XXX: all whitespace, including tabs (?).
1022: * Remember to escape control characters and escapes.
1023: */
1024: for (last = '\n', cp = p->b; '\0' != *cp; ) {
1025: if ('\n' == last) {
1026: /* Consume all whitespace. */
1027: if (isspace((int)*cp)) {
1028: while (isspace((int)*cp))
1029: cp++;
1030: continue;
1031: } else if ('\'' == *cp || '.' == *cp)
1032: fputs("\\&", stdout);
1033: }
1034: putchar(last = *cp++);
1035: /* If we're a character escape, escape us. */
1036: if ('\\' == last)
1037: putchar('e');
1038: }
1039: if ('\n' != last)
1040: putchar('\n');
1041: break;
1042: default:
1043: break;
1044: }
1045:
1046: TAILQ_FOREACH(pp, &pn->childq, child)
1047: pnode_print(p, pp);
1048:
1049: switch (pn->node) {
1050: case (NODE_PROGRAMLISTING):
1051: puts(".Ed");
1052: break;
1053: default:
1054: break;
1055: }
1056: }
1057:
1058: /*
1059: * Loop around the read buffer until we've drained it of all data.
1060: * Invoke the parser context with each buffer fill.
1061: */
1062: static int
1063: readfile(XML_Parser xp, int fd,
1064: char *b, size_t bsz, const char *fn)
1065: {
1066: struct parse p;
1067: int rc;
1068: ssize_t ssz;
1069:
1070: memset(&p, 0, sizeof(struct parse));
1071:
1072: p.b = malloc(p.bsz = p.mbsz = 1024);
1073:
1074: XML_SetCharacterDataHandler(xp, xml_char);
1075: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1076: XML_SetUserData(xp, &p);
1077:
1078: while ((ssz = read(fd, b, bsz)) >= 0) {
1079: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1080: fprintf(stderr, "%s: %s\n", fn,
1081: XML_ErrorString
1082: (XML_GetErrorCode(xp)));
1083: else if ( ! p.stop && ssz > 0)
1084: continue;
1085: /*
1086: * Exit when we've read all or errors have occured
1087: * during the parse sequence.
1088: */
1.7 kristaps 1089: pnode_printprologue(&p, p.root);
1.1 kristaps 1090: pnode_print(&p, p.root);
1091: pnode_free(p.root);
1092: free(p.b);
1093: return(0 != rc && ! p.stop);
1094: }
1095:
1096: /* Read error has occured. */
1097: perror(fn);
1098: pnode_free(p.root);
1099: free(p.b);
1100: return(0);
1101: }
1102:
1103: int
1104: main(int argc, char *argv[])
1105: {
1106: XML_Parser xp;
1107: const char *fname;
1108: char *buf;
1109: int fd, rc;
1110:
1111: fname = "-";
1112: xp = NULL;
1113: buf = NULL;
1114: rc = 0;
1115:
1116: if (-1 != getopt(argc, argv, ""))
1117: return(EXIT_FAILURE);
1118:
1119: argc -= optind;
1120: argv += optind;
1121:
1122: if (argc > 1)
1123: return(EXIT_FAILURE);
1124: else if (argc > 0)
1125: fname = argv[0];
1126:
1127: /* Read from stdin or a file. */
1128: fd = 0 == strcmp(fname, "-") ?
1129: STDIN_FILENO : open(fname, O_RDONLY, 0);
1130:
1131: /*
1132: * Open file for reading.
1133: * Allocate a read buffer.
1134: * Create the parser context.
1135: * Dive directly into the parse.
1136: */
1137: if (-1 == fd)
1138: perror(fname);
1139: else if (NULL == (buf = malloc(4096)))
1140: perror(NULL);
1141: else if (NULL == (xp = XML_ParserCreate(NULL)))
1142: perror(NULL);
1143: else if ( ! readfile(xp, fd, buf, 4096, fname))
1144: rc = 1;
1145:
1146: XML_ParserFree(xp);
1147: free(buf);
1148: if (STDIN_FILENO != fd)
1149: close(fd);
1150: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1151: }
CVSweb