Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.6
1.6 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.5 2014/03/28 10:03:36 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27:
28: /*
29: * All recognised node types.
29: * Values are used as indices into the nodes[] table.
30: */
31: enum nodeid {
32: NODE_ROOT = 0, /* Must come first (value zero). */
33: /* Alpha-ordered hereafter. */
1.4 kristaps 34: NODE_ARG,
1.1 kristaps 35: NODE_CITEREFENTRY,
1.4 kristaps 36: NODE_CMDSYNOPSIS,
1.1 kristaps 37: NODE_CODE,
1.4 kristaps 38: NODE_COMMAND,
1.3 kristaps 39: NODE_FUNCDEF,
40: NODE_FUNCPROTOTYPE,
1.1 kristaps 41: NODE_FUNCSYNOPSIS,
42: NODE_FUNCSYNOPSISINFO,
1.3 kristaps 43: NODE_FUNCTION,
1.1 kristaps 44: NODE_MANVOLNUM,
1.4 kristaps 45: NODE_OPTION,
1.1 kristaps 46: NODE_PARA,
1.3 kristaps 47: NODE_PARAMDEF,
48: NODE_PARAMETER,
1.1 kristaps 49: NODE_PROGRAMLISTING,
50: NODE_REFCLASS,
51: NODE_REFDESCRIPTOR,
52: NODE_REFENTRY,
53: NODE_REFENTRYTITLE,
54: NODE_REFMETA,
55: NODE_REFMISCINFO,
56: NODE_REFNAME,
57: NODE_REFNAMEDIV,
58: NODE_REFPURPOSE,
59: NODE_REFSECT1,
60: NODE_REFSYNOPSISDIV,
61: NODE_SYNOPSIS,
62: NODE_TEXT, /* character data; has no element name in nodes[] */
63: NODE_TITLE,
64: NODE__MAX /* number of node types; sizes the nodes[] table */
65: };
66:
67: /*
68: * Global parse state.
69: * Keep this as simple and small as possible.
69: * The b/bsz/mbsz members form a scratch buffer used while printing.
70: */
71: struct parse {
72: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
73: int stop; /* should we stop now? */
74: struct pnode *root; /* root of parse tree */
75: struct pnode *cur; /* current node in tree */
1.6 ! kristaps 76: unsigned int flags; /* bit-mask of PARSE_* values */
! 77: #define PARSE_HAS_META 1 /* set once a refmeta node has been printed */
1.1 kristaps 78: char *b; /* nil-terminated scratch buffer */
79: size_t bsz; /* bytes used in b, not counting the nil */
80: size_t mbsz; /* bytes allocated for b */
81: };
82:
83: struct node { /* static description of one node type */
84: const char *name; /* XML element name, or NULL if none */
85: unsigned int flags; /* bit-mask of NODE_* flags below */
86: #define NODE_IGNTEXT 1 /* ignore all contained text */
87: };
88:
89: TAILQ_HEAD(pnodeq, pnode); /* queue head type for lists of pnode */
90:
91: struct pnode { /* one node of the parse tree */
92: enum nodeid node; /* node type */
93: char *b; /* binary data buffer */
94: size_t bsz; /* data buffer size */
95: struct pnode *parent; /* parent (or NULL if top) */
96: struct pnodeq childq; /* queue of children */
97: TAILQ_ENTRY(pnode) child; /* linkage within the parent's childq */
98: };
99:
100: static const struct node nodes[NODE__MAX] = { /* indexed by enum nodeid; keep in the same order */
101: { NULL, 0 }, /* NODE_ROOT: no element name */
1.4 kristaps 102: { "arg", 0 },
1.1 kristaps 103: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 104: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 105: { "code", 0 },
1.4 kristaps 106: { "command", 0 },
1.3 kristaps 107: { "funcdef", 0 },
108: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 109: { "funcsynopsis", NODE_IGNTEXT },
110: { "funcsynopsisinfo", 0 },
1.3 kristaps 111: { "function", 0 },
1.1 kristaps 112: { "manvolnum", 0 },
1.4 kristaps 113: { "option", 0 },
1.1 kristaps 114: { "para", 0 },
1.3 kristaps 115: { "paramdef", 0 },
116: { "parameter", 0 },
1.1 kristaps 117: { "programlisting", 0 },
118: { "refclass", NODE_IGNTEXT },
119: { "refdescriptor", NODE_IGNTEXT },
120: { "refentry", NODE_IGNTEXT },
121: { "refentrytitle", 0 },
122: { "refmeta", NODE_IGNTEXT },
123: { "refmiscinfo", NODE_IGNTEXT },
124: { "refname", 0 },
125: { "refnamediv", NODE_IGNTEXT },
126: { "refpurpose", 0 },
127: { "refsect1", 0 },
128: { "refsynopsisdiv", NODE_IGNTEXT },
129: { "synopsis", 0 },
130: { NULL, 0 }, /* NODE_TEXT: no element name */
131: { "title", 0 },
132: };
133:
134: /*
135: * Look up whether "parent" is a valid parent for "node".
136: */
137: static int
138: isparent(enum nodeid node, enum nodeid parent)
139: {
140:
141: switch (node) {
142: case (NODE_ROOT):
143: return(0);
1.4 kristaps 144: case (NODE_ARG):
145: switch (parent) {
146: case (NODE_ARG):
147: case (NODE_CMDSYNOPSIS):
148: return(1);
149: default:
150: break;
151: }
152: return(0);
1.1 kristaps 153: case (NODE_CITEREFENTRY):
154: switch (parent) {
155: case (NODE_FUNCSYNOPSISINFO):
156: case (NODE_PARA):
157: case (NODE_PROGRAMLISTING):
158: case (NODE_REFDESCRIPTOR):
159: case (NODE_REFENTRYTITLE):
160: case (NODE_REFNAME):
161: case (NODE_REFPURPOSE):
162: case (NODE_SYNOPSIS):
163: case (NODE_TITLE):
164: return(1);
165: default:
166: break;
167: }
168: return(0);
1.4 kristaps 169: case (NODE_CMDSYNOPSIS):
170: switch (parent) {
171: case (NODE_PARA):
172: case (NODE_REFSECT1):
173: case (NODE_REFSYNOPSISDIV):
174: return(1);
175: default:
176: break;
177: }
178: return(0);
1.1 kristaps 179: case (NODE_CODE):
180: switch (parent) {
181: case (NODE_FUNCSYNOPSISINFO):
182: case (NODE_PARA):
183: case (NODE_PROGRAMLISTING):
184: case (NODE_REFDESCRIPTOR):
185: case (NODE_REFENTRYTITLE):
186: case (NODE_REFNAME):
187: case (NODE_REFPURPOSE):
188: case (NODE_SYNOPSIS):
189: case (NODE_TITLE):
190: return(1);
191: default:
192: break;
193: }
194: return(0);
1.4 kristaps 195: case (NODE_COMMAND):
196: switch (parent) {
197: case (NODE_CMDSYNOPSIS):
198: case (NODE_FUNCSYNOPSISINFO):
199: case (NODE_PARA):
200: case (NODE_PROGRAMLISTING):
201: case (NODE_REFDESCRIPTOR):
202: case (NODE_REFENTRYTITLE):
203: case (NODE_REFNAME):
204: case (NODE_REFPURPOSE):
205: case (NODE_SYNOPSIS):
206: case (NODE_TITLE):
207: return(1);
208: default:
209: break;
210: }
211: return(0);
1.3 kristaps 212: case (NODE_FUNCDEF):
213: return(NODE_FUNCPROTOTYPE == parent);
214: case (NODE_FUNCPROTOTYPE):
215: return(NODE_FUNCSYNOPSIS == parent);
216: case (NODE_FUNCSYNOPSIS):
217: switch (parent) {
218: case (NODE_PARA):
219: case (NODE_REFSECT1):
220: case (NODE_REFSYNOPSISDIV):
221: return(1);
222: default:
223: break;
224: }
225: return(0);
226: case (NODE_FUNCSYNOPSISINFO):
227: return(NODE_FUNCSYNOPSIS == parent);
228: case (NODE_FUNCTION):
229: switch (parent) {
230: case (NODE_CODE):
231: case (NODE_FUNCDEF):
232: case (NODE_FUNCSYNOPSISINFO):
233: case (NODE_PARA):
1.4 kristaps 234: case (NODE_PROGRAMLISTING):
1.3 kristaps 235: case (NODE_REFDESCRIPTOR):
236: case (NODE_REFENTRYTITLE):
237: case (NODE_REFNAME):
238: case (NODE_REFPURPOSE):
239: case (NODE_SYNOPSIS):
240: case (NODE_TITLE):
241: return(1);
242: default:
243: break;
244: }
245: return(0);
1.1 kristaps 246: case (NODE_MANVOLNUM):
247: switch (parent) {
248: case (NODE_CITEREFENTRY):
249: case (NODE_REFMETA):
250: return(1);
251: default:
252: break;
253: }
254: return(0);
1.4 kristaps 255: case (NODE_OPTION):
256: switch (parent) {
257: case (NODE_ARG):
258: case (NODE_FUNCSYNOPSISINFO):
259: case (NODE_PARA):
260: case (NODE_PROGRAMLISTING):
261: case (NODE_REFDESCRIPTOR):
262: case (NODE_REFENTRYTITLE):
263: case (NODE_REFNAME):
264: case (NODE_REFPURPOSE):
265: case (NODE_SYNOPSIS):
266: case (NODE_TITLE):
267: return(1);
268: default:
269: break;
270: }
271: return(0);
1.3 kristaps 272: case (NODE_PARA):
1.1 kristaps 273: switch (parent) {
274: case (NODE_REFSECT1):
275: case (NODE_REFSYNOPSISDIV):
276: return(1);
277: default:
278: break;
279: }
280: return(0);
1.3 kristaps 281: case (NODE_PARAMDEF):
282: return(NODE_FUNCPROTOTYPE == parent);
283: case (NODE_PARAMETER):
1.1 kristaps 284: switch (parent) {
1.3 kristaps 285: case (NODE_CODE):
286: case (NODE_FUNCSYNOPSISINFO):
287: case (NODE_PARA):
288: case (NODE_PARAMDEF):
1.4 kristaps 289: case (NODE_PROGRAMLISTING):
1.3 kristaps 290: case (NODE_REFDESCRIPTOR):
291: case (NODE_REFENTRYTITLE):
292: case (NODE_REFNAME):
293: case (NODE_REFPURPOSE):
294: case (NODE_SYNOPSIS):
295: case (NODE_TITLE):
1.1 kristaps 296: return(1);
297: default:
298: break;
299: }
300: return(0);
301: case (NODE_PROGRAMLISTING):
302: switch (parent) {
303: case (NODE_PARA):
304: case (NODE_REFSECT1):
305: case (NODE_REFSYNOPSISDIV):
306: return(1);
307: default:
308: break;
309: }
310: return(0);
311: case (NODE_REFCLASS):
312: return(parent == NODE_REFNAMEDIV);
313: case (NODE_REFDESCRIPTOR):
314: return(parent == NODE_REFNAMEDIV);
315: case (NODE_REFENTRY):
316: return(parent == NODE_ROOT);
317: case (NODE_REFENTRYTITLE):
318: switch (parent) {
319: case (NODE_CITEREFENTRY):
320: case (NODE_REFMETA):
321: return(1);
322: default:
323: break;
324: }
325: case (NODE_REFMETA):
326: return(parent == NODE_REFENTRY);
327: case (NODE_REFMISCINFO):
328: return(parent == NODE_REFMETA);
329: case (NODE_REFNAME):
330: return(parent == NODE_REFNAMEDIV);
331: case (NODE_REFNAMEDIV):
332: return(parent == NODE_REFENTRY);
333: case (NODE_REFPURPOSE):
334: return(parent == NODE_REFNAMEDIV);
335: case (NODE_REFSECT1):
336: return(parent == NODE_REFENTRY);
337: case (NODE_REFSYNOPSISDIV):
338: return(parent == NODE_REFENTRY);
339: case (NODE_SYNOPSIS):
340: switch (parent) {
341: case (NODE_REFSYNOPSISDIV):
342: case (NODE_REFSECT1):
343: return(1);
344: default:
345: break;
346: }
347: return(0);
348: case (NODE_TITLE):
349: switch (parent) {
350: case (NODE_REFSECT1):
351: case (NODE_REFSYNOPSISDIV):
352: return(1);
353: default:
354: break;
355: }
356: return(0);
357: case (NODE_TEXT):
358: return(1);
359: case (NODE__MAX):
360: break;
361: }
362:
363: abort();
364: return(0);
365: }
366:
367: static void
368: xml_char(void *arg, const XML_Char *p, int sz)
369: {
370: struct parse *ps = arg;
371: struct pnode *dat;
1.4 kristaps 372: int i;
1.1 kristaps 373:
374: /* Stopped or no tree yet. */
375: if (ps->stop || NODE_ROOT == ps->node)
376: return;
377:
378: /* Not supposed to be collecting text. */
379: assert(NULL != ps->cur);
380: if (NODE_IGNTEXT & nodes[ps->node].flags)
381: return;
382:
383: /*
384: * Are we in the midst of processing text?
385: * If we're not processing text right now, then create a text
386: * node for doing so.
1.4 kristaps 387: * However, don't do so unless we have some non-whitespace to
388: * process!
1.1 kristaps 389: */
390: if (NODE_TEXT != ps->node) {
1.4 kristaps 391: for (i = 0; i < sz; i++)
392: if ( ! isspace((int)p[i]))
393: break;
394: if (i == sz)
395: return;
1.1 kristaps 396: dat = calloc(1, sizeof(struct pnode));
397: if (NULL == dat) {
398: perror(NULL);
399: exit(EXIT_FAILURE);
400: }
401:
402: dat->node = ps->node = NODE_TEXT;
403: dat->parent = ps->cur;
404: TAILQ_INIT(&dat->childq);
405: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
406: ps->cur = dat;
407: assert(NULL != ps->root);
408:
409: }
410:
411: /* Append to current buffer. */
412: assert(sz >= 0);
413: ps->cur->b = realloc(ps->cur->b,
414: ps->cur->bsz + (size_t)sz);
415: if (NULL == ps->cur->b) {
416: perror(NULL);
417: exit(EXIT_FAILURE);
418: }
419: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
420: ps->cur->bsz += (size_t)sz;
421: }
422:
423: /*
424: * Begin an element.
425: * First, look for the element.
426: * If we don't find it and we're not parsing, keep going.
427: * If we don't find it (and we're parsing), puke and exit.
428: * If we find it but we're not parsing yet (i.e., it's not a refentry
429: * and thus out of context), keep going.
430: * If we're at the root and already have a tree, puke and exit.
431: * Make sure that the element is in the right context.
432: * Lastly, put the node onto our parse tree and continue.
433: */
434: static void
435: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
436: {
437: struct parse *ps = arg;
438: enum nodeid node;
439: struct pnode *dat;
440:
441: if (ps->stop)
442: return;
443:
444: /* Close out text node, if applicable... */
445: if (NODE_TEXT == ps->node) {
446: assert(NULL != ps->cur);
447: ps->cur = ps->cur->parent;
448: assert(NULL != ps->cur);
449: ps->node = ps->cur->node;
450: }
451:
452: for (node = 0; node < NODE__MAX; node++)
453: if (NULL == nodes[node].name)
454: continue;
455: else if (0 == strcmp(nodes[node].name, name))
456: break;
457:
458: if (NODE__MAX == node && NODE_ROOT == ps->node) {
459: fprintf(stderr, "%s: ignoring node\n", name);
460: return;
461: } else if (NODE__MAX == node) {
462: fprintf(stderr, "%s: unknown node\n", name);
463: ps->stop = 1;
464: return;
465: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
466: fprintf(stderr, "%s: reentering?\n", name);
467: ps->stop = 1;
468: return;
469: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
470: fprintf(stderr, "%s: known node w/o context\n", name);
471: return;
472: } else if ( ! isparent(node, ps->node)) {
473: fprintf(stderr, "%s: bad parent\n", name);
474: ps->stop = 1;
475: return;
476: }
477:
478: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
479: perror(NULL);
480: exit(EXIT_FAILURE);
481: }
482:
483: dat->node = ps->node = node;
484: dat->parent = ps->cur;
485: TAILQ_INIT(&dat->childq);
486:
487: if (NULL != ps->cur)
488: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
489:
490: ps->cur = dat;
491: if (NULL == ps->root)
492: ps->root = dat;
493: }
494:
495: /*
496: * Roll up the parse tree.
497: * Does nothing else special.
498: * If we hit the root, then assign ourselves as the NODE_ROOT.
499: */
500: static void
501: xml_elem_end(void *arg, const XML_Char *name)
502: {
503: struct parse *ps = arg;
504:
505: if (ps->stop || NODE_ROOT == ps->node)
506: return;
507:
508: /* Close out text node, if applicable... */
509: if (NODE_TEXT == ps->node) {
510: assert(NULL != ps->cur);
511: ps->cur = ps->cur->parent;
512: assert(NULL != ps->cur);
513: ps->node = ps->cur->node;
514: }
515:
516: if (NULL == (ps->cur = ps->cur->parent))
517: ps->node = NODE_ROOT;
518: else
519: ps->node = ps->cur->node;
520: }
521:
522: static void
523: pnode_free(struct pnode *pn)
524: {
525: struct pnode *pp;
526:
527: if (NULL == pn)
528: return;
529:
530: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
531: TAILQ_REMOVE(&pn->childq, pp, child);
532: pnode_free(pp);
533: }
534:
535: free(pn->b);
536: free(pn);
537: }
538:
539: static void
540: pnode_unlink(struct pnode *pn)
541: {
542:
543: if (NULL != pn->parent)
544: TAILQ_REMOVE(&pn->parent->childq, pn, child);
545: pnode_free(pn);
546: }
547:
548: static void
1.4 kristaps 549: pnode_unlinksub(struct pnode *pn)
550: {
551:
552: while ( ! TAILQ_EMPTY(&pn->childq))
553: pnode_unlink(TAILQ_FIRST(&pn->childq));
554: }
555:
556: static void
1.1 kristaps 557: bufclear(struct parse *p)
558: {
559:
560: p->b[p->bsz = 0] = '\0';
561: }
562:
563: static void
564: bufappend(struct parse *p, struct pnode *pn)
565: {
566:
567: assert(NODE_TEXT == pn->node);
568: if (p->bsz + pn->bsz + 1 > p->mbsz) {
569: p->mbsz = p->bsz + pn->bsz + 1;
570: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
571: perror(NULL);
572: exit(EXIT_FAILURE);
573: }
574: }
575: memcpy(p->b + p->bsz, pn->b, pn->bsz);
576: p->bsz += pn->bsz;
577: p->b[p->bsz] = '\0';
578: }
579:
1.3 kristaps 580: static void
581: bufappend_r(struct parse *p, struct pnode *pn)
582: {
583: struct pnode *pp;
584:
585: if (NODE_TEXT == pn->node)
586: bufappend(p, pn);
587: TAILQ_FOREACH(pp, &pn->childq, child)
588: bufappend_r(p, pp);
589: }
590:
1.1 kristaps 591: /*
592: * Print text presumably on a macro line.
593: * Ignore any child macros.
594: * Convert all whitespace to regular spaces.
595: */
596: static void
597: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
598: {
599: char *cp;
600:
601: bufclear(p);
1.3 kristaps 602: bufappend_r(p, pn);
1.1 kristaps 603:
604: /* Convert all space to spaces. */
605: for (cp = p->b; '\0' != *cp; cp++)
606: if (isspace((int)*cp))
607: *cp = ' ';
608:
609: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 610: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 611: for ( ; '\0' != *cp; cp++) {
612: /* Escape us if we look like a macro. */
613: if ((cp == p->b || ' ' == *(cp - 1)) &&
614: isupper((int)*cp) &&
615: '\0' != *(cp + 1) &&
616: islower((int)*(cp + 1)) &&
617: ('\0' == *(cp + 2) ||
618: ' ' == *(cp + 2) ||
619: (islower((int)*(cp + 2)) &&
620: ('\0' == *(cp + 3) ||
621: ' ' == *(cp + 3)))))
622: fputs("\\&", stdout);
623: putchar(*cp);
624: /* If we're a character escape, escape us. */
625: if ('\\' == *cp)
626: putchar('e');
627: }
628: }
629:
/*
 * Same as pnode_printmacrolinepart(), then end the output line.
 * With no text at all this writes just the newline.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinepart(p, pn);
	fputc('\n', stdout);
}
641:
642: static void
1.6 ! kristaps 643: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
! 644: {
! 645: struct pnode *pp;
! 646:
! 647: TAILQ_FOREACH(pp, &pn->childq, child)
! 648: if (NODE_TITLE == pp->node) {
! 649: pnode_unlink(pp);
! 650: break;
! 651: }
! 652:
! 653: puts(".Sh SYNOPSIS");
! 654: }
! 655:
! 656: static void
1.1 kristaps 657: pnode_printrefsect(struct parse *p, struct pnode *pn)
658: {
659: struct pnode *pp;
660:
661: TAILQ_FOREACH(pp, &pn->childq, child)
662: if (NODE_TITLE == pp->node)
663: break;
664:
1.4 kristaps 665: fputs(".Sh ", stdout);
666:
1.5 kristaps 667: if (NULL != pp) {
1.1 kristaps 668: pnode_printmacroline(p, pp);
1.5 kristaps 669: pnode_unlink(pp);
670: } else
1.4 kristaps 671: puts("UNKNOWN");
1.1 kristaps 672: }
673:
674: static void
675: pnode_printciterefentry(struct parse *p, struct pnode *pn)
676: {
677: struct pnode *pp, *title, *manvol;
678:
679: title = manvol = NULL;
680: TAILQ_FOREACH(pp, &pn->childq, child)
681: if (NODE_MANVOLNUM == pp->node)
682: manvol = pp;
683: else if (NODE_REFENTRYTITLE == pp->node)
684: title = pp;
685:
686: fputs(".Xr ", stdout);
1.4 kristaps 687:
1.1 kristaps 688: if (NULL != title) {
689: pnode_printmacrolinepart(p, title);
1.4 kristaps 690: putchar(' ');
1.1 kristaps 691: } else
1.4 kristaps 692: fputs("unknown ", stdout);
693:
694: if (NULL != manvol)
1.1 kristaps 695: pnode_printmacroline(p, manvol);
1.4 kristaps 696: else
1.1 kristaps 697: puts("1");
698: }
699:
700: static void
701: pnode_printrefmeta(struct parse *p, struct pnode *pn)
702: {
703: struct pnode *pp, *title, *manvol;
704:
705: title = manvol = NULL;
706: TAILQ_FOREACH(pp, &pn->childq, child)
707: if (NODE_MANVOLNUM == pp->node)
708: manvol = pp;
709: else if (NODE_REFENTRYTITLE == pp->node)
710: title = pp;
711:
1.2 kristaps 712: puts(".Dd $Mdocdate" "$");
1.1 kristaps 713: fputs(".Dt ", stdout);
714:
715: if (NULL != title) {
716: pnode_printmacrolinepart(p, title);
1.4 kristaps 717: putchar(' ');
1.1 kristaps 718: } else
1.4 kristaps 719: fputs("UNKNOWN ", stdout);
720:
721: if (NULL != manvol)
1.1 kristaps 722: pnode_printmacroline(p, manvol);
1.4 kristaps 723: else
1.1 kristaps 724: puts("1");
725:
726: puts(".Os");
727: }
728:
1.3 kristaps 729: static void
730: pnode_printfuncdef(struct parse *p, struct pnode *pn)
731: {
732: struct pnode *pp, *ftype, *func;
733:
734: ftype = func = NULL;
735: TAILQ_FOREACH(pp, &pn->childq, child)
736: if (NODE_TEXT == pp->node)
737: ftype = pp;
738: else if (NODE_FUNCTION == pp->node)
739: func = pp;
740:
741: if (NULL != ftype) {
742: fputs(".Ft ", stdout);
743: pnode_printmacroline(p, ftype);
744: }
745:
746: if (NULL != func) {
747: fputs(".Fo ", stdout);
748: pnode_printmacroline(p, func);
749: } else
750: puts(".Fo UNKNOWN");
751: }
752:
753: static void
754: pnode_printparamdef(struct parse *p, struct pnode *pn)
755: {
756: struct pnode *pp, *ptype, *param;
757:
758: ptype = param = NULL;
759: TAILQ_FOREACH(pp, &pn->childq, child)
760: if (NODE_TEXT == pp->node)
761: ptype = pp;
762: else if (NODE_PARAMETER == pp->node)
763: param = pp;
764:
765: fputs(".Fa \"", stdout);
766: if (NULL != ptype) {
767: pnode_printmacrolinepart(p, ptype);
768: putchar(' ');
769: }
770:
771: if (NULL != param)
772: pnode_printmacrolinepart(p, param);
773: else
774: fputs("UNKNOWN", stdout);
775:
776: puts("\"");
777: }
778:
779: static void
780: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
781: {
782: struct pnode *pp, *fdef;
783:
784: TAILQ_FOREACH(fdef, &pn->childq, child)
785: if (NODE_FUNCDEF == fdef->node)
786: break;
787:
1.4 kristaps 788: if (NULL != fdef)
1.3 kristaps 789: pnode_printfuncdef(p, fdef);
1.4 kristaps 790: else
1.3 kristaps 791: puts(".Fo UNKNOWN");
792:
1.4 kristaps 793: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 794: if (NODE_PARAMDEF == pp->node)
795: pnode_printparamdef(p, pp);
796:
797: puts(".Fc");
798: }
799:
1.4 kristaps 800: /* TODO: handle "optional" values. */
801: static void
802: pnode_printarg(struct parse *p, struct pnode *pn, int nested)
803: {
804: struct pnode *pp;
805: int sv = nested;
806:
807: if ( ! nested)
808: fputs(".", stdout);
809: nested = 1;
810: TAILQ_FOREACH(pp, &pn->childq, child)
811: if (NODE_OPTION == pp->node) {
812: fputs("Fl ", stdout);
813: pnode_printmacrolinepart(p, pp);
814: } else if (NODE_TEXT == pp->node) {
815: fputs("Ar ", stdout);
816: pnode_printmacrolinepart(p, pp);
817: } else if (NODE_ARG == pp->node)
818: pnode_printarg(p, pp, nested);
819:
820: if ( ! sv)
821: puts("");
822: }
823:
1.1 kristaps 824: /*
825: * Print a parsed node (or ignore it--whatever).
826: * This is a recursive function.
827: * FIXME: macro line continuation?
828: */
829: static void
830: pnode_print(struct parse *p, struct pnode *pn)
831: {
832: struct pnode *pp;
833: char *cp;
834: int last;
835:
836: if (NULL == pn)
837: return;
838:
839: if (NODE_TEXT != pn->node && NODE_ROOT != pn->node)
840: printf(".\\\" %s\n", nodes[pn->node].name);
841:
842: switch (pn->node) {
1.4 kristaps 843: case (NODE_ARG):
844: pnode_printarg(p, pn, 0);
845: pnode_unlinksub(pn);
846: break;
1.1 kristaps 847: case (NODE_CITEREFENTRY):
848: pnode_printciterefentry(p, pn);
1.4 kristaps 849: pnode_unlinksub(pn);
1.1 kristaps 850: break;
851: case (NODE_CODE):
852: fputs(".Li ", stdout);
853: pnode_printmacroline(p, pn);
1.4 kristaps 854: pnode_unlinksub(pn);
855: break;
856: case (NODE_COMMAND):
857: fputs(".Nm ", stdout);
858: pnode_printmacroline(p, pn);
859: pnode_unlinksub(pn);
1.1 kristaps 860: break;
1.3 kristaps 861: case (NODE_FUNCTION):
862: fputs(".Fn ", stdout);
863: pnode_printmacroline(p, pn);
1.4 kristaps 864: pnode_unlinksub(pn);
1.3 kristaps 865: break;
866: case (NODE_FUNCPROTOTYPE):
867: pnode_printfuncprototype(p, pn);
1.4 kristaps 868: pnode_unlinksub(pn);
1.3 kristaps 869: break;
1.1 kristaps 870: case (NODE_FUNCSYNOPSISINFO):
871: fputs(".Fd ", stdout);
872: pnode_printmacroline(p, pn);
1.4 kristaps 873: pnode_unlinksub(pn);
1.1 kristaps 874: break;
875: case (NODE_PARA):
876: /* FIXME: not always. */
877: puts(".Pp");
1.3 kristaps 878: break;
879: case (NODE_PARAMETER):
880: fputs(".Fa \"", stdout);
881: pnode_printmacrolinepart(p, pn);
882: puts("\"");
1.4 kristaps 883: pnode_unlinksub(pn);
1.1 kristaps 884: break;
885: case (NODE_PROGRAMLISTING):
886: puts(".Bd -literal");
887: break;
888: case (NODE_REFMETA):
889: pnode_printrefmeta(p, pn);
1.4 kristaps 890: pnode_unlinksub(pn);
1.6 ! kristaps 891: p->flags |= PARSE_HAS_META;
1.1 kristaps 892: break;
893: case (NODE_REFNAME):
894: fputs(".Nm ", stdout);
895: pnode_printmacroline(p, pn);
1.4 kristaps 896: pnode_unlinksub(pn);
1.1 kristaps 897: return;
898: case (NODE_REFNAMEDIV):
1.6 ! kristaps 899: if ( ! (PARSE_HAS_META & p->flags)) {
! 900: puts(".Dd $Mdocdate" "$");
! 901: puts(".Dt UNKNOWN 1");
! 902: puts(".Os");
! 903: }
1.1 kristaps 904: puts(".Sh NAME");
905: break;
906: case (NODE_REFPURPOSE):
907: fputs(".Nd ", stdout);
908: pnode_printmacroline(p, pn);
1.4 kristaps 909: pnode_unlinksub(pn);
1.1 kristaps 910: return;
911: case (NODE_REFSYNOPSISDIV):
1.6 ! kristaps 912: pnode_printrefsynopsisdiv(p, pn);
1.1 kristaps 913: break;
914: case (NODE_REFSECT1):
915: pnode_printrefsect(p, pn);
916: break;
917: case (NODE_TEXT):
918: bufclear(p);
919: bufappend(p, pn);
920: /*
921: * Output all characters, squeezing out whitespace
922: * between newlines.
923: * XXX: all whitespace, including tabs (?).
924: * Remember to escape control characters and escapes.
925: */
926: for (last = '\n', cp = p->b; '\0' != *cp; ) {
927: if ('\n' == last) {
928: /* Consume all whitespace. */
929: if (isspace((int)*cp)) {
930: while (isspace((int)*cp))
931: cp++;
932: continue;
933: } else if ('\'' == *cp || '.' == *cp)
934: fputs("\\&", stdout);
935: }
936: putchar(last = *cp++);
937: /* If we're a character escape, escape us. */
938: if ('\\' == last)
939: putchar('e');
940: }
941: if ('\n' != last)
942: putchar('\n');
943: break;
944: default:
945: break;
946: }
947:
948: TAILQ_FOREACH(pp, &pn->childq, child)
949: pnode_print(p, pp);
950:
951: switch (pn->node) {
952: case (NODE_PROGRAMLISTING):
953: puts(".Ed");
954: break;
955: default:
956: break;
957: }
958: }
959:
960: /*
961: * Loop around the read buffer until we've drained it of all data.
962: * Invoke the parser context with each buffer fill.
963: */
964: static int
965: readfile(XML_Parser xp, int fd,
966: char *b, size_t bsz, const char *fn)
967: {
968: struct parse p;
969: int rc;
970: ssize_t ssz;
971:
972: memset(&p, 0, sizeof(struct parse));
973:
974: p.b = malloc(p.bsz = p.mbsz = 1024);
975:
976: XML_SetCharacterDataHandler(xp, xml_char);
977: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
978: XML_SetUserData(xp, &p);
979:
980: while ((ssz = read(fd, b, bsz)) >= 0) {
981: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
982: fprintf(stderr, "%s: %s\n", fn,
983: XML_ErrorString
984: (XML_GetErrorCode(xp)));
985: else if ( ! p.stop && ssz > 0)
986: continue;
987: /*
988: * Exit when we've read all or errors have occured
989: * during the parse sequence.
990: */
991: pnode_print(&p, p.root);
992: pnode_free(p.root);
993: free(p.b);
994: return(0 != rc && ! p.stop);
995: }
996:
997: /* Read error has occured. */
998: perror(fn);
999: pnode_free(p.root);
1000: free(p.b);
1001: return(0);
1002: }
1003:
1004: int
1005: main(int argc, char *argv[])
1006: {
1007: XML_Parser xp;
1008: const char *fname;
1009: char *buf;
1010: int fd, rc;
1011:
1012: fname = "-";
1013: xp = NULL;
1014: buf = NULL;
1015: rc = 0;
1016:
1017: if (-1 != getopt(argc, argv, ""))
1018: return(EXIT_FAILURE);
1019:
1020: argc -= optind;
1021: argv += optind;
1022:
1023: if (argc > 1)
1024: return(EXIT_FAILURE);
1025: else if (argc > 0)
1026: fname = argv[0];
1027:
1028: /* Read from stdin or a file. */
1029: fd = 0 == strcmp(fname, "-") ?
1030: STDIN_FILENO : open(fname, O_RDONLY, 0);
1031:
1032: /*
1033: * Open file for reading.
1034: * Allocate a read buffer.
1035: * Create the parser context.
1036: * Dive directly into the parse.
1037: */
1038: if (-1 == fd)
1039: perror(fname);
1040: else if (NULL == (buf = malloc(4096)))
1041: perror(NULL);
1042: else if (NULL == (xp = XML_ParserCreate(NULL)))
1043: perror(NULL);
1044: else if ( ! readfile(xp, fd, buf, 4096, fname))
1045: rc = 1;
1046:
1047: XML_ParserFree(xp);
1048: free(buf);
1049: if (STDIN_FILENO != fd)
1050: close(fd);
1051: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1052: }
CVSweb