Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.5
1.5 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.4 2014/03/28 10:00:40 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
#include <sys/queue.h>

#include <assert.h>
#include <ctype.h>
#include <expat.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
27:
/*
 * Identifiers for every node type the parser recognises.
 * NODE_ROOT has to stay first (value zero); the entries after it are
 * kept in alphabetical order.
 * NODE__MAX is the number of identifiers and sizes the "nodes" table.
 */
enum nodeid {
	NODE_ROOT = 0,		/* no element: outside of any tree */
	NODE_ARG,
	NODE_CITEREFENTRY,
	NODE_CMDSYNOPSIS,
	NODE_CODE,
	NODE_COMMAND,
	NODE_FUNCDEF,
	NODE_FUNCPROTOTYPE,
	NODE_FUNCSYNOPSIS,
	NODE_FUNCSYNOPSISINFO,
	NODE_FUNCTION,
	NODE_MANVOLNUM,
	NODE_OPTION,
	NODE_PARA,
	NODE_PARAMDEF,
	NODE_PARAMETER,
	NODE_PROGRAMLISTING,
	NODE_REFCLASS,
	NODE_REFDESCRIPTOR,
	NODE_REFENTRY,
	NODE_REFENTRYTITLE,
	NODE_REFMETA,
	NODE_REFMISCINFO,
	NODE_REFNAME,
	NODE_REFNAMEDIV,
	NODE_REFPURPOSE,
	NODE_REFSECT1,
	NODE_REFSYNOPSISDIV,
	NODE_SYNOPSIS,
	NODE_TEXT,		/* character data, not an XML element */
	NODE_TITLE,
	NODE__MAX		/* sentinel: count of identifiers */
};
66:
67: /*
68: * Global parse state.
69: * Keep this as simple and small as possible.
70: */
71: struct parse {
72: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
73: int stop; /* should we stop now? */
74: struct pnode *root; /* root of parse tree */
75: struct pnode *cur; /* current node in tree */
76: char *b;
77: size_t bsz;
78: size_t mbsz;
79: };
80:
/*
 * Static description of one node type.
 */
struct node {
	const char	*name;	/* element name compared against XML tags */
	unsigned int	 flags;	/* bit-mask of the NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
86:
87: TAILQ_HEAD(pnodeq, pnode);
88:
89: struct pnode {
90: enum nodeid node; /* node type */
91: char *b; /* binary data buffer */
92: size_t bsz; /* data buffer size */
93: struct pnode *parent; /* parent (or NULL if top) */
94: struct pnodeq childq; /* queue of children */
95: TAILQ_ENTRY(pnode) child;
96: };
97:
98: static const struct node nodes[NODE__MAX] = {
99: { NULL, 0 },
1.4 kristaps 100: { "arg", 0 },
1.1 kristaps 101: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 102: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 103: { "code", 0 },
1.4 kristaps 104: { "command", 0 },
1.3 kristaps 105: { "funcdef", 0 },
106: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 107: { "funcsynopsis", NODE_IGNTEXT },
108: { "funcsynopsisinfo", 0 },
1.3 kristaps 109: { "function", 0 },
1.1 kristaps 110: { "manvolnum", 0 },
1.4 kristaps 111: { "option", 0 },
1.1 kristaps 112: { "para", 0 },
1.3 kristaps 113: { "paramdef", 0 },
114: { "parameter", 0 },
1.1 kristaps 115: { "programlisting", 0 },
116: { "refclass", NODE_IGNTEXT },
117: { "refdescriptor", NODE_IGNTEXT },
118: { "refentry", NODE_IGNTEXT },
119: { "refentrytitle", 0 },
120: { "refmeta", NODE_IGNTEXT },
121: { "refmiscinfo", NODE_IGNTEXT },
122: { "refname", 0 },
123: { "refnamediv", NODE_IGNTEXT },
124: { "refpurpose", 0 },
125: { "refsect1", 0 },
126: { "refsynopsisdiv", NODE_IGNTEXT },
127: { "synopsis", 0 },
128: { NULL, 0 },
129: { "title", 0 },
130: };
131:
132: /*
133: * Look up whether "parent" is a valid parent for "node".
134: */
135: static int
136: isparent(enum nodeid node, enum nodeid parent)
137: {
138:
139: switch (node) {
140: case (NODE_ROOT):
141: return(0);
1.4 kristaps 142: case (NODE_ARG):
143: switch (parent) {
144: case (NODE_ARG):
145: case (NODE_CMDSYNOPSIS):
146: return(1);
147: default:
148: break;
149: }
150: return(0);
1.1 kristaps 151: case (NODE_CITEREFENTRY):
152: switch (parent) {
153: case (NODE_FUNCSYNOPSISINFO):
154: case (NODE_PARA):
155: case (NODE_PROGRAMLISTING):
156: case (NODE_REFDESCRIPTOR):
157: case (NODE_REFENTRYTITLE):
158: case (NODE_REFNAME):
159: case (NODE_REFPURPOSE):
160: case (NODE_SYNOPSIS):
161: case (NODE_TITLE):
162: return(1);
163: default:
164: break;
165: }
166: return(0);
1.4 kristaps 167: case (NODE_CMDSYNOPSIS):
168: switch (parent) {
169: case (NODE_PARA):
170: case (NODE_REFSECT1):
171: case (NODE_REFSYNOPSISDIV):
172: return(1);
173: default:
174: break;
175: }
176: return(0);
1.1 kristaps 177: case (NODE_CODE):
178: switch (parent) {
179: case (NODE_FUNCSYNOPSISINFO):
180: case (NODE_PARA):
181: case (NODE_PROGRAMLISTING):
182: case (NODE_REFDESCRIPTOR):
183: case (NODE_REFENTRYTITLE):
184: case (NODE_REFNAME):
185: case (NODE_REFPURPOSE):
186: case (NODE_SYNOPSIS):
187: case (NODE_TITLE):
188: return(1);
189: default:
190: break;
191: }
192: return(0);
1.4 kristaps 193: case (NODE_COMMAND):
194: switch (parent) {
195: case (NODE_CMDSYNOPSIS):
196: case (NODE_FUNCSYNOPSISINFO):
197: case (NODE_PARA):
198: case (NODE_PROGRAMLISTING):
199: case (NODE_REFDESCRIPTOR):
200: case (NODE_REFENTRYTITLE):
201: case (NODE_REFNAME):
202: case (NODE_REFPURPOSE):
203: case (NODE_SYNOPSIS):
204: case (NODE_TITLE):
205: return(1);
206: default:
207: break;
208: }
209: return(0);
1.3 kristaps 210: case (NODE_FUNCDEF):
211: return(NODE_FUNCPROTOTYPE == parent);
212: case (NODE_FUNCPROTOTYPE):
213: return(NODE_FUNCSYNOPSIS == parent);
214: case (NODE_FUNCSYNOPSIS):
215: switch (parent) {
216: case (NODE_PARA):
217: case (NODE_REFSECT1):
218: case (NODE_REFSYNOPSISDIV):
219: return(1);
220: default:
221: break;
222: }
223: return(0);
224: case (NODE_FUNCSYNOPSISINFO):
225: return(NODE_FUNCSYNOPSIS == parent);
226: case (NODE_FUNCTION):
227: switch (parent) {
228: case (NODE_CODE):
229: case (NODE_FUNCDEF):
230: case (NODE_FUNCSYNOPSISINFO):
231: case (NODE_PARA):
1.4 kristaps 232: case (NODE_PROGRAMLISTING):
1.3 kristaps 233: case (NODE_REFDESCRIPTOR):
234: case (NODE_REFENTRYTITLE):
235: case (NODE_REFNAME):
236: case (NODE_REFPURPOSE):
237: case (NODE_SYNOPSIS):
238: case (NODE_TITLE):
239: return(1);
240: default:
241: break;
242: }
243: return(0);
1.1 kristaps 244: case (NODE_MANVOLNUM):
245: switch (parent) {
246: case (NODE_CITEREFENTRY):
247: case (NODE_REFMETA):
248: return(1);
249: default:
250: break;
251: }
252: return(0);
1.4 kristaps 253: case (NODE_OPTION):
254: switch (parent) {
255: case (NODE_ARG):
256: case (NODE_FUNCSYNOPSISINFO):
257: case (NODE_PARA):
258: case (NODE_PROGRAMLISTING):
259: case (NODE_REFDESCRIPTOR):
260: case (NODE_REFENTRYTITLE):
261: case (NODE_REFNAME):
262: case (NODE_REFPURPOSE):
263: case (NODE_SYNOPSIS):
264: case (NODE_TITLE):
265: return(1);
266: default:
267: break;
268: }
269: return(0);
1.3 kristaps 270: case (NODE_PARA):
1.1 kristaps 271: switch (parent) {
272: case (NODE_REFSECT1):
273: case (NODE_REFSYNOPSISDIV):
274: return(1);
275: default:
276: break;
277: }
278: return(0);
1.3 kristaps 279: case (NODE_PARAMDEF):
280: return(NODE_FUNCPROTOTYPE == parent);
281: case (NODE_PARAMETER):
1.1 kristaps 282: switch (parent) {
1.3 kristaps 283: case (NODE_CODE):
284: case (NODE_FUNCSYNOPSISINFO):
285: case (NODE_PARA):
286: case (NODE_PARAMDEF):
1.4 kristaps 287: case (NODE_PROGRAMLISTING):
1.3 kristaps 288: case (NODE_REFDESCRIPTOR):
289: case (NODE_REFENTRYTITLE):
290: case (NODE_REFNAME):
291: case (NODE_REFPURPOSE):
292: case (NODE_SYNOPSIS):
293: case (NODE_TITLE):
1.1 kristaps 294: return(1);
295: default:
296: break;
297: }
298: return(0);
299: case (NODE_PROGRAMLISTING):
300: switch (parent) {
301: case (NODE_PARA):
302: case (NODE_REFSECT1):
303: case (NODE_REFSYNOPSISDIV):
304: return(1);
305: default:
306: break;
307: }
308: return(0);
309: case (NODE_REFCLASS):
310: return(parent == NODE_REFNAMEDIV);
311: case (NODE_REFDESCRIPTOR):
312: return(parent == NODE_REFNAMEDIV);
313: case (NODE_REFENTRY):
314: return(parent == NODE_ROOT);
315: case (NODE_REFENTRYTITLE):
316: switch (parent) {
317: case (NODE_CITEREFENTRY):
318: case (NODE_REFMETA):
319: return(1);
320: default:
321: break;
322: }
323: case (NODE_REFMETA):
324: return(parent == NODE_REFENTRY);
325: case (NODE_REFMISCINFO):
326: return(parent == NODE_REFMETA);
327: case (NODE_REFNAME):
328: return(parent == NODE_REFNAMEDIV);
329: case (NODE_REFNAMEDIV):
330: return(parent == NODE_REFENTRY);
331: case (NODE_REFPURPOSE):
332: return(parent == NODE_REFNAMEDIV);
333: case (NODE_REFSECT1):
334: return(parent == NODE_REFENTRY);
335: case (NODE_REFSYNOPSISDIV):
336: return(parent == NODE_REFENTRY);
337: case (NODE_SYNOPSIS):
338: switch (parent) {
339: case (NODE_REFSYNOPSISDIV):
340: case (NODE_REFSECT1):
341: return(1);
342: default:
343: break;
344: }
345: return(0);
346: case (NODE_TITLE):
347: switch (parent) {
348: case (NODE_REFSECT1):
349: case (NODE_REFSYNOPSISDIV):
350: return(1);
351: default:
352: break;
353: }
354: return(0);
355: case (NODE_TEXT):
356: return(1);
357: case (NODE__MAX):
358: break;
359: }
360:
361: abort();
362: return(0);
363: }
364:
365: static void
366: xml_char(void *arg, const XML_Char *p, int sz)
367: {
368: struct parse *ps = arg;
369: struct pnode *dat;
1.4 kristaps 370: int i;
1.1 kristaps 371:
372: /* Stopped or no tree yet. */
373: if (ps->stop || NODE_ROOT == ps->node)
374: return;
375:
376: /* Not supposed to be collecting text. */
377: assert(NULL != ps->cur);
378: if (NODE_IGNTEXT & nodes[ps->node].flags)
379: return;
380:
381: /*
382: * Are we in the midst of processing text?
383: * If we're not processing text right now, then create a text
384: * node for doing so.
1.4 kristaps 385: * However, don't do so unless we have some non-whitespace to
386: * process!
1.1 kristaps 387: */
388: if (NODE_TEXT != ps->node) {
1.4 kristaps 389: for (i = 0; i < sz; i++)
390: if ( ! isspace((int)p[i]))
391: break;
392: if (i == sz)
393: return;
1.1 kristaps 394: dat = calloc(1, sizeof(struct pnode));
395: if (NULL == dat) {
396: perror(NULL);
397: exit(EXIT_FAILURE);
398: }
399:
400: dat->node = ps->node = NODE_TEXT;
401: dat->parent = ps->cur;
402: TAILQ_INIT(&dat->childq);
403: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
404: ps->cur = dat;
405: assert(NULL != ps->root);
406:
407: }
408:
409: /* Append to current buffer. */
410: assert(sz >= 0);
411: ps->cur->b = realloc(ps->cur->b,
412: ps->cur->bsz + (size_t)sz);
413: if (NULL == ps->cur->b) {
414: perror(NULL);
415: exit(EXIT_FAILURE);
416: }
417: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
418: ps->cur->bsz += (size_t)sz;
419: }
420:
421: /*
422: * Begin an element.
423: * First, look for the element.
424: * If we don't find it and we're not parsing, keep going.
425: * If we don't find it (and we're parsing), puke and exit.
426: * If we find it but we're not parsing yet (i.e., it's not a refentry
427: * and thus out of context), keep going.
428: * If we're at the root and already have a tree, puke and exit.
429: * Make sure that the element is in the right context.
430: * Lastly, put the node onto our parse tree and continue.
431: */
432: static void
433: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
434: {
435: struct parse *ps = arg;
436: enum nodeid node;
437: struct pnode *dat;
438:
439: if (ps->stop)
440: return;
441:
442: /* Close out text node, if applicable... */
443: if (NODE_TEXT == ps->node) {
444: assert(NULL != ps->cur);
445: ps->cur = ps->cur->parent;
446: assert(NULL != ps->cur);
447: ps->node = ps->cur->node;
448: }
449:
450: for (node = 0; node < NODE__MAX; node++)
451: if (NULL == nodes[node].name)
452: continue;
453: else if (0 == strcmp(nodes[node].name, name))
454: break;
455:
456: if (NODE__MAX == node && NODE_ROOT == ps->node) {
457: fprintf(stderr, "%s: ignoring node\n", name);
458: return;
459: } else if (NODE__MAX == node) {
460: fprintf(stderr, "%s: unknown node\n", name);
461: ps->stop = 1;
462: return;
463: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
464: fprintf(stderr, "%s: reentering?\n", name);
465: ps->stop = 1;
466: return;
467: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
468: fprintf(stderr, "%s: known node w/o context\n", name);
469: return;
470: } else if ( ! isparent(node, ps->node)) {
471: fprintf(stderr, "%s: bad parent\n", name);
472: ps->stop = 1;
473: return;
474: }
475:
476: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
477: perror(NULL);
478: exit(EXIT_FAILURE);
479: }
480:
481: dat->node = ps->node = node;
482: dat->parent = ps->cur;
483: TAILQ_INIT(&dat->childq);
484:
485: if (NULL != ps->cur)
486: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
487:
488: ps->cur = dat;
489: if (NULL == ps->root)
490: ps->root = dat;
491: }
492:
493: /*
494: * Roll up the parse tree.
495: * Does nothing else special.
496: * If we hit the root, then assign ourselves as the NODE_ROOT.
497: */
498: static void
499: xml_elem_end(void *arg, const XML_Char *name)
500: {
501: struct parse *ps = arg;
502:
503: if (ps->stop || NODE_ROOT == ps->node)
504: return;
505:
506: /* Close out text node, if applicable... */
507: if (NODE_TEXT == ps->node) {
508: assert(NULL != ps->cur);
509: ps->cur = ps->cur->parent;
510: assert(NULL != ps->cur);
511: ps->node = ps->cur->node;
512: }
513:
514: if (NULL == (ps->cur = ps->cur->parent))
515: ps->node = NODE_ROOT;
516: else
517: ps->node = ps->cur->node;
518: }
519:
520: static void
521: pnode_free(struct pnode *pn)
522: {
523: struct pnode *pp;
524:
525: if (NULL == pn)
526: return;
527:
528: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
529: TAILQ_REMOVE(&pn->childq, pp, child);
530: pnode_free(pp);
531: }
532:
533: free(pn->b);
534: free(pn);
535: }
536:
537: static void
538: pnode_unlink(struct pnode *pn)
539: {
540:
541: if (NULL != pn->parent)
542: TAILQ_REMOVE(&pn->parent->childq, pn, child);
543: pnode_free(pn);
544: }
545:
546: static void
1.4 kristaps 547: pnode_unlinksub(struct pnode *pn)
548: {
549:
550: while ( ! TAILQ_EMPTY(&pn->childq))
551: pnode_unlink(TAILQ_FIRST(&pn->childq));
552: }
553:
554: static void
1.1 kristaps 555: bufclear(struct parse *p)
556: {
557:
558: p->b[p->bsz = 0] = '\0';
559: }
560:
561: static void
562: bufappend(struct parse *p, struct pnode *pn)
563: {
564:
565: assert(NODE_TEXT == pn->node);
566: if (p->bsz + pn->bsz + 1 > p->mbsz) {
567: p->mbsz = p->bsz + pn->bsz + 1;
568: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
569: perror(NULL);
570: exit(EXIT_FAILURE);
571: }
572: }
573: memcpy(p->b + p->bsz, pn->b, pn->bsz);
574: p->bsz += pn->bsz;
575: p->b[p->bsz] = '\0';
576: }
577:
1.3 kristaps 578: static void
579: bufappend_r(struct parse *p, struct pnode *pn)
580: {
581: struct pnode *pp;
582:
583: if (NODE_TEXT == pn->node)
584: bufappend(p, pn);
585: TAILQ_FOREACH(pp, &pn->childq, child)
586: bufappend_r(p, pp);
587: }
588:
1.1 kristaps 589: /*
590: * Print text presumably on a macro line.
591: * Ignore any child macros.
592: * Convert all whitespace to regular spaces.
593: */
594: static void
595: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
596: {
597: char *cp;
598:
599: bufclear(p);
1.3 kristaps 600: bufappend_r(p, pn);
1.1 kristaps 601:
602: /* Convert all space to spaces. */
603: for (cp = p->b; '\0' != *cp; cp++)
604: if (isspace((int)*cp))
605: *cp = ' ';
606:
607: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 608: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 609: for ( ; '\0' != *cp; cp++) {
610: /* Escape us if we look like a macro. */
611: if ((cp == p->b || ' ' == *(cp - 1)) &&
612: isupper((int)*cp) &&
613: '\0' != *(cp + 1) &&
614: islower((int)*(cp + 1)) &&
615: ('\0' == *(cp + 2) ||
616: ' ' == *(cp + 2) ||
617: (islower((int)*(cp + 2)) &&
618: ('\0' == *(cp + 3) ||
619: ' ' == *(cp + 3)))))
620: fputs("\\&", stdout);
621: putchar(*cp);
622: /* If we're a character escape, escape us. */
623: if ('\\' == *cp)
624: putchar('e');
625: }
626: }
627:
/*
 * Same as pnode_printmacrolinepart(), then end the output line.
 * A node without any text yields just the newline.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinepart(p, pn);
	fputc('\n', stdout);
}
639:
640: static void
641: pnode_printrefsect(struct parse *p, struct pnode *pn)
642: {
643: struct pnode *pp;
644:
645: TAILQ_FOREACH(pp, &pn->childq, child)
646: if (NODE_TITLE == pp->node)
647: break;
648:
1.4 kristaps 649: fputs(".Sh ", stdout);
650:
1.5 ! kristaps 651: if (NULL != pp) {
1.1 kristaps 652: pnode_printmacroline(p, pp);
1.5 ! kristaps 653: pnode_unlink(pp);
! 654: } else
1.4 kristaps 655: puts("UNKNOWN");
1.1 kristaps 656: }
657:
658: static void
659: pnode_printciterefentry(struct parse *p, struct pnode *pn)
660: {
661: struct pnode *pp, *title, *manvol;
662:
663: title = manvol = NULL;
664: TAILQ_FOREACH(pp, &pn->childq, child)
665: if (NODE_MANVOLNUM == pp->node)
666: manvol = pp;
667: else if (NODE_REFENTRYTITLE == pp->node)
668: title = pp;
669:
670: fputs(".Xr ", stdout);
1.4 kristaps 671:
1.1 kristaps 672: if (NULL != title) {
673: pnode_printmacrolinepart(p, title);
1.4 kristaps 674: putchar(' ');
1.1 kristaps 675: } else
1.4 kristaps 676: fputs("unknown ", stdout);
677:
678: if (NULL != manvol)
1.1 kristaps 679: pnode_printmacroline(p, manvol);
1.4 kristaps 680: else
1.1 kristaps 681: puts("1");
682: }
683:
684: static void
685: pnode_printrefmeta(struct parse *p, struct pnode *pn)
686: {
687: struct pnode *pp, *title, *manvol;
688:
689: title = manvol = NULL;
690: TAILQ_FOREACH(pp, &pn->childq, child)
691: if (NODE_MANVOLNUM == pp->node)
692: manvol = pp;
693: else if (NODE_REFENTRYTITLE == pp->node)
694: title = pp;
695:
1.2 kristaps 696: puts(".Dd $Mdocdate" "$");
1.1 kristaps 697: fputs(".Dt ", stdout);
698:
699: if (NULL != title) {
700: pnode_printmacrolinepart(p, title);
1.4 kristaps 701: putchar(' ');
1.1 kristaps 702: } else
1.4 kristaps 703: fputs("UNKNOWN ", stdout);
704:
705: if (NULL != manvol)
1.1 kristaps 706: pnode_printmacroline(p, manvol);
1.4 kristaps 707: else
1.1 kristaps 708: puts("1");
709:
710: puts(".Os");
711: }
712:
1.3 kristaps 713: static void
714: pnode_printfuncdef(struct parse *p, struct pnode *pn)
715: {
716: struct pnode *pp, *ftype, *func;
717:
718: ftype = func = NULL;
719: TAILQ_FOREACH(pp, &pn->childq, child)
720: if (NODE_TEXT == pp->node)
721: ftype = pp;
722: else if (NODE_FUNCTION == pp->node)
723: func = pp;
724:
725: if (NULL != ftype) {
726: fputs(".Ft ", stdout);
727: pnode_printmacroline(p, ftype);
728: }
729:
730: if (NULL != func) {
731: fputs(".Fo ", stdout);
732: pnode_printmacroline(p, func);
733: } else
734: puts(".Fo UNKNOWN");
735: }
736:
737: static void
738: pnode_printparamdef(struct parse *p, struct pnode *pn)
739: {
740: struct pnode *pp, *ptype, *param;
741:
742: ptype = param = NULL;
743: TAILQ_FOREACH(pp, &pn->childq, child)
744: if (NODE_TEXT == pp->node)
745: ptype = pp;
746: else if (NODE_PARAMETER == pp->node)
747: param = pp;
748:
749: fputs(".Fa \"", stdout);
750: if (NULL != ptype) {
751: pnode_printmacrolinepart(p, ptype);
752: putchar(' ');
753: }
754:
755: if (NULL != param)
756: pnode_printmacrolinepart(p, param);
757: else
758: fputs("UNKNOWN", stdout);
759:
760: puts("\"");
761: }
762:
763: static void
764: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
765: {
766: struct pnode *pp, *fdef;
767:
768: TAILQ_FOREACH(fdef, &pn->childq, child)
769: if (NODE_FUNCDEF == fdef->node)
770: break;
771:
1.4 kristaps 772: if (NULL != fdef)
1.3 kristaps 773: pnode_printfuncdef(p, fdef);
1.4 kristaps 774: else
1.3 kristaps 775: puts(".Fo UNKNOWN");
776:
1.4 kristaps 777: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 778: if (NODE_PARAMDEF == pp->node)
779: pnode_printparamdef(p, pp);
780:
781: puts(".Fc");
782: }
783:
1.4 kristaps 784: /* TODO: handle "optional" values. */
785: static void
786: pnode_printarg(struct parse *p, struct pnode *pn, int nested)
787: {
788: struct pnode *pp;
789: int sv = nested;
790:
791: if ( ! nested)
792: fputs(".", stdout);
793: nested = 1;
794: TAILQ_FOREACH(pp, &pn->childq, child)
795: if (NODE_OPTION == pp->node) {
796: fputs("Fl ", stdout);
797: pnode_printmacrolinepart(p, pp);
798: } else if (NODE_TEXT == pp->node) {
799: fputs("Ar ", stdout);
800: pnode_printmacrolinepart(p, pp);
801: } else if (NODE_ARG == pp->node)
802: pnode_printarg(p, pp, nested);
803:
804: if ( ! sv)
805: puts("");
806: }
807:
1.1 kristaps 808: /*
809: * Print a parsed node (or ignore it--whatever).
810: * This is a recursive function.
811: * FIXME: macro line continuation?
812: */
813: static void
814: pnode_print(struct parse *p, struct pnode *pn)
815: {
816: struct pnode *pp;
817: char *cp;
818: int last;
819:
820: if (NULL == pn)
821: return;
822:
823: if (NODE_TEXT != pn->node && NODE_ROOT != pn->node)
824: printf(".\\\" %s\n", nodes[pn->node].name);
825:
826: switch (pn->node) {
1.4 kristaps 827: case (NODE_ARG):
828: pnode_printarg(p, pn, 0);
829: pnode_unlinksub(pn);
830: break;
1.1 kristaps 831: case (NODE_CITEREFENTRY):
832: pnode_printciterefentry(p, pn);
1.4 kristaps 833: pnode_unlinksub(pn);
1.1 kristaps 834: break;
835: case (NODE_CODE):
836: fputs(".Li ", stdout);
837: pnode_printmacroline(p, pn);
1.4 kristaps 838: pnode_unlinksub(pn);
839: break;
840: case (NODE_COMMAND):
841: fputs(".Nm ", stdout);
842: pnode_printmacroline(p, pn);
843: pnode_unlinksub(pn);
1.1 kristaps 844: break;
1.3 kristaps 845: case (NODE_FUNCTION):
846: fputs(".Fn ", stdout);
847: pnode_printmacroline(p, pn);
1.4 kristaps 848: pnode_unlinksub(pn);
1.3 kristaps 849: break;
850: case (NODE_FUNCPROTOTYPE):
851: pnode_printfuncprototype(p, pn);
1.4 kristaps 852: pnode_unlinksub(pn);
1.3 kristaps 853: break;
1.1 kristaps 854: case (NODE_FUNCSYNOPSISINFO):
855: fputs(".Fd ", stdout);
856: pnode_printmacroline(p, pn);
1.4 kristaps 857: pnode_unlinksub(pn);
1.1 kristaps 858: break;
859: case (NODE_PARA):
860: /* FIXME: not always. */
861: puts(".Pp");
1.3 kristaps 862: break;
863: case (NODE_PARAMETER):
864: fputs(".Fa \"", stdout);
865: pnode_printmacrolinepart(p, pn);
866: puts("\"");
1.4 kristaps 867: pnode_unlinksub(pn);
1.1 kristaps 868: break;
869: case (NODE_PROGRAMLISTING):
870: puts(".Bd -literal");
871: break;
872: case (NODE_REFMETA):
873: pnode_printrefmeta(p, pn);
1.4 kristaps 874: pnode_unlinksub(pn);
1.1 kristaps 875: break;
876: case (NODE_REFNAME):
877: fputs(".Nm ", stdout);
878: pnode_printmacroline(p, pn);
1.4 kristaps 879: pnode_unlinksub(pn);
1.1 kristaps 880: return;
881: case (NODE_REFNAMEDIV):
882: puts(".Sh NAME");
883: break;
884: case (NODE_REFPURPOSE):
885: fputs(".Nd ", stdout);
886: pnode_printmacroline(p, pn);
1.4 kristaps 887: pnode_unlinksub(pn);
1.1 kristaps 888: return;
889: case (NODE_REFSYNOPSISDIV):
890: puts(".Sh SYNOPSIS");
891: break;
892: case (NODE_REFSECT1):
893: pnode_printrefsect(p, pn);
894: break;
895: case (NODE_TEXT):
896: bufclear(p);
897: bufappend(p, pn);
898: /*
899: * Output all characters, squeezing out whitespace
900: * between newlines.
901: * XXX: all whitespace, including tabs (?).
902: * Remember to escape control characters and escapes.
903: */
904: for (last = '\n', cp = p->b; '\0' != *cp; ) {
905: if ('\n' == last) {
906: /* Consume all whitespace. */
907: if (isspace((int)*cp)) {
908: while (isspace((int)*cp))
909: cp++;
910: continue;
911: } else if ('\'' == *cp || '.' == *cp)
912: fputs("\\&", stdout);
913: }
914: putchar(last = *cp++);
915: /* If we're a character escape, escape us. */
916: if ('\\' == last)
917: putchar('e');
918: }
919: if ('\n' != last)
920: putchar('\n');
921: break;
922: default:
923: break;
924: }
925:
926: TAILQ_FOREACH(pp, &pn->childq, child)
927: pnode_print(p, pp);
928:
929: switch (pn->node) {
930: case (NODE_PROGRAMLISTING):
931: puts(".Ed");
932: break;
933: default:
934: break;
935: }
936: }
937:
938: /*
939: * Loop around the read buffer until we've drained it of all data.
940: * Invoke the parser context with each buffer fill.
941: */
942: static int
943: readfile(XML_Parser xp, int fd,
944: char *b, size_t bsz, const char *fn)
945: {
946: struct parse p;
947: int rc;
948: ssize_t ssz;
949:
950: memset(&p, 0, sizeof(struct parse));
951:
952: p.b = malloc(p.bsz = p.mbsz = 1024);
953:
954: XML_SetCharacterDataHandler(xp, xml_char);
955: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
956: XML_SetUserData(xp, &p);
957:
958: while ((ssz = read(fd, b, bsz)) >= 0) {
959: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
960: fprintf(stderr, "%s: %s\n", fn,
961: XML_ErrorString
962: (XML_GetErrorCode(xp)));
963: else if ( ! p.stop && ssz > 0)
964: continue;
965: /*
966: * Exit when we've read all or errors have occured
967: * during the parse sequence.
968: */
969: pnode_print(&p, p.root);
970: pnode_free(p.root);
971: free(p.b);
972: return(0 != rc && ! p.stop);
973: }
974:
975: /* Read error has occured. */
976: perror(fn);
977: pnode_free(p.root);
978: free(p.b);
979: return(0);
980: }
981:
982: int
983: main(int argc, char *argv[])
984: {
985: XML_Parser xp;
986: const char *fname;
987: char *buf;
988: int fd, rc;
989:
990: fname = "-";
991: xp = NULL;
992: buf = NULL;
993: rc = 0;
994:
995: if (-1 != getopt(argc, argv, ""))
996: return(EXIT_FAILURE);
997:
998: argc -= optind;
999: argv += optind;
1000:
1001: if (argc > 1)
1002: return(EXIT_FAILURE);
1003: else if (argc > 0)
1004: fname = argv[0];
1005:
1006: /* Read from stdin or a file. */
1007: fd = 0 == strcmp(fname, "-") ?
1008: STDIN_FILENO : open(fname, O_RDONLY, 0);
1009:
1010: /*
1011: * Open file for reading.
1012: * Allocate a read buffer.
1013: * Create the parser context.
1014: * Dive directly into the parse.
1015: */
1016: if (-1 == fd)
1017: perror(fname);
1018: else if (NULL == (buf = malloc(4096)))
1019: perror(NULL);
1020: else if (NULL == (xp = XML_ParserCreate(NULL)))
1021: perror(NULL);
1022: else if ( ! readfile(xp, fd, buf, 4096, fname))
1023: rc = 1;
1024:
1025: XML_ParserFree(xp);
1026: free(buf);
1027: if (STDIN_FILENO != fd)
1028: close(fd);
1029: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1030: }
CVSweb