Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.7
1.7 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.6 2014/03/28 10:08:24 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 ! kristaps 27: #include <unistd.h>
1.1 kristaps 28:
/*
 * Identifiers for every node type the converter knows about.
 * The values double as indices into the nodes[] table, so the order
 * here and the order of that table must agree.
 */
enum nodeid {
	NODE_ROOT = 0,		/* must be first: "no node yet" */
	/* Everything below is kept in alphabetical order. */
	NODE_ARG,
	NODE_CITEREFENTRY,
	NODE_CMDSYNOPSIS,
	NODE_CODE,
	NODE_COMMAND,
	NODE_FUNCDEF,
	NODE_FUNCPROTOTYPE,
	NODE_FUNCSYNOPSIS,
	NODE_FUNCSYNOPSISINFO,
	NODE_FUNCTION,
	NODE_MANVOLNUM,
	NODE_OPTION,
	NODE_PARA,
	NODE_PARAMDEF,
	NODE_PARAMETER,
	NODE_PROGRAMLISTING,
	NODE_REFCLASS,
	NODE_REFDESCRIPTOR,
	NODE_REFENTRY,
	NODE_REFENTRYTITLE,
	NODE_REFMETA,
	NODE_REFMISCINFO,
	NODE_REFNAME,
	NODE_REFNAMEDIV,
	NODE_REFPURPOSE,
	NODE_REFSECT1,
	NODE_REFSYNOPSISDIV,
	NODE_SYNOPSIS,
	NODE_TEXT,		/* character data; has no element name */
	NODE_TITLE,
	NODE__MAX		/* number of identifiers; not a real node */
};
67:
68: /*
69: * Global parse state.
70: * Keep this as simple and small as possible.
71: */
72: struct parse {
73: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
74: int stop; /* should we stop now? */
75: struct pnode *root; /* root of parse tree */
76: struct pnode *cur; /* current node in tree */
77: char *b;
78: size_t bsz;
79: size_t mbsz;
80: };
81:
/*
 * Static description of one node type.
 */
struct node {
	const char	*name;	/* element name matched against the XML tag */
	unsigned int	 flags;	/* bit-mask of the NODE_* flags below */
#define	NODE_IGNTEXT	 1	/* ignore all contained text */
};
87:
88: TAILQ_HEAD(pnodeq, pnode);
89:
90: struct pnode {
91: enum nodeid node; /* node type */
92: char *b; /* binary data buffer */
93: size_t bsz; /* data buffer size */
94: struct pnode *parent; /* parent (or NULL if top) */
95: struct pnodeq childq; /* queue of children */
96: TAILQ_ENTRY(pnode) child;
97: };
98:
99: static const struct node nodes[NODE__MAX] = {
100: { NULL, 0 },
1.4 kristaps 101: { "arg", 0 },
1.1 kristaps 102: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 103: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 104: { "code", 0 },
1.4 kristaps 105: { "command", 0 },
1.3 kristaps 106: { "funcdef", 0 },
107: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 108: { "funcsynopsis", NODE_IGNTEXT },
109: { "funcsynopsisinfo", 0 },
1.3 kristaps 110: { "function", 0 },
1.1 kristaps 111: { "manvolnum", 0 },
1.4 kristaps 112: { "option", 0 },
1.1 kristaps 113: { "para", 0 },
1.3 kristaps 114: { "paramdef", 0 },
115: { "parameter", 0 },
1.1 kristaps 116: { "programlisting", 0 },
117: { "refclass", NODE_IGNTEXT },
118: { "refdescriptor", NODE_IGNTEXT },
119: { "refentry", NODE_IGNTEXT },
120: { "refentrytitle", 0 },
121: { "refmeta", NODE_IGNTEXT },
122: { "refmiscinfo", NODE_IGNTEXT },
123: { "refname", 0 },
124: { "refnamediv", NODE_IGNTEXT },
125: { "refpurpose", 0 },
126: { "refsect1", 0 },
127: { "refsynopsisdiv", NODE_IGNTEXT },
128: { "synopsis", 0 },
129: { NULL, 0 },
130: { "title", 0 },
131: };
132:
133: /*
134: * Look up whether "parent" is a valid parent for "node".
135: */
136: static int
137: isparent(enum nodeid node, enum nodeid parent)
138: {
139:
140: switch (node) {
141: case (NODE_ROOT):
142: return(0);
1.4 kristaps 143: case (NODE_ARG):
144: switch (parent) {
145: case (NODE_ARG):
146: case (NODE_CMDSYNOPSIS):
147: return(1);
148: default:
149: break;
150: }
151: return(0);
1.1 kristaps 152: case (NODE_CITEREFENTRY):
153: switch (parent) {
154: case (NODE_FUNCSYNOPSISINFO):
155: case (NODE_PARA):
156: case (NODE_PROGRAMLISTING):
157: case (NODE_REFDESCRIPTOR):
158: case (NODE_REFENTRYTITLE):
159: case (NODE_REFNAME):
160: case (NODE_REFPURPOSE):
161: case (NODE_SYNOPSIS):
162: case (NODE_TITLE):
163: return(1);
164: default:
165: break;
166: }
167: return(0);
1.4 kristaps 168: case (NODE_CMDSYNOPSIS):
169: switch (parent) {
170: case (NODE_PARA):
171: case (NODE_REFSECT1):
172: case (NODE_REFSYNOPSISDIV):
173: return(1);
174: default:
175: break;
176: }
177: return(0);
1.1 kristaps 178: case (NODE_CODE):
179: switch (parent) {
180: case (NODE_FUNCSYNOPSISINFO):
181: case (NODE_PARA):
182: case (NODE_PROGRAMLISTING):
183: case (NODE_REFDESCRIPTOR):
184: case (NODE_REFENTRYTITLE):
185: case (NODE_REFNAME):
186: case (NODE_REFPURPOSE):
187: case (NODE_SYNOPSIS):
188: case (NODE_TITLE):
189: return(1);
190: default:
191: break;
192: }
193: return(0);
1.4 kristaps 194: case (NODE_COMMAND):
195: switch (parent) {
196: case (NODE_CMDSYNOPSIS):
197: case (NODE_FUNCSYNOPSISINFO):
198: case (NODE_PARA):
199: case (NODE_PROGRAMLISTING):
200: case (NODE_REFDESCRIPTOR):
201: case (NODE_REFENTRYTITLE):
202: case (NODE_REFNAME):
203: case (NODE_REFPURPOSE):
204: case (NODE_SYNOPSIS):
205: case (NODE_TITLE):
206: return(1);
207: default:
208: break;
209: }
210: return(0);
1.3 kristaps 211: case (NODE_FUNCDEF):
212: return(NODE_FUNCPROTOTYPE == parent);
213: case (NODE_FUNCPROTOTYPE):
214: return(NODE_FUNCSYNOPSIS == parent);
215: case (NODE_FUNCSYNOPSIS):
216: switch (parent) {
217: case (NODE_PARA):
218: case (NODE_REFSECT1):
219: case (NODE_REFSYNOPSISDIV):
220: return(1);
221: default:
222: break;
223: }
224: return(0);
225: case (NODE_FUNCSYNOPSISINFO):
226: return(NODE_FUNCSYNOPSIS == parent);
227: case (NODE_FUNCTION):
228: switch (parent) {
229: case (NODE_CODE):
230: case (NODE_FUNCDEF):
231: case (NODE_FUNCSYNOPSISINFO):
232: case (NODE_PARA):
1.4 kristaps 233: case (NODE_PROGRAMLISTING):
1.3 kristaps 234: case (NODE_REFDESCRIPTOR):
235: case (NODE_REFENTRYTITLE):
236: case (NODE_REFNAME):
237: case (NODE_REFPURPOSE):
238: case (NODE_SYNOPSIS):
239: case (NODE_TITLE):
240: return(1);
241: default:
242: break;
243: }
244: return(0);
1.1 kristaps 245: case (NODE_MANVOLNUM):
246: switch (parent) {
247: case (NODE_CITEREFENTRY):
248: case (NODE_REFMETA):
249: return(1);
250: default:
251: break;
252: }
253: return(0);
1.4 kristaps 254: case (NODE_OPTION):
255: switch (parent) {
256: case (NODE_ARG):
257: case (NODE_FUNCSYNOPSISINFO):
258: case (NODE_PARA):
259: case (NODE_PROGRAMLISTING):
260: case (NODE_REFDESCRIPTOR):
261: case (NODE_REFENTRYTITLE):
262: case (NODE_REFNAME):
263: case (NODE_REFPURPOSE):
264: case (NODE_SYNOPSIS):
265: case (NODE_TITLE):
266: return(1);
267: default:
268: break;
269: }
270: return(0);
1.3 kristaps 271: case (NODE_PARA):
1.1 kristaps 272: switch (parent) {
273: case (NODE_REFSECT1):
274: case (NODE_REFSYNOPSISDIV):
275: return(1);
276: default:
277: break;
278: }
279: return(0);
1.3 kristaps 280: case (NODE_PARAMDEF):
281: return(NODE_FUNCPROTOTYPE == parent);
282: case (NODE_PARAMETER):
1.1 kristaps 283: switch (parent) {
1.3 kristaps 284: case (NODE_CODE):
285: case (NODE_FUNCSYNOPSISINFO):
286: case (NODE_PARA):
287: case (NODE_PARAMDEF):
1.4 kristaps 288: case (NODE_PROGRAMLISTING):
1.3 kristaps 289: case (NODE_REFDESCRIPTOR):
290: case (NODE_REFENTRYTITLE):
291: case (NODE_REFNAME):
292: case (NODE_REFPURPOSE):
293: case (NODE_SYNOPSIS):
294: case (NODE_TITLE):
1.1 kristaps 295: return(1);
296: default:
297: break;
298: }
299: return(0);
300: case (NODE_PROGRAMLISTING):
301: switch (parent) {
302: case (NODE_PARA):
303: case (NODE_REFSECT1):
304: case (NODE_REFSYNOPSISDIV):
305: return(1);
306: default:
307: break;
308: }
309: return(0);
310: case (NODE_REFCLASS):
311: return(parent == NODE_REFNAMEDIV);
312: case (NODE_REFDESCRIPTOR):
313: return(parent == NODE_REFNAMEDIV);
314: case (NODE_REFENTRY):
315: return(parent == NODE_ROOT);
316: case (NODE_REFENTRYTITLE):
317: switch (parent) {
318: case (NODE_CITEREFENTRY):
319: case (NODE_REFMETA):
320: return(1);
321: default:
322: break;
323: }
324: case (NODE_REFMETA):
325: return(parent == NODE_REFENTRY);
326: case (NODE_REFMISCINFO):
327: return(parent == NODE_REFMETA);
328: case (NODE_REFNAME):
329: return(parent == NODE_REFNAMEDIV);
330: case (NODE_REFNAMEDIV):
331: return(parent == NODE_REFENTRY);
332: case (NODE_REFPURPOSE):
333: return(parent == NODE_REFNAMEDIV);
334: case (NODE_REFSECT1):
335: return(parent == NODE_REFENTRY);
336: case (NODE_REFSYNOPSISDIV):
337: return(parent == NODE_REFENTRY);
338: case (NODE_SYNOPSIS):
339: switch (parent) {
340: case (NODE_REFSYNOPSISDIV):
341: case (NODE_REFSECT1):
342: return(1);
343: default:
344: break;
345: }
346: return(0);
347: case (NODE_TITLE):
348: switch (parent) {
349: case (NODE_REFSECT1):
350: case (NODE_REFSYNOPSISDIV):
351: return(1);
352: default:
353: break;
354: }
355: return(0);
356: case (NODE_TEXT):
357: return(1);
358: case (NODE__MAX):
359: break;
360: }
361:
362: abort();
363: return(0);
364: }
365:
366: static void
367: xml_char(void *arg, const XML_Char *p, int sz)
368: {
369: struct parse *ps = arg;
370: struct pnode *dat;
1.4 kristaps 371: int i;
1.1 kristaps 372:
373: /* Stopped or no tree yet. */
374: if (ps->stop || NODE_ROOT == ps->node)
375: return;
376:
377: /* Not supposed to be collecting text. */
378: assert(NULL != ps->cur);
379: if (NODE_IGNTEXT & nodes[ps->node].flags)
380: return;
381:
382: /*
383: * Are we in the midst of processing text?
384: * If we're not processing text right now, then create a text
385: * node for doing so.
1.4 kristaps 386: * However, don't do so unless we have some non-whitespace to
387: * process!
1.1 kristaps 388: */
389: if (NODE_TEXT != ps->node) {
1.4 kristaps 390: for (i = 0; i < sz; i++)
391: if ( ! isspace((int)p[i]))
392: break;
393: if (i == sz)
394: return;
1.1 kristaps 395: dat = calloc(1, sizeof(struct pnode));
396: if (NULL == dat) {
397: perror(NULL);
398: exit(EXIT_FAILURE);
399: }
400:
401: dat->node = ps->node = NODE_TEXT;
402: dat->parent = ps->cur;
403: TAILQ_INIT(&dat->childq);
404: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
405: ps->cur = dat;
406: assert(NULL != ps->root);
407:
408: }
409:
410: /* Append to current buffer. */
411: assert(sz >= 0);
412: ps->cur->b = realloc(ps->cur->b,
413: ps->cur->bsz + (size_t)sz);
414: if (NULL == ps->cur->b) {
415: perror(NULL);
416: exit(EXIT_FAILURE);
417: }
418: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
419: ps->cur->bsz += (size_t)sz;
420: }
421:
422: /*
423: * Begin an element.
424: * First, look for the element.
425: * If we don't find it and we're not parsing, keep going.
426: * If we don't find it (and we're parsing), puke and exit.
427: * If we find it but we're not parsing yet (i.e., it's not a refentry
428: * and thus out of context), keep going.
429: * If we're at the root and already have a tree, puke and exit.
430: * Make sure that the element is in the right context.
431: * Lastly, put the node onto our parse tree and continue.
432: */
433: static void
434: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
435: {
436: struct parse *ps = arg;
437: enum nodeid node;
438: struct pnode *dat;
439:
440: if (ps->stop)
441: return;
442:
443: /* Close out text node, if applicable... */
444: if (NODE_TEXT == ps->node) {
445: assert(NULL != ps->cur);
446: ps->cur = ps->cur->parent;
447: assert(NULL != ps->cur);
448: ps->node = ps->cur->node;
449: }
450:
451: for (node = 0; node < NODE__MAX; node++)
452: if (NULL == nodes[node].name)
453: continue;
454: else if (0 == strcmp(nodes[node].name, name))
455: break;
456:
457: if (NODE__MAX == node && NODE_ROOT == ps->node) {
458: fprintf(stderr, "%s: ignoring node\n", name);
459: return;
460: } else if (NODE__MAX == node) {
461: fprintf(stderr, "%s: unknown node\n", name);
462: ps->stop = 1;
463: return;
464: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
465: fprintf(stderr, "%s: reentering?\n", name);
466: ps->stop = 1;
467: return;
468: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
469: fprintf(stderr, "%s: known node w/o context\n", name);
470: return;
471: } else if ( ! isparent(node, ps->node)) {
472: fprintf(stderr, "%s: bad parent\n", name);
473: ps->stop = 1;
474: return;
475: }
476:
477: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
478: perror(NULL);
479: exit(EXIT_FAILURE);
480: }
481:
482: dat->node = ps->node = node;
483: dat->parent = ps->cur;
484: TAILQ_INIT(&dat->childq);
485:
486: if (NULL != ps->cur)
487: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
488:
489: ps->cur = dat;
490: if (NULL == ps->root)
491: ps->root = dat;
492: }
493:
494: /*
495: * Roll up the parse tree.
496: * Does nothing else special.
497: * If we hit the root, then assign ourselves as the NODE_ROOT.
498: */
499: static void
500: xml_elem_end(void *arg, const XML_Char *name)
501: {
502: struct parse *ps = arg;
503:
504: if (ps->stop || NODE_ROOT == ps->node)
505: return;
506:
507: /* Close out text node, if applicable... */
508: if (NODE_TEXT == ps->node) {
509: assert(NULL != ps->cur);
510: ps->cur = ps->cur->parent;
511: assert(NULL != ps->cur);
512: ps->node = ps->cur->node;
513: }
514:
515: if (NULL == (ps->cur = ps->cur->parent))
516: ps->node = NODE_ROOT;
517: else
518: ps->node = ps->cur->node;
519: }
520:
521: static void
522: pnode_free(struct pnode *pn)
523: {
524: struct pnode *pp;
525:
526: if (NULL == pn)
527: return;
528:
529: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
530: TAILQ_REMOVE(&pn->childq, pp, child);
531: pnode_free(pp);
532: }
533:
534: free(pn->b);
535: free(pn);
536: }
537:
538: static void
539: pnode_unlink(struct pnode *pn)
540: {
541:
542: if (NULL != pn->parent)
543: TAILQ_REMOVE(&pn->parent->childq, pn, child);
544: pnode_free(pn);
545: }
546:
547: static void
1.4 kristaps 548: pnode_unlinksub(struct pnode *pn)
549: {
550:
551: while ( ! TAILQ_EMPTY(&pn->childq))
552: pnode_unlink(TAILQ_FIRST(&pn->childq));
553: }
554:
555: static void
1.1 kristaps 556: bufclear(struct parse *p)
557: {
558:
559: p->b[p->bsz = 0] = '\0';
560: }
561:
562: static void
563: bufappend(struct parse *p, struct pnode *pn)
564: {
565:
566: assert(NODE_TEXT == pn->node);
567: if (p->bsz + pn->bsz + 1 > p->mbsz) {
568: p->mbsz = p->bsz + pn->bsz + 1;
569: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
570: perror(NULL);
571: exit(EXIT_FAILURE);
572: }
573: }
574: memcpy(p->b + p->bsz, pn->b, pn->bsz);
575: p->bsz += pn->bsz;
576: p->b[p->bsz] = '\0';
577: }
578:
1.3 kristaps 579: static void
580: bufappend_r(struct parse *p, struct pnode *pn)
581: {
582: struct pnode *pp;
583:
584: if (NODE_TEXT == pn->node)
585: bufappend(p, pn);
586: TAILQ_FOREACH(pp, &pn->childq, child)
587: bufappend_r(p, pp);
588: }
589:
1.1 kristaps 590: /*
591: * Print text presumably on a macro line.
592: * Ignore any child macros.
593: * Convert all whitespace to regular spaces.
594: */
595: static void
596: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
597: {
598: char *cp;
599:
600: bufclear(p);
1.3 kristaps 601: bufappend_r(p, pn);
1.1 kristaps 602:
603: /* Convert all space to spaces. */
604: for (cp = p->b; '\0' != *cp; cp++)
605: if (isspace((int)*cp))
606: *cp = ' ';
607:
608: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 609: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 610: for ( ; '\0' != *cp; cp++) {
611: /* Escape us if we look like a macro. */
612: if ((cp == p->b || ' ' == *(cp - 1)) &&
613: isupper((int)*cp) &&
614: '\0' != *(cp + 1) &&
615: islower((int)*(cp + 1)) &&
616: ('\0' == *(cp + 2) ||
617: ' ' == *(cp + 2) ||
618: (islower((int)*(cp + 2)) &&
619: ('\0' == *(cp + 3) ||
620: ' ' == *(cp + 3)))))
621: fputs("\\&", stdout);
622: putchar(*cp);
623: /* If we're a character escape, escape us. */
624: if ('\\' == *cp)
625: putchar('e');
626: }
627: }
628:
/*
 * Same as pnode_printmacrolinepart(), then terminate the line.
 * With no text at all, only the newline is written.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinepart(p, pn);
	fputc('\n', stdout);
}
640:
641: static void
1.6 kristaps 642: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
643: {
644: struct pnode *pp;
645:
646: TAILQ_FOREACH(pp, &pn->childq, child)
647: if (NODE_TITLE == pp->node) {
648: pnode_unlink(pp);
649: break;
650: }
651:
652: puts(".Sh SYNOPSIS");
653: }
654:
655: static void
1.1 kristaps 656: pnode_printrefsect(struct parse *p, struct pnode *pn)
657: {
658: struct pnode *pp;
659:
660: TAILQ_FOREACH(pp, &pn->childq, child)
661: if (NODE_TITLE == pp->node)
662: break;
663:
1.4 kristaps 664: fputs(".Sh ", stdout);
665:
1.5 kristaps 666: if (NULL != pp) {
1.1 kristaps 667: pnode_printmacroline(p, pp);
1.5 kristaps 668: pnode_unlink(pp);
669: } else
1.4 kristaps 670: puts("UNKNOWN");
1.1 kristaps 671: }
672:
673: static void
674: pnode_printciterefentry(struct parse *p, struct pnode *pn)
675: {
676: struct pnode *pp, *title, *manvol;
677:
678: title = manvol = NULL;
679: TAILQ_FOREACH(pp, &pn->childq, child)
680: if (NODE_MANVOLNUM == pp->node)
681: manvol = pp;
682: else if (NODE_REFENTRYTITLE == pp->node)
683: title = pp;
684:
685: fputs(".Xr ", stdout);
1.4 kristaps 686:
1.1 kristaps 687: if (NULL != title) {
688: pnode_printmacrolinepart(p, title);
1.4 kristaps 689: putchar(' ');
1.1 kristaps 690: } else
1.4 kristaps 691: fputs("unknown ", stdout);
692:
693: if (NULL != manvol)
1.1 kristaps 694: pnode_printmacroline(p, manvol);
1.4 kristaps 695: else
1.1 kristaps 696: puts("1");
697: }
698:
699: static void
700: pnode_printrefmeta(struct parse *p, struct pnode *pn)
701: {
702: struct pnode *pp, *title, *manvol;
703:
704: title = manvol = NULL;
705: TAILQ_FOREACH(pp, &pn->childq, child)
706: if (NODE_MANVOLNUM == pp->node)
707: manvol = pp;
708: else if (NODE_REFENTRYTITLE == pp->node)
709: title = pp;
710:
1.2 kristaps 711: puts(".Dd $Mdocdate" "$");
1.1 kristaps 712: fputs(".Dt ", stdout);
713:
714: if (NULL != title) {
1.7 ! kristaps 715: /* FIXME: uppercase. */
1.1 kristaps 716: pnode_printmacrolinepart(p, title);
1.4 kristaps 717: putchar(' ');
1.1 kristaps 718: } else
1.4 kristaps 719: fputs("UNKNOWN ", stdout);
720:
721: if (NULL != manvol)
1.1 kristaps 722: pnode_printmacroline(p, manvol);
1.4 kristaps 723: else
1.1 kristaps 724: puts("1");
725:
726: puts(".Os");
727: }
728:
1.3 kristaps 729: static void
730: pnode_printfuncdef(struct parse *p, struct pnode *pn)
731: {
732: struct pnode *pp, *ftype, *func;
733:
734: ftype = func = NULL;
735: TAILQ_FOREACH(pp, &pn->childq, child)
736: if (NODE_TEXT == pp->node)
737: ftype = pp;
738: else if (NODE_FUNCTION == pp->node)
739: func = pp;
740:
741: if (NULL != ftype) {
742: fputs(".Ft ", stdout);
743: pnode_printmacroline(p, ftype);
744: }
745:
746: if (NULL != func) {
747: fputs(".Fo ", stdout);
748: pnode_printmacroline(p, func);
749: } else
750: puts(".Fo UNKNOWN");
751: }
752:
753: static void
754: pnode_printparamdef(struct parse *p, struct pnode *pn)
755: {
756: struct pnode *pp, *ptype, *param;
757:
758: ptype = param = NULL;
759: TAILQ_FOREACH(pp, &pn->childq, child)
760: if (NODE_TEXT == pp->node)
761: ptype = pp;
762: else if (NODE_PARAMETER == pp->node)
763: param = pp;
764:
765: fputs(".Fa \"", stdout);
766: if (NULL != ptype) {
767: pnode_printmacrolinepart(p, ptype);
768: putchar(' ');
769: }
770:
771: if (NULL != param)
772: pnode_printmacrolinepart(p, param);
773: else
774: fputs("UNKNOWN", stdout);
775:
776: puts("\"");
777: }
778:
779: static void
780: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
781: {
782: struct pnode *pp, *fdef;
783:
784: TAILQ_FOREACH(fdef, &pn->childq, child)
785: if (NODE_FUNCDEF == fdef->node)
786: break;
787:
1.4 kristaps 788: if (NULL != fdef)
1.3 kristaps 789: pnode_printfuncdef(p, fdef);
1.4 kristaps 790: else
1.3 kristaps 791: puts(".Fo UNKNOWN");
792:
1.4 kristaps 793: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 794: if (NODE_PARAMDEF == pp->node)
795: pnode_printparamdef(p, pp);
796:
797: puts(".Fc");
798: }
799:
1.4 kristaps 800: /* TODO: handle "optional" values. */
801: static void
802: pnode_printarg(struct parse *p, struct pnode *pn, int nested)
803: {
804: struct pnode *pp;
805: int sv = nested;
806:
807: if ( ! nested)
808: fputs(".", stdout);
809: nested = 1;
810: TAILQ_FOREACH(pp, &pn->childq, child)
811: if (NODE_OPTION == pp->node) {
812: fputs("Fl ", stdout);
813: pnode_printmacrolinepart(p, pp);
814: } else if (NODE_TEXT == pp->node) {
815: fputs("Ar ", stdout);
816: pnode_printmacrolinepart(p, pp);
817: } else if (NODE_ARG == pp->node)
818: pnode_printarg(p, pp, nested);
819:
820: if ( ! sv)
821: puts("");
822: }
823:
1.7 ! kristaps 824: /*
! 825: * Recursively search and return the first instance of "node".
! 826: */
! 827: static struct pnode *
! 828: pnode_findfirst(struct pnode *pn, enum nodeid node)
! 829: {
! 830: struct pnode *pp, *res;
! 831:
! 832: res = NULL;
! 833: TAILQ_FOREACH(pp, &pn->childq, child) {
! 834: res = pp->node == node ? pp :
! 835: pnode_findfirst(pp, node);
! 836: if (NULL != res)
! 837: break;
! 838: }
! 839:
! 840: return(res);
! 841: }
! 842:
! 843: static void
! 844: pnode_printprologue(struct parse *p, struct pnode *pn)
! 845: {
! 846: struct pnode *pp;
! 847:
! 848: if (NULL != (pp = pnode_findfirst(p->root, NODE_REFMETA))) {
! 849: pnode_printrefmeta(p, pp);
! 850: pnode_unlink(pp);
! 851: } else {
! 852: puts(".\\\" Supplying bogus prologue...");
! 853: puts(".Dd $Mdocdate" "$");
! 854: puts(".Dt UNKNOWN 1");
! 855: puts(".Os");
! 856: }
! 857: }
! 858:
1.1 kristaps 859: /*
860: * Print a parsed node (or ignore it--whatever).
861: * This is a recursive function.
862: * FIXME: macro line continuation?
863: */
864: static void
865: pnode_print(struct parse *p, struct pnode *pn)
866: {
867: struct pnode *pp;
868: char *cp;
869: int last;
870:
871: if (NULL == pn)
872: return;
873:
874: if (NODE_TEXT != pn->node && NODE_ROOT != pn->node)
875: printf(".\\\" %s\n", nodes[pn->node].name);
876:
877: switch (pn->node) {
1.4 kristaps 878: case (NODE_ARG):
879: pnode_printarg(p, pn, 0);
880: pnode_unlinksub(pn);
881: break;
1.1 kristaps 882: case (NODE_CITEREFENTRY):
883: pnode_printciterefentry(p, pn);
1.4 kristaps 884: pnode_unlinksub(pn);
1.1 kristaps 885: break;
886: case (NODE_CODE):
887: fputs(".Li ", stdout);
888: pnode_printmacroline(p, pn);
1.4 kristaps 889: pnode_unlinksub(pn);
890: break;
891: case (NODE_COMMAND):
892: fputs(".Nm ", stdout);
893: pnode_printmacroline(p, pn);
894: pnode_unlinksub(pn);
1.1 kristaps 895: break;
1.3 kristaps 896: case (NODE_FUNCTION):
897: fputs(".Fn ", stdout);
898: pnode_printmacroline(p, pn);
1.4 kristaps 899: pnode_unlinksub(pn);
1.3 kristaps 900: break;
901: case (NODE_FUNCPROTOTYPE):
902: pnode_printfuncprototype(p, pn);
1.4 kristaps 903: pnode_unlinksub(pn);
1.3 kristaps 904: break;
1.1 kristaps 905: case (NODE_FUNCSYNOPSISINFO):
906: fputs(".Fd ", stdout);
907: pnode_printmacroline(p, pn);
1.4 kristaps 908: pnode_unlinksub(pn);
1.1 kristaps 909: break;
910: case (NODE_PARA):
911: /* FIXME: not always. */
912: puts(".Pp");
1.3 kristaps 913: break;
914: case (NODE_PARAMETER):
915: fputs(".Fa \"", stdout);
916: pnode_printmacrolinepart(p, pn);
917: puts("\"");
1.4 kristaps 918: pnode_unlinksub(pn);
1.1 kristaps 919: break;
920: case (NODE_PROGRAMLISTING):
921: puts(".Bd -literal");
922: break;
923: case (NODE_REFMETA):
1.7 ! kristaps 924: abort();
1.1 kristaps 925: break;
926: case (NODE_REFNAME):
927: fputs(".Nm ", stdout);
928: pnode_printmacroline(p, pn);
1.4 kristaps 929: pnode_unlinksub(pn);
1.1 kristaps 930: return;
931: case (NODE_REFNAMEDIV):
932: puts(".Sh NAME");
933: break;
934: case (NODE_REFPURPOSE):
935: fputs(".Nd ", stdout);
936: pnode_printmacroline(p, pn);
1.4 kristaps 937: pnode_unlinksub(pn);
1.1 kristaps 938: return;
939: case (NODE_REFSYNOPSISDIV):
1.6 kristaps 940: pnode_printrefsynopsisdiv(p, pn);
1.1 kristaps 941: break;
942: case (NODE_REFSECT1):
943: pnode_printrefsect(p, pn);
944: break;
945: case (NODE_TEXT):
946: bufclear(p);
947: bufappend(p, pn);
948: /*
949: * Output all characters, squeezing out whitespace
950: * between newlines.
951: * XXX: all whitespace, including tabs (?).
952: * Remember to escape control characters and escapes.
953: */
954: for (last = '\n', cp = p->b; '\0' != *cp; ) {
955: if ('\n' == last) {
956: /* Consume all whitespace. */
957: if (isspace((int)*cp)) {
958: while (isspace((int)*cp))
959: cp++;
960: continue;
961: } else if ('\'' == *cp || '.' == *cp)
962: fputs("\\&", stdout);
963: }
964: putchar(last = *cp++);
965: /* If we're a character escape, escape us. */
966: if ('\\' == last)
967: putchar('e');
968: }
969: if ('\n' != last)
970: putchar('\n');
971: break;
972: default:
973: break;
974: }
975:
976: TAILQ_FOREACH(pp, &pn->childq, child)
977: pnode_print(p, pp);
978:
979: switch (pn->node) {
980: case (NODE_PROGRAMLISTING):
981: puts(".Ed");
982: break;
983: default:
984: break;
985: }
986: }
987:
988: /*
989: * Loop around the read buffer until we've drained it of all data.
990: * Invoke the parser context with each buffer fill.
991: */
992: static int
993: readfile(XML_Parser xp, int fd,
994: char *b, size_t bsz, const char *fn)
995: {
996: struct parse p;
997: int rc;
998: ssize_t ssz;
999:
1000: memset(&p, 0, sizeof(struct parse));
1001:
1002: p.b = malloc(p.bsz = p.mbsz = 1024);
1003:
1004: XML_SetCharacterDataHandler(xp, xml_char);
1005: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1006: XML_SetUserData(xp, &p);
1007:
1008: while ((ssz = read(fd, b, bsz)) >= 0) {
1009: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1010: fprintf(stderr, "%s: %s\n", fn,
1011: XML_ErrorString
1012: (XML_GetErrorCode(xp)));
1013: else if ( ! p.stop && ssz > 0)
1014: continue;
1015: /*
1016: * Exit when we've read all or errors have occured
1017: * during the parse sequence.
1018: */
1.7 ! kristaps 1019: pnode_printprologue(&p, p.root);
1.1 kristaps 1020: pnode_print(&p, p.root);
1021: pnode_free(p.root);
1022: free(p.b);
1023: return(0 != rc && ! p.stop);
1024: }
1025:
1026: /* Read error has occured. */
1027: perror(fn);
1028: pnode_free(p.root);
1029: free(p.b);
1030: return(0);
1031: }
1032:
1033: int
1034: main(int argc, char *argv[])
1035: {
1036: XML_Parser xp;
1037: const char *fname;
1038: char *buf;
1039: int fd, rc;
1040:
1041: fname = "-";
1042: xp = NULL;
1043: buf = NULL;
1044: rc = 0;
1045:
1046: if (-1 != getopt(argc, argv, ""))
1047: return(EXIT_FAILURE);
1048:
1049: argc -= optind;
1050: argv += optind;
1051:
1052: if (argc > 1)
1053: return(EXIT_FAILURE);
1054: else if (argc > 0)
1055: fname = argv[0];
1056:
1057: /* Read from stdin or a file. */
1058: fd = 0 == strcmp(fname, "-") ?
1059: STDIN_FILENO : open(fname, O_RDONLY, 0);
1060:
1061: /*
1062: * Open file for reading.
1063: * Allocate a read buffer.
1064: * Create the parser context.
1065: * Dive directly into the parse.
1066: */
1067: if (-1 == fd)
1068: perror(fname);
1069: else if (NULL == (buf = malloc(4096)))
1070: perror(NULL);
1071: else if (NULL == (xp = XML_ParserCreate(NULL)))
1072: perror(NULL);
1073: else if ( ! readfile(xp, fd, buf, 4096, fname))
1074: rc = 1;
1075:
1076: XML_ParserFree(xp);
1077: free(buf);
1078: if (STDIN_FILENO != fd)
1079: close(fd);
1080: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1081: }
CVSweb