Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.11
1.11 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.10 2014/03/29 10:56:21 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
/*
 * Identifiers for every node type the converter knows about: the
 * recognised DocBook elements plus the synthetic NODE_ROOT and
 * NODE_TEXT entries.
 * The values are used as indices into the "nodes" table, so the order
 * here and the order of that table must agree.
 */
enum	nodeid {
	NODE_ROOT = 0,		/* Must come first. */
	/* Alphabetical order from here on. */
	NODE_ARG,
	NODE_CITEREFENTRY,
	NODE_CMDSYNOPSIS,
	NODE_CODE,
	NODE_COMMAND,
	NODE_FUNCDEF,
	NODE_FUNCPROTOTYPE,
	NODE_FUNCSYNOPSIS,
	NODE_FUNCSYNOPSISINFO,
	NODE_FUNCTION,
	NODE_MANVOLNUM,
	NODE_OPTION,
	NODE_PARA,
	NODE_PARAMDEF,
	NODE_PARAMETER,
	NODE_PROGRAMLISTING,
	NODE_REFCLASS,
	NODE_REFDESCRIPTOR,
	NODE_REFENTRY,
	NODE_REFENTRYTITLE,
	NODE_REFMETA,
	NODE_REFMISCINFO,
	NODE_REFNAME,
	NODE_REFNAMEDIV,
	NODE_REFPURPOSE,
	NODE_REFSECT1,
	NODE_REFSYNOPSISDIV,
	NODE_STRUCTNAME,
	NODE_SYNOPSIS,
	NODE_TEXT,
	NODE_TITLE,
	NODE__MAX		/* Count of identifiers; not a node. */
};
68:
69: /*
70: * Global parse state.
71: * Keep this as simple and small as possible.
72: */
73: struct parse {
74: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
75: int stop; /* should we stop now? */
76: struct pnode *root; /* root of parse tree */
77: struct pnode *cur; /* current node in tree */
1.8 kristaps 78: char *b; /* nil-terminated buffer for pre-print */
79: size_t bsz; /* current length of b */
80: size_t mbsz; /* max bsz allocation */
1.10 kristaps 81: int newln; /* output: are we on a fresh line */
1.1 kristaps 82: };
83:
/* Flag for "flags" below: discard all text directly inside the element. */
#define	NODE_IGNTEXT	 1

/*
 * Static description of one node type.
 */
struct	node {
	const char	*name;	/* DocBook element name */
	unsigned int	 flags;	/* Bitmask of NODE_IGNTEXT */
};
89:
90: TAILQ_HEAD(pnodeq, pnode);
91:
92: struct pnode {
93: enum nodeid node; /* node type */
94: char *b; /* binary data buffer */
95: size_t bsz; /* data buffer size */
96: struct pnode *parent; /* parent (or NULL if top) */
97: struct pnodeq childq; /* queue of children */
98: TAILQ_ENTRY(pnode) child;
99: };
100:
101: static const struct node nodes[NODE__MAX] = {
102: { NULL, 0 },
1.4 kristaps 103: { "arg", 0 },
1.1 kristaps 104: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 105: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 106: { "code", 0 },
1.4 kristaps 107: { "command", 0 },
1.3 kristaps 108: { "funcdef", 0 },
109: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 110: { "funcsynopsis", NODE_IGNTEXT },
111: { "funcsynopsisinfo", 0 },
1.3 kristaps 112: { "function", 0 },
1.1 kristaps 113: { "manvolnum", 0 },
1.4 kristaps 114: { "option", 0 },
1.1 kristaps 115: { "para", 0 },
1.3 kristaps 116: { "paramdef", 0 },
117: { "parameter", 0 },
1.1 kristaps 118: { "programlisting", 0 },
119: { "refclass", NODE_IGNTEXT },
120: { "refdescriptor", NODE_IGNTEXT },
121: { "refentry", NODE_IGNTEXT },
122: { "refentrytitle", 0 },
123: { "refmeta", NODE_IGNTEXT },
124: { "refmiscinfo", NODE_IGNTEXT },
125: { "refname", 0 },
126: { "refnamediv", NODE_IGNTEXT },
127: { "refpurpose", 0 },
128: { "refsect1", 0 },
129: { "refsynopsisdiv", NODE_IGNTEXT },
1.8 kristaps 130: { "structname", 0 },
1.1 kristaps 131: { "synopsis", 0 },
132: { NULL, 0 },
133: { "title", 0 },
134: };
135:
/* Declared early: the printer and pnode_printarg() call each other. */
static void	 pnode_print(struct parse *p, struct pnode *pn);
138:
1.1 kristaps 139: /*
140: * Look up whether "parent" is a valid parent for "node".
1.8 kristaps 141: * This is sucked directly from the DocBook specification: look at the
142: * "children" and "parent" sections of each node.
1.1 kristaps 143: */
144: static int
145: isparent(enum nodeid node, enum nodeid parent)
146: {
147:
148: switch (node) {
149: case (NODE_ROOT):
150: return(0);
1.4 kristaps 151: case (NODE_ARG):
152: switch (parent) {
153: case (NODE_ARG):
154: case (NODE_CMDSYNOPSIS):
155: return(1);
156: default:
157: break;
158: }
159: return(0);
1.1 kristaps 160: case (NODE_CITEREFENTRY):
161: switch (parent) {
162: case (NODE_FUNCSYNOPSISINFO):
163: case (NODE_PARA):
164: case (NODE_PROGRAMLISTING):
165: case (NODE_REFDESCRIPTOR):
166: case (NODE_REFENTRYTITLE):
167: case (NODE_REFNAME):
168: case (NODE_REFPURPOSE):
169: case (NODE_SYNOPSIS):
170: case (NODE_TITLE):
171: return(1);
172: default:
173: break;
174: }
175: return(0);
1.4 kristaps 176: case (NODE_CMDSYNOPSIS):
177: switch (parent) {
178: case (NODE_PARA):
179: case (NODE_REFSECT1):
180: case (NODE_REFSYNOPSISDIV):
181: return(1);
182: default:
183: break;
184: }
185: return(0);
1.1 kristaps 186: case (NODE_CODE):
187: switch (parent) {
188: case (NODE_FUNCSYNOPSISINFO):
189: case (NODE_PARA):
190: case (NODE_PROGRAMLISTING):
191: case (NODE_REFDESCRIPTOR):
192: case (NODE_REFENTRYTITLE):
193: case (NODE_REFNAME):
194: case (NODE_REFPURPOSE):
195: case (NODE_SYNOPSIS):
196: case (NODE_TITLE):
197: return(1);
198: default:
199: break;
200: }
201: return(0);
1.4 kristaps 202: case (NODE_COMMAND):
203: switch (parent) {
204: case (NODE_CMDSYNOPSIS):
205: case (NODE_FUNCSYNOPSISINFO):
206: case (NODE_PARA):
207: case (NODE_PROGRAMLISTING):
208: case (NODE_REFDESCRIPTOR):
209: case (NODE_REFENTRYTITLE):
210: case (NODE_REFNAME):
211: case (NODE_REFPURPOSE):
212: case (NODE_SYNOPSIS):
213: case (NODE_TITLE):
214: return(1);
215: default:
216: break;
217: }
218: return(0);
1.3 kristaps 219: case (NODE_FUNCDEF):
220: return(NODE_FUNCPROTOTYPE == parent);
221: case (NODE_FUNCPROTOTYPE):
222: return(NODE_FUNCSYNOPSIS == parent);
223: case (NODE_FUNCSYNOPSIS):
224: switch (parent) {
225: case (NODE_PARA):
226: case (NODE_REFSECT1):
227: case (NODE_REFSYNOPSISDIV):
228: return(1);
229: default:
230: break;
231: }
232: return(0);
233: case (NODE_FUNCSYNOPSISINFO):
234: return(NODE_FUNCSYNOPSIS == parent);
235: case (NODE_FUNCTION):
236: switch (parent) {
237: case (NODE_CODE):
238: case (NODE_FUNCDEF):
239: case (NODE_FUNCSYNOPSISINFO):
240: case (NODE_PARA):
1.4 kristaps 241: case (NODE_PROGRAMLISTING):
1.3 kristaps 242: case (NODE_REFDESCRIPTOR):
243: case (NODE_REFENTRYTITLE):
244: case (NODE_REFNAME):
245: case (NODE_REFPURPOSE):
246: case (NODE_SYNOPSIS):
247: case (NODE_TITLE):
248: return(1);
249: default:
250: break;
251: }
252: return(0);
1.1 kristaps 253: case (NODE_MANVOLNUM):
254: switch (parent) {
255: case (NODE_CITEREFENTRY):
256: case (NODE_REFMETA):
257: return(1);
258: default:
259: break;
260: }
261: return(0);
1.4 kristaps 262: case (NODE_OPTION):
263: switch (parent) {
264: case (NODE_ARG):
265: case (NODE_FUNCSYNOPSISINFO):
266: case (NODE_PARA):
267: case (NODE_PROGRAMLISTING):
268: case (NODE_REFDESCRIPTOR):
269: case (NODE_REFENTRYTITLE):
270: case (NODE_REFNAME):
271: case (NODE_REFPURPOSE):
272: case (NODE_SYNOPSIS):
273: case (NODE_TITLE):
274: return(1);
275: default:
276: break;
277: }
278: return(0);
1.3 kristaps 279: case (NODE_PARA):
1.1 kristaps 280: switch (parent) {
281: case (NODE_REFSECT1):
282: case (NODE_REFSYNOPSISDIV):
283: return(1);
284: default:
285: break;
286: }
287: return(0);
1.3 kristaps 288: case (NODE_PARAMDEF):
289: return(NODE_FUNCPROTOTYPE == parent);
290: case (NODE_PARAMETER):
1.1 kristaps 291: switch (parent) {
1.3 kristaps 292: case (NODE_CODE):
293: case (NODE_FUNCSYNOPSISINFO):
294: case (NODE_PARA):
295: case (NODE_PARAMDEF):
1.4 kristaps 296: case (NODE_PROGRAMLISTING):
1.3 kristaps 297: case (NODE_REFDESCRIPTOR):
298: case (NODE_REFENTRYTITLE):
299: case (NODE_REFNAME):
300: case (NODE_REFPURPOSE):
301: case (NODE_SYNOPSIS):
302: case (NODE_TITLE):
1.1 kristaps 303: return(1);
304: default:
305: break;
306: }
307: return(0);
308: case (NODE_PROGRAMLISTING):
309: switch (parent) {
310: case (NODE_PARA):
311: case (NODE_REFSECT1):
312: case (NODE_REFSYNOPSISDIV):
313: return(1);
314: default:
315: break;
316: }
317: return(0);
318: case (NODE_REFCLASS):
319: return(parent == NODE_REFNAMEDIV);
320: case (NODE_REFDESCRIPTOR):
321: return(parent == NODE_REFNAMEDIV);
322: case (NODE_REFENTRY):
323: return(parent == NODE_ROOT);
324: case (NODE_REFENTRYTITLE):
325: switch (parent) {
326: case (NODE_CITEREFENTRY):
327: case (NODE_REFMETA):
328: return(1);
329: default:
330: break;
331: }
332: case (NODE_REFMETA):
333: return(parent == NODE_REFENTRY);
334: case (NODE_REFMISCINFO):
335: return(parent == NODE_REFMETA);
336: case (NODE_REFNAME):
337: return(parent == NODE_REFNAMEDIV);
338: case (NODE_REFNAMEDIV):
339: return(parent == NODE_REFENTRY);
340: case (NODE_REFPURPOSE):
341: return(parent == NODE_REFNAMEDIV);
342: case (NODE_REFSECT1):
343: return(parent == NODE_REFENTRY);
344: case (NODE_REFSYNOPSISDIV):
345: return(parent == NODE_REFENTRY);
1.8 kristaps 346: case (NODE_STRUCTNAME):
347: switch (parent) {
348: case (NODE_CODE):
349: case (NODE_FUNCSYNOPSISINFO):
350: case (NODE_FUNCTION):
351: case (NODE_OPTION):
352: case (NODE_PARA):
353: case (NODE_PARAMETER):
354: case (NODE_PROGRAMLISTING):
355: case (NODE_REFDESCRIPTOR):
356: case (NODE_REFENTRYTITLE):
357: case (NODE_REFNAME):
358: case (NODE_REFPURPOSE):
359: case (NODE_SYNOPSIS):
360: case (NODE_TITLE):
361: return(1);
362: default:
363: break;
364: }
365: return(0);
1.1 kristaps 366: case (NODE_SYNOPSIS):
367: switch (parent) {
368: case (NODE_REFSYNOPSISDIV):
369: case (NODE_REFSECT1):
370: return(1);
371: default:
372: break;
373: }
374: return(0);
375: case (NODE_TITLE):
376: switch (parent) {
377: case (NODE_REFSECT1):
378: case (NODE_REFSYNOPSISDIV):
379: return(1);
380: default:
381: break;
382: }
383: return(0);
384: case (NODE_TEXT):
385: return(1);
386: case (NODE__MAX):
387: break;
388: }
389:
390: abort();
391: return(0);
392: }
393:
1.8 kristaps 394: /*
395: * Process a stream of characters.
396: * We store text as nodes in and of themselves.
397: * If a text node is already open, append to it.
398: * If it's not open, open one under the current context.
399: */
1.1 kristaps 400: static void
401: xml_char(void *arg, const XML_Char *p, int sz)
402: {
403: struct parse *ps = arg;
404: struct pnode *dat;
1.4 kristaps 405: int i;
1.1 kristaps 406:
407: /* Stopped or no tree yet. */
408: if (ps->stop || NODE_ROOT == ps->node)
409: return;
410:
411: /* Not supposed to be collecting text. */
412: assert(NULL != ps->cur);
413: if (NODE_IGNTEXT & nodes[ps->node].flags)
414: return;
415:
416: /*
417: * Are we in the midst of processing text?
418: * If we're not processing text right now, then create a text
419: * node for doing so.
1.4 kristaps 420: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 421: * process: strip out all leading whitespace to be sure.
1.1 kristaps 422: */
423: if (NODE_TEXT != ps->node) {
1.4 kristaps 424: for (i = 0; i < sz; i++)
425: if ( ! isspace((int)p[i]))
426: break;
427: if (i == sz)
428: return;
1.10 kristaps 429: p += i;
430: sz -= i;
1.1 kristaps 431: dat = calloc(1, sizeof(struct pnode));
432: if (NULL == dat) {
433: perror(NULL);
434: exit(EXIT_FAILURE);
435: }
436:
437: dat->node = ps->node = NODE_TEXT;
438: dat->parent = ps->cur;
439: TAILQ_INIT(&dat->childq);
440: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
441: ps->cur = dat;
442: assert(NULL != ps->root);
443: }
444:
445: /* Append to current buffer. */
446: assert(sz >= 0);
447: ps->cur->b = realloc(ps->cur->b,
448: ps->cur->bsz + (size_t)sz);
449: if (NULL == ps->cur->b) {
450: perror(NULL);
451: exit(EXIT_FAILURE);
452: }
453: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
454: ps->cur->bsz += (size_t)sz;
455: }
456:
1.10 kristaps 457: static void
458: pnode_trim(struct pnode *pn)
459: {
460:
461: assert(NODE_TEXT == pn->node);
462: for ( ; pn->bsz > 0; pn->bsz--)
463: if ( ! isspace((int)pn->b[pn->bsz - 1]))
464: break;
465: }
466:
1.1 kristaps 467: /*
468: * Begin an element.
469: * First, look for the element.
470: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 471: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 472: * If we find it but we're not parsing yet (i.e., it's not a refentry
473: * and thus out of context), keep going.
1.8 kristaps 474: * If we find it and we're at the root and already have a tree, puke and
475: * exit (FIXME: I don't think this is right?).
476: * If we find it but we're parsing a text node, close out the text node,
477: * return to its parent, and keep going.
1.1 kristaps 478: * Make sure that the element is in the right context.
479: * Lastly, put the node onto our parse tree and continue.
480: */
481: static void
482: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
483: {
484: struct parse *ps = arg;
485: enum nodeid node;
486: struct pnode *dat;
487:
488: if (ps->stop)
489: return;
490:
491: /* Close out text node, if applicable... */
492: if (NODE_TEXT == ps->node) {
493: assert(NULL != ps->cur);
1.10 kristaps 494: pnode_trim(ps->cur);
1.1 kristaps 495: ps->cur = ps->cur->parent;
496: assert(NULL != ps->cur);
497: ps->node = ps->cur->node;
498: }
499:
500: for (node = 0; node < NODE__MAX; node++)
501: if (NULL == nodes[node].name)
502: continue;
503: else if (0 == strcmp(nodes[node].name, name))
504: break;
505:
1.8 kristaps 506: /* FIXME: do more with these error messages... */
1.1 kristaps 507: if (NODE__MAX == node && NODE_ROOT == ps->node) {
508: fprintf(stderr, "%s: ignoring node\n", name);
509: return;
510: } else if (NODE__MAX == node) {
511: fprintf(stderr, "%s: unknown node\n", name);
512: ps->stop = 1;
513: return;
514: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
515: fprintf(stderr, "%s: reentering?\n", name);
516: ps->stop = 1;
517: return;
518: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
519: fprintf(stderr, "%s: known node w/o context\n", name);
520: return;
521: } else if ( ! isparent(node, ps->node)) {
522: fprintf(stderr, "%s: bad parent\n", name);
523: ps->stop = 1;
524: return;
525: }
526:
527: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
528: perror(NULL);
529: exit(EXIT_FAILURE);
530: }
531:
532: dat->node = ps->node = node;
533: dat->parent = ps->cur;
534: TAILQ_INIT(&dat->childq);
535:
536: if (NULL != ps->cur)
537: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
538:
539: ps->cur = dat;
540: if (NULL == ps->root)
541: ps->root = dat;
542: }
543:
544: /*
545: * Roll up the parse tree.
1.8 kristaps 546: * If we're at a text node, roll that one up first.
1.1 kristaps 547: * If we hit the root, then assign ourselves as the NODE_ROOT.
548: */
549: static void
550: xml_elem_end(void *arg, const XML_Char *name)
551: {
552: struct parse *ps = arg;
553:
554: if (ps->stop || NODE_ROOT == ps->node)
555: return;
556:
557: /* Close out text node, if applicable... */
558: if (NODE_TEXT == ps->node) {
559: assert(NULL != ps->cur);
1.10 kristaps 560: pnode_trim(ps->cur);
1.1 kristaps 561: ps->cur = ps->cur->parent;
562: assert(NULL != ps->cur);
563: ps->node = ps->cur->node;
564: }
565:
566: if (NULL == (ps->cur = ps->cur->parent))
567: ps->node = NODE_ROOT;
568: else
569: ps->node = ps->cur->node;
570: }
571:
1.8 kristaps 572: /*
573: * Recursively free a node (NULL is ok).
574: */
1.1 kristaps 575: static void
576: pnode_free(struct pnode *pn)
577: {
578: struct pnode *pp;
579:
580: if (NULL == pn)
581: return;
582:
583: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
584: TAILQ_REMOVE(&pn->childq, pp, child);
585: pnode_free(pp);
586: }
587:
588: free(pn->b);
589: free(pn);
590: }
591:
1.8 kristaps 592: /*
593: * Unlink a node from its parent and pnode_free() it.
594: */
1.1 kristaps 595: static void
596: pnode_unlink(struct pnode *pn)
597: {
598:
599: if (NULL != pn->parent)
600: TAILQ_REMOVE(&pn->parent->childq, pn, child);
601: pnode_free(pn);
602: }
603:
1.8 kristaps 604: /*
605: * Unlink all children of a node and pnode_free() them.
606: */
1.1 kristaps 607: static void
1.4 kristaps 608: pnode_unlinksub(struct pnode *pn)
609: {
610:
611: while ( ! TAILQ_EMPTY(&pn->childq))
612: pnode_unlink(TAILQ_FIRST(&pn->childq));
613: }
614:
1.8 kristaps 615: /*
616: * Reset the lookaside buffer.
617: */
1.4 kristaps 618: static void
1.1 kristaps 619: bufclear(struct parse *p)
620: {
621:
622: p->b[p->bsz = 0] = '\0';
623: }
624:
1.8 kristaps 625: /*
626: * Append NODE_TEXT contents to the current buffer, reallocating its
627: * size if necessary.
628: * The buffer is ALWAYS nil-terminated.
629: */
1.1 kristaps 630: static void
631: bufappend(struct parse *p, struct pnode *pn)
632: {
633:
634: assert(NODE_TEXT == pn->node);
635: if (p->bsz + pn->bsz + 1 > p->mbsz) {
636: p->mbsz = p->bsz + pn->bsz + 1;
637: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
638: perror(NULL);
639: exit(EXIT_FAILURE);
640: }
641: }
642: memcpy(p->b + p->bsz, pn->b, pn->bsz);
643: p->bsz += pn->bsz;
644: p->b[p->bsz] = '\0';
645: }
646:
1.8 kristaps 647: /*
648: * Recursively append all NODE_TEXT nodes to the buffer.
649: * This descends into non-text nodes, but doesn't do anything beyond
650: * them.
651: * In other words, this is a recursive text grok.
652: */
1.3 kristaps 653: static void
654: bufappend_r(struct parse *p, struct pnode *pn)
655: {
656: struct pnode *pp;
657:
658: if (NODE_TEXT == pn->node)
659: bufappend(p, pn);
660: TAILQ_FOREACH(pp, &pn->childq, child)
661: bufappend_r(p, pp);
662: }
663:
1.1 kristaps 664: /*
1.8 kristaps 665: * Recursively print text presumably on a macro line.
1.1 kristaps 666: * Convert all whitespace to regular spaces.
667: */
668: static void
669: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
670: {
671: char *cp;
672:
673: bufclear(p);
1.3 kristaps 674: bufappend_r(p, pn);
1.1 kristaps 675:
676: /* Convert all space to spaces. */
677: for (cp = p->b; '\0' != *cp; cp++)
678: if (isspace((int)*cp))
679: *cp = ' ';
680:
681: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 682: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 683: for ( ; '\0' != *cp; cp++) {
684: /* Escape us if we look like a macro. */
685: if ((cp == p->b || ' ' == *(cp - 1)) &&
686: isupper((int)*cp) &&
687: '\0' != *(cp + 1) &&
688: islower((int)*(cp + 1)) &&
689: ('\0' == *(cp + 2) ||
690: ' ' == *(cp + 2) ||
691: (islower((int)*(cp + 2)) &&
692: ('\0' == *(cp + 3) ||
693: ' ' == *(cp + 3)))))
694: fputs("\\&", stdout);
695: putchar(*cp);
696: /* If we're a character escape, escape us. */
697: if ('\\' == *cp)
698: putchar('e');
699: }
700: }
701:
/*
 * Same as pnode_printmacrolinepart(), but finish the line.
 * With no text below "pn", only the newline is printed.
 */
static void
pnode_printmacroline(struct parse *p, struct pnode *pn)
{

	pnode_printmacrolinepart(p, pn);
	fputc('\n', stdout);
}
713:
1.10 kristaps 714: static void
715: pnode_printmopen(struct parse *p)
716: {
717: if (p->newln) {
718: putchar('.');
719: p->newln = 0;
720: } else
721: putchar(' ');
722: }
723:
724: static void
725: pnode_printmclose(struct parse *p, int sv)
726: {
727:
728: if (sv && ! p->newln) {
729: putchar('\n');
730: p->newln = 1;
731: }
732: }
733:
1.8 kristaps 734: /*
1.10 kristaps 735: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 736: */
1.1 kristaps 737: static void
1.6 kristaps 738: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
739: {
740: struct pnode *pp;
741:
1.10 kristaps 742: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 743: if (NODE_TITLE == pp->node) {
744: pnode_unlink(pp);
1.10 kristaps 745: return;
1.6 kristaps 746: }
747: }
748:
1.8 kristaps 749: /*
750: * Start a hopefully-named `Sh' section.
751: */
1.6 kristaps 752: static void
1.1 kristaps 753: pnode_printrefsect(struct parse *p, struct pnode *pn)
754: {
755: struct pnode *pp;
756:
757: TAILQ_FOREACH(pp, &pn->childq, child)
758: if (NODE_TITLE == pp->node)
759: break;
760:
1.4 kristaps 761: fputs(".Sh ", stdout);
762:
1.5 kristaps 763: if (NULL != pp) {
1.1 kristaps 764: pnode_printmacroline(p, pp);
1.5 kristaps 765: pnode_unlink(pp);
766: } else
1.4 kristaps 767: puts("UNKNOWN");
1.1 kristaps 768: }
769:
1.8 kristaps 770: /*
771: * Start a reference, extracting the title and volume.
772: */
1.1 kristaps 773: static void
774: pnode_printciterefentry(struct parse *p, struct pnode *pn)
775: {
776: struct pnode *pp, *title, *manvol;
777:
778: title = manvol = NULL;
779: TAILQ_FOREACH(pp, &pn->childq, child)
780: if (NODE_MANVOLNUM == pp->node)
781: manvol = pp;
782: else if (NODE_REFENTRYTITLE == pp->node)
783: title = pp;
784:
785: fputs(".Xr ", stdout);
1.4 kristaps 786:
1.1 kristaps 787: if (NULL != title) {
788: pnode_printmacrolinepart(p, title);
1.4 kristaps 789: putchar(' ');
1.1 kristaps 790: } else
1.4 kristaps 791: fputs("unknown ", stdout);
792:
793: if (NULL != manvol)
1.1 kristaps 794: pnode_printmacroline(p, manvol);
1.4 kristaps 795: else
1.1 kristaps 796: puts("1");
797: }
798:
799: static void
800: pnode_printrefmeta(struct parse *p, struct pnode *pn)
801: {
802: struct pnode *pp, *title, *manvol;
803:
804: title = manvol = NULL;
805: TAILQ_FOREACH(pp, &pn->childq, child)
806: if (NODE_MANVOLNUM == pp->node)
807: manvol = pp;
808: else if (NODE_REFENTRYTITLE == pp->node)
809: title = pp;
810:
1.2 kristaps 811: puts(".Dd $Mdocdate" "$");
1.1 kristaps 812: fputs(".Dt ", stdout);
813:
814: if (NULL != title) {
1.7 kristaps 815: /* FIXME: uppercase. */
1.1 kristaps 816: pnode_printmacrolinepart(p, title);
1.4 kristaps 817: putchar(' ');
1.1 kristaps 818: } else
1.4 kristaps 819: fputs("UNKNOWN ", stdout);
820:
821: if (NULL != manvol)
1.1 kristaps 822: pnode_printmacroline(p, manvol);
1.4 kristaps 823: else
1.1 kristaps 824: puts("1");
825:
826: puts(".Os");
827: }
828:
1.3 kristaps 829: static void
830: pnode_printfuncdef(struct parse *p, struct pnode *pn)
831: {
832: struct pnode *pp, *ftype, *func;
833:
834: ftype = func = NULL;
835: TAILQ_FOREACH(pp, &pn->childq, child)
836: if (NODE_TEXT == pp->node)
837: ftype = pp;
838: else if (NODE_FUNCTION == pp->node)
839: func = pp;
840:
841: if (NULL != ftype) {
842: fputs(".Ft ", stdout);
843: pnode_printmacroline(p, ftype);
844: }
845:
846: if (NULL != func) {
847: fputs(".Fo ", stdout);
848: pnode_printmacroline(p, func);
849: } else
850: puts(".Fo UNKNOWN");
851: }
852:
853: static void
854: pnode_printparamdef(struct parse *p, struct pnode *pn)
855: {
856: struct pnode *pp, *ptype, *param;
857:
858: ptype = param = NULL;
859: TAILQ_FOREACH(pp, &pn->childq, child)
860: if (NODE_TEXT == pp->node)
861: ptype = pp;
862: else if (NODE_PARAMETER == pp->node)
863: param = pp;
864:
865: fputs(".Fa \"", stdout);
866: if (NULL != ptype) {
867: pnode_printmacrolinepart(p, ptype);
868: putchar(' ');
869: }
870:
871: if (NULL != param)
872: pnode_printmacrolinepart(p, param);
873:
874: puts("\"");
875: }
876:
877: static void
878: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
879: {
880: struct pnode *pp, *fdef;
881:
882: TAILQ_FOREACH(fdef, &pn->childq, child)
883: if (NODE_FUNCDEF == fdef->node)
884: break;
885:
1.4 kristaps 886: if (NULL != fdef)
1.3 kristaps 887: pnode_printfuncdef(p, fdef);
1.4 kristaps 888: else
1.3 kristaps 889: puts(".Fo UNKNOWN");
890:
1.4 kristaps 891: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 892: if (NODE_PARAMDEF == pp->node)
893: pnode_printparamdef(p, pp);
894:
895: puts(".Fc");
896: }
897:
1.10 kristaps 898: /*
899: * The <arg> element is more complicated than it should be because text
900: * nodes are treated like ".Ar foo", but non-text nodes need to be
901: * re-sent into the printer (i.e., without the preceding ".Ar").
902: * TODO: handle "optional" attribute.
903: */
1.4 kristaps 904: static void
1.10 kristaps 905: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 906: {
907: struct pnode *pp;
908:
1.10 kristaps 909: TAILQ_FOREACH(pp, &pn->childq, child) {
910: if (NODE_TEXT == pp->node) {
911: pnode_printmopen(p);
1.4 kristaps 912: fputs("Ar ", stdout);
1.10 kristaps 913: }
914: pnode_print(p, pp);
915: }
1.4 kristaps 916: }
917:
1.7 kristaps 918: /*
919: * Recursively search and return the first instance of "node".
920: */
921: static struct pnode *
922: pnode_findfirst(struct pnode *pn, enum nodeid node)
923: {
924: struct pnode *pp, *res;
925:
926: res = NULL;
927: TAILQ_FOREACH(pp, &pn->childq, child) {
928: res = pp->node == node ? pp :
929: pnode_findfirst(pp, node);
930: if (NULL != res)
931: break;
932: }
933:
934: return(res);
935: }
936:
937: static void
938: pnode_printprologue(struct parse *p, struct pnode *pn)
939: {
940: struct pnode *pp;
941:
1.9 kristaps 942: pp = NULL == p->root ? NULL :
943: pnode_findfirst(p->root, NODE_REFMETA);
944:
945: if (NULL != pp) {
1.7 kristaps 946: pnode_printrefmeta(p, pp);
947: pnode_unlink(pp);
948: } else {
949: puts(".\\\" Supplying bogus prologue...");
950: puts(".Dd $Mdocdate" "$");
951: puts(".Dt UNKNOWN 1");
952: puts(".Os");
953: }
954: }
955:
1.1 kristaps 956: /*
957: * Print a parsed node (or ignore it--whatever).
958: * This is a recursive function.
959: * FIXME: macro line continuation?
960: */
961: static void
962: pnode_print(struct parse *p, struct pnode *pn)
963: {
964: struct pnode *pp;
965: char *cp;
1.10 kristaps 966: int last, sv;
1.1 kristaps 967:
968: if (NULL == pn)
969: return;
970:
1.10 kristaps 971: sv = p->newln;
1.1 kristaps 972:
973: switch (pn->node) {
1.4 kristaps 974: case (NODE_ARG):
1.10 kristaps 975: pnode_printarg(p, pn);
1.4 kristaps 976: pnode_unlinksub(pn);
977: break;
1.1 kristaps 978: case (NODE_CITEREFENTRY):
1.10 kristaps 979: assert(p->newln);
1.1 kristaps 980: pnode_printciterefentry(p, pn);
1.4 kristaps 981: pnode_unlinksub(pn);
1.1 kristaps 982: break;
983: case (NODE_CODE):
1.10 kristaps 984: pnode_printmopen(p);
985: fputs("Li ", stdout);
1.4 kristaps 986: break;
987: case (NODE_COMMAND):
1.10 kristaps 988: pnode_printmopen(p);
989: fputs("Nm ", stdout);
1.1 kristaps 990: break;
1.3 kristaps 991: case (NODE_FUNCTION):
1.10 kristaps 992: pnode_printmopen(p);
993: fputs("Fn ", stdout);
1.3 kristaps 994: break;
995: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 996: assert(p->newln);
1.3 kristaps 997: pnode_printfuncprototype(p, pn);
1.4 kristaps 998: pnode_unlinksub(pn);
1.3 kristaps 999: break;
1.1 kristaps 1000: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 1001: pnode_printmopen(p);
1002: fputs("Fd ", stdout);
1003: break;
1004: case (NODE_OPTION):
1005: pnode_printmopen(p);
1006: fputs("Fl ", stdout);
1.1 kristaps 1007: break;
1008: case (NODE_PARA):
1.10 kristaps 1009: assert(p->newln);
1.1 kristaps 1010: puts(".Pp");
1.3 kristaps 1011: break;
1012: case (NODE_PARAMETER):
1.10 kristaps 1013: /* Suppress non-text children... */
1014: pnode_printmopen(p);
1015: fputs("Fa \"", stdout);
1.3 kristaps 1016: pnode_printmacrolinepart(p, pn);
1017: puts("\"");
1.4 kristaps 1018: pnode_unlinksub(pn);
1.1 kristaps 1019: break;
1020: case (NODE_PROGRAMLISTING):
1.10 kristaps 1021: assert(p->newln);
1.1 kristaps 1022: puts(".Bd -literal");
1023: break;
1024: case (NODE_REFMETA):
1.7 kristaps 1025: abort();
1.1 kristaps 1026: break;
1027: case (NODE_REFNAME):
1.10 kristaps 1028: /* Suppress non-text children... */
1029: pnode_printmopen(p);
1030: fputs("Nm ", stdout);
1031: pnode_printmacrolinepart(p, pn);
1.4 kristaps 1032: pnode_unlinksub(pn);
1.10 kristaps 1033: break;
1.1 kristaps 1034: case (NODE_REFNAMEDIV):
1.10 kristaps 1035: assert(p->newln);
1.1 kristaps 1036: puts(".Sh NAME");
1037: break;
1038: case (NODE_REFPURPOSE):
1.10 kristaps 1039: assert(p->newln);
1.1 kristaps 1040: fputs(".Nd ", stdout);
1.10 kristaps 1041: break;
1.1 kristaps 1042: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1043: assert(p->newln);
1.6 kristaps 1044: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1045: puts(".Sh SYNOPSIS");
1.1 kristaps 1046: break;
1047: case (NODE_REFSECT1):
1.10 kristaps 1048: assert(p->newln);
1.1 kristaps 1049: pnode_printrefsect(p, pn);
1050: break;
1.8 kristaps 1051: case (NODE_STRUCTNAME):
1.10 kristaps 1052: pnode_printmopen(p);
1053: fputs("Vt ", stdout);
1054: break;
1.1 kristaps 1055: case (NODE_TEXT):
1056: bufclear(p);
1057: bufappend(p, pn);
1058: /*
1059: * Output all characters, squeezing out whitespace
1060: * between newlines.
1061: * XXX: all whitespace, including tabs (?).
1062: * Remember to escape control characters and escapes.
1063: */
1.10 kristaps 1064: assert(p->bsz);
1.1 kristaps 1065: for (last = '\n', cp = p->b; '\0' != *cp; ) {
1066: if ('\n' == last) {
1067: /* Consume all whitespace. */
1068: if (isspace((int)*cp)) {
1069: while (isspace((int)*cp))
1070: cp++;
1071: continue;
1072: } else if ('\'' == *cp || '.' == *cp)
1073: fputs("\\&", stdout);
1074: }
1075: putchar(last = *cp++);
1076: /* If we're a character escape, escape us. */
1077: if ('\\' == last)
1078: putchar('e');
1079: }
1.10 kristaps 1080: p->newln = 0;
1.1 kristaps 1081: break;
1082: default:
1083: break;
1084: }
1085:
1086: TAILQ_FOREACH(pp, &pn->childq, child)
1087: pnode_print(p, pp);
1088:
1089: switch (pn->node) {
1.10 kristaps 1090: case (NODE_ARG):
1091: case (NODE_CODE):
1092: case (NODE_COMMAND):
1093: case (NODE_FUNCTION):
1094: case (NODE_FUNCSYNOPSISINFO):
1095: case (NODE_OPTION):
1096: case (NODE_PARAMETER):
1097: case (NODE_REFNAME):
1098: case (NODE_STRUCTNAME):
1099: case (NODE_TEXT):
1100: pnode_printmclose(p, sv);
1101: break;
1.1 kristaps 1102: case (NODE_PROGRAMLISTING):
1.10 kristaps 1103: assert(p->newln);
1.1 kristaps 1104: puts(".Ed");
1.10 kristaps 1105: p->newln = 1;
1.1 kristaps 1106: break;
1107: default:
1108: break;
1109: }
1110: }
1111:
1112: /*
1113: * Loop around the read buffer until we've drained it of all data.
1114: * Invoke the parser context with each buffer fill.
1115: */
1116: static int
1117: readfile(XML_Parser xp, int fd,
1118: char *b, size_t bsz, const char *fn)
1119: {
1120: struct parse p;
1121: int rc;
1122: ssize_t ssz;
1123:
1124: memset(&p, 0, sizeof(struct parse));
1125:
1126: p.b = malloc(p.bsz = p.mbsz = 1024);
1127:
1128: XML_SetCharacterDataHandler(xp, xml_char);
1129: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1130: XML_SetUserData(xp, &p);
1131:
1132: while ((ssz = read(fd, b, bsz)) >= 0) {
1133: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1134: fprintf(stderr, "%s: %s\n", fn,
1135: XML_ErrorString
1136: (XML_GetErrorCode(xp)));
1137: else if ( ! p.stop && ssz > 0)
1138: continue;
1139: /*
1140: * Exit when we've read all or errors have occured
1141: * during the parse sequence.
1142: */
1.10 kristaps 1143: p.newln = 1;
1.7 kristaps 1144: pnode_printprologue(&p, p.root);
1.1 kristaps 1145: pnode_print(&p, p.root);
1146: pnode_free(p.root);
1147: free(p.b);
1148: return(0 != rc && ! p.stop);
1149: }
1150:
1151: /* Read error has occured. */
1152: perror(fn);
1153: pnode_free(p.root);
1154: free(p.b);
1155: return(0);
1156: }
1157:
1158: int
1159: main(int argc, char *argv[])
1160: {
1161: XML_Parser xp;
1162: const char *fname;
1163: char *buf;
1164: int fd, rc;
1165:
1166: fname = "-";
1167: xp = NULL;
1168: buf = NULL;
1169: rc = 0;
1170:
1171: if (-1 != getopt(argc, argv, ""))
1172: return(EXIT_FAILURE);
1173:
1174: argc -= optind;
1175: argv += optind;
1176:
1177: if (argc > 1)
1178: return(EXIT_FAILURE);
1179: else if (argc > 0)
1180: fname = argv[0];
1181:
1182: /* Read from stdin or a file. */
1183: fd = 0 == strcmp(fname, "-") ?
1184: STDIN_FILENO : open(fname, O_RDONLY, 0);
1185:
1186: /*
1187: * Open file for reading.
1188: * Allocate a read buffer.
1189: * Create the parser context.
1190: * Dive directly into the parse.
1191: */
1192: if (-1 == fd)
1193: perror(fname);
1194: else if (NULL == (buf = malloc(4096)))
1195: perror(NULL);
1196: else if (NULL == (xp = XML_ParserCreate(NULL)))
1197: perror(NULL);
1198: else if ( ! readfile(xp, fd, buf, 4096, fname))
1199: rc = 1;
1200:
1201: XML_ParserFree(xp);
1202: free(buf);
1203: if (STDIN_FILENO != fd)
1204: close(fd);
1205: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1206: }
CVSweb