Annotation of docbook2mdoc/docbook2mdoc.c, Revision 1.12
1.12 ! kristaps 1: /* $Id: docbook2mdoc.c,v 1.11 2014/03/29 11:13:49 kristaps Exp $ */
1.1 kristaps 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/queue.h>
18:
19: #include <assert.h>
20: #include <ctype.h>
21: #include <expat.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
1.7 kristaps 27: #include <unistd.h>
1.1 kristaps 28:
/*
 * Identifiers for every node kind the parser recognises.
 * NODE_ROOT must come first (value zero) and NODE__MAX must come last;
 * everything in between is kept in alphabetical order.
 * The nodes[] table is sized by NODE__MAX and indexed by these values.
 */
enum nodeid {
	NODE_ROOT = 0,		/* no element: outside of the tree */
	NODE_ARG,
	NODE_CITEREFENTRY,
	NODE_CMDSYNOPSIS,
	NODE_CODE,
	NODE_COMMAND,
	NODE_FUNCDEF,
	NODE_FUNCPROTOTYPE,
	NODE_FUNCSYNOPSIS,
	NODE_FUNCSYNOPSISINFO,
	NODE_FUNCTION,
	NODE_MANVOLNUM,
	NODE_OPTION,
	NODE_PARA,
	NODE_PARAMDEF,
	NODE_PARAMETER,
	NODE_PROGRAMLISTING,
	NODE_REFCLASS,
	NODE_REFDESCRIPTOR,
	NODE_REFENTRY,
	NODE_REFENTRYTITLE,
	NODE_REFMETA,
	NODE_REFMISCINFO,
	NODE_REFNAME,
	NODE_REFNAMEDIV,
	NODE_REFPURPOSE,
	NODE_REFSECT1,
	NODE_REFSYNOPSISDIV,
	NODE_STRUCTNAME,
	NODE_SYNOPSIS,
	NODE_TEXT,		/* character data, not an element */
	NODE_TITLE,
	NODE__MAX		/* number of node kinds */
};
68:
/*
 * Identifiers for every attribute name the parser recognises.
 * Kept in alphabetical order; ATTRKEY__MAX is the number of keys and
 * sizes the attrkeys[] table.
 */
enum attrkey {
	ATTRKEY_CHOICE = 0,
	ATTRKEY_ID,
	ATTRKEY_REP,
	ATTRKEY__MAX
};
! 79:
/*
 * Identifiers for the attribute values that are recognised by name.
 * Kept in alphabetical order.
 * An attribute whose value is ATTRVAL__MAX did not match any of these
 * and may carry free-form text instead.
 */
enum attrval {
	ATTRVAL_NOREPEAT,
	ATTRVAL_OPT,
	ATTRVAL_PLAIN,
	ATTRVAL_REPEAT,
	ATTRVAL_REQ,
	ATTRVAL__MAX
};
! 93:
! 94: /*
1.1 kristaps 95: * Global parse state.
96: * Keep this as simple and small as possible.
97: */
98: struct parse {
1.12 ! kristaps 99: XML_Parser xml;
1.1 kristaps 100: enum nodeid node; /* current (NODE_ROOT if pre-tree) */
1.12 ! kristaps 101: const char *fname; /* filename */
1.1 kristaps 102: int stop; /* should we stop now? */
103: struct pnode *root; /* root of parse tree */
104: struct pnode *cur; /* current node in tree */
1.8 kristaps 105: char *b; /* nil-terminated buffer for pre-print */
106: size_t bsz; /* current length of b */
107: size_t mbsz; /* max bsz allocation */
1.10 kristaps 108: int newln; /* output: are we on a fresh line */
1.1 kristaps 109: };
110:
/* Flag for struct node: ignore all text contained in the element. */
#define	NODE_IGNTEXT	 1

/*
 * Static description of one node kind.
 */
struct node {
	const char	*name;	/* DocBook element name */
	unsigned int	 flags;	/* bit-field of NODE_* flags */
};
116:
117: TAILQ_HEAD(pnodeq, pnode);
1.12 ! kristaps 118: TAILQ_HEAD(pattrq, pattr);
! 119:
! 120: struct pattr {
! 121: enum attrkey key;
! 122: enum attrval val;
! 123: char *rawval;
! 124: TAILQ_ENTRY(pattr) child;
! 125: };
1.1 kristaps 126:
127: struct pnode {
128: enum nodeid node; /* node type */
129: char *b; /* binary data buffer */
130: size_t bsz; /* data buffer size */
131: struct pnode *parent; /* parent (or NULL if top) */
132: struct pnodeq childq; /* queue of children */
1.12 ! kristaps 133: struct pattrq attrq; /* attributes of node */
1.1 kristaps 134: TAILQ_ENTRY(pnode) child;
135: };
136:
1.12 ! kristaps 137: static const char *attrkeys[ATTRKEY__MAX] = {
! 138: "choice",
! 139: "id",
! 140: "rep"
! 141: };
! 142:
! 143: static const char *attrvals[ATTRVAL__MAX] = {
! 144: "norepeat",
! 145: "opt",
! 146: "plain",
! 147: "repeat",
! 148: "req"
! 149: };
! 150:
1.1 kristaps 151: static const struct node nodes[NODE__MAX] = {
152: { NULL, 0 },
1.4 kristaps 153: { "arg", 0 },
1.1 kristaps 154: { "citerefentry", NODE_IGNTEXT },
1.4 kristaps 155: { "cmdsynopsis", NODE_IGNTEXT },
1.1 kristaps 156: { "code", 0 },
1.4 kristaps 157: { "command", 0 },
1.3 kristaps 158: { "funcdef", 0 },
159: { "funcprototype", NODE_IGNTEXT },
1.1 kristaps 160: { "funcsynopsis", NODE_IGNTEXT },
161: { "funcsynopsisinfo", 0 },
1.3 kristaps 162: { "function", 0 },
1.1 kristaps 163: { "manvolnum", 0 },
1.4 kristaps 164: { "option", 0 },
1.1 kristaps 165: { "para", 0 },
1.3 kristaps 166: { "paramdef", 0 },
167: { "parameter", 0 },
1.1 kristaps 168: { "programlisting", 0 },
169: { "refclass", NODE_IGNTEXT },
170: { "refdescriptor", NODE_IGNTEXT },
171: { "refentry", NODE_IGNTEXT },
172: { "refentrytitle", 0 },
173: { "refmeta", NODE_IGNTEXT },
174: { "refmiscinfo", NODE_IGNTEXT },
175: { "refname", 0 },
176: { "refnamediv", NODE_IGNTEXT },
177: { "refpurpose", 0 },
178: { "refsect1", 0 },
179: { "refsynopsisdiv", NODE_IGNTEXT },
1.8 kristaps 180: { "structname", 0 },
1.1 kristaps 181: { "synopsis", 0 },
182: { NULL, 0 },
183: { "title", 0 },
184: };
185:
/*
 * Forward declaration: the printer is recursive and is re-entered from
 * helpers (such as pnode_printarg()) that are defined before it.
 */
static void	 pnode_print(struct parse *p, struct pnode *pn);
188:
1.12 ! kristaps 189: static int
! 190: isattrkey(enum nodeid node, enum attrkey key)
! 191: {
! 192:
! 193: switch (key) {
! 194: case (ATTRKEY_CHOICE):
! 195: return(node == NODE_ARG);
! 196: case (ATTRKEY_ID):
! 197: /* Common to all. */
! 198: return(1);
! 199: case (ATTRKEY_REP):
! 200: return(node == NODE_ARG);
! 201: default:
! 202: break;
! 203: }
! 204: abort();
! 205: return(0);
! 206: }
! 207:
! 208: static int
! 209: isattrval(enum attrkey key, enum attrval val)
! 210: {
! 211:
! 212: switch (val) {
! 213: case (ATTRVAL_OPT):
! 214: case (ATTRVAL_PLAIN):
! 215: case (ATTRVAL_REQ):
! 216: return(key == ATTRKEY_CHOICE);
! 217: case (ATTRVAL_REPEAT):
! 218: case (ATTRVAL_NOREPEAT):
! 219: return(key == ATTRKEY_REP);
! 220: default:
! 221: break;
! 222: }
! 223: abort();
! 224: return(0);
! 225: }
! 226:
1.1 kristaps 227: /*
228: * Look up whether "parent" is a valid parent for "node".
1.8 kristaps 229: * This is sucked directly from the DocBook specification: look at the
230: * "children" and "parent" sections of each node.
1.1 kristaps 231: */
232: static int
233: isparent(enum nodeid node, enum nodeid parent)
234: {
235:
236: switch (node) {
237: case (NODE_ROOT):
238: return(0);
1.4 kristaps 239: case (NODE_ARG):
240: switch (parent) {
241: case (NODE_ARG):
242: case (NODE_CMDSYNOPSIS):
243: return(1);
244: default:
245: break;
246: }
247: return(0);
1.1 kristaps 248: case (NODE_CITEREFENTRY):
249: switch (parent) {
250: case (NODE_FUNCSYNOPSISINFO):
251: case (NODE_PARA):
252: case (NODE_PROGRAMLISTING):
253: case (NODE_REFDESCRIPTOR):
254: case (NODE_REFENTRYTITLE):
255: case (NODE_REFNAME):
256: case (NODE_REFPURPOSE):
257: case (NODE_SYNOPSIS):
258: case (NODE_TITLE):
259: return(1);
260: default:
261: break;
262: }
263: return(0);
1.4 kristaps 264: case (NODE_CMDSYNOPSIS):
265: switch (parent) {
266: case (NODE_PARA):
267: case (NODE_REFSECT1):
268: case (NODE_REFSYNOPSISDIV):
269: return(1);
270: default:
271: break;
272: }
273: return(0);
1.1 kristaps 274: case (NODE_CODE):
275: switch (parent) {
276: case (NODE_FUNCSYNOPSISINFO):
277: case (NODE_PARA):
278: case (NODE_PROGRAMLISTING):
279: case (NODE_REFDESCRIPTOR):
280: case (NODE_REFENTRYTITLE):
281: case (NODE_REFNAME):
282: case (NODE_REFPURPOSE):
283: case (NODE_SYNOPSIS):
284: case (NODE_TITLE):
285: return(1);
286: default:
287: break;
288: }
289: return(0);
1.4 kristaps 290: case (NODE_COMMAND):
291: switch (parent) {
292: case (NODE_CMDSYNOPSIS):
293: case (NODE_FUNCSYNOPSISINFO):
294: case (NODE_PARA):
295: case (NODE_PROGRAMLISTING):
296: case (NODE_REFDESCRIPTOR):
297: case (NODE_REFENTRYTITLE):
298: case (NODE_REFNAME):
299: case (NODE_REFPURPOSE):
300: case (NODE_SYNOPSIS):
301: case (NODE_TITLE):
302: return(1);
303: default:
304: break;
305: }
306: return(0);
1.3 kristaps 307: case (NODE_FUNCDEF):
308: return(NODE_FUNCPROTOTYPE == parent);
309: case (NODE_FUNCPROTOTYPE):
310: return(NODE_FUNCSYNOPSIS == parent);
311: case (NODE_FUNCSYNOPSIS):
312: switch (parent) {
313: case (NODE_PARA):
314: case (NODE_REFSECT1):
315: case (NODE_REFSYNOPSISDIV):
316: return(1);
317: default:
318: break;
319: }
320: return(0);
321: case (NODE_FUNCSYNOPSISINFO):
322: return(NODE_FUNCSYNOPSIS == parent);
323: case (NODE_FUNCTION):
324: switch (parent) {
325: case (NODE_CODE):
326: case (NODE_FUNCDEF):
327: case (NODE_FUNCSYNOPSISINFO):
328: case (NODE_PARA):
1.4 kristaps 329: case (NODE_PROGRAMLISTING):
1.3 kristaps 330: case (NODE_REFDESCRIPTOR):
331: case (NODE_REFENTRYTITLE):
332: case (NODE_REFNAME):
333: case (NODE_REFPURPOSE):
334: case (NODE_SYNOPSIS):
335: case (NODE_TITLE):
336: return(1);
337: default:
338: break;
339: }
340: return(0);
1.1 kristaps 341: case (NODE_MANVOLNUM):
342: switch (parent) {
343: case (NODE_CITEREFENTRY):
344: case (NODE_REFMETA):
345: return(1);
346: default:
347: break;
348: }
349: return(0);
1.4 kristaps 350: case (NODE_OPTION):
351: switch (parent) {
352: case (NODE_ARG):
353: case (NODE_FUNCSYNOPSISINFO):
354: case (NODE_PARA):
355: case (NODE_PROGRAMLISTING):
356: case (NODE_REFDESCRIPTOR):
357: case (NODE_REFENTRYTITLE):
358: case (NODE_REFNAME):
359: case (NODE_REFPURPOSE):
360: case (NODE_SYNOPSIS):
361: case (NODE_TITLE):
362: return(1);
363: default:
364: break;
365: }
366: return(0);
1.3 kristaps 367: case (NODE_PARA):
1.1 kristaps 368: switch (parent) {
369: case (NODE_REFSECT1):
370: case (NODE_REFSYNOPSISDIV):
371: return(1);
372: default:
373: break;
374: }
375: return(0);
1.3 kristaps 376: case (NODE_PARAMDEF):
377: return(NODE_FUNCPROTOTYPE == parent);
378: case (NODE_PARAMETER):
1.1 kristaps 379: switch (parent) {
1.3 kristaps 380: case (NODE_CODE):
381: case (NODE_FUNCSYNOPSISINFO):
382: case (NODE_PARA):
383: case (NODE_PARAMDEF):
1.4 kristaps 384: case (NODE_PROGRAMLISTING):
1.3 kristaps 385: case (NODE_REFDESCRIPTOR):
386: case (NODE_REFENTRYTITLE):
387: case (NODE_REFNAME):
388: case (NODE_REFPURPOSE):
389: case (NODE_SYNOPSIS):
390: case (NODE_TITLE):
1.1 kristaps 391: return(1);
392: default:
393: break;
394: }
395: return(0);
396: case (NODE_PROGRAMLISTING):
397: switch (parent) {
398: case (NODE_PARA):
399: case (NODE_REFSECT1):
400: case (NODE_REFSYNOPSISDIV):
401: return(1);
402: default:
403: break;
404: }
405: return(0);
406: case (NODE_REFCLASS):
407: return(parent == NODE_REFNAMEDIV);
408: case (NODE_REFDESCRIPTOR):
409: return(parent == NODE_REFNAMEDIV);
410: case (NODE_REFENTRY):
411: return(parent == NODE_ROOT);
412: case (NODE_REFENTRYTITLE):
413: switch (parent) {
414: case (NODE_CITEREFENTRY):
415: case (NODE_REFMETA):
416: return(1);
417: default:
418: break;
419: }
420: case (NODE_REFMETA):
421: return(parent == NODE_REFENTRY);
422: case (NODE_REFMISCINFO):
423: return(parent == NODE_REFMETA);
424: case (NODE_REFNAME):
425: return(parent == NODE_REFNAMEDIV);
426: case (NODE_REFNAMEDIV):
427: return(parent == NODE_REFENTRY);
428: case (NODE_REFPURPOSE):
429: return(parent == NODE_REFNAMEDIV);
430: case (NODE_REFSECT1):
431: return(parent == NODE_REFENTRY);
432: case (NODE_REFSYNOPSISDIV):
433: return(parent == NODE_REFENTRY);
1.8 kristaps 434: case (NODE_STRUCTNAME):
435: switch (parent) {
436: case (NODE_CODE):
437: case (NODE_FUNCSYNOPSISINFO):
438: case (NODE_FUNCTION):
439: case (NODE_OPTION):
440: case (NODE_PARA):
441: case (NODE_PARAMETER):
442: case (NODE_PROGRAMLISTING):
443: case (NODE_REFDESCRIPTOR):
444: case (NODE_REFENTRYTITLE):
445: case (NODE_REFNAME):
446: case (NODE_REFPURPOSE):
447: case (NODE_SYNOPSIS):
448: case (NODE_TITLE):
449: return(1);
450: default:
451: break;
452: }
453: return(0);
1.1 kristaps 454: case (NODE_SYNOPSIS):
455: switch (parent) {
456: case (NODE_REFSYNOPSISDIV):
457: case (NODE_REFSECT1):
458: return(1);
459: default:
460: break;
461: }
462: return(0);
463: case (NODE_TITLE):
464: switch (parent) {
465: case (NODE_REFSECT1):
466: case (NODE_REFSYNOPSISDIV):
467: return(1);
468: default:
469: break;
470: }
471: return(0);
472: case (NODE_TEXT):
473: return(1);
474: case (NODE__MAX):
475: break;
476: }
477:
478: abort();
479: return(0);
480: }
481:
1.8 kristaps 482: /*
483: * Process a stream of characters.
484: * We store text as nodes in and of themselves.
485: * If a text node is already open, append to it.
486: * If it's not open, open one under the current context.
487: */
1.1 kristaps 488: static void
489: xml_char(void *arg, const XML_Char *p, int sz)
490: {
491: struct parse *ps = arg;
492: struct pnode *dat;
1.4 kristaps 493: int i;
1.1 kristaps 494:
495: /* Stopped or no tree yet. */
496: if (ps->stop || NODE_ROOT == ps->node)
497: return;
498:
499: /* Not supposed to be collecting text. */
500: assert(NULL != ps->cur);
501: if (NODE_IGNTEXT & nodes[ps->node].flags)
502: return;
503:
504: /*
505: * Are we in the midst of processing text?
506: * If we're not processing text right now, then create a text
507: * node for doing so.
1.4 kristaps 508: * However, don't do so unless we have some non-whitespace to
1.10 kristaps 509: * process: strip out all leading whitespace to be sure.
1.1 kristaps 510: */
511: if (NODE_TEXT != ps->node) {
1.4 kristaps 512: for (i = 0; i < sz; i++)
513: if ( ! isspace((int)p[i]))
514: break;
515: if (i == sz)
516: return;
1.10 kristaps 517: p += i;
518: sz -= i;
1.1 kristaps 519: dat = calloc(1, sizeof(struct pnode));
520: if (NULL == dat) {
521: perror(NULL);
522: exit(EXIT_FAILURE);
523: }
524:
525: dat->node = ps->node = NODE_TEXT;
526: dat->parent = ps->cur;
527: TAILQ_INIT(&dat->childq);
1.12 ! kristaps 528: TAILQ_INIT(&dat->attrq);
1.1 kristaps 529: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
530: ps->cur = dat;
531: assert(NULL != ps->root);
532: }
533:
534: /* Append to current buffer. */
535: assert(sz >= 0);
536: ps->cur->b = realloc(ps->cur->b,
537: ps->cur->bsz + (size_t)sz);
538: if (NULL == ps->cur->b) {
539: perror(NULL);
540: exit(EXIT_FAILURE);
541: }
542: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
543: ps->cur->bsz += (size_t)sz;
544: }
545:
1.10 kristaps 546: static void
547: pnode_trim(struct pnode *pn)
548: {
549:
550: assert(NODE_TEXT == pn->node);
551: for ( ; pn->bsz > 0; pn->bsz--)
552: if ( ! isspace((int)pn->b[pn->bsz - 1]))
553: break;
554: }
555:
1.1 kristaps 556: /*
557: * Begin an element.
558: * First, look for the element.
559: * If we don't find it and we're not parsing, keep going.
1.8 kristaps 560: * If we don't find it and we're parsing, puke and exit.
1.1 kristaps 561: * If we find it but we're not parsing yet (i.e., it's not a refentry
562: * and thus out of context), keep going.
1.8 kristaps 563: * If we find it and we're at the root and already have a tree, puke and
564: * exit (FIXME: I don't think this is right?).
565: * If we find it but we're parsing a text node, close out the text node,
566: * return to its parent, and keep going.
1.1 kristaps 567: * Make sure that the element is in the right context.
568: * Lastly, put the node onto our parse tree and continue.
569: */
570: static void
571: xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
572: {
1.12 ! kristaps 573: struct parse *ps = arg;
! 574: enum nodeid node;
! 575: enum attrkey key;
! 576: enum attrval val;
! 577: struct pnode *dat;
! 578: struct pattr *pattr;
! 579: const XML_Char **att;
1.1 kristaps 580:
581: if (ps->stop)
582: return;
583:
584: /* Close out text node, if applicable... */
585: if (NODE_TEXT == ps->node) {
586: assert(NULL != ps->cur);
1.10 kristaps 587: pnode_trim(ps->cur);
1.1 kristaps 588: ps->cur = ps->cur->parent;
589: assert(NULL != ps->cur);
590: ps->node = ps->cur->node;
591: }
592:
593: for (node = 0; node < NODE__MAX; node++)
594: if (NULL == nodes[node].name)
595: continue;
596: else if (0 == strcmp(nodes[node].name, name))
597: break;
598:
599: if (NODE__MAX == node && NODE_ROOT == ps->node) {
600: return;
601: } else if (NODE__MAX == node) {
1.12 ! kristaps 602: fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
! 603: ps->fname, XML_GetCurrentLineNumber(ps->xml),
! 604: XML_GetCurrentColumnNumber(ps->xml), name);
1.1 kristaps 605: ps->stop = 1;
606: return;
607: } else if (NODE_ROOT == ps->node && NULL != ps->root) {
1.12 ! kristaps 608: fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
! 609: ps->fname, XML_GetCurrentLineNumber(ps->xml),
! 610: XML_GetCurrentColumnNumber(ps->xml));
1.1 kristaps 611: ps->stop = 1;
612: return;
613: } else if (NODE_ROOT == ps->node && NODE_REFENTRY != node) {
614: return;
615: } else if ( ! isparent(node, ps->node)) {
1.12 ! kristaps 616: fprintf(stderr, "%s:%zu:%zu: bad parent \"%s\"\n",
! 617: ps->fname, XML_GetCurrentLineNumber(ps->xml),
! 618: XML_GetCurrentColumnNumber(ps->xml),
! 619: NULL == nodes[ps->node].name ?
! 620: "(none)" : nodes[ps->node].name);
1.1 kristaps 621: ps->stop = 1;
622: return;
623: }
624:
625: if (NULL == (dat = calloc(1, sizeof(struct pnode)))) {
626: perror(NULL);
627: exit(EXIT_FAILURE);
628: }
629:
630: dat->node = ps->node = node;
631: dat->parent = ps->cur;
632: TAILQ_INIT(&dat->childq);
1.12 ! kristaps 633: TAILQ_INIT(&dat->attrq);
1.1 kristaps 634:
635: if (NULL != ps->cur)
636: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
637:
638: ps->cur = dat;
639: if (NULL == ps->root)
640: ps->root = dat;
1.12 ! kristaps 641:
! 642: /*
! 643: * Process attributes.
! 644: */
! 645: for (att = atts; NULL != *att; att += 2) {
! 646: for (key = 0; key < ATTRKEY__MAX; key++)
! 647: if (0 == strcmp(*att, attrkeys[key]))
! 648: break;
! 649: if (ATTRKEY__MAX == key) {
! 650: fprintf(stderr, "%s:%zu:%zu: unknown "
! 651: "attribute \"%s\"\n", ps->fname,
! 652: XML_GetCurrentLineNumber(ps->xml),
! 653: XML_GetCurrentColumnNumber(ps->xml),
! 654: *att);
! 655: continue;
! 656: } else if ( ! isattrkey(node, key)) {
! 657: fprintf(stderr, "%s:%zu:%zu: bad "
! 658: "attribute \"%s\"\n", ps->fname,
! 659: XML_GetCurrentLineNumber(ps->xml),
! 660: XML_GetCurrentColumnNumber(ps->xml),
! 661: *att);
! 662: continue;
! 663: }
! 664: for (val = 0; val < ATTRVAL__MAX; val++)
! 665: if (0 == strcmp(*(att + 1), attrvals[val]))
! 666: break;
! 667: if (ATTRVAL__MAX != val && ! isattrval(key, val)) {
! 668: fprintf(stderr, "%s:%zu:%zu: bad "
! 669: "value \"%s\"\n", ps->fname,
! 670: XML_GetCurrentLineNumber(ps->xml),
! 671: XML_GetCurrentColumnNumber(ps->xml),
! 672: *(att + 1));
! 673: continue;
! 674: }
! 675: pattr = calloc(1, sizeof(struct pattr));
! 676: pattr->key = key;
! 677: pattr->val = val;
! 678: if (ATTRVAL__MAX == val)
! 679: pattr->rawval = strdup(*(att + 1));
! 680: TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
! 681: }
! 682:
1.1 kristaps 683: }
684:
685: /*
686: * Roll up the parse tree.
1.8 kristaps 687: * If we're at a text node, roll that one up first.
1.1 kristaps 688: * If we hit the root, then assign ourselves as the NODE_ROOT.
689: */
690: static void
691: xml_elem_end(void *arg, const XML_Char *name)
692: {
693: struct parse *ps = arg;
694:
695: if (ps->stop || NODE_ROOT == ps->node)
696: return;
697:
698: /* Close out text node, if applicable... */
699: if (NODE_TEXT == ps->node) {
700: assert(NULL != ps->cur);
1.10 kristaps 701: pnode_trim(ps->cur);
1.1 kristaps 702: ps->cur = ps->cur->parent;
703: assert(NULL != ps->cur);
704: ps->node = ps->cur->node;
705: }
706:
707: if (NULL == (ps->cur = ps->cur->parent))
708: ps->node = NODE_ROOT;
709: else
710: ps->node = ps->cur->node;
711: }
712:
1.8 kristaps 713: /*
714: * Recursively free a node (NULL is ok).
715: */
1.1 kristaps 716: static void
717: pnode_free(struct pnode *pn)
718: {
719: struct pnode *pp;
1.12 ! kristaps 720: struct pattr *ap;
1.1 kristaps 721:
722: if (NULL == pn)
723: return;
724:
725: while (NULL != (pp = TAILQ_FIRST(&pn->childq))) {
726: TAILQ_REMOVE(&pn->childq, pp, child);
727: pnode_free(pp);
728: }
729:
1.12 ! kristaps 730: while (NULL != (ap = TAILQ_FIRST(&pn->attrq))) {
! 731: TAILQ_REMOVE(&pn->attrq, ap, child);
! 732: free(ap->rawval);
! 733: free(ap);
! 734: }
! 735:
1.1 kristaps 736: free(pn->b);
737: free(pn);
738: }
739:
1.8 kristaps 740: /*
741: * Unlink a node from its parent and pnode_free() it.
742: */
1.1 kristaps 743: static void
744: pnode_unlink(struct pnode *pn)
745: {
746:
747: if (NULL != pn->parent)
748: TAILQ_REMOVE(&pn->parent->childq, pn, child);
749: pnode_free(pn);
750: }
751:
1.8 kristaps 752: /*
753: * Unlink all children of a node and pnode_free() them.
754: */
1.1 kristaps 755: static void
1.4 kristaps 756: pnode_unlinksub(struct pnode *pn)
757: {
758:
759: while ( ! TAILQ_EMPTY(&pn->childq))
760: pnode_unlink(TAILQ_FIRST(&pn->childq));
761: }
762:
1.8 kristaps 763: /*
764: * Reset the lookaside buffer.
765: */
1.4 kristaps 766: static void
1.1 kristaps 767: bufclear(struct parse *p)
768: {
769:
770: p->b[p->bsz = 0] = '\0';
771: }
772:
1.8 kristaps 773: /*
774: * Append NODE_TEXT contents to the current buffer, reallocating its
775: * size if necessary.
776: * The buffer is ALWAYS nil-terminated.
777: */
1.1 kristaps 778: static void
779: bufappend(struct parse *p, struct pnode *pn)
780: {
781:
782: assert(NODE_TEXT == pn->node);
783: if (p->bsz + pn->bsz + 1 > p->mbsz) {
784: p->mbsz = p->bsz + pn->bsz + 1;
785: if (NULL == (p->b = realloc(p->b, p->mbsz))) {
786: perror(NULL);
787: exit(EXIT_FAILURE);
788: }
789: }
790: memcpy(p->b + p->bsz, pn->b, pn->bsz);
791: p->bsz += pn->bsz;
792: p->b[p->bsz] = '\0';
793: }
794:
1.8 kristaps 795: /*
796: * Recursively append all NODE_TEXT nodes to the buffer.
797: * This descends into non-text nodes, but doesn't do anything beyond
798: * them.
799: * In other words, this is a recursive text grok.
800: */
1.3 kristaps 801: static void
802: bufappend_r(struct parse *p, struct pnode *pn)
803: {
804: struct pnode *pp;
805:
806: if (NODE_TEXT == pn->node)
807: bufappend(p, pn);
808: TAILQ_FOREACH(pp, &pn->childq, child)
809: bufappend_r(p, pp);
810: }
811:
1.12 ! kristaps 812: #define MACROLINE_NORM 0
! 813: #define MACROLINE_UPPER 1
1.1 kristaps 814: /*
1.8 kristaps 815: * Recursively print text presumably on a macro line.
1.1 kristaps 816: * Convert all whitespace to regular spaces.
817: */
818: static void
1.12 ! kristaps 819: pnode_printmacrolinetext(struct parse *p, struct pnode *pn, int fl)
1.1 kristaps 820: {
821: char *cp;
822:
823: bufclear(p);
1.3 kristaps 824: bufappend_r(p, pn);
1.1 kristaps 825:
826: /* Convert all space to spaces. */
827: for (cp = p->b; '\0' != *cp; cp++)
828: if (isspace((int)*cp))
829: *cp = ' ';
830:
831: for (cp = p->b; isspace((int)*cp); cp++)
1.4 kristaps 832: /* Spin past whitespace (XXX: necessary?) */ ;
1.1 kristaps 833: for ( ; '\0' != *cp; cp++) {
834: /* Escape us if we look like a macro. */
835: if ((cp == p->b || ' ' == *(cp - 1)) &&
836: isupper((int)*cp) &&
837: '\0' != *(cp + 1) &&
838: islower((int)*(cp + 1)) &&
839: ('\0' == *(cp + 2) ||
840: ' ' == *(cp + 2) ||
841: (islower((int)*(cp + 2)) &&
842: ('\0' == *(cp + 3) ||
843: ' ' == *(cp + 3)))))
844: fputs("\\&", stdout);
1.12 ! kristaps 845: if (MACROLINE_UPPER & fl)
! 846: putchar(toupper((int)*cp));
! 847: else
! 848: putchar((int)*cp);
1.1 kristaps 849: /* If we're a character escape, escape us. */
850: if ('\\' == *cp)
851: putchar('e');
852: }
853: }
854:
1.12 ! kristaps 855: static void
! 856: pnode_printmacrolinepart(struct parse *p, struct pnode *pn)
! 857: {
! 858:
! 859: pnode_printmacrolinetext(p, pn, 0);
! 860: }
! 861:
1.1 kristaps 862: /*
863: * Just pnode_printmacrolinepart() but with a newline.
864: * If no text, just the newline.
865: */
866: static void
867: pnode_printmacroline(struct parse *p, struct pnode *pn)
868: {
869:
1.12 ! kristaps 870: pnode_printmacrolinetext(p, pn, 0);
1.1 kristaps 871: putchar('\n');
872: }
873:
1.10 kristaps 874: static void
875: pnode_printmopen(struct parse *p)
876: {
877: if (p->newln) {
878: putchar('.');
879: p->newln = 0;
880: } else
881: putchar(' ');
882: }
883:
884: static void
885: pnode_printmclose(struct parse *p, int sv)
886: {
887:
888: if (sv && ! p->newln) {
889: putchar('\n');
890: p->newln = 1;
891: }
892: }
893:
1.8 kristaps 894: /*
1.10 kristaps 895: * If the SYNOPSIS macro has a superfluous title, kill it.
1.8 kristaps 896: */
1.1 kristaps 897: static void
1.6 kristaps 898: pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
899: {
900: struct pnode *pp;
901:
1.10 kristaps 902: TAILQ_FOREACH(pp, &pn->childq, child)
1.6 kristaps 903: if (NODE_TITLE == pp->node) {
904: pnode_unlink(pp);
1.10 kristaps 905: return;
1.6 kristaps 906: }
907: }
908:
1.8 kristaps 909: /*
910: * Start a hopefully-named `Sh' section.
911: */
1.6 kristaps 912: static void
1.1 kristaps 913: pnode_printrefsect(struct parse *p, struct pnode *pn)
914: {
915: struct pnode *pp;
916:
917: TAILQ_FOREACH(pp, &pn->childq, child)
918: if (NODE_TITLE == pp->node)
919: break;
920:
1.4 kristaps 921: fputs(".Sh ", stdout);
922:
1.5 kristaps 923: if (NULL != pp) {
1.1 kristaps 924: pnode_printmacroline(p, pp);
1.5 kristaps 925: pnode_unlink(pp);
926: } else
1.4 kristaps 927: puts("UNKNOWN");
1.1 kristaps 928: }
929:
1.8 kristaps 930: /*
931: * Start a reference, extracting the title and volume.
932: */
1.1 kristaps 933: static void
934: pnode_printciterefentry(struct parse *p, struct pnode *pn)
935: {
936: struct pnode *pp, *title, *manvol;
937:
938: title = manvol = NULL;
939: TAILQ_FOREACH(pp, &pn->childq, child)
940: if (NODE_MANVOLNUM == pp->node)
941: manvol = pp;
942: else if (NODE_REFENTRYTITLE == pp->node)
943: title = pp;
944:
945: fputs(".Xr ", stdout);
1.4 kristaps 946:
1.1 kristaps 947: if (NULL != title) {
948: pnode_printmacrolinepart(p, title);
1.4 kristaps 949: putchar(' ');
1.1 kristaps 950: } else
1.4 kristaps 951: fputs("unknown ", stdout);
952:
953: if (NULL != manvol)
1.1 kristaps 954: pnode_printmacroline(p, manvol);
1.4 kristaps 955: else
1.1 kristaps 956: puts("1");
957: }
958:
959: static void
960: pnode_printrefmeta(struct parse *p, struct pnode *pn)
961: {
962: struct pnode *pp, *title, *manvol;
963:
964: title = manvol = NULL;
965: TAILQ_FOREACH(pp, &pn->childq, child)
966: if (NODE_MANVOLNUM == pp->node)
967: manvol = pp;
968: else if (NODE_REFENTRYTITLE == pp->node)
969: title = pp;
970:
1.2 kristaps 971: puts(".Dd $Mdocdate" "$");
1.1 kristaps 972: fputs(".Dt ", stdout);
973:
974: if (NULL != title) {
1.7 kristaps 975: /* FIXME: uppercase. */
1.12 ! kristaps 976: pnode_printmacrolinetext(p, title, MACROLINE_UPPER);
1.4 kristaps 977: putchar(' ');
1.1 kristaps 978: } else
1.4 kristaps 979: fputs("UNKNOWN ", stdout);
980:
981: if (NULL != manvol)
1.1 kristaps 982: pnode_printmacroline(p, manvol);
1.4 kristaps 983: else
1.1 kristaps 984: puts("1");
985:
986: puts(".Os");
987: }
988:
1.3 kristaps 989: static void
990: pnode_printfuncdef(struct parse *p, struct pnode *pn)
991: {
992: struct pnode *pp, *ftype, *func;
993:
994: ftype = func = NULL;
995: TAILQ_FOREACH(pp, &pn->childq, child)
996: if (NODE_TEXT == pp->node)
997: ftype = pp;
998: else if (NODE_FUNCTION == pp->node)
999: func = pp;
1000:
1001: if (NULL != ftype) {
1002: fputs(".Ft ", stdout);
1003: pnode_printmacroline(p, ftype);
1004: }
1005:
1006: if (NULL != func) {
1007: fputs(".Fo ", stdout);
1008: pnode_printmacroline(p, func);
1009: } else
1010: puts(".Fo UNKNOWN");
1011: }
1012:
1013: static void
1014: pnode_printparamdef(struct parse *p, struct pnode *pn)
1015: {
1016: struct pnode *pp, *ptype, *param;
1017:
1018: ptype = param = NULL;
1019: TAILQ_FOREACH(pp, &pn->childq, child)
1020: if (NODE_TEXT == pp->node)
1021: ptype = pp;
1022: else if (NODE_PARAMETER == pp->node)
1023: param = pp;
1024:
1025: fputs(".Fa \"", stdout);
1026: if (NULL != ptype) {
1027: pnode_printmacrolinepart(p, ptype);
1028: putchar(' ');
1029: }
1030:
1031: if (NULL != param)
1032: pnode_printmacrolinepart(p, param);
1033:
1034: puts("\"");
1035: }
1036:
1037: static void
1038: pnode_printfuncprototype(struct parse *p, struct pnode *pn)
1039: {
1040: struct pnode *pp, *fdef;
1041:
1042: TAILQ_FOREACH(fdef, &pn->childq, child)
1043: if (NODE_FUNCDEF == fdef->node)
1044: break;
1045:
1.4 kristaps 1046: if (NULL != fdef)
1.3 kristaps 1047: pnode_printfuncdef(p, fdef);
1.4 kristaps 1048: else
1.3 kristaps 1049: puts(".Fo UNKNOWN");
1050:
1.4 kristaps 1051: TAILQ_FOREACH(pp, &pn->childq, child)
1.3 kristaps 1052: if (NODE_PARAMDEF == pp->node)
1053: pnode_printparamdef(p, pp);
1054:
1055: puts(".Fc");
1056: }
1057:
1.10 kristaps 1058: /*
1059: * The <arg> element is more complicated than it should be because text
1060: * nodes are treated like ".Ar foo", but non-text nodes need to be
1061: * re-sent into the printer (i.e., without the preceding ".Ar").
1.12 ! kristaps 1062: * This also handles the case of "repetition" (or in other words, the
! 1063: * ellipsis following an argument) and optionality.
1.10 kristaps 1064: */
1.4 kristaps 1065: static void
1.10 kristaps 1066: pnode_printarg(struct parse *p, struct pnode *pn)
1.4 kristaps 1067: {
1068: struct pnode *pp;
1.12 ! kristaps 1069: struct pattr *ap;
! 1070: int isop, isrep;
! 1071:
! 1072: isop = 1;
! 1073: isrep = 0;
! 1074: TAILQ_FOREACH(ap, &pn->attrq, child)
! 1075: if (ATTRKEY_CHOICE == ap->key &&
! 1076: (ATTRVAL_PLAIN == ap->val ||
! 1077: ATTRVAL_REQ == ap->val))
! 1078: isop = 0;
! 1079: else if (ATTRKEY_REP == ap->key &&
! 1080: (ATTRVAL_REPEAT == ap->val))
! 1081: isrep = 1;
! 1082:
! 1083: if (isop) {
! 1084: pnode_printmopen(p);
! 1085: fputs("Op ", stdout);
! 1086: }
1.4 kristaps 1087:
1.10 kristaps 1088: TAILQ_FOREACH(pp, &pn->childq, child) {
1089: if (NODE_TEXT == pp->node) {
1090: pnode_printmopen(p);
1.4 kristaps 1091: fputs("Ar ", stdout);
1.10 kristaps 1092: }
1093: pnode_print(p, pp);
1.12 ! kristaps 1094: if (NODE_TEXT == pp->node && isrep)
! 1095: fputs("...", stdout);
1.10 kristaps 1096: }
1.4 kristaps 1097: }
1098:
1.7 kristaps 1099: /*
1100: * Recursively search and return the first instance of "node".
1101: */
1102: static struct pnode *
1103: pnode_findfirst(struct pnode *pn, enum nodeid node)
1104: {
1105: struct pnode *pp, *res;
1106:
1107: res = NULL;
1108: TAILQ_FOREACH(pp, &pn->childq, child) {
1109: res = pp->node == node ? pp :
1110: pnode_findfirst(pp, node);
1111: if (NULL != res)
1112: break;
1113: }
1114:
1115: return(res);
1116: }
1117:
1118: static void
1119: pnode_printprologue(struct parse *p, struct pnode *pn)
1120: {
1121: struct pnode *pp;
1122:
1.9 kristaps 1123: pp = NULL == p->root ? NULL :
1124: pnode_findfirst(p->root, NODE_REFMETA);
1125:
1126: if (NULL != pp) {
1.7 kristaps 1127: pnode_printrefmeta(p, pp);
1128: pnode_unlink(pp);
1129: } else {
1130: puts(".\\\" Supplying bogus prologue...");
1131: puts(".Dd $Mdocdate" "$");
1132: puts(".Dt UNKNOWN 1");
1133: puts(".Os");
1134: }
1135: }
1136:
1.1 kristaps 1137: /*
1138: * Print a parsed node (or ignore it--whatever).
1139: * This is a recursive function.
1140: * FIXME: macro line continuation?
1141: */
1142: static void
1143: pnode_print(struct parse *p, struct pnode *pn)
1144: {
1145: struct pnode *pp;
1146: char *cp;
1.10 kristaps 1147: int last, sv;
1.1 kristaps 1148:
1149: if (NULL == pn)
1150: return;
1151:
1.10 kristaps 1152: sv = p->newln;
1.1 kristaps 1153:
1154: switch (pn->node) {
1.4 kristaps 1155: case (NODE_ARG):
1.10 kristaps 1156: pnode_printarg(p, pn);
1.4 kristaps 1157: pnode_unlinksub(pn);
1158: break;
1.1 kristaps 1159: case (NODE_CITEREFENTRY):
1.10 kristaps 1160: assert(p->newln);
1.1 kristaps 1161: pnode_printciterefentry(p, pn);
1.4 kristaps 1162: pnode_unlinksub(pn);
1.1 kristaps 1163: break;
1164: case (NODE_CODE):
1.10 kristaps 1165: pnode_printmopen(p);
1166: fputs("Li ", stdout);
1.4 kristaps 1167: break;
1168: case (NODE_COMMAND):
1.10 kristaps 1169: pnode_printmopen(p);
1170: fputs("Nm ", stdout);
1.1 kristaps 1171: break;
1.3 kristaps 1172: case (NODE_FUNCTION):
1.10 kristaps 1173: pnode_printmopen(p);
1174: fputs("Fn ", stdout);
1.3 kristaps 1175: break;
1176: case (NODE_FUNCPROTOTYPE):
1.10 kristaps 1177: assert(p->newln);
1.3 kristaps 1178: pnode_printfuncprototype(p, pn);
1.4 kristaps 1179: pnode_unlinksub(pn);
1.3 kristaps 1180: break;
1.1 kristaps 1181: case (NODE_FUNCSYNOPSISINFO):
1.10 kristaps 1182: pnode_printmopen(p);
1183: fputs("Fd ", stdout);
1184: break;
1185: case (NODE_OPTION):
1186: pnode_printmopen(p);
1187: fputs("Fl ", stdout);
1.1 kristaps 1188: break;
1189: case (NODE_PARA):
1.10 kristaps 1190: assert(p->newln);
1.1 kristaps 1191: puts(".Pp");
1.3 kristaps 1192: break;
1193: case (NODE_PARAMETER):
1.10 kristaps 1194: /* Suppress non-text children... */
1195: pnode_printmopen(p);
1196: fputs("Fa \"", stdout);
1.3 kristaps 1197: pnode_printmacrolinepart(p, pn);
1198: puts("\"");
1.4 kristaps 1199: pnode_unlinksub(pn);
1.1 kristaps 1200: break;
1201: case (NODE_PROGRAMLISTING):
1.10 kristaps 1202: assert(p->newln);
1.1 kristaps 1203: puts(".Bd -literal");
1204: break;
1205: case (NODE_REFMETA):
1.7 kristaps 1206: abort();
1.1 kristaps 1207: break;
1208: case (NODE_REFNAME):
1.10 kristaps 1209: /* Suppress non-text children... */
1210: pnode_printmopen(p);
1211: fputs("Nm ", stdout);
1212: pnode_printmacrolinepart(p, pn);
1.4 kristaps 1213: pnode_unlinksub(pn);
1.10 kristaps 1214: break;
1.1 kristaps 1215: case (NODE_REFNAMEDIV):
1.10 kristaps 1216: assert(p->newln);
1.1 kristaps 1217: puts(".Sh NAME");
1218: break;
1219: case (NODE_REFPURPOSE):
1.10 kristaps 1220: assert(p->newln);
1.1 kristaps 1221: fputs(".Nd ", stdout);
1.10 kristaps 1222: break;
1.1 kristaps 1223: case (NODE_REFSYNOPSISDIV):
1.10 kristaps 1224: assert(p->newln);
1.6 kristaps 1225: pnode_printrefsynopsisdiv(p, pn);
1.10 kristaps 1226: puts(".Sh SYNOPSIS");
1.1 kristaps 1227: break;
1228: case (NODE_REFSECT1):
1.10 kristaps 1229: assert(p->newln);
1.1 kristaps 1230: pnode_printrefsect(p, pn);
1231: break;
1.8 kristaps 1232: case (NODE_STRUCTNAME):
1.10 kristaps 1233: pnode_printmopen(p);
1234: fputs("Vt ", stdout);
1235: break;
1.1 kristaps 1236: case (NODE_TEXT):
1237: bufclear(p);
1238: bufappend(p, pn);
1239: /*
1240: * Output all characters, squeezing out whitespace
1241: * between newlines.
1242: * XXX: all whitespace, including tabs (?).
1243: * Remember to escape control characters and escapes.
1244: */
1.10 kristaps 1245: assert(p->bsz);
1.1 kristaps 1246: for (last = '\n', cp = p->b; '\0' != *cp; ) {
1247: if ('\n' == last) {
1248: /* Consume all whitespace. */
1249: if (isspace((int)*cp)) {
1250: while (isspace((int)*cp))
1251: cp++;
1252: continue;
1253: } else if ('\'' == *cp || '.' == *cp)
1254: fputs("\\&", stdout);
1255: }
1256: putchar(last = *cp++);
1257: /* If we're a character escape, escape us. */
1258: if ('\\' == last)
1259: putchar('e');
1260: }
1.10 kristaps 1261: p->newln = 0;
1.1 kristaps 1262: break;
1263: default:
1264: break;
1265: }
1266:
1267: TAILQ_FOREACH(pp, &pn->childq, child)
1268: pnode_print(p, pp);
1269:
1270: switch (pn->node) {
1.10 kristaps 1271: case (NODE_ARG):
1272: case (NODE_CODE):
1273: case (NODE_COMMAND):
1274: case (NODE_FUNCTION):
1275: case (NODE_FUNCSYNOPSISINFO):
1276: case (NODE_OPTION):
1277: case (NODE_PARAMETER):
1278: case (NODE_STRUCTNAME):
1279: case (NODE_TEXT):
1280: pnode_printmclose(p, sv);
1281: break;
1.12 ! kristaps 1282: case (NODE_REFNAME):
! 1283: /*
! 1284: * If we're in the NAME macro and we have multiple
! 1285: * <refname> macros in sequence, then print out a
! 1286: * trailing comma before the newline.
! 1287: */
! 1288: if (NULL != pn->parent &&
! 1289: NODE_REFNAMEDIV == pn->parent->node &&
! 1290: NULL != TAILQ_NEXT(pn, child) &&
! 1291: NODE_REFNAME == TAILQ_NEXT(pn, child)->node)
! 1292: fputs(" ,", stdout);
! 1293: pnode_printmclose(p, sv);
! 1294: break;
1.1 kristaps 1295: case (NODE_PROGRAMLISTING):
1.10 kristaps 1296: assert(p->newln);
1.1 kristaps 1297: puts(".Ed");
1.10 kristaps 1298: p->newln = 1;
1.1 kristaps 1299: break;
1300: default:
1301: break;
1302: }
1303: }
1304:
1305: /*
1306: * Loop around the read buffer until we've drained it of all data.
1307: * Invoke the parser context with each buffer fill.
1308: */
1309: static int
1310: readfile(XML_Parser xp, int fd,
1311: char *b, size_t bsz, const char *fn)
1312: {
1313: struct parse p;
1314: int rc;
1315: ssize_t ssz;
1316:
1317: memset(&p, 0, sizeof(struct parse));
1318:
1319: p.b = malloc(p.bsz = p.mbsz = 1024);
1.12 ! kristaps 1320: p.fname = fn;
! 1321: p.xml = xp;
1.1 kristaps 1322:
1323: XML_SetCharacterDataHandler(xp, xml_char);
1324: XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
1325: XML_SetUserData(xp, &p);
1326:
1327: while ((ssz = read(fd, b, bsz)) >= 0) {
1328: if (0 == (rc = XML_Parse(xp, b, ssz, 0 == ssz)))
1329: fprintf(stderr, "%s: %s\n", fn,
1330: XML_ErrorString
1331: (XML_GetErrorCode(xp)));
1332: else if ( ! p.stop && ssz > 0)
1333: continue;
1334: /*
1335: * Exit when we've read all or errors have occured
1336: * during the parse sequence.
1337: */
1.10 kristaps 1338: p.newln = 1;
1.7 kristaps 1339: pnode_printprologue(&p, p.root);
1.1 kristaps 1340: pnode_print(&p, p.root);
1341: pnode_free(p.root);
1342: free(p.b);
1343: return(0 != rc && ! p.stop);
1344: }
1345:
1346: /* Read error has occured. */
1347: perror(fn);
1348: pnode_free(p.root);
1349: free(p.b);
1350: return(0);
1351: }
1352:
1353: int
1354: main(int argc, char *argv[])
1355: {
1356: XML_Parser xp;
1357: const char *fname;
1358: char *buf;
1359: int fd, rc;
1360:
1361: fname = "-";
1362: xp = NULL;
1363: buf = NULL;
1364: rc = 0;
1365:
1366: if (-1 != getopt(argc, argv, ""))
1367: return(EXIT_FAILURE);
1368:
1369: argc -= optind;
1370: argv += optind;
1371:
1372: if (argc > 1)
1373: return(EXIT_FAILURE);
1374: else if (argc > 0)
1375: fname = argv[0];
1376:
1377: /* Read from stdin or a file. */
1378: fd = 0 == strcmp(fname, "-") ?
1379: STDIN_FILENO : open(fname, O_RDONLY, 0);
1380:
1381: /*
1382: * Open file for reading.
1383: * Allocate a read buffer.
1384: * Create the parser context.
1385: * Dive directly into the parse.
1386: */
1387: if (-1 == fd)
1388: perror(fname);
1389: else if (NULL == (buf = malloc(4096)))
1390: perror(NULL);
1391: else if (NULL == (xp = XML_ParserCreate(NULL)))
1392: perror(NULL);
1393: else if ( ! readfile(xp, fd, buf, 4096, fname))
1394: rc = 1;
1395:
1396: XML_ParserFree(xp);
1397: free(buf);
1398: if (STDIN_FILENO != fd)
1399: close(fd);
1400: return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
1401: }
CVSweb