Annotation of docbook2mdoc/parse.c, Revision 1.6
1.6 ! schwarze 1: /* $Id: parse.c,v 1.5 2019/03/28 12:21:10 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
19: #include <ctype.h>
1.6 ! schwarze 20: #include <stdarg.h>
1.1 schwarze 21: #include <stdio.h>
1.5 schwarze 22: #include <stdlib.h>
1.1 schwarze 23: #include <string.h>
24: #include <unistd.h>
25:
26: #include "node.h"
27: #include "parse.h"
28:
29: /*
30: * The implementation of the DocBook parser.
31: */
32:
33: /*
34: * Global parse state.
35: * Keep this as simple and small as possible.
36: */
37: struct parse {
38: const char *fname; /* Name of the input file. */
39: struct ptree *tree; /* Complete parse result. */
40: struct pnode *cur; /* Current node in the tree. */
1.5 schwarze 41: enum nodeid ncur; /* Type of the current node. */
42: int line; /* Line number in the input file. */
43: int col; /* Column number in the input file. */
44: int nline; /* Line number of next token. */
45: int ncol; /* Column number of next token. */
1.4 schwarze 46: int del; /* Levels of nested nodes being deleted. */
1.5 schwarze 47: int attr; /* The most recent attribute is valid. */
1.1 schwarze 48: int warn;
49: };
50:
51: struct element {
52: const char *name; /* DocBook element name. */
53: enum nodeid node; /* Node type to generate. */
54: };
55:
56: static const struct element elements[] = {
1.3 schwarze 57: { "acronym", NODE_IGNORE },
1.1 schwarze 58: { "affiliation", NODE_AFFILIATION },
1.4 schwarze 59: { "anchor", NODE_DELETE },
1.1 schwarze 60: { "application", NODE_APPLICATION },
61: { "arg", NODE_ARG },
62: { "author", NODE_AUTHOR },
63: { "authorgroup", NODE_AUTHORGROUP },
64: { "blockquote", NODE_BLOCKQUOTE },
65: { "book", NODE_BOOK },
66: { "bookinfo", NODE_BOOKINFO },
67: { "caution", NODE_CAUTION },
68: { "chapter", NODE_SECTION },
69: { "citerefentry", NODE_CITEREFENTRY },
70: { "citetitle", NODE_CITETITLE },
71: { "cmdsynopsis", NODE_CMDSYNOPSIS },
72: { "code", NODE_CODE },
73: { "colspec", NODE_COLSPEC },
74: { "command", NODE_COMMAND },
75: { "constant", NODE_CONSTANT },
76: { "copyright", NODE_COPYRIGHT },
77: { "date", NODE_DATE },
78: { "editor", NODE_EDITOR },
79: { "email", NODE_EMAIL },
80: { "emphasis", NODE_EMPHASIS },
81: { "entry", NODE_ENTRY },
82: { "envar", NODE_ENVAR },
83: { "fieldsynopsis", NODE_FIELDSYNOPSIS },
84: { "filename", NODE_FILENAME },
1.3 schwarze 85: { "firstname", NODE_IGNORE },
1.1 schwarze 86: { "firstterm", NODE_FIRSTTERM },
87: { "footnote", NODE_FOOTNOTE },
88: { "funcdef", NODE_FUNCDEF },
89: { "funcprototype", NODE_FUNCPROTOTYPE },
90: { "funcsynopsis", NODE_FUNCSYNOPSIS },
91: { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO },
92: { "function", NODE_FUNCTION },
93: { "glossterm", NODE_GLOSSTERM },
94: { "group", NODE_GROUP },
95: { "holder", NODE_HOLDER },
96: { "index", NODE_INDEX },
1.4 schwarze 97: { "indexterm", NODE_DELETE },
1.1 schwarze 98: { "info", NODE_INFO },
99: { "informalequation", NODE_INFORMALEQUATION },
100: { "informaltable", NODE_INFORMALTABLE },
101: { "inlineequation", NODE_INLINEEQUATION },
102: { "itemizedlist", NODE_ITEMIZEDLIST },
103: { "keysym", NODE_KEYSYM },
104: { "legalnotice", NODE_LEGALNOTICE },
105: { "link", NODE_LINK },
106: { "listitem", NODE_LISTITEM },
107: { "literal", NODE_LITERAL },
108: { "literallayout", NODE_LITERALLAYOUT },
109: { "manvolnum", NODE_MANVOLNUM },
110: { "member", NODE_MEMBER },
111: { "mml:math", NODE_MML_MATH },
112: { "mml:mfenced", NODE_MML_MFENCED },
113: { "mml:mfrac", NODE_MML_MFRAC },
114: { "mml:mi", NODE_MML_MI },
115: { "mml:mn", NODE_MML_MN },
116: { "mml:mo", NODE_MML_MO },
117: { "mml:mrow", NODE_MML_MROW },
118: { "mml:msub", NODE_MML_MSUB },
119: { "mml:msup", NODE_MML_MSUP },
120: { "modifier", NODE_MODIFIER },
121: { "note", NODE_NOTE },
122: { "option", NODE_OPTION },
123: { "orderedlist", NODE_ORDEREDLIST },
124: { "orgname", NODE_ORGNAME },
1.3 schwarze 125: { "othername", NODE_IGNORE },
1.1 schwarze 126: { "para", NODE_PARA },
127: { "paramdef", NODE_PARAMDEF },
128: { "parameter", NODE_PARAMETER },
129: { "part", NODE_SECTION },
130: { "personname", NODE_PERSONNAME },
1.3 schwarze 131: { "phrase", NODE_IGNORE },
1.1 schwarze 132: { "preface", NODE_PREFACE },
1.4 schwarze 133: { "primary", NODE_DELETE },
1.1 schwarze 134: { "programlisting", NODE_PROGRAMLISTING },
135: { "prompt", NODE_PROMPT },
136: { "quote", NODE_QUOTE },
137: { "refclass", NODE_REFCLASS },
138: { "refdescriptor", NODE_REFDESCRIPTOR },
139: { "refentry", NODE_REFENTRY },
140: { "refentryinfo", NODE_REFENTRYINFO },
141: { "refentrytitle", NODE_REFENTRYTITLE },
142: { "refmeta", NODE_REFMETA },
143: { "refmetainfo", NODE_REFMETAINFO },
144: { "refmiscinfo", NODE_REFMISCINFO },
145: { "refname", NODE_REFNAME },
146: { "refnamediv", NODE_REFNAMEDIV },
147: { "refpurpose", NODE_REFPURPOSE },
148: { "refsect1", NODE_SECTION },
149: { "refsect2", NODE_SECTION },
150: { "refsect3", NODE_SECTION },
151: { "refsection", NODE_SECTION },
152: { "refsynopsisdiv", NODE_REFSYNOPSISDIV },
153: { "releaseinfo", NODE_RELEASEINFO },
154: { "replaceable", NODE_REPLACEABLE },
155: { "row", NODE_ROW },
156: { "sbr", NODE_SBR },
157: { "screen", NODE_SCREEN },
1.4 schwarze 158: { "secondary", NODE_DELETE },
1.1 schwarze 159: { "sect1", NODE_SECTION },
160: { "sect2", NODE_SECTION },
161: { "section", NODE_SECTION },
162: { "sgmltag", NODE_SGMLTAG },
163: { "simplelist", NODE_SIMPLELIST },
164: { "spanspec", NODE_SPANSPEC },
165: { "structname", NODE_STRUCTNAME },
166: { "subtitle", NODE_SUBTITLE },
1.3 schwarze 167: { "surname", NODE_IGNORE },
1.1 schwarze 168: { "synopsis", NODE_SYNOPSIS },
169: { "table", NODE_TABLE },
170: { "tbody", NODE_TBODY },
171: { "term", NODE_TERM },
172: { "tfoot", NODE_TFOOT },
173: { "tgroup", NODE_TGROUP },
174: { "thead", NODE_THEAD },
175: { "tip", NODE_TIP },
176: { "title", NODE_TITLE },
1.3 schwarze 177: { "trademark", NODE_IGNORE },
1.1 schwarze 178: { "type", NODE_TYPE },
179: { "ulink", NODE_ULINK },
180: { "userinput", NODE_USERINPUT },
181: { "variablelist", NODE_VARIABLELIST },
182: { "varlistentry", NODE_VARLISTENTRY },
183: { "varname", NODE_VARNAME },
184: { "warning", NODE_WARNING },
185: { "wordasword", NODE_WORDASWORD },
1.4 schwarze 186: { "xi:include", NODE_DELETE_WARN },
1.1 schwarze 187: { "year", NODE_YEAR },
1.5 schwarze 188: { NULL, NODE_IGNORE }
1.1 schwarze 189: };
190:
1.6 ! schwarze 191: static void
! 192: error_msg(struct parse *p, const char *fmt, ...)
! 193: {
! 194: va_list ap;
! 195:
! 196: fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col);
! 197: va_start(ap, fmt);
! 198: vfprintf(stderr, fmt, ap);
! 199: va_end(ap);
! 200: fputc('\n', stderr);
! 201: p->tree->flags |= TREE_FAIL;
! 202: }
! 203:
! 204: static void
! 205: warn_msg(struct parse *p, const char *fmt, ...)
! 206: {
! 207: va_list ap;
! 208:
! 209: if (p->warn == 0)
! 210: return;
! 211:
! 212: fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col);
! 213: va_start(ap, fmt);
! 214: vfprintf(stderr, fmt, ap);
! 215: va_end(ap);
! 216: fputc('\n', stderr);
! 217: }
! 218:
1.1 schwarze 219: /*
220: * Process a string of characters.
221: * If a text node is already open, append to it.
222: * Otherwise, create a new one as a child of the current node.
223: */
224: static void
1.5 schwarze 225: xml_char(struct parse *ps, const char *p, int sz)
1.1 schwarze 226: {
227: struct pnode *dat;
228:
1.5 schwarze 229: if (ps->del > 0)
1.1 schwarze 230: return;
231:
1.5 schwarze 232: if (ps->cur == NULL) {
1.6 ! schwarze 233: error_msg(ps, "discarding text before document: %.*s", sz, p);
1.5 schwarze 234: return;
235: }
236:
1.1 schwarze 237: if (ps->cur->node != NODE_TEXT) {
238: if ((dat = calloc(1, sizeof(*dat))) == NULL) {
239: perror(NULL);
240: exit(1);
241: }
242: dat->node = NODE_TEXT;
243: dat->parent = ps->cur;
244: TAILQ_INIT(&dat->childq);
245: TAILQ_INIT(&dat->attrq);
246: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
247: ps->cur = dat;
248: }
249:
1.5 schwarze 250: if (ps->tree->flags & TREE_CLOSED &&
1.6 ! schwarze 251: ps->cur->parent == ps->tree->root)
! 252: warn_msg(ps, "text after end of document: %.*s", sz, p);
1.5 schwarze 253:
1.1 schwarze 254: /* Append to the current text node. */
255:
256: assert(sz >= 0);
257: ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
258: if (ps->cur->b == NULL) {
259: perror(NULL);
260: exit(1);
261: }
262: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
263: ps->cur->bsz += sz;
264: ps->cur->b[ps->cur->bsz] = '\0';
265: ps->cur->real = ps->cur->b;
266: }
267:
268: static void
269: pnode_trim(struct pnode *pn)
270: {
271: assert(pn->node == NODE_TEXT);
272: for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
273: if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0)
274: break;
275: }
276:
277: /*
278: * Begin an element.
279: */
280: static void
1.5 schwarze 281: xml_elem_start(struct parse *ps, const char *name)
1.1 schwarze 282: {
1.5 schwarze 283: const struct element *elem;
284: struct pnode *dat;
1.1 schwarze 285:
1.5 schwarze 286: if (*name == '!' || *name == '?')
1.1 schwarze 287: return;
288:
1.4 schwarze 289: /*
290: * An ancestor is excluded from the tree;
291: * keep track of the number of levels excluded.
292: */
293: if (ps->del > 0) {
294: ps->del++;
295: return;
296: }
297:
1.1 schwarze 298: /* Close out the text node, if there is one. */
299: if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
300: pnode_trim(ps->cur);
301: ps->cur = ps->cur->parent;
302: }
303:
304: for (elem = elements; elem->name != NULL; elem++)
305: if (strcmp(elem->name, name) == 0)
306: break;
307:
1.6 ! schwarze 308: if (elem->name == NULL)
! 309: error_msg(ps, "unknown element <%s>", name);
! 310:
1.5 schwarze 311: ps->ncur = elem->node;
1.1 schwarze 312:
1.5 schwarze 313: switch (ps->ncur) {
1.4 schwarze 314: case NODE_DELETE_WARN:
1.6 ! schwarze 315: warn_msg(ps, "skipping element <%s>", name);
1.2 schwarze 316: /* FALLTHROUGH */
1.4 schwarze 317: case NODE_DELETE:
318: ps->del = 1;
319: /* FALLTHROUGH */
1.2 schwarze 320: case NODE_IGNORE:
321: return;
322: case NODE_INLINEEQUATION:
1.1 schwarze 323: ps->tree->flags |= TREE_EQN;
1.2 schwarze 324: break;
325: default:
326: break;
327: }
1.1 schwarze 328:
1.6 ! schwarze 329: if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL)
! 330: warn_msg(ps, "element after end of document: <%s>", name);
1.5 schwarze 331:
1.1 schwarze 332: if ((dat = calloc(1, sizeof(*dat))) == NULL) {
333: perror(NULL);
334: exit(1);
335: }
336: dat->node = elem->node;
337: dat->parent = ps->cur;
338: TAILQ_INIT(&dat->childq);
339: TAILQ_INIT(&dat->attrq);
340:
341: if (ps->cur != NULL)
342: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
343:
344: ps->cur = dat;
345: if (ps->tree->root == NULL)
346: ps->tree->root = dat;
1.5 schwarze 347: }
348:
349: static void
350: xml_attrkey(struct parse *ps, const char *name)
351: {
352: struct pattr *attr;
353: enum attrkey key;
1.1 schwarze 354:
1.5 schwarze 355: if (ps->del > 0 || *name == '\0')
356: return;
357: if ((key = attrkey_parse(name)) == ATTRKEY__MAX) {
358: ps->attr = 0;
359: return;
360: }
361: if ((attr = calloc(1, sizeof(*attr))) == NULL) {
362: perror(NULL);
363: exit(1);
364: }
365: attr->key = key;
366: attr->val = ATTRVAL__MAX;
367: attr->rawval = NULL;
368: TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child);
369: ps->attr = 1;
370: }
371:
372: static void
373: xml_attrval(struct parse *ps, const char *name)
374: {
375: struct pattr *attr;
376:
377: if (ps->del > 0 || ps->attr == 0)
378: return;
379: if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL)
380: return;
381: if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX &&
382: (attr->rawval = strdup(name)) == NULL) {
383: perror(NULL);
384: exit(1);
1.1 schwarze 385: }
386: }
387:
388: /*
389: * Roll up the parse tree.
390: * If we're at a text node, roll that one up first.
391: */
392: static void
1.5 schwarze 393: xml_elem_end(struct parse *ps, const char *name)
1.1 schwarze 394: {
1.5 schwarze 395: const struct element *elem;
396: enum nodeid node;
1.1 schwarze 397:
1.4 schwarze 398: /*
399: * An ancestor is excluded from the tree;
400: * keep track of the number of levels excluded.
401: */
402: if (ps->del > 1) {
403: ps->del--;
404: return;
405: }
406:
1.1 schwarze 407: /* Close out the text node, if there is one. */
1.5 schwarze 408: if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) {
1.1 schwarze 409: pnode_trim(ps->cur);
410: ps->cur = ps->cur->parent;
411: }
1.2 schwarze 412:
1.5 schwarze 413: if (name != NULL) {
414: for (elem = elements; elem->name != NULL; elem++)
415: if (strcmp(elem->name, name) == 0)
416: break;
417: node = elem->node;
418: } else
419: node = ps->ncur;
1.2 schwarze 420:
1.5 schwarze 421: switch (node) {
1.4 schwarze 422: case NODE_DELETE_WARN:
423: case NODE_DELETE:
1.5 schwarze 424: if (ps->del > 0)
425: ps->del--;
1.4 schwarze 426: break;
1.2 schwarze 427: case NODE_IGNORE:
428: break;
429: default:
1.5 schwarze 430: if (ps->cur == NULL || node != ps->cur->node) {
1.6 ! schwarze 431: warn_msg(ps, "element not open: </%s>", name);
1.5 schwarze 432: break;
433: }
434:
435: /*
436: * Refrain from actually closing the document element.
437: * If no more content follows, no harm is done, but if
438: * some content still follows, simply processing it is
439: * obviously better than discarding it or crashing.
440: */
441:
442: if (ps->cur->parent == NULL)
443: ps->tree->flags |= TREE_CLOSED;
444: else
445: ps->cur = ps->cur->parent;
1.4 schwarze 446: break;
1.2 schwarze 447: }
1.4 schwarze 448: assert(ps->del == 0);
1.1 schwarze 449: }
450:
451: struct parse *
452: parse_alloc(int warn)
453: {
454: struct parse *p;
455:
456: if ((p = calloc(1, sizeof(*p))) == NULL)
457: return NULL;
458:
459: if ((p->tree = calloc(1, sizeof(*p->tree))) == NULL) {
460: free(p);
461: return NULL;
462: }
463: p->warn = warn;
464: return p;
465: }
466:
467: void
468: parse_free(struct parse *p)
469: {
470: if (p == NULL)
471: return;
472: if (p->tree != NULL) {
473: pnode_unlink(p->tree->root);
474: free(p->tree);
475: }
476: free(p);
477: }
478:
1.5 schwarze 479: /*
480: * Advance the pend pointer to the next character in the charset.
481: * If the charset starts with a space, it stands for any whitespace.
482: * Update the new input file position, used for messages.
483: * Do not overrun the buffer b of length rlen.
484: * When reaching the end, NUL-terminate the buffer and return 1;
485: * otherwise, return 0.
486: */
487: static int
488: advance(struct parse *p, char *b, size_t rlen, size_t *pend,
489: const char *charset)
490: {
491: int space;
492:
493: if (*charset == ' ') {
494: space = 1;
495: charset++;
496: } else
497: space = 0;
498:
499: p->nline = p->line;
500: p->ncol = p->col;
501: while (*pend < rlen) {
502: if (b[*pend] == '\n') {
503: p->nline++;
504: p->ncol = 1;
505: } else
506: p->ncol++;
507: if (space && isspace((unsigned char)b[*pend]))
508: break;
509: if (strchr(charset, b[*pend]) != NULL)
510: break;
511: ++*pend;
512: }
513: if (*pend == rlen) {
514: b[rlen] = '\0';
515: return 1;
516: } else
517: return 0;
518: }
519:
1.1 schwarze 520: struct ptree *
521: parse_file(struct parse *p, int fd, const char *fname)
522: {
523: char b[4096];
1.5 schwarze 524: ssize_t rsz; /* Return value from read(2). */
525: size_t rlen; /* Number of bytes in b[]. */
526: size_t poff; /* Parse offset in b[]. */
527: size_t pend; /* Offset of the end of the current word. */
528: int in_tag, in_arg, in_quotes, elem_end;
1.1 schwarze 529:
530: p->fname = fname;
1.5 schwarze 531: p->nline = 1;
532: p->ncol = 1;
533: rlen = 0;
534: in_tag = in_arg = in_quotes = 0;
535:
536: /*
537: * Read loop.
538: *
539: * We have to enter the read loop once more even on EOF
540: * because the previous token may have been incomplete,
541: * such that it asked for more input.
542: * Once rsz is 0, incomplete tokens will no longer ask
543: * for more input but instead use whatever there is,
544: * and then exit the read loop.
545: * The minus one on the size limit for read(2) is needed
546: * such that advance() can set b[rlen] to NUL when needed.
547: */
548:
549: while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0) {
550: if ((rlen += rsz) == 0)
551: break;
552:
553: /* Token loop. */
554:
555: pend = 0;
556: for (;;) {
557:
558: /* Proceed to the next token, skipping whitespace. */
559:
560: p->line = p->nline;
561: p->col = p->ncol;
562: if ((poff = pend) == rlen)
563: break;
564: if (isspace((unsigned char)b[pend])) {
565: if (b[pend++] == '\n') {
566: p->nline++;
567: p->ncol = 1;
568: } else
569: p->ncol++;
570: continue;
571: }
572:
573: /*
574: * The following three cases (in_arg, in_tag,
575: * and starting a tag) all parse a word or
576: * quoted string. If that extends beyond the
577: * read buffer and the last read(2) still got
578: * data, they all break out of the token loop
579: * to request more data from the read loop.
580: *
581: * Also, they all detect self-closing tags,
582: * those ending with "/>", setting the flag
583: * elem_end and calling xml_elem_end() at the
584: * very end, after handling the attribute value,
585: * attribute name, or tag name, respectively.
586: */
587:
588: /* Parse an attribute value. */
589:
590: if (in_arg) {
591: if (in_quotes == 0 && b[pend] == '"') {
592: in_quotes = 1;
593: p->ncol++;
594: pend++;
595: continue;
596: }
597: if (advance(p, b, rlen, &pend,
598: in_quotes ? "\"" : " >") && rsz > 0)
599: break;
600: in_arg = in_quotes = elem_end = 0;
601: if (b[pend] == '>') {
602: in_tag = 0;
603: if (pend > 0 && b[pend - 1] == '/') {
604: b[pend - 1] = '\0';
605: elem_end = 1;
606: }
607: }
608: b[pend] = '\0';
609: if (pend < rlen)
610: pend++;
611: xml_attrval(p, b + poff);
612: if (elem_end)
613: xml_elem_end(p, NULL);
614:
615: /* Look for an attribute name. */
616:
617: } else if (in_tag) {
618: if (advance(p, b, rlen, &pend, " =>") &&
619: rsz > 0)
620: break;
621: elem_end = 0;
622: switch (b[pend]) {
623: case '>':
624: in_tag = 0;
625: if (pend > 0 && b[pend - 1] == '/') {
626: b[pend - 1] = '\0';
627: elem_end = 1;
628: }
629: break;
630: case '=':
631: in_arg = 1;
632: break;
633: default:
634: break;
635: }
636: b[pend] = '\0';
637: if (pend < rlen)
638: pend++;
639: xml_attrkey(p, b + poff);
640: if (elem_end)
641: xml_elem_end(p, NULL);
642:
643: /* Begin an opening or closing tag. */
644:
645: } else if (b[poff] == '<') {
646: if (advance(p, b, rlen, &pend, " >") &&
647: rsz > 0)
648: break;
649: elem_end = 0;
650: if (b[pend] != '>')
651: in_tag = 1;
652: else if (pend > 0 && b[pend - 1] == '/') {
653: b[pend - 1] = '\0';
654: elem_end = 1;
655: }
656: b[pend] = '\0';
657: if (pend < rlen)
658: pend++;
659: if (b[++poff] == '/') {
660: elem_end = 1;
661: poff++;
662: } else
663: xml_elem_start(p, b + poff);
664: if (elem_end)
665: xml_elem_end(p, b + poff);
666:
667: /* Process text up to the next tag. */
668:
669: } else {
670: if (advance(p, b, rlen, &pend, "<") == 0)
671: p->ncol--;
672: xml_char(p, b + poff, pend - poff);
673: }
1.1 schwarze 674: }
1.5 schwarze 675:
676: /* Buffer exhausted; shift left and re-fill. */
677:
678: assert(poff > 0);
679: memmove(b, b + poff, rlen - poff);
680: rlen -= poff;
681: }
682: if (rsz < 0) {
683: perror(fname);
684: p->tree->flags |= TREE_FAIL;
685: }
686: if (p->cur != NULL && p->cur->node == NODE_TEXT) {
687: pnode_trim(p->cur);
688: p->cur = p->cur->parent;
689: }
1.6 ! schwarze 690: if ((p->tree->flags & TREE_CLOSED) == 0)
! 691: warn_msg(p, "document not closed");
1.1 schwarze 692: return p->tree;
693: }
CVSweb