Annotation of docbook2mdoc/parse.c, Revision 1.7
1.7 ! schwarze 1: /* $Id: parse.c,v 1.6 2019/03/28 15:05:40 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
19: #include <ctype.h>
1.6 schwarze 20: #include <stdarg.h>
1.1 schwarze 21: #include <stdio.h>
1.5 schwarze 22: #include <stdlib.h>
1.1 schwarze 23: #include <string.h>
24: #include <unistd.h>
25:
26: #include "node.h"
27: #include "parse.h"
28:
29: /*
30: * The implementation of the DocBook parser.
31: */
32:
33: /*
34: * Global parse state.
35: * Keep this as simple and small as possible.
36: */
37: struct parse {
38: const char *fname; /* Name of the input file. */
39: struct ptree *tree; /* Complete parse result. */
40: struct pnode *cur; /* Current node in the tree. */
1.5 schwarze 41: enum nodeid ncur; /* Type of the current node. */
42: int line; /* Line number in the input file. */
43: int col; /* Column number in the input file. */
44: int nline; /* Line number of next token. */
45: int ncol; /* Column number of next token. */
1.4 schwarze 46: int del; /* Levels of nested nodes being deleted. */
1.5 schwarze 47: int attr; /* The most recent attribute is valid. */
1.1 schwarze 48: int warn;
49: };
50:
51: struct element {
52: const char *name; /* DocBook element name. */
53: enum nodeid node; /* Node type to generate. */
54: };
55:
56: static const struct element elements[] = {
1.3 schwarze 57: { "acronym", NODE_IGNORE },
1.1 schwarze 58: { "affiliation", NODE_AFFILIATION },
1.4 schwarze 59: { "anchor", NODE_DELETE },
1.1 schwarze 60: { "application", NODE_APPLICATION },
61: { "arg", NODE_ARG },
62: { "author", NODE_AUTHOR },
63: { "authorgroup", NODE_AUTHORGROUP },
64: { "blockquote", NODE_BLOCKQUOTE },
65: { "book", NODE_BOOK },
66: { "bookinfo", NODE_BOOKINFO },
67: { "caution", NODE_CAUTION },
68: { "chapter", NODE_SECTION },
69: { "citerefentry", NODE_CITEREFENTRY },
70: { "citetitle", NODE_CITETITLE },
71: { "cmdsynopsis", NODE_CMDSYNOPSIS },
72: { "code", NODE_CODE },
73: { "colspec", NODE_COLSPEC },
74: { "command", NODE_COMMAND },
75: { "constant", NODE_CONSTANT },
1.7 ! schwarze 76: { "contrib", NODE_CONTRIB },
1.1 schwarze 77: { "copyright", NODE_COPYRIGHT },
78: { "date", NODE_DATE },
79: { "editor", NODE_EDITOR },
80: { "email", NODE_EMAIL },
81: { "emphasis", NODE_EMPHASIS },
82: { "entry", NODE_ENTRY },
83: { "envar", NODE_ENVAR },
84: { "fieldsynopsis", NODE_FIELDSYNOPSIS },
85: { "filename", NODE_FILENAME },
1.7 ! schwarze 86: { "firstname", NODE_PERSONNAME },
1.1 schwarze 87: { "firstterm", NODE_FIRSTTERM },
88: { "footnote", NODE_FOOTNOTE },
89: { "funcdef", NODE_FUNCDEF },
90: { "funcprototype", NODE_FUNCPROTOTYPE },
91: { "funcsynopsis", NODE_FUNCSYNOPSIS },
92: { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO },
93: { "function", NODE_FUNCTION },
94: { "glossterm", NODE_GLOSSTERM },
95: { "group", NODE_GROUP },
96: { "holder", NODE_HOLDER },
97: { "index", NODE_INDEX },
1.4 schwarze 98: { "indexterm", NODE_DELETE },
1.1 schwarze 99: { "info", NODE_INFO },
100: { "informalequation", NODE_INFORMALEQUATION },
101: { "informaltable", NODE_INFORMALTABLE },
102: { "inlineequation", NODE_INLINEEQUATION },
103: { "itemizedlist", NODE_ITEMIZEDLIST },
104: { "keysym", NODE_KEYSYM },
105: { "legalnotice", NODE_LEGALNOTICE },
106: { "link", NODE_LINK },
107: { "listitem", NODE_LISTITEM },
108: { "literal", NODE_LITERAL },
109: { "literallayout", NODE_LITERALLAYOUT },
110: { "manvolnum", NODE_MANVOLNUM },
111: { "member", NODE_MEMBER },
112: { "mml:math", NODE_MML_MATH },
113: { "mml:mfenced", NODE_MML_MFENCED },
114: { "mml:mfrac", NODE_MML_MFRAC },
115: { "mml:mi", NODE_MML_MI },
116: { "mml:mn", NODE_MML_MN },
117: { "mml:mo", NODE_MML_MO },
118: { "mml:mrow", NODE_MML_MROW },
119: { "mml:msub", NODE_MML_MSUB },
120: { "mml:msup", NODE_MML_MSUP },
121: { "modifier", NODE_MODIFIER },
122: { "note", NODE_NOTE },
123: { "option", NODE_OPTION },
124: { "orderedlist", NODE_ORDEREDLIST },
125: { "orgname", NODE_ORGNAME },
1.7 ! schwarze 126: { "othername", NODE_PERSONNAME },
1.1 schwarze 127: { "para", NODE_PARA },
128: { "paramdef", NODE_PARAMDEF },
129: { "parameter", NODE_PARAMETER },
130: { "part", NODE_SECTION },
131: { "personname", NODE_PERSONNAME },
1.3 schwarze 132: { "phrase", NODE_IGNORE },
1.1 schwarze 133: { "preface", NODE_PREFACE },
1.4 schwarze 134: { "primary", NODE_DELETE },
1.1 schwarze 135: { "programlisting", NODE_PROGRAMLISTING },
136: { "prompt", NODE_PROMPT },
137: { "quote", NODE_QUOTE },
138: { "refclass", NODE_REFCLASS },
139: { "refdescriptor", NODE_REFDESCRIPTOR },
140: { "refentry", NODE_REFENTRY },
141: { "refentryinfo", NODE_REFENTRYINFO },
142: { "refentrytitle", NODE_REFENTRYTITLE },
143: { "refmeta", NODE_REFMETA },
144: { "refmetainfo", NODE_REFMETAINFO },
145: { "refmiscinfo", NODE_REFMISCINFO },
146: { "refname", NODE_REFNAME },
147: { "refnamediv", NODE_REFNAMEDIV },
148: { "refpurpose", NODE_REFPURPOSE },
149: { "refsect1", NODE_SECTION },
150: { "refsect2", NODE_SECTION },
151: { "refsect3", NODE_SECTION },
152: { "refsection", NODE_SECTION },
153: { "refsynopsisdiv", NODE_REFSYNOPSISDIV },
154: { "releaseinfo", NODE_RELEASEINFO },
155: { "replaceable", NODE_REPLACEABLE },
156: { "row", NODE_ROW },
157: { "sbr", NODE_SBR },
158: { "screen", NODE_SCREEN },
1.4 schwarze 159: { "secondary", NODE_DELETE },
1.1 schwarze 160: { "sect1", NODE_SECTION },
161: { "sect2", NODE_SECTION },
162: { "section", NODE_SECTION },
163: { "sgmltag", NODE_SGMLTAG },
164: { "simplelist", NODE_SIMPLELIST },
165: { "spanspec", NODE_SPANSPEC },
166: { "structname", NODE_STRUCTNAME },
167: { "subtitle", NODE_SUBTITLE },
1.7 ! schwarze 168: { "surname", NODE_PERSONNAME },
1.1 schwarze 169: { "synopsis", NODE_SYNOPSIS },
170: { "table", NODE_TABLE },
171: { "tbody", NODE_TBODY },
172: { "term", NODE_TERM },
173: { "tfoot", NODE_TFOOT },
174: { "tgroup", NODE_TGROUP },
175: { "thead", NODE_THEAD },
176: { "tip", NODE_TIP },
177: { "title", NODE_TITLE },
1.3 schwarze 178: { "trademark", NODE_IGNORE },
1.1 schwarze 179: { "type", NODE_TYPE },
180: { "ulink", NODE_ULINK },
181: { "userinput", NODE_USERINPUT },
182: { "variablelist", NODE_VARIABLELIST },
183: { "varlistentry", NODE_VARLISTENTRY },
184: { "varname", NODE_VARNAME },
185: { "warning", NODE_WARNING },
186: { "wordasword", NODE_WORDASWORD },
1.4 schwarze 187: { "xi:include", NODE_DELETE_WARN },
1.1 schwarze 188: { "year", NODE_YEAR },
1.5 schwarze 189: { NULL, NODE_IGNORE }
1.1 schwarze 190: };
191:
1.6 schwarze 192: static void
193: error_msg(struct parse *p, const char *fmt, ...)
194: {
195: va_list ap;
196:
197: fprintf(stderr, "%s:%d:%d: ", p->fname, p->line, p->col);
198: va_start(ap, fmt);
199: vfprintf(stderr, fmt, ap);
200: va_end(ap);
201: fputc('\n', stderr);
202: p->tree->flags |= TREE_FAIL;
203: }
204:
205: static void
206: warn_msg(struct parse *p, const char *fmt, ...)
207: {
208: va_list ap;
209:
210: if (p->warn == 0)
211: return;
212:
213: fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col);
214: va_start(ap, fmt);
215: vfprintf(stderr, fmt, ap);
216: va_end(ap);
217: fputc('\n', stderr);
218: }
219:
1.1 schwarze 220: /*
221: * Process a string of characters.
222: * If a text node is already open, append to it.
223: * Otherwise, create a new one as a child of the current node.
224: */
225: static void
1.5 schwarze 226: xml_char(struct parse *ps, const char *p, int sz)
1.1 schwarze 227: {
228: struct pnode *dat;
229:
1.5 schwarze 230: if (ps->del > 0)
1.1 schwarze 231: return;
232:
1.5 schwarze 233: if (ps->cur == NULL) {
1.6 schwarze 234: error_msg(ps, "discarding text before document: %.*s", sz, p);
1.5 schwarze 235: return;
236: }
237:
1.1 schwarze 238: if (ps->cur->node != NODE_TEXT) {
239: if ((dat = calloc(1, sizeof(*dat))) == NULL) {
240: perror(NULL);
241: exit(1);
242: }
243: dat->node = NODE_TEXT;
244: dat->parent = ps->cur;
245: TAILQ_INIT(&dat->childq);
246: TAILQ_INIT(&dat->attrq);
247: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
248: ps->cur = dat;
249: }
250:
1.5 schwarze 251: if (ps->tree->flags & TREE_CLOSED &&
1.6 schwarze 252: ps->cur->parent == ps->tree->root)
253: warn_msg(ps, "text after end of document: %.*s", sz, p);
1.5 schwarze 254:
1.1 schwarze 255: /* Append to the current text node. */
256:
257: assert(sz >= 0);
258: ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
259: if (ps->cur->b == NULL) {
260: perror(NULL);
261: exit(1);
262: }
263: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
264: ps->cur->bsz += sz;
265: ps->cur->b[ps->cur->bsz] = '\0';
266: ps->cur->real = ps->cur->b;
267: }
268:
269: static void
270: pnode_trim(struct pnode *pn)
271: {
272: assert(pn->node == NODE_TEXT);
273: for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
274: if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0)
275: break;
276: }
277:
278: /*
279: * Begin an element.
280: */
281: static void
1.5 schwarze 282: xml_elem_start(struct parse *ps, const char *name)
1.1 schwarze 283: {
1.5 schwarze 284: const struct element *elem;
285: struct pnode *dat;
1.1 schwarze 286:
1.5 schwarze 287: if (*name == '!' || *name == '?')
1.1 schwarze 288: return;
289:
1.4 schwarze 290: /*
291: * An ancestor is excluded from the tree;
292: * keep track of the number of levels excluded.
293: */
294: if (ps->del > 0) {
295: ps->del++;
296: return;
297: }
298:
1.1 schwarze 299: /* Close out the text node, if there is one. */
300: if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
301: pnode_trim(ps->cur);
302: ps->cur = ps->cur->parent;
303: }
304:
305: for (elem = elements; elem->name != NULL; elem++)
306: if (strcmp(elem->name, name) == 0)
307: break;
308:
1.6 schwarze 309: if (elem->name == NULL)
310: error_msg(ps, "unknown element <%s>", name);
311:
1.5 schwarze 312: ps->ncur = elem->node;
1.1 schwarze 313:
1.5 schwarze 314: switch (ps->ncur) {
1.4 schwarze 315: case NODE_DELETE_WARN:
1.6 schwarze 316: warn_msg(ps, "skipping element <%s>", name);
1.2 schwarze 317: /* FALLTHROUGH */
1.4 schwarze 318: case NODE_DELETE:
319: ps->del = 1;
320: /* FALLTHROUGH */
1.2 schwarze 321: case NODE_IGNORE:
322: return;
323: case NODE_INLINEEQUATION:
1.1 schwarze 324: ps->tree->flags |= TREE_EQN;
1.2 schwarze 325: break;
326: default:
327: break;
328: }
1.1 schwarze 329:
1.6 schwarze 330: if (ps->tree->flags & TREE_CLOSED && ps->cur->parent == NULL)
331: warn_msg(ps, "element after end of document: <%s>", name);
1.5 schwarze 332:
1.1 schwarze 333: if ((dat = calloc(1, sizeof(*dat))) == NULL) {
334: perror(NULL);
335: exit(1);
336: }
337: dat->node = elem->node;
338: dat->parent = ps->cur;
339: TAILQ_INIT(&dat->childq);
340: TAILQ_INIT(&dat->attrq);
341:
342: if (ps->cur != NULL)
343: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
344:
345: ps->cur = dat;
346: if (ps->tree->root == NULL)
347: ps->tree->root = dat;
1.5 schwarze 348: }
349:
350: static void
351: xml_attrkey(struct parse *ps, const char *name)
352: {
353: struct pattr *attr;
354: enum attrkey key;
1.1 schwarze 355:
1.5 schwarze 356: if (ps->del > 0 || *name == '\0')
357: return;
358: if ((key = attrkey_parse(name)) == ATTRKEY__MAX) {
359: ps->attr = 0;
360: return;
361: }
362: if ((attr = calloc(1, sizeof(*attr))) == NULL) {
363: perror(NULL);
364: exit(1);
365: }
366: attr->key = key;
367: attr->val = ATTRVAL__MAX;
368: attr->rawval = NULL;
369: TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child);
370: ps->attr = 1;
371: }
372:
373: static void
374: xml_attrval(struct parse *ps, const char *name)
375: {
376: struct pattr *attr;
377:
378: if (ps->del > 0 || ps->attr == 0)
379: return;
380: if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL)
381: return;
382: if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX &&
383: (attr->rawval = strdup(name)) == NULL) {
384: perror(NULL);
385: exit(1);
1.1 schwarze 386: }
387: }
388:
389: /*
390: * Roll up the parse tree.
391: * If we're at a text node, roll that one up first.
392: */
393: static void
1.5 schwarze 394: xml_elem_end(struct parse *ps, const char *name)
1.1 schwarze 395: {
1.5 schwarze 396: const struct element *elem;
397: enum nodeid node;
1.1 schwarze 398:
1.4 schwarze 399: /*
400: * An ancestor is excluded from the tree;
401: * keep track of the number of levels excluded.
402: */
403: if (ps->del > 1) {
404: ps->del--;
405: return;
406: }
407:
1.1 schwarze 408: /* Close out the text node, if there is one. */
1.5 schwarze 409: if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) {
1.1 schwarze 410: pnode_trim(ps->cur);
411: ps->cur = ps->cur->parent;
412: }
1.2 schwarze 413:
1.5 schwarze 414: if (name != NULL) {
415: for (elem = elements; elem->name != NULL; elem++)
416: if (strcmp(elem->name, name) == 0)
417: break;
418: node = elem->node;
419: } else
420: node = ps->ncur;
1.2 schwarze 421:
1.5 schwarze 422: switch (node) {
1.4 schwarze 423: case NODE_DELETE_WARN:
424: case NODE_DELETE:
1.5 schwarze 425: if (ps->del > 0)
426: ps->del--;
1.4 schwarze 427: break;
1.2 schwarze 428: case NODE_IGNORE:
429: break;
430: default:
1.5 schwarze 431: if (ps->cur == NULL || node != ps->cur->node) {
1.6 schwarze 432: warn_msg(ps, "element not open: </%s>", name);
1.5 schwarze 433: break;
434: }
435:
436: /*
437: * Refrain from actually closing the document element.
438: * If no more content follows, no harm is done, but if
439: * some content still follows, simply processing it is
440: * obviously better than discarding it or crashing.
441: */
442:
443: if (ps->cur->parent == NULL)
444: ps->tree->flags |= TREE_CLOSED;
445: else
446: ps->cur = ps->cur->parent;
1.4 schwarze 447: break;
1.2 schwarze 448: }
1.4 schwarze 449: assert(ps->del == 0);
1.1 schwarze 450: }
451:
452: struct parse *
453: parse_alloc(int warn)
454: {
455: struct parse *p;
456:
457: if ((p = calloc(1, sizeof(*p))) == NULL)
458: return NULL;
459:
460: if ((p->tree = calloc(1, sizeof(*p->tree))) == NULL) {
461: free(p);
462: return NULL;
463: }
464: p->warn = warn;
465: return p;
466: }
467:
468: void
469: parse_free(struct parse *p)
470: {
471: if (p == NULL)
472: return;
473: if (p->tree != NULL) {
474: pnode_unlink(p->tree->root);
475: free(p->tree);
476: }
477: free(p);
478: }
479:
1.5 schwarze 480: /*
481: * Advance the pend pointer to the next character in the charset.
482: * If the charset starts with a space, it stands for any whitespace.
483: * Update the new input file position, used for messages.
484: * Do not overrun the buffer b of length rlen.
485: * When reaching the end, NUL-terminate the buffer and return 1;
486: * otherwise, return 0.
487: */
488: static int
489: advance(struct parse *p, char *b, size_t rlen, size_t *pend,
490: const char *charset)
491: {
492: int space;
493:
494: if (*charset == ' ') {
495: space = 1;
496: charset++;
497: } else
498: space = 0;
499:
500: p->nline = p->line;
501: p->ncol = p->col;
502: while (*pend < rlen) {
503: if (b[*pend] == '\n') {
504: p->nline++;
505: p->ncol = 1;
506: } else
507: p->ncol++;
508: if (space && isspace((unsigned char)b[*pend]))
509: break;
510: if (strchr(charset, b[*pend]) != NULL)
511: break;
512: ++*pend;
513: }
514: if (*pend == rlen) {
515: b[rlen] = '\0';
516: return 1;
517: } else
518: return 0;
519: }
520:
1.1 schwarze 521: struct ptree *
522: parse_file(struct parse *p, int fd, const char *fname)
523: {
524: char b[4096];
1.5 schwarze 525: ssize_t rsz; /* Return value from read(2). */
526: size_t rlen; /* Number of bytes in b[]. */
527: size_t poff; /* Parse offset in b[]. */
528: size_t pend; /* Offset of the end of the current word. */
529: int in_tag, in_arg, in_quotes, elem_end;
1.1 schwarze 530:
531: p->fname = fname;
1.5 schwarze 532: p->nline = 1;
533: p->ncol = 1;
534: rlen = 0;
535: in_tag = in_arg = in_quotes = 0;
536:
537: /*
538: * Read loop.
539: *
540: * We have to enter the read loop once more even on EOF
541: * because the previous token may have been incomplete,
542: * such that it asked for more input.
543: * Once rsz is 0, incomplete tokens will no longer ask
544: * for more input but instead use whatever there is,
545: * and then exit the read loop.
546: * The minus one on the size limit for read(2) is needed
547: * such that advance() can set b[rlen] to NUL when needed.
548: */
549:
550: while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0) {
551: if ((rlen += rsz) == 0)
552: break;
553:
554: /* Token loop. */
555:
556: pend = 0;
557: for (;;) {
558:
559: /* Proceed to the next token, skipping whitespace. */
560:
561: p->line = p->nline;
562: p->col = p->ncol;
563: if ((poff = pend) == rlen)
564: break;
565: if (isspace((unsigned char)b[pend])) {
566: if (b[pend++] == '\n') {
567: p->nline++;
568: p->ncol = 1;
569: } else
570: p->ncol++;
571: continue;
572: }
573:
574: /*
575: * The following three cases (in_arg, in_tag,
576: * and starting a tag) all parse a word or
577: * quoted string. If that extends beyond the
578: * read buffer and the last read(2) still got
579: * data, they all break out of the token loop
580: * to request more data from the read loop.
581: *
582: * Also, they all detect self-closing tags,
583: * those ending with "/>", setting the flag
584: * elem_end and calling xml_elem_end() at the
585: * very end, after handling the attribute value,
586: * attribute name, or tag name, respectively.
587: */
588:
589: /* Parse an attribute value. */
590:
591: if (in_arg) {
592: if (in_quotes == 0 && b[pend] == '"') {
593: in_quotes = 1;
594: p->ncol++;
595: pend++;
596: continue;
597: }
598: if (advance(p, b, rlen, &pend,
599: in_quotes ? "\"" : " >") && rsz > 0)
600: break;
601: in_arg = in_quotes = elem_end = 0;
602: if (b[pend] == '>') {
603: in_tag = 0;
604: if (pend > 0 && b[pend - 1] == '/') {
605: b[pend - 1] = '\0';
606: elem_end = 1;
607: }
608: }
609: b[pend] = '\0';
610: if (pend < rlen)
611: pend++;
612: xml_attrval(p, b + poff);
613: if (elem_end)
614: xml_elem_end(p, NULL);
615:
616: /* Look for an attribute name. */
617:
618: } else if (in_tag) {
619: if (advance(p, b, rlen, &pend, " =>") &&
620: rsz > 0)
621: break;
622: elem_end = 0;
623: switch (b[pend]) {
624: case '>':
625: in_tag = 0;
626: if (pend > 0 && b[pend - 1] == '/') {
627: b[pend - 1] = '\0';
628: elem_end = 1;
629: }
630: break;
631: case '=':
632: in_arg = 1;
633: break;
634: default:
635: break;
636: }
637: b[pend] = '\0';
638: if (pend < rlen)
639: pend++;
640: xml_attrkey(p, b + poff);
641: if (elem_end)
642: xml_elem_end(p, NULL);
643:
644: /* Begin an opening or closing tag. */
645:
646: } else if (b[poff] == '<') {
647: if (advance(p, b, rlen, &pend, " >") &&
648: rsz > 0)
649: break;
650: elem_end = 0;
651: if (b[pend] != '>')
652: in_tag = 1;
653: else if (pend > 0 && b[pend - 1] == '/') {
654: b[pend - 1] = '\0';
655: elem_end = 1;
656: }
657: b[pend] = '\0';
658: if (pend < rlen)
659: pend++;
660: if (b[++poff] == '/') {
661: elem_end = 1;
662: poff++;
663: } else
664: xml_elem_start(p, b + poff);
665: if (elem_end)
666: xml_elem_end(p, b + poff);
667:
668: /* Process text up to the next tag. */
669:
670: } else {
671: if (advance(p, b, rlen, &pend, "<") == 0)
672: p->ncol--;
673: xml_char(p, b + poff, pend - poff);
674: }
1.1 schwarze 675: }
1.5 schwarze 676:
677: /* Buffer exhausted; shift left and re-fill. */
678:
679: assert(poff > 0);
680: memmove(b, b + poff, rlen - poff);
681: rlen -= poff;
682: }
683: if (rsz < 0) {
684: perror(fname);
685: p->tree->flags |= TREE_FAIL;
686: }
687: if (p->cur != NULL && p->cur->node == NODE_TEXT) {
688: pnode_trim(p->cur);
689: p->cur = p->cur->parent;
690: }
1.6 schwarze 691: if ((p->tree->flags & TREE_CLOSED) == 0)
692: warn_msg(p, "document not closed");
1.1 schwarze 693: return p->tree;
694: }
CVSweb