Annotation of docbook2mdoc/parse.c, Revision 1.5
1.5 ! schwarze 1: /* $Id: parse.c,v 1.4 2019/03/26 22:39:33 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <assert.h>
19: #include <ctype.h>
20: #include <stdio.h>
1.5 ! schwarze 21: #include <stdlib.h>
1.1 schwarze 22: #include <string.h>
23: #include <unistd.h>
24:
25: #include "node.h"
26: #include "parse.h"
27:
28: /*
29: * The implementation of the DocBook parser.
30: */
31:
32: /*
33: * Global parse state.
34: * Keep this as simple and small as possible.
35: */
36: struct parse {
37: const char *fname; /* Name of the input file. */
38: struct ptree *tree; /* Complete parse result. */
39: struct pnode *cur; /* Current node in the tree. */
1.5 ! schwarze 40: enum nodeid ncur; /* Type of the current node. */
! 41: int line; /* Line number in the input file. */
! 42: int col; /* Column number in the input file. */
! 43: int nline; /* Line number of next token. */
! 44: int ncol; /* Column number of next token. */
1.4 schwarze 45: int del; /* Levels of nested nodes being deleted. */
1.5 ! schwarze 46: int attr; /* The most recent attribute is valid. */
1.1 schwarze 47: int warn;
48: };
49:
50: struct element {
51: const char *name; /* DocBook element name. */
52: enum nodeid node; /* Node type to generate. */
53: };
54:
55: static const struct element elements[] = {
1.3 schwarze 56: { "acronym", NODE_IGNORE },
1.1 schwarze 57: { "affiliation", NODE_AFFILIATION },
1.4 schwarze 58: { "anchor", NODE_DELETE },
1.1 schwarze 59: { "application", NODE_APPLICATION },
60: { "arg", NODE_ARG },
61: { "author", NODE_AUTHOR },
62: { "authorgroup", NODE_AUTHORGROUP },
63: { "blockquote", NODE_BLOCKQUOTE },
64: { "book", NODE_BOOK },
65: { "bookinfo", NODE_BOOKINFO },
66: { "caution", NODE_CAUTION },
67: { "chapter", NODE_SECTION },
68: { "citerefentry", NODE_CITEREFENTRY },
69: { "citetitle", NODE_CITETITLE },
70: { "cmdsynopsis", NODE_CMDSYNOPSIS },
71: { "code", NODE_CODE },
72: { "colspec", NODE_COLSPEC },
73: { "command", NODE_COMMAND },
74: { "constant", NODE_CONSTANT },
75: { "copyright", NODE_COPYRIGHT },
76: { "date", NODE_DATE },
77: { "editor", NODE_EDITOR },
78: { "email", NODE_EMAIL },
79: { "emphasis", NODE_EMPHASIS },
80: { "entry", NODE_ENTRY },
81: { "envar", NODE_ENVAR },
82: { "fieldsynopsis", NODE_FIELDSYNOPSIS },
83: { "filename", NODE_FILENAME },
1.3 schwarze 84: { "firstname", NODE_IGNORE },
1.1 schwarze 85: { "firstterm", NODE_FIRSTTERM },
86: { "footnote", NODE_FOOTNOTE },
87: { "funcdef", NODE_FUNCDEF },
88: { "funcprototype", NODE_FUNCPROTOTYPE },
89: { "funcsynopsis", NODE_FUNCSYNOPSIS },
90: { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO },
91: { "function", NODE_FUNCTION },
92: { "glossterm", NODE_GLOSSTERM },
93: { "group", NODE_GROUP },
94: { "holder", NODE_HOLDER },
95: { "index", NODE_INDEX },
1.4 schwarze 96: { "indexterm", NODE_DELETE },
1.1 schwarze 97: { "info", NODE_INFO },
98: { "informalequation", NODE_INFORMALEQUATION },
99: { "informaltable", NODE_INFORMALTABLE },
100: { "inlineequation", NODE_INLINEEQUATION },
101: { "itemizedlist", NODE_ITEMIZEDLIST },
102: { "keysym", NODE_KEYSYM },
103: { "legalnotice", NODE_LEGALNOTICE },
104: { "link", NODE_LINK },
105: { "listitem", NODE_LISTITEM },
106: { "literal", NODE_LITERAL },
107: { "literallayout", NODE_LITERALLAYOUT },
108: { "manvolnum", NODE_MANVOLNUM },
109: { "member", NODE_MEMBER },
110: { "mml:math", NODE_MML_MATH },
111: { "mml:mfenced", NODE_MML_MFENCED },
112: { "mml:mfrac", NODE_MML_MFRAC },
113: { "mml:mi", NODE_MML_MI },
114: { "mml:mn", NODE_MML_MN },
115: { "mml:mo", NODE_MML_MO },
116: { "mml:mrow", NODE_MML_MROW },
117: { "mml:msub", NODE_MML_MSUB },
118: { "mml:msup", NODE_MML_MSUP },
119: { "modifier", NODE_MODIFIER },
120: { "note", NODE_NOTE },
121: { "option", NODE_OPTION },
122: { "orderedlist", NODE_ORDEREDLIST },
123: { "orgname", NODE_ORGNAME },
1.3 schwarze 124: { "othername", NODE_IGNORE },
1.1 schwarze 125: { "para", NODE_PARA },
126: { "paramdef", NODE_PARAMDEF },
127: { "parameter", NODE_PARAMETER },
128: { "part", NODE_SECTION },
129: { "personname", NODE_PERSONNAME },
1.3 schwarze 130: { "phrase", NODE_IGNORE },
1.1 schwarze 131: { "preface", NODE_PREFACE },
1.4 schwarze 132: { "primary", NODE_DELETE },
1.1 schwarze 133: { "programlisting", NODE_PROGRAMLISTING },
134: { "prompt", NODE_PROMPT },
135: { "quote", NODE_QUOTE },
136: { "refclass", NODE_REFCLASS },
137: { "refdescriptor", NODE_REFDESCRIPTOR },
138: { "refentry", NODE_REFENTRY },
139: { "refentryinfo", NODE_REFENTRYINFO },
140: { "refentrytitle", NODE_REFENTRYTITLE },
141: { "refmeta", NODE_REFMETA },
142: { "refmetainfo", NODE_REFMETAINFO },
143: { "refmiscinfo", NODE_REFMISCINFO },
144: { "refname", NODE_REFNAME },
145: { "refnamediv", NODE_REFNAMEDIV },
146: { "refpurpose", NODE_REFPURPOSE },
147: { "refsect1", NODE_SECTION },
148: { "refsect2", NODE_SECTION },
149: { "refsect3", NODE_SECTION },
150: { "refsection", NODE_SECTION },
151: { "refsynopsisdiv", NODE_REFSYNOPSISDIV },
152: { "releaseinfo", NODE_RELEASEINFO },
153: { "replaceable", NODE_REPLACEABLE },
154: { "row", NODE_ROW },
155: { "sbr", NODE_SBR },
156: { "screen", NODE_SCREEN },
1.4 schwarze 157: { "secondary", NODE_DELETE },
1.1 schwarze 158: { "sect1", NODE_SECTION },
159: { "sect2", NODE_SECTION },
160: { "section", NODE_SECTION },
161: { "sgmltag", NODE_SGMLTAG },
162: { "simplelist", NODE_SIMPLELIST },
163: { "spanspec", NODE_SPANSPEC },
164: { "structname", NODE_STRUCTNAME },
165: { "subtitle", NODE_SUBTITLE },
1.3 schwarze 166: { "surname", NODE_IGNORE },
1.1 schwarze 167: { "synopsis", NODE_SYNOPSIS },
168: { "table", NODE_TABLE },
169: { "tbody", NODE_TBODY },
170: { "term", NODE_TERM },
171: { "tfoot", NODE_TFOOT },
172: { "tgroup", NODE_TGROUP },
173: { "thead", NODE_THEAD },
174: { "tip", NODE_TIP },
175: { "title", NODE_TITLE },
1.3 schwarze 176: { "trademark", NODE_IGNORE },
1.1 schwarze 177: { "type", NODE_TYPE },
178: { "ulink", NODE_ULINK },
179: { "userinput", NODE_USERINPUT },
180: { "variablelist", NODE_VARIABLELIST },
181: { "varlistentry", NODE_VARLISTENTRY },
182: { "varname", NODE_VARNAME },
183: { "warning", NODE_WARNING },
184: { "wordasword", NODE_WORDASWORD },
1.4 schwarze 185: { "xi:include", NODE_DELETE_WARN },
1.1 schwarze 186: { "year", NODE_YEAR },
1.5 ! schwarze 187: { NULL, NODE_IGNORE }
1.1 schwarze 188: };
189:
190: /*
191: * Process a string of characters.
192: * If a text node is already open, append to it.
193: * Otherwise, create a new one as a child of the current node.
194: */
195: static void
1.5 ! schwarze 196: xml_char(struct parse *ps, const char *p, int sz)
1.1 schwarze 197: {
198: struct pnode *dat;
199:
1.5 ! schwarze 200: if (ps->del > 0)
1.1 schwarze 201: return;
202:
1.5 ! schwarze 203: if (ps->cur == NULL) {
! 204: fprintf(stderr, "%s:%d:%d: discarding text before docum"
! 205: "ent: %.*s\n", ps->fname, ps->line, ps->col, sz, p);
! 206: ps->tree->flags |= TREE_FAIL;
! 207: return;
! 208: }
! 209:
1.1 schwarze 210: if (ps->cur->node != NODE_TEXT) {
211: if ((dat = calloc(1, sizeof(*dat))) == NULL) {
212: perror(NULL);
213: exit(1);
214: }
215: dat->node = NODE_TEXT;
216: dat->parent = ps->cur;
217: TAILQ_INIT(&dat->childq);
218: TAILQ_INIT(&dat->attrq);
219: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
220: ps->cur = dat;
221: }
222:
1.5 ! schwarze 223: if (ps->tree->flags & TREE_CLOSED &&
! 224: ps->cur->parent == ps->tree->root && ps->warn)
! 225: fprintf(stderr, "%s:%d:%d: warning: "
! 226: "text after end of document: %.*s\n",
! 227: ps->fname, ps->line, ps->col, sz, p);
! 228:
1.1 schwarze 229: /* Append to the current text node. */
230:
231: assert(sz >= 0);
232: ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
233: if (ps->cur->b == NULL) {
234: perror(NULL);
235: exit(1);
236: }
237: memcpy(ps->cur->b + ps->cur->bsz, p, sz);
238: ps->cur->bsz += sz;
239: ps->cur->b[ps->cur->bsz] = '\0';
240: ps->cur->real = ps->cur->b;
241: }
242:
243: static void
244: pnode_trim(struct pnode *pn)
245: {
246: assert(pn->node == NODE_TEXT);
247: for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
248: if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0)
249: break;
250: }
251:
252: /*
253: * Begin an element.
254: */
255: static void
1.5 ! schwarze 256: xml_elem_start(struct parse *ps, const char *name)
1.1 schwarze 257: {
1.5 ! schwarze 258: const struct element *elem;
! 259: struct pnode *dat;
1.1 schwarze 260:
1.5 ! schwarze 261: if (*name == '!' || *name == '?')
1.1 schwarze 262: return;
263:
1.4 schwarze 264: /*
265: * An ancestor is excluded from the tree;
266: * keep track of the number of levels excluded.
267: */
268: if (ps->del > 0) {
269: ps->del++;
270: return;
271: }
272:
1.1 schwarze 273: /* Close out the text node, if there is one. */
274: if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
275: pnode_trim(ps->cur);
276: ps->cur = ps->cur->parent;
277: }
278:
279: for (elem = elements; elem->name != NULL; elem++)
280: if (strcmp(elem->name, name) == 0)
281: break;
282:
283: if (elem->name == NULL) {
1.5 ! schwarze 284: fprintf(stderr, "%s:%d:%d: unknown element <%s>\n",
! 285: ps->fname, ps->line, ps->col, name);
1.1 schwarze 286: ps->tree->flags |= TREE_FAIL;
287: }
1.5 ! schwarze 288: ps->ncur = elem->node;
1.1 schwarze 289:
1.5 ! schwarze 290: switch (ps->ncur) {
1.4 schwarze 291: case NODE_DELETE_WARN:
1.2 schwarze 292: if (ps->warn)
1.5 ! schwarze 293: fprintf(stderr, "%s:%d:%d: warning: "
! 294: "skipping element <%s>\n",
! 295: ps->fname, ps->line, ps->col, name);
1.2 schwarze 296: /* FALLTHROUGH */
1.4 schwarze 297: case NODE_DELETE:
298: ps->del = 1;
299: /* FALLTHROUGH */
1.2 schwarze 300: case NODE_IGNORE:
301: return;
302: case NODE_INLINEEQUATION:
1.1 schwarze 303: ps->tree->flags |= TREE_EQN;
1.2 schwarze 304: break;
305: default:
306: break;
307: }
1.1 schwarze 308:
1.5 ! schwarze 309: if (ps->tree->flags & TREE_CLOSED &&
! 310: ps->cur->parent == NULL && ps->warn)
! 311: fprintf(stderr, "%s:%d:%d: warning: "
! 312: "element after end of document: %s\n",
! 313: ps->fname, ps->line, ps->col, name);
! 314:
1.1 schwarze 315: if ((dat = calloc(1, sizeof(*dat))) == NULL) {
316: perror(NULL);
317: exit(1);
318: }
319: dat->node = elem->node;
320: dat->parent = ps->cur;
321: TAILQ_INIT(&dat->childq);
322: TAILQ_INIT(&dat->attrq);
323:
324: if (ps->cur != NULL)
325: TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
326:
327: ps->cur = dat;
328: if (ps->tree->root == NULL)
329: ps->tree->root = dat;
1.5 ! schwarze 330: }
! 331:
! 332: static void
! 333: xml_attrkey(struct parse *ps, const char *name)
! 334: {
! 335: struct pattr *attr;
! 336: enum attrkey key;
1.1 schwarze 337:
1.5 ! schwarze 338: if (ps->del > 0 || *name == '\0')
! 339: return;
! 340: if ((key = attrkey_parse(name)) == ATTRKEY__MAX) {
! 341: if (ps->warn)
! 342: fprintf(stderr, "%s:%d:%d: warning: "
! 343: "unknown attribute \"%s\"\n",
! 344: ps->fname, ps->line, ps->col, name);
! 345: ps->attr = 0;
! 346: return;
! 347: }
! 348: if ((attr = calloc(1, sizeof(*attr))) == NULL) {
! 349: perror(NULL);
! 350: exit(1);
! 351: }
! 352: attr->key = key;
! 353: attr->val = ATTRVAL__MAX;
! 354: attr->rawval = NULL;
! 355: TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child);
! 356: ps->attr = 1;
! 357: }
! 358:
! 359: static void
! 360: xml_attrval(struct parse *ps, const char *name)
! 361: {
! 362: struct pattr *attr;
! 363:
! 364: if (ps->del > 0 || ps->attr == 0)
! 365: return;
! 366: if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL)
! 367: return;
! 368: if ((attr->val = attrval_parse(name)) == ATTRVAL__MAX &&
! 369: (attr->rawval = strdup(name)) == NULL) {
! 370: perror(NULL);
! 371: exit(1);
1.1 schwarze 372: }
373: }
374:
375: /*
376: * Roll up the parse tree.
377: * If we're at a text node, roll that one up first.
378: */
379: static void
1.5 ! schwarze 380: xml_elem_end(struct parse *ps, const char *name)
1.1 schwarze 381: {
1.5 ! schwarze 382: const struct element *elem;
! 383: enum nodeid node;
1.1 schwarze 384:
1.4 schwarze 385: /*
386: * An ancestor is excluded from the tree;
387: * keep track of the number of levels excluded.
388: */
389: if (ps->del > 1) {
390: ps->del--;
391: return;
392: }
393:
1.1 schwarze 394: /* Close out the text node, if there is one. */
1.5 ! schwarze 395: if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) {
1.1 schwarze 396: pnode_trim(ps->cur);
397: ps->cur = ps->cur->parent;
398: }
1.2 schwarze 399:
1.5 ! schwarze 400: if (name != NULL) {
! 401: for (elem = elements; elem->name != NULL; elem++)
! 402: if (strcmp(elem->name, name) == 0)
! 403: break;
! 404: node = elem->node;
! 405: } else
! 406: node = ps->ncur;
1.2 schwarze 407:
1.5 ! schwarze 408: switch (node) {
1.4 schwarze 409: case NODE_DELETE_WARN:
410: case NODE_DELETE:
1.5 ! schwarze 411: if (ps->del > 0)
! 412: ps->del--;
1.4 schwarze 413: break;
1.2 schwarze 414: case NODE_IGNORE:
415: break;
416: default:
1.5 ! schwarze 417: if (ps->cur == NULL || node != ps->cur->node) {
! 418: if (ps->warn)
! 419: fprintf(stderr, "%s:%d:%d: warning: "
! 420: "element not open: </%s>\n",
! 421: ps->fname, ps->line, ps->col, name);
! 422: break;
! 423: }
! 424:
! 425: /*
! 426: * Refrain from actually closing the document element.
! 427: * If no more content follows, no harm is done, but if
! 428: * some content still follows, simply processing it is
! 429: * obviously better than discarding it or crashing.
! 430: */
! 431:
! 432: if (ps->cur->parent == NULL)
! 433: ps->tree->flags |= TREE_CLOSED;
! 434: else
! 435: ps->cur = ps->cur->parent;
1.4 schwarze 436: break;
1.2 schwarze 437: }
1.4 schwarze 438: assert(ps->del == 0);
1.1 schwarze 439: }
440:
441: struct parse *
442: parse_alloc(int warn)
443: {
444: struct parse *p;
445:
446: if ((p = calloc(1, sizeof(*p))) == NULL)
447: return NULL;
448:
449: if ((p->tree = calloc(1, sizeof(*p->tree))) == NULL) {
450: free(p);
451: return NULL;
452: }
453: p->warn = warn;
454: return p;
455: }
456:
457: void
458: parse_free(struct parse *p)
459: {
460: if (p == NULL)
461: return;
462: if (p->tree != NULL) {
463: pnode_unlink(p->tree->root);
464: free(p->tree);
465: }
466: free(p);
467: }
468:
1.5 ! schwarze 469: /*
! 470: * Advance the pend pointer to the next character in the charset.
! 471: * If the charset starts with a space, it stands for any whitespace.
! 472: * Update the new input file position, used for messages.
! 473: * Do not overrun the buffer b of length rlen.
! 474: * When reaching the end, NUL-terminate the buffer and return 1;
! 475: * otherwise, return 0.
! 476: */
! 477: static int
! 478: advance(struct parse *p, char *b, size_t rlen, size_t *pend,
! 479: const char *charset)
! 480: {
! 481: int space;
! 482:
! 483: if (*charset == ' ') {
! 484: space = 1;
! 485: charset++;
! 486: } else
! 487: space = 0;
! 488:
! 489: p->nline = p->line;
! 490: p->ncol = p->col;
! 491: while (*pend < rlen) {
! 492: if (b[*pend] == '\n') {
! 493: p->nline++;
! 494: p->ncol = 1;
! 495: } else
! 496: p->ncol++;
! 497: if (space && isspace((unsigned char)b[*pend]))
! 498: break;
! 499: if (strchr(charset, b[*pend]) != NULL)
! 500: break;
! 501: ++*pend;
! 502: }
! 503: if (*pend == rlen) {
! 504: b[rlen] = '\0';
! 505: return 1;
! 506: } else
! 507: return 0;
! 508: }
! 509:
1.1 schwarze 510: struct ptree *
511: parse_file(struct parse *p, int fd, const char *fname)
512: {
513: char b[4096];
1.5 ! schwarze 514: ssize_t rsz; /* Return value from read(2). */
! 515: size_t rlen; /* Number of bytes in b[]. */
! 516: size_t poff; /* Parse offset in b[]. */
! 517: size_t pend; /* Offset of the end of the current word. */
! 518: int in_tag, in_arg, in_quotes, elem_end;
1.1 schwarze 519:
520: p->fname = fname;
1.5 ! schwarze 521: p->nline = 1;
! 522: p->ncol = 1;
! 523: rlen = 0;
! 524: in_tag = in_arg = in_quotes = 0;
! 525:
! 526: /*
! 527: * Read loop.
! 528: *
! 529: * We have to enter the read loop once more even on EOF
! 530: * because the previous token may have been incomplete,
! 531: * such that it asked for more input.
! 532: * Once rsz is 0, incomplete tokens will no longer ask
! 533: * for more input but instead use whatever there is,
! 534: * and then exit the read loop.
! 535: * The minus one on the size limit for read(2) is needed
! 536: * such that advance() can set b[rlen] to NUL when needed.
! 537: */
! 538:
! 539: while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0) {
! 540: if ((rlen += rsz) == 0)
! 541: break;
! 542:
! 543: /* Token loop. */
! 544:
! 545: pend = 0;
! 546: for (;;) {
! 547:
! 548: /* Proceed to the next token, skipping whitespace. */
! 549:
! 550: p->line = p->nline;
! 551: p->col = p->ncol;
! 552: if ((poff = pend) == rlen)
! 553: break;
! 554: if (isspace((unsigned char)b[pend])) {
! 555: if (b[pend++] == '\n') {
! 556: p->nline++;
! 557: p->ncol = 1;
! 558: } else
! 559: p->ncol++;
! 560: continue;
! 561: }
! 562:
! 563: /*
! 564: * The following three cases (in_arg, in_tag,
! 565: * and starting a tag) all parse a word or
! 566: * quoted string. If that extends beyond the
! 567: * read buffer and the last read(2) still got
! 568: * data, they all break out of the token loop
! 569: * to request more data from the read loop.
! 570: *
! 571: * Also, they all detect self-closing tags,
! 572: * those ending with "/>", setting the flag
! 573: * elem_end and calling xml_elem_end() at the
! 574: * very end, after handling the attribute value,
! 575: * attribute name, or tag name, respectively.
! 576: */
! 577:
! 578: /* Parse an attribute value. */
! 579:
! 580: if (in_arg) {
! 581: if (in_quotes == 0 && b[pend] == '"') {
! 582: in_quotes = 1;
! 583: p->ncol++;
! 584: pend++;
! 585: continue;
! 586: }
! 587: if (advance(p, b, rlen, &pend,
! 588: in_quotes ? "\"" : " >") && rsz > 0)
! 589: break;
! 590: in_arg = in_quotes = elem_end = 0;
! 591: if (b[pend] == '>') {
! 592: in_tag = 0;
! 593: if (pend > 0 && b[pend - 1] == '/') {
! 594: b[pend - 1] = '\0';
! 595: elem_end = 1;
! 596: }
! 597: }
! 598: b[pend] = '\0';
! 599: if (pend < rlen)
! 600: pend++;
! 601: xml_attrval(p, b + poff);
! 602: if (elem_end)
! 603: xml_elem_end(p, NULL);
! 604:
! 605: /* Look for an attribute name. */
! 606:
! 607: } else if (in_tag) {
! 608: if (advance(p, b, rlen, &pend, " =>") &&
! 609: rsz > 0)
! 610: break;
! 611: elem_end = 0;
! 612: switch (b[pend]) {
! 613: case '>':
! 614: in_tag = 0;
! 615: if (pend > 0 && b[pend - 1] == '/') {
! 616: b[pend - 1] = '\0';
! 617: elem_end = 1;
! 618: }
! 619: break;
! 620: case '=':
! 621: in_arg = 1;
! 622: break;
! 623: default:
! 624: break;
! 625: }
! 626: b[pend] = '\0';
! 627: if (pend < rlen)
! 628: pend++;
! 629: xml_attrkey(p, b + poff);
! 630: if (elem_end)
! 631: xml_elem_end(p, NULL);
! 632:
! 633: /* Begin an opening or closing tag. */
! 634:
! 635: } else if (b[poff] == '<') {
! 636: if (advance(p, b, rlen, &pend, " >") &&
! 637: rsz > 0)
! 638: break;
! 639: elem_end = 0;
! 640: if (b[pend] != '>')
! 641: in_tag = 1;
! 642: else if (pend > 0 && b[pend - 1] == '/') {
! 643: b[pend - 1] = '\0';
! 644: elem_end = 1;
! 645: }
! 646: b[pend] = '\0';
! 647: if (pend < rlen)
! 648: pend++;
! 649: if (b[++poff] == '/') {
! 650: elem_end = 1;
! 651: poff++;
! 652: } else
! 653: xml_elem_start(p, b + poff);
! 654: if (elem_end)
! 655: xml_elem_end(p, b + poff);
! 656:
! 657: /* Process text up to the next tag. */
! 658:
! 659: } else {
! 660: if (advance(p, b, rlen, &pend, "<") == 0)
! 661: p->ncol--;
! 662: xml_char(p, b + poff, pend - poff);
! 663: }
1.1 schwarze 664: }
1.5 ! schwarze 665:
! 666: /* Buffer exhausted; shift left and re-fill. */
! 667:
! 668: assert(poff > 0);
! 669: memmove(b, b + poff, rlen - poff);
! 670: rlen -= poff;
! 671: }
! 672: if (rsz < 0) {
! 673: perror(fname);
! 674: p->tree->flags |= TREE_FAIL;
! 675: }
! 676: if (p->cur != NULL && p->cur->node == NODE_TEXT) {
! 677: pnode_trim(p->cur);
! 678: p->cur = p->cur->parent;
! 679: }
! 680: if ((p->tree->flags & TREE_CLOSED) == 0 && p->warn)
! 681: fprintf(stderr, "%s:%d:%d: warning: document not closed\n",
! 682: p->fname, p->line, p->col);
1.1 schwarze 683: return p->tree;
684: }
CVSweb