=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.41 retrieving revision 1.50 diff -u -p -r1.41 -r1.50 --- docbook2mdoc/parse.c 2019/04/13 13:06:35 1.41 +++ docbook2mdoc/parse.c 2019/04/23 15:47:23 1.50 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.41 2019/04/13 13:06:35 schwarze Exp $ */ +/* $Id: parse.c,v 1.50 2019/04/23 15:47:23 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -56,6 +56,7 @@ struct parse { int nline; /* Line number of next token. */ int ncol; /* Column number of next token. */ int del; /* Levels of nested nodes being deleted. */ + int nofill; /* Levels of open no-fill displays. */ int flags; #define PFLAG_WARN (1 << 0) /* Print warning messages. */ #define PFLAG_SPC (1 << 1) /* Whitespace before the next element. */ @@ -70,43 +71,60 @@ struct alias { static const struct alias aliases[] = { { "acronym", NODE_IGNORE }, + { "affiliation", NODE_IGNORE }, { "anchor", NODE_DELETE }, + { "application", NODE_COMMAND }, { "article", NODE_SECTION }, { "articleinfo", NODE_BOOKINFO }, { "book", NODE_SECTION }, { "chapter", NODE_SECTION }, + { "caption", NODE_IGNORE }, { "code", NODE_LITERAL }, { "computeroutput", NODE_LITERAL }, { "!doctype", NODE_DOCTYPE }, + { "figure", NODE_IGNORE }, { "firstname", NODE_PERSONNAME }, { "glossary", NODE_VARIABLELIST }, { "glossdef", NODE_IGNORE }, { "glossdiv", NODE_IGNORE }, { "glossentry", NODE_VARLISTENTRY }, { "glosslist", NODE_VARIABLELIST }, + { "holder", NODE_IGNORE }, + { "imageobject", NODE_IGNORE }, { "indexterm", NODE_DELETE }, { "informaltable", NODE_TABLE }, + { "keycap", NODE_KEYSYM }, + { "keycode", NODE_IGNORE }, + { "mediaobject", NODE_BLOCKQUOTE }, + { "orgname", NODE_IGNORE }, { "othercredit", NODE_AUTHOR }, { "othername", NODE_PERSONNAME }, { "part", NODE_SECTION }, { "phrase", NODE_IGNORE }, { "primary", NODE_DELETE }, + { "property", NODE_PARAMETER }, { "refsect1", NODE_SECTION }, { "refsect2", NODE_SECTION }, { "refsect3", NODE_SECTION }, { "refsection", NODE_SECTION }, + { "releaseinfo", NODE_IGNORE }, + { "returnvalue", NODE_IGNORE }, { "secondary", NODE_DELETE }, { "sect1", NODE_SECTION }, { "sect2", NODE_SECTION }, + { "sect3", NODE_SECTION }, + { "sect4", NODE_SECTION }, { "sgmltag", NODE_MARKUP }, { "simpara", NODE_PARA }, { "structfield", NODE_PARAMETER }, { "structname", NODE_TYPE }, { "surname", NODE_PERSONNAME }, { "symbol", NODE_CONSTANT }, + { "tag", NODE_MARKUP }, { "trademark", NODE_IGNORE }, { "ulink", NODE_LINK }, { "userinput", NODE_LITERAL }, + { "year", NODE_IGNORE }, { NULL, NODE_IGNORE } }; @@ -270,6 +288,7 @@ xml_text(struct parse *p, const char *word, int sz) case CLASS_VOID: case CLASS_TEXT: case CLASS_BLOCK: + case CLASS_NOFILL: np = NULL; break; case CLASS_TRANS: @@ -364,6 +383,7 @@ xml_entity(struct parse *p, const char *name) struct pnode *n; const char *ccp; char *cp; + unsigned int codepoint; enum pstate pstate; if (p->del > 0) @@ -408,6 +428,16 @@ xml_entity(struct parse *p, const char *name) return; } } + if (*name == '#') { + codepoint = strtonum(name + 1, 0, 0x10ffff, &ccp); + if (ccp == NULL) { + if ((n = pnode_alloc(p->cur)) == NULL || + asprintf(&n->b, "\\[u%4.4X]", + codepoint) < 0) + fatal(p); + goto done; + } + } error_msg(p, "unknown entity &%s;", name); return; } @@ -416,6 +446,7 @@ xml_entity(struct parse *p, const char *name) if ((n = pnode_alloc(p->cur)) == NULL || (n->b = strdup(entity->roff)) == NULL) fatal(p); +done: n->node = NODE_ESCAPE; n->spc = (p->flags & PFLAG_SPC) != 0; p->flags &= ~PFLAG_SPC; @@ -501,6 +532,7 @@ xml_elem_start(struct parse *p, const char *name) case NODE_DOCTYPE: case NODE_ENTITY: case NODE_SBR: + case NODE_VOID: p->flags |= PFLAG_EEND; break; default: @@ -511,6 +543,9 @@ xml_elem_start(struct parse *p, const char *name) case CLASS_ENCL: n->spc = (p->flags & PFLAG_SPC) != 0; break; + case CLASS_NOFILL: + p->nofill++; + /* FALLTHROUGH */ default: n->spc = 1; break; @@ -532,7 +567,7 @@ xml_attrkey(struct parse *p, const char *name) const char *value; enum attrkey key; - if (p->del > 0 || p->ncur == NODE_IGNORE || *name == '\0') + if (p->del > 0 || p->ncur >= NODE_UNKNOWN || *name == '\0') return; if ((p->ncur == NODE_DOCTYPE || p->ncur == NODE_ENTITY) && @@ -569,7 +604,7 @@ xml_attrval(struct parse *p, const char *name) { struct pattr *a; - if (p->del > 0 || p->ncur == NODE_IGNORE || + if (p->del > 0 || p->ncur >= NODE_UNKNOWN || (p->flags & PFLAG_ATTR) == 0) return; if ((a = TAILQ_LAST(&p->cur->attrq, pattrq)) == NULL) @@ -603,6 +638,7 @@ xml_elem_end(struct parse *p, const char *name) if (p->del == 0) pnode_closetext(p, 0); + n = p->cur; node = name == NULL ? p->ncur : xml_name2node(p, name); switch (node) { @@ -615,8 +651,7 @@ xml_elem_end(struct parse *p, const char *name) case NODE_UNKNOWN: break; case NODE_INCLUDE: - n = p->cur; - p->cur = p->cur->parent; + p->cur = n->parent; cp = pnode_getattr_raw(n, ATTRKEY_HREF, NULL); if (cp == NULL) error_msg(p, " element " @@ -628,13 +663,16 @@ xml_elem_end(struct parse *p, const char *name) break; case NODE_DOCTYPE: case NODE_SBR: + case NODE_VOID: p->flags &= ~PFLAG_EEND; /* FALLTHROUGH */ default: - if (p->cur == NULL || node != p->cur->node) { + if (n == NULL || node != n->node) { warn_msg(p, "element not open: ", name); break; } + if (pnode_class(node) == CLASS_NOFILL) + p->nofill--; /* * Refrain from actually closing the document element. @@ -643,13 +681,21 @@ xml_elem_end(struct parse *p, const char *name) * obviously better than discarding it or crashing. */ - if (p->cur->parent != NULL || node == NODE_DOCTYPE) { - p->cur = p->cur->parent; + if (n->parent != NULL || node == NODE_DOCTYPE) { + p->cur = n->parent; if (p->cur != NULL) p->ncur = p->cur->node; } else p->tree->flags |= TREE_CLOSED; p->flags &= ~PFLAG_SPC; + + /* Include a file containing entity declarations. */ + + if (node == NODE_ENTITY && strcmp("%", + pnode_getattr_raw(n, ATTRKEY_NAME, "")) == 0 && + (cp = pnode_getattr_raw(n, ATTRKEY_SYSTEM, NULL)) != NULL) + parse_file(p, -1, cp); + break; } assert(p->del == 0); @@ -742,11 +788,12 @@ parse_string(struct parse *p, char *b, size_t rlen, enum pstate *pstate, int refill) { char *cp; + size_t pws; /* Parse offset including whitespace. */ size_t poff; /* Parse offset in b[]. */ size_t pend; /* Offset of the end of the current word. */ int elem_end; - pend = 0; + pend = pws = 0; for (;;) { /* Proceed to the next token, skipping whitespace. */ @@ -759,6 +806,8 @@ parse_string(struct parse *p, char *b, size_t rlen, break; if (isspace((unsigned char)b[pend])) { p->flags |= PFLAG_SPC; + if (b[pend] == '\n') + pws = pend + 1; increment(p, b, &pend, refill); continue; } @@ -920,10 +969,13 @@ parse_string(struct parse *p, char *b, size_t rlen, advance(p, b, rlen, &pend, p->ncur == NODE_DOCTYPE ? "<&]\n" : "<&\n", refill); + if (p->nofill) + poff = pws; xml_text(p, b + poff, pend - poff); if (b[pend] == '\n') pnode_closetext(p, 0); } + pws = pend; } return poff; }