=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.43 retrieving revision 1.48 diff -u -p -r1.43 -r1.48 --- docbook2mdoc/parse.c 2019/04/14 18:07:35 1.43 +++ docbook2mdoc/parse.c 2019/04/21 14:48:11 1.48 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.43 2019/04/14 18:07:35 schwarze Exp $ */ +/* $Id: parse.c,v 1.48 2019/04/21 14:48:11 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -56,6 +56,7 @@ struct parse { int nline; /* Line number of next token. */ int ncol; /* Column number of next token. */ int del; /* Levels of nested nodes being deleted. */ + int nofill; /* Levels of open no-fill displays. */ int flags; #define PFLAG_WARN (1 << 0) /* Print warning messages. */ #define PFLAG_SPC (1 << 1) /* Whitespace before the next element. */ @@ -77,9 +78,11 @@ static const struct alias aliases[] = { { "articleinfo", NODE_BOOKINFO }, { "book", NODE_SECTION }, { "chapter", NODE_SECTION }, + { "caption", NODE_IGNORE }, { "code", NODE_LITERAL }, { "computeroutput", NODE_LITERAL }, { "!doctype", NODE_DOCTYPE }, + { "figure", NODE_IGNORE }, { "firstname", NODE_PERSONNAME }, { "glossary", NODE_VARIABLELIST }, { "glossdef", NODE_IGNORE }, @@ -87,10 +90,12 @@ static const struct alias aliases[] = { { "glossentry", NODE_VARLISTENTRY }, { "glosslist", NODE_VARIABLELIST }, { "holder", NODE_IGNORE }, + { "imageobject", NODE_IGNORE }, { "indexterm", NODE_DELETE }, { "informaltable", NODE_TABLE }, { "keycap", NODE_KEYSYM }, { "keycode", NODE_IGNORE }, + { "mediaobject", NODE_BLOCKQUOTE }, { "orgname", NODE_IGNORE }, { "othercredit", NODE_AUTHOR }, { "othername", NODE_PERSONNAME }, @@ -107,12 +112,15 @@ static const struct alias aliases[] = { { "secondary", NODE_DELETE }, { "sect1", NODE_SECTION }, { "sect2", NODE_SECTION }, + { "sect3", NODE_SECTION }, + { "sect4", NODE_SECTION }, { "sgmltag", NODE_MARKUP }, { "simpara", NODE_PARA }, { "structfield", NODE_PARAMETER }, { "structname", NODE_TYPE }, { "surname", NODE_PERSONNAME }, { "symbol", NODE_CONSTANT }, + { "tag", NODE_MARKUP }, { "trademark", NODE_IGNORE }, { "ulink", NODE_LINK }, { "userinput", NODE_LITERAL }, @@ -280,6 +288,7 @@ xml_text(struct parse *p, const char *word, int sz) case CLASS_VOID: case CLASS_TEXT: case CLASS_BLOCK: + case CLASS_NOFILL: np = NULL; break; case CLASS_TRANS: @@ -511,6 +520,7 @@ xml_elem_start(struct parse *p, const char *name) case NODE_DOCTYPE: case NODE_ENTITY: case NODE_SBR: + case NODE_VOID: p->flags |= PFLAG_EEND; break; default: @@ -521,6 +531,9 @@ xml_elem_start(struct parse *p, const char *name) case CLASS_ENCL: n->spc = (p->flags & PFLAG_SPC) != 0; break; + case CLASS_NOFILL: + p->nofill++; + /* FALLTHROUGH */ default: n->spc = 1; break; @@ -542,7 +555,7 @@ xml_attrkey(struct parse *p, const char *name) const char *value; enum attrkey key; - if (p->del > 0 || p->ncur == NODE_IGNORE || *name == '\0') + if (p->del > 0 || p->ncur >= NODE_UNKNOWN || *name == '\0') return; if ((p->ncur == NODE_DOCTYPE || p->ncur == NODE_ENTITY) && @@ -579,7 +592,7 @@ xml_attrval(struct parse *p, const char *name) { struct pattr *a; - if (p->del > 0 || p->ncur == NODE_IGNORE || + if (p->del > 0 || p->ncur >= NODE_UNKNOWN || (p->flags & PFLAG_ATTR) == 0) return; if ((a = TAILQ_LAST(&p->cur->attrq, pattrq)) == NULL) @@ -638,6 +651,7 @@ xml_elem_end(struct parse *p, const char *name) break; case NODE_DOCTYPE: case NODE_SBR: + case NODE_VOID: p->flags &= ~PFLAG_EEND; /* FALLTHROUGH */ default: @@ -645,6 +659,8 @@ xml_elem_end(struct parse *p, const char *name) warn_msg(p, "element not open: ", name); break; } + if (pnode_class(node) == CLASS_NOFILL) + p->nofill--; /* * Refrain from actually closing the document element. @@ -752,11 +768,12 @@ parse_string(struct parse *p, char *b, size_t rlen, enum pstate *pstate, int refill) { char *cp; + size_t pws; /* Parse offset including whitespace. */ size_t poff; /* Parse offset in b[]. */ size_t pend; /* Offset of the end of the current word. */ int elem_end; - pend = 0; + pend = pws = 0; for (;;) { /* Proceed to the next token, skipping whitespace. */ @@ -769,6 +786,8 @@ parse_string(struct parse *p, char *b, size_t rlen, break; if (isspace((unsigned char)b[pend])) { p->flags |= PFLAG_SPC; + if (b[pend] == '\n') + pws = pend + 1; increment(p, b, &pend, refill); continue; } @@ -930,10 +949,13 @@ parse_string(struct parse *p, char *b, size_t rlen, advance(p, b, rlen, &pend, p->ncur == NODE_DOCTYPE ? "<&]\n" : "<&\n", refill); + if (p->nofill) + poff = pws; xml_text(p, b + poff, pend - poff); if (b[pend] == '\n') pnode_closetext(p, 0); } + pws = pend; } return poff; }