=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.47 retrieving revision 1.54 diff -u -p -r1.47 -r1.54 --- docbook2mdoc/parse.c 2019/04/20 04:15:06 1.47 +++ docbook2mdoc/parse.c 2019/04/28 15:32:05 1.54 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.47 2019/04/20 04:15:06 schwarze Exp $ */ +/* $Id: parse.c,v 1.54 2019/04/28 15:32:05 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -15,6 +15,8 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include + #include #include #include @@ -26,6 +28,7 @@ #include #include +#include "xmalloc.h" #include "node.h" #include "parse.h" @@ -59,9 +62,10 @@ struct parse { int nofill; /* Levels of open no-fill displays. */ int flags; #define PFLAG_WARN (1 << 0) /* Print warning messages. */ -#define PFLAG_SPC (1 << 1) /* Whitespace before the next element. */ -#define PFLAG_ATTR (1 << 2) /* The most recent attribute is valid. */ -#define PFLAG_EEND (1 << 3) /* This element is self-closing. */ +#define PFLAG_LINE (1 << 1) /* New line before the next element. */ +#define PFLAG_SPC (1 << 2) /* Whitespace before the next element. */ +#define PFLAG_ATTR (1 << 3) /* The most recent attribute is valid. */ +#define PFLAG_EEND (1 << 4) /* This element is self-closing. */ }; struct alias { @@ -103,6 +107,7 @@ static const struct alias aliases[] = { { "phrase", NODE_IGNORE }, { "primary", NODE_DELETE }, { "property", NODE_PARAMETER }, + { "reference", NODE_SECTION }, { "refsect1", NODE_SECTION }, { "refsect2", NODE_SECTION }, { "refsect3", NODE_SECTION }, @@ -120,6 +125,7 @@ static const struct alias aliases[] = { { "structname", NODE_TYPE }, { "surname", NODE_PERSONNAME }, { "symbol", NODE_CONSTANT }, + { "tag", NODE_MARKUP }, { "trademark", NODE_IGNORE }, { "ulink", NODE_LINK }, { "userinput", NODE_LITERAL }, @@ -189,14 +195,6 @@ static void parse_fd(struct parse *, int); static void -fatal(struct parse *p) -{ - fprintf(stderr, "%s:%d:%d: FATAL: ", p->fname, p->line, p->col); - perror(NULL); - exit(6); -} - -static void error_msg(struct parse *p, const char *fmt, ...) { va_list ap; @@ -254,13 +252,12 @@ xml_text(struct parse *p, const char *word, int sz) newsz = oldsz + sz; if (oldsz && (p->flags & PFLAG_SPC)) newsz++; - if ((n->b = realloc(n->b, newsz + 1)) == NULL) - fatal(p); + n->b = xrealloc(n->b, newsz + 1); if (oldsz && (p->flags & PFLAG_SPC)) n->b[oldsz++] = ' '; memcpy(n->b + oldsz, word, sz); n->b[newsz] = '\0'; - p->flags &= ~PFLAG_SPC; + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); return; } @@ -269,11 +266,11 @@ xml_text(struct parse *p, const char *word, int sz) /* Create a new text node. */ - if ((n = pnode_alloc(p->cur)) == NULL) - fatal(p); + n = pnode_alloc(p->cur); n->node = NODE_TEXT; - n->spc = (p->flags & PFLAG_SPC) != 0; - p->flags &= ~PFLAG_SPC; + n->flags = ((p->flags & PFLAG_LINE) ? NFLAG_LINE : 0) | + ((p->flags & PFLAG_SPC) ? NFLAG_SPC : 0); + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); /* * If this node follows an in-line macro without intervening @@ -281,7 +278,7 @@ xml_text(struct parse *p, const char *word, int sz) * and do not keep it open. */ - np = n->spc ? NULL : TAILQ_PREV(n, pnodeq, child); + np = n->flags & NFLAG_SPC ? NULL : TAILQ_PREV(n, pnodeq, child); while (np != NULL) { switch (pnode_class(np->node)) { case CLASS_VOID: @@ -303,8 +300,7 @@ xml_text(struct parse *p, const char *word, int sz) i = 0; while (i < sz && !isspace((unsigned char)word[i])) i++; - if ((n->b = strndup(word, i)) == NULL) - fatal(p); + n->b = xstrndup(word, i); if (i == sz) return; while (i < sz && isspace((unsigned char)word[i])) @@ -316,15 +312,13 @@ xml_text(struct parse *p, const char *word, int sz) /* Put any remaining text into a second node. */ - if ((n = pnode_alloc(p->cur)) == NULL) - fatal(p); + n = pnode_alloc(p->cur); n->node = NODE_TEXT; - n->spc = 1; + n->flags |= NFLAG_SPC; word += i; sz -= i; } - if ((n->b = strndup(word, sz)) == NULL) - fatal(p); + n->b = xstrndup(word, sz); /* The new node remains open for later pnode_closetext(). */ @@ -367,12 +361,8 @@ pnode_closetext(struct parse *p, int check_last_word) /* Move the last word into its own node, for use with .Pf. */ - if ((n = pnode_alloc(p->cur)) == NULL) - fatal(p); - n->node = NODE_TEXT; - n->spc = 1; - if ((n->b = strdup(last_word)) == NULL) - fatal(p); + n = pnode_alloc_text(p->cur, last_word); + n->flags |= NFLAG_SPC; } static void @@ -382,6 +372,7 @@ xml_entity(struct parse *p, const char *name) struct pnode *n; const char *ccp; char *cp; + unsigned int codepoint; enum pstate pstate; if (p->del > 0) @@ -411,32 +402,40 @@ xml_entity(struct parse *p, const char *name) if ((ccp = pnode_getattr_raw(n, ATTRKEY_SYSTEM, NULL)) != NULL) { parse_file(p, -1, ccp); - p->flags &= ~PFLAG_SPC; + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); return; } if ((ccp = pnode_getattr_raw(n, ATTRKEY_DEFINITION, NULL)) == NULL) continue; - if ((cp = strdup(ccp)) == NULL) - fatal(p); + cp = xstrdup(ccp); pstate = PARSE_ELEM; parse_string(p, cp, strlen(cp), &pstate, 0); - p->flags &= ~PFLAG_SPC; + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); free(cp); return; } } + if (*name == '#') { + codepoint = strtonum(name + 1, 0, 0x10ffff, &ccp); + if (ccp == NULL) { + n = pnode_alloc(p->cur); + xasprintf(&n->b, "\\[u%4.4X]", codepoint); + goto done; + } + } error_msg(p, "unknown entity &%s;", name); return; } /* Create, append, and close out an entity node. */ - if ((n = pnode_alloc(p->cur)) == NULL || - (n->b = strdup(entity->roff)) == NULL) - fatal(p); + n = pnode_alloc(p->cur); + n->b = xstrdup(entity->roff); +done: n->node = NODE_ESCAPE; - n->spc = (p->flags & PFLAG_SPC) != 0; - p->flags &= ~PFLAG_SPC; + n->flags = ((p->flags & PFLAG_LINE) ? NFLAG_LINE : 0) | + ((p->flags & PFLAG_SPC) ? NFLAG_SPC : 0); + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); } /* @@ -506,8 +505,7 @@ xml_elem_start(struct parse *p, const char *name) break; } - if ((n = pnode_alloc(p->cur)) == NULL) - fatal(p); + n = pnode_alloc(p->cur); /* * Some elements are self-closing. @@ -519,6 +517,7 @@ xml_elem_start(struct parse *p, const char *name) case NODE_DOCTYPE: case NODE_ENTITY: case NODE_SBR: + case NODE_VOID: p->flags |= PFLAG_EEND; break; default: @@ -527,13 +526,14 @@ xml_elem_start(struct parse *p, const char *name) switch (pnode_class(p->ncur)) { case CLASS_LINE: case CLASS_ENCL: - n->spc = (p->flags & PFLAG_SPC) != 0; + n->flags = ((p->flags & PFLAG_LINE) ? NFLAG_LINE : 0) | + ((p->flags & PFLAG_SPC) ? NFLAG_SPC : 0); break; case CLASS_NOFILL: p->nofill++; /* FALLTHROUGH */ default: - n->spc = 1; + n->flags |= NFLAG_SPC; break; } p->cur = n; @@ -567,17 +567,14 @@ xml_attrkey(struct parse *p, const char *name) p->flags &= ~PFLAG_ATTR; return; } - if ((a = calloc(1, sizeof(*a))) == NULL) - fatal(p); - + a = xcalloc(1, sizeof(*a)); a->key = key; a->val = ATTRVAL__MAX; if (value == NULL) { a->rawval = NULL; p->flags |= PFLAG_ATTR; } else { - if ((a->rawval = strdup(value)) == NULL) - fatal(p); + a->rawval = xstrdup(value); p->flags &= ~PFLAG_ATTR; } TAILQ_INSERT_TAIL(&p->cur->attrq, a, child); @@ -595,9 +592,8 @@ xml_attrval(struct parse *p, const char *name) return; if ((a = TAILQ_LAST(&p->cur->attrq, pattrq)) == NULL) return; - if ((a->val = attrval_parse(name)) == ATTRVAL__MAX && - (a->rawval = strdup(name)) == NULL) - fatal(p); + if ((a->val = attrval_parse(name)) == ATTRVAL__MAX) + a->rawval = xstrdup(name); p->flags &= ~PFLAG_ATTR; } @@ -624,6 +620,7 @@ xml_elem_end(struct parse *p, const char *name) if (p->del == 0) pnode_closetext(p, 0); + n = p->cur; node = name == NULL ? p->ncur : xml_name2node(p, name); switch (node) { @@ -636,8 +633,7 @@ xml_elem_end(struct parse *p, const char *name) case NODE_UNKNOWN: break; case NODE_INCLUDE: - n = p->cur; - p->cur = p->cur->parent; + p->cur = n->parent; cp = pnode_getattr_raw(n, ATTRKEY_HREF, NULL); if (cp == NULL) error_msg(p, " element " @@ -645,14 +641,15 @@ xml_elem_end(struct parse *p, const char *name) else parse_file(p, -1, cp); pnode_unlink(n); - p->flags &= ~PFLAG_SPC; + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); break; case NODE_DOCTYPE: case NODE_SBR: + case NODE_VOID: p->flags &= ~PFLAG_EEND; /* FALLTHROUGH */ default: - if (p->cur == NULL || node != p->cur->node) { + if (n == NULL || node != n->node) { warn_msg(p, "element not open: ", name); break; } @@ -666,13 +663,21 @@ xml_elem_end(struct parse *p, const char *name) * obviously better than discarding it or crashing. */ - if (p->cur->parent != NULL || node == NODE_DOCTYPE) { - p->cur = p->cur->parent; + if (n->parent != NULL || node == NODE_DOCTYPE) { + p->cur = n->parent; if (p->cur != NULL) p->ncur = p->cur->node; } else p->tree->flags |= TREE_CLOSED; - p->flags &= ~PFLAG_SPC; + p->flags &= ~(PFLAG_LINE | PFLAG_SPC); + + /* Include a file containing entity declarations. */ + + if (node == NODE_ENTITY && strcmp("%", + pnode_getattr_raw(n, ATTRKEY_NAME, "")) == 0 && + (cp = pnode_getattr_raw(n, ATTRKEY_SYSTEM, NULL)) != NULL) + parse_file(p, -1, cp); + break; } assert(p->del == 0); @@ -683,13 +688,8 @@ parse_alloc(int warn) { struct parse *p; - if ((p = calloc(1, sizeof(*p))) == NULL) - return NULL; - - if ((p->tree = calloc(1, sizeof(*p->tree))) == NULL) { - free(p); - return NULL; - } + p = xcalloc(1, sizeof(*p)); + p->tree = xcalloc(1, sizeof(*p->tree)); if (warn) p->flags |= PFLAG_WARN; else @@ -783,8 +783,10 @@ parse_string(struct parse *p, char *b, size_t rlen, break; if (isspace((unsigned char)b[pend])) { p->flags |= PFLAG_SPC; - if (b[pend] == '\n') + if (b[pend] == '\n') { + p->flags |= PFLAG_LINE; pws = pend + 1; + } increment(p, b, &pend, refill); continue; }