=================================================================== RCS file: /cvs/docbook2mdoc/parse.c,v retrieving revision 1.38 retrieving revision 1.39 diff -u -p -r1.38 -r1.39 --- docbook2mdoc/parse.c 2019/04/12 11:37:09 1.38 +++ docbook2mdoc/parse.c 2019/04/12 16:40:53 1.39 @@ -1,4 +1,4 @@ -/* $Id: parse.c,v 1.38 2019/04/12 11:37:09 schwarze Exp $ */ +/* $Id: parse.c,v 1.39 2019/04/12 16:40:53 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2019 Ingo Schwarze @@ -63,160 +63,48 @@ struct parse { #define PFLAG_EEND (1 << 3) /* This element is self-closing. */ }; -struct element { +struct alias { const char *name; /* DocBook element name. */ enum nodeid node; /* Node type to generate. */ }; -static const struct element elements[] = { +static const struct alias aliases[] = { { "acronym", NODE_IGNORE }, - { "affiliation", NODE_AFFILIATION }, { "anchor", NODE_DELETE }, - { "appendix", NODE_APPENDIX }, - { "application", NODE_APPLICATION }, - { "arg", NODE_ARG }, { "article", NODE_SECTION }, - { "author", NODE_AUTHOR }, - { "authorgroup", NODE_AUTHORGROUP }, - { "blockquote", NODE_BLOCKQUOTE }, { "book", NODE_SECTION }, - { "bookinfo", NODE_BOOKINFO }, - { "caution", NODE_CAUTION }, { "chapter", NODE_SECTION }, - { "citerefentry", NODE_CITEREFENTRY }, - { "citetitle", NODE_CITETITLE }, - { "cmdsynopsis", NODE_CMDSYNOPSIS }, { "code", NODE_LITERAL }, - { "colspec", NODE_COLSPEC }, - { "command", NODE_COMMAND }, { "computeroutput", NODE_LITERAL }, - { "constant", NODE_CONSTANT }, - { "contrib", NODE_CONTRIB }, - { "copyright", NODE_COPYRIGHT }, - { "date", NODE_DATE }, { "!doctype", NODE_DOCTYPE }, - { "!DOCTYPE", NODE_DOCTYPE }, - { "editor", NODE_EDITOR }, - { "email", NODE_EMAIL }, - { "emphasis", NODE_EMPHASIS }, - { "!ENTITY", NODE_ENTITY }, - { "entry", NODE_ENTRY }, - { "envar", NODE_ENVAR }, - { "errorname", NODE_ERRORNAME }, - { "fieldsynopsis", NODE_FIELDSYNOPSIS }, - { "filename", NODE_FILENAME }, { "firstname", NODE_PERSONNAME }, - { "firstterm", NODE_FIRSTTERM }, - { "footnote", NODE_FOOTNOTE }, - { "funcdef", NODE_FUNCDEF }, - { "funcprototype", NODE_FUNCPROTOTYPE }, - { "funcsynopsis", NODE_FUNCSYNOPSIS }, - { "funcsynopsisinfo", NODE_FUNCSYNOPSISINFO }, - { "function", NODE_FUNCTION }, { "glossary", NODE_VARIABLELIST }, { "glossdef", NODE_IGNORE }, { "glossdiv", NODE_IGNORE }, { "glossentry", NODE_VARLISTENTRY }, { "glosslist", NODE_VARIABLELIST }, - { "glossterm", NODE_GLOSSTERM }, - { "group", NODE_GROUP }, - { "holder", NODE_HOLDER }, - { "index", NODE_INDEX }, { "indexterm", NODE_DELETE }, - { "info", NODE_INFO }, - { "informalequation", NODE_INFORMALEQUATION }, { "informaltable", NODE_TABLE }, - { "inlineequation", NODE_INLINEEQUATION }, - { "itemizedlist", NODE_ITEMIZEDLIST }, - { "keysym", NODE_KEYSYM }, - { "legalnotice", NODE_LEGALNOTICE }, - { "link", NODE_LINK }, - { "listitem", NODE_LISTITEM }, - { "literal", NODE_LITERAL }, - { "literallayout", NODE_LITERALLAYOUT }, - { "manvolnum", NODE_MANVOLNUM }, - { "markup", NODE_MARKUP }, - { "member", NODE_MEMBER }, - { "mml:math", NODE_MML_MATH }, - { "mml:mfenced", NODE_MML_MFENCED }, - { "mml:mfrac", NODE_MML_MFRAC }, - { "mml:mi", NODE_MML_MI }, - { "mml:mn", NODE_MML_MN }, - { "mml:mo", NODE_MML_MO }, - { "mml:mrow", NODE_MML_MROW }, - { "mml:msub", NODE_MML_MSUB }, - { "mml:msup", NODE_MML_MSUP }, - { "modifier", NODE_MODIFIER }, - { "note", NODE_NOTE }, - { "option", NODE_OPTION }, - { "orderedlist", NODE_ORDEREDLIST }, - { "orgname", NODE_ORGNAME }, { "othername", NODE_PERSONNAME }, - { "para", NODE_PARA }, - { "paramdef", NODE_PARAMDEF }, - { "parameter", NODE_PARAMETER }, { "part", NODE_SECTION }, - { "personname", NODE_PERSONNAME }, { "phrase", NODE_IGNORE }, - { "preface", NODE_PREFACE }, { "primary", NODE_DELETE }, - { "programlisting", NODE_PROGRAMLISTING }, - { "prompt", NODE_PROMPT }, - { "quote", NODE_QUOTE }, - { "refclass", NODE_REFCLASS }, - { "refdescriptor", NODE_REFDESCRIPTOR }, - { "refentry", NODE_REFENTRY }, - { "refentryinfo", NODE_REFENTRYINFO }, - { "refentrytitle", NODE_REFENTRYTITLE }, - { "refmeta", NODE_REFMETA }, - { "refmetainfo", NODE_REFMETAINFO }, - { "refmiscinfo", NODE_REFMISCINFO }, - { "refname", NODE_REFNAME }, - { "refnamediv", NODE_REFNAMEDIV }, - { "refpurpose", NODE_REFPURPOSE }, { "refsect1", NODE_SECTION }, { "refsect2", NODE_SECTION }, { "refsect3", NODE_SECTION }, { "refsection", NODE_SECTION }, - { "refsynopsisdiv", NODE_REFSYNOPSISDIV }, - { "releaseinfo", NODE_RELEASEINFO }, - { "replaceable", NODE_REPLACEABLE }, - { "row", NODE_ROW }, - { "sbr", NODE_SBR }, - { "screen", NODE_SCREEN }, { "secondary", NODE_DELETE }, { "sect1", NODE_SECTION }, { "sect2", NODE_SECTION }, - { "section", NODE_SECTION }, { "sgmltag", NODE_MARKUP }, { "simpara", NODE_PARA }, - { "simplelist", NODE_SIMPLELIST }, - { "spanspec", NODE_SPANSPEC }, { "structfield", NODE_PARAMETER }, { "structname", NODE_TYPE }, - { "subtitle", NODE_SUBTITLE }, { "surname", NODE_PERSONNAME }, { "symbol", NODE_CONSTANT }, - { "synopsis", NODE_SYNOPSIS }, - { "table", NODE_TABLE }, - { "tbody", NODE_TBODY }, - { "term", NODE_TERM }, - { "tfoot", NODE_TFOOT }, - { "tgroup", NODE_TGROUP }, - { "thead", NODE_THEAD }, - { "tip", NODE_TIP }, - { "title", NODE_TITLE }, { "trademark", NODE_IGNORE }, - { "type", NODE_TYPE }, { "ulink", NODE_LINK }, { "userinput", NODE_LITERAL }, - { "variablelist", NODE_VARIABLELIST }, - { "varlistentry", NODE_VARLISTENTRY }, - { "varname", NODE_VARNAME }, - { "warning", NODE_WARNING }, - { "wordasword", NODE_WORDASWORD }, - { "xi:include", NODE_INCLUDE }, - { "year", NODE_YEAR }, { NULL, NODE_IGNORE } }; @@ -369,14 +257,29 @@ xml_text(struct parse *p, const char *word, int sz) p->flags &= ~PFLAG_SPC; /* - * If this node follows a non-text node without intervening + * If this node follows an in-line macro without intervening * whitespace, keep the text in it as short as possible, * and do not keep it open. */ - if (n->spc == 0 && - (np = TAILQ_PREV(n, pnodeq, child)) != NULL && - np->node != NODE_TEXT && np->node != NODE_ESCAPE) { + np = n->spc ? NULL : TAILQ_PREV(n, pnodeq, child); + while (np != NULL) { + switch (pnode_class(np->node)) { + case CLASS_VOID: + case CLASS_TEXT: + case CLASS_BLOCK: + np = NULL; + break; + case CLASS_TRANS: + np = TAILQ_LAST(&np->childq, pnodeq); + continue; + case CLASS_LINE: + case CLASS_ENCL: + break; + } + break; + } + if (np != NULL) { i = 0; while (i < sz && !isspace((unsigned char)word[i])) i++; @@ -517,12 +420,30 @@ xml_entity(struct parse *p, const char *name) } /* + * Parse an element name. + */ +static enum nodeid +xml_name2node(struct parse *p, const char *name) +{ + const struct alias *alias; + enum nodeid node; + + if ((node = pnode_parse(name)) < NODE_UNKNOWN) + return node; + + for (alias = aliases; alias->name != NULL; alias++) + if (strcmp(alias->name, name) == 0) + return alias->node; + + return NODE_UNKNOWN; +} + +/* * Begin an element. */ static void xml_elem_start(struct parse *p, const char *name) { - const struct element *elem; struct pnode *n; /* @@ -535,21 +456,7 @@ xml_elem_start(struct parse *p, const char *name) return; } - pnode_closetext(p, 1); - - for (elem = elements; elem->name != NULL; elem++) - if (strcmp(elem->name, name) == 0) - break; - - if (elem->name == NULL) { - if (*name == '!' || *name == '?') - return; - error_msg(p, "unknown element <%s>", name); - } - - p->ncur = elem->node; - - switch (p->ncur) { + switch (p->ncur = xml_name2node(p, name)) { case NODE_DELETE_WARN: warn_msg(p, "skipping element <%s>", name); /* FALLTHROUGH */ @@ -558,6 +465,10 @@ xml_elem_start(struct parse *p, const char *name) /* FALLTHROUGH */ case NODE_IGNORE: return; + case NODE_UNKNOWN: + if (*name != '!' && *name != '?') + error_msg(p, "unknown element <%s>", name); + return; default: break; } @@ -565,57 +476,42 @@ xml_elem_start(struct parse *p, const char *name) if (p->tree->flags & TREE_CLOSED && p->cur->parent == NULL) warn_msg(p, "element after end of document: <%s>", name); + switch (pnode_class(p->ncur)) { + case CLASS_LINE: + case CLASS_ENCL: + pnode_closetext(p, 1); + break; + default: + pnode_closetext(p, 0); + break; + } + if ((n = pnode_alloc(p->cur)) == NULL) fatal(p); /* + * Some elements are self-closing. * Nodes that begin a new macro or request line or start by * printing text always want whitespace before themselves. */ - switch (n->node = elem->node) { + switch (n->node = p->ncur) { case NODE_DOCTYPE: case NODE_ENTITY: case NODE_SBR: p->flags |= PFLAG_EEND; - /* FALLTHROUGH */ - case NODE_APPENDIX: - case NODE_AUTHORGROUP: - case NODE_BLOCKQUOTE: - case NODE_BOOKINFO: - case NODE_CAUTION: - case NODE_EDITOR: - case NODE_ENTRY: - case NODE_FUNCDEF: - case NODE_FUNCPROTOTYPE: - case NODE_INFORMALEQUATION: - case NODE_INLINEEQUATION: - case NODE_ITEMIZEDLIST: - case NODE_LEGALNOTICE: - case NODE_LITERALLAYOUT: - case NODE_NOTE: - case NODE_ORDEREDLIST: - case NODE_PARA: - case NODE_PREFACE: - case NODE_PROGRAMLISTING: - case NODE_REFMETA: - case NODE_REFNAMEDIV: - case NODE_REFSYNOPSISDIV: - case NODE_ROW: - case NODE_SCREEN: - case NODE_SECTION: - case NODE_SYNOPSIS: - case NODE_TGROUP: - case NODE_TIP: - case NODE_TITLE: - case NODE_VARIABLELIST: - case NODE_VARLISTENTRY: - case NODE_WARNING: - n->spc = 1; break; default: + break; + } + switch (pnode_class(p->ncur)) { + case CLASS_LINE: + case CLASS_ENCL: n->spc = (p->flags & PFLAG_SPC) != 0; break; + default: + n->spc = 1; + break; } p->cur = n; if (n->node == NODE_DOCTYPE) { @@ -689,7 +585,6 @@ xml_attrval(struct parse *p, const char *name) static void xml_elem_end(struct parse *p, const char *name) { - const struct element *elem; struct pnode *n; const char *cp; enum nodeid node; @@ -706,13 +601,7 @@ xml_elem_end(struct parse *p, const char *name) if (p->del == 0) pnode_closetext(p, 0); - if (name != NULL) { - for (elem = elements; elem->name != NULL; elem++) - if (strcmp(elem->name, name) == 0) - break; - node = elem->node; - } else - node = p->ncur; + node = name == NULL ? p->ncur : xml_name2node(p, name); switch (node) { case NODE_DELETE_WARN: @@ -721,6 +610,7 @@ xml_elem_end(struct parse *p, const char *name) p->del--; break; case NODE_IGNORE: + case NODE_UNKNOWN: break; case NODE_INCLUDE: n = p->cur;