Return to mdoc.c CVS log | Up to [cvsweb.bsd.lv] / mandoc |
version 1.124, 2010/05/07 15:49:36 | version 1.229, 2014/11/19 03:08:17 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> | * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> | |||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
|
|
||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
*/ | */ | ||
#ifdef HAVE_CONFIG_H | |||
#include "config.h" | #include "config.h" | ||
#endif | |||
#include <sys/types.h> | #include <sys/types.h> | ||
|
|
||
#include <string.h> | #include <string.h> | ||
#include <time.h> | #include <time.h> | ||
#include "mdoc.h" | |||
#include "mandoc.h" | |||
#include "mandoc_aux.h" | |||
#include "libmdoc.h" | #include "libmdoc.h" | ||
#include "libmandoc.h" | #include "libmandoc.h" | ||
const char *const __mdoc_merrnames[MERRMAX] = { | const char *const __mdoc_macronames[MDOC_MAX + 1] = { | ||
"trailing whitespace", /* ETAILWS */ | |||
"unexpected quoted parameter", /* EQUOTPARM */ | |||
"unterminated quoted parameter", /* EQUOTTERM */ | |||
"argument parameter suggested", /* EARGVAL */ | |||
"macro disallowed in prologue", /* EBODYPROL */ | |||
"macro disallowed in body", /* EPROLBODY */ | |||
"text disallowed in prologue", /* ETEXTPROL */ | |||
"blank line disallowed", /* ENOBLANK */ | |||
"text parameter too long", /* ETOOLONG */ | |||
"invalid escape sequence", /* EESCAPE */ | |||
"invalid character", /* EPRINT */ | |||
"document has no body", /* ENODAT */ | |||
"document has no prologue", /* ENOPROLOGUE */ | |||
"expected line arguments", /* ELINE */ | |||
"invalid AT&T argument", /* EATT */ | |||
"default name not yet set", /* ENAME */ | |||
"missing list type", /* ELISTTYPE */ | |||
"missing display type", /* EDISPTYPE */ | |||
"too many display types", /* EMULTIDISP */ | |||
"too many list types", /* EMULTILIST */ | |||
"NAME section must be first", /* ESECNAME */ | |||
"badly-formed NAME section", /* ENAMESECINC */ | |||
"argument repeated", /* EARGREP */ | |||
"expected boolean parameter", /* EBOOL */ | |||
"inconsistent column syntax", /* ECOLMIS */ | |||
"nested display invalid", /* ENESTDISP */ | |||
"width argument missing", /* EMISSWIDTH */ | |||
"invalid section for this manual section", /* EWRONGMSEC */ | |||
"section out of conventional order", /* ESECOOO */ | |||
"section repeated", /* ESECREP */ | |||
"invalid standard argument", /* EBADSTAND */ | |||
"multi-line arguments discouraged", /* ENOMULTILINE */ | |||
"multi-line arguments suggested", /* EMULTILINE */ | |||
"line arguments discouraged", /* ENOLINE */ | |||
"prologue macro out of conventional order", /* EPROLOOO */ | |||
"prologue macro repeated", /* EPROLREP */ | |||
"invalid manual section", /* EBADMSEC */ | |||
"invalid section", /* EBADSEC */ | |||
"invalid font mode", /* EFONT */ | |||
"invalid date syntax", /* EBADDATE */ | |||
"invalid number format", /* ENUMFMT */ | |||
"superfluous width argument", /* ENOWIDTH */ | |||
"system: utsname error", /* EUTSNAME */ | |||
"obsolete macro", /* EOBS */ | |||
"end-of-line scope violation", /* EIMPBRK */ | |||
"empty macro ignored", /* EIGNE */ | |||
"unclosed explicit scope", /* EOPEN */ | |||
"unterminated quoted phrase", /* EQUOTPHR */ | |||
"closure macro without prior context", /* ENOCTX */ | |||
"no description found for library", /* ELIB */ | |||
"bad child for parent context", /* EBADCHILD */ | |||
"list arguments preceding type", /* ENOTYPE */ | |||
"deprecated comment style", /* EBADCOMMENT */ | |||
}; | |||
const char *const __mdoc_macronames[MDOC_MAX] = { | |||
"Ap", "Dd", "Dt", "Os", | "Ap", "Dd", "Dt", "Os", | ||
"Sh", "Ss", "Pp", "D1", | "Sh", "Ss", "Pp", "D1", | ||
"Dl", "Bd", "Ed", "Bl", | "Dl", "Bd", "Ed", "Bl", | ||
|
|
||
"Ic", "In", "Li", "Nd", | "Ic", "In", "Li", "Nd", | ||
"Nm", "Op", "Ot", "Pa", | "Nm", "Op", "Ot", "Pa", | ||
"Rv", "St", "Va", "Vt", | "Rv", "St", "Va", "Vt", | ||
/* LINTED */ | |||
"Xr", "%A", "%B", "%D", | "Xr", "%A", "%B", "%D", | ||
/* LINTED */ | |||
"%I", "%J", "%N", "%O", | "%I", "%J", "%N", "%O", | ||
/* LINTED */ | |||
"%P", "%R", "%T", "%V", | "%P", "%R", "%T", "%V", | ||
"Ac", "Ao", "Aq", "At", | "Ac", "Ao", "Aq", "At", | ||
"Bc", "Bf", "Bo", "Bq", | "Bc", "Bf", "Bo", "Bq", | ||
|
|
||
"Bk", "Ek", "Bt", "Hf", | "Bk", "Ek", "Bt", "Hf", | ||
"Fr", "Ud", "Lb", "Lp", | "Fr", "Ud", "Lb", "Lp", | ||
"Lk", "Mt", "Brq", "Bro", | "Lk", "Mt", "Brq", "Bro", | ||
/* LINTED */ | |||
"Brc", "%C", "Es", "En", | "Brc", "%C", "Es", "En", | ||
/* LINTED */ | |||
"Dx", "%Q", "br", "sp", | "Dx", "%Q", "br", "sp", | ||
/* LINTED */ | "%U", "Ta", "ll", "text", | ||
"%U" | |||
}; | }; | ||
const char *const __mdoc_argnames[MDOC_ARG_MAX] = { | const char *const __mdoc_argnames[MDOC_ARG_MAX] = { | ||
"split", "nosplit", "ragged", | "split", "nosplit", "ragged", | ||
"unfilled", "literal", "file", | "unfilled", "literal", "file", | ||
"offset", "bullet", "dash", | "offset", "bullet", "dash", | ||
"hyphen", "item", "enum", | "hyphen", "item", "enum", | ||
"tag", "diag", "hang", | "tag", "diag", "hang", | ||
"ohang", "inset", "column", | "ohang", "inset", "column", | ||
"width", "compact", "std", | "width", "compact", "std", | ||
"filled", "words", "emphasis", | "filled", "words", "emphasis", | ||
"symbolic", "nested", "centered" | "symbolic", "nested", "centered" | ||
}; | }; | ||
|
|
||
const char * const *mdoc_argnames = __mdoc_argnames; | const char * const *mdoc_argnames = __mdoc_argnames; | ||
static void mdoc_node_free(struct mdoc_node *); | static void mdoc_node_free(struct mdoc_node *); | ||
static void mdoc_node_unlink(struct mdoc *, | static void mdoc_node_unlink(struct mdoc *, | ||
struct mdoc_node *); | struct mdoc_node *); | ||
static void mdoc_free1(struct mdoc *); | static void mdoc_free1(struct mdoc *); | ||
static void mdoc_alloc1(struct mdoc *); | static void mdoc_alloc1(struct mdoc *); | ||
static struct mdoc_node *node_alloc(struct mdoc *, int, int, | static struct mdoc_node *node_alloc(struct mdoc *, int, int, | ||
enum mdoct, enum mdoc_type); | enum mdoct, enum mdoc_type); | ||
static int node_append(struct mdoc *, | static int node_append(struct mdoc *, | ||
struct mdoc_node *); | struct mdoc_node *); | ||
static int mdoc_ptext(struct mdoc *, int, char *); | static int mdoc_ptext(struct mdoc *, int, char *, int); | ||
static int mdoc_pmacro(struct mdoc *, int, char *); | static int mdoc_pmacro(struct mdoc *, int, char *, int); | ||
static int macrowarn(struct mdoc *, int, const char *); | |||
const struct mdoc_node * | const struct mdoc_node * | ||
mdoc_node(const struct mdoc *m) | mdoc_node(const struct mdoc *mdoc) | ||
{ | { | ||
return(MDOC_HALT & m->flags ? NULL : m->first); | return(mdoc->first); | ||
} | } | ||
const struct mdoc_meta * | const struct mdoc_meta * | ||
mdoc_meta(const struct mdoc *m) | mdoc_meta(const struct mdoc *mdoc) | ||
{ | { | ||
return(MDOC_HALT & m->flags ? NULL : &m->meta); | return(&mdoc->meta); | ||
} | } | ||
/* | /* | ||
* Frees volatile resources (parse tree, meta-data, fields). | * Frees volatile resources (parse tree, meta-data, fields). | ||
*/ | */ | ||
|
|
||
if (mdoc->first) | if (mdoc->first) | ||
mdoc_node_delete(mdoc, mdoc->first); | mdoc_node_delete(mdoc, mdoc->first); | ||
if (mdoc->meta.title) | free(mdoc->meta.msec); | ||
free(mdoc->meta.title); | free(mdoc->meta.vol); | ||
if (mdoc->meta.os) | free(mdoc->meta.arch); | ||
free(mdoc->meta.os); | free(mdoc->meta.date); | ||
if (mdoc->meta.name) | free(mdoc->meta.title); | ||
free(mdoc->meta.name); | free(mdoc->meta.os); | ||
if (mdoc->meta.arch) | free(mdoc->meta.name); | ||
free(mdoc->meta.arch); | |||
if (mdoc->meta.vol) | |||
free(mdoc->meta.vol); | |||
} | } | ||
/* | /* | ||
* Allocate all volatile resources (parse tree, meta-data, fields). | * Allocate all volatile resources (parse tree, meta-data, fields). | ||
*/ | */ | ||
|
|
||
mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); | mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); | ||
mdoc->first = mdoc->last; | mdoc->first = mdoc->last; | ||
mdoc->last->type = MDOC_ROOT; | mdoc->last->type = MDOC_ROOT; | ||
mdoc->last->tok = MDOC_MAX; | |||
mdoc->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
} | } | ||
/* | /* | ||
* Free up volatile resources (see mdoc_free1()) then re-initialises the | * Free up volatile resources (see mdoc_free1()) then re-initialises the | ||
* data with mdoc_alloc1(). After invocation, parse data has been reset | * data with mdoc_alloc1(). After invocation, parse data has been reset | ||
|
|
||
mdoc_alloc1(mdoc); | mdoc_alloc1(mdoc); | ||
} | } | ||
/* | /* | ||
* Completely free up all volatile and non-volatile parse resources. | * Completely free up all volatile and non-volatile parse resources. | ||
* After invocation, the pointer is no longer usable. | * After invocation, the pointer is no longer usable. | ||
|
|
||
free(mdoc); | free(mdoc); | ||
} | } | ||
/* | /* | ||
* Allocate volatile and non-volatile parse resources. | * Allocate volatile and non-volatile parse resources. | ||
*/ | */ | ||
struct mdoc * | struct mdoc * | ||
mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) | mdoc_alloc(struct roff *roff, struct mparse *parse, | ||
const char *defos, int quick) | |||
{ | { | ||
struct mdoc *p; | struct mdoc *p; | ||
p = mandoc_calloc(1, sizeof(struct mdoc)); | p = mandoc_calloc(1, sizeof(struct mdoc)); | ||
if (cb) | p->parse = parse; | ||
memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); | p->defos = defos; | ||
p->quick = quick; | |||
p->roff = roff; | |||
p->data = data; | |||
p->pflags = pflags; | |||
mdoc_hash_init(); | mdoc_hash_init(); | ||
mdoc_alloc1(p); | mdoc_alloc1(p); | ||
return(p); | return(p); | ||
} | } | ||
/* | |||
* Climb back up the parse tree, validating open scopes. Mostly calls | |||
* through to macro_end() in macro.c. | |||
*/ | |||
int | int | ||
mdoc_endparse(struct mdoc *m) | mdoc_endparse(struct mdoc *mdoc) | ||
{ | { | ||
if (MDOC_HALT & m->flags) | return(mdoc_macroend(mdoc)); | ||
return(0); | |||
else if (mdoc_macroend(m)) | |||
return(1); | |||
m->flags |= MDOC_HALT; | |||
return(0); | |||
} | } | ||
/* | |||
* Main parse routine. Parses a single line -- really just hands off to | |||
* the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). | |||
*/ | |||
int | int | ||
mdoc_parseln(struct mdoc *m, int ln, char *buf) | mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) | ||
{ | { | ||
struct mdoc_node *n; | |||
if (MDOC_HALT & m->flags) | n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); | ||
return(0); | n->eqn = ep; | ||
if (ep->ln > mdoc->last->line) | |||
n->flags |= MDOC_LINE; | |||
return('.' == *buf ? mdoc_pmacro(m, ln, buf) : | if ( ! node_append(mdoc, n)) | ||
mdoc_ptext(m, ln, buf)); | |||
} | |||
int | |||
mdoc_verr(struct mdoc *mdoc, int ln, int pos, | |||
const char *fmt, ...) | |||
{ | |||
char buf[256]; | |||
va_list ap; | |||
if (NULL == mdoc->cb.mdoc_err) | |||
return(0); | return(0); | ||
va_start(ap, fmt); | mdoc->next = MDOC_NEXT_SIBLING; | ||
(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); | return(1); | ||
va_end(ap); | |||
return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); | |||
} | } | ||
int | int | ||
mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) | mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) | ||
{ | { | ||
char buf[256]; | struct mdoc_node *n; | ||
va_list ap; | |||
if (NULL == mdoc->cb.mdoc_warn) | n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); | ||
n->span = sp; | |||
if ( ! node_append(mdoc, n)) | |||
return(0); | return(0); | ||
va_start(ap, fmt); | mdoc->next = MDOC_NEXT_SIBLING; | ||
(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); | return(1); | ||
va_end(ap); | |||
return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); | |||
} | } | ||
/* | |||
* Main parse routine. Parses a single line -- really just hands off to | |||
* the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). | |||
*/ | |||
int | int | ||
mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) | mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) | ||
{ | { | ||
const char *p; | |||
p = __mdoc_merrnames[(int)type]; | if (mdoc->last->type != MDOC_EQN || ln > mdoc->last->line) | ||
assert(p); | mdoc->flags |= MDOC_NEWLINE; | ||
if (iserr) | /* | ||
return(mdoc_verr(m, line, pos, p)); | * Let the roff nS register switch SYNOPSIS mode early, | ||
* such that the parser knows at all times | |||
* whether this mode is on or off. | |||
* Note that this mode is also switched by the Sh macro. | |||
*/ | |||
if (roff_getreg(mdoc->roff, "nS")) | |||
mdoc->flags |= MDOC_SYNOPSIS; | |||
else | |||
mdoc->flags &= ~MDOC_SYNOPSIS; | |||
return(mdoc_vwarn(m, line, pos, p)); | return(roff_getcontrol(mdoc->roff, buf, &offs) ? | ||
mdoc_pmacro(mdoc, ln, buf, offs) : | |||
mdoc_ptext(mdoc, ln, buf, offs)); | |||
} | } | ||
int | int | ||
mdoc_macro(struct mdoc *m, enum mdoct tok, | mdoc_macro(MACRO_PROT_ARGS) | ||
int ln, int pp, int *pos, char *buf) | |||
{ | { | ||
assert(tok < MDOC_MAX); | assert(tok < MDOC_MAX); | ||
/* If we're in the body, deny prologue calls. */ | if (mdoc->flags & MDOC_PBODY) { | ||
if (tok == MDOC_Dt) { | |||
if (MDOC_PROLOGUE & mdoc_macros[tok].flags && | mandoc_vmsg(MANDOCERR_DT_LATE, | ||
MDOC_PBODY & m->flags) | mdoc->parse, line, ppos, | ||
return(mdoc_perr(m, ln, pp, EPROLBODY)); | "Dt %s", buf + *pos); | ||
return(1); | |||
/* If we're in the prologue, deny "body" macros. */ | } | ||
} else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) { | |||
if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && | if (mdoc->meta.title == NULL) { | ||
! (MDOC_PBODY & m->flags)) { | mandoc_vmsg(MANDOCERR_DT_NOTITLE, | ||
if ( ! mdoc_pwarn(m, ln, pp, EBODYPROL)) | mdoc->parse, line, ppos, "%s %s", | ||
return(0); | mdoc_macronames[tok], buf + *pos); | ||
if (NULL == m->meta.title) | mdoc->meta.title = mandoc_strdup("UNTITLED"); | ||
m->meta.title = mandoc_strdup("unknown"); | } | ||
if (NULL == m->meta.vol) | if (NULL == mdoc->meta.vol) | ||
m->meta.vol = mandoc_strdup("local"); | mdoc->meta.vol = mandoc_strdup("LOCAL"); | ||
if (NULL == m->meta.os) | mdoc->flags |= MDOC_PBODY; | ||
m->meta.os = mandoc_strdup("local"); | |||
if (0 == m->meta.date) | |||
m->meta.date = time(NULL); | |||
m->flags |= MDOC_PBODY; | |||
} | } | ||
return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); | return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); | ||
} | } | ||
|
|
||
assert(MDOC_ROOT != p->type); | assert(MDOC_ROOT != p->type); | ||
switch (mdoc->next) { | switch (mdoc->next) { | ||
case (MDOC_NEXT_SIBLING): | case MDOC_NEXT_SIBLING: | ||
mdoc->last->next = p; | mdoc->last->next = p; | ||
p->prev = mdoc->last; | p->prev = mdoc->last; | ||
p->parent = mdoc->last->parent; | p->parent = mdoc->last->parent; | ||
break; | break; | ||
case (MDOC_NEXT_CHILD): | case MDOC_NEXT_CHILD: | ||
mdoc->last->child = p; | mdoc->last->child = p; | ||
p->parent = mdoc->last; | p->parent = mdoc->last; | ||
break; | break; | ||
|
|
||
p->parent->nchild++; | p->parent->nchild++; | ||
/* | |||
* Copy over the normalised-data pointer of our parent. Not | |||
* everybody has one, but copying a null pointer is fine. | |||
*/ | |||
switch (p->type) { | |||
case MDOC_BODY: | |||
if (ENDBODY_NOT != p->end) | |||
break; | |||
/* FALLTHROUGH */ | |||
case MDOC_TAIL: | |||
/* FALLTHROUGH */ | |||
case MDOC_HEAD: | |||
p->norm = p->parent->norm; | |||
break; | |||
default: | |||
break; | |||
} | |||
if ( ! mdoc_valid_pre(mdoc, p)) | if ( ! mdoc_valid_pre(mdoc, p)) | ||
return(0); | return(0); | ||
if ( ! mdoc_action_pre(mdoc, p)) | |||
return(0); | |||
switch (p->type) { | switch (p->type) { | ||
case (MDOC_HEAD): | case MDOC_HEAD: | ||
assert(MDOC_BLOCK == p->parent->type); | assert(MDOC_BLOCK == p->parent->type); | ||
p->parent->head = p; | p->parent->head = p; | ||
break; | break; | ||
case (MDOC_TAIL): | case MDOC_TAIL: | ||
assert(MDOC_BLOCK == p->parent->type); | assert(MDOC_BLOCK == p->parent->type); | ||
p->parent->tail = p; | p->parent->tail = p; | ||
break; | break; | ||
case (MDOC_BODY): | case MDOC_BODY: | ||
if (p->end) | |||
break; | |||
assert(MDOC_BLOCK == p->parent->type); | assert(MDOC_BLOCK == p->parent->type); | ||
p->parent->body = p; | p->parent->body = p; | ||
break; | break; | ||
|
|
||
mdoc->last = p; | mdoc->last = p; | ||
switch (p->type) { | switch (p->type) { | ||
case (MDOC_TEXT): | case MDOC_TBL: | ||
/* FALLTHROUGH */ | |||
case MDOC_TEXT: | |||
if ( ! mdoc_valid_post(mdoc)) | if ( ! mdoc_valid_post(mdoc)) | ||
return(0); | return(0); | ||
if ( ! mdoc_action_post(mdoc)) | |||
return(0); | |||
break; | break; | ||
default: | default: | ||
break; | break; | ||
|
|
||
return(1); | return(1); | ||
} | } | ||
static struct mdoc_node * | static struct mdoc_node * | ||
node_alloc(struct mdoc *m, int line, int pos, | node_alloc(struct mdoc *mdoc, int line, int pos, | ||
enum mdoct tok, enum mdoc_type type) | enum mdoct tok, enum mdoc_type type) | ||
{ | { | ||
struct mdoc_node *p; | struct mdoc_node *p; | ||
p = mandoc_calloc(1, sizeof(struct mdoc_node)); | p = mandoc_calloc(1, sizeof(struct mdoc_node)); | ||
p->sec = m->lastsec; | p->sec = mdoc->lastsec; | ||
p->line = line; | p->line = line; | ||
p->pos = pos; | p->pos = pos; | ||
p->lastline = line; | |||
p->tok = tok; | p->tok = tok; | ||
p->type = type; | p->type = type; | ||
/* Flag analysis. */ | |||
if (MDOC_SYNOPSIS & mdoc->flags) | |||
p->flags |= MDOC_SYNPRETTY; | |||
else | |||
p->flags &= ~MDOC_SYNPRETTY; | |||
if (MDOC_NEWLINE & mdoc->flags) | |||
p->flags |= MDOC_LINE; | |||
mdoc->flags &= ~MDOC_NEWLINE; | |||
return(p); | return(p); | ||
} | } | ||
int | int | ||
mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) | mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) | ||
{ | { | ||
struct mdoc_node *p; | struct mdoc_node *p; | ||
p = node_alloc(m, line, pos, tok, MDOC_TAIL); | p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); | ||
if ( ! node_append(m, p)) | if ( ! node_append(mdoc, p)) | ||
return(0); | return(0); | ||
m->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
return(1); | return(1); | ||
} | } | ||
int | int | ||
mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) | mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) | ||
{ | { | ||
struct mdoc_node *p; | struct mdoc_node *p; | ||
assert(m->first); | assert(mdoc->first); | ||
assert(m->last); | assert(mdoc->last); | ||
p = node_alloc(m, line, pos, tok, MDOC_HEAD); | p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); | ||
if ( ! node_append(m, p)) | if ( ! node_append(mdoc, p)) | ||
return(0); | return(0); | ||
m->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
return(1); | return(1); | ||
} | } | ||
int | int | ||
mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) | mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) | ||
{ | { | ||
struct mdoc_node *p; | struct mdoc_node *p; | ||
p = node_alloc(m, line, pos, tok, MDOC_BODY); | p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); | ||
if ( ! node_append(m, p)) | if ( ! node_append(mdoc, p)) | ||
return(0); | return(0); | ||
m->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
return(1); | return(1); | ||
} | } | ||
int | |||
mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, | |||
struct mdoc_node *body, enum mdoc_endbody end) | |||
{ | |||
struct mdoc_node *p; | |||
p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); | |||
p->pending = body; | |||
p->norm = body->norm; | |||
p->end = end; | |||
if ( ! node_append(mdoc, p)) | |||
return(0); | |||
mdoc->next = MDOC_NEXT_SIBLING; | |||
return(1); | |||
} | |||
int | int | ||
mdoc_block_alloc(struct mdoc *m, int line, int pos, | mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, | ||
enum mdoct tok, struct mdoc_arg *args) | enum mdoct tok, struct mdoc_arg *args) | ||
{ | { | ||
struct mdoc_node *p; | struct mdoc_node *p; | ||
p = node_alloc(m, line, pos, tok, MDOC_BLOCK); | p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); | ||
p->args = args; | p->args = args; | ||
if (p->args) | if (p->args) | ||
(args->refcnt)++; | (args->refcnt)++; | ||
if ( ! node_append(m, p)) | |||
switch (tok) { | |||
case MDOC_Bd: | |||
/* FALLTHROUGH */ | |||
case MDOC_Bf: | |||
/* FALLTHROUGH */ | |||
case MDOC_Bl: | |||
/* FALLTHROUGH */ | |||
case MDOC_En: | |||
/* FALLTHROUGH */ | |||
case MDOC_Rs: | |||
p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); | |||
break; | |||
default: | |||
break; | |||
} | |||
if ( ! node_append(mdoc, p)) | |||
return(0); | return(0); | ||
m->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
return(1); | return(1); | ||
} | } | ||
int | int | ||
mdoc_elem_alloc(struct mdoc *m, int line, int pos, | mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, | ||
enum mdoct tok, struct mdoc_arg *args) | enum mdoct tok, struct mdoc_arg *args) | ||
{ | { | ||
struct mdoc_node *p; | struct mdoc_node *p; | ||
p = node_alloc(m, line, pos, tok, MDOC_ELEM); | p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); | ||
p->args = args; | p->args = args; | ||
if (p->args) | if (p->args) | ||
(args->refcnt)++; | (args->refcnt)++; | ||
if ( ! node_append(m, p)) | |||
switch (tok) { | |||
case MDOC_An: | |||
p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); | |||
break; | |||
default: | |||
break; | |||
} | |||
if ( ! node_append(mdoc, p)) | |||
return(0); | return(0); | ||
m->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
return(1); | return(1); | ||
} | } | ||
int | int | ||
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) | mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) | ||
{ | { | ||
struct mdoc_node *n; | struct mdoc_node *n; | ||
size_t sv, len; | |||
len = strlen(p); | n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); | ||
n->string = roff_strdup(mdoc->roff, p); | |||
n = node_alloc(m, line, pos, -1, MDOC_TEXT); | if ( ! node_append(mdoc, n)) | ||
n->string = mandoc_malloc(len + 1); | |||
sv = strlcpy(n->string, p, len + 1); | |||
/* Prohibit truncation. */ | |||
assert(sv < len + 1); | |||
if ( ! node_append(m, n)) | |||
return(0); | return(0); | ||
m->next = MDOC_NEXT_SIBLING; | mdoc->next = MDOC_NEXT_SIBLING; | ||
return(1); | return(1); | ||
} | } | ||
void | void | ||
mdoc_word_append(struct mdoc *mdoc, const char *p) | |||
{ | |||
struct mdoc_node *n; | |||
char *addstr, *newstr; | |||
n = mdoc->last; | |||
addstr = roff_strdup(mdoc->roff, p); | |||
mandoc_asprintf(&newstr, "%s %s", n->string, addstr); | |||
free(addstr); | |||
free(n->string); | |||
n->string = newstr; | |||
mdoc->next = MDOC_NEXT_SIBLING; | |||
} | |||
static void | |||
mdoc_node_free(struct mdoc_node *p) | mdoc_node_free(struct mdoc_node *p) | ||
{ | { | ||
if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) | |||
free(p->norm); | |||
if (p->string) | if (p->string) | ||
free(p->string); | free(p->string); | ||
if (p->args) | if (p->args) | ||
|
|
||
free(p); | free(p); | ||
} | } | ||
static void | static void | ||
mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) | mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) | ||
{ | { | ||
/* Adjust siblings. */ | /* Adjust siblings. */ | ||
|
|
||
n->parent->nchild--; | n->parent->nchild--; | ||
if (n->parent->child == n) | if (n->parent->child == n) | ||
n->parent->child = n->prev ? n->prev : n->next; | n->parent->child = n->prev ? n->prev : n->next; | ||
if (n->parent->last == n) | |||
n->parent->last = n->prev ? n->prev : NULL; | |||
} | } | ||
/* Adjust parse point, if applicable. */ | /* Adjust parse point, if applicable. */ | ||
if (m && m->last == n) { | if (mdoc && mdoc->last == n) { | ||
if (n->prev) { | if (n->prev) { | ||
m->last = n->prev; | mdoc->last = n->prev; | ||
m->next = MDOC_NEXT_SIBLING; | mdoc->next = MDOC_NEXT_SIBLING; | ||
} else { | } else { | ||
m->last = n->parent; | mdoc->last = n->parent; | ||
m->next = MDOC_NEXT_CHILD; | mdoc->next = MDOC_NEXT_CHILD; | ||
} | } | ||
} | } | ||
if (m && m->first == n) | if (mdoc && mdoc->first == n) | ||
m->first = NULL; | mdoc->first = NULL; | ||
} | } | ||
void | void | ||
mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) | mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) | ||
{ | { | ||
while (p->child) { | while (p->child) { | ||
assert(p->nchild); | assert(p->nchild); | ||
mdoc_node_delete(m, p->child); | mdoc_node_delete(mdoc, p->child); | ||
} | } | ||
assert(0 == p->nchild); | assert(0 == p->nchild); | ||
mdoc_node_unlink(m, p); | mdoc_node_unlink(mdoc, p); | ||
mdoc_node_free(p); | mdoc_node_free(p); | ||
} | } | ||
int | |||
mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) | |||
{ | |||
mdoc_node_unlink(mdoc, p); | |||
return(node_append(mdoc, p)); | |||
} | |||
/* | /* | ||
* Parse free-form text, that is, a line that does not begin with the | * Parse free-form text, that is, a line that does not begin with the | ||
* control character. | * control character. | ||
*/ | */ | ||
static int | static int | ||
mdoc_ptext(struct mdoc *m, int line, char *buf) | mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) | ||
{ | { | ||
int i; | char *c, *ws, *end; | ||
struct mdoc_node *n; | |||
/* Ignore bogus comments. */ | assert(mdoc->last); | ||
n = mdoc->last; | |||
if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2]) | /* | ||
return(mdoc_pwarn(m, line, 0, EBADCOMMENT)); | * Divert directly to list processing if we're encountering a | ||
* columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry | |||
* (a MDOC_BODY means it's already open, in which case we should | |||
* process within its context in the normal way). | |||
*/ | |||
/* No text before an initial macro. */ | if (MDOC_Bl == n->tok && MDOC_BODY == n->type && | ||
LIST_column == n->norm->Bl.type) { | |||
/* `Bl' is open without any children. */ | |||
mdoc->flags |= MDOC_FREECOL; | |||
return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); | |||
} | |||
if (SEC_NONE == m->lastnamed) | if (MDOC_It == n->tok && MDOC_BLOCK == n->type && | ||
return(mdoc_perr(m, line, 0, ETEXTPROL)); | NULL != n->parent && | ||
MDOC_Bl == n->parent->tok && | |||
LIST_column == n->parent->norm->Bl.type) { | |||
/* `Bl' has block-level `It' children. */ | |||
mdoc->flags |= MDOC_FREECOL; | |||
return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); | |||
} | |||
/* Literal just gets pulled in as-is. */ | /* | ||
* Search for the beginning of unescaped trailing whitespace (ws) | |||
if (MDOC_LITERAL & m->flags) | * and for the first character not to be output (end). | ||
return(mdoc_word_alloc(m, line, 0, buf)); | */ | ||
/* Check for a blank line, which may also consist of spaces. */ | /* FIXME: replace with strcspn(). */ | ||
ws = NULL; | |||
for (c = end = buf + offs; *c; c++) { | |||
switch (*c) { | |||
case ' ': | |||
if (NULL == ws) | |||
ws = c; | |||
continue; | |||
case '\t': | |||
/* | |||
* Always warn about trailing tabs, | |||
* even outside literal context, | |||
* where they should be put on the next line. | |||
*/ | |||
if (NULL == ws) | |||
ws = c; | |||
/* | |||
* Strip trailing tabs in literal context only; | |||
* outside, they affect the next line. | |||
*/ | |||
if (MDOC_LITERAL & mdoc->flags) | |||
continue; | |||
break; | |||
case '\\': | |||
/* Skip the escaped character, too, if any. */ | |||
if (c[1]) | |||
c++; | |||
/* FALLTHROUGH */ | |||
default: | |||
ws = NULL; | |||
break; | |||
} | |||
end = c + 1; | |||
} | |||
*end = '\0'; | |||
for (i = 0; ' ' == buf[i]; i++) | if (ws) | ||
/* Skip to first non-space. */ ; | mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, | ||
line, (int)(ws-buf), NULL); | |||
if ('\0' == buf[i]) { | if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { | ||
if ( ! mdoc_pwarn(m, line, 0, ENOBLANK)) | mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, | ||
return(0); | line, (int)(c - buf), NULL); | ||
/* | /* | ||
* Insert a `Pp' in the case of a blank line. Technically, | * Insert a `sp' in the case of a blank line. Technically, | ||
* blank lines aren't allowed, but enough manuals assume this | * blank lines aren't allowed, but enough manuals assume this | ||
* behaviour that we want to work around it. | * behaviour that we want to work around it. | ||
*/ | */ | ||
if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL)) | if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) | ||
return(0); | return(0); | ||
m->next = MDOC_NEXT_SIBLING; | mdoc->next = MDOC_NEXT_SIBLING; | ||
return(1); | |||
return(mdoc_valid_post(mdoc)); | |||
} | } | ||
/* Warn if the last un-escaped character is whitespace. */ | if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) | ||
return(0); | |||
i = (int)strlen(buf); | if (MDOC_LITERAL & mdoc->flags) | ||
assert(i); | return(1); | ||
if (' ' == buf[i - 1] || '\t' == buf[i - 1]) | /* | ||
if (1 == i || ('\\' != buf[i - 2])) | * End-of-sentence check. If the last character is an unescaped | ||
if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) | * EOS character, then flag the node as being the end of a | ||
return(0); | * sentence. The front-end will know how to interpret this. | ||
*/ | |||
/* Allocate the whole word. */ | assert(buf < end); | ||
return(mdoc_word_alloc(m, line, 0, buf)); | if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) | ||
} | mdoc->last->flags |= MDOC_EOS; | ||
return(1); | |||
static int | |||
macrowarn(struct mdoc *m, int ln, const char *buf) | |||
{ | |||
if ( ! (MDOC_IGN_MACRO & m->pflags)) | |||
return(mdoc_verr(m, ln, 0, | |||
"unknown macro: %s%s", | |||
buf, strlen(buf) > 3 ? "..." : "")); | |||
return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", | |||
buf, strlen(buf) > 3 ? "..." : "")); | |||
} | } | ||
/* | /* | ||
* Parse a macro line, that is, a line beginning with the control | * Parse a macro line, that is, a line beginning with the control | ||
* character. | * character. | ||
*/ | */ | ||
int | static int | ||
mdoc_pmacro(struct mdoc *m, int ln, char *buf) | mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) | ||
{ | { | ||
int i, j, c; | struct mdoc_node *n; | ||
const char *cp; | |||
enum mdoct tok; | |||
int i, sv; | |||
char mac[5]; | char mac[5]; | ||
/* Empty lines are ignored. */ | sv = offs; | ||
if ('\0' == buf[1]) | /* | ||
return(1); | * Copy the first word into a nil-terminated buffer. | ||
* Stop when a space, tab, escape, or eoln is encountered. | |||
*/ | |||
i = 1; | i = 0; | ||
while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) | |||
mac[i++] = buf[offs++]; | |||
/* Accept whitespace after the initial control char. */ | mac[i] = '\0'; | ||
if (' ' == buf[i]) { | tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; | ||
i++; | |||
while (buf[i] && ' ' == buf[i]) | if (tok == MDOC_MAX) { | ||
i++; | mandoc_msg(MANDOCERR_MACRO, mdoc->parse, | ||
if ('\0' == buf[i]) | ln, sv, buf + sv - 1); | ||
return(1); | return(1); | ||
} | } | ||
/* Copy the first word into a nil-terminated buffer. */ | /* Skip a leading escape sequence or tab. */ | ||
for (j = 0; j < 4; j++, i++) { | switch (buf[offs]) { | ||
if ('\0' == (mac[j] = buf[i])) | case '\\': | ||
break; | cp = buf + offs + 1; | ||
else if (' ' == buf[i]) | mandoc_escape(&cp, NULL, NULL); | ||
break; | offs = cp - buf; | ||
break; | |||
case '\t': | |||
offs++; | |||
break; | |||
default: | |||
break; | |||
} | |||
/* Check for invalid characters. */ | /* Jump to the next non-whitespace word. */ | ||
if (isgraph((u_char)buf[i])) | while (buf[offs] && ' ' == buf[offs]) | ||
continue; | offs++; | ||
return(mdoc_perr(m, ln, i, EPRINT)); | |||
} | |||
mac[j] = 0; | /* | ||
* Trailing whitespace. Note that tabs are allowed to be passed | |||
* into the parser as "text", so we only warn about spaces here. | |||
*/ | |||
if (j == 4 || j < 2) { | if ('\0' == buf[offs] && ' ' == buf[offs - 1]) | ||
if ( ! macrowarn(m, ln, mac)) | mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, | ||
goto err; | ln, offs - 1, NULL); | ||
return(1); | |||
} | |||
if (MDOC_MAX == (c = mdoc_hash_find(mac))) { | |||
if ( ! macrowarn(m, ln, mac)) | |||
goto err; | |||
return(1); | |||
} | |||
/* The macro is sane. Jump to the next word. */ | /* | ||
* If an initial macro or a list invocation, divert directly | |||
* into macro processing. | |||
*/ | |||
while (buf[i] && ' ' == buf[i]) | if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) | ||
i++; | return(mdoc_macro(mdoc, tok, ln, sv, &offs, buf)); | ||
/* Trailing whitespace? */ | n = mdoc->last; | ||
assert(mdoc->last); | |||
if ('\0' == buf[i] && ' ' == buf[i - 1]) | /* | ||
if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS)) | * If the first macro of a `Bl -column', open an `It' block | ||
goto err; | * context around the parsed macro. | ||
*/ | |||
/* | if (MDOC_Bl == n->tok && MDOC_BODY == n->type && | ||
* Begin recursive parse sequence. Since we're at the start of | LIST_column == n->norm->Bl.type) { | ||
* the line, we don't need to do callable/parseable checks. | mdoc->flags |= MDOC_FREECOL; | ||
return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)); | |||
} | |||
/* | |||
* If we're following a block-level `It' within a `Bl -column' | |||
* context (perhaps opened in the above block or in ptext()), | |||
* then open an `It' block context around the parsed macro. | |||
*/ | */ | ||
if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) | |||
goto err; | |||
if (MDOC_It == n->tok && MDOC_BLOCK == n->type && | |||
NULL != n->parent && | |||
MDOC_Bl == n->parent->tok && | |||
LIST_column == n->parent->norm->Bl.type) { | |||
mdoc->flags |= MDOC_FREECOL; | |||
return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)); | |||
} | |||
/* Normal processing of a macro. */ | |||
if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) | |||
return(0); | |||
/* In quick mode (for mandocdb), abort after the NAME section. */ | |||
if (mdoc->quick && MDOC_Sh == tok && | |||
SEC_NAME != mdoc->last->sec) | |||
return(2); | |||
return(1); | return(1); | ||
} | |||
err: /* Error out. */ | enum mdelim | ||
mdoc_isdelim(const char *p) | |||
{ | |||
m->flags |= MDOC_HALT; | if ('\0' == p[0]) | ||
return(0); | return(DELIM_NONE); | ||
if ('\0' == p[1]) | |||
switch (p[0]) { | |||
case '(': | |||
/* FALLTHROUGH */ | |||
case '[': | |||
return(DELIM_OPEN); | |||
case '|': | |||
return(DELIM_MIDDLE); | |||
case '.': | |||
/* FALLTHROUGH */ | |||
case ',': | |||
/* FALLTHROUGH */ | |||
case ';': | |||
/* FALLTHROUGH */ | |||
case ':': | |||
/* FALLTHROUGH */ | |||
case '?': | |||
/* FALLTHROUGH */ | |||
case '!': | |||
/* FALLTHROUGH */ | |||
case ')': | |||
/* FALLTHROUGH */ | |||
case ']': | |||
return(DELIM_CLOSE); | |||
default: | |||
return(DELIM_NONE); | |||
} | |||
if ('\\' != p[0]) | |||
return(DELIM_NONE); | |||
if (0 == strcmp(p + 1, ".")) | |||
return(DELIM_CLOSE); | |||
if (0 == strcmp(p + 1, "fR|\\fP")) | |||
return(DELIM_MIDDLE); | |||
return(DELIM_NONE); | |||
} | } | ||
void | |||
mdoc_deroff(char **dest, const struct mdoc_node *n) | |||
{ | |||
char *cp; | |||
size_t sz; | |||
if (MDOC_TEXT != n->type) { | |||
for (n = n->child; n; n = n->next) | |||
mdoc_deroff(dest, n); | |||
return; | |||
} | |||
/* Skip leading whitespace. */ | |||
for (cp = n->string; '\0' != *cp; cp++) | |||
if (0 == isspace((unsigned char)*cp)) | |||
break; | |||
/* Skip trailing whitespace. */ | |||
for (sz = strlen(cp); sz; sz--) | |||
if (0 == isspace((unsigned char)cp[sz-1])) | |||
break; | |||
/* Skip empty strings. */ | |||
if (0 == sz) | |||
return; | |||
if (NULL == *dest) { | |||
*dest = mandoc_strndup(cp, sz); | |||
return; | |||
} | |||
mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); | |||
free(*dest); | |||
*dest = cp; | |||
} |