version 1.59, 2009/03/09 13:35:09 |
version 1.73, 2009/03/31 13:50:19 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se> |
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the |
* purpose with or without fee is hereby granted, provided that the |
|
|
*/ |
*/ |
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#include <err.h> |
|
#include <stdarg.h> |
#include <stdarg.h> |
#include <stdlib.h> |
|
#include <stdio.h> |
#include <stdio.h> |
|
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
|
|
#include "private.h" |
#include "libmdoc.h" |
|
|
/* |
enum merr { |
* Main caller in the libmdoc library. This begins the parsing routine, |
ENOCALL, |
* handles allocation of data, and so forth. Most of the "work" is done |
EBODYPROL, |
* in macro.c and validate.c. |
EPROLBODY, |
*/ |
ESPACE, |
|
ETEXTPROL, |
|
ENOBLANK, |
|
EMALLOC |
|
}; |
|
|
static struct mdoc_node *mdoc_node_alloc(const struct mdoc *); |
|
static int mdoc_node_append(struct mdoc *, |
|
struct mdoc_node *); |
|
|
|
static int parsetext(struct mdoc *, int, char *); |
|
static int parsemacro(struct mdoc *, int, char *); |
|
static int macrowarn(struct mdoc *, int, const char *); |
|
|
|
|
|
const char *const __mdoc_macronames[MDOC_MAX] = { |
const char *const __mdoc_macronames[MDOC_MAX] = { |
"\\\"", "Dd", "Dt", "Os", |
"\\\"", "Dd", "Dt", "Os", |
"Sh", "Ss", "Pp", "D1", |
"Sh", "Ss", "Pp", "D1", |
Line 72 const char *const __mdoc_macronames[MDOC_MAX] = { |
|
Line 66 const char *const __mdoc_macronames[MDOC_MAX] = { |
|
"Fo", "Fc", "Oo", "Oc", |
"Fo", "Fc", "Oo", "Oc", |
"Bk", "Ek", "Bt", "Hf", |
"Bk", "Ek", "Bt", "Hf", |
"Fr", "Ud", "Lb", "Ap", |
"Fr", "Ud", "Lb", "Ap", |
"Lp" |
"Lp", "Lk", "Mt", "Brq", |
|
/* LINTED */ |
|
"Bro", "Brc", "\%C", "Es", |
|
/* LINTED */ |
|
"En", "Dx", "\%Q" |
}; |
}; |
|
|
const char *const __mdoc_argnames[MDOC_ARG_MAX] = { |
const char *const __mdoc_argnames[MDOC_ARG_MAX] = { |
Line 84 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { |
|
Line 82 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { |
|
"ohang", "inset", "column", |
"ohang", "inset", "column", |
"width", "compact", "std", |
"width", "compact", "std", |
"filled", "words", "emphasis", |
"filled", "words", "emphasis", |
"symbolic" |
"symbolic", "nested" |
}; |
}; |
|
|
const char * const *mdoc_macronames = __mdoc_macronames; |
const char * const *mdoc_macronames = __mdoc_macronames; |
const char * const *mdoc_argnames = __mdoc_argnames; |
const char * const *mdoc_argnames = __mdoc_argnames; |
|
|
|
static void mdoc_free1(struct mdoc *); |
|
static int mdoc_alloc1(struct mdoc *); |
|
static struct mdoc_node *node_alloc(struct mdoc *, int, int, |
|
int, enum mdoc_type); |
|
static int node_append(struct mdoc *, |
|
struct mdoc_node *); |
|
static int parsetext(struct mdoc *, int, char *); |
|
static int parsemacro(struct mdoc *, int, char *); |
|
static int macrowarn(struct mdoc *, int, const char *); |
|
static int perr(struct mdoc *, int, int, enum merr); |
|
|
|
#define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t)) |
|
|
|
/* |
|
* Get the first (root) node of the parse tree. |
|
*/ |
const struct mdoc_node * |
const struct mdoc_node * |
mdoc_node(const struct mdoc *mdoc) |
mdoc_node(const struct mdoc *m) |
{ |
{ |
|
|
return(mdoc->first); |
return(MDOC_HALT & m->flags ? NULL : m->first); |
} |
} |
|
|
|
|
const struct mdoc_meta * |
const struct mdoc_meta * |
mdoc_meta(const struct mdoc *mdoc) |
mdoc_meta(const struct mdoc *m) |
{ |
{ |
|
|
return(&mdoc->meta); |
return(MDOC_HALT & m->flags ? NULL : &m->meta); |
} |
} |
|
|
|
|
void |
static void |
mdoc_free(struct mdoc *mdoc) |
mdoc_free1(struct mdoc *mdoc) |
{ |
{ |
|
|
if (mdoc->first) |
if (mdoc->first) |
mdoc_node_freelist(mdoc->first); |
mdoc_node_freelist(mdoc->first); |
if (mdoc->htab) |
|
mdoc_tokhash_free(mdoc->htab); |
|
if (mdoc->meta.title) |
if (mdoc->meta.title) |
free(mdoc->meta.title); |
free(mdoc->meta.title); |
if (mdoc->meta.os) |
if (mdoc->meta.os) |
Line 125 mdoc_free(struct mdoc *mdoc) |
|
Line 136 mdoc_free(struct mdoc *mdoc) |
|
free(mdoc->meta.arch); |
free(mdoc->meta.arch); |
if (mdoc->meta.vol) |
if (mdoc->meta.vol) |
free(mdoc->meta.vol); |
free(mdoc->meta.vol); |
|
} |
|
|
|
|
|
static int |
|
mdoc_alloc1(struct mdoc *mdoc) |
|
{ |
|
|
|
bzero(&mdoc->meta, sizeof(struct mdoc_meta)); |
|
mdoc->flags = 0; |
|
mdoc->lastnamed = mdoc->lastsec = 0; |
|
mdoc->last = calloc(1, sizeof(struct mdoc_node)); |
|
if (NULL == mdoc->last) |
|
return(0); |
|
|
|
mdoc->first = mdoc->last; |
|
mdoc->last->type = MDOC_ROOT; |
|
mdoc->next = MDOC_NEXT_CHILD; |
|
return(1); |
|
} |
|
|
|
|
|
/* |
|
* Free up all resources contributed by a parse: the node tree, |
|
* meta-data and so on. Then reallocate the root node for another |
|
* parse. |
|
*/ |
|
int |
|
mdoc_reset(struct mdoc *mdoc) |
|
{ |
|
|
|
mdoc_free1(mdoc); |
|
return(mdoc_alloc1(mdoc)); |
|
} |
|
|
|
|
|
/* |
|
* Completely free up all resources. |
|
*/ |
|
void |
|
mdoc_free(struct mdoc *mdoc) |
|
{ |
|
|
|
mdoc_free1(mdoc); |
|
if (mdoc->htab) |
|
mdoc_tokhash_free(mdoc->htab); |
free(mdoc); |
free(mdoc); |
} |
} |
|
|
Line 135 mdoc_alloc(void *data, int pflags, const struct mdoc_c |
|
Line 190 mdoc_alloc(void *data, int pflags, const struct mdoc_c |
|
{ |
{ |
struct mdoc *p; |
struct mdoc *p; |
|
|
p = xcalloc(1, sizeof(struct mdoc)); |
if (NULL == (p = calloc(1, sizeof(struct mdoc)))) |
|
return(NULL); |
|
|
p->data = data; |
p->data = data; |
|
p->htab = mdoc_tokhash_alloc(); |
|
p->pflags = pflags; |
|
|
if (cb) |
if (cb) |
(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); |
(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); |
|
|
p->last = xcalloc(1, sizeof(struct mdoc_node)); |
if (mdoc_alloc1(p)) |
p->last->type = MDOC_ROOT; |
return(p); |
p->first = p->last; |
free(p); |
p->pflags = pflags; |
return(NULL); |
p->next = MDOC_NEXT_CHILD; |
|
p->htab = mdoc_tokhash_alloc(); |
|
|
|
return(p); |
|
} |
} |
|
|
|
|
|
/* |
|
* Climb back up the parse tree, validating open scopes. Mostly calls |
|
* through to macro_end in macro.c. |
|
*/ |
int |
int |
mdoc_endparse(struct mdoc *mdoc) |
mdoc_endparse(struct mdoc *m) |
{ |
{ |
|
|
if (MDOC_HALT & mdoc->flags) |
if (MDOC_HALT & m->flags) |
return(0); |
return(0); |
if (NULL == mdoc->first) |
else if (mdoc_macroend(m)) |
return(1); |
return(1); |
|
m->flags |= MDOC_HALT; |
assert(mdoc->last); |
return(0); |
if ( ! macro_end(mdoc)) { |
|
mdoc->flags |= MDOC_HALT; |
|
return(0); |
|
} |
|
return(1); |
|
} |
} |
|
|
|
|
Line 247 mdoc_macro(struct mdoc *m, int tok, |
|
Line 301 mdoc_macro(struct mdoc *m, int tok, |
|
|
|
if (MDOC_PROLOGUE & mdoc_macros[tok].flags && |
if (MDOC_PROLOGUE & mdoc_macros[tok].flags && |
SEC_PROLOGUE != m->lastnamed) |
SEC_PROLOGUE != m->lastnamed) |
return(mdoc_perr(m, ln, pp, |
return(perr(m, ln, pp, EPROLBODY)); |
"disallowed in document body")); |
|
|
|
if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && |
if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && |
SEC_PROLOGUE == m->lastnamed) |
SEC_PROLOGUE == m->lastnamed) |
return(mdoc_perr(m, ln, pp, |
return(perr(m, ln, pp, EBODYPROL)); |
"disallowed in prologue")); |
|
|
|
if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) |
if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) |
return(mdoc_perr(m, ln, pp, "not callable")); |
return(perr(m, ln, pp, ENOCALL)); |
|
|
return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); |
return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); |
} |
} |
|
|
|
|
static int |
static int |
mdoc_node_append(struct mdoc *mdoc, struct mdoc_node *p) |
perr(struct mdoc *m, int line, int pos, enum merr type) |
{ |
{ |
|
char *p; |
|
|
|
p = NULL; |
|
switch (type) { |
|
case (ENOCALL): |
|
p = "not callable"; |
|
break; |
|
case (EPROLBODY): |
|
p = "macro disallowed in document body"; |
|
break; |
|
case (EBODYPROL): |
|
p = "macro disallowed in document prologue"; |
|
break; |
|
case (EMALLOC): |
|
p = "memory exhausted"; |
|
break; |
|
case (ETEXTPROL): |
|
p = "text disallowed in document prologue"; |
|
break; |
|
case (ENOBLANK): |
|
p = "blank lines disallowed in non-literal contexts"; |
|
break; |
|
case (ESPACE): |
|
p = "whitespace disallowed after delimiter"; |
|
break; |
|
} |
|
assert(p); |
|
return(mdoc_perr(m, line, pos, p)); |
|
} |
|
|
|
|
|
static int |
|
node_append(struct mdoc *mdoc, struct mdoc_node *p) |
|
{ |
|
|
assert(mdoc->last); |
assert(mdoc->last); |
assert(mdoc->first); |
assert(mdoc->first); |
assert(MDOC_ROOT != p->type); |
assert(MDOC_ROOT != p->type); |
Line 287 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node * |
|
Line 373 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node * |
|
|
|
if ( ! mdoc_valid_pre(mdoc, p)) |
if ( ! mdoc_valid_pre(mdoc, p)) |
return(0); |
return(0); |
|
if ( ! mdoc_action_pre(mdoc, p)) |
|
return(0); |
|
|
switch (p->type) { |
switch (p->type) { |
case (MDOC_HEAD): |
case (MDOC_HEAD): |
Line 306 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node * |
|
Line 394 mdoc_node_append(struct mdoc *mdoc, struct mdoc_node * |
|
} |
} |
|
|
mdoc->last = p; |
mdoc->last = p; |
|
|
|
switch (p->type) { |
|
case (MDOC_TEXT): |
|
if ( ! mdoc_valid_post(mdoc)) |
|
return(0); |
|
if ( ! mdoc_action_post(mdoc)) |
|
return(0); |
|
break; |
|
default: |
|
break; |
|
} |
|
|
return(1); |
return(1); |
} |
} |
|
|
|
|
static struct mdoc_node * |
static struct mdoc_node * |
mdoc_node_alloc(const struct mdoc *mdoc) |
node_alloc(struct mdoc *mdoc, int line, |
|
int pos, int tok, enum mdoc_type type) |
{ |
{ |
struct mdoc_node *p; |
struct mdoc_node *p; |
|
|
p = xcalloc(1, sizeof(struct mdoc_node)); |
if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { |
|
(void)verr(mdoc, EMALLOC); |
|
return(NULL); |
|
} |
|
|
p->sec = mdoc->lastsec; |
p->sec = mdoc->lastsec; |
|
p->line = line; |
|
p->pos = pos; |
|
p->tok = tok; |
|
if (MDOC_TEXT != (p->type = type)) |
|
assert(p->tok >= 0); |
|
|
return(p); |
return(p); |
} |
} |
Line 327 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, |
|
Line 437 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, |
|
{ |
{ |
struct mdoc_node *p; |
struct mdoc_node *p; |
|
|
assert(mdoc->first); |
p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); |
assert(mdoc->last); |
if (NULL == p) |
|
return(0); |
p = mdoc_node_alloc(mdoc); |
return(node_append(mdoc, p)); |
|
|
p->line = line; |
|
p->pos = pos; |
|
p->type = MDOC_TAIL; |
|
p->tok = tok; |
|
|
|
return(mdoc_node_append(mdoc, p)); |
|
} |
} |
|
|
|
|
Line 349 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, |
|
Line 452 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, |
|
assert(mdoc->first); |
assert(mdoc->first); |
assert(mdoc->last); |
assert(mdoc->last); |
|
|
p = mdoc_node_alloc(mdoc); |
p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); |
|
if (NULL == p) |
p->line = line; |
return(0); |
p->pos = pos; |
return(node_append(mdoc, p)); |
p->type = MDOC_HEAD; |
|
p->tok = tok; |
|
|
|
return(mdoc_node_append(mdoc, p)); |
|
} |
} |
|
|
|
|
Line 365 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, |
|
Line 464 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, |
|
{ |
{ |
struct mdoc_node *p; |
struct mdoc_node *p; |
|
|
assert(mdoc->first); |
p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); |
assert(mdoc->last); |
if (NULL == p) |
|
return(0); |
p = mdoc_node_alloc(mdoc); |
return(node_append(mdoc, p)); |
|
|
p->line = line; |
|
p->pos = pos; |
|
p->type = MDOC_BODY; |
|
p->tok = tok; |
|
|
|
return(mdoc_node_append(mdoc, p)); |
|
} |
} |
|
|
|
|
int |
int |
mdoc_root_alloc(struct mdoc *mdoc) |
|
{ |
|
struct mdoc_node *p; |
|
|
|
p = mdoc_node_alloc(mdoc); |
|
|
|
p->type = MDOC_ROOT; |
|
|
|
return(mdoc_node_append(mdoc, p)); |
|
} |
|
|
|
|
|
int |
|
mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, |
mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, |
int tok, struct mdoc_arg *args) |
int tok, struct mdoc_arg *args) |
{ |
{ |
struct mdoc_node *p; |
struct mdoc_node *p; |
|
|
p = mdoc_node_alloc(mdoc); |
p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); |
|
if (NULL == p) |
p->pos = pos; |
return(0); |
p->line = line; |
if ((p->args = args)) |
p->type = MDOC_BLOCK; |
|
p->tok = tok; |
|
p->args = args; |
|
|
|
if (args) |
|
(args->refcnt)++; |
(args->refcnt)++; |
|
return(node_append(mdoc, p)); |
return(mdoc_node_append(mdoc, p)); |
|
} |
} |
|
|
|
|
Line 419 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, |
|
Line 492 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, |
|
{ |
{ |
struct mdoc_node *p; |
struct mdoc_node *p; |
|
|
p = mdoc_node_alloc(mdoc); |
p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); |
|
if (NULL == p) |
p->line = line; |
return(0); |
p->pos = pos; |
if ((p->args = args)) |
p->type = MDOC_ELEM; |
|
p->tok = tok; |
|
p->args = args; |
|
|
|
if (args) |
|
(args->refcnt)++; |
(args->refcnt)++; |
|
return(node_append(mdoc, p)); |
return(mdoc_node_append(mdoc, p)); |
|
} |
} |
|
|
|
|
Line 440 mdoc_word_alloc(struct mdoc *mdoc, |
|
Line 507 mdoc_word_alloc(struct mdoc *mdoc, |
|
{ |
{ |
struct mdoc_node *p; |
struct mdoc_node *p; |
|
|
p = mdoc_node_alloc(mdoc); |
p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT); |
|
if (NULL == p) |
p->line = line; |
return(0); |
p->pos = pos; |
if (NULL == (p->string = strdup(word))) { |
p->type = MDOC_TEXT; |
(void)verr(mdoc, EMALLOC); |
p->string = xstrdup(word); |
return(0); |
|
} |
return(mdoc_node_append(mdoc, p)); |
return(node_append(mdoc, p)); |
} |
} |
|
|
|
|
Line 481 mdoc_node_freelist(struct mdoc_node *p) |
|
Line 548 mdoc_node_freelist(struct mdoc_node *p) |
|
* control character. |
* control character. |
*/ |
*/ |
static int |
static int |
parsetext(struct mdoc *mdoc, int line, char *buf) |
parsetext(struct mdoc *m, int line, char *buf) |
{ |
{ |
|
|
if (SEC_PROLOGUE == mdoc->lastnamed) |
if (SEC_PROLOGUE == m->lastnamed) |
return(mdoc_perr(mdoc, line, 0, |
return(perr(m, line, 0, ETEXTPROL)); |
"text disallowed in prologue")); |
|
|
|
if ( ! mdoc_word_alloc(mdoc, line, 0, buf)) |
if (0 == buf[0] && ! (MDOC_LITERAL & m->flags)) |
|
return(perr(m, line, 0, ENOBLANK)); |
|
|
|
if ( ! mdoc_word_alloc(m, line, 0, buf)) |
return(0); |
return(0); |
|
|
mdoc->next = MDOC_NEXT_SIBLING; |
m->next = MDOC_NEXT_SIBLING; |
return(1); |
return(1); |
} |
} |
|
|
|
|
macrowarn(struct mdoc *m, int ln, const char *buf) |
macrowarn(struct mdoc *m, int ln, const char *buf) |
{ |
{ |
if ( ! (MDOC_IGN_MACRO & m->pflags)) |
if ( ! (MDOC_IGN_MACRO & m->pflags)) |
return(mdoc_perr(m, ln, 1, "unknown macro: %s%s", |
return(mdoc_perr(m, ln, 1, |
|
"unknown macro: %s%s", |
buf, strlen(buf) > 3 ? "..." : "")); |
buf, strlen(buf) > 3 ? "..." : "")); |
return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX, |
return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX, |
"unknown macro: %s%s", |
"unknown macro: %s%s", |
Line 519 parsemacro(struct mdoc *m, int ln, char *buf) |
|
Line 589 parsemacro(struct mdoc *m, int ln, char *buf) |
|
int i, c; |
int i, c; |
char mac[5]; |
char mac[5]; |
|
|
/* Comments are quickly ignored. */ |
/* Comments and empties are quickly ignored. */ |
|
|
|
if (0 == buf[1]) |
|
return(1); |
|
|
|
if (' ' == buf[1]) { |
|
i = 2; |
|
while (buf[i] && ' ' == buf[i]) |
|
i++; |
|
if (0 == buf[i]) |
|
return(1); |
|
return(perr(m, ln, 1, ESPACE)); |
|
} |
|
|
if (buf[1] && '\\' == buf[1]) |
if (buf[1] && '\\' == buf[1]) |
if (buf[2] && '\"' == buf[2]) |
if (buf[2] && '\"' == buf[2]) |
return(1); |
return(1); |
Line 530 parsemacro(struct mdoc *m, int ln, char *buf) |
|
Line 612 parsemacro(struct mdoc *m, int ln, char *buf) |
|
for (i = 1; i < 5; i++) { |
for (i = 1; i < 5; i++) { |
if (0 == (mac[i - 1] = buf[i])) |
if (0 == (mac[i - 1] = buf[i])) |
break; |
break; |
else if (isspace((unsigned char)buf[i])) |
else if (' ' == buf[i]) |
break; |
break; |
} |
} |
|
|
/* FIXME: be able to skip unknown macro lines! */ |
|
|
|
mac[i - 1] = 0; |
mac[i - 1] = 0; |
|
|
if (i == 5 || i <= 2) { |
if (i == 5 || i <= 2) { |
Line 552 parsemacro(struct mdoc *m, int ln, char *buf) |
|
Line 632 parsemacro(struct mdoc *m, int ln, char *buf) |
|
|
|
/* The macro is sane. Jump to the next word. */ |
/* The macro is sane. Jump to the next word. */ |
|
|
while (buf[i] && isspace((unsigned char)buf[i])) |
while (buf[i] && ' ' == buf[i]) |
i++; |
i++; |
|
|
/* Begin recursive parse sequence. */ |
/* Begin recursive parse sequence. */ |