version 1.17, 2009/04/12 19:30:45 |
version 1.182, 2018/12/30 00:49:55 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org> |
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> |
|
* Copyright (c) 2013,2014,2015,2017,2018 Ingo Schwarze <schwarze@openbsd.org> |
|
* Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the |
* purpose with or without fee is hereby granted, provided that the above |
* above copyright notice and this permission notice appear in all |
* copyright notice and this permission notice appear in all copies. |
* copies. |
|
* |
* |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL |
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED |
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE |
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR |
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL |
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* PERFORMANCE OF THIS SOFTWARE. |
|
*/ |
*/ |
|
#include "config.h" |
|
|
|
#include <sys/types.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <ctype.h> |
#include <ctype.h> |
#include <stdarg.h> |
#include <stdarg.h> |
|
|
#include <stdio.h> |
#include <stdio.h> |
#include <string.h> |
#include <string.h> |
|
|
|
#include "mandoc_aux.h" |
|
#include "mandoc.h" |
|
#include "roff.h" |
|
#include "man.h" |
|
#include "libmandoc.h" |
|
#include "roff_int.h" |
#include "libman.h" |
#include "libman.h" |
|
|
const char *const __man_macronames[MAN_MAX] = { |
static char *man_hasc(char *); |
"\\\"", "TH", "SH", "SS", |
static int man_ptext(struct roff_man *, int, char *, int); |
"TP", "LP", "PP", "P", |
static int man_pmacro(struct roff_man *, int, char *, int); |
"IP", "HP", "SM", "SB", |
|
"BI", "IB", "BR", "RB", |
|
"R", "B", "I", "IR", |
|
"RI", "br", "na", "i" |
|
}; |
|
|
|
const char * const *man_macronames = __man_macronames; |
|
|
|
static struct man_node *man_node_alloc(int, int, |
int |
enum man_type, int); |
man_parseln(struct roff_man *man, int ln, char *buf, int offs) |
static int man_node_append(struct man *, |
|
struct man_node *); |
|
static int man_ptext(struct man *, int, char *); |
|
static int man_pmacro(struct man *, int, char *); |
|
static void man_free1(struct man *); |
|
static int man_alloc1(struct man *); |
|
|
|
|
|
const struct man_node * |
|
man_node(const struct man *m) |
|
{ |
{ |
|
|
return(MAN_HALT & m->flags ? NULL : m->first); |
if (man->last->type != ROFFT_EQN || ln > man->last->line) |
} |
man->flags |= MAN_NEWLINE; |
|
|
|
return roff_getcontrol(man->roff, buf, &offs) ? |
const struct man_meta * |
man_pmacro(man, ln, buf, offs) : |
man_meta(const struct man *m) |
man_ptext(man, ln, buf, offs); |
{ |
|
|
|
return(MAN_HALT & m->flags ? NULL : &m->meta); |
|
} |
} |
|
|
|
/* |
int |
* If the string ends with \c, return a pointer to the backslash. |
man_reset(struct man *man) |
* Otherwise, return NULL. |
|
*/ |
|
static char * |
|
man_hasc(char *start) |
{ |
{ |
|
char *cp, *ep; |
|
|
man_free1(man); |
ep = strchr(start, '\0') - 2; |
return(man_alloc1(man)); |
if (ep < start || ep[0] != '\\' || ep[1] != 'c') |
|
return NULL; |
|
for (cp = ep; cp > start; cp--) |
|
if (cp[-1] != '\\') |
|
break; |
|
return (ep - cp) % 2 ? NULL : ep; |
} |
} |
|
|
|
|
void |
void |
man_free(struct man *man) |
man_descope(struct roff_man *man, int line, int offs, char *start) |
{ |
{ |
|
/* Trailing \c keeps next-line scope open. */ |
|
|
man_free1(man); |
if (start != NULL && man_hasc(start) != NULL) |
|
return; |
|
|
if (man->htab) |
/* |
man_hash_free(man->htab); |
* Co-ordinate what happens with having a next-line scope open: |
free(man); |
* first close out the element scopes (if applicable), |
|
* then close out the block scope (also if applicable). |
|
*/ |
|
|
|
if (man->flags & MAN_ELINE) { |
|
while (man->last->parent->type != ROFFT_ROOT && |
|
man_macro(man->last->parent->tok)->flags & MAN_ESCOPED) |
|
man_unscope(man, man->last->parent); |
|
man->flags &= ~MAN_ELINE; |
|
} |
|
if ( ! (man->flags & MAN_BLINE)) |
|
return; |
|
man->flags &= ~MAN_BLINE; |
|
man_unscope(man, man->last->parent); |
|
roff_body_alloc(man, line, offs, man->last->tok); |
} |
} |
|
|
|
static int |
struct man * |
man_ptext(struct roff_man *man, int line, char *buf, int offs) |
man_alloc(void *data, int pflags, const struct man_cb *cb) |
|
{ |
{ |
struct man *p; |
int i; |
|
char *ep; |
|
|
if (NULL == (p = calloc(1, sizeof(struct man)))) |
/* Literal free-form text whitespace is preserved. */ |
return(NULL); |
|
|
|
if ( ! man_alloc1(p)) { |
if (man->flags & MAN_LITERAL) { |
free(p); |
roff_word_alloc(man, line, offs, buf + offs); |
return(NULL); |
man_descope(man, line, offs, buf + offs); |
|
return 1; |
} |
} |
|
|
p->data = data; |
for (i = offs; buf[i] == ' '; i++) |
p->pflags = pflags; |
/* Skip leading whitespace. */ ; |
(void)memcpy(&p->cb, cb, sizeof(struct man_cb)); |
|
|
|
if (NULL == (p->htab = man_hash_alloc())) { |
/* |
free(p); |
* Blank lines are ignored in next line scope |
return(NULL); |
* and right after headings and cancel preceding \c, |
|
* but add a single vertical space elsewhere. |
|
*/ |
|
|
|
if (buf[i] == '\0') { |
|
if (man->flags & (MAN_ELINE | MAN_BLINE)) { |
|
mandoc_msg(MANDOCERR_BLK_BLANK, line, 0, NULL); |
|
return 1; |
|
} |
|
if (man->last->tok == MAN_SH || man->last->tok == MAN_SS) |
|
return 1; |
|
if (man->last->type == ROFFT_TEXT && |
|
((ep = man_hasc(man->last->string)) != NULL)) { |
|
*ep = '\0'; |
|
return 1; |
|
} |
|
roff_elem_alloc(man, line, offs, ROFF_sp); |
|
man->next = ROFF_NEXT_SIBLING; |
|
return 1; |
} |
} |
return(p); |
|
} |
|
|
|
|
/* |
|
* Warn if the last un-escaped character is whitespace. Then |
|
* strip away the remaining spaces (tabs stay!). |
|
*/ |
|
|
int |
i = (int)strlen(buf); |
man_endparse(struct man *m) |
assert(i); |
{ |
|
|
|
if (MAN_HALT & m->flags) |
if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { |
return(0); |
if (i > 1 && '\\' != buf[i - 2]) |
else if (man_macroend(m)) |
mandoc_msg(MANDOCERR_SPACE_EOL, line, i - 1, NULL); |
return(1); |
|
m->flags |= MAN_HALT; |
|
return(0); |
|
} |
|
|
|
|
for (--i; i && ' ' == buf[i]; i--) |
|
/* Spin back to non-space. */ ; |
|
|
int |
/* Jump ahead of escaped whitespace. */ |
man_parseln(struct man *m, int ln, char *buf) |
i += '\\' == buf[i] ? 2 : 1; |
{ |
|
|
|
return('.' == *buf ? |
buf[i] = '\0'; |
man_pmacro(m, ln, buf) : |
} |
man_ptext(m, ln, buf)); |
roff_word_alloc(man, line, offs, buf + offs); |
} |
|
|
|
|
/* |
|
* End-of-sentence check. If the last character is an unescaped |
|
* EOS character, then flag the node as being the end of a |
|
* sentence. The front-end will know how to interpret this. |
|
*/ |
|
|
static void |
assert(i); |
man_free1(struct man *man) |
if (mandoc_eos(buf, (size_t)i)) |
{ |
man->last->flags |= NODE_EOS; |
|
|
if (man->first) |
man_descope(man, line, offs, buf + offs); |
man_node_freelist(man->first); |
return 1; |
if (man->meta.title) |
|
free(man->meta.title); |
|
if (man->meta.source) |
|
free(man->meta.source); |
|
if (man->meta.vol) |
|
free(man->meta.vol); |
|
} |
} |
|
|
|
|
static int |
static int |
man_alloc1(struct man *m) |
man_pmacro(struct roff_man *man, int ln, char *buf, int offs) |
{ |
{ |
|
struct roff_node *n; |
|
const char *cp; |
|
size_t sz; |
|
enum roff_tok tok; |
|
int ppos; |
|
int bline; |
|
|
bzero(&m->meta, sizeof(struct man_meta)); |
/* Determine the line macro. */ |
m->flags = 0; |
|
m->last = calloc(1, sizeof(struct man_node)); |
|
if (NULL == m->last) |
|
return(0); |
|
m->first = m->last; |
|
m->last->type = MAN_ROOT; |
|
m->next = MAN_NEXT_CHILD; |
|
return(1); |
|
} |
|
|
|
|
ppos = offs; |
|
tok = TOKEN_NONE; |
|
for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) |
|
offs++; |
|
if (sz > 0 && sz < 4) |
|
tok = roffhash_find(man->manmac, buf + ppos, sz); |
|
if (tok == TOKEN_NONE) { |
|
mandoc_msg(MANDOCERR_MACRO, ln, ppos, "%s", buf + ppos - 1); |
|
return 1; |
|
} |
|
|
static int |
/* Skip a leading escape sequence or tab. */ |
man_node_append(struct man *man, struct man_node *p) |
|
{ |
|
|
|
assert(man->last); |
switch (buf[offs]) { |
assert(man->first); |
case '\\': |
assert(MAN_ROOT != p->type); |
cp = buf + offs + 1; |
|
mandoc_escape(&cp, NULL, NULL); |
switch (man->next) { |
offs = cp - buf; |
case (MAN_NEXT_SIBLING): |
|
man->last->next = p; |
|
p->prev = man->last; |
|
p->parent = man->last->parent; |
|
break; |
break; |
case (MAN_NEXT_CHILD): |
case '\t': |
man->last->child = p; |
offs++; |
p->parent = man->last; |
|
break; |
break; |
default: |
default: |
abort(); |
|
/* NOTREACHED */ |
|
} |
|
|
|
man->last = p; |
|
|
|
switch (p->type) { |
|
case (MAN_TEXT): |
|
if ( ! man_valid_post(man)) |
|
return(0); |
|
if ( ! man_action_post(man)) |
|
return(0); |
|
break; |
break; |
default: |
|
break; |
|
} |
} |
|
|
return(1); |
/* Jump to the next non-whitespace word. */ |
} |
|
|
|
|
while (buf[offs] == ' ') |
|
offs++; |
|
|
static struct man_node * |
/* |
man_node_alloc(int line, int pos, enum man_type type, int tok) |
* Trailing whitespace. Note that tabs are allowed to be passed |
{ |
* into the parser as "text", so we only warn about spaces here. |
struct man_node *p; |
*/ |
|
|
p = calloc(1, sizeof(struct man_node)); |
if (buf[offs] == '\0' && buf[offs - 1] == ' ') |
if (NULL == p) |
mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL); |
return(NULL); |
|
|
|
p->line = line; |
/* |
p->pos = pos; |
* Some macros break next-line scopes; otherwise, remember |
p->type = type; |
* whether we are in next-line scope for a block head. |
p->tok = tok; |
*/ |
return(p); |
|
} |
|
|
|
|
man_breakscope(man, tok); |
|
bline = man->flags & MAN_BLINE; |
|
|
int |
/* |
man_elem_alloc(struct man *man, int line, int pos, int tok) |
* If the line in next-line scope ends with \c, keep the |
{ |
* next-line scope open for the subsequent input line. |
struct man_node *p; |
* That is not at all portable, only groff >= 1.22.4 |
|
* does it, but *if* this weird idiom occurs in a manual |
|
* page, that's very likely what the author intended. |
|
*/ |
|
|
p = man_node_alloc(line, pos, MAN_ELEM, tok); |
if (bline && man_hasc(buf + offs)) |
if (NULL == p) |
bline = 0; |
return(0); |
|
return(man_node_append(man, p)); |
|
} |
|
|
|
|
/* Call to handler... */ |
|
|
int |
(*man_macro(tok)->fp)(man, tok, ln, ppos, &offs, buf); |
man_word_alloc(struct man *man, |
|
int line, int pos, const char *word) |
|
{ |
|
struct man_node *p; |
|
|
|
p = man_node_alloc(line, pos, MAN_TEXT, -1); |
/* In quick mode (for mandocdb), abort after the NAME section. */ |
if (NULL == p) |
|
return(0); |
|
if (NULL == (p->string = strdup(word))) |
|
return(0); |
|
return(man_node_append(man, p)); |
|
} |
|
|
|
|
if (man->quick && tok == MAN_SH) { |
|
n = man->last; |
|
if (n->type == ROFFT_BODY && |
|
strcmp(n->prev->child->string, "NAME")) |
|
return 2; |
|
} |
|
|
void |
/* |
man_node_free(struct man_node *p) |
* If we are in a next-line scope for a block head, |
{ |
* close it out now and switch to the body, |
|
* unless the next-line scope is allowed to continue. |
|
*/ |
|
|
if (p->string) |
if (bline == 0 || |
free(p->string); |
(man->flags & MAN_BLINE) == 0 || |
free(p); |
man->flags & MAN_ELINE || |
|
man_macro(tok)->flags & MAN_NSCOPED) |
|
return 1; |
|
|
|
man->flags &= ~MAN_BLINE; |
|
man_unscope(man, man->last->parent); |
|
roff_body_alloc(man, ln, ppos, man->last->tok); |
|
return 1; |
} |
} |
|
|
|
|
void |
void |
man_node_freelist(struct man_node *p) |
man_breakscope(struct roff_man *man, int tok) |
{ |
{ |
|
struct roff_node *n; |
|
|
if (p->child) |
|
man_node_freelist(p->child); |
|
if (p->next) |
|
man_node_freelist(p->next); |
|
|
|
man_node_free(p); |
|
} |
|
|
|
|
|
static int |
|
man_ptext(struct man *m, int line, char *buf) |
|
{ |
|
|
|
if ( ! man_word_alloc(m, line, 0, buf)) |
|
return(0); |
|
m->next = MAN_NEXT_SIBLING; |
|
|
|
/* |
/* |
* If this is one of the zany NLINE macros that consumes the |
* An element next line scope is open, |
* next line of input as being influenced, then close out the |
* and the new macro is not allowed inside elements. |
* existing macro "scope" and continue processing. |
* Delete the element that is being broken. |
*/ |
*/ |
|
|
if ( ! (MAN_NLINE & m->flags)) |
if (man->flags & MAN_ELINE && (tok < MAN_TH || |
return(1); |
(man_macro(tok)->flags & MAN_NSCOPED) == 0)) { |
|
n = man->last; |
|
if (n->type == ROFFT_TEXT) |
|
n = n->parent; |
|
if (n->tok < MAN_TH || |
|
(man_macro(n->tok)->flags & (MAN_NSCOPED | MAN_ESCOPED)) |
|
== MAN_NSCOPED) |
|
n = n->parent; |
|
|
m->flags &= ~MAN_NLINE; |
mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos, |
m->last = m->last->parent; |
"%s breaks %s", roff_name[tok], roff_name[n->tok]); |
|
|
assert(MAN_ROOT != m->last->type); |
roff_node_delete(man, n); |
if ( ! man_valid_post(m)) |
man->flags &= ~MAN_ELINE; |
return(0); |
|
if ( ! man_action_post(m)) |
|
return(0); |
|
|
|
return(1); |
|
} |
|
|
|
|
|
int |
|
man_pmacro(struct man *m, int ln, char *buf) |
|
{ |
|
int i, j, c, ppos, fl; |
|
char mac[5]; |
|
struct man_node *n; |
|
|
|
/* Comments and empties are quickly ignored. */ |
|
|
|
n = m->last; |
|
fl = MAN_NLINE & m->flags; |
|
|
|
if (0 == buf[1]) |
|
goto out; |
|
|
|
i = 1; |
|
|
|
if (' ' == buf[i]) { |
|
i++; |
|
while (buf[i] && ' ' == buf[i]) |
|
i++; |
|
if (0 == buf[i]) |
|
goto out; |
|
} |
} |
|
|
ppos = i; |
/* |
|
* Weird special case: |
|
* Switching fill mode closes section headers. |
|
*/ |
|
|
if (buf[i] && '\\' == buf[i]) |
if (man->flags & MAN_BLINE && |
if (buf[i + 1] && '\"' == buf[i + 1]) |
(tok == MAN_nf || tok == MAN_fi) && |
goto out; |
(man->last->tok == MAN_SH || man->last->tok == MAN_SS)) { |
|
n = man->last; |
/* Copy the first word into a nil-terminated buffer. */ |
man_unscope(man, n); |
|
roff_body_alloc(man, n->line, n->pos, n->tok); |
for (j = 0; j < 4; j++, i++) { |
man->flags &= ~MAN_BLINE; |
if (0 == (mac[j] = buf[i])) |
|
break; |
|
else if (' ' == buf[i]) |
|
break; |
|
} |
} |
|
|
mac[j] = 0; |
/* |
|
* A block header next line scope is open, |
|
* and the new macro is not allowed inside block headers. |
|
* Delete the block that is being broken. |
|
*/ |
|
|
if (j == 4 || j < 1) { |
if (man->flags & MAN_BLINE && (tok < MAN_TH || |
if ( ! (MAN_IGN_MACRO & m->pflags)) { |
man_macro(tok)->flags & MAN_XSCOPE)) { |
(void)man_verr(m, ln, ppos, |
n = man->last; |
"ill-formed macro: %s", mac); |
if (n->type == ROFFT_TEXT) |
goto err; |
n = n->parent; |
} |
if (n->tok < MAN_TH || |
if ( ! man_vwarn(m, ln, ppos, |
(man_macro(n->tok)->flags & MAN_XSCOPE) == 0) |
"ill-formed macro: %s", mac)) |
n = n->parent; |
goto err; |
|
return(1); |
|
} |
|
|
|
if (MAN_MAX == (c = man_hash_find(m->htab, mac))) { |
|
if ( ! (MAN_IGN_MACRO & m->pflags)) { |
|
(void)man_verr(m, ln, ppos, |
|
"unknown macro: %s", mac); |
|
goto err; |
|
} |
|
if ( ! man_vwarn(m, ln, ppos, |
|
"unknown macro: %s", mac)) |
|
goto err; |
|
return(1); |
|
} |
|
|
|
/* The macro is sane. Jump to the next word. */ |
assert(n->type == ROFFT_HEAD); |
|
n = n->parent; |
|
assert(n->type == ROFFT_BLOCK); |
|
assert(man_macro(n->tok)->flags & MAN_BSCOPED); |
|
|
while (buf[i] && ' ' == buf[i]) |
mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos, |
i++; |
"%s breaks %s", roff_name[tok], roff_name[n->tok]); |
|
|
/* Begin recursive parse sequence. */ |
roff_node_delete(man, n); |
|
man->flags &= ~MAN_BLINE; |
if ( ! man_macro(m, c, ln, ppos, &i, buf)) |
} |
goto err; |
|
|
|
out: |
|
if (fl) { |
|
/* |
|
* A NLINE macro has been immediately followed with |
|
* another. Close out the preceding macro's scope, and |
|
* continue. |
|
*/ |
|
assert(MAN_ROOT != m->last->type); |
|
assert(m->last->parent); |
|
assert(MAN_ROOT != m->last->parent->type); |
|
|
|
if (n != m->last) |
|
m->last = m->last->parent; |
|
|
|
if ( ! man_valid_post(m)) |
|
return(0); |
|
if ( ! man_action_post(m)) |
|
return(0); |
|
m->next = MAN_NEXT_SIBLING; |
|
m->flags &= ~MAN_NLINE; |
|
} |
|
|
|
return(1); |
|
|
|
err: /* Error out. */ |
|
|
|
m->flags |= MAN_HALT; |
|
return(0); |
|
} |
} |
|
|
|
void |
int |
man_state(struct roff_man *man, struct roff_node *n) |
man_verr(struct man *man, int ln, int pos, const char *fmt, ...) |
|
{ |
{ |
char buf[256]; |
|
va_list ap; |
|
|
|
if (NULL == man->cb.man_err) |
switch(n->tok) { |
return(0); |
case MAN_nf: |
|
case MAN_EX: |
va_start(ap, fmt); |
if (man->flags & MAN_LITERAL && ! (n->flags & NODE_VALID)) |
(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); |
mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "nf"); |
va_end(ap); |
man->flags |= MAN_LITERAL; |
return((*man->cb.man_err)(man->data, ln, pos, buf)); |
break; |
|
case MAN_fi: |
|
case MAN_EE: |
|
if ( ! (man->flags & MAN_LITERAL) && |
|
! (n->flags & NODE_VALID)) |
|
mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "fi"); |
|
man->flags &= ~MAN_LITERAL; |
|
break; |
|
default: |
|
break; |
|
} |
|
man->last->flags |= NODE_VALID; |
} |
} |
|
|
|
void |
int |
man_validate(struct roff_man *man) |
man_vwarn(struct man *man, int ln, int pos, const char *fmt, ...) |
|
{ |
{ |
char buf[256]; |
|
va_list ap; |
|
|
|
if (NULL == man->cb.man_warn) |
man->last = man->meta.first; |
return(0); |
man_node_validate(man); |
|
man->flags &= ~MAN_LITERAL; |
va_start(ap, fmt); |
|
(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); |
|
va_end(ap); |
|
return((*man->cb.man_warn)(man->data, ln, pos, buf)); |
|
} |
} |
|
|
|
|