![]() ![]() | ![]() |
version 1.3, 2014/03/20 15:18:56 | version 1.57, 2015/02/21 21:15:41 | ||
---|---|---|---|
|
|
||
/* $Id$ */ | /* $Id$ */ | ||
/* | /* | ||
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv> | * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv> | ||
* Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> | |||
* | * | ||
* Permission to use, copy, modify, and distribute this software for any | * Permission to use, copy, modify, and distribute this software for any | ||
* purpose with or without fee is hereby granted, provided that the above | * purpose with or without fee is hereby granted, provided that the above | ||
|
|
||
#include <string.h> | #include <string.h> | ||
#include <unistd.h> | #include <unistd.h> | ||
#include "dict.h" | |||
/* | |||
* In what section can we find Perl module manuals? | |||
* Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p. | |||
* XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL. | |||
*/ | |||
#define PERL_SECTION "3p" | |||
struct args { | struct args { | ||
const char *title; /* override "Dt" title */ | const char *title; /* override "Dt" title */ | ||
const char *date; /* override "Dd" date */ | const char *date; /* override "Dd" date */ | ||
const char *section; /* override "Dt" section */ | const char *section; /* override "Dt" section */ | ||
}; | }; | ||
enum list { | |||
LIST_BULLET = 0, | |||
LIST_ENUM, | |||
LIST_TAG, | |||
LIST__MAX | |||
}; | |||
enum sect { | |||
SECT_NONE = 0, | |||
SECT_NAME, /* NAME section */ | |||
SECT_SYNOPSIS, /* SYNOPSIS section */ | |||
}; | |||
enum outstate { | |||
OUST_NL = 0, /* just started a new output line */ | |||
OUST_TXT, /* text line output in progress */ | |||
OUST_MAC /* macro line output in progress */ | |||
}; | |||
struct state { | struct state { | ||
const char *fname; /* file being parsed */ | |||
int parsing; /* after =cut or before command */ | int parsing; /* after =cut or before command */ | ||
int paused; /* in =begin and before =end */ | int paused; /* in =begin and before =end */ | ||
enum sect sect; /* which section are we in? */ | |||
#define LIST_STACKSZ 128 | |||
enum list lstack[LIST_STACKSZ]; /* open lists */ | |||
size_t lpos; /* where in list stack */ | |||
int haspar; /* in paragraph: do we need Pp? */ | int haspar; /* in paragraph: do we need Pp? */ | ||
int isname; /* are we the NAME section? */ | enum outstate oust; /* state of the mdoc output stream */ | ||
const char *fname; /* file being parsed */ | int wantws; /* let mdoc(7) output whitespace here */ | ||
char *outbuf; /* text buffered for output */ | |||
size_t outbufsz; /* allocated size of outbuf */ | |||
size_t outbuflen; /* current length of outbuf */ | |||
}; | }; | ||
enum fmt { | enum fmt { | ||
|
|
||
CMD__MAX | CMD__MAX | ||
}; | }; | ||
static void command(struct state *, const char *, size_t, size_t); | |||
static void dofile(const struct args *, const char *, | |||
const struct tm *, char *, size_t); | |||
static void donamenm(struct state *, const char *, size_t *, size_t); | |||
static void dopar(struct state *, char *, size_t, size_t); | |||
static void dosynopsisfl(const char *, size_t *, size_t); | |||
static int dosynopsisop(struct state *, const char *, size_t *, | |||
size_t, size_t *); | |||
static int formatcode(struct state *, const char *, size_t *, | |||
size_t, int, int); | |||
static void formatcodeln(struct state *, const char *, const char *, | |||
size_t *, size_t, int); | |||
static void formatescape(struct state *, const char *, size_t *, size_t); | |||
static int hasmatch(const char *, size_t, size_t); | |||
static void ordinary(struct state *, const char *, size_t, size_t); | |||
static void outbuf_addchar(struct state *); | |||
static void outbuf_addstr(struct state *, const char *); | |||
static void outbuf_flush(struct state *); | |||
static void outbuf_grow(struct state *, size_t); | |||
static enum list listguess(const char *, size_t, size_t); | |||
static void mdoc_newln(struct state *); | |||
static int readfile(const struct args *, const char *); | |||
static void register_type(const char *); | |||
static int trylink(const char *, size_t *, size_t, size_t); | |||
static void verbatim(struct state *, char *, size_t, size_t); | |||
static const char *const cmds[CMD__MAX] = { | static const char *const cmds[CMD__MAX] = { | ||
"pod", /* CMD_POD */ | "pod", /* CMD_POD */ | ||
"head1", /* CMD_HEAD1 */ | "head1", /* CMD_HEAD1 */ | ||
|
|
||
'Z' /* FMT_NULL */ | 'Z' /* FMT_NULL */ | ||
}; | }; | ||
static unsigned char last; | |||
static void | |||
outbuf_grow(struct state *st, size_t by) | |||
{ | |||
st->outbufsz += (by / 128 + 1) * 128; | |||
st->outbuf = realloc(st->outbuf, st->outbufsz); | |||
if (NULL == st->outbuf) { | |||
perror(NULL); | |||
exit(EXIT_FAILURE); | |||
} | |||
} | |||
static void | |||
outbuf_addchar(struct state *st) | |||
{ | |||
if (st->outbuflen + 2 >= st->outbufsz) | |||
outbuf_grow(st, 1); | |||
st->outbuf[st->outbuflen++] = last; | |||
if ('\\' == last) | |||
st->outbuf[st->outbuflen++] = 'e'; | |||
st->outbuf[st->outbuflen] = '\0'; | |||
} | |||
static void | |||
outbuf_addstr(struct state *st, const char *str) | |||
{ | |||
size_t slen; | |||
slen = strlen(str); | |||
if (st->outbuflen + slen >= st->outbufsz) | |||
outbuf_grow(st, slen); | |||
memcpy(st->outbuf + st->outbuflen, str, slen+1); | |||
st->outbuflen += slen; | |||
last = str[slen - 1]; | |||
} | |||
static void | |||
outbuf_flush(struct state *st) | |||
{ | |||
if (0 == st->outbuflen) | |||
return; | |||
if (OUST_NL != st->oust && st->wantws) | |||
putchar(' '); | |||
if (OUST_MAC == st->oust && '"' == *st->outbuf) | |||
printf("\\(dq%s", st->outbuf + 1); | |||
else | |||
fputs(st->outbuf, stdout); | |||
*st->outbuf = '\0'; | |||
st->outbuflen = 0; | |||
if (OUST_NL == st->oust) | |||
st->oust = OUST_TXT; | |||
} | |||
static void | |||
mdoc_newln(struct state *st) | |||
{ | |||
if (OUST_NL == st->oust) | |||
return; | |||
putchar('\n'); | |||
last = '\n'; | |||
st->oust = OUST_NL; | |||
st->wantws = 1; | |||
} | |||
/* | /* | ||
* Given buf[*start] is at the start of an escape name, read til the end | * Given buf[*start] is at the start of an escape name, read til the end | ||
* of the escape ('>') then try to do something with it. | * of the escape ('>') then try to do something with it. | ||
* Sets start to be one after the '>'. | * Sets start to be one after the '>'. | ||
* | |||
* This function does not care about output modes, | |||
* it merely appends text to the output buffer, | |||
* which can then be used in any mode. | |||
*/ | */ | ||
static void | static void | ||
formatescape(const char *buf, size_t *start, size_t end) | formatescape(struct state *st, const char *buf, size_t *start, size_t end) | ||
{ | { | ||
char esc[16]; /* no more needed */ | char esc[16]; /* no more needed */ | ||
size_t i, max; | size_t i, max; | ||
|
|
||
* TODO: right now, we only recognise the named escapes. | * TODO: right now, we only recognise the named escapes. | ||
* Just let the rest of them go. | * Just let the rest of them go. | ||
*/ | */ | ||
if (0 == strcmp(esc, "lt")) | if (0 == strcmp(esc, "lt")) | ||
printf("\\(la"); | outbuf_addstr(st, "\\(la"); | ||
else if (0 == strcmp(esc, "gt")) | else if (0 == strcmp(esc, "gt")) | ||
printf("\\(ra"); | outbuf_addstr(st, "\\(ra"); | ||
else if (0 == strcmp(esc, "vb")) | else if (0 == strcmp(esc, "verbar")) | ||
printf("\\(ba"); | outbuf_addstr(st, "\\(ba"); | ||
else if (0 == strcmp(esc, "sol")) | else if (0 == strcmp(esc, "sol")) | ||
printf("\\(sl"); | outbuf_addstr(st, "\\(sl"); | ||
} | } | ||
/* | /* | ||
* Skip space characters. | * Run some heuristics to intuit a link format. | ||
* I set "start" to be the end of the sequence (last right-caret) so | |||
* that the caller can safely just continue processing. | |||
* If this is just an empty tag, I'll return 0. | |||
* | |||
* Always operates in OUST_MAC mode. | |||
* Mode handling is done by the caller. | |||
*/ | */ | ||
static int | |||
trylink(const char *buf, size_t *start, size_t end, size_t dsz) | |||
{ | |||
size_t linkstart, realend, linkend, | |||
i, j, textsz, stack; | |||
/* | |||
* Scan to the start of the terminus. | |||
* This function is more or less replicated in the formatcode() | |||
* for null or index formatting codes. | |||
* However, we're slightly different because we might have | |||
* nested escapes we need to ignore. | |||
*/ | |||
stack = 0; | |||
for (linkstart = realend = *start; realend < end; realend++) { | |||
if ('<' == buf[realend]) | |||
stack++; | |||
if ('>' != buf[realend]) | |||
continue; | |||
else if (stack-- > 0) | |||
continue; | |||
if (dsz == 1) | |||
break; | |||
assert(realend > 0); | |||
if (' ' != buf[realend - 1]) | |||
continue; | |||
for (i = realend, j = 0; i < end && j < dsz; j++) | |||
if ('>' != buf[i++]) | |||
break; | |||
if (dsz == j) | |||
break; | |||
} | |||
/* Ignore stubs. */ | |||
if (realend == end || realend == *start) | |||
return(0); | |||
/* Set linkend to the end of content. */ | |||
linkend = dsz > 1 ? realend - 1 : realend; | |||
/* Re-scan to see if we have a title or section. */ | |||
for (textsz = *start; textsz < linkend; textsz++) | |||
if ('|' == buf[textsz] || '/' == buf[textsz]) | |||
break; | |||
if (textsz < linkend && '|' == buf[textsz]) { | |||
/* With title: set start, then end at section. */ | |||
linkstart = textsz + 1; | |||
textsz = textsz - *start; | |||
for (i = linkstart; i < linkend; i++) | |||
if ('/' == buf[i]) | |||
break; | |||
if (i < linkend) | |||
linkend = i; | |||
} else if (textsz < linkend && '/' == buf[textsz]) { | |||
/* With section: set end at section. */ | |||
linkend = textsz; | |||
textsz = 0; | |||
} else | |||
/* No title, no section. */ | |||
textsz = 0; | |||
*start = realend; | |||
j = linkend - linkstart; | |||
/* Do we have only subsection material? */ | |||
if (0 == j && '/' == buf[linkend]) { | |||
linkstart = linkend + 1; | |||
linkend = dsz > 1 ? realend - 1 : realend; | |||
if (0 == (j = linkend - linkstart)) | |||
return(0); | |||
printf("Sx %.*s", (int)j, &buf[linkstart]); | |||
return(1); | |||
} else if (0 == j) | |||
return(0); | |||
/* See if we qualify as being a link or not. */ | |||
if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) || | |||
(j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) || | |||
(j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) || | |||
(j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) || | |||
(j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) || | |||
(j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) { | |||
/* Gross. */ | |||
printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : | |||
realend) - linkstart), &buf[linkstart]); | |||
return(1); | |||
} | |||
/* See if we qualify as a mailto. */ | |||
if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) { | |||
printf("Mt %.*s", (int)j, &buf[linkstart]); | |||
return(1); | |||
} | |||
/* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */ | |||
if ((j > 3 && ')' == buf[linkend - 1]) && | |||
('(' == buf[linkend - 3])) { | |||
printf("Xr %.*s %c", (int)(j - 3), | |||
&buf[linkstart], buf[linkend - 2]); | |||
return(1); | |||
} else if ((j > 4 && ')' == buf[linkend - 1]) && | |||
('(' == buf[linkend - 4])) { | |||
printf("Xr %.*s %.*s", (int)(j - 4), | |||
&buf[linkstart], 2, &buf[linkend - 3]); | |||
return(1); | |||
} else if ((j > 5 && ')' == buf[linkend - 1]) && | |||
('(' == buf[linkend - 5])) { | |||
printf("Xr %.*s %.*s", (int)(j - 5), | |||
&buf[linkstart], 3, &buf[linkend - 4]); | |||
return(1); | |||
} | |||
/* Last try: do we have a double-colon? */ | |||
for (i = linkstart + 1; i < linkend; i++) | |||
if (':' == buf[i] && ':' == buf[i - 1]) | |||
break; | |||
if (i < linkend) | |||
printf("Xr %.*s " PERL_SECTION, | |||
(int)j, &buf[linkstart]); | |||
else | |||
printf("Xr %.*s 1", (int)j, &buf[linkstart]); | |||
return(1); | |||
} | |||
/* | |||
* Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section, | |||
* then it's likely that we're a flag. | |||
* Our flag might be followed by an argument, so make sure that we're | |||
* accounting for that, too. | |||
* If we don't have a flag at all, however, then assume we're an "Ar". | |||
* | |||
* Always operates in OUST_MAC mode. | |||
* Mode handling is done by the caller. | |||
*/ | |||
static void | static void | ||
skipspace(const char *buf, size_t *start, size_t end) | dosynopsisfl(const char *buf, size_t *start, size_t end) | ||
{ | { | ||
size_t i; | |||
again: | |||
assert(*start + 1 < end); | |||
assert('-' == buf[*start]); | |||
while (*start < end && ' ' == buf[*start]) | if ( ! isalnum((int)buf[*start + 1]) && | ||
(*start)++; | '?' != buf[*start + 1] && | ||
'-' != buf[*start + 1]) { | |||
(*start)--; | |||
fputs("Ar", stdout); | |||
return; | |||
} | |||
(*start)++; | |||
for (i = *start; i < end; i++) | |||
if (isalnum((int)buf[i])) | |||
continue; | |||
else if ('?' == buf[i]) | |||
continue; | |||
else if ('-' == buf[i]) | |||
continue; | |||
else if ('_' == buf[i]) | |||
continue; | |||
else | |||
break; | |||
assert(i < end); | |||
if ( ! (' ' == buf[i] || '>' == buf[i])) { | |||
fputs("Ar", stdout); | |||
return; | |||
} | |||
printf("Fl "); | |||
if (end - *start > 1 && | |||
isupper((int)buf[*start]) && | |||
islower((int)buf[*start + 1]) && | |||
(end - *start == 2 || | |||
' ' == buf[*start + 2])) | |||
printf("\\&"); | |||
printf("%.*s", (int)(i - *start), &buf[*start]); | |||
*start = i; | |||
if (' ' == buf[i]) { | |||
while (i < end && ' ' == buf[i]) | |||
i++; | |||
assert(i < end); | |||
if ('-' == buf[i]) { | |||
*start = i; | |||
goto again; | |||
} | |||
fputs("Ar", stdout); | |||
*start = i; | |||
} | |||
} | } | ||
/* | /* | ||
|
|
||
* like X<...> and can contain nested format codes. | * like X<...> and can contain nested format codes. | ||
* This consumes the whole format code, and any nested format codes, til | * This consumes the whole format code, and any nested format codes, til | ||
* the end of matched production. | * the end of matched production. | ||
* If "reentrant", then we're being called after a macro has already | * If "nomacro", then we don't print any macros, just contained data | ||
* been printed to the current line. | * (e.g., following "Sh" or "Nm"). | ||
* "last" is set to the last read character: this is used to determine | * "pos" is only significant in SYNOPSIS, and should be 0 when invoked | ||
* whether we should buffer with space or not. | * as the first format code on a line (for decoration as an "Nm"), | ||
* If "nomacro", then we don't print any macros, just contained data. | * non-zero otherwise. | ||
* | |||
* Output mode handling is most complicated here. | |||
* We may enter in any mode. | |||
* We usually exit in OUST_MAC mode, except when | |||
* entering without OUST_MAC and the code is invalid. | |||
*/ | */ | ||
static int | static int | ||
formatcode(const char *buf, size_t *start, | formatcode(struct state *st, const char *buf, size_t *start, | ||
size_t end, int reentrant, int last, int nomacro) | size_t end, int nomacro, int pos) | ||
{ | { | ||
size_t i, j, dsz; | |||
enum fmt fmt; | enum fmt fmt; | ||
unsigned char uc; | |||
int gotmacro, wantws; | |||
assert(*start + 1 < end); | assert(*start + 1 < end); | ||
assert('<' == buf[*start + 1]); | assert('<' == buf[*start + 1]); | ||
/* | |||
* First, look up the format code. | |||
* If it's not valid, treat it as a NOOP. | |||
*/ | |||
for (fmt = 0; fmt < FMT__MAX; fmt++) | for (fmt = 0; fmt < FMT__MAX; fmt++) | ||
if (buf[*start] == fmts[fmt]) | if (buf[*start] == fmts[fmt]) | ||
break; | break; | ||
/* Invalid macros are just regular text. */ | /* | ||
* Determine whether we're overriding our delimiter. | |||
* According to POD, if we have more than one '<' followed by a | |||
* space, then we need a space followed by matching '>' to close | |||
* the expression. | |||
* Otherwise we use the usual '<' and '>' matched pair. | |||
*/ | |||
i = *start + 1; | |||
while (i < end && '<' == buf[i]) | |||
i++; | |||
assert(i > *start + 1); | |||
dsz = i - (*start + 1); | |||
if (dsz > 1 && (i >= end || ' ' != buf[i])) | |||
dsz = 1; | |||
if (FMT__MAX == fmt) { | /* Remember, if dsz>1, to jump the trailing space. */ | ||
putchar(buf[*start]); | *start += dsz + 1 + (dsz > 1 ? 1 : 0); | ||
(*start)++; | |||
return(0); | |||
} | |||
*start += 2; | |||
/* | /* | ||
* Escapes don't print macro sequences, so just output them like | * Escapes and ignored codes (NULL and INDEX) don't print macro | ||
* normal text before processing for macros. | * sequences, so just output them like normal text before | ||
* processing for real macros. | |||
*/ | */ | ||
if (FMT_ESCAPE == fmt) { | if (FMT_ESCAPE == fmt) { | ||
formatescape(buf, start, end); | formatescape(st, buf, start, end); | ||
return(0); | return(0); | ||
} else if (FMT_NULL == fmt || FMT_INDEX == fmt) { | } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { | ||
/* For indices and nulls, just consume. */ | /* | ||
while (*start < end && '>' != buf[*start]) | * Just consume til the end delimiter, accounting for | ||
* whether it's a custom one. | |||
*/ | |||
for ( ; *start < end; (*start)++) { | |||
if ('>' != buf[*start]) | |||
continue; | |||
else if (dsz == 1) | |||
break; | |||
assert(*start > 0); | |||
if (' ' != buf[*start - 1]) | |||
continue; | |||
i = *start; | |||
for (j = 0; i < end && j < dsz; j++) | |||
if ('>' != buf[i++]) | |||
break; | |||
if (dsz != j) | |||
continue; | |||
(*start) += dsz; | |||
break; | |||
} | |||
if (*start < end) { | |||
assert('>' == buf[*start]); | |||
(*start)++; | (*start)++; | ||
if (*start < end) | } | ||
(*start)++; | if (isspace(last)) | ||
while (*start < end && isspace((int)buf[*start])) | |||
(*start)++; | |||
return(0); | return(0); | ||
} | } | ||
if ( ! nomacro) { | /* | ||
* Check whether we're supposed to print macro stuff (this is | |||
* suppressed in, e.g., "Nm" and "Sh" macros). | |||
*/ | |||
if (FMT__MAX != fmt && !nomacro) { | |||
/* | /* | ||
* Print out the macro describing this format code. | * Do we need spacing before the upcoming macro, | ||
* If we're not "reentrant" (not yet on a macro line) | * after any pending text already in the outbuf? | ||
* then print a newline, if necessary, and the macro | * We may already have wantws if there was whitespace | ||
* indicator. | * before the code ("text B<text"), or there may be | ||
* Otherwise, offset us with a space. | * whitespace inside our scope ("textB< text"). | ||
*/ | */ | ||
if ( ! reentrant && last != '\n') | |||
putchar('\n'); | wantws = ' ' == buf[*start] || | ||
if ( ! reentrant) | (st->wantws && ! st->outbuflen); | ||
/* | |||
* If we are on a text line and there is no | |||
* whitespace before our content, we have to make | |||
* the previous word a prefix to the macro line. | |||
*/ | |||
if (OUST_MAC != st->oust && ! wantws) { | |||
if (OUST_NL != st->oust) | |||
mdoc_newln(st); | |||
fputs(".Pf", stdout); | |||
st->oust = OUST_MAC; | |||
st->wantws = wantws = 1; | |||
} | |||
outbuf_flush(st); | |||
/* Whitespace is easier to suppress on macro lines. */ | |||
if (OUST_MAC == st->oust && ! wantws) | |||
printf(" Ns"); | |||
/* Unless we are on a macro line, start one. */ | |||
if (OUST_MAC != st->oust) { | |||
if (OUST_NL != st->oust) | |||
mdoc_newln(st); | |||
putchar('.'); | putchar('.'); | ||
else | st->oust = OUST_MAC; | ||
} else | |||
putchar(' '); | putchar(' '); | ||
st->wantws = 1; | |||
/* | /* | ||
* If we don't have whitespace before us, then suppress | * Print the macro corresponding to this format code, | ||
* macro whitespace with Ns. | * and update the output state afterwards. | ||
*/ | */ | ||
if (' ' != last) | |||
printf("Ns "); | |||
switch (fmt) { | switch (fmt) { | ||
case (FMT_ITALIC): | case (FMT_ITALIC): | ||
printf("Em "); | fputs("Em", stdout); | ||
break; | break; | ||
case (FMT_BOLD): | case (FMT_BOLD): | ||
printf("Sy "); | if (SECT_SYNOPSIS == st->sect) { | ||
if (1 == dsz && '-' == buf[*start]) | |||
dosynopsisfl(buf, start, end); | |||
else if (0 == pos) | |||
fputs("Nm", stdout); | |||
else | |||
fputs("Ar", stdout); | |||
break; | |||
} | |||
i = 0; | |||
uc = buf[*start]; | |||
while (isalnum(uc) || '_' == uc || ' ' == uc) | |||
uc = buf[*start + ++i]; | |||
if ('=' != uc && '>' != uc) | |||
i = 0; | |||
if (4 == i && ! strncmp(buf + *start, "NULL", 4)) { | |||
fputs("Dv", stdout); | |||
break; | |||
} | |||
switch (i ? dict_get(buf + *start, i) : MDOC_MAX) { | |||
case MDOC_Fa: | |||
fputs("Fa", stdout); | |||
break; | |||
case MDOC_Vt: | |||
fputs("Vt", stdout); | |||
break; | |||
default: | |||
fputs("Sy", stdout); | |||
break; | |||
} | |||
break; | break; | ||
case (FMT_CODE): | case (FMT_CODE): | ||
printf("Qo Li "); | fputs("Qo Li", stdout); | ||
break; | break; | ||
case (FMT_LINK): | case (FMT_LINK): | ||
printf("Lk "); | /* Try to link; use "No" if it's empty. */ | ||
if ( ! trylink(buf, start, end, dsz)) | |||
fputs("No", stdout); | |||
break; | break; | ||
case (FMT_FILE): | case (FMT_FILE): | ||
printf("Pa "); | fputs("Pa", stdout); | ||
break; | break; | ||
case (FMT_NBSP): | case (FMT_NBSP): | ||
/* TODO. */ | fputs("No", stdout); | ||
printf("No "); | |||
break; | break; | ||
default: | default: | ||
abort(); | abort(); | ||
} | } | ||
} else { | |||
outbuf_flush(st); | |||
st->wantws = 0; | |||
} | } | ||
/* | /* | ||
* Read until we reach the end market ('>') or until we find a | * Process until we reach the end marker (e.g., '>') or until we | ||
* nested format code. | * find a nested format code. | ||
* Don't emit any newlines: since we're on a macro line, we | * Don't emit any newlines: since we're on a macro line, we | ||
* don't want to break the line. | * don't want to break the line. | ||
*/ | */ | ||
gotmacro = 0; | |||
while (*start < end) { | while (*start < end) { | ||
if ('>' == buf[*start]) { | if ('>' == buf[*start] && 1 == dsz) { | ||
(*start)++; | (*start)++; | ||
break; | break; | ||
} else if ('>' == buf[*start] && | |||
' ' == buf[*start - 1]) { | |||
/* | |||
* Handle custom delimiters. | |||
* These require a certain number of | |||
* space-preceded carets before we're really at | |||
* the end. | |||
*/ | |||
i = *start; | |||
for (j = 0; i < end && j < dsz; j++) | |||
if ('>' != buf[i++]) | |||
break; | |||
if (dsz == j) { | |||
*start += dsz; | |||
break; | |||
} | |||
} | } | ||
if (*start + 1 < end && '<' == buf[*start + 1]) { | if (*start + 1 < end && '<' == buf[*start + 1] && | ||
formatcode(buf, start, end, 1, last, nomacro); | 'A' <= buf[*start] && 'Z' >= buf[*start]) { | ||
gotmacro = formatcode(st, buf, | |||
start, end, nomacro, 1); | |||
continue; | continue; | ||
} | } | ||
/* Suppress newlines and multiple spaces. */ | |||
last = buf[(*start)++]; | |||
if (isspace(last)) { | |||
outbuf_flush(st); | |||
st->wantws = 1; | |||
gotmacro = 0; | |||
while (*start < end && | |||
isspace((unsigned char)buf[*start])) | |||
(*start)++; | |||
continue; | |||
} | |||
if (OUST_MAC == st->oust && FMT__MAX != fmt) { | |||
if (gotmacro && ! st->wantws) { | |||
printf(" Ns"); | |||
st->wantws = 1; | |||
} | |||
gotmacro = 0; | |||
/* | /* | ||
* Make sure that any macro-like words (or | * Escape macro-like words. | ||
* really any word starting with a capital | * This matches "Xx " and "XxEOLN". | ||
* letter) is assumed to be a macro that must be | |||
* escaped. | |||
* XXX: should this be isalpha()? | |||
*/ | */ | ||
if ((' ' == last || '\n' == last) && | |||
isupper(buf[*start])) | |||
printf("\\&"); | |||
last = buf[*start]; | if (*start < end && ! st->outbuflen && | ||
if ('\n' == last) | isupper(last) && | ||
last = ' '; | islower((unsigned char)buf[*start]) && | ||
putchar(last); | (end - *start == 1 || | ||
' ' == buf[*start + 1] || | |||
(*start)++; | '>' == buf[*start + 1])) | ||
outbuf_addstr(st, "\\&"); | |||
last = buf[*start - 1]; | |||
} | |||
outbuf_addchar(st); | |||
} | } | ||
if ( ! nomacro && FMT_CODE == fmt) | if (FMT__MAX == fmt) | ||
printf(" Qc "); | return(0); | ||
if (reentrant) | outbuf_flush(st); | ||
return(1); | |||
/* | if ( ! nomacro && FMT_CODE == fmt) | ||
* If we're not reentrant, we want to put ending punctuation on | fputs(" Qc", stdout); | ||
* the macro line so that it's properly handled by being | |||
* smooshed against the terminal word. | st->wantws = ' ' == last; | ||
*/ | |||
skipspace(buf, start, end); | |||
if (',' != buf[*start] && '.' != buf[*start] && | |||
'!' != buf[*start] && '?' != buf[*start] && | |||
')' != buf[*start]) | |||
return(1); | |||
while (*start < end) { | |||
if (',' != buf[*start] && | |||
'.' != buf[*start] && | |||
'!' != buf[*start] && | |||
'?' != buf[*start] && | |||
')' != buf[*start]) | |||
break; | |||
putchar(' '); | |||
putchar(buf[*start]); | |||
(*start)++; | |||
} | |||
skipspace(buf, start, end); | |||
return(1); | return(1); | ||
} | } | ||
/* | /* | ||
* Calls formatcode() til the end of a paragraph. | * Calls formatcode() til the end of a paragraph. | ||
* Goes to OUST_MAC mode and stays there when returning, | |||
* such that the caller can add arguments to the macro line | |||
* before closing it out. | |||
*/ | */ | ||
static void | static void | ||
formatcodeln(const char *buf, size_t *start, size_t end, int nomacro) | formatcodeln(struct state *st, const char *linemac, | ||
const char *buf, size_t *start, size_t end, int nomacro) | |||
{ | { | ||
int last; | int gotmacro; | ||
last = '\n'; | assert(OUST_NL == st->oust); | ||
assert(st->wantws); | |||
printf(".%s", linemac); | |||
st->oust = OUST_MAC; | |||
gotmacro = 0; | |||
while (*start < end) { | while (*start < end) { | ||
if (*start + 1 < end && '<' == buf[*start + 1]) { | if (*start + 1 < end && '<' == buf[*start + 1] && | ||
formatcode(buf, start, end, 1, last, nomacro); | 'A' <= buf[*start] && 'Z' >= buf[*start]) { | ||
gotmacro = formatcode(st, buf, | |||
start, end, nomacro, 1); | |||
continue; | continue; | ||
} | } | ||
if ('\n' != buf[*start]) | |||
putchar(last = buf[*start]); | /* Suppress newlines and multiple spaces. */ | ||
(*start)++; | |||
last = buf[(*start)++]; | |||
if (isspace(last)) { | |||
outbuf_flush(st); | |||
st->wantws = 1; | |||
while (*start < end && | |||
isspace((unsigned char)buf[*start])) | |||
(*start)++; | |||
continue; | |||
} | |||
if (gotmacro) { | |||
if (*start < end) { | |||
if (st->wantws) | |||
printf(" No"); | |||
else | |||
printf(" Ns"); | |||
} | |||
st->wantws = 1; | |||
gotmacro = 0; | |||
} | |||
/* | |||
* Since we're already on a macro line, we want to make | |||
* sure that we don't inadvertently invoke a macro. | |||
* We need to do this carefully because section names | |||
* are used in troff and we don't want to escape | |||
* something that needn't be escaped. | |||
*/ | |||
if (*start < end && ! st->outbuflen && isupper(last) && | |||
islower((unsigned char)buf[*start]) && | |||
(end - *start == 1 || ' ' == buf[*start + 1])) { | |||
outbuf_addstr(st, "\\&"); | |||
last = buf[*start - 1]; | |||
} | |||
outbuf_addchar(st); | |||
} | } | ||
outbuf_flush(st); | |||
st->wantws = 1; | |||
} | } | ||
/* | /* | ||
* Guess at what kind of list we are. | |||
* These are taken straight from the POD manual. | |||
* I don't know what people do in real life. | |||
*/ | |||
static enum list | |||
listguess(const char *buf, size_t start, size_t end) | |||
{ | |||
size_t len = end - start; | |||
assert(end >= start); | |||
if (len == 1 && '*' == buf[start]) | |||
return(LIST_BULLET); | |||
if (len == 2 && '1' == buf[start] && '.' == buf[start + 1]) | |||
return(LIST_ENUM); | |||
else if (len == 1 && '1' == buf[start]) | |||
return(LIST_ENUM); | |||
else | |||
return(LIST_TAG); | |||
} | |||
/* | |||
* A command paragraph, as noted in the perlpod manual, just indicates | * A command paragraph, as noted in the perlpod manual, just indicates | ||
* that we should do something, optionally with some text to print as | * that we should do something, optionally with some text to print as | ||
* well. | * well. | ||
* From the perspective of external callers, | |||
* always stays in OUST_NL/wantws mode, | |||
* but its children do use OUST_MAC. | |||
*/ | */ | ||
static void | static void | ||
command(struct state *st, const char *buf, size_t start, size_t end) | command(struct state *st, const char *buf, size_t start, size_t end) | ||
|
|
||
return; | return; | ||
start += csz; | start += csz; | ||
skipspace(buf, &start, end); | while (start < end && ' ' == buf[start]) | ||
start++; | |||
len = end - start; | len = end - start; | ||
if (st->paused) { | if (st->paused) { | ||
|
|
||
* The behaviour of head= follows from a quick glance at | * The behaviour of head= follows from a quick glance at | ||
* how pod2man handles it. | * how pod2man handles it. | ||
*/ | */ | ||
printf(".Sh "); | st->sect = SECT_NONE; | ||
st->isname = 0; | if (end - start == 4) { | ||
if (end - start == 4) | |||
if (0 == memcmp(&buf[start], "NAME", 4)) | if (0 == memcmp(&buf[start], "NAME", 4)) | ||
st->isname = 1; | st->sect = SECT_NAME; | ||
formatcodeln(buf, &start, end, 1); | } else if (end - start == 8) { | ||
putchar('\n'); | if (0 == memcmp(&buf[start], "SYNOPSIS", 8)) | ||
st->sect = SECT_SYNOPSIS; | |||
} | |||
formatcodeln(st, "Sh", buf, &start, end, 1); | |||
mdoc_newln(st); | |||
st->haspar = 1; | st->haspar = 1; | ||
break; | break; | ||
case (CMD_HEAD2): | case (CMD_HEAD2): | ||
printf(".Ss "); | formatcodeln(st, "Ss", buf, &start, end, 1); | ||
formatcodeln(buf, &start, end, 1); | mdoc_newln(st); | ||
putchar('\n'); | |||
st->haspar = 1; | st->haspar = 1; | ||
break; | break; | ||
case (CMD_HEAD3): | case (CMD_HEAD3): | ||
puts(".Pp"); | puts(".Pp"); | ||
printf(".Em "); | formatcodeln(st, "Em", buf, &start, end, 0); | ||
formatcodeln(buf, &start, end, 0); | mdoc_newln(st); | ||
putchar('\n'); | |||
puts(".Pp"); | puts(".Pp"); | ||
st->haspar = 1; | st->haspar = 1; | ||
break; | break; | ||
case (CMD_HEAD4): | case (CMD_HEAD4): | ||
puts(".Pp"); | puts(".Pp"); | ||
printf(".No "); | formatcodeln(st, "No", buf, &start, end, 0); | ||
formatcodeln(buf, &start, end, 0); | mdoc_newln(st); | ||
putchar('\n'); | |||
puts(".Pp"); | puts(".Pp"); | ||
st->haspar = 1; | st->haspar = 1; | ||
break; | break; | ||
case (CMD_OVER): | case (CMD_OVER): | ||
/* | /* | ||
* TODO: we should be doing this after we process the | * If we have an existing list that hasn't had an =item | ||
* first =item to see whether we'll do an -enum, | * yet, then make sure that we open it now. | ||
* -bullet, or something else. | * We use the default list type, but that can't be | ||
* helped (we haven't seen any items yet). | |||
*/ | */ | ||
puts(".Bl -tag -width Ds"); | if (st->lpos > 0) | ||
if (LIST__MAX == st->lstack[st->lpos - 1]) { | |||
st->lstack[st->lpos - 1] = LIST_TAG; | |||
puts(".Bl -tag -width Ds"); | |||
} | |||
st->lpos++; | |||
assert(st->lpos < LIST_STACKSZ); | |||
st->lstack[st->lpos - 1] = LIST__MAX; | |||
break; | break; | ||
case (CMD_ITEM): | case (CMD_ITEM): | ||
printf(".It "); | if (0 == st->lpos) { | ||
formatcodeln(buf, &start, end, 0); | /* | ||
putchar('\n'); | * Bad markup. | ||
* Try to compensate. | |||
*/ | |||
st->lstack[st->lpos] = LIST__MAX; | |||
st->lpos++; | |||
} | |||
assert(st->lpos > 0); | |||
/* | |||
* If we're the first =item, guess at what our content | |||
* will be: "*" is a bullet list, "1." is a numbered | |||
* list, and everything else is tagged. | |||
*/ | |||
if (LIST__MAX == st->lstack[st->lpos - 1]) { | |||
st->lstack[st->lpos - 1] = | |||
listguess(buf, start, end); | |||
switch (st->lstack[st->lpos - 1]) { | |||
case (LIST_BULLET): | |||
puts(".Bl -bullet"); | |||
break; | |||
case (LIST_ENUM): | |||
puts(".Bl -enum"); | |||
break; | |||
default: | |||
puts(".Bl -tag -width Ds"); | |||
break; | |||
} | |||
} | |||
switch (st->lstack[st->lpos - 1]) { | |||
case (LIST_TAG): | |||
formatcodeln(st, "It", buf, &start, end, 0); | |||
mdoc_newln(st); | |||
break; | |||
case (LIST_ENUM): | |||
/* FALLTHROUGH */ | |||
case (LIST_BULLET): | |||
/* | |||
* Abandon the remainder of the paragraph | |||
* because we're going to be a bulleted or | |||
* numbered list. | |||
*/ | |||
puts(".It"); | |||
break; | |||
default: | |||
abort(); | |||
} | |||
st->haspar = 1; | st->haspar = 1; | ||
break; | break; | ||
case (CMD_BACK): | case (CMD_BACK): | ||
puts(".El"); | /* Make sure we don't back over the stack. */ | ||
if (st->lpos > 0) { | |||
st->lpos--; | |||
puts(".El"); | |||
} | |||
break; | break; | ||
case (CMD_BEGIN): | case (CMD_BEGIN): | ||
/* | /* | ||
|
|
||
} | } | ||
/* | /* | ||
* Put the type provided as an argument into the dictionary. | |||
*/ | |||
static void | |||
register_type(const char *ptype) | |||
{ | |||
const char *pname, *pend; | |||
pname = ptype; | |||
while (isalnum((unsigned char)*pname) || '_' == *pname) | |||
pname++; | |||
if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) || | |||
(pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) { | |||
while (' ' == *pname) | |||
pname++; | |||
pend = pname; | |||
while (isalnum((unsigned char)*pend) || '_' == *pend) | |||
pend++; | |||
if (pend > pname) | |||
dict_put(pname, pend - pname, MDOC_Vt); | |||
} else | |||
pend = pname; | |||
if (pend > ptype) | |||
dict_put(ptype, pend - ptype, MDOC_Vt); | |||
} | |||
/* | |||
* Just pump out the line in a verbatim block. | * Just pump out the line in a verbatim block. | ||
* From the perspective of external callers, | |||
* always stays in OUST_NL/wantws mode. | |||
*
* NOTE(review): this span is a side-by-side rendering of two revisions;
* the notes below describe the right-hand (newer) column.
* The paragraph is buf[start..end).  Outside the SYNOPSIS section all
* of it is written inside a ".Bd -literal" / ".Ed" display.
* Inside the SYNOPSIS section, lines are first tried as "#include <"
* (written as ".In"), as any other '#' line (written as ".Fd"), and as
* a function declaration (written as ".Ft", ".Fo", ".Fa", ".Fc");
* whatever is not recognized falls through to the literal display.
* The buffer is modified in place while encoding a function
* declaration (newlines become spaces, NUL bytes are written).
* Nothing is printed while st->paused is set or st->parsing is clear.
*/ | */ | ||
static void | static void | ||
verbatim(struct state *st, const char *buf, size_t start, size_t end) | verbatim(struct state *st, char *buf, size_t start, size_t end) | ||
{ | { | ||
size_t i, ift, ifo, ifa, ifc, inl; | |||
char *cp, *cp2; | |||
int indisplay, nopen, wantsp; | |||
if ( ! st->parsing || st->paused) | if (st->paused || ! st->parsing) | ||
return; | return; | ||
puts(".Bd -literal"); | indisplay = wantsp = 0; | ||
printf("%.*s\n", (int)(end - start), &buf[start]); | |||
/* Re-entered once per chunk until "start" reaches "end". */
puts(".Ed"); | again: | ||
if (start == end) { | |||
if (indisplay) | |||
puts(".Ed"); | |||
return; | |||
} | |||
/* A blank line is remembered in wantsp rather than printed at once. */
if ('\n' == buf[start]) { | |||
wantsp = 1; | |||
start++; | |||
goto again; | |||
} | |||
/* | |||
* If we're in the SYNOPSIS, see if we're an #include block. | |||
* If we are, then print the "In" macro and re-loop. | |||
* This handles any number of inclusions, but only when they | |||
* come before the remaining parts... | |||
*/ | |||
if (SECT_SYNOPSIS == st->sect) { | |||
i = start; | |||
while (i < end && buf[i] == ' ') | |||
i++; | |||
if (i == end) | |||
goto again; | |||
/* We're an include block! */ | |||
if (end - i > 10 && | |||
0 == memcmp(&buf[i], "#include <", 10)) { | |||
start = i + 10; | |||
while (start < end && ' ' == buf[start]) | |||
start++; | |||
if (indisplay) | |||
puts(".Ed"); | |||
indisplay = wantsp = 0; | |||
fputs(".In ", stdout); | |||
/* Stop til the '>' marker or we hit eoln. */ | |||
while (start < end && | |||
'>' != buf[start] && '\n' != buf[start]) | |||
putchar(buf[start++]); | |||
putchar('\n'); | |||
if (start < end && '>' == buf[start]) | |||
start++; | |||
if (start < end && '\n' == buf[start]) | |||
start++; | |||
goto again; | |||
} | |||
/* Other preprocessor directives. */ | |||
if ('#' == buf[i]) { | |||
if (indisplay) | |||
puts(".Ed"); | |||
indisplay = wantsp = 0; | |||
fputs(".Fd ", stdout); | |||
start = i; | |||
while(start < end && '\n' != buf[start]) | |||
putchar(buf[start++]); | |||
putchar('\n'); | |||
if (start < end && '\n' == buf[start]) | |||
start++; | |||
/* Remember #define for Dv or Fn. */ | |||
/* NOTE(review): buf[i + 1] through buf[i + 7] are read without comparing against "end" -- confirm the buffer extends far enough. */
if (strncmp(buf + i + 1, "define", 6) || | |||
! isspace((unsigned char)buf[i + 7])) | |||
goto again; | |||
ifo = i + 7; | |||
while (ifo < start && | |||
isspace((unsigned char)buf[ifo])) | |||
ifo++; | |||
/* ifo..ifa spans the defined name; a '(' right after it selects MDOC_Fo, anything else MDOC_Dv. */
ifa = ifo; | |||
while ('_' == buf[ifa] || | |||
isalnum((unsigned char)buf[ifa])) | |||
ifa++; | |||
dict_put(buf + ifo, ifa - ifo, | |||
'(' == buf[ifa] ? MDOC_Fo : MDOC_Dv); | |||
goto again; | |||
} | |||
/* Parse function declaration. */ | |||
/*
 * Index roles in the scan below: ift is where the return type starts,
 * ifo the last blank seen before the first '(', ifa that first '(',
 * ifc the ')' that closes it, inl the newline ending the declaration
 * (or "end"), nopen the current parenthesis depth.
 * Setting i = end abandons the scan with ifc still 0.
 */
ifo = ifa = ifc = 0; | |||
inl = end; | |||
nopen = 0; | |||
for (ift = i; i < end; i++) { | |||
if (ifc) { | |||
if (buf[i] != '\n') | |||
continue; | |||
inl = i; | |||
break; | |||
} | |||
switch (buf[i]) { | |||
case '\t': | |||
/* FALLTHROUGH */ | |||
case ' ': | |||
if ( ! ifa) | |||
ifo = i; | |||
break; | |||
case '(': | |||
if (ifo) { | |||
nopen++; | |||
if ( ! ifa) | |||
ifa = i; | |||
} else | |||
i = end; | |||
break; | |||
case ')': | |||
switch (nopen) { | |||
case 0: | |||
i = end; | |||
break; | |||
case 1: | |||
ifc = i; | |||
break; | |||
default: | |||
nopen--; | |||
break; | |||
} | |||
break; | |||
default: | |||
break; | |||
} | |||
} | |||
/* Encode function declaration. */ | |||
if (ifc) { | |||
/* Join a multi-line argument list into one line. */
for (i = ifa; i < ifc; i++) | |||
if (buf[i] == '\n') | |||
buf[i] = ' '; | |||
/* Terminate the return type at the blank before the function name. */
buf[ifo++] = '\0'; | |||
register_type(buf + ift); | |||
if (indisplay) | |||
puts(".Ed"); | |||
indisplay = wantsp = 0; | |||
printf(".Ft %s", buf + ift); | |||
/* A single '*' stuck to the function name is moved over to the type. */
if (buf[ifo] == '*') { | |||
fputs(" *", stdout); | |||
ifo++; | |||
} | |||
putchar('\n'); | |||
/* Overwrite '(' so the function name becomes a string of its own. */
buf[ifa++] = '\0'; | |||
printf(".Fo %s\n", buf + ifo); | |||
dict_put(buf + ifo, 0, MDOC_Fo); | |||
/* Overwrite ')' so the argument list is NUL-terminated. */
buf[ifc++] = '\0'; | |||
/* One pass per comma-separated argument; cp is the next argument or NULL. */
for (;;) { | |||
cp = strchr(buf + ifa, ','); | |||
if (cp != NULL) { | |||
cp2 = cp; | |||
*cp++ = '\0'; | |||
} else | |||
cp2 = strchr(buf + ifa, '\0'); | |||
/* Walk cp2 back over the trailing identifier, i.e. the argument name. */
while (isalnum((unsigned char)cp2[-1]) || | |||
'_' == cp2[-1]) | |||
cp2--; | |||
if ('\0' != *cp2) | |||
dict_put(cp2, 0, MDOC_Fa); | |||
register_type(buf + ifa); | |||
/* Arguments containing a space are quoted. */
if (strchr(buf + ifa, ' ') == NULL) | |||
printf(".Fa %s\n", buf + ifa); | |||
else | |||
printf(".Fa \"%s\"\n", buf + ifa); | |||
if (cp == NULL) | |||
break; | |||
while (*cp == ' ' || *cp == '\t') | |||
cp++; | |||
ifa = cp - buf; | |||
} | |||
puts(".Fc"); | |||
if (buf[ifc] == ';') | |||
ifc++; | |||
/* Whatever follows on the declaration's last line is printed as is. */
if (ifc < inl) { | |||
buf[inl] = '\0'; | |||
puts(buf + ifc); | |||
} | |||
start = inl < end ? inl + 1 : end; | |||
goto again; | |||
} | |||
} | |||
/* Literal text: open the display if needed, or print the pending blank line. */
if ( ! indisplay) | |||
puts(".Bd -literal"); | |||
else if (wantsp) | |||
putchar('\n'); | |||
indisplay = 1; | |||
wantsp = 0; | |||
/* NOTE(review): "last" is not declared in this function; presumably a file-scope variable -- confirm. */
for (last = '\n'; start < end; start++) { | |||
/* | |||
* Handle accidental macros (newline starting with | |||
* control character) and escapes. | |||
*/ | |||
if ('\n' == last) { | |||
if ('\n' == buf[start]) | |||
goto again; | |||
if ('.' == buf[start] || '\'' == buf[start]) | |||
printf("\\&"); | |||
} | |||
putchar(last = buf[start]); | |||
/* A backslash is followed by 'e', so the output reads "\e". */
if ('\\' == buf[start]) | |||
printf("e"); | |||
} | |||
if ('\n' != last) | |||
putchar('\n'); | |||
/* indisplay was set to 1 above and is not cleared on this path. */
if (indisplay) | |||
puts(".Ed"); | |||
} | } | ||
/*
 * Helper for dosynopsisop().
 * Scan buf[start..end) for a ']' that is not paired with a '[' inside
 * that same range; bracket pairs nested within the range are skipped.
 * Returns 1 as soon as such a ']' is found, 0 if the range ends first.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth, pos;

	depth = 0;
	pos = start;
	while (pos < end) {
		switch (buf[pos++]) {
		case '[':
			/* Opens a nested pair. */
			depth++;
			break;
		case ']':
			if (0 == depth)
				return(1);
			/* Closes a nested pair. */
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
/* | |||
* If we're in the SYNOPSIS section and we've encounter braces in an | |||
* ordinary paragraph, then try to see whether we're an [-option]. | |||
* Do this, if we're an opening bracket, by first seeing if we have a | |||
* matching end via hasmatch(). | |||
* If we're an ending bracket, see if we have a stack already. | |||
*/ | |||
static int | |||
dosynopsisop(struct state *st, const char *buf, | |||
size_t *start, size_t end, size_t *opstack) | |||
{ | |||
assert('[' == buf[*start] || ']' == buf[*start]); | |||
if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { | |||
mdoc_newln(st); | |||
puts(".Oo"); | |||
(*opstack)++; | |||
} else if ('[' == buf[*start]) | |||
return(0); | |||
if (']' == buf[*start] && *opstack > 0) { | |||
mdoc_newln(st); | |||
puts(".Oc"); | |||
(*opstack)--; | |||
} else if (']' == buf[*start]) | |||
return(0); | |||
(*start)++; | |||
last = '\n'; | |||
while (' ' == buf[*start]) | |||
(*start)++; | |||
return(1); | |||
} | |||
/* | |||
* Format multiple "Nm" manpage names in the NAME section. | |||
* From the perspective of external callers, | |||
* always stays in OUST_NL/wantws mode, | |||
* but its children do use OUST_MAC. | |||
*/ | |||
static void | |||
donamenm(struct state *st, const char *buf, size_t *start, size_t end) | |||
{ | |||
size_t word; | |||
assert(OUST_NL == st->oust); | |||
assert(st->wantws); | |||
while (*start < end && isspace((unsigned char)buf[*start])) | |||
(*start)++; | |||
if (end == *start) { | |||
puts(".Nm unknown"); | |||
return; | |||
} | |||
while (*start < end) { | |||
for (word = *start; word < end; word++) | |||
if (',' == buf[word]) | |||
break; | |||
formatcodeln(st, "Nm", buf, start, word, 1); | |||
if (*start == end) { | |||
mdoc_newln(st); | |||
break; | |||
} | |||
assert(',' == buf[*start]); | |||
printf(" ,"); | |||
mdoc_newln(st); | |||
(*start)++; | |||
while (*start < end && isspace((unsigned char)buf[*start])) | |||
(*start)++; | |||
} | |||
} | |||
/* | |||
* Ordinary paragraph. | * Ordinary paragraph. | ||
* Well, this is really the hardest--POD seems to assume that, for | * Well, this is really the hardest--POD seems to assume that, for | ||
* example, a leading space implies a newline, and so on. | * example, a leading space implies a newline, and so on. | ||
* Lots of other snakes in the grass: escaping a newline followed by a | * Lots of other snakes in the grass: escaping a newline followed by a | ||
* period (accidental mdoc(7) control), double-newlines after macro | * period (accidental mdoc(7) control), double-newlines after macro | ||
* passages, etc. | * passages, etc. | ||
* | |||
* Uses formatcode() to go to OUST_MAC mode | |||
* and outbuf_flush() to go to OUST_TXT mode. | |||
* In text mode, wantws requests white space before the text | |||
* currently contained in the outbuf, not before upcoming text. | |||
* Must make sure to go back to OUST_NL/wantws mode before returning. | |||
*/ | */ | ||
static void | static void | ||
ordinary(struct state *st, const char *buf, size_t start, size_t end) | ordinary(struct state *st, const char *buf, size_t start, size_t end) | ||
{ | { | ||
int last; | size_t i, j, opstack, wend; | ||
size_t i, j; | enum mdoc_type mtype; | ||
int eos, noeos, seq; | |||
char savechar; | |||
if ( ! st->parsing || st->paused) | if ( ! st->parsing || st->paused) | ||
return; | return; | ||
|
|
||
* we're in "name - description" format. | * we're in "name - description" format. | ||
* To wit, print out a "Nm" and "Nd" in that format. | * To wit, print out a "Nm" and "Nd" in that format. | ||
*/ | */ | ||
if (st->isname) { | if (SECT_NAME == st->sect) { | ||
for (i = end - 1; i > start; i--) | for (i = end - 2; i > start; i--) | ||
if ('-' == buf[i]) | if ('-' == buf[i] && | ||
isspace((unsigned char)buf[i + 1])) | |||
break; | break; | ||
if ('-' == buf[i]) { | if ('-' == buf[i]) { | ||
j = i; | j = i; | ||
|
|
||
for ( ; i > start; i--) | for ( ; i > start; i--) | ||
if ('-' != buf[i]) | if ('-' != buf[i]) | ||
break; | break; | ||
printf(".Nm %.*s\n", | donamenm(st, buf, &start, i + 1); | ||
(int)((i + 1) - start), &buf[start]); | start = j + 1; | ||
printf(".Nd %.*s\n", | while (start < end && | ||
(int)(end - (j + 1)), &buf[j + 1]); | isspace((unsigned char)buf[start])) | ||
start++; | |||
while (start < end && '.' == buf[end - 1]) | |||
end--; | |||
formatcodeln(st, "Nd", buf, &start, end, 1); | |||
mdoc_newln(st); | |||
return; | return; | ||
} | } | ||
} | } | ||
|
|
||
st->haspar = 0; | st->haspar = 0; | ||
last = '\n'; | last = '\n'; | ||
opstack = 0; | |||
while (start < end) { | for (seq = 0; start < end; seq++) { | ||
/* | /* | ||
* Loop til we get either to a newline or escape. | * Loop til we get either to a newline or escape. | ||
* Escape initial control characters. | * Escape initial control characters. | ||
*/ | */ | ||
while (start < end) { | while (start < end) { | ||
if (start < end - 1 && '<' == buf[start + 1]) | if (start < end - 1 && '<' == buf[start + 1] && | ||
'A' <= buf[start] && 'Z' >= buf[start]) | |||
break; | break; | ||
else if ('\n' == buf[start]) | else if ('\n' == buf[start]) | ||
break; | break; | ||
else if ('\n' == last && '.' == buf[start]) | else if ('\n' == last && '.' == buf[start]) | ||
printf("\\&"); | outbuf_addstr(st, "\\&"); | ||
else if ('\n' == last && '\'' == buf[start]) | else if ('\n' == last && '\'' == buf[start]) | ||
printf("\\&"); | outbuf_addstr(st, "\\&"); | ||
putchar(last = buf[start++]); | /* | ||
} | * If we're in the SYNOPSIS, have square | ||
* brackets indicate that we're opening and | |||
* closing an optional context. | |||
*/ | |||
if (start < end - 1 && '<' == buf[start + 1]) { | if (SECT_SYNOPSIS == st->sect && | ||
('[' == buf[start] || | |||
']' == buf[start]) && | |||
dosynopsisop(st, buf, | |||
&start, end, &opstack)) | |||
continue; | |||
/* Merely buffer non-whitespace. */ | |||
last = buf[start++]; | |||
if ( ! isspace(last)) | |||
outbuf_addchar(st); | |||
if (start < end && | |||
! isspace((unsigned char)buf[start - 1]) && | |||
! isspace((unsigned char)buf[start])) | |||
continue; | |||
/* | /* | ||
* We've encountered a format code. | * Found the end of a word. | ||
* This is going to trigger a macro no matter | * Rewind trailing delimiters. | ||
* what, so print a newline now. | |||
* Then print the (possibly nested) macros and | |||
* following that, a newline. | |||
*/ | */ | ||
if (formatcode(buf, &start, end, 0, last, 0)) | |||
putchar(last = '\n'); | eos = noeos = 0; | ||
} else if (start < end && '\n' == buf[start]) { | for (wend = st->outbuflen; wend; wend--) | ||
if ('.' == st->outbuf[wend - 1] || | |||
'!' == st->outbuf[wend - 1] || | |||
'?' == st->outbuf[wend - 1]) | |||
eos = 1; | |||
else if ('|' == st->outbuf[wend - 1] || | |||
',' == st->outbuf[wend - 1] || | |||
';' == st->outbuf[wend - 1] || | |||
':' == st->outbuf[wend - 1]) | |||
noeos = 1; | |||
else if ('\'' != st->outbuf[wend - 1] && | |||
'"' != st->outbuf[wend - 1] && | |||
')' != st->outbuf[wend - 1] && | |||
']' != st->outbuf[wend - 1]) | |||
break; | |||
eos &= ! noeos; | |||
/* | /* | ||
* Print the newline only if we haven't already | * Detect function names. | ||
* printed a newline. | |||
*/ | */ | ||
if (last != '\n') | |||
putchar(last = buf[start]); | mtype = MDOC_Fa; | ||
savechar = '\0'; | |||
if (wend && ')' == st->outbuf[wend] && | |||
'(' == st->outbuf[wend - 1]) { | |||
mtype = dict_get(st->outbuf, --wend); | |||
if (MDOC_Dv == mtype) | |||
mtype = MDOC_Fo; | |||
if (MDOC_Fo == mtype || MDOC_MAX == mtype) { | |||
st->outbuflen = wend; | |||
st->outbuf[wend] = '\0'; | |||
mdoc_newln(st); | |||
if (MDOC_Fo == mtype) | |||
fputs(".Fn", stdout); | |||
else | |||
fputs(".Xr", stdout); | |||
st->oust = OUST_MAC; | |||
} | |||
} else { | |||
mtype = dict_get(st->outbuf, wend); | |||
if (MDOC_Dv == mtype) { | |||
savechar = st->outbuf[wend]; | |||
st->outbuf[wend] = '\0'; | |||
mdoc_newln(st); | |||
fputs(".Dv", stdout); | |||
st->oust = OUST_MAC; | |||
} else | |||
mtype = MDOC_Fa; | |||
} | |||
/* | |||
* On whitespace, flush the output buffer | |||
* and allow breaking to a macro line. | |||
*/ | |||
outbuf_flush(st); | |||
/* | |||
* End macro lines, and | |||
* end text lines at the end of sentences. | |||
*/ | |||
if (OUST_MAC == st->oust || (eos && wend > 1 && | |||
islower((unsigned char)st->outbuf[wend - 1]))) { | |||
if (MDOC_MAX == mtype) | |||
fputs(" 3", stdout); | |||
if (MDOC_Fa != mtype) { | |||
if (MDOC_Dv == mtype) | |||
st->outbuf[wend] = savechar; | |||
else | |||
wend += 2; | |||
while ('\0' != st->outbuf[wend]) | |||
printf(" %c", | |||
st->outbuf[wend++]); | |||
} | |||
mdoc_newln(st); | |||
} | |||
/* Advance to the next word. */ | |||
while ('\n' != buf[start] && | |||
isspace((unsigned char)buf[start])) | |||
start++; | |||
st->wantws = 1; | |||
} | |||
if (start < end - 1 && '<' == buf[start + 1] && | |||
'A' <= buf[start] && 'Z' >= buf[start]) { | |||
formatcode(st, buf, &start, end, 0, seq); | |||
if (OUST_MAC == st->oust) { | |||
/* | |||
* Let mdoc(7) handle trailing punctuation. | |||
* XXX Some punctuation characters | |||
* are not handled yet. | |||
*/ | |||
if ((start == end - 1 || | |||
(start < end - 1 && | |||
(' ' == buf[start + 1] || | |||
'\n' == buf[start + 1]))) && | |||
NULL != strchr("|.,;:?!)]", buf[start])) { | |||
putchar(' '); | |||
putchar(buf[start++]); | |||
} | |||
if (st->wantws || | |||
' ' == buf[start] || | |||
'\n' == buf[start]) | |||
mdoc_newln(st); | |||
/* | |||
* Consume all whitespace | |||
* so we don't accidentally start | |||
* an implicit literal line. | |||
*/ | |||
while (start < end && ' ' == buf[start]) | |||
start++; | |||
/* | |||
* Some text is following. | |||
* Implement requested spacing. | |||
*/ | |||
if ( ! st->wantws && start < end && | |||
('<' != buf[start + 1] || | |||
'A' > buf[start] || | |||
'Z' < buf[start])) { | |||
fputs(" Ns", stdout); | |||
st->wantws = 1; | |||
} | |||
} | |||
} else if (start < end && '\n' == buf[start]) { | |||
outbuf_flush(st); | |||
mdoc_newln(st); | |||
if (++start >= end) | if (++start >= end) | ||
continue; | continue; | ||
/* | /* | ||
|
|
||
* have a macro subsequent it, which may be | * have a macro subsequent it, which may be | ||
* possible if we have an escape next. | * possible if we have an escape next. | ||
*/ | */ | ||
if (' ' == buf[start] || '\t' == buf[start]) { | if (' ' == buf[start] || '\t' == buf[start]) | ||
puts(".br"); | puts(".br"); | ||
last = '\n'; | |||
} | |||
for ( ; start < end; start++) | for ( ; start < end; start++) | ||
if (' ' != buf[start] && '\t' != buf[start]) | if (' ' != buf[start] && '\t' != buf[start]) | ||
break; | break; | ||
} else if (start < end) { | } | ||
/* | |||
* Default: print the character. | |||
* Escape initial control characters. | |||
*/ | |||
if ('\n' == last && '.' == buf[start]) | |||
printf("\\&"); | |||
else if ('\n' == last && '\'' == buf[start]) | |||
printf("\\&"); | |||
putchar(last = buf[start++]); | |||
} | |||
} | } | ||
outbuf_flush(st); | |||
if (last != '\n') | mdoc_newln(st); | ||
putchar('\n'); | |||
} | } | ||
/* | /* | ||
|
|
||
* (default: starts with "="). | * (default: starts with "="). | ||
*/ | */ | ||
static void | static void | ||
dopar(struct state *st, const char *buf, size_t start, size_t end) | dopar(struct state *st, char *buf, size_t start, size_t end) | ||
{ | { | ||
assert(OUST_NL == st->oust); | |||
assert(st->wantws); | |||
if (end == start) | if (end == start) | ||
return; | return; | ||
if (' ' == buf[start] || '\t' == buf[start]) | if (' ' == buf[start] || '\t' == buf[start]) | ||
|
|
||
*/ | */ | ||
static void | static void | ||
dofile(const struct args *args, const char *fname, | dofile(const struct args *args, const char *fname, | ||
const struct tm *tm, const char *buf, size_t sz) | const struct tm *tm, char *buf, size_t sz) | ||
{ | { | ||
size_t sup, end, i, cur = 0; | |||
struct state st; | |||
const char *section, *date; | |||
char datebuf[64]; | char datebuf[64]; | ||
struct state st; | |||
const char *fbase, *fext, *section, *date, *format; | |||
char *title, *cp; | char *title, *cp; | ||
size_t cur, end; | |||
int verb; | |||
if (0 == sz) | if (0 == sz) | ||
return; | return; | ||
/* Title is last path component of the filename. */ | /* | ||
* Parsing the filename is almost always required, | |||
* except when both the title and the section | |||
* are provided on the command line. | |||
*/ | |||
if (NULL != args->title) | if (NULL == args->title || NULL == args->section) { | ||
title = strdup(args->title); | fbase = strrchr(fname, '/'); | ||
else if (NULL != (cp = strrchr(fname, '/'))) | if (NULL == fbase) | ||
title = strdup(cp + 1); | fbase = fname; | ||
else | else | ||
title = strdup(fname); | fbase++; | ||
fext = strrchr(fbase, '.'); | |||
} else | |||
fext = NULL; | |||
/* | |||
* The title will be converted to uppercase, | |||
* so it needs to be copied. | |||
*/ | |||
title = (NULL != args->title) ? strdup(args->title) : | |||
(NULL != fext) ? strndup(fbase, fext - fbase) : | |||
strdup(fbase); | |||
if (NULL == title) { | if (NULL == title) { | ||
perror(NULL); | perror(NULL); | ||
exit(EXIT_FAILURE); | exit(EXIT_FAILURE); | ||
|
|
||
/* Section is 1 unless suffix is "pm". */ | /* Section is 1 unless suffix is "pm". */ | ||
if (NULL == (section = args->section)) { | section = (NULL != args->section) ? args->section : | ||
section = "1"; | (NULL == fext || strcmp(fext + 1, "pm")) ? "1" : | ||
if (NULL != (cp = strrchr(title, '.'))) { | PERL_SECTION; | ||
*cp++ = '\0'; | |||
if (0 == strcmp(cp, "pm")) | |||
section = "3p"; | |||
} | |||
} | |||
/* Date. Or the given "tm" if not supplied. */ | /* Date. Or the given "tm" if not supplied. */ | ||
if (NULL == (date = args->date)) { | date = args->date; | ||
strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm); | format = (NULL == date) ? "%B %d, %Y" : | ||
strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $"; | |||
if (NULL != format) { | |||
strftime(datebuf, sizeof(datebuf), format, tm); | |||
date = datebuf; | date = datebuf; | ||
} | } | ||
|
|
||
free(title); | free(title); | ||
dict_init(); | |||
memset(&st, 0, sizeof(struct state)); | memset(&st, 0, sizeof(struct state)); | ||
st.oust = OUST_NL; | |||
st.wantws = 1; | |||
assert(sz > 0); | assert(sz > 0); | ||
/* Main loop over file contents. */ | /* Main loop over file contents. */ | ||
while (cur < sz) { | cur = 0; | ||
for (;;) { | |||
while (cur < sz && '\n' == buf[cur]) | |||
cur++; | |||
if (cur >= sz) | |||
break; | |||
verb = isspace((unsigned char)buf[cur]); | |||
/* Read until next paragraph. */ | /* Read until next paragraph. */ | ||
for (i = cur + 1; i < sz; i++) | |||
if ('\n' == buf[i] && '\n' == buf[i - 1]) { | for (end = cur + 1; end + 1 < sz; end++) | ||
/* Consume blank paragraphs. */ | if ('\n' == buf[end] && '\n' == buf[end + 1] && | ||
while (i + 1 < sz && '\n' == buf[i + 1]) | !(verb && end + 2 < sz && | ||
i++; | isspace((unsigned char)buf[end + 2]))) | ||
break; | break; | ||
} | |||
/* Adjust end marker for EOF. */ | /* Adjust end marker for EOF. */ | ||
end = i < sz ? i - 1 : | |||
('\n' == buf[sz - 1] ? sz - 1 : sz); | |||
sup = i < sz ? end + 2 : sz; | |||
if (end < sz && '\n' != buf[end]) | |||
end++; | |||
/* Process paragraph and adjust start. */ | /* Process paragraph and adjust start. */ | ||
dopar(&st, buf, cur, end); | dopar(&st, buf, cur, end); | ||
cur = sup; | cur = end + 2; | ||
} | } | ||
dict_destroy(); | |||
} | } | ||
/* | /* | ||
|
|
||
time_t ttm; | time_t ttm; | ||
struct stat st; | struct stat st; | ||
assert(NULL != fname); | |||
fd = 0 != strcmp("-", fname) ? | fd = 0 != strcmp("-", fname) ? | ||
open(fname, O_RDONLY, 0) : STDIN_FILENO; | open(fname, O_RDONLY, 0) : STDIN_FILENO; | ||
|
|
||
/* Accept only a single input file. */ | /* Accept only a single input file. */ | ||
if (argc > 2) | if (argc > 1) | ||
return(EXIT_FAILURE); | goto usage; | ||
else if (1 == argc) | else if (1 == argc) | ||
fname = *argv; | fname = *argv; | ||
|
|
||
usage: | usage: | ||
fprintf(stderr, "usage: %s [-d date] " | fprintf(stderr, "usage: %s [-d date] " | ||
"[-n title] [-s section]\n", name); | "[-n title] [-s section] [file]\n", name); | ||
return(EXIT_FAILURE); | return(EXIT_FAILURE); | ||
} | } |