version 1.4, 2014/03/20 15:29:57 |
version 1.51, 2015/02/19 11:09:44 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv> |
* Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv> |
|
* Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
#include <unistd.h> |
|
|
|
#include "dict.h" |
|
|
|
/* |
|
* In what section can we find Perl module manuals? |
|
* Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p. |
|
* XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL. |
|
*/ |
|
#define PERL_SECTION "3p" |
|
|
struct args { |
struct args { |
const char *title; /* override "Dt" title */ |
const char *title; /* override "Dt" title */ |
const char *date; /* override "Dd" date */ |
const char *date; /* override "Dd" date */ |
|
|
LIST__MAX |
LIST__MAX |
}; |
}; |
|
|
|
/*
 * Manual sections the converter tells apart.
 */
enum	sect {
	SECT_NONE = 0,		/* none of the sections below */
	SECT_NAME = 1,		/* NAME section */
	SECT_SYNOPSIS = 2	/* SYNOPSIS section */
};
|
|
|
/*
 * State of the mdoc output stream.
 */
enum	outstate {
	OUST_NL = 0,	/* a new output line has just been started */
	OUST_TXT = 1,	/* in the middle of writing a text line */
	OUST_MAC = 2	/* in the middle of writing a macro line */
};
|
|
struct state { |
struct state { |
|
const char *fname; /* file being parsed */ |
int parsing; /* after =cut of before command */ |
int parsing; /* after =cut of before command */ |
int paused; /* in =begin and before =end */ |
int paused; /* in =begin and before =end */ |
int haspar; /* in paragraph: do we need Pp? */ |
enum sect sect; /* which section are we in? */ |
int isname; /* are we the NAME section? */ |
|
const char *fname; /* file being parsed */ |
|
#define LIST_STACKSZ 128 |
#define LIST_STACKSZ 128 |
enum list lstack[LIST_STACKSZ]; /* open lists */ |
enum list lstack[LIST_STACKSZ]; /* open lists */ |
size_t lpos; /* where in list stack */ |
size_t lpos; /* where in list stack */ |
|
int haspar; /* in paragraph: do we need Pp? */ |
|
enum outstate oust; /* state of the mdoc output stream */ |
|
int wantws; /* let mdoc(7) output whitespace here */ |
|
char *outbuf; /* text buffered for output */ |
|
size_t outbufsz; /* allocated size of outbuf */ |
|
size_t outbuflen; /* current length of outbuf */ |
}; |
}; |
|
|
enum fmt { |
enum fmt { |
Line 108 static const char fmts[FMT__MAX] = { |
|
Line 135 static const char fmts[FMT__MAX] = { |
|
'Z' /* FMT_NULL */ |
'Z' /* FMT_NULL */ |
}; |
}; |
|
|
|
static unsigned char last; |
|
|
|
|
|
static void |
|
outbuf_grow(struct state *st, size_t by) |
|
{ |
|
|
|
st->outbufsz += (by / 128 + 1) * 128; |
|
st->outbuf = realloc(st->outbuf, st->outbufsz); |
|
if (NULL == st->outbuf) { |
|
perror(NULL); |
|
exit(EXIT_FAILURE); |
|
} |
|
} |
|
|
|
static void |
|
outbuf_addchar(struct state *st) |
|
{ |
|
|
|
if (st->outbuflen + 2 >= st->outbufsz) |
|
outbuf_grow(st, 1); |
|
st->outbuf[st->outbuflen++] = last; |
|
if ('\\' == last) |
|
st->outbuf[st->outbuflen++] = 'e'; |
|
st->outbuf[st->outbuflen] = '\0'; |
|
} |
|
|
|
static void |
|
outbuf_addstr(struct state *st, const char *str) |
|
{ |
|
size_t slen; |
|
|
|
slen = strlen(str); |
|
if (st->outbuflen + slen >= st->outbufsz) |
|
outbuf_grow(st, slen); |
|
memcpy(st->outbuf + st->outbuflen, str, slen+1); |
|
st->outbuflen += slen; |
|
last = str[slen - 1]; |
|
} |
|
|
|
static void |
|
outbuf_flush(struct state *st) |
|
{ |
|
|
|
if (0 == st->outbuflen) |
|
return; |
|
|
|
if (OUST_TXT == st->oust && st->wantws) |
|
putchar(' '); |
|
|
|
fputs(st->outbuf, stdout); |
|
*st->outbuf = '\0'; |
|
st->outbuflen = 0; |
|
|
|
if (OUST_NL == st->oust) |
|
st->oust = OUST_TXT; |
|
} |
|
|
|
static void |
|
mdoc_newln(struct state *st) |
|
{ |
|
|
|
if (OUST_NL == st->oust) |
|
return; |
|
|
|
putchar('\n'); |
|
last = '\n'; |
|
st->oust = OUST_NL; |
|
st->wantws = 1; |
|
} |
|
|
/* |
/* |
* Given buf[*start] is at the start of an escape name, read til the end |
* Given buf[*start] is at the start of an escape name, read til the end |
* of the escape ('>') then try to do something with it. |
* of the escape ('>') then try to do something with it. |
* Sets start to be one after the '>'. |
* Sets start to be one after the '>'. |
|
* |
|
* This function does not care about output modes, |
|
* it merely appends text to the output buffer, |
|
* which can then be used in any mode. |
*/ |
*/ |
static void |
static void |
formatescape(const char *buf, size_t *start, size_t end) |
formatescape(struct state *st, const char *buf, size_t *start, size_t end) |
{ |
{ |
char esc[16]; /* no more needed */ |
char esc[16]; /* no more needed */ |
size_t i, max; |
size_t i, max; |
Line 141 formatescape(const char *buf, size_t *start, size_t en |
|
Line 243 formatescape(const char *buf, size_t *start, size_t en |
|
* TODO: right now, we only recognise the named escapes. |
* TODO: right now, we only recognise the named escapes. |
* Just let the rest of them go. |
* Just let the rest of them go. |
*/ |
*/ |
if (0 == strcmp(esc, "lt")) |
if (0 == strcmp(esc, "lt")) |
printf("\\(la"); |
outbuf_addstr(st, "\\(la"); |
else if (0 == strcmp(esc, "gt")) |
else if (0 == strcmp(esc, "gt")) |
printf("\\(ra"); |
outbuf_addstr(st, "\\(ra"); |
else if (0 == strcmp(esc, "vb")) |
else if (0 == strcmp(esc, "verbar")) |
printf("\\(ba"); |
outbuf_addstr(st, "\\(ba"); |
else if (0 == strcmp(esc, "sol")) |
else if (0 == strcmp(esc, "sol")) |
printf("\\(sl"); |
outbuf_addstr(st, "\\(sl"); |
} |
} |
|
|
/* |
/* |
* Skip space characters. |
* Run some heuristics to intuit a link format. |
|
* I set "start" to be the end of the sequence (last right-carrot) so |
|
* that the caller can safely just continue processing. |
|
* If this is just an empty tag, I'll return 0. |
|
* |
|
* Always operates in OUST_MAC mode. |
|
* Mode handling is done by the caller. |
*/ |
*/ |
|
static int |
|
trylink(const char *buf, size_t *start, size_t end, size_t dsz) |
|
{ |
|
size_t linkstart, realend, linkend, |
|
i, j, textsz, stack; |
|
|
|
/* |
|
* Scan to the start of the terminus. |
|
* This function is more or less replicated in the formatcode() |
|
* for null or index formatting codes. |
|
* However, we're slightly different because we might have |
|
* nested escapes we need to ignore. |
|
*/ |
|
stack = 0; |
|
for (linkstart = realend = *start; realend < end; realend++) { |
|
if ('<' == buf[realend]) |
|
stack++; |
|
if ('>' != buf[realend]) |
|
continue; |
|
else if (stack-- > 0) |
|
continue; |
|
if (dsz == 1) |
|
break; |
|
assert(realend > 0); |
|
if (' ' != buf[realend - 1]) |
|
continue; |
|
for (i = realend, j = 0; i < end && j < dsz; j++) |
|
if ('>' != buf[i++]) |
|
break; |
|
if (dsz == j) |
|
break; |
|
} |
|
|
|
/* Ignore stubs. */ |
|
if (realend == end || realend == *start) |
|
return(0); |
|
|
|
/* Set linkend to the end of content. */ |
|
linkend = dsz > 1 ? realend - 1 : realend; |
|
|
|
/* Re-scan to see if we have a title or section. */ |
|
for (textsz = *start; textsz < linkend; textsz++) |
|
if ('|' == buf[textsz] || '/' == buf[textsz]) |
|
break; |
|
|
|
if (textsz < linkend && '|' == buf[textsz]) { |
|
/* With title: set start, then end at section. */ |
|
linkstart = textsz + 1; |
|
textsz = textsz - *start; |
|
for (i = linkstart; i < linkend; i++) |
|
if ('/' == buf[i]) |
|
break; |
|
if (i < linkend) |
|
linkend = i; |
|
} else if (textsz < linkend && '/' == buf[textsz]) { |
|
/* With section: set end at section. */ |
|
linkend = textsz; |
|
textsz = 0; |
|
} else |
|
/* No title, no section. */ |
|
textsz = 0; |
|
|
|
*start = realend; |
|
j = linkend - linkstart; |
|
|
|
/* Do we have only subsection material? */ |
|
if (0 == j && '/' == buf[linkend]) { |
|
linkstart = linkend + 1; |
|
linkend = dsz > 1 ? realend - 1 : realend; |
|
if (0 == (j = linkend - linkstart)) |
|
return(0); |
|
printf("Sx %.*s", (int)j, &buf[linkstart]); |
|
return(1); |
|
} else if (0 == j) |
|
return(0); |
|
|
|
/* See if we qualify as being a link or not. */ |
|
if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) || |
|
(j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) || |
|
(j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) || |
|
(j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) || |
|
(j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) || |
|
(j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) { |
|
/* Gross. */ |
|
printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : |
|
realend) - linkstart), &buf[linkstart]); |
|
return(1); |
|
} |
|
|
|
/* See if we qualify as a mailto. */ |
|
if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) { |
|
printf("Mt %.*s", (int)j, &buf[linkstart]); |
|
return(1); |
|
} |
|
|
|
/* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */ |
|
if ((j > 3 && ')' == buf[linkend - 1]) && |
|
('(' == buf[linkend - 3])) { |
|
printf("Xr %.*s %c", (int)(j - 3), |
|
&buf[linkstart], buf[linkend - 2]); |
|
return(1); |
|
} else if ((j > 4 && ')' == buf[linkend - 1]) && |
|
('(' == buf[linkend - 4])) { |
|
printf("Xr %.*s %.*s", (int)(j - 4), |
|
&buf[linkstart], 2, &buf[linkend - 3]); |
|
return(1); |
|
} else if ((j > 5 && ')' == buf[linkend - 1]) && |
|
('(' == buf[linkend - 5])) { |
|
printf("Xr %.*s %.*s", (int)(j - 5), |
|
&buf[linkstart], 3, &buf[linkend - 4]); |
|
return(1); |
|
} |
|
|
|
/* Last try: do we have a double-colon? */ |
|
for (i = linkstart + 1; i < linkend; i++) |
|
if (':' == buf[i] && ':' == buf[i - 1]) |
|
break; |
|
|
|
if (i < linkend) |
|
printf("Xr %.*s " PERL_SECTION, |
|
(int)j, &buf[linkstart]); |
|
else |
|
printf("Xr %.*s 1", (int)j, &buf[linkstart]); |
|
|
|
return(1); |
|
} |
|
|
|
/* |
|
* Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section, |
|
* then it's likely that we're a flag. |
|
* Our flag might be followed by an argument, so make sure that we're |
|
* accounting for that, too. |
|
* If we don't have a flag at all, however, then assume we're an "Ar". |
|
* |
|
* Always operates in OUST_MAC mode. |
|
* Mode handling is done by the caller. |
|
*/ |
static void |
static void |
skipspace(const char *buf, size_t *start, size_t end) |
dosynopsisfl(const char *buf, size_t *start, size_t end) |
{ |
{ |
|
size_t i; |
|
again: |
|
assert(*start + 1 < end); |
|
assert('-' == buf[*start]); |
|
|
while (*start < end && ' ' == buf[*start]) |
if ( ! isalnum((int)buf[*start + 1]) && |
(*start)++; |
'?' != buf[*start + 1] && |
|
'-' != buf[*start + 1]) { |
|
(*start)--; |
|
fputs("Ar ", stdout); |
|
return; |
|
} |
|
|
|
(*start)++; |
|
for (i = *start; i < end; i++) |
|
if (isalnum((int)buf[i])) |
|
continue; |
|
else if ('?' == buf[i]) |
|
continue; |
|
else if ('-' == buf[i]) |
|
continue; |
|
else if ('_' == buf[i]) |
|
continue; |
|
else |
|
break; |
|
|
|
assert(i < end); |
|
|
|
if ( ! (' ' == buf[i] || '>' == buf[i])) { |
|
printf("Ar "); |
|
return; |
|
} |
|
|
|
printf("Fl "); |
|
if (end - *start > 1 && |
|
isupper((int)buf[*start]) && |
|
islower((int)buf[*start + 1]) && |
|
(end - *start == 2 || |
|
' ' == buf[*start + 2])) |
|
printf("\\&"); |
|
printf("%.*s ", (int)(i - *start), &buf[*start]); |
|
*start = i; |
|
|
|
if (' ' == buf[i]) { |
|
while (i < end && ' ' == buf[i]) |
|
i++; |
|
assert(i < end); |
|
if ('-' == buf[i]) { |
|
*start = i; |
|
goto again; |
|
} |
|
printf("Ar "); |
|
*start = i; |
|
} |
} |
} |
|
|
/* |
/* |
Line 167 skipspace(const char *buf, size_t *start, size_t end) |
|
Line 463 skipspace(const char *buf, size_t *start, size_t end) |
|
* like X<...> and can contain nested format codes. |
* like X<...> and can contain nested format codes. |
* This consumes the whole format code, and any nested format codes, til |
* This consumes the whole format code, and any nested format codes, til |
* the end of matched production. |
* the end of matched production. |
* If "reentrant", then we're being called after a macro has already |
* If "nomacro", then we don't print any macros, just contained data |
* been printed to the current line. |
* (e.g., following "Sh" or "Nm"). |
* "last" is set to the last read character: this is used to determine |
* "pos" is only significant in SYNOPSIS, and should be 0 when invoked |
* whether we should buffer with space or not. |
* as the first format code on a line (for decoration as an "Nm"), |
* If "nomacro", then we don't print any macros, just contained data. |
* non-zero otherwise. |
|
* |
|
* Output mode handling is most complicated here. |
|
* We may enter in any mode. |
|
* We usually exit in OUST_MAC mode, except when |
|
* entering without OUST_MAC and the code is invalid. |
*/ |
*/ |
static int |
static int |
formatcode(const char *buf, size_t *start, |
formatcode(struct state *st, const char *buf, size_t *start, |
size_t end, int reentrant, int last, int nomacro) |
size_t end, int nomacro, int pos) |
{ |
{ |
|
size_t i, j, dsz; |
enum fmt fmt; |
enum fmt fmt; |
|
int wantws; |
|
unsigned char uc; |
|
|
assert(*start + 1 < end); |
assert(*start + 1 < end); |
assert('<' == buf[*start + 1]); |
assert('<' == buf[*start + 1]); |
|
|
|
/* |
|
* First, look up the format code. |
|
* If it's not valid, treat it as a NOOP. |
|
*/ |
for (fmt = 0; fmt < FMT__MAX; fmt++) |
for (fmt = 0; fmt < FMT__MAX; fmt++) |
if (buf[*start] == fmts[fmt]) |
if (buf[*start] == fmts[fmt]) |
break; |
break; |
|
|
/* Invalid macros are just regular text. */ |
/* |
|
* Determine whether we're overriding our delimiter. |
|
* According to POD, if we have more than one '<' followed by a |
|
* space, then we need a space followed by matching '>' to close |
|
* the expression. |
|
* Otherwise we use the usual '<' and '>' matched pair. |
|
*/ |
|
i = *start + 1; |
|
while (i < end && '<' == buf[i]) |
|
i++; |
|
assert(i > *start + 1); |
|
dsz = i - (*start + 1); |
|
if (dsz > 1 && (i >= end || ' ' != buf[i])) |
|
dsz = 1; |
|
|
if (FMT__MAX == fmt) { |
/* Remember, if dsz>1, to jump the trailing space. */ |
putchar(buf[*start]); |
*start += dsz + 1 + (dsz > 1 ? 1 : 0); |
(*start)++; |
|
return(0); |
|
} |
|
|
|
*start += 2; |
|
|
|
/* |
/* |
* Escapes don't print macro sequences, so just output them like |
* Escapes and ignored codes (NULL and INDEX) don't print macro |
* normal text before processing for macros. |
* sequences, so just output them like normal text before |
|
* processing for real macros. |
*/ |
*/ |
if (FMT_ESCAPE == fmt) { |
if (FMT_ESCAPE == fmt) { |
formatescape(buf, start, end); |
formatescape(st, buf, start, end); |
return(0); |
return(0); |
} else if (FMT_NULL == fmt || FMT_INDEX == fmt) { |
} else if (FMT_NULL == fmt || FMT_INDEX == fmt) { |
/* For indices and nulls, just consume. */ |
/* |
while (*start < end && '>' != buf[*start]) |
* Just consume til the end delimiter, accounting for |
|
* whether it's a custom one. |
|
*/ |
|
for ( ; *start < end; (*start)++) { |
|
if ('>' != buf[*start]) |
|
continue; |
|
else if (dsz == 1) |
|
break; |
|
assert(*start > 0); |
|
if (' ' != buf[*start - 1]) |
|
continue; |
|
i = *start; |
|
for (j = 0; i < end && j < dsz; j++) |
|
if ('>' != buf[i++]) |
|
break; |
|
if (dsz != j) |
|
continue; |
|
(*start) += dsz; |
|
break; |
|
} |
|
if (*start < end) { |
|
assert('>' == buf[*start]); |
(*start)++; |
(*start)++; |
if (*start < end) |
} |
(*start)++; |
if (isspace(last)) |
|
while (*start < end && isspace((int)buf[*start])) |
|
(*start)++; |
return(0); |
return(0); |
} |
} |
|
|
if ( ! nomacro) { |
/* |
|
* Check whether we're supposed to print macro stuff (this is |
|
* suppressed in, e.g., "Nm" and "Sh" macros). |
|
*/ |
|
if (FMT__MAX != fmt && !nomacro) { |
|
|
/* |
/* |
* Print out the macro describing this format code. |
* We may already have wantws if there was whitespace |
* If we're not "reentrant" (not yet on a macro line) |
* before the code ("text B<text"), but initial |
* then print a newline, if necessary, and the macro |
* whitespace inside our scope ("textB< text") |
* indicator. |
* allows to break at this point as well. |
* Otherwise, offset us with a space. |
|
*/ |
*/ |
if ( ! reentrant && last != '\n') |
|
putchar('\n'); |
wantws = ' ' == buf[*start] || |
if ( ! reentrant) |
(OUST_MAC == st->oust ? st->wantws : ! st->outbuflen); |
|
|
|
/* |
|
* If we are on a text line and there is no |
|
* whitespace before our content, we have to make |
|
* the previous word a prefix to the macro line. |
|
* In the following, mdoc_newln() must not be used |
|
* lest we clobber out output state. |
|
*/ |
|
|
|
if (OUST_MAC != st->oust && ! wantws) { |
|
if (OUST_NL != st->oust) |
|
putchar('\n'); |
|
printf(".Pf "); |
|
st->wantws = 0; |
|
} |
|
|
|
outbuf_flush(st); |
|
|
|
/* Whitespace is easier to suppress on macro lines. */ |
|
|
|
if (OUST_MAC == st->oust && ! wantws) |
|
printf(" Ns "); |
|
|
|
/* Unless we are on a macro line, start one. */ |
|
|
|
if (OUST_MAC != st->oust && wantws) { |
|
if (OUST_NL != st->oust) |
|
putchar('\n'); |
putchar('.'); |
putchar('.'); |
else |
} else |
putchar(' '); |
putchar(' '); |
|
|
/* |
/* |
* If we don't have whitespace before us, then suppress |
* Print the macro corresponding to this format code, |
* macro whitespace with Ns. |
* and update the output state afterwards. |
*/ |
*/ |
if (' ' != last) |
|
printf("Ns "); |
|
switch (fmt) { |
switch (fmt) { |
case (FMT_ITALIC): |
case (FMT_ITALIC): |
printf("Em "); |
printf("Em "); |
break; |
break; |
case (FMT_BOLD): |
case (FMT_BOLD): |
printf("Sy "); |
if (SECT_SYNOPSIS == st->sect) { |
|
if (1 == dsz && '-' == buf[*start]) |
|
dosynopsisfl(buf, start, end); |
|
else if (0 == pos) |
|
printf("Nm "); |
|
else |
|
printf("Ar "); |
|
break; |
|
} |
|
i = 0; |
|
uc = buf[*start]; |
|
while (isalnum(uc) || '_' == uc || ' ' == uc) |
|
uc = buf[*start + ++i]; |
|
if ('=' != uc && '>' != uc) |
|
i = 0; |
|
if (4 == i && ! strncmp(buf + *start, "NULL", 4)) { |
|
printf("Dv "); |
|
break; |
|
} |
|
switch (i ? dict_get(buf + *start, i) : MDOC_MAX) { |
|
case MDOC_Fa: |
|
printf("Fa "); |
|
break; |
|
case MDOC_Vt: |
|
printf("Vt "); |
|
break; |
|
default: |
|
printf("Sy "); |
|
break; |
|
} |
break; |
break; |
case (FMT_CODE): |
case (FMT_CODE): |
printf("Qo Li "); |
printf("Qo Li "); |
break; |
break; |
case (FMT_LINK): |
case (FMT_LINK): |
printf("Lk "); |
/* Try to link; use "No" if it's empty. */ |
|
if ( ! trylink(buf, start, end, dsz)) |
|
printf("No "); |
break; |
break; |
case (FMT_FILE): |
case (FMT_FILE): |
printf("Pa "); |
printf("Pa "); |
break; |
break; |
case (FMT_NBSP): |
case (FMT_NBSP): |
/* TODO. */ |
|
printf("No "); |
printf("No "); |
break; |
break; |
default: |
default: |
abort(); |
abort(); |
} |
} |
} |
st->oust = OUST_MAC; |
|
st->wantws = 1; |
|
} else |
|
outbuf_flush(st); |
|
|
/* |
/* |
* Read until we reach the end market ('>') or until we find a |
* Process until we reach the end marker (e.g., '>') or until we |
* nested format code. |
* find a nested format code. |
* Don't emit any newlines: since we're on a macro line, we |
* Don't emit any newlines: since we're on a macro line, we |
* don't want to break the line. |
* don't want to break the line. |
*/ |
*/ |
while (*start < end) { |
while (*start < end) { |
if ('>' == buf[*start]) { |
if ('>' == buf[*start] && 1 == dsz) { |
(*start)++; |
(*start)++; |
break; |
break; |
|
} else if ('>' == buf[*start] && |
|
' ' == buf[*start - 1]) { |
|
/* |
|
* Handle custom delimiters. |
|
* These require a certain number of |
|
* space-preceded carrots before we're really at |
|
* the end. |
|
*/ |
|
i = *start; |
|
for (j = 0; i < end && j < dsz; j++) |
|
if ('>' != buf[i++]) |
|
break; |
|
if (dsz == j) { |
|
*start += dsz; |
|
break; |
|
} |
} |
} |
if (*start + 1 < end && '<' == buf[*start + 1]) { |
if (*start + 1 < end && '<' == buf[*start + 1] && |
formatcode(buf, start, end, 1, last, nomacro); |
'A' <= buf[*start] && 'Z' >= buf[*start]) { |
|
if ( ! formatcode(st, buf, start, end, nomacro, 1)) |
|
st->wantws = 1; |
continue; |
continue; |
} |
} |
|
|
/* |
/* Suppress newlines and multiple spaces. */ |
* Make sure that any macro-like words (or |
|
* really any word starting with a capital |
|
* letter) is assumed to be a macro that must be |
|
* escaped. |
|
* This matches "Xx " and "XxEOLN". |
|
*/ |
|
if ((' ' == last || '\n' == last) && |
|
end - *start > 1 && |
|
isupper((int)buf[*start]) && |
|
islower((int)buf[*start + 1]) && |
|
(end - *start == 2 || |
|
' ' == buf[*start + 2])) |
|
printf("\\&"); |
|
|
|
/* Suppress newline. */ |
last = buf[(*start)++]; |
if ('\n' == (last = buf[(*start)++])) |
if (' ' == last || '\n' == last) { |
last = ' '; |
putchar(' '); |
|
while (*start < end && ' ' == buf[*start]) |
|
(*start)++; |
|
continue; |
|
} |
|
|
|
if (OUST_MAC == st->oust && FMT__MAX != fmt) { |
|
if ( ! st->wantws) { |
|
printf(" Ns "); |
|
st->wantws = 1; |
|
} |
|
|
|
/* |
|
* Escape macro-like words. |
|
* This matches "Xx " and "XxEOLN". |
|
*/ |
|
|
|
if (end - *start > 0 && |
|
isupper((unsigned char)last) && |
|
islower((unsigned char)buf[*start]) && |
|
(end - *start == 1 || |
|
' ' == buf[*start + 1] || |
|
'>' == buf[*start + 1])) |
|
printf("\\&"); |
|
} |
|
|
putchar(last); |
putchar(last); |
|
|
|
/* Protect against character escapes. */ |
|
|
|
if ('\\' == last) |
|
putchar('e'); |
} |
} |
|
|
if ( ! nomacro && FMT_CODE == fmt) |
if ( ! nomacro && FMT_CODE == fmt) |
printf(" Qc "); |
printf(" Qc "); |
|
|
if (reentrant) |
st->wantws = ' ' == last; |
return(1); |
return(FMT__MAX != fmt); |
|
|
/* |
|
* If we're not reentrant, we want to put ending punctuation on |
|
* the macro line so that it's properly handled by being |
|
* smooshed against the terminal word. |
|
*/ |
|
skipspace(buf, start, end); |
|
if (',' != buf[*start] && '.' != buf[*start] && |
|
'!' != buf[*start] && '?' != buf[*start] && |
|
')' != buf[*start]) |
|
return(1); |
|
while (*start < end) { |
|
if (',' != buf[*start] && |
|
'.' != buf[*start] && |
|
'!' != buf[*start] && |
|
'?' != buf[*start] && |
|
')' != buf[*start]) |
|
break; |
|
putchar(' '); |
|
putchar(buf[*start]); |
|
(*start)++; |
|
} |
|
skipspace(buf, start, end); |
|
return(1); |
|
} |
} |
|
|
/* |
/* |
* Calls formatcode() til the end of a paragraph. |
* Calls formatcode() til the end of a paragraph. |
|
* Goes to OUST_MAC mode and stays there when returning, |
|
* such that the caller can add arguments to the macro line |
|
* before closing it out. |
*/ |
*/ |
static void |
static void |
formatcodeln(const char *buf, size_t *start, size_t end, int nomacro) |
formatcodeln(struct state *st, const char *linemac, |
|
const char *buf, size_t *start, size_t end, int nomacro) |
{ |
{ |
int last; |
int gotmacro, wantws; |
|
|
last = ' '; |
assert(OUST_NL == st->oust); |
|
assert(st->wantws); |
|
printf(".%s ", linemac); |
|
st->oust = OUST_MAC; |
|
|
|
gotmacro = 0; |
while (*start < end) { |
while (*start < end) { |
if (*start + 1 < end && '<' == buf[*start + 1]) { |
wantws = ' ' == buf[*start] || '\n' == buf[*start]; |
formatcode(buf, start, end, 1, last, nomacro); |
if (wantws) { |
|
last = ' '; |
|
do { |
|
(*start)++; |
|
} while (*start < end && ' ' == buf[*start]); |
|
} |
|
|
|
if (*start + 1 < end && '<' == buf[*start + 1] && |
|
'A' <= buf[*start] && 'Z' >= buf[*start]) { |
|
st->wantws |= wantws; |
|
gotmacro = formatcode(st, buf, |
|
start, end, nomacro, 1); |
continue; |
continue; |
} |
} |
|
|
|
if (gotmacro) { |
|
if (*start < end || st->outbuflen) { |
|
if (st->wantws || |
|
(wantws && !st->outbuflen)) |
|
printf(" No "); |
|
else |
|
printf(" Ns "); |
|
} |
|
gotmacro = 0; |
|
} |
|
outbuf_flush(st); |
|
st->wantws = wantws; |
|
|
|
if (*start >= end) |
|
break; |
|
|
|
if (st->wantws) { |
|
putchar(' '); |
|
st->wantws = 0; |
|
} |
|
|
/* |
/* |
* Since we're already on a macro line, we want to make |
* Since we're already on a macro line, we want to make |
* sure that we don't inadvertently invoke a macro. |
* sure that we don't inadvertently invoke a macro. |
Line 349 formatcodeln(const char *buf, size_t *start, size_t en |
|
Line 805 formatcodeln(const char *buf, size_t *start, size_t en |
|
* something that needn't be escaped. |
* something that needn't be escaped. |
*/ |
*/ |
if (' ' == last && end - *start > 1 && |
if (' ' == last && end - *start > 1 && |
isupper((int)buf[*start]) && |
isupper((unsigned char)buf[*start]) && |
islower((int)buf[*start + 1]) && |
islower((unsigned char)buf[*start + 1]) && |
(end - *start == 2 || |
(end - *start == 2 || ' ' == buf[*start + 2])) |
' ' == buf[*start + 2])) |
|
printf("\\&"); |
printf("\\&"); |
|
|
if ('\n' != buf[*start]) |
putchar(last = buf[*start]); |
putchar(last = buf[*start]); |
|
else |
/* Protect against character escapes. */ |
putchar(last = ' '); |
|
|
if ('\\' == last) |
|
putchar('e'); |
|
|
(*start)++; |
(*start)++; |
} |
} |
} |
} |
Line 389 listguess(const char *buf, size_t start, size_t end) |
|
Line 847 listguess(const char *buf, size_t start, size_t end) |
|
* A command paragraph, as noted in the perlpod manual, just indicates |
* A command paragraph, as noted in the perlpod manual, just indicates |
* that we should do something, optionally with some text to print as |
* that we should do something, optionally with some text to print as |
* well. |
* well. |
|
* From the perspective of external callers, |
|
* always stays in OUST_NL/wantws mode, |
|
* but its children do use OUST_MAC. |
*/ |
*/ |
static void |
static void |
command(struct state *st, const char *buf, size_t start, size_t end) |
command(struct state *st, const char *buf, size_t start, size_t end) |
Line 414 command(struct state *st, const char *buf, size_t star |
|
Line 875 command(struct state *st, const char *buf, size_t star |
|
return; |
return; |
|
|
start += csz; |
start += csz; |
skipspace(buf, &start, end); |
while (start < end && ' ' == buf[start]) |
|
start++; |
|
|
len = end - start; |
len = end - start; |
|
|
if (st->paused) { |
if (st->paused) { |
Line 430 command(struct state *st, const char *buf, size_t star |
|
Line 893 command(struct state *st, const char *buf, size_t star |
|
* The behaviour of head= follows from a quick glance at |
* The behaviour of head= follows from a quick glance at |
* how pod2man handles it. |
* how pod2man handles it. |
*/ |
*/ |
printf(".Sh "); |
st->sect = SECT_NONE; |
st->isname = 0; |
if (end - start == 4) { |
if (end - start == 4) |
|
if (0 == memcmp(&buf[start], "NAME", 4)) |
if (0 == memcmp(&buf[start], "NAME", 4)) |
st->isname = 1; |
st->sect = SECT_NAME; |
formatcodeln(buf, &start, end, 1); |
} else if (end - start == 8) { |
putchar('\n'); |
if (0 == memcmp(&buf[start], "SYNOPSIS", 8)) |
|
st->sect = SECT_SYNOPSIS; |
|
} |
|
formatcodeln(st, "Sh", buf, &start, end, 1); |
|
mdoc_newln(st); |
st->haspar = 1; |
st->haspar = 1; |
break; |
break; |
case (CMD_HEAD2): |
case (CMD_HEAD2): |
printf(".Ss "); |
formatcodeln(st, "Ss", buf, &start, end, 1); |
formatcodeln(buf, &start, end, 1); |
mdoc_newln(st); |
putchar('\n'); |
|
st->haspar = 1; |
st->haspar = 1; |
break; |
break; |
case (CMD_HEAD3): |
case (CMD_HEAD3): |
puts(".Pp"); |
puts(".Pp"); |
printf(".Em "); |
formatcodeln(st, "Em", buf, &start, end, 0); |
formatcodeln(buf, &start, end, 0); |
mdoc_newln(st); |
putchar('\n'); |
|
puts(".Pp"); |
puts(".Pp"); |
st->haspar = 1; |
st->haspar = 1; |
break; |
break; |
case (CMD_HEAD4): |
case (CMD_HEAD4): |
puts(".Pp"); |
puts(".Pp"); |
printf(".No "); |
formatcodeln(st, "No", buf, &start, end, 0); |
formatcodeln(buf, &start, end, 0); |
mdoc_newln(st); |
putchar('\n'); |
|
puts(".Pp"); |
puts(".Pp"); |
st->haspar = 1; |
st->haspar = 1; |
break; |
break; |
Line 478 command(struct state *st, const char *buf, size_t star |
|
Line 941 command(struct state *st, const char *buf, size_t star |
|
st->lstack[st->lpos - 1] = LIST__MAX; |
st->lstack[st->lpos - 1] = LIST__MAX; |
break; |
break; |
case (CMD_ITEM): |
case (CMD_ITEM): |
|
if (0 == st->lpos) { |
|
/* |
|
* Bad markup. |
|
* Try to compensate. |
|
*/ |
|
st->lstack[st->lpos] = LIST__MAX; |
|
st->lpos++; |
|
} |
assert(st->lpos > 0); |
assert(st->lpos > 0); |
/* |
/* |
* If we're the first =item, guess at what our content |
* If we're the first =item, guess at what our content |
Line 501 command(struct state *st, const char *buf, size_t star |
|
Line 972 command(struct state *st, const char *buf, size_t star |
|
} |
} |
switch (st->lstack[st->lpos - 1]) { |
switch (st->lstack[st->lpos - 1]) { |
case (LIST_TAG): |
case (LIST_TAG): |
printf(".It "); |
formatcodeln(st, "It", buf, &start, end, 0); |
formatcodeln(buf, &start, end, 0); |
mdoc_newln(st); |
putchar('\n'); |
|
break; |
break; |
case (LIST_ENUM): |
case (LIST_ENUM): |
/* FALLTHROUGH */ |
/* FALLTHROUGH */ |
Line 554 command(struct state *st, const char *buf, size_t star |
|
Line 1024 command(struct state *st, const char *buf, size_t star |
|
} |
} |
|
|
/* |
/* |
|
* Put the type provided as an argument into the dictionary. |
|
*/ |
|
static void |
|
register_type(const char *ptype) |
|
{ |
|
const char *pname, *pend; |
|
|
|
pname = ptype; |
|
while (isalnum((unsigned char)*pname) || '_' == *pname) |
|
pname++; |
|
if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) || |
|
(pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) { |
|
while (' ' == *pname) |
|
pname++; |
|
pend = pname; |
|
while (isalnum((unsigned char)*pend) || '_' == *pend) |
|
pend++; |
|
if (pend > pname) |
|
dict_put(pname, pend - pname, MDOC_Vt); |
|
} else |
|
pend = pname; |
|
if (pend > ptype) |
|
dict_put(ptype, pend - ptype, MDOC_Vt); |
|
} |
|
|
|
/* |
* Just pump out the line in a verbatim block. |
* Just pump out the line in a verbatim block. |
|
* From the perspective of external callers, |
|
* always stays in OUST_NL/wantws mode. |
*/ |
*/ |
static void |
static void |
verbatim(struct state *st, const char *buf, size_t start, size_t end) |
verbatim(struct state *st, char *buf, size_t start, size_t end) |
{ |
{ |
|
size_t i, ift, ifo, ifa, ifc, inl; |
|
char *cp, *cp2; |
|
int nopen; |
|
|
if ( ! st->parsing || st->paused) |
if ( ! st->parsing || st->paused || start == end) |
return; |
return; |
|
again: |
|
/* |
|
* If we're in the SYNOPSIS, see if we're an #include block. |
|
* If we are, then print the "In" macro and re-loop. |
|
* This handles any number of inclusions, but only when they |
|
* come before the remaining parts... |
|
*/ |
|
if (SECT_SYNOPSIS == st->sect) { |
|
i = start; |
|
while (i < end && buf[i] == ' ') |
|
i++; |
|
if (i == end) |
|
return; |
|
|
|
/* We're an include block! */ |
|
if (end - i > 10 && |
|
0 == memcmp(&buf[i], "#include <", 10)) { |
|
start = i + 10; |
|
while (start < end && ' ' == buf[start]) |
|
start++; |
|
fputs(".In ", stdout); |
|
/* Stop til the '>' marker or we hit eoln. */ |
|
while (start < end && |
|
'>' != buf[start] && '\n' != buf[start]) |
|
putchar(buf[start++]); |
|
putchar('\n'); |
|
if (start < end && '>' == buf[start]) |
|
start++; |
|
if (start < end && '\n' == buf[start]) |
|
start++; |
|
goto again; |
|
} |
|
|
|
/* Other preprocessor directives. */ |
|
if ('#' == buf[i]) { |
|
fputs(".Fd ", stdout); |
|
start = i; |
|
while(start < end && '\n' != buf[start]) |
|
putchar(buf[start++]); |
|
putchar('\n'); |
|
if (start < end && '\n' == buf[start]) |
|
start++; |
|
|
|
/* Remember #define for Dv or Fn. */ |
|
|
|
if (strncmp(buf + i + 1, "define", 6) || |
|
! isspace((unsigned char)buf[i + 7])) |
|
goto again; |
|
|
|
ifo = i + 7; |
|
while (ifo < start && |
|
isspace((unsigned char)buf[ifo])) |
|
ifo++; |
|
ifa = ifo; |
|
while ('_' == buf[ifa] || |
|
isalnum((unsigned char)buf[ifa])) |
|
ifa++; |
|
dict_put(buf + ifo, ifa - ifo, |
|
'(' == buf[ifa] ? MDOC_Fo : MDOC_Dv); |
|
|
|
goto again; |
|
} |
|
|
|
/* Parse function declaration. */ |
|
ifo = ifa = ifc = 0; |
|
inl = end; |
|
nopen = 0; |
|
for (ift = i; i < end; i++) { |
|
if (ifc) { |
|
if (buf[i] != '\n') |
|
continue; |
|
inl = i; |
|
break; |
|
} |
|
switch (buf[i]) { |
|
case '\t': |
|
/* FALLTHROUGH */ |
|
case ' ': |
|
if ( ! ifa) |
|
ifo = i; |
|
break; |
|
case '(': |
|
if (ifo) { |
|
nopen++; |
|
if ( ! ifa) |
|
ifa = i; |
|
} else |
|
i = end; |
|
break; |
|
case ')': |
|
switch (nopen) { |
|
case 0: |
|
i = end; |
|
break; |
|
case 1: |
|
ifc = i; |
|
break; |
|
default: |
|
nopen--; |
|
break; |
|
} |
|
break; |
|
default: |
|
break; |
|
} |
|
} |
|
|
|
/* Encode function declaration. */ |
|
if (ifc) { |
|
for (i = ifa; i < ifc; i++) |
|
if (buf[i] == '\n') |
|
buf[i] = ' '; |
|
buf[ifo++] = '\0'; |
|
register_type(buf + ift); |
|
printf(".Ft %s", buf + ift); |
|
if (buf[ifo] == '*') { |
|
fputs(" *", stdout); |
|
ifo++; |
|
} |
|
putchar('\n'); |
|
buf[ifa++] = '\0'; |
|
printf(".Fo %s\n", buf + ifo); |
|
dict_put(buf + ifo, 0, MDOC_Fo); |
|
buf[ifc++] = '\0'; |
|
for (;;) { |
|
cp = strchr(buf + ifa, ','); |
|
if (cp != NULL) { |
|
cp2 = cp; |
|
*cp++ = '\0'; |
|
} else |
|
cp2 = strchr(buf + ifa, '\0'); |
|
while (isalnum((unsigned char)cp2[-1]) || |
|
'_' == cp2[-1]) |
|
cp2--; |
|
if ('\0' != *cp2) |
|
dict_put(cp2, 0, MDOC_Fa); |
|
register_type(buf + ifa); |
|
if (strchr(buf + ifa, ' ') == NULL) |
|
printf(".Fa %s\n", buf + ifa); |
|
else |
|
printf(".Fa \"%s\"\n", buf + ifa); |
|
if (cp == NULL) |
|
break; |
|
while (*cp == ' ' || *cp == '\t') |
|
cp++; |
|
ifa = cp - buf; |
|
} |
|
puts(".Fc"); |
|
if (buf[ifc] == ';') |
|
ifc++; |
|
if (ifc < inl) { |
|
buf[inl] = '\0'; |
|
puts(buf + ifc); |
|
} |
|
start = inl + 1; |
|
if (start < end) |
|
goto again; |
|
return; |
|
} |
|
} |
|
|
puts(".Bd -literal"); |
puts(".Bd -literal"); |
printf("%.*s\n", (int)(end - start), &buf[start]); |
for (last = ' '; start < end; start++) { |
|
/* |
|
* Handle accidental macros (newline starting with |
|
* control character) and escapes. |
|
*/ |
|
if ('\n' == last) |
|
if ('.' == buf[start] || '\'' == buf[start]) |
|
printf("\\&"); |
|
putchar(last = buf[start]); |
|
if ('\\' == buf[start]) |
|
printf("e"); |
|
} |
|
putchar(last = '\n'); |
puts(".Ed"); |
puts(".Ed"); |
} |
} |
|
|
/* |
/* |
|
* See dosynopsisop(). |
|
*/ |
|
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth;	/* number of '[' seen and not yet closed */

	/*
	 * Scan buf[start..end) for a ']' that is not paired with a
	 * '[' inside the scanned range.
	 * The caller passes the position just after an opening
	 * bracket, so such a ']' is the one closing that bracket.
	 * Return 1 if it is found, 0 if the range ends first.
	 */
	for (depth = 0; start < end; start++) {
		switch (buf[start]) {
		case '[':
			depth++;
			break;
		case ']':
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
|
|
|
/* |
|
* If we're in the SYNOPSIS section and we've encounter braces in an |
|
* ordinary paragraph, then try to see whether we're an [-option]. |
|
* Do this, if we're an opening bracket, by first seeing if we have a |
|
* matching end via hasmatch(). |
|
* If we're an ending bracket, see if we have a stack already. |
|
*/ |
|
static int |
|
dosynopsisop(struct state *st, const char *buf, |
|
size_t *start, size_t end, size_t *opstack) |
|
{ |
|
|
|
assert('[' == buf[*start] || ']' == buf[*start]); |
|
|
|
if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { |
|
mdoc_newln(st); |
|
puts(".Oo"); |
|
(*opstack)++; |
|
} else if ('[' == buf[*start]) |
|
return(0); |
|
|
|
if (']' == buf[*start] && *opstack > 0) { |
|
mdoc_newln(st); |
|
puts(".Oc"); |
|
(*opstack)--; |
|
} else if (']' == buf[*start]) |
|
return(0); |
|
|
|
(*start)++; |
|
last = '\n'; |
|
while (' ' == buf[*start]) |
|
(*start)++; |
|
return(1); |
|
} |
|
|
|
/* |
|
* Format multiple "Nm" manpage names in the NAME section. |
|
* From the perspective of external callers, |
|
* always stays in OUST_NL/wantws mode, |
|
* but its children do use OUST_MAC. |
|
*/ |
|
static void |
|
donamenm(struct state *st, const char *buf, size_t *start, size_t end) |
|
{ |
|
size_t word; |
|
|
|
assert(OUST_NL == st->oust); |
|
assert(st->wantws); |
|
|
|
while (*start < end && isspace((unsigned char)buf[*start])) |
|
(*start)++; |
|
|
|
if (end == *start) { |
|
puts(".Nm unknown"); |
|
return; |
|
} |
|
|
|
while (*start < end) { |
|
for (word = *start; word < end; word++) |
|
if (',' == buf[word]) |
|
break; |
|
formatcodeln(st, "Nm", buf, start, word, 1); |
|
if (*start == end) { |
|
mdoc_newln(st); |
|
break; |
|
} |
|
assert(',' == buf[*start]); |
|
printf(" ,"); |
|
mdoc_newln(st); |
|
(*start)++; |
|
while (*start < end && isspace((unsigned char)buf[*start])) |
|
(*start)++; |
|
} |
|
} |
|
|
|
/* |
* Ordinary paragraph. |
* Ordinary paragraph. |
* Well, this is really the hardest--POD seems to assume that, for |
* Well, this is really the hardest--POD seems to assume that, for |
* example, a leading space implies a newline, and so on. |
* example, a leading space implies a newline, and so on. |
* Lots of other snakes in the grass: escaping a newline followed by a |
* Lots of other snakes in the grass: escaping a newline followed by a |
* period (accidental mdoc(7) control), double-newlines after macro |
* period (accidental mdoc(7) control), double-newlines after macro |
* passages, etc. |
* passages, etc. |
|
* |
|
* Uses formatcode() to go to OUST_MAC mode |
|
* and outbuf_flush() to go to OUST_TXT mode. |
|
* In text mode, wantws requests white space before the text |
|
* currently contained in the outbuf, not before upcoming text. |
|
* Must make sure to go back to OUST_NL/wantws mode before returning. |
*/ |
*/ |
static void |
static void |
ordinary(struct state *st, const char *buf, size_t start, size_t end) |
ordinary(struct state *st, const char *buf, size_t start, size_t end) |
{ |
{ |
int last; |
size_t i, j, opstack, wend; |
size_t i, j; |
enum mdoc_type mtype; |
|
int eos, noeos, seq; |
|
char savechar; |
|
|
if ( ! st->parsing || st->paused) |
if ( ! st->parsing || st->paused) |
return; |
return; |
Line 591 ordinary(struct state *st, const char *buf, size_t sta |
|
Line 1365 ordinary(struct state *st, const char *buf, size_t sta |
|
* we're in "name - description" format. |
* we're in "name - description" format. |
* To wit, print out a "Nm" and "Nd" in that format. |
* To wit, print out a "Nm" and "Nd" in that format. |
*/ |
*/ |
if (st->isname) { |
if (SECT_NAME == st->sect) { |
for (i = end - 1; i > start; i--) |
for (i = end - 2; i > start; i--) |
if ('-' == buf[i]) |
if ('-' == buf[i] && |
|
isspace((unsigned char)buf[i + 1])) |
break; |
break; |
if ('-' == buf[i]) { |
if ('-' == buf[i]) { |
j = i; |
j = i; |
Line 601 ordinary(struct state *st, const char *buf, size_t sta |
|
Line 1376 ordinary(struct state *st, const char *buf, size_t sta |
|
for ( ; i > start; i--) |
for ( ; i > start; i--) |
if ('-' != buf[i]) |
if ('-' != buf[i]) |
break; |
break; |
/* FIXME: escape macro-like words etc. */ |
donamenm(st, buf, &start, i + 1); |
printf(".Nm %.*s\n", |
start = j + 1; |
(int)((i + 1) - start), &buf[start]); |
while (start < end && |
printf(".Nd %.*s\n", |
isspace((unsigned char)buf[start])) |
(int)(end - (j + 1)), &buf[j + 1]); |
start++; |
|
formatcodeln(st, "Nd", buf, &start, end, 1); |
|
mdoc_newln(st); |
return; |
return; |
} |
} |
} |
} |
Line 615 ordinary(struct state *st, const char *buf, size_t sta |
|
Line 1392 ordinary(struct state *st, const char *buf, size_t sta |
|
|
|
st->haspar = 0; |
st->haspar = 0; |
last = '\n'; |
last = '\n'; |
|
opstack = 0; |
|
|
while (start < end) { |
for (seq = 0; start < end; seq++) { |
/* |
/* |
* Loop til we get either to a newline or escape. |
* Loop til we get either to a newline or escape. |
* Escape initial control characters. |
* Escape initial control characters. |
*/ |
*/ |
while (start < end) { |
while (start < end) { |
if (start < end - 1 && '<' == buf[start + 1]) |
if (start < end - 1 && '<' == buf[start + 1] && |
|
'A' <= buf[start] && 'Z' >= buf[start]) |
break; |
break; |
else if ('\n' == buf[start]) |
else if ('\n' == buf[start]) |
break; |
break; |
else if ('\n' == last && '.' == buf[start]) |
else if ('\n' == last && '.' == buf[start]) |
printf("\\&"); |
outbuf_addstr(st, "\\&"); |
else if ('\n' == last && '\'' == buf[start]) |
else if ('\n' == last && '\'' == buf[start]) |
printf("\\&"); |
outbuf_addstr(st, "\\&"); |
putchar(last = buf[start++]); |
/* |
} |
* If we're in the SYNOPSIS, have square |
|
* brackets indicate that we're opening and |
|
* closing an optional context. |
|
*/ |
|
|
if (start < end - 1 && '<' == buf[start + 1]) { |
if (SECT_SYNOPSIS == st->sect && |
|
('[' == buf[start] || |
|
']' == buf[start]) && |
|
dosynopsisop(st, buf, |
|
&start, end, &opstack)) |
|
continue; |
|
|
|
/* Merely buffer non-whitespace. */ |
|
|
|
last = buf[start++]; |
|
if ( ! isspace(last)) |
|
outbuf_addchar(st); |
|
if (start < end && |
|
! isspace((unsigned char)buf[start])) |
|
continue; |
|
|
/* |
/* |
* We've encountered a format code. |
* Found the end of a word. |
* This is going to trigger a macro no matter |
* Rewind trailing delimiters. |
* what, so print a newline now. |
|
* Then print the (possibly nested) macros and |
|
* following that, a newline. |
|
*/ |
*/ |
if (formatcode(buf, &start, end, 0, last, 0)) |
|
putchar(last = '\n'); |
eos = noeos = 0; |
} else if (start < end && '\n' == buf[start]) { |
for (wend = st->outbuflen; wend; wend--) |
|
if ('.' == st->outbuf[wend - 1] || |
|
'!' == st->outbuf[wend - 1] || |
|
'?' == st->outbuf[wend - 1]) |
|
eos = 1; |
|
else if ('|' == st->outbuf[wend - 1] || |
|
',' == st->outbuf[wend - 1] || |
|
';' == st->outbuf[wend - 1] || |
|
':' == st->outbuf[wend - 1]) |
|
noeos = 1; |
|
else if ('\'' != st->outbuf[wend - 1] && |
|
'"' != st->outbuf[wend - 1] && |
|
')' != st->outbuf[wend - 1] && |
|
']' != st->outbuf[wend - 1]) |
|
break; |
|
eos &= ! noeos; |
|
|
/* |
/* |
* Print the newline only if we haven't already |
* Detect function names. |
* printed a newline. |
|
*/ |
*/ |
if (last != '\n') |
|
putchar(last = buf[start]); |
mtype = MDOC_Fa; |
|
savechar = '\0'; |
|
if (wend && ')' == st->outbuf[wend] && |
|
'(' == st->outbuf[wend - 1]) { |
|
mtype = dict_get(st->outbuf, --wend); |
|
if (MDOC_Dv == mtype) |
|
mtype = MDOC_Fo; |
|
if (MDOC_Fo == mtype || MDOC_MAX == mtype) { |
|
st->outbuflen = wend; |
|
st->outbuf[wend] = '\0'; |
|
mdoc_newln(st); |
|
if (MDOC_Fo == mtype) |
|
fputs(".Fn ", stdout); |
|
else |
|
fputs(".Xr ", stdout); |
|
st->oust = OUST_MAC; |
|
} |
|
} else { |
|
mtype = dict_get(st->outbuf, wend); |
|
if (MDOC_Dv == mtype) { |
|
savechar = st->outbuf[wend]; |
|
st->outbuf[wend] = '\0'; |
|
mdoc_newln(st); |
|
fputs(".Dv ", stdout); |
|
st->oust = OUST_MAC; |
|
} else |
|
mtype = MDOC_Fa; |
|
} |
|
|
|
/* |
|
* On whitespace, flush the output buffer |
|
* and allow breaking to a macro line. |
|
*/ |
|
|
|
outbuf_flush(st); |
|
|
|
/* |
|
* End macro lines, and |
|
* end text lines at the end of sentences. |
|
*/ |
|
|
|
if (OUST_MAC == st->oust || (eos && wend > 1 && |
|
islower((unsigned char)st->outbuf[wend - 1]))) { |
|
if (MDOC_MAX == mtype) |
|
fputs(" 3", stdout); |
|
if (MDOC_Fa != mtype) { |
|
if (MDOC_Dv == mtype) |
|
st->outbuf[wend] = savechar; |
|
else |
|
wend += 2; |
|
while ('\0' != st->outbuf[wend]) |
|
printf(" %c", |
|
st->outbuf[wend++]); |
|
} |
|
mdoc_newln(st); |
|
} |
|
|
|
/* Advance to the next word. */ |
|
|
|
while ('\n' != buf[start] && |
|
isspace((unsigned char)buf[start])) |
|
start++; |
|
st->wantws = 1; |
|
} |
|
|
|
if (start < end - 1 && '<' == buf[start + 1] && |
|
'A' <= buf[start] && 'Z' >= buf[start]) { |
|
formatcode(st, buf, &start, end, 0, seq); |
|
if (OUST_MAC == st->oust) { |
|
/* |
|
* Let mdoc(7) handle trailing punctuation. |
|
* XXX Some punctuation characters |
|
* are not handled yet. |
|
*/ |
|
if ((start == end - 1 || |
|
(start < end - 1 && |
|
(' ' == buf[start + 1] || |
|
'\n' == buf[start + 1]))) && |
|
NULL != strchr("|.,;:?!)]", buf[start])) { |
|
putchar(' '); |
|
putchar(buf[start++]); |
|
} |
|
|
|
if (st->wantws || |
|
' ' == buf[start] || |
|
'\n' == buf[start]) |
|
mdoc_newln(st); |
|
|
|
/* |
|
* Consume all whitespace |
|
* so we don't accidentally start |
|
* an implicit literal line. |
|
*/ |
|
|
|
while (start < end && ' ' == buf[start]) |
|
start++; |
|
|
|
/* |
|
* Some text is following. |
|
* Implement requested spacing. |
|
*/ |
|
|
|
if ( ! st->wantws && start < end && |
|
('<' != buf[start + 1] || |
|
'A' > buf[start] || |
|
'Z' < buf[start])) { |
|
printf(" Ns "); |
|
st->wantws = 1; |
|
} |
|
} |
|
} else if (start < end && '\n' == buf[start]) { |
|
outbuf_flush(st); |
|
mdoc_newln(st); |
if (++start >= end) |
if (++start >= end) |
continue; |
continue; |
/* |
/* |
Line 660 ordinary(struct state *st, const char *buf, size_t sta |
|
Line 1581 ordinary(struct state *st, const char *buf, size_t sta |
|
* have a macro subsequent it, which may be |
* have a macro subsequent it, which may be |
* possible if we have an escape next. |
* possible if we have an escape next. |
*/ |
*/ |
if (' ' == buf[start] || '\t' == buf[start]) { |
if (' ' == buf[start] || '\t' == buf[start]) |
puts(".br"); |
puts(".br"); |
last = '\n'; |
|
} |
|
for ( ; start < end; start++) |
for ( ; start < end; start++) |
if (' ' != buf[start] && '\t' != buf[start]) |
if (' ' != buf[start] && '\t' != buf[start]) |
break; |
break; |
} else if (start < end) { |
} |
/* |
|
* Default: print the character. |
|
* Escape initial control characters. |
|
*/ |
|
if ('\n' == last && '.' == buf[start]) |
|
printf("\\&"); |
|
else if ('\n' == last && '\'' == buf[start]) |
|
printf("\\&"); |
|
putchar(last = buf[start++]); |
|
} |
|
} |
} |
|
outbuf_flush(st); |
if (last != '\n') |
mdoc_newln(st); |
putchar('\n'); |
|
} |
} |
|
|
/* |
/* |
Line 690 ordinary(struct state *st, const char *buf, size_t sta |
|
Line 1598 ordinary(struct state *st, const char *buf, size_t sta |
|
* (default: starts with "="). |
* (default: starts with "="). |
*/ |
*/ |
static void |
static void |
dopar(struct state *st, const char *buf, size_t start, size_t end) |
dopar(struct state *st, char *buf, size_t start, size_t end) |
{ |
{ |
|
|
|
assert(OUST_NL == st->oust); |
|
assert(st->wantws); |
|
|
if (end == start) |
if (end == start) |
return; |
return; |
if (' ' == buf[start] || '\t' == buf[start]) |
if (' ' == buf[start] || '\t' == buf[start]) |
Line 709 dopar(struct state *st, const char *buf, size_t start, |
|
Line 1620 dopar(struct state *st, const char *buf, size_t start, |
|
*/ |
*/ |
static void |
static void |
dofile(const struct args *args, const char *fname, |
dofile(const struct args *args, const char *fname, |
const struct tm *tm, const char *buf, size_t sz) |
const struct tm *tm, char *buf, size_t sz) |
{ |
{ |
size_t sup, end, i, cur = 0; |
|
struct state st; |
|
const char *section, *date; |
|
char datebuf[64]; |
char datebuf[64]; |
|
struct state st; |
|
const char *fbase, *fext, *section, *date, *format; |
char *title, *cp; |
char *title, *cp; |
|
size_t sup, end, i, cur = 0; |
|
|
if (0 == sz) |
if (0 == sz) |
return; |
return; |
|
|
/* Title is last path component of the filename. */ |
/* |
|
* Parsing the filename is almost always required, |
|
* except when both the title and the section |
|
* are provided on the command line. |
|
*/ |
|
|
if (NULL != args->title) |
if (NULL == args->title || NULL == args->section) { |
title = strdup(args->title); |
fbase = strrchr(fname, '/'); |
else if (NULL != (cp = strrchr(fname, '/'))) |
if (NULL == fbase) |
title = strdup(cp + 1); |
fbase = fname; |
else |
else |
title = strdup(fname); |
fbase++; |
|
fext = strrchr(fbase, '.'); |
|
} else |
|
fext = NULL; |
|
|
|
/* |
|
* The title will be converted to uppercase, |
|
* so it needs to be copied. |
|
*/ |
|
|
|
title = (NULL != args->title) ? strdup(args->title) : |
|
(NULL != fext) ? strndup(fbase, fext - fbase) : |
|
strdup(fbase); |
|
|
if (NULL == title) { |
if (NULL == title) { |
perror(NULL); |
perror(NULL); |
exit(EXIT_FAILURE); |
exit(EXIT_FAILURE); |
Line 736 dofile(const struct args *args, const char *fname, |
|
Line 1663 dofile(const struct args *args, const char *fname, |
|
|
|
/* Section is 1 unless suffix is "pm". */ |
/* Section is 1 unless suffix is "pm". */ |
|
|
if (NULL == (section = args->section)) { |
section = (NULL != args->section) ? args->section : |
section = "1"; |
(NULL == fext || strcmp(fext + 1, "pm")) ? "1" : |
if (NULL != (cp = strrchr(title, '.'))) { |
PERL_SECTION; |
*cp++ = '\0'; |
|
if (0 == strcmp(cp, "pm")) |
|
section = "3p"; |
|
} |
|
} |
|
|
|
/* Date. Or the given "tm" if not supplied. */ |
/* Date. Or the given "tm" if not supplied. */ |
|
|
if (NULL == (date = args->date)) { |
date = args->date; |
strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm); |
format = (NULL == date) ? "%B %d, %Y" : |
|
strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $"; |
|
|
|
if (NULL != format) { |
|
strftime(datebuf, sizeof(datebuf), format, tm); |
date = datebuf; |
date = datebuf; |
} |
} |
|
|
Line 763 dofile(const struct args *args, const char *fname, |
|
Line 1689 dofile(const struct args *args, const char *fname, |
|
|
|
free(title); |
free(title); |
|
|
|
dict_init(); |
memset(&st, 0, sizeof(struct state)); |
memset(&st, 0, sizeof(struct state)); |
|
st.oust = OUST_NL; |
|
st.wantws = 1; |
|
|
assert(sz > 0); |
assert(sz > 0); |
|
|
/* Main loop over file contents. */ |
/* Main loop over file contents. */ |
Line 787 dofile(const struct args *args, const char *fname, |
|
Line 1717 dofile(const struct args *args, const char *fname, |
|
dopar(&st, buf, cur, end); |
dopar(&st, buf, cur, end); |
cur = sup; |
cur = sup; |
} |
} |
|
dict_destroy(); |
} |
} |
|
|
/* |
/* |
Line 806 readfile(const struct args *args, const char *fname) |
|
Line 1737 readfile(const struct args *args, const char *fname) |
|
time_t ttm; |
time_t ttm; |
struct stat st; |
struct stat st; |
|
|
assert(NULL != fname); |
|
|
|
fd = 0 != strcmp("-", fname) ? |
fd = 0 != strcmp("-", fname) ? |
open(fname, O_RDONLY, 0) : STDIN_FILENO; |
open(fname, O_RDONLY, 0) : STDIN_FILENO; |
|
|
Line 913 main(int argc, char *argv[]) |
|
Line 1842 main(int argc, char *argv[]) |
|
|
|
/* Accept only a single input file. */ |
/* Accept only a single input file. */ |
|
|
if (argc > 2) |
if (argc > 1) |
return(EXIT_FAILURE); |
goto usage; |
else if (1 == argc) |
else if (1 == argc) |
fname = *argv; |
fname = *argv; |
|
|
Line 923 main(int argc, char *argv[]) |
|
Line 1852 main(int argc, char *argv[]) |
|
|
|
usage: |
usage: |
fprintf(stderr, "usage: %s [-d date] " |
fprintf(stderr, "usage: %s [-d date] " |
"[-n title] [-s section]\n", name); |
"[-n title] [-s section] [file]\n", name); |
|
|
return(EXIT_FAILURE); |
return(EXIT_FAILURE); |
} |
} |