version 1.1, 2014/03/20 15:07:56 |
version 1.5, 2014/03/23 13:00:24 |
|
|
const char *section; /* override "Dt" section */ |
const char *section; /* override "Dt" section */ |
}; |
}; |
|
|
|
enum list { /* list kinds tracked on the list stack; each picks the ".Bl" variant printed for the list */ |
|
LIST_BULLET = 0, /* opened with ".Bl -bullet" */ |
|
LIST_ENUM, /* opened with ".Bl -enum" */ |
|
LIST_TAG, /* opened with ".Bl -tag -width Ds" */ |
|
LIST__MAX /* sentinel: stack slot pushed by CMD_OVER whose kind has not been chosen yet */ |
|
}; |
|
|
struct state { |
struct state { |
int parsing; /* after =cut or before command */ |
int parsing; /* after =cut or before command */ |
int paused; /* in =begin and before =end */ |
int paused; /* in =begin and before =end */ |
int haspar; /* in paragraph: do we need Pp? */ |
int haspar; /* in paragraph: do we need Pp? */ |
int isname; /* are we the NAME section? */ |
int isname; /* are we the NAME section? */ |
const char *fname; /* file being parsed */ |
const char *fname; /* file being parsed */ |
|
#define LIST_STACKSZ 128 |
|
enum list lstack[LIST_STACKSZ]; /* open lists */ |
|
size_t lpos; /* where in list stack */ |
}; |
}; |
|
|
enum fmt { |
enum fmt { |
Line 144 formatescape(const char *buf, size_t *start, size_t en |
|
Line 154 formatescape(const char *buf, size_t *start, size_t en |
|
/* |
/* |
* Skip space characters. |
* Skip space characters. |
*/ |
*/ |
static void |
static int |
skipspace(const char *buf, size_t *start, size_t end) |
skipspace(const char *buf, size_t *start, size_t end) |
{ |
{ |
|
size_t sv = *start; |
|
|
while (*start < end && ' ' == buf[*start]) |
while (*start < end && ' ' == buf[*start]) |
(*start)++; |
(*start)++; |
|
|
|
return(*start > sv); |
} |
} |
|
|
/* |
/* |
Line 168 formatcode(const char *buf, size_t *start, |
|
Line 181 formatcode(const char *buf, size_t *start, |
|
size_t end, int reentrant, int last, int nomacro) |
size_t end, int reentrant, int last, int nomacro) |
{ |
{ |
enum fmt fmt; |
enum fmt fmt; |
|
size_t i, j, dsz; |
|
|
assert(*start + 1 < end); |
assert(*start + 1 < end); |
assert('<' == buf[*start + 1]); |
assert('<' == buf[*start + 1]); |
|
|
|
/* |
|
* Determine whether we're overriding our delimiter. |
|
* According to POD, if we have more than one '<' followed by a |
|
* space, then we need a space followed by matching '>' to close |
|
* the expression. |
|
* Otherwise we use the usual '<' and '>' matched pair. |
|
*/ |
|
i = *start + 1; |
|
while (i < end && '<' == buf[i]) |
|
i++; |
|
assert(i > *start + 1); |
|
dsz = i - (*start + 1); |
|
if (dsz > 1 && (i >= end || ' ' != buf[i])) |
|
dsz = 1; |
|
|
for (fmt = 0; fmt < FMT__MAX; fmt++) |
for (fmt = 0; fmt < FMT__MAX; fmt++) |
if (buf[*start] == fmts[fmt]) |
if (buf[*start] == fmts[fmt]) |
break; |
break; |
Line 184 formatcode(const char *buf, size_t *start, |
|
Line 213 formatcode(const char *buf, size_t *start, |
|
return(0); |
return(0); |
} |
} |
|
|
*start += 2; |
/* Remember, if dsz>1, to jump the trailing space. */ |
|
*start += dsz + 1 + (dsz > 1 ? 1 : 0); |
|
|
/* |
/* |
* Escapes don't print macro sequences, so just output them like |
* Escapes don't print macro sequences, so just output them like |
Line 194 formatcode(const char *buf, size_t *start, |
|
Line 224 formatcode(const char *buf, size_t *start, |
|
formatescape(buf, start, end); |
formatescape(buf, start, end); |
return(0); |
return(0); |
} else if (FMT_NULL == fmt || FMT_INDEX == fmt) { |
} else if (FMT_NULL == fmt || FMT_INDEX == fmt) { |
/* For indices and nulls, just consume. */ |
/* |
while (*start < end && '>' != buf[*start]) |
* For indices and nulls, just consume. |
(*start)++; |
* Be wary of encountering custom delimiters (dsz>1), |
if (*start < end) |
* which require special handling. |
(*start)++; |
*/ |
|
for ( ; *start < end; (*start)++) { |
|
if ('>' != buf[*start]) |
|
continue; |
|
else if (dsz == 1) |
|
break; |
|
assert(*start > 0); |
|
if (' ' != buf[*start - 1]) |
|
continue; |
|
i = *start; |
|
for (j = 0; i < end && j < dsz; j++) |
|
if ('>' != buf[i++]) |
|
break; |
|
if (dsz != j) |
|
continue; |
|
(*start) += dsz; |
|
break; |
|
} |
return(0); |
return(0); |
} |
} |
|
|
Line 231 formatcode(const char *buf, size_t *start, |
|
Line 278 formatcode(const char *buf, size_t *start, |
|
printf("Sy "); |
printf("Sy "); |
break; |
break; |
case (FMT_CODE): |
case (FMT_CODE): |
printf("Li "); |
printf("Qo Li "); |
break; |
break; |
case (FMT_LINK): |
case (FMT_LINK): |
printf("Lk "); |
printf("Lk "); |
Line 249 formatcode(const char *buf, size_t *start, |
|
Line 296 formatcode(const char *buf, size_t *start, |
|
} |
} |
|
|
/* |
/* |
* Read until we reach the end marker ('>') or until we find a |
* Read until we reach the end marker (e.g., '>') or until we |
* nested format code. |
* find a nested format code. |
* Don't emit any newlines: since we're on a macro line, we |
* Don't emit any newlines: since we're on a macro line, we |
* don't want to break the line. |
* don't want to break the line. |
*/ |
*/ |
while (*start < end) { |
while (*start < end) { |
if ('>' == buf[*start]) { |
if ('>' == buf[*start] && 1 == dsz) { |
(*start)++; |
(*start)++; |
break; |
break; |
|
} else if ('>' == buf[*start] && |
|
' ' == buf[*start - 1]) { |
|
/* |
|
* Handle custom delimiters. |
|
* These require a certain number of |
|
* space-preceded '>' characters before we're really at |
|
* the end. |
|
*/ |
|
i = *start; |
|
for (j = 0; i < end && j < dsz; j++) |
|
if ('>' != buf[i++]) |
|
break; |
|
if (dsz == j) { |
|
*start += dsz; |
|
break; |
|
} |
} |
} |
if (*start + 1 < end && '<' == buf[*start + 1]) { |
if (*start + 1 < end && '<' == buf[*start + 1]) { |
formatcode(buf, start, end, 1, last, nomacro); |
formatcode(buf, start, end, 1, last, nomacro); |
continue; |
continue; |
} |
} |
if ('\n' != buf[*start]) { |
|
/* |
/* |
* Make sure that any macro-like words (or |
* Make sure that any macro-like words (or |
* really any word starting with a capital |
* really any word starting with a capital |
* letter) is assumed to be a macro that must be |
* letter) is assumed to be a macro that must be |
* escaped. |
* escaped. |
* XXX: should this be isalpha()? |
* This matches "Xx " and "XxEOLN". |
*/ |
*/ |
if ((' ' == last || '\n' == last) && |
if ((' ' == last || '\n' == last) && |
isupper(buf[*start])) |
end - *start > 1 && |
printf("\\&"); |
isupper((int)buf[*start]) && |
putchar(last = buf[*start]); |
islower((int)buf[*start + 1]) && |
} |
(end - *start == 2 || |
(*start)++; |
' ' == buf[*start + 2])) |
|
printf("\\&"); |
|
|
|
/* Suppress newline. */ |
|
if ('\n' == (last = buf[(*start)++])) |
|
last = ' '; |
|
|
|
putchar(last); |
} |
} |
|
|
|
if ( ! nomacro && FMT_CODE == fmt) |
|
printf(" Qc "); |
|
|
if (reentrant) |
if (reentrant) |
return(1); |
return(1); |
|
|
|
/* FIXME: with the "Qc", this doesn't work well. */ |
|
|
/* |
/* |
* If we're not reentrant, we want to put ending punctuation on |
* If we're not reentrant, we want to put ending punctuation on |
* the macro line so that it's properly handled by being |
* the macro line so that it's properly handled by being |
* smooshed against the terminal word. |
* smooshed against the terminal word. |
*/ |
*/ |
skipspace(buf, start, end); |
skipspace(buf, start, end); |
|
|
if (',' != buf[*start] && '.' != buf[*start] && |
if (',' != buf[*start] && '.' != buf[*start] && |
'!' != buf[*start] && '?' != buf[*start] && |
'!' != buf[*start] && '?' != buf[*start] && |
')' != buf[*start]) |
')' != buf[*start]) |
Line 315 formatcodeln(const char *buf, size_t *start, size_t en |
|
Line 391 formatcodeln(const char *buf, size_t *start, size_t en |
|
{ |
{ |
int last; |
int last; |
|
|
last = '\n'; |
last = ' '; |
while (*start < end) { |
while (*start < end) { |
if (*start + 1 < end && '<' == buf[*start + 1]) { |
if (*start + 1 < end && '<' == buf[*start + 1]) { |
formatcode(buf, start, end, 1, last, nomacro); |
formatcode(buf, start, end, 1, last, nomacro); |
continue; |
continue; |
} |
} |
|
/* |
|
* Since we're already on a macro line, we want to make |
|
* sure that we don't inadvertently invoke a macro. |
|
* We need to do this carefully because section names |
|
* are used in troff and we don't want to escape |
|
* something that needn't be escaped. |
|
*/ |
|
if (' ' == last && end - *start > 1 && |
|
isupper((int)buf[*start]) && |
|
islower((int)buf[*start + 1]) && |
|
(end - *start == 2 || |
|
' ' == buf[*start + 2])) |
|
printf("\\&"); |
|
|
if ('\n' != buf[*start]) |
if ('\n' != buf[*start]) |
putchar(last = buf[*start]); |
putchar(last = buf[*start]); |
|
else |
|
putchar(last = ' '); |
(*start)++; |
(*start)++; |
} |
} |
} |
} |
|
|
/* |
/* |
|
* Guess at what kind of list we are. |
|
* These are taken straight from the POD manual. |
|
* I don't know what people do in real life. |
|
*/ |
|
static enum list |
|
listguess(const char *buf, size_t start, size_t end) |
|
{ /* Classify the text in buf[start, end): "*" is a bullet list, "1." or "1" an enumerated list, anything else a tagged list. */ |
|
size_t len = end - start; /* number of characters in the examined range */ |
|
|
|
assert(end >= start); /* otherwise the unsigned subtraction above has wrapped */ |
|
|
|
if (len == 1 && '*' == buf[start]) /* exactly "*" */ |
|
return(LIST_BULLET); |
|
if (len == 2 && '1' == buf[start] && '.' == buf[start + 1]) /* exactly "1." */ |
|
return(LIST_ENUM); |
|
else if (len == 1 && '1' == buf[start]) /* exactly "1" */ |
|
return(LIST_ENUM); |
|
else /* any other text, including an empty range */ |
|
return(LIST_TAG); |
|
} |
|
|
|
/* |
* A command paragraph, as noted in the perlpod manual, just indicates |
* A command paragraph, as noted in the perlpod manual, just indicates |
* that we should do something, optionally with some text to print as |
* that we should do something, optionally with some text to print as |
* well. |
* well. |
Line 404 command(struct state *st, const char *buf, size_t star |
|
Line 518 command(struct state *st, const char *buf, size_t star |
|
st->haspar = 1; |
st->haspar = 1; |
break; |
break; |
case (CMD_OVER): |
case (CMD_OVER): |
/* |
/* |
* TODO: we should be doing this after we process the |
* If we have an existing list that hasn't had an =item |
* first =item to see whether we'll do an -enum, |
* yet, then make sure that we open it now. |
* -bullet, or something else. |
* We use the default list type, but that can't be |
|
* helped (we haven't seen any items yet). |
*/ |
*/ |
puts(".Bl -tag -width Ds"); |
if (st->lpos > 0) |
|
if (LIST__MAX == st->lstack[st->lpos - 1]) { |
|
st->lstack[st->lpos - 1] = LIST_TAG; |
|
puts(".Bl -tag -width Ds"); |
|
} |
|
st->lpos++; |
|
assert(st->lpos < LIST_STACKSZ); |
|
st->lstack[st->lpos - 1] = LIST__MAX; |
break; |
break; |
case (CMD_ITEM): |
case (CMD_ITEM): |
printf(".It "); |
assert(st->lpos > 0); |
formatcodeln(buf, &start, end, 0); |
/* |
putchar('\n'); |
* If we're the first =item, guess at what our content |
|
* will be: "*" is a bullet list, "1." is a numbered |
|
* list, and everything else is tagged. |
|
*/ |
|
if (LIST__MAX == st->lstack[st->lpos - 1]) { |
|
st->lstack[st->lpos - 1] = |
|
listguess(buf, start, end); |
|
switch (st->lstack[st->lpos - 1]) { |
|
case (LIST_BULLET): |
|
puts(".Bl -bullet"); |
|
break; |
|
case (LIST_ENUM): |
|
puts(".Bl -enum"); |
|
break; |
|
default: |
|
puts(".Bl -tag -width Ds"); |
|
break; |
|
} |
|
} |
|
switch (st->lstack[st->lpos - 1]) { |
|
case (LIST_TAG): |
|
printf(".It "); |
|
formatcodeln(buf, &start, end, 0); |
|
putchar('\n'); |
|
break; |
|
case (LIST_ENUM): |
|
/* FALLTHROUGH */ |
|
case (LIST_BULLET): |
|
/* |
|
* Abandon the remainder of the paragraph |
|
* because we're going to be a bulleted or |
|
* numbered list. |
|
*/ |
|
puts(".It"); |
|
break; |
|
default: |
|
abort(); |
|
} |
st->haspar = 1; |
st->haspar = 1; |
break; |
break; |
case (CMD_BACK): |
case (CMD_BACK): |
puts(".El"); |
/* Make sure we don't back over the stack. */ |
|
if (st->lpos > 0) { |
|
st->lpos--; |
|
puts(".El"); |
|
} |
break; |
break; |
case (CMD_BEGIN): |
case (CMD_BEGIN): |
/* |
/* |
Line 494 ordinary(struct state *st, const char *buf, size_t sta |
|
Line 657 ordinary(struct state *st, const char *buf, size_t sta |
|
for ( ; i > start; i--) |
for ( ; i > start; i--) |
if ('-' != buf[i]) |
if ('-' != buf[i]) |
break; |
break; |
printf(".Nm %.*s\n", |
printf(".Nm "); |
(int)((i + 1) - start), &buf[start]); |
formatcodeln(buf, &start, i + 1, 1); |
printf(".Nd %.*s\n", |
putchar('\n'); |
(int)(end - (j + 1)), &buf[j + 1]); |
start = j + 1; |
|
printf(".Nd "); |
|
formatcodeln(buf, &start, end, 1); |
|
putchar('\n'); |
return; |
return; |
} |
} |
} |
} |