=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.7 retrieving revision 1.9 diff -u -p -r1.7 -r1.9 --- pod2mdoc/pod2mdoc.c 2014/03/23 23:35:59 1.7 +++ pod2mdoc/pod2mdoc.c 2014/03/24 01:07:30 1.9 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.7 2014/03/23 23:35:59 kristaps Exp $ */ +/* $Id: pod2mdoc.c,v 1.9 2014/03/24 01:07:30 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -158,17 +158,74 @@ formatescape(const char *buf, size_t *start, size_t en } /* - * Skip space characters. + * Run some heuristics to intuit a link format. + * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p; + * or a general UNIX foo(5) manpage. + * If I recognise one, I set "start" to be the end of the sequence so + * that the caller can safely just continue processing. + * Otherwise, I don't touch "start". */ static int -skipspace(const char *buf, size_t *start, size_t end) +trylink(const char *buf, size_t *start, size_t end, size_t dsz) { - size_t sv = *start; + size_t sv, nstart, nend, i, j; + int hasdouble; - while (*start < end && ' ' == buf[*start]) - (*start)++; + /* + * Scan to the start of the terminus. + * This function is more or less replicated in the formatcode() + * for null or index formatting codes. + */ + hasdouble = 0; + for (sv = nstart = *start; nstart < end; nstart++) { + /* Do we have a double-colon? */ + if (':' == buf[nstart] && + nstart > sv && + ':' == buf[nstart - 1]) + hasdouble = 1; + if ('>' != buf[nstart]) + continue; + else if (dsz == 1) + break; + assert(nstart > 0); + if (' ' != buf[nstart - 1]) + continue; + i = nstart; + for (j = 0; i < end && j < dsz; j++) + if ('>' != buf[i++]) + break; + if (dsz == j) + break; + } + + /* We don't care about stubs. */ + if (nstart == end || nstart == *start) + return(0); - return(*start > sv); + /* Set nend to the end of content. 
*/ + nend = nstart; + if (dsz > 1) + nend--; + + /* + * Provide for some common invocations of the link primitive. + * First, allow us to link to other Perl manuals. + */ + if (hasdouble) + printf("Xr %.*s 3p", + (int)(nend - sv), &buf[sv]); + else if (nend - sv > 3 && isalnum(buf[sv]) && + ')' == buf[nend - 1] && + isdigit((int)buf[nend - 2]) && + '(' == buf[nend - 3]) + printf("Xr %.*s %c", + (int)(nend - 3 - sv), + &buf[sv], buf[nend - 2]); + else + return(0); + + *start = nstart; + return(1); } /* @@ -204,6 +261,8 @@ formatcode(const char *buf, size_t *start, if (FMT__MAX == fmt) { putchar(last = buf[(*start)++]); + if ('\\' == last) + putchar('e'); return(0); } @@ -296,13 +355,13 @@ formatcode(const char *buf, size_t *start, printf("Qo Li "); break; case (FMT_LINK): - printf("Lk "); + if ( ! trylink(buf, start, end, dsz)) + printf("No "); break; case (FMT_FILE): printf("Pa "); break; case (FMT_NBSP): - /* TODO. */ printf("No "); break; default: @@ -363,6 +422,10 @@ formatcode(const char *buf, size_t *start, else putchar(last = buf[*start]); + /* Protect against character escapes. */ + if ('\\' == last) + putchar('e'); + (*start)++; if (' ' == last) @@ -412,10 +475,15 @@ formatcodeln(const char *buf, size_t *start, size_t en ' ' == buf[*start + 2])) printf("\\&"); - if ('\n' != buf[*start]) - putchar(last = buf[*start]); - else + if ('\n' == buf[*start]) putchar(last = ' '); + else + putchar(last = buf[*start]); + + /* Protect against character escapes. */ + if ('\\' == last) + putchar('e'); + (*start)++; } } @@ -471,7 +539,9 @@ command(struct state *st, const char *buf, size_t star return; start += csz; - skipspace(buf, &start, end); + while (start < end && ' ' == buf[start]) + start++; + len = end - start; if (st->paused) { @@ -624,17 +694,23 @@ command(struct state *st, const char *buf, size_t star static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { - size_t sv = start; + int last; if ( ! 
st->parsing || st->paused) return; puts(".Bd -literal"); - while (start < end) { - if (start > sv && '\n' == buf[start - 1]) + for (last = ' '; start < end; start++) { + /* + * Handle accidental macros (newline starting with + * control character) and escapes. + */ + if ('\n' == last) if ('.' == buf[start] || '\'' == buf[start]) printf("\\&"); - putchar(buf[start++]); + putchar(last = buf[start]); + if ('\\' == buf[start]) + printf("e"); } putchar('\n'); puts(".Ed"); @@ -704,6 +780,8 @@ ordinary(struct state *st, const char *buf, size_t sta else if ('\n' == last && '\'' == buf[start]) printf("\\&"); putchar(last = buf[start++]); + if ('\\' == last) + putchar('e'); } if (start < end - 1 && '<' == buf[start + 1]) { @@ -713,6 +791,8 @@ ordinary(struct state *st, const char *buf, size_t sta * what, so print a newline now. * Then print the (possibly nested) macros and * following that, a newline. + * Consume all whitespace so we don't + * accidentally start an implicit literal line. */ if (formatcode(buf, &start, end, 0, 0)) { putchar(last = '\n'); @@ -753,6 +833,8 @@ ordinary(struct state *st, const char *buf, size_t sta else if ('\n' == last && '\'' == buf[start]) printf("\\&"); putchar(last = buf[start++]); + if ('\\' == last) + putchar('e'); } }