=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.7 retrieving revision 1.12 diff -u -p -r1.7 -r1.12 --- pod2mdoc/pod2mdoc.c 2014/03/23 23:35:59 1.7 +++ pod2mdoc/pod2mdoc.c 2014/04/01 11:58:32 1.12 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.7 2014/03/23 23:35:59 kristaps Exp $ */ +/* $Id: pod2mdoc.c,v 1.12 2014/04/01 11:58:32 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -26,6 +26,11 @@ #include #include +/* + * In what section can we find Perl manuals? + */ +#define PERL_SECTION "3p" + struct args { const char *title; /* override "Dt" title */ const char *date; /* override "Dd" date */ @@ -39,11 +44,17 @@ enum list { LIST__MAX }; +enum sect { + SECT_NONE = 0, + SECT_NAME, /* NAME section */ + SECT_SYNOPSIS, /* SYNOPSIS section */ +}; + struct state { int parsing; /* after =cut of before command */ int paused; /* in =begin and before =end */ int haspar; /* in paragraph: do we need Pp? */ - int isname; /* are we the NAME section? */ + enum sect sect; /* which section are we in? */ const char *fname; /* file being parsed */ #define LIST_STACKSZ 128 enum list lstack[LIST_STACKSZ]; /* open lists */ @@ -158,17 +169,74 @@ formatescape(const char *buf, size_t *start, size_t en } /* - * Skip space characters. + * Run some heuristics to intuit a link format. + * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p; + * or a general UNIX foo(5) manpage. + * If I recognise one, I set "start" to be the end of the sequence so + * that the caller can safely just continue processing. + * Otherwise, I don't touch "start". */ static int -skipspace(const char *buf, size_t *start, size_t end) +trylink(const char *buf, size_t *start, size_t end, size_t dsz) { - size_t sv = *start; + size_t sv, nstart, nend, i, j; + int hasdouble; - while (*start < end && ' ' == buf[*start]) - (*start)++; + /* + * Scan to the start of the terminus. 
+ * This function is more or less replicated in the formatcode() + * for null or index formatting codes. + */ + hasdouble = 0; + for (sv = nstart = *start; nstart < end; nstart++) { + /* Do we have a double-colon? */ + if (':' == buf[nstart] && + nstart > sv && + ':' == buf[nstart - 1]) + hasdouble = 1; + if ('>' != buf[nstart]) + continue; + else if (dsz == 1) + break; + assert(nstart > 0); + if (' ' != buf[nstart - 1]) + continue; + i = nstart; + for (j = 0; i < end && j < dsz; j++) + if ('>' != buf[i++]) + break; + if (dsz == j) + break; + } + + /* We don't care about stubs. */ + if (nstart == end || nstart == *start) + return(0); - return(*start > sv); + /* Set nend to the end of content. */ + nend = nstart; + if (dsz > 1) + nend--; + + /* + * Provide for some common invocations of the link primitive. + * First, allow us to link to other Perl manuals. + */ + if (hasdouble) + printf("Xr %.*s " PERL_SECTION, + (int)(nend - sv), &buf[sv]); + else if (nend - sv > 3 && isalnum(buf[sv]) && + ')' == buf[nend - 1] && + isdigit((int)buf[nend - 2]) && + '(' == buf[nend - 3]) + printf("Xr %.*s %c", + (int)(nend - 3 - sv), + &buf[sv], buf[nend - 2]); + else + return(0); + + *start = nstart; + return(1); } /* @@ -185,8 +253,8 @@ skipspace(const char *buf, size_t *start, size_t end) * reentrant). */ static int -formatcode(const char *buf, size_t *start, - size_t end, int reentrant, int nomacro) +formatcode(struct state *st, const char *buf, + size_t *start, size_t end, int reentrant, int nomacro) { enum fmt fmt; size_t i, j, dsz; @@ -204,6 +272,8 @@ formatcode(const char *buf, size_t *start, if (FMT__MAX == fmt) { putchar(last = buf[(*start)++]); + if ('\\' == last) + putchar('e'); return(0); } @@ -290,19 +360,41 @@ formatcode(const char *buf, size_t *start, printf("Em "); break; case (FMT_BOLD): + /* + * Doclifting: if we're a bold "-xx" and we're + * in the SYNOPSIS section, then it's likely + * that we're a flag. 
+ * Be really strict: only do this when the dash + * is followed by alnums til the end marker, + * which mustn't be a custom. + */ + if (SECT_SYNOPSIS == st->sect && + end - *start > 1 && + '-' == buf[*start] && + (isalnum((int)buf[*start + 1]) || + '?' == buf[*start + 1])) { + for (i = *start + 1; i < end; i++) + if ( ! isalnum((int)buf[i])) + break; + if (i < end && '>' == buf[i]) { + (*start)++; + printf("Fl "); + break; + } + } printf("Sy "); break; case (FMT_CODE): printf("Qo Li "); break; case (FMT_LINK): - printf("Lk "); + if ( ! trylink(buf, start, end, dsz)) + printf("No "); break; case (FMT_FILE): printf("Pa "); break; case (FMT_NBSP): - /* TODO. */ printf("No "); break; default: @@ -338,7 +430,7 @@ formatcode(const char *buf, size_t *start, } } if (*start + 1 < end && '<' == buf[*start + 1]) { - formatcode(buf, start, end, 1, nomacro); + formatcode(st, buf, start, end, 1, nomacro); continue; } @@ -363,6 +455,10 @@ formatcode(const char *buf, size_t *start, else putchar(last = buf[*start]); + /* Protect against character escapes. */ + if ('\\' == last) + putchar('e'); + (*start)++; if (' ' == last) @@ -389,13 +485,14 @@ formatcode(const char *buf, size_t *start, * Calls formatcode() til the end of a paragraph. */ static void -formatcodeln(const char *buf, size_t *start, size_t end, int nomacro) +formatcodeln(struct state *st, const char *buf, + size_t *start, size_t end, int nomacro) { last = ' '; while (*start < end) { if (*start + 1 < end && '<' == buf[*start + 1]) { - formatcode(buf, start, end, 1, nomacro); + formatcode(st, buf, start, end, 1, nomacro); continue; } /* @@ -412,10 +509,15 @@ formatcodeln(const char *buf, size_t *start, size_t en ' ' == buf[*start + 2])) printf("\\&"); - if ('\n' != buf[*start]) - putchar(last = buf[*start]); - else + if ('\n' == buf[*start]) putchar(last = ' '); + else + putchar(last = buf[*start]); + + /* Protect against character escapes. 
*/ + if ('\\' == last) + putchar('e'); + (*start)++; } } @@ -471,7 +573,9 @@ command(struct state *st, const char *buf, size_t star return; start += csz; - skipspace(buf, &start, end); + while (start < end && ' ' == buf[start]) + start++; + len = end - start; if (st->paused) { @@ -488,24 +592,28 @@ command(struct state *st, const char *buf, size_t star * how pod2man handles it. */ printf(".Sh "); - st->isname = 0; - if (end - start == 4) + st->sect = SECT_NONE; + if (end - start == 4) { if (0 == memcmp(&buf[start], "NAME", 4)) - st->isname = 1; - formatcodeln(buf, &start, end, 1); + st->sect = SECT_NAME; + } else if (end - start == 8) { + if (0 == memcmp(&buf[start], "SYNOPSIS", 8)) + st->sect = SECT_SYNOPSIS; + } + formatcodeln(st, buf, &start, end, 1); putchar('\n'); st->haspar = 1; break; case (CMD_HEAD2): printf(".Ss "); - formatcodeln(buf, &start, end, 1); + formatcodeln(st, buf, &start, end, 1); putchar('\n'); st->haspar = 1; break; case (CMD_HEAD3): puts(".Pp"); printf(".Em "); - formatcodeln(buf, &start, end, 0); + formatcodeln(st, buf, &start, end, 0); putchar('\n'); puts(".Pp"); st->haspar = 1; @@ -513,7 +621,7 @@ command(struct state *st, const char *buf, size_t star case (CMD_HEAD4): puts(".Pp"); printf(".No "); - formatcodeln(buf, &start, end, 0); + formatcodeln(st, buf, &start, end, 0); putchar('\n'); puts(".Pp"); st->haspar = 1; @@ -567,7 +675,7 @@ command(struct state *st, const char *buf, size_t star switch (st->lstack[st->lpos - 1]) { case (LIST_TAG): printf(".It "); - formatcodeln(buf, &start, end, 0); + formatcodeln(st, buf, &start, end, 0); putchar('\n'); break; case (LIST_ENUM): @@ -624,17 +732,23 @@ command(struct state *st, const char *buf, size_t star static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { - size_t sv = start; + int last; if ( ! 
st->parsing || st->paused) return; puts(".Bd -literal"); - while (start < end) { - if (start > sv && '\n' == buf[start - 1]) + for (last = ' '; start < end; start++) { + /* + * Handle accidental macros (newline starting with + * control character) and escapes. + */ + if ('\n' == last) if ('.' == buf[start] || '\'' == buf[start]) printf("\\&"); - putchar(buf[start++]); + putchar(last = buf[start]); + if ('\\' == buf[start]) + printf("e"); } putchar('\n'); puts(".Ed"); @@ -662,7 +776,7 @@ ordinary(struct state *st, const char *buf, size_t sta * we're in "name - description" format. * To wit, print out a "Nm" and "Nd" in that format. */ - if (st->isname) { + if (SECT_NAME == st->sect) { for (i = end - 1; i > start; i--) if ('-' == buf[i]) break; @@ -673,11 +787,11 @@ ordinary(struct state *st, const char *buf, size_t sta if ('-' != buf[i]) break; printf(".Nm "); - formatcodeln(buf, &start, i + 1, 1); + formatcodeln(st, buf, &start, i + 1, 1); putchar('\n'); start = j + 1; printf(".Nd "); - formatcodeln(buf, &start, end, 1); + formatcodeln(st, buf, &start, end, 1); putchar('\n'); return; } @@ -703,7 +817,29 @@ ordinary(struct state *st, const char *buf, size_t sta printf("\\&"); else if ('\n' == last && '\'' == buf[start]) printf("\\&"); +#if notyet + /* + * If we're in the SYNOPSIS, have square + * brackets indicate that we're opening and + * closing an optional context. + */ + if (SECT_SYNOPSIS == st->sect) { + if ('[' == buf[start] || + ']' == buf[start]) { + if (last != '\n') + putchar('\n'); + if ('[' == buf[start]) + printf(".Oo\n"); + else + printf(".Oc\n"); + start++; + continue; + } + } +#endif putchar(last = buf[start++]); + if ('\\' == last) + putchar('e'); } if (start < end - 1 && '<' == buf[start + 1]) { @@ -713,8 +849,10 @@ ordinary(struct state *st, const char *buf, size_t sta * what, so print a newline now. * Then print the (possibly nested) macros and * following that, a newline. 
+ * Consume all whitespace so we don't + * accidentally start an implicit literal line. */ - if (formatcode(buf, &start, end, 0, 0)) { + if (formatcode(st, buf, &start, end, 0, 0)) { putchar(last = '\n'); while (start < end && ' ' == buf[start]) start++; @@ -743,17 +881,7 @@ ordinary(struct state *st, const char *buf, size_t sta for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; - } else if (start < end) { - /* - * Default: print the character. - * Escape initial control characters. - */ - if ('\n' == last && '.' == buf[start]) - printf("\\&"); - else if ('\n' == last && '\'' == buf[start]) - printf("\\&"); - putchar(last = buf[start++]); - } + } } if (last != '\n') @@ -817,7 +945,7 @@ dofile(const struct args *args, const char *fname, if (NULL != (cp = strrchr(title, '.'))) { *cp++ = '\0'; if (0 == strcmp(cp, "pm")) - section = "3p"; + section = PERL_SECTION; } }