=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.17 retrieving revision 1.29 diff -u -p -r1.17 -r1.29 --- pod2mdoc/pod2mdoc.c 2014/04/02 20:32:41 1.17 +++ pod2mdoc/pod2mdoc.c 2014/07/11 20:45:55 1.29 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.17 2014/04/02 20:32:41 kristaps Exp $ */ +/* $Id: pod2mdoc.c,v 1.29 2014/07/11 20:45:55 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -27,7 +27,9 @@ #include /* - * In what section can we find Perl manuals? + * In what section can we find Perl module manuals? + * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p. + * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL. */ #define PERL_SECTION "3p" @@ -170,72 +172,134 @@ formatescape(const char *buf, size_t *start, size_t en /* * Run some heuristics to intuit a link format. - * I recognise L as a Perl manpage, printing it in section 3p; - * or a general UNIX foo(5) manpage. - * If I recognise one, I set "start" to be the end of the sequence so + * I set "start" to be the end of the sequence (last right-carrot) so * that the caller can safely just continue processing. - * Otherwise, I don't touch "start". + * If this is just an empty tag, I'll return 0. */ static int trylink(const char *buf, size_t *start, size_t end, size_t dsz) { - size_t sv, nstart, nend, i, j; - int hasdouble; + size_t linkstart, realend, linkend, + i, j, textsz, stack; /* * Scan to the start of the terminus. * This function is more or less replicated in the formatcode() * for null or index formatting codes. + * However, we're slightly different because we might have + * nested escapes we need to ignore. */ - hasdouble = 0; - for (sv = nstart = *start; nstart < end; nstart++) { - /* Do we have a double-colon? */ - if (':' == buf[nstart] && - nstart > sv && - ':' == buf[nstart - 1]) - hasdouble = 1; - if ('>' != buf[nstart]) + stack = 0; + for (linkstart = realend = *start; realend < end; realend++) { + if ('<' == buf[realend]) + stack++; + if ('>' != buf[realend]) continue; - else if (dsz == 1) + else if (stack-- > 0) + continue; + if (dsz == 1) break; - assert(nstart > 0); - if (' ' != buf[nstart - 1]) + assert(realend > 0); + if (' ' != buf[realend - 1]) continue; - i = nstart; - for (j = 0; i < end && j < dsz; j++) + for (i = realend, j = 0; i < end && j < dsz; j++) if ('>' != buf[i++]) break; if (dsz == j) break; } - - /* We don't care about stubs. */ - if (nstart == end || nstart == *start) + + /* Ignore stubs. */ + if (realend == end || realend == *start) return(0); - /* Set nend to the end of content. */ - nend = nstart; - if (dsz > 1) - nend--; + /* Set linkend to the end of content. */ + linkend = dsz > 1 ? realend - 1 : realend; - /* - * Provide for some common invocations of the link primitive. - * First, allow us to link to other Perl manuals. - */ - if (hasdouble) + /* Re-scan to see if we have a title or section. */ + for (textsz = *start; textsz < linkend; textsz++) + if ('|' == buf[textsz] || '/' == buf[textsz]) + break; + + if (textsz < linkend && '|' == buf[textsz]) { + /* With title: set start, then end at section. */ + linkstart = textsz + 1; + textsz = textsz - *start; + for (i = linkstart; i < linkend; i++) + if ('/' == buf[i]) + break; + if (i < linkend) + linkend = i; + } else if (textsz < linkend && '/' == buf[textsz]) { + /* With section: set end at section. */ + linkend = textsz; + textsz = 0; + } else + /* No title, no section. */ + textsz = 0; + + *start = realend; + j = linkend - linkstart; + + /* Do we have only subsection material? */ + if (0 == j && '/' == buf[linkend]) { + linkstart = linkend + 1; + linkend = dsz > 1 ? realend - 1 : realend; + if (0 == (j = linkend - linkstart)) + return(0); + printf("Sx %.*s", (int)j, &buf[linkstart]); + return(1); + } else if (0 == j) + return(0); + + /* See if we qualify as being a link or not. */ + if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) || + (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) || + (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) || + (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) || + (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) || + (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) { + /* Gross. */ + printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : + realend) - linkstart), &buf[linkstart]); + return(1); + } + + /* See if we qualify as a mailto. */ + if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) { + printf("Mt %.*s", (int)j, &buf[linkstart]); + return(1); + } + + /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */ + if ((j > 3 && ')' == buf[linkend - 1]) && + ('(' == buf[linkend - 3])) { + printf("Xr %.*s %c", (int)(j - 3), + &buf[linkstart], buf[linkend - 2]); + return(1); + } else if ((j > 4 && ')' == buf[linkend - 1]) && + ('(' == buf[linkend - 4])) { + printf("Xr %.*s %.*s", (int)(j - 4), + &buf[linkstart], 2, &buf[linkend - 3]); + return(1); + } else if ((j > 5 && ')' == buf[linkend - 1]) && + ('(' == buf[linkend - 5])) { + printf("Xr %.*s %.*s", (int)(j - 5), + &buf[linkstart], 3, &buf[linkend - 4]); + return(1); + } + + /* Last try: do we have a double-colon? */ + for (i = linkstart + 1; i < linkend; i++) + if (':' == buf[i] && ':' == buf[i - 1]) + break; + + if (i < linkend) printf("Xr %.*s " PERL_SECTION, - (int)(nend - sv), &buf[sv]); - else if (nend - sv > 3 && isalnum(buf[sv]) && - ')' == buf[nend - 1] && - isdigit((int)buf[nend - 2]) && - '(' == buf[nend - 3]) - printf("Xr %.*s %c", - (int)(nend - 3 - sv), - &buf[sv], buf[nend - 2]); + (int)j, &buf[linkstart]); else - return(0); + printf("Xr %.*s 1", (int)j, &buf[linkstart]); - *start = nstart; return(1); } @@ -394,6 +458,13 @@ formatcode(struct state *st, const char *buf, size_t * (*start) += dsz; break; } + if (*start < end) { + assert('>' == buf[*start]); + (*start)++; + } + if (isspace(last)) + while (*start < end && isspace((int)buf[*start])) + (*start)++; return(0); } @@ -438,12 +509,18 @@ formatcode(struct state *st, const char *buf, size_t * printf("Ar "); break; } - printf("Sy "); + if (0 == strncmp(buf + *start, "NULL", 4) && + ('=' == buf[*start + 4] || + '>' == buf[*start + 4])) + printf("Dv "); + else + printf("Sy "); break; case (FMT_CODE): printf("Qo Li "); break; case (FMT_LINK): + /* Try to link; use "No" if it's empty. */ if ( ! trylink(buf, start, end, dsz)) printf("No "); break; @@ -789,10 +866,47 @@ static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { int last; + size_t i; if ( ! st->parsing || st->paused) return; - +again: + /* + * If we're in the SYNOPSIS, see if we're an #include block. + * If we are, then print the "In" macro and re-loop. + * This handles any number of inclusions, but only when they + * come before the remaining parts... + */ + if (SECT_SYNOPSIS == st->sect) { + i = start; + for (i = start; i < end && ' ' == buf[i]; i++) + /* Spin. */ ; + if (i == end) + return; + /* We're an include block! */ + if (end - i > 10 && + 0 == memcmp(&buf[i], "#include <", 10)) { + start = i + 10; + while (start < end && ' ' == buf[start]) + start++; + fputs(".In ", stdout); + /* Stop til the '>' marker or we hit eoln. */ + while (start < end && + '>' != buf[start] && '\n' != buf[start]) + putchar(buf[start++]); + putchar('\n'); + if (start < end && '>' == buf[start]) + start++; + if (start < end && '\n' == buf[start]) + start++; + if (start < end) + goto again; + return; + } + } + + if (start == end) + return; puts(".Bd -literal"); for (last = ' '; start < end; start++) { /* @@ -1064,24 +1178,40 @@ static void dofile(const struct args *args, const char *fname, const struct tm *tm, const char *buf, size_t sz) { - size_t sup, end, i, cur = 0; - struct state st; - const char *section, *date; char datebuf[64]; + struct state st; + const char *fbase, *fext, *section, *date; char *title, *cp; + size_t sup, end, i, cur = 0; if (0 == sz) return; - /* Title is last path component of the filename. */ + /* + * Parsing the filename is almost always required, + * except when both the title and the section + * are provided on the command line. + */ - if (NULL != args->title) - title = strdup(args->title); - else if (NULL != (cp = strrchr(fname, '/'))) - title = strdup(cp + 1); - else - title = strdup(fname); - + if (NULL == args->title || NULL == args->section) { + fbase = strrchr(fname, '/'); + if (NULL == fbase) + fbase = fname; + else + fbase++; + fext = strrchr(fbase, '.'); + } else + fext = NULL; + + /* + * The title will be converted to uppercase, + * so it needs to be copied. + */ + + title = (NULL != args->title) ? strdup(args->title) : + (NULL != fext) ? strndup(fbase, fext - fbase) : + strdup(fbase); + if (NULL == title) { perror(NULL); exit(EXIT_FAILURE); @@ -1089,14 +1219,9 @@ dofile(const struct args *args, const char *fname, /* Section is 1 unless suffix is "pm". */ - if (NULL == (section = args->section)) { - section = "1"; - if (NULL != (cp = strrchr(title, '.'))) { - *cp++ = '\0'; - if (0 == strcmp(cp, "pm")) - section = PERL_SECTION; - } - } + section = (NULL != args->section) ? args->section : + (NULL == fext || strcmp(fext + 1, "pm")) ? "1" : + PERL_SECTION; /* Date. Or the given "tm" if not supplied. */ @@ -1159,8 +1284,6 @@ readfile(const struct args *args, const char *fname) time_t ttm; struct stat st; - assert(NULL != fname); - fd = 0 != strcmp("-", fname) ? open(fname, O_RDONLY, 0) : STDIN_FILENO; @@ -1266,8 +1389,8 @@ main(int argc, char *argv[]) /* Accept only a single input file. */ - if (argc > 2) - return(EXIT_FAILURE); + if (argc > 1) + goto usage; else if (1 == argc) fname = *argv; @@ -1276,7 +1399,7 @@ main(int argc, char *argv[]) usage: fprintf(stderr, "usage: %s [-d date] " - "[-n title] [-s section]\n", name); + "[-n title] [-s section] [file]\n", name); return(EXIT_FAILURE); }