=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.57 retrieving revision 1.62 diff -u -p -r1.57 -r1.62 --- pod2mdoc/pod2mdoc.c 2015/02/21 21:15:41 1.57 +++ pod2mdoc/pod2mdoc.c 2016/11/03 15:50:28 1.62 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.57 2015/02/21 21:15:41 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.62 2016/11/03 15:50:28 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2014, 2015 Ingo Schwarze @@ -16,7 +16,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include -#include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include #include #include "dict.h" @@ -75,6 +75,7 @@ struct state { char *outbuf; /* text buffered for output */ size_t outbufsz; /* allocated size of outbuf */ size_t outbuflen; /* current length of outbuf */ + size_t outlnlen; /* chars so far on this output line */ }; enum fmt { @@ -134,7 +135,7 @@ static int trylink(const char *, size_t *, size_t, si static void verbatim(struct state *, char *, size_t, size_t); static const char *const cmds[CMD__MAX] = { - "pod", /* CMD_POD */ + "pod", /* CMD_POD */ "head1", /* CMD_HEAD1 */ "head2", /* CMD_HEAD2 */ "head3", /* CMD_HEAD3 */ @@ -161,7 +162,7 @@ static const char fmts[FMT__MAX] = { 'Z' /* FMT_NULL */ }; -static unsigned char last; +static unsigned char last; static void @@ -208,6 +209,14 @@ outbuf_flush(struct state *st) if (0 == st->outbuflen) return; + st->outlnlen += st->outbuflen; + if (OUST_TXT == st->oust && st->wantws) { + if (++st->outlnlen > 72) { + putchar('\n'); + st->oust = OUST_NL; + st->outlnlen = st->outbuflen; + } + } if (OUST_NL != st->oust && st->wantws) putchar(' '); @@ -233,6 +242,7 @@ mdoc_newln(struct state *st) putchar('\n'); last = '\n'; st->oust = OUST_NL; + st->outlnlen = 0; st->wantws = 1; } @@ -271,9 +281,9 @@ formatescape(struct state *st, const char *buf, size_t /* * TODO: right now, we only recognise the named escapes. - * Just let the rest of them go. + * Just let the rest of them go. */ - if (0 == strcmp(esc, "lt")) + if (0 == strcmp(esc, "lt")) outbuf_addstr(st, "\\(la"); else if (0 == strcmp(esc, "gt")) outbuf_addstr(st, "\\(ra"); @@ -295,11 +305,11 @@ formatescape(struct state *st, const char *buf, size_t static int trylink(const char *buf, size_t *start, size_t end, size_t dsz) { - size_t linkstart, realend, linkend, + size_t linkstart, realend, linkend, i, j, textsz, stack; - /* - * Scan to the start of the terminus. + /* + * Scan to the start of the terminus. * This function is more or less replicated in the formatcode() * for null or index formatting codes. * However, we're slightly different because we might have @@ -318,10 +328,10 @@ trylink(const char *buf, size_t *start, size_t end, si assert(realend > 0); if (' ' != buf[realend - 1]) continue; - for (i = realend, j = 0; i < end && j < dsz; j++) + for (i = realend, j = 0; i < end && j < dsz; j++) if ('>' != buf[i++]) break; - if (dsz == j) + if (dsz == j) break; } @@ -376,11 +386,11 @@ trylink(const char *buf, size_t *start, size_t end, si (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) || (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) { /* Gross. */ - printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : + printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : realend) - linkstart), &buf[linkstart]); return(1); - } - + } + /* See if we qualify as a mailto. */ if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) { printf("Mt %.*s", (int)j, &buf[linkstart]); @@ -388,19 +398,19 @@ trylink(const char *buf, size_t *start, size_t end, si } /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */ - if ((j > 3 && ')' == buf[linkend - 1]) && + if ((j > 3 && ')' == buf[linkend - 1]) && ('(' == buf[linkend - 3])) { - printf("Xr %.*s %c", (int)(j - 3), + printf("Xr %.*s %c", (int)(j - 3), &buf[linkstart], buf[linkend - 2]); return(1); } else if ((j > 4 && ')' == buf[linkend - 1]) && ('(' == buf[linkend - 4])) { - printf("Xr %.*s %.*s", (int)(j - 4), + printf("Xr %.*s %.*s", (int)(j - 4), &buf[linkstart], 2, &buf[linkend - 3]); return(1); } else if ((j > 5 && ')' == buf[linkend - 1]) && ('(' == buf[linkend - 5])) { - printf("Xr %.*s %.*s", (int)(j - 5), + printf("Xr %.*s %.*s", (int)(j - 5), &buf[linkstart], 3, &buf[linkend - 4]); return(1); } @@ -411,7 +421,7 @@ trylink(const char *buf, size_t *start, size_t end, si break; if (i < linkend) - printf("Xr %.*s " PERL_SECTION, + printf("Xr %.*s " PERL_SECTION, (int)j, &buf[linkstart]); else printf("Xr %.*s 1", (int)j, &buf[linkstart]); @@ -466,7 +476,7 @@ again: } printf("Fl "); - if (end - *start > 1 && + if (end - *start > 1 && isupper((int)buf[*start]) && islower((int)buf[*start + 1]) && (end - *start == 2 || @@ -505,7 +515,7 @@ again: * entering without OUST_MAC and the code is invalid. */ static int -formatcode(struct state *st, const char *buf, size_t *start, +formatcode(struct state *st, const char *buf, size_t *start, size_t end, int nomacro, int pos) { size_t i, j, dsz; @@ -516,11 +526,11 @@ formatcode(struct state *st, const char *buf, size_t * assert(*start + 1 < end); assert('<' == buf[*start + 1]); - /* - * First, look up the format code. + /* + * First, look up the format code. * If it's not valid, treat it as a NOOP. */ - for (fmt = 0; fmt < FMT__MAX; fmt++) + for (fmt = 0; fmt < FMT__MAX; fmt++) if (buf[*start] == fmts[fmt]) break; @@ -551,7 +561,7 @@ formatcode(struct state *st, const char *buf, size_t * formatescape(st, buf, start, end); return(0); } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { - /* + /* * Just consume til the end delimiter, accounting for * whether it's a custom one. */ @@ -564,10 +574,10 @@ formatcode(struct state *st, const char *buf, size_t * if (' ' != buf[*start - 1]) continue; i = *start; - for (j = 0; i < end && j < dsz; j++) + for (j = 0; i < end && j < dsz; j++) if ('>' != buf[i++]) break; - if (dsz != j) + if (dsz != j) continue; (*start) += dsz; break; @@ -637,11 +647,8 @@ formatcode(struct state *st, const char *buf, size_t * */ switch (fmt) { - case (FMT_ITALIC): - fputs("Em", stdout); - break; case (FMT_BOLD): - if (SECT_SYNOPSIS == st->sect) { + if (SECT_SYNOPSIS == st->sect) { if (1 == dsz && '-' == buf[*start]) dosynopsisfl(buf, start, end); else if (0 == pos) @@ -650,6 +657,8 @@ formatcode(struct state *st, const char *buf, size_t * fputs("Ar", stdout); break; } + /* FALLTHROUGH */ + case (FMT_ITALIC): i = 0; uc = buf[*start]; while (isalnum(uc) || '_' == uc || ' ' == uc) @@ -668,7 +677,7 @@ formatcode(struct state *st, const char *buf, size_t * fputs("Vt", stdout); break; default: - fputs("Sy", stdout); + fputs(FMT_BOLD == fmt ? "Sy" : "Em", stdout); break; } break; @@ -706,7 +715,7 @@ formatcode(struct state *st, const char *buf, size_t * if ('>' == buf[*start] && 1 == dsz) { (*start)++; break; - } else if ('>' == buf[*start] && + } else if ('>' == buf[*start] && ' ' == buf[*start - 1]) { /* * Handle custom delimiters. @@ -926,7 +935,7 @@ command(struct state *st, const char *buf, size_t star } else if (end - start == 8) { if (0 == memcmp(&buf[start], "SYNOPSIS", 8)) st->sect = SECT_SYNOPSIS; - } + } formatcodeln(st, "Sh", buf, &start, end, 1); mdoc_newln(st); st->haspar = 1; @@ -951,7 +960,7 @@ command(struct state *st, const char *buf, size_t star st->haspar = 1; break; case (CMD_OVER): - /* + /* * If we have an existing list that hasn't had an =item * yet, then make sure that we open it now. * We use the default list type, but that can't be @@ -968,7 +977,7 @@ command(struct state *st, const char *buf, size_t star break; case (CMD_ITEM): if (0 == st->lpos) { - /* + /* * Bad markup. * Try to compensate. */ @@ -982,7 +991,7 @@ command(struct state *st, const char *buf, size_t star * list, and everything is tagged. */ if (LIST__MAX == st->lstack[st->lpos - 1]) { - st->lstack[st->lpos - 1] = + st->lstack[st->lpos - 1] = listguess(buf, start, end); switch (st->lstack[st->lpos - 1]) { case (LIST_BULLET): @@ -1024,14 +1033,14 @@ command(struct state *st, const char *buf, size_t star } break; case (CMD_BEGIN): - /* + /* * We disregard all types for now. * TODO: process at least "text" in a -literal block. */ st->paused = 1; break; case (CMD_FOR): - /* + /* * We ignore all types of encodings and formats * unilaterally. */ @@ -1105,7 +1114,7 @@ again: goto again; } - /* + /* * If we're in the SYNOPSIS, see if we're an #include block. * If we are, then print the "In" macro and re-loop. * This handles any number of inclusions, but only when they @@ -1119,7 +1128,7 @@ again: goto again; /* We're an include block! */ - if (end - i > 10 && + if (end - i > 10 && 0 == memcmp(&buf[i], "#include <", 10)) { start = i + 10; while (start < end && ' ' == buf[start]) @@ -1129,7 +1138,7 @@ again: indisplay = wantsp = 0; fputs(".In ", stdout); /* Stop til the '>' marker or we hit eoln. */ - while (start < end && + while (start < end && '>' != buf[start] && '\n' != buf[start]) putchar(buf[start++]); putchar('\n'); @@ -1234,33 +1243,37 @@ again: } putchar('\n'); buf[ifa++] = '\0'; - printf(".Fo %s\n", buf + ifo); dict_put(buf + ifo, 0, MDOC_Fo); buf[ifc++] = '\0'; - for (;;) { - cp = strchr(buf + ifa, ','); - if (cp != NULL) { - cp2 = cp; - *cp++ = '\0'; - } else - cp2 = strchr(buf + ifa, '\0'); - while (isalnum((unsigned char)cp2[-1]) || - '_' == cp2[-1]) - cp2--; - if ('\0' != *cp2) - dict_put(cp2, 0, MDOC_Fa); - register_type(buf + ifa); - if (strchr(buf + ifa, ' ') == NULL) - printf(".Fa %s\n", buf + ifa); - else - printf(".Fa \"%s\"\n", buf + ifa); - if (cp == NULL) - break; - while (*cp == ' ' || *cp == '\t') - cp++; - ifa = cp - buf; - } - puts(".Fc"); + if (strcmp(buf + ifa, "void")) { + printf(".Fo %s\n", buf + ifo); + for (;;) { + cp = strchr(buf + ifa, ','); + if (cp != NULL) { + cp2 = cp; + *cp++ = '\0'; + } else + cp2 = strchr(buf + ifa, '\0'); + while (isalnum((unsigned char)cp2[-1]) + || '_' == cp2[-1]) + cp2--; + if ('\0' != *cp2) + dict_put(cp2, 0, MDOC_Fa); + register_type(buf + ifa); + if (strchr(buf + ifa, ' ') == NULL) + printf(".Fa %s\n", buf + ifa); + else + printf(".Fa \"%s\"\n", + buf + ifa); + if (cp == NULL) + break; + while (*cp == ' ' || *cp == '\t') + cp++; + ifa = cp - buf; + } + puts(".Fc"); + } else + printf(".Fn %s void\n", buf + ifo); if (buf[ifc] == ';') ifc++; if (ifc < inl) { @@ -1308,7 +1321,7 @@ hasmatch(const char *buf, size_t start, size_t end) { size_t stack; - for (stack = 0; start < end; start++) + for (stack = 0; start < end; start++) if (buf[start] == '[') stack++; else if (buf[start] == ']' && 0 == stack) @@ -1456,8 +1469,8 @@ ordinary(struct state *st, const char *buf, size_t sta opstack = 0; for (seq = 0; start < end; seq++) { - /* - * Loop til we get either to a newline or escape. + /* + * Loop til we get either to a newline or escape. * Escape initial control characters. */ while (start < end) { @@ -1477,7 +1490,7 @@ ordinary(struct state *st, const char *buf, size_t sta */ if (SECT_SYNOPSIS == st->sect && - ('[' == buf[start] || + ('[' == buf[start] || ']' == buf[start]) && dosynopsisop(st, buf, &start, end, &opstack)) @@ -1632,7 +1645,7 @@ ordinary(struct state *st, const char *buf, size_t sta } } else if (start < end && '\n' == buf[start]) { outbuf_flush(st); - mdoc_newln(st); + st->wantws = 1; if (++start >= end) continue; /* @@ -1643,12 +1656,14 @@ ordinary(struct state *st, const char *buf, size_t sta * have a macro subsequent it, which may be * possible if we have an escape next. */ - if (' ' == buf[start] || '\t' == buf[start]) + if (' ' == buf[start] || '\t' == buf[start]) { + mdoc_newln(st); puts(".br"); + } for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; - } + } } outbuf_flush(st); mdoc_newln(st); @@ -1681,7 +1696,7 @@ dopar(struct state *st, char *buf, size_t start, size_ * POD way. */ static void -dofile(const struct args *args, const char *fname, +dofile(const struct args *args, const char *fname, const struct tm *tm, char *buf, size_t sz) { char datebuf[64]; @@ -1777,7 +1792,7 @@ dofile(const struct args *args, const char *fname, !(verb && end + 2 < sz && isspace((unsigned char)buf[end + 2]))) break; - + /* Adjust end marker for EOF. */ if (end < sz && '\n' != buf[end]) @@ -1806,9 +1821,9 @@ readfile(const struct args *args, const char *fname) ssize_t ssz; struct tm *tm; time_t ttm; - struct stat st; + struct stat st; - fd = 0 != strcmp("-", fname) ? + fd = 0 != strcmp("-", fname) ? open(fname, O_RDONLY, 0) : STDIN_FILENO; if (-1 == fd) { @@ -1822,7 +1837,7 @@ readfile(const struct args *args, const char *fname) } else tm = localtime(&st.st_mtime); - /* + /* * Arbitrarily-sized initial buffer. * Should be big enough for most files... */ @@ -1850,7 +1865,7 @@ readfile(const struct args *args, const char *fname) return(0); } - dofile(args, STDIN_FILENO == fd ? + dofile(args, STDIN_FILENO == fd ? "STDIN" : fname, tm, buf, cur); free(buf); if (STDIN_FILENO != fd) @@ -1918,11 +1933,11 @@ main(int argc, char *argv[]) else if (1 == argc) fname = *argv; - return(readfile(&args, fname) ? + return(readfile(&args, fname) ? EXIT_SUCCESS : EXIT_FAILURE); usage: - fprintf(stderr, "usage: %s [-d date] " + fprintf(stderr, "usage: %s [-d date] " "[-n title] [-s section] [file]\n", name); return(EXIT_FAILURE);