=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.21 retrieving revision 1.31 diff -u -p -r1.21 -r1.31 --- pod2mdoc/pod2mdoc.c 2014/04/03 11:55:01 1.21 +++ pod2mdoc/pod2mdoc.c 2014/07/15 19:03:07 1.31 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.21 2014/04/03 11:55:01 kristaps Exp $ */ +/* $Id: pod2mdoc.c,v 1.31 2014/07/15 19:03:07 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -53,14 +53,18 @@ enum sect { }; struct state { + const char *fname; /* file being parsed */ int parsing; /* after =cut of before command */ int paused; /* in =begin and before =end */ - int haspar; /* in paragraph: do we need Pp? */ enum sect sect; /* which section are we in? */ - const char *fname; /* file being parsed */ #define LIST_STACKSZ 128 enum list lstack[LIST_STACKSZ]; /* open lists */ size_t lpos; /* where in list stack */ + int haspar; /* in paragraph: do we need Pp? */ + int hasnl; /* in text: just started a new line */ + char *outbuf; /* text buffered for output */ + size_t outbufsz; /* allocated size of outbuf */ + size_t outbuflen; /* current length of outbuf */ }; enum fmt { @@ -123,13 +127,75 @@ static const char fmts[FMT__MAX] = { static int last; + +static void +outbuf_grow(struct state *st, size_t by) +{ + + st->outbufsz += (by / 128 + 1) * 128; + st->outbuf = realloc(st->outbuf, st->outbufsz); + if (NULL == st->outbuf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static void +outbuf_addchar(struct state *st) +{ + + if (st->outbuflen + 2 >= st->outbufsz) + outbuf_grow(st, 1); + st->outbuf[st->outbuflen++] = last; + if ('\\' == last) + st->outbuf[st->outbuflen++] = 'e'; + st->outbuf[st->outbuflen] = '\0'; +} + +static void +outbuf_addstr(struct state *st, const char *str) +{ + size_t slen; + + slen = strlen(str); + if (st->outbuflen + slen >= st->outbufsz) + outbuf_grow(st, slen); + memcpy(st->outbuf + st->outbuflen, str, slen+1); + last = str[slen - 1]; +} + +static void +outbuf_flush(struct state *st) +{ + + if (0 == st->outbuflen) + return; + + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; + st->outbuflen = 0; + st->hasnl = 0; +} + +static void +outbuf_newln(struct state *st) +{ + + if ('\n' == last) + return; + outbuf_flush(st); + putchar('\n'); + last = '\n'; + st->hasnl = 1; +} + /* * Given buf[*start] is at the start of an escape name, read til the end * of the escape ('>') then try to do something with it. * Sets start to be one after the '>'. */ static void -formatescape(const char *buf, size_t *start, size_t end) +formatescape(struct state *st, const char *buf, size_t *start, size_t end) { char esc[16]; /* no more needed */ size_t i, max; @@ -157,17 +223,13 @@ formatescape(const char *buf, size_t *start, size_t en * Just let the rest of them go. */ if (0 == strcmp(esc, "lt")) - printf("\\(la"); + outbuf_addstr(st, "\\(la"); else if (0 == strcmp(esc, "gt")) - printf("\\(ra"); + outbuf_addstr(st, "\\(ra"); else if (0 == strcmp(esc, "vb")) - printf("\\(ba"); + outbuf_addstr(st, "\\(ba"); else if (0 == strcmp(esc, "sol")) - printf("\\(sl"); - else - return; - - last = 'a'; + outbuf_addstr(st, "\\(sl"); } /* @@ -181,18 +243,23 @@ trylink(const char *buf, size_t *start, size_t end, si { size_t linkstart, realend, linkend, i, j, textsz, stack; - const char *text; /* * Scan to the start of the terminus. * This function is more or less replicated in the formatcode() * for null or index formatting codes. + * However, we're slightly different because we might have + * nested escapes we need to ignore. */ stack = 0; for (linkstart = realend = *start; realend < end; realend++) { + if ('<' == buf[realend]) + stack++; if ('>' != buf[realend]) continue; - else if (dsz == 1) + else if (stack-- > 0) + continue; + if (dsz == 1) break; assert(realend > 0); if (' ' != buf[realend - 1]) @@ -212,7 +279,6 @@ trylink(const char *buf, size_t *start, size_t end, si linkend = dsz > 1 ? realend - 1 : realend; /* Re-scan to see if we have a title or section. */ - text = &buf[*start]; for (textsz = *start; textsz < linkend; textsz++) if ('|' == buf[textsz] || '/' == buf[textsz]) break; @@ -387,25 +453,19 @@ formatcode(struct state *st, const char *buf, size_t * { enum fmt fmt; size_t i, j, dsz; + int white; assert(*start + 1 < end); assert('<' == buf[*start + 1]); /* * First, look up the format code. - * If it's not valid, then exit immediately. + * If it's not valid, treat it as a NOOP. */ for (fmt = 0; fmt < FMT__MAX; fmt++) if (buf[*start] == fmts[fmt]) break; - if (FMT__MAX == fmt) { - putchar(last = buf[(*start)++]); - if ('\\' == last) - putchar('e'); - return(0); - } - /* * Determine whether we're overriding our delimiter. * According to POD, if we have more than one '<' followed by a @@ -430,7 +490,7 @@ formatcode(struct state *st, const char *buf, size_t * * processing for real macros. */ if (FMT_ESCAPE == fmt) { - formatescape(buf, start, end); + formatescape(st, buf, start, end); return(0); } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { /* @@ -454,6 +514,13 @@ formatcode(struct state *st, const char *buf, size_t * (*start) += dsz; break; } + if (*start < end) { + assert('>' == buf[*start]); + (*start)++; + } + if (isspace(last)) + while (*start < end && isspace((int)buf[*start])) + (*start)++; return(0); } @@ -461,29 +528,40 @@ formatcode(struct state *st, const char *buf, size_t * * Check whether we're supposed to print macro stuff (this is * suppressed in, e.g., "Nm" and "Sh" macros). */ - if ( ! nomacro) { + if (FMT__MAX != fmt && !nomacro) { + white = ' ' == last || '\n' == last || + ' ' == buf[*start]; + /* - * Print out the macro describing this format code. - * If we're not "reentrant" (not yet on a macro line) - * then print a newline, if necessary, and the macro - * indicator. - * Otherwise, offset us with a space. + * If we are on a text line and there is no + * whitespace before our content, we have to make + * the previous word a prefix to the macro line. */ - if ( ! reentrant) { + + if ( ! white && ! reentrant) { + if ( ! st->hasnl) + putchar('\n'); + printf(".Pf "); + } + + outbuf_flush(st); + + /* Whitespace is easier to suppress on macro lines. */ + + if ( ! white && reentrant) + printf(" Ns"); + + /* Unless we are on a macro line, start one. */ + + if (white && ! reentrant) { if (last != '\n') putchar('\n'); putchar('.'); - } else + } else putchar(' '); - - /* - * If we don't have whitespace before us (and none after - * the opening delimiter), then suppress macro - * whitespace with Pf. - */ - if (' ' != last && '\n' != last && ' ' != buf[*start]) - printf("Pf "); + /* Print the macro corresponding to this format code. */ + switch (fmt) { case (FMT_ITALIC): printf("Em "); @@ -498,7 +576,12 @@ formatcode(struct state *st, const char *buf, size_t * printf("Ar "); break; } - printf("Sy "); + if (0 == strncmp(buf + *start, "NULL", 4) && + ('=' == buf[*start + 4] || + '>' == buf[*start + 4])) + printf("Dv "); + else + printf("Sy "); break; case (FMT_CODE): printf("Qo Li "); @@ -517,7 +600,8 @@ formatcode(struct state *st, const char *buf, size_t * default: abort(); } - } + } else + outbuf_flush(st); /* * Process until we reach the end marker (e.g., '>') or until we @@ -583,6 +667,9 @@ formatcode(struct state *st, const char *buf, size_t * (*start)++; } + if (FMT__MAX == fmt) + return(0); + if ( ! nomacro && FMT_CODE == fmt) printf(" Qc "); @@ -718,20 +805,20 @@ command(struct state *st, const char *buf, size_t star st->sect = SECT_SYNOPSIS; } formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD2): printf(".Ss "); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD3): puts(".Pp"); printf(".Em "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -739,7 +826,7 @@ command(struct state *st, const char *buf, size_t star puts(".Pp"); printf(".No "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -793,7 +880,7 @@ command(struct state *st, const char *buf, size_t star case (LIST_TAG): printf(".It "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); break; case (LIST_ENUM): /* FALLTHROUGH */ @@ -849,11 +936,47 @@ command(struct state *st, const char *buf, size_t star static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { - int last; + size_t i; if ( ! st->parsing || st->paused) return; - +again: + /* + * If we're in the SYNOPSIS, see if we're an #include block. + * If we are, then print the "In" macro and re-loop. + * This handles any number of inclusions, but only when they + * come before the remaining parts... + */ + if (SECT_SYNOPSIS == st->sect) { + i = start; + for (i = start; i < end && ' ' == buf[i]; i++) + /* Spin. */ ; + if (i == end) + return; + /* We're an include block! */ + if (end - i > 10 && + 0 == memcmp(&buf[i], "#include <", 10)) { + start = i + 10; + while (start < end && ' ' == buf[start]) + start++; + fputs(".In ", stdout); + /* Stop til the '>' marker or we hit eoln. */ + while (start < end && + '>' != buf[start] && '\n' != buf[start]) + putchar(buf[start++]); + putchar('\n'); + if (start < end && '>' == buf[start]) + start++; + if (start < end && '\n' == buf[start]) + start++; + if (start < end) + goto again; + return; + } + } + + if (start == end) + return; puts(".Bd -literal"); for (last = ' '; start < end; start++) { /* @@ -867,7 +990,7 @@ verbatim(struct state *st, const char *buf, size_t sta if ('\\' == buf[start]) printf("e"); } - putchar('\n'); + putchar(last = '\n'); puts(".Ed"); } @@ -897,14 +1020,13 @@ hasmatch(const char *buf, size_t start, size_t end) * If we're an ending bracket, see if we have a stack already. */ static int -dosynopsisop(const char *buf, int *last, - size_t *start, size_t end, size_t *opstack) +dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack) { assert('[' == buf[*start] || ']' == buf[*start]); if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oo"); (*opstack)++; @@ -912,7 +1034,7 @@ dosynopsisop(const char *buf, int *last, return(0); if (']' == buf[*start] && *opstack > 0) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oc"); (*opstack)--; @@ -920,7 +1042,7 @@ dosynopsisop(const char *buf, int *last, return(0); (*start)++; - *last = '\n'; + last = '\n'; while (' ' == buf[*start]) (*start)++; return(1); @@ -949,7 +1071,7 @@ donamenm(struct state *st, const char *buf, size_t *st break; formatcodeln(st, buf, start, word, 1); if (*start == end) { - putchar('\n'); + putchar(last = '\n'); continue; } assert(',' == buf[*start]); @@ -999,7 +1121,7 @@ ordinary(struct state *st, const char *buf, size_t sta start++; fputs(".Nd ", stdout); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); return; } } @@ -1008,6 +1130,7 @@ ordinary(struct state *st, const char *buf, size_t sta puts(".Pp"); st->haspar = 0; + st->hasnl = 1; last = '\n'; opstack = 0; @@ -1022,9 +1145,9 @@ ordinary(struct state *st, const char *buf, size_t sta else if ('\n' == buf[start]) break; else if ('\n' == last && '.' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); else if ('\n' == last && '\'' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); /* * If we're in the SYNOPSIS, have square * brackets indicate that we're opening and @@ -1033,27 +1156,23 @@ ordinary(struct state *st, const char *buf, size_t sta if (SECT_SYNOPSIS == st->sect && ('[' == buf[start] || ']' == buf[start]) && - dosynopsisop(buf, &last, - &start, end, &opstack)) + dosynopsisop(buf, &start, end, &opstack)) continue; - putchar(last = buf[start++]); - if ('\\' == last) - putchar('e'); + last = buf[start++]; + if (' ' == last) { + outbuf_flush(st); + putchar(' '); + } else + outbuf_addchar(st); } if (start < end - 1 && '<' == buf[start + 1]) { - /* - * We've encountered a format code. - * This is going to trigger a macro no matter - * what, so print a newline now. - * Then print the (possibly nested) macros and - * following that, a newline. - * Consume all whitespace so we don't - * accidentally start an implicit literal line. - * If the macro ends with a flush comma or - * period, let mdoc(7) handle it for us. - */ if (formatcode(st, buf, &start, end, 0, 0, seq)) { + /* + * Let mdoc(7) handle trailing punctuation. + * XXX Some punctuation characters + * are not handled yet. + */ if ((start == end - 1 || (start < end - 1 && (' ' == buf[start + 1] || @@ -1063,17 +1182,19 @@ ordinary(struct state *st, const char *buf, size_t sta putchar(' '); putchar(buf[start++]); } + /* End the macro line. */ putchar(last = '\n'); + st->hasnl = 1; + /* + * Consume all whitespace + * so we don't accidentally start + * an implicit literal line. + */ while (start < end && ' ' == buf[start]) start++; } } else if (start < end && '\n' == buf[start]) { - /* - * Print the newline only if we haven't already - * printed a newline. - */ - if (last != '\n') - putchar(last = buf[start]); + outbuf_newln(st); if (++start >= end) continue; /* @@ -1084,18 +1205,14 @@ ordinary(struct state *st, const char *buf, size_t sta * have a macro subsequent it, which may be * possible if we have an escape next. */ - if (' ' == buf[start] || '\t' == buf[start]) { + if (' ' == buf[start] || '\t' == buf[start]) puts(".br"); - last = '\n'; - } for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; } } - - if (last != '\n') - putchar('\n'); + outbuf_newln(st); } /* @@ -1125,24 +1242,40 @@ static void dofile(const struct args *args, const char *fname, const struct tm *tm, const char *buf, size_t sz) { - size_t sup, end, i, cur = 0; - struct state st; - const char *section, *date; char datebuf[64]; + struct state st; + const char *fbase, *fext, *section, *date; char *title, *cp; + size_t sup, end, i, cur = 0; if (0 == sz) return; - /* Title is last path component of the filename. */ + /* + * Parsing the filename is almost always required, + * except when both the title and the section + * are provided on the command line. + */ - if (NULL != args->title) - title = strdup(args->title); - else if (NULL != (cp = strrchr(fname, '/'))) - title = strdup(cp + 1); - else - title = strdup(fname); - + if (NULL == args->title || NULL == args->section) { + fbase = strrchr(fname, '/'); + if (NULL == fbase) + fbase = fname; + else + fbase++; + fext = strrchr(fbase, '.'); + } else + fext = NULL; + + /* + * The title will be converted to uppercase, + * so it needs to be copied. + */ + + title = (NULL != args->title) ? strdup(args->title) : + (NULL != fext) ? strndup(fbase, fext - fbase) : + strdup(fbase); + if (NULL == title) { perror(NULL); exit(EXIT_FAILURE); @@ -1150,14 +1283,9 @@ dofile(const struct args *args, const char *fname, /* Section is 1 unless suffix is "pm". */ - if (NULL == (section = args->section)) { - section = "1"; - if (NULL != (cp = strrchr(title, '.'))) { - *cp++ = '\0'; - if (0 == strcmp(cp, "pm")) - section = PERL_SECTION; - } - } + section = (NULL != args->section) ? args->section : + (NULL == fext || strcmp(fext + 1, "pm")) ? "1" : + PERL_SECTION; /* Date. Or the given "tm" if not supplied. */ @@ -1220,8 +1348,6 @@ readfile(const struct args *args, const char *fname) time_t ttm; struct stat st; - assert(NULL != fname); - fd = 0 != strcmp("-", fname) ? open(fname, O_RDONLY, 0) : STDIN_FILENO; @@ -1327,8 +1453,8 @@ main(int argc, char *argv[]) /* Accept only a single input file. */ - if (argc > 2) - return(EXIT_FAILURE); + if (argc > 1) + goto usage; else if (1 == argc) fname = *argv; @@ -1337,7 +1463,7 @@ main(int argc, char *argv[]) usage: fprintf(stderr, "usage: %s [-d date] " - "[-n title] [-s section]\n", name); + "[-n title] [-s section] [file]\n", name); return(EXIT_FAILURE); }