=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.29 retrieving revision 1.31 diff -u -p -r1.29 -r1.31 --- pod2mdoc/pod2mdoc.c 2014/07/11 20:45:55 1.29 +++ pod2mdoc/pod2mdoc.c 2014/07/15 19:03:07 1.31 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.29 2014/07/11 20:45:55 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.31 2014/07/15 19:03:07 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -53,14 +53,18 @@ enum sect { }; struct state { + const char *fname; /* file being parsed */ int parsing; /* after =cut of before command */ int paused; /* in =begin and before =end */ - int haspar; /* in paragraph: do we need Pp? */ enum sect sect; /* which section are we in? */ - const char *fname; /* file being parsed */ #define LIST_STACKSZ 128 enum list lstack[LIST_STACKSZ]; /* open lists */ size_t lpos; /* where in list stack */ + int haspar; /* in paragraph: do we need Pp? */ + int hasnl; /* in text: just started a new line */ + char *outbuf; /* text buffered for output */ + size_t outbufsz; /* allocated size of outbuf */ + size_t outbuflen; /* current length of outbuf */ }; enum fmt { @@ -123,13 +127,75 @@ static const char fmts[FMT__MAX] = { static int last; + +static void +outbuf_grow(struct state *st, size_t by) +{ + + st->outbufsz += (by / 128 + 1) * 128; + st->outbuf = realloc(st->outbuf, st->outbufsz); + if (NULL == st->outbuf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static void +outbuf_addchar(struct state *st) +{ + + if (st->outbuflen + 2 >= st->outbufsz) + outbuf_grow(st, 1); + st->outbuf[st->outbuflen++] = last; + if ('\\' == last) + st->outbuf[st->outbuflen++] = 'e'; + st->outbuf[st->outbuflen] = '\0'; +} + +static void +outbuf_addstr(struct state *st, const char *str) +{ + size_t slen; + + slen = strlen(str); + if (st->outbuflen + slen >= st->outbufsz) + outbuf_grow(st, slen); + memcpy(st->outbuf + st->outbuflen, str, slen+1); + last = str[slen - 1]; +} + +static void +outbuf_flush(struct state *st) +{ + + if (0 == st->outbuflen) + return; + + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; + st->outbuflen = 0; + st->hasnl = 0; +} + +static void +outbuf_newln(struct state *st) +{ + + if ('\n' == last) + return; + outbuf_flush(st); + putchar('\n'); + last = '\n'; + st->hasnl = 1; +} + /* * Given buf[*start] is at the start of an escape name, read til the end * of the escape ('>') then try to do something with it. * Sets start to be one after the '>'. */ static void -formatescape(const char *buf, size_t *start, size_t end) +formatescape(struct state *st, const char *buf, size_t *start, size_t end) { char esc[16]; /* no more needed */ size_t i, max; @@ -157,17 +223,13 @@ formatescape(const char *buf, size_t *start, size_t en * Just let the rest of them go. */ if (0 == strcmp(esc, "lt")) - printf("\\(la"); + outbuf_addstr(st, "\\(la"); else if (0 == strcmp(esc, "gt")) - printf("\\(ra"); + outbuf_addstr(st, "\\(ra"); else if (0 == strcmp(esc, "vb")) - printf("\\(ba"); + outbuf_addstr(st, "\\(ba"); else if (0 == strcmp(esc, "sol")) - printf("\\(sl"); - else - return; - - last = 'a'; + outbuf_addstr(st, "\\(sl"); } /* @@ -391,25 +453,19 @@ formatcode(struct state *st, const char *buf, size_t * { enum fmt fmt; size_t i, j, dsz; + int white; assert(*start + 1 < end); assert('<' == buf[*start + 1]); /* * First, look up the format code. - * If it's not valid, then exit immediately. + * If it's not valid, treat it as a NOOP. */ for (fmt = 0; fmt < FMT__MAX; fmt++) if (buf[*start] == fmts[fmt]) break; - if (FMT__MAX == fmt) { - putchar(last = buf[(*start)++]); - if ('\\' == last) - putchar('e'); - return(0); - } - /* * Determine whether we're overriding our delimiter. * According to POD, if we have more than one '<' followed by a @@ -434,7 +490,7 @@ formatcode(struct state *st, const char *buf, size_t * * processing for real macros. */ if (FMT_ESCAPE == fmt) { - formatescape(buf, start, end); + formatescape(st, buf, start, end); return(0); } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { /* @@ -472,29 +528,40 @@ formatcode(struct state *st, const char *buf, size_t * * Check whether we're supposed to print macro stuff (this is * suppressed in, e.g., "Nm" and "Sh" macros). */ - if ( ! nomacro) { + if (FMT__MAX != fmt && !nomacro) { + white = ' ' == last || '\n' == last || + ' ' == buf[*start]; + /* - * Print out the macro describing this format code. - * If we're not "reentrant" (not yet on a macro line) - * then print a newline, if necessary, and the macro - * indicator. - * Otherwise, offset us with a space. + * If we are on a text line and there is no + * whitespace before our content, we have to make + * the previous word a prefix to the macro line. */ - if ( ! reentrant) { + + if ( ! white && ! reentrant) { + if ( ! st->hasnl) + putchar('\n'); + printf(".Pf "); + } + + outbuf_flush(st); + + /* Whitespace is easier to suppress on macro lines. */ + + if ( ! white && reentrant) + printf(" Ns"); + + /* Unless we are on a macro line, start one. */ + + if (white && ! reentrant) { if (last != '\n') putchar('\n'); putchar('.'); - } else + } else putchar(' '); - - /* - * If we don't have whitespace before us (and none after - * the opening delimiter), then suppress macro - * whitespace with Pf. - */ - if (' ' != last && '\n' != last && ' ' != buf[*start]) - printf("Pf "); + /* Print the macro corresponding to this format code. */ + switch (fmt) { case (FMT_ITALIC): printf("Em "); @@ -533,7 +600,8 @@ formatcode(struct state *st, const char *buf, size_t * default: abort(); } - } + } else + outbuf_flush(st); /* * Process until we reach the end marker (e.g., '>') or until we @@ -599,6 +667,9 @@ formatcode(struct state *st, const char *buf, size_t * (*start)++; } + if (FMT__MAX == fmt) + return(0); + if ( ! nomacro && FMT_CODE == fmt) printf(" Qc "); @@ -734,20 +805,20 @@ command(struct state *st, const char *buf, size_t star st->sect = SECT_SYNOPSIS; } formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD2): printf(".Ss "); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD3): puts(".Pp"); printf(".Em "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -755,7 +826,7 @@ command(struct state *st, const char *buf, size_t star puts(".Pp"); printf(".No "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -809,7 +880,7 @@ command(struct state *st, const char *buf, size_t star case (LIST_TAG): printf(".It "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); break; case (LIST_ENUM): /* FALLTHROUGH */ @@ -865,7 +936,6 @@ command(struct state *st, const char *buf, size_t star static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { - int last; size_t i; if ( ! st->parsing || st->paused) @@ -920,7 +990,7 @@ again: if ('\\' == buf[start]) printf("e"); } - putchar('\n'); + putchar(last = '\n'); puts(".Ed"); } @@ -950,14 +1020,13 @@ hasmatch(const char *buf, size_t start, size_t end) * If we're an ending bracket, see if we have a stack already. */ static int -dosynopsisop(const char *buf, int *last, - size_t *start, size_t end, size_t *opstack) +dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack) { assert('[' == buf[*start] || ']' == buf[*start]); if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oo"); (*opstack)++; @@ -965,7 +1034,7 @@ dosynopsisop(const char *buf, int *last, return(0); if (']' == buf[*start] && *opstack > 0) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oc"); (*opstack)--; @@ -973,7 +1042,7 @@ dosynopsisop(const char *buf, int *last, return(0); (*start)++; - *last = '\n'; + last = '\n'; while (' ' == buf[*start]) (*start)++; return(1); @@ -1002,7 +1071,7 @@ donamenm(struct state *st, const char *buf, size_t *st break; formatcodeln(st, buf, start, word, 1); if (*start == end) { - putchar('\n'); + putchar(last = '\n'); continue; } assert(',' == buf[*start]); @@ -1052,7 +1121,7 @@ ordinary(struct state *st, const char *buf, size_t sta start++; fputs(".Nd ", stdout); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); return; } } @@ -1061,6 +1130,7 @@ ordinary(struct state *st, const char *buf, size_t sta puts(".Pp"); st->haspar = 0; + st->hasnl = 1; last = '\n'; opstack = 0; @@ -1075,9 +1145,9 @@ ordinary(struct state *st, const char *buf, size_t sta else if ('\n' == buf[start]) break; else if ('\n' == last && '.' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); else if ('\n' == last && '\'' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); /* * If we're in the SYNOPSIS, have square * brackets indicate that we're opening and @@ -1086,27 +1156,23 @@ ordinary(struct state *st, const char *buf, size_t sta if (SECT_SYNOPSIS == st->sect && ('[' == buf[start] || ']' == buf[start]) && - dosynopsisop(buf, &last, - &start, end, &opstack)) + dosynopsisop(buf, &start, end, &opstack)) continue; - putchar(last = buf[start++]); - if ('\\' == last) - putchar('e'); + last = buf[start++]; + if (' ' == last) { + outbuf_flush(st); + putchar(' '); + } else + outbuf_addchar(st); } if (start < end - 1 && '<' == buf[start + 1]) { - /* - * We've encountered a format code. - * This is going to trigger a macro no matter - * what, so print a newline now. - * Then print the (possibly nested) macros and - * following that, a newline. - * Consume all whitespace so we don't - * accidentally start an implicit literal line. - * If the macro ends with a flush comma or - * period, let mdoc(7) handle it for us. - */ if (formatcode(st, buf, &start, end, 0, 0, seq)) { + /* + * Let mdoc(7) handle trailing punctuation. + * XXX Some punctuation characters + * are not handled yet. + */ if ((start == end - 1 || (start < end - 1 && (' ' == buf[start + 1] || @@ -1116,17 +1182,19 @@ ordinary(struct state *st, const char *buf, size_t sta putchar(' '); putchar(buf[start++]); } + /* End the macro line. */ putchar(last = '\n'); + st->hasnl = 1; + /* + * Consume all whitespace + * so we don't accidentally start + * an implicit literal line. + */ while (start < end && ' ' == buf[start]) start++; } } else if (start < end && '\n' == buf[start]) { - /* - * Print the newline only if we haven't already - * printed a newline. - */ - if (last != '\n') - putchar(last = buf[start]); + outbuf_newln(st); if (++start >= end) continue; /* @@ -1137,18 +1205,14 @@ ordinary(struct state *st, const char *buf, size_t sta * have a macro subsequent it, which may be * possible if we have an escape next. */ - if (' ' == buf[start] || '\t' == buf[start]) { + if (' ' == buf[start] || '\t' == buf[start]) puts(".br"); - last = '\n'; - } for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; } } - - if (last != '\n') - putchar('\n'); + outbuf_newln(st); } /*