=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.30 retrieving revision 1.31 diff -u -p -r1.30 -r1.31 --- pod2mdoc/pod2mdoc.c 2014/07/15 19:00:48 1.30 +++ pod2mdoc/pod2mdoc.c 2014/07/15 19:03:07 1.31 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.30 2014/07/15 19:00:48 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.31 2014/07/15 19:03:07 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -53,14 +53,18 @@ enum sect { }; struct state { + const char *fname; /* file being parsed */ int parsing; /* after =cut of before command */ int paused; /* in =begin and before =end */ - int haspar; /* in paragraph: do we need Pp? */ enum sect sect; /* which section are we in? */ - const char *fname; /* file being parsed */ #define LIST_STACKSZ 128 enum list lstack[LIST_STACKSZ]; /* open lists */ size_t lpos; /* where in list stack */ + int haspar; /* in paragraph: do we need Pp? */ + int hasnl; /* in text: just started a new line */ + char *outbuf; /* text buffered for output */ + size_t outbufsz; /* allocated size of outbuf */ + size_t outbuflen; /* current length of outbuf */ }; enum fmt { @@ -123,13 +127,75 @@ static const char fmts[FMT__MAX] = { static int last; + +static void +outbuf_grow(struct state *st, size_t by) +{ + + st->outbufsz += (by / 128 + 1) * 128; + st->outbuf = realloc(st->outbuf, st->outbufsz); + if (NULL == st->outbuf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static void +outbuf_addchar(struct state *st) +{ + + if (st->outbuflen + 2 >= st->outbufsz) + outbuf_grow(st, 1); + st->outbuf[st->outbuflen++] = last; + if ('\\' == last) + st->outbuf[st->outbuflen++] = 'e'; + st->outbuf[st->outbuflen] = '\0'; +} + +static void +outbuf_addstr(struct state *st, const char *str) +{ + size_t slen; + + slen = strlen(str); + if (st->outbuflen + slen >= st->outbufsz) + outbuf_grow(st, slen); + memcpy(st->outbuf + st->outbuflen, str, slen+1); + last = str[slen - 1]; +} + +static void +outbuf_flush(struct state *st) +{ + + if (0 == st->outbuflen) + return; + + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; + st->outbuflen = 0; + st->hasnl = 0; +} + +static void +outbuf_newln(struct state *st) +{ + + if ('\n' == last) + return; + outbuf_flush(st); + putchar('\n'); + last = '\n'; + st->hasnl = 1; +} + /* * Given buf[*start] is at the start of an escape name, read til the end * of the escape ('>') then try to do something with it. * Sets start to be one after the '>'. */ static void -formatescape(const char *buf, size_t *start, size_t end) +formatescape(struct state *st, const char *buf, size_t *start, size_t end) { char esc[16]; /* no more needed */ size_t i, max; @@ -157,17 +223,13 @@ formatescape(const char *buf, size_t *start, size_t en * Just let the rest of them go. */ if (0 == strcmp(esc, "lt")) - printf("\\(la"); + outbuf_addstr(st, "\\(la"); else if (0 == strcmp(esc, "gt")) - printf("\\(ra"); + outbuf_addstr(st, "\\(ra"); else if (0 == strcmp(esc, "vb")) - printf("\\(ba"); + outbuf_addstr(st, "\\(ba"); else if (0 == strcmp(esc, "sol")) - printf("\\(sl"); - else - return; - - last = 'a'; + outbuf_addstr(st, "\\(sl"); } /* @@ -391,6 +453,7 @@ formatcode(struct state *st, const char *buf, size_t * { enum fmt fmt; size_t i, j, dsz; + int white; assert(*start + 1 < end); assert('<' == buf[*start + 1]); @@ -427,7 +490,7 @@ formatcode(struct state *st, const char *buf, size_t * * processing for real macros. */ if (FMT_ESCAPE == fmt) { - formatescape(buf, start, end); + formatescape(st, buf, start, end); return(0); } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { /* @@ -466,28 +529,39 @@ formatcode(struct state *st, const char *buf, size_t * * suppressed in, e.g., "Nm" and "Sh" macros). */ if (FMT__MAX != fmt && !nomacro) { + white = ' ' == last || '\n' == last || + ' ' == buf[*start]; + /* - * Print out the macro describing this format code. - * If we're not "reentrant" (not yet on a macro line) - * then print a newline, if necessary, and the macro - * indicator. - * Otherwise, offset us with a space. + * If we are on a text line and there is no + * whitespace before our content, we have to make + * the previous word a prefix to the macro line. */ - if ( ! reentrant) { + + if ( ! white && ! reentrant) { + if ( ! st->hasnl) + putchar('\n'); + printf(".Pf "); + } + + outbuf_flush(st); + + /* Whitespace is easier to suppress on macro lines. */ + + if ( ! white && reentrant) + printf(" Ns"); + + /* Unless we are on a macro line, start one. */ + + if (white && ! reentrant) { if (last != '\n') putchar('\n'); putchar('.'); - } else + } else putchar(' '); - - /* - * If we don't have whitespace before us (and none after - * the opening delimiter), then suppress macro - * whitespace with Pf. - */ - if (' ' != last && '\n' != last && ' ' != buf[*start]) - printf("Pf "); + /* Print the macro corresponding to this format code. */ + switch (fmt) { case (FMT_ITALIC): printf("Em "); @@ -526,7 +600,8 @@ formatcode(struct state *st, const char *buf, size_t * default: abort(); } - } + } else + outbuf_flush(st); /* * Process until we reach the end marker (e.g., '>') or until we @@ -730,20 +805,20 @@ command(struct state *st, const char *buf, size_t star st->sect = SECT_SYNOPSIS; } formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD2): printf(".Ss "); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD3): puts(".Pp"); printf(".Em "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -751,7 +826,7 @@ command(struct state *st, const char *buf, size_t star puts(".Pp"); printf(".No "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -805,7 +880,7 @@ command(struct state *st, const char *buf, size_t star case (LIST_TAG): printf(".It "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); break; case (LIST_ENUM): /* FALLTHROUGH */ @@ -861,7 +936,6 @@ command(struct state *st, const char *buf, size_t star static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { - int last; size_t i; if ( ! st->parsing || st->paused) @@ -916,7 +990,7 @@ again: if ('\\' == buf[start]) printf("e"); } - putchar('\n'); + putchar(last = '\n'); puts(".Ed"); } @@ -946,14 +1020,13 @@ hasmatch(const char *buf, size_t start, size_t end) * If we're an ending bracket, see if we have a stack already. */ static int -dosynopsisop(const char *buf, int *last, - size_t *start, size_t end, size_t *opstack) +dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack) { assert('[' == buf[*start] || ']' == buf[*start]); if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oo"); (*opstack)++; @@ -961,7 +1034,7 @@ dosynopsisop(const char *buf, int *last, return(0); if (']' == buf[*start] && *opstack > 0) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oc"); (*opstack)--; @@ -969,7 +1042,7 @@ dosynopsisop(const char *buf, int *last, return(0); (*start)++; - *last = '\n'; + last = '\n'; while (' ' == buf[*start]) (*start)++; return(1); @@ -998,7 +1071,7 @@ donamenm(struct state *st, const char *buf, size_t *st break; formatcodeln(st, buf, start, word, 1); if (*start == end) { - putchar('\n'); + putchar(last = '\n'); continue; } assert(',' == buf[*start]); @@ -1048,7 +1121,7 @@ ordinary(struct state *st, const char *buf, size_t sta start++; fputs(".Nd ", stdout); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); return; } } @@ -1057,6 +1130,7 @@ ordinary(struct state *st, const char *buf, size_t sta puts(".Pp"); st->haspar = 0; + st->hasnl = 1; last = '\n'; opstack = 0; @@ -1071,9 +1145,9 @@ ordinary(struct state *st, const char *buf, size_t sta else if ('\n' == buf[start]) break; else if ('\n' == last && '.' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); else if ('\n' == last && '\'' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); /* * If we're in the SYNOPSIS, have square * brackets indicate that we're opening and @@ -1082,12 +1156,14 @@ ordinary(struct state *st, const char *buf, size_t sta if (SECT_SYNOPSIS == st->sect && ('[' == buf[start] || ']' == buf[start]) && - dosynopsisop(buf, &last, - &start, end, &opstack)) + dosynopsisop(buf, &start, end, &opstack)) continue; - putchar(last = buf[start++]); - if ('\\' == last) - putchar('e'); + last = buf[start++]; + if (' ' == last) { + outbuf_flush(st); + putchar(' '); + } else + outbuf_addchar(st); } if (start < end - 1 && '<' == buf[start + 1]) { @@ -1108,6 +1184,7 @@ ordinary(struct state *st, const char *buf, size_t sta } /* End the macro line. */ putchar(last = '\n'); + st->hasnl = 1; /* * Consume all whitespace * so we don't accidentally start @@ -1117,12 +1194,7 @@ ordinary(struct state *st, const char *buf, size_t sta start++; } } else if (start < end && '\n' == buf[start]) { - /* - * Print the newline only if we haven't already - * printed a newline. - */ - if (last != '\n') - putchar(last = buf[start]); + outbuf_newln(st); if (++start >= end) continue; /* @@ -1133,18 +1205,14 @@ ordinary(struct state *st, const char *buf, size_t sta * have a macro subsequent it, which may be * possible if we have an escape next. */ - if (' ' == buf[start] || '\t' == buf[start]) { + if (' ' == buf[start] || '\t' == buf[start]) puts(".br"); - last = '\n'; - } for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; } } - - if (last != '\n') - putchar('\n'); + outbuf_newln(st); } /*