=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.31 retrieving revision 1.32 diff -u -p -r1.31 -r1.32 --- pod2mdoc/pod2mdoc.c 2014/07/15 19:03:07 1.31 +++ pod2mdoc/pod2mdoc.c 2014/07/18 05:09:32 1.32 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.31 2014/07/15 19:03:07 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.32 2014/07/18 05:09:32 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -52,6 +52,12 @@ enum sect { SECT_SYNOPSIS, /* SYNOPSIS section */ }; +enum outstate { + OUST_NL = 0, /* just started a new output line */ + OUST_TXT, /* text line output in progress */ + OUST_MAC /* macro line output in progress */ +}; + struct state { const char *fname; /* file being parsed */ int parsing; /* after =cut of before command */ @@ -61,7 +67,8 @@ struct state { enum list lstack[LIST_STACKSZ]; /* open lists */ size_t lpos; /* where in list stack */ int haspar; /* in paragraph: do we need Pp? */ - int hasnl; /* in text: just started a new line */ + enum outstate oust; /* state of the mdoc output stream */ + int wantws; /* let mdoc(7) output whitespace here */ char *outbuf; /* text buffered for output */ size_t outbufsz; /* allocated size of outbuf */ size_t outbuflen; /* current length of outbuf */ @@ -150,6 +157,7 @@ outbuf_addchar(struct state *st) if ('\\' == last) st->outbuf[st->outbuflen++] = 'e'; st->outbuf[st->outbuflen] = '\0'; + st->wantws = 0; } static void @@ -162,6 +170,7 @@ outbuf_addstr(struct state *st, const char *str) outbuf_grow(st, slen); memcpy(st->outbuf + st->outbuflen, str, slen+1); last = str[slen - 1]; + st->wantws = 0; } static void @@ -174,25 +183,32 @@ outbuf_flush(struct state *st) fputs(st->outbuf, stdout); *st->outbuf = '\0'; st->outbuflen = 0; - st->hasnl = 0; + + if (OUST_NL == st->oust) + st->oust = OUST_TXT; } static void -outbuf_newln(struct state *st) +mdoc_newln(struct state *st) { - if ('\n' == last) + if (OUST_NL == st->oust) return; - outbuf_flush(st); 
+ putchar('\n'); last = '\n'; - st->hasnl = 1; + st->oust = OUST_NL; + st->wantws = 1; } /* * Given buf[*start] is at the start of an escape name, read til the end * of the escape ('>') then try to do something with it. * Sets start to be one after the '>'. + * + * This function does not care about output modes, + * it merely appends text to the output buffer, + * which can then be used in any mode. */ static void formatescape(struct state *st, const char *buf, size_t *start, size_t end) @@ -237,6 +253,9 @@ formatescape(struct state *st, const char *buf, size_t * I set "start" to be the end of the sequence (last right-carrot) so * that the caller can safely just continue processing. * If this is just an empty tag, I'll return 0. + * + * Always operates in OUST_MAC mode. + * Mode handling is done by the caller. */ static int trylink(const char *buf, size_t *start, size_t end, size_t dsz) @@ -371,6 +390,9 @@ trylink(const char *buf, size_t *start, size_t end, si * Our flag might be followed by an argument, so make sure that we're * accounting for that, too. * If we don't have a flag at all, however, then assume we're an "Ar". + * + * Always operates in OUST_MAC mode. + * Mode handling is done by the caller. */ static void dosynopsisfl(const char *buf, size_t *start, size_t end) @@ -436,24 +458,23 @@ again: * like X<...> and can contain nested format codes. * This consumes the whole format code, and any nested format codes, til * the end of matched production. - * If "reentrant", then we're being called after a macro has already - * been printed to the current line. * If "nomacro", then we don't print any macros, just contained data * (e.g., following "Sh" or "Nm"). * "pos" is only significant in SYNOPSIS, and should be 0 when invoked * as the first format code on a line (for decoration as an "Nm"), * non-zero otherwise. 
- * Return whether we've printed a macro or not--in other words, whether - * this should trigger a subsequent newline (this should be ignored when - * reentrant). + * + * Output mode handling is most complicated here. + * We may enter in any mode. + * We usually exit in OUST_MAC mode, except when + * entering without OUST_MAC and the code is invalid. */ -static int +static void formatcode(struct state *st, const char *buf, size_t *start, - size_t end, int reentrant, int nomacro, int pos) + size_t end, int nomacro, int pos) { enum fmt fmt; size_t i, j, dsz; - int white; assert(*start + 1 < end); assert('<' == buf[*start + 1]); @@ -491,7 +512,7 @@ formatcode(struct state *st, const char *buf, size_t * */ if (FMT_ESCAPE == fmt) { formatescape(st, buf, start, end); - return(0); + return; } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { /* * Just consume til the end delimiter, accounting for @@ -521,7 +542,7 @@ formatcode(struct state *st, const char *buf, size_t * if (isspace(last)) while (*start < end && isspace((int)buf[*start])) (*start)++; - return(0); + return; } /* @@ -529,17 +550,26 @@ formatcode(struct state *st, const char *buf, size_t * * suppressed in, e.g., "Nm" and "Sh" macros). */ if (FMT__MAX != fmt && !nomacro) { - white = ' ' == last || '\n' == last || - ' ' == buf[*start]; /* + * We may already have wantws if there was whitespace + * before the code ("text B<text>"), but initial + * whitespace inside our scope ("textB< text>") + * also allows breaking at this point. + */ + + st->wantws |= ' ' == buf[*start]; + + /* * If we are on a text line and there is no * whitespace before our content, we have to make * the previous word a prefix to the macro line. + * In the following, mdoc_newln() must not be used + * lest we clobber our output state. */ - if ( ! white && ! reentrant) { - if ( ! st->hasnl) + if (OUST_MAC != st->oust && !st->wantws) { + if (OUST_NL != st->oust) putchar('\n'); printf(".Pf "); } @@ -548,19 +578,22 @@ formatcode(struct state *st, const char *buf, size_t * /* Whitespace is easier to suppress on macro lines. */ - if ( ! 
white && reentrant) - printf(" Ns"); + if (OUST_MAC == st->oust && !st->wantws) + printf(" Ns "); /* Unless we are on a macro line, start one. */ - if (white && ! reentrant) { - if (last != '\n') + if (OUST_MAC != st->oust && st->wantws) { + if (OUST_NL != st->oust) putchar('\n'); putchar('.'); } else putchar(' '); - /* Print the macro corresponding to this format code. */ + /* + * Print the macro corresponding to this format code, + * and update the output state afterwards. + */ switch (fmt) { case (FMT_ITALIC): @@ -600,6 +633,8 @@ formatcode(struct state *st, const char *buf, size_t * default: abort(); } + st->oust = OUST_MAC; + st->wantws = 1; } else outbuf_flush(st); @@ -631,74 +666,86 @@ formatcode(struct state *st, const char *buf, size_t * } } if (*start + 1 < end && '<' == buf[*start + 1]) { - formatcode(st, buf, start, end, 1, nomacro, 1); + formatcode(st, buf, start, end, nomacro, 1); continue; } - /* - * Make sure that any macro-like words (or - * really any word starting with a capital - * letter) is assumed to be a macro that must be - * escaped. - * This matches "Xx " and "XxEOLN". - */ - if ((' ' == last || '\n' == last) && - end - *start > 1 && - isupper((int)buf[*start]) && - islower((int)buf[*start + 1]) && - (end - *start == 2 || - ' ' == buf[*start + 2])) - printf("\\&"); + /* Suppress newlines and multiple spaces. */ - /* Suppress newline. */ - if ('\n' == buf[*start]) - putchar(last = ' '); - else - putchar(last = buf[*start]); + last = buf[(*start)++]; + if (' ' == last || '\n' == last) { + putchar(' '); + while (*start < end && ' ' == buf[*start]) + (*start)++; + continue; + } + if (OUST_MAC == st->oust) { + if ( ! st->wantws) { + printf(" Ns "); + st->wantws = 1; + } + + /* + * Escape macro-like words. + * This matches "Xx " and "XxEOLN". 
+ */ + + if (end - *start > 0 && + isupper((unsigned char)last) && + islower((unsigned char)buf[*start]) && + (end - *start == 1 || + ' ' == buf[*start + 1] || + '>' == buf[*start + 1])) + printf("\\&"); + } + + putchar(last); + /* Protect against character escapes. */ + if ('\\' == last) putchar('e'); - - (*start)++; - - if (' ' == last) - while (*start < end && ' ' == buf[*start]) - (*start)++; } - if (FMT__MAX == fmt) - return(0); - if ( ! nomacro && FMT_CODE == fmt) printf(" Qc "); - /* - * We're now subsequent the format code. - * If there isn't a space (or newline) here, and we haven't just - * printed a space, then suppress space. - */ - if ( ! nomacro && ' ' != last) - if (' ' != buf[*start] && '\n' != buf[*start]) - printf(" Ns "); - - return(1); + if (FMT__MAX != fmt) + st->wantws = ' ' == last; } /* * Calls formatcode() til the end of a paragraph. + * Goes to OUST_MAC mode and stays there when returning, + * such that the caller can add arguments to the macro line + * before closing it out. */ static void -formatcodeln(struct state *st, const char *buf, - size_t *start, size_t end, int nomacro) +formatcodeln(struct state *st, const char *linemac, + const char *buf, size_t *start, size_t end, int nomacro) { + assert(OUST_NL == st->oust); + assert(st->wantws); + printf(".%s ", linemac); + st->oust = OUST_MAC; + last = ' '; while (*start < end) { if (*start + 1 < end && '<' == buf[*start + 1]) { - formatcode(st, buf, start, end, 1, nomacro, 1); + formatcode(st, buf, start, end, nomacro, 1); continue; } + + if (OUST_MAC == st->oust) { + if ( ! st->wantws && + ' ' != buf[*start] && + '\n' != buf[*start]) + printf(" Ns "); + st->wantws = 1; + } + /* * Since we're already on a macro line, we want to make * sure that we don't inadvertently invoke a macro. 
@@ -752,6 +799,9 @@ listguess(const char *buf, size_t start, size_t end) * A command paragraph, as noted in the perlpod manual, just indicates * that we should do something, optionally with some text to print as * well. + * From the perspective of external callers, + * always stays in OUST_NL/wantws mode, + * but its children do use OUST_MAC. */ static void command(struct state *st, const char *buf, size_t start, size_t end) @@ -795,7 +845,6 @@ command(struct state *st, const char *buf, size_t star * The behaviour of head= follows from a quick glance at * how pod2man handles it. */ - printf(".Sh "); st->sect = SECT_NONE; if (end - start == 4) { if (0 == memcmp(&buf[start], "NAME", 4)) @@ -804,29 +853,26 @@ command(struct state *st, const char *buf, size_t star if (0 == memcmp(&buf[start], "SYNOPSIS", 8)) st->sect = SECT_SYNOPSIS; } - formatcodeln(st, buf, &start, end, 1); - putchar(last = '\n'); + formatcodeln(st, "Sh", buf, &start, end, 1); + mdoc_newln(st); st->haspar = 1; break; case (CMD_HEAD2): - printf(".Ss "); - formatcodeln(st, buf, &start, end, 1); - putchar(last = '\n'); + formatcodeln(st, "Ss", buf, &start, end, 1); + mdoc_newln(st); st->haspar = 1; break; case (CMD_HEAD3): puts(".Pp"); - printf(".Em "); - formatcodeln(st, buf, &start, end, 0); - putchar(last = '\n'); + formatcodeln(st, "Em", buf, &start, end, 0); + mdoc_newln(st); puts(".Pp"); st->haspar = 1; break; case (CMD_HEAD4): puts(".Pp"); - printf(".No "); - formatcodeln(st, buf, &start, end, 0); - putchar(last = '\n'); + formatcodeln(st, "No", buf, &start, end, 0); + mdoc_newln(st); puts(".Pp"); st->haspar = 1; break; @@ -878,9 +924,8 @@ command(struct state *st, const char *buf, size_t star } switch (st->lstack[st->lpos - 1]) { case (LIST_TAG): - printf(".It "); - formatcodeln(st, buf, &start, end, 0); - putchar(last = '\n'); + formatcodeln(st, "It", buf, &start, end, 0); + mdoc_newln(st); break; case (LIST_ENUM): /* FALLTHROUGH */ @@ -932,6 +977,8 @@ command(struct state *st, const char *buf, 
size_t star /* * Just pump out the line in a verbatim block. + * From the perspective of external callers, + * always stays in OUST_NL/wantws mode. */ static void verbatim(struct state *st, const char *buf, size_t start, size_t end) @@ -1020,22 +1067,21 @@ hasmatch(const char *buf, size_t start, size_t end) * If we're an ending bracket, see if we have a stack already. */ static int -dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack) +dosynopsisop(struct state *st, const char *buf, + size_t *start, size_t end, size_t *opstack) { assert('[' == buf[*start] || ']' == buf[*start]); if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { - if ('\n' != last) - putchar('\n'); + mdoc_newln(st); puts(".Oo"); (*opstack)++; } else if ('[' == buf[*start]) return(0); if (']' == buf[*start] && *opstack > 0) { - if ('\n' != last) - putchar('\n'); + mdoc_newln(st); puts(".Oc"); (*opstack)--; } else if (']' == buf[*start]) @@ -1050,12 +1096,18 @@ dosynopsisop(const char *buf, size_t *start, size_t en /* * Format multiple "Nm" manpage names in the NAME section. + * From the perspective of external callers, + * always stays in OUST_NL/wantws mode, + * but its children do use OUST_MAC. 
*/ static void donamenm(struct state *st, const char *buf, size_t *start, size_t end) { size_t word; + assert(OUST_NL == st->oust); + assert(st->wantws); + while (*start < end && ' ' == buf[*start]) (*start)++; @@ -1065,17 +1117,17 @@ donamenm(struct state *st, const char *buf, size_t *st } while (*start < end) { - fputs(".Nm ", stdout); for (word = *start; word < end; word++) if (',' == buf[word]) break; - formatcodeln(st, buf, start, word, 1); + formatcodeln(st, "Nm", buf, start, word, 1); if (*start == end) { - putchar(last = '\n'); - continue; + mdoc_newln(st); + break; } assert(',' == buf[*start]); - puts(" ,"); + printf(" ,"); + mdoc_newln(st); (*start)++; while (*start < end && ' ' == buf[*start]) (*start)++; @@ -1089,6 +1141,11 @@ donamenm(struct state *st, const char *buf, size_t *st * Lots of other snakes in the grass: escaping a newline followed by a * period (accidental mdoc(7) control), double-newlines after macro * passages, etc. + * + * Uses formatcode() to go to OUST_MAC mode + * and outbuf_flush() to go to OUST_TXT mode. + * Main text mode wantws handling is in this function. + * Must make sure to go back to OUST_NL/wantws mode before returning. */ static void ordinary(struct state *st, const char *buf, size_t start, size_t end) @@ -1119,9 +1176,8 @@ ordinary(struct state *st, const char *buf, size_t sta start = j + 1; while (start < end && ' ' == buf[start]) start++; - fputs(".Nd ", stdout); - formatcodeln(st, buf, &start, end, 1); - putchar(last = '\n'); + formatcodeln(st, "Nd", buf, &start, end, 1); + mdoc_newln(st); return; } } @@ -1130,7 +1186,6 @@ ordinary(struct state *st, const char *buf, size_t sta puts(".Pp"); st->haspar = 0; - st->hasnl = 1; last = '\n'; opstack = 0; @@ -1153,21 +1208,32 @@ ordinary(struct state *st, const char *buf, size_t sta * brackets indicate that we're opening and * closing an optional context. 
*/ + if (SECT_SYNOPSIS == st->sect && ('[' == buf[start] || ']' == buf[start]) && - dosynopsisop(buf, &start, end, &opstack)) + dosynopsisop(st, buf, + &start, end, &opstack)) continue; + + /* + * On whitespace, flush the output buffer + * and allow breaking to a macro line. + * Otherwise, buffer text and clear wantws. + */ + last = buf[start++]; if (' ' == last) { outbuf_flush(st); putchar(' '); + st->wantws = 1; } else outbuf_addchar(st); } if (start < end - 1 && '<' == buf[start + 1]) { - if (formatcode(st, buf, &start, end, 0, 0, seq)) { + formatcode(st, buf, &start, end, 0, seq); + if (OUST_MAC == st->oust) { /* * Let mdoc(7) handle trailing punctuation. * XXX Some punctuation characters @@ -1182,19 +1248,35 @@ ordinary(struct state *st, const char *buf, size_t sta putchar(' '); putchar(buf[start++]); } - /* End the macro line. */ - putchar(last = '\n'); - st->hasnl = 1; + + if (st->wantws || + ' ' == buf[start] || + '\n' == buf[start]) + mdoc_newln(st); + /* * Consume all whitespace * so we don't accidentally start * an implicit literal line. */ + while (start < end && ' ' == buf[start]) start++; + + /* + * Some text is following. + * Implement requested spacing. + */ + + if ( ! 
st->wantws && start < end && + '<' != buf[start + 1]) { + printf(" Ns "); + st->wantws = 1; + } } } else if (start < end && '\n' == buf[start]) { - outbuf_newln(st); + outbuf_flush(st); + mdoc_newln(st); if (++start >= end) continue; /* @@ -1212,7 +1294,8 @@ ordinary(struct state *st, const char *buf, size_t sta break; } } - outbuf_newln(st); + outbuf_flush(st); + mdoc_newln(st); } /* @@ -1224,6 +1307,9 @@ static void dopar(struct state *st, const char *buf, size_t start, size_t end) { + assert(OUST_NL == st->oust); + assert(st->wantws); + if (end == start) return; if (' ' == buf[start] || '\t' == buf[start]) @@ -1306,6 +1392,9 @@ dofile(const struct args *args, const char *fname, free(title); memset(&st, 0, sizeof(struct state)); + st.oust = OUST_NL; + st.wantws = 1; + assert(sz > 0); /* Main loop over file contents. */