=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.46 retrieving revision 1.56 diff -u -p -r1.46 -r1.56 --- pod2mdoc/pod2mdoc.c 2015/02/14 15:34:39 1.46 +++ pod2mdoc/pod2mdoc.c 2015/02/20 13:33:52 1.56 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.46 2015/02/14 15:34:39 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.56 2015/02/20 13:33:52 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2014, 2015 Ingo Schwarze @@ -107,6 +107,32 @@ enum cmd { CMD__MAX }; +static void command(struct state *, const char *, size_t, size_t); +static void dofile(const struct args *, const char *, + const struct tm *, char *, size_t); +static void donamenm(struct state *, const char *, size_t *, size_t); +static void dopar(struct state *, char *, size_t, size_t); +static void dosynopsisfl(const char *, size_t *, size_t); +static int dosynopsisop(struct state *, const char *, size_t *, + size_t, size_t *); +static int formatcode(struct state *, const char *, size_t *, + size_t, int, int); +static void formatcodeln(struct state *, const char *, const char *, + size_t *, size_t, int); +static void formatescape(struct state *, const char *, size_t *, size_t); +static int hasmatch(const char *, size_t, size_t); +static void ordinary(struct state *, const char *, size_t, size_t); +static void outbuf_addchar(struct state *); +static void outbuf_addstr(struct state *, const char *); +static void outbuf_flush(struct state *); +static void outbuf_grow(struct state *, size_t); +static enum list listguess(const char *, size_t, size_t); +static void mdoc_newln(struct state *); +static int readfile(const struct args *, const char *); +static void register_type(const char *); +static int trylink(const char *, size_t *, size_t, size_t); +static void verbatim(struct state *, char *, size_t, size_t); + static const char *const cmds[CMD__MAX] = { "pod", /* CMD_POD */ "head1", /* CMD_HEAD1 */ @@ -182,10 +208,14 @@ outbuf_flush(struct state *st) if (0 == st->outbuflen) return; - if (OUST_TXT == st->oust && st->wantws) + if (OUST_NL != st->oust && st->wantws) putchar(' '); - fputs(st->outbuf, stdout); + if (OUST_MAC == st->oust && '"' == *st->outbuf) + printf("\\(dq%s", st->outbuf + 1); + else + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; st->outbuflen = 0; @@ -411,7 +441,7 @@ again: '?' != buf[*start + 1] && '-' != buf[*start + 1]) { (*start)--; - fputs("Ar ", stdout); + fputs("Ar", stdout); return; } @@ -431,7 +461,7 @@ again: assert(i < end); if ( ! (' ' == buf[i] || '>' == buf[i])) { - printf("Ar "); + fputs("Ar", stdout); return; } @@ -442,7 +472,7 @@ again: (end - *start == 2 || ' ' == buf[*start + 2])) printf("\\&"); - printf("%.*s ", (int)(i - *start), &buf[*start]); + printf("%.*s", (int)(i - *start), &buf[*start]); *start = i; if (' ' == buf[i]) { @@ -453,7 +483,7 @@ again: *start = i; goto again; } - printf("Ar "); + fputs("Ar", stdout); *start = i; } } @@ -480,8 +510,8 @@ formatcode(struct state *st, const char *buf, size_t * { size_t i, j, dsz; enum fmt fmt; - int wantws; unsigned char uc; + int gotmacro, wantws; assert(*start + 1 < end); assert('<' == buf[*start + 1]); @@ -559,28 +589,28 @@ formatcode(struct state *st, const char *buf, size_t * if (FMT__MAX != fmt && !nomacro) { /* + * Do we need spacing before the upcoming macro, + * after any pending text already in the outbuf? * We may already have wantws if there was whitespace - * before the code ("text Boust ? st->wantws : ! st->outbuflen); + (st->wantws && ! st->outbuflen); /* * If we are on a text line and there is no * whitespace before our content, we have to make * the previous word a prefix to the macro line. - * In the following, mdoc_newln() must not be used - * lest we clobber out output state. */ if (OUST_MAC != st->oust && ! wantws) { if (OUST_NL != st->oust) - putchar('\n'); - printf(".Pf "); - st->wantws = 0; + mdoc_newln(st); + fputs(".Pf", stdout); + st->oust = OUST_MAC; + st->wantws = wantws = 1; } outbuf_flush(st); @@ -588,16 +618,18 @@ formatcode(struct state *st, const char *buf, size_t * /* Whitespace is easier to suppress on macro lines. */ if (OUST_MAC == st->oust && ! wantws) - printf(" Ns "); + printf(" Ns"); /* Unless we are on a macro line, start one. */ - if (OUST_MAC != st->oust && wantws) { + if (OUST_MAC != st->oust) { if (OUST_NL != st->oust) - putchar('\n'); + mdoc_newln(st); putchar('.'); + st->oust = OUST_MAC; } else putchar(' '); + st->wantws = 1; /* * Print the macro corresponding to this format code, @@ -606,16 +638,16 @@ formatcode(struct state *st, const char *buf, size_t * switch (fmt) { case (FMT_ITALIC): - printf("Em "); + fputs("Em", stdout); break; case (FMT_BOLD): if (SECT_SYNOPSIS == st->sect) { if (1 == dsz && '-' == buf[*start]) dosynopsisfl(buf, start, end); else if (0 == pos) - printf("Nm "); + fputs("Nm", stdout); else - printf("Ar "); + fputs("Ar", stdout); break; } i = 0; @@ -625,42 +657,42 @@ formatcode(struct state *st, const char *buf, size_t * if ('=' != uc && '>' != uc) i = 0; if (4 == i && ! strncmp(buf + *start, "NULL", 4)) { - printf("Dv "); + fputs("Dv", stdout); break; } switch (i ? dict_get(buf + *start, i) : MDOC_MAX) { case MDOC_Fa: - printf("Fa "); + fputs("Fa", stdout); break; case MDOC_Vt: - printf("Vt "); + fputs("Vt", stdout); break; default: - printf("Sy "); + fputs("Sy", stdout); break; } break; case (FMT_CODE): - printf("Qo Li "); + fputs("Qo Li", stdout); break; case (FMT_LINK): /* Try to link; use "No" if it's empty. */ if ( ! trylink(buf, start, end, dsz)) - printf("No "); + fputs("No", stdout); break; case (FMT_FILE): - printf("Pa "); + fputs("Pa", stdout); break; case (FMT_NBSP): - printf("No "); + fputs("No", stdout); break; default: abort(); } - st->oust = OUST_MAC; - st->wantws = 1; - } else + } else { outbuf_flush(st); + st->wantws = 0; + } /* * Process until we reach the end marker (e.g., '>') or until we @@ -668,6 +700,8 @@ formatcode(struct state *st, const char *buf, size_t * * Don't emit any newlines: since we're on a macro line, we * don't want to break the line. */ + + gotmacro = 0; while (*start < end) { if ('>' == buf[*start] && 1 == dsz) { (*start)++; @@ -691,54 +725,58 @@ formatcode(struct state *st, const char *buf, size_t * } if (*start + 1 < end && '<' == buf[*start + 1] && 'A' <= buf[*start] && 'Z' >= buf[*start]) { - if ( ! formatcode(st, buf, start, end, nomacro, 1)) - st->wantws = 1; + gotmacro = formatcode(st, buf, + start, end, nomacro, 1); continue; } /* Suppress newlines and multiple spaces. */ last = buf[(*start)++]; - if (' ' == last || '\n' == last) { - putchar(' '); - while (*start < end && ' ' == buf[*start]) + if (isspace(last)) { + outbuf_flush(st); + st->wantws = 1; + gotmacro = 0; + while (*start < end && + isspace((unsigned char)buf[*start])) (*start)++; continue; } if (OUST_MAC == st->oust && FMT__MAX != fmt) { - if ( ! st->wantws) { - printf(" Ns "); + if (gotmacro && ! st->wantws) { + printf(" Ns"); st->wantws = 1; } + gotmacro = 0; /* * Escape macro-like words. * This matches "Xx " and "XxEOLN". */ - if (end - *start > 0 && - isupper((unsigned char)last) && + if (*start < end && ! st->outbuflen && + isupper(last) && islower((unsigned char)buf[*start]) && (end - *start == 1 || ' ' == buf[*start + 1] || '>' == buf[*start + 1])) - printf("\\&"); + outbuf_addstr(st, "\\&"); + last = buf[*start - 1]; } + outbuf_addchar(st); + } - putchar(last); + if (FMT__MAX == fmt) + return(0); - /* Protect against character escapes. */ + outbuf_flush(st); - if ('\\' == last) - putchar('e'); - } - if ( ! nomacro && FMT_CODE == fmt) - printf(" Qc "); + fputs(" Qc", stdout); st->wantws = ' ' == last; - return(FMT__MAX != fmt); + return(1); } /* @@ -751,52 +789,45 @@ static void formatcodeln(struct state *st, const char *linemac, const char *buf, size_t *start, size_t end, int nomacro) { - int gotmacro, wantws; + int gotmacro; assert(OUST_NL == st->oust); assert(st->wantws); - printf(".%s ", linemac); + printf(".%s", linemac); st->oust = OUST_MAC; gotmacro = 0; while (*start < end) { - wantws = ' ' == buf[*start] || '\n' == buf[*start]; - if (wantws) { - last = ' '; - do { - (*start)++; - } while (*start < end && ' ' == buf[*start]); - } - if (*start + 1 < end && '<' == buf[*start + 1] && 'A' <= buf[*start] && 'Z' >= buf[*start]) { - st->wantws |= wantws; gotmacro = formatcode(st, buf, start, end, nomacro, 1); continue; } + /* Suppress newlines and multiple spaces. */ + + last = buf[(*start)++]; + if (isspace(last)) { + outbuf_flush(st); + st->wantws = 1; + while (*start < end && + isspace((unsigned char)buf[*start])) + (*start)++; + continue; + } + if (gotmacro) { - if (*start < end || st->outbuflen) { - if (st->wantws || - (wantws && !st->outbuflen)) - printf(" No "); + if (*start < end) { + if (st->wantws) + printf(" No"); else - printf(" Ns "); + printf(" Ns"); } + st->wantws = 1; gotmacro = 0; } - outbuf_flush(st); - st->wantws = wantws; - if (*start >= end) - break; - - if (st->wantws) { - putchar(' '); - st->wantws = 0; - } - /* * Since we're already on a macro line, we want to make * sure that we don't inadvertently invoke a macro. @@ -804,21 +835,16 @@ formatcodeln(struct state *st, const char *linemac, * are used in troff and we don't want to escape * something that needn't be escaped. */ - if (' ' == last && end - *start > 1 && - isupper((unsigned char)buf[*start]) && - islower((unsigned char)buf[*start + 1]) && - (end - *start == 2 || ' ' == buf[*start + 2])) - printf("\\&"); - - putchar(last = buf[*start]); - - /* Protect against character escapes. */ - - if ('\\' == last) - putchar('e'); - - (*start)++; + if (*start < end && ! st->outbuflen && isupper(last) && + islower((unsigned char)buf[*start]) && + (end - *start == 1 || ' ' == buf[*start + 1])) { + outbuf_addstr(st, "\\&"); + last = buf[*start - 1]; + } + outbuf_addchar(st); } + outbuf_flush(st); + st->wantws = 1; } /* @@ -1059,11 +1085,26 @@ verbatim(struct state *st, char *buf, size_t start, si { size_t i, ift, ifo, ifa, ifc, inl; char *cp, *cp2; - int nopen; + int indisplay, nopen, wantsp; - if ( ! st->parsing || st->paused || start == end) + if (st->paused || ! st->parsing) return; + + indisplay = wantsp = 0; + again: + if (start == end) { + if (indisplay) + puts(".Ed"); + return; + } + + if ('\n' == buf[start]) { + wantsp = 1; + start++; + goto again; + } + /* * If we're in the SYNOPSIS, see if we're an #include block. * If we are, then print the "In" macro and re-loop. @@ -1075,7 +1116,7 @@ again: while (i < end && buf[i] == ' ') i++; if (i == end) - return; + goto again; /* We're an include block! */ if (end - i > 10 && @@ -1083,6 +1124,9 @@ again: start = i + 10; while (start < end && ' ' == buf[start]) start++; + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; fputs(".In ", stdout); /* Stop til the '>' marker or we hit eoln. */ while (start < end && @@ -1098,6 +1142,9 @@ again: /* Other preprocessor directives. */ if ('#' == buf[i]) { + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; fputs(".Fd ", stdout); start = i; while(start < end && '\n' != buf[start]) @@ -1105,6 +1152,24 @@ again: putchar('\n'); if (start < end && '\n' == buf[start]) start++; + + /* Remember #define for Dv or Fn. */ + + if (strncmp(buf + i + 1, "define", 6) || + ! isspace((unsigned char)buf[i + 7])) + goto again; + + ifo = i + 7; + while (ifo < start && + isspace((unsigned char)buf[ifo])) + ifo++; + ifa = ifo; + while ('_' == buf[ifa] || + isalnum((unsigned char)buf[ifa])) + ifa++; + dict_put(buf + ifo, ifa - ifo, + '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv); + goto again; } @@ -1159,6 +1224,9 @@ again: buf[i] = ' '; buf[ifo++] = '\0'; register_type(buf + ift); + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; printf(".Ft %s", buf + ift); if (buf[ifo] == '*') { fputs(" *", stdout); @@ -1182,7 +1250,10 @@ again: if ('\0' != *cp2) dict_put(cp2, 0, MDOC_Fa); register_type(buf + ifa); - printf(".Fa \"%s\"\n", buf + ifa); + if (strchr(buf + ifa, ' ') == NULL) + printf(".Fa %s\n", buf + ifa); + else + printf(".Fa \"%s\"\n", buf + ifa); if (cp == NULL) break; while (*cp == ' ' || *cp == '\t') @@ -1196,28 +1267,37 @@ again: buf[inl] = '\0'; puts(buf + ifc); } - start = inl + 1; - if (start < end) - goto again; - return; + start = inl < end ? inl + 1 : end; + goto again; } } - - puts(".Bd -literal"); - for (last = ' '; start < end; start++) { + + if ( ! indisplay) + puts(".Bd -literal"); + else if (wantsp) + putchar('\n'); + indisplay = 1; + wantsp = 0; + + for (last = '\n'; start < end; start++) { /* * Handle accidental macros (newline starting with * control character) and escapes. */ - if ('\n' == last) + if ('\n' == last) { + if ('\n' == buf[start]) + goto again; if ('.' == buf[start] || '\'' == buf[start]) printf("\\&"); + } putchar(last = buf[start]); if ('\\' == buf[start]) printf("e"); } - putchar(last = '\n'); - puts(".Ed"); + if ('\n' != last) + putchar('\n'); + if (indisplay) + puts(".Ed"); } /* @@ -1287,7 +1367,7 @@ donamenm(struct state *st, const char *buf, size_t *st assert(OUST_NL == st->oust); assert(st->wantws); - while (*start < end && ' ' == buf[*start]) + while (*start < end && isspace((unsigned char)buf[*start])) (*start)++; if (end == *start) { @@ -1308,7 +1388,7 @@ donamenm(struct state *st, const char *buf, size_t *st printf(" ,"); mdoc_newln(st); (*start)++; - while (*start < end && ' ' == buf[*start]) + while (*start < end && isspace((unsigned char)buf[*start])) (*start)++; } } @@ -1333,6 +1413,7 @@ ordinary(struct state *st, const char *buf, size_t sta size_t i, j, opstack, wend; enum mdoc_type mtype; int eos, noeos, seq; + char savechar; if ( ! st->parsing || st->paused) return; @@ -1345,7 +1426,8 @@ ordinary(struct state *st, const char *buf, size_t sta */ if (SECT_NAME == st->sect) { for (i = end - 2; i > start; i--) - if ('-' == buf[i] && ' ' == buf[i + 1]) + if ('-' == buf[i] && + isspace((unsigned char)buf[i + 1])) break; if ('-' == buf[i]) { j = i; @@ -1355,7 +1437,8 @@ ordinary(struct state *st, const char *buf, size_t sta break; donamenm(st, buf, &start, i + 1); start = j + 1; - while (start < end && ' ' == buf[start]) + while (start < end && + isspace((unsigned char)buf[start])) start++; formatcodeln(st, "Nd", buf, &start, end, 1); mdoc_newln(st); @@ -1404,6 +1487,7 @@ ordinary(struct state *st, const char *buf, size_t sta if ( ! isspace(last)) outbuf_addchar(st); if (start < end && + ! isspace((unsigned char)buf[start - 1]) && ! isspace((unsigned char)buf[start])) continue; @@ -1435,19 +1519,32 @@ ordinary(struct state *st, const char *buf, size_t sta */ mtype = MDOC_Fa; + savechar = '\0'; if (wend && ')' == st->outbuf[wend] && '(' == st->outbuf[wend - 1]) { mtype = dict_get(st->outbuf, --wend); + if (MDOC_Dv == mtype) + mtype = MDOC_Fo; if (MDOC_Fo == mtype || MDOC_MAX == mtype) { st->outbuflen = wend; st->outbuf[wend] = '\0'; mdoc_newln(st); if (MDOC_Fo == mtype) - fputs(".Fn ", stdout); + fputs(".Fn", stdout); else - fputs(".Xr ", stdout); + fputs(".Xr", stdout); st->oust = OUST_MAC; } + } else { + mtype = dict_get(st->outbuf, wend); + if (MDOC_Dv == mtype) { + savechar = st->outbuf[wend]; + st->outbuf[wend] = '\0'; + mdoc_newln(st); + fputs(".Dv", stdout); + st->oust = OUST_MAC; + } else + mtype = MDOC_Fa; } /* @@ -1466,12 +1563,15 @@ ordinary(struct state *st, const char *buf, size_t sta islower((unsigned char)st->outbuf[wend - 1]))) { if (MDOC_MAX == mtype) fputs(" 3", stdout); - if (MDOC_Fa != mtype) - for (wend += 2; - '\0' != st->outbuf[wend]; - wend++) + if (MDOC_Fa != mtype) { + if (MDOC_Dv == mtype) + st->outbuf[wend] = savechar; + else + wend += 2; + while ('\0' != st->outbuf[wend]) printf(" %c", - st->outbuf[wend]); + st->outbuf[wend++]); + } mdoc_newln(st); } @@ -1492,12 +1592,11 @@ ordinary(struct state *st, const char *buf, size_t sta * XXX Some punctuation characters * are not handled yet. */ - if ((start == end - 1 || - (start < end - 1 && - (' ' == buf[start + 1] || - '\n' == buf[start + 1]))) && - ('.' == buf[start] || - ',' == buf[start])) { + if ((start == end - 1 || + (start < end - 1 && + (' ' == buf[start + 1] || + '\n' == buf[start + 1]))) && + NULL != strchr("|.,;:?!)]", buf[start])) { putchar(' '); putchar(buf[start++]); } @@ -1525,7 +1624,7 @@ ordinary(struct state *st, const char *buf, size_t sta ('<' != buf[start + 1] || 'A' > buf[start] || 'Z' < buf[start])) { - printf(" Ns "); + fputs(" Ns", stdout); st->wantws = 1; } } @@ -1587,7 +1686,8 @@ dofile(const struct args *args, const char *fname, struct state st; const char *fbase, *fext, *section, *date, *format; char *title, *cp; - size_t sup, end, i, cur = 0; + size_t cur, end; + int verb; if (0 == sz) return; @@ -1632,7 +1732,7 @@ dofile(const struct args *args, const char *fname, date = args->date; format = (NULL == date) ? "%B %d, %Y" : - strcmp(date, "Mdocdate") ? NULL : "$Mdocdate: February 14 2015 $"; + strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $"; if (NULL != format) { strftime(datebuf, sizeof(datebuf), format, tm); @@ -1659,24 +1759,32 @@ dofile(const struct args *args, const char *fname, /* Main loop over file contents. */ - while (cur < sz) { + cur = 0; + for (;;) { + while (cur < sz && '\n' == buf[cur]) + cur++; + if (cur >= sz) + break; + + verb = isspace((unsigned char)buf[cur]); + /* Read until next paragraph. */ - for (i = cur + 1; i < sz; i++) - if ('\n' == buf[i] && '\n' == buf[i - 1]) { - /* Consume blank paragraphs. */ - while (i + 1 < sz && '\n' == buf[i + 1]) - i++; + + for (end = cur + 1; end + 1 < sz; end++) + if ('\n' == buf[end] && '\n' == buf[end + 1] && + !(verb && end + 2 < sz && + isspace((unsigned char)buf[end + 2]))) break; - } /* Adjust end marker for EOF. */ - end = i < sz ? i - 1 : - ('\n' == buf[sz - 1] ? sz - 1 : sz); - sup = i < sz ? end + 2 : sz; + if (end < sz && '\n' != buf[end]) + end++; + /* Process paragraph and adjust start. */ + dopar(&st, buf, cur, end); - cur = sup; + cur = end + 2; } dict_destroy(); }