=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.34 retrieving revision 1.62 diff -u -p -r1.34 -r1.62 --- pod2mdoc/pod2mdoc.c 2014/07/19 00:42:22 1.34 +++ pod2mdoc/pod2mdoc.c 2016/11/03 15:50:28 1.62 @@ -1,6 +1,7 @@ -/* $Id: pod2mdoc.c,v 1.34 2014/07/19 00:42:22 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.62 2016/11/03 15:50:28 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons + * Copyright (c) 2014, 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -15,7 +16,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include -#include #include #include @@ -24,8 +24,11 @@ #include #include #include +#include #include +#include "dict.h" + /* * In what section can we find Perl module manuals? * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p. @@ -72,6 +75,7 @@ struct state { char *outbuf; /* text buffered for output */ size_t outbufsz; /* allocated size of outbuf */ size_t outbuflen; /* current length of outbuf */ + size_t outlnlen; /* chars so far on this output line */ }; enum fmt { @@ -104,8 +108,34 @@ enum cmd { CMD__MAX }; +static void command(struct state *, const char *, size_t, size_t); +static void dofile(const struct args *, const char *, + const struct tm *, char *, size_t); +static void donamenm(struct state *, const char *, size_t *, size_t); +static void dopar(struct state *, char *, size_t, size_t); +static void dosynopsisfl(const char *, size_t *, size_t); +static int dosynopsisop(struct state *, const char *, size_t *, + size_t, size_t *); +static int formatcode(struct state *, const char *, size_t *, + size_t, int, int); +static void formatcodeln(struct state *, const char *, const char *, + size_t *, size_t, int); +static void formatescape(struct state *, const char *, size_t *, size_t); +static int hasmatch(const char *, size_t, size_t); +static void ordinary(struct state *, const char *, size_t, size_t); +static void outbuf_addchar(struct state *); +static void outbuf_addstr(struct state *, const char *); +static void outbuf_flush(struct state *); +static void outbuf_grow(struct state *, size_t); +static enum list listguess(const char *, size_t, size_t); +static void mdoc_newln(struct state *); +static int readfile(const struct args *, const char *); +static void register_type(const char *); +static int trylink(const char *, size_t *, size_t, size_t); +static void verbatim(struct state *, char *, size_t, size_t); + static const char *const cmds[CMD__MAX] = { - "pod", /* CMD_POD */ + "pod", /* CMD_POD */ "head1", /* CMD_HEAD1 */ "head2", /* CMD_HEAD2 */ "head3", /* CMD_HEAD3 */ @@ -132,7 +162,7 @@ static const char fmts[FMT__MAX] = { 'Z' /* FMT_NULL */ }; -static int last; +static unsigned char last; static void @@ -157,7 +187,6 @@ outbuf_addchar(struct state *st) if ('\\' == last) st->outbuf[st->outbuflen++] = 'e'; st->outbuf[st->outbuflen] = '\0'; - st->wantws = 0; } static void @@ -171,7 +200,6 @@ outbuf_addstr(struct state *st, const char *str) memcpy(st->outbuf + st->outbuflen, str, slen+1); st->outbuflen += slen; last = str[slen - 1]; - st->wantws = 0; } static void @@ -181,7 +209,22 @@ outbuf_flush(struct state *st) if (0 == st->outbuflen) return; - fputs(st->outbuf, stdout); + st->outlnlen += st->outbuflen; + if (OUST_TXT == st->oust && st->wantws) { + if (++st->outlnlen > 72) { + putchar('\n'); + st->oust = OUST_NL; + st->outlnlen = st->outbuflen; + } + } + if (OUST_NL != st->oust && st->wantws) + putchar(' '); + + if (OUST_MAC == st->oust && '"' == *st->outbuf) + printf("\\(dq%s", st->outbuf + 1); + else + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; st->outbuflen = 0; @@ -199,6 +242,7 @@ mdoc_newln(struct state *st) putchar('\n'); last = '\n'; st->oust = OUST_NL; + st->outlnlen = 0; st->wantws = 1; } @@ -237,9 +281,9 @@ formatescape(struct state *st, const char *buf, size_t /* * TODO: right now, we only recognise the named escapes. - * Just let the rest of them go. + * Just let the rest of them go. */ - if (0 == strcmp(esc, "lt")) + if (0 == strcmp(esc, "lt")) outbuf_addstr(st, "\\(la"); else if (0 == strcmp(esc, "gt")) outbuf_addstr(st, "\\(ra"); @@ -261,11 +305,11 @@ formatescape(struct state *st, const char *buf, size_t static int trylink(const char *buf, size_t *start, size_t end, size_t dsz) { - size_t linkstart, realend, linkend, + size_t linkstart, realend, linkend, i, j, textsz, stack; - /* - * Scan to the start of the terminus. + /* + * Scan to the start of the terminus. * This function is more or less replicated in the formatcode() * for null or index formatting codes. * However, we're slightly different because we might have @@ -284,10 +328,10 @@ trylink(const char *buf, size_t *start, size_t end, si assert(realend > 0); if (' ' != buf[realend - 1]) continue; - for (i = realend, j = 0; i < end && j < dsz; j++) + for (i = realend, j = 0; i < end && j < dsz; j++) if ('>' != buf[i++]) break; - if (dsz == j) + if (dsz == j) break; } @@ -342,11 +386,11 @@ trylink(const char *buf, size_t *start, size_t end, si (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) || (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) { /* Gross. */ - printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : + printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : realend) - linkstart), &buf[linkstart]); return(1); - } - + } + /* See if we qualify as a mailto. */ if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) { printf("Mt %.*s", (int)j, &buf[linkstart]); @@ -354,19 +398,19 @@ trylink(const char *buf, size_t *start, size_t end, si } /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */ - if ((j > 3 && ')' == buf[linkend - 1]) && + if ((j > 3 && ')' == buf[linkend - 1]) && ('(' == buf[linkend - 3])) { - printf("Xr %.*s %c", (int)(j - 3), + printf("Xr %.*s %c", (int)(j - 3), &buf[linkstart], buf[linkend - 2]); return(1); } else if ((j > 4 && ')' == buf[linkend - 1]) && ('(' == buf[linkend - 4])) { - printf("Xr %.*s %.*s", (int)(j - 4), + printf("Xr %.*s %.*s", (int)(j - 4), &buf[linkstart], 2, &buf[linkend - 3]); return(1); } else if ((j > 5 && ')' == buf[linkend - 1]) && ('(' == buf[linkend - 5])) { - printf("Xr %.*s %.*s", (int)(j - 5), + printf("Xr %.*s %.*s", (int)(j - 5), &buf[linkstart], 3, &buf[linkend - 4]); return(1); } @@ -377,7 +421,7 @@ trylink(const char *buf, size_t *start, size_t end, si break; if (i < linkend) - printf("Xr %.*s " PERL_SECTION, + printf("Xr %.*s " PERL_SECTION, (int)j, &buf[linkstart]); else printf("Xr %.*s 1", (int)j, &buf[linkstart]); @@ -407,7 +451,7 @@ again: '?' != buf[*start + 1] && '-' != buf[*start + 1]) { (*start)--; - fputs("Ar ", stdout); + fputs("Ar", stdout); return; } @@ -427,18 +471,18 @@ again: assert(i < end); if ( ! (' ' == buf[i] || '>' == buf[i])) { - printf("Ar "); + fputs("Ar", stdout); return; } printf("Fl "); - if (end - *start > 1 && + if (end - *start > 1 && isupper((int)buf[*start]) && islower((int)buf[*start + 1]) && (end - *start == 2 || ' ' == buf[*start + 2])) printf("\\&"); - printf("%.*s ", (int)(i - *start), &buf[*start]); + printf("%.*s", (int)(i - *start), &buf[*start]); *start = i; if (' ' == buf[i]) { @@ -449,7 +493,7 @@ again: *start = i; goto again; } - printf("Ar "); + fputs("Ar", stdout); *start = i; } } @@ -471,20 +515,22 @@ again: * entering without OUST_MAC and the code is invalid. */ static int -formatcode(struct state *st, const char *buf, size_t *start, +formatcode(struct state *st, const char *buf, size_t *start, size_t end, int nomacro, int pos) { - enum fmt fmt; size_t i, j, dsz; + enum fmt fmt; + unsigned char uc; + int gotmacro, wantws; assert(*start + 1 < end); assert('<' == buf[*start + 1]); - /* - * First, look up the format code. + /* + * First, look up the format code. * If it's not valid, treat it as a NOOP. */ - for (fmt = 0; fmt < FMT__MAX; fmt++) + for (fmt = 0; fmt < FMT__MAX; fmt++) if (buf[*start] == fmts[fmt]) break; @@ -515,7 +561,7 @@ formatcode(struct state *st, const char *buf, size_t * formatescape(st, buf, start, end); return(0); } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { - /* + /* * Just consume til the end delimiter, accounting for * whether it's a custom one. */ @@ -528,10 +574,10 @@ formatcode(struct state *st, const char *buf, size_t * if (' ' != buf[*start - 1]) continue; i = *start; - for (j = 0; i < end && j < dsz; j++) + for (j = 0; i < end && j < dsz; j++) if ('>' != buf[i++]) break; - if (dsz != j) + if (dsz != j) continue; (*start) += dsz; break; @@ -553,43 +599,47 @@ formatcode(struct state *st, const char *buf, size_t * if (FMT__MAX != fmt && !nomacro) { /* + * Do we need spacing before the upcoming macro, + * after any pending text already in the outbuf? * We may already have wantws if there was whitespace - * before the code ("text Bwantws |= ' ' == buf[*start]; + wantws = ' ' == buf[*start] || + (st->wantws && ! st->outbuflen); /* * If we are on a text line and there is no * whitespace before our content, we have to make * the previous word a prefix to the macro line. - * In the following, mdoc_newln() must not be used - * lest we clobber out output state. */ - if (OUST_MAC != st->oust && !st->wantws) { + if (OUST_MAC != st->oust && ! wantws) { if (OUST_NL != st->oust) - putchar('\n'); - printf(".Pf "); + mdoc_newln(st); + fputs(".Pf", stdout); + st->oust = OUST_MAC; + st->wantws = wantws = 1; } outbuf_flush(st); /* Whitespace is easier to suppress on macro lines. */ - if (OUST_MAC == st->oust && !st->wantws) - printf(" Ns "); + if (OUST_MAC == st->oust && ! wantws) + printf(" Ns"); /* Unless we are on a macro line, start one. */ - if (OUST_MAC != st->oust && st->wantws) { + if (OUST_MAC != st->oust) { if (OUST_NL != st->oust) - putchar('\n'); + mdoc_newln(st); putchar('.'); + st->oust = OUST_MAC; } else putchar(' '); + st->wantws = 1; /* * Print the macro corresponding to this format code, @@ -597,47 +647,61 @@ formatcode(struct state *st, const char *buf, size_t * */ switch (fmt) { - case (FMT_ITALIC): - printf("Em "); - break; case (FMT_BOLD): - if (SECT_SYNOPSIS == st->sect) { + if (SECT_SYNOPSIS == st->sect) { if (1 == dsz && '-' == buf[*start]) dosynopsisfl(buf, start, end); else if (0 == pos) - printf("Nm "); + fputs("Nm", stdout); else - printf("Ar "); + fputs("Ar", stdout); break; - } - if (0 == strncmp(buf + *start, "NULL", 4) && - ('=' == buf[*start + 4] || - '>' == buf[*start + 4])) - printf("Dv "); - else - printf("Sy "); + } + /* FALLTHROUGH */ + case (FMT_ITALIC): + i = 0; + uc = buf[*start]; + while (isalnum(uc) || '_' == uc || ' ' == uc) + uc = buf[*start + ++i]; + if ('=' != uc && '>' != uc) + i = 0; + if (4 == i && ! strncmp(buf + *start, "NULL", 4)) { + fputs("Dv", stdout); + break; + } + switch (i ? dict_get(buf + *start, i) : MDOC_MAX) { + case MDOC_Fa: + fputs("Fa", stdout); + break; + case MDOC_Vt: + fputs("Vt", stdout); + break; + default: + fputs(FMT_BOLD == fmt ? "Sy" : "Em", stdout); + break; + } break; case (FMT_CODE): - printf("Qo Li "); + fputs("Qo Li", stdout); break; case (FMT_LINK): /* Try to link; use "No" if it's empty. */ if ( ! trylink(buf, start, end, dsz)) - printf("No "); + fputs("No", stdout); break; case (FMT_FILE): - printf("Pa "); + fputs("Pa", stdout); break; case (FMT_NBSP): - printf("No "); + fputs("No", stdout); break; default: abort(); } - st->oust = OUST_MAC; - st->wantws = 1; - } else + } else { outbuf_flush(st); + st->wantws = 0; + } /* * Process until we reach the end marker (e.g., '>') or until we @@ -645,11 +709,13 @@ formatcode(struct state *st, const char *buf, size_t * * Don't emit any newlines: since we're on a macro line, we * don't want to break the line. */ + + gotmacro = 0; while (*start < end) { if ('>' == buf[*start] && 1 == dsz) { (*start)++; break; - } else if ('>' == buf[*start] && + } else if ('>' == buf[*start] && ' ' == buf[*start - 1]) { /* * Handle custom delimiters. @@ -668,53 +734,55 @@ formatcode(struct state *st, const char *buf, size_t * } if (*start + 1 < end && '<' == buf[*start + 1] && 'A' <= buf[*start] && 'Z' >= buf[*start]) { - formatcode(st, buf, start, end, nomacro, 1); + gotmacro = formatcode(st, buf, + start, end, nomacro, 1); continue; } /* Suppress newlines and multiple spaces. */ last = buf[(*start)++]; - if (' ' == last || '\n' == last) { - putchar(' '); - while (*start < end && ' ' == buf[*start]) + if (isspace(last)) { + outbuf_flush(st); + st->wantws = 1; + gotmacro = 0; + while (*start < end && + isspace((unsigned char)buf[*start])) (*start)++; continue; } if (OUST_MAC == st->oust && FMT__MAX != fmt) { - if ( ! st->wantws) { - printf(" Ns "); + if (gotmacro && ! st->wantws) { + printf(" Ns"); st->wantws = 1; } + gotmacro = 0; /* * Escape macro-like words. * This matches "Xx " and "XxEOLN". */ - if (end - *start > 0 && - isupper((unsigned char)last) && + if (*start < end && ! st->outbuflen && + isupper(last) && islower((unsigned char)buf[*start]) && (end - *start == 1 || ' ' == buf[*start + 1] || '>' == buf[*start + 1])) - printf("\\&"); + outbuf_addstr(st, "\\&"); + last = buf[*start - 1]; } - - putchar(last); - - /* Protect against character escapes. */ - - if ('\\' == last) - putchar('e'); + outbuf_addchar(st); } if (FMT__MAX == fmt) return(0); + outbuf_flush(st); + if ( ! nomacro && FMT_CODE == fmt) - printf(" Qc "); + fputs(" Qc", stdout); st->wantws = ' ' == last; return(1); @@ -730,52 +798,45 @@ static void formatcodeln(struct state *st, const char *linemac, const char *buf, size_t *start, size_t end, int nomacro) { - int gotmacro, wantws; + int gotmacro; assert(OUST_NL == st->oust); assert(st->wantws); - printf(".%s ", linemac); + printf(".%s", linemac); st->oust = OUST_MAC; gotmacro = 0; while (*start < end) { - wantws = ' ' == buf[*start] || '\n' == buf[*start]; - if (wantws) { - last = ' '; - do { - (*start)++; - } while (*start < end && ' ' == buf[*start]); - } - if (*start + 1 < end && '<' == buf[*start + 1] && 'A' <= buf[*start] && 'Z' >= buf[*start]) { - st->wantws |= wantws; gotmacro = formatcode(st, buf, start, end, nomacro, 1); continue; } + /* Suppress newlines and multiple spaces. */ + + last = buf[(*start)++]; + if (isspace(last)) { + outbuf_flush(st); + st->wantws = 1; + while (*start < end && + isspace((unsigned char)buf[*start])) + (*start)++; + continue; + } + if (gotmacro) { - if (*start < end || st->outbuflen) { - if (st->wantws || - (wantws && !st->outbuflen)) - printf(" No "); + if (*start < end) { + if (st->wantws) + printf(" No"); else - printf(" Ns "); + printf(" Ns"); } + st->wantws = 1; gotmacro = 0; } - outbuf_flush(st); - st->wantws = wantws; - if (*start >= end) - break; - - if (st->wantws) { - putchar(' '); - st->wantws = 0; - } - /* * Since we're already on a macro line, we want to make * sure that we don't inadvertently invoke a macro. @@ -783,21 +844,16 @@ formatcodeln(struct state *st, const char *linemac, * are used in troff and we don't want to escape * something that needn't be escaped. */ - if (' ' == last && end - *start > 1 && - isupper((unsigned char)buf[*start]) && - islower((unsigned char)buf[*start + 1]) && - (end - *start == 2 || ' ' == buf[*start + 2])) - printf("\\&"); - - putchar(last = buf[*start]); - - /* Protect against character escapes. */ - - if ('\\' == last) - putchar('e'); - - (*start)++; + if (*start < end && ! st->outbuflen && isupper(last) && + islower((unsigned char)buf[*start]) && + (end - *start == 1 || ' ' == buf[*start + 1])) { + outbuf_addstr(st, "\\&"); + last = buf[*start - 1]; + } + outbuf_addchar(st); } + outbuf_flush(st); + st->wantws = 1; } /* @@ -879,7 +935,7 @@ command(struct state *st, const char *buf, size_t star } else if (end - start == 8) { if (0 == memcmp(&buf[start], "SYNOPSIS", 8)) st->sect = SECT_SYNOPSIS; - } + } formatcodeln(st, "Sh", buf, &start, end, 1); mdoc_newln(st); st->haspar = 1; @@ -904,7 +960,7 @@ command(struct state *st, const char *buf, size_t star st->haspar = 1; break; case (CMD_OVER): - /* + /* * If we have an existing list that hasn't had an =item * yet, then make sure that we open it now. * We use the default list type, but that can't be @@ -921,7 +977,7 @@ command(struct state *st, const char *buf, size_t star break; case (CMD_ITEM): if (0 == st->lpos) { - /* + /* * Bad markup. * Try to compensate. */ @@ -935,7 +991,7 @@ command(struct state *st, const char *buf, size_t star * list, and everything is tagged. */ if (LIST__MAX == st->lstack[st->lpos - 1]) { - st->lstack[st->lpos - 1] = + st->lstack[st->lpos - 1] = listguess(buf, start, end); switch (st->lstack[st->lpos - 1]) { case (LIST_BULLET): @@ -977,14 +1033,14 @@ command(struct state *st, const char *buf, size_t star } break; case (CMD_BEGIN): - /* + /* * We disregard all types for now. * TODO: process at least "text" in a -literal block. */ st->paused = 1; break; case (CMD_FOR): - /* + /* * We ignore all types of encodings and formats * unilaterally. */ @@ -1003,19 +1059,62 @@ command(struct state *st, const char *buf, size_t star } /* + * Put the type provided as an argument into the dictionary. + */ +static void +register_type(const char *ptype) +{ + const char *pname, *pend; + + pname = ptype; + while (isalnum((unsigned char)*pname) || '_' == *pname) + pname++; + if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) || + (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) { + while (' ' == *pname) + pname++; + pend = pname; + while (isalnum((unsigned char)*pend) || '_' == *pend) + pend++; + if (pend > pname) + dict_put(pname, pend - pname, MDOC_Vt); + } else + pend = pname; + if (pend > ptype) + dict_put(ptype, pend - ptype, MDOC_Vt); +} + +/* * Just pump out the line in a verbatim block. * From the perspective of external callers, * always stays in OUST_NL/wantws mode. */ static void -verbatim(struct state *st, const char *buf, size_t start, size_t end) +verbatim(struct state *st, char *buf, size_t start, size_t end) { - size_t i; + size_t i, ift, ifo, ifa, ifc, inl; + char *cp, *cp2; + int indisplay, nopen, wantsp; - if ( ! st->parsing || st->paused) + if (st->paused || ! st->parsing) return; + + indisplay = wantsp = 0; + again: - /* + if (start == end) { + if (indisplay) + puts(".Ed"); + return; + } + + if ('\n' == buf[start]) { + wantsp = 1; + start++; + goto again; + } + + /* * If we're in the SYNOPSIS, see if we're an #include block. * If we are, then print the "In" macro and re-loop. * This handles any number of inclusions, but only when they @@ -1023,19 +1122,23 @@ again: */ if (SECT_SYNOPSIS == st->sect) { i = start; - for (i = start; i < end && ' ' == buf[i]; i++) - /* Spin. */ ; + while (i < end && buf[i] == ' ') + i++; if (i == end) - return; + goto again; + /* We're an include block! */ - if (end - i > 10 && + if (end - i > 10 && 0 == memcmp(&buf[i], "#include <", 10)) { start = i + 10; while (start < end && ' ' == buf[start]) start++; + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; fputs(".In ", stdout); /* Stop til the '>' marker or we hit eoln. */ - while (start < end && + while (start < end && '>' != buf[start] && '\n' != buf[start]) putchar(buf[start++]); putchar('\n'); @@ -1043,29 +1146,171 @@ again: start++; if (start < end && '\n' == buf[start]) start++; - if (start < end) + goto again; + } + + /* Other preprocessor directives. */ + if ('#' == buf[i]) { + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; + fputs(".Fd ", stdout); + start = i; + while(start < end && '\n' != buf[start]) + putchar(buf[start++]); + putchar('\n'); + if (start < end && '\n' == buf[start]) + start++; + + /* Remember #define for Dv or Fn. */ + + if (strncmp(buf + i + 1, "define", 6) || + ! isspace((unsigned char)buf[i + 7])) goto again; - return; + + ifo = i + 7; + while (ifo < start && + isspace((unsigned char)buf[ifo])) + ifo++; + ifa = ifo; + while ('_' == buf[ifa] || + isalnum((unsigned char)buf[ifa])) + ifa++; + dict_put(buf + ifo, ifa - ifo, + '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv); + + goto again; } + + /* Parse function declaration. */ + ifo = ifa = ifc = 0; + inl = end; + nopen = 0; + for (ift = i; i < end; i++) { + if (ifc) { + if (buf[i] != '\n') + continue; + inl = i; + break; + } + switch (buf[i]) { + case '\t': + /* FALLTHROUGH */ + case ' ': + if ( ! ifa) + ifo = i; + break; + case '(': + if (ifo) { + nopen++; + if ( ! ifa) + ifa = i; + } else + i = end; + break; + case ')': + switch (nopen) { + case 0: + i = end; + break; + case 1: + ifc = i; + break; + default: + nopen--; + break; + } + break; + default: + break; + } + } + + /* Encode function declaration. */ + if (ifc) { + for (i = ifa; i < ifc; i++) + if (buf[i] == '\n') + buf[i] = ' '; + buf[ifo++] = '\0'; + register_type(buf + ift); + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; + printf(".Ft %s", buf + ift); + if (buf[ifo] == '*') { + fputs(" *", stdout); + ifo++; + } + putchar('\n'); + buf[ifa++] = '\0'; + dict_put(buf + ifo, 0, MDOC_Fo); + buf[ifc++] = '\0'; + if (strcmp(buf + ifa, "void")) { + printf(".Fo %s\n", buf + ifo); + for (;;) { + cp = strchr(buf + ifa, ','); + if (cp != NULL) { + cp2 = cp; + *cp++ = '\0'; + } else + cp2 = strchr(buf + ifa, '\0'); + while (isalnum((unsigned char)cp2[-1]) + || '_' == cp2[-1]) + cp2--; + if ('\0' != *cp2) + dict_put(cp2, 0, MDOC_Fa); + register_type(buf + ifa); + if (strchr(buf + ifa, ' ') == NULL) + printf(".Fa %s\n", buf + ifa); + else + printf(".Fa \"%s\"\n", + buf + ifa); + if (cp == NULL) + break; + while (*cp == ' ' || *cp == '\t') + cp++; + ifa = cp - buf; + } + puts(".Fc"); + } else + printf(".Fn %s void\n", buf + ifo); + if (buf[ifc] == ';') + ifc++; + if (ifc < inl) { + buf[inl] = '\0'; + puts(buf + ifc); + } + start = inl < end ? inl + 1 : end; + goto again; + } } - - if (start == end) - return; - puts(".Bd -literal"); - for (last = ' '; start < end; start++) { + + if ( ! indisplay) + puts(".Bd -literal"); + else if (wantsp) + putchar('\n'); + indisplay = 1; + wantsp = 0; + + for (last = '\n'; start < end; start++) { /* * Handle accidental macros (newline starting with * control character) and escapes. */ - if ('\n' == last) + if ('\n' == last) { + if ('\n' == buf[start]) + goto again; if ('.' == buf[start] || '\'' == buf[start]) printf("\\&"); + } putchar(last = buf[start]); if ('\\' == buf[start]) printf("e"); } - putchar(last = '\n'); - puts(".Ed"); + if ('\n' != last) + putchar('\n'); + if (indisplay) + puts(".Ed"); } /* @@ -1076,7 +1321,7 @@ hasmatch(const char *buf, size_t start, size_t end) { size_t stack; - for (stack = 0; start < end; start++) + for (stack = 0; start < end; start++) if (buf[start] == '[') stack++; else if (buf[start] == ']' && 0 == stack) @@ -1135,7 +1380,7 @@ donamenm(struct state *st, const char *buf, size_t *st assert(OUST_NL == st->oust); assert(st->wantws); - while (*start < end && ' ' == buf[*start]) + while (*start < end && isspace((unsigned char)buf[*start])) (*start)++; if (end == *start) { @@ -1156,7 +1401,7 @@ donamenm(struct state *st, const char *buf, size_t *st printf(" ,"); mdoc_newln(st); (*start)++; - while (*start < end && ' ' == buf[*start]) + while (*start < end && isspace((unsigned char)buf[*start])) (*start)++; } } @@ -1171,14 +1416,17 @@ donamenm(struct state *st, const char *buf, size_t *st * * Uses formatcode() to go to OUST_MAC mode * and outbuf_flush() to go to OUST_TXT mode. - * Main text mode wantws handling is in this function. + * In text mode, wantws requests white space before the text + * currently contained in the outbuf, not before upcoming text. * Must make sure to go back to OUST_NL/wantws mode before returning. */ static void ordinary(struct state *st, const char *buf, size_t start, size_t end) { - size_t i, j, opstack; - int seq; + size_t i, j, opstack, wend; + enum mdoc_type mtype; + int eos, noeos, seq; + char savechar; if ( ! st->parsing || st->paused) return; @@ -1191,7 +1439,8 @@ ordinary(struct state *st, const char *buf, size_t sta */ if (SECT_NAME == st->sect) { for (i = end - 2; i > start; i--) - if ('-' == buf[i] && ' ' == buf[i + 1]) + if ('-' == buf[i] && + isspace((unsigned char)buf[i + 1])) break; if ('-' == buf[i]) { j = i; @@ -1201,8 +1450,11 @@ ordinary(struct state *st, const char *buf, size_t sta break; donamenm(st, buf, &start, i + 1); start = j + 1; - while (start < end && ' ' == buf[start]) + while (start < end && + isspace((unsigned char)buf[start])) start++; + while (start < end && '.' == buf[end - 1]) + end--; formatcodeln(st, "Nd", buf, &start, end, 1); mdoc_newln(st); return; @@ -1217,8 +1469,8 @@ ordinary(struct state *st, const char *buf, size_t sta opstack = 0; for (seq = 0; start < end; seq++) { - /* - * Loop til we get either to a newline or escape. + /* + * Loop til we get either to a newline or escape. * Escape initial control characters. */ while (start < end) { @@ -1238,25 +1490,112 @@ ordinary(struct state *st, const char *buf, size_t sta */ if (SECT_SYNOPSIS == st->sect && - ('[' == buf[start] || + ('[' == buf[start] || ']' == buf[start]) && dosynopsisop(st, buf, &start, end, &opstack)) continue; + /* Merely buffer non-whitespace. */ + + last = buf[start++]; + if ( ! isspace(last)) + outbuf_addchar(st); + if (start < end && + ! isspace((unsigned char)buf[start - 1]) && + ! isspace((unsigned char)buf[start])) + continue; + /* + * Found the end of a word. + * Rewind trailing delimiters. + */ + + eos = noeos = 0; + for (wend = st->outbuflen; wend; wend--) + if ('.' == st->outbuf[wend - 1] || + '!' == st->outbuf[wend - 1] || + '?' == st->outbuf[wend - 1]) + eos = 1; + else if ('|' == st->outbuf[wend - 1] || + ',' == st->outbuf[wend - 1] || + ';' == st->outbuf[wend - 1] || + ':' == st->outbuf[wend - 1]) + noeos = 1; + else if ('\'' != st->outbuf[wend - 1] && + '"' != st->outbuf[wend - 1] && + ')' != st->outbuf[wend - 1] && + ']' != st->outbuf[wend - 1]) + break; + eos &= ! noeos; + + /* + * Detect function names. + */ + + mtype = MDOC_Fa; + savechar = '\0'; + if (wend && ')' == st->outbuf[wend] && + '(' == st->outbuf[wend - 1]) { + mtype = dict_get(st->outbuf, --wend); + if (MDOC_Dv == mtype) + mtype = MDOC_Fo; + if (MDOC_Fo == mtype || MDOC_MAX == mtype) { + st->outbuflen = wend; + st->outbuf[wend] = '\0'; + mdoc_newln(st); + if (MDOC_Fo == mtype) + fputs(".Fn", stdout); + else + fputs(".Xr", stdout); + st->oust = OUST_MAC; + } + } else { + mtype = dict_get(st->outbuf, wend); + if (MDOC_Dv == mtype) { + savechar = st->outbuf[wend]; + st->outbuf[wend] = '\0'; + mdoc_newln(st); + fputs(".Dv", stdout); + st->oust = OUST_MAC; + } else + mtype = MDOC_Fa; + } + + /* * On whitespace, flush the output buffer * and allow breaking to a macro line. - * Otherwise, buffer text and clear wantws. */ - last = buf[start++]; - if (' ' == last) { - outbuf_flush(st); - putchar(' '); - st->wantws = 1; - } else - outbuf_addchar(st); + outbuf_flush(st); + + /* + * End macro lines, and + * end text lines at the end of sentences. + */ + + if (OUST_MAC == st->oust || (eos && wend > 1 && + islower((unsigned char)st->outbuf[wend - 1]))) { + if (MDOC_MAX == mtype) + fputs(" 3", stdout); + if (MDOC_Fa != mtype) { + if (MDOC_Dv == mtype) + st->outbuf[wend] = savechar; + else + wend += 2; + while ('\0' != st->outbuf[wend]) + printf(" %c", + st->outbuf[wend++]); + } + mdoc_newln(st); + } + + /* Advance to the next word. */ + + while ('\n' != buf[start] && + isspace((unsigned char)buf[start])) + start++; + st->wantws = 1; } if (start < end - 1 && '<' == buf[start + 1] && @@ -1268,12 +1607,11 @@ ordinary(struct state *st, const char *buf, size_t sta * XXX Some punctuation characters * are not handled yet. */ - if ((start == end - 1 || - (start < end - 1 && - (' ' == buf[start + 1] || - '\n' == buf[start + 1]))) && - ('.' == buf[start] || - ',' == buf[start])) { + if ((start == end - 1 || + (start < end - 1 && + (' ' == buf[start + 1] || + '\n' == buf[start + 1]))) && + NULL != strchr("|.,;:?!)]", buf[start])) { putchar(' '); putchar(buf[start++]); } @@ -1301,13 +1639,13 @@ ordinary(struct state *st, const char *buf, size_t sta ('<' != buf[start + 1] || 'A' > buf[start] || 'Z' < buf[start])) { - printf(" Ns "); + fputs(" Ns", stdout); st->wantws = 1; } } } else if (start < end && '\n' == buf[start]) { outbuf_flush(st); - mdoc_newln(st); + st->wantws = 1; if (++start >= end) continue; /* @@ -1318,12 +1656,14 @@ ordinary(struct state *st, const char *buf, size_t sta * have a macro subsequent it, which may be * possible if we have an escape next. */ - if (' ' == buf[start] || '\t' == buf[start]) + if (' ' == buf[start] || '\t' == buf[start]) { + mdoc_newln(st); puts(".br"); + } for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; - } + } } outbuf_flush(st); mdoc_newln(st); @@ -1335,7 +1675,7 @@ ordinary(struct state *st, const char *buf, size_t sta * (default: starts with "="). */ static void -dopar(struct state *st, const char *buf, size_t start, size_t end) +dopar(struct state *st, char *buf, size_t start, size_t end) { assert(OUST_NL == st->oust); @@ -1356,14 +1696,15 @@ dopar(struct state *st, const char *buf, size_t start, * POD way. */ static void -dofile(const struct args *args, const char *fname, - const struct tm *tm, const char *buf, size_t sz) +dofile(const struct args *args, const char *fname, + const struct tm *tm, char *buf, size_t sz) { char datebuf[64]; struct state st; - const char *fbase, *fext, *section, *date; + const char *fbase, *fext, *section, *date, *format; char *title, *cp; - size_t sup, end, i, cur = 0; + size_t cur, end; + int verb; if (0 == sz) return; @@ -1406,8 +1747,12 @@ dofile(const struct args *args, const char *fname, /* Date. Or the given "tm" if not supplied. */ - if (NULL == (date = args->date)) { - strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm); + date = args->date; + format = (NULL == date) ? "%B %d, %Y" : + strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $"; + + if (NULL != format) { + strftime(datebuf, sizeof(datebuf), format, tm); date = datebuf; } @@ -1422,6 +1767,7 @@ dofile(const struct args *args, const char *fname, free(title); + dict_init(); memset(&st, 0, sizeof(struct state)); st.oust = OUST_NL; st.wantws = 1; @@ -1430,25 +1776,34 @@ dofile(const struct args *args, const char *fname, /* Main loop over file contents. */ - while (cur < sz) { + cur = 0; + for (;;) { + while (cur < sz && '\n' == buf[cur]) + cur++; + if (cur >= sz) + break; + + verb = isspace((unsigned char)buf[cur]); + /* Read until next paragraph. */ - for (i = cur + 1; i < sz; i++) - if ('\n' == buf[i] && '\n' == buf[i - 1]) { - /* Consume blank paragraphs. */ - while (i + 1 < sz && '\n' == buf[i + 1]) - i++; + + for (end = cur + 1; end + 1 < sz; end++) + if ('\n' == buf[end] && '\n' == buf[end + 1] && + !(verb && end + 2 < sz && + isspace((unsigned char)buf[end + 2]))) break; - } - + /* Adjust end marker for EOF. */ - end = i < sz ? i - 1 : - ('\n' == buf[sz - 1] ? sz - 1 : sz); - sup = i < sz ? end + 2 : sz; + if (end < sz && '\n' != buf[end]) + end++; + /* Process paragraph and adjust start. */ + dopar(&st, buf, cur, end); - cur = sup; + cur = end + 2; } + dict_destroy(); } /* @@ -1466,9 +1821,9 @@ readfile(const struct args *args, const char *fname) ssize_t ssz; struct tm *tm; time_t ttm; - struct stat st; + struct stat st; - fd = 0 != strcmp("-", fname) ? + fd = 0 != strcmp("-", fname) ? open(fname, O_RDONLY, 0) : STDIN_FILENO; if (-1 == fd) { @@ -1482,7 +1837,7 @@ readfile(const struct args *args, const char *fname) } else tm = localtime(&st.st_mtime); - /* + /* * Arbitrarily-sized initial buffer. * Should be big enough for most files... */ @@ -1510,7 +1865,7 @@ readfile(const struct args *args, const char *fname) return(0); } - dofile(args, STDIN_FILENO == fd ? + dofile(args, STDIN_FILENO == fd ? "STDIN" : fname, tm, buf, cur); free(buf); if (STDIN_FILENO != fd) @@ -1578,11 +1933,11 @@ main(int argc, char *argv[]) else if (1 == argc) fname = *argv; - return(readfile(&args, fname) ? + return(readfile(&args, fname) ? EXIT_SUCCESS : EXIT_FAILURE); usage: - fprintf(stderr, "usage: %s [-d date] " + fprintf(stderr, "usage: %s [-d date] " "[-n title] [-s section] [file]\n", name); return(EXIT_FAILURE);