=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.37 retrieving revision 1.54 diff -u -p -r1.37 -r1.54 --- pod2mdoc/pod2mdoc.c 2015/02/13 00:44:16 1.37 +++ pod2mdoc/pod2mdoc.c 2015/02/19 15:26:45 1.54 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.37 2015/02/13 00:44:16 schwarze Exp $ */ +/* $Id: pod2mdoc.c,v 1.54 2015/02/19 15:26:45 schwarze Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * Copyright (c) 2014, 2015 Ingo Schwarze @@ -135,7 +135,7 @@ static const char fmts[FMT__MAX] = { 'Z' /* FMT_NULL */ }; -static int last; +static unsigned char last; static void @@ -160,7 +160,6 @@ outbuf_addchar(struct state *st) if ('\\' == last) st->outbuf[st->outbuflen++] = 'e'; st->outbuf[st->outbuflen] = '\0'; - st->wantws = 0; } static void @@ -174,7 +173,6 @@ outbuf_addstr(struct state *st, const char *str) memcpy(st->outbuf + st->outbuflen, str, slen+1); st->outbuflen += slen; last = str[slen - 1]; - st->wantws = 0; } static void @@ -184,7 +182,14 @@ outbuf_flush(struct state *st) if (0 == st->outbuflen) return; - fputs(st->outbuf, stdout); + if (OUST_TXT == st->oust && st->wantws) + putchar(' '); + + if (OUST_MAC == st->oust && '"' == *st->outbuf) + printf("\\(dq%s", st->outbuf + 1); + else + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; st->outbuflen = 0; @@ -477,8 +482,9 @@ static int formatcode(struct state *st, const char *buf, size_t *start, size_t end, int nomacro, int pos) { - enum fmt fmt; size_t i, j, dsz; + enum fmt fmt; + unsigned char uc; assert(*start + 1 < end); assert('<' == buf[*start + 1]); @@ -556,43 +562,43 @@ formatcode(struct state *st, const char *buf, size_t * if (FMT__MAX != fmt && !nomacro) { /* - * We may already have wantws if there was whitespace - * before the code ("text Bwantws |= ' ' == buf[*start]; - - /* * If we are on a text line and there is no * whitespace before our content, we have to make * the previous word a prefix to the macro line. - * In the following, mdoc_newln() must not be used - * lest we clobber out output state. */ - if (OUST_MAC != st->oust && !st->wantws) { + if (OUST_MAC != st->oust && ' ' != buf[*start] && + st->outbuflen) { if (OUST_NL != st->oust) - putchar('\n'); + mdoc_newln(st); printf(".Pf "); + st->oust = OUST_MAC; + st->wantws = 1; } outbuf_flush(st); - /* Whitespace is easier to suppress on macro lines. */ + /* + * Whitespace is easier to suppress on macro lines. + * We may already have wantws if there was whitespace + * before the code ("text Boust && !st->wantws) - printf(" Ns "); + if (OUST_MAC == st->oust && ' ' != buf[*start] && + ! st->wantws) + printf(" Ns"); /* Unless we are on a macro line, start one. */ - if (OUST_MAC != st->oust && st->wantws) { + if (OUST_MAC != st->oust) { if (OUST_NL != st->oust) - putchar('\n'); + mdoc_newln(st); putchar('.'); + st->oust = OUST_MAC; } else putchar(' '); + st->wantws = 1; /* * Print the macro corresponding to this format code, @@ -612,13 +618,28 @@ formatcode(struct state *st, const char *buf, size_t * else printf("Ar "); break; - } - if (0 == strncmp(buf + *start, "NULL", 4) && - ('=' == buf[*start + 4] || - '>' == buf[*start + 4])) + } + i = 0; + uc = buf[*start]; + while (isalnum(uc) || '_' == uc || ' ' == uc) + uc = buf[*start + ++i]; + if ('=' != uc && '>' != uc) + i = 0; + if (4 == i && ! strncmp(buf + *start, "NULL", 4)) { printf("Dv "); - else + break; + } + switch (i ? dict_get(buf + *start, i) : MDOC_MAX) { + case MDOC_Fa: + printf("Fa "); + break; + case MDOC_Vt: + printf("Vt "); + break; + default: printf("Sy "); + break; + } break; case (FMT_CODE): printf("Qo Li "); @@ -637,8 +658,6 @@ formatcode(struct state *st, const char *buf, size_t * default: abort(); } - st->oust = OUST_MAC; - st->wantws = 1; } else outbuf_flush(st); @@ -671,7 +690,8 @@ formatcode(struct state *st, const char *buf, size_t * } if (*start + 1 < end && '<' == buf[*start + 1] && 'A' <= buf[*start] && 'Z' >= buf[*start]) { - formatcode(st, buf, start, end, nomacro, 1); + if ( ! formatcode(st, buf, start, end, nomacro, 1)) + st->wantws = 1; continue; } @@ -713,14 +733,11 @@ formatcode(struct state *st, const char *buf, size_t * putchar('e'); } - if (FMT__MAX == fmt) - return(0); - if ( ! nomacro && FMT_CODE == fmt) printf(" Qc "); st->wantws = ' ' == last; - return(1); + return(FMT__MAX != fmt); } /* @@ -1006,6 +1023,32 @@ command(struct state *st, const char *buf, size_t star } /* + * Put the type provided as an argument into the dictionary. + */ +static void +register_type(const char *ptype) +{ + const char *pname, *pend; + + pname = ptype; + while (isalnum((unsigned char)*pname) || '_' == *pname) + pname++; + if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) || + (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) { + while (' ' == *pname) + pname++; + pend = pname; + while (isalnum((unsigned char)*pend) || '_' == *pend) + pend++; + if (pend > pname) + dict_put(pname, pend - pname, MDOC_Vt); + } else + pend = pname; + if (pend > ptype) + dict_put(ptype, pend - ptype, MDOC_Vt); +} + +/* * Just pump out the line in a verbatim block. * From the perspective of external callers, * always stays in OUST_NL/wantws mode. @@ -1014,12 +1057,27 @@ static void verbatim(struct state *st, char *buf, size_t start, size_t end) { size_t i, ift, ifo, ifa, ifc, inl; - char *cp; - int nopen; + char *cp, *cp2; + int indisplay, nopen, wantsp; - if ( ! st->parsing || st->paused || start == end) + if (st->paused || ! st->parsing) return; + + indisplay = wantsp = 0; + again: + if (start == end) { + if (indisplay) + puts(".Ed"); + return; + } + + if ('\n' == buf[start]) { + wantsp = 1; + start++; + goto again; + } + /* * If we're in the SYNOPSIS, see if we're an #include block. * If we are, then print the "In" macro and re-loop. @@ -1031,7 +1089,7 @@ again: while (i < end && buf[i] == ' ') i++; if (i == end) - return; + goto again; /* We're an include block! */ if (end - i > 10 && @@ -1039,6 +1097,9 @@ again: start = i + 10; while (start < end && ' ' == buf[start]) start++; + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; fputs(".In ", stdout); /* Stop til the '>' marker or we hit eoln. */ while (start < end && @@ -1049,9 +1110,40 @@ again: start++; if (start < end && '\n' == buf[start]) start++; - if (start < end) + goto again; + } + + /* Other preprocessor directives. */ + if ('#' == buf[i]) { + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; + fputs(".Fd ", stdout); + start = i; + while(start < end && '\n' != buf[start]) + putchar(buf[start++]); + putchar('\n'); + if (start < end && '\n' == buf[start]) + start++; + + /* Remember #define for Dv or Fn. */ + + if (strncmp(buf + i + 1, "define", 6) || + ! isspace((unsigned char)buf[i + 7])) goto again; - return; + + ifo = i + 7; + while (ifo < start && + isspace((unsigned char)buf[ifo])) + ifo++; + ifa = ifo; + while ('_' == buf[ifa] || + isalnum((unsigned char)buf[ifa])) + ifa++; + dict_put(buf + ifo, ifa - ifo, + '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv); + + goto again; } /* Parse function declaration. */ @@ -1066,6 +1158,8 @@ again: break; } switch (buf[i]) { + case '\t': + /* FALLTHROUGH */ case ' ': if ( ! ifa) ifo = i; @@ -1102,6 +1196,10 @@ again: if (buf[i] == '\n') buf[i] = ' '; buf[ifo++] = '\0'; + register_type(buf + ift); + if (indisplay) + puts(".Ed"); + indisplay = wantsp = 0; printf(".Ft %s", buf + ift); if (buf[ifo] == '*') { fputs(" *", stdout); @@ -1110,16 +1208,28 @@ again: putchar('\n'); buf[ifa++] = '\0'; printf(".Fo %s\n", buf + ifo); - dict_put(buf + ifo, MDOC_Fo); + dict_put(buf + ifo, 0, MDOC_Fo); buf[ifc++] = '\0'; for (;;) { cp = strchr(buf + ifa, ','); - if (cp != NULL) + if (cp != NULL) { + cp2 = cp; *cp++ = '\0'; - printf(".Fa \"%s\"\n", buf + ifa); + } else + cp2 = strchr(buf + ifa, '\0'); + while (isalnum((unsigned char)cp2[-1]) || + '_' == cp2[-1]) + cp2--; + if ('\0' != *cp2) + dict_put(cp2, 0, MDOC_Fa); + register_type(buf + ifa); + if (strchr(buf + ifa, ' ') == NULL) + printf(".Fa %s\n", buf + ifa); + else + printf(".Fa \"%s\"\n", buf + ifa); if (cp == NULL) break; - while (*cp == ' ') + while (*cp == ' ' || *cp == '\t') cp++; ifa = cp - buf; } @@ -1130,28 +1240,37 @@ again: buf[inl] = '\0'; puts(buf + ifc); } - start = inl + 1; - if (start < end) - goto again; - return; + start = inl < end ? inl + 1 : end; + goto again; } } - - puts(".Bd -literal"); - for (last = ' '; start < end; start++) { + + if ( ! indisplay) + puts(".Bd -literal"); + else if (wantsp) + putchar('\n'); + indisplay = 1; + wantsp = 0; + + for (last = '\n'; start < end; start++) { /* * Handle accidental macros (newline starting with * control character) and escapes. */ - if ('\n' == last) + if ('\n' == last) { + if ('\n' == buf[start]) + goto again; if ('.' == buf[start] || '\'' == buf[start]) printf("\\&"); + } putchar(last = buf[start]); if ('\\' == buf[start]) printf("e"); } - putchar(last = '\n'); - puts(".Ed"); + if ('\n' != last) + putchar('\n'); + if (indisplay) + puts(".Ed"); } /* @@ -1221,7 +1340,7 @@ donamenm(struct state *st, const char *buf, size_t *st assert(OUST_NL == st->oust); assert(st->wantws); - while (*start < end && ' ' == buf[*start]) + while (*start < end && isspace((unsigned char)buf[*start])) (*start)++; if (end == *start) { @@ -1242,7 +1361,7 @@ donamenm(struct state *st, const char *buf, size_t *st printf(" ,"); mdoc_newln(st); (*start)++; - while (*start < end && ' ' == buf[*start]) + while (*start < end && isspace((unsigned char)buf[*start])) (*start)++; } } @@ -1257,14 +1376,17 @@ donamenm(struct state *st, const char *buf, size_t *st * * Uses formatcode() to go to OUST_MAC mode * and outbuf_flush() to go to OUST_TXT mode. - * Main text mode wantws handling is in this function. + * In text mode, wantws requests white space before the text + * currently contained in the outbuf, not before upcoming text. * Must make sure to go back to OUST_NL/wantws mode before returning. */ static void ordinary(struct state *st, const char *buf, size_t start, size_t end) { - size_t i, j, opstack; - int seq; + size_t i, j, opstack, wend; + enum mdoc_type mtype; + int eos, noeos, seq; + char savechar; if ( ! st->parsing || st->paused) return; @@ -1277,7 +1399,8 @@ ordinary(struct state *st, const char *buf, size_t sta */ if (SECT_NAME == st->sect) { for (i = end - 2; i > start; i--) - if ('-' == buf[i] && ' ' == buf[i + 1]) + if ('-' == buf[i] && + isspace((unsigned char)buf[i + 1])) break; if ('-' == buf[i]) { j = i; @@ -1287,7 +1410,8 @@ ordinary(struct state *st, const char *buf, size_t sta break; donamenm(st, buf, &start, i + 1); start = j + 1; - while (start < end && ' ' == buf[start]) + while (start < end && + isspace((unsigned char)buf[start])) start++; formatcodeln(st, "Nd", buf, &start, end, 1); mdoc_newln(st); @@ -1330,32 +1454,105 @@ ordinary(struct state *st, const char *buf, size_t sta &start, end, &opstack)) continue; - /* - * On whitespace, flush the output buffer - * and allow breaking to a macro line. - * Otherwise, buffer text and clear wantws. - */ + /* Merely buffer non-whitespace. */ last = buf[start++]; - if (' ' != last) { + if ( ! isspace(last)) outbuf_addchar(st); + if (start < end && + ! isspace((unsigned char)buf[start - 1]) && + ! isspace((unsigned char)buf[start])) continue; + + /* + * Found the end of a word. + * Rewind trailing delimiters. + */ + + eos = noeos = 0; + for (wend = st->outbuflen; wend; wend--) + if ('.' == st->outbuf[wend - 1] || + '!' == st->outbuf[wend - 1] || + '?' == st->outbuf[wend - 1]) + eos = 1; + else if ('|' == st->outbuf[wend - 1] || + ',' == st->outbuf[wend - 1] || + ';' == st->outbuf[wend - 1] || + ':' == st->outbuf[wend - 1]) + noeos = 1; + else if ('\'' != st->outbuf[wend - 1] && + '"' != st->outbuf[wend - 1] && + ')' != st->outbuf[wend - 1] && + ']' != st->outbuf[wend - 1]) + break; + eos &= ! noeos; + + /* + * Detect function names. + */ + + mtype = MDOC_Fa; + savechar = '\0'; + if (wend && ')' == st->outbuf[wend] && + '(' == st->outbuf[wend - 1]) { + mtype = dict_get(st->outbuf, --wend); + if (MDOC_Dv == mtype) + mtype = MDOC_Fo; + if (MDOC_Fo == mtype || MDOC_MAX == mtype) { + st->outbuflen = wend; + st->outbuf[wend] = '\0'; + mdoc_newln(st); + if (MDOC_Fo == mtype) + fputs(".Fn ", stdout); + else + fputs(".Xr ", stdout); + st->oust = OUST_MAC; + } + } else { + mtype = dict_get(st->outbuf, wend); + if (MDOC_Dv == mtype) { + savechar = st->outbuf[wend]; + st->outbuf[wend] = '\0'; + mdoc_newln(st); + fputs(".Dv ", stdout); + st->oust = OUST_MAC; + } else + mtype = MDOC_Fa; } - if ( ! strcmp(st->outbuf + st->outbuflen - 2, "()") && - dict_get(st->outbuf, st->outbuflen - 2) == - MDOC_Fo) { - st->outbuflen -= 2; - st->outbuf[st->outbuflen] = '\0'; + /* + * On whitespace, flush the output buffer + * and allow breaking to a macro line. + */ + + outbuf_flush(st); + + /* + * End macro lines, and + * end text lines at the end of sentences. + */ + + if (OUST_MAC == st->oust || (eos && wend > 1 && + islower((unsigned char)st->outbuf[wend - 1]))) { + if (MDOC_MAX == mtype) + fputs(" 3", stdout); + if (MDOC_Fa != mtype) { + if (MDOC_Dv == mtype) + st->outbuf[wend] = savechar; + else + wend += 2; + while ('\0' != st->outbuf[wend]) + printf(" %c", + st->outbuf[wend++]); + } mdoc_newln(st); - fputs(".Fn ", stdout); - outbuf_flush(st); - mdoc_newln(st); - continue; } - outbuf_flush(st); - putchar(' '); + /* Advance to the next word. */ + + while ('\n' != buf[start] && + isspace((unsigned char)buf[start])) + start++; st->wantws = 1; } @@ -1368,12 +1565,11 @@ ordinary(struct state *st, const char *buf, size_t sta * XXX Some punctuation characters * are not handled yet. */ - if ((start == end - 1 || - (start < end - 1 && - (' ' == buf[start + 1] || - '\n' == buf[start + 1]))) && - ('.' == buf[start] || - ',' == buf[start])) { + if ((start == end - 1 || + (start < end - 1 && + (' ' == buf[start + 1] || + '\n' == buf[start + 1]))) && + NULL != strchr("|.,;:?!)]", buf[start])) { putchar(' '); putchar(buf[start++]); } @@ -1461,9 +1657,10 @@ dofile(const struct args *args, const char *fname, { char datebuf[64]; struct state st; - const char *fbase, *fext, *section, *date; + const char *fbase, *fext, *section, *date, *format; char *title, *cp; - size_t sup, end, i, cur = 0; + size_t cur, end; + int verb; if (0 == sz) return; @@ -1506,8 +1703,12 @@ dofile(const struct args *args, const char *fname, /* Date. Or the given "tm" if not supplied. */ - if (NULL == (date = args->date)) { - strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm); + date = args->date; + format = (NULL == date) ? "%B %d, %Y" : + strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $"; + + if (NULL != format) { + strftime(datebuf, sizeof(datebuf), format, tm); date = datebuf; } @@ -1531,24 +1732,32 @@ dofile(const struct args *args, const char *fname, /* Main loop over file contents. */ - while (cur < sz) { + cur = 0; + for (;;) { + while (cur < sz && '\n' == buf[cur]) + cur++; + if (cur >= sz) + break; + + verb = isspace((unsigned char)buf[cur]); + /* Read until next paragraph. */ - for (i = cur + 1; i < sz; i++) - if ('\n' == buf[i] && '\n' == buf[i - 1]) { - /* Consume blank paragraphs. */ - while (i + 1 < sz && '\n' == buf[i + 1]) - i++; + + for (end = cur + 1; end + 1 < sz; end++) + if ('\n' == buf[end] && '\n' == buf[end + 1] && + !(verb && end + 2 < sz && + isspace((unsigned char)buf[end + 2]))) break; - } /* Adjust end marker for EOF. */ - end = i < sz ? i - 1 : - ('\n' == buf[sz - 1] ? sz - 1 : sz); - sup = i < sz ? end + 2 : sz; + if (end < sz && '\n' != buf[end]) + end++; + /* Process paragraph and adjust start. */ + dopar(&st, buf, cur, end); - cur = sup; + cur = end + 2; } dict_destroy(); }