/*	$Id: pod2mdoc.c,v 1.22 2014/04/03 16:50:32 kristaps Exp $ */
/*
 * Copyright (c) 2014 Kristaps Dzonsons
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/stat.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/*
 * In what section can we find Perl module manuals?
 * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
 * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
 */
#define	PERL_SECTION	"3p"

/* Command-line overrides for the mdoc(7) preamble. */
struct	args {
	const char	*title; /* override "Dt" title */
	const char	*date; /* override "Dd" date */
	const char	*section; /* override "Dt" section */
};

/* Kinds of list we emit; LIST__MAX marks "opened but not yet typed". */
enum	list {
	LIST_BULLET = 0,
	LIST_ENUM,
	LIST_TAG,
	LIST__MAX
};

enum	sect {
	SECT_NONE = 0,
	SECT_NAME, /* NAME section */
	SECT_SYNOPSIS, /* SYNOPSIS section */
};

/* Parser state carried from one paragraph to the next. */
struct	state {
	int		 parsing; /* after =cut or before command */
	int		 paused; /* in =begin and before =end */
	int		 haspar; /* in paragraph: do we need Pp? */
	enum sect	 sect; /* which section are we in? */
	const char	*fname; /* file being parsed */
#define	LIST_STACKSZ	 128
	enum list	 lstack[LIST_STACKSZ]; /* open lists */
	size_t		 lpos; /* where in list stack */
};

enum	fmt {
	FMT_ITALIC,
	FMT_BOLD,
	FMT_CODE,
	FMT_LINK,
	FMT_ESCAPE,
	FMT_FILE,
	FMT_NBSP,
	FMT_INDEX,
	FMT_NULL,
	FMT__MAX
};

enum	cmd {
	CMD_POD = 0,
	CMD_HEAD1,
	CMD_HEAD2,
	CMD_HEAD3,
	CMD_HEAD4,
	CMD_OVER,
	CMD_ITEM,
	CMD_BACK,
	CMD_BEGIN,
	CMD_END,
	CMD_FOR,
	CMD_ENCODING,
	CMD_CUT,
	CMD__MAX
};

/* Command names, indexed by enum cmd. */
static	const char *const cmds[CMD__MAX] = {
	"pod", 		/* CMD_POD */
	"head1",	/* CMD_HEAD1 */
	"head2",	/* CMD_HEAD2 */
	"head3",	/* CMD_HEAD3 */
	"head4",	/* CMD_HEAD4 */
	"over",		/* CMD_OVER */
	"item",		/* CMD_ITEM */
	"back",		/* CMD_BACK */
	"begin",	/* CMD_BEGIN */
	"end",		/* CMD_END */
	"for",		/* CMD_FOR */
	"encoding",	/* CMD_ENCODING */
	"cut"		/* CMD_CUT */
};

/* Format code letters, indexed by enum fmt. */
static	const char fmts[FMT__MAX] = {
	'I',		/* FMT_ITALIC */
	'B',		/* FMT_BOLD */
	'C',		/* FMT_CODE */
	'L',		/* FMT_LINK */
	'E',		/* FMT_ESCAPE */
	'F',		/* FMT_FILE */
	'S',		/* FMT_NBSP */
	'X',		/* FMT_INDEX */
	'Z'		/* FMT_NULL */
};

/* Last character written by the ordinary/format-code printers. */
static	int 		last;

/*
 * Duplicate a NUL-terminated string using only ISO C facilities.
 * Prints a diagnostic and exits on allocation failure, so the result
 * is never NULL; the caller owns (and frees) the copy.
 */
static char *
xstrdup(const char *s)
{
	size_t		 sz;
	char		*cp;

	sz = strlen(s) + 1;
	if (NULL == (cp = malloc(sz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}
	memcpy(cp, s, sz);
	return(cp);
}

/*
 * Return non-zero if the "len" bytes at "buf" begin with the
 * NUL-terminated string "prefix".
 * Only strlen(prefix) bytes are ever compared, so neither operand is
 * read past its end.
 */
static int
hasprefix(const char *buf, size_t len, const char *prefix)
{
	size_t		 sz;

	sz = strlen(prefix);
	return(len >= sz && 0 == memcmp(buf, prefix, sz));
}

/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>'.
 * Over-long names are skipped (leaving start on the '>' or at end);
 * unrecognised names are consumed silently.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/* Too long... skip til we end. */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt")) 
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
	else
		return;

	last = 'a';
}

/*
 * Run some heuristics to intuit a link format.
 * I set "start" to be the end of the sequence (last right caret) so
 * that the caller can safely just continue processing.
 * "dsz" is the number of opening '<' (1 for the plain "L<...>" form).
 * Prints the macro name and its arguments (no leading '.', no
 * trailing newline) and returns 1, or prints nothing and returns 0 if
 * this is just an empty tag.
 */
static int
trylink(const char *buf, size_t *start, size_t end, size_t dsz)
{
	size_t		 linkstart, realend, linkend, i, j, sep;
	const char	*link;

	/* 
	 * Scan to the start of the terminus.
	 * This function is more or less replicated in the formatcode()
	 * for null or index formatting codes.
	 */
	for (linkstart = realend = *start; realend < end; realend++) {
		if ('>' != buf[realend])
			continue;
		else if (dsz == 1)
			break;
		/* Custom delimiters need " >>" (space, then dsz carets). */
		assert(realend > 0);
		if (' ' != buf[realend - 1])
			continue;
		for (i = realend, j = 0; i < end && j < dsz; j++) 
			if ('>' != buf[i++])
				break;
		if (dsz == j)
			break;
	}

	/* Ignore stubs. */
	if (realend == end || realend == *start)
		return(0);

	/* Set linkend to the end of content. */
	linkend = dsz > 1 ? realend - 1 : realend;

	/* Re-scan to see if we have a title or section. */
	for (sep = *start; sep < linkend; sep++)
		if ('|' == buf[sep] || '/' == buf[sep])
			break;

	if (sep < linkend && '|' == buf[sep]) {
		/* With title: set start, then end at section. */
		linkstart = sep + 1;
		for (i = linkstart; i < linkend; i++)
			if ('/' == buf[i])
				break;
		if (i < linkend)
			linkend = i;
	} else if (sep < linkend && '/' == buf[sep]) {
		/* With section: set end at section. */
		linkend = sep;
	}
	/* Otherwise: no title, no section. */

	*start = realend;
	j = linkend - linkstart;

	/* Do we have only subsection material? */
	if (0 == j && '/' == buf[linkend]) {
		linkstart = linkend + 1;
		linkend = dsz > 1 ? realend - 1 : realend;
		if (0 == (j = linkend - linkstart))
			return(0);
		printf("Sx %.*s", (int)j, &buf[linkstart]);
		return(1);
	} else if (0 == j)
		return(0);

	link = &buf[linkstart];

	/*
	 * See if we qualify as being a link or not.
	 * Only the scheme prefix is compared; the URL is then printed
	 * up to the end of the content, since the '/' scan above will
	 * have cut "j" short at the first slash.
	 */
	if (hasprefix(link, j, "http:") ||
		hasprefix(link, j, "https:") ||
		hasprefix(link, j, "ftp:") ||
		hasprefix(link, j, "sftp:") ||
		hasprefix(link, j, "smb:") ||
		hasprefix(link, j, "afs:")) {
		/* Gross. */
		printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 : 
			realend) - linkstart), link);
		return(1);
	}

	/*
	 * See if we qualify as a mailto.
	 * "Mt" takes the bare address, so drop the scheme.
	 */
	if (j > 7 && hasprefix(link, j, "mailto:")) {
		printf("Mt %.*s", (int)(j - 7), link + 7);
		return(1);
	}

	/* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
	if ((j > 3 && ')' == buf[linkend - 1]) &&
		('(' == buf[linkend - 3])) {
		printf("Xr %.*s %c", (int)(j - 3), 
			link, buf[linkend - 2]);
		return(1);
	} else if ((j > 4 && ')' == buf[linkend - 1]) &&
		('(' == buf[linkend - 4])) {
		printf("Xr %.*s %.*s", (int)(j - 4), 
			link, 2, &buf[linkend - 3]);
		return(1);
	} else if ((j > 5 && ')' == buf[linkend - 1]) &&
		('(' == buf[linkend - 5])) {
		printf("Xr %.*s %.*s", (int)(j - 5), 
			link, 3, &buf[linkend - 4]);
		return(1);
	}

	/* Last try: do we have a double-colon? */
	for (i = linkstart + 1; i < linkend; i++)
		if (':' == buf[i] && ':' == buf[i - 1])
			break;

	if (i < linkend)
		printf("Xr %.*s " PERL_SECTION, (int)j, link);
	else
		printf("Xr %.*s 1", (int)j, link);

	return(1);
}

/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 * The caller must guarantee that buf[*start] is '-' and that at least
 * one more byte follows it before "end".
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	if ( ! isalnum((unsigned char)buf[*start + 1]) &&
		'?' != buf[*start + 1] &&
		'-' != buf[*start + 1]) {
		/*
		 * NOTE(review): when reached on the first pass (not via
		 * "again") this steps back onto the opening '<', which
		 * the caller will then print; behaviour kept as-is,
		 * confirm whether that is intended.
		 */
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Skip the dash, then scan to the end of the flag word. */
	(*start)++;
	for (i = *start; i < end; i++)
		if (isalnum((unsigned char)buf[i]))
			continue;
		else if ('?' == buf[i])
			continue;
		else if ('-' == buf[i])
			continue;
		else if ('_' == buf[i])
			continue;
		else
			break;

	assert(i < end);

	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Escape a flag that would otherwise look like a macro ("Xx"). */
	if (end - *start > 1 &&
		isupper((unsigned char)buf[*start]) &&
		islower((unsigned char)buf[*start + 1]) &&
		(end - *start == 2 ||
		 ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		/* Another flag follows: handle it the same way. */
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		printf("Ar ");
		*start = i;
	}
}

/*
 * We're at the character in front of a format code, which is structured
 * like X<...> and can contain nested format codes.
 * This consumes the whole format code, and any nested format codes, til
 * the end of matched production.
 * If "reentrant", then we're being called after a macro has already
 * been printed to the current line.
 * If "nomacro", then we don't print any macros, just contained data
 * (e.g., following "Sh" or "Nm").
 * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
 * as the first format code on a line (for decoration as an "Nm"),
 * non-zero otherwise.
 * Return whether we've printed a macro or not--in other words, whether
 * this should trigger a subsequent newline (this should be ignored when
 * reentrant).
 */
static int
formatcode(struct state *st, const char *buf, size_t *start, 
	size_t end, int reentrant, int nomacro, int pos)
{
	enum fmt	 fmt;
	size_t		 i, j, dsz;

	assert(*start + 1 < end);
	assert('<' == buf[*start + 1]);

	/* 
	 * First, look up the format code. 
	 * If it's not valid, then exit immediately.
	 */
	for (fmt = 0; fmt < FMT__MAX; fmt++)
		if (buf[*start] == fmts[fmt])
			break;

	if (FMT__MAX == fmt) {
		putchar(last = buf[(*start)++]);
		if ('\\' == last)
			putchar('e');
		return(0);
	}

	/*
	 * Determine whether we're overriding our delimiter.
	 * According to POD, if we have more than one '<' followed by a
	 * space, then we need a space followed by matching '>' to close
	 * the expression.
	 * Otherwise we use the usual '<' and '>' matched pair.
	 */
	i = *start + 1;
	while (i < end && '<' == buf[i])
		i++;
	assert(i > *start + 1);
	dsz = i - (*start + 1);
	if (dsz > 1 && (i >= end || ' ' != buf[i]))
		dsz = 1;

	/* Remember, if dsz>1, to jump the trailing space. */
	*start += dsz + 1 + (dsz > 1 ? 1 : 0);

	/*
	 * Escapes and ignored codes (NULL and INDEX) don't print macro
	 * sequences, so just output them like normal text before
	 * processing for real macros.
	 */
	if (FMT_ESCAPE == fmt) {
		formatescape(buf, start, end);
		return(0);
	} else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
		/* 
		 * Just consume til the end delimiter, accounting for
		 * whether it's a custom one.
		 * The delimiter itself is consumed too, otherwise it
		 * would be printed or would close an enclosing code.
		 */
		for ( ; *start < end; (*start)++) {
			if ('>' != buf[*start])
				continue;
			else if (dsz == 1) {
				(*start)++;
				break;
			}
			assert(*start > 0);
			if (' ' != buf[*start - 1])
				continue;
			i = *start;
			for (j = 0; i < end && j < dsz; j++) 
				if ('>' != buf[i++])
					break;
			if (dsz != j)
				continue;
			(*start) += dsz;
			break;
		}
		return(0);
	}

	/*
	 * Check whether we're supposed to print macro stuff (this is
	 * suppressed in, e.g., "Nm" and "Sh" macros).
	 */
	if ( ! nomacro) {
		/*
		 * Print out the macro describing this format code.
		 * If we're not "reentrant" (not yet on a macro line)
		 * then print a newline, if necessary, and the macro
		 * indicator.
		 * Otherwise, offset us with a space.
		 */
		if ( ! reentrant) {
			if (last != '\n')
				putchar('\n');
			putchar('.');
		} else
			putchar(' ');

		/*
		 * If we don't have whitespace before us (and none after
		 * the opening delimiter), then suppress macro
		 * whitespace with Pf.
		 * Running out of input counts as "no whitespace after".
		 */
		if (' ' != last && '\n' != last &&
			(*start >= end || ' ' != buf[*start]))
			printf("Pf ");

		switch (fmt) {
		case (FMT_ITALIC):
			printf("Em ");
			break;
		case (FMT_BOLD):
			if (SECT_SYNOPSIS == st->sect) {
				/*
				 * Only try the flag heuristic if there
				 * is something after the dash to look
				 * at.
				 */
				if (1 == dsz && *start + 1 < end &&
					'-' == buf[*start])
					dosynopsisfl(buf, start, end);
				else if (0 == pos)
					printf("Nm ");
				else
					printf("Ar ");
				break;
			} 
			printf("Sy ");
			break;
		case (FMT_CODE):
			printf("Qo Li ");
			break;
		case (FMT_LINK):
			/* Try to link; use "No" if it's empty. */
			if ( ! trylink(buf, start, end, dsz))
				printf("No ");
			break;
		case (FMT_FILE):
			printf("Pa ");
			break;
		case (FMT_NBSP):
			printf("No ");
			break;
		default:
			abort();
		}
	}

	/*
	 * Process until we reach the end marker (e.g., '>') or until we
	 * find a nested format code.
	 * Don't emit any newlines: since we're on a macro line, we
	 * don't want to break the line.
	 */
	while (*start < end) {
		if ('>' == buf[*start] && 1 == dsz) {
			(*start)++;
			break;
		} else if ('>' == buf[*start] && 
				' ' == buf[*start - 1]) {
			/*
			 * Handle custom delimiters.
			 * These require a certain number of
			 * space-preceded carets before we're really at
			 * the end.
			 */
			i = *start;
			for (j = 0; i < end && j < dsz; j++) 
				if ('>' != buf[i++])
					break;
			if (dsz == j) {
				*start += dsz;
				break;
			}
		}
		if (*start + 1 < end && '<' == buf[*start + 1]) {
			formatcode(st, buf, start, end, 1, nomacro, 1);
			continue;
		}

		/*
		 * Make sure that any macro-like words (or
		 * really any word starting with a capital
		 * letter) is assumed to be a macro that must be
		 * escaped.
		 * This matches "Xx " and "XxEOLN".
		 */
		if ((' ' == last || '\n' == last) && 
			end - *start > 1 &&
			isupper((unsigned char)buf[*start]) &&
			islower((unsigned char)buf[*start + 1]) &&
			(end - *start == 2 ||
			 ' ' == buf[*start + 2]))
			printf("\\&");

		/* Suppress newline. */
		if ('\n' == buf[*start])
			putchar(last = ' ');
		else
			putchar(last = buf[*start]);

		/* Protect against character escapes. */
		if ('\\' == last)
			putchar('e');

		(*start)++;

		/* Collapse runs of blanks into the one just printed. */
		if (' ' == last)
			while (*start < end && ' ' == buf[*start])
				(*start)++;
	}

	if ( ! nomacro && FMT_CODE == fmt)
		printf(" Qc ");

	/*
	 * We're now just past the format code.
	 * If there isn't a space (or newline) here, and we haven't just
	 * printed a space, then suppress space.
	 * Nothing follows at the end of input, so nothing to suppress.
	 */
	if ( ! nomacro && ' ' != last && *start < end)
		if (' ' != buf[*start] && '\n' != buf[*start])
			printf(" Ns ");

	return(1);
}

/*
 * Calls formatcode() til the end of a paragraph.
 * The caller has already printed a macro on the current line, so
 * newlines in the input are printed as blanks.
 */
static void
formatcodeln(struct state *st, const char *buf, 
	size_t *start, size_t end, int nomacro)
{

	last = ' ';
	while (*start < end)  {
		if (*start + 1 < end && '<' == buf[*start + 1]) {
			formatcode(st, buf, start, end, 1, nomacro, 1);
			continue;
		}
		/*
		 * Since we're already on a macro line, we want to make
		 * sure that we don't inadvertently invoke a macro.
		 * We need to do this carefully because section names
		 * are used in troff and we don't want to escape
		 * something that needn't be escaped.
		 */
		if (' ' == last && end - *start > 1 &&
			isupper((unsigned char)buf[*start]) &&
			islower((unsigned char)buf[*start + 1]) &&
			(end - *start == 2 ||
			 ' ' == buf[*start + 2]))
			printf("\\&");

		if ('\n' == buf[*start])
			putchar(last = ' ');
		else
			putchar(last = buf[*start]);

		/* Protect against character escapes. */
		if ('\\' == last)
			putchar('e');
		(*start)++;
	}
}

/*
 * Guess at what kind of list we are.
 * These are taken straight from the POD manual.
 * I don't know what people do in real life.
 * "*" is a bullet list, "1" or "1." an enumerated list, and anything
 * else a tagged list.
 */
static enum list
listguess(const char *buf, size_t start, size_t end)
{
	size_t		 len = end - start;

	assert(end >= start);

	if (len == 1 && '*' == buf[start])
		return(LIST_BULLET);
	if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
		return(LIST_ENUM);
	else if (len == 1 && '1' == buf[start])
		return(LIST_ENUM);
	else
		return(LIST_TAG);
}

/*
 * A command paragraph, as noted in the perlpod manual, just indicates
 * that we should do something, optionally with some text to print as
 * well.
 * buf[start] must be the leading '='.
 * Unknown commands are ignored; everything is ignored between =begin
 * and =end.
 */
static void
command(struct state *st, const char *buf, size_t start, size_t end)
{
	size_t		 len, csz;
	enum cmd	 cmd;

	assert('=' == buf[start]);
	start++;
	len = end - start;

	/* The command name only needs to be a prefix of the text. */
	for (cmd = 0; cmd < CMD__MAX; cmd++) {
		csz = strlen(cmds[cmd]);
		if (len < csz)
			continue;
		if (0 == memcmp(&buf[start], cmds[cmd], csz))
			break;
	}

	/* Ignore bogus commands. */

	if (CMD__MAX == cmd)
		return;

	start += csz;
	while (start < end && ' ' == buf[start])
		start++;

	len = end - start;

	if (st->paused) {
		st->paused = CMD_END != cmd;
		return;
	}

	switch (cmd) {
	case (CMD_POD):
		break;
	case (CMD_HEAD1):
		/*
		 * The behaviour of head= follows from a quick glance at
		 * how pod2man handles it.
		 */
		printf(".Sh ");
		st->sect = SECT_NONE;
		if (end - start == 4) {
			if (0 == memcmp(&buf[start], "NAME", 4))
				st->sect = SECT_NAME;
		} else if (end - start == 8) {
			if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
				st->sect = SECT_SYNOPSIS;
		}
		formatcodeln(st, buf, &start, end, 1);
		putchar('\n');
		st->haspar = 1;
		break;
	case (CMD_HEAD2):
		printf(".Ss ");
		formatcodeln(st, buf, &start, end, 1);
		putchar('\n');
		st->haspar = 1;
		break;
	case (CMD_HEAD3):
		puts(".Pp");
		printf(".Em ");
		formatcodeln(st, buf, &start, end, 0);
		putchar('\n');
		puts(".Pp");
		st->haspar = 1;
		break;
	case (CMD_HEAD4):
		puts(".Pp");
		printf(".No ");
		formatcodeln(st, buf, &start, end, 0);
		putchar('\n');
		puts(".Pp");
		st->haspar = 1;
		break;
	case (CMD_OVER):
		/* 
		 * If we have an existing list that hasn't had an =item
		 * yet, then make sure that we open it now.
		 * We use the default list type, but that can't be
		 * helped (we haven't seen any items yet).
		 */
		if (st->lpos > 0)
			if (LIST__MAX == st->lstack[st->lpos - 1]) {
				st->lstack[st->lpos - 1] = LIST_TAG;
				puts(".Bl -tag -width Ds");
			}
		/*
		 * Refuse to overrun the list stack.
		 * This is an explicit check, not an assertion, so that
		 * it survives NDEBUG builds.
		 */
		if (st->lpos + 1 >= LIST_STACKSZ) {
			fputs("=over: lists nested too deeply\n", stderr);
			exit(EXIT_FAILURE);
		}
		st->lpos++;
		st->lstack[st->lpos - 1] = LIST__MAX;
		break;
	case (CMD_ITEM):
		if (0 == st->lpos) {
			/*
			 * Bad markup.
			 * Try to compensate.
			 */
			st->lstack[st->lpos] = LIST__MAX;
			st->lpos++;
		}
		assert(st->lpos > 0);
		/*
		 * If we're the first =item, guess at what our content
		 * will be: "*" is a bullet list, "1." is a numbered
		 * list, and everything is tagged.
		 */
		if (LIST__MAX == st->lstack[st->lpos - 1]) {
			st->lstack[st->lpos - 1] = 
				listguess(buf, start, end);
			switch (st->lstack[st->lpos - 1]) {
			case (LIST_BULLET):
				puts(".Bl -bullet");
				break;
			case (LIST_ENUM):
				puts(".Bl -enum");
				break;
			default:
				puts(".Bl -tag -width Ds");
				break;
			}
		}
		switch (st->lstack[st->lpos - 1]) {
		case (LIST_TAG):
			printf(".It ");
			formatcodeln(st, buf, &start, end, 0);
			putchar('\n');
			break;
		case (LIST_ENUM):
			/* FALLTHROUGH */
		case (LIST_BULLET):
			/*
			 * Abandon the remainder of the paragraph
			 * because we're going to be a bulleted or
			 * numbered list.
			 */
			puts(".It");
			break;
		default:
			abort();
		}
		st->haspar = 1;
		break;
	case (CMD_BACK):
		/* Make sure we don't back over the stack. */
		if (st->lpos > 0) {
			st->lpos--;
			puts(".El");
		}
		break;
	case (CMD_BEGIN):
		/* 
		 * We disregard all types for now.
		 * TODO: process at least "text" in a -literal block.
		 */
		st->paused = 1;
		break;
	case (CMD_FOR):
		/* 
		 * We ignore all types of encodings and formats
		 * unilaterally.
		 */
		break;
	case (CMD_ENCODING):
		break;
	case (CMD_CUT):
		st->parsing = 0;
		return;
	default:
		abort();
	}

	/* Any command (but =cut) makes us start parsing. */
	st->parsing = 1;
}

/*
 * Just pump out the line in a verbatim block.
 * In the SYNOPSIS, leading "#include <...>" lines are printed as "In"
 * macros instead.
 */
static void
verbatim(struct state *st, const char *buf, size_t start, size_t end)
{
	int		 prev; /* previous byte written in the block */
	size_t		 i;

	if ( ! st->parsing || st->paused)
		return;
again:
	/* 
	 * If we're in the SYNOPSIS, see if we're an #include block.
	 * If we are, then print the "In" macro and re-loop.
	 * This handles any number of inclusions, but only when they
	 * come before the remaining parts...
	 */
	if (SECT_SYNOPSIS == st->sect) {
		for (i = start; i < end && ' ' == buf[i]; i++)
			/* Spin. */ ;
		if (i == end)
			return;
		/* We're an include block! */
		if (end - i > 10 && 
			0 == memcmp(&buf[i], "#include <", 10)) {
			start = i + 10;
			while (start < end && ' ' == buf[start])
				start++;
			fputs(".In ", stdout);
			/* Stop til the '>' marker or we hit eoln. */
			while (start < end && 
				'>' != buf[start] && '\n' != buf[start])
				putchar(buf[start++]);
			putchar('\n');
			if (start < end && '>' == buf[start])
				start++;
			if (start < end && '\n' == buf[start])
				start++;
			if (start < end)
				goto again;
			return;
		}
	}
	
	if (start == end)
		return;

	puts(".Bd -literal");
	for (prev = ' '; start < end; start++) {
		/*
		 * Handle accidental macros (newline starting with
		 * control character) and escapes.
		 */
		if ('\n' == prev)
			if ('.' == buf[start] || '\'' == buf[start])
				printf("\\&");
		putchar(prev = buf[start]);
		if ('\\' == buf[start])
			printf("e");
	}
	putchar('\n');
	puts(".Ed");
}

/*
 * See dosynopsisop().
 * Return 1 if [start, end) holds a ']' that is not paired with a '['
 * found earlier in the same range, otherwise 0.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 stack;

	for (stack = 0; start < end; start++)
		if (buf[start] == '[')
			stack++;
		else if (buf[start] == ']' && 0 == stack)
			return(1);
		else if (buf[start] == ']')
			stack--;
	return(0);
}

/*
 * If we're in the SYNOPSIS section and we've encountered brackets in an
 * ordinary paragraph, then try to see whether we're an [-option].
 * Do this, if we're an opening bracket, by first seeing if we have a
 * matching end via hasmatch().
 * If we're an ending bracket, see if we have a stack already.
 * On success, print "Oo" or "Oc" on a line of its own, step over the
 * bracket and any blanks after it, and return 1.
 * Return 0 with nothing consumed if the bracket is to be printed as is.
 */
static int
dosynopsisop(const char *buf, int *last,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oo");
		(*opstack)++;
	} else if ('[' == buf[*start])
		return(0);

	if (']' == buf[*start] && *opstack > 0) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oc");
		(*opstack)--;
	} else if (']' == buf[*start])
		return(0);

	(*start)++;
	*last = '\n';
	/* Never look past the end of the paragraph. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}

/*
 * Format multiple "Nm" manpage names in the NAME section.
 * The names in [*start, end) are separated by commas; each one gets
 * an "Nm" line of its own.
 */
static void
donamenm(struct state *st, const char *buf, size_t *start, size_t end)
{
	size_t		 word;

	while (*start < end && ' ' == buf[*start])
		(*start)++;

	if (end == *start) {
		puts(".Nm unknown");
		return;
	}

	while (*start < end) {
		fputs(".Nm ", stdout);
		for (word = *start; word < end; word++)
			if (',' == buf[word])
				break;
		formatcodeln(st, buf, start, word, 1);
		if (*start == end) {
			putchar('\n');
			continue;
		}
		assert(',' == buf[*start]);
		puts(" ,");
		(*start)++;
		while (*start < end && ' ' == buf[*start])
			(*start)++;
	}
}

/*
 * Ordinary paragraph.
 * Well, this is really the hardest--POD seems to assume that, for
 * example, a leading space implies a newline, and so on.
 * Lots of other snakes in the grass: escaping a newline followed by a
 * period (accidental mdoc(7) control), double-newlines after macro
 * passages, etc.
 */
static void
ordinary(struct state *st, const char *buf, size_t start, size_t end)
{
	size_t		 i, j, opstack;
	int		 seq;

	if ( ! st->parsing || st->paused)
		return;

	/*
	 * Special-case: the NAME section.
	 * If we find a "-" when searching from the end, assume that
	 * we're in "name - description" format.
	 * To wit, print out a "Nm" and "Nd" in that format.
	 * A paragraph shorter than two bytes can't hold one (and
	 * "end - 2" must not wrap around).
	 */
	if (SECT_NAME == st->sect && end - start >= 2) {
		for (i = end - 2; i > start; i--)
			if ('-' == buf[i] && ' ' == buf[i + 1])
				break;
		if ('-' == buf[i]) {
			j = i;
			/* Roll over multiple "-". */
			for ( ; i > start; i--)
				if ('-' != buf[i])
					break;
			donamenm(st, buf, &start, i + 1);
			start = j + 1;
			while (start < end && ' ' == buf[start])
				start++;
			fputs(".Nd ", stdout);
			formatcodeln(st, buf, &start, end, 1);
			putchar('\n');
			return;
		}
	}

	if ( ! st->haspar)
		puts(".Pp");

	st->haspar = 0;
	last = '\n';
	opstack = 0;

	for (seq = 0; start < end; seq++) {
		/* 
		 * Loop til we get either to a newline or escape. 
		 * Escape initial control characters.
		 */
		while (start < end) {
			if (start < end - 1 && '<' == buf[start + 1])
				break;
			else if ('\n' == buf[start])
				break;
			else if ('\n' == last && '.' == buf[start])
				printf("\\&");
			else if ('\n' == last && '\'' == buf[start])
				printf("\\&");
			/*
			 * If we're in the SYNOPSIS, have square
			 * brackets indicate that we're opening and
			 * closing an optional context.
			 */
			if (SECT_SYNOPSIS == st->sect &&
				('[' == buf[start] || 
				 ']' == buf[start]) &&
				dosynopsisop(buf, &last, 
					&start, end, &opstack))
				continue;
			putchar(last = buf[start++]);
			if ('\\' == last)
				putchar('e');
		}

		if (start < end - 1 && '<' == buf[start + 1]) {
			/*
			 * We've encountered a format code.
			 * This is going to trigger a macro no matter
			 * what, so print a newline now.
			 * Then print the (possibly nested) macros and
			 * following that, a newline.
			 * Consume all whitespace so we don't
			 * accidentally start an implicit literal line.
			 * If the macro ends with a flush comma or
			 * period, let mdoc(7) handle it for us.
			 */
			if (formatcode(st, buf, &start, end, 0, 0, seq)) {
				if ((start == end - 1 || 
					(start < end - 1 &&
					 (' ' == buf[start + 1] ||
					  '\n' == buf[start + 1]))) &&
					('.' == buf[start] ||
					 ',' == buf[start])) {
					putchar(' ');
					putchar(buf[start++]);
				}
				putchar(last = '\n');
				while (start < end && ' ' == buf[start])
					start++;
			}
		} else if (start < end && '\n' == buf[start]) {
			/*
			 * Print the newline only if we haven't already
			 * printed a newline.
			 */
			if (last != '\n')
				putchar(last = buf[start]);
			if (++start >= end)
				continue;
			/*
			 * If we have whitespace next, eat it to prevent
			 * mdoc(7) from thinking that it's meant for
			 * verbatim text.
			 * It is--but if we start with that, we can't
			 * have a macro after it, which may be
			 * possible if we have an escape next.
			 */
			if (' ' == buf[start] || '\t' == buf[start]) {
				puts(".br");
				last = '\n';
			}
			for ( ; start < end; start++)
				if (' ' != buf[start] && '\t' != buf[start])
					break;
		} 
	}

	if (last != '\n')
		putchar('\n');
}

/*
 * There are three kinds of paragraphs: verbatim (starts with whitespace
 * of some sort), ordinary (starts without "=" marker), or a command
 * (default: starts with "=").
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (end == start)
		return;
	if (' ' == buf[start] || '\t' == buf[start])
		verbatim(st, buf, start, end);
	else if ('=' != buf[start])
		ordinary(st, buf, start, end);
	else
		command(st, buf, start, end);
}

/*
 * Loop around paragraphs within a document, processing each one in the
 * POD way.
 * Prints the mdoc(7) preamble first: "fname" supplies the default
 * title (and, via a "pm" suffix, the default section) and "tm" the
 * default date.
 */
static void
dofile(const struct args *args, const char *fname, 
	const struct tm *tm, const char *buf, size_t sz)
{
	size_t		 sup, end, i, cur = 0;
	struct state	 st;
	const char	*section, *date;
	char		 datebuf[64];
	char		*title, *cp;

	if (0 == sz)
		return;

	/* Title is last path component of the filename. */

	if (NULL != args->title)
		title = xstrdup(args->title);
	else if (NULL != (cp = strrchr(fname, '/')))
		title = xstrdup(cp + 1);
	else
		title = xstrdup(fname);

	/* Section is 1 unless suffix is "pm". */

	if (NULL == (section = args->section)) {
		section = "1";
		if (NULL != (cp = strrchr(title, '.'))) {
			*cp++ = '\0';
			if (0 == strcmp(cp, "pm"))
				section = PERL_SECTION;
		}
	}

	/* Date.  Or the given "tm" if not supplied. */

	if (NULL == (date = args->date)) {
		strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
		date = datebuf;
	}

	for (cp = title; '\0' != *cp; cp++)
		*cp = (char)toupper((unsigned char)*cp);

	/* The usual mdoc(7) preamble. */

	printf(".Dd %s\n", date);
	printf(".Dt %s %s\n", title, section);
	puts(".Os");

	free(title);

	memset(&st, 0, sizeof(struct state));
	assert(sz > 0);

	/* Main loop over file contents. */

	while (cur < sz) {
		/* Read until next paragraph. */
		for (i = cur + 1; i < sz; i++)
			if ('\n' == buf[i] && '\n' == buf[i - 1]) {
				/* Consume blank paragraphs. */
				while (i + 1 < sz && '\n' == buf[i + 1])
					i++;
				break;
			}

		/* Adjust end marker for EOF. */
		end = i < sz ? i - 1 : 
			('\n' == buf[sz - 1] ? sz - 1 : sz);
		sup = i < sz ? end + 2 : sz;

		/* Process paragraph and adjust start. */
		dopar(&st, buf, cur, end);
		cur = sup;
	}
}

/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The date handed to dofile() is the file's modification time, or the
 * current time for stdin or if fstat(2) fails.
 * Returns 1 on success and 0 (after printing a diagnostic) on failure;
 * running out of memory exits the program.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int	 	 fd;
	char		*buf, *nbuf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ? 
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/* dofile() hands this straight to strftime(3). */
	if (NULL == tm) {
		perror("localtime");
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	/* 
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur)  {
			bufsz *= 2;
			if (NULL == (nbuf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
			buf = nbuf;
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		perror(fname);
		free(buf);
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ? 
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}

/*
 * Convert one POD file (or standard input if no file is given, or the
 * file is "-") into mdoc(7) on standard output.
 */
int
main(int argc, char *argv[])
{
	const char	*fname, *name;
	struct args	 args;
	int		 c;

	name = strrchr(argv[0], '/');
	if (name == NULL)
		name = argv[0];
	else
		++name;

	memset(&args, 0, sizeof(struct args));
	fname = "-";

	/* Options other than -d, -n and -s are accepted but ignored. */
	while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
		switch (c) {
		case ('h'):
			/* FALLTHROUGH */
		case ('l'):
			/* FALLTHROUGH */
		case ('c'):
			/* FALLTHROUGH */
		case ('o'):
			/* FALLTHROUGH */
		case ('q'):
			/* FALLTHROUGH */
		case ('r'):
			/* FALLTHROUGH */
		case ('u'):
			/* FALLTHROUGH */
		case ('v'):
			/* Ignore these. */
			break;
		case ('d'):
			args.date = optarg;
			break;
		case ('n'):
			args.title = optarg;
			break;
		case ('s'):
			args.section = optarg;
			break;
		default:
			goto usage;
		}

	argc -= optind;
	argv += optind;

	/* Accept only a single input file. */

	if (argc > 1)
		goto usage;
	else if (1 == argc)
		fname = *argv;

	return(readfile(&args, fname) ? 
		EXIT_SUCCESS : EXIT_FAILURE);

usage:
	fprintf(stderr, "usage: %s [-d date] "
		"[-n title] [-s section]\n", name);

	return(EXIT_FAILURE);
}