===================================================================
RCS file: /cvs/pod2mdoc/pod2mdoc.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -p -r1.3 -r1.4
--- pod2mdoc/pod2mdoc.c 2014/03/20 15:18:56 1.3
+++ pod2mdoc/pod2mdoc.c 2014/03/20 15:29:57 1.4
@@ -1,4 +1,4 @@
-/* $Id: pod2mdoc.c,v 1.3 2014/03/20 15:18:56 schwarze Exp $ */
+/* $Id: pod2mdoc.c,v 1.4 2014/03/20 15:29:57 schwarze Exp $ */
 /*
  * Copyright (c) 2014 Kristaps Dzonsons
  *
@@ -32,12 +32,22 @@ struct args {
 	const char *section; /* override "Dt" section */
 };
 
+enum list {
+	LIST_BULLET = 0,
+	LIST_ENUM,
+	LIST_TAG,
+	LIST__MAX
+};
+
 struct state {
 	int parsing; /* after =cut of before command */
 	int paused; /* in =begin and before =end */
 	int haspar; /* in paragraph: do we need Pp? */
 	int isname; /* are we the NAME section? */
 	const char *fname; /* file being parsed */
+#define LIST_STACKSZ 128
+	enum list lstack[LIST_STACKSZ]; /* open lists */
+	size_t lpos; /* where in list stack */
 };
 
 enum fmt {
@@ -264,23 +274,26 @@ formatcode(const char *buf, size_t *start,
 			continue;
 		}
 
-		/*
-		 * Make sure that any macro-like words (or
-		 * really any word starting with a capital
-		 * letter) is assumed to be a macro that must be
-		 * escaped.
-		 * XXX: should this be isalpha()?
-		 */
-		if ((' ' == last || '\n' == last) &&
-		    isupper(buf[*start]))
-			printf("\\&");
+		/*
+		 * Make sure that any macro-like words (or
+		 * really any word starting with a capital
+		 * letter) is assumed to be a macro that must be
+		 * escaped.
+		 * This matches "Xx " and "XxEOLN".
+		 */
+		if ((' ' == last || '\n' == last) &&
+		    end - *start > 1 &&
+		    isupper((int)buf[*start]) &&
+		    islower((int)buf[*start + 1]) &&
+		    (end - *start == 2 ||
+		     ' ' == buf[*start + 2]))
+			printf("\\&");
 
-		last = buf[*start];
-		if ('\n' == last)
+		/* Suppress newline. */
+		if ('\n' == (last = buf[(*start)++]))
 			last = ' ';
 
-		putchar(last);
-		(*start)++;
+		putchar(last);
 	}
 
 	if ( ! nomacro && FMT_CODE == fmt)
@@ -322,19 +335,57 @@ formatcodeln(const char *buf, size_t *start, size_t en
 {
 	int last;
 
-	last = '\n';
+	last = ' ';
 	while (*start < end) {
 		if (*start + 1 < end && '<' == buf[*start + 1]) {
 			formatcode(buf, start, end, 1, last, nomacro);
 			continue;
 		}
+		/*
+		 * Since we're already on a macro line, we want to make
+		 * sure that we don't inadvertently invoke a macro.
+		 * We need to do this carefully because section names
+		 * are used in troff and we don't want to escape
+		 * something that needn't be escaped.
+		 */
+		if (' ' == last && end - *start > 1 &&
+		    isupper((int)buf[*start]) &&
+		    islower((int)buf[*start + 1]) &&
+		    (end - *start == 2 ||
+		     ' ' == buf[*start + 2]))
+			printf("\\&");
+
 		if ('\n' != buf[*start])
 			putchar(last = buf[*start]);
+		else
+			putchar(last = ' ');
 		(*start)++;
 	}
 }
 
 /*
+ * Guess at what kind of list we are.
+ * These are taken straight from the POD manual.
+ * I don't know what people do in real life.
+ */
+static enum list
+listguess(const char *buf, size_t start, size_t end)
+{
+	size_t len = end - start;
+
+	assert(end >= start);
+
+	if (len == 1 && '*' == buf[start])
+		return(LIST_BULLET);
+	if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
+		return(LIST_ENUM);
+	else if (len == 1 && '1' == buf[start])
+		return(LIST_ENUM);
+	else
+		return(LIST_TAG);
+}
+
+/*
  * A command paragraph, as noted in the perlpod manual, just indicates
  * that we should do something, optionally with some text to print as
  * well.
@@ -411,21 +462,70 @@ command(struct state *st, const char *buf, size_t star
 		st->haspar = 1;
 		break;
 	case (CMD_OVER):
-		/*
-		 * TODO: we should be doing this after we process the
-		 * first =item to see whether we'll do an -enum,
-		 * -bullet, or something else.
+		/*
+		 * If we have an existing list that hasn't had an =item
+		 * yet, then make sure that we open it now.
+		 * We use the default list type, but that can't be
+		 * helped (we haven't seen any items yet).
 		 */
-		puts(".Bl -tag -width Ds");
+		if (st->lpos > 0)
+			if (LIST__MAX == st->lstack[st->lpos - 1]) {
+				st->lstack[st->lpos - 1] = LIST_TAG;
+				puts(".Bl -tag -width Ds");
+			}
+		st->lpos++;
+		assert(st->lpos < LIST_STACKSZ);
+		st->lstack[st->lpos - 1] = LIST__MAX;
 		break;
 	case (CMD_ITEM):
-		printf(".It ");
-		formatcodeln(buf, &start, end, 0);
-		putchar('\n');
+		assert(st->lpos > 0);
+		/*
+		 * If we're the first =item, guess at what our content
+		 * will be: "*" is a bullet list, "1." is a numbered
+		 * list, and everything is tagged.
+		 */
+		if (LIST__MAX == st->lstack[st->lpos - 1]) {
+			st->lstack[st->lpos - 1] =
+				listguess(buf, start, end);
+			switch (st->lstack[st->lpos - 1]) {
+			case (LIST_BULLET):
+				puts(".Bl -bullet");
+				break;
+			case (LIST_ENUM):
+				puts(".Bl -enum");
+				break;
+			default:
+				puts(".Bl -tag -width Ds");
+				break;
+			}
+		}
+		switch (st->lstack[st->lpos - 1]) {
+		case (LIST_TAG):
+			printf(".It ");
+			formatcodeln(buf, &start, end, 0);
+			putchar('\n');
+			break;
+		case (LIST_ENUM):
+			/* FALLTHROUGH */
+		case (LIST_BULLET):
+			/*
+			 * Abandon the remainder of the paragraph
+			 * because we're going to be a bulletted or
+			 * numbered list.
+			 */
+			puts(".It");
+			break;
+		default:
+			abort();
+		}
 		st->haspar = 1;
 		break;
 	case (CMD_BACK):
-		puts(".El");
+		/* Make sure we don't back over the stack. */
+		if (st->lpos > 0) {
+			st->lpos--;
+			puts(".El");
+		}
 		break;
 	case (CMD_BEGIN):
 		/*
@@ -501,6 +601,7 @@ ordinary(struct state *st, const char *buf, size_t sta
 		for ( ; i > start; i--)
 			if ('-' != buf[i])
 				break;
+		/* FIXME: escape macro-like words etc. */
 		printf(".Nm %.*s\n",
 			(int)((i + 1) - start), &buf[start]);
 		printf(".Nd %.*s\n",