=================================================================== RCS file: /cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.12 retrieving revision 1.15 diff -u -p -r1.12 -r1.15 --- pod2mdoc/pod2mdoc.c 2014/04/01 11:58:32 1.12 +++ pod2mdoc/pod2mdoc.c 2014/04/02 14:50:09 1.15 @@ -1,4 +1,4 @@ -/* $Id: pod2mdoc.c,v 1.12 2014/04/01 11:58:32 kristaps Exp $ */ +/* $Id: pod2mdoc.c,v 1.15 2014/04/02 14:50:09 kristaps Exp $ */ /* * Copyright (c) 2014 Kristaps Dzonsons * @@ -240,6 +240,72 @@ trylink(const char *buf, size_t *start, size_t end, si } /* + * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section, + * then it's likely that we're a flag. + * Our flag might be followed by an argument, so make sure that we're + * accounting for that, too. + * If we don't have a flag at all, however, then assume we're an "Ar". + */ +static void +dosynopsisfl(const char *buf, size_t *start, size_t end) +{ + size_t i; +again: + assert(*start + 1 < end); + assert('-' == buf[*start]); + + if ( ! isalnum((int)buf[*start + 1]) && + '?' != buf[*start + 1] && + '-' != buf[*start + 1]) { + (*start)--; + fputs("Ar ", stdout); + return; + } + + (*start)++; + for (i = *start; i < end; i++) + if (isalnum((int)buf[i])) + continue; + else if ('?' == buf[i]) + continue; + else if ('-' == buf[i]) + continue; + else if ('_' == buf[i]) + continue; + else + break; + + assert(i < end); + + if ( ! 
(' ' == buf[i] || '>' == buf[i])) { + printf("Ar "); + return; + } + + printf("Fl "); + if (end - *start > 1 && + isupper((int)buf[*start]) && + islower((int)buf[*start + 1]) && + (end - *start == 2 || + ' ' == buf[*start + 2])) + printf("\\&"); + printf("%.*s ", (int)(i - *start), &buf[*start]); + *start = i; + + if (' ' == buf[i]) { + while (i < end && ' ' == buf[i]) + i++; + assert(i < end); + if ('-' == buf[i]) { + *start = i; + goto again; + } + printf("Ar "); + *start = i; + } +} + +/* * We're at the character in front of a format code, which is structured * like X<...> and can contain nested format codes. * This consumes the whole format code, and any nested format codes, til @@ -248,13 +314,16 @@ trylink(const char *buf, size_t *start, size_t end, si * been printed to the current line. * If "nomacro", then we don't print any macros, just contained data * (e.g., following "Sh" or "Nm"). + * "pos" is only significant in SYNOPSIS, and should be 0 when invoked + * as the first format code on a line (for decoration as an "Nm"), + * non-zero otherwise. * Return whether we've printed a macro or not--in other words, whether * this should trigger a subsequent newline (this should be ignored when * reentrant). */ static int -formatcode(struct state *st, const char *buf, - size_t *start, size_t end, int reentrant, int nomacro) +formatcode(struct state *st, const char *buf, size_t *start, + size_t end, int reentrant, int nomacro, int pos) { enum fmt fmt; size_t i, j, dsz; @@ -360,27 +429,14 @@ formatcode(struct state *st, const char *buf, printf("Em "); break; case (FMT_BOLD): - /* - * Doclifting: if we're a bold "-xx" and we're - * in the SYNOPSIS section, then it's likely - * that we're a flag. - * Be really strict: only do this when the dash - * is followed by alnums til the end marker, - * which mustn't be a custom. - */ - if (SECT_SYNOPSIS == st->sect && - end - *start > 1 && - '-' == buf[*start] && - (isalnum((int)buf[*start + 1]) || - '?' 
== buf[*start + 1])) { - for (i = *start + 1; i < end; i++) - if ( ! isalnum((int)buf[i])) - break; - if (i < end && '>' == buf[i]) { - (*start)++; - printf("Fl "); - break; - } + if (SECT_SYNOPSIS == st->sect) { + if (1 == dsz && '-' == buf[*start]) + dosynopsisfl(buf, start, end); + else if (0 == pos) + printf("Nm "); + else + printf("Ar "); + break; } printf("Sy "); break; @@ -430,7 +486,7 @@ formatcode(struct state *st, const char *buf, } } if (*start + 1 < end && '<' == buf[*start + 1]) { - formatcode(st, buf, start, end, 1, nomacro); + formatcode(st, buf, start, end, 1, nomacro, 1); continue; } @@ -492,7 +548,7 @@ formatcodeln(struct state *st, const char *buf, last = ' '; while (*start < end) { if (*start + 1 < end && '<' == buf[*start + 1]) { - formatcode(st, buf, start, end, 1, nomacro); + formatcode(st, buf, start, end, 1, nomacro, 1); continue; } /* @@ -755,6 +811,61 @@ verbatim(struct state *st, const char *buf, size_t sta } /* + * See dosynopsisop(). + */ +static int +hasmatch(const char *buf, size_t start, size_t end) +{ + size_t stack; + + for (stack = 0; start < end; start++) + if (buf[start] == '[') + stack++; + else if (buf[start] == ']' && 0 == stack) + return(1); + else if (buf[start] == ']') + stack--; + return(0); +} + +/* + * If we're in the SYNOPSIS section and we've encountered brackets in an + * ordinary paragraph, then try to see whether we're an [-option]. + * Do this, if we're an opening bracket, by first seeing if we have a + * matching end via hasmatch(). + * If we're an ending bracket, see if we have a stack already. 
+ */ +static int +dosynopsisop(const char *buf, int *last, + size_t *start, size_t end, size_t *opstack) +{ + + assert('[' == buf[*start] || ']' == buf[*start]); + + if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { + if ('\n' != *last) + putchar('\n'); + puts(".Oo"); + (*opstack)++; + } else if ('[' == buf[*start]) + return(0); + + if (']' == buf[*start] && *opstack > 0) { + if ('\n' != *last) + putchar('\n'); + puts(".Oc"); + (*opstack)--; + } else if (']' == buf[*start]) + return(0); + + (*start)++; + *last = '\n'; + while (' ' == buf[*start]) + (*start)++; + return(1); +} + +/* * Ordinary paragraph. * Well, this is really the hardest--POD seems to assume that, for * example, a leading space implies a newline, and so on. @@ -765,7 +876,8 @@ verbatim(struct state *st, const char *buf, size_t sta static void ordinary(struct state *st, const char *buf, size_t start, size_t end) { - size_t i, j; + size_t i, j, opstack; + int seq; if ( ! st->parsing || st->paused) return; @@ -777,8 +889,8 @@ ordinary(struct state *st, const char *buf, size_t sta * To wit, print out a "Nm" and "Nd" in that format. */ if (SECT_NAME == st->sect) { - for (i = end - 1; i > start; i--) - if ('-' == buf[i]) + for (i = end - 2; i > start; i--) + if ('-' == buf[i] && ' ' == buf[i + 1]) break; if ('-' == buf[i]) { j = i; @@ -786,11 +898,11 @@ ordinary(struct state *st, const char *buf, size_t sta for ( ; i > start; i--) if ('-' != buf[i]) break; - printf(".Nm "); + fputs(".Nm ", stdout); formatcodeln(st, buf, &start, i + 1, 1); putchar('\n'); start = j + 1; - printf(".Nd "); + fputs(".Nd ", stdout); formatcodeln(st, buf, &start, end, 1); putchar('\n'); return; @@ -802,8 +914,9 @@ ordinary(struct state *st, const char *buf, size_t sta st->haspar = 0; last = '\n'; + opstack = 0; - while (start < end) { + for (seq = 0; start < end; seq++) { /* * Loop til we get either to a newline or escape. * Escape initial control characters. 
@@ -817,26 +930,17 @@ ordinary(struct state *st, const char *buf, size_t sta printf("\\&"); else if ('\n' == last && '\'' == buf[start]) printf("\\&"); -#if notyet /* * If we're in the SYNOPSIS, have square * brackets indicate that we're opening and * closing an optional context. */ - if (SECT_SYNOPSIS == st->sect) { - if ('[' == buf[start] || - ']' == buf[start]) { - if (last != '\n') - putchar('\n'); - if ('[' == buf[start]) - printf(".Oo\n"); - else - printf(".Oc\n"); - start++; - continue; - } - } -#endif + if (SECT_SYNOPSIS == st->sect && + ('[' == buf[start] || + ']' == buf[start]) && + dosynopsisop(buf, &last, + &start, end, &opstack)) + continue; putchar(last = buf[start++]); if ('\\' == last) putchar('e'); @@ -852,7 +956,7 @@ ordinary(struct state *st, const char *buf, size_t sta * Consume all whitespace so we don't * accidentally start an implicit literal line. */ - if (formatcode(st, buf, &start, end, 0, 0)) { + if (formatcode(st, buf, &start, end, 0, 0, seq)) { putchar(last = '\n'); while (start < end && ' ' == buf[start]) start++;