Annotation of pod2mdoc/pod2mdoc.c, Revision 1.19
1.19 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.18 2014/04/02 22:36:56 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * The manual section in which Perl module manuals are found.
 * Some systems (Mac OS X) use "3pm"; others (OpenBSD, etc.) use "3p".
 * XXX: if you change this, change pod2mdoc.1 as well.
 */
#define	PERL_SECTION	"3p"
35:
/*
 * Overrides for the document prologue.
 * A NULL member means that no override was supplied and the value is
 * derived from the input instead.
 */
struct	args {
	const char	*title;   /* replaces the "Dt" title */
	const char	*date;    /* replaces the "Dd" date */
	const char	*section; /* replaces the "Dt" section */
};
41:
/*
 * The kinds of list an =over block may become.
 * LIST__MAX doubles as the marker for a list that has been pushed on
 * the stack but whose ".Bl" has not been printed yet.
 */
enum	list {
	LIST_BULLET = 0, /* ".Bl -bullet" */
	LIST_ENUM,       /* ".Bl -enum" */
	LIST_TAG,        /* ".Bl -tag" */
	LIST__MAX
};
48:
/* Sections (as named by =head1) that receive special treatment. */
enum	sect {
	SECT_NONE = 0,	/* any other section */
	SECT_NAME,	/* NAME section */
	SECT_SYNOPSIS	/* SYNOPSIS section */
};
54:
1.1 schwarze 55: struct state {
56: int parsing; /* after =cut of before command */
57: int paused; /* in =begin and before =end */
58: int haspar; /* in paragraph: do we need Pp? */
1.11 kristaps 59: enum sect sect; /* which section are we in? */
1.1 schwarze 60: const char *fname; /* file being parsed */
1.4 schwarze 61: #define LIST_STACKSZ 128
62: enum list lstack[LIST_STACKSZ]; /* open lists */
63: size_t lpos; /* where in list stack */
1.1 schwarze 64: };
65:
/*
 * POD formatting codes.
 * The order must agree with the fmts[] lookup table.
 */
enum	fmt {
	FMT_ITALIC = 0,	/* I<> */
	FMT_BOLD,	/* B<> */
	FMT_CODE,	/* C<> */
	FMT_LINK,	/* L<> */
	FMT_ESCAPE,	/* E<> */
	FMT_FILE,	/* F<> */
	FMT_NBSP,	/* S<> */
	FMT_INDEX,	/* X<> */
	FMT_NULL,	/* Z<> */
	FMT__MAX
};
78:
/*
 * POD command paragraphs ("=name").
 * The order must agree with the cmds[] lookup table.
 */
enum	cmd {
	CMD_POD = 0,	/* =pod */
	CMD_HEAD1,	/* =head1 */
	CMD_HEAD2,	/* =head2 */
	CMD_HEAD3,	/* =head3 */
	CMD_HEAD4,	/* =head4 */
	CMD_OVER,	/* =over */
	CMD_ITEM,	/* =item */
	CMD_BACK,	/* =back */
	CMD_BEGIN,	/* =begin */
	CMD_END,	/* =end */
	CMD_FOR,	/* =for */
	CMD_ENCODING,	/* =encoding */
	CMD_CUT,	/* =cut */
	CMD__MAX
};
95:
96: static const char *const cmds[CMD__MAX] = {
97: "pod", /* CMD_POD */
98: "head1", /* CMD_HEAD1 */
99: "head2", /* CMD_HEAD2 */
100: "head3", /* CMD_HEAD3 */
101: "head4", /* CMD_HEAD4 */
102: "over", /* CMD_OVER */
103: "item", /* CMD_ITEM */
104: "back", /* CMD_BACK */
105: "begin", /* CMD_BEGIN */
106: "end", /* CMD_END */
107: "for", /* CMD_FOR */
108: "encoding", /* CMD_ENCODING */
109: "cut" /* CMD_CUT */
110: };
111:
112: static const char fmts[FMT__MAX] = {
113: 'I', /* FMT_ITALIC */
114: 'B', /* FMT_BOLD */
115: 'C', /* FMT_CODE */
116: 'L', /* FMT_LINK */
117: 'E', /* FMT_ESCAPE */
118: 'F', /* FMT_FILE */
119: 'S', /* FMT_NBSP */
120: 'X', /* FMT_INDEX */
121: 'Z' /* FMT_NULL */
122: };
123:
/*
 * The most recent output character as tracked by the formatting
 * routines; consulted to decide whether a newline, a space or an
 * escape must be emitted next.
 */
static	int last;
125:
1.1 schwarze 126: /*
127: * Given buf[*start] is at the start of an escape name, read til the end
128: * of the escape ('>') then try to do something with it.
129: * Sets start to be one after the '>'.
130: */
131: static void
132: formatescape(const char *buf, size_t *start, size_t end)
133: {
134: char esc[16]; /* no more needed */
135: size_t i, max;
136:
137: max = sizeof(esc) - 1;
138: i = 0;
139: /* Read til our buffer is full. */
140: while (*start < end && '>' != buf[*start] && i < max)
141: esc[i++] = buf[(*start)++];
142: esc[i] = '\0';
143:
144: if (i == max) {
145: /* Too long... skip til we end. */
146: while (*start < end && '>' != buf[*start])
147: (*start)++;
148: return;
149: } else if (*start >= end)
150: return;
151:
152: assert('>' == buf[*start]);
153: (*start)++;
154:
155: /*
156: * TODO: right now, we only recognise the named escapes.
157: * Just let the rest of them go.
158: */
1.6 kristaps 159: if (0 == strcmp(esc, "lt"))
1.1 schwarze 160: printf("\\(la");
161: else if (0 == strcmp(esc, "gt"))
162: printf("\\(ra");
163: else if (0 == strcmp(esc, "vb"))
164: printf("\\(ba");
165: else if (0 == strcmp(esc, "sol"))
166: printf("\\(sl");
1.6 kristaps 167: else
168: return;
169:
170: last = 'a';
1.1 schwarze 171: }
172:
173: /*
1.9 kristaps 174: * Run some heuristics to intuit a link format.
1.19 ! kristaps 175: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 176: * that the caller can safely just continue processing.
1.19 ! kristaps 177: * If this is just an empty tag, I'll return 0.
1.9 kristaps 178: */
179: static int
180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
181: {
1.19 ! kristaps 182: size_t linkstart, realend, linkend, i, j, textsz;
1.18 kristaps 183: const char *text;
1.9 kristaps 184:
185: /*
186: * Scan to the start of the terminus.
187: * This function is more or less replicated in the formatcode()
188: * for null or index formatting codes.
189: */
1.19 ! kristaps 190: for (linkstart = realend = *start; realend < end; realend++) {
! 191: if ('>' != buf[realend])
1.9 kristaps 192: continue;
193: else if (dsz == 1)
194: break;
1.19 ! kristaps 195: assert(realend > 0);
! 196: if (' ' != buf[realend - 1])
1.9 kristaps 197: continue;
1.19 ! kristaps 198: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 199: if ('>' != buf[i++])
200: break;
201: if (dsz == j)
202: break;
203: }
1.19 ! kristaps 204:
! 205: /* Ignore stubs. */
! 206: if (realend == end || realend == *start)
1.9 kristaps 207: return(0);
208:
1.19 ! kristaps 209: /* Set linkend to the end of content. */
! 210: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 211:
1.19 ! kristaps 212: /* Re-scan to see if we have a title or section. */
1.18 kristaps 213: text = &buf[*start];
1.19 ! kristaps 214: for (textsz = *start; textsz < linkend; textsz++)
! 215: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 216: break;
217:
1.19 ! kristaps 218: /* If we have a title, find the section. */
! 219: if (textsz < linkend && '|' == buf[textsz]) {
! 220: linkstart = textsz + 1;
1.18 kristaps 221: textsz = textsz - *start;
1.19 ! kristaps 222: for (i = linkstart; i < linkend; i++)
! 223: if ('/' == buf[i])
! 224: break;
! 225: if (i < linkend)
! 226: linkend = i;
! 227: } else {
1.18 kristaps 228: textsz = 0;
1.19 ! kristaps 229: if (textsz < linkend && '/' == buf[textsz])
! 230: linkend = textsz;
! 231: }
! 232:
! 233: *start = realend;
1.18 kristaps 234:
1.19 ! kristaps 235: j = linkend - linkstart;
! 236:
! 237: if (0 == j)
! 238: return(0);
! 239:
! 240: /* See if we qualify as being a link or not. */
! 241: if ((j > 5 && 0 == memcmp("http:", &buf[linkstart], j)) ||
! 242: (j > 6 && 0 == memcmp("https:", &buf[linkstart], j)) ||
! 243: (j > 4 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
! 244: (j > 5 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
! 245: (j > 4 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
! 246: (j > 4 && 0 == memcmp("afs:", &buf[linkstart], j))) {
! 247: printf("Lk %.*s", (int)j, &buf[linkstart]);
! 248: return(1);
! 249: }
! 250:
! 251: /* See if we qualify as a mailto. */
! 252: if (j > 7 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
! 253: printf("Mt %.*s", (int)j, &buf[linkstart]);
! 254: return(1);
! 255: }
! 256:
! 257: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
! 258: if ((j > 3 && ')' == buf[linkend - 1]) &&
! 259: ('(' == buf[linkend - 3])) {
! 260: printf("Xr %.*s %c", (int)(j - 3),
! 261: &buf[linkstart], buf[linkend - 2]);
! 262: return(1);
! 263: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
! 264: ('(' == buf[linkend - 4])) {
! 265: printf("Xr %.*s %.*s", (int)(j - 4),
! 266: &buf[linkstart], 2, &buf[linkend - 3]);
! 267: return(1);
! 268: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
! 269: ('(' == buf[linkend - 5])) {
! 270: printf("Xr %.*s %.*s", (int)(j - 5),
! 271: &buf[linkstart], 3, &buf[linkend - 4]);
! 272: return(1);
! 273: }
! 274:
! 275: /* Last try: do we have a double-colon? */
! 276: for (i = linkstart + 1; i < linkend; i++)
! 277: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 278: break;
1.9 kristaps 279:
1.19 ! kristaps 280: if (i < linkend)
1.10 kristaps 281: printf("Xr %.*s " PERL_SECTION,
1.19 ! kristaps 282: (int)j, &buf[linkstart]);
1.9 kristaps 283: else
1.19 ! kristaps 284: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 285:
286: return(1);
287: }
288:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 * On entry buf[*start] must be '-' and *start + 1 < end.
 * Prints "Fl " and/or "Ar " macros and advances "start" past whatever
 * was printed.
 * Bytes are converted to unsigned char before being handed to the
 * <ctype.h> classifiers, which are undefined for negative values.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/* A dash not followed by a flag-like character: not a flag. */
	if ( ! isalnum((unsigned char)buf[*start + 1]) &&
		'?' != buf[*start + 1] &&
		'-' != buf[*start + 1]) {
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Skip the dash and find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++)
		if (isalnum((unsigned char)buf[i]))
			continue;
		else if ('?' == buf[i])
			continue;
		else if ('-' == buf[i])
			continue;
		else if ('_' == buf[i])
			continue;
		else
			break;

	assert(i < end);

	/* The name must be ended by a space or the closing '>'. */
	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Escape a flag name that looks like a two-letter macro. */
	if (end - *start > 1 &&
		isupper((unsigned char)buf[*start]) &&
		islower((unsigned char)buf[*start + 1]) &&
		(end - *start == 2 ||
		 ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		/* Another flag follows: start over. */
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		/* Otherwise what follows is the flag's argument. */
		printf("Ar ");
		*start = i;
	}
}
354:
1.9 kristaps 355: /*
1.1 schwarze 356: * We're at the character in front of a format code, which is structured
357: * like X<...> and can contain nested format codes.
358: * This consumes the whole format code, and any nested format codes, til
359: * the end of matched production.
360: * If "reentrant", then we're being called after a macro has already
361: * been printed to the current line.
1.6 kristaps 362: * If "nomacro", then we don't print any macros, just contained data
363: * (e.g., following "Sh" or "Nm").
1.15 kristaps 364: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
365: * as the first format code on a line (for decoration as an "Nm"),
366: * non-zero otherwise.
1.6 kristaps 367: * Return whether we've printed a macro or not--in other words, whether
368: * this should trigger a subsequent newline (this should be ignored when
369: * reentrant).
1.1 schwarze 370: */
371: static int
1.15 kristaps 372: formatcode(struct state *st, const char *buf, size_t *start,
373: size_t end, int reentrant, int nomacro, int pos)
1.1 schwarze 374: {
375: enum fmt fmt;
1.5 kristaps 376: size_t i, j, dsz;
1.1 schwarze 377:
378: assert(*start + 1 < end);
379: assert('<' == buf[*start + 1]);
380:
1.6 kristaps 381: /*
382: * First, look up the format code.
383: * If it's not valid, then exit immediately.
384: */
385: for (fmt = 0; fmt < FMT__MAX; fmt++)
386: if (buf[*start] == fmts[fmt])
387: break;
388:
389: if (FMT__MAX == fmt) {
390: putchar(last = buf[(*start)++]);
1.8 kristaps 391: if ('\\' == last)
392: putchar('e');
1.6 kristaps 393: return(0);
394: }
395:
1.5 kristaps 396: /*
397: * Determine whether we're overriding our delimiter.
398: * According to POD, if we have more than one '<' followed by a
399: * space, then we need a space followed by matching '>' to close
400: * the expression.
401: * Otherwise we use the usual '<' and '>' matched pair.
402: */
403: i = *start + 1;
404: while (i < end && '<' == buf[i])
405: i++;
406: assert(i > *start + 1);
407: dsz = i - (*start + 1);
408: if (dsz > 1 && (i >= end || ' ' != buf[i]))
409: dsz = 1;
410:
411: /* Remember, if dsz>1, to jump the trailing space. */
412: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 413:
414: /*
1.6 kristaps 415: * Escapes and ignored codes (NULL and INDEX) don't print macro
416: * sequences, so just output them like normal text before
417: * processing for real macros.
1.1 schwarze 418: */
419: if (FMT_ESCAPE == fmt) {
420: formatescape(buf, start, end);
421: return(0);
422: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 423: /*
1.6 kristaps 424: * Just consume til the end delimiter, accounting for
425: * whether it's a custom one.
1.5 kristaps 426: */
427: for ( ; *start < end; (*start)++) {
428: if ('>' != buf[*start])
429: continue;
430: else if (dsz == 1)
431: break;
432: assert(*start > 0);
433: if (' ' != buf[*start - 1])
434: continue;
435: i = *start;
436: for (j = 0; i < end && j < dsz; j++)
437: if ('>' != buf[i++])
438: break;
439: if (dsz != j)
440: continue;
441: (*start) += dsz;
442: break;
443: }
1.1 schwarze 444: return(0);
445: }
446:
1.6 kristaps 447: /*
448: * Check whether we're supposed to print macro stuff (this is
449: * suppressed in, e.g., "Nm" and "Sh" macros).
450: */
1.1 schwarze 451: if ( ! nomacro) {
452: /*
453: * Print out the macro describing this format code.
454: * If we're not "reentrant" (not yet on a macro line)
455: * then print a newline, if necessary, and the macro
456: * indicator.
457: * Otherwise, offset us with a space.
458: */
1.6 kristaps 459: if ( ! reentrant) {
460: if (last != '\n')
461: putchar('\n');
1.1 schwarze 462: putchar('.');
1.6 kristaps 463: } else
1.1 schwarze 464: putchar(' ');
465:
466: /*
1.6 kristaps 467: * If we don't have whitespace before us (and none after
468: * the opening delimiter), then suppress macro
469: * whitespace with Pf.
1.1 schwarze 470: */
1.6 kristaps 471: if (' ' != last && '\n' != last && ' ' != buf[*start])
472: printf("Pf ");
473:
1.1 schwarze 474: switch (fmt) {
475: case (FMT_ITALIC):
476: printf("Em ");
477: break;
478: case (FMT_BOLD):
1.14 kristaps 479: if (SECT_SYNOPSIS == st->sect) {
480: if (1 == dsz && '-' == buf[*start])
481: dosynopsisfl(buf, start, end);
1.15 kristaps 482: else if (0 == pos)
483: printf("Nm ");
1.14 kristaps 484: else
485: printf("Ar ");
486: break;
487: }
488: printf("Sy ");
1.1 schwarze 489: break;
490: case (FMT_CODE):
1.2 schwarze 491: printf("Qo Li ");
1.1 schwarze 492: break;
493: case (FMT_LINK):
1.19 ! kristaps 494: /* Try to link; use "No" if it's empty. */
1.9 kristaps 495: if ( ! trylink(buf, start, end, dsz))
496: printf("No ");
1.1 schwarze 497: break;
498: case (FMT_FILE):
499: printf("Pa ");
500: break;
501: case (FMT_NBSP):
502: printf("No ");
503: break;
504: default:
505: abort();
506: }
507: }
508:
509: /*
1.6 kristaps 510: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 511: * find a nested format code.
1.1 schwarze 512: * Don't emit any newlines: since we're on a macro line, we
513: * don't want to break the line.
514: */
515: while (*start < end) {
1.5 kristaps 516: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 517: (*start)++;
518: break;
1.5 kristaps 519: } else if ('>' == buf[*start] &&
520: ' ' == buf[*start - 1]) {
521: /*
522: * Handle custom delimiters.
523: * These require a certain number of
524: * space-preceded carrots before we're really at
525: * the end.
526: */
527: i = *start;
528: for (j = 0; i < end && j < dsz; j++)
529: if ('>' != buf[i++])
530: break;
531: if (dsz == j) {
532: *start += dsz;
533: break;
534: }
1.1 schwarze 535: }
536: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15 kristaps 537: formatcode(st, buf, start, end, 1, nomacro, 1);
1.1 schwarze 538: continue;
539: }
1.3 schwarze 540:
1.4 schwarze 541: /*
542: * Make sure that any macro-like words (or
543: * really any word starting with a capital
544: * letter) is assumed to be a macro that must be
545: * escaped.
546: * This matches "Xx " and "XxEOLN".
547: */
548: if ((' ' == last || '\n' == last) &&
549: end - *start > 1 &&
550: isupper((int)buf[*start]) &&
551: islower((int)buf[*start + 1]) &&
552: (end - *start == 2 ||
553: ' ' == buf[*start + 2]))
554: printf("\\&");
1.3 schwarze 555:
1.4 schwarze 556: /* Suppress newline. */
1.6 kristaps 557: if ('\n' == buf[*start])
558: putchar(last = ' ');
559: else
560: putchar(last = buf[*start]);
1.4 schwarze 561:
1.8 kristaps 562: /* Protect against character escapes. */
563: if ('\\' == last)
564: putchar('e');
565:
1.6 kristaps 566: (*start)++;
567:
568: if (' ' == last)
569: while (*start < end && ' ' == buf[*start])
570: (*start)++;
1.1 schwarze 571: }
1.2 schwarze 572:
573: if ( ! nomacro && FMT_CODE == fmt)
574: printf(" Qc ");
1.1 schwarze 575:
576: /*
1.6 kristaps 577: * We're now subsequent the format code.
578: * If there isn't a space (or newline) here, and we haven't just
579: * printed a space, then suppress space.
1.1 schwarze 580: */
1.6 kristaps 581: if ( ! nomacro && ' ' != last)
582: if (' ' != buf[*start] && '\n' != buf[*start])
583: printf(" Ns ");
1.5 kristaps 584:
1.1 schwarze 585: return(1);
586: }
587:
588: /*
589: * Calls formatcode() til the end of a paragraph.
590: */
591: static void
1.11 kristaps 592: formatcodeln(struct state *st, const char *buf,
593: size_t *start, size_t end, int nomacro)
1.1 schwarze 594: {
595:
1.4 schwarze 596: last = ' ';
1.1 schwarze 597: while (*start < end) {
598: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15 kristaps 599: formatcode(st, buf, start, end, 1, nomacro, 1);
1.1 schwarze 600: continue;
601: }
1.4 schwarze 602: /*
603: * Since we're already on a macro line, we want to make
604: * sure that we don't inadvertently invoke a macro.
605: * We need to do this carefully because section names
606: * are used in troff and we don't want to escape
607: * something that needn't be escaped.
608: */
609: if (' ' == last && end - *start > 1 &&
610: isupper((int)buf[*start]) &&
611: islower((int)buf[*start + 1]) &&
612: (end - *start == 2 ||
613: ' ' == buf[*start + 2]))
614: printf("\\&");
615:
1.8 kristaps 616: if ('\n' == buf[*start])
617: putchar(last = ' ');
618: else
1.1 schwarze 619: putchar(last = buf[*start]);
1.8 kristaps 620:
621: /* Protect against character escapes. */
622: if ('\\' == last)
623: putchar('e');
624:
1.1 schwarze 625: (*start)++;
626: }
627: }
628:
629: /*
1.4 schwarze 630: * Guess at what kind of list we are.
631: * These are taken straight from the POD manual.
632: * I don't know what people do in real life.
633: */
634: static enum list
635: listguess(const char *buf, size_t start, size_t end)
636: {
637: size_t len = end - start;
638:
639: assert(end >= start);
640:
641: if (len == 1 && '*' == buf[start])
642: return(LIST_BULLET);
643: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
644: return(LIST_ENUM);
645: else if (len == 1 && '1' == buf[start])
646: return(LIST_ENUM);
647: else
648: return(LIST_TAG);
649: }
650:
651: /*
1.1 schwarze 652: * A command paragraph, as noted in the perlpod manual, just indicates
653: * that we should do something, optionally with some text to print as
654: * well.
655: */
656: static void
657: command(struct state *st, const char *buf, size_t start, size_t end)
658: {
659: size_t len, csz;
660: enum cmd cmd;
661:
662: assert('=' == buf[start]);
663: start++;
664: len = end - start;
665:
666: for (cmd = 0; cmd < CMD__MAX; cmd++) {
667: csz = strlen(cmds[cmd]);
668: if (len < csz)
669: continue;
670: if (0 == memcmp(&buf[start], cmd[cmds], csz))
671: break;
672: }
673:
674: /* Ignore bogus commands. */
675:
676: if (CMD__MAX == cmd)
677: return;
678:
679: start += csz;
1.8 kristaps 680: while (start < end && ' ' == buf[start])
681: start++;
682:
1.1 schwarze 683: len = end - start;
684:
685: if (st->paused) {
686: st->paused = CMD_END != cmd;
687: return;
688: }
689:
690: switch (cmd) {
691: case (CMD_POD):
692: break;
693: case (CMD_HEAD1):
694: /*
695: * The behaviour of head= follows from a quick glance at
696: * how pod2man handles it.
697: */
698: printf(".Sh ");
1.11 kristaps 699: st->sect = SECT_NONE;
700: if (end - start == 4) {
1.1 schwarze 701: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 702: st->sect = SECT_NAME;
703: } else if (end - start == 8) {
704: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
705: st->sect = SECT_SYNOPSIS;
706: }
707: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 708: putchar('\n');
709: st->haspar = 1;
710: break;
711: case (CMD_HEAD2):
712: printf(".Ss ");
1.11 kristaps 713: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 714: putchar('\n');
715: st->haspar = 1;
716: break;
717: case (CMD_HEAD3):
718: puts(".Pp");
719: printf(".Em ");
1.11 kristaps 720: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 721: putchar('\n');
722: puts(".Pp");
723: st->haspar = 1;
724: break;
725: case (CMD_HEAD4):
726: puts(".Pp");
727: printf(".No ");
1.11 kristaps 728: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 729: putchar('\n');
730: puts(".Pp");
731: st->haspar = 1;
732: break;
733: case (CMD_OVER):
1.4 schwarze 734: /*
735: * If we have an existing list that hasn't had an =item
736: * yet, then make sure that we open it now.
737: * We use the default list type, but that can't be
738: * helped (we haven't seen any items yet).
1.1 schwarze 739: */
1.4 schwarze 740: if (st->lpos > 0)
741: if (LIST__MAX == st->lstack[st->lpos - 1]) {
742: st->lstack[st->lpos - 1] = LIST_TAG;
743: puts(".Bl -tag -width Ds");
744: }
745: st->lpos++;
746: assert(st->lpos < LIST_STACKSZ);
747: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 748: break;
749: case (CMD_ITEM):
1.6 kristaps 750: if (0 == st->lpos) {
751: /*
752: * Bad markup.
753: * Try to compensate.
754: */
755: st->lstack[st->lpos] = LIST__MAX;
756: st->lpos++;
757: }
1.4 schwarze 758: assert(st->lpos > 0);
759: /*
760: * If we're the first =item, guess at what our content
761: * will be: "*" is a bullet list, "1." is a numbered
762: * list, and everything is tagged.
763: */
764: if (LIST__MAX == st->lstack[st->lpos - 1]) {
765: st->lstack[st->lpos - 1] =
766: listguess(buf, start, end);
767: switch (st->lstack[st->lpos - 1]) {
768: case (LIST_BULLET):
769: puts(".Bl -bullet");
770: break;
771: case (LIST_ENUM):
772: puts(".Bl -enum");
773: break;
774: default:
775: puts(".Bl -tag -width Ds");
776: break;
777: }
778: }
779: switch (st->lstack[st->lpos - 1]) {
780: case (LIST_TAG):
781: printf(".It ");
1.11 kristaps 782: formatcodeln(st, buf, &start, end, 0);
1.4 schwarze 783: putchar('\n');
784: break;
785: case (LIST_ENUM):
786: /* FALLTHROUGH */
787: case (LIST_BULLET):
788: /*
789: * Abandon the remainder of the paragraph
790: * because we're going to be a bulletted or
791: * numbered list.
792: */
793: puts(".It");
794: break;
795: default:
796: abort();
797: }
1.1 schwarze 798: st->haspar = 1;
799: break;
800: case (CMD_BACK):
1.4 schwarze 801: /* Make sure we don't back over the stack. */
802: if (st->lpos > 0) {
803: st->lpos--;
804: puts(".El");
805: }
1.1 schwarze 806: break;
807: case (CMD_BEGIN):
808: /*
809: * We disregard all types for now.
810: * TODO: process at least "text" in a -literal block.
811: */
812: st->paused = 1;
813: break;
814: case (CMD_FOR):
815: /*
816: * We ignore all types of encodings and formats
817: * unilaterally.
818: */
819: break;
820: case (CMD_ENCODING):
821: break;
822: case (CMD_CUT):
823: st->parsing = 0;
824: return;
825: default:
826: abort();
827: }
828:
829: /* Any command (but =cut) makes us start parsing. */
830: st->parsing = 1;
831: }
832:
833: /*
834: * Just pump out the line in a verbatim block.
835: */
836: static void
837: verbatim(struct state *st, const char *buf, size_t start, size_t end)
838: {
1.8 kristaps 839: int last;
1.1 schwarze 840:
841: if ( ! st->parsing || st->paused)
842: return;
843:
844: puts(".Bd -literal");
1.8 kristaps 845: for (last = ' '; start < end; start++) {
846: /*
847: * Handle accidental macros (newline starting with
848: * control character) and escapes.
849: */
850: if ('\n' == last)
1.7 kristaps 851: if ('.' == buf[start] || '\'' == buf[start])
852: printf("\\&");
1.8 kristaps 853: putchar(last = buf[start]);
854: if ('\\' == buf[start])
855: printf("e");
1.7 kristaps 856: }
857: putchar('\n');
1.1 schwarze 858: puts(".Ed");
859: }
860:
/*
 * Scan buf[start..end) for a closing square bracket that is not paired
 * with an opening bracket inside the same range.
 * Returns 1 if there is one, 0 otherwise.
 * See dosynopsisop().
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t		 depth, pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case ('['):
			depth++;
			break;
		case (']'):
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}

	return(0);
}
878:
/*
 * If we're in the SYNOPSIS section and we've encountered brackets in
 * an ordinary paragraph, then try to see whether we're an [-option].
 * Do this, if we're an opening bracket, by first seeing if we have a
 * matching end via hasmatch().
 * If we're an ending bracket, see if we have a stack already.
 * On entry buf[*start] must be '[' or ']'.
 * Returns 1 if an "Oo" or "Oc" line was printed, in which case the
 * bracket and any spaces following it (up to "end") are consumed and
 * *last is set to newline; returns 0, changing nothing, if the bracket
 * should be printed as ordinary text.
 */
static int
dosynopsisop(const char *buf, int *last,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oo");
		(*opstack)++;
	} else if ('[' == buf[*start])
		return(0);

	if (']' == buf[*start] && *opstack > 0) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oc");
		(*opstack)--;
	} else if (']' == buf[*start])
		return(0);

	(*start)++;
	*last = '\n';
	/* Eat following spaces, but never run past the paragraph. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}
915:
/*
 * Format the (possibly multiple, comma-separated) manpage names of the
 * NAME section, found in buf[*start..end), as one "Nm" line apiece.
 * If there are no names at all, print ".Nm unknown".
 */
static void
donamenm(struct state *st, const char *buf, size_t *start, size_t end)
{
	size_t		 comma;

	/* Skip leading spaces. */
	for ( ; *start < end; (*start)++)
		if (' ' != buf[*start])
			break;

	if (*start == end) {
		puts(".Nm unknown");
		return;
	}

	while (*start < end) {
		fputs(".Nm ", stdout);

		/* The name runs up to the next comma or the end. */
		comma = *start;
		while (comma < end && ',' != buf[comma])
			comma++;

		formatcodeln(st, buf, start, comma, 1);

		/* Last name: finish the line and stop. */
		if (*start == end) {
			putchar('\n');
			break;
		}

		/* Print the comma, then skip it and trailing spaces. */
		assert(',' == buf[*start]);
		puts(" ,");
		for ((*start)++; *start < end; (*start)++)
			if (' ' != buf[*start])
				break;
	}
}
949:
950: /*
1.1 schwarze 951: * Ordinary paragraph.
952: * Well, this is really the hardest--POD seems to assume that, for
953: * example, a leading space implies a newline, and so on.
954: * Lots of other snakes in the grass: escaping a newline followed by a
955: * period (accidental mdoc(7) control), double-newlines after macro
956: * passages, etc.
957: */
958: static void
959: ordinary(struct state *st, const char *buf, size_t start, size_t end)
960: {
1.13 kristaps 961: size_t i, j, opstack;
1.15 kristaps 962: int seq;
1.1 schwarze 963:
964: if ( ! st->parsing || st->paused)
965: return;
966:
967: /*
968: * Special-case: the NAME section.
969: * If we find a "-" when searching from the end, assume that
970: * we're in "name - description" format.
971: * To wit, print out a "Nm" and "Nd" in that format.
972: */
1.11 kristaps 973: if (SECT_NAME == st->sect) {
1.15 kristaps 974: for (i = end - 2; i > start; i--)
975: if ('-' == buf[i] && ' ' == buf[i + 1])
1.1 schwarze 976: break;
977: if ('-' == buf[i]) {
978: j = i;
979: /* Roll over multiple "-". */
980: for ( ; i > start; i--)
981: if ('-' != buf[i])
982: break;
1.17 kristaps 983: donamenm(st, buf, &start, i + 1);
1.5 kristaps 984: start = j + 1;
1.17 kristaps 985: while (start < end && ' ' == buf[start])
986: start++;
1.15 kristaps 987: fputs(".Nd ", stdout);
1.11 kristaps 988: formatcodeln(st, buf, &start, end, 1);
1.5 kristaps 989: putchar('\n');
1.1 schwarze 990: return;
991: }
992: }
993:
994: if ( ! st->haspar)
995: puts(".Pp");
996:
997: st->haspar = 0;
998: last = '\n';
1.13 kristaps 999: opstack = 0;
1.1 schwarze 1000:
1.15 kristaps 1001: for (seq = 0; start < end; seq++) {
1.1 schwarze 1002: /*
1003: * Loop til we get either to a newline or escape.
1004: * Escape initial control characters.
1005: */
1006: while (start < end) {
1007: if (start < end - 1 && '<' == buf[start + 1])
1008: break;
1009: else if ('\n' == buf[start])
1010: break;
1011: else if ('\n' == last && '.' == buf[start])
1012: printf("\\&");
1013: else if ('\n' == last && '\'' == buf[start])
1014: printf("\\&");
1.12 kristaps 1015: /*
1016: * If we're in the SYNOPSIS, have square
1017: * brackets indicate that we're opening and
1018: * closing an optional context.
1019: */
1.13 kristaps 1020: if (SECT_SYNOPSIS == st->sect &&
1021: ('[' == buf[start] ||
1022: ']' == buf[start]) &&
1023: dosynopsisop(buf, &last,
1024: &start, end, &opstack))
1025: continue;
1.1 schwarze 1026: putchar(last = buf[start++]);
1.8 kristaps 1027: if ('\\' == last)
1028: putchar('e');
1.1 schwarze 1029: }
1030:
1031: if (start < end - 1 && '<' == buf[start + 1]) {
1032: /*
1033: * We've encountered a format code.
1034: * This is going to trigger a macro no matter
1035: * what, so print a newline now.
1036: * Then print the (possibly nested) macros and
1037: * following that, a newline.
1.8 kristaps 1038: * Consume all whitespace so we don't
1039: * accidentally start an implicit literal line.
1.16 kristaps 1040: * If the macro ends with a flush comma or
1041: * period, let mdoc(7) handle it for us.
1.1 schwarze 1042: */
1.15 kristaps 1043: if (formatcode(st, buf, &start, end, 0, 0, seq)) {
1.16 kristaps 1044: if ((start == end - 1 ||
1045: (start < end - 1 &&
1046: (' ' == buf[start + 1] ||
1047: '\n' == buf[start + 1]))) &&
1048: ('.' == buf[start] ||
1049: ',' == buf[start])) {
1050: putchar(' ');
1051: putchar(buf[start++]);
1052: }
1.1 schwarze 1053: putchar(last = '\n');
1.6 kristaps 1054: while (start < end && ' ' == buf[start])
1055: start++;
1056: }
1.1 schwarze 1057: } else if (start < end && '\n' == buf[start]) {
1058: /*
1059: * Print the newline only if we haven't already
1060: * printed a newline.
1061: */
1062: if (last != '\n')
1063: putchar(last = buf[start]);
1064: if (++start >= end)
1065: continue;
1066: /*
1067: * If we have whitespace next, eat it to prevent
1068: * mdoc(7) from thinking that it's meant for
1069: * verbatim text.
1070: * It is--but if we start with that, we can't
1071: * have a macro subsequent it, which may be
1072: * possible if we have an escape next.
1073: */
1074: if (' ' == buf[start] || '\t' == buf[start]) {
1075: puts(".br");
1076: last = '\n';
1077: }
1078: for ( ; start < end; start++)
1079: if (' ' != buf[start] && '\t' != buf[start])
1080: break;
1.12 kristaps 1081: }
1.1 schwarze 1082: }
1083:
1084: if (last != '\n')
1085: putchar('\n');
1086: }
1087:
/*
 * Dispatch the paragraph buf[start..end) on its first character.
 * There are three kinds of paragraphs: verbatim (starts with a space
 * or tab), command (starts with "="), or ordinary (anything else).
 * An empty paragraph is ignored.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (end == start)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
1106:
1107: /*
1108: * Loop around paragraphs within a document, processing each one in the
1109: * POD way.
1110: */
1111: static void
1112: dofile(const struct args *args, const char *fname,
1113: const struct tm *tm, const char *buf, size_t sz)
1114: {
1115: size_t sup, end, i, cur = 0;
1116: struct state st;
1117: const char *section, *date;
1118: char datebuf[64];
1119: char *title, *cp;
1120:
1121: if (0 == sz)
1122: return;
1123:
1124: /* Title is last path component of the filename. */
1125:
1126: if (NULL != args->title)
1127: title = strdup(args->title);
1128: else if (NULL != (cp = strrchr(fname, '/')))
1129: title = strdup(cp + 1);
1130: else
1131: title = strdup(fname);
1132:
1133: if (NULL == title) {
1134: perror(NULL);
1135: exit(EXIT_FAILURE);
1136: }
1137:
1138: /* Section is 1 unless suffix is "pm". */
1139:
1140: if (NULL == (section = args->section)) {
1141: section = "1";
1142: if (NULL != (cp = strrchr(title, '.'))) {
1143: *cp++ = '\0';
1144: if (0 == strcmp(cp, "pm"))
1.10 kristaps 1145: section = PERL_SECTION;
1.1 schwarze 1146: }
1147: }
1148:
1149: /* Date. Or the given "tm" if not supplied. */
1150:
1151: if (NULL == (date = args->date)) {
1152: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1153: date = datebuf;
1154: }
1155:
1156: for (cp = title; '\0' != *cp; cp++)
1157: *cp = toupper((int)*cp);
1158:
1159: /* The usual mdoc(7) preamble. */
1160:
1161: printf(".Dd %s\n", date);
1162: printf(".Dt %s %s\n", title, section);
1163: puts(".Os");
1164:
1165: free(title);
1166:
1167: memset(&st, 0, sizeof(struct state));
1168: assert(sz > 0);
1169:
1170: /* Main loop over file contents. */
1171:
1172: while (cur < sz) {
1173: /* Read until next paragraph. */
1174: for (i = cur + 1; i < sz; i++)
1175: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1176: /* Consume blank paragraphs. */
1177: while (i + 1 < sz && '\n' == buf[i + 1])
1178: i++;
1179: break;
1180: }
1181:
1182: /* Adjust end marker for EOF. */
1183: end = i < sz ? i - 1 :
1184: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1185: sup = i < sz ? end + 2 : sz;
1186:
1187: /* Process paragraph and adjust start. */
1188: dopar(&st, buf, cur, end);
1189: cur = sup;
1190: }
1191: }
1192:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The timestamp passed along is the file's modification time, or the
 * current time for stdin or when fstat(2) fails.
 * Returns 1 on success and 0 if the file could not be opened or read
 * (after printing a diagnostic); exits the program if out of memory.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * This also guarantees that the next read(2) is never
		 * asked for zero bytes.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	/*
	 * Report a read error right away (while errno is intact),
	 * else process what we got.
	 * Both outcomes share the cleanup below so that the
	 * descriptor is released on the error path, too.
	 */
	rc = 0;
	if (ssz < 0)
		perror(fname);
	else {
		dofile(args, STDIN_FILENO == fd ?
			"STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
1261:
1262: int
1263: main(int argc, char *argv[])
1264: {
1265: const char *fname, *name;
1266: struct args args;
1267: int c;
1268:
1269: name = strrchr(argv[0], '/');
1270: if (name == NULL)
1271: name = argv[0];
1272: else
1273: ++name;
1274:
1275: memset(&args, 0, sizeof(struct args));
1276: fname = "-";
1277:
1278: /* Accept no arguments for now. */
1279:
1280: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1281: switch (c) {
1282: case ('h'):
1283: /* FALLTHROUGH */
1284: case ('l'):
1285: /* FALLTHROUGH */
1286: case ('c'):
1287: /* FALLTHROUGH */
1288: case ('o'):
1289: /* FALLTHROUGH */
1290: case ('q'):
1291: /* FALLTHROUGH */
1292: case ('r'):
1293: /* FALLTHROUGH */
1294: case ('u'):
1295: /* FALLTHROUGH */
1296: case ('v'):
1297: /* Ignore these. */
1298: break;
1299: case ('d'):
1300: args.date = optarg;
1301: break;
1302: case ('n'):
1303: args.title = optarg;
1304: break;
1305: case ('s'):
1306: args.section = optarg;
1307: break;
1308: default:
1309: goto usage;
1310: }
1311:
1312: argc -= optind;
1313: argv += optind;
1314:
1315: /* Accept only a single input file. */
1316:
1317: if (argc > 2)
1318: return(EXIT_FAILURE);
1319: else if (1 == argc)
1320: fname = *argv;
1321:
1322: return(readfile(&args, fname) ?
1323: EXIT_SUCCESS : EXIT_FAILURE);
1324:
1325: usage:
1326: fprintf(stderr, "usage: %s [-d date] "
1327: "[-n title] [-s section]\n", name);
1328:
1329: return(EXIT_FAILURE);
1330: }
CVSweb