Annotation of pod2mdoc/pod2mdoc.c, Revision 1.14
1.14 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.13 2014/04/01 13:05:50 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Manual section assigned to Perl module documentation: used for
 * cross references to Foo::Bar style names and as the default section
 * for input files ending in ".pm".
 */
#define	PERL_SECTION	"3p"
33:
/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means the value was not given and is derived from the
 * input file instead.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
39:
/*
 * Kinds of list an =over block can turn into.
 * LIST__MAX doubles as "type not decided yet" on the list stack.
 */
enum	list {
	LIST_BULLET = 0,	/* emitted as ".Bl -bullet" */
	LIST_ENUM = 1,		/* emitted as ".Bl -enum" */
	LIST_TAG = 2,		/* emitted as ".Bl -tag -width Ds" */
	LIST__MAX = 3
};
46:
/*
 * Sections (=head1) that receive special treatment.
 */
enum	sect {
	SECT_NONE = 0,		/* any other section */
	SECT_NAME = 1,		/* NAME section */
	SECT_SYNOPSIS = 2	/* SYNOPSIS section */
};
52:
1.1 schwarze 53: struct state {
54: int parsing; /* after =cut of before command */
55: int paused; /* in =begin and before =end */
56: int haspar; /* in paragraph: do we need Pp? */
1.11 kristaps 57: enum sect sect; /* which section are we in? */
1.1 schwarze 58: const char *fname; /* file being parsed */
1.4 schwarze 59: #define LIST_STACKSZ 128
60: enum list lstack[LIST_STACKSZ]; /* open lists */
61: size_t lpos; /* where in list stack */
1.1 schwarze 62: };
63:
/*
 * POD formatting codes; each value indexes the letter table "fmts".
 */
enum	fmt {
	FMT_ITALIC = 0,
	FMT_BOLD = 1,
	FMT_CODE = 2,
	FMT_LINK = 3,
	FMT_ESCAPE = 4,
	FMT_FILE = 5,
	FMT_NBSP = 6,
	FMT_INDEX = 7,
	FMT_NULL = 8,
	FMT__MAX = 9
};
76:
/*
 * POD command paragraphs; each value indexes the name table "cmds".
 */
enum	cmd {
	CMD_POD = 0,
	CMD_HEAD1 = 1,
	CMD_HEAD2 = 2,
	CMD_HEAD3 = 3,
	CMD_HEAD4 = 4,
	CMD_OVER = 5,
	CMD_ITEM = 6,
	CMD_BACK = 7,
	CMD_BEGIN = 8,
	CMD_END = 9,
	CMD_FOR = 10,
	CMD_ENCODING = 11,
	CMD_CUT = 12,
	CMD__MAX = 13
};
93:
94: static const char *const cmds[CMD__MAX] = {
95: "pod", /* CMD_POD */
96: "head1", /* CMD_HEAD1 */
97: "head2", /* CMD_HEAD2 */
98: "head3", /* CMD_HEAD3 */
99: "head4", /* CMD_HEAD4 */
100: "over", /* CMD_OVER */
101: "item", /* CMD_ITEM */
102: "back", /* CMD_BACK */
103: "begin", /* CMD_BEGIN */
104: "end", /* CMD_END */
105: "for", /* CMD_FOR */
106: "encoding", /* CMD_ENCODING */
107: "cut" /* CMD_CUT */
108: };
109:
110: static const char fmts[FMT__MAX] = {
111: 'I', /* FMT_ITALIC */
112: 'B', /* FMT_BOLD */
113: 'C', /* FMT_CODE */
114: 'L', /* FMT_LINK */
115: 'E', /* FMT_ESCAPE */
116: 'F', /* FMT_FILE */
117: 'S', /* FMT_NBSP */
118: 'X', /* FMT_INDEX */
119: 'Z' /* FMT_NULL */
120: };
121:
/*
 * The character most recently written to the output.
 * The emitters consult it to tell whether they are at the start of a
 * line or directly after a space.
 */
static	int	 last;

/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', or to "end" if the input stops
 * before a '>' is found.
 * Only the named escapes "lt", "gt", "vb" and "sol" produce output; any
 * other name is consumed silently.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/* Too long... skip til we end. */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		/*
		 * Step over the terminator as well: leaving it behind
		 * makes the caller print a stray '>' as ordinary text.
		 */
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
	else
		return;

	/* All four sequences written above end in the letter 'a'. */
	last = 'a';
}
170:
171: /*
1.9 kristaps 172: * Run some heuristics to intuit a link format.
173: * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
174: * or a general UNIX foo(5) manpage.
175: * If I recognise one, I set "start" to be the end of the sequence so
176: * that the caller can safely just continue processing.
177: * Otherwise, I don't touch "start".
178: */
179: static int
180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
181: {
182: size_t sv, nstart, nend, i, j;
183: int hasdouble;
184:
185: /*
186: * Scan to the start of the terminus.
187: * This function is more or less replicated in the formatcode()
188: * for null or index formatting codes.
189: */
190: hasdouble = 0;
191: for (sv = nstart = *start; nstart < end; nstart++) {
192: /* Do we have a double-colon? */
193: if (':' == buf[nstart] &&
194: nstart > sv &&
195: ':' == buf[nstart - 1])
196: hasdouble = 1;
197: if ('>' != buf[nstart])
198: continue;
199: else if (dsz == 1)
200: break;
201: assert(nstart > 0);
202: if (' ' != buf[nstart - 1])
203: continue;
204: i = nstart;
205: for (j = 0; i < end && j < dsz; j++)
206: if ('>' != buf[i++])
207: break;
208: if (dsz == j)
209: break;
210: }
211:
212: /* We don't care about stubs. */
213: if (nstart == end || nstart == *start)
214: return(0);
215:
216: /* Set nend to the end of content. */
217: nend = nstart;
218: if (dsz > 1)
219: nend--;
220:
221: /*
222: * Provide for some common invocations of the link primitive.
223: * First, allow us to link to other Perl manuals.
224: */
225: if (hasdouble)
1.10 kristaps 226: printf("Xr %.*s " PERL_SECTION,
1.9 kristaps 227: (int)(nend - sv), &buf[sv]);
228: else if (nend - sv > 3 && isalnum(buf[sv]) &&
229: ')' == buf[nend - 1] &&
230: isdigit((int)buf[nend - 2]) &&
231: '(' == buf[nend - 3])
232: printf("Xr %.*s %c",
233: (int)(nend - 3 - sv),
234: &buf[sv], buf[nend - 2]);
235: else
236: return(0);
237:
238: *start = nstart;
239: return(1);
240: }
241:
1.13 kristaps 242:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry buf[*start] must be the '-'.
 * Prints "Fl name " for each flag found and/or "Ar ", and leaves
 * "start" where the caller should resume copying text.
 * Malformed input (nothing after the dash, no closing delimiter) is
 * degraded to "Ar " instead of tripping an assertion.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	/* Nothing follows the dash: there is no flag name to look at. */
	if (*start + 1 >= end) {
		fputs("Ar ", stdout);
		return;
	}
	assert('-' == buf[*start]);

	if ( ! isalnum((unsigned char)buf[*start + 1]) &&
	    '?' != buf[*start + 1] &&
	    '-' != buf[*start + 1]) {
		/*
		 * NOTE(review): this steps back onto the character in
		 * front of the dash, which the caller will then copy to
		 * the output; kept as is, but confirm it is intended.
		 */
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Skip the dash and find where the flag name stops. */
	(*start)++;
	for (i = *start; i < end; i++) {
		if (isalnum((unsigned char)buf[i]) ||
		    '?' == buf[i] ||
		    '-' == buf[i] ||
		    '_' == buf[i])
			continue;
		break;
	}

	/*
	 * A flag name must be followed by a space or the closing '>'.
	 * Running off the end of the input is treated the same way as
	 * any other unexpected terminator.
	 */
	if (i >= end || ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Keep a flag name that looks like a macro ("Xx") from being one. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		/* Another dash starts another flag. */
		if (i < end && '-' == buf[i]) {
			*start = i;
			goto again;
		}
		/* Anything else is taken to be the flag's argument. */
		printf("Ar ");
		*start = i;
	}
}
308:
1.9 kristaps 309: /*
1.1 schwarze 310: * We're at the character in front of a format code, which is structured
311: * like X<...> and can contain nested format codes.
312: * This consumes the whole format code, and any nested format codes, til
313: * the end of matched production.
314: * If "reentrant", then we're being called after a macro has already
315: * been printed to the current line.
1.6 kristaps 316: * If "nomacro", then we don't print any macros, just contained data
317: * (e.g., following "Sh" or "Nm").
318: * Return whether we've printed a macro or not--in other words, whether
319: * this should trigger a subsequent newline (this should be ignored when
320: * reentrant).
1.1 schwarze 321: */
322: static int
1.11 kristaps 323: formatcode(struct state *st, const char *buf,
324: size_t *start, size_t end, int reentrant, int nomacro)
1.1 schwarze 325: {
326: enum fmt fmt;
1.5 kristaps 327: size_t i, j, dsz;
1.1 schwarze 328:
329: assert(*start + 1 < end);
330: assert('<' == buf[*start + 1]);
331:
1.6 kristaps 332: /*
333: * First, look up the format code.
334: * If it's not valid, then exit immediately.
335: */
336: for (fmt = 0; fmt < FMT__MAX; fmt++)
337: if (buf[*start] == fmts[fmt])
338: break;
339:
340: if (FMT__MAX == fmt) {
341: putchar(last = buf[(*start)++]);
1.8 kristaps 342: if ('\\' == last)
343: putchar('e');
1.6 kristaps 344: return(0);
345: }
346:
1.5 kristaps 347: /*
348: * Determine whether we're overriding our delimiter.
349: * According to POD, if we have more than one '<' followed by a
350: * space, then we need a space followed by matching '>' to close
351: * the expression.
352: * Otherwise we use the usual '<' and '>' matched pair.
353: */
354: i = *start + 1;
355: while (i < end && '<' == buf[i])
356: i++;
357: assert(i > *start + 1);
358: dsz = i - (*start + 1);
359: if (dsz > 1 && (i >= end || ' ' != buf[i]))
360: dsz = 1;
361:
362: /* Remember, if dsz>1, to jump the trailing space. */
363: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 364:
365: /*
1.6 kristaps 366: * Escapes and ignored codes (NULL and INDEX) don't print macro
367: * sequences, so just output them like normal text before
368: * processing for real macros.
1.1 schwarze 369: */
370: if (FMT_ESCAPE == fmt) {
371: formatescape(buf, start, end);
372: return(0);
373: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 374: /*
1.6 kristaps 375: * Just consume til the end delimiter, accounting for
376: * whether it's a custom one.
1.5 kristaps 377: */
378: for ( ; *start < end; (*start)++) {
379: if ('>' != buf[*start])
380: continue;
381: else if (dsz == 1)
382: break;
383: assert(*start > 0);
384: if (' ' != buf[*start - 1])
385: continue;
386: i = *start;
387: for (j = 0; i < end && j < dsz; j++)
388: if ('>' != buf[i++])
389: break;
390: if (dsz != j)
391: continue;
392: (*start) += dsz;
393: break;
394: }
1.1 schwarze 395: return(0);
396: }
397:
1.6 kristaps 398: /*
399: * Check whether we're supposed to print macro stuff (this is
400: * suppressed in, e.g., "Nm" and "Sh" macros).
401: */
1.1 schwarze 402: if ( ! nomacro) {
403: /*
404: * Print out the macro describing this format code.
405: * If we're not "reentrant" (not yet on a macro line)
406: * then print a newline, if necessary, and the macro
407: * indicator.
408: * Otherwise, offset us with a space.
409: */
1.6 kristaps 410: if ( ! reentrant) {
411: if (last != '\n')
412: putchar('\n');
1.1 schwarze 413: putchar('.');
1.6 kristaps 414: } else
1.1 schwarze 415: putchar(' ');
416:
417: /*
1.6 kristaps 418: * If we don't have whitespace before us (and none after
419: * the opening delimiter), then suppress macro
420: * whitespace with Pf.
1.1 schwarze 421: */
1.6 kristaps 422: if (' ' != last && '\n' != last && ' ' != buf[*start])
423: printf("Pf ");
424:
1.1 schwarze 425: switch (fmt) {
426: case (FMT_ITALIC):
427: printf("Em ");
428: break;
429: case (FMT_BOLD):
1.14 ! kristaps 430: if (SECT_SYNOPSIS == st->sect) {
! 431: if (1 == dsz && '-' == buf[*start])
! 432: dosynopsisfl(buf, start, end);
! 433: else
! 434: printf("Ar ");
! 435: break;
! 436: }
! 437: printf("Sy ");
1.1 schwarze 438: break;
439: case (FMT_CODE):
1.2 schwarze 440: printf("Qo Li ");
1.1 schwarze 441: break;
442: case (FMT_LINK):
1.9 kristaps 443: if ( ! trylink(buf, start, end, dsz))
444: printf("No ");
1.1 schwarze 445: break;
446: case (FMT_FILE):
447: printf("Pa ");
448: break;
449: case (FMT_NBSP):
450: printf("No ");
451: break;
452: default:
453: abort();
454: }
455: }
456:
457: /*
1.6 kristaps 458: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 459: * find a nested format code.
1.1 schwarze 460: * Don't emit any newlines: since we're on a macro line, we
461: * don't want to break the line.
462: */
463: while (*start < end) {
1.5 kristaps 464: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 465: (*start)++;
466: break;
1.5 kristaps 467: } else if ('>' == buf[*start] &&
468: ' ' == buf[*start - 1]) {
469: /*
470: * Handle custom delimiters.
471: * These require a certain number of
472: * space-preceded carrots before we're really at
473: * the end.
474: */
475: i = *start;
476: for (j = 0; i < end && j < dsz; j++)
477: if ('>' != buf[i++])
478: break;
479: if (dsz == j) {
480: *start += dsz;
481: break;
482: }
1.1 schwarze 483: }
484: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.11 kristaps 485: formatcode(st, buf, start, end, 1, nomacro);
1.1 schwarze 486: continue;
487: }
1.3 schwarze 488:
1.4 schwarze 489: /*
490: * Make sure that any macro-like words (or
491: * really any word starting with a capital
492: * letter) is assumed to be a macro that must be
493: * escaped.
494: * This matches "Xx " and "XxEOLN".
495: */
496: if ((' ' == last || '\n' == last) &&
497: end - *start > 1 &&
498: isupper((int)buf[*start]) &&
499: islower((int)buf[*start + 1]) &&
500: (end - *start == 2 ||
501: ' ' == buf[*start + 2]))
502: printf("\\&");
1.3 schwarze 503:
1.4 schwarze 504: /* Suppress newline. */
1.6 kristaps 505: if ('\n' == buf[*start])
506: putchar(last = ' ');
507: else
508: putchar(last = buf[*start]);
1.4 schwarze 509:
1.8 kristaps 510: /* Protect against character escapes. */
511: if ('\\' == last)
512: putchar('e');
513:
1.6 kristaps 514: (*start)++;
515:
516: if (' ' == last)
517: while (*start < end && ' ' == buf[*start])
518: (*start)++;
1.1 schwarze 519: }
1.2 schwarze 520:
521: if ( ! nomacro && FMT_CODE == fmt)
522: printf(" Qc ");
1.1 schwarze 523:
524: /*
1.6 kristaps 525: * We're now subsequent the format code.
526: * If there isn't a space (or newline) here, and we haven't just
527: * printed a space, then suppress space.
1.1 schwarze 528: */
1.6 kristaps 529: if ( ! nomacro && ' ' != last)
530: if (' ' != buf[*start] && '\n' != buf[*start])
531: printf(" Ns ");
1.5 kristaps 532:
1.1 schwarze 533: return(1);
534: }
535:
536: /*
537: * Calls formatcode() til the end of a paragraph.
538: */
539: static void
1.11 kristaps 540: formatcodeln(struct state *st, const char *buf,
541: size_t *start, size_t end, int nomacro)
1.1 schwarze 542: {
543:
1.4 schwarze 544: last = ' ';
1.1 schwarze 545: while (*start < end) {
546: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.11 kristaps 547: formatcode(st, buf, start, end, 1, nomacro);
1.1 schwarze 548: continue;
549: }
1.4 schwarze 550: /*
551: * Since we're already on a macro line, we want to make
552: * sure that we don't inadvertently invoke a macro.
553: * We need to do this carefully because section names
554: * are used in troff and we don't want to escape
555: * something that needn't be escaped.
556: */
557: if (' ' == last && end - *start > 1 &&
558: isupper((int)buf[*start]) &&
559: islower((int)buf[*start + 1]) &&
560: (end - *start == 2 ||
561: ' ' == buf[*start + 2]))
562: printf("\\&");
563:
1.8 kristaps 564: if ('\n' == buf[*start])
565: putchar(last = ' ');
566: else
1.1 schwarze 567: putchar(last = buf[*start]);
1.8 kristaps 568:
569: /* Protect against character escapes. */
570: if ('\\' == last)
571: putchar('e');
572:
1.1 schwarze 573: (*start)++;
574: }
575: }
576:
577: /*
1.4 schwarze 578: * Guess at what kind of list we are.
579: * These are taken straight from the POD manual.
580: * I don't know what people do in real life.
581: */
582: static enum list
583: listguess(const char *buf, size_t start, size_t end)
584: {
585: size_t len = end - start;
586:
587: assert(end >= start);
588:
589: if (len == 1 && '*' == buf[start])
590: return(LIST_BULLET);
591: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
592: return(LIST_ENUM);
593: else if (len == 1 && '1' == buf[start])
594: return(LIST_ENUM);
595: else
596: return(LIST_TAG);
597: }
598:
599: /*
1.1 schwarze 600: * A command paragraph, as noted in the perlpod manual, just indicates
601: * that we should do something, optionally with some text to print as
602: * well.
603: */
604: static void
605: command(struct state *st, const char *buf, size_t start, size_t end)
606: {
607: size_t len, csz;
608: enum cmd cmd;
609:
610: assert('=' == buf[start]);
611: start++;
612: len = end - start;
613:
614: for (cmd = 0; cmd < CMD__MAX; cmd++) {
615: csz = strlen(cmds[cmd]);
616: if (len < csz)
617: continue;
618: if (0 == memcmp(&buf[start], cmd[cmds], csz))
619: break;
620: }
621:
622: /* Ignore bogus commands. */
623:
624: if (CMD__MAX == cmd)
625: return;
626:
627: start += csz;
1.8 kristaps 628: while (start < end && ' ' == buf[start])
629: start++;
630:
1.1 schwarze 631: len = end - start;
632:
633: if (st->paused) {
634: st->paused = CMD_END != cmd;
635: return;
636: }
637:
638: switch (cmd) {
639: case (CMD_POD):
640: break;
641: case (CMD_HEAD1):
642: /*
643: * The behaviour of head= follows from a quick glance at
644: * how pod2man handles it.
645: */
646: printf(".Sh ");
1.11 kristaps 647: st->sect = SECT_NONE;
648: if (end - start == 4) {
1.1 schwarze 649: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 650: st->sect = SECT_NAME;
651: } else if (end - start == 8) {
652: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
653: st->sect = SECT_SYNOPSIS;
654: }
655: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 656: putchar('\n');
657: st->haspar = 1;
658: break;
659: case (CMD_HEAD2):
660: printf(".Ss ");
1.11 kristaps 661: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 662: putchar('\n');
663: st->haspar = 1;
664: break;
665: case (CMD_HEAD3):
666: puts(".Pp");
667: printf(".Em ");
1.11 kristaps 668: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 669: putchar('\n');
670: puts(".Pp");
671: st->haspar = 1;
672: break;
673: case (CMD_HEAD4):
674: puts(".Pp");
675: printf(".No ");
1.11 kristaps 676: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 677: putchar('\n');
678: puts(".Pp");
679: st->haspar = 1;
680: break;
681: case (CMD_OVER):
1.4 schwarze 682: /*
683: * If we have an existing list that hasn't had an =item
684: * yet, then make sure that we open it now.
685: * We use the default list type, but that can't be
686: * helped (we haven't seen any items yet).
1.1 schwarze 687: */
1.4 schwarze 688: if (st->lpos > 0)
689: if (LIST__MAX == st->lstack[st->lpos - 1]) {
690: st->lstack[st->lpos - 1] = LIST_TAG;
691: puts(".Bl -tag -width Ds");
692: }
693: st->lpos++;
694: assert(st->lpos < LIST_STACKSZ);
695: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 696: break;
697: case (CMD_ITEM):
1.6 kristaps 698: if (0 == st->lpos) {
699: /*
700: * Bad markup.
701: * Try to compensate.
702: */
703: st->lstack[st->lpos] = LIST__MAX;
704: st->lpos++;
705: }
1.4 schwarze 706: assert(st->lpos > 0);
707: /*
708: * If we're the first =item, guess at what our content
709: * will be: "*" is a bullet list, "1." is a numbered
710: * list, and everything is tagged.
711: */
712: if (LIST__MAX == st->lstack[st->lpos - 1]) {
713: st->lstack[st->lpos - 1] =
714: listguess(buf, start, end);
715: switch (st->lstack[st->lpos - 1]) {
716: case (LIST_BULLET):
717: puts(".Bl -bullet");
718: break;
719: case (LIST_ENUM):
720: puts(".Bl -enum");
721: break;
722: default:
723: puts(".Bl -tag -width Ds");
724: break;
725: }
726: }
727: switch (st->lstack[st->lpos - 1]) {
728: case (LIST_TAG):
729: printf(".It ");
1.11 kristaps 730: formatcodeln(st, buf, &start, end, 0);
1.4 schwarze 731: putchar('\n');
732: break;
733: case (LIST_ENUM):
734: /* FALLTHROUGH */
735: case (LIST_BULLET):
736: /*
737: * Abandon the remainder of the paragraph
738: * because we're going to be a bulletted or
739: * numbered list.
740: */
741: puts(".It");
742: break;
743: default:
744: abort();
745: }
1.1 schwarze 746: st->haspar = 1;
747: break;
748: case (CMD_BACK):
1.4 schwarze 749: /* Make sure we don't back over the stack. */
750: if (st->lpos > 0) {
751: st->lpos--;
752: puts(".El");
753: }
1.1 schwarze 754: break;
755: case (CMD_BEGIN):
756: /*
757: * We disregard all types for now.
758: * TODO: process at least "text" in a -literal block.
759: */
760: st->paused = 1;
761: break;
762: case (CMD_FOR):
763: /*
764: * We ignore all types of encodings and formats
765: * unilaterally.
766: */
767: break;
768: case (CMD_ENCODING):
769: break;
770: case (CMD_CUT):
771: st->parsing = 0;
772: return;
773: default:
774: abort();
775: }
776:
777: /* Any command (but =cut) makes us start parsing. */
778: st->parsing = 1;
779: }
780:
781: /*
782: * Just pump out the line in a verbatim block.
783: */
784: static void
785: verbatim(struct state *st, const char *buf, size_t start, size_t end)
786: {
1.8 kristaps 787: int last;
1.1 schwarze 788:
789: if ( ! st->parsing || st->paused)
790: return;
791:
792: puts(".Bd -literal");
1.8 kristaps 793: for (last = ' '; start < end; start++) {
794: /*
795: * Handle accidental macros (newline starting with
796: * control character) and escapes.
797: */
798: if ('\n' == last)
1.7 kristaps 799: if ('.' == buf[start] || '\'' == buf[start])
800: printf("\\&");
1.8 kristaps 801: putchar(last = buf[start]);
802: if ('\\' == buf[start])
803: printf("e");
1.7 kristaps 804: }
805: putchar('\n');
1.1 schwarze 806: puts(".Ed");
807: }
808:
/*
 * Helper for dosynopsisop(): scan buf[start..end) for the ']' that
 * closes a '[' located just before "start".
 * Brackets opened inside the range must be closed inside it first.
 * Returns 1 if such a ']' exists and 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t		 depth, pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case ('['):
			depth++;
			break;
		case (']'):
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}

	return(0);
}
826:
/*
 * If we're in the SYNOPSIS section and we've encountered brackets in an
 * ordinary paragraph, then try to see whether we're an [-option].
 * buf[*start] must be '[' or ']'.
 * An opening bracket is accepted if hasmatch() finds its closing
 * partner before "end"; a closing bracket is accepted if "opstack"
 * says one is still open.
 * On acceptance, print ".Oo" or ".Oc" on a line of its own, adjust
 * "opstack", step over the bracket and the spaces after it, set *last
 * to '\n' and return 1.
 * Otherwise return 0 with nothing printed or changed.
 */
static int
dosynopsisop(const char *buf, int *last,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start]) {
		if ( ! hasmatch(buf, *start + 1, end))
			return(0);
		if ('\n' != *last)
			putchar('\n');
		puts(".Oo");
		(*opstack)++;
	} else {
		if (0 == *opstack)
			return(0);
		if ('\n' != *last)
			putchar('\n');
		puts(".Oc");
		(*opstack)--;
	}

	(*start)++;
	*last = '\n';
	/* Stop at "end": a bracket may be the last thing in the buffer. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}
863:
864: /*
1.1 schwarze 865: * Ordinary paragraph.
866: * Well, this is really the hardest--POD seems to assume that, for
867: * example, a leading space implies a newline, and so on.
868: * Lots of other snakes in the grass: escaping a newline followed by a
869: * period (accidental mdoc(7) control), double-newlines after macro
870: * passages, etc.
871: */
872: static void
873: ordinary(struct state *st, const char *buf, size_t start, size_t end)
874: {
1.13 kristaps 875: size_t i, j, opstack;
1.1 schwarze 876:
877: if ( ! st->parsing || st->paused)
878: return;
879:
880: /*
881: * Special-case: the NAME section.
882: * If we find a "-" when searching from the end, assume that
883: * we're in "name - description" format.
884: * To wit, print out a "Nm" and "Nd" in that format.
885: */
1.11 kristaps 886: if (SECT_NAME == st->sect) {
1.1 schwarze 887: for (i = end - 1; i > start; i--)
888: if ('-' == buf[i])
889: break;
890: if ('-' == buf[i]) {
891: j = i;
892: /* Roll over multiple "-". */
893: for ( ; i > start; i--)
894: if ('-' != buf[i])
895: break;
1.5 kristaps 896: printf(".Nm ");
1.11 kristaps 897: formatcodeln(st, buf, &start, i + 1, 1);
1.5 kristaps 898: putchar('\n');
899: start = j + 1;
900: printf(".Nd ");
1.11 kristaps 901: formatcodeln(st, buf, &start, end, 1);
1.5 kristaps 902: putchar('\n');
1.1 schwarze 903: return;
904: }
905: }
906:
907: if ( ! st->haspar)
908: puts(".Pp");
909:
910: st->haspar = 0;
911: last = '\n';
1.13 kristaps 912: opstack = 0;
1.1 schwarze 913:
914: while (start < end) {
915: /*
916: * Loop til we get either to a newline or escape.
917: * Escape initial control characters.
918: */
919: while (start < end) {
920: if (start < end - 1 && '<' == buf[start + 1])
921: break;
922: else if ('\n' == buf[start])
923: break;
924: else if ('\n' == last && '.' == buf[start])
925: printf("\\&");
926: else if ('\n' == last && '\'' == buf[start])
927: printf("\\&");
1.12 kristaps 928: /*
929: * If we're in the SYNOPSIS, have square
930: * brackets indicate that we're opening and
931: * closing an optional context.
932: */
1.13 kristaps 933: if (SECT_SYNOPSIS == st->sect &&
934: ('[' == buf[start] ||
935: ']' == buf[start]) &&
936: dosynopsisop(buf, &last,
937: &start, end, &opstack))
938: continue;
1.1 schwarze 939: putchar(last = buf[start++]);
1.8 kristaps 940: if ('\\' == last)
941: putchar('e');
1.1 schwarze 942: }
943:
944: if (start < end - 1 && '<' == buf[start + 1]) {
945: /*
946: * We've encountered a format code.
947: * This is going to trigger a macro no matter
948: * what, so print a newline now.
949: * Then print the (possibly nested) macros and
950: * following that, a newline.
1.8 kristaps 951: * Consume all whitespace so we don't
952: * accidentally start an implicit literal line.
1.1 schwarze 953: */
1.11 kristaps 954: if (formatcode(st, buf, &start, end, 0, 0)) {
1.1 schwarze 955: putchar(last = '\n');
1.6 kristaps 956: while (start < end && ' ' == buf[start])
957: start++;
958: }
1.1 schwarze 959: } else if (start < end && '\n' == buf[start]) {
960: /*
961: * Print the newline only if we haven't already
962: * printed a newline.
963: */
964: if (last != '\n')
965: putchar(last = buf[start]);
966: if (++start >= end)
967: continue;
968: /*
969: * If we have whitespace next, eat it to prevent
970: * mdoc(7) from thinking that it's meant for
971: * verbatim text.
972: * It is--but if we start with that, we can't
973: * have a macro subsequent it, which may be
974: * possible if we have an escape next.
975: */
976: if (' ' == buf[start] || '\t' == buf[start]) {
977: puts(".br");
978: last = '\n';
979: }
980: for ( ; start < end; start++)
981: if (' ' != buf[start] && '\t' != buf[start])
982: break;
1.12 kristaps 983: }
1.1 schwarze 984: }
985:
986: if (last != '\n')
987: putchar('\n');
988: }
989:
/*
 * Dispatch one paragraph, buf[start..end), on its first character:
 * a space or tab means verbatim, '=' means a command, and anything
 * else is an ordinary paragraph.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
1008:
1009: /*
1010: * Loop around paragraphs within a document, processing each one in the
1011: * POD way.
1012: */
1013: static void
1014: dofile(const struct args *args, const char *fname,
1015: const struct tm *tm, const char *buf, size_t sz)
1016: {
1017: size_t sup, end, i, cur = 0;
1018: struct state st;
1019: const char *section, *date;
1020: char datebuf[64];
1021: char *title, *cp;
1022:
1023: if (0 == sz)
1024: return;
1025:
1026: /* Title is last path component of the filename. */
1027:
1028: if (NULL != args->title)
1029: title = strdup(args->title);
1030: else if (NULL != (cp = strrchr(fname, '/')))
1031: title = strdup(cp + 1);
1032: else
1033: title = strdup(fname);
1034:
1035: if (NULL == title) {
1036: perror(NULL);
1037: exit(EXIT_FAILURE);
1038: }
1039:
1040: /* Section is 1 unless suffix is "pm". */
1041:
1042: if (NULL == (section = args->section)) {
1043: section = "1";
1044: if (NULL != (cp = strrchr(title, '.'))) {
1045: *cp++ = '\0';
1046: if (0 == strcmp(cp, "pm"))
1.10 kristaps 1047: section = PERL_SECTION;
1.1 schwarze 1048: }
1049: }
1050:
1051: /* Date. Or the given "tm" if not supplied. */
1052:
1053: if (NULL == (date = args->date)) {
1054: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1055: date = datebuf;
1056: }
1057:
1058: for (cp = title; '\0' != *cp; cp++)
1059: *cp = toupper((int)*cp);
1060:
1061: /* The usual mdoc(7) preamble. */
1062:
1063: printf(".Dd %s\n", date);
1064: printf(".Dt %s %s\n", title, section);
1065: puts(".Os");
1066:
1067: free(title);
1068:
1069: memset(&st, 0, sizeof(struct state));
1070: assert(sz > 0);
1071:
1072: /* Main loop over file contents. */
1073:
1074: while (cur < sz) {
1075: /* Read until next paragraph. */
1076: for (i = cur + 1; i < sz; i++)
1077: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1078: /* Consume blank paragraphs. */
1079: while (i + 1 < sz && '\n' == buf[i + 1])
1080: i++;
1081: break;
1082: }
1083:
1084: /* Adjust end marker for EOF. */
1085: end = i < sz ? i - 1 :
1086: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1087: sup = i < sz ? end + 2 : sz;
1088:
1089: /* Process paragraph and adjust start. */
1090: dopar(&st, buf, cur, end);
1091: cur = sup;
1092: }
1093: }
1094:
1095: /*
1096: * Read a single file fully into memory.
1097: * If the file is "-", do it from stdin.
1098: * If successfully read, send the input buffer to dofile() for further
1099: * processing.
1100: */
1101: static int
1102: readfile(const struct args *args, const char *fname)
1103: {
1104: int fd;
1105: char *buf;
1106: size_t bufsz, cur;
1107: ssize_t ssz;
1108: struct tm *tm;
1109: time_t ttm;
1110: struct stat st;
1111:
1112: assert(NULL != fname);
1113:
1114: fd = 0 != strcmp("-", fname) ?
1115: open(fname, O_RDONLY, 0) : STDIN_FILENO;
1116:
1117: if (-1 == fd) {
1118: perror(fname);
1119: return(0);
1120: }
1121:
1122: if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
1123: ttm = time(NULL);
1124: tm = localtime(&ttm);
1125: } else
1126: tm = localtime(&st.st_mtime);
1127:
1128: /*
1129: * Arbitrarily-sized initial buffer.
1130: * Should be big enough for most files...
1131: */
1132: cur = 0;
1133: bufsz = 1 << 14;
1134: if (NULL == (buf = malloc(bufsz))) {
1135: perror(NULL);
1136: exit(EXIT_FAILURE);
1137: }
1138:
1139: while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
1140: /* Double buffer size on fill. */
1141: if ((size_t)ssz == bufsz - cur) {
1142: bufsz *= 2;
1143: if (NULL == (buf = realloc(buf, bufsz))) {
1144: perror(NULL);
1145: exit(EXIT_FAILURE);
1146: }
1147: }
1148: cur += (size_t)ssz;
1149: }
1150: if (ssz < 0) {
1151: perror(fname);
1152: free(buf);
1153: return(0);
1154: }
1155:
1156: dofile(args, STDIN_FILENO == fd ?
1157: "STDIN" : fname, tm, buf, cur);
1158: free(buf);
1159: if (STDIN_FILENO != fd)
1160: close(fd);
1161: return(1);
1162: }
1163:
1164: int
1165: main(int argc, char *argv[])
1166: {
1167: const char *fname, *name;
1168: struct args args;
1169: int c;
1170:
1171: name = strrchr(argv[0], '/');
1172: if (name == NULL)
1173: name = argv[0];
1174: else
1175: ++name;
1176:
1177: memset(&args, 0, sizeof(struct args));
1178: fname = "-";
1179:
1180: /* Accept no arguments for now. */
1181:
1182: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1183: switch (c) {
1184: case ('h'):
1185: /* FALLTHROUGH */
1186: case ('l'):
1187: /* FALLTHROUGH */
1188: case ('c'):
1189: /* FALLTHROUGH */
1190: case ('o'):
1191: /* FALLTHROUGH */
1192: case ('q'):
1193: /* FALLTHROUGH */
1194: case ('r'):
1195: /* FALLTHROUGH */
1196: case ('u'):
1197: /* FALLTHROUGH */
1198: case ('v'):
1199: /* Ignore these. */
1200: break;
1201: case ('d'):
1202: args.date = optarg;
1203: break;
1204: case ('n'):
1205: args.title = optarg;
1206: break;
1207: case ('s'):
1208: args.section = optarg;
1209: break;
1210: default:
1211: goto usage;
1212: }
1213:
1214: argc -= optind;
1215: argv += optind;
1216:
1217: /* Accept only a single input file. */
1218:
1219: if (argc > 2)
1220: return(EXIT_FAILURE);
1221: else if (1 == argc)
1222: fname = *argv;
1223:
1224: return(readfile(&args, fname) ?
1225: EXIT_SUCCESS : EXIT_FAILURE);
1226:
1227: usage:
1228: fprintf(stderr, "usage: %s [-d date] "
1229: "[-n title] [-s section]\n", name);
1230:
1231: return(EXIT_FAILURE);
1232: }
CVSweb