Annotation of pod2mdoc/pod2mdoc.c, Revision 1.15
1.15 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.14 2014/04/01 19:50:34 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
#include <sys/stat.h>
#include <sys/time.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
28:
1.10 kristaps 29: /*
30: * In what section can we find Perl manuals?
31: */
32: #define PERL_SECTION "3p"
33:
/*
 * Values given on the command line that replace what would otherwise
 * be derived from the input file.
 * A NULL member means "no override".
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
39:
/*
 * The kinds of list an "=over" block can become.
 * LIST__MAX is also stored on the list stack to mean "opened by =over
 * but no =item seen yet, so the kind is still undecided".
 */
enum	list {
	LIST_BULLET = 0,	/* emitted as ".Bl -bullet" */
	LIST_ENUM,		/* emitted as ".Bl -enum" */
	LIST_TAG,		/* emitted as ".Bl -tag -width Ds" */
	LIST__MAX
};
46:
/*
 * Sections that receive special treatment; everything else is
 * SECT_NONE.
 */
enum	sect {
	SECT_NONE = 0,	/* any other section, or none yet */
	SECT_NAME,	/* inside "=head1 NAME" */
	SECT_SYNOPSIS	/* inside "=head1 SYNOPSIS" */
};
52:
1.1 schwarze 53: struct state {
54: int parsing; /* after =cut of before command */
55: int paused; /* in =begin and before =end */
56: int haspar; /* in paragraph: do we need Pp? */
1.11 kristaps 57: enum sect sect; /* which section are we in? */
1.1 schwarze 58: const char *fname; /* file being parsed */
1.4 schwarze 59: #define LIST_STACKSZ 128
60: enum list lstack[LIST_STACKSZ]; /* open lists */
61: size_t lpos; /* where in list stack */
1.1 schwarze 62: };
63:
/*
 * POD formatting codes, in the same order as the letters in fmts[].
 */
enum	fmt {
	FMT_ITALIC,	/* I<> */
	FMT_BOLD,	/* B<> */
	FMT_CODE,	/* C<> */
	FMT_LINK,	/* L<> */
	FMT_ESCAPE,	/* E<> */
	FMT_FILE,	/* F<> */
	FMT_NBSP,	/* S<> */
	FMT_INDEX,	/* X<> */
	FMT_NULL,	/* Z<> */
	FMT__MAX
};
76:
/*
 * POD command paragraphs, in the same order as the names in cmds[].
 */
enum	cmd {
	CMD_POD = 0,	/* =pod */
	CMD_HEAD1,	/* =head1 */
	CMD_HEAD2,	/* =head2 */
	CMD_HEAD3,	/* =head3 */
	CMD_HEAD4,	/* =head4 */
	CMD_OVER,	/* =over */
	CMD_ITEM,	/* =item */
	CMD_BACK,	/* =back */
	CMD_BEGIN,	/* =begin */
	CMD_END,	/* =end */
	CMD_FOR,	/* =for */
	CMD_ENCODING,	/* =encoding */
	CMD_CUT,	/* =cut */
	CMD__MAX
};
93:
94: static const char *const cmds[CMD__MAX] = {
95: "pod", /* CMD_POD */
96: "head1", /* CMD_HEAD1 */
97: "head2", /* CMD_HEAD2 */
98: "head3", /* CMD_HEAD3 */
99: "head4", /* CMD_HEAD4 */
100: "over", /* CMD_OVER */
101: "item", /* CMD_ITEM */
102: "back", /* CMD_BACK */
103: "begin", /* CMD_BEGIN */
104: "end", /* CMD_END */
105: "for", /* CMD_FOR */
106: "encoding", /* CMD_ENCODING */
107: "cut" /* CMD_CUT */
108: };
109:
110: static const char fmts[FMT__MAX] = {
111: 'I', /* FMT_ITALIC */
112: 'B', /* FMT_BOLD */
113: 'C', /* FMT_CODE */
114: 'L', /* FMT_LINK */
115: 'E', /* FMT_ESCAPE */
116: 'F', /* FMT_FILE */
117: 'S', /* FMT_NBSP */
118: 'X', /* FMT_INDEX */
119: 'Z' /* FMT_NULL */
120: };
121:
/*
 * The character the formatting routines consider to have been written
 * last.
 * It is consulted to decide whether a newline, a "Pf"/"Ns" or a "\&"
 * escape has to be emitted before the next piece of output.
 */
static	int last;
123:
1.1 schwarze 124: /*
125: * Given buf[*start] is at the start of an escape name, read til the end
126: * of the escape ('>') then try to do something with it.
127: * Sets start to be one after the '>'.
128: */
129: static void
130: formatescape(const char *buf, size_t *start, size_t end)
131: {
132: char esc[16]; /* no more needed */
133: size_t i, max;
134:
135: max = sizeof(esc) - 1;
136: i = 0;
137: /* Read til our buffer is full. */
138: while (*start < end && '>' != buf[*start] && i < max)
139: esc[i++] = buf[(*start)++];
140: esc[i] = '\0';
141:
142: if (i == max) {
143: /* Too long... skip til we end. */
144: while (*start < end && '>' != buf[*start])
145: (*start)++;
146: return;
147: } else if (*start >= end)
148: return;
149:
150: assert('>' == buf[*start]);
151: (*start)++;
152:
153: /*
154: * TODO: right now, we only recognise the named escapes.
155: * Just let the rest of them go.
156: */
1.6 kristaps 157: if (0 == strcmp(esc, "lt"))
1.1 schwarze 158: printf("\\(la");
159: else if (0 == strcmp(esc, "gt"))
160: printf("\\(ra");
161: else if (0 == strcmp(esc, "vb"))
162: printf("\\(ba");
163: else if (0 == strcmp(esc, "sol"))
164: printf("\\(sl");
1.6 kristaps 165: else
166: return;
167:
168: last = 'a';
1.1 schwarze 169: }
170:
171: /*
1.9 kristaps 172: * Run some heuristics to intuit a link format.
173: * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
174: * or a general UNIX foo(5) manpage.
175: * If I recognise one, I set "start" to be the end of the sequence so
176: * that the caller can safely just continue processing.
177: * Otherwise, I don't touch "start".
178: */
179: static int
180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
181: {
182: size_t sv, nstart, nend, i, j;
183: int hasdouble;
184:
185: /*
186: * Scan to the start of the terminus.
187: * This function is more or less replicated in the formatcode()
188: * for null or index formatting codes.
189: */
190: hasdouble = 0;
191: for (sv = nstart = *start; nstart < end; nstart++) {
192: /* Do we have a double-colon? */
193: if (':' == buf[nstart] &&
194: nstart > sv &&
195: ':' == buf[nstart - 1])
196: hasdouble = 1;
197: if ('>' != buf[nstart])
198: continue;
199: else if (dsz == 1)
200: break;
201: assert(nstart > 0);
202: if (' ' != buf[nstart - 1])
203: continue;
204: i = nstart;
205: for (j = 0; i < end && j < dsz; j++)
206: if ('>' != buf[i++])
207: break;
208: if (dsz == j)
209: break;
210: }
211:
212: /* We don't care about stubs. */
213: if (nstart == end || nstart == *start)
214: return(0);
215:
216: /* Set nend to the end of content. */
217: nend = nstart;
218: if (dsz > 1)
219: nend--;
220:
221: /*
222: * Provide for some common invocations of the link primitive.
223: * First, allow us to link to other Perl manuals.
224: */
225: if (hasdouble)
1.10 kristaps 226: printf("Xr %.*s " PERL_SECTION,
1.9 kristaps 227: (int)(nend - sv), &buf[sv]);
228: else if (nend - sv > 3 && isalnum(buf[sv]) &&
229: ')' == buf[nend - 1] &&
230: isdigit((int)buf[nend - 2]) &&
231: '(' == buf[nend - 3])
232: printf("Xr %.*s %c",
233: (int)(nend - 3 - sv),
234: &buf[sv], buf[nend - 2]);
235: else
236: return(0);
237:
238: *start = nstart;
239: return(1);
240: }
241:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * On entry buf[*start] is the '-' that opens the content of the bold
 * code.
 * A flag name (letters, digits, '?', '-', '_') that is followed by a
 * space, by '>' or by the end of the buffer is printed as "Fl name ".
 * If more flags follow after spaces they are handled the same way; any
 * other trailing text is introduced with "Ar " and left for the caller
 * to print.
 * If we don't have a flag at all, "Ar " is printed instead.
 * *start is left where the caller should resume.
 * Unterminated input never reads at or beyond "end".
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	assert(*start < end);
	assert('-' == buf[*start]);

	/*
	 * A '-' that ends the buffer, or that isn't followed by
	 * something that can start a flag name, is not a flag.
	 * NOTE(review): stepping *start back by one is what the code
	 * has always done here; it makes the caller re-emit the
	 * character in front of the '-'.  Confirm this is intended.
	 */
	if (*start + 1 >= end ||
	    ( ! isalnum((unsigned char)buf[*start + 1]) &&
	      '?' != buf[*start + 1] &&
	      '-' != buf[*start + 1])) {
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++) {
		if (isalnum((unsigned char)buf[i]) ||
		    '?' == buf[i] || '-' == buf[i] || '_' == buf[i])
			continue;
		break;
	}

	/*
	 * The name must be followed by a space or the closing '>'.
	 * Running into the end of the buffer (unterminated code) is
	 * treated like a terminator instead of aborting.
	 */
	if (i < end && ' ' != buf[i] && '>' != buf[i]) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Keep a flag that looks like a macro name from being parsed. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (i < end && ' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		*start = i;
		/* Nothing but spaces up to the end: we're done. */
		if (i >= end)
			return;
		if ('-' == buf[i])
			goto again;
		printf("Ar ");
	}
}
307:
1.9 kristaps 308: /*
1.1 schwarze 309: * We're at the character in front of a format code, which is structured
310: * like X<...> and can contain nested format codes.
311: * This consumes the whole format code, and any nested format codes, til
312: * the end of matched production.
313: * If "reentrant", then we're being called after a macro has already
314: * been printed to the current line.
1.6 kristaps 315: * If "nomacro", then we don't print any macros, just contained data
316: * (e.g., following "Sh" or "Nm").
1.15 ! kristaps 317: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
! 318: * as the first format code on a line (for decoration as an "Nm"),
! 319: * non-zero otherwise.
1.6 kristaps 320: * Return whether we've printed a macro or not--in other words, whether
321: * this should trigger a subsequent newline (this should be ignored when
322: * reentrant).
1.1 schwarze 323: */
324: static int
1.15 ! kristaps 325: formatcode(struct state *st, const char *buf, size_t *start,
! 326: size_t end, int reentrant, int nomacro, int pos)
1.1 schwarze 327: {
328: enum fmt fmt;
1.5 kristaps 329: size_t i, j, dsz;
1.1 schwarze 330:
331: assert(*start + 1 < end);
332: assert('<' == buf[*start + 1]);
333:
1.6 kristaps 334: /*
335: * First, look up the format code.
336: * If it's not valid, then exit immediately.
337: */
338: for (fmt = 0; fmt < FMT__MAX; fmt++)
339: if (buf[*start] == fmts[fmt])
340: break;
341:
342: if (FMT__MAX == fmt) {
343: putchar(last = buf[(*start)++]);
1.8 kristaps 344: if ('\\' == last)
345: putchar('e');
1.6 kristaps 346: return(0);
347: }
348:
1.5 kristaps 349: /*
350: * Determine whether we're overriding our delimiter.
351: * According to POD, if we have more than one '<' followed by a
352: * space, then we need a space followed by matching '>' to close
353: * the expression.
354: * Otherwise we use the usual '<' and '>' matched pair.
355: */
356: i = *start + 1;
357: while (i < end && '<' == buf[i])
358: i++;
359: assert(i > *start + 1);
360: dsz = i - (*start + 1);
361: if (dsz > 1 && (i >= end || ' ' != buf[i]))
362: dsz = 1;
363:
364: /* Remember, if dsz>1, to jump the trailing space. */
365: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 366:
367: /*
1.6 kristaps 368: * Escapes and ignored codes (NULL and INDEX) don't print macro
369: * sequences, so just output them like normal text before
370: * processing for real macros.
1.1 schwarze 371: */
372: if (FMT_ESCAPE == fmt) {
373: formatescape(buf, start, end);
374: return(0);
375: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 376: /*
1.6 kristaps 377: * Just consume til the end delimiter, accounting for
378: * whether it's a custom one.
1.5 kristaps 379: */
380: for ( ; *start < end; (*start)++) {
381: if ('>' != buf[*start])
382: continue;
383: else if (dsz == 1)
384: break;
385: assert(*start > 0);
386: if (' ' != buf[*start - 1])
387: continue;
388: i = *start;
389: for (j = 0; i < end && j < dsz; j++)
390: if ('>' != buf[i++])
391: break;
392: if (dsz != j)
393: continue;
394: (*start) += dsz;
395: break;
396: }
1.1 schwarze 397: return(0);
398: }
399:
1.6 kristaps 400: /*
401: * Check whether we're supposed to print macro stuff (this is
402: * suppressed in, e.g., "Nm" and "Sh" macros).
403: */
1.1 schwarze 404: if ( ! nomacro) {
405: /*
406: * Print out the macro describing this format code.
407: * If we're not "reentrant" (not yet on a macro line)
408: * then print a newline, if necessary, and the macro
409: * indicator.
410: * Otherwise, offset us with a space.
411: */
1.6 kristaps 412: if ( ! reentrant) {
413: if (last != '\n')
414: putchar('\n');
1.1 schwarze 415: putchar('.');
1.6 kristaps 416: } else
1.1 schwarze 417: putchar(' ');
418:
419: /*
1.6 kristaps 420: * If we don't have whitespace before us (and none after
421: * the opening delimiter), then suppress macro
422: * whitespace with Pf.
1.1 schwarze 423: */
1.6 kristaps 424: if (' ' != last && '\n' != last && ' ' != buf[*start])
425: printf("Pf ");
426:
1.1 schwarze 427: switch (fmt) {
428: case (FMT_ITALIC):
429: printf("Em ");
430: break;
431: case (FMT_BOLD):
1.14 kristaps 432: if (SECT_SYNOPSIS == st->sect) {
433: if (1 == dsz && '-' == buf[*start])
434: dosynopsisfl(buf, start, end);
1.15 ! kristaps 435: else if (0 == pos)
! 436: printf("Nm ");
1.14 kristaps 437: else
438: printf("Ar ");
439: break;
440: }
441: printf("Sy ");
1.1 schwarze 442: break;
443: case (FMT_CODE):
1.2 schwarze 444: printf("Qo Li ");
1.1 schwarze 445: break;
446: case (FMT_LINK):
1.9 kristaps 447: if ( ! trylink(buf, start, end, dsz))
448: printf("No ");
1.1 schwarze 449: break;
450: case (FMT_FILE):
451: printf("Pa ");
452: break;
453: case (FMT_NBSP):
454: printf("No ");
455: break;
456: default:
457: abort();
458: }
459: }
460:
461: /*
1.6 kristaps 462: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 463: * find a nested format code.
1.1 schwarze 464: * Don't emit any newlines: since we're on a macro line, we
465: * don't want to break the line.
466: */
467: while (*start < end) {
1.5 kristaps 468: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 469: (*start)++;
470: break;
1.5 kristaps 471: } else if ('>' == buf[*start] &&
472: ' ' == buf[*start - 1]) {
473: /*
474: * Handle custom delimiters.
475: * These require a certain number of
476: * space-preceded carrots before we're really at
477: * the end.
478: */
479: i = *start;
480: for (j = 0; i < end && j < dsz; j++)
481: if ('>' != buf[i++])
482: break;
483: if (dsz == j) {
484: *start += dsz;
485: break;
486: }
1.1 schwarze 487: }
488: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15 ! kristaps 489: formatcode(st, buf, start, end, 1, nomacro, 1);
1.1 schwarze 490: continue;
491: }
1.3 schwarze 492:
1.4 schwarze 493: /*
494: * Make sure that any macro-like words (or
495: * really any word starting with a capital
496: * letter) is assumed to be a macro that must be
497: * escaped.
498: * This matches "Xx " and "XxEOLN".
499: */
500: if ((' ' == last || '\n' == last) &&
501: end - *start > 1 &&
502: isupper((int)buf[*start]) &&
503: islower((int)buf[*start + 1]) &&
504: (end - *start == 2 ||
505: ' ' == buf[*start + 2]))
506: printf("\\&");
1.3 schwarze 507:
1.4 schwarze 508: /* Suppress newline. */
1.6 kristaps 509: if ('\n' == buf[*start])
510: putchar(last = ' ');
511: else
512: putchar(last = buf[*start]);
1.4 schwarze 513:
1.8 kristaps 514: /* Protect against character escapes. */
515: if ('\\' == last)
516: putchar('e');
517:
1.6 kristaps 518: (*start)++;
519:
520: if (' ' == last)
521: while (*start < end && ' ' == buf[*start])
522: (*start)++;
1.1 schwarze 523: }
1.2 schwarze 524:
525: if ( ! nomacro && FMT_CODE == fmt)
526: printf(" Qc ");
1.1 schwarze 527:
528: /*
1.6 kristaps 529: * We're now subsequent the format code.
530: * If there isn't a space (or newline) here, and we haven't just
531: * printed a space, then suppress space.
1.1 schwarze 532: */
1.6 kristaps 533: if ( ! nomacro && ' ' != last)
534: if (' ' != buf[*start] && '\n' != buf[*start])
535: printf(" Ns ");
1.5 kristaps 536:
1.1 schwarze 537: return(1);
538: }
539:
540: /*
541: * Calls formatcode() til the end of a paragraph.
542: */
543: static void
1.11 kristaps 544: formatcodeln(struct state *st, const char *buf,
545: size_t *start, size_t end, int nomacro)
1.1 schwarze 546: {
547:
1.4 schwarze 548: last = ' ';
1.1 schwarze 549: while (*start < end) {
550: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15 ! kristaps 551: formatcode(st, buf, start, end, 1, nomacro, 1);
1.1 schwarze 552: continue;
553: }
1.4 schwarze 554: /*
555: * Since we're already on a macro line, we want to make
556: * sure that we don't inadvertently invoke a macro.
557: * We need to do this carefully because section names
558: * are used in troff and we don't want to escape
559: * something that needn't be escaped.
560: */
561: if (' ' == last && end - *start > 1 &&
562: isupper((int)buf[*start]) &&
563: islower((int)buf[*start + 1]) &&
564: (end - *start == 2 ||
565: ' ' == buf[*start + 2]))
566: printf("\\&");
567:
1.8 kristaps 568: if ('\n' == buf[*start])
569: putchar(last = ' ');
570: else
1.1 schwarze 571: putchar(last = buf[*start]);
1.8 kristaps 572:
573: /* Protect against character escapes. */
574: if ('\\' == last)
575: putchar('e');
576:
1.1 schwarze 577: (*start)++;
578: }
579: }
580:
581: /*
1.4 schwarze 582: * Guess at what kind of list we are.
583: * These are taken straight from the POD manual.
584: * I don't know what people do in real life.
585: */
586: static enum list
587: listguess(const char *buf, size_t start, size_t end)
588: {
589: size_t len = end - start;
590:
591: assert(end >= start);
592:
593: if (len == 1 && '*' == buf[start])
594: return(LIST_BULLET);
595: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
596: return(LIST_ENUM);
597: else if (len == 1 && '1' == buf[start])
598: return(LIST_ENUM);
599: else
600: return(LIST_TAG);
601: }
602:
603: /*
1.1 schwarze 604: * A command paragraph, as noted in the perlpod manual, just indicates
605: * that we should do something, optionally with some text to print as
606: * well.
607: */
608: static void
609: command(struct state *st, const char *buf, size_t start, size_t end)
610: {
611: size_t len, csz;
612: enum cmd cmd;
613:
614: assert('=' == buf[start]);
615: start++;
616: len = end - start;
617:
618: for (cmd = 0; cmd < CMD__MAX; cmd++) {
619: csz = strlen(cmds[cmd]);
620: if (len < csz)
621: continue;
622: if (0 == memcmp(&buf[start], cmd[cmds], csz))
623: break;
624: }
625:
626: /* Ignore bogus commands. */
627:
628: if (CMD__MAX == cmd)
629: return;
630:
631: start += csz;
1.8 kristaps 632: while (start < end && ' ' == buf[start])
633: start++;
634:
1.1 schwarze 635: len = end - start;
636:
637: if (st->paused) {
638: st->paused = CMD_END != cmd;
639: return;
640: }
641:
642: switch (cmd) {
643: case (CMD_POD):
644: break;
645: case (CMD_HEAD1):
646: /*
647: * The behaviour of head= follows from a quick glance at
648: * how pod2man handles it.
649: */
650: printf(".Sh ");
1.11 kristaps 651: st->sect = SECT_NONE;
652: if (end - start == 4) {
1.1 schwarze 653: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 654: st->sect = SECT_NAME;
655: } else if (end - start == 8) {
656: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
657: st->sect = SECT_SYNOPSIS;
658: }
659: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 660: putchar('\n');
661: st->haspar = 1;
662: break;
663: case (CMD_HEAD2):
664: printf(".Ss ");
1.11 kristaps 665: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 666: putchar('\n');
667: st->haspar = 1;
668: break;
669: case (CMD_HEAD3):
670: puts(".Pp");
671: printf(".Em ");
1.11 kristaps 672: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 673: putchar('\n');
674: puts(".Pp");
675: st->haspar = 1;
676: break;
677: case (CMD_HEAD4):
678: puts(".Pp");
679: printf(".No ");
1.11 kristaps 680: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 681: putchar('\n');
682: puts(".Pp");
683: st->haspar = 1;
684: break;
685: case (CMD_OVER):
1.4 schwarze 686: /*
687: * If we have an existing list that hasn't had an =item
688: * yet, then make sure that we open it now.
689: * We use the default list type, but that can't be
690: * helped (we haven't seen any items yet).
1.1 schwarze 691: */
1.4 schwarze 692: if (st->lpos > 0)
693: if (LIST__MAX == st->lstack[st->lpos - 1]) {
694: st->lstack[st->lpos - 1] = LIST_TAG;
695: puts(".Bl -tag -width Ds");
696: }
697: st->lpos++;
698: assert(st->lpos < LIST_STACKSZ);
699: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 700: break;
701: case (CMD_ITEM):
1.6 kristaps 702: if (0 == st->lpos) {
703: /*
704: * Bad markup.
705: * Try to compensate.
706: */
707: st->lstack[st->lpos] = LIST__MAX;
708: st->lpos++;
709: }
1.4 schwarze 710: assert(st->lpos > 0);
711: /*
712: * If we're the first =item, guess at what our content
713: * will be: "*" is a bullet list, "1." is a numbered
714: * list, and everything is tagged.
715: */
716: if (LIST__MAX == st->lstack[st->lpos - 1]) {
717: st->lstack[st->lpos - 1] =
718: listguess(buf, start, end);
719: switch (st->lstack[st->lpos - 1]) {
720: case (LIST_BULLET):
721: puts(".Bl -bullet");
722: break;
723: case (LIST_ENUM):
724: puts(".Bl -enum");
725: break;
726: default:
727: puts(".Bl -tag -width Ds");
728: break;
729: }
730: }
731: switch (st->lstack[st->lpos - 1]) {
732: case (LIST_TAG):
733: printf(".It ");
1.11 kristaps 734: formatcodeln(st, buf, &start, end, 0);
1.4 schwarze 735: putchar('\n');
736: break;
737: case (LIST_ENUM):
738: /* FALLTHROUGH */
739: case (LIST_BULLET):
740: /*
741: * Abandon the remainder of the paragraph
742: * because we're going to be a bulletted or
743: * numbered list.
744: */
745: puts(".It");
746: break;
747: default:
748: abort();
749: }
1.1 schwarze 750: st->haspar = 1;
751: break;
752: case (CMD_BACK):
1.4 schwarze 753: /* Make sure we don't back over the stack. */
754: if (st->lpos > 0) {
755: st->lpos--;
756: puts(".El");
757: }
1.1 schwarze 758: break;
759: case (CMD_BEGIN):
760: /*
761: * We disregard all types for now.
762: * TODO: process at least "text" in a -literal block.
763: */
764: st->paused = 1;
765: break;
766: case (CMD_FOR):
767: /*
768: * We ignore all types of encodings and formats
769: * unilaterally.
770: */
771: break;
772: case (CMD_ENCODING):
773: break;
774: case (CMD_CUT):
775: st->parsing = 0;
776: return;
777: default:
778: abort();
779: }
780:
781: /* Any command (but =cut) makes us start parsing. */
782: st->parsing = 1;
783: }
784:
785: /*
786: * Just pump out the line in a verbatim block.
787: */
788: static void
789: verbatim(struct state *st, const char *buf, size_t start, size_t end)
790: {
1.8 kristaps 791: int last;
1.1 schwarze 792:
793: if ( ! st->parsing || st->paused)
794: return;
795:
796: puts(".Bd -literal");
1.8 kristaps 797: for (last = ' '; start < end; start++) {
798: /*
799: * Handle accidental macros (newline starting with
800: * control character) and escapes.
801: */
802: if ('\n' == last)
1.7 kristaps 803: if ('.' == buf[start] || '\'' == buf[start])
804: printf("\\&");
1.8 kristaps 805: putchar(last = buf[start]);
806: if ('\\' == buf[start])
807: printf("e");
1.7 kristaps 808: }
809: putchar('\n');
1.1 schwarze 810: puts(".Ed");
811: }
812:
/*
 * Helper for dosynopsisop(): scan buf[start..end) and return 1 if it
 * contains a ']' that is not paired with a '[' seen earlier in the
 * scanned range, 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth = 0;

	while (start < end) {
		switch (buf[start++]) {
		case '[':
			depth++;
			break;
		case ']':
			if (depth == 0)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}

	return(0);
}
830:
/*
 * If we're in the SYNOPSIS section and we've encountered brackets in
 * an ordinary paragraph, then try to see whether we're an [-option].
 * buf[*start] must be '[' or ']'.
 * An opening bracket is accepted if hasmatch() finds a closing one
 * further on; it prints ".Oo" and increments *opstack.
 * A closing bracket is accepted if *opstack is non-zero; it prints
 * ".Oc" and decrements *opstack.
 * On acceptance the bracket and any spaces after it (up to "end") are
 * consumed, *last is set to '\n' and 1 is returned.
 * Otherwise nothing changes and 0 is returned.
 */
static int
dosynopsisop(const char *buf, int *last,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oo");
		(*opstack)++;
	} else if ('[' == buf[*start])
		return(0);

	if (']' == buf[*start] && *opstack > 0) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oc");
		(*opstack)--;
	} else if (']' == buf[*start])
		return(0);

	(*start)++;
	*last = '\n';
	/* Never look beyond the end of the paragraph. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}
867:
868: /*
1.1 schwarze 869: * Ordinary paragraph.
870: * Well, this is really the hardest--POD seems to assume that, for
871: * example, a leading space implies a newline, and so on.
872: * Lots of other snakes in the grass: escaping a newline followed by a
873: * period (accidental mdoc(7) control), double-newlines after macro
874: * passages, etc.
875: */
876: static void
877: ordinary(struct state *st, const char *buf, size_t start, size_t end)
878: {
1.13 kristaps 879: size_t i, j, opstack;
1.15 ! kristaps 880: int seq;
1.1 schwarze 881:
882: if ( ! st->parsing || st->paused)
883: return;
884:
885: /*
886: * Special-case: the NAME section.
887: * If we find a "-" when searching from the end, assume that
888: * we're in "name - description" format.
889: * To wit, print out a "Nm" and "Nd" in that format.
890: */
1.11 kristaps 891: if (SECT_NAME == st->sect) {
1.15 ! kristaps 892: for (i = end - 2; i > start; i--)
! 893: if ('-' == buf[i] && ' ' == buf[i + 1])
1.1 schwarze 894: break;
895: if ('-' == buf[i]) {
896: j = i;
897: /* Roll over multiple "-". */
898: for ( ; i > start; i--)
899: if ('-' != buf[i])
900: break;
1.15 ! kristaps 901: fputs(".Nm ", stdout);
1.11 kristaps 902: formatcodeln(st, buf, &start, i + 1, 1);
1.5 kristaps 903: putchar('\n');
904: start = j + 1;
1.15 ! kristaps 905: fputs(".Nd ", stdout);
1.11 kristaps 906: formatcodeln(st, buf, &start, end, 1);
1.5 kristaps 907: putchar('\n');
1.1 schwarze 908: return;
909: }
910: }
911:
912: if ( ! st->haspar)
913: puts(".Pp");
914:
915: st->haspar = 0;
916: last = '\n';
1.13 kristaps 917: opstack = 0;
1.1 schwarze 918:
1.15 ! kristaps 919: for (seq = 0; start < end; seq++) {
1.1 schwarze 920: /*
921: * Loop til we get either to a newline or escape.
922: * Escape initial control characters.
923: */
924: while (start < end) {
925: if (start < end - 1 && '<' == buf[start + 1])
926: break;
927: else if ('\n' == buf[start])
928: break;
929: else if ('\n' == last && '.' == buf[start])
930: printf("\\&");
931: else if ('\n' == last && '\'' == buf[start])
932: printf("\\&");
1.12 kristaps 933: /*
934: * If we're in the SYNOPSIS, have square
935: * brackets indicate that we're opening and
936: * closing an optional context.
937: */
1.13 kristaps 938: if (SECT_SYNOPSIS == st->sect &&
939: ('[' == buf[start] ||
940: ']' == buf[start]) &&
941: dosynopsisop(buf, &last,
942: &start, end, &opstack))
943: continue;
1.1 schwarze 944: putchar(last = buf[start++]);
1.8 kristaps 945: if ('\\' == last)
946: putchar('e');
1.1 schwarze 947: }
948:
949: if (start < end - 1 && '<' == buf[start + 1]) {
950: /*
951: * We've encountered a format code.
952: * This is going to trigger a macro no matter
953: * what, so print a newline now.
954: * Then print the (possibly nested) macros and
955: * following that, a newline.
1.8 kristaps 956: * Consume all whitespace so we don't
957: * accidentally start an implicit literal line.
1.1 schwarze 958: */
1.15 ! kristaps 959: if (formatcode(st, buf, &start, end, 0, 0, seq)) {
1.1 schwarze 960: putchar(last = '\n');
1.6 kristaps 961: while (start < end && ' ' == buf[start])
962: start++;
963: }
1.1 schwarze 964: } else if (start < end && '\n' == buf[start]) {
965: /*
966: * Print the newline only if we haven't already
967: * printed a newline.
968: */
969: if (last != '\n')
970: putchar(last = buf[start]);
971: if (++start >= end)
972: continue;
973: /*
974: * If we have whitespace next, eat it to prevent
975: * mdoc(7) from thinking that it's meant for
976: * verbatim text.
977: * It is--but if we start with that, we can't
978: * have a macro subsequent it, which may be
979: * possible if we have an escape next.
980: */
981: if (' ' == buf[start] || '\t' == buf[start]) {
982: puts(".br");
983: last = '\n';
984: }
985: for ( ; start < end; start++)
986: if (' ' != buf[start] && '\t' != buf[start])
987: break;
1.12 kristaps 988: }
1.1 schwarze 989: }
990:
991: if (last != '\n')
992: putchar('\n');
993: }
994:
/*
 * Dispatch one paragraph, buf[start..end), by its first character:
 * a space or tab makes it verbatim, '=' makes it a command and
 * anything else makes it an ordinary paragraph.
 * Empty paragraphs are ignored.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case ' ':
		/* FALLTHROUGH */
	case '\t':
		verbatim(st, buf, start, end);
		break;
	case '=':
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
1013:
1014: /*
1015: * Loop around paragraphs within a document, processing each one in the
1016: * POD way.
1017: */
1018: static void
1019: dofile(const struct args *args, const char *fname,
1020: const struct tm *tm, const char *buf, size_t sz)
1021: {
1022: size_t sup, end, i, cur = 0;
1023: struct state st;
1024: const char *section, *date;
1025: char datebuf[64];
1026: char *title, *cp;
1027:
1028: if (0 == sz)
1029: return;
1030:
1031: /* Title is last path component of the filename. */
1032:
1033: if (NULL != args->title)
1034: title = strdup(args->title);
1035: else if (NULL != (cp = strrchr(fname, '/')))
1036: title = strdup(cp + 1);
1037: else
1038: title = strdup(fname);
1039:
1040: if (NULL == title) {
1041: perror(NULL);
1042: exit(EXIT_FAILURE);
1043: }
1044:
1045: /* Section is 1 unless suffix is "pm". */
1046:
1047: if (NULL == (section = args->section)) {
1048: section = "1";
1049: if (NULL != (cp = strrchr(title, '.'))) {
1050: *cp++ = '\0';
1051: if (0 == strcmp(cp, "pm"))
1.10 kristaps 1052: section = PERL_SECTION;
1.1 schwarze 1053: }
1054: }
1055:
1056: /* Date. Or the given "tm" if not supplied. */
1057:
1058: if (NULL == (date = args->date)) {
1059: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1060: date = datebuf;
1061: }
1062:
1063: for (cp = title; '\0' != *cp; cp++)
1064: *cp = toupper((int)*cp);
1065:
1066: /* The usual mdoc(7) preamble. */
1067:
1068: printf(".Dd %s\n", date);
1069: printf(".Dt %s %s\n", title, section);
1070: puts(".Os");
1071:
1072: free(title);
1073:
1074: memset(&st, 0, sizeof(struct state));
1075: assert(sz > 0);
1076:
1077: /* Main loop over file contents. */
1078:
1079: while (cur < sz) {
1080: /* Read until next paragraph. */
1081: for (i = cur + 1; i < sz; i++)
1082: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1083: /* Consume blank paragraphs. */
1084: while (i + 1 < sz && '\n' == buf[i + 1])
1085: i++;
1086: break;
1087: }
1088:
1089: /* Adjust end marker for EOF. */
1090: end = i < sz ? i - 1 :
1091: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1092: sup = i < sz ? end + 2 : sz;
1093:
1094: /* Process paragraph and adjust start. */
1095: dopar(&st, buf, cur, end);
1096: cur = sup;
1097: }
1098: }
1099:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, along with the file's modification time (or the current
 * time for stdin or if fstat() fails).
 * Returns 1 on success and 0 if the file can't be opened or read;
 * running out of memory exits the program.
 * A descriptor opened here is closed again on every return path.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill, so that there is always
		 * room for the next read().
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		perror(fname);
		free(buf);
		/* Don't leak the descriptor on a read error. */
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1168:
1169: int
1170: main(int argc, char *argv[])
1171: {
1172: const char *fname, *name;
1173: struct args args;
1174: int c;
1175:
1176: name = strrchr(argv[0], '/');
1177: if (name == NULL)
1178: name = argv[0];
1179: else
1180: ++name;
1181:
1182: memset(&args, 0, sizeof(struct args));
1183: fname = "-";
1184:
1185: /* Accept no arguments for now. */
1186:
1187: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1188: switch (c) {
1189: case ('h'):
1190: /* FALLTHROUGH */
1191: case ('l'):
1192: /* FALLTHROUGH */
1193: case ('c'):
1194: /* FALLTHROUGH */
1195: case ('o'):
1196: /* FALLTHROUGH */
1197: case ('q'):
1198: /* FALLTHROUGH */
1199: case ('r'):
1200: /* FALLTHROUGH */
1201: case ('u'):
1202: /* FALLTHROUGH */
1203: case ('v'):
1204: /* Ignore these. */
1205: break;
1206: case ('d'):
1207: args.date = optarg;
1208: break;
1209: case ('n'):
1210: args.title = optarg;
1211: break;
1212: case ('s'):
1213: args.section = optarg;
1214: break;
1215: default:
1216: goto usage;
1217: }
1218:
1219: argc -= optind;
1220: argv += optind;
1221:
1222: /* Accept only a single input file. */
1223:
1224: if (argc > 2)
1225: return(EXIT_FAILURE);
1226: else if (1 == argc)
1227: fname = *argv;
1228:
1229: return(readfile(&args, fname) ?
1230: EXIT_SUCCESS : EXIT_FAILURE);
1231:
1232: usage:
1233: fprintf(stderr, "usage: %s [-d date] "
1234: "[-n title] [-s section]\n", name);
1235:
1236: return(EXIT_FAILURE);
1237: }
CVSweb