Annotation of pod2mdoc/pod2mdoc.c, Revision 1.12
1.12 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.11 2014/04/01 08:31:00 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Manual section assigned to Perl module documentation: used both
 * for cross-references to other Perl manuals and as the section of
 * input files whose name ends in ".pm".
 */
#define	PERL_SECTION	"3p"
33:
/*
 * Values taken from the command line; a NULL member means that the
 * corresponding value is derived from the input file instead.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
39:
/*
 * Kinds of list that an =over block can turn into.
 * LIST__MAX doubles as the marker for a list whose kind has not been
 * decided yet (no =item seen so far).
 */
enum list {
	LIST_BULLET = 0,	/* "=item *" */
	LIST_ENUM,		/* "=item 1" or "=item 1." */
	LIST_TAG,		/* anything else */
	LIST__MAX
};
46:
/*
 * Sections (=head1) that receive special treatment.
 */
enum sect {
	SECT_NONE = 0,		/* any other section, or none yet */
	SECT_NAME,		/* the NAME section */
	SECT_SYNOPSIS		/* the SYNOPSIS section */
};
52:
1.1 schwarze 53: struct state {
54: int parsing; /* after =cut of before command */
55: int paused; /* in =begin and before =end */
56: int haspar; /* in paragraph: do we need Pp? */
1.11 kristaps 57: enum sect sect; /* which section are we in? */
1.1 schwarze 58: const char *fname; /* file being parsed */
1.4 schwarze 59: #define LIST_STACKSZ 128
60: enum list lstack[LIST_STACKSZ]; /* open lists */
61: size_t lpos; /* where in list stack */
1.1 schwarze 62: };
63:
/*
 * POD formatting codes, in the same order as the letters in fmts[].
 */
enum fmt {
	FMT_ITALIC,	/* I<...> */
	FMT_BOLD,	/* B<...> */
	FMT_CODE,	/* C<...> */
	FMT_LINK,	/* L<...> */
	FMT_ESCAPE,	/* E<...> */
	FMT_FILE,	/* F<...> */
	FMT_NBSP,	/* S<...> */
	FMT_INDEX,	/* X<...> */
	FMT_NULL,	/* Z<...> */
	FMT__MAX
};
76:
/*
 * POD command paragraphs ("=name"), indexing the cmds[] table.
 */
enum cmd {
	CMD_POD = 0,
	CMD_HEAD1,
	CMD_HEAD2,
	CMD_HEAD3,
	CMD_HEAD4,
	CMD_OVER,
	CMD_ITEM,
	CMD_BACK,
	CMD_BEGIN,
	CMD_END,
	CMD_FOR,
	CMD_ENCODING,
	CMD_CUT,
	CMD__MAX
};

/*
 * Command names as they appear in the input, without the leading "=".
 */
static const char *const cmds[CMD__MAX] = {
	[CMD_POD]	= "pod",
	[CMD_HEAD1]	= "head1",
	[CMD_HEAD2]	= "head2",
	[CMD_HEAD3]	= "head3",
	[CMD_HEAD4]	= "head4",
	[CMD_OVER]	= "over",
	[CMD_ITEM]	= "item",
	[CMD_BACK]	= "back",
	[CMD_BEGIN]	= "begin",
	[CMD_END]	= "end",
	[CMD_FOR]	= "for",
	[CMD_ENCODING]	= "encoding",
	[CMD_CUT]	= "cut"
};
109:
110: static const char fmts[FMT__MAX] = {
111: 'I', /* FMT_ITALIC */
112: 'B', /* FMT_BOLD */
113: 'C', /* FMT_CODE */
114: 'L', /* FMT_LINK */
115: 'E', /* FMT_ESCAPE */
116: 'F', /* FMT_FILE */
117: 'S', /* FMT_NBSP */
118: 'X', /* FMT_INDEX */
119: 'Z' /* FMT_NULL */
120: };
121:
/*
 * The character most recently written to the output by the text
 * formatting functions; consulted to decide on newlines, spacing and
 * macro escaping.
 */
static int last;

/*
 * Given buf[*start] is at the start of an escape name (the text
 * following "E<"), read til the end of the escape ('>') then try to
 * do something with it.
 * Sets start to be one after the '>', whether or not the escape was
 * recognised; if the input ends before any '>', start is set to end
 * and nothing is printed.
 * Only the named escapes "lt", "gt", "verbar" (and the historic
 * spelling "vb") and "sol" are translated; all others are dropped.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;
	int		 toolong;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	/*
	 * Whatever did not fit cannot be a name we know.
	 * Skip it, but remember that we did.
	 */
	toolong = 0;
	while (*start < end && '>' != buf[*start]) {
		(*start)++;
		toolong = 1;
	}

	/* Unterminated escape: nothing more to do. */
	if (*start >= end)
		return;

	/*
	 * Always step over the closing delimiter, so that it is never
	 * left behind to be printed as literal text by the caller.
	 */
	(*start)++;

	if (toolong)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "verbar") || 0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
	else
		return;

	/* The escape counts as an ordinary, non-whitespace character. */
	last = 'a';
}
170:
171: /*
1.9 kristaps 172: * Run some heuristics to intuit a link format.
173: * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
174: * or a general UNIX foo(5) manpage.
175: * If I recognise one, I set "start" to be the end of the sequence so
176: * that the caller can safely just continue processing.
177: * Otherwise, I don't touch "start".
178: */
179: static int
180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
181: {
182: size_t sv, nstart, nend, i, j;
183: int hasdouble;
184:
185: /*
186: * Scan to the start of the terminus.
187: * This function is more or less replicated in the formatcode()
188: * for null or index formatting codes.
189: */
190: hasdouble = 0;
191: for (sv = nstart = *start; nstart < end; nstart++) {
192: /* Do we have a double-colon? */
193: if (':' == buf[nstart] &&
194: nstart > sv &&
195: ':' == buf[nstart - 1])
196: hasdouble = 1;
197: if ('>' != buf[nstart])
198: continue;
199: else if (dsz == 1)
200: break;
201: assert(nstart > 0);
202: if (' ' != buf[nstart - 1])
203: continue;
204: i = nstart;
205: for (j = 0; i < end && j < dsz; j++)
206: if ('>' != buf[i++])
207: break;
208: if (dsz == j)
209: break;
210: }
211:
212: /* We don't care about stubs. */
213: if (nstart == end || nstart == *start)
214: return(0);
215:
216: /* Set nend to the end of content. */
217: nend = nstart;
218: if (dsz > 1)
219: nend--;
220:
221: /*
222: * Provide for some common invocations of the link primitive.
223: * First, allow us to link to other Perl manuals.
224: */
225: if (hasdouble)
1.10 kristaps 226: printf("Xr %.*s " PERL_SECTION,
1.9 kristaps 227: (int)(nend - sv), &buf[sv]);
228: else if (nend - sv > 3 && isalnum(buf[sv]) &&
229: ')' == buf[nend - 1] &&
230: isdigit((int)buf[nend - 2]) &&
231: '(' == buf[nend - 3])
232: printf("Xr %.*s %c",
233: (int)(nend - 3 - sv),
234: &buf[sv], buf[nend - 2]);
235: else
236: return(0);
237:
238: *start = nstart;
239: return(1);
240: }
241:
242: /*
1.1 schwarze 243: * We're at the character in front of a format code, which is structured
244: * like X<...> and can contain nested format codes.
245: * This consumes the whole format code, and any nested format codes, til
246: * the end of matched production.
247: * If "reentrant", then we're being called after a macro has already
248: * been printed to the current line.
1.6 kristaps 249: * If "nomacro", then we don't print any macros, just contained data
250: * (e.g., following "Sh" or "Nm").
251: * Return whether we've printed a macro or not--in other words, whether
252: * this should trigger a subsequent newline (this should be ignored when
253: * reentrant).
1.1 schwarze 254: */
255: static int
1.11 kristaps 256: formatcode(struct state *st, const char *buf,
257: size_t *start, size_t end, int reentrant, int nomacro)
1.1 schwarze 258: {
259: enum fmt fmt;
1.5 kristaps 260: size_t i, j, dsz;
1.1 schwarze 261:
262: assert(*start + 1 < end);
263: assert('<' == buf[*start + 1]);
264:
1.6 kristaps 265: /*
266: * First, look up the format code.
267: * If it's not valid, then exit immediately.
268: */
269: for (fmt = 0; fmt < FMT__MAX; fmt++)
270: if (buf[*start] == fmts[fmt])
271: break;
272:
273: if (FMT__MAX == fmt) {
274: putchar(last = buf[(*start)++]);
1.8 kristaps 275: if ('\\' == last)
276: putchar('e');
1.6 kristaps 277: return(0);
278: }
279:
1.5 kristaps 280: /*
281: * Determine whether we're overriding our delimiter.
282: * According to POD, if we have more than one '<' followed by a
283: * space, then we need a space followed by matching '>' to close
284: * the expression.
285: * Otherwise we use the usual '<' and '>' matched pair.
286: */
287: i = *start + 1;
288: while (i < end && '<' == buf[i])
289: i++;
290: assert(i > *start + 1);
291: dsz = i - (*start + 1);
292: if (dsz > 1 && (i >= end || ' ' != buf[i]))
293: dsz = 1;
294:
295: /* Remember, if dsz>1, to jump the trailing space. */
296: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 297:
298: /*
1.6 kristaps 299: * Escapes and ignored codes (NULL and INDEX) don't print macro
300: * sequences, so just output them like normal text before
301: * processing for real macros.
1.1 schwarze 302: */
303: if (FMT_ESCAPE == fmt) {
304: formatescape(buf, start, end);
305: return(0);
306: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 307: /*
1.6 kristaps 308: * Just consume til the end delimiter, accounting for
309: * whether it's a custom one.
1.5 kristaps 310: */
311: for ( ; *start < end; (*start)++) {
312: if ('>' != buf[*start])
313: continue;
314: else if (dsz == 1)
315: break;
316: assert(*start > 0);
317: if (' ' != buf[*start - 1])
318: continue;
319: i = *start;
320: for (j = 0; i < end && j < dsz; j++)
321: if ('>' != buf[i++])
322: break;
323: if (dsz != j)
324: continue;
325: (*start) += dsz;
326: break;
327: }
1.1 schwarze 328: return(0);
329: }
330:
1.6 kristaps 331: /*
332: * Check whether we're supposed to print macro stuff (this is
333: * suppressed in, e.g., "Nm" and "Sh" macros).
334: */
1.1 schwarze 335: if ( ! nomacro) {
336: /*
337: * Print out the macro describing this format code.
338: * If we're not "reentrant" (not yet on a macro line)
339: * then print a newline, if necessary, and the macro
340: * indicator.
341: * Otherwise, offset us with a space.
342: */
1.6 kristaps 343: if ( ! reentrant) {
344: if (last != '\n')
345: putchar('\n');
1.1 schwarze 346: putchar('.');
1.6 kristaps 347: } else
1.1 schwarze 348: putchar(' ');
349:
350: /*
1.6 kristaps 351: * If we don't have whitespace before us (and none after
352: * the opening delimiter), then suppress macro
353: * whitespace with Pf.
1.1 schwarze 354: */
1.6 kristaps 355: if (' ' != last && '\n' != last && ' ' != buf[*start])
356: printf("Pf ");
357:
1.1 schwarze 358: switch (fmt) {
359: case (FMT_ITALIC):
360: printf("Em ");
361: break;
362: case (FMT_BOLD):
1.11 kristaps 363: /*
364: * Doclifting: if we're a bold "-xx" and we're
365: * in the SYNOPSIS section, then it's likely
366: * that we're a flag.
367: * Be really strict: only do this when the dash
368: * is followed by alnums til the end marker,
369: * which mustn't be a custom.
370: */
371: if (SECT_SYNOPSIS == st->sect &&
372: end - *start > 1 &&
373: '-' == buf[*start] &&
374: (isalnum((int)buf[*start + 1]) ||
375: '?' == buf[*start + 1])) {
376: for (i = *start + 1; i < end; i++)
377: if ( ! isalnum((int)buf[i]))
378: break;
379: if (i < end && '>' == buf[i]) {
380: (*start)++;
381: printf("Fl ");
382: break;
383: }
384: }
1.1 schwarze 385: printf("Sy ");
386: break;
387: case (FMT_CODE):
1.2 schwarze 388: printf("Qo Li ");
1.1 schwarze 389: break;
390: case (FMT_LINK):
1.9 kristaps 391: if ( ! trylink(buf, start, end, dsz))
392: printf("No ");
1.1 schwarze 393: break;
394: case (FMT_FILE):
395: printf("Pa ");
396: break;
397: case (FMT_NBSP):
398: printf("No ");
399: break;
400: default:
401: abort();
402: }
403: }
404:
405: /*
1.6 kristaps 406: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 407: * find a nested format code.
1.1 schwarze 408: * Don't emit any newlines: since we're on a macro line, we
409: * don't want to break the line.
410: */
411: while (*start < end) {
1.5 kristaps 412: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 413: (*start)++;
414: break;
1.5 kristaps 415: } else if ('>' == buf[*start] &&
416: ' ' == buf[*start - 1]) {
417: /*
418: * Handle custom delimiters.
419: * These require a certain number of
420: * space-preceded carrots before we're really at
421: * the end.
422: */
423: i = *start;
424: for (j = 0; i < end && j < dsz; j++)
425: if ('>' != buf[i++])
426: break;
427: if (dsz == j) {
428: *start += dsz;
429: break;
430: }
1.1 schwarze 431: }
432: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.11 kristaps 433: formatcode(st, buf, start, end, 1, nomacro);
1.1 schwarze 434: continue;
435: }
1.3 schwarze 436:
1.4 schwarze 437: /*
438: * Make sure that any macro-like words (or
439: * really any word starting with a capital
440: * letter) is assumed to be a macro that must be
441: * escaped.
442: * This matches "Xx " and "XxEOLN".
443: */
444: if ((' ' == last || '\n' == last) &&
445: end - *start > 1 &&
446: isupper((int)buf[*start]) &&
447: islower((int)buf[*start + 1]) &&
448: (end - *start == 2 ||
449: ' ' == buf[*start + 2]))
450: printf("\\&");
1.3 schwarze 451:
1.4 schwarze 452: /* Suppress newline. */
1.6 kristaps 453: if ('\n' == buf[*start])
454: putchar(last = ' ');
455: else
456: putchar(last = buf[*start]);
1.4 schwarze 457:
1.8 kristaps 458: /* Protect against character escapes. */
459: if ('\\' == last)
460: putchar('e');
461:
1.6 kristaps 462: (*start)++;
463:
464: if (' ' == last)
465: while (*start < end && ' ' == buf[*start])
466: (*start)++;
1.1 schwarze 467: }
1.2 schwarze 468:
469: if ( ! nomacro && FMT_CODE == fmt)
470: printf(" Qc ");
1.1 schwarze 471:
472: /*
1.6 kristaps 473: * We're now subsequent the format code.
474: * If there isn't a space (or newline) here, and we haven't just
475: * printed a space, then suppress space.
1.1 schwarze 476: */
1.6 kristaps 477: if ( ! nomacro && ' ' != last)
478: if (' ' != buf[*start] && '\n' != buf[*start])
479: printf(" Ns ");
1.5 kristaps 480:
1.1 schwarze 481: return(1);
482: }
483:
484: /*
485: * Calls formatcode() til the end of a paragraph.
486: */
487: static void
1.11 kristaps 488: formatcodeln(struct state *st, const char *buf,
489: size_t *start, size_t end, int nomacro)
1.1 schwarze 490: {
491:
1.4 schwarze 492: last = ' ';
1.1 schwarze 493: while (*start < end) {
494: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.11 kristaps 495: formatcode(st, buf, start, end, 1, nomacro);
1.1 schwarze 496: continue;
497: }
1.4 schwarze 498: /*
499: * Since we're already on a macro line, we want to make
500: * sure that we don't inadvertently invoke a macro.
501: * We need to do this carefully because section names
502: * are used in troff and we don't want to escape
503: * something that needn't be escaped.
504: */
505: if (' ' == last && end - *start > 1 &&
506: isupper((int)buf[*start]) &&
507: islower((int)buf[*start + 1]) &&
508: (end - *start == 2 ||
509: ' ' == buf[*start + 2]))
510: printf("\\&");
511:
1.8 kristaps 512: if ('\n' == buf[*start])
513: putchar(last = ' ');
514: else
1.1 schwarze 515: putchar(last = buf[*start]);
1.8 kristaps 516:
517: /* Protect against character escapes. */
518: if ('\\' == last)
519: putchar('e');
520:
1.1 schwarze 521: (*start)++;
522: }
523: }
524:
525: /*
1.4 schwarze 526: * Guess at what kind of list we are.
527: * These are taken straight from the POD manual.
528: * I don't know what people do in real life.
529: */
530: static enum list
531: listguess(const char *buf, size_t start, size_t end)
532: {
533: size_t len = end - start;
534:
535: assert(end >= start);
536:
537: if (len == 1 && '*' == buf[start])
538: return(LIST_BULLET);
539: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
540: return(LIST_ENUM);
541: else if (len == 1 && '1' == buf[start])
542: return(LIST_ENUM);
543: else
544: return(LIST_TAG);
545: }
546:
547: /*
1.1 schwarze 548: * A command paragraph, as noted in the perlpod manual, just indicates
549: * that we should do something, optionally with some text to print as
550: * well.
551: */
552: static void
553: command(struct state *st, const char *buf, size_t start, size_t end)
554: {
555: size_t len, csz;
556: enum cmd cmd;
557:
558: assert('=' == buf[start]);
559: start++;
560: len = end - start;
561:
562: for (cmd = 0; cmd < CMD__MAX; cmd++) {
563: csz = strlen(cmds[cmd]);
564: if (len < csz)
565: continue;
566: if (0 == memcmp(&buf[start], cmd[cmds], csz))
567: break;
568: }
569:
570: /* Ignore bogus commands. */
571:
572: if (CMD__MAX == cmd)
573: return;
574:
575: start += csz;
1.8 kristaps 576: while (start < end && ' ' == buf[start])
577: start++;
578:
1.1 schwarze 579: len = end - start;
580:
581: if (st->paused) {
582: st->paused = CMD_END != cmd;
583: return;
584: }
585:
586: switch (cmd) {
587: case (CMD_POD):
588: break;
589: case (CMD_HEAD1):
590: /*
591: * The behaviour of head= follows from a quick glance at
592: * how pod2man handles it.
593: */
594: printf(".Sh ");
1.11 kristaps 595: st->sect = SECT_NONE;
596: if (end - start == 4) {
1.1 schwarze 597: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 598: st->sect = SECT_NAME;
599: } else if (end - start == 8) {
600: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
601: st->sect = SECT_SYNOPSIS;
602: }
603: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 604: putchar('\n');
605: st->haspar = 1;
606: break;
607: case (CMD_HEAD2):
608: printf(".Ss ");
1.11 kristaps 609: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 610: putchar('\n');
611: st->haspar = 1;
612: break;
613: case (CMD_HEAD3):
614: puts(".Pp");
615: printf(".Em ");
1.11 kristaps 616: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 617: putchar('\n');
618: puts(".Pp");
619: st->haspar = 1;
620: break;
621: case (CMD_HEAD4):
622: puts(".Pp");
623: printf(".No ");
1.11 kristaps 624: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 625: putchar('\n');
626: puts(".Pp");
627: st->haspar = 1;
628: break;
629: case (CMD_OVER):
1.4 schwarze 630: /*
631: * If we have an existing list that hasn't had an =item
632: * yet, then make sure that we open it now.
633: * We use the default list type, but that can't be
634: * helped (we haven't seen any items yet).
1.1 schwarze 635: */
1.4 schwarze 636: if (st->lpos > 0)
637: if (LIST__MAX == st->lstack[st->lpos - 1]) {
638: st->lstack[st->lpos - 1] = LIST_TAG;
639: puts(".Bl -tag -width Ds");
640: }
641: st->lpos++;
642: assert(st->lpos < LIST_STACKSZ);
643: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 644: break;
645: case (CMD_ITEM):
1.6 kristaps 646: if (0 == st->lpos) {
647: /*
648: * Bad markup.
649: * Try to compensate.
650: */
651: st->lstack[st->lpos] = LIST__MAX;
652: st->lpos++;
653: }
1.4 schwarze 654: assert(st->lpos > 0);
655: /*
656: * If we're the first =item, guess at what our content
657: * will be: "*" is a bullet list, "1." is a numbered
658: * list, and everything is tagged.
659: */
660: if (LIST__MAX == st->lstack[st->lpos - 1]) {
661: st->lstack[st->lpos - 1] =
662: listguess(buf, start, end);
663: switch (st->lstack[st->lpos - 1]) {
664: case (LIST_BULLET):
665: puts(".Bl -bullet");
666: break;
667: case (LIST_ENUM):
668: puts(".Bl -enum");
669: break;
670: default:
671: puts(".Bl -tag -width Ds");
672: break;
673: }
674: }
675: switch (st->lstack[st->lpos - 1]) {
676: case (LIST_TAG):
677: printf(".It ");
1.11 kristaps 678: formatcodeln(st, buf, &start, end, 0);
1.4 schwarze 679: putchar('\n');
680: break;
681: case (LIST_ENUM):
682: /* FALLTHROUGH */
683: case (LIST_BULLET):
684: /*
685: * Abandon the remainder of the paragraph
686: * because we're going to be a bulletted or
687: * numbered list.
688: */
689: puts(".It");
690: break;
691: default:
692: abort();
693: }
1.1 schwarze 694: st->haspar = 1;
695: break;
696: case (CMD_BACK):
1.4 schwarze 697: /* Make sure we don't back over the stack. */
698: if (st->lpos > 0) {
699: st->lpos--;
700: puts(".El");
701: }
1.1 schwarze 702: break;
703: case (CMD_BEGIN):
704: /*
705: * We disregard all types for now.
706: * TODO: process at least "text" in a -literal block.
707: */
708: st->paused = 1;
709: break;
710: case (CMD_FOR):
711: /*
712: * We ignore all types of encodings and formats
713: * unilaterally.
714: */
715: break;
716: case (CMD_ENCODING):
717: break;
718: case (CMD_CUT):
719: st->parsing = 0;
720: return;
721: default:
722: abort();
723: }
724:
725: /* Any command (but =cut) makes us start parsing. */
726: st->parsing = 1;
727: }
728:
729: /*
730: * Just pump out the line in a verbatim block.
731: */
732: static void
733: verbatim(struct state *st, const char *buf, size_t start, size_t end)
734: {
1.8 kristaps 735: int last;
1.1 schwarze 736:
737: if ( ! st->parsing || st->paused)
738: return;
739:
740: puts(".Bd -literal");
1.8 kristaps 741: for (last = ' '; start < end; start++) {
742: /*
743: * Handle accidental macros (newline starting with
744: * control character) and escapes.
745: */
746: if ('\n' == last)
1.7 kristaps 747: if ('.' == buf[start] || '\'' == buf[start])
748: printf("\\&");
1.8 kristaps 749: putchar(last = buf[start]);
750: if ('\\' == buf[start])
751: printf("e");
1.7 kristaps 752: }
753: putchar('\n');
1.1 schwarze 754: puts(".Ed");
755: }
756:
757: /*
758: * Ordinary paragraph.
759: * Well, this is really the hardest--POD seems to assume that, for
760: * example, a leading space implies a newline, and so on.
761: * Lots of other snakes in the grass: escaping a newline followed by a
762: * period (accidental mdoc(7) control), double-newlines after macro
763: * passages, etc.
764: */
765: static void
766: ordinary(struct state *st, const char *buf, size_t start, size_t end)
767: {
768: size_t i, j;
769:
770: if ( ! st->parsing || st->paused)
771: return;
772:
773: /*
774: * Special-case: the NAME section.
775: * If we find a "-" when searching from the end, assume that
776: * we're in "name - description" format.
777: * To wit, print out a "Nm" and "Nd" in that format.
778: */
1.11 kristaps 779: if (SECT_NAME == st->sect) {
1.1 schwarze 780: for (i = end - 1; i > start; i--)
781: if ('-' == buf[i])
782: break;
783: if ('-' == buf[i]) {
784: j = i;
785: /* Roll over multiple "-". */
786: for ( ; i > start; i--)
787: if ('-' != buf[i])
788: break;
1.5 kristaps 789: printf(".Nm ");
1.11 kristaps 790: formatcodeln(st, buf, &start, i + 1, 1);
1.5 kristaps 791: putchar('\n');
792: start = j + 1;
793: printf(".Nd ");
1.11 kristaps 794: formatcodeln(st, buf, &start, end, 1);
1.5 kristaps 795: putchar('\n');
1.1 schwarze 796: return;
797: }
798: }
799:
800: if ( ! st->haspar)
801: puts(".Pp");
802:
803: st->haspar = 0;
804: last = '\n';
805:
806: while (start < end) {
807: /*
808: * Loop til we get either to a newline or escape.
809: * Escape initial control characters.
810: */
811: while (start < end) {
812: if (start < end - 1 && '<' == buf[start + 1])
813: break;
814: else if ('\n' == buf[start])
815: break;
816: else if ('\n' == last && '.' == buf[start])
817: printf("\\&");
818: else if ('\n' == last && '\'' == buf[start])
819: printf("\\&");
1.12 ! kristaps 820: #if notyet
! 821: /*
! 822: * If we're in the SYNOPSIS, have square
! 823: * brackets indicate that we're opening and
! 824: * closing an optional context.
! 825: */
! 826: if (SECT_SYNOPSIS == st->sect) {
! 827: if ('[' == buf[start] ||
! 828: ']' == buf[start]) {
! 829: if (last != '\n')
! 830: putchar('\n');
! 831: if ('[' == buf[start])
! 832: printf(".Oo\n");
! 833: else
! 834: printf(".Oc\n");
! 835: start++;
! 836: continue;
! 837: }
! 838: }
! 839: #endif
1.1 schwarze 840: putchar(last = buf[start++]);
1.8 kristaps 841: if ('\\' == last)
842: putchar('e');
1.1 schwarze 843: }
844:
845: if (start < end - 1 && '<' == buf[start + 1]) {
846: /*
847: * We've encountered a format code.
848: * This is going to trigger a macro no matter
849: * what, so print a newline now.
850: * Then print the (possibly nested) macros and
851: * following that, a newline.
1.8 kristaps 852: * Consume all whitespace so we don't
853: * accidentally start an implicit literal line.
1.1 schwarze 854: */
1.11 kristaps 855: if (formatcode(st, buf, &start, end, 0, 0)) {
1.1 schwarze 856: putchar(last = '\n');
1.6 kristaps 857: while (start < end && ' ' == buf[start])
858: start++;
859: }
1.1 schwarze 860: } else if (start < end && '\n' == buf[start]) {
861: /*
862: * Print the newline only if we haven't already
863: * printed a newline.
864: */
865: if (last != '\n')
866: putchar(last = buf[start]);
867: if (++start >= end)
868: continue;
869: /*
870: * If we have whitespace next, eat it to prevent
871: * mdoc(7) from thinking that it's meant for
872: * verbatim text.
873: * It is--but if we start with that, we can't
874: * have a macro subsequent it, which may be
875: * possible if we have an escape next.
876: */
877: if (' ' == buf[start] || '\t' == buf[start]) {
878: puts(".br");
879: last = '\n';
880: }
881: for ( ; start < end; start++)
882: if (' ' != buf[start] && '\t' != buf[start])
883: break;
1.12 ! kristaps 884: }
1.1 schwarze 885: }
886:
887: if (last != '\n')
888: putchar('\n');
889: }
890:
/*
 * Dispatch the paragraph buf[start..end) on its first character:
 * a blank or tab means verbatim, "=" means a command, and anything
 * else is an ordinary paragraph.
 * Empty paragraphs are ignored.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case ' ':
		/* FALLTHROUGH */
	case '\t':
		verbatim(st, buf, start, end);
		break;
	case '=':
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
909:
910: /*
911: * Loop around paragraphs within a document, processing each one in the
912: * POD way.
913: */
914: static void
915: dofile(const struct args *args, const char *fname,
916: const struct tm *tm, const char *buf, size_t sz)
917: {
918: size_t sup, end, i, cur = 0;
919: struct state st;
920: const char *section, *date;
921: char datebuf[64];
922: char *title, *cp;
923:
924: if (0 == sz)
925: return;
926:
927: /* Title is last path component of the filename. */
928:
929: if (NULL != args->title)
930: title = strdup(args->title);
931: else if (NULL != (cp = strrchr(fname, '/')))
932: title = strdup(cp + 1);
933: else
934: title = strdup(fname);
935:
936: if (NULL == title) {
937: perror(NULL);
938: exit(EXIT_FAILURE);
939: }
940:
941: /* Section is 1 unless suffix is "pm". */
942:
943: if (NULL == (section = args->section)) {
944: section = "1";
945: if (NULL != (cp = strrchr(title, '.'))) {
946: *cp++ = '\0';
947: if (0 == strcmp(cp, "pm"))
1.10 kristaps 948: section = PERL_SECTION;
1.1 schwarze 949: }
950: }
951:
952: /* Date. Or the given "tm" if not supplied. */
953:
954: if (NULL == (date = args->date)) {
955: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
956: date = datebuf;
957: }
958:
959: for (cp = title; '\0' != *cp; cp++)
960: *cp = toupper((int)*cp);
961:
962: /* The usual mdoc(7) preamble. */
963:
964: printf(".Dd %s\n", date);
965: printf(".Dt %s %s\n", title, section);
966: puts(".Os");
967:
968: free(title);
969:
970: memset(&st, 0, sizeof(struct state));
971: assert(sz > 0);
972:
973: /* Main loop over file contents. */
974:
975: while (cur < sz) {
976: /* Read until next paragraph. */
977: for (i = cur + 1; i < sz; i++)
978: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
979: /* Consume blank paragraphs. */
980: while (i + 1 < sz && '\n' == buf[i + 1])
981: i++;
982: break;
983: }
984:
985: /* Adjust end marker for EOF. */
986: end = i < sz ? i - 1 :
987: ('\n' == buf[sz - 1] ? sz - 1 : sz);
988: sup = i < sz ? end + 2 : sz;
989:
990: /* Process paragraph and adjust start. */
991: dopar(&st, buf, cur, end);
992: cur = sup;
993: }
994: }
995:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, together with the file's modification time (or the
 * current time for stdin, or if the file cannot be stat'ed).
 * Return zero if the file could not be opened or read, else non-zero.
 * Running out of memory terminates the program.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill, so that there is always
		 * room for the next read.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first: close() might clobber errno. */
		perror(fname);
		free(buf);
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1064:
1065: int
1066: main(int argc, char *argv[])
1067: {
1068: const char *fname, *name;
1069: struct args args;
1070: int c;
1071:
1072: name = strrchr(argv[0], '/');
1073: if (name == NULL)
1074: name = argv[0];
1075: else
1076: ++name;
1077:
1078: memset(&args, 0, sizeof(struct args));
1079: fname = "-";
1080:
1081: /* Accept no arguments for now. */
1082:
1083: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1084: switch (c) {
1085: case ('h'):
1086: /* FALLTHROUGH */
1087: case ('l'):
1088: /* FALLTHROUGH */
1089: case ('c'):
1090: /* FALLTHROUGH */
1091: case ('o'):
1092: /* FALLTHROUGH */
1093: case ('q'):
1094: /* FALLTHROUGH */
1095: case ('r'):
1096: /* FALLTHROUGH */
1097: case ('u'):
1098: /* FALLTHROUGH */
1099: case ('v'):
1100: /* Ignore these. */
1101: break;
1102: case ('d'):
1103: args.date = optarg;
1104: break;
1105: case ('n'):
1106: args.title = optarg;
1107: break;
1108: case ('s'):
1109: args.section = optarg;
1110: break;
1111: default:
1112: goto usage;
1113: }
1114:
1115: argc -= optind;
1116: argv += optind;
1117:
1118: /* Accept only a single input file. */
1119:
1120: if (argc > 2)
1121: return(EXIT_FAILURE);
1122: else if (1 == argc)
1123: fname = *argv;
1124:
1125: return(readfile(&args, fname) ?
1126: EXIT_SUCCESS : EXIT_FAILURE);
1127:
1128: usage:
1129: fprintf(stderr, "usage: %s [-d date] "
1130: "[-n title] [-s section]\n", name);
1131:
1132: return(EXIT_FAILURE);
1133: }
CVSweb