Annotation of pod2mdoc/pod2mdoc.c, Revision 1.6
1.6 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.5 2014/03/23 13:00:24 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Overrides for the mdoc(7) preamble, filled in from the command line.
 * A NULL member means that no override was requested.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
34:
/*
 * The kinds of mdoc(7) list an =over block can be rendered as.
 * LIST__MAX is also stored on the list stack to mark a list whose
 * type has not been decided yet.
 */
enum list {
	LIST_BULLET = 0,	/* emitted as ".Bl -bullet" */
	LIST_ENUM = 1,		/* emitted as ".Bl -enum" */
	LIST_TAG = 2,		/* emitted as ".Bl -tag -width Ds" */
	LIST__MAX = 3
};
41:
1.1 schwarze 42: struct state {
43: int parsing; /* after =cut of before command */
44: int paused; /* in =begin and before =end */
45: int haspar; /* in paragraph: do we need Pp? */
46: int isname; /* are we the NAME section? */
47: const char *fname; /* file being parsed */
1.4 schwarze 48: #define LIST_STACKSZ 128
49: enum list lstack[LIST_STACKSZ]; /* open lists */
50: size_t lpos; /* where in list stack */
1.1 schwarze 51: };
52:
/*
 * POD format codes, i.e. the letter in front of "<...>".
 * The order must match the "fmts" lookup table.
 */
enum fmt {
	FMT_ITALIC = 0,	/* I<> */
	FMT_BOLD,	/* B<> */
	FMT_CODE,	/* C<> */
	FMT_LINK,	/* L<> */
	FMT_ESCAPE,	/* E<> */
	FMT_FILE,	/* F<> */
	FMT_NBSP,	/* S<> */
	FMT_INDEX,	/* X<> */
	FMT_NULL,	/* Z<> */
	FMT__MAX
};
65:
/*
 * POD command paragraphs, i.e. the word following a leading "=".
 * The order must match the "cmds" name table.
 */
enum cmd {
	CMD_POD = 0,	/* =pod */
	CMD_HEAD1,	/* =head1 */
	CMD_HEAD2,	/* =head2 */
	CMD_HEAD3,	/* =head3 */
	CMD_HEAD4,	/* =head4 */
	CMD_OVER,	/* =over */
	CMD_ITEM,	/* =item */
	CMD_BACK,	/* =back */
	CMD_BEGIN,	/* =begin */
	CMD_END,	/* =end */
	CMD_FOR,	/* =for */
	CMD_ENCODING,	/* =encoding */
	CMD_CUT,	/* =cut */
	CMD__MAX
};
82:
83: static const char *const cmds[CMD__MAX] = {
84: "pod", /* CMD_POD */
85: "head1", /* CMD_HEAD1 */
86: "head2", /* CMD_HEAD2 */
87: "head3", /* CMD_HEAD3 */
88: "head4", /* CMD_HEAD4 */
89: "over", /* CMD_OVER */
90: "item", /* CMD_ITEM */
91: "back", /* CMD_BACK */
92: "begin", /* CMD_BEGIN */
93: "end", /* CMD_END */
94: "for", /* CMD_FOR */
95: "encoding", /* CMD_ENCODING */
96: "cut" /* CMD_CUT */
97: };
98:
99: static const char fmts[FMT__MAX] = {
100: 'I', /* FMT_ITALIC */
101: 'B', /* FMT_BOLD */
102: 'C', /* FMT_CODE */
103: 'L', /* FMT_LINK */
104: 'E', /* FMT_ESCAPE */
105: 'F', /* FMT_FILE */
106: 'S', /* FMT_NBSP */
107: 'X', /* FMT_INDEX */
108: 'Z' /* FMT_NULL */
109: };
110:
/*
 * The last character of text handed to the output by the formatting
 * routines.
 * It is consulted to decide about newlines, macro spacing and escapes;
 * macro names printed along the way do not update it.
 */
static int	 last = 0;
! 112:
1.1 schwarze 113: /*
114: * Given buf[*start] is at the start of an escape name, read til the end
115: * of the escape ('>') then try to do something with it.
116: * Sets start to be one after the '>'.
117: */
118: static void
119: formatescape(const char *buf, size_t *start, size_t end)
120: {
121: char esc[16]; /* no more needed */
122: size_t i, max;
123:
124: max = sizeof(esc) - 1;
125: i = 0;
126: /* Read til our buffer is full. */
127: while (*start < end && '>' != buf[*start] && i < max)
128: esc[i++] = buf[(*start)++];
129: esc[i] = '\0';
130:
131: if (i == max) {
132: /* Too long... skip til we end. */
133: while (*start < end && '>' != buf[*start])
134: (*start)++;
135: return;
136: } else if (*start >= end)
137: return;
138:
139: assert('>' == buf[*start]);
140: (*start)++;
141:
142: /*
143: * TODO: right now, we only recognise the named escapes.
144: * Just let the rest of them go.
145: */
1.6 ! kristaps 146: if (0 == strcmp(esc, "lt"))
1.1 schwarze 147: printf("\\(la");
148: else if (0 == strcmp(esc, "gt"))
149: printf("\\(ra");
150: else if (0 == strcmp(esc, "vb"))
151: printf("\\(ba");
152: else if (0 == strcmp(esc, "sol"))
153: printf("\\(sl");
1.6 ! kristaps 154: else
! 155: return;
! 156:
! 157: last = 'a';
1.1 schwarze 158: }
159:
/*
 * Advance *start over a run of ' ' characters, never going past "end".
 * Only the space character is skipped, not tabs or newlines.
 * Returns non-zero if *start was moved at all.
 */
static int
skipspace(const char *buf, size_t *start, size_t end)
{
	const size_t	 first = *start;
	size_t		 pos;

	for (pos = first; pos < end; pos++)
		if (' ' != buf[pos])
			break;

	*start = pos;
	return(pos > first);
}
173:
174: /*
175: * We're at the character in front of a format code, which is structured
176: * like X<...> and can contain nested format codes.
177: * This consumes the whole format code, and any nested format codes, til
178: * the end of matched production.
179: * If "reentrant", then we're being called after a macro has already
180: * been printed to the current line.
1.6 ! kristaps 181: * If "nomacro", then we don't print any macros, just contained data
! 182: * (e.g., following "Sh" or "Nm").
! 183: * Return whether we've printed a macro or not--in other words, whether
! 184: * this should trigger a subsequent newline (this should be ignored when
! 185: * reentrant).
1.1 schwarze 186: */
187: static int
188: formatcode(const char *buf, size_t *start,
1.6 ! kristaps 189: size_t end, int reentrant, int nomacro)
1.1 schwarze 190: {
191: enum fmt fmt;
1.5 kristaps 192: size_t i, j, dsz;
1.1 schwarze 193:
194: assert(*start + 1 < end);
195: assert('<' == buf[*start + 1]);
196:
1.6 ! kristaps 197: /*
! 198: * First, look up the format code.
! 199: * If it's not valid, then exit immediately.
! 200: */
! 201: for (fmt = 0; fmt < FMT__MAX; fmt++)
! 202: if (buf[*start] == fmts[fmt])
! 203: break;
! 204:
! 205: if (FMT__MAX == fmt) {
! 206: putchar(last = buf[(*start)++]);
! 207: return(0);
! 208: }
! 209:
1.5 kristaps 210: /*
211: * Determine whether we're overriding our delimiter.
212: * According to POD, if we have more than one '<' followed by a
213: * space, then we need a space followed by matching '>' to close
214: * the expression.
215: * Otherwise we use the usual '<' and '>' matched pair.
216: */
217: i = *start + 1;
218: while (i < end && '<' == buf[i])
219: i++;
220: assert(i > *start + 1);
221: dsz = i - (*start + 1);
222: if (dsz > 1 && (i >= end || ' ' != buf[i]))
223: dsz = 1;
224:
225: /* Remember, if dsz>1, to jump the trailing space. */
226: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 227:
228: /*
1.6 ! kristaps 229: * Escapes and ignored codes (NULL and INDEX) don't print macro
! 230: * sequences, so just output them like normal text before
! 231: * processing for real macros.
1.1 schwarze 232: */
233: if (FMT_ESCAPE == fmt) {
234: formatescape(buf, start, end);
235: return(0);
236: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 237: /*
1.6 ! kristaps 238: * Just consume til the end delimiter, accounting for
! 239: * whether it's a custom one.
1.5 kristaps 240: */
241: for ( ; *start < end; (*start)++) {
242: if ('>' != buf[*start])
243: continue;
244: else if (dsz == 1)
245: break;
246: assert(*start > 0);
247: if (' ' != buf[*start - 1])
248: continue;
249: i = *start;
250: for (j = 0; i < end && j < dsz; j++)
251: if ('>' != buf[i++])
252: break;
253: if (dsz != j)
254: continue;
255: (*start) += dsz;
256: break;
257: }
1.1 schwarze 258: return(0);
259: }
260:
1.6 ! kristaps 261: /*
! 262: * Check whether we're supposed to print macro stuff (this is
! 263: * suppressed in, e.g., "Nm" and "Sh" macros).
! 264: */
1.1 schwarze 265: if ( ! nomacro) {
266: /*
267: * Print out the macro describing this format code.
268: * If we're not "reentrant" (not yet on a macro line)
269: * then print a newline, if necessary, and the macro
270: * indicator.
271: * Otherwise, offset us with a space.
272: */
1.6 ! kristaps 273: if ( ! reentrant) {
! 274: if (last != '\n')
! 275: putchar('\n');
1.1 schwarze 276: putchar('.');
1.6 ! kristaps 277: } else
1.1 schwarze 278: putchar(' ');
279:
280: /*
1.6 ! kristaps 281: * If we don't have whitespace before us (and none after
! 282: * the opening delimiter), then suppress macro
! 283: * whitespace with Pf.
1.1 schwarze 284: */
1.6 ! kristaps 285: if (' ' != last && '\n' != last && ' ' != buf[*start])
! 286: printf("Pf ");
! 287:
1.1 schwarze 288: switch (fmt) {
289: case (FMT_ITALIC):
290: printf("Em ");
291: break;
292: case (FMT_BOLD):
293: printf("Sy ");
294: break;
295: case (FMT_CODE):
1.2 schwarze 296: printf("Qo Li ");
1.1 schwarze 297: break;
298: case (FMT_LINK):
299: printf("Lk ");
300: break;
301: case (FMT_FILE):
302: printf("Pa ");
303: break;
304: case (FMT_NBSP):
305: /* TODO. */
306: printf("No ");
307: break;
308: default:
309: abort();
310: }
311: }
312:
313: /*
1.6 ! kristaps 314: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 315: * find a nested format code.
1.1 schwarze 316: * Don't emit any newlines: since we're on a macro line, we
317: * don't want to break the line.
318: */
319: while (*start < end) {
1.5 kristaps 320: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 321: (*start)++;
322: break;
1.5 kristaps 323: } else if ('>' == buf[*start] &&
324: ' ' == buf[*start - 1]) {
325: /*
326: * Handle custom delimiters.
327: * These require a certain number of
328: * space-preceded carrots before we're really at
329: * the end.
330: */
331: i = *start;
332: for (j = 0; i < end && j < dsz; j++)
333: if ('>' != buf[i++])
334: break;
335: if (dsz == j) {
336: *start += dsz;
337: break;
338: }
1.1 schwarze 339: }
340: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 ! kristaps 341: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 342: continue;
343: }
1.3 schwarze 344:
1.4 schwarze 345: /*
346: * Make sure that any macro-like words (or
347: * really any word starting with a capital
348: * letter) is assumed to be a macro that must be
349: * escaped.
350: * This matches "Xx " and "XxEOLN".
351: */
352: if ((' ' == last || '\n' == last) &&
353: end - *start > 1 &&
354: isupper((int)buf[*start]) &&
355: islower((int)buf[*start + 1]) &&
356: (end - *start == 2 ||
357: ' ' == buf[*start + 2]))
358: printf("\\&");
1.3 schwarze 359:
1.4 schwarze 360: /* Suppress newline. */
1.6 ! kristaps 361: if ('\n' == buf[*start])
! 362: putchar(last = ' ');
! 363: else
! 364: putchar(last = buf[*start]);
1.4 schwarze 365:
1.6 ! kristaps 366: (*start)++;
! 367:
! 368: if (' ' == last)
! 369: while (*start < end && ' ' == buf[*start])
! 370: (*start)++;
1.1 schwarze 371: }
1.2 schwarze 372:
373: if ( ! nomacro && FMT_CODE == fmt)
374: printf(" Qc ");
1.1 schwarze 375:
376: /*
1.6 ! kristaps 377: * We're now subsequent the format code.
! 378: * If there isn't a space (or newline) here, and we haven't just
! 379: * printed a space, then suppress space.
1.1 schwarze 380: */
1.6 ! kristaps 381: if ( ! nomacro && ' ' != last)
! 382: if (' ' != buf[*start] && '\n' != buf[*start])
! 383: printf(" Ns ");
1.5 kristaps 384:
1.1 schwarze 385: return(1);
386: }
387:
388: /*
389: * Calls formatcode() til the end of a paragraph.
390: */
391: static void
392: formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
393: {
394:
1.4 schwarze 395: last = ' ';
1.1 schwarze 396: while (*start < end) {
397: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 ! kristaps 398: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 399: continue;
400: }
1.4 schwarze 401: /*
402: * Since we're already on a macro line, we want to make
403: * sure that we don't inadvertently invoke a macro.
404: * We need to do this carefully because section names
405: * are used in troff and we don't want to escape
406: * something that needn't be escaped.
407: */
408: if (' ' == last && end - *start > 1 &&
409: isupper((int)buf[*start]) &&
410: islower((int)buf[*start + 1]) &&
411: (end - *start == 2 ||
412: ' ' == buf[*start + 2]))
413: printf("\\&");
414:
1.1 schwarze 415: if ('\n' != buf[*start])
416: putchar(last = buf[*start]);
1.4 schwarze 417: else
418: putchar(last = ' ');
1.1 schwarze 419: (*start)++;
420: }
421: }
422:
423: /*
1.4 schwarze 424: * Guess at what kind of list we are.
425: * These are taken straight from the POD manual.
426: * I don't know what people do in real life.
427: */
428: static enum list
429: listguess(const char *buf, size_t start, size_t end)
430: {
431: size_t len = end - start;
432:
433: assert(end >= start);
434:
435: if (len == 1 && '*' == buf[start])
436: return(LIST_BULLET);
437: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
438: return(LIST_ENUM);
439: else if (len == 1 && '1' == buf[start])
440: return(LIST_ENUM);
441: else
442: return(LIST_TAG);
443: }
444:
445: /*
1.1 schwarze 446: * A command paragraph, as noted in the perlpod manual, just indicates
447: * that we should do something, optionally with some text to print as
448: * well.
449: */
450: static void
451: command(struct state *st, const char *buf, size_t start, size_t end)
452: {
453: size_t len, csz;
454: enum cmd cmd;
455:
456: assert('=' == buf[start]);
457: start++;
458: len = end - start;
459:
460: for (cmd = 0; cmd < CMD__MAX; cmd++) {
461: csz = strlen(cmds[cmd]);
462: if (len < csz)
463: continue;
464: if (0 == memcmp(&buf[start], cmd[cmds], csz))
465: break;
466: }
467:
468: /* Ignore bogus commands. */
469:
470: if (CMD__MAX == cmd)
471: return;
472:
473: start += csz;
474: skipspace(buf, &start, end);
475: len = end - start;
476:
477: if (st->paused) {
478: st->paused = CMD_END != cmd;
479: return;
480: }
481:
482: switch (cmd) {
483: case (CMD_POD):
484: break;
485: case (CMD_HEAD1):
486: /*
487: * The behaviour of head= follows from a quick glance at
488: * how pod2man handles it.
489: */
490: printf(".Sh ");
491: st->isname = 0;
492: if (end - start == 4)
493: if (0 == memcmp(&buf[start], "NAME", 4))
494: st->isname = 1;
495: formatcodeln(buf, &start, end, 1);
496: putchar('\n');
497: st->haspar = 1;
498: break;
499: case (CMD_HEAD2):
500: printf(".Ss ");
501: formatcodeln(buf, &start, end, 1);
502: putchar('\n');
503: st->haspar = 1;
504: break;
505: case (CMD_HEAD3):
506: puts(".Pp");
507: printf(".Em ");
508: formatcodeln(buf, &start, end, 0);
509: putchar('\n');
510: puts(".Pp");
511: st->haspar = 1;
512: break;
513: case (CMD_HEAD4):
514: puts(".Pp");
515: printf(".No ");
516: formatcodeln(buf, &start, end, 0);
517: putchar('\n');
518: puts(".Pp");
519: st->haspar = 1;
520: break;
521: case (CMD_OVER):
1.4 schwarze 522: /*
523: * If we have an existing list that hasn't had an =item
524: * yet, then make sure that we open it now.
525: * We use the default list type, but that can't be
526: * helped (we haven't seen any items yet).
1.1 schwarze 527: */
1.4 schwarze 528: if (st->lpos > 0)
529: if (LIST__MAX == st->lstack[st->lpos - 1]) {
530: st->lstack[st->lpos - 1] = LIST_TAG;
531: puts(".Bl -tag -width Ds");
532: }
533: st->lpos++;
534: assert(st->lpos < LIST_STACKSZ);
535: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 536: break;
537: case (CMD_ITEM):
1.6 ! kristaps 538: if (0 == st->lpos) {
! 539: /*
! 540: * Bad markup.
! 541: * Try to compensate.
! 542: */
! 543: st->lstack[st->lpos] = LIST__MAX;
! 544: st->lpos++;
! 545: }
1.4 schwarze 546: assert(st->lpos > 0);
547: /*
548: * If we're the first =item, guess at what our content
549: * will be: "*" is a bullet list, "1." is a numbered
550: * list, and everything is tagged.
551: */
552: if (LIST__MAX == st->lstack[st->lpos - 1]) {
553: st->lstack[st->lpos - 1] =
554: listguess(buf, start, end);
555: switch (st->lstack[st->lpos - 1]) {
556: case (LIST_BULLET):
557: puts(".Bl -bullet");
558: break;
559: case (LIST_ENUM):
560: puts(".Bl -enum");
561: break;
562: default:
563: puts(".Bl -tag -width Ds");
564: break;
565: }
566: }
567: switch (st->lstack[st->lpos - 1]) {
568: case (LIST_TAG):
569: printf(".It ");
570: formatcodeln(buf, &start, end, 0);
571: putchar('\n');
572: break;
573: case (LIST_ENUM):
574: /* FALLTHROUGH */
575: case (LIST_BULLET):
576: /*
577: * Abandon the remainder of the paragraph
578: * because we're going to be a bulletted or
579: * numbered list.
580: */
581: puts(".It");
582: break;
583: default:
584: abort();
585: }
1.1 schwarze 586: st->haspar = 1;
587: break;
588: case (CMD_BACK):
1.4 schwarze 589: /* Make sure we don't back over the stack. */
590: if (st->lpos > 0) {
591: st->lpos--;
592: puts(".El");
593: }
1.1 schwarze 594: break;
595: case (CMD_BEGIN):
596: /*
597: * We disregard all types for now.
598: * TODO: process at least "text" in a -literal block.
599: */
600: st->paused = 1;
601: break;
602: case (CMD_FOR):
603: /*
604: * We ignore all types of encodings and formats
605: * unilaterally.
606: */
607: break;
608: case (CMD_ENCODING):
609: break;
610: case (CMD_CUT):
611: st->parsing = 0;
612: return;
613: default:
614: abort();
615: }
616:
617: /* Any command (but =cut) makes us start parsing. */
618: st->parsing = 1;
619: }
620:
621: /*
622: * Just pump out the line in a verbatim block.
623: */
624: static void
625: verbatim(struct state *st, const char *buf, size_t start, size_t end)
626: {
627:
628: if ( ! st->parsing || st->paused)
629: return;
630:
631: puts(".Bd -literal");
632: printf("%.*s\n", (int)(end - start), &buf[start]);
633: puts(".Ed");
634: }
635:
636: /*
637: * Ordinary paragraph.
638: * Well, this is really the hardest--POD seems to assume that, for
639: * example, a leading space implies a newline, and so on.
640: * Lots of other snakes in the grass: escaping a newline followed by a
641: * period (accidental mdoc(7) control), double-newlines after macro
642: * passages, etc.
643: */
644: static void
645: ordinary(struct state *st, const char *buf, size_t start, size_t end)
646: {
647: size_t i, j;
648:
649: if ( ! st->parsing || st->paused)
650: return;
651:
652: /*
653: * Special-case: the NAME section.
654: * If we find a "-" when searching from the end, assume that
655: * we're in "name - description" format.
656: * To wit, print out a "Nm" and "Nd" in that format.
657: */
658: if (st->isname) {
659: for (i = end - 1; i > start; i--)
660: if ('-' == buf[i])
661: break;
662: if ('-' == buf[i]) {
663: j = i;
664: /* Roll over multiple "-". */
665: for ( ; i > start; i--)
666: if ('-' != buf[i])
667: break;
1.5 kristaps 668: printf(".Nm ");
669: formatcodeln(buf, &start, i + 1, 1);
670: putchar('\n');
671: start = j + 1;
672: printf(".Nd ");
673: formatcodeln(buf, &start, end, 1);
674: putchar('\n');
1.1 schwarze 675: return;
676: }
677: }
678:
679: if ( ! st->haspar)
680: puts(".Pp");
681:
682: st->haspar = 0;
683: last = '\n';
684:
685: while (start < end) {
686: /*
687: * Loop til we get either to a newline or escape.
688: * Escape initial control characters.
689: */
690: while (start < end) {
691: if (start < end - 1 && '<' == buf[start + 1])
692: break;
693: else if ('\n' == buf[start])
694: break;
695: else if ('\n' == last && '.' == buf[start])
696: printf("\\&");
697: else if ('\n' == last && '\'' == buf[start])
698: printf("\\&");
699: putchar(last = buf[start++]);
700: }
701:
702: if (start < end - 1 && '<' == buf[start + 1]) {
703: /*
704: * We've encountered a format code.
705: * This is going to trigger a macro no matter
706: * what, so print a newline now.
707: * Then print the (possibly nested) macros and
708: * following that, a newline.
709: */
1.6 ! kristaps 710: if (formatcode(buf, &start, end, 0, 0)) {
1.1 schwarze 711: putchar(last = '\n');
1.6 ! kristaps 712: while (start < end && ' ' == buf[start])
! 713: start++;
! 714: }
1.1 schwarze 715: } else if (start < end && '\n' == buf[start]) {
716: /*
717: * Print the newline only if we haven't already
718: * printed a newline.
719: */
720: if (last != '\n')
721: putchar(last = buf[start]);
722: if (++start >= end)
723: continue;
724: /*
725: * If we have whitespace next, eat it to prevent
726: * mdoc(7) from thinking that it's meant for
727: * verbatim text.
728: * It is--but if we start with that, we can't
729: * have a macro subsequent it, which may be
730: * possible if we have an escape next.
731: */
732: if (' ' == buf[start] || '\t' == buf[start]) {
733: puts(".br");
734: last = '\n';
735: }
736: for ( ; start < end; start++)
737: if (' ' != buf[start] && '\t' != buf[start])
738: break;
739: } else if (start < end) {
740: /*
741: * Default: print the character.
742: * Escape initial control characters.
743: */
744: if ('\n' == last && '.' == buf[start])
745: printf("\\&");
746: else if ('\n' == last && '\'' == buf[start])
747: printf("\\&");
748: putchar(last = buf[start++]);
749: }
750: }
751:
752: if (last != '\n')
753: putchar('\n');
754: }
755:
/*
 * Dispatch one paragraph, buf[start..end), on its first character:
 * a space or tab makes it verbatim, a "=" makes it a command, and
 * anything else makes it ordinary.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (end == start)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
774:
775: /*
776: * Loop around paragraphs within a document, processing each one in the
777: * POD way.
778: */
779: static void
780: dofile(const struct args *args, const char *fname,
781: const struct tm *tm, const char *buf, size_t sz)
782: {
783: size_t sup, end, i, cur = 0;
784: struct state st;
785: const char *section, *date;
786: char datebuf[64];
787: char *title, *cp;
788:
789: if (0 == sz)
790: return;
791:
792: /* Title is last path component of the filename. */
793:
794: if (NULL != args->title)
795: title = strdup(args->title);
796: else if (NULL != (cp = strrchr(fname, '/')))
797: title = strdup(cp + 1);
798: else
799: title = strdup(fname);
800:
801: if (NULL == title) {
802: perror(NULL);
803: exit(EXIT_FAILURE);
804: }
805:
806: /* Section is 1 unless suffix is "pm". */
807:
808: if (NULL == (section = args->section)) {
809: section = "1";
810: if (NULL != (cp = strrchr(title, '.'))) {
811: *cp++ = '\0';
812: if (0 == strcmp(cp, "pm"))
813: section = "3p";
814: }
815: }
816:
817: /* Date. Or the given "tm" if not supplied. */
818:
819: if (NULL == (date = args->date)) {
820: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
821: date = datebuf;
822: }
823:
824: for (cp = title; '\0' != *cp; cp++)
825: *cp = toupper((int)*cp);
826:
827: /* The usual mdoc(7) preamble. */
828:
829: printf(".Dd %s\n", date);
830: printf(".Dt %s %s\n", title, section);
831: puts(".Os");
832:
833: free(title);
834:
835: memset(&st, 0, sizeof(struct state));
836: assert(sz > 0);
837:
838: /* Main loop over file contents. */
839:
840: while (cur < sz) {
841: /* Read until next paragraph. */
842: for (i = cur + 1; i < sz; i++)
843: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
844: /* Consume blank paragraphs. */
845: while (i + 1 < sz && '\n' == buf[i + 1])
846: i++;
847: break;
848: }
849:
850: /* Adjust end marker for EOF. */
851: end = i < sz ? i - 1 :
852: ('\n' == buf[sz - 1] ? sz - 1 : sz);
853: sup = i < sz ? end + 2 : sz;
854:
855: /* Process paragraph and adjust start. */
856: dopar(&st, buf, cur, end);
857: cur = sup;
858: }
859: }
860:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The time passed on is the file's modification time, or the current
 * time for stdin or if the file can't be examined.
 * Returns zero if the file can't be opened or read (after printing a
 * message) and non-zero otherwise.
 * Exits the program if memory can't be allocated.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * This keeps at least one byte free for the next read.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	rc = 0;
	if (ssz < 0)
		/* Report right away, while errno is still intact. */
		perror(fname);
	else {
		dofile(args, STDIN_FILENO == fd ?
			"STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	/* Release everything on the failure path, too. */
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
929:
930: int
931: main(int argc, char *argv[])
932: {
933: const char *fname, *name;
934: struct args args;
935: int c;
936:
937: name = strrchr(argv[0], '/');
938: if (name == NULL)
939: name = argv[0];
940: else
941: ++name;
942:
943: memset(&args, 0, sizeof(struct args));
944: fname = "-";
945:
946: /* Accept no arguments for now. */
947:
948: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
949: switch (c) {
950: case ('h'):
951: /* FALLTHROUGH */
952: case ('l'):
953: /* FALLTHROUGH */
954: case ('c'):
955: /* FALLTHROUGH */
956: case ('o'):
957: /* FALLTHROUGH */
958: case ('q'):
959: /* FALLTHROUGH */
960: case ('r'):
961: /* FALLTHROUGH */
962: case ('u'):
963: /* FALLTHROUGH */
964: case ('v'):
965: /* Ignore these. */
966: break;
967: case ('d'):
968: args.date = optarg;
969: break;
970: case ('n'):
971: args.title = optarg;
972: break;
973: case ('s'):
974: args.section = optarg;
975: break;
976: default:
977: goto usage;
978: }
979:
980: argc -= optind;
981: argv += optind;
982:
983: /* Accept only a single input file. */
984:
985: if (argc > 2)
986: return(EXIT_FAILURE);
987: else if (1 == argc)
988: fname = *argv;
989:
990: return(readfile(&args, fname) ?
991: EXIT_SUCCESS : EXIT_FAILURE);
992:
993: usage:
994: fprintf(stderr, "usage: %s [-d date] "
995: "[-n title] [-s section]\n", name);
996:
997: return(EXIT_FAILURE);
998: }
CVSweb