Annotation of pod2mdoc/pod2mdoc.c, Revision 1.8
1.8 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.7 2014/03/23 23:35:59 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means that no override was given and the value is
 * derived from the input instead.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
34:
/*
 * Kinds of mdoc(7) list that a POD "=over" block may be rendered as.
 * LIST__MAX is the number of real kinds; command() also stores it on
 * the list stack to mean "kind not yet decided".
 */
enum list {
	LIST_BULLET = 0,	/* ".Bl -bullet" */
	LIST_ENUM = 1,		/* ".Bl -enum" */
	LIST_TAG = 2,		/* ".Bl -tag" */
	LIST__MAX = 3
};
41:
1.1 schwarze 42: struct state {
43: int parsing; /* after =cut of before command */
44: int paused; /* in =begin and before =end */
45: int haspar; /* in paragraph: do we need Pp? */
46: int isname; /* are we the NAME section? */
47: const char *fname; /* file being parsed */
1.4 schwarze 48: #define LIST_STACKSZ 128
49: enum list lstack[LIST_STACKSZ]; /* open lists */
50: size_t lpos; /* where in list stack */
1.1 schwarze 51: };
52:
/*
 * POD formatting codes, in the same order as the letters in fmts[].
 */
enum fmt {
	FMT_ITALIC = 0,		/* I<...> */
	FMT_BOLD = 1,		/* B<...> */
	FMT_CODE = 2,		/* C<...> */
	FMT_LINK = 3,		/* L<...> */
	FMT_ESCAPE = 4,		/* E<...> */
	FMT_FILE = 5,		/* F<...> */
	FMT_NBSP = 6,		/* S<...> */
	FMT_INDEX = 7,		/* X<...> */
	FMT_NULL = 8,		/* Z<...> */
	FMT__MAX = 9
};
65:
/*
 * POD command paragraphs, in the same order as the names in cmds[].
 */
enum cmd {
	CMD_POD = 0,		/* =pod */
	CMD_HEAD1 = 1,		/* =head1 */
	CMD_HEAD2 = 2,		/* =head2 */
	CMD_HEAD3 = 3,		/* =head3 */
	CMD_HEAD4 = 4,		/* =head4 */
	CMD_OVER = 5,		/* =over */
	CMD_ITEM = 6,		/* =item */
	CMD_BACK = 7,		/* =back */
	CMD_BEGIN = 8,		/* =begin */
	CMD_END = 9,		/* =end */
	CMD_FOR = 10,		/* =for */
	CMD_ENCODING = 11,	/* =encoding */
	CMD_CUT = 12,		/* =cut */
	CMD__MAX = 13
};
82:
83: static const char *const cmds[CMD__MAX] = {
84: "pod", /* CMD_POD */
85: "head1", /* CMD_HEAD1 */
86: "head2", /* CMD_HEAD2 */
87: "head3", /* CMD_HEAD3 */
88: "head4", /* CMD_HEAD4 */
89: "over", /* CMD_OVER */
90: "item", /* CMD_ITEM */
91: "back", /* CMD_BACK */
92: "begin", /* CMD_BEGIN */
93: "end", /* CMD_END */
94: "for", /* CMD_FOR */
95: "encoding", /* CMD_ENCODING */
96: "cut" /* CMD_CUT */
97: };
98:
99: static const char fmts[FMT__MAX] = {
100: 'I', /* FMT_ITALIC */
101: 'B', /* FMT_BOLD */
102: 'C', /* FMT_CODE */
103: 'L', /* FMT_LINK */
104: 'E', /* FMT_ESCAPE */
105: 'F', /* FMT_FILE */
106: 'S', /* FMT_NBSP */
107: 'X', /* FMT_INDEX */
108: 'Z' /* FMT_NULL */
109: };
110:
/*
 * The character most recently written to the output, or a stand-in
 * that a caller assigns before it starts emitting text (for example
 * '\n' at the start of an ordinary paragraph, ' ' on a macro line).
 */
static int last = 0;
112:
1.1 schwarze 113: /*
114: * Given buf[*start] is at the start of an escape name, read til the end
115: * of the escape ('>') then try to do something with it.
116: * Sets start to be one after the '>'.
117: */
118: static void
119: formatescape(const char *buf, size_t *start, size_t end)
120: {
121: char esc[16]; /* no more needed */
122: size_t i, max;
123:
124: max = sizeof(esc) - 1;
125: i = 0;
126: /* Read til our buffer is full. */
127: while (*start < end && '>' != buf[*start] && i < max)
128: esc[i++] = buf[(*start)++];
129: esc[i] = '\0';
130:
131: if (i == max) {
132: /* Too long... skip til we end. */
133: while (*start < end && '>' != buf[*start])
134: (*start)++;
135: return;
136: } else if (*start >= end)
137: return;
138:
139: assert('>' == buf[*start]);
140: (*start)++;
141:
142: /*
143: * TODO: right now, we only recognise the named escapes.
144: * Just let the rest of them go.
145: */
1.6 kristaps 146: if (0 == strcmp(esc, "lt"))
1.1 schwarze 147: printf("\\(la");
148: else if (0 == strcmp(esc, "gt"))
149: printf("\\(ra");
150: else if (0 == strcmp(esc, "vb"))
151: printf("\\(ba");
152: else if (0 == strcmp(esc, "sol"))
153: printf("\\(sl");
1.6 kristaps 154: else
155: return;
156:
157: last = 'a';
1.1 schwarze 158: }
159:
160: /*
161: * We're at the character in front of a format code, which is structured
162: * like X<...> and can contain nested format codes.
163: * This consumes the whole format code, and any nested format codes, til
164: * the end of matched production.
165: * If "reentrant", then we're being called after a macro has already
166: * been printed to the current line.
1.6 kristaps 167: * If "nomacro", then we don't print any macros, just contained data
168: * (e.g., following "Sh" or "Nm").
169: * Return whether we've printed a macro or not--in other words, whether
170: * this should trigger a subsequent newline (this should be ignored when
171: * reentrant).
1.1 schwarze 172: */
173: static int
174: formatcode(const char *buf, size_t *start,
1.6 kristaps 175: size_t end, int reentrant, int nomacro)
1.1 schwarze 176: {
177: enum fmt fmt;
1.5 kristaps 178: size_t i, j, dsz;
1.1 schwarze 179:
180: assert(*start + 1 < end);
181: assert('<' == buf[*start + 1]);
182:
1.6 kristaps 183: /*
184: * First, look up the format code.
185: * If it's not valid, then exit immediately.
186: */
187: for (fmt = 0; fmt < FMT__MAX; fmt++)
188: if (buf[*start] == fmts[fmt])
189: break;
190:
191: if (FMT__MAX == fmt) {
192: putchar(last = buf[(*start)++]);
1.8 ! kristaps 193: if ('\\' == last)
! 194: putchar('e');
1.6 kristaps 195: return(0);
196: }
197:
1.5 kristaps 198: /*
199: * Determine whether we're overriding our delimiter.
200: * According to POD, if we have more than one '<' followed by a
201: * space, then we need a space followed by matching '>' to close
202: * the expression.
203: * Otherwise we use the usual '<' and '>' matched pair.
204: */
205: i = *start + 1;
206: while (i < end && '<' == buf[i])
207: i++;
208: assert(i > *start + 1);
209: dsz = i - (*start + 1);
210: if (dsz > 1 && (i >= end || ' ' != buf[i]))
211: dsz = 1;
212:
213: /* Remember, if dsz>1, to jump the trailing space. */
214: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 215:
216: /*
1.6 kristaps 217: * Escapes and ignored codes (NULL and INDEX) don't print macro
218: * sequences, so just output them like normal text before
219: * processing for real macros.
1.1 schwarze 220: */
221: if (FMT_ESCAPE == fmt) {
222: formatescape(buf, start, end);
223: return(0);
224: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 225: /*
1.6 kristaps 226: * Just consume til the end delimiter, accounting for
227: * whether it's a custom one.
1.5 kristaps 228: */
229: for ( ; *start < end; (*start)++) {
230: if ('>' != buf[*start])
231: continue;
232: else if (dsz == 1)
233: break;
234: assert(*start > 0);
235: if (' ' != buf[*start - 1])
236: continue;
237: i = *start;
238: for (j = 0; i < end && j < dsz; j++)
239: if ('>' != buf[i++])
240: break;
241: if (dsz != j)
242: continue;
243: (*start) += dsz;
244: break;
245: }
1.1 schwarze 246: return(0);
247: }
248:
1.6 kristaps 249: /*
250: * Check whether we're supposed to print macro stuff (this is
251: * suppressed in, e.g., "Nm" and "Sh" macros).
252: */
1.1 schwarze 253: if ( ! nomacro) {
254: /*
255: * Print out the macro describing this format code.
256: * If we're not "reentrant" (not yet on a macro line)
257: * then print a newline, if necessary, and the macro
258: * indicator.
259: * Otherwise, offset us with a space.
260: */
1.6 kristaps 261: if ( ! reentrant) {
262: if (last != '\n')
263: putchar('\n');
1.1 schwarze 264: putchar('.');
1.6 kristaps 265: } else
1.1 schwarze 266: putchar(' ');
267:
268: /*
1.6 kristaps 269: * If we don't have whitespace before us (and none after
270: * the opening delimiter), then suppress macro
271: * whitespace with Pf.
1.1 schwarze 272: */
1.6 kristaps 273: if (' ' != last && '\n' != last && ' ' != buf[*start])
274: printf("Pf ");
275:
1.1 schwarze 276: switch (fmt) {
277: case (FMT_ITALIC):
278: printf("Em ");
279: break;
280: case (FMT_BOLD):
281: printf("Sy ");
282: break;
283: case (FMT_CODE):
1.2 schwarze 284: printf("Qo Li ");
1.1 schwarze 285: break;
286: case (FMT_LINK):
287: printf("Lk ");
288: break;
289: case (FMT_FILE):
290: printf("Pa ");
291: break;
292: case (FMT_NBSP):
293: /* TODO. */
294: printf("No ");
295: break;
296: default:
297: abort();
298: }
299: }
300:
301: /*
1.6 kristaps 302: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 303: * find a nested format code.
1.1 schwarze 304: * Don't emit any newlines: since we're on a macro line, we
305: * don't want to break the line.
306: */
307: while (*start < end) {
1.5 kristaps 308: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 309: (*start)++;
310: break;
1.5 kristaps 311: } else if ('>' == buf[*start] &&
312: ' ' == buf[*start - 1]) {
313: /*
314: * Handle custom delimiters.
315: * These require a certain number of
316: * space-preceded carrots before we're really at
317: * the end.
318: */
319: i = *start;
320: for (j = 0; i < end && j < dsz; j++)
321: if ('>' != buf[i++])
322: break;
323: if (dsz == j) {
324: *start += dsz;
325: break;
326: }
1.1 schwarze 327: }
328: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 329: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 330: continue;
331: }
1.3 schwarze 332:
1.4 schwarze 333: /*
334: * Make sure that any macro-like words (or
335: * really any word starting with a capital
336: * letter) is assumed to be a macro that must be
337: * escaped.
338: * This matches "Xx " and "XxEOLN".
339: */
340: if ((' ' == last || '\n' == last) &&
341: end - *start > 1 &&
342: isupper((int)buf[*start]) &&
343: islower((int)buf[*start + 1]) &&
344: (end - *start == 2 ||
345: ' ' == buf[*start + 2]))
346: printf("\\&");
1.3 schwarze 347:
1.4 schwarze 348: /* Suppress newline. */
1.6 kristaps 349: if ('\n' == buf[*start])
350: putchar(last = ' ');
351: else
352: putchar(last = buf[*start]);
1.4 schwarze 353:
1.8 ! kristaps 354: /* Protect against character escapes. */
! 355: if ('\\' == last)
! 356: putchar('e');
! 357:
1.6 kristaps 358: (*start)++;
359:
360: if (' ' == last)
361: while (*start < end && ' ' == buf[*start])
362: (*start)++;
1.1 schwarze 363: }
1.2 schwarze 364:
365: if ( ! nomacro && FMT_CODE == fmt)
366: printf(" Qc ");
1.1 schwarze 367:
368: /*
1.6 kristaps 369: * We're now subsequent the format code.
370: * If there isn't a space (or newline) here, and we haven't just
371: * printed a space, then suppress space.
1.1 schwarze 372: */
1.6 kristaps 373: if ( ! nomacro && ' ' != last)
374: if (' ' != buf[*start] && '\n' != buf[*start])
375: printf(" Ns ");
1.5 kristaps 376:
1.1 schwarze 377: return(1);
378: }
379:
380: /*
381: * Calls formatcode() til the end of a paragraph.
382: */
383: static void
384: formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
385: {
386:
1.4 schwarze 387: last = ' ';
1.1 schwarze 388: while (*start < end) {
389: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 390: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 391: continue;
392: }
1.4 schwarze 393: /*
394: * Since we're already on a macro line, we want to make
395: * sure that we don't inadvertently invoke a macro.
396: * We need to do this carefully because section names
397: * are used in troff and we don't want to escape
398: * something that needn't be escaped.
399: */
400: if (' ' == last && end - *start > 1 &&
401: isupper((int)buf[*start]) &&
402: islower((int)buf[*start + 1]) &&
403: (end - *start == 2 ||
404: ' ' == buf[*start + 2]))
405: printf("\\&");
406:
1.8 ! kristaps 407: if ('\n' == buf[*start])
! 408: putchar(last = ' ');
! 409: else
1.1 schwarze 410: putchar(last = buf[*start]);
1.8 ! kristaps 411:
! 412: /* Protect against character escapes. */
! 413: if ('\\' == last)
! 414: putchar('e');
! 415:
1.1 schwarze 416: (*start)++;
417: }
418: }
419:
420: /*
1.4 schwarze 421: * Guess at what kind of list we are.
422: * These are taken straight from the POD manual.
423: * I don't know what people do in real life.
424: */
425: static enum list
426: listguess(const char *buf, size_t start, size_t end)
427: {
428: size_t len = end - start;
429:
430: assert(end >= start);
431:
432: if (len == 1 && '*' == buf[start])
433: return(LIST_BULLET);
434: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
435: return(LIST_ENUM);
436: else if (len == 1 && '1' == buf[start])
437: return(LIST_ENUM);
438: else
439: return(LIST_TAG);
440: }
441:
442: /*
1.1 schwarze 443: * A command paragraph, as noted in the perlpod manual, just indicates
444: * that we should do something, optionally with some text to print as
445: * well.
446: */
447: static void
448: command(struct state *st, const char *buf, size_t start, size_t end)
449: {
450: size_t len, csz;
451: enum cmd cmd;
452:
453: assert('=' == buf[start]);
454: start++;
455: len = end - start;
456:
457: for (cmd = 0; cmd < CMD__MAX; cmd++) {
458: csz = strlen(cmds[cmd]);
459: if (len < csz)
460: continue;
461: if (0 == memcmp(&buf[start], cmd[cmds], csz))
462: break;
463: }
464:
465: /* Ignore bogus commands. */
466:
467: if (CMD__MAX == cmd)
468: return;
469:
470: start += csz;
1.8 ! kristaps 471: while (start < end && ' ' == buf[start])
! 472: start++;
! 473:
1.1 schwarze 474: len = end - start;
475:
476: if (st->paused) {
477: st->paused = CMD_END != cmd;
478: return;
479: }
480:
481: switch (cmd) {
482: case (CMD_POD):
483: break;
484: case (CMD_HEAD1):
485: /*
486: * The behaviour of head= follows from a quick glance at
487: * how pod2man handles it.
488: */
489: printf(".Sh ");
490: st->isname = 0;
491: if (end - start == 4)
492: if (0 == memcmp(&buf[start], "NAME", 4))
493: st->isname = 1;
494: formatcodeln(buf, &start, end, 1);
495: putchar('\n');
496: st->haspar = 1;
497: break;
498: case (CMD_HEAD2):
499: printf(".Ss ");
500: formatcodeln(buf, &start, end, 1);
501: putchar('\n');
502: st->haspar = 1;
503: break;
504: case (CMD_HEAD3):
505: puts(".Pp");
506: printf(".Em ");
507: formatcodeln(buf, &start, end, 0);
508: putchar('\n');
509: puts(".Pp");
510: st->haspar = 1;
511: break;
512: case (CMD_HEAD4):
513: puts(".Pp");
514: printf(".No ");
515: formatcodeln(buf, &start, end, 0);
516: putchar('\n');
517: puts(".Pp");
518: st->haspar = 1;
519: break;
520: case (CMD_OVER):
1.4 schwarze 521: /*
522: * If we have an existing list that hasn't had an =item
523: * yet, then make sure that we open it now.
524: * We use the default list type, but that can't be
525: * helped (we haven't seen any items yet).
1.1 schwarze 526: */
1.4 schwarze 527: if (st->lpos > 0)
528: if (LIST__MAX == st->lstack[st->lpos - 1]) {
529: st->lstack[st->lpos - 1] = LIST_TAG;
530: puts(".Bl -tag -width Ds");
531: }
532: st->lpos++;
533: assert(st->lpos < LIST_STACKSZ);
534: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 535: break;
536: case (CMD_ITEM):
1.6 kristaps 537: if (0 == st->lpos) {
538: /*
539: * Bad markup.
540: * Try to compensate.
541: */
542: st->lstack[st->lpos] = LIST__MAX;
543: st->lpos++;
544: }
1.4 schwarze 545: assert(st->lpos > 0);
546: /*
547: * If we're the first =item, guess at what our content
548: * will be: "*" is a bullet list, "1." is a numbered
549: * list, and everything is tagged.
550: */
551: if (LIST__MAX == st->lstack[st->lpos - 1]) {
552: st->lstack[st->lpos - 1] =
553: listguess(buf, start, end);
554: switch (st->lstack[st->lpos - 1]) {
555: case (LIST_BULLET):
556: puts(".Bl -bullet");
557: break;
558: case (LIST_ENUM):
559: puts(".Bl -enum");
560: break;
561: default:
562: puts(".Bl -tag -width Ds");
563: break;
564: }
565: }
566: switch (st->lstack[st->lpos - 1]) {
567: case (LIST_TAG):
568: printf(".It ");
569: formatcodeln(buf, &start, end, 0);
570: putchar('\n');
571: break;
572: case (LIST_ENUM):
573: /* FALLTHROUGH */
574: case (LIST_BULLET):
575: /*
576: * Abandon the remainder of the paragraph
577: * because we're going to be a bulletted or
578: * numbered list.
579: */
580: puts(".It");
581: break;
582: default:
583: abort();
584: }
1.1 schwarze 585: st->haspar = 1;
586: break;
587: case (CMD_BACK):
1.4 schwarze 588: /* Make sure we don't back over the stack. */
589: if (st->lpos > 0) {
590: st->lpos--;
591: puts(".El");
592: }
1.1 schwarze 593: break;
594: case (CMD_BEGIN):
595: /*
596: * We disregard all types for now.
597: * TODO: process at least "text" in a -literal block.
598: */
599: st->paused = 1;
600: break;
601: case (CMD_FOR):
602: /*
603: * We ignore all types of encodings and formats
604: * unilaterally.
605: */
606: break;
607: case (CMD_ENCODING):
608: break;
609: case (CMD_CUT):
610: st->parsing = 0;
611: return;
612: default:
613: abort();
614: }
615:
616: /* Any command (but =cut) makes us start parsing. */
617: st->parsing = 1;
618: }
619:
620: /*
621: * Just pump out the line in a verbatim block.
622: */
623: static void
624: verbatim(struct state *st, const char *buf, size_t start, size_t end)
625: {
1.8 ! kristaps 626: int last;
1.1 schwarze 627:
628: if ( ! st->parsing || st->paused)
629: return;
630:
631: puts(".Bd -literal");
1.8 ! kristaps 632: for (last = ' '; start < end; start++) {
! 633: /*
! 634: * Handle accidental macros (newline starting with
! 635: * control character) and escapes.
! 636: */
! 637: if ('\n' == last)
1.7 kristaps 638: if ('.' == buf[start] || '\'' == buf[start])
639: printf("\\&");
1.8 ! kristaps 640: putchar(last = buf[start]);
! 641: if ('\\' == buf[start])
! 642: printf("e");
1.7 kristaps 643: }
644: putchar('\n');
1.1 schwarze 645: puts(".Ed");
646: }
647:
648: /*
649: * Ordinary paragraph.
650: * Well, this is really the hardest--POD seems to assume that, for
651: * example, a leading space implies a newline, and so on.
652: * Lots of other snakes in the grass: escaping a newline followed by a
653: * period (accidental mdoc(7) control), double-newlines after macro
654: * passages, etc.
655: */
656: static void
657: ordinary(struct state *st, const char *buf, size_t start, size_t end)
658: {
659: size_t i, j;
660:
661: if ( ! st->parsing || st->paused)
662: return;
663:
664: /*
665: * Special-case: the NAME section.
666: * If we find a "-" when searching from the end, assume that
667: * we're in "name - description" format.
668: * To wit, print out a "Nm" and "Nd" in that format.
669: */
670: if (st->isname) {
671: for (i = end - 1; i > start; i--)
672: if ('-' == buf[i])
673: break;
674: if ('-' == buf[i]) {
675: j = i;
676: /* Roll over multiple "-". */
677: for ( ; i > start; i--)
678: if ('-' != buf[i])
679: break;
1.5 kristaps 680: printf(".Nm ");
681: formatcodeln(buf, &start, i + 1, 1);
682: putchar('\n');
683: start = j + 1;
684: printf(".Nd ");
685: formatcodeln(buf, &start, end, 1);
686: putchar('\n');
1.1 schwarze 687: return;
688: }
689: }
690:
691: if ( ! st->haspar)
692: puts(".Pp");
693:
694: st->haspar = 0;
695: last = '\n';
696:
697: while (start < end) {
698: /*
699: * Loop til we get either to a newline or escape.
700: * Escape initial control characters.
701: */
702: while (start < end) {
703: if (start < end - 1 && '<' == buf[start + 1])
704: break;
705: else if ('\n' == buf[start])
706: break;
707: else if ('\n' == last && '.' == buf[start])
708: printf("\\&");
709: else if ('\n' == last && '\'' == buf[start])
710: printf("\\&");
711: putchar(last = buf[start++]);
1.8 ! kristaps 712: if ('\\' == last)
! 713: putchar('e');
1.1 schwarze 714: }
715:
716: if (start < end - 1 && '<' == buf[start + 1]) {
717: /*
718: * We've encountered a format code.
719: * This is going to trigger a macro no matter
720: * what, so print a newline now.
721: * Then print the (possibly nested) macros and
722: * following that, a newline.
1.8 ! kristaps 723: * Consume all whitespace so we don't
! 724: * accidentally start an implicit literal line.
1.1 schwarze 725: */
1.6 kristaps 726: if (formatcode(buf, &start, end, 0, 0)) {
1.1 schwarze 727: putchar(last = '\n');
1.6 kristaps 728: while (start < end && ' ' == buf[start])
729: start++;
730: }
1.1 schwarze 731: } else if (start < end && '\n' == buf[start]) {
732: /*
733: * Print the newline only if we haven't already
734: * printed a newline.
735: */
736: if (last != '\n')
737: putchar(last = buf[start]);
738: if (++start >= end)
739: continue;
740: /*
741: * If we have whitespace next, eat it to prevent
742: * mdoc(7) from thinking that it's meant for
743: * verbatim text.
744: * It is--but if we start with that, we can't
745: * have a macro subsequent it, which may be
746: * possible if we have an escape next.
747: */
748: if (' ' == buf[start] || '\t' == buf[start]) {
749: puts(".br");
750: last = '\n';
751: }
752: for ( ; start < end; start++)
753: if (' ' != buf[start] && '\t' != buf[start])
754: break;
755: } else if (start < end) {
756: /*
757: * Default: print the character.
758: * Escape initial control characters.
759: */
760: if ('\n' == last && '.' == buf[start])
761: printf("\\&");
762: else if ('\n' == last && '\'' == buf[start])
763: printf("\\&");
764: putchar(last = buf[start++]);
1.8 ! kristaps 765: if ('\\' == last)
! 766: putchar('e');
1.1 schwarze 767: }
768: }
769:
770: if (last != '\n')
771: putchar('\n');
772: }
773:
/*
 * Dispatch one paragraph, buf[start, end), according to its first
 * character: leading whitespace makes it verbatim, a leading "=" makes
 * it a command, and everything else is an ordinary paragraph.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
792:
793: /*
794: * Loop around paragraphs within a document, processing each one in the
795: * POD way.
796: */
797: static void
798: dofile(const struct args *args, const char *fname,
799: const struct tm *tm, const char *buf, size_t sz)
800: {
801: size_t sup, end, i, cur = 0;
802: struct state st;
803: const char *section, *date;
804: char datebuf[64];
805: char *title, *cp;
806:
807: if (0 == sz)
808: return;
809:
810: /* Title is last path component of the filename. */
811:
812: if (NULL != args->title)
813: title = strdup(args->title);
814: else if (NULL != (cp = strrchr(fname, '/')))
815: title = strdup(cp + 1);
816: else
817: title = strdup(fname);
818:
819: if (NULL == title) {
820: perror(NULL);
821: exit(EXIT_FAILURE);
822: }
823:
824: /* Section is 1 unless suffix is "pm". */
825:
826: if (NULL == (section = args->section)) {
827: section = "1";
828: if (NULL != (cp = strrchr(title, '.'))) {
829: *cp++ = '\0';
830: if (0 == strcmp(cp, "pm"))
831: section = "3p";
832: }
833: }
834:
835: /* Date. Or the given "tm" if not supplied. */
836:
837: if (NULL == (date = args->date)) {
838: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
839: date = datebuf;
840: }
841:
842: for (cp = title; '\0' != *cp; cp++)
843: *cp = toupper((int)*cp);
844:
845: /* The usual mdoc(7) preamble. */
846:
847: printf(".Dd %s\n", date);
848: printf(".Dt %s %s\n", title, section);
849: puts(".Os");
850:
851: free(title);
852:
853: memset(&st, 0, sizeof(struct state));
854: assert(sz > 0);
855:
856: /* Main loop over file contents. */
857:
858: while (cur < sz) {
859: /* Read until next paragraph. */
860: for (i = cur + 1; i < sz; i++)
861: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
862: /* Consume blank paragraphs. */
863: while (i + 1 < sz && '\n' == buf[i + 1])
864: i++;
865: break;
866: }
867:
868: /* Adjust end marker for EOF. */
869: end = i < sz ? i - 1 :
870: ('\n' == buf[sz - 1] ? sz - 1 : sz);
871: sup = i < sz ? end + 2 : sz;
872:
873: /* Process paragraph and adjust start. */
874: dopar(&st, buf, cur, end);
875: cur = sup;
876: }
877: }
878:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, using the file's modification time as the document date
 * (or the current time for stdin or if fstat(2) fails).
 * Returns 1 on success and 0 if the file cannot be opened or read;
 * exits if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first: close(2) may clobber errno. */
		perror(fname);
		free(buf);
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
947:
948: int
949: main(int argc, char *argv[])
950: {
951: const char *fname, *name;
952: struct args args;
953: int c;
954:
955: name = strrchr(argv[0], '/');
956: if (name == NULL)
957: name = argv[0];
958: else
959: ++name;
960:
961: memset(&args, 0, sizeof(struct args));
962: fname = "-";
963:
964: /* Accept no arguments for now. */
965:
966: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
967: switch (c) {
968: case ('h'):
969: /* FALLTHROUGH */
970: case ('l'):
971: /* FALLTHROUGH */
972: case ('c'):
973: /* FALLTHROUGH */
974: case ('o'):
975: /* FALLTHROUGH */
976: case ('q'):
977: /* FALLTHROUGH */
978: case ('r'):
979: /* FALLTHROUGH */
980: case ('u'):
981: /* FALLTHROUGH */
982: case ('v'):
983: /* Ignore these. */
984: break;
985: case ('d'):
986: args.date = optarg;
987: break;
988: case ('n'):
989: args.title = optarg;
990: break;
991: case ('s'):
992: args.section = optarg;
993: break;
994: default:
995: goto usage;
996: }
997:
998: argc -= optind;
999: argv += optind;
1000:
1001: /* Accept only a single input file. */
1002:
1003: if (argc > 2)
1004: return(EXIT_FAILURE);
1005: else if (1 == argc)
1006: fname = *argv;
1007:
1008: return(readfile(&args, fname) ?
1009: EXIT_SUCCESS : EXIT_FAILURE);
1010:
1011: usage:
1012: fprintf(stderr, "usage: %s [-d date] "
1013: "[-n title] [-s section]\n", name);
1014:
1015: return(EXIT_FAILURE);
1016: }
CVSweb