/* Annotation of pod2mdoc/pod2mdoc.c, Revision 1.7 */
/*	$Id: pod2mdoc.c,v 1.6 2014/03/23 23:23:38 kristaps Exp $ */
/*
 * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/stat.h>
#include <sys/time.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means "not given on the command line".
 */
struct	args {
	const char	*title; /* override "Dt" title */
	const char	*date; /* override "Dd" date */
	const char	*section; /* override "Dt" section */
};

/*
 * Kinds of list that an =over block can turn into.
 * LIST__MAX doubles as "type not decided yet" on the list stack.
 */
enum	list {
	LIST_BULLET = 0,
	LIST_ENUM,
	LIST_TAG,
	LIST__MAX
};

1.1 schwarze 42: struct state {
43: int parsing; /* after =cut of before command */
44: int paused; /* in =begin and before =end */
45: int haspar; /* in paragraph: do we need Pp? */
46: int isname; /* are we the NAME section? */
47: const char *fname; /* file being parsed */
1.4 schwarze 48: #define LIST_STACKSZ 128
49: enum list lstack[LIST_STACKSZ]; /* open lists */
50: size_t lpos; /* where in list stack */
1.1 schwarze 51: };
52:
/*
 * POD format codes (the "X" in "X<...>").
 * The order must match the fmts[] letter table.
 */
enum	fmt {
	FMT_ITALIC,
	FMT_BOLD,
	FMT_CODE,
	FMT_LINK,
	FMT_ESCAPE,
	FMT_FILE,
	FMT_NBSP,
	FMT_INDEX,
	FMT_NULL,
	FMT__MAX
};

/*
 * POD command paragraphs ("=name").
 * The order must match the cmds[] name table.
 */
enum	cmd {
	CMD_POD = 0,
	CMD_HEAD1,
	CMD_HEAD2,
	CMD_HEAD3,
	CMD_HEAD4,
	CMD_OVER,
	CMD_ITEM,
	CMD_BACK,
	CMD_BEGIN,
	CMD_END,
	CMD_FOR,
	CMD_ENCODING,
	CMD_CUT,
	CMD__MAX
};

83: static const char *const cmds[CMD__MAX] = {
84: "pod", /* CMD_POD */
85: "head1", /* CMD_HEAD1 */
86: "head2", /* CMD_HEAD2 */
87: "head3", /* CMD_HEAD3 */
88: "head4", /* CMD_HEAD4 */
89: "over", /* CMD_OVER */
90: "item", /* CMD_ITEM */
91: "back", /* CMD_BACK */
92: "begin", /* CMD_BEGIN */
93: "end", /* CMD_END */
94: "for", /* CMD_FOR */
95: "encoding", /* CMD_ENCODING */
96: "cut" /* CMD_CUT */
97: };
98:
99: static const char fmts[FMT__MAX] = {
100: 'I', /* FMT_ITALIC */
101: 'B', /* FMT_BOLD */
102: 'C', /* FMT_CODE */
103: 'L', /* FMT_LINK */
104: 'E', /* FMT_ESCAPE */
105: 'F', /* FMT_FILE */
106: 'S', /* FMT_NBSP */
107: 'X', /* FMT_INDEX */
108: 'Z' /* FMT_NULL */
109: };
110:
/*
 * Last character written to standard output by the formatting routines
 * (or a stand-in for it, e.g. 'a' after an escape sequence).
 * Used to decide on newlines, "Pf"/"Ns" and "\&" escaping.
 */
static	int	 last;

1.1 schwarze 113: /*
114: * Given buf[*start] is at the start of an escape name, read til the end
115: * of the escape ('>') then try to do something with it.
116: * Sets start to be one after the '>'.
117: */
118: static void
119: formatescape(const char *buf, size_t *start, size_t end)
120: {
121: char esc[16]; /* no more needed */
122: size_t i, max;
123:
124: max = sizeof(esc) - 1;
125: i = 0;
126: /* Read til our buffer is full. */
127: while (*start < end && '>' != buf[*start] && i < max)
128: esc[i++] = buf[(*start)++];
129: esc[i] = '\0';
130:
131: if (i == max) {
132: /* Too long... skip til we end. */
133: while (*start < end && '>' != buf[*start])
134: (*start)++;
135: return;
136: } else if (*start >= end)
137: return;
138:
139: assert('>' == buf[*start]);
140: (*start)++;
141:
142: /*
143: * TODO: right now, we only recognise the named escapes.
144: * Just let the rest of them go.
145: */
1.6 kristaps 146: if (0 == strcmp(esc, "lt"))
1.1 schwarze 147: printf("\\(la");
148: else if (0 == strcmp(esc, "gt"))
149: printf("\\(ra");
150: else if (0 == strcmp(esc, "vb"))
151: printf("\\(ba");
152: else if (0 == strcmp(esc, "sol"))
153: printf("\\(sl");
1.6 kristaps 154: else
155: return;
156:
157: last = 'a';
1.1 schwarze 158: }
159:
/*
 * Advance *start past any run of ' ' characters, never beyond "end".
 * Only the space character is skipped, not tabs or newlines.
 * Returns non-zero if at least one space was skipped.
 */
static int
skipspace(const char *buf, size_t *start, size_t end)
{
	size_t		 sv = *start;

	while (*start < end && ' ' == buf[*start])
		(*start)++;

	return(*start > sv);
}

174: /*
175: * We're at the character in front of a format code, which is structured
176: * like X<...> and can contain nested format codes.
177: * This consumes the whole format code, and any nested format codes, til
178: * the end of matched production.
179: * If "reentrant", then we're being called after a macro has already
180: * been printed to the current line.
1.6 kristaps 181: * If "nomacro", then we don't print any macros, just contained data
182: * (e.g., following "Sh" or "Nm").
183: * Return whether we've printed a macro or not--in other words, whether
184: * this should trigger a subsequent newline (this should be ignored when
185: * reentrant).
1.1 schwarze 186: */
187: static int
188: formatcode(const char *buf, size_t *start,
1.6 kristaps 189: size_t end, int reentrant, int nomacro)
1.1 schwarze 190: {
191: enum fmt fmt;
1.5 kristaps 192: size_t i, j, dsz;
1.1 schwarze 193:
194: assert(*start + 1 < end);
195: assert('<' == buf[*start + 1]);
196:
1.6 kristaps 197: /*
198: * First, look up the format code.
199: * If it's not valid, then exit immediately.
200: */
201: for (fmt = 0; fmt < FMT__MAX; fmt++)
202: if (buf[*start] == fmts[fmt])
203: break;
204:
205: if (FMT__MAX == fmt) {
206: putchar(last = buf[(*start)++]);
207: return(0);
208: }
209:
1.5 kristaps 210: /*
211: * Determine whether we're overriding our delimiter.
212: * According to POD, if we have more than one '<' followed by a
213: * space, then we need a space followed by matching '>' to close
214: * the expression.
215: * Otherwise we use the usual '<' and '>' matched pair.
216: */
217: i = *start + 1;
218: while (i < end && '<' == buf[i])
219: i++;
220: assert(i > *start + 1);
221: dsz = i - (*start + 1);
222: if (dsz > 1 && (i >= end || ' ' != buf[i]))
223: dsz = 1;
224:
225: /* Remember, if dsz>1, to jump the trailing space. */
226: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 227:
228: /*
1.6 kristaps 229: * Escapes and ignored codes (NULL and INDEX) don't print macro
230: * sequences, so just output them like normal text before
231: * processing for real macros.
1.1 schwarze 232: */
233: if (FMT_ESCAPE == fmt) {
234: formatescape(buf, start, end);
235: return(0);
236: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 237: /*
1.6 kristaps 238: * Just consume til the end delimiter, accounting for
239: * whether it's a custom one.
1.5 kristaps 240: */
241: for ( ; *start < end; (*start)++) {
242: if ('>' != buf[*start])
243: continue;
244: else if (dsz == 1)
245: break;
246: assert(*start > 0);
247: if (' ' != buf[*start - 1])
248: continue;
249: i = *start;
250: for (j = 0; i < end && j < dsz; j++)
251: if ('>' != buf[i++])
252: break;
253: if (dsz != j)
254: continue;
255: (*start) += dsz;
256: break;
257: }
1.1 schwarze 258: return(0);
259: }
260:
1.6 kristaps 261: /*
262: * Check whether we're supposed to print macro stuff (this is
263: * suppressed in, e.g., "Nm" and "Sh" macros).
264: */
1.1 schwarze 265: if ( ! nomacro) {
266: /*
267: * Print out the macro describing this format code.
268: * If we're not "reentrant" (not yet on a macro line)
269: * then print a newline, if necessary, and the macro
270: * indicator.
271: * Otherwise, offset us with a space.
272: */
1.6 kristaps 273: if ( ! reentrant) {
274: if (last != '\n')
275: putchar('\n');
1.1 schwarze 276: putchar('.');
1.6 kristaps 277: } else
1.1 schwarze 278: putchar(' ');
279:
280: /*
1.6 kristaps 281: * If we don't have whitespace before us (and none after
282: * the opening delimiter), then suppress macro
283: * whitespace with Pf.
1.1 schwarze 284: */
1.6 kristaps 285: if (' ' != last && '\n' != last && ' ' != buf[*start])
286: printf("Pf ");
287:
1.1 schwarze 288: switch (fmt) {
289: case (FMT_ITALIC):
290: printf("Em ");
291: break;
292: case (FMT_BOLD):
293: printf("Sy ");
294: break;
295: case (FMT_CODE):
1.2 schwarze 296: printf("Qo Li ");
1.1 schwarze 297: break;
298: case (FMT_LINK):
299: printf("Lk ");
300: break;
301: case (FMT_FILE):
302: printf("Pa ");
303: break;
304: case (FMT_NBSP):
305: /* TODO. */
306: printf("No ");
307: break;
308: default:
309: abort();
310: }
311: }
312:
313: /*
1.6 kristaps 314: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 315: * find a nested format code.
1.1 schwarze 316: * Don't emit any newlines: since we're on a macro line, we
317: * don't want to break the line.
318: */
319: while (*start < end) {
1.5 kristaps 320: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 321: (*start)++;
322: break;
1.5 kristaps 323: } else if ('>' == buf[*start] &&
324: ' ' == buf[*start - 1]) {
325: /*
326: * Handle custom delimiters.
327: * These require a certain number of
328: * space-preceded carrots before we're really at
329: * the end.
330: */
331: i = *start;
332: for (j = 0; i < end && j < dsz; j++)
333: if ('>' != buf[i++])
334: break;
335: if (dsz == j) {
336: *start += dsz;
337: break;
338: }
1.1 schwarze 339: }
340: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 341: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 342: continue;
343: }
1.3 schwarze 344:
1.4 schwarze 345: /*
346: * Make sure that any macro-like words (or
347: * really any word starting with a capital
348: * letter) is assumed to be a macro that must be
349: * escaped.
350: * This matches "Xx " and "XxEOLN".
351: */
352: if ((' ' == last || '\n' == last) &&
353: end - *start > 1 &&
354: isupper((int)buf[*start]) &&
355: islower((int)buf[*start + 1]) &&
356: (end - *start == 2 ||
357: ' ' == buf[*start + 2]))
358: printf("\\&");
1.3 schwarze 359:
1.4 schwarze 360: /* Suppress newline. */
1.6 kristaps 361: if ('\n' == buf[*start])
362: putchar(last = ' ');
363: else
364: putchar(last = buf[*start]);
1.4 schwarze 365:
1.6 kristaps 366: (*start)++;
367:
368: if (' ' == last)
369: while (*start < end && ' ' == buf[*start])
370: (*start)++;
1.1 schwarze 371: }
1.2 schwarze 372:
373: if ( ! nomacro && FMT_CODE == fmt)
374: printf(" Qc ");
1.1 schwarze 375:
376: /*
1.6 kristaps 377: * We're now subsequent the format code.
378: * If there isn't a space (or newline) here, and we haven't just
379: * printed a space, then suppress space.
1.1 schwarze 380: */
1.6 kristaps 381: if ( ! nomacro && ' ' != last)
382: if (' ' != buf[*start] && '\n' != buf[*start])
383: printf(" Ns ");
1.5 kristaps 384:
1.1 schwarze 385: return(1);
386: }
387:
388: /*
389: * Calls formatcode() til the end of a paragraph.
390: */
391: static void
392: formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
393: {
394:
1.4 schwarze 395: last = ' ';
1.1 schwarze 396: while (*start < end) {
397: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 398: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 399: continue;
400: }
1.4 schwarze 401: /*
402: * Since we're already on a macro line, we want to make
403: * sure that we don't inadvertently invoke a macro.
404: * We need to do this carefully because section names
405: * are used in troff and we don't want to escape
406: * something that needn't be escaped.
407: */
408: if (' ' == last && end - *start > 1 &&
409: isupper((int)buf[*start]) &&
410: islower((int)buf[*start + 1]) &&
411: (end - *start == 2 ||
412: ' ' == buf[*start + 2]))
413: printf("\\&");
414:
1.1 schwarze 415: if ('\n' != buf[*start])
416: putchar(last = buf[*start]);
1.4 schwarze 417: else
418: putchar(last = ' ');
1.1 schwarze 419: (*start)++;
420: }
421: }
422:
423: /*
1.4 schwarze 424: * Guess at what kind of list we are.
425: * These are taken straight from the POD manual.
426: * I don't know what people do in real life.
427: */
428: static enum list
429: listguess(const char *buf, size_t start, size_t end)
430: {
431: size_t len = end - start;
432:
433: assert(end >= start);
434:
435: if (len == 1 && '*' == buf[start])
436: return(LIST_BULLET);
437: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
438: return(LIST_ENUM);
439: else if (len == 1 && '1' == buf[start])
440: return(LIST_ENUM);
441: else
442: return(LIST_TAG);
443: }
444:
445: /*
1.1 schwarze 446: * A command paragraph, as noted in the perlpod manual, just indicates
447: * that we should do something, optionally with some text to print as
448: * well.
449: */
450: static void
451: command(struct state *st, const char *buf, size_t start, size_t end)
452: {
453: size_t len, csz;
454: enum cmd cmd;
455:
456: assert('=' == buf[start]);
457: start++;
458: len = end - start;
459:
460: for (cmd = 0; cmd < CMD__MAX; cmd++) {
461: csz = strlen(cmds[cmd]);
462: if (len < csz)
463: continue;
464: if (0 == memcmp(&buf[start], cmd[cmds], csz))
465: break;
466: }
467:
468: /* Ignore bogus commands. */
469:
470: if (CMD__MAX == cmd)
471: return;
472:
473: start += csz;
474: skipspace(buf, &start, end);
475: len = end - start;
476:
477: if (st->paused) {
478: st->paused = CMD_END != cmd;
479: return;
480: }
481:
482: switch (cmd) {
483: case (CMD_POD):
484: break;
485: case (CMD_HEAD1):
486: /*
487: * The behaviour of head= follows from a quick glance at
488: * how pod2man handles it.
489: */
490: printf(".Sh ");
491: st->isname = 0;
492: if (end - start == 4)
493: if (0 == memcmp(&buf[start], "NAME", 4))
494: st->isname = 1;
495: formatcodeln(buf, &start, end, 1);
496: putchar('\n');
497: st->haspar = 1;
498: break;
499: case (CMD_HEAD2):
500: printf(".Ss ");
501: formatcodeln(buf, &start, end, 1);
502: putchar('\n');
503: st->haspar = 1;
504: break;
505: case (CMD_HEAD3):
506: puts(".Pp");
507: printf(".Em ");
508: formatcodeln(buf, &start, end, 0);
509: putchar('\n');
510: puts(".Pp");
511: st->haspar = 1;
512: break;
513: case (CMD_HEAD4):
514: puts(".Pp");
515: printf(".No ");
516: formatcodeln(buf, &start, end, 0);
517: putchar('\n');
518: puts(".Pp");
519: st->haspar = 1;
520: break;
521: case (CMD_OVER):
1.4 schwarze 522: /*
523: * If we have an existing list that hasn't had an =item
524: * yet, then make sure that we open it now.
525: * We use the default list type, but that can't be
526: * helped (we haven't seen any items yet).
1.1 schwarze 527: */
1.4 schwarze 528: if (st->lpos > 0)
529: if (LIST__MAX == st->lstack[st->lpos - 1]) {
530: st->lstack[st->lpos - 1] = LIST_TAG;
531: puts(".Bl -tag -width Ds");
532: }
533: st->lpos++;
534: assert(st->lpos < LIST_STACKSZ);
535: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 536: break;
537: case (CMD_ITEM):
1.6 kristaps 538: if (0 == st->lpos) {
539: /*
540: * Bad markup.
541: * Try to compensate.
542: */
543: st->lstack[st->lpos] = LIST__MAX;
544: st->lpos++;
545: }
1.4 schwarze 546: assert(st->lpos > 0);
547: /*
548: * If we're the first =item, guess at what our content
549: * will be: "*" is a bullet list, "1." is a numbered
550: * list, and everything is tagged.
551: */
552: if (LIST__MAX == st->lstack[st->lpos - 1]) {
553: st->lstack[st->lpos - 1] =
554: listguess(buf, start, end);
555: switch (st->lstack[st->lpos - 1]) {
556: case (LIST_BULLET):
557: puts(".Bl -bullet");
558: break;
559: case (LIST_ENUM):
560: puts(".Bl -enum");
561: break;
562: default:
563: puts(".Bl -tag -width Ds");
564: break;
565: }
566: }
567: switch (st->lstack[st->lpos - 1]) {
568: case (LIST_TAG):
569: printf(".It ");
570: formatcodeln(buf, &start, end, 0);
571: putchar('\n');
572: break;
573: case (LIST_ENUM):
574: /* FALLTHROUGH */
575: case (LIST_BULLET):
576: /*
577: * Abandon the remainder of the paragraph
578: * because we're going to be a bulletted or
579: * numbered list.
580: */
581: puts(".It");
582: break;
583: default:
584: abort();
585: }
1.1 schwarze 586: st->haspar = 1;
587: break;
588: case (CMD_BACK):
1.4 schwarze 589: /* Make sure we don't back over the stack. */
590: if (st->lpos > 0) {
591: st->lpos--;
592: puts(".El");
593: }
1.1 schwarze 594: break;
595: case (CMD_BEGIN):
596: /*
597: * We disregard all types for now.
598: * TODO: process at least "text" in a -literal block.
599: */
600: st->paused = 1;
601: break;
602: case (CMD_FOR):
603: /*
604: * We ignore all types of encodings and formats
605: * unilaterally.
606: */
607: break;
608: case (CMD_ENCODING):
609: break;
610: case (CMD_CUT):
611: st->parsing = 0;
612: return;
613: default:
614: abort();
615: }
616:
617: /* Any command (but =cut) makes us start parsing. */
618: st->parsing = 1;
619: }
620:
621: /*
622: * Just pump out the line in a verbatim block.
623: */
624: static void
625: verbatim(struct state *st, const char *buf, size_t start, size_t end)
626: {
1.7 ! kristaps 627: size_t sv = start;
1.1 schwarze 628:
629: if ( ! st->parsing || st->paused)
630: return;
631:
632: puts(".Bd -literal");
1.7 ! kristaps 633: while (start < end) {
! 634: if (start > sv && '\n' == buf[start - 1])
! 635: if ('.' == buf[start] || '\'' == buf[start])
! 636: printf("\\&");
! 637: putchar(buf[start++]);
! 638: }
! 639: putchar('\n');
1.1 schwarze 640: puts(".Ed");
641: }
642:
643: /*
644: * Ordinary paragraph.
645: * Well, this is really the hardest--POD seems to assume that, for
646: * example, a leading space implies a newline, and so on.
647: * Lots of other snakes in the grass: escaping a newline followed by a
648: * period (accidental mdoc(7) control), double-newlines after macro
649: * passages, etc.
650: */
651: static void
652: ordinary(struct state *st, const char *buf, size_t start, size_t end)
653: {
654: size_t i, j;
655:
656: if ( ! st->parsing || st->paused)
657: return;
658:
659: /*
660: * Special-case: the NAME section.
661: * If we find a "-" when searching from the end, assume that
662: * we're in "name - description" format.
663: * To wit, print out a "Nm" and "Nd" in that format.
664: */
665: if (st->isname) {
666: for (i = end - 1; i > start; i--)
667: if ('-' == buf[i])
668: break;
669: if ('-' == buf[i]) {
670: j = i;
671: /* Roll over multiple "-". */
672: for ( ; i > start; i--)
673: if ('-' != buf[i])
674: break;
1.5 kristaps 675: printf(".Nm ");
676: formatcodeln(buf, &start, i + 1, 1);
677: putchar('\n');
678: start = j + 1;
679: printf(".Nd ");
680: formatcodeln(buf, &start, end, 1);
681: putchar('\n');
1.1 schwarze 682: return;
683: }
684: }
685:
686: if ( ! st->haspar)
687: puts(".Pp");
688:
689: st->haspar = 0;
690: last = '\n';
691:
692: while (start < end) {
693: /*
694: * Loop til we get either to a newline or escape.
695: * Escape initial control characters.
696: */
697: while (start < end) {
698: if (start < end - 1 && '<' == buf[start + 1])
699: break;
700: else if ('\n' == buf[start])
701: break;
702: else if ('\n' == last && '.' == buf[start])
703: printf("\\&");
704: else if ('\n' == last && '\'' == buf[start])
705: printf("\\&");
706: putchar(last = buf[start++]);
707: }
708:
709: if (start < end - 1 && '<' == buf[start + 1]) {
710: /*
711: * We've encountered a format code.
712: * This is going to trigger a macro no matter
713: * what, so print a newline now.
714: * Then print the (possibly nested) macros and
715: * following that, a newline.
716: */
1.6 kristaps 717: if (formatcode(buf, &start, end, 0, 0)) {
1.1 schwarze 718: putchar(last = '\n');
1.6 kristaps 719: while (start < end && ' ' == buf[start])
720: start++;
721: }
1.1 schwarze 722: } else if (start < end && '\n' == buf[start]) {
723: /*
724: * Print the newline only if we haven't already
725: * printed a newline.
726: */
727: if (last != '\n')
728: putchar(last = buf[start]);
729: if (++start >= end)
730: continue;
731: /*
732: * If we have whitespace next, eat it to prevent
733: * mdoc(7) from thinking that it's meant for
734: * verbatim text.
735: * It is--but if we start with that, we can't
736: * have a macro subsequent it, which may be
737: * possible if we have an escape next.
738: */
739: if (' ' == buf[start] || '\t' == buf[start]) {
740: puts(".br");
741: last = '\n';
742: }
743: for ( ; start < end; start++)
744: if (' ' != buf[start] && '\t' != buf[start])
745: break;
746: } else if (start < end) {
747: /*
748: * Default: print the character.
749: * Escape initial control characters.
750: */
751: if ('\n' == last && '.' == buf[start])
752: printf("\\&");
753: else if ('\n' == last && '\'' == buf[start])
754: printf("\\&");
755: putchar(last = buf[start++]);
756: }
757: }
758:
759: if (last != '\n')
760: putchar('\n');
761: }
762:
/*
 * There are three kinds of paragraphs: verbatim (starts with a space
 * or tab), command (starts with "="), or ordinary (anything else).
 * Dispatch buf[start..end) to the matching handler.
 * Empty paragraphs are ignored.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (end == start)
		return;

	if (' ' == buf[start] || '\t' == buf[start])
		verbatim(st, buf, start, end);
	else if ('=' == buf[start])
		command(st, buf, start, end);
	else
		ordinary(st, buf, start, end);
}

782: /*
783: * Loop around paragraphs within a document, processing each one in the
784: * POD way.
785: */
786: static void
787: dofile(const struct args *args, const char *fname,
788: const struct tm *tm, const char *buf, size_t sz)
789: {
790: size_t sup, end, i, cur = 0;
791: struct state st;
792: const char *section, *date;
793: char datebuf[64];
794: char *title, *cp;
795:
796: if (0 == sz)
797: return;
798:
799: /* Title is last path component of the filename. */
800:
801: if (NULL != args->title)
802: title = strdup(args->title);
803: else if (NULL != (cp = strrchr(fname, '/')))
804: title = strdup(cp + 1);
805: else
806: title = strdup(fname);
807:
808: if (NULL == title) {
809: perror(NULL);
810: exit(EXIT_FAILURE);
811: }
812:
813: /* Section is 1 unless suffix is "pm". */
814:
815: if (NULL == (section = args->section)) {
816: section = "1";
817: if (NULL != (cp = strrchr(title, '.'))) {
818: *cp++ = '\0';
819: if (0 == strcmp(cp, "pm"))
820: section = "3p";
821: }
822: }
823:
824: /* Date. Or the given "tm" if not supplied. */
825:
826: if (NULL == (date = args->date)) {
827: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
828: date = datebuf;
829: }
830:
831: for (cp = title; '\0' != *cp; cp++)
832: *cp = toupper((int)*cp);
833:
834: /* The usual mdoc(7) preamble. */
835:
836: printf(".Dd %s\n", date);
837: printf(".Dt %s %s\n", title, section);
838: puts(".Os");
839:
840: free(title);
841:
842: memset(&st, 0, sizeof(struct state));
843: assert(sz > 0);
844:
845: /* Main loop over file contents. */
846:
847: while (cur < sz) {
848: /* Read until next paragraph. */
849: for (i = cur + 1; i < sz; i++)
850: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
851: /* Consume blank paragraphs. */
852: while (i + 1 < sz && '\n' == buf[i + 1])
853: i++;
854: break;
855: }
856:
857: /* Adjust end marker for EOF. */
858: end = i < sz ? i - 1 :
859: ('\n' == buf[sz - 1] ? sz - 1 : sz);
860: sup = i < sz ? end + 2 : sz;
861:
862: /* Process paragraph and adjust start. */
863: dopar(&st, buf, cur, end);
864: cur = sup;
865: }
866: }
867:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The document date handed to dofile() is the file's modification time,
 * or the current time for stdin or if fstat(2) fails.
 * Returns 1 on success and 0 if the file could not be opened or read;
 * exits the program if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, isstdin, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	/* Remember stdin by name, not by descriptor value. */
	isstdin = 0 == strcmp("-", fname);
	fd = isstdin ? STDIN_FILENO : open(fname, O_RDONLY, 0);

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (isstdin || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	rc = 0;
	if (ssz < 0)
		perror(fname);
	else {
		dofile(args, isstdin ? "STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	/* Release the buffer and descriptor on every path. */
	free(buf);
	if ( ! isstdin)
		close(fd);
	return(rc);
}

937: int
938: main(int argc, char *argv[])
939: {
940: const char *fname, *name;
941: struct args args;
942: int c;
943:
944: name = strrchr(argv[0], '/');
945: if (name == NULL)
946: name = argv[0];
947: else
948: ++name;
949:
950: memset(&args, 0, sizeof(struct args));
951: fname = "-";
952:
953: /* Accept no arguments for now. */
954:
955: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
956: switch (c) {
957: case ('h'):
958: /* FALLTHROUGH */
959: case ('l'):
960: /* FALLTHROUGH */
961: case ('c'):
962: /* FALLTHROUGH */
963: case ('o'):
964: /* FALLTHROUGH */
965: case ('q'):
966: /* FALLTHROUGH */
967: case ('r'):
968: /* FALLTHROUGH */
969: case ('u'):
970: /* FALLTHROUGH */
971: case ('v'):
972: /* Ignore these. */
973: break;
974: case ('d'):
975: args.date = optarg;
976: break;
977: case ('n'):
978: args.title = optarg;
979: break;
980: case ('s'):
981: args.section = optarg;
982: break;
983: default:
984: goto usage;
985: }
986:
987: argc -= optind;
988: argv += optind;
989:
990: /* Accept only a single input file. */
991:
992: if (argc > 2)
993: return(EXIT_FAILURE);
994: else if (1 == argc)
995: fname = *argv;
996:
997: return(readfile(&args, fname) ?
998: EXIT_SUCCESS : EXIT_FAILURE);
999:
1000: usage:
1001: fprintf(stderr, "usage: %s [-d date] "
1002: "[-n title] [-s section]\n", name);
1003:
1004: return(EXIT_FAILURE);
1005: }
/* CVSweb */