Annotation of pod2mdoc/pod2mdoc.c, Revision 1.5
1.5 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.4 2014/03/20 15:29:57 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
#include <sys/stat.h>
#include <sys/time.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
28:
/*
 * Command-line overrides for the mdoc(7) prologue.
 * main() zeroes this structure and fills members from -n, -d and -s;
 * a NULL member means that dofile() derives the value itself.
 */
struct args {
	const char	*title; /* override "Dt" title (-n) */
	const char	*date; /* override "Dd" date (-d) */
	const char	*section; /* override "Dt" section (-s) */
};
34:
/*
 * The kind of mdoc(7) list opened for an =over block.
 * LIST__MAX is additionally stored in the list stack to mark an =over
 * for which no ".Bl" has been printed yet.
 */
enum list {
	LIST_BULLET = 0, /* printed as ".Bl -bullet" */
	LIST_ENUM, /* printed as ".Bl -enum" */
	LIST_TAG, /* printed as ".Bl -tag -width Ds" */
	LIST__MAX /* count of kinds; also "not yet opened" */
};
41:
/*
 * Parser state carried from one paragraph to the next.
 * dofile() zeroes the whole structure before the first paragraph.
 */
struct state {
	int		 parsing; /* zero before any command and after =cut */
	int		 paused; /* nonzero after =begin, until =end */
	int		 haspar; /* nonzero: next paragraph needs no Pp */
	int		 isname; /* nonzero: last =head1 was "NAME" */
	const char	*fname; /* file being parsed (not set in this file) */
#define	LIST_STACKSZ	 128
	enum list	 lstack[LIST_STACKSZ]; /* open lists, innermost last */
	size_t		 lpos; /* number of entries used in lstack */
};
52:
/*
 * POD format codes, in the same order as the letters in fmts[].
 */
enum fmt {
	FMT_ITALIC, /* I<> */
	FMT_BOLD, /* B<> */
	FMT_CODE, /* C<> */
	FMT_LINK, /* L<> */
	FMT_ESCAPE, /* E<> */
	FMT_FILE, /* F<> */
	FMT_NBSP, /* S<> */
	FMT_INDEX, /* X<> */
	FMT_NULL, /* Z<> */
	FMT__MAX /* number of format codes */
};
65:
/*
 * POD command paragraphs, in the same order as the names in cmds[].
 */
enum cmd {
	CMD_POD = 0, /* =pod */
	CMD_HEAD1, /* =head1 */
	CMD_HEAD2, /* =head2 */
	CMD_HEAD3, /* =head3 */
	CMD_HEAD4, /* =head4 */
	CMD_OVER, /* =over */
	CMD_ITEM, /* =item */
	CMD_BACK, /* =back */
	CMD_BEGIN, /* =begin */
	CMD_END, /* =end */
	CMD_FOR, /* =for */
	CMD_ENCODING, /* =encoding */
	CMD_CUT, /* =cut */
	CMD__MAX /* number of commands */
};
82:
/*
 * Command names without the leading '=', indexed by enum cmd.
 * command() matches these as prefixes of the paragraph text, in order.
 */
static const char *const cmds[CMD__MAX] = {
	"pod", /* CMD_POD */
	"head1", /* CMD_HEAD1 */
	"head2", /* CMD_HEAD2 */
	"head3", /* CMD_HEAD3 */
	"head4", /* CMD_HEAD4 */
	"over", /* CMD_OVER */
	"item", /* CMD_ITEM */
	"back", /* CMD_BACK */
	"begin", /* CMD_BEGIN */
	"end", /* CMD_END */
	"for", /* CMD_FOR */
	"encoding", /* CMD_ENCODING */
	"cut" /* CMD_CUT */
};
98:
/*
 * The letter that introduces each format code, indexed by enum fmt.
 */
static const char fmts[FMT__MAX] = {
	'I', /* FMT_ITALIC */
	'B', /* FMT_BOLD */
	'C', /* FMT_CODE */
	'L', /* FMT_LINK */
	'E', /* FMT_ESCAPE */
	'F', /* FMT_FILE */
	'S', /* FMT_NBSP */
	'X', /* FMT_INDEX */
	'Z' /* FMT_NULL */
};
110:
/*
 * Process the body of an E<...> escape.
 * On entry, buf[*start] is the first character of the escape name.
 * The name is read up to the closing '>' or up to "end", whichever
 * comes first.
 * On return, *start is one past the closing '>' if one was found;
 * otherwise it equals "end".
 * Only the named escapes "lt", "gt", "vb" and "sol" produce output;
 * every other name, including over-long ones, is silently dropped.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no recognised name is longer */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/*
		 * Too long to be a name we know.
		 * Skip the remainder and the closing '>' as well, so
		 * that it is not printed as ordinary text afterwards.
		 */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
}
153:
/*
 * Advance *start over a run of ' ' characters, never going past "end".
 * Only the space character is skipped, not tabs or newlines.
 * Returns non-zero if at least one character was skipped.
 */
static int
skipspace(const char *buf, size_t *start, size_t end)
{
	size_t		 pos;
	int		 moved;

	moved = 0;
	for (pos = *start; pos < end && ' ' == buf[pos]; pos++)
		moved = 1;

	*start = pos;
	return(moved);
}
167:
168: /*
169: * We're at the character in front of a format code, which is structured
170: * like X<...> and can contain nested format codes.
171: * This consumes the whole format code, and any nested format codes, til
172: * the end of matched production.
173: * If "reentrant", then we're being called after a macro has already
174: * been printed to the current line.
175: * "last" is set to the last read character: this is used to determine
176: * whether we should buffer with space or not.
177: * If "nomacro", then we don't print any macros, just contained data.
178: */
179: static int
180: formatcode(const char *buf, size_t *start,
181: size_t end, int reentrant, int last, int nomacro)
182: {
183: enum fmt fmt;
1.5 ! kristaps 184: size_t i, j, dsz;
1.1 schwarze 185:
186: assert(*start + 1 < end);
187: assert('<' == buf[*start + 1]);
188:
1.5 ! kristaps 189: /*
! 190: * Determine whether we're overriding our delimiter.
! 191: * According to POD, if we have more than one '<' followed by a
! 192: * space, then we need a space followed by matching '>' to close
! 193: * the expression.
! 194: * Otherwise we use the usual '<' and '>' matched pair.
! 195: */
! 196: i = *start + 1;
! 197: while (i < end && '<' == buf[i])
! 198: i++;
! 199: assert(i > *start + 1);
! 200: dsz = i - (*start + 1);
! 201: if (dsz > 1 && (i >= end || ' ' != buf[i]))
! 202: dsz = 1;
! 203:
1.1 schwarze 204: for (fmt = 0; fmt < FMT__MAX; fmt++)
205: if (buf[*start] == fmts[fmt])
206: break;
207:
208: /* Invalid macros are just regular text. */
209:
210: if (FMT__MAX == fmt) {
211: putchar(buf[*start]);
212: (*start)++;
213: return(0);
214: }
215:
1.5 ! kristaps 216: /* Remember, if dsz>1, to jump the trailing space. */
! 217: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 218:
219: /*
220: * Escapes don't print macro sequences, so just output them like
221: * normal text before processing for macros.
222: */
223: if (FMT_ESCAPE == fmt) {
224: formatescape(buf, start, end);
225: return(0);
226: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 ! kristaps 227: /*
! 228: * For indices and nulls, just consume.
! 229: * Be wary of encountering custom delimiters (dsz>1),
! 230: * which require special handling.
! 231: */
! 232: for ( ; *start < end; (*start)++) {
! 233: if ('>' != buf[*start])
! 234: continue;
! 235: else if (dsz == 1)
! 236: break;
! 237: assert(*start > 0);
! 238: if (' ' != buf[*start - 1])
! 239: continue;
! 240: i = *start;
! 241: for (j = 0; i < end && j < dsz; j++)
! 242: if ('>' != buf[i++])
! 243: break;
! 244: if (dsz != j)
! 245: continue;
! 246: (*start) += dsz;
! 247: break;
! 248: }
1.1 schwarze 249: return(0);
250: }
251:
252: if ( ! nomacro) {
253: /*
254: * Print out the macro describing this format code.
255: * If we're not "reentrant" (not yet on a macro line)
256: * then print a newline, if necessary, and the macro
257: * indicator.
258: * Otherwise, offset us with a space.
259: */
260: if ( ! reentrant && last != '\n')
261: putchar('\n');
262: if ( ! reentrant)
263: putchar('.');
264: else
265: putchar(' ');
266:
267: /*
268: * If we don't have whitespace before us, then suppress
269: * macro whitespace with Ns.
270: */
271: if (' ' != last)
272: printf("Ns ");
273: switch (fmt) {
274: case (FMT_ITALIC):
275: printf("Em ");
276: break;
277: case (FMT_BOLD):
278: printf("Sy ");
279: break;
280: case (FMT_CODE):
1.2 schwarze 281: printf("Qo Li ");
1.1 schwarze 282: break;
283: case (FMT_LINK):
284: printf("Lk ");
285: break;
286: case (FMT_FILE):
287: printf("Pa ");
288: break;
289: case (FMT_NBSP):
290: /* TODO. */
291: printf("No ");
292: break;
293: default:
294: abort();
295: }
296: }
297:
298: /*
1.5 ! kristaps 299: * Read until we reach the end market (e.g., '>') or until we
! 300: * find a nested format code.
1.1 schwarze 301: * Don't emit any newlines: since we're on a macro line, we
302: * don't want to break the line.
303: */
304: while (*start < end) {
1.5 ! kristaps 305: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 306: (*start)++;
307: break;
1.5 ! kristaps 308: } else if ('>' == buf[*start] &&
! 309: ' ' == buf[*start - 1]) {
! 310: /*
! 311: * Handle custom delimiters.
! 312: * These require a certain number of
! 313: * space-preceded carrots before we're really at
! 314: * the end.
! 315: */
! 316: i = *start;
! 317: for (j = 0; i < end && j < dsz; j++)
! 318: if ('>' != buf[i++])
! 319: break;
! 320: if (dsz == j) {
! 321: *start += dsz;
! 322: break;
! 323: }
1.1 schwarze 324: }
325: if (*start + 1 < end && '<' == buf[*start + 1]) {
326: formatcode(buf, start, end, 1, last, nomacro);
327: continue;
328: }
1.3 schwarze 329:
1.4 schwarze 330: /*
331: * Make sure that any macro-like words (or
332: * really any word starting with a capital
333: * letter) is assumed to be a macro that must be
334: * escaped.
335: * This matches "Xx " and "XxEOLN".
336: */
337: if ((' ' == last || '\n' == last) &&
338: end - *start > 1 &&
339: isupper((int)buf[*start]) &&
340: islower((int)buf[*start + 1]) &&
341: (end - *start == 2 ||
342: ' ' == buf[*start + 2]))
343: printf("\\&");
1.3 schwarze 344:
1.4 schwarze 345: /* Suppress newline. */
346: if ('\n' == (last = buf[(*start)++]))
1.3 schwarze 347: last = ' ';
1.4 schwarze 348:
1.3 schwarze 349: putchar(last);
1.1 schwarze 350: }
1.2 schwarze 351:
352: if ( ! nomacro && FMT_CODE == fmt)
353: printf(" Qc ");
1.1 schwarze 354:
355: if (reentrant)
356: return(1);
357:
1.5 ! kristaps 358: /* FIXME: with the "Qc", this doens't work good. */
! 359:
1.1 schwarze 360: /*
361: * If we're not reentrant, we want to put ending punctuation on
362: * the macro line so that it's properly handled by being
363: * smooshed against the terminal word.
364: */
365: skipspace(buf, start, end);
1.5 ! kristaps 366:
1.1 schwarze 367: if (',' != buf[*start] && '.' != buf[*start] &&
368: '!' != buf[*start] && '?' != buf[*start] &&
369: ')' != buf[*start])
370: return(1);
371: while (*start < end) {
372: if (',' != buf[*start] &&
373: '.' != buf[*start] &&
374: '!' != buf[*start] &&
375: '?' != buf[*start] &&
376: ')' != buf[*start])
377: break;
378: putchar(' ');
379: putchar(buf[*start]);
380: (*start)++;
381: }
382: skipspace(buf, start, end);
383: return(1);
384: }
385:
/*
 * Print buf[*start..end) on the current output line, which already
 * carries a macro, handing every format code to formatcode().
 * Newlines in the input are printed as spaces so the line is not
 * broken.
 * "nomacro" is passed through to formatcode().
 * On return, *start is at least "end".
 */
static void
formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
{
	int		 last;

	last = ' ';
	while (*start < end) {
		if (*start + 1 < end && '<' == buf[*start + 1]) {
			formatcode(buf, start, end, 1, last, nomacro);
			continue;
		}
		/*
		 * Since we're already on a macro line, we want to make
		 * sure that we don't inadvertently invoke a macro.
		 * We need to do this carefully because section names
		 * are used in troff and we don't want to escape
		 * something that needn't be escaped.
		 * The casts keep bytes above 0x7f, which may be
		 * negative as plain char, valid for <ctype.h>.
		 */
		if (' ' == last && end - *start > 1 &&
		    isupper((unsigned char)buf[*start]) &&
		    islower((unsigned char)buf[*start + 1]) &&
		    (end - *start == 2 ||
		     ' ' == buf[*start + 2]))
			printf("\\&");

		if ('\n' != buf[*start])
			putchar(last = buf[*start]);
		else
			putchar(last = ' ');
		(*start)++;
	}
}
421:
422: /*
1.4 schwarze 423: * Guess at what kind of list we are.
424: * These are taken straight from the POD manual.
425: * I don't know what people do in real life.
426: */
427: static enum list
428: listguess(const char *buf, size_t start, size_t end)
429: {
430: size_t len = end - start;
431:
432: assert(end >= start);
433:
434: if (len == 1 && '*' == buf[start])
435: return(LIST_BULLET);
436: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
437: return(LIST_ENUM);
438: else if (len == 1 && '1' == buf[start])
439: return(LIST_ENUM);
440: else
441: return(LIST_TAG);
442: }
443:
444: /*
1.1 schwarze 445: * A command paragraph, as noted in the perlpod manual, just indicates
446: * that we should do something, optionally with some text to print as
447: * well.
448: */
449: static void
450: command(struct state *st, const char *buf, size_t start, size_t end)
451: {
452: size_t len, csz;
453: enum cmd cmd;
454:
455: assert('=' == buf[start]);
456: start++;
457: len = end - start;
458:
459: for (cmd = 0; cmd < CMD__MAX; cmd++) {
460: csz = strlen(cmds[cmd]);
461: if (len < csz)
462: continue;
463: if (0 == memcmp(&buf[start], cmd[cmds], csz))
464: break;
465: }
466:
467: /* Ignore bogus commands. */
468:
469: if (CMD__MAX == cmd)
470: return;
471:
472: start += csz;
473: skipspace(buf, &start, end);
474: len = end - start;
475:
476: if (st->paused) {
477: st->paused = CMD_END != cmd;
478: return;
479: }
480:
481: switch (cmd) {
482: case (CMD_POD):
483: break;
484: case (CMD_HEAD1):
485: /*
486: * The behaviour of head= follows from a quick glance at
487: * how pod2man handles it.
488: */
489: printf(".Sh ");
490: st->isname = 0;
491: if (end - start == 4)
492: if (0 == memcmp(&buf[start], "NAME", 4))
493: st->isname = 1;
494: formatcodeln(buf, &start, end, 1);
495: putchar('\n');
496: st->haspar = 1;
497: break;
498: case (CMD_HEAD2):
499: printf(".Ss ");
500: formatcodeln(buf, &start, end, 1);
501: putchar('\n');
502: st->haspar = 1;
503: break;
504: case (CMD_HEAD3):
505: puts(".Pp");
506: printf(".Em ");
507: formatcodeln(buf, &start, end, 0);
508: putchar('\n');
509: puts(".Pp");
510: st->haspar = 1;
511: break;
512: case (CMD_HEAD4):
513: puts(".Pp");
514: printf(".No ");
515: formatcodeln(buf, &start, end, 0);
516: putchar('\n');
517: puts(".Pp");
518: st->haspar = 1;
519: break;
520: case (CMD_OVER):
1.4 schwarze 521: /*
522: * If we have an existing list that hasn't had an =item
523: * yet, then make sure that we open it now.
524: * We use the default list type, but that can't be
525: * helped (we haven't seen any items yet).
1.1 schwarze 526: */
1.4 schwarze 527: if (st->lpos > 0)
528: if (LIST__MAX == st->lstack[st->lpos - 1]) {
529: st->lstack[st->lpos - 1] = LIST_TAG;
530: puts(".Bl -tag -width Ds");
531: }
532: st->lpos++;
533: assert(st->lpos < LIST_STACKSZ);
534: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 535: break;
536: case (CMD_ITEM):
1.4 schwarze 537: assert(st->lpos > 0);
538: /*
539: * If we're the first =item, guess at what our content
540: * will be: "*" is a bullet list, "1." is a numbered
541: * list, and everything is tagged.
542: */
543: if (LIST__MAX == st->lstack[st->lpos - 1]) {
544: st->lstack[st->lpos - 1] =
545: listguess(buf, start, end);
546: switch (st->lstack[st->lpos - 1]) {
547: case (LIST_BULLET):
548: puts(".Bl -bullet");
549: break;
550: case (LIST_ENUM):
551: puts(".Bl -enum");
552: break;
553: default:
554: puts(".Bl -tag -width Ds");
555: break;
556: }
557: }
558: switch (st->lstack[st->lpos - 1]) {
559: case (LIST_TAG):
560: printf(".It ");
561: formatcodeln(buf, &start, end, 0);
562: putchar('\n');
563: break;
564: case (LIST_ENUM):
565: /* FALLTHROUGH */
566: case (LIST_BULLET):
567: /*
568: * Abandon the remainder of the paragraph
569: * because we're going to be a bulletted or
570: * numbered list.
571: */
572: puts(".It");
573: break;
574: default:
575: abort();
576: }
1.1 schwarze 577: st->haspar = 1;
578: break;
579: case (CMD_BACK):
1.4 schwarze 580: /* Make sure we don't back over the stack. */
581: if (st->lpos > 0) {
582: st->lpos--;
583: puts(".El");
584: }
1.1 schwarze 585: break;
586: case (CMD_BEGIN):
587: /*
588: * We disregard all types for now.
589: * TODO: process at least "text" in a -literal block.
590: */
591: st->paused = 1;
592: break;
593: case (CMD_FOR):
594: /*
595: * We ignore all types of encodings and formats
596: * unilaterally.
597: */
598: break;
599: case (CMD_ENCODING):
600: break;
601: case (CMD_CUT):
602: st->parsing = 0;
603: return;
604: default:
605: abort();
606: }
607:
608: /* Any command (but =cut) makes us start parsing. */
609: st->parsing = 1;
610: }
611:
612: /*
613: * Just pump out the line in a verbatim block.
614: */
615: static void
616: verbatim(struct state *st, const char *buf, size_t start, size_t end)
617: {
618:
619: if ( ! st->parsing || st->paused)
620: return;
621:
622: puts(".Bd -literal");
623: printf("%.*s\n", (int)(end - start), &buf[start]);
624: puts(".Ed");
625: }
626:
627: /*
628: * Ordinary paragraph.
629: * Well, this is really the hardest--POD seems to assume that, for
630: * example, a leading space implies a newline, and so on.
631: * Lots of other snakes in the grass: escaping a newline followed by a
632: * period (accidental mdoc(7) control), double-newlines after macro
633: * passages, etc.
634: */
635: static void
636: ordinary(struct state *st, const char *buf, size_t start, size_t end)
637: {
638: int last;
639: size_t i, j;
640:
641: if ( ! st->parsing || st->paused)
642: return;
643:
644: /*
645: * Special-case: the NAME section.
646: * If we find a "-" when searching from the end, assume that
647: * we're in "name - description" format.
648: * To wit, print out a "Nm" and "Nd" in that format.
649: */
650: if (st->isname) {
651: for (i = end - 1; i > start; i--)
652: if ('-' == buf[i])
653: break;
654: if ('-' == buf[i]) {
655: j = i;
656: /* Roll over multiple "-". */
657: for ( ; i > start; i--)
658: if ('-' != buf[i])
659: break;
1.5 ! kristaps 660: printf(".Nm ");
! 661: formatcodeln(buf, &start, i + 1, 1);
! 662: putchar('\n');
! 663: start = j + 1;
! 664: printf(".Nd ");
! 665: formatcodeln(buf, &start, end, 1);
! 666: putchar('\n');
1.1 schwarze 667: return;
668: }
669: }
670:
671: if ( ! st->haspar)
672: puts(".Pp");
673:
674: st->haspar = 0;
675: last = '\n';
676:
677: while (start < end) {
678: /*
679: * Loop til we get either to a newline or escape.
680: * Escape initial control characters.
681: */
682: while (start < end) {
683: if (start < end - 1 && '<' == buf[start + 1])
684: break;
685: else if ('\n' == buf[start])
686: break;
687: else if ('\n' == last && '.' == buf[start])
688: printf("\\&");
689: else if ('\n' == last && '\'' == buf[start])
690: printf("\\&");
691: putchar(last = buf[start++]);
692: }
693:
694: if (start < end - 1 && '<' == buf[start + 1]) {
695: /*
696: * We've encountered a format code.
697: * This is going to trigger a macro no matter
698: * what, so print a newline now.
699: * Then print the (possibly nested) macros and
700: * following that, a newline.
701: */
702: if (formatcode(buf, &start, end, 0, last, 0))
703: putchar(last = '\n');
704: } else if (start < end && '\n' == buf[start]) {
705: /*
706: * Print the newline only if we haven't already
707: * printed a newline.
708: */
709: if (last != '\n')
710: putchar(last = buf[start]);
711: if (++start >= end)
712: continue;
713: /*
714: * If we have whitespace next, eat it to prevent
715: * mdoc(7) from thinking that it's meant for
716: * verbatim text.
717: * It is--but if we start with that, we can't
718: * have a macro subsequent it, which may be
719: * possible if we have an escape next.
720: */
721: if (' ' == buf[start] || '\t' == buf[start]) {
722: puts(".br");
723: last = '\n';
724: }
725: for ( ; start < end; start++)
726: if (' ' != buf[start] && '\t' != buf[start])
727: break;
728: } else if (start < end) {
729: /*
730: * Default: print the character.
731: * Escape initial control characters.
732: */
733: if ('\n' == last && '.' == buf[start])
734: printf("\\&");
735: else if ('\n' == last && '\'' == buf[start])
736: printf("\\&");
737: putchar(last = buf[start++]);
738: }
739: }
740:
741: if (last != '\n')
742: putchar('\n');
743: }
744:
/*
 * Dispatch one paragraph, buf[start..end), on its first character:
 * a blank or tab makes it verbatim, "=" makes it a command, and
 * anything else makes it ordinary text.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
763:
764: /*
765: * Loop around paragraphs within a document, processing each one in the
766: * POD way.
767: */
768: static void
769: dofile(const struct args *args, const char *fname,
770: const struct tm *tm, const char *buf, size_t sz)
771: {
772: size_t sup, end, i, cur = 0;
773: struct state st;
774: const char *section, *date;
775: char datebuf[64];
776: char *title, *cp;
777:
778: if (0 == sz)
779: return;
780:
781: /* Title is last path component of the filename. */
782:
783: if (NULL != args->title)
784: title = strdup(args->title);
785: else if (NULL != (cp = strrchr(fname, '/')))
786: title = strdup(cp + 1);
787: else
788: title = strdup(fname);
789:
790: if (NULL == title) {
791: perror(NULL);
792: exit(EXIT_FAILURE);
793: }
794:
795: /* Section is 1 unless suffix is "pm". */
796:
797: if (NULL == (section = args->section)) {
798: section = "1";
799: if (NULL != (cp = strrchr(title, '.'))) {
800: *cp++ = '\0';
801: if (0 == strcmp(cp, "pm"))
802: section = "3p";
803: }
804: }
805:
806: /* Date. Or the given "tm" if not supplied. */
807:
808: if (NULL == (date = args->date)) {
809: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
810: date = datebuf;
811: }
812:
813: for (cp = title; '\0' != *cp; cp++)
814: *cp = toupper((int)*cp);
815:
816: /* The usual mdoc(7) preamble. */
817:
818: printf(".Dd %s\n", date);
819: printf(".Dt %s %s\n", title, section);
820: puts(".Os");
821:
822: free(title);
823:
824: memset(&st, 0, sizeof(struct state));
825: assert(sz > 0);
826:
827: /* Main loop over file contents. */
828:
829: while (cur < sz) {
830: /* Read until next paragraph. */
831: for (i = cur + 1; i < sz; i++)
832: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
833: /* Consume blank paragraphs. */
834: while (i + 1 < sz && '\n' == buf[i + 1])
835: i++;
836: break;
837: }
838:
839: /* Adjust end marker for EOF. */
840: end = i < sz ? i - 1 :
841: ('\n' == buf[sz - 1] ? sz - 1 : sz);
842: sup = i < sz ? end + 2 : sz;
843:
844: /* Process paragraph and adjust start. */
845: dopar(&st, buf, cur, end);
846: cur = sup;
847: }
848: }
849:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The date handed to dofile() is the modification time of the file,
 * or the current time for stdin or if the file cannot be stat'ed.
 * Returns non-zero on success and zero if the file could not be
 * opened or read, after printing a message.
 * Exits the program if memory cannot be allocated.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	/* Report right away, before anything can clobber errno. */

	rc = 0;
	if (ssz < 0)
		perror(fname);
	else {
		dofile(args, STDIN_FILENO == fd ?
			"STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	/* Release the buffer and the file on every path. */

	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
918:
919: int
920: main(int argc, char *argv[])
921: {
922: const char *fname, *name;
923: struct args args;
924: int c;
925:
926: name = strrchr(argv[0], '/');
927: if (name == NULL)
928: name = argv[0];
929: else
930: ++name;
931:
932: memset(&args, 0, sizeof(struct args));
933: fname = "-";
934:
935: /* Accept no arguments for now. */
936:
937: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
938: switch (c) {
939: case ('h'):
940: /* FALLTHROUGH */
941: case ('l'):
942: /* FALLTHROUGH */
943: case ('c'):
944: /* FALLTHROUGH */
945: case ('o'):
946: /* FALLTHROUGH */
947: case ('q'):
948: /* FALLTHROUGH */
949: case ('r'):
950: /* FALLTHROUGH */
951: case ('u'):
952: /* FALLTHROUGH */
953: case ('v'):
954: /* Ignore these. */
955: break;
956: case ('d'):
957: args.date = optarg;
958: break;
959: case ('n'):
960: args.title = optarg;
961: break;
962: case ('s'):
963: args.section = optarg;
964: break;
965: default:
966: goto usage;
967: }
968:
969: argc -= optind;
970: argv += optind;
971:
972: /* Accept only a single input file. */
973:
974: if (argc > 2)
975: return(EXIT_FAILURE);
976: else if (1 == argc)
977: fname = *argv;
978:
979: return(readfile(&args, fname) ?
980: EXIT_SUCCESS : EXIT_FAILURE);
981:
982: usage:
983: fprintf(stderr, "usage: %s [-d date] "
984: "[-n title] [-s section]\n", name);
985:
986: return(EXIT_FAILURE);
987: }
CVSweb