Annotation of pod2mdoc/pod2mdoc.c, Revision 1.4
1.4 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.7 2014/03/20 00:55:35 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means the option was not given and a default is
 * derived instead.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
34:
/*
 * The kinds of mdoc(7) list that an =over block can become.
 * LIST__MAX is also stored on the list stack to mean "opened by =over
 * but no .Bl printed yet".
 */
enum list {
	LIST_BULLET = 0,	/* printed as ".Bl -bullet" */
	LIST_ENUM,		/* printed as ".Bl -enum" */
	LIST_TAG,		/* printed as ".Bl -tag -width Ds" */
	LIST__MAX		/* number of kinds; "undecided" marker */
};
! 41:
1.1 schwarze 42: struct state {
43: int parsing; /* after =cut of before command */
44: int paused; /* in =begin and before =end */
45: int haspar; /* in paragraph: do we need Pp? */
46: int isname; /* are we the NAME section? */
47: const char *fname; /* file being parsed */
1.4 ! schwarze 48: #define LIST_STACKSZ 128
! 49: enum list lstack[LIST_STACKSZ]; /* open lists */
! 50: size_t lpos; /* where in list stack */
1.1 schwarze 51: };
52:
/*
 * POD formatting codes, in the same order as the letters in fmts[].
 */
enum fmt {
	FMT_ITALIC = 0,		/* I<...> */
	FMT_BOLD,		/* B<...> */
	FMT_CODE,		/* C<...> */
	FMT_LINK,		/* L<...> */
	FMT_ESCAPE,		/* E<...> */
	FMT_FILE,		/* F<...> */
	FMT_NBSP,		/* S<...> */
	FMT_INDEX,		/* X<...> */
	FMT_NULL,		/* Z<...> */
	FMT__MAX		/* number of codes; "no match" marker */
};
65:
/*
 * POD command paragraphs, in the same order as the names in cmds[].
 */
enum cmd {
	CMD_POD = 0,		/* =pod */
	CMD_HEAD1,		/* =head1 */
	CMD_HEAD2,		/* =head2 */
	CMD_HEAD3,		/* =head3 */
	CMD_HEAD4,		/* =head4 */
	CMD_OVER,		/* =over */
	CMD_ITEM,		/* =item */
	CMD_BACK,		/* =back */
	CMD_BEGIN,		/* =begin */
	CMD_END,		/* =end */
	CMD_FOR,		/* =for */
	CMD_ENCODING,		/* =encoding */
	CMD_CUT,		/* =cut */
	CMD__MAX		/* number of commands; "no match" marker */
};
82:
83: static const char *const cmds[CMD__MAX] = {
84: "pod", /* CMD_POD */
85: "head1", /* CMD_HEAD1 */
86: "head2", /* CMD_HEAD2 */
87: "head3", /* CMD_HEAD3 */
88: "head4", /* CMD_HEAD4 */
89: "over", /* CMD_OVER */
90: "item", /* CMD_ITEM */
91: "back", /* CMD_BACK */
92: "begin", /* CMD_BEGIN */
93: "end", /* CMD_END */
94: "for", /* CMD_FOR */
95: "encoding", /* CMD_ENCODING */
96: "cut" /* CMD_CUT */
97: };
98:
99: static const char fmts[FMT__MAX] = {
100: 'I', /* FMT_ITALIC */
101: 'B', /* FMT_BOLD */
102: 'C', /* FMT_CODE */
103: 'L', /* FMT_LINK */
104: 'E', /* FMT_ESCAPE */
105: 'F', /* FMT_FILE */
106: 'S', /* FMT_NBSP */
107: 'X', /* FMT_INDEX */
108: 'Z' /* FMT_NULL */
109: };
110:
/*
 * Given buf[*start] is at the start of an escape name (the caller has
 * already stepped over the "E<"), read up to the closing '>' and print
 * the roff equivalent of the name if it is one we recognise.
 * On return, *start is one past the '>', or equal to "end" if the
 * escape was not closed before the end of the input.
 * Over-long and unrecognised names are consumed without any output.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i;
	int		 toolong;

	/* Copy the name until '>', end of input, or a full buffer. */
	i = 0;
	while (*start < end && '>' != buf[*start] && i < sizeof(esc) - 1)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	/* Skip whatever did not fit, remembering that there was some. */
	toolong = 0;
	while (*start < end && '>' != buf[*start]) {
		toolong = 1;
		(*start)++;
	}

	/* Unterminated escape: there is no '>' to step over. */
	if (*start >= end)
		return;

	/*
	 * Always consume the '>', even when the name is unusable, so
	 * that it does not leak into the output as ordinary text.
	 */
	(*start)++;
	if (toolong)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
}
153:
/*
 * Advance *start past any run of ' ' characters, never going beyond
 * "end".  Only the space character is skipped, not tabs or newlines.
 */
static void
skipspace(const char *buf, size_t *start, size_t end)
{
	size_t		 pos;

	for (pos = *start; pos < end && buf[pos] == ' '; pos++)
		continue;
	*start = pos;
}
164:
165: /*
166: * We're at the character in front of a format code, which is structured
167: * like X<...> and can contain nested format codes.
168: * This consumes the whole format code, and any nested format codes, til
169: * the end of matched production.
170: * If "reentrant", then we're being called after a macro has already
171: * been printed to the current line.
172: * "last" is set to the last read character: this is used to determine
173: * whether we should buffer with space or not.
174: * If "nomacro", then we don't print any macros, just contained data.
175: */
176: static int
177: formatcode(const char *buf, size_t *start,
178: size_t end, int reentrant, int last, int nomacro)
179: {
180: enum fmt fmt;
181:
182: assert(*start + 1 < end);
183: assert('<' == buf[*start + 1]);
184:
185: for (fmt = 0; fmt < FMT__MAX; fmt++)
186: if (buf[*start] == fmts[fmt])
187: break;
188:
189: /* Invalid macros are just regular text. */
190:
191: if (FMT__MAX == fmt) {
192: putchar(buf[*start]);
193: (*start)++;
194: return(0);
195: }
196:
197: *start += 2;
198:
199: /*
200: * Escapes don't print macro sequences, so just output them like
201: * normal text before processing for macros.
202: */
203: if (FMT_ESCAPE == fmt) {
204: formatescape(buf, start, end);
205: return(0);
206: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
207: /* For indices and nulls, just consume. */
208: while (*start < end && '>' != buf[*start])
209: (*start)++;
210: if (*start < end)
211: (*start)++;
212: return(0);
213: }
214:
215: if ( ! nomacro) {
216: /*
217: * Print out the macro describing this format code.
218: * If we're not "reentrant" (not yet on a macro line)
219: * then print a newline, if necessary, and the macro
220: * indicator.
221: * Otherwise, offset us with a space.
222: */
223: if ( ! reentrant && last != '\n')
224: putchar('\n');
225: if ( ! reentrant)
226: putchar('.');
227: else
228: putchar(' ');
229:
230: /*
231: * If we don't have whitespace before us, then suppress
232: * macro whitespace with Ns.
233: */
234: if (' ' != last)
235: printf("Ns ");
236: switch (fmt) {
237: case (FMT_ITALIC):
238: printf("Em ");
239: break;
240: case (FMT_BOLD):
241: printf("Sy ");
242: break;
243: case (FMT_CODE):
1.2 schwarze 244: printf("Qo Li ");
1.1 schwarze 245: break;
246: case (FMT_LINK):
247: printf("Lk ");
248: break;
249: case (FMT_FILE):
250: printf("Pa ");
251: break;
252: case (FMT_NBSP):
253: /* TODO. */
254: printf("No ");
255: break;
256: default:
257: abort();
258: }
259: }
260:
261: /*
262: * Read until we reach the end market ('>') or until we find a
263: * nested format code.
264: * Don't emit any newlines: since we're on a macro line, we
265: * don't want to break the line.
266: */
267: while (*start < end) {
268: if ('>' == buf[*start]) {
269: (*start)++;
270: break;
271: }
272: if (*start + 1 < end && '<' == buf[*start + 1]) {
273: formatcode(buf, start, end, 1, last, nomacro);
274: continue;
275: }
1.3 schwarze 276:
1.4 ! schwarze 277: /*
! 278: * Make sure that any macro-like words (or
! 279: * really any word starting with a capital
! 280: * letter) is assumed to be a macro that must be
! 281: * escaped.
! 282: * This matches "Xx " and "XxEOLN".
! 283: */
! 284: if ((' ' == last || '\n' == last) &&
! 285: end - *start > 1 &&
! 286: isupper((int)buf[*start]) &&
! 287: islower((int)buf[*start + 1]) &&
! 288: (end - *start == 2 ||
! 289: ' ' == buf[*start + 2]))
! 290: printf("\\&");
1.3 schwarze 291:
1.4 ! schwarze 292: /* Suppress newline. */
! 293: if ('\n' == (last = buf[(*start)++]))
1.3 schwarze 294: last = ' ';
1.4 ! schwarze 295:
1.3 schwarze 296: putchar(last);
1.1 schwarze 297: }
1.2 schwarze 298:
299: if ( ! nomacro && FMT_CODE == fmt)
300: printf(" Qc ");
1.1 schwarze 301:
302: if (reentrant)
303: return(1);
304:
305: /*
306: * If we're not reentrant, we want to put ending punctuation on
307: * the macro line so that it's properly handled by being
308: * smooshed against the terminal word.
309: */
310: skipspace(buf, start, end);
311: if (',' != buf[*start] && '.' != buf[*start] &&
312: '!' != buf[*start] && '?' != buf[*start] &&
313: ')' != buf[*start])
314: return(1);
315: while (*start < end) {
316: if (',' != buf[*start] &&
317: '.' != buf[*start] &&
318: '!' != buf[*start] &&
319: '?' != buf[*start] &&
320: ')' != buf[*start])
321: break;
322: putchar(' ');
323: putchar(buf[*start]);
324: (*start)++;
325: }
326: skipspace(buf, start, end);
327: return(1);
328: }
329:
/*
 * Print the rest of a paragraph on the current macro line, calling
 * formatcode() for every format code found on the way.
 * Newlines in the input are printed as spaces so the macro line is
 * not broken.
 * If "nomacro", format codes print only their content, no macros.
 */
static void
formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
{
	int		 last;

	last = ' ';
	while (*start < end) {
		if (*start + 1 < end && '<' == buf[*start + 1]) {
			formatcode(buf, start, end, 1, last, nomacro);
			continue;
		}
		/*
		 * Since we're already on a macro line, we want to make
		 * sure that we don't inadvertently invoke a macro.
		 * We need to do this carefully because section names
		 * are used in troff and we don't want to escape
		 * something that needn't be escaped.
		 * The <ctype.h> arguments go through unsigned char
		 * because plain char may be negative for bytes >= 0x80.
		 */
		if (' ' == last && end - *start > 1 &&
		    isupper((unsigned char)buf[*start]) &&
		    islower((unsigned char)buf[*start + 1]) &&
		    (end - *start == 2 ||
		     ' ' == buf[*start + 2]))
			printf("\\&");

		if ('\n' != buf[*start])
			putchar(last = buf[*start]);
		else
			putchar(last = ' ');
		(*start)++;
	}
}
365:
366: /*
1.4 ! schwarze 367: * Guess at what kind of list we are.
! 368: * These are taken straight from the POD manual.
! 369: * I don't know what people do in real life.
! 370: */
! 371: static enum list
! 372: listguess(const char *buf, size_t start, size_t end)
! 373: {
! 374: size_t len = end - start;
! 375:
! 376: assert(end >= start);
! 377:
! 378: if (len == 1 && '*' == buf[start])
! 379: return(LIST_BULLET);
! 380: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
! 381: return(LIST_ENUM);
! 382: else if (len == 1 && '1' == buf[start])
! 383: return(LIST_ENUM);
! 384: else
! 385: return(LIST_TAG);
! 386: }
! 387:
! 388: /*
1.1 schwarze 389: * A command paragraph, as noted in the perlpod manual, just indicates
390: * that we should do something, optionally with some text to print as
391: * well.
392: */
393: static void
394: command(struct state *st, const char *buf, size_t start, size_t end)
395: {
396: size_t len, csz;
397: enum cmd cmd;
398:
399: assert('=' == buf[start]);
400: start++;
401: len = end - start;
402:
403: for (cmd = 0; cmd < CMD__MAX; cmd++) {
404: csz = strlen(cmds[cmd]);
405: if (len < csz)
406: continue;
407: if (0 == memcmp(&buf[start], cmd[cmds], csz))
408: break;
409: }
410:
411: /* Ignore bogus commands. */
412:
413: if (CMD__MAX == cmd)
414: return;
415:
416: start += csz;
417: skipspace(buf, &start, end);
418: len = end - start;
419:
420: if (st->paused) {
421: st->paused = CMD_END != cmd;
422: return;
423: }
424:
425: switch (cmd) {
426: case (CMD_POD):
427: break;
428: case (CMD_HEAD1):
429: /*
430: * The behaviour of head= follows from a quick glance at
431: * how pod2man handles it.
432: */
433: printf(".Sh ");
434: st->isname = 0;
435: if (end - start == 4)
436: if (0 == memcmp(&buf[start], "NAME", 4))
437: st->isname = 1;
438: formatcodeln(buf, &start, end, 1);
439: putchar('\n');
440: st->haspar = 1;
441: break;
442: case (CMD_HEAD2):
443: printf(".Ss ");
444: formatcodeln(buf, &start, end, 1);
445: putchar('\n');
446: st->haspar = 1;
447: break;
448: case (CMD_HEAD3):
449: puts(".Pp");
450: printf(".Em ");
451: formatcodeln(buf, &start, end, 0);
452: putchar('\n');
453: puts(".Pp");
454: st->haspar = 1;
455: break;
456: case (CMD_HEAD4):
457: puts(".Pp");
458: printf(".No ");
459: formatcodeln(buf, &start, end, 0);
460: putchar('\n');
461: puts(".Pp");
462: st->haspar = 1;
463: break;
464: case (CMD_OVER):
1.4 ! schwarze 465: /*
! 466: * If we have an existing list that hasn't had an =item
! 467: * yet, then make sure that we open it now.
! 468: * We use the default list type, but that can't be
! 469: * helped (we haven't seen any items yet).
1.1 schwarze 470: */
1.4 ! schwarze 471: if (st->lpos > 0)
! 472: if (LIST__MAX == st->lstack[st->lpos - 1]) {
! 473: st->lstack[st->lpos - 1] = LIST_TAG;
! 474: puts(".Bl -tag -width Ds");
! 475: }
! 476: st->lpos++;
! 477: assert(st->lpos < LIST_STACKSZ);
! 478: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 479: break;
480: case (CMD_ITEM):
1.4 ! schwarze 481: assert(st->lpos > 0);
! 482: /*
! 483: * If we're the first =item, guess at what our content
! 484: * will be: "*" is a bullet list, "1." is a numbered
! 485: * list, and everything is tagged.
! 486: */
! 487: if (LIST__MAX == st->lstack[st->lpos - 1]) {
! 488: st->lstack[st->lpos - 1] =
! 489: listguess(buf, start, end);
! 490: switch (st->lstack[st->lpos - 1]) {
! 491: case (LIST_BULLET):
! 492: puts(".Bl -bullet");
! 493: break;
! 494: case (LIST_ENUM):
! 495: puts(".Bl -enum");
! 496: break;
! 497: default:
! 498: puts(".Bl -tag -width Ds");
! 499: break;
! 500: }
! 501: }
! 502: switch (st->lstack[st->lpos - 1]) {
! 503: case (LIST_TAG):
! 504: printf(".It ");
! 505: formatcodeln(buf, &start, end, 0);
! 506: putchar('\n');
! 507: break;
! 508: case (LIST_ENUM):
! 509: /* FALLTHROUGH */
! 510: case (LIST_BULLET):
! 511: /*
! 512: * Abandon the remainder of the paragraph
! 513: * because we're going to be a bulletted or
! 514: * numbered list.
! 515: */
! 516: puts(".It");
! 517: break;
! 518: default:
! 519: abort();
! 520: }
1.1 schwarze 521: st->haspar = 1;
522: break;
523: case (CMD_BACK):
1.4 ! schwarze 524: /* Make sure we don't back over the stack. */
! 525: if (st->lpos > 0) {
! 526: st->lpos--;
! 527: puts(".El");
! 528: }
1.1 schwarze 529: break;
530: case (CMD_BEGIN):
531: /*
532: * We disregard all types for now.
533: * TODO: process at least "text" in a -literal block.
534: */
535: st->paused = 1;
536: break;
537: case (CMD_FOR):
538: /*
539: * We ignore all types of encodings and formats
540: * unilaterally.
541: */
542: break;
543: case (CMD_ENCODING):
544: break;
545: case (CMD_CUT):
546: st->parsing = 0;
547: return;
548: default:
549: abort();
550: }
551:
552: /* Any command (but =cut) makes us start parsing. */
553: st->parsing = 1;
554: }
555:
556: /*
557: * Just pump out the line in a verbatim block.
558: */
559: static void
560: verbatim(struct state *st, const char *buf, size_t start, size_t end)
561: {
562:
563: if ( ! st->parsing || st->paused)
564: return;
565:
566: puts(".Bd -literal");
567: printf("%.*s\n", (int)(end - start), &buf[start]);
568: puts(".Ed");
569: }
570:
571: /*
572: * Ordinary paragraph.
573: * Well, this is really the hardest--POD seems to assume that, for
574: * example, a leading space implies a newline, and so on.
575: * Lots of other snakes in the grass: escaping a newline followed by a
576: * period (accidental mdoc(7) control), double-newlines after macro
577: * passages, etc.
578: */
579: static void
580: ordinary(struct state *st, const char *buf, size_t start, size_t end)
581: {
582: int last;
583: size_t i, j;
584:
585: if ( ! st->parsing || st->paused)
586: return;
587:
588: /*
589: * Special-case: the NAME section.
590: * If we find a "-" when searching from the end, assume that
591: * we're in "name - description" format.
592: * To wit, print out a "Nm" and "Nd" in that format.
593: */
594: if (st->isname) {
595: for (i = end - 1; i > start; i--)
596: if ('-' == buf[i])
597: break;
598: if ('-' == buf[i]) {
599: j = i;
600: /* Roll over multiple "-". */
601: for ( ; i > start; i--)
602: if ('-' != buf[i])
603: break;
1.4 ! schwarze 604: /* FIXME: escape macro-like words etc. */
1.1 schwarze 605: printf(".Nm %.*s\n",
606: (int)((i + 1) - start), &buf[start]);
607: printf(".Nd %.*s\n",
608: (int)(end - (j + 1)), &buf[j + 1]);
609: return;
610: }
611: }
612:
613: if ( ! st->haspar)
614: puts(".Pp");
615:
616: st->haspar = 0;
617: last = '\n';
618:
619: while (start < end) {
620: /*
621: * Loop til we get either to a newline or escape.
622: * Escape initial control characters.
623: */
624: while (start < end) {
625: if (start < end - 1 && '<' == buf[start + 1])
626: break;
627: else if ('\n' == buf[start])
628: break;
629: else if ('\n' == last && '.' == buf[start])
630: printf("\\&");
631: else if ('\n' == last && '\'' == buf[start])
632: printf("\\&");
633: putchar(last = buf[start++]);
634: }
635:
636: if (start < end - 1 && '<' == buf[start + 1]) {
637: /*
638: * We've encountered a format code.
639: * This is going to trigger a macro no matter
640: * what, so print a newline now.
641: * Then print the (possibly nested) macros and
642: * following that, a newline.
643: */
644: if (formatcode(buf, &start, end, 0, last, 0))
645: putchar(last = '\n');
646: } else if (start < end && '\n' == buf[start]) {
647: /*
648: * Print the newline only if we haven't already
649: * printed a newline.
650: */
651: if (last != '\n')
652: putchar(last = buf[start]);
653: if (++start >= end)
654: continue;
655: /*
656: * If we have whitespace next, eat it to prevent
657: * mdoc(7) from thinking that it's meant for
658: * verbatim text.
659: * It is--but if we start with that, we can't
660: * have a macro subsequent it, which may be
661: * possible if we have an escape next.
662: */
663: if (' ' == buf[start] || '\t' == buf[start]) {
664: puts(".br");
665: last = '\n';
666: }
667: for ( ; start < end; start++)
668: if (' ' != buf[start] && '\t' != buf[start])
669: break;
670: } else if (start < end) {
671: /*
672: * Default: print the character.
673: * Escape initial control characters.
674: */
675: if ('\n' == last && '.' == buf[start])
676: printf("\\&");
677: else if ('\n' == last && '\'' == buf[start])
678: printf("\\&");
679: putchar(last = buf[start++]);
680: }
681: }
682:
683: if (last != '\n')
684: putchar('\n');
685: }
686:
/*
 * Dispatch one paragraph, buf[start] up to but not including buf[end],
 * according to its first character: a space or tab makes it verbatim,
 * "=" makes it a command, and anything else is ordinary text.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
705:
706: /*
707: * Loop around paragraphs within a document, processing each one in the
708: * POD way.
709: */
710: static void
711: dofile(const struct args *args, const char *fname,
712: const struct tm *tm, const char *buf, size_t sz)
713: {
714: size_t sup, end, i, cur = 0;
715: struct state st;
716: const char *section, *date;
717: char datebuf[64];
718: char *title, *cp;
719:
720: if (0 == sz)
721: return;
722:
723: /* Title is last path component of the filename. */
724:
725: if (NULL != args->title)
726: title = strdup(args->title);
727: else if (NULL != (cp = strrchr(fname, '/')))
728: title = strdup(cp + 1);
729: else
730: title = strdup(fname);
731:
732: if (NULL == title) {
733: perror(NULL);
734: exit(EXIT_FAILURE);
735: }
736:
737: /* Section is 1 unless suffix is "pm". */
738:
739: if (NULL == (section = args->section)) {
740: section = "1";
741: if (NULL != (cp = strrchr(title, '.'))) {
742: *cp++ = '\0';
743: if (0 == strcmp(cp, "pm"))
744: section = "3p";
745: }
746: }
747:
748: /* Date. Or the given "tm" if not supplied. */
749:
750: if (NULL == (date = args->date)) {
751: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
752: date = datebuf;
753: }
754:
755: for (cp = title; '\0' != *cp; cp++)
756: *cp = toupper((int)*cp);
757:
758: /* The usual mdoc(7) preamble. */
759:
760: printf(".Dd %s\n", date);
761: printf(".Dt %s %s\n", title, section);
762: puts(".Os");
763:
764: free(title);
765:
766: memset(&st, 0, sizeof(struct state));
767: assert(sz > 0);
768:
769: /* Main loop over file contents. */
770:
771: while (cur < sz) {
772: /* Read until next paragraph. */
773: for (i = cur + 1; i < sz; i++)
774: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
775: /* Consume blank paragraphs. */
776: while (i + 1 < sz && '\n' == buf[i + 1])
777: i++;
778: break;
779: }
780:
781: /* Adjust end marker for EOF. */
782: end = i < sz ? i - 1 :
783: ('\n' == buf[sz - 1] ? sz - 1 : sz);
784: sup = i < sz ? end + 2 : sz;
785:
786: /* Process paragraph and adjust start. */
787: dopar(&st, buf, cur, end);
788: cur = sup;
789: }
790: }
791:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The default date handed to dofile() is the file's modification time,
 * or the current time for stdin or when fstat(2) fails.
 * Returns 1 on success, or 0 if the file could not be opened or read
 * (a message has been printed).  Running out of memory exits.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		perror(fname);
		free(buf);
		/* Don't leak the descriptor on the error path. */
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
860:
861: int
862: main(int argc, char *argv[])
863: {
864: const char *fname, *name;
865: struct args args;
866: int c;
867:
868: name = strrchr(argv[0], '/');
869: if (name == NULL)
870: name = argv[0];
871: else
872: ++name;
873:
874: memset(&args, 0, sizeof(struct args));
875: fname = "-";
876:
877: /* Accept no arguments for now. */
878:
879: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
880: switch (c) {
881: case ('h'):
882: /* FALLTHROUGH */
883: case ('l'):
884: /* FALLTHROUGH */
885: case ('c'):
886: /* FALLTHROUGH */
887: case ('o'):
888: /* FALLTHROUGH */
889: case ('q'):
890: /* FALLTHROUGH */
891: case ('r'):
892: /* FALLTHROUGH */
893: case ('u'):
894: /* FALLTHROUGH */
895: case ('v'):
896: /* Ignore these. */
897: break;
898: case ('d'):
899: args.date = optarg;
900: break;
901: case ('n'):
902: args.title = optarg;
903: break;
904: case ('s'):
905: args.section = optarg;
906: break;
907: default:
908: goto usage;
909: }
910:
911: argc -= optind;
912: argv += optind;
913:
914: /* Accept only a single input file. */
915:
916: if (argc > 2)
917: return(EXIT_FAILURE);
918: else if (1 == argc)
919: fname = *argv;
920:
921: return(readfile(&args, fname) ?
922: EXIT_SUCCESS : EXIT_FAILURE);
923:
924: usage:
925: fprintf(stderr, "usage: %s [-d date] "
926: "[-n title] [-s section]\n", name);
927:
928: return(EXIT_FAILURE);
929: }
CVSweb