Annotation of pod2mdoc/pod2mdoc.c, Revision 1.10
1.10 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.9 2014/03/24 01:07:30 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
1.10 ! kristaps 29: /*
! 30: * In what section can we find Perl manuals?
! 31: */
! 32: #define PERL_SECTION "3p"
! 33:
/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means that no override was supplied.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
39:
/*
 * The kinds of list we can emit.
 * LIST__MAX doubles as the "type not yet decided" marker for an =over
 * block that has not seen its first =item.
 */
enum list {
	LIST_BULLET = 0,	/* bullet list */
	LIST_ENUM = 1,		/* numbered list */
	LIST_TAG = 2,		/* tagged list (the fallback) */
	LIST__MAX = 3
};
46:
1.1 schwarze 47: struct state {
48: int parsing; /* after =cut of before command */
49: int paused; /* in =begin and before =end */
50: int haspar; /* in paragraph: do we need Pp? */
51: int isname; /* are we the NAME section? */
52: const char *fname; /* file being parsed */
1.4 schwarze 53: #define LIST_STACKSZ 128
54: enum list lstack[LIST_STACKSZ]; /* open lists */
55: size_t lpos; /* where in list stack */
1.1 schwarze 56: };
57:
/*
 * POD formatting codes, in the same order as the fmts[] letter table.
 */
enum fmt {
	FMT_ITALIC = 0,		/* I<> */
	FMT_BOLD = 1,		/* B<> */
	FMT_CODE = 2,		/* C<> */
	FMT_LINK = 3,		/* L<> */
	FMT_ESCAPE = 4,		/* E<> */
	FMT_FILE = 5,		/* F<> */
	FMT_NBSP = 6,		/* S<> */
	FMT_INDEX = 7,		/* X<> */
	FMT_NULL = 8,		/* Z<> */
	FMT__MAX = 9
};
70:
/*
 * POD command paragraphs, in the same order as the cmds[] name table.
 */
enum cmd {
	CMD_POD = 0,		/* =pod */
	CMD_HEAD1 = 1,		/* =head1 */
	CMD_HEAD2 = 2,		/* =head2 */
	CMD_HEAD3 = 3,		/* =head3 */
	CMD_HEAD4 = 4,		/* =head4 */
	CMD_OVER = 5,		/* =over */
	CMD_ITEM = 6,		/* =item */
	CMD_BACK = 7,		/* =back */
	CMD_BEGIN = 8,		/* =begin */
	CMD_END = 9,		/* =end */
	CMD_FOR = 10,		/* =for */
	CMD_ENCODING = 11,	/* =encoding */
	CMD_CUT = 12,		/* =cut */
	CMD__MAX = 13
};
87:
88: static const char *const cmds[CMD__MAX] = {
89: "pod", /* CMD_POD */
90: "head1", /* CMD_HEAD1 */
91: "head2", /* CMD_HEAD2 */
92: "head3", /* CMD_HEAD3 */
93: "head4", /* CMD_HEAD4 */
94: "over", /* CMD_OVER */
95: "item", /* CMD_ITEM */
96: "back", /* CMD_BACK */
97: "begin", /* CMD_BEGIN */
98: "end", /* CMD_END */
99: "for", /* CMD_FOR */
100: "encoding", /* CMD_ENCODING */
101: "cut" /* CMD_CUT */
102: };
103:
104: static const char fmts[FMT__MAX] = {
105: 'I', /* FMT_ITALIC */
106: 'B', /* FMT_BOLD */
107: 'C', /* FMT_CODE */
108: 'L', /* FMT_LINK */
109: 'E', /* FMT_ESCAPE */
110: 'F', /* FMT_FILE */
111: 'S', /* FMT_NBSP */
112: 'X', /* FMT_INDEX */
113: 'Z' /* FMT_NULL */
114: };
115:
/*
 * The character most recently written to standard output by the
 * formatting routines (or a stand-in for it, e.g. after an escape
 * sequence has been printed).
 * Consulted to decide on newlines, spacing macros and control-character
 * escaping.
 */
static int last = 0;
117:
1.1 schwarze 118: /*
119: * Given buf[*start] is at the start of an escape name, read til the end
120: * of the escape ('>') then try to do something with it.
121: * Sets start to be one after the '>'.
122: */
123: static void
124: formatescape(const char *buf, size_t *start, size_t end)
125: {
126: char esc[16]; /* no more needed */
127: size_t i, max;
128:
129: max = sizeof(esc) - 1;
130: i = 0;
131: /* Read til our buffer is full. */
132: while (*start < end && '>' != buf[*start] && i < max)
133: esc[i++] = buf[(*start)++];
134: esc[i] = '\0';
135:
136: if (i == max) {
137: /* Too long... skip til we end. */
138: while (*start < end && '>' != buf[*start])
139: (*start)++;
140: return;
141: } else if (*start >= end)
142: return;
143:
144: assert('>' == buf[*start]);
145: (*start)++;
146:
147: /*
148: * TODO: right now, we only recognise the named escapes.
149: * Just let the rest of them go.
150: */
1.6 kristaps 151: if (0 == strcmp(esc, "lt"))
1.1 schwarze 152: printf("\\(la");
153: else if (0 == strcmp(esc, "gt"))
154: printf("\\(ra");
155: else if (0 == strcmp(esc, "vb"))
156: printf("\\(ba");
157: else if (0 == strcmp(esc, "sol"))
158: printf("\\(sl");
1.6 kristaps 159: else
160: return;
161:
162: last = 'a';
1.1 schwarze 163: }
164:
165: /*
1.9 kristaps 166: * Run some heuristics to intuit a link format.
167: * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
168: * or a general UNIX foo(5) manpage.
169: * If I recognise one, I set "start" to be the end of the sequence so
170: * that the caller can safely just continue processing.
171: * Otherwise, I don't touch "start".
172: */
173: static int
174: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
175: {
176: size_t sv, nstart, nend, i, j;
177: int hasdouble;
178:
179: /*
180: * Scan to the start of the terminus.
181: * This function is more or less replicated in the formatcode()
182: * for null or index formatting codes.
183: */
184: hasdouble = 0;
185: for (sv = nstart = *start; nstart < end; nstart++) {
186: /* Do we have a double-colon? */
187: if (':' == buf[nstart] &&
188: nstart > sv &&
189: ':' == buf[nstart - 1])
190: hasdouble = 1;
191: if ('>' != buf[nstart])
192: continue;
193: else if (dsz == 1)
194: break;
195: assert(nstart > 0);
196: if (' ' != buf[nstart - 1])
197: continue;
198: i = nstart;
199: for (j = 0; i < end && j < dsz; j++)
200: if ('>' != buf[i++])
201: break;
202: if (dsz == j)
203: break;
204: }
205:
206: /* We don't care about stubs. */
207: if (nstart == end || nstart == *start)
208: return(0);
209:
210: /* Set nend to the end of content. */
211: nend = nstart;
212: if (dsz > 1)
213: nend--;
214:
215: /*
216: * Provide for some common invocations of the link primitive.
217: * First, allow us to link to other Perl manuals.
218: */
219: if (hasdouble)
1.10 ! kristaps 220: printf("Xr %.*s " PERL_SECTION,
1.9 kristaps 221: (int)(nend - sv), &buf[sv]);
222: else if (nend - sv > 3 && isalnum(buf[sv]) &&
223: ')' == buf[nend - 1] &&
224: isdigit((int)buf[nend - 2]) &&
225: '(' == buf[nend - 3])
226: printf("Xr %.*s %c",
227: (int)(nend - 3 - sv),
228: &buf[sv], buf[nend - 2]);
229: else
230: return(0);
231:
232: *start = nstart;
233: return(1);
234: }
235:
236: /*
1.1 schwarze 237: * We're at the character in front of a format code, which is structured
238: * like X<...> and can contain nested format codes.
239: * This consumes the whole format code, and any nested format codes, til
240: * the end of matched production.
241: * If "reentrant", then we're being called after a macro has already
242: * been printed to the current line.
1.6 kristaps 243: * If "nomacro", then we don't print any macros, just contained data
244: * (e.g., following "Sh" or "Nm").
245: * Return whether we've printed a macro or not--in other words, whether
246: * this should trigger a subsequent newline (this should be ignored when
247: * reentrant).
1.1 schwarze 248: */
249: static int
250: formatcode(const char *buf, size_t *start,
1.6 kristaps 251: size_t end, int reentrant, int nomacro)
1.1 schwarze 252: {
253: enum fmt fmt;
1.5 kristaps 254: size_t i, j, dsz;
1.1 schwarze 255:
256: assert(*start + 1 < end);
257: assert('<' == buf[*start + 1]);
258:
1.6 kristaps 259: /*
260: * First, look up the format code.
261: * If it's not valid, then exit immediately.
262: */
263: for (fmt = 0; fmt < FMT__MAX; fmt++)
264: if (buf[*start] == fmts[fmt])
265: break;
266:
267: if (FMT__MAX == fmt) {
268: putchar(last = buf[(*start)++]);
1.8 kristaps 269: if ('\\' == last)
270: putchar('e');
1.6 kristaps 271: return(0);
272: }
273:
1.5 kristaps 274: /*
275: * Determine whether we're overriding our delimiter.
276: * According to POD, if we have more than one '<' followed by a
277: * space, then we need a space followed by matching '>' to close
278: * the expression.
279: * Otherwise we use the usual '<' and '>' matched pair.
280: */
281: i = *start + 1;
282: while (i < end && '<' == buf[i])
283: i++;
284: assert(i > *start + 1);
285: dsz = i - (*start + 1);
286: if (dsz > 1 && (i >= end || ' ' != buf[i]))
287: dsz = 1;
288:
289: /* Remember, if dsz>1, to jump the trailing space. */
290: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 291:
292: /*
1.6 kristaps 293: * Escapes and ignored codes (NULL and INDEX) don't print macro
294: * sequences, so just output them like normal text before
295: * processing for real macros.
1.1 schwarze 296: */
297: if (FMT_ESCAPE == fmt) {
298: formatescape(buf, start, end);
299: return(0);
300: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 301: /*
1.6 kristaps 302: * Just consume til the end delimiter, accounting for
303: * whether it's a custom one.
1.5 kristaps 304: */
305: for ( ; *start < end; (*start)++) {
306: if ('>' != buf[*start])
307: continue;
308: else if (dsz == 1)
309: break;
310: assert(*start > 0);
311: if (' ' != buf[*start - 1])
312: continue;
313: i = *start;
314: for (j = 0; i < end && j < dsz; j++)
315: if ('>' != buf[i++])
316: break;
317: if (dsz != j)
318: continue;
319: (*start) += dsz;
320: break;
321: }
1.1 schwarze 322: return(0);
323: }
324:
1.6 kristaps 325: /*
326: * Check whether we're supposed to print macro stuff (this is
327: * suppressed in, e.g., "Nm" and "Sh" macros).
328: */
1.1 schwarze 329: if ( ! nomacro) {
330: /*
331: * Print out the macro describing this format code.
332: * If we're not "reentrant" (not yet on a macro line)
333: * then print a newline, if necessary, and the macro
334: * indicator.
335: * Otherwise, offset us with a space.
336: */
1.6 kristaps 337: if ( ! reentrant) {
338: if (last != '\n')
339: putchar('\n');
1.1 schwarze 340: putchar('.');
1.6 kristaps 341: } else
1.1 schwarze 342: putchar(' ');
343:
344: /*
1.6 kristaps 345: * If we don't have whitespace before us (and none after
346: * the opening delimiter), then suppress macro
347: * whitespace with Pf.
1.1 schwarze 348: */
1.6 kristaps 349: if (' ' != last && '\n' != last && ' ' != buf[*start])
350: printf("Pf ");
351:
1.1 schwarze 352: switch (fmt) {
353: case (FMT_ITALIC):
354: printf("Em ");
355: break;
356: case (FMT_BOLD):
357: printf("Sy ");
358: break;
359: case (FMT_CODE):
1.2 schwarze 360: printf("Qo Li ");
1.1 schwarze 361: break;
362: case (FMT_LINK):
1.9 kristaps 363: if ( ! trylink(buf, start, end, dsz))
364: printf("No ");
1.1 schwarze 365: break;
366: case (FMT_FILE):
367: printf("Pa ");
368: break;
369: case (FMT_NBSP):
370: printf("No ");
371: break;
372: default:
373: abort();
374: }
375: }
376:
377: /*
1.6 kristaps 378: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 379: * find a nested format code.
1.1 schwarze 380: * Don't emit any newlines: since we're on a macro line, we
381: * don't want to break the line.
382: */
383: while (*start < end) {
1.5 kristaps 384: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 385: (*start)++;
386: break;
1.5 kristaps 387: } else if ('>' == buf[*start] &&
388: ' ' == buf[*start - 1]) {
389: /*
390: * Handle custom delimiters.
391: * These require a certain number of
392: * space-preceded carrots before we're really at
393: * the end.
394: */
395: i = *start;
396: for (j = 0; i < end && j < dsz; j++)
397: if ('>' != buf[i++])
398: break;
399: if (dsz == j) {
400: *start += dsz;
401: break;
402: }
1.1 schwarze 403: }
404: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 405: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 406: continue;
407: }
1.3 schwarze 408:
1.4 schwarze 409: /*
410: * Make sure that any macro-like words (or
411: * really any word starting with a capital
412: * letter) is assumed to be a macro that must be
413: * escaped.
414: * This matches "Xx " and "XxEOLN".
415: */
416: if ((' ' == last || '\n' == last) &&
417: end - *start > 1 &&
418: isupper((int)buf[*start]) &&
419: islower((int)buf[*start + 1]) &&
420: (end - *start == 2 ||
421: ' ' == buf[*start + 2]))
422: printf("\\&");
1.3 schwarze 423:
1.4 schwarze 424: /* Suppress newline. */
1.6 kristaps 425: if ('\n' == buf[*start])
426: putchar(last = ' ');
427: else
428: putchar(last = buf[*start]);
1.4 schwarze 429:
1.8 kristaps 430: /* Protect against character escapes. */
431: if ('\\' == last)
432: putchar('e');
433:
1.6 kristaps 434: (*start)++;
435:
436: if (' ' == last)
437: while (*start < end && ' ' == buf[*start])
438: (*start)++;
1.1 schwarze 439: }
1.2 schwarze 440:
441: if ( ! nomacro && FMT_CODE == fmt)
442: printf(" Qc ");
1.1 schwarze 443:
444: /*
1.6 kristaps 445: * We're now subsequent the format code.
446: * If there isn't a space (or newline) here, and we haven't just
447: * printed a space, then suppress space.
1.1 schwarze 448: */
1.6 kristaps 449: if ( ! nomacro && ' ' != last)
450: if (' ' != buf[*start] && '\n' != buf[*start])
451: printf(" Ns ");
1.5 kristaps 452:
1.1 schwarze 453: return(1);
454: }
455:
456: /*
457: * Calls formatcode() til the end of a paragraph.
458: */
459: static void
460: formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
461: {
462:
1.4 schwarze 463: last = ' ';
1.1 schwarze 464: while (*start < end) {
465: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 466: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 467: continue;
468: }
1.4 schwarze 469: /*
470: * Since we're already on a macro line, we want to make
471: * sure that we don't inadvertently invoke a macro.
472: * We need to do this carefully because section names
473: * are used in troff and we don't want to escape
474: * something that needn't be escaped.
475: */
476: if (' ' == last && end - *start > 1 &&
477: isupper((int)buf[*start]) &&
478: islower((int)buf[*start + 1]) &&
479: (end - *start == 2 ||
480: ' ' == buf[*start + 2]))
481: printf("\\&");
482:
1.8 kristaps 483: if ('\n' == buf[*start])
484: putchar(last = ' ');
485: else
1.1 schwarze 486: putchar(last = buf[*start]);
1.8 kristaps 487:
488: /* Protect against character escapes. */
489: if ('\\' == last)
490: putchar('e');
491:
1.1 schwarze 492: (*start)++;
493: }
494: }
495:
496: /*
1.4 schwarze 497: * Guess at what kind of list we are.
498: * These are taken straight from the POD manual.
499: * I don't know what people do in real life.
500: */
501: static enum list
502: listguess(const char *buf, size_t start, size_t end)
503: {
504: size_t len = end - start;
505:
506: assert(end >= start);
507:
508: if (len == 1 && '*' == buf[start])
509: return(LIST_BULLET);
510: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
511: return(LIST_ENUM);
512: else if (len == 1 && '1' == buf[start])
513: return(LIST_ENUM);
514: else
515: return(LIST_TAG);
516: }
517:
518: /*
1.1 schwarze 519: * A command paragraph, as noted in the perlpod manual, just indicates
520: * that we should do something, optionally with some text to print as
521: * well.
522: */
523: static void
524: command(struct state *st, const char *buf, size_t start, size_t end)
525: {
526: size_t len, csz;
527: enum cmd cmd;
528:
529: assert('=' == buf[start]);
530: start++;
531: len = end - start;
532:
533: for (cmd = 0; cmd < CMD__MAX; cmd++) {
534: csz = strlen(cmds[cmd]);
535: if (len < csz)
536: continue;
537: if (0 == memcmp(&buf[start], cmd[cmds], csz))
538: break;
539: }
540:
541: /* Ignore bogus commands. */
542:
543: if (CMD__MAX == cmd)
544: return;
545:
546: start += csz;
1.8 kristaps 547: while (start < end && ' ' == buf[start])
548: start++;
549:
1.1 schwarze 550: len = end - start;
551:
552: if (st->paused) {
553: st->paused = CMD_END != cmd;
554: return;
555: }
556:
557: switch (cmd) {
558: case (CMD_POD):
559: break;
560: case (CMD_HEAD1):
561: /*
562: * The behaviour of head= follows from a quick glance at
563: * how pod2man handles it.
564: */
565: printf(".Sh ");
566: st->isname = 0;
567: if (end - start == 4)
568: if (0 == memcmp(&buf[start], "NAME", 4))
569: st->isname = 1;
570: formatcodeln(buf, &start, end, 1);
571: putchar('\n');
572: st->haspar = 1;
573: break;
574: case (CMD_HEAD2):
575: printf(".Ss ");
576: formatcodeln(buf, &start, end, 1);
577: putchar('\n');
578: st->haspar = 1;
579: break;
580: case (CMD_HEAD3):
581: puts(".Pp");
582: printf(".Em ");
583: formatcodeln(buf, &start, end, 0);
584: putchar('\n');
585: puts(".Pp");
586: st->haspar = 1;
587: break;
588: case (CMD_HEAD4):
589: puts(".Pp");
590: printf(".No ");
591: formatcodeln(buf, &start, end, 0);
592: putchar('\n');
593: puts(".Pp");
594: st->haspar = 1;
595: break;
596: case (CMD_OVER):
1.4 schwarze 597: /*
598: * If we have an existing list that hasn't had an =item
599: * yet, then make sure that we open it now.
600: * We use the default list type, but that can't be
601: * helped (we haven't seen any items yet).
1.1 schwarze 602: */
1.4 schwarze 603: if (st->lpos > 0)
604: if (LIST__MAX == st->lstack[st->lpos - 1]) {
605: st->lstack[st->lpos - 1] = LIST_TAG;
606: puts(".Bl -tag -width Ds");
607: }
608: st->lpos++;
609: assert(st->lpos < LIST_STACKSZ);
610: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 611: break;
612: case (CMD_ITEM):
1.6 kristaps 613: if (0 == st->lpos) {
614: /*
615: * Bad markup.
616: * Try to compensate.
617: */
618: st->lstack[st->lpos] = LIST__MAX;
619: st->lpos++;
620: }
1.4 schwarze 621: assert(st->lpos > 0);
622: /*
623: * If we're the first =item, guess at what our content
624: * will be: "*" is a bullet list, "1." is a numbered
625: * list, and everything is tagged.
626: */
627: if (LIST__MAX == st->lstack[st->lpos - 1]) {
628: st->lstack[st->lpos - 1] =
629: listguess(buf, start, end);
630: switch (st->lstack[st->lpos - 1]) {
631: case (LIST_BULLET):
632: puts(".Bl -bullet");
633: break;
634: case (LIST_ENUM):
635: puts(".Bl -enum");
636: break;
637: default:
638: puts(".Bl -tag -width Ds");
639: break;
640: }
641: }
642: switch (st->lstack[st->lpos - 1]) {
643: case (LIST_TAG):
644: printf(".It ");
645: formatcodeln(buf, &start, end, 0);
646: putchar('\n');
647: break;
648: case (LIST_ENUM):
649: /* FALLTHROUGH */
650: case (LIST_BULLET):
651: /*
652: * Abandon the remainder of the paragraph
653: * because we're going to be a bulletted or
654: * numbered list.
655: */
656: puts(".It");
657: break;
658: default:
659: abort();
660: }
1.1 schwarze 661: st->haspar = 1;
662: break;
663: case (CMD_BACK):
1.4 schwarze 664: /* Make sure we don't back over the stack. */
665: if (st->lpos > 0) {
666: st->lpos--;
667: puts(".El");
668: }
1.1 schwarze 669: break;
670: case (CMD_BEGIN):
671: /*
672: * We disregard all types for now.
673: * TODO: process at least "text" in a -literal block.
674: */
675: st->paused = 1;
676: break;
677: case (CMD_FOR):
678: /*
679: * We ignore all types of encodings and formats
680: * unilaterally.
681: */
682: break;
683: case (CMD_ENCODING):
684: break;
685: case (CMD_CUT):
686: st->parsing = 0;
687: return;
688: default:
689: abort();
690: }
691:
692: /* Any command (but =cut) makes us start parsing. */
693: st->parsing = 1;
694: }
695:
696: /*
697: * Just pump out the line in a verbatim block.
698: */
699: static void
700: verbatim(struct state *st, const char *buf, size_t start, size_t end)
701: {
1.8 kristaps 702: int last;
1.1 schwarze 703:
704: if ( ! st->parsing || st->paused)
705: return;
706:
707: puts(".Bd -literal");
1.8 kristaps 708: for (last = ' '; start < end; start++) {
709: /*
710: * Handle accidental macros (newline starting with
711: * control character) and escapes.
712: */
713: if ('\n' == last)
1.7 kristaps 714: if ('.' == buf[start] || '\'' == buf[start])
715: printf("\\&");
1.8 kristaps 716: putchar(last = buf[start]);
717: if ('\\' == buf[start])
718: printf("e");
1.7 kristaps 719: }
720: putchar('\n');
1.1 schwarze 721: puts(".Ed");
722: }
723:
724: /*
725: * Ordinary paragraph.
726: * Well, this is really the hardest--POD seems to assume that, for
727: * example, a leading space implies a newline, and so on.
728: * Lots of other snakes in the grass: escaping a newline followed by a
729: * period (accidental mdoc(7) control), double-newlines after macro
730: * passages, etc.
731: */
732: static void
733: ordinary(struct state *st, const char *buf, size_t start, size_t end)
734: {
735: size_t i, j;
736:
737: if ( ! st->parsing || st->paused)
738: return;
739:
740: /*
741: * Special-case: the NAME section.
742: * If we find a "-" when searching from the end, assume that
743: * we're in "name - description" format.
744: * To wit, print out a "Nm" and "Nd" in that format.
745: */
746: if (st->isname) {
747: for (i = end - 1; i > start; i--)
748: if ('-' == buf[i])
749: break;
750: if ('-' == buf[i]) {
751: j = i;
752: /* Roll over multiple "-". */
753: for ( ; i > start; i--)
754: if ('-' != buf[i])
755: break;
1.5 kristaps 756: printf(".Nm ");
757: formatcodeln(buf, &start, i + 1, 1);
758: putchar('\n');
759: start = j + 1;
760: printf(".Nd ");
761: formatcodeln(buf, &start, end, 1);
762: putchar('\n');
1.1 schwarze 763: return;
764: }
765: }
766:
767: if ( ! st->haspar)
768: puts(".Pp");
769:
770: st->haspar = 0;
771: last = '\n';
772:
773: while (start < end) {
774: /*
775: * Loop til we get either to a newline or escape.
776: * Escape initial control characters.
777: */
778: while (start < end) {
779: if (start < end - 1 && '<' == buf[start + 1])
780: break;
781: else if ('\n' == buf[start])
782: break;
783: else if ('\n' == last && '.' == buf[start])
784: printf("\\&");
785: else if ('\n' == last && '\'' == buf[start])
786: printf("\\&");
787: putchar(last = buf[start++]);
1.8 kristaps 788: if ('\\' == last)
789: putchar('e');
1.1 schwarze 790: }
791:
792: if (start < end - 1 && '<' == buf[start + 1]) {
793: /*
794: * We've encountered a format code.
795: * This is going to trigger a macro no matter
796: * what, so print a newline now.
797: * Then print the (possibly nested) macros and
798: * following that, a newline.
1.8 kristaps 799: * Consume all whitespace so we don't
800: * accidentally start an implicit literal line.
1.1 schwarze 801: */
1.6 kristaps 802: if (formatcode(buf, &start, end, 0, 0)) {
1.1 schwarze 803: putchar(last = '\n');
1.6 kristaps 804: while (start < end && ' ' == buf[start])
805: start++;
806: }
1.1 schwarze 807: } else if (start < end && '\n' == buf[start]) {
808: /*
809: * Print the newline only if we haven't already
810: * printed a newline.
811: */
812: if (last != '\n')
813: putchar(last = buf[start]);
814: if (++start >= end)
815: continue;
816: /*
817: * If we have whitespace next, eat it to prevent
818: * mdoc(7) from thinking that it's meant for
819: * verbatim text.
820: * It is--but if we start with that, we can't
821: * have a macro subsequent it, which may be
822: * possible if we have an escape next.
823: */
824: if (' ' == buf[start] || '\t' == buf[start]) {
825: puts(".br");
826: last = '\n';
827: }
828: for ( ; start < end; start++)
829: if (' ' != buf[start] && '\t' != buf[start])
830: break;
831: } else if (start < end) {
832: /*
833: * Default: print the character.
834: * Escape initial control characters.
835: */
836: if ('\n' == last && '.' == buf[start])
837: printf("\\&");
838: else if ('\n' == last && '\'' == buf[start])
839: printf("\\&");
840: putchar(last = buf[start++]);
1.8 kristaps 841: if ('\\' == last)
842: putchar('e');
1.1 schwarze 843: }
844: }
845:
846: if (last != '\n')
847: putchar('\n');
848: }
849:
/*
 * Hand the paragraph buf[start..end) to the routine matching its first
 * character: a space or tab starts a verbatim paragraph, "=" starts a
 * command and anything else is an ordinary paragraph.
 * Empty paragraphs are skipped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
868:
869: /*
870: * Loop around paragraphs within a document, processing each one in the
871: * POD way.
872: */
873: static void
874: dofile(const struct args *args, const char *fname,
875: const struct tm *tm, const char *buf, size_t sz)
876: {
877: size_t sup, end, i, cur = 0;
878: struct state st;
879: const char *section, *date;
880: char datebuf[64];
881: char *title, *cp;
882:
883: if (0 == sz)
884: return;
885:
886: /* Title is last path component of the filename. */
887:
888: if (NULL != args->title)
889: title = strdup(args->title);
890: else if (NULL != (cp = strrchr(fname, '/')))
891: title = strdup(cp + 1);
892: else
893: title = strdup(fname);
894:
895: if (NULL == title) {
896: perror(NULL);
897: exit(EXIT_FAILURE);
898: }
899:
900: /* Section is 1 unless suffix is "pm". */
901:
902: if (NULL == (section = args->section)) {
903: section = "1";
904: if (NULL != (cp = strrchr(title, '.'))) {
905: *cp++ = '\0';
906: if (0 == strcmp(cp, "pm"))
1.10 ! kristaps 907: section = PERL_SECTION;
1.1 schwarze 908: }
909: }
910:
911: /* Date. Or the given "tm" if not supplied. */
912:
913: if (NULL == (date = args->date)) {
914: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
915: date = datebuf;
916: }
917:
918: for (cp = title; '\0' != *cp; cp++)
919: *cp = toupper((int)*cp);
920:
921: /* The usual mdoc(7) preamble. */
922:
923: printf(".Dd %s\n", date);
924: printf(".Dt %s %s\n", title, section);
925: puts(".Os");
926:
927: free(title);
928:
929: memset(&st, 0, sizeof(struct state));
930: assert(sz > 0);
931:
932: /* Main loop over file contents. */
933:
934: while (cur < sz) {
935: /* Read until next paragraph. */
936: for (i = cur + 1; i < sz; i++)
937: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
938: /* Consume blank paragraphs. */
939: while (i + 1 < sz && '\n' == buf[i + 1])
940: i++;
941: break;
942: }
943:
944: /* Adjust end marker for EOF. */
945: end = i < sz ? i - 1 :
946: ('\n' == buf[sz - 1] ? sz - 1 : sz);
947: sup = i < sz ? end + 2 : sz;
948:
949: /* Process paragraph and adjust start. */
950: dopar(&st, buf, cur, end);
951: cur = sup;
952: }
953: }
954:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, along with the file's modification time (or the current
 * time for standard input or if the file can't be stat'ed).
 * Returns 1 on success and 0 if the file could not be opened or read,
 * in which case a message has been printed to standard error.
 * Exits the program if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first: close() might clobber errno. */
		perror(fname);
		free(buf);
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1023:
1024: int
1025: main(int argc, char *argv[])
1026: {
1027: const char *fname, *name;
1028: struct args args;
1029: int c;
1030:
1031: name = strrchr(argv[0], '/');
1032: if (name == NULL)
1033: name = argv[0];
1034: else
1035: ++name;
1036:
1037: memset(&args, 0, sizeof(struct args));
1038: fname = "-";
1039:
1040: /* Accept no arguments for now. */
1041:
1042: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1043: switch (c) {
1044: case ('h'):
1045: /* FALLTHROUGH */
1046: case ('l'):
1047: /* FALLTHROUGH */
1048: case ('c'):
1049: /* FALLTHROUGH */
1050: case ('o'):
1051: /* FALLTHROUGH */
1052: case ('q'):
1053: /* FALLTHROUGH */
1054: case ('r'):
1055: /* FALLTHROUGH */
1056: case ('u'):
1057: /* FALLTHROUGH */
1058: case ('v'):
1059: /* Ignore these. */
1060: break;
1061: case ('d'):
1062: args.date = optarg;
1063: break;
1064: case ('n'):
1065: args.title = optarg;
1066: break;
1067: case ('s'):
1068: args.section = optarg;
1069: break;
1070: default:
1071: goto usage;
1072: }
1073:
1074: argc -= optind;
1075: argv += optind;
1076:
1077: /* Accept only a single input file. */
1078:
1079: if (argc > 2)
1080: return(EXIT_FAILURE);
1081: else if (1 == argc)
1082: fname = *argv;
1083:
1084: return(readfile(&args, fname) ?
1085: EXIT_SUCCESS : EXIT_FAILURE);
1086:
1087: usage:
1088: fprintf(stderr, "usage: %s [-d date] "
1089: "[-n title] [-s section]\n", name);
1090:
1091: return(EXIT_FAILURE);
1092: }
CVSweb