Annotation of pod2mdoc/pod2mdoc.c, Revision 1.9
1.9 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.8 2014/03/23 23:57:38 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
#include <sys/stat.h>
#include <sys/time.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
28:
/*
 * Values given on the command line that replace parts of the mdoc(7)
 * prologue.
 * A NULL member means the option was not given.
 */
struct args {
	const char	*title;		/* "Dt" title override (-n) */
	const char	*date;		/* "Dd" date override (-d) */
	const char	*section;	/* "Dt" section override (-s) */
};
34:
/*
 * Kinds of list an =over block can turn into.
 * LIST__MAX doubles as "not decided yet" on the list stack.
 */
enum	list {
	LIST_BULLET	= 0,	/* rendered as ".Bl -bullet" */
	LIST_ENUM	= 1,	/* rendered as ".Bl -enum" */
	LIST_TAG	= 2,	/* rendered as ".Bl -tag" */
	LIST__MAX	= 3
};
41:
1.1 schwarze 42: struct state {
43: int parsing; /* after =cut of before command */
44: int paused; /* in =begin and before =end */
45: int haspar; /* in paragraph: do we need Pp? */
46: int isname; /* are we the NAME section? */
47: const char *fname; /* file being parsed */
1.4 schwarze 48: #define LIST_STACKSZ 128
49: enum list lstack[LIST_STACKSZ]; /* open lists */
50: size_t lpos; /* where in list stack */
1.1 schwarze 51: };
52:
/*
 * POD formatting codes, in the same order as the letters in fmts[].
 */
enum	fmt {
	FMT_ITALIC	= 0,	/* I<> */
	FMT_BOLD	= 1,	/* B<> */
	FMT_CODE	= 2,	/* C<> */
	FMT_LINK	= 3,	/* L<> */
	FMT_ESCAPE	= 4,	/* E<> */
	FMT_FILE	= 5,	/* F<> */
	FMT_NBSP	= 6,	/* S<> */
	FMT_INDEX	= 7,	/* X<> */
	FMT_NULL	= 8,	/* Z<> */
	FMT__MAX	= 9
};
65:
/*
 * POD command paragraphs, in the same order as the names in cmds[].
 */
enum	cmd {
	CMD_POD		= 0,
	CMD_HEAD1	= 1,
	CMD_HEAD2	= 2,
	CMD_HEAD3	= 3,
	CMD_HEAD4	= 4,
	CMD_OVER	= 5,
	CMD_ITEM	= 6,
	CMD_BACK	= 7,
	CMD_BEGIN	= 8,
	CMD_END		= 9,
	CMD_FOR		= 10,
	CMD_ENCODING	= 11,
	CMD_CUT		= 12,
	CMD__MAX	= 13
};
82:
83: static const char *const cmds[CMD__MAX] = {
84: "pod", /* CMD_POD */
85: "head1", /* CMD_HEAD1 */
86: "head2", /* CMD_HEAD2 */
87: "head3", /* CMD_HEAD3 */
88: "head4", /* CMD_HEAD4 */
89: "over", /* CMD_OVER */
90: "item", /* CMD_ITEM */
91: "back", /* CMD_BACK */
92: "begin", /* CMD_BEGIN */
93: "end", /* CMD_END */
94: "for", /* CMD_FOR */
95: "encoding", /* CMD_ENCODING */
96: "cut" /* CMD_CUT */
97: };
98:
99: static const char fmts[FMT__MAX] = {
100: 'I', /* FMT_ITALIC */
101: 'B', /* FMT_BOLD */
102: 'C', /* FMT_CODE */
103: 'L', /* FMT_LINK */
104: 'E', /* FMT_ESCAPE */
105: 'F', /* FMT_FILE */
106: 'S', /* FMT_NBSP */
107: 'X', /* FMT_INDEX */
108: 'Z' /* FMT_NULL */
109: };
110:
/*
 * The most recent character written to the output by the text
 * formatters; consulted to decide on newlines, "Pf"/"Ns" spacing and
 * the escaping of leading control characters.
 */
static	int last;
112:
1.1 schwarze 113: /*
114: * Given buf[*start] is at the start of an escape name, read til the end
115: * of the escape ('>') then try to do something with it.
116: * Sets start to be one after the '>'.
117: */
118: static void
119: formatescape(const char *buf, size_t *start, size_t end)
120: {
121: char esc[16]; /* no more needed */
122: size_t i, max;
123:
124: max = sizeof(esc) - 1;
125: i = 0;
126: /* Read til our buffer is full. */
127: while (*start < end && '>' != buf[*start] && i < max)
128: esc[i++] = buf[(*start)++];
129: esc[i] = '\0';
130:
131: if (i == max) {
132: /* Too long... skip til we end. */
133: while (*start < end && '>' != buf[*start])
134: (*start)++;
135: return;
136: } else if (*start >= end)
137: return;
138:
139: assert('>' == buf[*start]);
140: (*start)++;
141:
142: /*
143: * TODO: right now, we only recognise the named escapes.
144: * Just let the rest of them go.
145: */
1.6 kristaps 146: if (0 == strcmp(esc, "lt"))
1.1 schwarze 147: printf("\\(la");
148: else if (0 == strcmp(esc, "gt"))
149: printf("\\(ra");
150: else if (0 == strcmp(esc, "vb"))
151: printf("\\(ba");
152: else if (0 == strcmp(esc, "sol"))
153: printf("\\(sl");
1.6 kristaps 154: else
155: return;
156:
157: last = 'a';
1.1 schwarze 158: }
159:
/*
 * Run some heuristics to intuit a link format.
 * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
 * or a general UNIX foo(5) manpage.
 * "dsz" is the number of '<' that opened the code: when greater than
 * one, the code is closed by a space followed by "dsz" '>'.
 * If I recognise one, I print an "Xr" invocation, set "start" to the
 * first closing '>' so that the caller can safely just continue
 * processing, and return 1.
 * Otherwise, I don't touch "start" and return 0.
 */
static int
trylink(const char *buf, size_t *start, size_t end, size_t dsz)
{
	size_t		 sv, nstart, nend, i, j;
	int		 hasdouble;

	/*
	 * Scan to the start of the terminus.
	 * This function is more or less replicated in the formatcode()
	 * for null or index formatting codes.
	 */
	hasdouble = 0;
	for (sv = nstart = *start; nstart < end; nstart++) {
		/* Do we have a double-colon? */
		if (':' == buf[nstart] &&
		    nstart > sv &&
		    ':' == buf[nstart - 1])
			hasdouble = 1;
		if ('>' != buf[nstart])
			continue;
		else if (dsz == 1)
			break;
		assert(nstart > 0);
		if (' ' != buf[nstart - 1])
			continue;
		/* Need "dsz" consecutive '>' to really be at the end. */
		i = nstart;
		for (j = 0; i < end && j < dsz; j++)
			if ('>' != buf[i++])
				break;
		if (dsz == j)
			break;
	}

	/* We don't care about stubs. */
	if (nstart == end || nstart == *start)
		return(0);

	/* Set nend to the end of content (drop the delimiter's space). */
	nend = nstart;
	if (dsz > 1)
		nend--;

	/*
	 * Provide for some common invocations of the link primitive.
	 * First, allow us to link to other Perl manuals.
	 * The ctype(3) functions need an unsigned char value: a plain
	 * char with the high bit set would be out of their domain.
	 */
	if (hasdouble)
		printf("Xr %.*s 3p",
			(int)(nend - sv), &buf[sv]);
	else if (nend - sv > 3 &&
	    isalnum((unsigned char)buf[sv]) &&
	    ')' == buf[nend - 1] &&
	    isdigit((unsigned char)buf[nend - 2]) &&
	    '(' == buf[nend - 3])
		printf("Xr %.*s %c",
			(int)(nend - 3 - sv),
			&buf[sv], buf[nend - 2]);
	else
		return(0);

	*start = nstart;
	return(1);
}
! 230:
! 231: /*
1.1 schwarze 232: * We're at the character in front of a format code, which is structured
233: * like X<...> and can contain nested format codes.
234: * This consumes the whole format code, and any nested format codes, til
235: * the end of matched production.
236: * If "reentrant", then we're being called after a macro has already
237: * been printed to the current line.
1.6 kristaps 238: * If "nomacro", then we don't print any macros, just contained data
239: * (e.g., following "Sh" or "Nm").
240: * Return whether we've printed a macro or not--in other words, whether
241: * this should trigger a subsequent newline (this should be ignored when
242: * reentrant).
1.1 schwarze 243: */
244: static int
245: formatcode(const char *buf, size_t *start,
1.6 kristaps 246: size_t end, int reentrant, int nomacro)
1.1 schwarze 247: {
248: enum fmt fmt;
1.5 kristaps 249: size_t i, j, dsz;
1.1 schwarze 250:
251: assert(*start + 1 < end);
252: assert('<' == buf[*start + 1]);
253:
1.6 kristaps 254: /*
255: * First, look up the format code.
256: * If it's not valid, then exit immediately.
257: */
258: for (fmt = 0; fmt < FMT__MAX; fmt++)
259: if (buf[*start] == fmts[fmt])
260: break;
261:
262: if (FMT__MAX == fmt) {
263: putchar(last = buf[(*start)++]);
1.8 kristaps 264: if ('\\' == last)
265: putchar('e');
1.6 kristaps 266: return(0);
267: }
268:
1.5 kristaps 269: /*
270: * Determine whether we're overriding our delimiter.
271: * According to POD, if we have more than one '<' followed by a
272: * space, then we need a space followed by matching '>' to close
273: * the expression.
274: * Otherwise we use the usual '<' and '>' matched pair.
275: */
276: i = *start + 1;
277: while (i < end && '<' == buf[i])
278: i++;
279: assert(i > *start + 1);
280: dsz = i - (*start + 1);
281: if (dsz > 1 && (i >= end || ' ' != buf[i]))
282: dsz = 1;
283:
284: /* Remember, if dsz>1, to jump the trailing space. */
285: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 286:
287: /*
1.6 kristaps 288: * Escapes and ignored codes (NULL and INDEX) don't print macro
289: * sequences, so just output them like normal text before
290: * processing for real macros.
1.1 schwarze 291: */
292: if (FMT_ESCAPE == fmt) {
293: formatescape(buf, start, end);
294: return(0);
295: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 296: /*
1.6 kristaps 297: * Just consume til the end delimiter, accounting for
298: * whether it's a custom one.
1.5 kristaps 299: */
300: for ( ; *start < end; (*start)++) {
301: if ('>' != buf[*start])
302: continue;
303: else if (dsz == 1)
304: break;
305: assert(*start > 0);
306: if (' ' != buf[*start - 1])
307: continue;
308: i = *start;
309: for (j = 0; i < end && j < dsz; j++)
310: if ('>' != buf[i++])
311: break;
312: if (dsz != j)
313: continue;
314: (*start) += dsz;
315: break;
316: }
1.1 schwarze 317: return(0);
318: }
319:
1.6 kristaps 320: /*
321: * Check whether we're supposed to print macro stuff (this is
322: * suppressed in, e.g., "Nm" and "Sh" macros).
323: */
1.1 schwarze 324: if ( ! nomacro) {
325: /*
326: * Print out the macro describing this format code.
327: * If we're not "reentrant" (not yet on a macro line)
328: * then print a newline, if necessary, and the macro
329: * indicator.
330: * Otherwise, offset us with a space.
331: */
1.6 kristaps 332: if ( ! reentrant) {
333: if (last != '\n')
334: putchar('\n');
1.1 schwarze 335: putchar('.');
1.6 kristaps 336: } else
1.1 schwarze 337: putchar(' ');
338:
339: /*
1.6 kristaps 340: * If we don't have whitespace before us (and none after
341: * the opening delimiter), then suppress macro
342: * whitespace with Pf.
1.1 schwarze 343: */
1.6 kristaps 344: if (' ' != last && '\n' != last && ' ' != buf[*start])
345: printf("Pf ");
346:
1.1 schwarze 347: switch (fmt) {
348: case (FMT_ITALIC):
349: printf("Em ");
350: break;
351: case (FMT_BOLD):
352: printf("Sy ");
353: break;
354: case (FMT_CODE):
1.2 schwarze 355: printf("Qo Li ");
1.1 schwarze 356: break;
357: case (FMT_LINK):
1.9 ! kristaps 358: if ( ! trylink(buf, start, end, dsz))
! 359: printf("No ");
1.1 schwarze 360: break;
361: case (FMT_FILE):
362: printf("Pa ");
363: break;
364: case (FMT_NBSP):
365: printf("No ");
366: break;
367: default:
368: abort();
369: }
370: }
371:
372: /*
1.6 kristaps 373: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 374: * find a nested format code.
1.1 schwarze 375: * Don't emit any newlines: since we're on a macro line, we
376: * don't want to break the line.
377: */
378: while (*start < end) {
1.5 kristaps 379: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 380: (*start)++;
381: break;
1.5 kristaps 382: } else if ('>' == buf[*start] &&
383: ' ' == buf[*start - 1]) {
384: /*
385: * Handle custom delimiters.
386: * These require a certain number of
387: * space-preceded carrots before we're really at
388: * the end.
389: */
390: i = *start;
391: for (j = 0; i < end && j < dsz; j++)
392: if ('>' != buf[i++])
393: break;
394: if (dsz == j) {
395: *start += dsz;
396: break;
397: }
1.1 schwarze 398: }
399: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 400: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 401: continue;
402: }
1.3 schwarze 403:
1.4 schwarze 404: /*
405: * Make sure that any macro-like words (or
406: * really any word starting with a capital
407: * letter) is assumed to be a macro that must be
408: * escaped.
409: * This matches "Xx " and "XxEOLN".
410: */
411: if ((' ' == last || '\n' == last) &&
412: end - *start > 1 &&
413: isupper((int)buf[*start]) &&
414: islower((int)buf[*start + 1]) &&
415: (end - *start == 2 ||
416: ' ' == buf[*start + 2]))
417: printf("\\&");
1.3 schwarze 418:
1.4 schwarze 419: /* Suppress newline. */
1.6 kristaps 420: if ('\n' == buf[*start])
421: putchar(last = ' ');
422: else
423: putchar(last = buf[*start]);
1.4 schwarze 424:
1.8 kristaps 425: /* Protect against character escapes. */
426: if ('\\' == last)
427: putchar('e');
428:
1.6 kristaps 429: (*start)++;
430:
431: if (' ' == last)
432: while (*start < end && ' ' == buf[*start])
433: (*start)++;
1.1 schwarze 434: }
1.2 schwarze 435:
436: if ( ! nomacro && FMT_CODE == fmt)
437: printf(" Qc ");
1.1 schwarze 438:
439: /*
1.6 kristaps 440: * We're now subsequent the format code.
441: * If there isn't a space (or newline) here, and we haven't just
442: * printed a space, then suppress space.
1.1 schwarze 443: */
1.6 kristaps 444: if ( ! nomacro && ' ' != last)
445: if (' ' != buf[*start] && '\n' != buf[*start])
446: printf(" Ns ");
1.5 kristaps 447:
1.1 schwarze 448: return(1);
449: }
450:
451: /*
452: * Calls formatcode() til the end of a paragraph.
453: */
454: static void
455: formatcodeln(const char *buf, size_t *start, size_t end, int nomacro)
456: {
457:
1.4 schwarze 458: last = ' ';
1.1 schwarze 459: while (*start < end) {
460: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.6 kristaps 461: formatcode(buf, start, end, 1, nomacro);
1.1 schwarze 462: continue;
463: }
1.4 schwarze 464: /*
465: * Since we're already on a macro line, we want to make
466: * sure that we don't inadvertently invoke a macro.
467: * We need to do this carefully because section names
468: * are used in troff and we don't want to escape
469: * something that needn't be escaped.
470: */
471: if (' ' == last && end - *start > 1 &&
472: isupper((int)buf[*start]) &&
473: islower((int)buf[*start + 1]) &&
474: (end - *start == 2 ||
475: ' ' == buf[*start + 2]))
476: printf("\\&");
477:
1.8 kristaps 478: if ('\n' == buf[*start])
479: putchar(last = ' ');
480: else
1.1 schwarze 481: putchar(last = buf[*start]);
1.8 kristaps 482:
483: /* Protect against character escapes. */
484: if ('\\' == last)
485: putchar('e');
486:
1.1 schwarze 487: (*start)++;
488: }
489: }
490:
491: /*
1.4 schwarze 492: * Guess at what kind of list we are.
493: * These are taken straight from the POD manual.
494: * I don't know what people do in real life.
495: */
496: static enum list
497: listguess(const char *buf, size_t start, size_t end)
498: {
499: size_t len = end - start;
500:
501: assert(end >= start);
502:
503: if (len == 1 && '*' == buf[start])
504: return(LIST_BULLET);
505: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
506: return(LIST_ENUM);
507: else if (len == 1 && '1' == buf[start])
508: return(LIST_ENUM);
509: else
510: return(LIST_TAG);
511: }
512:
513: /*
1.1 schwarze 514: * A command paragraph, as noted in the perlpod manual, just indicates
515: * that we should do something, optionally with some text to print as
516: * well.
517: */
518: static void
519: command(struct state *st, const char *buf, size_t start, size_t end)
520: {
521: size_t len, csz;
522: enum cmd cmd;
523:
524: assert('=' == buf[start]);
525: start++;
526: len = end - start;
527:
528: for (cmd = 0; cmd < CMD__MAX; cmd++) {
529: csz = strlen(cmds[cmd]);
530: if (len < csz)
531: continue;
532: if (0 == memcmp(&buf[start], cmd[cmds], csz))
533: break;
534: }
535:
536: /* Ignore bogus commands. */
537:
538: if (CMD__MAX == cmd)
539: return;
540:
541: start += csz;
1.8 kristaps 542: while (start < end && ' ' == buf[start])
543: start++;
544:
1.1 schwarze 545: len = end - start;
546:
547: if (st->paused) {
548: st->paused = CMD_END != cmd;
549: return;
550: }
551:
552: switch (cmd) {
553: case (CMD_POD):
554: break;
555: case (CMD_HEAD1):
556: /*
557: * The behaviour of head= follows from a quick glance at
558: * how pod2man handles it.
559: */
560: printf(".Sh ");
561: st->isname = 0;
562: if (end - start == 4)
563: if (0 == memcmp(&buf[start], "NAME", 4))
564: st->isname = 1;
565: formatcodeln(buf, &start, end, 1);
566: putchar('\n');
567: st->haspar = 1;
568: break;
569: case (CMD_HEAD2):
570: printf(".Ss ");
571: formatcodeln(buf, &start, end, 1);
572: putchar('\n');
573: st->haspar = 1;
574: break;
575: case (CMD_HEAD3):
576: puts(".Pp");
577: printf(".Em ");
578: formatcodeln(buf, &start, end, 0);
579: putchar('\n');
580: puts(".Pp");
581: st->haspar = 1;
582: break;
583: case (CMD_HEAD4):
584: puts(".Pp");
585: printf(".No ");
586: formatcodeln(buf, &start, end, 0);
587: putchar('\n');
588: puts(".Pp");
589: st->haspar = 1;
590: break;
591: case (CMD_OVER):
1.4 schwarze 592: /*
593: * If we have an existing list that hasn't had an =item
594: * yet, then make sure that we open it now.
595: * We use the default list type, but that can't be
596: * helped (we haven't seen any items yet).
1.1 schwarze 597: */
1.4 schwarze 598: if (st->lpos > 0)
599: if (LIST__MAX == st->lstack[st->lpos - 1]) {
600: st->lstack[st->lpos - 1] = LIST_TAG;
601: puts(".Bl -tag -width Ds");
602: }
603: st->lpos++;
604: assert(st->lpos < LIST_STACKSZ);
605: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 606: break;
607: case (CMD_ITEM):
1.6 kristaps 608: if (0 == st->lpos) {
609: /*
610: * Bad markup.
611: * Try to compensate.
612: */
613: st->lstack[st->lpos] = LIST__MAX;
614: st->lpos++;
615: }
1.4 schwarze 616: assert(st->lpos > 0);
617: /*
618: * If we're the first =item, guess at what our content
619: * will be: "*" is a bullet list, "1." is a numbered
620: * list, and everything is tagged.
621: */
622: if (LIST__MAX == st->lstack[st->lpos - 1]) {
623: st->lstack[st->lpos - 1] =
624: listguess(buf, start, end);
625: switch (st->lstack[st->lpos - 1]) {
626: case (LIST_BULLET):
627: puts(".Bl -bullet");
628: break;
629: case (LIST_ENUM):
630: puts(".Bl -enum");
631: break;
632: default:
633: puts(".Bl -tag -width Ds");
634: break;
635: }
636: }
637: switch (st->lstack[st->lpos - 1]) {
638: case (LIST_TAG):
639: printf(".It ");
640: formatcodeln(buf, &start, end, 0);
641: putchar('\n');
642: break;
643: case (LIST_ENUM):
644: /* FALLTHROUGH */
645: case (LIST_BULLET):
646: /*
647: * Abandon the remainder of the paragraph
648: * because we're going to be a bulletted or
649: * numbered list.
650: */
651: puts(".It");
652: break;
653: default:
654: abort();
655: }
1.1 schwarze 656: st->haspar = 1;
657: break;
658: case (CMD_BACK):
1.4 schwarze 659: /* Make sure we don't back over the stack. */
660: if (st->lpos > 0) {
661: st->lpos--;
662: puts(".El");
663: }
1.1 schwarze 664: break;
665: case (CMD_BEGIN):
666: /*
667: * We disregard all types for now.
668: * TODO: process at least "text" in a -literal block.
669: */
670: st->paused = 1;
671: break;
672: case (CMD_FOR):
673: /*
674: * We ignore all types of encodings and formats
675: * unilaterally.
676: */
677: break;
678: case (CMD_ENCODING):
679: break;
680: case (CMD_CUT):
681: st->parsing = 0;
682: return;
683: default:
684: abort();
685: }
686:
687: /* Any command (but =cut) makes us start parsing. */
688: st->parsing = 1;
689: }
690:
691: /*
692: * Just pump out the line in a verbatim block.
693: */
694: static void
695: verbatim(struct state *st, const char *buf, size_t start, size_t end)
696: {
1.8 kristaps 697: int last;
1.1 schwarze 698:
699: if ( ! st->parsing || st->paused)
700: return;
701:
702: puts(".Bd -literal");
1.8 kristaps 703: for (last = ' '; start < end; start++) {
704: /*
705: * Handle accidental macros (newline starting with
706: * control character) and escapes.
707: */
708: if ('\n' == last)
1.7 kristaps 709: if ('.' == buf[start] || '\'' == buf[start])
710: printf("\\&");
1.8 kristaps 711: putchar(last = buf[start]);
712: if ('\\' == buf[start])
713: printf("e");
1.7 kristaps 714: }
715: putchar('\n');
1.1 schwarze 716: puts(".Ed");
717: }
718:
719: /*
720: * Ordinary paragraph.
721: * Well, this is really the hardest--POD seems to assume that, for
722: * example, a leading space implies a newline, and so on.
723: * Lots of other snakes in the grass: escaping a newline followed by a
724: * period (accidental mdoc(7) control), double-newlines after macro
725: * passages, etc.
726: */
727: static void
728: ordinary(struct state *st, const char *buf, size_t start, size_t end)
729: {
730: size_t i, j;
731:
732: if ( ! st->parsing || st->paused)
733: return;
734:
735: /*
736: * Special-case: the NAME section.
737: * If we find a "-" when searching from the end, assume that
738: * we're in "name - description" format.
739: * To wit, print out a "Nm" and "Nd" in that format.
740: */
741: if (st->isname) {
742: for (i = end - 1; i > start; i--)
743: if ('-' == buf[i])
744: break;
745: if ('-' == buf[i]) {
746: j = i;
747: /* Roll over multiple "-". */
748: for ( ; i > start; i--)
749: if ('-' != buf[i])
750: break;
1.5 kristaps 751: printf(".Nm ");
752: formatcodeln(buf, &start, i + 1, 1);
753: putchar('\n');
754: start = j + 1;
755: printf(".Nd ");
756: formatcodeln(buf, &start, end, 1);
757: putchar('\n');
1.1 schwarze 758: return;
759: }
760: }
761:
762: if ( ! st->haspar)
763: puts(".Pp");
764:
765: st->haspar = 0;
766: last = '\n';
767:
768: while (start < end) {
769: /*
770: * Loop til we get either to a newline or escape.
771: * Escape initial control characters.
772: */
773: while (start < end) {
774: if (start < end - 1 && '<' == buf[start + 1])
775: break;
776: else if ('\n' == buf[start])
777: break;
778: else if ('\n' == last && '.' == buf[start])
779: printf("\\&");
780: else if ('\n' == last && '\'' == buf[start])
781: printf("\\&");
782: putchar(last = buf[start++]);
1.8 kristaps 783: if ('\\' == last)
784: putchar('e');
1.1 schwarze 785: }
786:
787: if (start < end - 1 && '<' == buf[start + 1]) {
788: /*
789: * We've encountered a format code.
790: * This is going to trigger a macro no matter
791: * what, so print a newline now.
792: * Then print the (possibly nested) macros and
793: * following that, a newline.
1.8 kristaps 794: * Consume all whitespace so we don't
795: * accidentally start an implicit literal line.
1.1 schwarze 796: */
1.6 kristaps 797: if (formatcode(buf, &start, end, 0, 0)) {
1.1 schwarze 798: putchar(last = '\n');
1.6 kristaps 799: while (start < end && ' ' == buf[start])
800: start++;
801: }
1.1 schwarze 802: } else if (start < end && '\n' == buf[start]) {
803: /*
804: * Print the newline only if we haven't already
805: * printed a newline.
806: */
807: if (last != '\n')
808: putchar(last = buf[start]);
809: if (++start >= end)
810: continue;
811: /*
812: * If we have whitespace next, eat it to prevent
813: * mdoc(7) from thinking that it's meant for
814: * verbatim text.
815: * It is--but if we start with that, we can't
816: * have a macro subsequent it, which may be
817: * possible if we have an escape next.
818: */
819: if (' ' == buf[start] || '\t' == buf[start]) {
820: puts(".br");
821: last = '\n';
822: }
823: for ( ; start < end; start++)
824: if (' ' != buf[start] && '\t' != buf[start])
825: break;
826: } else if (start < end) {
827: /*
828: * Default: print the character.
829: * Escape initial control characters.
830: */
831: if ('\n' == last && '.' == buf[start])
832: printf("\\&");
833: else if ('\n' == last && '\'' == buf[start])
834: printf("\\&");
835: putchar(last = buf[start++]);
1.8 kristaps 836: if ('\\' == last)
837: putchar('e');
1.1 schwarze 838: }
839: }
840:
841: if (last != '\n')
842: putchar('\n');
843: }
844:
/*
 * Hand a paragraph to the right handler according to its first
 * character: blank or tab for verbatim text, "=" for a command, and
 * anything else for ordinary text.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
863:
864: /*
865: * Loop around paragraphs within a document, processing each one in the
866: * POD way.
867: */
868: static void
869: dofile(const struct args *args, const char *fname,
870: const struct tm *tm, const char *buf, size_t sz)
871: {
872: size_t sup, end, i, cur = 0;
873: struct state st;
874: const char *section, *date;
875: char datebuf[64];
876: char *title, *cp;
877:
878: if (0 == sz)
879: return;
880:
881: /* Title is last path component of the filename. */
882:
883: if (NULL != args->title)
884: title = strdup(args->title);
885: else if (NULL != (cp = strrchr(fname, '/')))
886: title = strdup(cp + 1);
887: else
888: title = strdup(fname);
889:
890: if (NULL == title) {
891: perror(NULL);
892: exit(EXIT_FAILURE);
893: }
894:
895: /* Section is 1 unless suffix is "pm". */
896:
897: if (NULL == (section = args->section)) {
898: section = "1";
899: if (NULL != (cp = strrchr(title, '.'))) {
900: *cp++ = '\0';
901: if (0 == strcmp(cp, "pm"))
902: section = "3p";
903: }
904: }
905:
906: /* Date. Or the given "tm" if not supplied. */
907:
908: if (NULL == (date = args->date)) {
909: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
910: date = datebuf;
911: }
912:
913: for (cp = title; '\0' != *cp; cp++)
914: *cp = toupper((int)*cp);
915:
916: /* The usual mdoc(7) preamble. */
917:
918: printf(".Dd %s\n", date);
919: printf(".Dt %s %s\n", title, section);
920: puts(".Os");
921:
922: free(title);
923:
924: memset(&st, 0, sizeof(struct state));
925: assert(sz > 0);
926:
927: /* Main loop over file contents. */
928:
929: while (cur < sz) {
930: /* Read until next paragraph. */
931: for (i = cur + 1; i < sz; i++)
932: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
933: /* Consume blank paragraphs. */
934: while (i + 1 < sz && '\n' == buf[i + 1])
935: i++;
936: break;
937: }
938:
939: /* Adjust end marker for EOF. */
940: end = i < sz ? i - 1 :
941: ('\n' == buf[sz - 1] ? sz - 1 : sz);
942: sup = i < sz ? end + 2 : sz;
943:
944: /* Process paragraph and adjust start. */
945: dopar(&st, buf, cur, end);
946: cur = sup;
947: }
948: }
949:
950: /*
951: * Read a single file fully into memory.
952: * If the file is "-", do it from stdin.
953: * If successfully read, send the input buffer to dofile() for further
954: * processing.
955: */
956: static int
957: readfile(const struct args *args, const char *fname)
958: {
959: int fd;
960: char *buf;
961: size_t bufsz, cur;
962: ssize_t ssz;
963: struct tm *tm;
964: time_t ttm;
965: struct stat st;
966:
967: assert(NULL != fname);
968:
969: fd = 0 != strcmp("-", fname) ?
970: open(fname, O_RDONLY, 0) : STDIN_FILENO;
971:
972: if (-1 == fd) {
973: perror(fname);
974: return(0);
975: }
976:
977: if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
978: ttm = time(NULL);
979: tm = localtime(&ttm);
980: } else
981: tm = localtime(&st.st_mtime);
982:
983: /*
984: * Arbitrarily-sized initial buffer.
985: * Should be big enough for most files...
986: */
987: cur = 0;
988: bufsz = 1 << 14;
989: if (NULL == (buf = malloc(bufsz))) {
990: perror(NULL);
991: exit(EXIT_FAILURE);
992: }
993:
994: while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
995: /* Double buffer size on fill. */
996: if ((size_t)ssz == bufsz - cur) {
997: bufsz *= 2;
998: if (NULL == (buf = realloc(buf, bufsz))) {
999: perror(NULL);
1000: exit(EXIT_FAILURE);
1001: }
1002: }
1003: cur += (size_t)ssz;
1004: }
1005: if (ssz < 0) {
1006: perror(fname);
1007: free(buf);
1008: return(0);
1009: }
1010:
1011: dofile(args, STDIN_FILENO == fd ?
1012: "STDIN" : fname, tm, buf, cur);
1013: free(buf);
1014: if (STDIN_FILENO != fd)
1015: close(fd);
1016: return(1);
1017: }
1018:
1019: int
1020: main(int argc, char *argv[])
1021: {
1022: const char *fname, *name;
1023: struct args args;
1024: int c;
1025:
1026: name = strrchr(argv[0], '/');
1027: if (name == NULL)
1028: name = argv[0];
1029: else
1030: ++name;
1031:
1032: memset(&args, 0, sizeof(struct args));
1033: fname = "-";
1034:
1035: /* Accept no arguments for now. */
1036:
1037: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1038: switch (c) {
1039: case ('h'):
1040: /* FALLTHROUGH */
1041: case ('l'):
1042: /* FALLTHROUGH */
1043: case ('c'):
1044: /* FALLTHROUGH */
1045: case ('o'):
1046: /* FALLTHROUGH */
1047: case ('q'):
1048: /* FALLTHROUGH */
1049: case ('r'):
1050: /* FALLTHROUGH */
1051: case ('u'):
1052: /* FALLTHROUGH */
1053: case ('v'):
1054: /* Ignore these. */
1055: break;
1056: case ('d'):
1057: args.date = optarg;
1058: break;
1059: case ('n'):
1060: args.title = optarg;
1061: break;
1062: case ('s'):
1063: args.section = optarg;
1064: break;
1065: default:
1066: goto usage;
1067: }
1068:
1069: argc -= optind;
1070: argv += optind;
1071:
1072: /* Accept only a single input file. */
1073:
1074: if (argc > 2)
1075: return(EXIT_FAILURE);
1076: else if (1 == argc)
1077: fname = *argv;
1078:
1079: return(readfile(&args, fname) ?
1080: EXIT_SUCCESS : EXIT_FAILURE);
1081:
1082: usage:
1083: fprintf(stderr, "usage: %s [-d date] "
1084: "[-n title] [-s section]\n", name);
1085:
1086: return(EXIT_FAILURE);
1087: }
CVSweb