Annotation of pod2mdoc/pod2mdoc.c, Revision 1.13
1.13 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.12 2014/04/01 11:58:32 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Manual section used for Perl module documentation.
 */
#define	PERL_SECTION	"3p"

/*
 * Size of the open-list stack kept in struct state.
 */
#define	LIST_STACKSZ	128

/*
 * Overrides for the document prologue; a NULL member means "no
 * override".
 */
struct args {
	const char	*title;   /* replaces the "Dt" title */
	const char	*date;    /* replaces the "Dd" date */
	const char	*section; /* replaces the "Dt" section */
};

/*
 * The kinds of list we know how to emit.
 */
enum list {
	LIST_BULLET = 0,
	LIST_ENUM,
	LIST_TAG,
	LIST__MAX
};

/*
 * Sections that get special treatment.
 */
enum sect {
	SECT_NONE = 0,
	SECT_NAME,     /* NAME section */
	SECT_SYNOPSIS  /* SYNOPSIS section */
};

/*
 * Parser state carried from one paragraph to the next.
 */
struct state {
	int		 parsing; /* after =cut or before command */
	int		 paused;  /* in =begin and before =end */
	int		 haspar;  /* in paragraph: do we need Pp? */
	enum sect	 sect;    /* which section are we in? */
	const char	*fname;   /* file being parsed */
	enum list	 lstack[LIST_STACKSZ]; /* open lists */
	size_t		 lpos;    /* where in list stack */
};
63:
/*
 * POD formatting codes, i.e., the letter in front of "<...>".
 */
enum fmt {
	FMT_ITALIC,
	FMT_BOLD,
	FMT_CODE,
	FMT_LINK,
	FMT_ESCAPE,
	FMT_FILE,
	FMT_NBSP,
	FMT_INDEX,
	FMT_NULL,
	FMT__MAX
};

/*
 * POD command paragraphs, i.e., the word following a leading "=".
 */
enum cmd {
	CMD_POD = 0,
	CMD_HEAD1,
	CMD_HEAD2,
	CMD_HEAD3,
	CMD_HEAD4,
	CMD_OVER,
	CMD_ITEM,
	CMD_BACK,
	CMD_BEGIN,
	CMD_END,
	CMD_FOR,
	CMD_ENCODING,
	CMD_CUT,
	CMD__MAX
};

/*
 * Spelling of each command, indexed by enum cmd.
 */
static const char *const cmds[CMD__MAX] = {
	[CMD_POD]	= "pod",
	[CMD_HEAD1]	= "head1",
	[CMD_HEAD2]	= "head2",
	[CMD_HEAD3]	= "head3",
	[CMD_HEAD4]	= "head4",
	[CMD_OVER]	= "over",
	[CMD_ITEM]	= "item",
	[CMD_BACK]	= "back",
	[CMD_BEGIN]	= "begin",
	[CMD_END]	= "end",
	[CMD_FOR]	= "for",
	[CMD_ENCODING]	= "encoding",
	[CMD_CUT]	= "cut"
};

/*
 * Letter of each formatting code, indexed by enum fmt.
 */
static const char fmts[FMT__MAX] = {
	[FMT_ITALIC]	= 'I',
	[FMT_BOLD]	= 'B',
	[FMT_CODE]	= 'C',
	[FMT_LINK]	= 'L',
	[FMT_ESCAPE]	= 'E',
	[FMT_FILE]	= 'F',
	[FMT_NBSP]	= 'S',
	[FMT_INDEX]	= 'X',
	[FMT_NULL]	= 'Z'
};

/*
 * The character most recently written to the output (or a stand-in for
 * it); shared by the paragraph and format-code printers.
 */
static int last;
123:
1.1 schwarze 124: /*
125: * Given buf[*start] is at the start of an escape name, read til the end
126: * of the escape ('>') then try to do something with it.
127: * Sets start to be one after the '>'.
128: */
129: static void
130: formatescape(const char *buf, size_t *start, size_t end)
131: {
132: char esc[16]; /* no more needed */
133: size_t i, max;
134:
135: max = sizeof(esc) - 1;
136: i = 0;
137: /* Read til our buffer is full. */
138: while (*start < end && '>' != buf[*start] && i < max)
139: esc[i++] = buf[(*start)++];
140: esc[i] = '\0';
141:
142: if (i == max) {
143: /* Too long... skip til we end. */
144: while (*start < end && '>' != buf[*start])
145: (*start)++;
146: return;
147: } else if (*start >= end)
148: return;
149:
150: assert('>' == buf[*start]);
151: (*start)++;
152:
153: /*
154: * TODO: right now, we only recognise the named escapes.
155: * Just let the rest of them go.
156: */
1.6 kristaps 157: if (0 == strcmp(esc, "lt"))
1.1 schwarze 158: printf("\\(la");
159: else if (0 == strcmp(esc, "gt"))
160: printf("\\(ra");
161: else if (0 == strcmp(esc, "vb"))
162: printf("\\(ba");
163: else if (0 == strcmp(esc, "sol"))
164: printf("\\(sl");
1.6 kristaps 165: else
166: return;
167:
168: last = 'a';
1.1 schwarze 169: }
170:
171: /*
1.9 kristaps 172: * Run some heuristics to intuit a link format.
173: * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
174: * or a general UNIX foo(5) manpage.
175: * If I recognise one, I set "start" to be the end of the sequence so
176: * that the caller can safely just continue processing.
177: * Otherwise, I don't touch "start".
178: */
179: static int
180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
181: {
182: size_t sv, nstart, nend, i, j;
183: int hasdouble;
184:
185: /*
186: * Scan to the start of the terminus.
187: * This function is more or less replicated in the formatcode()
188: * for null or index formatting codes.
189: */
190: hasdouble = 0;
191: for (sv = nstart = *start; nstart < end; nstart++) {
192: /* Do we have a double-colon? */
193: if (':' == buf[nstart] &&
194: nstart > sv &&
195: ':' == buf[nstart - 1])
196: hasdouble = 1;
197: if ('>' != buf[nstart])
198: continue;
199: else if (dsz == 1)
200: break;
201: assert(nstart > 0);
202: if (' ' != buf[nstart - 1])
203: continue;
204: i = nstart;
205: for (j = 0; i < end && j < dsz; j++)
206: if ('>' != buf[i++])
207: break;
208: if (dsz == j)
209: break;
210: }
211:
212: /* We don't care about stubs. */
213: if (nstart == end || nstart == *start)
214: return(0);
215:
216: /* Set nend to the end of content. */
217: nend = nstart;
218: if (dsz > 1)
219: nend--;
220:
221: /*
222: * Provide for some common invocations of the link primitive.
223: * First, allow us to link to other Perl manuals.
224: */
225: if (hasdouble)
1.10 kristaps 226: printf("Xr %.*s " PERL_SECTION,
1.9 kristaps 227: (int)(nend - sv), &buf[sv]);
228: else if (nend - sv > 3 && isalnum(buf[sv]) &&
229: ')' == buf[nend - 1] &&
230: isdigit((int)buf[nend - 2]) &&
231: '(' == buf[nend - 3])
232: printf("Xr %.*s %c",
233: (int)(nend - 3 - sv),
234: &buf[sv], buf[nend - 2]);
235: else
236: return(0);
237:
238: *start = nstart;
239: return(1);
240: }
241:
1.13 ! kristaps 242:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * On entry, buf[*start] is the leading '-'.
 * A flag is a run of alphanumerics, '-' and '_' that is followed by a
 * space or by '>'; it's printed after "Fl".
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too: further "-xx" words are processed the same
 * way and anything else is introduced with "Ar".
 * If we don't have a flag at all (including when the input ends before
 * a terminator), then assume we're an "Ar".
 * On return, *start is at the first character not yet consumed.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	/* Step over the leading '-'. */
	(*start)++;
	for (i = *start; i < end; i++) {
		if (isalnum((unsigned char)buf[i]) ||
		    '-' == buf[i] || '_' == buf[i])
			continue;
		break;
	}

	/*
	 * Not terminated like a flag word--or not terminated at all,
	 * which can happen with malformed input.
	 */
	if (i >= end || ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/*
	 * A two-letter "Xx" flag looks like an mdoc macro name and
	 * would be interpreted as such after "Fl": escape it, whether
	 * it's followed by a space or by the closing '>'.
	 */
	if (2 == i - *start &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		*start = i;
		/* Nothing but blanks left: we're done. */
		if (i >= end)
			return;
		if ('-' == buf[i])
			goto again;
		printf("Ar ");
	}
}
! 295:
1.9 kristaps 296: /*
1.1 schwarze 297: * We're at the character in front of a format code, which is structured
298: * like X<...> and can contain nested format codes.
299: * This consumes the whole format code, and any nested format codes, til
300: * the end of matched production.
301: * If "reentrant", then we're being called after a macro has already
302: * been printed to the current line.
1.6 kristaps 303: * If "nomacro", then we don't print any macros, just contained data
304: * (e.g., following "Sh" or "Nm").
305: * Return whether we've printed a macro or not--in other words, whether
306: * this should trigger a subsequent newline (this should be ignored when
307: * reentrant).
1.1 schwarze 308: */
309: static int
1.11 kristaps 310: formatcode(struct state *st, const char *buf,
311: size_t *start, size_t end, int reentrant, int nomacro)
1.1 schwarze 312: {
313: enum fmt fmt;
1.5 kristaps 314: size_t i, j, dsz;
1.1 schwarze 315:
316: assert(*start + 1 < end);
317: assert('<' == buf[*start + 1]);
318:
1.6 kristaps 319: /*
320: * First, look up the format code.
321: * If it's not valid, then exit immediately.
322: */
323: for (fmt = 0; fmt < FMT__MAX; fmt++)
324: if (buf[*start] == fmts[fmt])
325: break;
326:
327: if (FMT__MAX == fmt) {
328: putchar(last = buf[(*start)++]);
1.8 kristaps 329: if ('\\' == last)
330: putchar('e');
1.6 kristaps 331: return(0);
332: }
333:
1.5 kristaps 334: /*
335: * Determine whether we're overriding our delimiter.
336: * According to POD, if we have more than one '<' followed by a
337: * space, then we need a space followed by matching '>' to close
338: * the expression.
339: * Otherwise we use the usual '<' and '>' matched pair.
340: */
341: i = *start + 1;
342: while (i < end && '<' == buf[i])
343: i++;
344: assert(i > *start + 1);
345: dsz = i - (*start + 1);
346: if (dsz > 1 && (i >= end || ' ' != buf[i]))
347: dsz = 1;
348:
349: /* Remember, if dsz>1, to jump the trailing space. */
350: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 351:
352: /*
1.6 kristaps 353: * Escapes and ignored codes (NULL and INDEX) don't print macro
354: * sequences, so just output them like normal text before
355: * processing for real macros.
1.1 schwarze 356: */
357: if (FMT_ESCAPE == fmt) {
358: formatescape(buf, start, end);
359: return(0);
360: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 361: /*
1.6 kristaps 362: * Just consume til the end delimiter, accounting for
363: * whether it's a custom one.
1.5 kristaps 364: */
365: for ( ; *start < end; (*start)++) {
366: if ('>' != buf[*start])
367: continue;
368: else if (dsz == 1)
369: break;
370: assert(*start > 0);
371: if (' ' != buf[*start - 1])
372: continue;
373: i = *start;
374: for (j = 0; i < end && j < dsz; j++)
375: if ('>' != buf[i++])
376: break;
377: if (dsz != j)
378: continue;
379: (*start) += dsz;
380: break;
381: }
1.1 schwarze 382: return(0);
383: }
384:
1.6 kristaps 385: /*
386: * Check whether we're supposed to print macro stuff (this is
387: * suppressed in, e.g., "Nm" and "Sh" macros).
388: */
1.1 schwarze 389: if ( ! nomacro) {
390: /*
391: * Print out the macro describing this format code.
392: * If we're not "reentrant" (not yet on a macro line)
393: * then print a newline, if necessary, and the macro
394: * indicator.
395: * Otherwise, offset us with a space.
396: */
1.6 kristaps 397: if ( ! reentrant) {
398: if (last != '\n')
399: putchar('\n');
1.1 schwarze 400: putchar('.');
1.6 kristaps 401: } else
1.1 schwarze 402: putchar(' ');
403:
404: /*
1.6 kristaps 405: * If we don't have whitespace before us (and none after
406: * the opening delimiter), then suppress macro
407: * whitespace with Pf.
1.1 schwarze 408: */
1.6 kristaps 409: if (' ' != last && '\n' != last && ' ' != buf[*start])
410: printf("Pf ");
411:
1.1 schwarze 412: switch (fmt) {
413: case (FMT_ITALIC):
414: printf("Em ");
415: break;
416: case (FMT_BOLD):
1.13 ! kristaps 417: if (SECT_SYNOPSIS == st->sect &&
! 418: 1 == dsz &&
! 419: '-' == buf[*start])
! 420: dosynopsisfl(buf, start, end);
! 421: else
! 422: printf("Sy ");
1.1 schwarze 423: break;
424: case (FMT_CODE):
1.2 schwarze 425: printf("Qo Li ");
1.1 schwarze 426: break;
427: case (FMT_LINK):
1.9 kristaps 428: if ( ! trylink(buf, start, end, dsz))
429: printf("No ");
1.1 schwarze 430: break;
431: case (FMT_FILE):
432: printf("Pa ");
433: break;
434: case (FMT_NBSP):
435: printf("No ");
436: break;
437: default:
438: abort();
439: }
440: }
441:
442: /*
1.6 kristaps 443: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 444: * find a nested format code.
1.1 schwarze 445: * Don't emit any newlines: since we're on a macro line, we
446: * don't want to break the line.
447: */
448: while (*start < end) {
1.5 kristaps 449: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 450: (*start)++;
451: break;
1.5 kristaps 452: } else if ('>' == buf[*start] &&
453: ' ' == buf[*start - 1]) {
454: /*
455: * Handle custom delimiters.
456: * These require a certain number of
457: * space-preceded carrots before we're really at
458: * the end.
459: */
460: i = *start;
461: for (j = 0; i < end && j < dsz; j++)
462: if ('>' != buf[i++])
463: break;
464: if (dsz == j) {
465: *start += dsz;
466: break;
467: }
1.1 schwarze 468: }
469: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.11 kristaps 470: formatcode(st, buf, start, end, 1, nomacro);
1.1 schwarze 471: continue;
472: }
1.3 schwarze 473:
1.4 schwarze 474: /*
475: * Make sure that any macro-like words (or
476: * really any word starting with a capital
477: * letter) is assumed to be a macro that must be
478: * escaped.
479: * This matches "Xx " and "XxEOLN".
480: */
481: if ((' ' == last || '\n' == last) &&
482: end - *start > 1 &&
483: isupper((int)buf[*start]) &&
484: islower((int)buf[*start + 1]) &&
485: (end - *start == 2 ||
486: ' ' == buf[*start + 2]))
487: printf("\\&");
1.3 schwarze 488:
1.4 schwarze 489: /* Suppress newline. */
1.6 kristaps 490: if ('\n' == buf[*start])
491: putchar(last = ' ');
492: else
493: putchar(last = buf[*start]);
1.4 schwarze 494:
1.8 kristaps 495: /* Protect against character escapes. */
496: if ('\\' == last)
497: putchar('e');
498:
1.6 kristaps 499: (*start)++;
500:
501: if (' ' == last)
502: while (*start < end && ' ' == buf[*start])
503: (*start)++;
1.1 schwarze 504: }
1.2 schwarze 505:
506: if ( ! nomacro && FMT_CODE == fmt)
507: printf(" Qc ");
1.1 schwarze 508:
509: /*
1.6 kristaps 510: * We're now subsequent the format code.
511: * If there isn't a space (or newline) here, and we haven't just
512: * printed a space, then suppress space.
1.1 schwarze 513: */
1.6 kristaps 514: if ( ! nomacro && ' ' != last)
515: if (' ' != buf[*start] && '\n' != buf[*start])
516: printf(" Ns ");
1.5 kristaps 517:
1.1 schwarze 518: return(1);
519: }
520:
521: /*
522: * Calls formatcode() til the end of a paragraph.
523: */
524: static void
1.11 kristaps 525: formatcodeln(struct state *st, const char *buf,
526: size_t *start, size_t end, int nomacro)
1.1 schwarze 527: {
528:
1.4 schwarze 529: last = ' ';
1.1 schwarze 530: while (*start < end) {
531: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.11 kristaps 532: formatcode(st, buf, start, end, 1, nomacro);
1.1 schwarze 533: continue;
534: }
1.4 schwarze 535: /*
536: * Since we're already on a macro line, we want to make
537: * sure that we don't inadvertently invoke a macro.
538: * We need to do this carefully because section names
539: * are used in troff and we don't want to escape
540: * something that needn't be escaped.
541: */
542: if (' ' == last && end - *start > 1 &&
543: isupper((int)buf[*start]) &&
544: islower((int)buf[*start + 1]) &&
545: (end - *start == 2 ||
546: ' ' == buf[*start + 2]))
547: printf("\\&");
548:
1.8 kristaps 549: if ('\n' == buf[*start])
550: putchar(last = ' ');
551: else
1.1 schwarze 552: putchar(last = buf[*start]);
1.8 kristaps 553:
554: /* Protect against character escapes. */
555: if ('\\' == last)
556: putchar('e');
557:
1.1 schwarze 558: (*start)++;
559: }
560: }
561:
562: /*
1.4 schwarze 563: * Guess at what kind of list we are.
564: * These are taken straight from the POD manual.
565: * I don't know what people do in real life.
566: */
567: static enum list
568: listguess(const char *buf, size_t start, size_t end)
569: {
570: size_t len = end - start;
571:
572: assert(end >= start);
573:
574: if (len == 1 && '*' == buf[start])
575: return(LIST_BULLET);
576: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
577: return(LIST_ENUM);
578: else if (len == 1 && '1' == buf[start])
579: return(LIST_ENUM);
580: else
581: return(LIST_TAG);
582: }
583:
584: /*
1.1 schwarze 585: * A command paragraph, as noted in the perlpod manual, just indicates
586: * that we should do something, optionally with some text to print as
587: * well.
588: */
589: static void
590: command(struct state *st, const char *buf, size_t start, size_t end)
591: {
592: size_t len, csz;
593: enum cmd cmd;
594:
595: assert('=' == buf[start]);
596: start++;
597: len = end - start;
598:
599: for (cmd = 0; cmd < CMD__MAX; cmd++) {
600: csz = strlen(cmds[cmd]);
601: if (len < csz)
602: continue;
603: if (0 == memcmp(&buf[start], cmd[cmds], csz))
604: break;
605: }
606:
607: /* Ignore bogus commands. */
608:
609: if (CMD__MAX == cmd)
610: return;
611:
612: start += csz;
1.8 kristaps 613: while (start < end && ' ' == buf[start])
614: start++;
615:
1.1 schwarze 616: len = end - start;
617:
618: if (st->paused) {
619: st->paused = CMD_END != cmd;
620: return;
621: }
622:
623: switch (cmd) {
624: case (CMD_POD):
625: break;
626: case (CMD_HEAD1):
627: /*
628: * The behaviour of head= follows from a quick glance at
629: * how pod2man handles it.
630: */
631: printf(".Sh ");
1.11 kristaps 632: st->sect = SECT_NONE;
633: if (end - start == 4) {
1.1 schwarze 634: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 635: st->sect = SECT_NAME;
636: } else if (end - start == 8) {
637: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
638: st->sect = SECT_SYNOPSIS;
639: }
640: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 641: putchar('\n');
642: st->haspar = 1;
643: break;
644: case (CMD_HEAD2):
645: printf(".Ss ");
1.11 kristaps 646: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 647: putchar('\n');
648: st->haspar = 1;
649: break;
650: case (CMD_HEAD3):
651: puts(".Pp");
652: printf(".Em ");
1.11 kristaps 653: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 654: putchar('\n');
655: puts(".Pp");
656: st->haspar = 1;
657: break;
658: case (CMD_HEAD4):
659: puts(".Pp");
660: printf(".No ");
1.11 kristaps 661: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 662: putchar('\n');
663: puts(".Pp");
664: st->haspar = 1;
665: break;
666: case (CMD_OVER):
1.4 schwarze 667: /*
668: * If we have an existing list that hasn't had an =item
669: * yet, then make sure that we open it now.
670: * We use the default list type, but that can't be
671: * helped (we haven't seen any items yet).
1.1 schwarze 672: */
1.4 schwarze 673: if (st->lpos > 0)
674: if (LIST__MAX == st->lstack[st->lpos - 1]) {
675: st->lstack[st->lpos - 1] = LIST_TAG;
676: puts(".Bl -tag -width Ds");
677: }
678: st->lpos++;
679: assert(st->lpos < LIST_STACKSZ);
680: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 681: break;
682: case (CMD_ITEM):
1.6 kristaps 683: if (0 == st->lpos) {
684: /*
685: * Bad markup.
686: * Try to compensate.
687: */
688: st->lstack[st->lpos] = LIST__MAX;
689: st->lpos++;
690: }
1.4 schwarze 691: assert(st->lpos > 0);
692: /*
693: * If we're the first =item, guess at what our content
694: * will be: "*" is a bullet list, "1." is a numbered
695: * list, and everything is tagged.
696: */
697: if (LIST__MAX == st->lstack[st->lpos - 1]) {
698: st->lstack[st->lpos - 1] =
699: listguess(buf, start, end);
700: switch (st->lstack[st->lpos - 1]) {
701: case (LIST_BULLET):
702: puts(".Bl -bullet");
703: break;
704: case (LIST_ENUM):
705: puts(".Bl -enum");
706: break;
707: default:
708: puts(".Bl -tag -width Ds");
709: break;
710: }
711: }
712: switch (st->lstack[st->lpos - 1]) {
713: case (LIST_TAG):
714: printf(".It ");
1.11 kristaps 715: formatcodeln(st, buf, &start, end, 0);
1.4 schwarze 716: putchar('\n');
717: break;
718: case (LIST_ENUM):
719: /* FALLTHROUGH */
720: case (LIST_BULLET):
721: /*
722: * Abandon the remainder of the paragraph
723: * because we're going to be a bulletted or
724: * numbered list.
725: */
726: puts(".It");
727: break;
728: default:
729: abort();
730: }
1.1 schwarze 731: st->haspar = 1;
732: break;
733: case (CMD_BACK):
1.4 schwarze 734: /* Make sure we don't back over the stack. */
735: if (st->lpos > 0) {
736: st->lpos--;
737: puts(".El");
738: }
1.1 schwarze 739: break;
740: case (CMD_BEGIN):
741: /*
742: * We disregard all types for now.
743: * TODO: process at least "text" in a -literal block.
744: */
745: st->paused = 1;
746: break;
747: case (CMD_FOR):
748: /*
749: * We ignore all types of encodings and formats
750: * unilaterally.
751: */
752: break;
753: case (CMD_ENCODING):
754: break;
755: case (CMD_CUT):
756: st->parsing = 0;
757: return;
758: default:
759: abort();
760: }
761:
762: /* Any command (but =cut) makes us start parsing. */
763: st->parsing = 1;
764: }
765:
766: /*
767: * Just pump out the line in a verbatim block.
768: */
769: static void
770: verbatim(struct state *st, const char *buf, size_t start, size_t end)
771: {
1.8 kristaps 772: int last;
1.1 schwarze 773:
774: if ( ! st->parsing || st->paused)
775: return;
776:
777: puts(".Bd -literal");
1.8 kristaps 778: for (last = ' '; start < end; start++) {
779: /*
780: * Handle accidental macros (newline starting with
781: * control character) and escapes.
782: */
783: if ('\n' == last)
1.7 kristaps 784: if ('.' == buf[start] || '\'' == buf[start])
785: printf("\\&");
1.8 kristaps 786: putchar(last = buf[start]);
787: if ('\\' == buf[start])
788: printf("e");
1.7 kristaps 789: }
790: putchar('\n');
1.1 schwarze 791: puts(".Ed");
792: }
793:
/*
 * Scan buf[start, end) for a ']' that isn't paired with a '[' found
 * earlier in that same range.
 * Return 1 if there is one, 0 otherwise.
 * Used by dosynopsisop() to look for the partner of an opening
 * bracket.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	const char	*cp, *stop;
	size_t		 depth;

	depth = 0;
	stop = buf + end;
	for (cp = buf + start; cp < stop; cp++) {
		switch (*cp) {
		case '[':
			depth++;
			break;
		case ']':
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}

	return(0);
}
! 811:
/*
 * If we're in the SYNOPSIS section and we've encounter braces in an
 * ordinary paragraph, then try to see whether we're an [-option].
 * Do this, if we're an opening bracket, by first seeing if we have a
 * matching end via hasmatch().
 * If we're an ending bracket, see if we have a stack already.
 * On entry, buf[*start] is '[' or ']'.
 * If the bracket is accepted, print ".Oo" or ".Oc" on its own line,
 * adjust the "opstack" depth, set *last to a newline, advance *start
 * beyond the bracket and any blanks after it, and return 1.
 * Otherwise, leave everything untouched and return 0.
 */
static int
dosynopsisop(const char *buf, int *last,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oo");
		(*opstack)++;
	} else if ('[' == buf[*start])
		return(0);

	if (']' == buf[*start] && *opstack > 0) {
		if ('\n' != *last)
			putchar('\n');
		puts(".Oc");
		(*opstack)--;
	} else if (']' == buf[*start])
		return(0);

	(*start)++;
	*last = '\n';
	/* Skip blanks, but never run off the end of the paragraph. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}
! 848:
! 849: /*
1.1 schwarze 850: * Ordinary paragraph.
851: * Well, this is really the hardest--POD seems to assume that, for
852: * example, a leading space implies a newline, and so on.
853: * Lots of other snakes in the grass: escaping a newline followed by a
854: * period (accidental mdoc(7) control), double-newlines after macro
855: * passages, etc.
856: */
857: static void
858: ordinary(struct state *st, const char *buf, size_t start, size_t end)
859: {
1.13 ! kristaps 860: size_t i, j, opstack;
1.1 schwarze 861:
862: if ( ! st->parsing || st->paused)
863: return;
864:
865: /*
866: * Special-case: the NAME section.
867: * If we find a "-" when searching from the end, assume that
868: * we're in "name - description" format.
869: * To wit, print out a "Nm" and "Nd" in that format.
870: */
1.11 kristaps 871: if (SECT_NAME == st->sect) {
1.1 schwarze 872: for (i = end - 1; i > start; i--)
873: if ('-' == buf[i])
874: break;
875: if ('-' == buf[i]) {
876: j = i;
877: /* Roll over multiple "-". */
878: for ( ; i > start; i--)
879: if ('-' != buf[i])
880: break;
1.5 kristaps 881: printf(".Nm ");
1.11 kristaps 882: formatcodeln(st, buf, &start, i + 1, 1);
1.5 kristaps 883: putchar('\n');
884: start = j + 1;
885: printf(".Nd ");
1.11 kristaps 886: formatcodeln(st, buf, &start, end, 1);
1.5 kristaps 887: putchar('\n');
1.1 schwarze 888: return;
889: }
890: }
891:
892: if ( ! st->haspar)
893: puts(".Pp");
894:
895: st->haspar = 0;
896: last = '\n';
1.13 ! kristaps 897: opstack = 0;
1.1 schwarze 898:
899: while (start < end) {
900: /*
901: * Loop til we get either to a newline or escape.
902: * Escape initial control characters.
903: */
904: while (start < end) {
905: if (start < end - 1 && '<' == buf[start + 1])
906: break;
907: else if ('\n' == buf[start])
908: break;
909: else if ('\n' == last && '.' == buf[start])
910: printf("\\&");
911: else if ('\n' == last && '\'' == buf[start])
912: printf("\\&");
1.12 kristaps 913: /*
914: * If we're in the SYNOPSIS, have square
915: * brackets indicate that we're opening and
916: * closing an optional context.
917: */
1.13 ! kristaps 918: if (SECT_SYNOPSIS == st->sect &&
! 919: ('[' == buf[start] ||
! 920: ']' == buf[start]) &&
! 921: dosynopsisop(buf, &last,
! 922: &start, end, &opstack))
! 923: continue;
1.1 schwarze 924: putchar(last = buf[start++]);
1.8 kristaps 925: if ('\\' == last)
926: putchar('e');
1.1 schwarze 927: }
928:
929: if (start < end - 1 && '<' == buf[start + 1]) {
930: /*
931: * We've encountered a format code.
932: * This is going to trigger a macro no matter
933: * what, so print a newline now.
934: * Then print the (possibly nested) macros and
935: * following that, a newline.
1.8 kristaps 936: * Consume all whitespace so we don't
937: * accidentally start an implicit literal line.
1.1 schwarze 938: */
1.11 kristaps 939: if (formatcode(st, buf, &start, end, 0, 0)) {
1.1 schwarze 940: putchar(last = '\n');
1.6 kristaps 941: while (start < end && ' ' == buf[start])
942: start++;
943: }
1.1 schwarze 944: } else if (start < end && '\n' == buf[start]) {
945: /*
946: * Print the newline only if we haven't already
947: * printed a newline.
948: */
949: if (last != '\n')
950: putchar(last = buf[start]);
951: if (++start >= end)
952: continue;
953: /*
954: * If we have whitespace next, eat it to prevent
955: * mdoc(7) from thinking that it's meant for
956: * verbatim text.
957: * It is--but if we start with that, we can't
958: * have a macro subsequent it, which may be
959: * possible if we have an escape next.
960: */
961: if (' ' == buf[start] || '\t' == buf[start]) {
962: puts(".br");
963: last = '\n';
964: }
965: for ( ; start < end; start++)
966: if (' ' != buf[start] && '\t' != buf[start])
967: break;
1.12 kristaps 968: }
1.1 schwarze 969: }
970:
971: if (last != '\n')
972: putchar('\n');
973: }
974:
/*
 * Dispatch one paragraph, buf[start, end), on its first character:
 * blank or tab means verbatim, "=" means command, and anything else is
 * an ordinary paragraph.
 * Empty paragraphs are dropped.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case ' ':
		/* FALLTHROUGH */
	case '\t':
		verbatim(st, buf, start, end);
		break;
	case '=':
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
993:
994: /*
995: * Loop around paragraphs within a document, processing each one in the
996: * POD way.
997: */
998: static void
999: dofile(const struct args *args, const char *fname,
1000: const struct tm *tm, const char *buf, size_t sz)
1001: {
1002: size_t sup, end, i, cur = 0;
1003: struct state st;
1004: const char *section, *date;
1005: char datebuf[64];
1006: char *title, *cp;
1007:
1008: if (0 == sz)
1009: return;
1010:
1011: /* Title is last path component of the filename. */
1012:
1013: if (NULL != args->title)
1014: title = strdup(args->title);
1015: else if (NULL != (cp = strrchr(fname, '/')))
1016: title = strdup(cp + 1);
1017: else
1018: title = strdup(fname);
1019:
1020: if (NULL == title) {
1021: perror(NULL);
1022: exit(EXIT_FAILURE);
1023: }
1024:
1025: /* Section is 1 unless suffix is "pm". */
1026:
1027: if (NULL == (section = args->section)) {
1028: section = "1";
1029: if (NULL != (cp = strrchr(title, '.'))) {
1030: *cp++ = '\0';
1031: if (0 == strcmp(cp, "pm"))
1.10 kristaps 1032: section = PERL_SECTION;
1.1 schwarze 1033: }
1034: }
1035:
1036: /* Date. Or the given "tm" if not supplied. */
1037:
1038: if (NULL == (date = args->date)) {
1039: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1040: date = datebuf;
1041: }
1042:
1043: for (cp = title; '\0' != *cp; cp++)
1044: *cp = toupper((int)*cp);
1045:
1046: /* The usual mdoc(7) preamble. */
1047:
1048: printf(".Dd %s\n", date);
1049: printf(".Dt %s %s\n", title, section);
1050: puts(".Os");
1051:
1052: free(title);
1053:
1054: memset(&st, 0, sizeof(struct state));
1055: assert(sz > 0);
1056:
1057: /* Main loop over file contents. */
1058:
1059: while (cur < sz) {
1060: /* Read until next paragraph. */
1061: for (i = cur + 1; i < sz; i++)
1062: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1063: /* Consume blank paragraphs. */
1064: while (i + 1 < sz && '\n' == buf[i + 1])
1065: i++;
1066: break;
1067: }
1068:
1069: /* Adjust end marker for EOF. */
1070: end = i < sz ? i - 1 :
1071: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1072: sup = i < sz ? end + 2 : sz;
1073:
1074: /* Process paragraph and adjust start. */
1075: dopar(&st, buf, cur, end);
1076: cur = sup;
1077: }
1078: }
1079:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The document date handed to dofile() is the file's modification
 * time, or the current time for stdin or if fstat(2) fails.
 * Return 1 on success, 0 if the file couldn't be opened or read.
 * Running out of memory terminates the program.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	rc = 0;
	if (ssz < 0)
		perror(fname);
	else {
		dofile(args, STDIN_FILENO == fd ?
			"STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	/* Release the buffer and the descriptor on every path. */
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
1148:
1149: int
1150: main(int argc, char *argv[])
1151: {
1152: const char *fname, *name;
1153: struct args args;
1154: int c;
1155:
1156: name = strrchr(argv[0], '/');
1157: if (name == NULL)
1158: name = argv[0];
1159: else
1160: ++name;
1161:
1162: memset(&args, 0, sizeof(struct args));
1163: fname = "-";
1164:
1165: /* Accept no arguments for now. */
1166:
1167: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1168: switch (c) {
1169: case ('h'):
1170: /* FALLTHROUGH */
1171: case ('l'):
1172: /* FALLTHROUGH */
1173: case ('c'):
1174: /* FALLTHROUGH */
1175: case ('o'):
1176: /* FALLTHROUGH */
1177: case ('q'):
1178: /* FALLTHROUGH */
1179: case ('r'):
1180: /* FALLTHROUGH */
1181: case ('u'):
1182: /* FALLTHROUGH */
1183: case ('v'):
1184: /* Ignore these. */
1185: break;
1186: case ('d'):
1187: args.date = optarg;
1188: break;
1189: case ('n'):
1190: args.title = optarg;
1191: break;
1192: case ('s'):
1193: args.section = optarg;
1194: break;
1195: default:
1196: goto usage;
1197: }
1198:
1199: argc -= optind;
1200: argv += optind;
1201:
1202: /* Accept only a single input file. */
1203:
1204: if (argc > 2)
1205: return(EXIT_FAILURE);
1206: else if (1 == argc)
1207: fname = *argv;
1208:
1209: return(readfile(&args, fname) ?
1210: EXIT_SUCCESS : EXIT_FAILURE);
1211:
1212: usage:
1213: fprintf(stderr, "usage: %s [-d date] "
1214: "[-n title] [-s section]\n", name);
1215:
1216: return(EXIT_FAILURE);
1217: }
CVSweb