Annotation of pod2mdoc/pod2mdoc.c, Revision 1.18
1.18 ! kristaps 1: /* $Id: pod2mdoc.c,v 1.17 2014/04/02 20:32:41 kristaps Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
#include <sys/stat.h>
#include <sys/time.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
28:
/*
 * In what section can we find Perl manuals?
 * Used both for "Xr" cross-references to Foo::Bar style links and as
 * the default "Dt" section of input files whose suffix is "pm".
 */
#define PERL_SECTION "3p"
33:
/*
 * Command-line overrides for the mdoc(7) prologue.
 * A NULL member means the option was not given; dofile() then derives
 * the value from the file name or time stamp.
 */
struct args {
	const char	*title; /* override "Dt" title */
	const char	*date; /* override "Dd" date */
	const char	*section; /* override "Dt" section */
};
39:
/*
 * The kinds of mdoc(7) list an =over region may become.
 * LIST__MAX is also stored on the list stack as a "type not yet
 * known" marker between an =over and its first =item.
 */
enum	list {
	LIST_BULLET = 0, /* printed as ".Bl -bullet" */
	LIST_ENUM, /* printed as ".Bl -enum" */
	LIST_TAG, /* printed as ".Bl -tag -width Ds" */
	LIST__MAX
};
46:
/*
 * Sections that get special treatment; set by =head1 from the exact
 * heading text and reset to SECT_NONE by every other =head1.
 */
enum	sect {
	SECT_NONE = 0,
	SECT_NAME, /* NAME section */
	SECT_SYNOPSIS, /* SYNOPSIS section */
};
52:
/*
 * Parser state carried from one paragraph of an input file to the
 * next.  dofile() zeroes the whole structure before the first
 * paragraph.
 */
struct	state {
	int		 parsing; /* zero after =cut or before the first command */
	int		 paused; /* in =begin and before =end */
	int		 haspar; /* in paragraph: non-zero suppresses the next Pp */
	enum sect	 sect; /* which section are we in? */
	const char	*fname; /* file being parsed */
#define	LIST_STACKSZ	 128
	enum list	 lstack[LIST_STACKSZ]; /* open lists */
	size_t		 lpos; /* where in list stack (number of open lists) */
};
63:
/*
 * POD formatting codes (the "X" in "X<...>").
 * The order must match the fmts[] table, which maps each value to its
 * code letter.
 */
enum	fmt {
	FMT_ITALIC,
	FMT_BOLD,
	FMT_CODE,
	FMT_LINK,
	FMT_ESCAPE,
	FMT_FILE,
	FMT_NBSP,
	FMT_INDEX,
	FMT_NULL,
	FMT__MAX
};
76:
/*
 * POD command paragraphs (the word following the leading "=").
 * The order must match the cmds[] table, which maps each value to its
 * command name.
 */
enum	cmd {
	CMD_POD = 0,
	CMD_HEAD1,
	CMD_HEAD2,
	CMD_HEAD3,
	CMD_HEAD4,
	CMD_OVER,
	CMD_ITEM,
	CMD_BACK,
	CMD_BEGIN,
	CMD_END,
	CMD_FOR,
	CMD_ENCODING,
	CMD_CUT,
	CMD__MAX
};
93:
/*
 * Command names, indexed by enum cmd.
 * command() searches this table front to back and takes the first
 * entry that matches.
 */
static	const char *const cmds[CMD__MAX] = {
	"pod", 		/* CMD_POD */
	"head1",	/* CMD_HEAD1 */
	"head2",	/* CMD_HEAD2 */
	"head3",	/* CMD_HEAD3 */
	"head4",	/* CMD_HEAD4 */
	"over", 	/* CMD_OVER */
	"item", 	/* CMD_ITEM */
	"back", 	/* CMD_BACK */
	"begin", 	/* CMD_BEGIN */
	"end", 		/* CMD_END */
	"for", 		/* CMD_FOR */
	"encoding",	/* CMD_ENCODING */
	"cut"		/* CMD_CUT */
};
109:
/*
 * Formatting code letters, indexed by enum fmt.
 * formatcode() compares the character in front of a '<' against these.
 */
static	const char fmts[FMT__MAX] = {
	'I',		/* FMT_ITALIC */
	'B',		/* FMT_BOLD */
	'C',		/* FMT_CODE */
	'L',		/* FMT_LINK */
	'E',		/* FMT_ESCAPE */
	'F',		/* FMT_FILE */
	'S',		/* FMT_NBSP */
	'X',		/* FMT_INDEX */
	'Z'		/* FMT_NULL */
};
121:
/*
 * The last character written by the formatting routines.
 * It is consulted to decide whether a newline, a space or a macro
 * escape is needed before the next piece of output.
 */
static	int	 last;

/*
 * Given buf[*start] is at the start of an escape name (the "lt" of
 * "E<lt>"), read til the end of the escape ('>') then try to do
 * something with it.
 * Sets start to be one after the '>', whether or not the name was
 * recognised or fit into our buffer; if there is no '>' before "end",
 * start is left at "end".
 * Unrecognised names print nothing and leave "last" untouched.
 */
static void
formatescape(const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/*
		 * Too long... skip til we end.
		 * Also step over the closing '>' so that the caller
		 * doesn't print it as ordinary text.
		 */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 * perlpod(1) spells the vertical bar "verbar"; "vb" is kept
	 * for compatibility with earlier revisions of this tool.
	 */
	if (0 == strcmp(esc, "lt"))
		printf("\\(la");
	else if (0 == strcmp(esc, "gt"))
		printf("\\(ra");
	else if (0 == strcmp(esc, "verbar") || 0 == strcmp(esc, "vb"))
		printf("\\(ba");
	else if (0 == strcmp(esc, "sol"))
		printf("\\(sl");
	else
		return;

	/* Every sequence printed above ends in 'a'. */
	last = 'a';
}
170:
171: /*
1.9 kristaps 172: * Run some heuristics to intuit a link format.
173: * I recognise L<foo::bar> as a Perl manpage, printing it in section 3p;
174: * or a general UNIX foo(5) manpage.
175: * If I recognise one, I set "start" to be the end of the sequence so
176: * that the caller can safely just continue processing.
177: * Otherwise, I don't touch "start".
178: */
179: static int
180: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
181: {
1.18 ! kristaps 182: size_t sv, nstart, nend, i, j, textsz;
! 183: const char *text;
! 184: int hasdouble;
1.9 kristaps 185:
186: /*
187: * Scan to the start of the terminus.
188: * This function is more or less replicated in the formatcode()
189: * for null or index formatting codes.
190: */
191: for (sv = nstart = *start; nstart < end; nstart++) {
192: if ('>' != buf[nstart])
193: continue;
194: else if (dsz == 1)
195: break;
196: assert(nstart > 0);
197: if (' ' != buf[nstart - 1])
198: continue;
1.18 ! kristaps 199: for (i = nstart, j = 0; i < end && j < dsz; j++)
1.9 kristaps 200: if ('>' != buf[i++])
201: break;
202: if (dsz == j)
203: break;
204: }
205:
206: /* We don't care about stubs. */
207: if (nstart == end || nstart == *start)
208: return(0);
209:
210: /* Set nend to the end of content. */
211: nend = nstart;
212: if (dsz > 1)
213: nend--;
1.18 ! kristaps 214:
! 215: /* Re-scan to see if we have a title. */
! 216: text = &buf[*start];
! 217: for (textsz = *start; textsz < nend; textsz++)
! 218: if ('|' == buf[textsz])
! 219: break;
! 220:
! 221: if (textsz < nend) {
! 222: sv = textsz + 1;
! 223: textsz = textsz - *start;
! 224: } else
! 225: textsz = 0;
! 226:
! 227: /* Now see if we're a Perl manual. */
! 228: for (hasdouble = 0, i = sv + 1; i < end; i++)
! 229: if (':' == buf[i] && ':' == buf[i - 1]) {
! 230: hasdouble = 1;
! 231: break;
! 232: }
1.9 kristaps 233:
234: /*
235: * Provide for some common invocations of the link primitive.
236: * First, allow us to link to other Perl manuals.
237: */
238: if (hasdouble)
1.10 kristaps 239: printf("Xr %.*s " PERL_SECTION,
1.9 kristaps 240: (int)(nend - sv), &buf[sv]);
241: else if (nend - sv > 3 && isalnum(buf[sv]) &&
242: ')' == buf[nend - 1] &&
243: isdigit((int)buf[nend - 2]) &&
244: '(' == buf[nend - 3])
245: printf("Xr %.*s %c",
246: (int)(nend - 3 - sv),
247: &buf[sv], buf[nend - 2]);
248: else
249: return(0);
250:
251: *start = nstart;
252: return(1);
253: }
254:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry buf[*start] must be the '-'.
 * When a flag is recognised, "Fl name " is printed and start is moved
 * past the flag name and any following spaces; a following "-..." is
 * examined in the same way, anything else is introduced with "Ar ".
 * When no flag is recognised, "Ar " is printed and start is left on
 * the '-' so that the caller prints the text unchanged.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i, flag;

	for (;;) {
		assert(*start < end);
		assert('-' == buf[*start]);

		/* A dash that isn't followed by a flag character. */
		if (*start + 1 >= end ||
		    ( ! isalnum((unsigned char)buf[*start + 1]) &&
		     '?' != buf[*start + 1] &&
		     '-' != buf[*start + 1])) {
			fputs("Ar ", stdout);
			return;
		}

		/* Find the end of the flag name. */
		flag = *start + 1;
		for (i = flag; i < end; i++)
			if ( ! isalnum((unsigned char)buf[i]) &&
			    '?' != buf[i] &&
			    '-' != buf[i] &&
			    '_' != buf[i])
				break;

		/*
		 * A flag name must run up to a space, the closing
		 * delimiter or the end of input; otherwise (e.g.,
		 * "-foo=bar") keep the whole word, dash included, as
		 * an argument.
		 */
		if (i < end && ' ' != buf[i] && '>' != buf[i]) {
			printf("Ar ");
			return;
		}

		printf("Fl ");
		/* Don't let a flag like "Ar" be taken for a macro. */
		if (end - flag > 1 &&
		    isupper((unsigned char)buf[flag]) &&
		    islower((unsigned char)buf[flag + 1]) &&
		    (end - flag == 2 ||
		     ' ' == buf[flag + 2]))
			printf("\\&");
		printf("%.*s ", (int)(i - flag), &buf[flag]);
		*start = i;

		if (i >= end || ' ' != buf[i])
			return;

		while (i < end && ' ' == buf[i])
			i++;
		*start = i;

		/* Another dash: go around again for the next flag. */
		if (i < end && '-' == buf[i])
			continue;

		printf("Ar ");
		return;
	}
}
320:
1.9 kristaps 321: /*
1.1 schwarze 322: * We're at the character in front of a format code, which is structured
323: * like X<...> and can contain nested format codes.
324: * This consumes the whole format code, and any nested format codes, til
325: * the end of matched production.
326: * If "reentrant", then we're being called after a macro has already
327: * been printed to the current line.
1.6 kristaps 328: * If "nomacro", then we don't print any macros, just contained data
329: * (e.g., following "Sh" or "Nm").
1.15 kristaps 330: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
331: * as the first format code on a line (for decoration as an "Nm"),
332: * non-zero otherwise.
1.6 kristaps 333: * Return whether we've printed a macro or not--in other words, whether
334: * this should trigger a subsequent newline (this should be ignored when
335: * reentrant).
1.1 schwarze 336: */
337: static int
1.15 kristaps 338: formatcode(struct state *st, const char *buf, size_t *start,
339: size_t end, int reentrant, int nomacro, int pos)
1.1 schwarze 340: {
341: enum fmt fmt;
1.5 kristaps 342: size_t i, j, dsz;
1.1 schwarze 343:
344: assert(*start + 1 < end);
345: assert('<' == buf[*start + 1]);
346:
1.6 kristaps 347: /*
348: * First, look up the format code.
349: * If it's not valid, then exit immediately.
350: */
351: for (fmt = 0; fmt < FMT__MAX; fmt++)
352: if (buf[*start] == fmts[fmt])
353: break;
354:
355: if (FMT__MAX == fmt) {
356: putchar(last = buf[(*start)++]);
1.8 kristaps 357: if ('\\' == last)
358: putchar('e');
1.6 kristaps 359: return(0);
360: }
361:
1.5 kristaps 362: /*
363: * Determine whether we're overriding our delimiter.
364: * According to POD, if we have more than one '<' followed by a
365: * space, then we need a space followed by matching '>' to close
366: * the expression.
367: * Otherwise we use the usual '<' and '>' matched pair.
368: */
369: i = *start + 1;
370: while (i < end && '<' == buf[i])
371: i++;
372: assert(i > *start + 1);
373: dsz = i - (*start + 1);
374: if (dsz > 1 && (i >= end || ' ' != buf[i]))
375: dsz = 1;
376:
377: /* Remember, if dsz>1, to jump the trailing space. */
378: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 379:
380: /*
1.6 kristaps 381: * Escapes and ignored codes (NULL and INDEX) don't print macro
382: * sequences, so just output them like normal text before
383: * processing for real macros.
1.1 schwarze 384: */
385: if (FMT_ESCAPE == fmt) {
386: formatescape(buf, start, end);
387: return(0);
388: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 389: /*
1.6 kristaps 390: * Just consume til the end delimiter, accounting for
391: * whether it's a custom one.
1.5 kristaps 392: */
393: for ( ; *start < end; (*start)++) {
394: if ('>' != buf[*start])
395: continue;
396: else if (dsz == 1)
397: break;
398: assert(*start > 0);
399: if (' ' != buf[*start - 1])
400: continue;
401: i = *start;
402: for (j = 0; i < end && j < dsz; j++)
403: if ('>' != buf[i++])
404: break;
405: if (dsz != j)
406: continue;
407: (*start) += dsz;
408: break;
409: }
1.1 schwarze 410: return(0);
411: }
412:
1.6 kristaps 413: /*
414: * Check whether we're supposed to print macro stuff (this is
415: * suppressed in, e.g., "Nm" and "Sh" macros).
416: */
1.1 schwarze 417: if ( ! nomacro) {
418: /*
419: * Print out the macro describing this format code.
420: * If we're not "reentrant" (not yet on a macro line)
421: * then print a newline, if necessary, and the macro
422: * indicator.
423: * Otherwise, offset us with a space.
424: */
1.6 kristaps 425: if ( ! reentrant) {
426: if (last != '\n')
427: putchar('\n');
1.1 schwarze 428: putchar('.');
1.6 kristaps 429: } else
1.1 schwarze 430: putchar(' ');
431:
432: /*
1.6 kristaps 433: * If we don't have whitespace before us (and none after
434: * the opening delimiter), then suppress macro
435: * whitespace with Pf.
1.1 schwarze 436: */
1.6 kristaps 437: if (' ' != last && '\n' != last && ' ' != buf[*start])
438: printf("Pf ");
439:
1.1 schwarze 440: switch (fmt) {
441: case (FMT_ITALIC):
442: printf("Em ");
443: break;
444: case (FMT_BOLD):
1.14 kristaps 445: if (SECT_SYNOPSIS == st->sect) {
446: if (1 == dsz && '-' == buf[*start])
447: dosynopsisfl(buf, start, end);
1.15 kristaps 448: else if (0 == pos)
449: printf("Nm ");
1.14 kristaps 450: else
451: printf("Ar ");
452: break;
453: }
454: printf("Sy ");
1.1 schwarze 455: break;
456: case (FMT_CODE):
1.2 schwarze 457: printf("Qo Li ");
1.1 schwarze 458: break;
459: case (FMT_LINK):
1.9 kristaps 460: if ( ! trylink(buf, start, end, dsz))
461: printf("No ");
1.1 schwarze 462: break;
463: case (FMT_FILE):
464: printf("Pa ");
465: break;
466: case (FMT_NBSP):
467: printf("No ");
468: break;
469: default:
470: abort();
471: }
472: }
473:
474: /*
1.6 kristaps 475: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 476: * find a nested format code.
1.1 schwarze 477: * Don't emit any newlines: since we're on a macro line, we
478: * don't want to break the line.
479: */
480: while (*start < end) {
1.5 kristaps 481: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 482: (*start)++;
483: break;
1.5 kristaps 484: } else if ('>' == buf[*start] &&
485: ' ' == buf[*start - 1]) {
486: /*
487: * Handle custom delimiters.
488: * These require a certain number of
489: * space-preceded carrots before we're really at
490: * the end.
491: */
492: i = *start;
493: for (j = 0; i < end && j < dsz; j++)
494: if ('>' != buf[i++])
495: break;
496: if (dsz == j) {
497: *start += dsz;
498: break;
499: }
1.1 schwarze 500: }
501: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15 kristaps 502: formatcode(st, buf, start, end, 1, nomacro, 1);
1.1 schwarze 503: continue;
504: }
1.3 schwarze 505:
1.4 schwarze 506: /*
507: * Make sure that any macro-like words (or
508: * really any word starting with a capital
509: * letter) is assumed to be a macro that must be
510: * escaped.
511: * This matches "Xx " and "XxEOLN".
512: */
513: if ((' ' == last || '\n' == last) &&
514: end - *start > 1 &&
515: isupper((int)buf[*start]) &&
516: islower((int)buf[*start + 1]) &&
517: (end - *start == 2 ||
518: ' ' == buf[*start + 2]))
519: printf("\\&");
1.3 schwarze 520:
1.4 schwarze 521: /* Suppress newline. */
1.6 kristaps 522: if ('\n' == buf[*start])
523: putchar(last = ' ');
524: else
525: putchar(last = buf[*start]);
1.4 schwarze 526:
1.8 kristaps 527: /* Protect against character escapes. */
528: if ('\\' == last)
529: putchar('e');
530:
1.6 kristaps 531: (*start)++;
532:
533: if (' ' == last)
534: while (*start < end && ' ' == buf[*start])
535: (*start)++;
1.1 schwarze 536: }
1.2 schwarze 537:
538: if ( ! nomacro && FMT_CODE == fmt)
539: printf(" Qc ");
1.1 schwarze 540:
541: /*
1.6 kristaps 542: * We're now subsequent the format code.
543: * If there isn't a space (or newline) here, and we haven't just
544: * printed a space, then suppress space.
1.1 schwarze 545: */
1.6 kristaps 546: if ( ! nomacro && ' ' != last)
547: if (' ' != buf[*start] && '\n' != buf[*start])
548: printf(" Ns ");
1.5 kristaps 549:
1.1 schwarze 550: return(1);
551: }
552:
553: /*
554: * Calls formatcode() til the end of a paragraph.
555: */
556: static void
1.11 kristaps 557: formatcodeln(struct state *st, const char *buf,
558: size_t *start, size_t end, int nomacro)
1.1 schwarze 559: {
560:
1.4 schwarze 561: last = ' ';
1.1 schwarze 562: while (*start < end) {
563: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.15 kristaps 564: formatcode(st, buf, start, end, 1, nomacro, 1);
1.1 schwarze 565: continue;
566: }
1.4 schwarze 567: /*
568: * Since we're already on a macro line, we want to make
569: * sure that we don't inadvertently invoke a macro.
570: * We need to do this carefully because section names
571: * are used in troff and we don't want to escape
572: * something that needn't be escaped.
573: */
574: if (' ' == last && end - *start > 1 &&
575: isupper((int)buf[*start]) &&
576: islower((int)buf[*start + 1]) &&
577: (end - *start == 2 ||
578: ' ' == buf[*start + 2]))
579: printf("\\&");
580:
1.8 kristaps 581: if ('\n' == buf[*start])
582: putchar(last = ' ');
583: else
1.1 schwarze 584: putchar(last = buf[*start]);
1.8 kristaps 585:
586: /* Protect against character escapes. */
587: if ('\\' == last)
588: putchar('e');
589:
1.1 schwarze 590: (*start)++;
591: }
592: }
593:
594: /*
1.4 schwarze 595: * Guess at what kind of list we are.
596: * These are taken straight from the POD manual.
597: * I don't know what people do in real life.
598: */
599: static enum list
600: listguess(const char *buf, size_t start, size_t end)
601: {
602: size_t len = end - start;
603:
604: assert(end >= start);
605:
606: if (len == 1 && '*' == buf[start])
607: return(LIST_BULLET);
608: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
609: return(LIST_ENUM);
610: else if (len == 1 && '1' == buf[start])
611: return(LIST_ENUM);
612: else
613: return(LIST_TAG);
614: }
615:
616: /*
1.1 schwarze 617: * A command paragraph, as noted in the perlpod manual, just indicates
618: * that we should do something, optionally with some text to print as
619: * well.
620: */
621: static void
622: command(struct state *st, const char *buf, size_t start, size_t end)
623: {
624: size_t len, csz;
625: enum cmd cmd;
626:
627: assert('=' == buf[start]);
628: start++;
629: len = end - start;
630:
631: for (cmd = 0; cmd < CMD__MAX; cmd++) {
632: csz = strlen(cmds[cmd]);
633: if (len < csz)
634: continue;
635: if (0 == memcmp(&buf[start], cmd[cmds], csz))
636: break;
637: }
638:
639: /* Ignore bogus commands. */
640:
641: if (CMD__MAX == cmd)
642: return;
643:
644: start += csz;
1.8 kristaps 645: while (start < end && ' ' == buf[start])
646: start++;
647:
1.1 schwarze 648: len = end - start;
649:
650: if (st->paused) {
651: st->paused = CMD_END != cmd;
652: return;
653: }
654:
655: switch (cmd) {
656: case (CMD_POD):
657: break;
658: case (CMD_HEAD1):
659: /*
660: * The behaviour of head= follows from a quick glance at
661: * how pod2man handles it.
662: */
663: printf(".Sh ");
1.11 kristaps 664: st->sect = SECT_NONE;
665: if (end - start == 4) {
1.1 schwarze 666: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 667: st->sect = SECT_NAME;
668: } else if (end - start == 8) {
669: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
670: st->sect = SECT_SYNOPSIS;
671: }
672: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 673: putchar('\n');
674: st->haspar = 1;
675: break;
676: case (CMD_HEAD2):
677: printf(".Ss ");
1.11 kristaps 678: formatcodeln(st, buf, &start, end, 1);
1.1 schwarze 679: putchar('\n');
680: st->haspar = 1;
681: break;
682: case (CMD_HEAD3):
683: puts(".Pp");
684: printf(".Em ");
1.11 kristaps 685: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 686: putchar('\n');
687: puts(".Pp");
688: st->haspar = 1;
689: break;
690: case (CMD_HEAD4):
691: puts(".Pp");
692: printf(".No ");
1.11 kristaps 693: formatcodeln(st, buf, &start, end, 0);
1.1 schwarze 694: putchar('\n');
695: puts(".Pp");
696: st->haspar = 1;
697: break;
698: case (CMD_OVER):
1.4 schwarze 699: /*
700: * If we have an existing list that hasn't had an =item
701: * yet, then make sure that we open it now.
702: * We use the default list type, but that can't be
703: * helped (we haven't seen any items yet).
1.1 schwarze 704: */
1.4 schwarze 705: if (st->lpos > 0)
706: if (LIST__MAX == st->lstack[st->lpos - 1]) {
707: st->lstack[st->lpos - 1] = LIST_TAG;
708: puts(".Bl -tag -width Ds");
709: }
710: st->lpos++;
711: assert(st->lpos < LIST_STACKSZ);
712: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 713: break;
714: case (CMD_ITEM):
1.6 kristaps 715: if (0 == st->lpos) {
716: /*
717: * Bad markup.
718: * Try to compensate.
719: */
720: st->lstack[st->lpos] = LIST__MAX;
721: st->lpos++;
722: }
1.4 schwarze 723: assert(st->lpos > 0);
724: /*
725: * If we're the first =item, guess at what our content
726: * will be: "*" is a bullet list, "1." is a numbered
727: * list, and everything is tagged.
728: */
729: if (LIST__MAX == st->lstack[st->lpos - 1]) {
730: st->lstack[st->lpos - 1] =
731: listguess(buf, start, end);
732: switch (st->lstack[st->lpos - 1]) {
733: case (LIST_BULLET):
734: puts(".Bl -bullet");
735: break;
736: case (LIST_ENUM):
737: puts(".Bl -enum");
738: break;
739: default:
740: puts(".Bl -tag -width Ds");
741: break;
742: }
743: }
744: switch (st->lstack[st->lpos - 1]) {
745: case (LIST_TAG):
746: printf(".It ");
1.11 kristaps 747: formatcodeln(st, buf, &start, end, 0);
1.4 schwarze 748: putchar('\n');
749: break;
750: case (LIST_ENUM):
751: /* FALLTHROUGH */
752: case (LIST_BULLET):
753: /*
754: * Abandon the remainder of the paragraph
755: * because we're going to be a bulletted or
756: * numbered list.
757: */
758: puts(".It");
759: break;
760: default:
761: abort();
762: }
1.1 schwarze 763: st->haspar = 1;
764: break;
765: case (CMD_BACK):
1.4 schwarze 766: /* Make sure we don't back over the stack. */
767: if (st->lpos > 0) {
768: st->lpos--;
769: puts(".El");
770: }
1.1 schwarze 771: break;
772: case (CMD_BEGIN):
773: /*
774: * We disregard all types for now.
775: * TODO: process at least "text" in a -literal block.
776: */
777: st->paused = 1;
778: break;
779: case (CMD_FOR):
780: /*
781: * We ignore all types of encodings and formats
782: * unilaterally.
783: */
784: break;
785: case (CMD_ENCODING):
786: break;
787: case (CMD_CUT):
788: st->parsing = 0;
789: return;
790: default:
791: abort();
792: }
793:
794: /* Any command (but =cut) makes us start parsing. */
795: st->parsing = 1;
796: }
797:
798: /*
799: * Just pump out the line in a verbatim block.
800: */
801: static void
802: verbatim(struct state *st, const char *buf, size_t start, size_t end)
803: {
1.8 kristaps 804: int last;
1.1 schwarze 805:
806: if ( ! st->parsing || st->paused)
807: return;
808:
809: puts(".Bd -literal");
1.8 kristaps 810: for (last = ' '; start < end; start++) {
811: /*
812: * Handle accidental macros (newline starting with
813: * control character) and escapes.
814: */
815: if ('\n' == last)
1.7 kristaps 816: if ('.' == buf[start] || '\'' == buf[start])
817: printf("\\&");
1.8 kristaps 818: putchar(last = buf[start]);
819: if ('\\' == buf[start])
820: printf("e");
1.7 kristaps 821: }
822: putchar('\n');
1.1 schwarze 823: puts(".Ed");
824: }
825:
/*
 * Return whether buf[start] up to (not including) buf[end] contains a
 * ']' that is not paired with an earlier '[' of the same range.
 * Called with "start" just after an opening bracket, this tells
 * whether that bracket is ever closed.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 stack;

	for (stack = 0; start < end; start++)
		if (buf[start] == '[')
			stack++;
		else if (buf[start] == ']' && 0 == stack)
			return(1);
		else if (buf[start] == ']')
			stack--;
	return(0);
}

/*
 * If we're in the SYNOPSIS section and we've encounter braces in an
 * ordinary paragraph, then try to see whether we're an [-option].
 * Do this, if we're an opening bracket, by first seeing if we have a
 * matching end via hasmatch().
 * If we're an ending bracket, see if we have a stack already.
 *
 * buf[*start] must be '[' or ']'.
 * On success, print "Oo" or "Oc" on a line of its own, adjust the
 * "opstack" count of open brackets, set "last" to newline, move start
 * past the bracket and any spaces that follow it (never beyond
 * "end"), and return 1.
 * Otherwise, change and print nothing and return 0.
 */
static int
dosynopsisop(const char *buf, int *last,
	size_t *start, size_t end, size_t *opstack)
{

	assert('[' == buf[*start] || ']' == buf[*start]);

	if ('[' == buf[*start]) {
		if ( ! hasmatch(buf, *start + 1, end))
			return(0);
		if ('\n' != *last)
			putchar('\n');
		puts(".Oo");
		(*opstack)++;
	} else {
		if (0 == *opstack)
			return(0);
		if ('\n' != *last)
			putchar('\n');
		puts(".Oc");
		(*opstack)--;
	}

	(*start)++;
	*last = '\n';
	/* Stay within the paragraph while eating the spaces. */
	while (*start < end && ' ' == buf[*start])
		(*start)++;
	return(1);
}
880:
/*
 * Print the comma-separated manual names of the NAME section, found
 * in buf[*start] up to (not including) buf[end], as one "Nm" line
 * each; all but the last are followed by a comma.
 * Leading spaces and spaces after a comma are skipped.
 * If nothing but spaces is found, print ".Nm unknown".
 */
static void
donamenm(struct state *st, const char *buf, size_t *start, size_t end)
{
	size_t	 comma;

	for ( ; *start < end; (*start)++)
		if (' ' != buf[*start])
			break;

	if (*start == end) {
		puts(".Nm unknown");
		return;
	}

	while (*start < end) {
		fputs(".Nm ", stdout);

		/* The name runs up to the next comma, if any. */
		comma = *start;
		while (comma < end && ',' != buf[comma])
			comma++;
		formatcodeln(st, buf, start, comma, 1);

		/* That was the last name. */
		if (end == *start) {
			putchar('\n');
			break;
		}

		assert(',' == buf[*start]);
		puts(" ,");
		for ((*start)++; *start < end; (*start)++)
			if (' ' != buf[*start])
				break;
	}
}
914:
/*
 * Ordinary paragraph.
 * Well, this is really the hardest--POD seems to assume that, for
 * example, a leading space implies a newline, and so on.
 * Lots of other snakes in the grass: escaping a newline followed by a
 * period (accidental mdoc(7) control), double-newlines after macro
 * passages, etc.
 * The paragraph is buf[start] up to (not including) buf[end].
 * Nothing is printed unless we're parsing and not paused.
 */
static void
ordinary(struct state *st, const char *buf, size_t start, size_t end)
{
	size_t		 i, j, opstack;
	int		 seq;

	if ( ! st->parsing || st->paused)
		return;

	/*
	 * Special-case: the NAME section.
	 * If we find a "-" when searching from the end, assume that
	 * we're in "name - description" format.
	 * To wit, print out a "Nm" and "Nd" in that format.
	 * Only a dash followed by a space counts as the separator.
	 */
	if (SECT_NAME == st->sect) {
		for (i = end - 2; i > start; i--)
			if ('-' == buf[i] && ' ' == buf[i + 1])
				break;
		if ('-' == buf[i]) {
			/* j remembers the last dash of the separator. */
			j = i;
			/* Roll over multiple "-". */
			for ( ; i > start; i--)
				if ('-' != buf[i])
					break;
			/* Names are everything in front of the dashes. */
			donamenm(st, buf, &start, i + 1);
			/* The description is everything after them. */
			start = j + 1;
			while (start < end && ' ' == buf[start])
				start++;
			fputs(".Nd ", stdout);
			formatcodeln(st, buf, &start, end, 1);
			putchar('\n');
			return;
		}
	}

	/* Start a new paragraph unless a macro just did that for us. */
	if ( ! st->haspar)
		puts(".Pp");

	st->haspar = 0;
	last = '\n';
	opstack = 0;

	/*
	 * "seq" counts the passes through this loop and is handed to
	 * formatcode() as "pos": it is zero on the first pass only.
	 */
	for (seq = 0; start < end; seq++) {
		/*
		 * Loop til we get either to a newline or escape.
		 * Escape initial control characters.
		 */
		while (start < end) {
			/* Stop on the character in front of a '<'. */
			if (start < end - 1 && '<' == buf[start + 1])
				break;
			else if ('\n' == buf[start])
				break;
			else if ('\n' == last && '.' == buf[start])
				printf("\\&");
			else if ('\n' == last && '\'' == buf[start])
				printf("\\&");
			/*
			 * If we're in the SYNOPSIS, have square
			 * brackets indicate that we're opening and
			 * closing an optional context.
			 * If dosynopsisop() returns zero, the bracket
			 * is printed as ordinary text below.
			 */
			if (SECT_SYNOPSIS == st->sect &&
				('[' == buf[start] ||
				 ']' == buf[start]) &&
				dosynopsisop(buf, &last,
					&start, end, &opstack))
				continue;
			putchar(last = buf[start++]);
			/* Protect against character escapes. */
			if ('\\' == last)
				putchar('e');
		}

		if (start < end - 1 && '<' == buf[start + 1]) {
			/*
			 * We've encountered a format code.
			 * This is going to trigger a macro no matter
			 * what, so print a newline now.
			 * Then print the (possibly nested) macros and
			 * following that, a newline.
			 * Consume all whitespace so we don't
			 * accidentally start an implicit literal line.
			 * If the macro ends with a flush comma or
			 * period, let mdoc(7) handle it for us.
			 * If formatcode() returns zero, no macro line
			 * was started and we just carry on with text.
			 */
			if (formatcode(st, buf, &start, end, 0, 0, seq)) {
				if ((start == end - 1 ||
					(start < end - 1 &&
					 (' ' == buf[start + 1] ||
					  '\n' == buf[start + 1]))) &&
					('.' == buf[start] ||
					 ',' == buf[start])) {
					putchar(' ');
					putchar(buf[start++]);
				}
				putchar(last = '\n');
				while (start < end && ' ' == buf[start])
					start++;
			}
		} else if (start < end && '\n' == buf[start]) {
			/*
			 * Print the newline only if we haven't already
			 * printed a newline.
			 */
			if (last != '\n')
				putchar(last = buf[start]);
			if (++start >= end)
				continue;
			/*
			 * If we have whitespace next, eat it to prevent
			 * mdoc(7) from thinking that it's meant for
			 * verbatim text.
			 * It is--but if we start with that, we can't
			 * have a macro subsequent it, which may be
			 * possible if we have an escape next.
			 */
			if (' ' == buf[start] || '\t' == buf[start]) {
				puts(".br");
				last = '\n';
			}
			for ( ; start < end; start++)
				if (' ' != buf[start] && '\t' != buf[start])
					break;
		}
	}

	/* Always leave the output at the start of a line. */
	if (last != '\n')
		putchar('\n');
}
1052:
/*
 * Dispatch one paragraph, buf[start] up to (not including) buf[end],
 * on its first character: a space or tab makes it verbatim, a "="
 * makes it a command, and anything else makes it ordinary.
 * Empty paragraphs are ignored.
 */
static void
dopar(struct state *st, const char *buf, size_t start, size_t end)
{

	if (start == end)
		return;

	switch (buf[start]) {
	case (' '):
		/* FALLTHROUGH */
	case ('\t'):
		verbatim(st, buf, start, end);
		break;
	case ('='):
		command(st, buf, start, end);
		break;
	default:
		ordinary(st, buf, start, end);
		break;
	}
}
1071:
1072: /*
1073: * Loop around paragraphs within a document, processing each one in the
1074: * POD way.
1075: */
1076: static void
1077: dofile(const struct args *args, const char *fname,
1078: const struct tm *tm, const char *buf, size_t sz)
1079: {
1080: size_t sup, end, i, cur = 0;
1081: struct state st;
1082: const char *section, *date;
1083: char datebuf[64];
1084: char *title, *cp;
1085:
1086: if (0 == sz)
1087: return;
1088:
1089: /* Title is last path component of the filename. */
1090:
1091: if (NULL != args->title)
1092: title = strdup(args->title);
1093: else if (NULL != (cp = strrchr(fname, '/')))
1094: title = strdup(cp + 1);
1095: else
1096: title = strdup(fname);
1097:
1098: if (NULL == title) {
1099: perror(NULL);
1100: exit(EXIT_FAILURE);
1101: }
1102:
1103: /* Section is 1 unless suffix is "pm". */
1104:
1105: if (NULL == (section = args->section)) {
1106: section = "1";
1107: if (NULL != (cp = strrchr(title, '.'))) {
1108: *cp++ = '\0';
1109: if (0 == strcmp(cp, "pm"))
1.10 kristaps 1110: section = PERL_SECTION;
1.1 schwarze 1111: }
1112: }
1113:
1114: /* Date. Or the given "tm" if not supplied. */
1115:
1116: if (NULL == (date = args->date)) {
1117: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1118: date = datebuf;
1119: }
1120:
1121: for (cp = title; '\0' != *cp; cp++)
1122: *cp = toupper((int)*cp);
1123:
1124: /* The usual mdoc(7) preamble. */
1125:
1126: printf(".Dd %s\n", date);
1127: printf(".Dt %s %s\n", title, section);
1128: puts(".Os");
1129:
1130: free(title);
1131:
1132: memset(&st, 0, sizeof(struct state));
1133: assert(sz > 0);
1134:
1135: /* Main loop over file contents. */
1136:
1137: while (cur < sz) {
1138: /* Read until next paragraph. */
1139: for (i = cur + 1; i < sz; i++)
1140: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1141: /* Consume blank paragraphs. */
1142: while (i + 1 < sz && '\n' == buf[i + 1])
1143: i++;
1144: break;
1145: }
1146:
1147: /* Adjust end marker for EOF. */
1148: end = i < sz ? i - 1 :
1149: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1150: sup = i < sz ? end + 2 : sz;
1151:
1152: /* Process paragraph and adjust start. */
1153: dopar(&st, buf, cur, end);
1154: cur = sup;
1155: }
1156: }
1157:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The date handed to dofile() is the modification time of the file,
 * or the current time for stdin or if fstat(2) fails.
 * Returns 1 on success and 0 if the file can't be opened or read or
 * its time can't be converted; exits if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf, *nbuf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	assert(NULL != fname);

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	rc = 0;
	buf = NULL;

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/* dofile() may hand this to strftime(3): it must be valid. */
	if (NULL == tm) {
		perror(fname);
		goto out;
	}

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur)  {
			bufsz *= 2;
			if (NULL == (nbuf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
			buf = nbuf;
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		perror(fname);
		goto out;
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	rc = 1;
out:
	/* Common exit: don't leak the buffer or the descriptor. */
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
1226:
1227: int
1228: main(int argc, char *argv[])
1229: {
1230: const char *fname, *name;
1231: struct args args;
1232: int c;
1233:
1234: name = strrchr(argv[0], '/');
1235: if (name == NULL)
1236: name = argv[0];
1237: else
1238: ++name;
1239:
1240: memset(&args, 0, sizeof(struct args));
1241: fname = "-";
1242:
1243: /* Accept no arguments for now. */
1244:
1245: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1246: switch (c) {
1247: case ('h'):
1248: /* FALLTHROUGH */
1249: case ('l'):
1250: /* FALLTHROUGH */
1251: case ('c'):
1252: /* FALLTHROUGH */
1253: case ('o'):
1254: /* FALLTHROUGH */
1255: case ('q'):
1256: /* FALLTHROUGH */
1257: case ('r'):
1258: /* FALLTHROUGH */
1259: case ('u'):
1260: /* FALLTHROUGH */
1261: case ('v'):
1262: /* Ignore these. */
1263: break;
1264: case ('d'):
1265: args.date = optarg;
1266: break;
1267: case ('n'):
1268: args.title = optarg;
1269: break;
1270: case ('s'):
1271: args.section = optarg;
1272: break;
1273: default:
1274: goto usage;
1275: }
1276:
1277: argc -= optind;
1278: argv += optind;
1279:
1280: /* Accept only a single input file. */
1281:
1282: if (argc > 2)
1283: return(EXIT_FAILURE);
1284: else if (1 == argc)
1285: fname = *argv;
1286:
1287: return(readfile(&args, fname) ?
1288: EXIT_SUCCESS : EXIT_FAILURE);
1289:
1290: usage:
1291: fprintf(stderr, "usage: %s [-d date] "
1292: "[-n title] [-s section]\n", name);
1293:
1294: return(EXIT_FAILURE);
1295: }
CVSweb