Annotation of pod2mdoc/pod2mdoc.c, Revision 1.36
1.36 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.35 2014/10/22 23:10:05 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Manual section named in cross references to Perl modules.
 * Some systems (Mac OS X) use 3pm, others (OpenBSD, etc.) use 3p.
 * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
 */
#define	PERL_SECTION	"3p"
35:
/*
 * Optional replacements for values in the document prologue.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
41:
/*
 * Kinds of list that can be open on the list stack.
 * LIST__MAX doubles as the marker for a list that has been
 * pushed by =over but whose type is not known yet.
 */
enum list {
	LIST_BULLET = 0,	/* ".Bl -bullet" */
	LIST_ENUM = 1,		/* ".Bl -enum" */
	LIST_TAG = 2,		/* ".Bl -tag" */
	LIST__MAX = 3
};
48:
/*
 * Sections that receive special treatment while converting.
 */
enum sect {
	SECT_NONE = 0,		/* any other section */
	SECT_NAME = 1,		/* NAME section */
	SECT_SYNOPSIS = 2	/* SYNOPSIS section */
};
54:
/*
 * What kind of line the mdoc output stream is currently on.
 */
enum outstate {
	OUST_NL = 0,	/* at the start of a fresh output line */
	OUST_TXT = 1,	/* in the middle of a text line */
	OUST_MAC = 2	/* in the middle of a macro line */
};
60:
1.1 schwarze 61: struct state {
1.31 schwarze 62: const char *fname; /* file being parsed */
1.1 schwarze 63: int parsing; /* after =cut of before command */
64: int paused; /* in =begin and before =end */
1.11 kristaps 65: enum sect sect; /* which section are we in? */
1.4 schwarze 66: #define LIST_STACKSZ 128
67: enum list lstack[LIST_STACKSZ]; /* open lists */
68: size_t lpos; /* where in list stack */
1.31 schwarze 69: int haspar; /* in paragraph: do we need Pp? */
1.32 schwarze 70: enum outstate oust; /* state of the mdoc output stream */
71: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 72: char *outbuf; /* text buffered for output */
73: size_t outbufsz; /* allocated size of outbuf */
74: size_t outbuflen; /* current length of outbuf */
1.1 schwarze 75: };
76:
/*
 * POD formatting codes, in the same order as the fmts[] letters.
 * FMT__MAX also stands for "unknown code letter".
 */
enum fmt {
	FMT_ITALIC = 0,
	FMT_BOLD = 1,
	FMT_CODE = 2,
	FMT_LINK = 3,
	FMT_ESCAPE = 4,
	FMT_FILE = 5,
	FMT_NBSP = 6,
	FMT_INDEX = 7,
	FMT_NULL = 8,
	FMT__MAX = 9
};
89:
/*
 * POD command paragraphs, in the same order as the cmds[] names.
 * CMD__MAX also stands for "unrecognised command".
 */
enum cmd {
	CMD_POD = 0,
	CMD_HEAD1 = 1,
	CMD_HEAD2 = 2,
	CMD_HEAD3 = 3,
	CMD_HEAD4 = 4,
	CMD_OVER = 5,
	CMD_ITEM = 6,
	CMD_BACK = 7,
	CMD_BEGIN = 8,
	CMD_END = 9,
	CMD_FOR = 10,
	CMD_ENCODING = 11,
	CMD_CUT = 12,
	CMD__MAX = 13
};
106:
107: static const char *const cmds[CMD__MAX] = {
108: "pod", /* CMD_POD */
109: "head1", /* CMD_HEAD1 */
110: "head2", /* CMD_HEAD2 */
111: "head3", /* CMD_HEAD3 */
112: "head4", /* CMD_HEAD4 */
113: "over", /* CMD_OVER */
114: "item", /* CMD_ITEM */
115: "back", /* CMD_BACK */
116: "begin", /* CMD_BEGIN */
117: "end", /* CMD_END */
118: "for", /* CMD_FOR */
119: "encoding", /* CMD_ENCODING */
120: "cut" /* CMD_CUT */
121: };
122:
123: static const char fmts[FMT__MAX] = {
124: 'I', /* FMT_ITALIC */
125: 'B', /* FMT_BOLD */
126: 'C', /* FMT_CODE */
127: 'L', /* FMT_LINK */
128: 'E', /* FMT_ESCAPE */
129: 'F', /* FMT_FILE */
130: 'S', /* FMT_NBSP */
131: 'X', /* FMT_INDEX */
132: 'Z' /* FMT_NULL */
133: };
134:
/* Most recent character produced or consumed; shared by the output helpers. */
static int	 last = 0;
136:
1.31 schwarze 137:
138: static void
139: outbuf_grow(struct state *st, size_t by)
140: {
141:
142: st->outbufsz += (by / 128 + 1) * 128;
143: st->outbuf = realloc(st->outbuf, st->outbufsz);
144: if (NULL == st->outbuf) {
145: perror(NULL);
146: exit(EXIT_FAILURE);
147: }
148: }
149:
150: static void
151: outbuf_addchar(struct state *st)
152: {
153:
154: if (st->outbuflen + 2 >= st->outbufsz)
155: outbuf_grow(st, 1);
156: st->outbuf[st->outbuflen++] = last;
157: if ('\\' == last)
158: st->outbuf[st->outbuflen++] = 'e';
159: st->outbuf[st->outbuflen] = '\0';
1.32 schwarze 160: st->wantws = 0;
1.31 schwarze 161: }
162:
163: static void
164: outbuf_addstr(struct state *st, const char *str)
165: {
166: size_t slen;
167:
168: slen = strlen(str);
169: if (st->outbuflen + slen >= st->outbufsz)
170: outbuf_grow(st, slen);
171: memcpy(st->outbuf + st->outbuflen, str, slen+1);
1.33 schwarze 172: st->outbuflen += slen;
1.31 schwarze 173: last = str[slen - 1];
1.32 schwarze 174: st->wantws = 0;
1.31 schwarze 175: }
176:
177: static void
178: outbuf_flush(struct state *st)
179: {
180:
181: if (0 == st->outbuflen)
182: return;
183:
184: fputs(st->outbuf, stdout);
185: *st->outbuf = '\0';
186: st->outbuflen = 0;
1.32 schwarze 187:
188: if (OUST_NL == st->oust)
189: st->oust = OUST_TXT;
1.31 schwarze 190: }
191:
192: static void
1.32 schwarze 193: mdoc_newln(struct state *st)
1.31 schwarze 194: {
195:
1.32 schwarze 196: if (OUST_NL == st->oust)
1.31 schwarze 197: return;
1.32 schwarze 198:
1.31 schwarze 199: putchar('\n');
200: last = '\n';
1.32 schwarze 201: st->oust = OUST_NL;
202: st->wantws = 1;
1.31 schwarze 203: }
204:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', or to "end" if the input runs
 * out before a '>' is found.
 *
 * Only the named escapes lt, gt, verbar and sol are translated;
 * anything else is consumed without producing output.
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/*
		 * Too long to be a name we know: skip til the end.
		 * Step over the '>' as well, otherwise the caller
		 * would see it as text or as the end of an
		 * enclosing format code.
		 */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
251:
252: /*
1.9 kristaps 253: * Run some heuristics to intuit a link format.
1.19 kristaps 254: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 255: * that the caller can safely just continue processing.
1.19 kristaps 256: * If this is just an empty tag, I'll return 0.
1.32 schwarze 257: *
258: * Always operates in OUST_MAC mode.
259: * Mode handling is done by the caller.
1.9 kristaps 260: */
261: static int
262: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
263: {
1.21 kristaps 264: size_t linkstart, realend, linkend,
265: i, j, textsz, stack;
1.9 kristaps 266:
267: /*
268: * Scan to the start of the terminus.
269: * This function is more or less replicated in the formatcode()
270: * for null or index formatting codes.
1.23 kristaps 271: * However, we're slightly different because we might have
272: * nested escapes we need to ignore.
1.9 kristaps 273: */
1.21 kristaps 274: stack = 0;
1.19 kristaps 275: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 276: if ('<' == buf[realend])
277: stack++;
1.19 kristaps 278: if ('>' != buf[realend])
1.9 kristaps 279: continue;
1.23 kristaps 280: else if (stack-- > 0)
281: continue;
282: if (dsz == 1)
1.9 kristaps 283: break;
1.19 kristaps 284: assert(realend > 0);
285: if (' ' != buf[realend - 1])
1.9 kristaps 286: continue;
1.19 kristaps 287: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 288: if ('>' != buf[i++])
289: break;
290: if (dsz == j)
291: break;
292: }
1.19 kristaps 293:
294: /* Ignore stubs. */
295: if (realend == end || realend == *start)
1.9 kristaps 296: return(0);
297:
1.19 kristaps 298: /* Set linkend to the end of content. */
299: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 300:
1.19 kristaps 301: /* Re-scan to see if we have a title or section. */
302: for (textsz = *start; textsz < linkend; textsz++)
303: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 304: break;
305:
1.19 kristaps 306: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 307: /* With title: set start, then end at section. */
1.19 kristaps 308: linkstart = textsz + 1;
1.18 kristaps 309: textsz = textsz - *start;
1.19 kristaps 310: for (i = linkstart; i < linkend; i++)
311: if ('/' == buf[i])
312: break;
313: if (i < linkend)
314: linkend = i;
1.20 kristaps 315: } else if (textsz < linkend && '/' == buf[textsz]) {
316: /* With section: set end at section. */
317: linkend = textsz;
318: textsz = 0;
319: } else
320: /* No title, no section. */
1.18 kristaps 321: textsz = 0;
1.19 kristaps 322:
323: *start = realend;
324: j = linkend - linkstart;
325:
1.20 kristaps 326: /* Do we have only subsection material? */
327: if (0 == j && '/' == buf[linkend]) {
328: linkstart = linkend + 1;
329: linkend = dsz > 1 ? realend - 1 : realend;
330: if (0 == (j = linkend - linkstart))
331: return(0);
332: printf("Sx %.*s", (int)j, &buf[linkstart]);
333: return(1);
334: } else if (0 == j)
1.19 kristaps 335: return(0);
336:
337: /* See if we qualify as being a link or not. */
1.20 kristaps 338: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
339: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
340: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
341: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
342: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
343: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
344: /* Gross. */
345: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
346: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 347: return(1);
348: }
349:
350: /* See if we qualify as a mailto. */
1.20 kristaps 351: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 352: printf("Mt %.*s", (int)j, &buf[linkstart]);
353: return(1);
354: }
355:
356: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
357: if ((j > 3 && ')' == buf[linkend - 1]) &&
358: ('(' == buf[linkend - 3])) {
359: printf("Xr %.*s %c", (int)(j - 3),
360: &buf[linkstart], buf[linkend - 2]);
361: return(1);
362: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
363: ('(' == buf[linkend - 4])) {
364: printf("Xr %.*s %.*s", (int)(j - 4),
365: &buf[linkstart], 2, &buf[linkend - 3]);
366: return(1);
367: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
368: ('(' == buf[linkend - 5])) {
369: printf("Xr %.*s %.*s", (int)(j - 5),
370: &buf[linkstart], 3, &buf[linkend - 4]);
371: return(1);
372: }
373:
374: /* Last try: do we have a double-colon? */
375: for (i = linkstart + 1; i < linkend; i++)
376: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 377: break;
1.9 kristaps 378:
1.19 kristaps 379: if (i < linkend)
1.10 kristaps 380: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 381: (int)j, &buf[linkstart]);
1.9 kristaps 382: else
1.19 kristaps 383: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 384:
385: return(1);
386: }
387:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry buf[*start] must be the '-'.
 * Prints "Fl name " for each flag found and finally "Ar " if
 * something other than the closing '>' follows; "start" is left at
 * the first character the caller still has to print.
 * Input that ends early is treated as "not a flag" rather than
 * aborting the program.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	assert('-' == buf[*start]);

	/*
	 * A dash that is last in the input, or that is not followed
	 * by a flag character, is ordinary argument text.
	 * Leave "start" on the dash so that the caller prints it;
	 * stepping back would re-emit the character before it, which
	 * on the first pass is the '<' that opened the code.
	 */
	if (*start + 1 >= end ||
	    ( ! isalnum((unsigned char)buf[*start + 1]) &&
	     '?' != buf[*start + 1] &&
	     '-' != buf[*start + 1])) {
		fputs("Ar ", stdout);
		return;
	}

	/* Find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++)
		if ( ! isalnum((unsigned char)buf[i]) &&
		    '?' != buf[i] &&
		    '-' != buf[i] &&
		    '_' != buf[i])
			break;

	/*
	 * The name must be followed by a space or by the closing '>'.
	 * NOTE(review): on this path the leading dash has already been
	 * skipped and is not printed; confirm that this is intended.
	 */
	if (i >= end || ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Protect a two-letter name that looks like an mdoc macro. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		/* Another dash starts another flag. */
		if (i < end && '-' == buf[i]) {
			*start = i;
			goto again;
		}
		printf("Ar ");
		*start = i;
	}
}
456:
1.9 kristaps 457: /*
1.1 schwarze 458: * We're at the character in front of a format code, which is structured
459: * like X<...> and can contain nested format codes.
460: * This consumes the whole format code, and any nested format codes, til
461: * the end of matched production.
1.6 kristaps 462: * If "nomacro", then we don't print any macros, just contained data
463: * (e.g., following "Sh" or "Nm").
1.15 kristaps 464: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
465: * as the first format code on a line (for decoration as an "Nm"),
466: * non-zero otherwise.
1.32 schwarze 467: *
468: * Output mode handling is most complicated here.
469: * We may enter in any mode.
470: * We usually exit in OUST_MAC mode, except when
471: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 472: */
1.33 schwarze 473: static int
1.15 kristaps 474: formatcode(struct state *st, const char *buf, size_t *start,
1.32 schwarze 475: size_t end, int nomacro, int pos)
1.1 schwarze 476: {
477: enum fmt fmt;
1.5 kristaps 478: size_t i, j, dsz;
1.1 schwarze 479:
480: assert(*start + 1 < end);
481: assert('<' == buf[*start + 1]);
482:
1.6 kristaps 483: /*
484: * First, look up the format code.
1.30 schwarze 485: * If it's not valid, treat it as a NOOP.
1.6 kristaps 486: */
487: for (fmt = 0; fmt < FMT__MAX; fmt++)
488: if (buf[*start] == fmts[fmt])
489: break;
490:
1.5 kristaps 491: /*
492: * Determine whether we're overriding our delimiter.
493: * According to POD, if we have more than one '<' followed by a
494: * space, then we need a space followed by matching '>' to close
495: * the expression.
496: * Otherwise we use the usual '<' and '>' matched pair.
497: */
498: i = *start + 1;
499: while (i < end && '<' == buf[i])
500: i++;
501: assert(i > *start + 1);
502: dsz = i - (*start + 1);
503: if (dsz > 1 && (i >= end || ' ' != buf[i]))
504: dsz = 1;
505:
506: /* Remember, if dsz>1, to jump the trailing space. */
507: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 508:
509: /*
1.6 kristaps 510: * Escapes and ignored codes (NULL and INDEX) don't print macro
511: * sequences, so just output them like normal text before
512: * processing for real macros.
1.1 schwarze 513: */
514: if (FMT_ESCAPE == fmt) {
1.31 schwarze 515: formatescape(st, buf, start, end);
1.33 schwarze 516: return(0);
1.1 schwarze 517: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 518: /*
1.6 kristaps 519: * Just consume til the end delimiter, accounting for
520: * whether it's a custom one.
1.5 kristaps 521: */
522: for ( ; *start < end; (*start)++) {
523: if ('>' != buf[*start])
524: continue;
525: else if (dsz == 1)
526: break;
527: assert(*start > 0);
528: if (' ' != buf[*start - 1])
529: continue;
530: i = *start;
531: for (j = 0; i < end && j < dsz; j++)
532: if ('>' != buf[i++])
533: break;
534: if (dsz != j)
535: continue;
536: (*start) += dsz;
537: break;
538: }
1.24 kristaps 539: if (*start < end) {
540: assert('>' == buf[*start]);
541: (*start)++;
542: }
543: if (isspace(last))
544: while (*start < end && isspace((int)buf[*start]))
545: (*start)++;
1.33 schwarze 546: return(0);
1.1 schwarze 547: }
548:
1.6 kristaps 549: /*
550: * Check whether we're supposed to print macro stuff (this is
551: * suppressed in, e.g., "Nm" and "Sh" macros).
552: */
1.30 schwarze 553: if (FMT__MAX != fmt && !nomacro) {
1.32 schwarze 554:
555: /*
556: * We may already have wantws if there was whitespace
557: * before the code ("text B<text"), but initial
558: * whitespace inside our scope ("textB< text")
559: * allows to break at this point as well.
560: */
561:
562: st->wantws |= ' ' == buf[*start];
1.31 schwarze 563:
1.1 schwarze 564: /*
1.31 schwarze 565: * If we are on a text line and there is no
566: * whitespace before our content, we have to make
567: * the previous word a prefix to the macro line.
1.32 schwarze 568: * In the following, mdoc_newln() must not be used
569: * lest we clobber out output state.
1.1 schwarze 570: */
1.31 schwarze 571:
1.32 schwarze 572: if (OUST_MAC != st->oust && !st->wantws) {
573: if (OUST_NL != st->oust)
1.31 schwarze 574: putchar('\n');
575: printf(".Pf ");
576: }
577:
578: outbuf_flush(st);
579:
580: /* Whitespace is easier to suppress on macro lines. */
581:
1.32 schwarze 582: if (OUST_MAC == st->oust && !st->wantws)
583: printf(" Ns ");
1.31 schwarze 584:
585: /* Unless we are on a macro line, start one. */
586:
1.32 schwarze 587: if (OUST_MAC != st->oust && st->wantws) {
588: if (OUST_NL != st->oust)
1.6 kristaps 589: putchar('\n');
1.1 schwarze 590: putchar('.');
1.31 schwarze 591: } else
1.1 schwarze 592: putchar(' ');
1.31 schwarze 593:
1.32 schwarze 594: /*
595: * Print the macro corresponding to this format code,
596: * and update the output state afterwards.
597: */
1.6 kristaps 598:
1.1 schwarze 599: switch (fmt) {
600: case (FMT_ITALIC):
601: printf("Em ");
602: break;
603: case (FMT_BOLD):
1.14 kristaps 604: if (SECT_SYNOPSIS == st->sect) {
605: if (1 == dsz && '-' == buf[*start])
606: dosynopsisfl(buf, start, end);
1.15 kristaps 607: else if (0 == pos)
608: printf("Nm ");
1.14 kristaps 609: else
610: printf("Ar ");
611: break;
612: }
1.27 schwarze 613: if (0 == strncmp(buf + *start, "NULL", 4) &&
614: ('=' == buf[*start + 4] ||
615: '>' == buf[*start + 4]))
616: printf("Dv ");
617: else
618: printf("Sy ");
1.1 schwarze 619: break;
620: case (FMT_CODE):
1.2 schwarze 621: printf("Qo Li ");
1.1 schwarze 622: break;
623: case (FMT_LINK):
1.19 kristaps 624: /* Try to link; use "No" if it's empty. */
1.9 kristaps 625: if ( ! trylink(buf, start, end, dsz))
626: printf("No ");
1.1 schwarze 627: break;
628: case (FMT_FILE):
629: printf("Pa ");
630: break;
631: case (FMT_NBSP):
632: printf("No ");
633: break;
634: default:
635: abort();
636: }
1.32 schwarze 637: st->oust = OUST_MAC;
638: st->wantws = 1;
1.31 schwarze 639: } else
640: outbuf_flush(st);
1.1 schwarze 641:
642: /*
1.6 kristaps 643: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 644: * find a nested format code.
1.1 schwarze 645: * Don't emit any newlines: since we're on a macro line, we
646: * don't want to break the line.
647: */
648: while (*start < end) {
1.5 kristaps 649: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 650: (*start)++;
651: break;
1.5 kristaps 652: } else if ('>' == buf[*start] &&
653: ' ' == buf[*start - 1]) {
654: /*
655: * Handle custom delimiters.
656: * These require a certain number of
657: * space-preceded carrots before we're really at
658: * the end.
659: */
660: i = *start;
661: for (j = 0; i < end && j < dsz; j++)
662: if ('>' != buf[i++])
663: break;
664: if (dsz == j) {
665: *start += dsz;
666: break;
667: }
1.1 schwarze 668: }
1.34 schwarze 669: if (*start + 1 < end && '<' == buf[*start + 1] &&
670: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.32 schwarze 671: formatcode(st, buf, start, end, nomacro, 1);
1.1 schwarze 672: continue;
673: }
1.3 schwarze 674:
1.32 schwarze 675: /* Suppress newlines and multiple spaces. */
676:
677: last = buf[(*start)++];
678: if (' ' == last || '\n' == last) {
679: putchar(' ');
680: while (*start < end && ' ' == buf[*start])
681: (*start)++;
682: continue;
683: }
684:
1.33 schwarze 685: if (OUST_MAC == st->oust && FMT__MAX != fmt) {
1.32 schwarze 686: if ( ! st->wantws) {
687: printf(" Ns ");
688: st->wantws = 1;
689: }
690:
691: /*
692: * Escape macro-like words.
693: * This matches "Xx " and "XxEOLN".
694: */
695:
696: if (end - *start > 0 &&
697: isupper((unsigned char)last) &&
698: islower((unsigned char)buf[*start]) &&
699: (end - *start == 1 ||
700: ' ' == buf[*start + 1] ||
701: '>' == buf[*start + 1]))
702: printf("\\&");
703: }
1.3 schwarze 704:
1.32 schwarze 705: putchar(last);
1.4 schwarze 706:
1.8 kristaps 707: /* Protect against character escapes. */
1.32 schwarze 708:
1.8 kristaps 709: if ('\\' == last)
710: putchar('e');
1.1 schwarze 711: }
1.2 schwarze 712:
1.33 schwarze 713: if (FMT__MAX == fmt)
714: return(0);
715:
1.2 schwarze 716: if ( ! nomacro && FMT_CODE == fmt)
717: printf(" Qc ");
1.1 schwarze 718:
1.33 schwarze 719: st->wantws = ' ' == last;
720: return(1);
1.1 schwarze 721: }
722:
723: /*
724: * Calls formatcode() til the end of a paragraph.
1.32 schwarze 725: * Goes to OUST_MAC mode and stays there when returning,
726: * such that the caller can add arguments to the macro line
727: * before closing it out.
1.1 schwarze 728: */
729: static void
1.32 schwarze 730: formatcodeln(struct state *st, const char *linemac,
731: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 732: {
1.33 schwarze 733: int gotmacro, wantws;
1.1 schwarze 734:
1.32 schwarze 735: assert(OUST_NL == st->oust);
736: assert(st->wantws);
737: printf(".%s ", linemac);
738: st->oust = OUST_MAC;
739:
1.33 schwarze 740: gotmacro = 0;
1.1 schwarze 741: while (*start < end) {
1.33 schwarze 742: wantws = ' ' == buf[*start] || '\n' == buf[*start];
743: if (wantws) {
744: last = ' ';
745: do {
746: (*start)++;
747: } while (*start < end && ' ' == buf[*start]);
748: }
749:
1.34 schwarze 750: if (*start + 1 < end && '<' == buf[*start + 1] &&
751: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.33 schwarze 752: st->wantws |= wantws;
753: gotmacro = formatcode(st, buf,
754: start, end, nomacro, 1);
1.1 schwarze 755: continue;
756: }
1.32 schwarze 757:
1.33 schwarze 758: if (gotmacro) {
759: if (*start < end || st->outbuflen) {
760: if (st->wantws ||
761: (wantws && !st->outbuflen))
762: printf(" No ");
763: else
764: printf(" Ns ");
765: }
766: gotmacro = 0;
767: }
768: outbuf_flush(st);
769: st->wantws = wantws;
770:
771: if (*start >= end)
772: break;
773:
774: if (st->wantws) {
775: putchar(' ');
776: st->wantws = 0;
1.32 schwarze 777: }
778:
1.4 schwarze 779: /*
780: * Since we're already on a macro line, we want to make
781: * sure that we don't inadvertently invoke a macro.
782: * We need to do this carefully because section names
783: * are used in troff and we don't want to escape
784: * something that needn't be escaped.
785: */
786: if (' ' == last && end - *start > 1 &&
1.33 schwarze 787: isupper((unsigned char)buf[*start]) &&
788: islower((unsigned char)buf[*start + 1]) &&
789: (end - *start == 2 || ' ' == buf[*start + 2]))
1.4 schwarze 790: printf("\\&");
791:
1.33 schwarze 792: putchar(last = buf[*start]);
1.8 kristaps 793:
794: /* Protect against character escapes. */
1.33 schwarze 795:
1.8 kristaps 796: if ('\\' == last)
797: putchar('e');
798:
1.1 schwarze 799: (*start)++;
800: }
801: }
802:
803: /*
1.4 schwarze 804: * Guess at what kind of list we are.
805: * These are taken straight from the POD manual.
806: * I don't know what people do in real life.
807: */
808: static enum list
809: listguess(const char *buf, size_t start, size_t end)
810: {
811: size_t len = end - start;
812:
813: assert(end >= start);
814:
815: if (len == 1 && '*' == buf[start])
816: return(LIST_BULLET);
817: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
818: return(LIST_ENUM);
819: else if (len == 1 && '1' == buf[start])
820: return(LIST_ENUM);
821: else
822: return(LIST_TAG);
823: }
824:
825: /*
1.1 schwarze 826: * A command paragraph, as noted in the perlpod manual, just indicates
827: * that we should do something, optionally with some text to print as
828: * well.
1.32 schwarze 829: * From the perspective of external callers,
830: * always stays in OUST_NL/wantws mode,
831: * but its children do use OUST_MAC.
1.1 schwarze 832: */
833: static void
834: command(struct state *st, const char *buf, size_t start, size_t end)
835: {
836: size_t len, csz;
837: enum cmd cmd;
838:
839: assert('=' == buf[start]);
840: start++;
841: len = end - start;
842:
843: for (cmd = 0; cmd < CMD__MAX; cmd++) {
844: csz = strlen(cmds[cmd]);
845: if (len < csz)
846: continue;
847: if (0 == memcmp(&buf[start], cmd[cmds], csz))
848: break;
849: }
850:
851: /* Ignore bogus commands. */
852:
853: if (CMD__MAX == cmd)
854: return;
855:
856: start += csz;
1.8 kristaps 857: while (start < end && ' ' == buf[start])
858: start++;
859:
1.1 schwarze 860: len = end - start;
861:
862: if (st->paused) {
863: st->paused = CMD_END != cmd;
864: return;
865: }
866:
867: switch (cmd) {
868: case (CMD_POD):
869: break;
870: case (CMD_HEAD1):
871: /*
872: * The behaviour of head= follows from a quick glance at
873: * how pod2man handles it.
874: */
1.11 kristaps 875: st->sect = SECT_NONE;
876: if (end - start == 4) {
1.1 schwarze 877: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 878: st->sect = SECT_NAME;
879: } else if (end - start == 8) {
880: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
881: st->sect = SECT_SYNOPSIS;
882: }
1.32 schwarze 883: formatcodeln(st, "Sh", buf, &start, end, 1);
884: mdoc_newln(st);
1.1 schwarze 885: st->haspar = 1;
886: break;
887: case (CMD_HEAD2):
1.32 schwarze 888: formatcodeln(st, "Ss", buf, &start, end, 1);
889: mdoc_newln(st);
1.1 schwarze 890: st->haspar = 1;
891: break;
892: case (CMD_HEAD3):
893: puts(".Pp");
1.32 schwarze 894: formatcodeln(st, "Em", buf, &start, end, 0);
895: mdoc_newln(st);
1.1 schwarze 896: puts(".Pp");
897: st->haspar = 1;
898: break;
899: case (CMD_HEAD4):
900: puts(".Pp");
1.32 schwarze 901: formatcodeln(st, "No", buf, &start, end, 0);
902: mdoc_newln(st);
1.1 schwarze 903: puts(".Pp");
904: st->haspar = 1;
905: break;
906: case (CMD_OVER):
1.4 schwarze 907: /*
908: * If we have an existing list that hasn't had an =item
909: * yet, then make sure that we open it now.
910: * We use the default list type, but that can't be
911: * helped (we haven't seen any items yet).
1.1 schwarze 912: */
1.4 schwarze 913: if (st->lpos > 0)
914: if (LIST__MAX == st->lstack[st->lpos - 1]) {
915: st->lstack[st->lpos - 1] = LIST_TAG;
916: puts(".Bl -tag -width Ds");
917: }
918: st->lpos++;
919: assert(st->lpos < LIST_STACKSZ);
920: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 921: break;
922: case (CMD_ITEM):
1.6 kristaps 923: if (0 == st->lpos) {
924: /*
925: * Bad markup.
926: * Try to compensate.
927: */
928: st->lstack[st->lpos] = LIST__MAX;
929: st->lpos++;
930: }
1.4 schwarze 931: assert(st->lpos > 0);
932: /*
933: * If we're the first =item, guess at what our content
934: * will be: "*" is a bullet list, "1." is a numbered
935: * list, and everything is tagged.
936: */
937: if (LIST__MAX == st->lstack[st->lpos - 1]) {
938: st->lstack[st->lpos - 1] =
939: listguess(buf, start, end);
940: switch (st->lstack[st->lpos - 1]) {
941: case (LIST_BULLET):
942: puts(".Bl -bullet");
943: break;
944: case (LIST_ENUM):
945: puts(".Bl -enum");
946: break;
947: default:
948: puts(".Bl -tag -width Ds");
949: break;
950: }
951: }
952: switch (st->lstack[st->lpos - 1]) {
953: case (LIST_TAG):
1.32 schwarze 954: formatcodeln(st, "It", buf, &start, end, 0);
955: mdoc_newln(st);
1.4 schwarze 956: break;
957: case (LIST_ENUM):
958: /* FALLTHROUGH */
959: case (LIST_BULLET):
960: /*
961: * Abandon the remainder of the paragraph
962: * because we're going to be a bulletted or
963: * numbered list.
964: */
965: puts(".It");
966: break;
967: default:
968: abort();
969: }
1.1 schwarze 970: st->haspar = 1;
971: break;
972: case (CMD_BACK):
1.4 schwarze 973: /* Make sure we don't back over the stack. */
974: if (st->lpos > 0) {
975: st->lpos--;
976: puts(".El");
977: }
1.1 schwarze 978: break;
979: case (CMD_BEGIN):
980: /*
981: * We disregard all types for now.
982: * TODO: process at least "text" in a -literal block.
983: */
984: st->paused = 1;
985: break;
986: case (CMD_FOR):
987: /*
988: * We ignore all types of encodings and formats
989: * unilaterally.
990: */
991: break;
992: case (CMD_ENCODING):
993: break;
994: case (CMD_CUT):
995: st->parsing = 0;
996: return;
997: default:
998: abort();
999: }
1000:
1001: /* Any command (but =cut) makes us start parsing. */
1002: st->parsing = 1;
1003: }
1004:
1005: /*
1006: * Just pump out the line in a verbatim block.
1.32 schwarze 1007: * From the perspective of external callers,
1008: * always stays in OUST_NL/wantws mode.
1.1 schwarze 1009: */
1010: static void
1.35 schwarze 1011: verbatim(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1012: {
1.36 ! schwarze 1013: size_t i, ift, ifo, ifa, ifc, inl;
1.35 schwarze 1014: char *cp;
1.36 ! schwarze 1015: int nopen;
1.1 schwarze 1016:
1.35 schwarze 1017: if ( ! st->parsing || st->paused || start == end)
1.1 schwarze 1018: return;
1.22 kristaps 1019: again:
1020: /*
1021: * If we're in the SYNOPSIS, see if we're an #include block.
1022: * If we are, then print the "In" macro and re-loop.
1023: * This handles any number of inclusions, but only when they
1024: * come before the remaining parts...
1025: */
1026: if (SECT_SYNOPSIS == st->sect) {
1027: i = start;
1.35 schwarze 1028: while (i < end && buf[i] == ' ')
1029: i++;
1.22 kristaps 1030: if (i == end)
1031: return;
1.35 schwarze 1032:
1.22 kristaps 1033: /* We're an include block! */
1034: if (end - i > 10 &&
1035: 0 == memcmp(&buf[i], "#include <", 10)) {
1036: start = i + 10;
1037: while (start < end && ' ' == buf[start])
1038: start++;
1039: fputs(".In ", stdout);
1040: /* Stop til the '>' marker or we hit eoln. */
1041: while (start < end &&
1042: '>' != buf[start] && '\n' != buf[start])
1043: putchar(buf[start++]);
1044: putchar('\n');
1045: if (start < end && '>' == buf[start])
1046: start++;
1047: if (start < end && '\n' == buf[start])
1048: start++;
1049: if (start < end)
1050: goto again;
1051: return;
1052: }
1.35 schwarze 1053:
1054: /* Parse function declaration. */
1055: ifo = ifa = ifc = 0;
1.36 ! schwarze 1056: inl = end;
! 1057: nopen = 0;
! 1058: for (ift = i; i < end; i++) {
! 1059: if (ifc) {
! 1060: if (buf[i] != '\n')
! 1061: continue;
! 1062: inl = i;
! 1063: break;
! 1064: }
! 1065: switch (buf[i]) {
! 1066: case ' ':
! 1067: if ( ! ifa)
! 1068: ifo = i;
! 1069: break;
! 1070: case '(':
! 1071: if (ifo) {
! 1072: nopen++;
! 1073: if ( ! ifa)
! 1074: ifa = i;
! 1075: } else
! 1076: i = end;
! 1077: break;
! 1078: case ')':
! 1079: switch (nopen) {
! 1080: case 0:
! 1081: i = end;
! 1082: break;
! 1083: case 1:
1.35 schwarze 1084: ifc = i;
1.36 ! schwarze 1085: break;
! 1086: default:
! 1087: nopen--;
! 1088: break;
! 1089: }
! 1090: break;
! 1091: default:
! 1092: break;
! 1093: }
1.35 schwarze 1094: }
1095:
1096: /* Encode function declaration. */
1097: if (ifc) {
1.36 ! schwarze 1098: for (i = ifa; i < ifc; i++)
! 1099: if (buf[i] == '\n')
! 1100: buf[i] = ' ';
1.35 schwarze 1101: buf[ifo++] = '\0';
1102: printf(".Ft %s", buf + ift);
1103: if (buf[ifo] == '*') {
1104: fputs(" *", stdout);
1105: ifo++;
1106: }
1107: putchar('\n');
1108: buf[ifa++] = '\0';
1109: printf(".Fo %s\n", buf + ifo);
1110: buf[ifc++] = '\0';
1111: for (;;) {
1112: cp = strchr(buf + ifa, ',');
1113: if (cp != NULL)
1.36 ! schwarze 1114: *cp++ = '\0';
1.35 schwarze 1115: printf(".Fa \"%s\"\n", buf + ifa);
1116: if (cp == NULL)
1117: break;
1.36 ! schwarze 1118: while (*cp == ' ')
! 1119: cp++;
! 1120: ifa = cp - buf;
1.35 schwarze 1121: }
1122: puts(".Fc");
1123: if (buf[ifc] == ';')
1124: ifc++;
1.36 ! schwarze 1125: if (ifc < inl) {
! 1126: buf[inl] = '\0';
1.35 schwarze 1127: puts(buf + ifc);
1128: }
1.36 ! schwarze 1129: start = inl + 1;
1.35 schwarze 1130: if (start < end)
1131: goto again;
1132: return;
1133: }
1.22 kristaps 1134: }
1135:
1.1 schwarze 1136: puts(".Bd -literal");
1.8 kristaps 1137: for (last = ' '; start < end; start++) {
1138: /*
1139: * Handle accidental macros (newline starting with
1140: * control character) and escapes.
1141: */
1142: if ('\n' == last)
1.7 kristaps 1143: if ('.' == buf[start] || '\'' == buf[start])
1144: printf("\\&");
1.8 kristaps 1145: putchar(last = buf[start]);
1146: if ('\\' == buf[start])
1147: printf("e");
1.7 kristaps 1148: }
1.31 schwarze 1149: putchar(last = '\n');
1.1 schwarze 1150: puts(".Ed");
1151: }
1152:
/*
 * Helper for dosynopsisop().
 * Scan buf[start, end) and report whether it contains a ']' that is
 * not paired with a '[' inside the scanned range, i.e. one that would
 * close a bracket opened before "start".
 * Returns 1 if such a closing bracket exists, 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth, pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case '[':
			/* Nested opener: its closer is not ours. */
			depth++;
			break;
		case ']':
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
1170:
1171: /*
1172: * If we're in the SYNOPSIS section and we've encounter braces in an
1173: * ordinary paragraph, then try to see whether we're an [-option].
1174: * Do this, if we're an opening bracket, by first seeing if we have a
1175: * matching end via hasmatch().
1176: * If we're an ending bracket, see if we have a stack already.
1177: */
1178: static int
1.32 schwarze 1179: dosynopsisop(struct state *st, const char *buf,
1180: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1181: {
1182:
1183: assert('[' == buf[*start] || ']' == buf[*start]);
1184:
1185: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 schwarze 1186: mdoc_newln(st);
1.13 kristaps 1187: puts(".Oo");
1188: (*opstack)++;
1189: } else if ('[' == buf[*start])
1190: return(0);
1191:
1192: if (']' == buf[*start] && *opstack > 0) {
1.32 schwarze 1193: mdoc_newln(st);
1.13 kristaps 1194: puts(".Oc");
1195: (*opstack)--;
1196: } else if (']' == buf[*start])
1197: return(0);
1198:
1199: (*start)++;
1.31 schwarze 1200: last = '\n';
1.13 kristaps 1201: while (' ' == buf[*start])
1202: (*start)++;
1203: return(1);
1204: }
1205:
1206: /*
1.17 kristaps 1207: * Format multiple "Nm" manpage names in the NAME section.
1.32 schwarze 1208: * From the perspective of external callers,
1209: * always stays in OUST_NL/wantws mode,
1210: * but its children do use OUST_MAC.
1.17 kristaps 1211: */
1212: static void
1213: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1214: {
1215: size_t word;
1216:
1.32 schwarze 1217: assert(OUST_NL == st->oust);
1218: assert(st->wantws);
1219:
1.17 kristaps 1220: while (*start < end && ' ' == buf[*start])
1221: (*start)++;
1222:
1223: if (end == *start) {
1224: puts(".Nm unknown");
1225: return;
1226: }
1227:
1228: while (*start < end) {
1229: for (word = *start; word < end; word++)
1230: if (',' == buf[word])
1231: break;
1.32 schwarze 1232: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1233: if (*start == end) {
1.32 schwarze 1234: mdoc_newln(st);
1235: break;
1.17 kristaps 1236: }
1237: assert(',' == buf[*start]);
1.32 schwarze 1238: printf(" ,");
1239: mdoc_newln(st);
1.17 kristaps 1240: (*start)++;
1241: while (*start < end && ' ' == buf[*start])
1242: (*start)++;
1243: }
1244: }
1245:
1246: /*
1.1 schwarze 1247: * Ordinary paragraph.
1248: * Well, this is really the hardest--POD seems to assume that, for
1249: * example, a leading space implies a newline, and so on.
1250: * Lots of other snakes in the grass: escaping a newline followed by a
1251: * period (accidental mdoc(7) control), double-newlines after macro
1252: * passages, etc.
1.32 schwarze 1253: *
1254: * Uses formatcode() to go to OUST_MAC mode
1255: * and outbuf_flush() to go to OUST_TXT mode.
1256: * Main text mode wantws handling is in this function.
1257: * Must make sure to go back to OUST_NL/wantws mode before returning.
 *
 * The paragraph is buf[start, end).
 * Nothing is done unless st->parsing is set and st->paused is clear.
 * In the NAME section, a paragraph of the form "name - description"
 * is turned into "Nm" and "Nd" lines and nothing else is printed.
 * In the SYNOPSIS section, square brackets are offered to
 * dosynopsisop() before being treated as plain text.
1.1 schwarze 1258: */
1259: static void
1260: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1261: {
1.13 kristaps 1262: size_t i, j, opstack;
1.15 kristaps 1263: int seq;
1.1 schwarze 1264:
1265: if ( ! st->parsing || st->paused)
1266: return;
1267:
1268: /*
1269: * Special-case: the NAME section.
1270: * If we find a "-" when searching from the end, assume that
1271: * we're in "name - description" format.
1272: * To wit, print out a "Nm" and "Nd" in that format.
 * The backwards search looks for a '-' directly followed by a
 * blank and stops at "start" without testing the blank there.
 * NOTE(review): "end - 2" wraps around if end < 2, and for a
 * one-byte paragraph buf[start - 1] is inspected; the visible
 * caller only guarantees end > start -- TODO confirm this is
 * unreachable in the NAME section.
1273: */
1.11 kristaps 1274: if (SECT_NAME == st->sect) {
1.15 kristaps 1275: for (i = end - 2; i > start; i--)
1276: if ('-' == buf[i] && ' ' == buf[i + 1])
1.1 schwarze 1277: break;
1278: if ('-' == buf[i]) {
/* Remember the last dash; the description starts after it. */
1279: j = i;
1280: /* Roll over multiple "-". */
1281: for ( ; i > start; i--)
1282: if ('-' != buf[i])
1283: break;
/* Names are everything up to and including buf[i]. */
1.17 kristaps 1284: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1285: start = j + 1;
1.17 kristaps 1286: while (start < end && ' ' == buf[start])
1287: start++;
1.32 schwarze 1288: formatcodeln(st, "Nd", buf, &start, end, 1);
1289: mdoc_newln(st);
1.1 schwarze 1290: return;
1291: }
1292: }
1293:
/*
 * Emit a paragraph break unless st->haspar is set, then clear the flag.
 * Presumably haspar means a break is already in effect -- verify
 * against the code that sets it.
 */
1294: if ( ! st->haspar)
1295: puts(".Pp");
1296:
1297: st->haspar = 0;
1298: last = '\n';
1.13 kristaps 1299: opstack = 0;
1.1 schwarze 1300:
/*
 * Each pass handles one run of plain text and then whatever stopped
 * the run: a format code or a newline.
 * The pass counter "seq" is handed to formatcode().
 */
1.15 kristaps 1301: for (seq = 0; start < end; seq++) {
1.1 schwarze 1302: /*
1303: * Loop til we get either to a newline or escape.
1304: * Escape initial control characters.
 * An escape (format code) is an uppercase ASCII letter
 * directly followed by '<'.
1305: */
1306: while (start < end) {
1.34 schwarze 1307: if (start < end - 1 && '<' == buf[start + 1] &&
1308: 'A' <= buf[start] && 'Z' >= buf[start])
1.1 schwarze 1309: break;
1310: else if ('\n' == buf[start])
1311: break;
1312: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1313: outbuf_addstr(st, "\\&");
1.1 schwarze 1314: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1315: outbuf_addstr(st, "\\&");
1.12 kristaps 1316: /*
1317: * If we're in the SYNOPSIS, have square
1318: * brackets indicate that we're opening and
1319: * closing an optional context.
 * When dosynopsisop() accepts the bracket it has
 * already advanced "start", so just rescan.
1320: */
1.32 schwarze 1321:
1.13 kristaps 1322: if (SECT_SYNOPSIS == st->sect &&
1323: ('[' == buf[start] ||
1324: ']' == buf[start]) &&
1.32 schwarze 1325: dosynopsisop(st, buf,
1326: &start, end, &opstack))
1.13 kristaps 1327: continue;
1.32 schwarze 1328:
1329: /*
1330: * On whitespace, flush the output buffer
1331: * and allow breaking to a macro line.
1332: * Otherwise, buffer text and clear wantws.
 * NOTE(review): clearing wantws is not visible
 * here; presumably outbuf_addchar() does it.
1333: */
1334:
1.31 schwarze 1335: last = buf[start++];
1336: if (' ' == last) {
1337: outbuf_flush(st);
1338: putchar(' ');
1.32 schwarze 1339: st->wantws = 1;
1.31 schwarze 1340: } else
1341: outbuf_addchar(st);
1.1 schwarze 1342: }
1343:
/* The text run stopped at a format code: let formatcode() handle it. */
1.34 schwarze 1344: if (start < end - 1 && '<' == buf[start + 1] &&
1345: 'A' <= buf[start] && 'Z' >= buf[start]) {
1.32 schwarze 1346: formatcode(st, buf, &start, end, 0, seq);
1347: if (OUST_MAC == st->oust) {
1.30 schwarze 1348: /*
1349: * Let mdoc(7) handle trailing punctuation.
1350: * XXX Some punctuation characters
1351: * are not handled yet.
 * Only '.' and ',' are handled, and only when
 * they end the paragraph or are followed by
 * a blank or a newline.
1352: */
1.16 kristaps 1353: if ((start == end - 1 ||
1354: (start < end - 1 &&
1355: (' ' == buf[start + 1] ||
1356: '\n' == buf[start + 1]))) &&
1357: ('.' == buf[start] ||
1358: ',' == buf[start])) {
1359: putchar(' ');
1360: putchar(buf[start++]);
1361: }
1.32 schwarze 1362:
/*
 * End the macro line if whitespace is wanted or follows.
 * NOTE(review): buf[start] is read without checking
 * start < end, so this may look at buf[end] -- confirm
 * that byte is always readable.
 */
1363: if (st->wantws ||
1364: ' ' == buf[start] ||
1365: '\n' == buf[start])
1366: mdoc_newln(st);
1367:
1.30 schwarze 1368: /*
1369: * Consume all whitespace
1370: * so we don't accidentally start
1371: * an implicit literal line.
1372: */
1.32 schwarze 1373:
1.6 kristaps 1374: while (start < end && ' ' == buf[start])
1375: start++;
1.32 schwarze 1376:
1377: /*
1378: * Some text is following.
1379: * Implement requested spacing.
 * That is: if no whitespace is wanted and the
 * next thing is not another format code, print
 * " Ns " and set wantws again.
 * NOTE(review): buf[start + 1] is read with only
 * start < end checked, so it may be buf[end].
1380: */
1381:
1382: if ( ! st->wantws && start < end &&
1.34 schwarze 1383: ('<' != buf[start + 1] ||
1384: 'A' > buf[start] ||
1385: 'Z' < buf[start])) {
1.32 schwarze 1386: printf(" Ns ");
1387: st->wantws = 1;
1388: }
1.6 kristaps 1389: }
1.1 schwarze 1390: } else if (start < end && '\n' == buf[start]) {
/* The text run stopped at a newline: finish the output line. */
1.32 schwarze 1391: outbuf_flush(st);
1392: mdoc_newln(st);
1.1 schwarze 1393: if (++start >= end)
1394: continue;
1395: /*
1396: * If we have whitespace next, eat it to prevent
1397: * mdoc(7) from thinking that it's meant for
1398: * verbatim text.
1399: * It is--but if we start with that, we can't
1400: * have a macro subsequent it, which may be
1401: * possible if we have an escape next.
 * A ".br" request stands in for the eaten
 * leading whitespace.
1402: */
1.31 schwarze 1403: if (' ' == buf[start] || '\t' == buf[start])
1.1 schwarze 1404: puts(".br");
1405: for ( ; start < end; start++)
1406: if (' ' != buf[start] && '\t' != buf[start])
1407: break;
1.12 kristaps 1408: }
1.1 schwarze 1409: }
/* Flush any remaining text and end the output line before returning. */
1.32 schwarze 1410: outbuf_flush(st);
1411: mdoc_newln(st);
1.1 schwarze 1412: }
1413:
1414: /*
1415: * There are three kinds of paragraphs: verbatim (starts with whitespace
1416: * of some sort), ordinary (starts without "=" marker), or a command
1417: * (default: starts with "=").
1418: */
1419: static void
1.35 schwarze 1420: dopar(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1421: {
1422:
1.32 schwarze 1423: assert(OUST_NL == st->oust);
1424: assert(st->wantws);
1425:
1.1 schwarze 1426: if (end == start)
1427: return;
1428: if (' ' == buf[start] || '\t' == buf[start])
1429: verbatim(st, buf, start, end);
1430: else if ('=' != buf[start])
1431: ordinary(st, buf, start, end);
1432: else
1433: command(st, buf, start, end);
1434: }
1435:
1436: /*
1437: * Loop around paragraphs within a document, processing each one in the
1438: * POD way.
1439: */
1440: static void
1441: dofile(const struct args *args, const char *fname,
1.35 schwarze 1442: const struct tm *tm, char *buf, size_t sz)
1.1 schwarze 1443: {
1.29 schwarze 1444: char datebuf[64];
1.1 schwarze 1445: struct state st;
1.29 schwarze 1446: const char *fbase, *fext, *section, *date;
1.1 schwarze 1447: char *title, *cp;
1.29 schwarze 1448: size_t sup, end, i, cur = 0;
1.1 schwarze 1449:
1450: if (0 == sz)
1451: return;
1452:
1.29 schwarze 1453: /*
1454: * Parsing the filename is almost always required,
1455: * except when both the title and the section
1456: * are provided on the command line.
1457: */
1458:
1459: if (NULL == args->title || NULL == args->section) {
1460: fbase = strrchr(fname, '/');
1461: if (NULL == fbase)
1462: fbase = fname;
1463: else
1464: fbase++;
1465: fext = strrchr(fbase, '.');
1466: } else
1467: fext = NULL;
1468:
1469: /*
1470: * The title will be converted to uppercase,
1471: * so it needs to be copied.
1472: */
1473:
1474: title = (NULL != args->title) ? strdup(args->title) :
1475: (NULL != fext) ? strndup(fbase, fext - fbase) :
1476: strdup(fbase);
1.1 schwarze 1477:
1478: if (NULL == title) {
1479: perror(NULL);
1480: exit(EXIT_FAILURE);
1481: }
1482:
1483: /* Section is 1 unless suffix is "pm". */
1484:
1.29 schwarze 1485: section = (NULL != args->section) ? args->section :
1486: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1487: PERL_SECTION;
1.1 schwarze 1488:
1489: /* Date. Or the given "tm" if not supplied. */
1490:
1491: if (NULL == (date = args->date)) {
1492: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1493: date = datebuf;
1494: }
1495:
1496: for (cp = title; '\0' != *cp; cp++)
1497: *cp = toupper((int)*cp);
1498:
1499: /* The usual mdoc(7) preamble. */
1500:
1501: printf(".Dd %s\n", date);
1502: printf(".Dt %s %s\n", title, section);
1503: puts(".Os");
1504:
1505: free(title);
1506:
1507: memset(&st, 0, sizeof(struct state));
1.32 schwarze 1508: st.oust = OUST_NL;
1509: st.wantws = 1;
1510:
1.1 schwarze 1511: assert(sz > 0);
1512:
1513: /* Main loop over file contents. */
1514:
1515: while (cur < sz) {
1516: /* Read until next paragraph. */
1517: for (i = cur + 1; i < sz; i++)
1518: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1519: /* Consume blank paragraphs. */
1520: while (i + 1 < sz && '\n' == buf[i + 1])
1521: i++;
1522: break;
1523: }
1524:
1525: /* Adjust end marker for EOF. */
1526: end = i < sz ? i - 1 :
1527: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1528: sup = i < sz ? end + 2 : sz;
1529:
1530: /* Process paragraph and adjust start. */
1531: dopar(&st, buf, cur, end);
1532: cur = sup;
1533: }
1534: }
1535:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 *
 * The date handed to dofile() is the file's modification time, or
 * the current time for stdin or when fstat(2) fails.
 *
 * Returns 1 on success and 0 if the file cannot be opened or read,
 * after printing a diagnostic.  Exits the program on memory
 * exhaustion.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
		open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * Overwriting buf is fine: on failure we exit anyway.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first: close(2) might clobber errno. */
		perror(fname);
		free(buf);
		/* Do not leak the descriptor on the error path. */
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
		"STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1602:
1603: int
1604: main(int argc, char *argv[])
1605: {
1606: const char *fname, *name;
1607: struct args args;
1608: int c;
1609:
1610: name = strrchr(argv[0], '/');
1611: if (name == NULL)
1612: name = argv[0];
1613: else
1614: ++name;
1615:
1616: memset(&args, 0, sizeof(struct args));
1617: fname = "-";
1618:
1619: /* Accept no arguments for now. */
1620:
1621: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1622: switch (c) {
1623: case ('h'):
1624: /* FALLTHROUGH */
1625: case ('l'):
1626: /* FALLTHROUGH */
1627: case ('c'):
1628: /* FALLTHROUGH */
1629: case ('o'):
1630: /* FALLTHROUGH */
1631: case ('q'):
1632: /* FALLTHROUGH */
1633: case ('r'):
1634: /* FALLTHROUGH */
1635: case ('u'):
1636: /* FALLTHROUGH */
1637: case ('v'):
1638: /* Ignore these. */
1639: break;
1640: case ('d'):
1641: args.date = optarg;
1642: break;
1643: case ('n'):
1644: args.title = optarg;
1645: break;
1646: case ('s'):
1647: args.section = optarg;
1648: break;
1649: default:
1650: goto usage;
1651: }
1652:
1653: argc -= optind;
1654: argv += optind;
1655:
1656: /* Accept only a single input file. */
1657:
1.25 schwarze 1658: if (argc > 1)
1659: goto usage;
1.1 schwarze 1660: else if (1 == argc)
1661: fname = *argv;
1662:
1663: return(readfile(&args, fname) ?
1664: EXIT_SUCCESS : EXIT_FAILURE);
1665:
1666: usage:
1667: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1668: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1669:
1670: return(EXIT_FAILURE);
1671: }
CVSweb