Annotation of pod2mdoc/pod2mdoc.c, Revision 1.32
1.32 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.31 2014/07/15 19:03:07 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4: *
5: * Permission to use, copy, modify, and distribute this software for any
6: * purpose with or without fee is hereby granted, provided that the above
7: * copyright notice and this permission notice appear in all copies.
8: *
9: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16: */
17: #include <sys/stat.h>
18: #include <sys/time.h>
19:
20: #include <assert.h>
21: #include <ctype.h>
22: #include <fcntl.h>
23: #include <getopt.h>
24: #include <stdio.h>
25: #include <stdlib.h>
26: #include <string.h>
27: #include <unistd.h>
28:
/*
 * Manual section used when cross-referencing Perl module manuals.
 * Some systems (Mac OS X) use 3pm, others (OpenBSD, etc.) use 3p.
 * XXX Keep pod2mdoc.1 in sync whenever this value changes.
 */
#define	PERL_SECTION	"3p"
35:
/* Values that override parts of the "Dd" and "Dt" macros. */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
41:
/* Kinds of list kept on the open-list stack. */
enum list {
	LIST_BULLET = 0,	/* bullet list */
	LIST_ENUM,		/* enumerated list */
	LIST_TAG,		/* tagged list */
	LIST__MAX		/* count of list types; also "not yet decided" */
};
48:
/* Sections that receive special treatment. */
enum sect {
	SECT_NONE = 0,		/* any other section */
	SECT_NAME,		/* NAME section */
	SECT_SYNOPSIS		/* SYNOPSIS section */
};
54:
/* Where the mdoc output stream currently stands. */
enum outstate {
	OUST_NL = 0,		/* at the start of a fresh output line */
	OUST_TXT,		/* in the middle of a text line */
	OUST_MAC		/* in the middle of a macro line */
};
! 60:
1.1 schwarze 61: struct state {
1.31 schwarze 62: const char *fname; /* file being parsed */
1.1 schwarze 63: int parsing; /* after =cut of before command */
64: int paused; /* in =begin and before =end */
1.11 kristaps 65: enum sect sect; /* which section are we in? */
1.4 schwarze 66: #define LIST_STACKSZ 128
67: enum list lstack[LIST_STACKSZ]; /* open lists */
68: size_t lpos; /* where in list stack */
1.31 schwarze 69: int haspar; /* in paragraph: do we need Pp? */
1.32 ! schwarze 70: enum outstate oust; /* state of the mdoc output stream */
! 71: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 72: char *outbuf; /* text buffered for output */
73: size_t outbufsz; /* allocated size of outbuf */
74: size_t outbuflen; /* current length of outbuf */
1.1 schwarze 75: };
76:
/* POD formatting codes; the matching letters live in fmts[]. */
enum fmt {
	FMT_ITALIC,	/* I<> */
	FMT_BOLD,	/* B<> */
	FMT_CODE,	/* C<> */
	FMT_LINK,	/* L<> */
	FMT_ESCAPE,	/* E<> */
	FMT_FILE,	/* F<> */
	FMT_NBSP,	/* S<> */
	FMT_INDEX,	/* X<> */
	FMT_NULL,	/* Z<> */
	FMT__MAX	/* number of codes; also "unknown code" */
};
89:
/* POD command paragraphs; the matching keywords live in cmds[]. */
enum cmd {
	CMD_POD = 0,	/* =pod */
	CMD_HEAD1,	/* =head1 */
	CMD_HEAD2,	/* =head2 */
	CMD_HEAD3,	/* =head3 */
	CMD_HEAD4,	/* =head4 */
	CMD_OVER,	/* =over */
	CMD_ITEM,	/* =item */
	CMD_BACK,	/* =back */
	CMD_BEGIN,	/* =begin */
	CMD_END,	/* =end */
	CMD_FOR,	/* =for */
	CMD_ENCODING,	/* =encoding */
	CMD_CUT,	/* =cut */
	CMD__MAX	/* number of commands; also "unknown command" */
};
106:
107: static const char *const cmds[CMD__MAX] = {
108: "pod", /* CMD_POD */
109: "head1", /* CMD_HEAD1 */
110: "head2", /* CMD_HEAD2 */
111: "head3", /* CMD_HEAD3 */
112: "head4", /* CMD_HEAD4 */
113: "over", /* CMD_OVER */
114: "item", /* CMD_ITEM */
115: "back", /* CMD_BACK */
116: "begin", /* CMD_BEGIN */
117: "end", /* CMD_END */
118: "for", /* CMD_FOR */
119: "encoding", /* CMD_ENCODING */
120: "cut" /* CMD_CUT */
121: };
122:
123: static const char fmts[FMT__MAX] = {
124: 'I', /* FMT_ITALIC */
125: 'B', /* FMT_BOLD */
126: 'C', /* FMT_CODE */
127: 'L', /* FMT_LINK */
128: 'E', /* FMT_ESCAPE */
129: 'F', /* FMT_FILE */
130: 'S', /* FMT_NBSP */
131: 'X', /* FMT_INDEX */
132: 'Z' /* FMT_NULL */
133: };
134:
/*
 * The character most recently buffered or written by the output code.
 * Shared by all output helpers in this file.
 */
static int	 last;
136:
1.31 schwarze 137:
138: static void
139: outbuf_grow(struct state *st, size_t by)
140: {
141:
142: st->outbufsz += (by / 128 + 1) * 128;
143: st->outbuf = realloc(st->outbuf, st->outbufsz);
144: if (NULL == st->outbuf) {
145: perror(NULL);
146: exit(EXIT_FAILURE);
147: }
148: }
149:
150: static void
151: outbuf_addchar(struct state *st)
152: {
153:
154: if (st->outbuflen + 2 >= st->outbufsz)
155: outbuf_grow(st, 1);
156: st->outbuf[st->outbuflen++] = last;
157: if ('\\' == last)
158: st->outbuf[st->outbuflen++] = 'e';
159: st->outbuf[st->outbuflen] = '\0';
1.32 ! schwarze 160: st->wantws = 0;
1.31 schwarze 161: }
162:
163: static void
164: outbuf_addstr(struct state *st, const char *str)
165: {
166: size_t slen;
167:
168: slen = strlen(str);
169: if (st->outbuflen + slen >= st->outbufsz)
170: outbuf_grow(st, slen);
171: memcpy(st->outbuf + st->outbuflen, str, slen+1);
172: last = str[slen - 1];
1.32 ! schwarze 173: st->wantws = 0;
1.31 schwarze 174: }
175:
176: static void
177: outbuf_flush(struct state *st)
178: {
179:
180: if (0 == st->outbuflen)
181: return;
182:
183: fputs(st->outbuf, stdout);
184: *st->outbuf = '\0';
185: st->outbuflen = 0;
1.32 ! schwarze 186:
! 187: if (OUST_NL == st->oust)
! 188: st->oust = OUST_TXT;
1.31 schwarze 189: }
190:
191: static void
1.32 ! schwarze 192: mdoc_newln(struct state *st)
1.31 schwarze 193: {
194:
1.32 ! schwarze 195: if (OUST_NL == st->oust)
1.31 schwarze 196: return;
1.32 ! schwarze 197:
1.31 schwarze 198: putchar('\n');
199: last = '\n';
1.32 ! schwarze 200: st->oust = OUST_NL;
! 201: st->wantws = 1;
1.31 schwarze 202: }
203:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', also when the name is too long
 * to be one we know; if no '>' is found before "end", start is left
 * at "end".
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;
	int		 toolong;

	max = sizeof(esc) - 1;
	i = 0;
	toolong = 0;

	/* Collect the name; keep scanning even if it does not fit. */
	while (*start < end && '>' != buf[*start]) {
		if (i < max)
			esc[i++] = buf[*start];
		else
			toolong = 1;
		(*start)++;
	}
	esc[i] = '\0';

	/* Unterminated escape: nothing more to consume. */
	if (*start >= end)
		return;

	/*
	 * Always step over the closing '>': leaving it behind would
	 * let the caller mistake it for the end of an enclosing code.
	 */
	assert('>' == buf[*start]);
	(*start)++;

	if (toolong)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "vb"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
250:
251: /*
1.9 kristaps 252: * Run some heuristics to intuit a link format.
1.19 kristaps 253: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 254: * that the caller can safely just continue processing.
1.19 kristaps 255: * If this is just an empty tag, I'll return 0.
1.32 ! schwarze 256: *
! 257: * Always operates in OUST_MAC mode.
! 258: * Mode handling is done by the caller.
1.9 kristaps 259: */
260: static int
261: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
262: {
1.21 kristaps 263: size_t linkstart, realend, linkend,
264: i, j, textsz, stack;
1.9 kristaps 265:
266: /*
267: * Scan to the start of the terminus.
268: * This function is more or less replicated in the formatcode()
269: * for null or index formatting codes.
1.23 kristaps 270: * However, we're slightly different because we might have
271: * nested escapes we need to ignore.
1.9 kristaps 272: */
1.21 kristaps 273: stack = 0;
1.19 kristaps 274: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 275: if ('<' == buf[realend])
276: stack++;
1.19 kristaps 277: if ('>' != buf[realend])
1.9 kristaps 278: continue;
1.23 kristaps 279: else if (stack-- > 0)
280: continue;
281: if (dsz == 1)
1.9 kristaps 282: break;
1.19 kristaps 283: assert(realend > 0);
284: if (' ' != buf[realend - 1])
1.9 kristaps 285: continue;
1.19 kristaps 286: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 287: if ('>' != buf[i++])
288: break;
289: if (dsz == j)
290: break;
291: }
1.19 kristaps 292:
293: /* Ignore stubs. */
294: if (realend == end || realend == *start)
1.9 kristaps 295: return(0);
296:
1.19 kristaps 297: /* Set linkend to the end of content. */
298: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 299:
1.19 kristaps 300: /* Re-scan to see if we have a title or section. */
301: for (textsz = *start; textsz < linkend; textsz++)
302: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 303: break;
304:
1.19 kristaps 305: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 306: /* With title: set start, then end at section. */
1.19 kristaps 307: linkstart = textsz + 1;
1.18 kristaps 308: textsz = textsz - *start;
1.19 kristaps 309: for (i = linkstart; i < linkend; i++)
310: if ('/' == buf[i])
311: break;
312: if (i < linkend)
313: linkend = i;
1.20 kristaps 314: } else if (textsz < linkend && '/' == buf[textsz]) {
315: /* With section: set end at section. */
316: linkend = textsz;
317: textsz = 0;
318: } else
319: /* No title, no section. */
1.18 kristaps 320: textsz = 0;
1.19 kristaps 321:
322: *start = realend;
323: j = linkend - linkstart;
324:
1.20 kristaps 325: /* Do we have only subsection material? */
326: if (0 == j && '/' == buf[linkend]) {
327: linkstart = linkend + 1;
328: linkend = dsz > 1 ? realend - 1 : realend;
329: if (0 == (j = linkend - linkstart))
330: return(0);
331: printf("Sx %.*s", (int)j, &buf[linkstart]);
332: return(1);
333: } else if (0 == j)
1.19 kristaps 334: return(0);
335:
336: /* See if we qualify as being a link or not. */
1.20 kristaps 337: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
338: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
339: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
340: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
341: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
342: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
343: /* Gross. */
344: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
345: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 346: return(1);
347: }
348:
349: /* See if we qualify as a mailto. */
1.20 kristaps 350: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 351: printf("Mt %.*s", (int)j, &buf[linkstart]);
352: return(1);
353: }
354:
355: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
356: if ((j > 3 && ')' == buf[linkend - 1]) &&
357: ('(' == buf[linkend - 3])) {
358: printf("Xr %.*s %c", (int)(j - 3),
359: &buf[linkstart], buf[linkend - 2]);
360: return(1);
361: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
362: ('(' == buf[linkend - 4])) {
363: printf("Xr %.*s %.*s", (int)(j - 4),
364: &buf[linkstart], 2, &buf[linkend - 3]);
365: return(1);
366: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
367: ('(' == buf[linkend - 5])) {
368: printf("Xr %.*s %.*s", (int)(j - 5),
369: &buf[linkstart], 3, &buf[linkend - 4]);
370: return(1);
371: }
372:
373: /* Last try: do we have a double-colon? */
374: for (i = linkstart + 1; i < linkend; i++)
375: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 376: break;
1.9 kristaps 377:
1.19 kristaps 378: if (i < linkend)
1.10 kristaps 379: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 380: (int)j, &buf[linkstart]);
1.9 kristaps 381: else
1.19 kristaps 382: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 383:
384: return(1);
385: }
386:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 * On entry buf[*start] must be '-' with at least one more byte before
 * "end"; on return *start tells the caller where to continue.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/*
	 * The <ctype.h> functions need an unsigned char value;
	 * a plain char may be negative for bytes above 0x7f.
	 */
	if ( ! isalnum((unsigned char)buf[*start + 1]) &&
	    '?' != buf[*start + 1] &&
	    '-' != buf[*start + 1]) {
		/*
		 * NOTE(review): this steps back to the byte in front
		 * of the '-'; behaviour kept as is, confirm the intent.
		 */
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++)
		if ( ! isalnum((unsigned char)buf[i]) &&
		    '?' != buf[i] &&
		    '-' != buf[i] &&
		    '_' != buf[i])
			break;

	assert(i < end);

	/* A flag name must be followed by a blank or the end marker. */
	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");

	/* Keep a macro-like flag name ("Xx") from being interpreted. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		/* Another flag follows: handle it the same way. */
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		/* Anything else is taken for the flag's argument. */
		printf("Ar ");
		*start = i;
	}
}
455:
1.9 kristaps 456: /*
1.1 schwarze 457: * We're at the character in front of a format code, which is structured
458: * like X<...> and can contain nested format codes.
459: * This consumes the whole format code, and any nested format codes, til
460: * the end of matched production.
1.6 kristaps 461: * If "nomacro", then we don't print any macros, just contained data
462: * (e.g., following "Sh" or "Nm").
1.15 kristaps 463: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
464: * as the first format code on a line (for decoration as an "Nm"),
465: * non-zero otherwise.
 *
 * On entry, buf[*start] is the format letter and buf[*start + 1] is
 * the opening '<' (both asserted); "end" bounds the input.
 * On return, *start has been advanced past whatever was consumed.
 * The function reads st->sect and reads and updates st->oust and
 * st->wantws; pending buffered text is flushed before anything is
 * printed directly.
 * A letter not found in fmts[] leaves fmt at FMT__MAX; its content is
 * then copied through without a macro.
1.32 ! schwarze 466: *
! 467: * Output mode handling is most complicated here.
! 468: * We may enter in any mode.
! 469: * We usually exit in OUST_MAC mode, except when
! 470: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 471: */
1.32 ! schwarze 472: static void
1.15 kristaps 473: formatcode(struct state *st, const char *buf, size_t *start,
1.32 ! schwarze 474: size_t end, int nomacro, int pos)
1.1 schwarze 475: {
476: enum fmt fmt;
1.5 kristaps 477: size_t i, j, dsz;
1.1 schwarze 478:
479: assert(*start + 1 < end);
480: assert('<' == buf[*start + 1]);
481:
1.6 kristaps 482: /*
483: * First, look up the format code.
1.30 schwarze 484: * If it's not valid, treat it as a NOOP.
1.6 kristaps 485: */
486: for (fmt = 0; fmt < FMT__MAX; fmt++)
487: if (buf[*start] == fmts[fmt])
488: break;
489:
1.5 kristaps 490: /*
491: * Determine whether we're overriding our delimiter.
492: * According to POD, if we have more than one '<' followed by a
493: * space, then we need a space followed by matching '>' to close
494: * the expression.
495: * Otherwise we use the usual '<' and '>' matched pair.
 * "dsz" ends up as the number of '>' needed to close the code.
496: */
497: i = *start + 1;
498: while (i < end && '<' == buf[i])
499: i++;
500: assert(i > *start + 1);
501: dsz = i - (*start + 1);
502: if (dsz > 1 && (i >= end || ' ' != buf[i]))
503: dsz = 1;
504:
505: /* Remember, if dsz>1, to jump the trailing space. */
506: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 507:
508: /*
1.6 kristaps 509: * Escapes and ignored codes (NULL and INDEX) don't print macro
510: * sequences, so just output them like normal text before
511: * processing for real macros.
1.1 schwarze 512: */
513: if (FMT_ESCAPE == fmt) {
1.31 schwarze 514: formatescape(st, buf, start, end);
1.32 ! schwarze 515: return;
1.1 schwarze 516: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 517: /*
1.6 kristaps 518: * Just consume til the end delimiter, accounting for
519: * whether it's a custom one.
1.5 kristaps 520: */
521: for ( ; *start < end; (*start)++) {
522: if ('>' != buf[*start])
523: continue;
524: else if (dsz == 1)
525: break;
526: assert(*start > 0);
527: if (' ' != buf[*start - 1])
528: continue;
529: i = *start;
530: for (j = 0; i < end && j < dsz; j++)
531: if ('>' != buf[i++])
532: break;
533: if (dsz != j)
534: continue;
535: (*start) += dsz;
536: break;
537: }
1.24 kristaps 538: if (*start < end) {
539: assert('>' == buf[*start]);
540: (*start)++;
541: }
/*
 * If whitespace came before the ignored code, also swallow the
 * whitespace after it.
 * NOTE(review): "last" and buf[*start] can hold negative values where
 * char is signed, which <ctype.h> does not accept -- confirm and cast
 * to unsigned char.
 */
542: if (isspace(last))
543: while (*start < end && isspace((int)buf[*start]))
544: (*start)++;
1.32 ! schwarze 545: return;
1.1 schwarze 546: }
547:
1.6 kristaps 548: /*
549: * Check whether we're supposed to print macro stuff (this is
550: * suppressed in, e.g., "Nm" and "Sh" macros).
551: */
1.30 schwarze 552: if (FMT__MAX != fmt && !nomacro) {
1.32 ! schwarze 553:
! 554: /*
! 555: * We may already have wantws if there was whitespace
! 556: * before the code ("text B<text"), but initial
! 557: * whitespace inside our scope ("textB< text")
! 558: * allows to break at this point as well.
! 559: */
! 560:
! 561: st->wantws |= ' ' == buf[*start];
1.31 schwarze 562:
1.1 schwarze 563: /*
1.31 schwarze 564: * If we are on a text line and there is no
565: * whitespace before our content, we have to make
566: * the previous word a prefix to the macro line.
1.32 ! schwarze 567: * In the following, mdoc_newln() must not be used
! 568: * lest we clobber our output state.
1.1 schwarze 569: */
1.31 schwarze 570:
1.32 ! schwarze 571: if (OUST_MAC != st->oust && !st->wantws) {
! 572: if (OUST_NL != st->oust)
1.31 schwarze 573: putchar('\n');
574: printf(".Pf ");
575: }
576:
577: outbuf_flush(st);
578:
579: /* Whitespace is easier to suppress on macro lines. */
580:
1.32 ! schwarze 581: if (OUST_MAC == st->oust && !st->wantws)
! 582: printf(" Ns ");
1.31 schwarze 583:
584: /* Unless we are on a macro line, start one. */
585:
1.32 ! schwarze 586: if (OUST_MAC != st->oust && st->wantws) {
! 587: if (OUST_NL != st->oust)
1.6 kristaps 588: putchar('\n');
1.1 schwarze 589: putchar('.');
1.31 schwarze 590: } else
1.1 schwarze 591: putchar(' ');
1.31 schwarze 592:
1.32 ! schwarze 593: /*
! 594: * Print the macro corresponding to this format code,
! 595: * and update the output state afterwards.
! 596: */
1.6 kristaps 597:
1.1 schwarze 598: switch (fmt) {
599: case (FMT_ITALIC):
600: printf("Em ");
601: break;
602: case (FMT_BOLD):
/* In SYNOPSIS, bold text becomes a flag, the name, or an argument. */
1.14 kristaps 603: if (SECT_SYNOPSIS == st->sect) {
604: if (1 == dsz && '-' == buf[*start])
605: dosynopsisfl(buf, start, end);
1.15 kristaps 606: else if (0 == pos)
607: printf("Nm ");
1.14 kristaps 608: else
609: printf("Ar ");
610: break;
611: }
/* A bold "NULL" followed by '=' or '>' is printed with Dv. */
1.27 schwarze 612: if (0 == strncmp(buf + *start, "NULL", 4) &&
613: ('=' == buf[*start + 4] ||
614: '>' == buf[*start + 4]))
615: printf("Dv ");
616: else
617: printf("Sy ");
1.1 schwarze 618: break;
619: case (FMT_CODE):
1.2 schwarze 620: printf("Qo Li ");
1.1 schwarze 621: break;
622: case (FMT_LINK):
1.19 kristaps 623: /* Try to link; use "No" if it's empty. */
1.9 kristaps 624: if ( ! trylink(buf, start, end, dsz))
625: printf("No ");
1.1 schwarze 626: break;
627: case (FMT_FILE):
628: printf("Pa ");
629: break;
630: case (FMT_NBSP):
631: printf("No ");
632: break;
633: default:
634: abort();
635: }
1.32 ! schwarze 636: st->oust = OUST_MAC;
! 637: st->wantws = 1;
1.31 schwarze 638: } else
/* Unknown code or macros suppressed: only flush the pending text. */
639: outbuf_flush(st);
1.1 schwarze 640:
641: /*
1.6 kristaps 642: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 643: * find a nested format code.
1.1 schwarze 644: * Don't emit any newlines: since we're on a macro line, we
645: * don't want to break the line.
646: */
647: while (*start < end) {
1.5 kristaps 648: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 649: (*start)++;
650: break;
1.5 kristaps 651: } else if ('>' == buf[*start] &&
652: ' ' == buf[*start - 1]) {
653: /*
654: * Handle custom delimiters.
655: * These require a certain number of
656: * space-preceded carrots before we're really at
657: * the end.
658: */
659: i = *start;
660: for (j = 0; i < end && j < dsz; j++)
661: if ('>' != buf[i++])
662: break;
663: if (dsz == j) {
664: *start += dsz;
665: break;
666: }
1.1 schwarze 667: }
/* Any byte followed by '<' is handed to a nested call. */
668: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.32 ! schwarze 669: formatcode(st, buf, start, end, nomacro, 1);
1.1 schwarze 670: continue;
671: }
1.3 schwarze 672:
1.32 ! schwarze 673: /* Suppress newlines and multiple spaces. */
! 674:
! 675: last = buf[(*start)++];
! 676: if (' ' == last || '\n' == last) {
! 677: putchar(' ');
! 678: while (*start < end && ' ' == buf[*start])
! 679: (*start)++;
! 680: continue;
! 681: }
! 682:
! 683: if (OUST_MAC == st->oust) {
! 684: if ( ! st->wantws) {
! 685: printf(" Ns ");
! 686: st->wantws = 1;
! 687: }
! 688:
! 689: /*
! 690: * Escape macro-like words.
! 691: * This matches "Xx " and "XxEOLN".
! 692: */
! 693:
! 694: if (end - *start > 0 &&
! 695: isupper((unsigned char)last) &&
! 696: islower((unsigned char)buf[*start]) &&
! 697: (end - *start == 1 ||
! 698: ' ' == buf[*start + 1] ||
! 699: '>' == buf[*start + 1]))
! 700: printf("\\&");
! 701: }
1.3 schwarze 702:
1.32 ! schwarze 703: putchar(last);
1.4 schwarze 704:
1.8 kristaps 705: /* Protect against character escapes. */
1.32 ! schwarze 706:
1.8 kristaps 707: if ('\\' == last)
708: putchar('e');
1.1 schwarze 709: }
1.2 schwarze 710:
/* Close the quotation opened by "Qo Li" for a code block. */
711: if ( ! nomacro && FMT_CODE == fmt)
712: printf(" Qc ");
1.1 schwarze 713:
/* After a known code, whitespace is wanted only if the content ended in a blank. */
1.32 ! schwarze 714: if (FMT__MAX != fmt)
! 715: st->wantws = ' ' == last;
1.1 schwarze 716: }
717:
718: /*
719: * Calls formatcode() til the end of a paragraph.
1.32 ! schwarze 720: * Goes to OUST_MAC mode and stays there when returning,
! 721: * such that the caller can add arguments to the macro line
! 722: * before closing it out.
1.1 schwarze 723: */
724: static void
1.32 ! schwarze 725: formatcodeln(struct state *st, const char *linemac,
! 726: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 727: {
728:
1.32 ! schwarze 729: assert(OUST_NL == st->oust);
! 730: assert(st->wantws);
! 731: printf(".%s ", linemac);
! 732: st->oust = OUST_MAC;
! 733:
1.4 schwarze 734: last = ' ';
1.1 schwarze 735: while (*start < end) {
736: if (*start + 1 < end && '<' == buf[*start + 1]) {
1.32 ! schwarze 737: formatcode(st, buf, start, end, nomacro, 1);
1.1 schwarze 738: continue;
739: }
1.32 ! schwarze 740:
! 741: if (OUST_MAC == st->oust) {
! 742: if ( ! st->wantws &&
! 743: ' ' != buf[*start] &&
! 744: '\n' != buf[*start])
! 745: printf(" Ns ");
! 746: st->wantws = 1;
! 747: }
! 748:
1.4 schwarze 749: /*
750: * Since we're already on a macro line, we want to make
751: * sure that we don't inadvertently invoke a macro.
752: * We need to do this carefully because section names
753: * are used in troff and we don't want to escape
754: * something that needn't be escaped.
755: */
756: if (' ' == last && end - *start > 1 &&
757: isupper((int)buf[*start]) &&
758: islower((int)buf[*start + 1]) &&
759: (end - *start == 2 ||
760: ' ' == buf[*start + 2]))
761: printf("\\&");
762:
1.8 kristaps 763: if ('\n' == buf[*start])
764: putchar(last = ' ');
765: else
1.1 schwarze 766: putchar(last = buf[*start]);
1.8 kristaps 767:
768: /* Protect against character escapes. */
769: if ('\\' == last)
770: putchar('e');
771:
1.1 schwarze 772: (*start)++;
773: }
774: }
775:
776: /*
1.4 schwarze 777: * Guess at what kind of list we are.
778: * These are taken straight from the POD manual.
779: * I don't know what people do in real life.
780: */
781: static enum list
782: listguess(const char *buf, size_t start, size_t end)
783: {
784: size_t len = end - start;
785:
786: assert(end >= start);
787:
788: if (len == 1 && '*' == buf[start])
789: return(LIST_BULLET);
790: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
791: return(LIST_ENUM);
792: else if (len == 1 && '1' == buf[start])
793: return(LIST_ENUM);
794: else
795: return(LIST_TAG);
796: }
797:
798: /*
1.1 schwarze 799: * A command paragraph, as noted in the perlpod manual, just indicates
800: * that we should do something, optionally with some text to print as
801: * well.
1.32 ! schwarze 802: * From the perspective of external callers,
! 803: * always stays in OUST_NL/wantws mode,
! 804: * but its children do use OUST_MAC.
1.1 schwarze 805: */
806: static void
807: command(struct state *st, const char *buf, size_t start, size_t end)
808: {
809: size_t len, csz;
810: enum cmd cmd;
811:
812: assert('=' == buf[start]);
813: start++;
814: len = end - start;
815:
816: for (cmd = 0; cmd < CMD__MAX; cmd++) {
817: csz = strlen(cmds[cmd]);
818: if (len < csz)
819: continue;
820: if (0 == memcmp(&buf[start], cmd[cmds], csz))
821: break;
822: }
823:
824: /* Ignore bogus commands. */
825:
826: if (CMD__MAX == cmd)
827: return;
828:
829: start += csz;
1.8 kristaps 830: while (start < end && ' ' == buf[start])
831: start++;
832:
1.1 schwarze 833: len = end - start;
834:
835: if (st->paused) {
836: st->paused = CMD_END != cmd;
837: return;
838: }
839:
840: switch (cmd) {
841: case (CMD_POD):
842: break;
843: case (CMD_HEAD1):
844: /*
845: * The behaviour of head= follows from a quick glance at
846: * how pod2man handles it.
847: */
1.11 kristaps 848: st->sect = SECT_NONE;
849: if (end - start == 4) {
1.1 schwarze 850: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 851: st->sect = SECT_NAME;
852: } else if (end - start == 8) {
853: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
854: st->sect = SECT_SYNOPSIS;
855: }
1.32 ! schwarze 856: formatcodeln(st, "Sh", buf, &start, end, 1);
! 857: mdoc_newln(st);
1.1 schwarze 858: st->haspar = 1;
859: break;
860: case (CMD_HEAD2):
1.32 ! schwarze 861: formatcodeln(st, "Ss", buf, &start, end, 1);
! 862: mdoc_newln(st);
1.1 schwarze 863: st->haspar = 1;
864: break;
865: case (CMD_HEAD3):
866: puts(".Pp");
1.32 ! schwarze 867: formatcodeln(st, "Em", buf, &start, end, 0);
! 868: mdoc_newln(st);
1.1 schwarze 869: puts(".Pp");
870: st->haspar = 1;
871: break;
872: case (CMD_HEAD4):
873: puts(".Pp");
1.32 ! schwarze 874: formatcodeln(st, "No", buf, &start, end, 0);
! 875: mdoc_newln(st);
1.1 schwarze 876: puts(".Pp");
877: st->haspar = 1;
878: break;
879: case (CMD_OVER):
1.4 schwarze 880: /*
881: * If we have an existing list that hasn't had an =item
882: * yet, then make sure that we open it now.
883: * We use the default list type, but that can't be
884: * helped (we haven't seen any items yet).
1.1 schwarze 885: */
1.4 schwarze 886: if (st->lpos > 0)
887: if (LIST__MAX == st->lstack[st->lpos - 1]) {
888: st->lstack[st->lpos - 1] = LIST_TAG;
889: puts(".Bl -tag -width Ds");
890: }
891: st->lpos++;
892: assert(st->lpos < LIST_STACKSZ);
893: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 894: break;
895: case (CMD_ITEM):
1.6 kristaps 896: if (0 == st->lpos) {
897: /*
898: * Bad markup.
899: * Try to compensate.
900: */
901: st->lstack[st->lpos] = LIST__MAX;
902: st->lpos++;
903: }
1.4 schwarze 904: assert(st->lpos > 0);
905: /*
906: * If we're the first =item, guess at what our content
907: * will be: "*" is a bullet list, "1." is a numbered
908: * list, and everything is tagged.
909: */
910: if (LIST__MAX == st->lstack[st->lpos - 1]) {
911: st->lstack[st->lpos - 1] =
912: listguess(buf, start, end);
913: switch (st->lstack[st->lpos - 1]) {
914: case (LIST_BULLET):
915: puts(".Bl -bullet");
916: break;
917: case (LIST_ENUM):
918: puts(".Bl -enum");
919: break;
920: default:
921: puts(".Bl -tag -width Ds");
922: break;
923: }
924: }
925: switch (st->lstack[st->lpos - 1]) {
926: case (LIST_TAG):
1.32 ! schwarze 927: formatcodeln(st, "It", buf, &start, end, 0);
! 928: mdoc_newln(st);
1.4 schwarze 929: break;
930: case (LIST_ENUM):
931: /* FALLTHROUGH */
932: case (LIST_BULLET):
933: /*
934: * Abandon the remainder of the paragraph
935: * because we're going to be a bulletted or
936: * numbered list.
937: */
938: puts(".It");
939: break;
940: default:
941: abort();
942: }
1.1 schwarze 943: st->haspar = 1;
944: break;
945: case (CMD_BACK):
1.4 schwarze 946: /* Make sure we don't back over the stack. */
947: if (st->lpos > 0) {
948: st->lpos--;
949: puts(".El");
950: }
1.1 schwarze 951: break;
952: case (CMD_BEGIN):
953: /*
954: * We disregard all types for now.
955: * TODO: process at least "text" in a -literal block.
956: */
957: st->paused = 1;
958: break;
959: case (CMD_FOR):
960: /*
961: * We ignore all types of encodings and formats
962: * unilaterally.
963: */
964: break;
965: case (CMD_ENCODING):
966: break;
967: case (CMD_CUT):
968: st->parsing = 0;
969: return;
970: default:
971: abort();
972: }
973:
974: /* Any command (but =cut) makes us start parsing. */
975: st->parsing = 1;
976: }
977:
978: /*
979: * Just pump out the line in a verbatim block.
1.32 ! schwarze 980: * From the perspective of external callers,
! 981: * always stays in OUST_NL/wantws mode.
1.1 schwarze 982: */
983: static void
984: verbatim(struct state *st, const char *buf, size_t start, size_t end)
985: {
1.22 kristaps 986: size_t i;
1.1 schwarze 987:
988: if ( ! st->parsing || st->paused)
989: return;
1.22 kristaps 990: again:
991: /*
992: * If we're in the SYNOPSIS, see if we're an #include block.
993: * If we are, then print the "In" macro and re-loop.
994: * This handles any number of inclusions, but only when they
995: * come before the remaining parts...
996: */
997: if (SECT_SYNOPSIS == st->sect) {
998: i = start;
999: for (i = start; i < end && ' ' == buf[i]; i++)
1000: /* Spin. */ ;
1001: if (i == end)
1002: return;
1003: /* We're an include block! */
1004: if (end - i > 10 &&
1005: 0 == memcmp(&buf[i], "#include <", 10)) {
1006: start = i + 10;
1007: while (start < end && ' ' == buf[start])
1008: start++;
1009: fputs(".In ", stdout);
1010: /* Stop til the '>' marker or we hit eoln. */
1011: while (start < end &&
1012: '>' != buf[start] && '\n' != buf[start])
1013: putchar(buf[start++]);
1014: putchar('\n');
1015: if (start < end && '>' == buf[start])
1016: start++;
1017: if (start < end && '\n' == buf[start])
1018: start++;
1019: if (start < end)
1020: goto again;
1021: return;
1022: }
1023: }
1024:
1025: if (start == end)
1026: return;
1.1 schwarze 1027: puts(".Bd -literal");
1.8 kristaps 1028: for (last = ' '; start < end; start++) {
1029: /*
1030: * Handle accidental macros (newline starting with
1031: * control character) and escapes.
1032: */
1033: if ('\n' == last)
1.7 kristaps 1034: if ('.' == buf[start] || '\'' == buf[start])
1035: printf("\\&");
1.8 kristaps 1036: putchar(last = buf[start]);
1037: if ('\\' == buf[start])
1038: printf("e");
1.7 kristaps 1039: }
1.31 schwarze 1040: putchar(last = '\n');
1.1 schwarze 1041: puts(".Ed");
1042: }
1043:
/*
 * Helper for dosynopsisop().
 * Scans buf[start..end) and returns 1 on reaching a ']' that is not
 * paired with a '[' seen earlier in that range, 0 if there is none.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth, pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case ('['):
			depth++;
			break;
		case (']'):
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
1061:
1062: /*
1063: * If we're in the SYNOPSIS section and we've encounter braces in an
1064: * ordinary paragraph, then try to see whether we're an [-option].
1065: * Do this, if we're an opening bracket, by first seeing if we have a
1066: * matching end via hasmatch().
1067: * If we're an ending bracket, see if we have a stack already.
1068: */
1069: static int
1.32 ! schwarze 1070: dosynopsisop(struct state *st, const char *buf,
! 1071: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1072: {
1073:
1074: assert('[' == buf[*start] || ']' == buf[*start]);
1075:
1076: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 ! schwarze 1077: mdoc_newln(st);
1.13 kristaps 1078: puts(".Oo");
1079: (*opstack)++;
1080: } else if ('[' == buf[*start])
1081: return(0);
1082:
1083: if (']' == buf[*start] && *opstack > 0) {
1.32 ! schwarze 1084: mdoc_newln(st);
1.13 kristaps 1085: puts(".Oc");
1086: (*opstack)--;
1087: } else if (']' == buf[*start])
1088: return(0);
1089:
1090: (*start)++;
1.31 schwarze 1091: last = '\n';
1.13 kristaps 1092: while (' ' == buf[*start])
1093: (*start)++;
1094: return(1);
1095: }
1096:
1097: /*
1.17 kristaps 1098: * Format multiple "Nm" manpage names in the NAME section.
1.32 ! schwarze 1099: * From the perspective of external callers,
! 1100: * always stays in OUST_NL/wantws mode,
! 1101: * but its children do use OUST_MAC.
1.17 kristaps 1102: */
1103: static void
1104: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1105: {
1106: size_t word;
1107:
1.32 ! schwarze 1108: assert(OUST_NL == st->oust);
! 1109: assert(st->wantws);
! 1110:
1.17 kristaps 1111: while (*start < end && ' ' == buf[*start])
1112: (*start)++;
1113:
1114: if (end == *start) {
1115: puts(".Nm unknown");
1116: return;
1117: }
1118:
1119: while (*start < end) {
1120: for (word = *start; word < end; word++)
1121: if (',' == buf[word])
1122: break;
1.32 ! schwarze 1123: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1124: if (*start == end) {
1.32 ! schwarze 1125: mdoc_newln(st);
! 1126: break;
1.17 kristaps 1127: }
1128: assert(',' == buf[*start]);
1.32 ! schwarze 1129: printf(" ,");
! 1130: mdoc_newln(st);
1.17 kristaps 1131: (*start)++;
1132: while (*start < end && ' ' == buf[*start])
1133: (*start)++;
1134: }
1135: }
1136:
1137: /*
1.1 schwarze 1138: * Ordinary paragraph.
1139: * Well, this is really the hardest--POD seems to assume that, for
1140: * example, a leading space implies a newline, and so on.
1141: * Lots of other snakes in the grass: escaping a newline followed by a
1142: * period (accidental mdoc(7) control), double-newlines after macro
1143: * passages, etc.
1.32 ! schwarze 1144: *
! 1145: * Uses formatcode() to go to OUST_MAC mode
! 1146: * and outbuf_flush() to go to OUST_TXT mode.
! 1147: * Main text mode wantws handling is in this function.
! 1148: * Must make sure to go back to OUST_NL/wantws mode before returning.
1.1 schwarze 1149: */
1150: static void
1151: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1152: {
1.13 kristaps 1153: size_t i, j, opstack;
1.15 kristaps 1154: int seq;
1.1 schwarze 1155:
1156: if ( ! st->parsing || st->paused)
1157: return;
1158:
1159: /*
1160: * Special-case: the NAME section.
1161: * If we find a "-" when searching from the end, assume that
1162: * we're in "name - description" format.
1163: * To wit, print out a "Nm" and "Nd" in that format.
1164: */
1.11 kristaps 1165: if (SECT_NAME == st->sect) {
1.15 kristaps 1166: for (i = end - 2; i > start; i--)
1167: if ('-' == buf[i] && ' ' == buf[i + 1])
1.1 schwarze 1168: break;
1169: if ('-' == buf[i]) {
1170: j = i;
1171: /* Roll over multiple "-". */
1172: for ( ; i > start; i--)
1173: if ('-' != buf[i])
1174: break;
1.17 kristaps 1175: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1176: start = j + 1;
1.17 kristaps 1177: while (start < end && ' ' == buf[start])
1178: start++;
1.32 ! schwarze 1179: formatcodeln(st, "Nd", buf, &start, end, 1);
! 1180: mdoc_newln(st);
1.1 schwarze 1181: return;
1182: }
1183: }
1184:
1185: if ( ! st->haspar)
1186: puts(".Pp");
1187:
1188: st->haspar = 0;
1189: last = '\n';
1.13 kristaps 1190: opstack = 0;
1.1 schwarze 1191:
1.15 kristaps 1192: for (seq = 0; start < end; seq++) {
1.1 schwarze 1193: /*
1194: * Loop til we get either to a newline or escape.
1195: * Escape initial control characters.
1196: */
1197: while (start < end) {
1198: if (start < end - 1 && '<' == buf[start + 1])
1199: break;
1200: else if ('\n' == buf[start])
1201: break;
1202: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1203: outbuf_addstr(st, "\\&");
1.1 schwarze 1204: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1205: outbuf_addstr(st, "\\&");
1.12 kristaps 1206: /*
1207: * If we're in the SYNOPSIS, have square
1208: * brackets indicate that we're opening and
1209: * closing an optional context.
1210: */
1.32 ! schwarze 1211:
1.13 kristaps 1212: if (SECT_SYNOPSIS == st->sect &&
1213: ('[' == buf[start] ||
1214: ']' == buf[start]) &&
1.32 ! schwarze 1215: dosynopsisop(st, buf,
! 1216: &start, end, &opstack))
1.13 kristaps 1217: continue;
1.32 ! schwarze 1218:
! 1219: /*
! 1220: * On whitespace, flush the output buffer
! 1221: * and allow breaking to a macro line.
! 1222: * Otherwise, buffer text and clear wantws.
! 1223: */
! 1224:
1.31 schwarze 1225: last = buf[start++];
1226: if (' ' == last) {
1227: outbuf_flush(st);
1228: putchar(' ');
1.32 ! schwarze 1229: st->wantws = 1;
1.31 schwarze 1230: } else
1231: outbuf_addchar(st);
1.1 schwarze 1232: }
1233:
1234: if (start < end - 1 && '<' == buf[start + 1]) {
1.32 ! schwarze 1235: formatcode(st, buf, &start, end, 0, seq);
! 1236: if (OUST_MAC == st->oust) {
1.30 schwarze 1237: /*
1238: * Let mdoc(7) handle trailing punctuation.
1239: * XXX Some punctuation characters
1240: * are not handled yet.
1241: */
1.16 kristaps 1242: if ((start == end - 1 ||
1243: (start < end - 1 &&
1244: (' ' == buf[start + 1] ||
1245: '\n' == buf[start + 1]))) &&
1246: ('.' == buf[start] ||
1247: ',' == buf[start])) {
1248: putchar(' ');
1249: putchar(buf[start++]);
1250: }
1.32 ! schwarze 1251:
! 1252: if (st->wantws ||
! 1253: ' ' == buf[start] ||
! 1254: '\n' == buf[start])
! 1255: mdoc_newln(st);
! 1256:
1.30 schwarze 1257: /*
1258: * Consume all whitespace
1259: * so we don't accidentally start
1260: * an implicit literal line.
1261: */
1.32 ! schwarze 1262:
1.6 kristaps 1263: while (start < end && ' ' == buf[start])
1264: start++;
1.32 ! schwarze 1265:
! 1266: /*
! 1267: * Some text is following.
! 1268: * Implement requested spacing.
! 1269: */
! 1270:
! 1271: if ( ! st->wantws && start < end &&
! 1272: '<' != buf[start + 1]) {
! 1273: printf(" Ns ");
! 1274: st->wantws = 1;
! 1275: }
1.6 kristaps 1276: }
1.1 schwarze 1277: } else if (start < end && '\n' == buf[start]) {
1.32 ! schwarze 1278: outbuf_flush(st);
! 1279: mdoc_newln(st);
1.1 schwarze 1280: if (++start >= end)
1281: continue;
1282: /*
1283: * If we have whitespace next, eat it to prevent
1284: * mdoc(7) from thinking that it's meant for
1285: * verbatim text.
1286: * It is--but if we start with that, we can't
1287: * have a macro subsequent it, which may be
1288: * possible if we have an escape next.
1289: */
1.31 schwarze 1290: if (' ' == buf[start] || '\t' == buf[start])
1.1 schwarze 1291: puts(".br");
1292: for ( ; start < end; start++)
1293: if (' ' != buf[start] && '\t' != buf[start])
1294: break;
1.12 kristaps 1295: }
1.1 schwarze 1296: }
1.32 ! schwarze 1297: outbuf_flush(st);
! 1298: mdoc_newln(st);
1.1 schwarze 1299: }
1300:
1301: /*
1302: * There are three kinds of paragraphs: verbatim (starts with whitespace
1303: * of some sort), ordinary (starts without "=" marker), or a command
1304: * (default: starts with "=").
1305: */
1306: static void
1307: dopar(struct state *st, const char *buf, size_t start, size_t end)
1308: {
1309:
1.32 ! schwarze 1310: assert(OUST_NL == st->oust);
! 1311: assert(st->wantws);
! 1312:
1.1 schwarze 1313: if (end == start)
1314: return;
1315: if (' ' == buf[start] || '\t' == buf[start])
1316: verbatim(st, buf, start, end);
1317: else if ('=' != buf[start])
1318: ordinary(st, buf, start, end);
1319: else
1320: command(st, buf, start, end);
1321: }
1322:
1323: /*
1324: * Loop around paragraphs within a document, processing each one in the
1325: * POD way.
1326: */
1327: static void
1328: dofile(const struct args *args, const char *fname,
1329: const struct tm *tm, const char *buf, size_t sz)
1330: {
1.29 schwarze 1331: char datebuf[64];
1.1 schwarze 1332: struct state st;
1.29 schwarze 1333: const char *fbase, *fext, *section, *date;
1.1 schwarze 1334: char *title, *cp;
1.29 schwarze 1335: size_t sup, end, i, cur = 0;
1.1 schwarze 1336:
1337: if (0 == sz)
1338: return;
1339:
1.29 schwarze 1340: /*
1341: * Parsing the filename is almost always required,
1342: * except when both the title and the section
1343: * are provided on the command line.
1344: */
1345:
1346: if (NULL == args->title || NULL == args->section) {
1347: fbase = strrchr(fname, '/');
1348: if (NULL == fbase)
1349: fbase = fname;
1350: else
1351: fbase++;
1352: fext = strrchr(fbase, '.');
1353: } else
1354: fext = NULL;
1355:
1356: /*
1357: * The title will be converted to uppercase,
1358: * so it needs to be copied.
1359: */
1360:
1361: title = (NULL != args->title) ? strdup(args->title) :
1362: (NULL != fext) ? strndup(fbase, fext - fbase) :
1363: strdup(fbase);
1.1 schwarze 1364:
1365: if (NULL == title) {
1366: perror(NULL);
1367: exit(EXIT_FAILURE);
1368: }
1369:
1370: /* Section is 1 unless suffix is "pm". */
1371:
1.29 schwarze 1372: section = (NULL != args->section) ? args->section :
1373: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1374: PERL_SECTION;
1.1 schwarze 1375:
1376: /* Date. Or the given "tm" if not supplied. */
1377:
1378: if (NULL == (date = args->date)) {
1379: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1380: date = datebuf;
1381: }
1382:
1383: for (cp = title; '\0' != *cp; cp++)
1384: *cp = toupper((int)*cp);
1385:
1386: /* The usual mdoc(7) preamble. */
1387:
1388: printf(".Dd %s\n", date);
1389: printf(".Dt %s %s\n", title, section);
1390: puts(".Os");
1391:
1392: free(title);
1393:
1394: memset(&st, 0, sizeof(struct state));
1.32 ! schwarze 1395: st.oust = OUST_NL;
! 1396: st.wantws = 1;
! 1397:
1.1 schwarze 1398: assert(sz > 0);
1399:
1400: /* Main loop over file contents. */
1401:
1402: while (cur < sz) {
1403: /* Read until next paragraph. */
1404: for (i = cur + 1; i < sz; i++)
1405: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1406: /* Consume blank paragraphs. */
1407: while (i + 1 < sz && '\n' == buf[i + 1])
1408: i++;
1409: break;
1410: }
1411:
1412: /* Adjust end marker for EOF. */
1413: end = i < sz ? i - 1 :
1414: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1415: sup = i < sz ? end + 2 : sz;
1416:
1417: /* Process paragraph and adjust start. */
1418: dopar(&st, buf, cur, end);
1419: cur = sup;
1420: }
1421: }
1422:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 *
 * The time passed to dofile() is the file's modification time, or the
 * current time for stdin or when fstat(2) fails.
 *
 * Returns 1 on success, 0 if the file could not be opened or read.
 * Exits with EXIT_FAILURE when out of memory.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
	    open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * Assigning the realloc() result straight to buf is
		 * fine here because failure terminates the program.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	/*
	 * Report a read error before any cleanup can disturb errno.
	 * Both outcomes then share the same cleanup, so the
	 * descriptor is closed on the error path as well.
	 */
	rc = 0;
	if (ssz < 0)
		perror(fname);
	else {
		dofile(args, STDIN_FILENO == fd ?
		    "STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
1489:
1490: int
1491: main(int argc, char *argv[])
1492: {
1493: const char *fname, *name;
1494: struct args args;
1495: int c;
1496:
1497: name = strrchr(argv[0], '/');
1498: if (name == NULL)
1499: name = argv[0];
1500: else
1501: ++name;
1502:
1503: memset(&args, 0, sizeof(struct args));
1504: fname = "-";
1505:
1506: /* Accept no arguments for now. */
1507:
1508: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1509: switch (c) {
1510: case ('h'):
1511: /* FALLTHROUGH */
1512: case ('l'):
1513: /* FALLTHROUGH */
1514: case ('c'):
1515: /* FALLTHROUGH */
1516: case ('o'):
1517: /* FALLTHROUGH */
1518: case ('q'):
1519: /* FALLTHROUGH */
1520: case ('r'):
1521: /* FALLTHROUGH */
1522: case ('u'):
1523: /* FALLTHROUGH */
1524: case ('v'):
1525: /* Ignore these. */
1526: break;
1527: case ('d'):
1528: args.date = optarg;
1529: break;
1530: case ('n'):
1531: args.title = optarg;
1532: break;
1533: case ('s'):
1534: args.section = optarg;
1535: break;
1536: default:
1537: goto usage;
1538: }
1539:
1540: argc -= optind;
1541: argv += optind;
1542:
1543: /* Accept only a single input file. */
1544:
1.25 schwarze 1545: if (argc > 1)
1546: goto usage;
1.1 schwarze 1547: else if (1 == argc)
1548: fname = *argv;
1549:
1550: return(readfile(&args, fname) ?
1551: EXIT_SUCCESS : EXIT_FAILURE);
1552:
1553: usage:
1554: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1555: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1556:
1557: return(EXIT_FAILURE);
1558: }
CVSweb