Annotation of pod2mdoc/pod2mdoc.c, Revision 1.39
1.39 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.38 2015/02/13 12:40:54 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.37 schwarze 4: * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/stat.h>
19: #include <sys/time.h>
20:
21: #include <assert.h>
22: #include <ctype.h>
23: #include <fcntl.h>
24: #include <getopt.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
1.37 schwarze 30: #include "dict.h"
31:
/*
 * Manual section used when cross-referencing Perl module manuals.
 * Some systems (Mac OS X) file them under 3pm, others (OpenBSD, etc.)
 * under 3p.
 * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
 */
#define	PERL_SECTION	"3p"
38:
/*
 * Values that override what would otherwise be emitted in the
 * "Dt" and "Dd" macros of the generated manual.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
44:
/*
 * Kinds of lists kept on the list stack.
 * LIST__MAX doubles as the marker for a list whose type is not yet known.
 */
enum list {
	LIST_BULLET = 0,	/* bullet list */
	LIST_ENUM,		/* enumerated list */
	LIST_TAG,		/* tagged list */
	LIST__MAX
};
51:
/*
 * Sections that receive special treatment.
 */
enum sect {
	SECT_NONE = 0,		/* any other section */
	SECT_NAME,		/* the NAME section */
	SECT_SYNOPSIS,		/* the SYNOPSIS section */
};
57:
/*
 * What kind of line the mdoc output stream is currently on.
 */
enum outstate {
	OUST_NL = 0,	/* a new output line has just been started */
	OUST_TXT,	/* in the middle of a text line */
	OUST_MAC	/* in the middle of a macro line */
};
63:
1.1 schwarze 64: struct state {
1.31 schwarze 65: const char *fname; /* file being parsed */
1.1 schwarze 66: int parsing; /* after =cut of before command */
67: int paused; /* in =begin and before =end */
1.11 kristaps 68: enum sect sect; /* which section are we in? */
1.4 schwarze 69: #define LIST_STACKSZ 128
70: enum list lstack[LIST_STACKSZ]; /* open lists */
71: size_t lpos; /* where in list stack */
1.31 schwarze 72: int haspar; /* in paragraph: do we need Pp? */
1.32 schwarze 73: enum outstate oust; /* state of the mdoc output stream */
74: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 75: char *outbuf; /* text buffered for output */
76: size_t outbufsz; /* allocated size of outbuf */
77: size_t outbuflen; /* current length of outbuf */
1.1 schwarze 78: };
79:
/*
 * POD format codes; the order matches the fmts[] letter table.
 * FMT__MAX also stands for "not a known format code".
 */
enum fmt {
	FMT_ITALIC,	/* I<> */
	FMT_BOLD,	/* B<> */
	FMT_CODE,	/* C<> */
	FMT_LINK,	/* L<> */
	FMT_ESCAPE,	/* E<> */
	FMT_FILE,	/* F<> */
	FMT_NBSP,	/* S<> */
	FMT_INDEX,	/* X<> */
	FMT_NULL,	/* Z<> */
	FMT__MAX
};
92:
/*
 * POD command paragraphs; the order matches the cmds[] name table.
 * CMD__MAX also stands for "not a known command".
 */
enum cmd {
	CMD_POD = 0,	/* =pod */
	CMD_HEAD1,	/* =head1 */
	CMD_HEAD2,	/* =head2 */
	CMD_HEAD3,	/* =head3 */
	CMD_HEAD4,	/* =head4 */
	CMD_OVER,	/* =over */
	CMD_ITEM,	/* =item */
	CMD_BACK,	/* =back */
	CMD_BEGIN,	/* =begin */
	CMD_END,	/* =end */
	CMD_FOR,	/* =for */
	CMD_ENCODING,	/* =encoding */
	CMD_CUT,	/* =cut */
	CMD__MAX
};
109:
110: static const char *const cmds[CMD__MAX] = {
111: "pod", /* CMD_POD */
112: "head1", /* CMD_HEAD1 */
113: "head2", /* CMD_HEAD2 */
114: "head3", /* CMD_HEAD3 */
115: "head4", /* CMD_HEAD4 */
116: "over", /* CMD_OVER */
117: "item", /* CMD_ITEM */
118: "back", /* CMD_BACK */
119: "begin", /* CMD_BEGIN */
120: "end", /* CMD_END */
121: "for", /* CMD_FOR */
122: "encoding", /* CMD_ENCODING */
123: "cut" /* CMD_CUT */
124: };
125:
126: static const char fmts[FMT__MAX] = {
127: 'I', /* FMT_ITALIC */
128: 'B', /* FMT_BOLD */
129: 'C', /* FMT_CODE */
130: 'L', /* FMT_LINK */
131: 'E', /* FMT_ESCAPE */
132: 'F', /* FMT_FILE */
133: 'S', /* FMT_NBSP */
134: 'X', /* FMT_INDEX */
135: 'Z' /* FMT_NULL */
136: };
137:
/* The character most recently written or buffered for output. */
static int	 last;
139:
1.31 schwarze 140:
141: static void
142: outbuf_grow(struct state *st, size_t by)
143: {
144:
145: st->outbufsz += (by / 128 + 1) * 128;
146: st->outbuf = realloc(st->outbuf, st->outbufsz);
147: if (NULL == st->outbuf) {
148: perror(NULL);
149: exit(EXIT_FAILURE);
150: }
151: }
152:
153: static void
154: outbuf_addchar(struct state *st)
155: {
156:
157: if (st->outbuflen + 2 >= st->outbufsz)
158: outbuf_grow(st, 1);
159: st->outbuf[st->outbuflen++] = last;
160: if ('\\' == last)
161: st->outbuf[st->outbuflen++] = 'e';
162: st->outbuf[st->outbuflen] = '\0';
1.32 schwarze 163: st->wantws = 0;
1.31 schwarze 164: }
165:
166: static void
167: outbuf_addstr(struct state *st, const char *str)
168: {
169: size_t slen;
170:
171: slen = strlen(str);
172: if (st->outbuflen + slen >= st->outbufsz)
173: outbuf_grow(st, slen);
174: memcpy(st->outbuf + st->outbuflen, str, slen+1);
1.33 schwarze 175: st->outbuflen += slen;
1.31 schwarze 176: last = str[slen - 1];
1.32 schwarze 177: st->wantws = 0;
1.31 schwarze 178: }
179:
180: static void
181: outbuf_flush(struct state *st)
182: {
183:
184: if (0 == st->outbuflen)
185: return;
186:
187: fputs(st->outbuf, stdout);
188: *st->outbuf = '\0';
189: st->outbuflen = 0;
1.32 schwarze 190:
191: if (OUST_NL == st->oust)
192: st->oust = OUST_TXT;
1.31 schwarze 193: }
194:
195: static void
1.32 schwarze 196: mdoc_newln(struct state *st)
1.31 schwarze 197: {
198:
1.32 schwarze 199: if (OUST_NL == st->oust)
1.31 schwarze 200: return;
1.32 schwarze 201:
1.31 schwarze 202: putchar('\n');
203: last = '\n';
1.32 schwarze 204: st->oust = OUST_NL;
205: st->wantws = 1;
1.31 schwarze 206: }
207:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', also when the name is too long
 * to be one we know; if no '>' is found, start ends up at "end".
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	/*
	 * If the name did not fit, skip whatever is left of it.
	 * This is a no-op when the loop above already stopped
	 * at the '>' or at the end of the input.
	 */
	while (*start < end && '>' != buf[*start])
		(*start)++;

	/* Unterminated escape. */
	if (*start >= end)
		return;

	/* Always consume the terminator so that it is not printed. */
	assert('>' == buf[*start]);
	(*start)++;

	/* Too long to be any of the names handled below. */
	if (i == max)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
254:
255: /*
1.9 kristaps 256: * Run some heuristics to intuit a link format.
1.19 kristaps 257: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 258: * that the caller can safely just continue processing.
1.19 kristaps 259: * If this is just an empty tag, I'll return 0.
1.32 schwarze 260: *
261: * Always operates in OUST_MAC mode.
262: * Mode handling is done by the caller.
1.9 kristaps 263: */
264: static int
265: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
266: {
1.21 kristaps 267: size_t linkstart, realend, linkend,
268: i, j, textsz, stack;
1.9 kristaps 269:
270: /*
271: * Scan to the start of the terminus.
272: * This function is more or less replicated in the formatcode()
273: * for null or index formatting codes.
1.23 kristaps 274: * However, we're slightly different because we might have
275: * nested escapes we need to ignore.
1.9 kristaps 276: */
1.21 kristaps 277: stack = 0;
1.19 kristaps 278: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 279: if ('<' == buf[realend])
280: stack++;
1.19 kristaps 281: if ('>' != buf[realend])
1.9 kristaps 282: continue;
1.23 kristaps 283: else if (stack-- > 0)
284: continue;
285: if (dsz == 1)
1.9 kristaps 286: break;
1.19 kristaps 287: assert(realend > 0);
288: if (' ' != buf[realend - 1])
1.9 kristaps 289: continue;
1.19 kristaps 290: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 291: if ('>' != buf[i++])
292: break;
293: if (dsz == j)
294: break;
295: }
1.19 kristaps 296:
297: /* Ignore stubs. */
298: if (realend == end || realend == *start)
1.9 kristaps 299: return(0);
300:
1.19 kristaps 301: /* Set linkend to the end of content. */
302: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 303:
1.19 kristaps 304: /* Re-scan to see if we have a title or section. */
305: for (textsz = *start; textsz < linkend; textsz++)
306: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 307: break;
308:
1.19 kristaps 309: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 310: /* With title: set start, then end at section. */
1.19 kristaps 311: linkstart = textsz + 1;
1.18 kristaps 312: textsz = textsz - *start;
1.19 kristaps 313: for (i = linkstart; i < linkend; i++)
314: if ('/' == buf[i])
315: break;
316: if (i < linkend)
317: linkend = i;
1.20 kristaps 318: } else if (textsz < linkend && '/' == buf[textsz]) {
319: /* With section: set end at section. */
320: linkend = textsz;
321: textsz = 0;
322: } else
323: /* No title, no section. */
1.18 kristaps 324: textsz = 0;
1.19 kristaps 325:
326: *start = realend;
327: j = linkend - linkstart;
328:
1.20 kristaps 329: /* Do we have only subsection material? */
330: if (0 == j && '/' == buf[linkend]) {
331: linkstart = linkend + 1;
332: linkend = dsz > 1 ? realend - 1 : realend;
333: if (0 == (j = linkend - linkstart))
334: return(0);
335: printf("Sx %.*s", (int)j, &buf[linkstart]);
336: return(1);
337: } else if (0 == j)
1.19 kristaps 338: return(0);
339:
340: /* See if we qualify as being a link or not. */
1.20 kristaps 341: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
342: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
343: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
344: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
345: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
346: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
347: /* Gross. */
348: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
349: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 350: return(1);
351: }
352:
353: /* See if we qualify as a mailto. */
1.20 kristaps 354: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 355: printf("Mt %.*s", (int)j, &buf[linkstart]);
356: return(1);
357: }
358:
359: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
360: if ((j > 3 && ')' == buf[linkend - 1]) &&
361: ('(' == buf[linkend - 3])) {
362: printf("Xr %.*s %c", (int)(j - 3),
363: &buf[linkstart], buf[linkend - 2]);
364: return(1);
365: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
366: ('(' == buf[linkend - 4])) {
367: printf("Xr %.*s %.*s", (int)(j - 4),
368: &buf[linkstart], 2, &buf[linkend - 3]);
369: return(1);
370: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
371: ('(' == buf[linkend - 5])) {
372: printf("Xr %.*s %.*s", (int)(j - 5),
373: &buf[linkstart], 3, &buf[linkend - 4]);
374: return(1);
375: }
376:
377: /* Last try: do we have a double-colon? */
378: for (i = linkstart + 1; i < linkend; i++)
379: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 380: break;
1.9 kristaps 381:
1.19 kristaps 382: if (i < linkend)
1.10 kristaps 383: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 384: (int)j, &buf[linkstart]);
1.9 kristaps 385: else
1.19 kristaps 386: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 387:
388: return(1);
389: }
390:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry buf[*start] must be '-' and at least one more byte must
 * follow before "end".  The macro names ("Fl", "Ar") and any flag
 * words are printed to standard output, and "start" is advanced past
 * what was consumed.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i;
	unsigned char	 uc;	/* ctype functions need unsigned values */
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/* A dash not followed by a flag character is no flag. */
	uc = (unsigned char)buf[*start + 1];
	if ( ! isalnum(uc) && '?' != uc && '-' != uc) {
		/*
		 * NOTE(review): this steps back to the byte in front
		 * of the dash; confirm that the caller expects that.
		 */
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Find the end of the flag word following the dash. */
	(*start)++;
	for (i = *start; i < end; i++) {
		uc = (unsigned char)buf[i];
		if ( ! isalnum(uc) && '?' != uc &&
		    '-' != uc && '_' != uc)
			break;
	}

	assert(i < end);

	/* The word must be ended by a blank or by the closing '>'. */
	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Keep a word looking like "Xx" from being taken as a macro. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		/* Another dash: handle it as the next flag. */
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		/* Anything else is the argument of the flag. */
		printf("Ar ");
		*start = i;
	}
}
459:
1.9 kristaps 460: /*
1.1 schwarze 461: * We're at the character in front of a format code, which is structured
462: * like X<...> and can contain nested format codes.
463: * This consumes the whole format code, and any nested format codes, til
464: * the end of matched production.
1.6 kristaps 465: * If "nomacro", then we don't print any macros, just contained data
466: * (e.g., following "Sh" or "Nm").
1.15 kristaps 467: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
468: * as the first format code on a line (for decoration as an "Nm"),
469: * non-zero otherwise.
1.32 schwarze 470: *
471: * Output mode handling is most complicated here.
472: * We may enter in any mode.
473: * We usually exit in OUST_MAC mode, except when
474: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 475: */
1.33 schwarze 476: static int
1.15 kristaps 477: formatcode(struct state *st, const char *buf, size_t *start,
1.32 schwarze 478: size_t end, int nomacro, int pos)
1.1 schwarze 479: {
480: enum fmt fmt;
1.5 kristaps 481: size_t i, j, dsz;
1.39 ! schwarze 482: unsigned char uc;
1.1 schwarze 483:
484: assert(*start + 1 < end);
485: assert('<' == buf[*start + 1]);
486:
1.6 kristaps 487: /*
488: * First, look up the format code.
1.30 schwarze 489: * If it's not valid, treat it as a NOOP.
1.6 kristaps 490: */
491: for (fmt = 0; fmt < FMT__MAX; fmt++)
492: if (buf[*start] == fmts[fmt])
493: break;
494:
1.5 kristaps 495: /*
496: * Determine whether we're overriding our delimiter.
497: * According to POD, if we have more than one '<' followed by a
498: * space, then we need a space followed by matching '>' to close
499: * the expression.
500: * Otherwise we use the usual '<' and '>' matched pair.
501: */
502: i = *start + 1;
503: while (i < end && '<' == buf[i])
504: i++;
505: assert(i > *start + 1);
506: dsz = i - (*start + 1);
507: if (dsz > 1 && (i >= end || ' ' != buf[i]))
508: dsz = 1;
509:
510: /* Remember, if dsz>1, to jump the trailing space. */
511: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 512:
513: /*
1.6 kristaps 514: * Escapes and ignored codes (NULL and INDEX) don't print macro
515: * sequences, so just output them like normal text before
516: * processing for real macros.
1.1 schwarze 517: */
518: if (FMT_ESCAPE == fmt) {
1.31 schwarze 519: formatescape(st, buf, start, end);
1.33 schwarze 520: return(0);
1.1 schwarze 521: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 522: /*
1.6 kristaps 523: * Just consume til the end delimiter, accounting for
524: * whether it's a custom one.
1.5 kristaps 525: */
526: for ( ; *start < end; (*start)++) {
527: if ('>' != buf[*start])
528: continue;
529: else if (dsz == 1)
530: break;
531: assert(*start > 0);
532: if (' ' != buf[*start - 1])
533: continue;
534: i = *start;
535: for (j = 0; i < end && j < dsz; j++)
536: if ('>' != buf[i++])
537: break;
538: if (dsz != j)
539: continue;
540: (*start) += dsz;
541: break;
542: }
1.24 kristaps 543: if (*start < end) {
544: assert('>' == buf[*start]);
545: (*start)++;
546: }
547: if (isspace(last))
548: while (*start < end && isspace((int)buf[*start]))
549: (*start)++;
1.33 schwarze 550: return(0);
1.1 schwarze 551: }
552:
1.6 kristaps 553: /*
554: * Check whether we're supposed to print macro stuff (this is
555: * suppressed in, e.g., "Nm" and "Sh" macros).
556: */
1.30 schwarze 557: if (FMT__MAX != fmt && !nomacro) {
1.32 schwarze 558:
559: /*
560: * We may already have wantws if there was whitespace
561: * before the code ("text B<text"), but initial
562: * whitespace inside our scope ("textB< text")
563: * allows to break at this point as well.
564: */
565:
566: st->wantws |= ' ' == buf[*start];
1.31 schwarze 567:
1.1 schwarze 568: /*
1.31 schwarze 569: * If we are on a text line and there is no
570: * whitespace before our content, we have to make
571: * the previous word a prefix to the macro line.
1.32 schwarze 572: * In the following, mdoc_newln() must not be used
573: * lest we clobber out output state.
1.1 schwarze 574: */
1.31 schwarze 575:
1.32 schwarze 576: if (OUST_MAC != st->oust && !st->wantws) {
577: if (OUST_NL != st->oust)
1.31 schwarze 578: putchar('\n');
579: printf(".Pf ");
580: }
581:
582: outbuf_flush(st);
583:
584: /* Whitespace is easier to suppress on macro lines. */
585:
1.32 schwarze 586: if (OUST_MAC == st->oust && !st->wantws)
587: printf(" Ns ");
1.31 schwarze 588:
589: /* Unless we are on a macro line, start one. */
590:
1.32 schwarze 591: if (OUST_MAC != st->oust && st->wantws) {
592: if (OUST_NL != st->oust)
1.6 kristaps 593: putchar('\n');
1.1 schwarze 594: putchar('.');
1.31 schwarze 595: } else
1.1 schwarze 596: putchar(' ');
1.31 schwarze 597:
1.32 schwarze 598: /*
599: * Print the macro corresponding to this format code,
600: * and update the output state afterwards.
601: */
1.6 kristaps 602:
1.1 schwarze 603: switch (fmt) {
604: case (FMT_ITALIC):
605: printf("Em ");
606: break;
607: case (FMT_BOLD):
1.14 kristaps 608: if (SECT_SYNOPSIS == st->sect) {
609: if (1 == dsz && '-' == buf[*start])
610: dosynopsisfl(buf, start, end);
1.15 kristaps 611: else if (0 == pos)
612: printf("Nm ");
1.14 kristaps 613: else
614: printf("Ar ");
615: break;
1.39 ! schwarze 616: }
! 617: i = 0;
! 618: uc = buf[*start];
! 619: while (isalnum(uc) || '_' == uc || ' ' == uc)
! 620: uc = buf[*start + ++i];
! 621: if ('=' != uc && '>' != uc)
! 622: i = 0;
! 623: if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
1.27 schwarze 624: printf("Dv ");
1.38 schwarze 625: break;
626: }
1.39 ! schwarze 627: switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
! 628: case MDOC_Fa:
1.38 schwarze 629: printf("Fa ");
1.39 ! schwarze 630: break;
! 631: case MDOC_Vt:
! 632: printf("Vt ");
! 633: break;
! 634: default:
1.27 schwarze 635: printf("Sy ");
1.39 ! schwarze 636: break;
! 637: }
1.1 schwarze 638: break;
639: case (FMT_CODE):
1.2 schwarze 640: printf("Qo Li ");
1.1 schwarze 641: break;
642: case (FMT_LINK):
1.19 kristaps 643: /* Try to link; use "No" if it's empty. */
1.9 kristaps 644: if ( ! trylink(buf, start, end, dsz))
645: printf("No ");
1.1 schwarze 646: break;
647: case (FMT_FILE):
648: printf("Pa ");
649: break;
650: case (FMT_NBSP):
651: printf("No ");
652: break;
653: default:
654: abort();
655: }
1.32 schwarze 656: st->oust = OUST_MAC;
657: st->wantws = 1;
1.31 schwarze 658: } else
659: outbuf_flush(st);
1.1 schwarze 660:
661: /*
1.6 kristaps 662: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 663: * find a nested format code.
1.1 schwarze 664: * Don't emit any newlines: since we're on a macro line, we
665: * don't want to break the line.
666: */
667: while (*start < end) {
1.5 kristaps 668: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 669: (*start)++;
670: break;
1.5 kristaps 671: } else if ('>' == buf[*start] &&
672: ' ' == buf[*start - 1]) {
673: /*
674: * Handle custom delimiters.
675: * These require a certain number of
676: * space-preceded carrots before we're really at
677: * the end.
678: */
679: i = *start;
680: for (j = 0; i < end && j < dsz; j++)
681: if ('>' != buf[i++])
682: break;
683: if (dsz == j) {
684: *start += dsz;
685: break;
686: }
1.1 schwarze 687: }
1.34 schwarze 688: if (*start + 1 < end && '<' == buf[*start + 1] &&
689: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.32 schwarze 690: formatcode(st, buf, start, end, nomacro, 1);
1.1 schwarze 691: continue;
692: }
1.3 schwarze 693:
1.32 schwarze 694: /* Suppress newlines and multiple spaces. */
695:
696: last = buf[(*start)++];
697: if (' ' == last || '\n' == last) {
698: putchar(' ');
699: while (*start < end && ' ' == buf[*start])
700: (*start)++;
701: continue;
702: }
703:
1.33 schwarze 704: if (OUST_MAC == st->oust && FMT__MAX != fmt) {
1.32 schwarze 705: if ( ! st->wantws) {
706: printf(" Ns ");
707: st->wantws = 1;
708: }
709:
710: /*
711: * Escape macro-like words.
712: * This matches "Xx " and "XxEOLN".
713: */
714:
715: if (end - *start > 0 &&
716: isupper((unsigned char)last) &&
717: islower((unsigned char)buf[*start]) &&
718: (end - *start == 1 ||
719: ' ' == buf[*start + 1] ||
720: '>' == buf[*start + 1]))
721: printf("\\&");
722: }
1.3 schwarze 723:
1.32 schwarze 724: putchar(last);
1.4 schwarze 725:
1.8 kristaps 726: /* Protect against character escapes. */
1.32 schwarze 727:
1.8 kristaps 728: if ('\\' == last)
729: putchar('e');
1.1 schwarze 730: }
1.2 schwarze 731:
1.33 schwarze 732: if (FMT__MAX == fmt)
733: return(0);
734:
1.2 schwarze 735: if ( ! nomacro && FMT_CODE == fmt)
736: printf(" Qc ");
1.1 schwarze 737:
1.33 schwarze 738: st->wantws = ' ' == last;
739: return(1);
1.1 schwarze 740: }
741:
742: /*
743: * Calls formatcode() til the end of a paragraph.
1.32 schwarze 744: * Goes to OUST_MAC mode and stays there when returning,
745: * such that the caller can add arguments to the macro line
746: * before closing it out.
1.1 schwarze 747: */
748: static void
1.32 schwarze 749: formatcodeln(struct state *st, const char *linemac,
750: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 751: {
1.33 schwarze 752: int gotmacro, wantws;
1.1 schwarze 753:
1.32 schwarze 754: assert(OUST_NL == st->oust);
755: assert(st->wantws);
756: printf(".%s ", linemac);
757: st->oust = OUST_MAC;
758:
1.33 schwarze 759: gotmacro = 0;
1.1 schwarze 760: while (*start < end) {
1.33 schwarze 761: wantws = ' ' == buf[*start] || '\n' == buf[*start];
762: if (wantws) {
763: last = ' ';
764: do {
765: (*start)++;
766: } while (*start < end && ' ' == buf[*start]);
767: }
768:
1.34 schwarze 769: if (*start + 1 < end && '<' == buf[*start + 1] &&
770: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.33 schwarze 771: st->wantws |= wantws;
772: gotmacro = formatcode(st, buf,
773: start, end, nomacro, 1);
1.1 schwarze 774: continue;
775: }
1.32 schwarze 776:
1.33 schwarze 777: if (gotmacro) {
778: if (*start < end || st->outbuflen) {
779: if (st->wantws ||
780: (wantws && !st->outbuflen))
781: printf(" No ");
782: else
783: printf(" Ns ");
784: }
785: gotmacro = 0;
786: }
787: outbuf_flush(st);
788: st->wantws = wantws;
789:
790: if (*start >= end)
791: break;
792:
793: if (st->wantws) {
794: putchar(' ');
795: st->wantws = 0;
1.32 schwarze 796: }
797:
1.4 schwarze 798: /*
799: * Since we're already on a macro line, we want to make
800: * sure that we don't inadvertently invoke a macro.
801: * We need to do this carefully because section names
802: * are used in troff and we don't want to escape
803: * something that needn't be escaped.
804: */
805: if (' ' == last && end - *start > 1 &&
1.33 schwarze 806: isupper((unsigned char)buf[*start]) &&
807: islower((unsigned char)buf[*start + 1]) &&
808: (end - *start == 2 || ' ' == buf[*start + 2]))
1.4 schwarze 809: printf("\\&");
810:
1.33 schwarze 811: putchar(last = buf[*start]);
1.8 kristaps 812:
813: /* Protect against character escapes. */
1.33 schwarze 814:
1.8 kristaps 815: if ('\\' == last)
816: putchar('e');
817:
1.1 schwarze 818: (*start)++;
819: }
820: }
821:
822: /*
1.4 schwarze 823: * Guess at what kind of list we are.
824: * These are taken straight from the POD manual.
825: * I don't know what people do in real life.
826: */
827: static enum list
828: listguess(const char *buf, size_t start, size_t end)
829: {
830: size_t len = end - start;
831:
832: assert(end >= start);
833:
834: if (len == 1 && '*' == buf[start])
835: return(LIST_BULLET);
836: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
837: return(LIST_ENUM);
838: else if (len == 1 && '1' == buf[start])
839: return(LIST_ENUM);
840: else
841: return(LIST_TAG);
842: }
843:
844: /*
1.1 schwarze 845: * A command paragraph, as noted in the perlpod manual, just indicates
846: * that we should do something, optionally with some text to print as
847: * well.
1.32 schwarze 848: * From the perspective of external callers,
849: * always stays in OUST_NL/wantws mode,
850: * but its children do use OUST_MAC.
1.1 schwarze 851: */
852: static void
853: command(struct state *st, const char *buf, size_t start, size_t end)
854: {
855: size_t len, csz;
856: enum cmd cmd;
857:
858: assert('=' == buf[start]);
859: start++;
860: len = end - start;
861:
862: for (cmd = 0; cmd < CMD__MAX; cmd++) {
863: csz = strlen(cmds[cmd]);
864: if (len < csz)
865: continue;
866: if (0 == memcmp(&buf[start], cmd[cmds], csz))
867: break;
868: }
869:
870: /* Ignore bogus commands. */
871:
872: if (CMD__MAX == cmd)
873: return;
874:
875: start += csz;
1.8 kristaps 876: while (start < end && ' ' == buf[start])
877: start++;
878:
1.1 schwarze 879: len = end - start;
880:
881: if (st->paused) {
882: st->paused = CMD_END != cmd;
883: return;
884: }
885:
886: switch (cmd) {
887: case (CMD_POD):
888: break;
889: case (CMD_HEAD1):
890: /*
891: * The behaviour of head= follows from a quick glance at
892: * how pod2man handles it.
893: */
1.11 kristaps 894: st->sect = SECT_NONE;
895: if (end - start == 4) {
1.1 schwarze 896: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 897: st->sect = SECT_NAME;
898: } else if (end - start == 8) {
899: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
900: st->sect = SECT_SYNOPSIS;
901: }
1.32 schwarze 902: formatcodeln(st, "Sh", buf, &start, end, 1);
903: mdoc_newln(st);
1.1 schwarze 904: st->haspar = 1;
905: break;
906: case (CMD_HEAD2):
1.32 schwarze 907: formatcodeln(st, "Ss", buf, &start, end, 1);
908: mdoc_newln(st);
1.1 schwarze 909: st->haspar = 1;
910: break;
911: case (CMD_HEAD3):
912: puts(".Pp");
1.32 schwarze 913: formatcodeln(st, "Em", buf, &start, end, 0);
914: mdoc_newln(st);
1.1 schwarze 915: puts(".Pp");
916: st->haspar = 1;
917: break;
918: case (CMD_HEAD4):
919: puts(".Pp");
1.32 schwarze 920: formatcodeln(st, "No", buf, &start, end, 0);
921: mdoc_newln(st);
1.1 schwarze 922: puts(".Pp");
923: st->haspar = 1;
924: break;
925: case (CMD_OVER):
1.4 schwarze 926: /*
927: * If we have an existing list that hasn't had an =item
928: * yet, then make sure that we open it now.
929: * We use the default list type, but that can't be
930: * helped (we haven't seen any items yet).
1.1 schwarze 931: */
1.4 schwarze 932: if (st->lpos > 0)
933: if (LIST__MAX == st->lstack[st->lpos - 1]) {
934: st->lstack[st->lpos - 1] = LIST_TAG;
935: puts(".Bl -tag -width Ds");
936: }
937: st->lpos++;
938: assert(st->lpos < LIST_STACKSZ);
939: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 940: break;
941: case (CMD_ITEM):
1.6 kristaps 942: if (0 == st->lpos) {
943: /*
944: * Bad markup.
945: * Try to compensate.
946: */
947: st->lstack[st->lpos] = LIST__MAX;
948: st->lpos++;
949: }
1.4 schwarze 950: assert(st->lpos > 0);
951: /*
952: * If we're the first =item, guess at what our content
953: * will be: "*" is a bullet list, "1." is a numbered
954: * list, and everything is tagged.
955: */
956: if (LIST__MAX == st->lstack[st->lpos - 1]) {
957: st->lstack[st->lpos - 1] =
958: listguess(buf, start, end);
959: switch (st->lstack[st->lpos - 1]) {
960: case (LIST_BULLET):
961: puts(".Bl -bullet");
962: break;
963: case (LIST_ENUM):
964: puts(".Bl -enum");
965: break;
966: default:
967: puts(".Bl -tag -width Ds");
968: break;
969: }
970: }
971: switch (st->lstack[st->lpos - 1]) {
972: case (LIST_TAG):
1.32 schwarze 973: formatcodeln(st, "It", buf, &start, end, 0);
974: mdoc_newln(st);
1.4 schwarze 975: break;
976: case (LIST_ENUM):
977: /* FALLTHROUGH */
978: case (LIST_BULLET):
979: /*
980: * Abandon the remainder of the paragraph
981: * because we're going to be a bulletted or
982: * numbered list.
983: */
984: puts(".It");
985: break;
986: default:
987: abort();
988: }
1.1 schwarze 989: st->haspar = 1;
990: break;
991: case (CMD_BACK):
1.4 schwarze 992: /* Make sure we don't back over the stack. */
993: if (st->lpos > 0) {
994: st->lpos--;
995: puts(".El");
996: }
1.1 schwarze 997: break;
998: case (CMD_BEGIN):
999: /*
1000: * We disregard all types for now.
1001: * TODO: process at least "text" in a -literal block.
1002: */
1003: st->paused = 1;
1004: break;
1005: case (CMD_FOR):
1006: /*
1007: * We ignore all types of encodings and formats
1008: * unilaterally.
1009: */
1010: break;
1011: case (CMD_ENCODING):
1012: break;
1013: case (CMD_CUT):
1014: st->parsing = 0;
1015: return;
1016: default:
1017: abort();
1018: }
1019:
1020: /* Any command (but =cut) makes us start parsing. */
1021: st->parsing = 1;
1022: }
1023:
1024: /*
1.39 ! schwarze 1025: * Put the type provided as an argument into the dictionary.
! 1026: */
! 1027: static void
! 1028: register_type(const char *ptype)
! 1029: {
! 1030: const char *pname, *pend;
! 1031:
! 1032: pname = ptype;
! 1033: while (isalnum((unsigned char)*pname) || '_' == *pname)
! 1034: pname++;
! 1035: if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) ||
! 1036: (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) {
! 1037: while (' ' == *pname)
! 1038: pname++;
! 1039: pend = pname;
! 1040: while (isalnum((unsigned char)*pend) || '_' == *pend)
! 1041: pend++;
! 1042: if (pend > pname)
! 1043: dict_put(pname, pend - pname, MDOC_Vt);
! 1044: } else
! 1045: pend = pname;
! 1046: if (pend > ptype)
! 1047: dict_put(ptype, pend - ptype, MDOC_Vt);
! 1048: }
! 1049:
! 1050: /*
1.1 schwarze 1051: * Just pump out the line in a verbatim block.
1.32 schwarze 1052: * From the perspective of external callers,
1053: * always stays in OUST_NL/wantws mode.
1.1 schwarze 1054: */
1055: static void
1.35 schwarze 1056: verbatim(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1057: {
1.36 schwarze 1058: size_t i, ift, ifo, ifa, ifc, inl;
1.38 schwarze 1059: char *cp, *cp2;
1.36 schwarze 1060: int nopen;
1.1 schwarze 1061:
1.35 schwarze 1062: if ( ! st->parsing || st->paused || start == end)
1.1 schwarze 1063: return;
1.22 kristaps 1064: again:
1065: /*
1066: * If we're in the SYNOPSIS, see if we're an #include block.
1067: * If we are, then print the "In" macro and re-loop.
1068: * This handles any number of inclusions, but only when they
1069: * come before the remaining parts...
1070: */
1071: if (SECT_SYNOPSIS == st->sect) {
1072: i = start;
1.35 schwarze 1073: while (i < end && buf[i] == ' ')
1074: i++;
1.22 kristaps 1075: if (i == end)
1076: return;
1.35 schwarze 1077:
1.22 kristaps 1078: /* We're an include block! */
1079: if (end - i > 10 &&
1080: 0 == memcmp(&buf[i], "#include <", 10)) {
1081: start = i + 10;
1082: while (start < end && ' ' == buf[start])
1083: start++;
1084: fputs(".In ", stdout);
1085: /* Stop til the '>' marker or we hit eoln. */
1086: while (start < end &&
1087: '>' != buf[start] && '\n' != buf[start])
1088: putchar(buf[start++]);
1089: putchar('\n');
1090: if (start < end && '>' == buf[start])
1091: start++;
1092: if (start < end && '\n' == buf[start])
1093: start++;
1094: if (start < end)
1095: goto again;
1096: return;
1097: }
1.35 schwarze 1098:
1099: /* Parse function declaration. */
1100: ifo = ifa = ifc = 0;
1.36 schwarze 1101: inl = end;
1102: nopen = 0;
1103: for (ift = i; i < end; i++) {
1104: if (ifc) {
1105: if (buf[i] != '\n')
1106: continue;
1107: inl = i;
1108: break;
1109: }
1110: switch (buf[i]) {
1111: case ' ':
1112: if ( ! ifa)
1113: ifo = i;
1114: break;
1115: case '(':
1116: if (ifo) {
1117: nopen++;
1118: if ( ! ifa)
1119: ifa = i;
1120: } else
1121: i = end;
1122: break;
1123: case ')':
1124: switch (nopen) {
1125: case 0:
1126: i = end;
1127: break;
1128: case 1:
1.35 schwarze 1129: ifc = i;
1.36 schwarze 1130: break;
1131: default:
1132: nopen--;
1133: break;
1134: }
1135: break;
1136: default:
1137: break;
1138: }
1.35 schwarze 1139: }
1140:
1141: /* Encode function declaration. */
1142: if (ifc) {
1.36 schwarze 1143: for (i = ifa; i < ifc; i++)
1144: if (buf[i] == '\n')
1145: buf[i] = ' ';
1.35 schwarze 1146: buf[ifo++] = '\0';
1.39 ! schwarze 1147: register_type(buf + ift);
1.35 schwarze 1148: printf(".Ft %s", buf + ift);
1149: if (buf[ifo] == '*') {
1150: fputs(" *", stdout);
1151: ifo++;
1152: }
1153: putchar('\n');
1154: buf[ifa++] = '\0';
1155: printf(".Fo %s\n", buf + ifo);
1.39 ! schwarze 1156: dict_put(buf + ifo, 0, MDOC_Fo);
1.35 schwarze 1157: buf[ifc++] = '\0';
1158: for (;;) {
1159: cp = strchr(buf + ifa, ',');
1.38 schwarze 1160: if (cp != NULL) {
1161: cp2 = cp;
1.36 schwarze 1162: *cp++ = '\0';
1.38 schwarze 1163: } else
1164: cp2 = strchr(buf + ifa, '\0');
1165: while (isalnum((unsigned char)cp2[-1]) ||
1166: '_' == cp2[-1])
1167: cp2--;
1168: if ('\0' != *cp2)
1.39 ! schwarze 1169: dict_put(cp2, 0, MDOC_Fa);
! 1170: register_type(buf + ifa);
1.35 schwarze 1171: printf(".Fa \"%s\"\n", buf + ifa);
1172: if (cp == NULL)
1173: break;
1.36 schwarze 1174: while (*cp == ' ')
1175: cp++;
1176: ifa = cp - buf;
1.35 schwarze 1177: }
1178: puts(".Fc");
1179: if (buf[ifc] == ';')
1180: ifc++;
1.36 schwarze 1181: if (ifc < inl) {
1182: buf[inl] = '\0';
1.35 schwarze 1183: puts(buf + ifc);
1184: }
1.36 schwarze 1185: start = inl + 1;
1.35 schwarze 1186: if (start < end)
1187: goto again;
1188: return;
1189: }
1.22 kristaps 1190: }
1191:
1.1 schwarze 1192: puts(".Bd -literal");
1.8 kristaps 1193: for (last = ' '; start < end; start++) {
1194: /*
1195: * Handle accidental macros (newline starting with
1196: * control character) and escapes.
1197: */
1198: if ('\n' == last)
1.7 kristaps 1199: if ('.' == buf[start] || '\'' == buf[start])
1200: printf("\\&");
1.8 kristaps 1201: putchar(last = buf[start]);
1202: if ('\\' == buf[start])
1203: printf("e");
1.7 kristaps 1204: }
1.31 schwarze 1205: putchar(last = '\n');
1.1 schwarze 1206: puts(".Ed");
1207: }
1208:
/*
 * Helper for dosynopsisop().
 * Scan buf[start..end) for a ']' that is not paired with a '[' seen
 * earlier in the same range, i.e. one that would close a bracket
 * opened before "start".
 * Return 1 if such a bracket exists, 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t		 depth;	/* '[' opened inside the range */
	size_t		 pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case '[':
			depth++;
			break;
		case ']':
			if (depth == 0)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
1226:
1227: /*
1228: * If we're in the SYNOPSIS section and we've encounter braces in an
1229: * ordinary paragraph, then try to see whether we're an [-option].
1230: * Do this, if we're an opening bracket, by first seeing if we have a
1231: * matching end via hasmatch().
1232: * If we're an ending bracket, see if we have a stack already.
1233: */
1234: static int
1.32 schwarze 1235: dosynopsisop(struct state *st, const char *buf,
1236: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1237: {
1238:
1239: assert('[' == buf[*start] || ']' == buf[*start]);
1240:
1241: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 schwarze 1242: mdoc_newln(st);
1.13 kristaps 1243: puts(".Oo");
1244: (*opstack)++;
1245: } else if ('[' == buf[*start])
1246: return(0);
1247:
1248: if (']' == buf[*start] && *opstack > 0) {
1.32 schwarze 1249: mdoc_newln(st);
1.13 kristaps 1250: puts(".Oc");
1251: (*opstack)--;
1252: } else if (']' == buf[*start])
1253: return(0);
1254:
1255: (*start)++;
1.31 schwarze 1256: last = '\n';
1.13 kristaps 1257: while (' ' == buf[*start])
1258: (*start)++;
1259: return(1);
1260: }
1261:
1262: /*
1.17 kristaps 1263: * Format multiple "Nm" manpage names in the NAME section.
1.32 schwarze 1264: * From the perspective of external callers,
1265: * always stays in OUST_NL/wantws mode,
1266: * but its children do use OUST_MAC.
1.17 kristaps 1267: */
1268: static void
1269: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1270: {
1271: size_t word;
1272:
1.32 schwarze 1273: assert(OUST_NL == st->oust);
1274: assert(st->wantws);
1275:
1.17 kristaps 1276: while (*start < end && ' ' == buf[*start])
1277: (*start)++;
1278:
1279: if (end == *start) {
1280: puts(".Nm unknown");
1281: return;
1282: }
1283:
1284: while (*start < end) {
1285: for (word = *start; word < end; word++)
1286: if (',' == buf[word])
1287: break;
1.32 schwarze 1288: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1289: if (*start == end) {
1.32 schwarze 1290: mdoc_newln(st);
1291: break;
1.17 kristaps 1292: }
1293: assert(',' == buf[*start]);
1.32 schwarze 1294: printf(" ,");
1295: mdoc_newln(st);
1.17 kristaps 1296: (*start)++;
1297: while (*start < end && ' ' == buf[*start])
1298: (*start)++;
1299: }
1300: }
1301:
1302: /*
1.1 schwarze 1303: * Ordinary paragraph.
1304: * Well, this is really the hardest--POD seems to assume that, for
1305: * example, a leading space implies a newline, and so on.
1306: * Lots of other snakes in the grass: escaping a newline followed by a
1307: * period (accidental mdoc(7) control), double-newlines after macro
1308: * passages, etc.
1.32 schwarze 1309: *
1310: * Uses formatcode() to go to OUST_MAC mode
1311: * and outbuf_flush() to go to OUST_TXT mode.
1312: * Main text mode wantws handling is in this function.
1313: * Must make sure to go back to OUST_NL/wantws mode before returning.
1.1 schwarze 1314: */
1315: static void
1316: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1317: {
1.13 kristaps 1318: size_t i, j, opstack;
1.15 kristaps 1319: int seq;
1.1 schwarze 1320:
1321: if ( ! st->parsing || st->paused)
1322: return;
1323:
1324: /*
1325: * Special-case: the NAME section.
1326: * If we find a "-" when searching from the end, assume that
1327: * we're in "name - description" format.
1328: * To wit, print out a "Nm" and "Nd" in that format.
1329: */
1.11 kristaps 1330: if (SECT_NAME == st->sect) {
1.15 kristaps 1331: for (i = end - 2; i > start; i--)
1332: if ('-' == buf[i] && ' ' == buf[i + 1])
1.1 schwarze 1333: break;
1334: if ('-' == buf[i]) {
1335: j = i;
1336: /* Roll over multiple "-". */
1337: for ( ; i > start; i--)
1338: if ('-' != buf[i])
1339: break;
1.17 kristaps 1340: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1341: start = j + 1;
1.17 kristaps 1342: while (start < end && ' ' == buf[start])
1343: start++;
1.32 schwarze 1344: formatcodeln(st, "Nd", buf, &start, end, 1);
1345: mdoc_newln(st);
1.1 schwarze 1346: return;
1347: }
1348: }
1349:
1350: if ( ! st->haspar)
1351: puts(".Pp");
1352:
1353: st->haspar = 0;
1354: last = '\n';
1.13 kristaps 1355: opstack = 0;
1.1 schwarze 1356:
1.15 kristaps 1357: for (seq = 0; start < end; seq++) {
1.1 schwarze 1358: /*
1359: * Loop til we get either to a newline or escape.
1360: * Escape initial control characters.
1361: */
1362: while (start < end) {
1.34 schwarze 1363: if (start < end - 1 && '<' == buf[start + 1] &&
1364: 'A' <= buf[start] && 'Z' >= buf[start])
1.1 schwarze 1365: break;
1366: else if ('\n' == buf[start])
1367: break;
1368: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1369: outbuf_addstr(st, "\\&");
1.1 schwarze 1370: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1371: outbuf_addstr(st, "\\&");
1.12 kristaps 1372: /*
1373: * If we're in the SYNOPSIS, have square
1374: * brackets indicate that we're opening and
1375: * closing an optional context.
1376: */
1.32 schwarze 1377:
1.13 kristaps 1378: if (SECT_SYNOPSIS == st->sect &&
1379: ('[' == buf[start] ||
1380: ']' == buf[start]) &&
1.32 schwarze 1381: dosynopsisop(st, buf,
1382: &start, end, &opstack))
1.13 kristaps 1383: continue;
1.32 schwarze 1384:
1385: /*
1386: * On whitespace, flush the output buffer
1387: * and allow breaking to a macro line.
1388: * Otherwise, buffer text and clear wantws.
1389: */
1390:
1.31 schwarze 1391: last = buf[start++];
1.37 schwarze 1392: if (' ' != last) {
1393: outbuf_addchar(st);
1394: continue;
1395: }
1396:
1397: if ( ! strcmp(st->outbuf + st->outbuflen - 2, "()") &&
1398: dict_get(st->outbuf, st->outbuflen - 2) ==
1399: MDOC_Fo) {
1400: st->outbuflen -= 2;
1401: st->outbuf[st->outbuflen] = '\0';
1402: mdoc_newln(st);
1403: fputs(".Fn ", stdout);
1.31 schwarze 1404: outbuf_flush(st);
1.37 schwarze 1405: mdoc_newln(st);
1406: continue;
1407: }
1408:
1409: outbuf_flush(st);
1410: putchar(' ');
1411: st->wantws = 1;
1.1 schwarze 1412: }
1413:
1.34 schwarze 1414: if (start < end - 1 && '<' == buf[start + 1] &&
1415: 'A' <= buf[start] && 'Z' >= buf[start]) {
1.32 schwarze 1416: formatcode(st, buf, &start, end, 0, seq);
1417: if (OUST_MAC == st->oust) {
1.30 schwarze 1418: /*
1419: * Let mdoc(7) handle trailing punctuation.
1420: * XXX Some punctuation characters
1421: * are not handled yet.
1422: */
1.16 kristaps 1423: if ((start == end - 1 ||
1424: (start < end - 1 &&
1425: (' ' == buf[start + 1] ||
1426: '\n' == buf[start + 1]))) &&
1427: ('.' == buf[start] ||
1428: ',' == buf[start])) {
1429: putchar(' ');
1430: putchar(buf[start++]);
1431: }
1.32 schwarze 1432:
1433: if (st->wantws ||
1434: ' ' == buf[start] ||
1435: '\n' == buf[start])
1436: mdoc_newln(st);
1437:
1.30 schwarze 1438: /*
1439: * Consume all whitespace
1440: * so we don't accidentally start
1441: * an implicit literal line.
1442: */
1.32 schwarze 1443:
1.6 kristaps 1444: while (start < end && ' ' == buf[start])
1445: start++;
1.32 schwarze 1446:
1447: /*
1448: * Some text is following.
1449: * Implement requested spacing.
1450: */
1451:
1452: if ( ! st->wantws && start < end &&
1.34 schwarze 1453: ('<' != buf[start + 1] ||
1454: 'A' > buf[start] ||
1455: 'Z' < buf[start])) {
1.32 schwarze 1456: printf(" Ns ");
1457: st->wantws = 1;
1458: }
1.6 kristaps 1459: }
1.1 schwarze 1460: } else if (start < end && '\n' == buf[start]) {
1.32 schwarze 1461: outbuf_flush(st);
1462: mdoc_newln(st);
1.1 schwarze 1463: if (++start >= end)
1464: continue;
1465: /*
1466: * If we have whitespace next, eat it to prevent
1467: * mdoc(7) from thinking that it's meant for
1468: * verbatim text.
1469: * It is--but if we start with that, we can't
1470: * have a macro subsequent it, which may be
1471: * possible if we have an escape next.
1472: */
1.31 schwarze 1473: if (' ' == buf[start] || '\t' == buf[start])
1.1 schwarze 1474: puts(".br");
1475: for ( ; start < end; start++)
1476: if (' ' != buf[start] && '\t' != buf[start])
1477: break;
1.12 kristaps 1478: }
1.1 schwarze 1479: }
1.32 schwarze 1480: outbuf_flush(st);
1481: mdoc_newln(st);
1.1 schwarze 1482: }
1483:
1484: /*
1485: * There are three kinds of paragraphs: verbatim (starts with whitespace
1486: * of some sort), ordinary (starts without "=" marker), or a command
1487: * (default: starts with "=").
1488: */
1489: static void
1.35 schwarze 1490: dopar(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1491: {
1492:
1.32 schwarze 1493: assert(OUST_NL == st->oust);
1494: assert(st->wantws);
1495:
1.1 schwarze 1496: if (end == start)
1497: return;
1498: if (' ' == buf[start] || '\t' == buf[start])
1499: verbatim(st, buf, start, end);
1500: else if ('=' != buf[start])
1501: ordinary(st, buf, start, end);
1502: else
1503: command(st, buf, start, end);
1504: }
1505:
1506: /*
1507: * Loop around paragraphs within a document, processing each one in the
1508: * POD way.
1509: */
1510: static void
1511: dofile(const struct args *args, const char *fname,
1.35 schwarze 1512: const struct tm *tm, char *buf, size_t sz)
1.1 schwarze 1513: {
1.29 schwarze 1514: char datebuf[64];
1.1 schwarze 1515: struct state st;
1.29 schwarze 1516: const char *fbase, *fext, *section, *date;
1.1 schwarze 1517: char *title, *cp;
1.29 schwarze 1518: size_t sup, end, i, cur = 0;
1.1 schwarze 1519:
1520: if (0 == sz)
1521: return;
1522:
1.29 schwarze 1523: /*
1524: * Parsing the filename is almost always required,
1525: * except when both the title and the section
1526: * are provided on the command line.
1527: */
1528:
1529: if (NULL == args->title || NULL == args->section) {
1530: fbase = strrchr(fname, '/');
1531: if (NULL == fbase)
1532: fbase = fname;
1533: else
1534: fbase++;
1535: fext = strrchr(fbase, '.');
1536: } else
1537: fext = NULL;
1538:
1539: /*
1540: * The title will be converted to uppercase,
1541: * so it needs to be copied.
1542: */
1543:
1544: title = (NULL != args->title) ? strdup(args->title) :
1545: (NULL != fext) ? strndup(fbase, fext - fbase) :
1546: strdup(fbase);
1.1 schwarze 1547:
1548: if (NULL == title) {
1549: perror(NULL);
1550: exit(EXIT_FAILURE);
1551: }
1552:
1553: /* Section is 1 unless suffix is "pm". */
1554:
1.29 schwarze 1555: section = (NULL != args->section) ? args->section :
1556: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1557: PERL_SECTION;
1.1 schwarze 1558:
1559: /* Date. Or the given "tm" if not supplied. */
1560:
1561: if (NULL == (date = args->date)) {
1562: strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm);
1563: date = datebuf;
1564: }
1565:
1566: for (cp = title; '\0' != *cp; cp++)
1567: *cp = toupper((int)*cp);
1568:
1569: /* The usual mdoc(7) preamble. */
1570:
1571: printf(".Dd %s\n", date);
1572: printf(".Dt %s %s\n", title, section);
1573: puts(".Os");
1574:
1575: free(title);
1576:
1.37 schwarze 1577: dict_init();
1.1 schwarze 1578: memset(&st, 0, sizeof(struct state));
1.32 schwarze 1579: st.oust = OUST_NL;
1580: st.wantws = 1;
1581:
1.1 schwarze 1582: assert(sz > 0);
1583:
1584: /* Main loop over file contents. */
1585:
1586: while (cur < sz) {
1587: /* Read until next paragraph. */
1588: for (i = cur + 1; i < sz; i++)
1589: if ('\n' == buf[i] && '\n' == buf[i - 1]) {
1590: /* Consume blank paragraphs. */
1591: while (i + 1 < sz && '\n' == buf[i + 1])
1592: i++;
1593: break;
1594: }
1595:
1596: /* Adjust end marker for EOF. */
1597: end = i < sz ? i - 1 :
1598: ('\n' == buf[sz - 1] ? sz - 1 : sz);
1599: sup = i < sz ? end + 2 : sz;
1600:
1601: /* Process paragraph and adjust start. */
1602: dopar(&st, buf, cur, end);
1603: cur = sup;
1604: }
1.37 schwarze 1605: dict_destroy();
1.1 schwarze 1606: }
1607:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, together with the modification time of the file, or
 * with the current time for stdin or when fstat(2) fails.
 *
 * Returns 1 on success and 0 if the file cannot be opened or read;
 * in the latter case, a message is printed to stderr.
 * Running out of memory terminates the program.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
	    open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * Growing before "cur" catches up keeps at least
		 * one spare byte after the data.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first: close(2) may overwrite errno. */
		perror(fname);
		free(buf);
		/* Do not leak the descriptor on the error path. */
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
	    "STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1674:
1675: int
1676: main(int argc, char *argv[])
1677: {
1678: const char *fname, *name;
1679: struct args args;
1680: int c;
1681:
1682: name = strrchr(argv[0], '/');
1683: if (name == NULL)
1684: name = argv[0];
1685: else
1686: ++name;
1687:
1688: memset(&args, 0, sizeof(struct args));
1689: fname = "-";
1690:
1691: /* Accept no arguments for now. */
1692:
1693: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1694: switch (c) {
1695: case ('h'):
1696: /* FALLTHROUGH */
1697: case ('l'):
1698: /* FALLTHROUGH */
1699: case ('c'):
1700: /* FALLTHROUGH */
1701: case ('o'):
1702: /* FALLTHROUGH */
1703: case ('q'):
1704: /* FALLTHROUGH */
1705: case ('r'):
1706: /* FALLTHROUGH */
1707: case ('u'):
1708: /* FALLTHROUGH */
1709: case ('v'):
1710: /* Ignore these. */
1711: break;
1712: case ('d'):
1713: args.date = optarg;
1714: break;
1715: case ('n'):
1716: args.title = optarg;
1717: break;
1718: case ('s'):
1719: args.section = optarg;
1720: break;
1721: default:
1722: goto usage;
1723: }
1724:
1725: argc -= optind;
1726: argv += optind;
1727:
1728: /* Accept only a single input file. */
1729:
1.25 schwarze 1730: if (argc > 1)
1731: goto usage;
1.1 schwarze 1732: else if (1 == argc)
1733: fname = *argv;
1734:
1735: return(readfile(&args, fname) ?
1736: EXIT_SUCCESS : EXIT_FAILURE);
1737:
1738: usage:
1739: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1740: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1741:
1742: return(EXIT_FAILURE);
1743: }
CVSweb