Annotation of pod2mdoc/pod2mdoc.c, Revision 1.58
1.58 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.57 2015/02/21 21:15:41 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.37 schwarze 4: * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/stat.h>
19: #include <sys/time.h>
20:
21: #include <assert.h>
22: #include <ctype.h>
23: #include <fcntl.h>
24: #include <getopt.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
1.37 schwarze 30: #include "dict.h"
31:
1.10 kristaps 32: /*
1.19 kristaps 33: * In what section can we find Perl module manuals?
34: * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
35: * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
1.10 kristaps 36: */
37: #define PERL_SECTION "3p"
38:
/*
 * Caller-supplied overrides for fields of the generated prologue.
 */
struct	args {
	const char	*title;		/* override "Dt" title */
	const char	*date;		/* override "Dd" date */
	const char	*section;	/* override "Dt" section */
};
44:
/*
 * Kinds of lists that can be open on the list stack.
 * LIST__MAX is the number of real kinds.
 */
enum	list {
	LIST_BULLET = 0,
	LIST_ENUM = 1,
	LIST_TAG = 2,
	LIST__MAX = 3
};
51:
/*
 * Sections that receive special treatment.
 */
enum	sect {
	SECT_NONE = 0,		/* any other section */
	SECT_NAME = 1,		/* NAME section */
	SECT_SYNOPSIS = 2	/* SYNOPSIS section */
};
57:
/*
 * What kind of output line is currently being written.
 */
enum	outstate {
	OUST_NL = 0,	/* just started a new output line */
	OUST_TXT = 1,	/* text line output in progress */
	OUST_MAC = 2	/* macro line output in progress */
};
63:
1.1 schwarze 64: struct state {
1.31 schwarze 65: const char *fname; /* file being parsed */
1.1 schwarze 66: int parsing; /* after =cut of before command */
67: int paused; /* in =begin and before =end */
1.11 kristaps 68: enum sect sect; /* which section are we in? */
1.4 schwarze 69: #define LIST_STACKSZ 128
70: enum list lstack[LIST_STACKSZ]; /* open lists */
71: size_t lpos; /* where in list stack */
1.31 schwarze 72: int haspar; /* in paragraph: do we need Pp? */
1.32 schwarze 73: enum outstate oust; /* state of the mdoc output stream */
74: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 75: char *outbuf; /* text buffered for output */
76: size_t outbufsz; /* allocated size of outbuf */
77: size_t outbuflen; /* current length of outbuf */
1.58 ! schwarze 78: size_t outlnlen; /* chars so far on this output line */
1.1 schwarze 79: };
80:
/*
 * POD formatting codes; the order matches the fmts[] letter table.
 * FMT__MAX doubles as "no such code".
 */
enum	fmt {
	FMT_ITALIC = 0,
	FMT_BOLD = 1,
	FMT_CODE = 2,
	FMT_LINK = 3,
	FMT_ESCAPE = 4,
	FMT_FILE = 5,
	FMT_NBSP = 6,
	FMT_INDEX = 7,
	FMT_NULL = 8,
	FMT__MAX = 9
};
93:
/*
 * POD command paragraphs; the order matches the cmds[] name table.
 * CMD__MAX doubles as "no such command".
 */
enum	cmd {
	CMD_POD = 0,
	CMD_HEAD1 = 1,
	CMD_HEAD2 = 2,
	CMD_HEAD3 = 3,
	CMD_HEAD4 = 4,
	CMD_OVER = 5,
	CMD_ITEM = 6,
	CMD_BACK = 7,
	CMD_BEGIN = 8,
	CMD_END = 9,
	CMD_FOR = 10,
	CMD_ENCODING = 11,
	CMD_CUT = 12,
	CMD__MAX = 13
};
1.55 schwarze 110:
/* Forward declarations of all file-local functions, in alphabetical order. */
static	void	 command(struct state *, const char *, size_t, size_t);
static	void	 dofile(const struct args *, const char *,
			const struct tm *, char *, size_t);
static	void	 donamenm(struct state *, const char *, size_t *, size_t);
static	void	 dopar(struct state *, char *, size_t, size_t);
static	void	 dosynopsisfl(const char *, size_t *, size_t);
static	int	 dosynopsisop(struct state *, const char *, size_t *,
			size_t, size_t *);
static	int	 formatcode(struct state *, const char *, size_t *,
			size_t, int, int);
static	void	 formatcodeln(struct state *, const char *, const char *,
			size_t *, size_t, int);
static	void	 formatescape(struct state *, const char *, size_t *, size_t);
static	int	 hasmatch(const char *, size_t, size_t);
static	void	 ordinary(struct state *, const char *, size_t, size_t);
static	void	 outbuf_addchar(struct state *);
static	void	 outbuf_addstr(struct state *, const char *);
static	void	 outbuf_flush(struct state *);
static	void	 outbuf_grow(struct state *, size_t);
static	enum list listguess(const char *, size_t, size_t);
static	void	 mdoc_newln(struct state *);
static	int	 readfile(const struct args *, const char *);
static	void	 register_type(const char *);
static	int	 trylink(const char *, size_t *, size_t, size_t);
static	void	 verbatim(struct state *, char *, size_t, size_t);
1.1 schwarze 136:
137: static const char *const cmds[CMD__MAX] = {
138: "pod", /* CMD_POD */
139: "head1", /* CMD_HEAD1 */
140: "head2", /* CMD_HEAD2 */
141: "head3", /* CMD_HEAD3 */
142: "head4", /* CMD_HEAD4 */
143: "over", /* CMD_OVER */
144: "item", /* CMD_ITEM */
145: "back", /* CMD_BACK */
146: "begin", /* CMD_BEGIN */
147: "end", /* CMD_END */
148: "for", /* CMD_FOR */
149: "encoding", /* CMD_ENCODING */
150: "cut" /* CMD_CUT */
151: };
152:
153: static const char fmts[FMT__MAX] = {
154: 'I', /* FMT_ITALIC */
155: 'B', /* FMT_BOLD */
156: 'C', /* FMT_CODE */
157: 'L', /* FMT_LINK */
158: 'E', /* FMT_ESCAPE */
159: 'F', /* FMT_FILE */
160: 'S', /* FMT_NBSP */
161: 'X', /* FMT_INDEX */
162: 'Z' /* FMT_NULL */
163: };
164:
/*
 * The character most recently processed.
 * The formatting loops set it from the input, outbuf_addstr() sets it
 * to the final byte of the string it appended, and mdoc_newln() sets
 * it to '\n'; outbuf_addchar() appends it to the output buffer.
 */
static	unsigned char	 last;
1.6 kristaps 166:
1.31 schwarze 167:
168: static void
169: outbuf_grow(struct state *st, size_t by)
170: {
171:
172: st->outbufsz += (by / 128 + 1) * 128;
173: st->outbuf = realloc(st->outbuf, st->outbufsz);
174: if (NULL == st->outbuf) {
175: perror(NULL);
176: exit(EXIT_FAILURE);
177: }
178: }
179:
180: static void
181: outbuf_addchar(struct state *st)
182: {
183:
184: if (st->outbuflen + 2 >= st->outbufsz)
185: outbuf_grow(st, 1);
186: st->outbuf[st->outbuflen++] = last;
187: if ('\\' == last)
188: st->outbuf[st->outbuflen++] = 'e';
189: st->outbuf[st->outbuflen] = '\0';
190: }
191:
192: static void
193: outbuf_addstr(struct state *st, const char *str)
194: {
195: size_t slen;
196:
197: slen = strlen(str);
198: if (st->outbuflen + slen >= st->outbufsz)
199: outbuf_grow(st, slen);
200: memcpy(st->outbuf + st->outbuflen, str, slen+1);
1.33 schwarze 201: st->outbuflen += slen;
1.31 schwarze 202: last = str[slen - 1];
203: }
204:
205: static void
206: outbuf_flush(struct state *st)
207: {
208:
209: if (0 == st->outbuflen)
210: return;
211:
1.58 ! schwarze 212: st->outlnlen += st->outbuflen;
! 213: if (OUST_TXT == st->oust && st->wantws) {
! 214: if (++st->outlnlen > 72) {
! 215: putchar('\n');
! 216: st->oust = OUST_NL;
! 217: st->outlnlen = st->outbuflen;
! 218: }
! 219: }
1.56 schwarze 220: if (OUST_NL != st->oust && st->wantws)
1.40 schwarze 221: putchar(' ');
222:
1.54 schwarze 223: if (OUST_MAC == st->oust && '"' == *st->outbuf)
224: printf("\\(dq%s", st->outbuf + 1);
225: else
226: fputs(st->outbuf, stdout);
227:
1.31 schwarze 228: *st->outbuf = '\0';
229: st->outbuflen = 0;
1.32 schwarze 230:
231: if (OUST_NL == st->oust)
232: st->oust = OUST_TXT;
1.31 schwarze 233: }
234:
235: static void
1.32 schwarze 236: mdoc_newln(struct state *st)
1.31 schwarze 237: {
238:
1.32 schwarze 239: if (OUST_NL == st->oust)
1.31 schwarze 240: return;
1.32 schwarze 241:
1.31 schwarze 242: putchar('\n');
243: last = '\n';
1.32 schwarze 244: st->oust = OUST_NL;
1.58 ! schwarze 245: st->outlnlen = 0;
1.32 schwarze 246: st->wantws = 1;
1.31 schwarze 247: }
248:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', or to end if there is no '>'.
 * Names of 15 or more characters are skipped without any output.
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/* Too long... skip til we end. */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		/*
		 * Consume the terminator as well, such that the caller
		 * does not see it as text or as the end of an
		 * enclosing format code.
		 */
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
295:
296: /*
1.9 kristaps 297: * Run some heuristics to intuit a link format.
1.19 kristaps 298: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 299: * that the caller can safely just continue processing.
1.19 kristaps 300: * If this is just an empty tag, I'll return 0.
1.32 schwarze 301: *
302: * Always operates in OUST_MAC mode.
303: * Mode handling is done by the caller.
1.9 kristaps 304: */
305: static int
306: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
307: {
1.21 kristaps 308: size_t linkstart, realend, linkend,
309: i, j, textsz, stack;
1.9 kristaps 310:
311: /*
312: * Scan to the start of the terminus.
313: * This function is more or less replicated in the formatcode()
314: * for null or index formatting codes.
1.23 kristaps 315: * However, we're slightly different because we might have
316: * nested escapes we need to ignore.
1.9 kristaps 317: */
1.21 kristaps 318: stack = 0;
1.19 kristaps 319: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 320: if ('<' == buf[realend])
321: stack++;
1.19 kristaps 322: if ('>' != buf[realend])
1.9 kristaps 323: continue;
1.23 kristaps 324: else if (stack-- > 0)
325: continue;
326: if (dsz == 1)
1.9 kristaps 327: break;
1.19 kristaps 328: assert(realend > 0);
329: if (' ' != buf[realend - 1])
1.9 kristaps 330: continue;
1.19 kristaps 331: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 332: if ('>' != buf[i++])
333: break;
334: if (dsz == j)
335: break;
336: }
1.19 kristaps 337:
338: /* Ignore stubs. */
339: if (realend == end || realend == *start)
1.9 kristaps 340: return(0);
341:
1.19 kristaps 342: /* Set linkend to the end of content. */
343: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 344:
1.19 kristaps 345: /* Re-scan to see if we have a title or section. */
346: for (textsz = *start; textsz < linkend; textsz++)
347: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 348: break;
349:
1.19 kristaps 350: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 351: /* With title: set start, then end at section. */
1.19 kristaps 352: linkstart = textsz + 1;
1.18 kristaps 353: textsz = textsz - *start;
1.19 kristaps 354: for (i = linkstart; i < linkend; i++)
355: if ('/' == buf[i])
356: break;
357: if (i < linkend)
358: linkend = i;
1.20 kristaps 359: } else if (textsz < linkend && '/' == buf[textsz]) {
360: /* With section: set end at section. */
361: linkend = textsz;
362: textsz = 0;
363: } else
364: /* No title, no section. */
1.18 kristaps 365: textsz = 0;
1.19 kristaps 366:
367: *start = realend;
368: j = linkend - linkstart;
369:
1.20 kristaps 370: /* Do we have only subsection material? */
371: if (0 == j && '/' == buf[linkend]) {
372: linkstart = linkend + 1;
373: linkend = dsz > 1 ? realend - 1 : realend;
374: if (0 == (j = linkend - linkstart))
375: return(0);
376: printf("Sx %.*s", (int)j, &buf[linkstart]);
377: return(1);
378: } else if (0 == j)
1.19 kristaps 379: return(0);
380:
381: /* See if we qualify as being a link or not. */
1.20 kristaps 382: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
383: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
384: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
385: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
386: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
387: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
388: /* Gross. */
389: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
390: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 391: return(1);
392: }
393:
394: /* See if we qualify as a mailto. */
1.20 kristaps 395: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 396: printf("Mt %.*s", (int)j, &buf[linkstart]);
397: return(1);
398: }
399:
400: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
401: if ((j > 3 && ')' == buf[linkend - 1]) &&
402: ('(' == buf[linkend - 3])) {
403: printf("Xr %.*s %c", (int)(j - 3),
404: &buf[linkstart], buf[linkend - 2]);
405: return(1);
406: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
407: ('(' == buf[linkend - 4])) {
408: printf("Xr %.*s %.*s", (int)(j - 4),
409: &buf[linkstart], 2, &buf[linkend - 3]);
410: return(1);
411: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
412: ('(' == buf[linkend - 5])) {
413: printf("Xr %.*s %.*s", (int)(j - 5),
414: &buf[linkstart], 3, &buf[linkend - 4]);
415: return(1);
416: }
417:
418: /* Last try: do we have a double-colon? */
419: for (i = linkstart + 1; i < linkend; i++)
420: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 421: break;
1.9 kristaps 422:
1.19 kristaps 423: if (i < linkend)
1.10 kristaps 424: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 425: (int)j, &buf[linkstart]);
1.9 kristaps 426: else
1.19 kristaps 427: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 428:
429: return(1);
430: }
431:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry, buf[*start] is the dash.
 * For each flag, "Fl" and the flag name are printed and *start is
 * advanced past the name and any blanks following it.
 * When "Ar" is printed instead, *start is left at the first character
 * of the argument text, including a leading dash if there is one,
 * such that the caller emits that text.
 * All macros after the first one are separated by a blank.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i;
	unsigned char	 uc;
	int		 first;

	first = 1;
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/* Go through unsigned char: ctype wants no negative values. */
	uc = (unsigned char)buf[*start + 1];
	if ( ! isalnum(uc) && '?' != uc && '-' != uc) {
		/* Not a flag; the dash belongs to the argument. */
		fputs(first ? "Ar" : " Ar", stdout);
		return;
	}

	(*start)++;
	for (i = *start; i < end; i++) {
		uc = (unsigned char)buf[i];
		if ( ! isalnum(uc) && '?' != uc &&
		    '-' != uc && '_' != uc)
			break;
	}

	assert(i < end);

	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		/* Not a flag after all; give the dash back. */
		(*start)--;
		fputs(first ? "Ar" : " Ar", stdout);
		return;
	}

	fputs(first ? "Fl " : " Fl ", stdout);
	first = 0;

	/* Escape flag names that look like two-letter macros. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		*start = i;
		if ('-' == buf[i])
			goto again;
		fputs(" Ar", stdout);
	}
}
500:
1.9 kristaps 501: /*
1.1 schwarze 502: * We're at the character in front of a format code, which is structured
503: * like X<...> and can contain nested format codes.
504: * This consumes the whole format code, and any nested format codes, til
505: * the end of matched production.
1.6 kristaps 506: * If "nomacro", then we don't print any macros, just contained data
507: * (e.g., following "Sh" or "Nm").
1.15 kristaps 508: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
509: * as the first format code on a line (for decoration as an "Nm"),
510: * non-zero otherwise.
1.32 schwarze 511: *
512: * Output mode handling is most complicated here.
513: * We may enter in any mode.
514: * We usually exit in OUST_MAC mode, except when
515: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 516: */
1.33 schwarze 517: static int
1.15 kristaps 518: formatcode(struct state *st, const char *buf, size_t *start,
1.32 schwarze 519: size_t end, int nomacro, int pos)
1.1 schwarze 520: {
1.40 schwarze 521: size_t i, j, dsz;
1.1 schwarze 522: enum fmt fmt;
1.39 schwarze 523: unsigned char uc;
1.56 schwarze 524: int gotmacro, wantws;
1.1 schwarze 525:
526: assert(*start + 1 < end);
527: assert('<' == buf[*start + 1]);
528:
1.6 kristaps 529: /*
530: * First, look up the format code.
1.30 schwarze 531: * If it's not valid, treat it as a NOOP.
1.6 kristaps 532: */
533: for (fmt = 0; fmt < FMT__MAX; fmt++)
534: if (buf[*start] == fmts[fmt])
535: break;
536:
1.5 kristaps 537: /*
538: * Determine whether we're overriding our delimiter.
539: * According to POD, if we have more than one '<' followed by a
540: * space, then we need a space followed by matching '>' to close
541: * the expression.
542: * Otherwise we use the usual '<' and '>' matched pair.
543: */
544: i = *start + 1;
545: while (i < end && '<' == buf[i])
546: i++;
547: assert(i > *start + 1);
548: dsz = i - (*start + 1);
549: if (dsz > 1 && (i >= end || ' ' != buf[i]))
550: dsz = 1;
551:
552: /* Remember, if dsz>1, to jump the trailing space. */
553: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 554:
555: /*
1.6 kristaps 556: * Escapes and ignored codes (NULL and INDEX) don't print macro
557: * sequences, so just output them like normal text before
558: * processing for real macros.
1.1 schwarze 559: */
560: if (FMT_ESCAPE == fmt) {
1.31 schwarze 561: formatescape(st, buf, start, end);
1.33 schwarze 562: return(0);
1.1 schwarze 563: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 564: /*
1.6 kristaps 565: * Just consume til the end delimiter, accounting for
566: * whether it's a custom one.
1.5 kristaps 567: */
568: for ( ; *start < end; (*start)++) {
569: if ('>' != buf[*start])
570: continue;
571: else if (dsz == 1)
572: break;
573: assert(*start > 0);
574: if (' ' != buf[*start - 1])
575: continue;
576: i = *start;
577: for (j = 0; i < end && j < dsz; j++)
578: if ('>' != buf[i++])
579: break;
580: if (dsz != j)
581: continue;
582: (*start) += dsz;
583: break;
584: }
1.24 kristaps 585: if (*start < end) {
586: assert('>' == buf[*start]);
587: (*start)++;
588: }
589: if (isspace(last))
590: while (*start < end && isspace((int)buf[*start]))
591: (*start)++;
1.33 schwarze 592: return(0);
1.1 schwarze 593: }
594:
1.6 kristaps 595: /*
596: * Check whether we're supposed to print macro stuff (this is
597: * suppressed in, e.g., "Nm" and "Sh" macros).
598: */
1.30 schwarze 599: if (FMT__MAX != fmt && !nomacro) {
1.32 schwarze 600:
601: /*
1.56 schwarze 602: * Do we need spacing before the upcoming macro,
603: * after any pending text already in the outbuf?
604: * We may already have wantws if there was whitespace
605: * before the code ("text B<text"), or there may be
606: * whitespace inside our scope ("textB< text").
607: */
608:
609: wantws = ' ' == buf[*start] ||
610: (st->wantws && ! st->outbuflen);
611:
612: /*
1.31 schwarze 613: * If we are on a text line and there is no
614: * whitespace before our content, we have to make
615: * the previous word a prefix to the macro line.
1.1 schwarze 616: */
1.31 schwarze 617:
1.56 schwarze 618: if (OUST_MAC != st->oust && ! wantws) {
1.32 schwarze 619: if (OUST_NL != st->oust)
1.54 schwarze 620: mdoc_newln(st);
1.56 schwarze 621: fputs(".Pf", stdout);
1.54 schwarze 622: st->oust = OUST_MAC;
1.56 schwarze 623: st->wantws = wantws = 1;
1.31 schwarze 624: }
625:
626: outbuf_flush(st);
627:
1.56 schwarze 628: /* Whitespace is easier to suppress on macro lines. */
1.31 schwarze 629:
1.56 schwarze 630: if (OUST_MAC == st->oust && ! wantws)
1.54 schwarze 631: printf(" Ns");
1.31 schwarze 632:
633: /* Unless we are on a macro line, start one. */
634:
1.54 schwarze 635: if (OUST_MAC != st->oust) {
1.32 schwarze 636: if (OUST_NL != st->oust)
1.54 schwarze 637: mdoc_newln(st);
1.1 schwarze 638: putchar('.');
1.54 schwarze 639: st->oust = OUST_MAC;
1.31 schwarze 640: } else
1.1 schwarze 641: putchar(' ');
1.54 schwarze 642: st->wantws = 1;
1.31 schwarze 643:
1.32 schwarze 644: /*
645: * Print the macro corresponding to this format code,
646: * and update the output state afterwards.
647: */
1.6 kristaps 648:
1.1 schwarze 649: switch (fmt) {
650: case (FMT_ITALIC):
1.56 schwarze 651: fputs("Em", stdout);
1.1 schwarze 652: break;
653: case (FMT_BOLD):
1.14 kristaps 654: if (SECT_SYNOPSIS == st->sect) {
655: if (1 == dsz && '-' == buf[*start])
656: dosynopsisfl(buf, start, end);
1.15 kristaps 657: else if (0 == pos)
1.56 schwarze 658: fputs("Nm", stdout);
1.14 kristaps 659: else
1.56 schwarze 660: fputs("Ar", stdout);
1.14 kristaps 661: break;
1.39 schwarze 662: }
663: i = 0;
664: uc = buf[*start];
665: while (isalnum(uc) || '_' == uc || ' ' == uc)
666: uc = buf[*start + ++i];
667: if ('=' != uc && '>' != uc)
668: i = 0;
669: if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
1.56 schwarze 670: fputs("Dv", stdout);
1.38 schwarze 671: break;
672: }
1.39 schwarze 673: switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
674: case MDOC_Fa:
1.56 schwarze 675: fputs("Fa", stdout);
1.39 schwarze 676: break;
677: case MDOC_Vt:
1.56 schwarze 678: fputs("Vt", stdout);
1.39 schwarze 679: break;
680: default:
1.56 schwarze 681: fputs("Sy", stdout);
1.39 schwarze 682: break;
683: }
1.1 schwarze 684: break;
685: case (FMT_CODE):
1.56 schwarze 686: fputs("Qo Li", stdout);
1.1 schwarze 687: break;
688: case (FMT_LINK):
1.19 kristaps 689: /* Try to link; use "No" if it's empty. */
1.9 kristaps 690: if ( ! trylink(buf, start, end, dsz))
1.56 schwarze 691: fputs("No", stdout);
1.1 schwarze 692: break;
693: case (FMT_FILE):
1.56 schwarze 694: fputs("Pa", stdout);
1.1 schwarze 695: break;
696: case (FMT_NBSP):
1.56 schwarze 697: fputs("No", stdout);
1.1 schwarze 698: break;
699: default:
700: abort();
701: }
1.56 schwarze 702: } else {
1.31 schwarze 703: outbuf_flush(st);
1.56 schwarze 704: st->wantws = 0;
705: }
1.1 schwarze 706:
707: /*
1.6 kristaps 708: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 709: * find a nested format code.
1.1 schwarze 710: * Don't emit any newlines: since we're on a macro line, we
711: * don't want to break the line.
712: */
1.56 schwarze 713:
714: gotmacro = 0;
1.1 schwarze 715: while (*start < end) {
1.5 kristaps 716: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 717: (*start)++;
718: break;
1.5 kristaps 719: } else if ('>' == buf[*start] &&
720: ' ' == buf[*start - 1]) {
721: /*
722: * Handle custom delimiters.
723: * These require a certain number of
724: * space-preceded carrots before we're really at
725: * the end.
726: */
727: i = *start;
728: for (j = 0; i < end && j < dsz; j++)
729: if ('>' != buf[i++])
730: break;
731: if (dsz == j) {
732: *start += dsz;
733: break;
734: }
1.1 schwarze 735: }
1.34 schwarze 736: if (*start + 1 < end && '<' == buf[*start + 1] &&
737: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.56 schwarze 738: gotmacro = formatcode(st, buf,
739: start, end, nomacro, 1);
1.1 schwarze 740: continue;
741: }
1.3 schwarze 742:
1.32 schwarze 743: /* Suppress newlines and multiple spaces. */
744:
745: last = buf[(*start)++];
1.56 schwarze 746: if (isspace(last)) {
747: outbuf_flush(st);
748: st->wantws = 1;
749: gotmacro = 0;
750: while (*start < end &&
751: isspace((unsigned char)buf[*start]))
1.32 schwarze 752: (*start)++;
753: continue;
754: }
755:
1.33 schwarze 756: if (OUST_MAC == st->oust && FMT__MAX != fmt) {
1.56 schwarze 757: if (gotmacro && ! st->wantws) {
758: printf(" Ns");
1.32 schwarze 759: st->wantws = 1;
760: }
1.56 schwarze 761: gotmacro = 0;
1.32 schwarze 762:
763: /*
764: * Escape macro-like words.
765: * This matches "Xx " and "XxEOLN".
766: */
767:
1.56 schwarze 768: if (*start < end && ! st->outbuflen &&
769: isupper(last) &&
1.32 schwarze 770: islower((unsigned char)buf[*start]) &&
771: (end - *start == 1 ||
772: ' ' == buf[*start + 1] ||
773: '>' == buf[*start + 1]))
1.56 schwarze 774: outbuf_addstr(st, "\\&");
775: last = buf[*start - 1];
1.32 schwarze 776: }
1.56 schwarze 777: outbuf_addchar(st);
778: }
1.3 schwarze 779:
1.56 schwarze 780: if (FMT__MAX == fmt)
781: return(0);
1.4 schwarze 782:
1.56 schwarze 783: outbuf_flush(st);
1.2 schwarze 784:
785: if ( ! nomacro && FMT_CODE == fmt)
1.56 schwarze 786: fputs(" Qc", stdout);
1.1 schwarze 787:
1.33 schwarze 788: st->wantws = ' ' == last;
1.56 schwarze 789: return(1);
1.1 schwarze 790: }
791:
792: /*
793: * Calls formatcode() til the end of a paragraph.
1.32 schwarze 794: * Goes to OUST_MAC mode and stays there when returning,
795: * such that the caller can add arguments to the macro line
796: * before closing it out.
1.1 schwarze 797: */
798: static void
1.32 schwarze 799: formatcodeln(struct state *st, const char *linemac,
800: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 801: {
1.56 schwarze 802: int gotmacro;
1.1 schwarze 803:
1.32 schwarze 804: assert(OUST_NL == st->oust);
805: assert(st->wantws);
1.56 schwarze 806: printf(".%s", linemac);
1.32 schwarze 807: st->oust = OUST_MAC;
808:
1.33 schwarze 809: gotmacro = 0;
1.1 schwarze 810: while (*start < end) {
1.34 schwarze 811: if (*start + 1 < end && '<' == buf[*start + 1] &&
812: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.33 schwarze 813: gotmacro = formatcode(st, buf,
814: start, end, nomacro, 1);
1.1 schwarze 815: continue;
816: }
1.32 schwarze 817:
1.56 schwarze 818: /* Suppress newlines and multiple spaces. */
819:
820: last = buf[(*start)++];
821: if (isspace(last)) {
822: outbuf_flush(st);
823: st->wantws = 1;
824: while (*start < end &&
825: isspace((unsigned char)buf[*start]))
826: (*start)++;
827: continue;
828: }
829:
1.33 schwarze 830: if (gotmacro) {
1.56 schwarze 831: if (*start < end) {
832: if (st->wantws)
833: printf(" No");
1.33 schwarze 834: else
1.56 schwarze 835: printf(" Ns");
1.33 schwarze 836: }
1.56 schwarze 837: st->wantws = 1;
1.33 schwarze 838: gotmacro = 0;
839: }
1.32 schwarze 840:
1.4 schwarze 841: /*
842: * Since we're already on a macro line, we want to make
843: * sure that we don't inadvertently invoke a macro.
844: * We need to do this carefully because section names
845: * are used in troff and we don't want to escape
846: * something that needn't be escaped.
847: */
1.56 schwarze 848: if (*start < end && ! st->outbuflen && isupper(last) &&
849: islower((unsigned char)buf[*start]) &&
850: (end - *start == 1 || ' ' == buf[*start + 1])) {
851: outbuf_addstr(st, "\\&");
852: last = buf[*start - 1];
853: }
854: outbuf_addchar(st);
1.1 schwarze 855: }
1.56 schwarze 856: outbuf_flush(st);
857: st->wantws = 1;
1.1 schwarze 858: }
859:
860: /*
1.4 schwarze 861: * Guess at what kind of list we are.
862: * These are taken straight from the POD manual.
863: * I don't know what people do in real life.
864: */
865: static enum list
866: listguess(const char *buf, size_t start, size_t end)
867: {
868: size_t len = end - start;
869:
870: assert(end >= start);
871:
872: if (len == 1 && '*' == buf[start])
873: return(LIST_BULLET);
874: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
875: return(LIST_ENUM);
876: else if (len == 1 && '1' == buf[start])
877: return(LIST_ENUM);
878: else
879: return(LIST_TAG);
880: }
881:
882: /*
1.1 schwarze 883: * A command paragraph, as noted in the perlpod manual, just indicates
884: * that we should do something, optionally with some text to print as
885: * well.
1.32 schwarze 886: * From the perspective of external callers,
887: * always stays in OUST_NL/wantws mode,
888: * but its children do use OUST_MAC.
1.1 schwarze 889: */
890: static void
891: command(struct state *st, const char *buf, size_t start, size_t end)
892: {
893: size_t len, csz;
894: enum cmd cmd;
895:
896: assert('=' == buf[start]);
897: start++;
898: len = end - start;
899:
900: for (cmd = 0; cmd < CMD__MAX; cmd++) {
901: csz = strlen(cmds[cmd]);
902: if (len < csz)
903: continue;
904: if (0 == memcmp(&buf[start], cmd[cmds], csz))
905: break;
906: }
907:
908: /* Ignore bogus commands. */
909:
910: if (CMD__MAX == cmd)
911: return;
912:
913: start += csz;
1.8 kristaps 914: while (start < end && ' ' == buf[start])
915: start++;
916:
1.1 schwarze 917: len = end - start;
918:
919: if (st->paused) {
920: st->paused = CMD_END != cmd;
921: return;
922: }
923:
924: switch (cmd) {
925: case (CMD_POD):
926: break;
927: case (CMD_HEAD1):
928: /*
929: * The behaviour of head= follows from a quick glance at
930: * how pod2man handles it.
931: */
1.11 kristaps 932: st->sect = SECT_NONE;
933: if (end - start == 4) {
1.1 schwarze 934: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 935: st->sect = SECT_NAME;
936: } else if (end - start == 8) {
937: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
938: st->sect = SECT_SYNOPSIS;
939: }
1.32 schwarze 940: formatcodeln(st, "Sh", buf, &start, end, 1);
941: mdoc_newln(st);
1.1 schwarze 942: st->haspar = 1;
943: break;
944: case (CMD_HEAD2):
1.32 schwarze 945: formatcodeln(st, "Ss", buf, &start, end, 1);
946: mdoc_newln(st);
1.1 schwarze 947: st->haspar = 1;
948: break;
949: case (CMD_HEAD3):
950: puts(".Pp");
1.32 schwarze 951: formatcodeln(st, "Em", buf, &start, end, 0);
952: mdoc_newln(st);
1.1 schwarze 953: puts(".Pp");
954: st->haspar = 1;
955: break;
956: case (CMD_HEAD4):
957: puts(".Pp");
1.32 schwarze 958: formatcodeln(st, "No", buf, &start, end, 0);
959: mdoc_newln(st);
1.1 schwarze 960: puts(".Pp");
961: st->haspar = 1;
962: break;
963: case (CMD_OVER):
1.4 schwarze 964: /*
965: * If we have an existing list that hasn't had an =item
966: * yet, then make sure that we open it now.
967: * We use the default list type, but that can't be
968: * helped (we haven't seen any items yet).
1.1 schwarze 969: */
1.4 schwarze 970: if (st->lpos > 0)
971: if (LIST__MAX == st->lstack[st->lpos - 1]) {
972: st->lstack[st->lpos - 1] = LIST_TAG;
973: puts(".Bl -tag -width Ds");
974: }
975: st->lpos++;
976: assert(st->lpos < LIST_STACKSZ);
977: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 978: break;
979: case (CMD_ITEM):
1.6 kristaps 980: if (0 == st->lpos) {
981: /*
982: * Bad markup.
983: * Try to compensate.
984: */
985: st->lstack[st->lpos] = LIST__MAX;
986: st->lpos++;
987: }
1.4 schwarze 988: assert(st->lpos > 0);
989: /*
990: * If we're the first =item, guess at what our content
991: * will be: "*" is a bullet list, "1." is a numbered
992: * list, and everything is tagged.
993: */
994: if (LIST__MAX == st->lstack[st->lpos - 1]) {
995: st->lstack[st->lpos - 1] =
996: listguess(buf, start, end);
997: switch (st->lstack[st->lpos - 1]) {
998: case (LIST_BULLET):
999: puts(".Bl -bullet");
1000: break;
1001: case (LIST_ENUM):
1002: puts(".Bl -enum");
1003: break;
1004: default:
1005: puts(".Bl -tag -width Ds");
1006: break;
1007: }
1008: }
1009: switch (st->lstack[st->lpos - 1]) {
1010: case (LIST_TAG):
1.32 schwarze 1011: formatcodeln(st, "It", buf, &start, end, 0);
1012: mdoc_newln(st);
1.4 schwarze 1013: break;
1014: case (LIST_ENUM):
1015: /* FALLTHROUGH */
1016: case (LIST_BULLET):
1017: /*
1018: * Abandon the remainder of the paragraph
1019: * because we're going to be a bulletted or
1020: * numbered list.
1021: */
1022: puts(".It");
1023: break;
1024: default:
1025: abort();
1026: }
1.1 schwarze 1027: st->haspar = 1;
1028: break;
1029: case (CMD_BACK):
1.4 schwarze 1030: /* Make sure we don't back over the stack. */
1031: if (st->lpos > 0) {
1032: st->lpos--;
1033: puts(".El");
1034: }
1.1 schwarze 1035: break;
1036: case (CMD_BEGIN):
1037: /*
1038: * We disregard all types for now.
1039: * TODO: process at least "text" in a -literal block.
1040: */
1041: st->paused = 1;
1042: break;
1043: case (CMD_FOR):
1044: /*
1045: * We ignore all types of encodings and formats
1046: * unilaterally.
1047: */
1048: break;
1049: case (CMD_ENCODING):
1050: break;
1051: case (CMD_CUT):
1052: st->parsing = 0;
1053: return;
1054: default:
1055: abort();
1056: }
1057:
1058: /* Any command (but =cut) makes us start parsing. */
1059: st->parsing = 1;
1060: }
1061:
1062: /*
1.39 schwarze 1063: * Put the type provided as an argument into the dictionary.
1064: */
1065: static void
1066: register_type(const char *ptype)
1067: {
1068: const char *pname, *pend;
1069:
1070: pname = ptype;
1071: while (isalnum((unsigned char)*pname) || '_' == *pname)
1072: pname++;
1073: if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) ||
1074: (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) {
1075: while (' ' == *pname)
1076: pname++;
1077: pend = pname;
1078: while (isalnum((unsigned char)*pend) || '_' == *pend)
1079: pend++;
1080: if (pend > pname)
1081: dict_put(pname, pend - pname, MDOC_Vt);
1082: } else
1083: pend = pname;
1084: if (pend > ptype)
1085: dict_put(ptype, pend - ptype, MDOC_Vt);
1086: }
1087:
1088: /*
1.1 schwarze 1089: * Just pump out the line in a verbatim block.
1.32 schwarze 1090: * From the perspective of external callers,
1091: * always stays in OUST_NL/wantws mode.
1.1 schwarze 1092: */
1093: static void
1.35 schwarze 1094: verbatim(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1095: {
1.36 schwarze 1096: size_t i, ift, ifo, ifa, ifc, inl;
1.38 schwarze 1097: char *cp, *cp2;
1.53 schwarze 1098: int indisplay, nopen, wantsp;
1.1 schwarze 1099:
1.53 schwarze 1100: if (st->paused || ! st->parsing)
1.1 schwarze 1101: return;
1.53 schwarze 1102:
1103: indisplay = wantsp = 0;
1104:
1.22 kristaps 1105: again:
1.53 schwarze 1106: if (start == end) {
1107: if (indisplay)
1108: puts(".Ed");
1109: return;
1110: }
1111:
1112: if ('\n' == buf[start]) {
1113: wantsp = 1;
1114: start++;
1115: goto again;
1116: }
1117:
1.22 kristaps 1118: /*
1119: * If we're in the SYNOPSIS, see if we're an #include block.
1120: * If we are, then print the "In" macro and re-loop.
1121: * This handles any number of inclusions, but only when they
1122: * come before the remaining parts...
1123: */
1124: if (SECT_SYNOPSIS == st->sect) {
1125: i = start;
1.35 schwarze 1126: while (i < end && buf[i] == ' ')
1127: i++;
1.22 kristaps 1128: if (i == end)
1.53 schwarze 1129: goto again;
1.35 schwarze 1130:
1.22 kristaps 1131: /* We're an include block! */
1132: if (end - i > 10 &&
1133: 0 == memcmp(&buf[i], "#include <", 10)) {
1134: start = i + 10;
1135: while (start < end && ' ' == buf[start])
1136: start++;
1.53 schwarze 1137: if (indisplay)
1138: puts(".Ed");
1139: indisplay = wantsp = 0;
1.22 kristaps 1140: fputs(".In ", stdout);
1141: /* Stop til the '>' marker or we hit eoln. */
1142: while (start < end &&
1143: '>' != buf[start] && '\n' != buf[start])
1144: putchar(buf[start++]);
1145: putchar('\n');
1146: if (start < end && '>' == buf[start])
1147: start++;
1148: if (start < end && '\n' == buf[start])
1149: start++;
1.41 schwarze 1150: goto again;
1151: }
1152:
1153: /* Other preprocessor directives. */
1154: if ('#' == buf[i]) {
1.53 schwarze 1155: if (indisplay)
1156: puts(".Ed");
1157: indisplay = wantsp = 0;
1.41 schwarze 1158: fputs(".Fd ", stdout);
1159: start = i;
1160: while(start < end && '\n' != buf[start])
1161: putchar(buf[start++]);
1162: putchar('\n');
1163: if (start < end && '\n' == buf[start])
1164: start++;
1.49 schwarze 1165:
1166: /* Remember #define for Dv or Fn. */
1167:
1168: if (strncmp(buf + i + 1, "define", 6) ||
1169: ! isspace((unsigned char)buf[i + 7]))
1170: goto again;
1171:
1172: ifo = i + 7;
1173: while (ifo < start &&
1174: isspace((unsigned char)buf[ifo]))
1175: ifo++;
1176: ifa = ifo;
1177: while ('_' == buf[ifa] ||
1178: isalnum((unsigned char)buf[ifa]))
1179: ifa++;
1180: dict_put(buf + ifo, ifa - ifo,
1181: '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv);
1182:
1.41 schwarze 1183: goto again;
1.22 kristaps 1184: }
1.35 schwarze 1185:
1186: /* Parse function declaration. */
1187: ifo = ifa = ifc = 0;
1.36 schwarze 1188: inl = end;
1189: nopen = 0;
1190: for (ift = i; i < end; i++) {
1191: if (ifc) {
1192: if (buf[i] != '\n')
1193: continue;
1194: inl = i;
1195: break;
1196: }
1197: switch (buf[i]) {
1.45 schwarze 1198: case '\t':
1199: /* FALLTHROUGH */
1.36 schwarze 1200: case ' ':
1201: if ( ! ifa)
1202: ifo = i;
1203: break;
1204: case '(':
1205: if (ifo) {
1206: nopen++;
1207: if ( ! ifa)
1208: ifa = i;
1209: } else
1210: i = end;
1211: break;
1212: case ')':
1213: switch (nopen) {
1214: case 0:
1215: i = end;
1216: break;
1217: case 1:
1.35 schwarze 1218: ifc = i;
1.36 schwarze 1219: break;
1220: default:
1221: nopen--;
1222: break;
1223: }
1224: break;
1225: default:
1226: break;
1227: }
1.35 schwarze 1228: }
1229:
1230: /* Encode function declaration. */
1231: if (ifc) {
1.36 schwarze 1232: for (i = ifa; i < ifc; i++)
1233: if (buf[i] == '\n')
1234: buf[i] = ' ';
1.35 schwarze 1235: buf[ifo++] = '\0';
1.39 schwarze 1236: register_type(buf + ift);
1.53 schwarze 1237: if (indisplay)
1238: puts(".Ed");
1239: indisplay = wantsp = 0;
1.35 schwarze 1240: printf(".Ft %s", buf + ift);
1241: if (buf[ifo] == '*') {
1242: fputs(" *", stdout);
1243: ifo++;
1244: }
1245: putchar('\n');
1246: buf[ifa++] = '\0';
1247: printf(".Fo %s\n", buf + ifo);
1.39 schwarze 1248: dict_put(buf + ifo, 0, MDOC_Fo);
1.35 schwarze 1249: buf[ifc++] = '\0';
1250: for (;;) {
1251: cp = strchr(buf + ifa, ',');
1.38 schwarze 1252: if (cp != NULL) {
1253: cp2 = cp;
1.36 schwarze 1254: *cp++ = '\0';
1.38 schwarze 1255: } else
1256: cp2 = strchr(buf + ifa, '\0');
1257: while (isalnum((unsigned char)cp2[-1]) ||
1258: '_' == cp2[-1])
1259: cp2--;
1260: if ('\0' != *cp2)
1.39 schwarze 1261: dict_put(cp2, 0, MDOC_Fa);
1262: register_type(buf + ifa);
1.50 schwarze 1263: if (strchr(buf + ifa, ' ') == NULL)
1264: printf(".Fa %s\n", buf + ifa);
1265: else
1266: printf(".Fa \"%s\"\n", buf + ifa);
1.35 schwarze 1267: if (cp == NULL)
1268: break;
1.45 schwarze 1269: while (*cp == ' ' || *cp == '\t')
1.36 schwarze 1270: cp++;
1271: ifa = cp - buf;
1.35 schwarze 1272: }
1273: puts(".Fc");
1274: if (buf[ifc] == ';')
1275: ifc++;
1.36 schwarze 1276: if (ifc < inl) {
1277: buf[inl] = '\0';
1.35 schwarze 1278: puts(buf + ifc);
1279: }
1.53 schwarze 1280: start = inl < end ? inl + 1 : end;
1281: goto again;
1.35 schwarze 1282: }
1.22 kristaps 1283: }
1.53 schwarze 1284:
1285: if ( ! indisplay)
1286: puts(".Bd -literal");
1287: else if (wantsp)
1288: putchar('\n');
1289: indisplay = 1;
1290: wantsp = 0;
1291:
1292: for (last = '\n'; start < end; start++) {
1.8 kristaps 1293: /*
1294: * Handle accidental macros (newline starting with
1295: * control character) and escapes.
1296: */
1.53 schwarze 1297: if ('\n' == last) {
1298: if ('\n' == buf[start])
1299: goto again;
1.7 kristaps 1300: if ('.' == buf[start] || '\'' == buf[start])
1301: printf("\\&");
1.53 schwarze 1302: }
1.8 kristaps 1303: putchar(last = buf[start]);
1304: if ('\\' == buf[start])
1305: printf("e");
1.7 kristaps 1306: }
1.53 schwarze 1307: if ('\n' != last)
1308: putchar('\n');
1309: if (indisplay)
1310: puts(".Ed");
1.1 schwarze 1311: }
1312:
/*
 * Helper for dosynopsisop().
 * Scan buf[start..end) and report whether it contains a ']' that is
 * not paired with a '[' opened inside that same range.
 * Returns 1 if such a closing bracket exists, 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth;	/* brackets opened inside the range so far */

	depth = 0;
	while (start < end) {
		switch (buf[start++]) {
		case '[':
			depth++;
			break;
		case ']':
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
1330:
1331: /*
1332: * If we're in the SYNOPSIS section and we've encounter braces in an
1333: * ordinary paragraph, then try to see whether we're an [-option].
1334: * Do this, if we're an opening bracket, by first seeing if we have a
1335: * matching end via hasmatch().
1336: * If we're an ending bracket, see if we have a stack already.
1337: */
1338: static int
1.32 schwarze 1339: dosynopsisop(struct state *st, const char *buf,
1340: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1341: {
1342:
1343: assert('[' == buf[*start] || ']' == buf[*start]);
1344:
1345: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 schwarze 1346: mdoc_newln(st);
1.13 kristaps 1347: puts(".Oo");
1348: (*opstack)++;
1349: } else if ('[' == buf[*start])
1350: return(0);
1351:
1352: if (']' == buf[*start] && *opstack > 0) {
1.32 schwarze 1353: mdoc_newln(st);
1.13 kristaps 1354: puts(".Oc");
1355: (*opstack)--;
1356: } else if (']' == buf[*start])
1357: return(0);
1358:
1359: (*start)++;
1.31 schwarze 1360: last = '\n';
1.13 kristaps 1361: while (' ' == buf[*start])
1362: (*start)++;
1363: return(1);
1364: }
1365:
1366: /*
1.17 kristaps 1367: * Format multiple "Nm" manpage names in the NAME section.
1.32 schwarze 1368: * From the perspective of external callers,
1369: * always stays in OUST_NL/wantws mode,
1370: * but its children do use OUST_MAC.
1.17 kristaps 1371: */
1372: static void
1373: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1374: {
1375: size_t word;
1376:
1.32 schwarze 1377: assert(OUST_NL == st->oust);
1378: assert(st->wantws);
1379:
1.47 schwarze 1380: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1381: (*start)++;
1382:
1383: if (end == *start) {
1384: puts(".Nm unknown");
1385: return;
1386: }
1387:
1388: while (*start < end) {
1389: for (word = *start; word < end; word++)
1390: if (',' == buf[word])
1391: break;
1.32 schwarze 1392: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1393: if (*start == end) {
1.32 schwarze 1394: mdoc_newln(st);
1395: break;
1.17 kristaps 1396: }
1397: assert(',' == buf[*start]);
1.32 schwarze 1398: printf(" ,");
1399: mdoc_newln(st);
1.17 kristaps 1400: (*start)++;
1.47 schwarze 1401: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1402: (*start)++;
1403: }
1404: }
1405:
1406: /*
1.1 schwarze 1407: * Ordinary paragraph.
1408: * Well, this is really the hardest--POD seems to assume that, for
1409: * example, a leading space implies a newline, and so on.
1410: * Lots of other snakes in the grass: escaping a newline followed by a
1411: * period (accidental mdoc(7) control), double-newlines after macro
1412: * passages, etc.
1.32 schwarze 1413: *
1414: * Uses formatcode() to go to OUST_MAC mode
1415: * and outbuf_flush() to go to OUST_TXT mode.
1.40 schwarze 1416: * In text mode, wantws requests white space before the text
1417: * currently contained in the outbuf, not before upcoming text.
1.32 schwarze 1418: * Must make sure to go back to OUST_NL/wantws mode before returning.
1.1 schwarze 1419: */
1420: static void
1421: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1422: {
1.44 schwarze 1423: size_t i, j, opstack, wend;
1.43 schwarze 1424: enum mdoc_type mtype;
1.44 schwarze 1425: int eos, noeos, seq;
1.49 schwarze 1426: char savechar;
1.1 schwarze 1427:
1428: if ( ! st->parsing || st->paused)
1429: return;
1430:
1431: /*
1432: * Special-case: the NAME section.
1433: * If we find a "-" when searching from the end, assume that
1434: * we're in "name - description" format.
1435: * To wit, print out a "Nm" and "Nd" in that format.
1436: */
1.11 kristaps 1437: if (SECT_NAME == st->sect) {
1.15 kristaps 1438: for (i = end - 2; i > start; i--)
1.47 schwarze 1439: if ('-' == buf[i] &&
1440: isspace((unsigned char)buf[i + 1]))
1.1 schwarze 1441: break;
1442: if ('-' == buf[i]) {
1443: j = i;
1444: /* Roll over multiple "-". */
1445: for ( ; i > start; i--)
1446: if ('-' != buf[i])
1447: break;
1.17 kristaps 1448: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1449: start = j + 1;
1.47 schwarze 1450: while (start < end &&
1451: isspace((unsigned char)buf[start]))
1.17 kristaps 1452: start++;
1.57 schwarze 1453: while (start < end && '.' == buf[end - 1])
1454: end--;
1.32 schwarze 1455: formatcodeln(st, "Nd", buf, &start, end, 1);
1456: mdoc_newln(st);
1.1 schwarze 1457: return;
1458: }
1459: }
1460:
1461: if ( ! st->haspar)
1462: puts(".Pp");
1463:
1464: st->haspar = 0;
1465: last = '\n';
1.13 kristaps 1466: opstack = 0;
1.1 schwarze 1467:
1.15 kristaps 1468: for (seq = 0; start < end; seq++) {
1.1 schwarze 1469: /*
1470: * Loop til we get either to a newline or escape.
1471: * Escape initial control characters.
1472: */
1473: while (start < end) {
1.34 schwarze 1474: if (start < end - 1 && '<' == buf[start + 1] &&
1475: 'A' <= buf[start] && 'Z' >= buf[start])
1.1 schwarze 1476: break;
1477: else if ('\n' == buf[start])
1478: break;
1479: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1480: outbuf_addstr(st, "\\&");
1.1 schwarze 1481: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1482: outbuf_addstr(st, "\\&");
1.12 kristaps 1483: /*
1484: * If we're in the SYNOPSIS, have square
1485: * brackets indicate that we're opening and
1486: * closing an optional context.
1487: */
1.32 schwarze 1488:
1.13 kristaps 1489: if (SECT_SYNOPSIS == st->sect &&
1490: ('[' == buf[start] ||
1491: ']' == buf[start]) &&
1.32 schwarze 1492: dosynopsisop(st, buf,
1493: &start, end, &opstack))
1.13 kristaps 1494: continue;
1.32 schwarze 1495:
1.42 schwarze 1496: /* Merely buffer non-whitespace. */
1.32 schwarze 1497:
1.31 schwarze 1498: last = buf[start++];
1.44 schwarze 1499: if ( ! isspace(last))
1.37 schwarze 1500: outbuf_addchar(st);
1.44 schwarze 1501: if (start < end &&
1.52 schwarze 1502: ! isspace((unsigned char)buf[start - 1]) &&
1.44 schwarze 1503: ! isspace((unsigned char)buf[start]))
1.37 schwarze 1504: continue;
1505:
1.44 schwarze 1506: /*
1507: * Found the end of a word.
1508: * Rewind trailing delimiters.
1509: */
1510:
1511: eos = noeos = 0;
1512: for (wend = st->outbuflen; wend; wend--)
1513: if ('.' == st->outbuf[wend - 1] ||
1514: '!' == st->outbuf[wend - 1] ||
1515: '?' == st->outbuf[wend - 1])
1516: eos = 1;
1517: else if ('|' == st->outbuf[wend - 1] ||
1518: ',' == st->outbuf[wend - 1] ||
1519: ';' == st->outbuf[wend - 1] ||
1520: ':' == st->outbuf[wend - 1])
1521: noeos = 1;
1522: else if ('\'' != st->outbuf[wend - 1] &&
1523: '"' != st->outbuf[wend - 1] &&
1524: ')' != st->outbuf[wend - 1] &&
1525: ']' != st->outbuf[wend - 1])
1526: break;
1527: eos &= ! noeos;
1528:
1529: /*
1530: * Detect function names.
1531: */
1.42 schwarze 1532:
1.43 schwarze 1533: mtype = MDOC_Fa;
1.49 schwarze 1534: savechar = '\0';
1.44 schwarze 1535: if (wend && ')' == st->outbuf[wend] &&
1536: '(' == st->outbuf[wend - 1]) {
1537: mtype = dict_get(st->outbuf, --wend);
1.49 schwarze 1538: if (MDOC_Dv == mtype)
1539: mtype = MDOC_Fo;
1.43 schwarze 1540: if (MDOC_Fo == mtype || MDOC_MAX == mtype) {
1.44 schwarze 1541: st->outbuflen = wend;
1542: st->outbuf[wend] = '\0';
1.43 schwarze 1543: mdoc_newln(st);
1544: if (MDOC_Fo == mtype)
1.56 schwarze 1545: fputs(".Fn", stdout);
1.43 schwarze 1546: else
1.56 schwarze 1547: fputs(".Xr", stdout);
1.43 schwarze 1548: st->oust = OUST_MAC;
1549: }
1.49 schwarze 1550: } else {
1551: mtype = dict_get(st->outbuf, wend);
1552: if (MDOC_Dv == mtype) {
1553: savechar = st->outbuf[wend];
1554: st->outbuf[wend] = '\0';
1555: mdoc_newln(st);
1.56 schwarze 1556: fputs(".Dv", stdout);
1.49 schwarze 1557: st->oust = OUST_MAC;
1558: } else
1559: mtype = MDOC_Fa;
1.37 schwarze 1560: }
1561:
1.42 schwarze 1562: /*
1563: * On whitespace, flush the output buffer
1564: * and allow breaking to a macro line.
1565: */
1566:
1.37 schwarze 1567: outbuf_flush(st);
1.42 schwarze 1568:
1569: /*
1570: * End macro lines, and
1571: * end text lines at the end of sentences.
1572: */
1573:
1.44 schwarze 1574: if (OUST_MAC == st->oust || (eos && wend > 1 &&
1575: islower((unsigned char)st->outbuf[wend - 1]))) {
1.43 schwarze 1576: if (MDOC_MAX == mtype)
1577: fputs(" 3", stdout);
1.49 schwarze 1578: if (MDOC_Fa != mtype) {
1579: if (MDOC_Dv == mtype)
1580: st->outbuf[wend] = savechar;
1581: else
1582: wend += 2;
1583: while ('\0' != st->outbuf[wend])
1.44 schwarze 1584: printf(" %c",
1.49 schwarze 1585: st->outbuf[wend++]);
1586: }
1.40 schwarze 1587: mdoc_newln(st);
1.43 schwarze 1588: }
1.42 schwarze 1589:
1590: /* Advance to the next word. */
1591:
1.44 schwarze 1592: while ('\n' != buf[start] &&
1593: isspace((unsigned char)buf[start]))
1.42 schwarze 1594: start++;
1595: st->wantws = 1;
1.1 schwarze 1596: }
1597:
1.34 schwarze 1598: if (start < end - 1 && '<' == buf[start + 1] &&
1599: 'A' <= buf[start] && 'Z' >= buf[start]) {
1.32 schwarze 1600: formatcode(st, buf, &start, end, 0, seq);
1601: if (OUST_MAC == st->oust) {
1.30 schwarze 1602: /*
1603: * Let mdoc(7) handle trailing punctuation.
1604: * XXX Some punctuation characters
1605: * are not handled yet.
1606: */
1.51 schwarze 1607: if ((start == end - 1 ||
1608: (start < end - 1 &&
1609: (' ' == buf[start + 1] ||
1610: '\n' == buf[start + 1]))) &&
1611: NULL != strchr("|.,;:?!)]", buf[start])) {
1.16 kristaps 1612: putchar(' ');
1613: putchar(buf[start++]);
1614: }
1.32 schwarze 1615:
1616: if (st->wantws ||
1617: ' ' == buf[start] ||
1618: '\n' == buf[start])
1619: mdoc_newln(st);
1620:
1.30 schwarze 1621: /*
1622: * Consume all whitespace
1623: * so we don't accidentally start
1624: * an implicit literal line.
1625: */
1.32 schwarze 1626:
1.6 kristaps 1627: while (start < end && ' ' == buf[start])
1628: start++;
1.32 schwarze 1629:
1630: /*
1631: * Some text is following.
1632: * Implement requested spacing.
1633: */
1634:
1635: if ( ! st->wantws && start < end &&
1.34 schwarze 1636: ('<' != buf[start + 1] ||
1637: 'A' > buf[start] ||
1638: 'Z' < buf[start])) {
1.56 schwarze 1639: fputs(" Ns", stdout);
1.32 schwarze 1640: st->wantws = 1;
1641: }
1.6 kristaps 1642: }
1.1 schwarze 1643: } else if (start < end && '\n' == buf[start]) {
1.32 schwarze 1644: outbuf_flush(st);
1.58 ! schwarze 1645: st->wantws = 1;
1.1 schwarze 1646: if (++start >= end)
1647: continue;
1648: /*
1649: * If we have whitespace next, eat it to prevent
1650: * mdoc(7) from thinking that it's meant for
1651: * verbatim text.
1652: * It is--but if we start with that, we can't
1653: * have a macro subsequent it, which may be
1654: * possible if we have an escape next.
1655: */
1.58 ! schwarze 1656: if (' ' == buf[start] || '\t' == buf[start]) {
! 1657: mdoc_newln(st);
1.1 schwarze 1658: puts(".br");
1.58 ! schwarze 1659: }
1.1 schwarze 1660: for ( ; start < end; start++)
1661: if (' ' != buf[start] && '\t' != buf[start])
1662: break;
1.12 kristaps 1663: }
1.1 schwarze 1664: }
1.32 schwarze 1665: outbuf_flush(st);
1666: mdoc_newln(st);
1.1 schwarze 1667: }
1668:
1669: /*
1670: * There are three kinds of paragraphs: verbatim (starts with whitespace
1671: * of some sort), ordinary (starts without "=" marker), or a command
1672: * (default: starts with "=").
1673: */
1674: static void
1.35 schwarze 1675: dopar(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1676: {
1677:
1.32 schwarze 1678: assert(OUST_NL == st->oust);
1679: assert(st->wantws);
1680:
1.1 schwarze 1681: if (end == start)
1682: return;
1683: if (' ' == buf[start] || '\t' == buf[start])
1684: verbatim(st, buf, start, end);
1685: else if ('=' != buf[start])
1686: ordinary(st, buf, start, end);
1687: else
1688: command(st, buf, start, end);
1689: }
1690:
1691: /*
1692: * Loop around paragraphs within a document, processing each one in the
1693: * POD way.
1694: */
1695: static void
1696: dofile(const struct args *args, const char *fname,
1.35 schwarze 1697: const struct tm *tm, char *buf, size_t sz)
1.1 schwarze 1698: {
1.29 schwarze 1699: char datebuf[64];
1.1 schwarze 1700: struct state st;
1.46 schwarze 1701: const char *fbase, *fext, *section, *date, *format;
1.1 schwarze 1702: char *title, *cp;
1.53 schwarze 1703: size_t cur, end;
1704: int verb;
1.1 schwarze 1705:
1706: if (0 == sz)
1707: return;
1708:
1.29 schwarze 1709: /*
1710: * Parsing the filename is almost always required,
1711: * except when both the title and the section
1712: * are provided on the command line.
1713: */
1714:
1715: if (NULL == args->title || NULL == args->section) {
1716: fbase = strrchr(fname, '/');
1717: if (NULL == fbase)
1718: fbase = fname;
1719: else
1720: fbase++;
1721: fext = strrchr(fbase, '.');
1722: } else
1723: fext = NULL;
1724:
1725: /*
1726: * The title will be converted to uppercase,
1727: * so it needs to be copied.
1728: */
1729:
1730: title = (NULL != args->title) ? strdup(args->title) :
1731: (NULL != fext) ? strndup(fbase, fext - fbase) :
1732: strdup(fbase);
1.1 schwarze 1733:
1734: if (NULL == title) {
1735: perror(NULL);
1736: exit(EXIT_FAILURE);
1737: }
1738:
1739: /* Section is 1 unless suffix is "pm". */
1740:
1.29 schwarze 1741: section = (NULL != args->section) ? args->section :
1742: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1743: PERL_SECTION;
1.1 schwarze 1744:
1745: /* Date. Or the given "tm" if not supplied. */
1746:
1.46 schwarze 1747: date = args->date;
1748: format = (NULL == date) ? "%B %d, %Y" :
1.48 schwarze 1749: strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $";
1.46 schwarze 1750:
1751: if (NULL != format) {
1752: strftime(datebuf, sizeof(datebuf), format, tm);
1.1 schwarze 1753: date = datebuf;
1754: }
1755:
1756: for (cp = title; '\0' != *cp; cp++)
1757: *cp = toupper((int)*cp);
1758:
1759: /* The usual mdoc(7) preamble. */
1760:
1761: printf(".Dd %s\n", date);
1762: printf(".Dt %s %s\n", title, section);
1763: puts(".Os");
1764:
1765: free(title);
1766:
1.37 schwarze 1767: dict_init();
1.1 schwarze 1768: memset(&st, 0, sizeof(struct state));
1.32 schwarze 1769: st.oust = OUST_NL;
1770: st.wantws = 1;
1771:
1.1 schwarze 1772: assert(sz > 0);
1773:
1774: /* Main loop over file contents. */
1775:
1.53 schwarze 1776: cur = 0;
1777: for (;;) {
1778: while (cur < sz && '\n' == buf[cur])
1779: cur++;
1780: if (cur >= sz)
1781: break;
1782:
1783: verb = isspace((unsigned char)buf[cur]);
1784:
1.1 schwarze 1785: /* Read until next paragraph. */
1.53 schwarze 1786:
1787: for (end = cur + 1; end + 1 < sz; end++)
1788: if ('\n' == buf[end] && '\n' == buf[end + 1] &&
1789: !(verb && end + 2 < sz &&
1790: isspace((unsigned char)buf[end + 2])))
1.1 schwarze 1791: break;
1792:
1793: /* Adjust end marker for EOF. */
1.53 schwarze 1794:
1795: if (end < sz && '\n' != buf[end])
1796: end++;
1.1 schwarze 1797:
1798: /* Process paragraph and adjust start. */
1.53 schwarze 1799:
1.1 schwarze 1800: dopar(&st, buf, cur, end);
1.53 schwarze 1801: cur = end + 2;
1.1 schwarze 1802: }
1.37 schwarze 1803: dict_destroy();
1.1 schwarze 1804: }
1805:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.  The date handed to dofile() is the modification time
 * of the file, or the current time for stdin or if fstat(2) fails.
 * Returns 1 on success and 0 if the file cannot be opened or read;
 * exits if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
	    open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/* Double buffer size on fill. */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	/*
	 * Report a read error right away, while errno is still
	 * intact, then share the cleanup with the success path
	 * such that the descriptor is closed in both cases.
	 */
	rc = 1;
	if (ssz < 0) {
		perror(fname);
		rc = 0;
	} else
		dofile(args, STDIN_FILENO == fd ?
		    "STDIN" : fname, tm, buf, cur);

	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
1872:
1873: int
1874: main(int argc, char *argv[])
1875: {
1876: const char *fname, *name;
1877: struct args args;
1878: int c;
1879:
1880: name = strrchr(argv[0], '/');
1881: if (name == NULL)
1882: name = argv[0];
1883: else
1884: ++name;
1885:
1886: memset(&args, 0, sizeof(struct args));
1887: fname = "-";
1888:
1889: /* Accept no arguments for now. */
1890:
1891: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1892: switch (c) {
1893: case ('h'):
1894: /* FALLTHROUGH */
1895: case ('l'):
1896: /* FALLTHROUGH */
1897: case ('c'):
1898: /* FALLTHROUGH */
1899: case ('o'):
1900: /* FALLTHROUGH */
1901: case ('q'):
1902: /* FALLTHROUGH */
1903: case ('r'):
1904: /* FALLTHROUGH */
1905: case ('u'):
1906: /* FALLTHROUGH */
1907: case ('v'):
1908: /* Ignore these. */
1909: break;
1910: case ('d'):
1911: args.date = optarg;
1912: break;
1913: case ('n'):
1914: args.title = optarg;
1915: break;
1916: case ('s'):
1917: args.section = optarg;
1918: break;
1919: default:
1920: goto usage;
1921: }
1922:
1923: argc -= optind;
1924: argv += optind;
1925:
1926: /* Accept only a single input file. */
1927:
1.25 schwarze 1928: if (argc > 1)
1929: goto usage;
1.1 schwarze 1930: else if (1 == argc)
1931: fname = *argv;
1932:
1933: return(readfile(&args, fname) ?
1934: EXIT_SUCCESS : EXIT_FAILURE);
1935:
1936: usage:
1937: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1938: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1939:
1940: return(EXIT_FAILURE);
1941: }
CVSweb