Annotation of pod2mdoc/pod2mdoc.c, Revision 1.57
1.57 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.56 2015/02/20 13:33:52 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.37 schwarze 4: * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/stat.h>
19: #include <sys/time.h>
20:
21: #include <assert.h>
22: #include <ctype.h>
23: #include <fcntl.h>
24: #include <getopt.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
1.37 schwarze 30: #include "dict.h"
31:
/*
 * Manual section in which Perl module manuals are looked up.
 * Some systems (Mac OS X) use 3pm, others (OpenBSD, etc.) use 3p.
 * XXX If you change this, change pod2mdoc.1 as well.
 */
#define	PERL_SECTION	"3p"
38:
/*
 * Values that replace the corresponding arguments of the
 * "Dd" and "Dt" lines in the output.
 */
struct args {
	const char	*title;		/* override "Dt" title */
	const char	*date;		/* override "Dd" date */
	const char	*section;	/* override "Dt" section */
};
44:
/*
 * Types of lists kept on the list stack.
 * LIST__MAX doubles as the marker for a list that was opened
 * but whose type has not been decided yet.
 */
enum list {
	LIST_BULLET	= 0,	/* written as ".Bl -bullet" */
	LIST_ENUM	= 1,	/* written as ".Bl -enum" */
	LIST_TAG	= 2,	/* written as ".Bl -tag -width Ds" */
	LIST__MAX	= 3
};
51:
/* Sections that get special treatment while converting. */
enum sect {
	SECT_NONE	= 0,	/* any other section */
	SECT_NAME	= 1,	/* NAME section */
	SECT_SYNOPSIS	= 2	/* SYNOPSIS section */
};
57:
/* Where the mdoc output stream currently stands within a line. */
enum outstate {
	OUST_NL		= 0,	/* just started a new output line */
	OUST_TXT	= 1,	/* text line output in progress */
	OUST_MAC	= 2	/* macro line output in progress */
};
63:
1.1 schwarze 64: struct state {
1.31 schwarze 65: const char *fname; /* file being parsed */
1.1 schwarze 66: int parsing; /* after =cut of before command */
67: int paused; /* in =begin and before =end */
1.11 kristaps 68: enum sect sect; /* which section are we in? */
1.4 schwarze 69: #define LIST_STACKSZ 128
70: enum list lstack[LIST_STACKSZ]; /* open lists */
71: size_t lpos; /* where in list stack */
1.31 schwarze 72: int haspar; /* in paragraph: do we need Pp? */
1.32 schwarze 73: enum outstate oust; /* state of the mdoc output stream */
74: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 75: char *outbuf; /* text buffered for output */
76: size_t outbufsz; /* allocated size of outbuf */
77: size_t outbuflen; /* current length of outbuf */
1.1 schwarze 78: };
79:
/*
 * POD formatting codes.
 * The values index the fmts[] table of code letters;
 * FMT__MAX stands for "not a known code".
 */
enum fmt {
	FMT_ITALIC	= 0,
	FMT_BOLD	= 1,
	FMT_CODE	= 2,
	FMT_LINK	= 3,
	FMT_ESCAPE	= 4,
	FMT_FILE	= 5,
	FMT_NBSP	= 6,
	FMT_INDEX	= 7,
	FMT_NULL	= 8,
	FMT__MAX	= 9
};
92:
/*
 * POD command paragraphs.
 * The values index the cmds[] table of command names;
 * CMD__MAX stands for "not a known command".
 */
enum cmd {
	CMD_POD		= 0,
	CMD_HEAD1	= 1,
	CMD_HEAD2	= 2,
	CMD_HEAD3	= 3,
	CMD_HEAD4	= 4,
	CMD_OVER	= 5,
	CMD_ITEM	= 6,
	CMD_BACK	= 7,
	CMD_BEGIN	= 8,
	CMD_END		= 9,
	CMD_FOR		= 10,
	CMD_ENCODING	= 11,
	CMD_CUT		= 12,
	CMD__MAX	= 13
};
1.55 schwarze 109:
/* Forward declarations of the functions local to this file. */
static	void	  command(struct state *, const char *, size_t, size_t);
static	void	  dofile(const struct args *, const char *,
			const struct tm *, char *, size_t);
static	void	  donamenm(struct state *, const char *,
			size_t *, size_t);
static	void	  dopar(struct state *, char *, size_t, size_t);
static	void	  dosynopsisfl(const char *, size_t *, size_t);
static	int	  dosynopsisop(struct state *, const char *,
			size_t *, size_t, size_t *);
static	int	  formatcode(struct state *, const char *,
			size_t *, size_t, int, int);
static	void	  formatcodeln(struct state *, const char *,
			const char *, size_t *, size_t, int);
static	void	  formatescape(struct state *, const char *,
			size_t *, size_t);
static	int	  hasmatch(const char *, size_t, size_t);
static	void	  ordinary(struct state *, const char *, size_t, size_t);
static	void	  outbuf_addchar(struct state *);
static	void	  outbuf_addstr(struct state *, const char *);
static	void	  outbuf_flush(struct state *);
static	void	  outbuf_grow(struct state *, size_t);
static	enum list listguess(const char *, size_t, size_t);
static	void	  mdoc_newln(struct state *);
static	int	  readfile(const struct args *, const char *);
static	void	  register_type(const char *);
static	int	  trylink(const char *, size_t *, size_t, size_t);
static	void	  verbatim(struct state *, char *, size_t, size_t);
1.1 schwarze 135:
136: static const char *const cmds[CMD__MAX] = {
137: "pod", /* CMD_POD */
138: "head1", /* CMD_HEAD1 */
139: "head2", /* CMD_HEAD2 */
140: "head3", /* CMD_HEAD3 */
141: "head4", /* CMD_HEAD4 */
142: "over", /* CMD_OVER */
143: "item", /* CMD_ITEM */
144: "back", /* CMD_BACK */
145: "begin", /* CMD_BEGIN */
146: "end", /* CMD_END */
147: "for", /* CMD_FOR */
148: "encoding", /* CMD_ENCODING */
149: "cut" /* CMD_CUT */
150: };
151:
152: static const char fmts[FMT__MAX] = {
153: 'I', /* FMT_ITALIC */
154: 'B', /* FMT_BOLD */
155: 'C', /* FMT_CODE */
156: 'L', /* FMT_LINK */
157: 'E', /* FMT_ESCAPE */
158: 'F', /* FMT_FILE */
159: 'S', /* FMT_NBSP */
160: 'X', /* FMT_INDEX */
161: 'Z' /* FMT_NULL */
162: };
163:
/*
 * The character most recently taken from the input for output.
 * outbuf_addchar() appends this value to the output buffer.
 */
static unsigned char	 last;
1.6 kristaps 165:
1.31 schwarze 166:
167: static void
168: outbuf_grow(struct state *st, size_t by)
169: {
170:
171: st->outbufsz += (by / 128 + 1) * 128;
172: st->outbuf = realloc(st->outbuf, st->outbufsz);
173: if (NULL == st->outbuf) {
174: perror(NULL);
175: exit(EXIT_FAILURE);
176: }
177: }
178:
179: static void
180: outbuf_addchar(struct state *st)
181: {
182:
183: if (st->outbuflen + 2 >= st->outbufsz)
184: outbuf_grow(st, 1);
185: st->outbuf[st->outbuflen++] = last;
186: if ('\\' == last)
187: st->outbuf[st->outbuflen++] = 'e';
188: st->outbuf[st->outbuflen] = '\0';
189: }
190:
191: static void
192: outbuf_addstr(struct state *st, const char *str)
193: {
194: size_t slen;
195:
196: slen = strlen(str);
197: if (st->outbuflen + slen >= st->outbufsz)
198: outbuf_grow(st, slen);
199: memcpy(st->outbuf + st->outbuflen, str, slen+1);
1.33 schwarze 200: st->outbuflen += slen;
1.31 schwarze 201: last = str[slen - 1];
202: }
203:
204: static void
205: outbuf_flush(struct state *st)
206: {
207:
208: if (0 == st->outbuflen)
209: return;
210:
1.56 schwarze 211: if (OUST_NL != st->oust && st->wantws)
1.40 schwarze 212: putchar(' ');
213:
1.54 schwarze 214: if (OUST_MAC == st->oust && '"' == *st->outbuf)
215: printf("\\(dq%s", st->outbuf + 1);
216: else
217: fputs(st->outbuf, stdout);
218:
1.31 schwarze 219: *st->outbuf = '\0';
220: st->outbuflen = 0;
1.32 schwarze 221:
222: if (OUST_NL == st->oust)
223: st->oust = OUST_TXT;
1.31 schwarze 224: }
225:
226: static void
1.32 schwarze 227: mdoc_newln(struct state *st)
1.31 schwarze 228: {
229:
1.32 schwarze 230: if (OUST_NL == st->oust)
1.31 schwarze 231: return;
1.32 schwarze 232:
1.31 schwarze 233: putchar('\n');
234: last = '\n';
1.32 schwarze 235: st->oust = OUST_NL;
236: st->wantws = 1;
1.31 schwarze 237: }
238:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', also if the name is too long
 * to be one we know; if there is no '>' before "end", start is left
 * at "end".
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;
	int		 toolong;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	/*
	 * Too long... skip til we end.
	 * The closing '>' still has to be consumed below; leaving it
	 * behind would make the caller treat it as text or as the end
	 * of an enclosing format code.
	 */
	toolong = (i == max);
	if (toolong)
		while (*start < end && '>' != buf[*start])
			(*start)++;

	if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	if (toolong)
		return;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
285:
286: /*
1.9 kristaps 287: * Run some heuristics to intuit a link format.
1.19 kristaps 288: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 289: * that the caller can safely just continue processing.
1.19 kristaps 290: * If this is just an empty tag, I'll return 0.
1.32 schwarze 291: *
292: * Always operates in OUST_MAC mode.
293: * Mode handling is done by the caller.
1.9 kristaps 294: */
295: static int
296: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
297: {
1.21 kristaps 298: size_t linkstart, realend, linkend,
299: i, j, textsz, stack;
1.9 kristaps 300:
301: /*
302: * Scan to the start of the terminus.
303: * This function is more or less replicated in the formatcode()
304: * for null or index formatting codes.
1.23 kristaps 305: * However, we're slightly different because we might have
306: * nested escapes we need to ignore.
1.9 kristaps 307: */
1.21 kristaps 308: stack = 0;
1.19 kristaps 309: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 310: if ('<' == buf[realend])
311: stack++;
1.19 kristaps 312: if ('>' != buf[realend])
1.9 kristaps 313: continue;
1.23 kristaps 314: else if (stack-- > 0)
315: continue;
316: if (dsz == 1)
1.9 kristaps 317: break;
1.19 kristaps 318: assert(realend > 0);
319: if (' ' != buf[realend - 1])
1.9 kristaps 320: continue;
1.19 kristaps 321: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 322: if ('>' != buf[i++])
323: break;
324: if (dsz == j)
325: break;
326: }
1.19 kristaps 327:
328: /* Ignore stubs. */
329: if (realend == end || realend == *start)
1.9 kristaps 330: return(0);
331:
1.19 kristaps 332: /* Set linkend to the end of content. */
333: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 334:
1.19 kristaps 335: /* Re-scan to see if we have a title or section. */
336: for (textsz = *start; textsz < linkend; textsz++)
337: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 338: break;
339:
1.19 kristaps 340: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 341: /* With title: set start, then end at section. */
1.19 kristaps 342: linkstart = textsz + 1;
1.18 kristaps 343: textsz = textsz - *start;
1.19 kristaps 344: for (i = linkstart; i < linkend; i++)
345: if ('/' == buf[i])
346: break;
347: if (i < linkend)
348: linkend = i;
1.20 kristaps 349: } else if (textsz < linkend && '/' == buf[textsz]) {
350: /* With section: set end at section. */
351: linkend = textsz;
352: textsz = 0;
353: } else
354: /* No title, no section. */
1.18 kristaps 355: textsz = 0;
1.19 kristaps 356:
357: *start = realend;
358: j = linkend - linkstart;
359:
1.20 kristaps 360: /* Do we have only subsection material? */
361: if (0 == j && '/' == buf[linkend]) {
362: linkstart = linkend + 1;
363: linkend = dsz > 1 ? realend - 1 : realend;
364: if (0 == (j = linkend - linkstart))
365: return(0);
366: printf("Sx %.*s", (int)j, &buf[linkstart]);
367: return(1);
368: } else if (0 == j)
1.19 kristaps 369: return(0);
370:
371: /* See if we qualify as being a link or not. */
1.20 kristaps 372: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
373: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
374: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
375: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
376: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
377: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
378: /* Gross. */
379: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
380: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 381: return(1);
382: }
383:
384: /* See if we qualify as a mailto. */
1.20 kristaps 385: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 386: printf("Mt %.*s", (int)j, &buf[linkstart]);
387: return(1);
388: }
389:
390: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
391: if ((j > 3 && ')' == buf[linkend - 1]) &&
392: ('(' == buf[linkend - 3])) {
393: printf("Xr %.*s %c", (int)(j - 3),
394: &buf[linkstart], buf[linkend - 2]);
395: return(1);
396: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
397: ('(' == buf[linkend - 4])) {
398: printf("Xr %.*s %.*s", (int)(j - 4),
399: &buf[linkstart], 2, &buf[linkend - 3]);
400: return(1);
401: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
402: ('(' == buf[linkend - 5])) {
403: printf("Xr %.*s %.*s", (int)(j - 5),
404: &buf[linkstart], 3, &buf[linkend - 4]);
405: return(1);
406: }
407:
408: /* Last try: do we have a double-colon? */
409: for (i = linkstart + 1; i < linkend; i++)
410: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 411: break;
1.9 kristaps 412:
1.19 kristaps 413: if (i < linkend)
1.10 kristaps 414: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 415: (int)j, &buf[linkstart]);
1.9 kristaps 416: else
1.19 kristaps 417: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 418:
419: return(1);
420: }
421:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry, buf[*start] must be the dash and at least one more
 * character must follow before "end".
 * Prints "Fl" with the flag name and/or "Ar" and moves "start" to
 * where the caller is to continue.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i;
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/*
	 * The <ctype.h> functions are only defined for EOF and the
	 * values of unsigned char, so never hand them a plain char.
	 */
	if ( ! isalnum((unsigned char)buf[*start + 1]) &&
	    '?' != buf[*start + 1] &&
	    '-' != buf[*start + 1]) {
		/*
		 * NOTE(review): this steps back to the character in
		 * front of the dash; confirm that is what callers want.
		 */
		(*start)--;
		fputs("Ar", stdout);
		return;
	}

	/* Find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++)
		if (isalnum((unsigned char)buf[i]))
			continue;
		else if ('?' == buf[i])
			continue;
		else if ('-' == buf[i])
			continue;
		else if ('_' == buf[i])
			continue;
		else
			break;

	assert(i < end);

	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		fputs("Ar", stdout);
		return;
	}

	/* Protect a flag name that looks like a macro ("Xx"). */
	printf("Fl ");
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s", (int)(i - *start), &buf[*start]);
	*start = i;

	/* After blanks, expect another flag or an argument. */
	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		fputs("Ar", stdout);
		*start = i;
	}
}
490:
1.9 kristaps 491: /*
1.1 schwarze 492: * We're at the character in front of a format code, which is structured
493: * like X<...> and can contain nested format codes.
494: * This consumes the whole format code, and any nested format codes, til
495: * the end of matched production.
1.6 kristaps 496: * If "nomacro", then we don't print any macros, just contained data
497: * (e.g., following "Sh" or "Nm").
1.15 kristaps 498: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
499: * as the first format code on a line (for decoration as an "Nm"),
500: * non-zero otherwise.
1.32 schwarze 501: *
502: * Output mode handling is most complicated here.
503: * We may enter in any mode.
504: * We usually exit in OUST_MAC mode, except when
505: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 506: */
1.33 schwarze 507: static int
1.15 kristaps 508: formatcode(struct state *st, const char *buf, size_t *start,
1.32 schwarze 509: size_t end, int nomacro, int pos)
1.1 schwarze 510: {
1.40 schwarze 511: size_t i, j, dsz;
1.1 schwarze 512: enum fmt fmt;
1.39 schwarze 513: unsigned char uc;
1.56 schwarze 514: int gotmacro, wantws;
1.1 schwarze 515:
516: assert(*start + 1 < end);
517: assert('<' == buf[*start + 1]);
518:
1.6 kristaps 519: /*
520: * First, look up the format code.
1.30 schwarze 521: * If it's not valid, treat it as a NOOP.
1.6 kristaps 522: */
523: for (fmt = 0; fmt < FMT__MAX; fmt++)
524: if (buf[*start] == fmts[fmt])
525: break;
526:
1.5 kristaps 527: /*
528: * Determine whether we're overriding our delimiter.
529: * According to POD, if we have more than one '<' followed by a
530: * space, then we need a space followed by matching '>' to close
531: * the expression.
532: * Otherwise we use the usual '<' and '>' matched pair.
533: */
534: i = *start + 1;
535: while (i < end && '<' == buf[i])
536: i++;
537: assert(i > *start + 1);
538: dsz = i - (*start + 1);
539: if (dsz > 1 && (i >= end || ' ' != buf[i]))
540: dsz = 1;
541:
542: /* Remember, if dsz>1, to jump the trailing space. */
543: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 544:
545: /*
1.6 kristaps 546: * Escapes and ignored codes (NULL and INDEX) don't print macro
547: * sequences, so just output them like normal text before
548: * processing for real macros.
1.1 schwarze 549: */
550: if (FMT_ESCAPE == fmt) {
1.31 schwarze 551: formatescape(st, buf, start, end);
1.33 schwarze 552: return(0);
1.1 schwarze 553: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 554: /*
1.6 kristaps 555: * Just consume til the end delimiter, accounting for
556: * whether it's a custom one.
1.5 kristaps 557: */
558: for ( ; *start < end; (*start)++) {
559: if ('>' != buf[*start])
560: continue;
561: else if (dsz == 1)
562: break;
563: assert(*start > 0);
564: if (' ' != buf[*start - 1])
565: continue;
566: i = *start;
567: for (j = 0; i < end && j < dsz; j++)
568: if ('>' != buf[i++])
569: break;
570: if (dsz != j)
571: continue;
572: (*start) += dsz;
573: break;
574: }
1.24 kristaps 575: if (*start < end) {
576: assert('>' == buf[*start]);
577: (*start)++;
578: }
579: if (isspace(last))
580: while (*start < end && isspace((int)buf[*start]))
581: (*start)++;
1.33 schwarze 582: return(0);
1.1 schwarze 583: }
584:
1.6 kristaps 585: /*
586: * Check whether we're supposed to print macro stuff (this is
587: * suppressed in, e.g., "Nm" and "Sh" macros).
588: */
1.30 schwarze 589: if (FMT__MAX != fmt && !nomacro) {
1.32 schwarze 590:
591: /*
1.56 schwarze 592: * Do we need spacing before the upcoming macro,
593: * after any pending text already in the outbuf?
594: * We may already have wantws if there was whitespace
595: * before the code ("text B<text"), or there may be
596: * whitespace inside our scope ("textB< text").
597: */
598:
599: wantws = ' ' == buf[*start] ||
600: (st->wantws && ! st->outbuflen);
601:
602: /*
1.31 schwarze 603: * If we are on a text line and there is no
604: * whitespace before our content, we have to make
605: * the previous word a prefix to the macro line.
1.1 schwarze 606: */
1.31 schwarze 607:
1.56 schwarze 608: if (OUST_MAC != st->oust && ! wantws) {
1.32 schwarze 609: if (OUST_NL != st->oust)
1.54 schwarze 610: mdoc_newln(st);
1.56 schwarze 611: fputs(".Pf", stdout);
1.54 schwarze 612: st->oust = OUST_MAC;
1.56 schwarze 613: st->wantws = wantws = 1;
1.31 schwarze 614: }
615:
616: outbuf_flush(st);
617:
1.56 schwarze 618: /* Whitespace is easier to suppress on macro lines. */
1.31 schwarze 619:
1.56 schwarze 620: if (OUST_MAC == st->oust && ! wantws)
1.54 schwarze 621: printf(" Ns");
1.31 schwarze 622:
623: /* Unless we are on a macro line, start one. */
624:
1.54 schwarze 625: if (OUST_MAC != st->oust) {
1.32 schwarze 626: if (OUST_NL != st->oust)
1.54 schwarze 627: mdoc_newln(st);
1.1 schwarze 628: putchar('.');
1.54 schwarze 629: st->oust = OUST_MAC;
1.31 schwarze 630: } else
1.1 schwarze 631: putchar(' ');
1.54 schwarze 632: st->wantws = 1;
1.31 schwarze 633:
1.32 schwarze 634: /*
635: * Print the macro corresponding to this format code,
636: * and update the output state afterwards.
637: */
1.6 kristaps 638:
1.1 schwarze 639: switch (fmt) {
640: case (FMT_ITALIC):
1.56 schwarze 641: fputs("Em", stdout);
1.1 schwarze 642: break;
643: case (FMT_BOLD):
1.14 kristaps 644: if (SECT_SYNOPSIS == st->sect) {
645: if (1 == dsz && '-' == buf[*start])
646: dosynopsisfl(buf, start, end);
1.15 kristaps 647: else if (0 == pos)
1.56 schwarze 648: fputs("Nm", stdout);
1.14 kristaps 649: else
1.56 schwarze 650: fputs("Ar", stdout);
1.14 kristaps 651: break;
1.39 schwarze 652: }
653: i = 0;
654: uc = buf[*start];
655: while (isalnum(uc) || '_' == uc || ' ' == uc)
656: uc = buf[*start + ++i];
657: if ('=' != uc && '>' != uc)
658: i = 0;
659: if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
1.56 schwarze 660: fputs("Dv", stdout);
1.38 schwarze 661: break;
662: }
1.39 schwarze 663: switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
664: case MDOC_Fa:
1.56 schwarze 665: fputs("Fa", stdout);
1.39 schwarze 666: break;
667: case MDOC_Vt:
1.56 schwarze 668: fputs("Vt", stdout);
1.39 schwarze 669: break;
670: default:
1.56 schwarze 671: fputs("Sy", stdout);
1.39 schwarze 672: break;
673: }
1.1 schwarze 674: break;
675: case (FMT_CODE):
1.56 schwarze 676: fputs("Qo Li", stdout);
1.1 schwarze 677: break;
678: case (FMT_LINK):
1.19 kristaps 679: /* Try to link; use "No" if it's empty. */
1.9 kristaps 680: if ( ! trylink(buf, start, end, dsz))
1.56 schwarze 681: fputs("No", stdout);
1.1 schwarze 682: break;
683: case (FMT_FILE):
1.56 schwarze 684: fputs("Pa", stdout);
1.1 schwarze 685: break;
686: case (FMT_NBSP):
1.56 schwarze 687: fputs("No", stdout);
1.1 schwarze 688: break;
689: default:
690: abort();
691: }
1.56 schwarze 692: } else {
1.31 schwarze 693: outbuf_flush(st);
1.56 schwarze 694: st->wantws = 0;
695: }
1.1 schwarze 696:
697: /*
1.6 kristaps 698: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 699: * find a nested format code.
1.1 schwarze 700: * Don't emit any newlines: since we're on a macro line, we
701: * don't want to break the line.
702: */
1.56 schwarze 703:
704: gotmacro = 0;
1.1 schwarze 705: while (*start < end) {
1.5 kristaps 706: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 707: (*start)++;
708: break;
1.5 kristaps 709: } else if ('>' == buf[*start] &&
710: ' ' == buf[*start - 1]) {
711: /*
712: * Handle custom delimiters.
713: * These require a certain number of
714: * space-preceded carrots before we're really at
715: * the end.
716: */
717: i = *start;
718: for (j = 0; i < end && j < dsz; j++)
719: if ('>' != buf[i++])
720: break;
721: if (dsz == j) {
722: *start += dsz;
723: break;
724: }
1.1 schwarze 725: }
1.34 schwarze 726: if (*start + 1 < end && '<' == buf[*start + 1] &&
727: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.56 schwarze 728: gotmacro = formatcode(st, buf,
729: start, end, nomacro, 1);
1.1 schwarze 730: continue;
731: }
1.3 schwarze 732:
1.32 schwarze 733: /* Suppress newlines and multiple spaces. */
734:
735: last = buf[(*start)++];
1.56 schwarze 736: if (isspace(last)) {
737: outbuf_flush(st);
738: st->wantws = 1;
739: gotmacro = 0;
740: while (*start < end &&
741: isspace((unsigned char)buf[*start]))
1.32 schwarze 742: (*start)++;
743: continue;
744: }
745:
1.33 schwarze 746: if (OUST_MAC == st->oust && FMT__MAX != fmt) {
1.56 schwarze 747: if (gotmacro && ! st->wantws) {
748: printf(" Ns");
1.32 schwarze 749: st->wantws = 1;
750: }
1.56 schwarze 751: gotmacro = 0;
1.32 schwarze 752:
753: /*
754: * Escape macro-like words.
755: * This matches "Xx " and "XxEOLN".
756: */
757:
1.56 schwarze 758: if (*start < end && ! st->outbuflen &&
759: isupper(last) &&
1.32 schwarze 760: islower((unsigned char)buf[*start]) &&
761: (end - *start == 1 ||
762: ' ' == buf[*start + 1] ||
763: '>' == buf[*start + 1]))
1.56 schwarze 764: outbuf_addstr(st, "\\&");
765: last = buf[*start - 1];
1.32 schwarze 766: }
1.56 schwarze 767: outbuf_addchar(st);
768: }
1.3 schwarze 769:
1.56 schwarze 770: if (FMT__MAX == fmt)
771: return(0);
1.4 schwarze 772:
1.56 schwarze 773: outbuf_flush(st);
1.2 schwarze 774:
775: if ( ! nomacro && FMT_CODE == fmt)
1.56 schwarze 776: fputs(" Qc", stdout);
1.1 schwarze 777:
1.33 schwarze 778: st->wantws = ' ' == last;
1.56 schwarze 779: return(1);
1.1 schwarze 780: }
781:
782: /*
783: * Calls formatcode() til the end of a paragraph.
1.32 schwarze 784: * Goes to OUST_MAC mode and stays there when returning,
785: * such that the caller can add arguments to the macro line
786: * before closing it out.
1.1 schwarze 787: */
788: static void
1.32 schwarze 789: formatcodeln(struct state *st, const char *linemac,
790: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 791: {
1.56 schwarze 792: int gotmacro;
1.1 schwarze 793:
1.32 schwarze 794: assert(OUST_NL == st->oust);
795: assert(st->wantws);
1.56 schwarze 796: printf(".%s", linemac);
1.32 schwarze 797: st->oust = OUST_MAC;
798:
1.33 schwarze 799: gotmacro = 0;
1.1 schwarze 800: while (*start < end) {
1.34 schwarze 801: if (*start + 1 < end && '<' == buf[*start + 1] &&
802: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.33 schwarze 803: gotmacro = formatcode(st, buf,
804: start, end, nomacro, 1);
1.1 schwarze 805: continue;
806: }
1.32 schwarze 807:
1.56 schwarze 808: /* Suppress newlines and multiple spaces. */
809:
810: last = buf[(*start)++];
811: if (isspace(last)) {
812: outbuf_flush(st);
813: st->wantws = 1;
814: while (*start < end &&
815: isspace((unsigned char)buf[*start]))
816: (*start)++;
817: continue;
818: }
819:
1.33 schwarze 820: if (gotmacro) {
1.56 schwarze 821: if (*start < end) {
822: if (st->wantws)
823: printf(" No");
1.33 schwarze 824: else
1.56 schwarze 825: printf(" Ns");
1.33 schwarze 826: }
1.56 schwarze 827: st->wantws = 1;
1.33 schwarze 828: gotmacro = 0;
829: }
1.32 schwarze 830:
1.4 schwarze 831: /*
832: * Since we're already on a macro line, we want to make
833: * sure that we don't inadvertently invoke a macro.
834: * We need to do this carefully because section names
835: * are used in troff and we don't want to escape
836: * something that needn't be escaped.
837: */
1.56 schwarze 838: if (*start < end && ! st->outbuflen && isupper(last) &&
839: islower((unsigned char)buf[*start]) &&
840: (end - *start == 1 || ' ' == buf[*start + 1])) {
841: outbuf_addstr(st, "\\&");
842: last = buf[*start - 1];
843: }
844: outbuf_addchar(st);
1.1 schwarze 845: }
1.56 schwarze 846: outbuf_flush(st);
847: st->wantws = 1;
1.1 schwarze 848: }
849:
850: /*
1.4 schwarze 851: * Guess at what kind of list we are.
852: * These are taken straight from the POD manual.
853: * I don't know what people do in real life.
854: */
855: static enum list
856: listguess(const char *buf, size_t start, size_t end)
857: {
858: size_t len = end - start;
859:
860: assert(end >= start);
861:
862: if (len == 1 && '*' == buf[start])
863: return(LIST_BULLET);
864: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
865: return(LIST_ENUM);
866: else if (len == 1 && '1' == buf[start])
867: return(LIST_ENUM);
868: else
869: return(LIST_TAG);
870: }
871:
872: /*
1.1 schwarze 873: * A command paragraph, as noted in the perlpod manual, just indicates
874: * that we should do something, optionally with some text to print as
875: * well.
1.32 schwarze 876: * From the perspective of external callers,
877: * always stays in OUST_NL/wantws mode,
878: * but its children do use OUST_MAC.
1.1 schwarze 879: */
880: static void
881: command(struct state *st, const char *buf, size_t start, size_t end)
882: {
883: size_t len, csz;
884: enum cmd cmd;
885:
886: assert('=' == buf[start]);
887: start++;
888: len = end - start;
889:
890: for (cmd = 0; cmd < CMD__MAX; cmd++) {
891: csz = strlen(cmds[cmd]);
892: if (len < csz)
893: continue;
894: if (0 == memcmp(&buf[start], cmd[cmds], csz))
895: break;
896: }
897:
898: /* Ignore bogus commands. */
899:
900: if (CMD__MAX == cmd)
901: return;
902:
903: start += csz;
1.8 kristaps 904: while (start < end && ' ' == buf[start])
905: start++;
906:
1.1 schwarze 907: len = end - start;
908:
909: if (st->paused) {
910: st->paused = CMD_END != cmd;
911: return;
912: }
913:
914: switch (cmd) {
915: case (CMD_POD):
916: break;
917: case (CMD_HEAD1):
918: /*
919: * The behaviour of head= follows from a quick glance at
920: * how pod2man handles it.
921: */
1.11 kristaps 922: st->sect = SECT_NONE;
923: if (end - start == 4) {
1.1 schwarze 924: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 925: st->sect = SECT_NAME;
926: } else if (end - start == 8) {
927: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
928: st->sect = SECT_SYNOPSIS;
929: }
1.32 schwarze 930: formatcodeln(st, "Sh", buf, &start, end, 1);
931: mdoc_newln(st);
1.1 schwarze 932: st->haspar = 1;
933: break;
934: case (CMD_HEAD2):
1.32 schwarze 935: formatcodeln(st, "Ss", buf, &start, end, 1);
936: mdoc_newln(st);
1.1 schwarze 937: st->haspar = 1;
938: break;
939: case (CMD_HEAD3):
940: puts(".Pp");
1.32 schwarze 941: formatcodeln(st, "Em", buf, &start, end, 0);
942: mdoc_newln(st);
1.1 schwarze 943: puts(".Pp");
944: st->haspar = 1;
945: break;
946: case (CMD_HEAD4):
947: puts(".Pp");
1.32 schwarze 948: formatcodeln(st, "No", buf, &start, end, 0);
949: mdoc_newln(st);
1.1 schwarze 950: puts(".Pp");
951: st->haspar = 1;
952: break;
953: case (CMD_OVER):
1.4 schwarze 954: /*
955: * If we have an existing list that hasn't had an =item
956: * yet, then make sure that we open it now.
957: * We use the default list type, but that can't be
958: * helped (we haven't seen any items yet).
1.1 schwarze 959: */
1.4 schwarze 960: if (st->lpos > 0)
961: if (LIST__MAX == st->lstack[st->lpos - 1]) {
962: st->lstack[st->lpos - 1] = LIST_TAG;
963: puts(".Bl -tag -width Ds");
964: }
965: st->lpos++;
966: assert(st->lpos < LIST_STACKSZ);
967: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 968: break;
969: case (CMD_ITEM):
1.6 kristaps 970: if (0 == st->lpos) {
971: /*
972: * Bad markup.
973: * Try to compensate.
974: */
975: st->lstack[st->lpos] = LIST__MAX;
976: st->lpos++;
977: }
1.4 schwarze 978: assert(st->lpos > 0);
979: /*
980: * If we're the first =item, guess at what our content
981: * will be: "*" is a bullet list, "1." is a numbered
982: * list, and everything is tagged.
983: */
984: if (LIST__MAX == st->lstack[st->lpos - 1]) {
985: st->lstack[st->lpos - 1] =
986: listguess(buf, start, end);
987: switch (st->lstack[st->lpos - 1]) {
988: case (LIST_BULLET):
989: puts(".Bl -bullet");
990: break;
991: case (LIST_ENUM):
992: puts(".Bl -enum");
993: break;
994: default:
995: puts(".Bl -tag -width Ds");
996: break;
997: }
998: }
999: switch (st->lstack[st->lpos - 1]) {
1000: case (LIST_TAG):
1.32 schwarze 1001: formatcodeln(st, "It", buf, &start, end, 0);
1002: mdoc_newln(st);
1.4 schwarze 1003: break;
1004: case (LIST_ENUM):
1005: /* FALLTHROUGH */
1006: case (LIST_BULLET):
1007: /*
1008: * Abandon the remainder of the paragraph
1009: * because we're going to be a bulletted or
1010: * numbered list.
1011: */
1012: puts(".It");
1013: break;
1014: default:
1015: abort();
1016: }
1.1 schwarze 1017: st->haspar = 1;
1018: break;
1019: case (CMD_BACK):
1.4 schwarze 1020: /* Make sure we don't back over the stack. */
1021: if (st->lpos > 0) {
1022: st->lpos--;
1023: puts(".El");
1024: }
1.1 schwarze 1025: break;
1026: case (CMD_BEGIN):
1027: /*
1028: * We disregard all types for now.
1029: * TODO: process at least "text" in a -literal block.
1030: */
1031: st->paused = 1;
1032: break;
1033: case (CMD_FOR):
1034: /*
1035: * We ignore all types of encodings and formats
1036: * unilaterally.
1037: */
1038: break;
1039: case (CMD_ENCODING):
1040: break;
1041: case (CMD_CUT):
1042: st->parsing = 0;
1043: return;
1044: default:
1045: abort();
1046: }
1047:
1048: /* Any command (but =cut) makes us start parsing. */
1049: st->parsing = 1;
1050: }
1051:
1052: /*
1.39 schwarze 1053: * Put the type provided as an argument into the dictionary.
1054: */
1055: static void
1056: register_type(const char *ptype)
1057: {
1058: const char *pname, *pend;
1059:
1060: pname = ptype;
1061: while (isalnum((unsigned char)*pname) || '_' == *pname)
1062: pname++;
1063: if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) ||
1064: (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) {
1065: while (' ' == *pname)
1066: pname++;
1067: pend = pname;
1068: while (isalnum((unsigned char)*pend) || '_' == *pend)
1069: pend++;
1070: if (pend > pname)
1071: dict_put(pname, pend - pname, MDOC_Vt);
1072: } else
1073: pend = pname;
1074: if (pend > ptype)
1075: dict_put(ptype, pend - ptype, MDOC_Vt);
1076: }
1077:
1078: /*
1.1 schwarze 1079: * Just pump out the line in a verbatim block.
1.32 schwarze 1080: * From the perspective of external callers,
1081: * always stays in OUST_NL/wantws mode.
1.1 schwarze 1082: */
1083: static void
1.35 schwarze 1084: verbatim(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1085: {
1.36 schwarze 1086: size_t i, ift, ifo, ifa, ifc, inl;
1.38 schwarze 1087: char *cp, *cp2;
1.53 schwarze 1088: int indisplay, nopen, wantsp;
1.1 schwarze 1089:
1.53 schwarze 1090: if (st->paused || ! st->parsing)
1.1 schwarze 1091: return;
1.53 schwarze 1092:
1093: indisplay = wantsp = 0;
1094:
1.22 kristaps 1095: again:
1.53 schwarze 1096: if (start == end) {
1097: if (indisplay)
1098: puts(".Ed");
1099: return;
1100: }
1101:
1102: if ('\n' == buf[start]) {
1103: wantsp = 1;
1104: start++;
1105: goto again;
1106: }
1107:
1.22 kristaps 1108: /*
1109: * If we're in the SYNOPSIS, see if we're an #include block.
1110: * If we are, then print the "In" macro and re-loop.
1111: * This handles any number of inclusions, but only when they
1112: * come before the remaining parts...
1113: */
1114: if (SECT_SYNOPSIS == st->sect) {
1115: i = start;
1.35 schwarze 1116: while (i < end && buf[i] == ' ')
1117: i++;
1.22 kristaps 1118: if (i == end)
1.53 schwarze 1119: goto again;
1.35 schwarze 1120:
1.22 kristaps 1121: /* We're an include block! */
1122: if (end - i > 10 &&
1123: 0 == memcmp(&buf[i], "#include <", 10)) {
1124: start = i + 10;
1125: while (start < end && ' ' == buf[start])
1126: start++;
1.53 schwarze 1127: if (indisplay)
1128: puts(".Ed");
1129: indisplay = wantsp = 0;
1.22 kristaps 1130: fputs(".In ", stdout);
1131: /* Stop til the '>' marker or we hit eoln. */
1132: while (start < end &&
1133: '>' != buf[start] && '\n' != buf[start])
1134: putchar(buf[start++]);
1135: putchar('\n');
1136: if (start < end && '>' == buf[start])
1137: start++;
1138: if (start < end && '\n' == buf[start])
1139: start++;
1.41 schwarze 1140: goto again;
1141: }
1142:
1143: /* Other preprocessor directives. */
1144: if ('#' == buf[i]) {
1.53 schwarze 1145: if (indisplay)
1146: puts(".Ed");
1147: indisplay = wantsp = 0;
1.41 schwarze 1148: fputs(".Fd ", stdout);
1149: start = i;
1150: while(start < end && '\n' != buf[start])
1151: putchar(buf[start++]);
1152: putchar('\n');
1153: if (start < end && '\n' == buf[start])
1154: start++;
1.49 schwarze 1155:
1156: /* Remember #define for Dv or Fn. */
1157:
1158: if (strncmp(buf + i + 1, "define", 6) ||
1159: ! isspace((unsigned char)buf[i + 7]))
1160: goto again;
1161:
1162: ifo = i + 7;
1163: while (ifo < start &&
1164: isspace((unsigned char)buf[ifo]))
1165: ifo++;
1166: ifa = ifo;
1167: while ('_' == buf[ifa] ||
1168: isalnum((unsigned char)buf[ifa]))
1169: ifa++;
1170: dict_put(buf + ifo, ifa - ifo,
1171: '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv);
1172:
1.41 schwarze 1173: goto again;
1.22 kristaps 1174: }
1.35 schwarze 1175:
1176: /* Parse function declaration. */
1177: ifo = ifa = ifc = 0;
1.36 schwarze 1178: inl = end;
1179: nopen = 0;
1180: for (ift = i; i < end; i++) {
1181: if (ifc) {
1182: if (buf[i] != '\n')
1183: continue;
1184: inl = i;
1185: break;
1186: }
1187: switch (buf[i]) {
1.45 schwarze 1188: case '\t':
1189: /* FALLTHROUGH */
1.36 schwarze 1190: case ' ':
1191: if ( ! ifa)
1192: ifo = i;
1193: break;
1194: case '(':
1195: if (ifo) {
1196: nopen++;
1197: if ( ! ifa)
1198: ifa = i;
1199: } else
1200: i = end;
1201: break;
1202: case ')':
1203: switch (nopen) {
1204: case 0:
1205: i = end;
1206: break;
1207: case 1:
1.35 schwarze 1208: ifc = i;
1.36 schwarze 1209: break;
1210: default:
1211: nopen--;
1212: break;
1213: }
1214: break;
1215: default:
1216: break;
1217: }
1.35 schwarze 1218: }
1219:
1220: /* Encode function declaration. */
1221: if (ifc) {
1.36 schwarze 1222: for (i = ifa; i < ifc; i++)
1223: if (buf[i] == '\n')
1224: buf[i] = ' ';
1.35 schwarze 1225: buf[ifo++] = '\0';
1.39 schwarze 1226: register_type(buf + ift);
1.53 schwarze 1227: if (indisplay)
1228: puts(".Ed");
1229: indisplay = wantsp = 0;
1.35 schwarze 1230: printf(".Ft %s", buf + ift);
1231: if (buf[ifo] == '*') {
1232: fputs(" *", stdout);
1233: ifo++;
1234: }
1235: putchar('\n');
1236: buf[ifa++] = '\0';
1237: printf(".Fo %s\n", buf + ifo);
1.39 schwarze 1238: dict_put(buf + ifo, 0, MDOC_Fo);
1.35 schwarze 1239: buf[ifc++] = '\0';
1240: for (;;) {
1241: cp = strchr(buf + ifa, ',');
1.38 schwarze 1242: if (cp != NULL) {
1243: cp2 = cp;
1.36 schwarze 1244: *cp++ = '\0';
1.38 schwarze 1245: } else
1246: cp2 = strchr(buf + ifa, '\0');
1247: while (isalnum((unsigned char)cp2[-1]) ||
1248: '_' == cp2[-1])
1249: cp2--;
1250: if ('\0' != *cp2)
1.39 schwarze 1251: dict_put(cp2, 0, MDOC_Fa);
1252: register_type(buf + ifa);
1.50 schwarze 1253: if (strchr(buf + ifa, ' ') == NULL)
1254: printf(".Fa %s\n", buf + ifa);
1255: else
1256: printf(".Fa \"%s\"\n", buf + ifa);
1.35 schwarze 1257: if (cp == NULL)
1258: break;
1.45 schwarze 1259: while (*cp == ' ' || *cp == '\t')
1.36 schwarze 1260: cp++;
1261: ifa = cp - buf;
1.35 schwarze 1262: }
1263: puts(".Fc");
1264: if (buf[ifc] == ';')
1265: ifc++;
1.36 schwarze 1266: if (ifc < inl) {
1267: buf[inl] = '\0';
1.35 schwarze 1268: puts(buf + ifc);
1269: }
1.53 schwarze 1270: start = inl < end ? inl + 1 : end;
1271: goto again;
1.35 schwarze 1272: }
1.22 kristaps 1273: }
1.53 schwarze 1274:
1275: if ( ! indisplay)
1276: puts(".Bd -literal");
1277: else if (wantsp)
1278: putchar('\n');
1279: indisplay = 1;
1280: wantsp = 0;
1281:
1282: for (last = '\n'; start < end; start++) {
1.8 kristaps 1283: /*
1284: * Handle accidental macros (newline starting with
1285: * control character) and escapes.
1286: */
1.53 schwarze 1287: if ('\n' == last) {
1288: if ('\n' == buf[start])
1289: goto again;
1.7 kristaps 1290: if ('.' == buf[start] || '\'' == buf[start])
1291: printf("\\&");
1.53 schwarze 1292: }
1.8 kristaps 1293: putchar(last = buf[start]);
1294: if ('\\' == buf[start])
1295: printf("e");
1.7 kristaps 1296: }
1.53 schwarze 1297: if ('\n' != last)
1298: putchar('\n');
1299: if (indisplay)
1300: puts(".Ed");
1.1 schwarze 1301: }
1302:
/*
 * Scan buf[start..end) for a closing square bracket
 * that has no opening partner within the scanned range.
 * Return 1 if there is one, 0 otherwise.
 * See dosynopsisop().
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 pos, depth;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case '[':
			/* A nested pair opens. */
			depth++;
			break;
		case ']':
			/* Either our own match, or a nested pair closes. */
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
1320:
1321: /*
1322: * If we're in the SYNOPSIS section and we've encounter braces in an
1323: * ordinary paragraph, then try to see whether we're an [-option].
1324: * Do this, if we're an opening bracket, by first seeing if we have a
1325: * matching end via hasmatch().
1326: * If we're an ending bracket, see if we have a stack already.
1327: */
1328: static int
1.32 schwarze 1329: dosynopsisop(struct state *st, const char *buf,
1330: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1331: {
1332:
1333: assert('[' == buf[*start] || ']' == buf[*start]);
1334:
1335: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 schwarze 1336: mdoc_newln(st);
1.13 kristaps 1337: puts(".Oo");
1338: (*opstack)++;
1339: } else if ('[' == buf[*start])
1340: return(0);
1341:
1342: if (']' == buf[*start] && *opstack > 0) {
1.32 schwarze 1343: mdoc_newln(st);
1.13 kristaps 1344: puts(".Oc");
1345: (*opstack)--;
1346: } else if (']' == buf[*start])
1347: return(0);
1348:
1349: (*start)++;
1.31 schwarze 1350: last = '\n';
1.13 kristaps 1351: while (' ' == buf[*start])
1352: (*start)++;
1353: return(1);
1354: }
1355:
1356: /*
1.17 kristaps 1357: * Format multiple "Nm" manpage names in the NAME section.
1.32 schwarze 1358: * From the perspective of external callers,
1359: * always stays in OUST_NL/wantws mode,
1360: * but its children do use OUST_MAC.
1.17 kristaps 1361: */
1362: static void
1363: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1364: {
1365: size_t word;
1366:
1.32 schwarze 1367: assert(OUST_NL == st->oust);
1368: assert(st->wantws);
1369:
1.47 schwarze 1370: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1371: (*start)++;
1372:
1373: if (end == *start) {
1374: puts(".Nm unknown");
1375: return;
1376: }
1377:
1378: while (*start < end) {
1379: for (word = *start; word < end; word++)
1380: if (',' == buf[word])
1381: break;
1.32 schwarze 1382: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1383: if (*start == end) {
1.32 schwarze 1384: mdoc_newln(st);
1385: break;
1.17 kristaps 1386: }
1387: assert(',' == buf[*start]);
1.32 schwarze 1388: printf(" ,");
1389: mdoc_newln(st);
1.17 kristaps 1390: (*start)++;
1.47 schwarze 1391: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1392: (*start)++;
1393: }
1394: }
1395:
1396: /*
1.1 schwarze 1397: * Ordinary paragraph.
1398: * Well, this is really the hardest--POD seems to assume that, for
1399: * example, a leading space implies a newline, and so on.
1400: * Lots of other snakes in the grass: escaping a newline followed by a
1401: * period (accidental mdoc(7) control), double-newlines after macro
1402: * passages, etc.
1.32 schwarze 1403: *
1404: * Uses formatcode() to go to OUST_MAC mode
1405: * and outbuf_flush() to go to OUST_TXT mode.
1.40 schwarze 1406: * In text mode, wantws requests white space before the text
1407: * currently contained in the outbuf, not before upcoming text.
1.32 schwarze 1408: * Must make sure to go back to OUST_NL/wantws mode before returning.
1.1 schwarze 1409: */
1410: static void
1411: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1412: {
1.44 schwarze 1413: size_t i, j, opstack, wend;
1.43 schwarze 1414: enum mdoc_type mtype;
1.44 schwarze 1415: int eos, noeos, seq;
1.49 schwarze 1416: char savechar;
1.1 schwarze 1417:
1418: if ( ! st->parsing || st->paused)
1419: return;
1420:
1421: /*
1422: * Special-case: the NAME section.
1423: * If we find a "-" when searching from the end, assume that
1424: * we're in "name - description" format.
1425: * To wit, print out a "Nm" and "Nd" in that format.
1426: */
1.11 kristaps 1427: if (SECT_NAME == st->sect) {
1.15 kristaps 1428: for (i = end - 2; i > start; i--)
1.47 schwarze 1429: if ('-' == buf[i] &&
1430: isspace((unsigned char)buf[i + 1]))
1.1 schwarze 1431: break;
1432: if ('-' == buf[i]) {
1433: j = i;
1434: /* Roll over multiple "-". */
1435: for ( ; i > start; i--)
1436: if ('-' != buf[i])
1437: break;
1.17 kristaps 1438: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1439: start = j + 1;
1.47 schwarze 1440: while (start < end &&
1441: isspace((unsigned char)buf[start]))
1.17 kristaps 1442: start++;
1.57 ! schwarze 1443: while (start < end && '.' == buf[end - 1])
! 1444: end--;
1.32 schwarze 1445: formatcodeln(st, "Nd", buf, &start, end, 1);
1446: mdoc_newln(st);
1.1 schwarze 1447: return;
1448: }
1449: }
1450:
1451: if ( ! st->haspar)
1452: puts(".Pp");
1453:
1454: st->haspar = 0;
1455: last = '\n';
1.13 kristaps 1456: opstack = 0;
1.1 schwarze 1457:
1.15 kristaps 1458: for (seq = 0; start < end; seq++) {
1.1 schwarze 1459: /*
1460: * Loop til we get either to a newline or escape.
1461: * Escape initial control characters.
1462: */
1463: while (start < end) {
1.34 schwarze 1464: if (start < end - 1 && '<' == buf[start + 1] &&
1465: 'A' <= buf[start] && 'Z' >= buf[start])
1.1 schwarze 1466: break;
1467: else if ('\n' == buf[start])
1468: break;
1469: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1470: outbuf_addstr(st, "\\&");
1.1 schwarze 1471: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1472: outbuf_addstr(st, "\\&");
1.12 kristaps 1473: /*
1474: * If we're in the SYNOPSIS, have square
1475: * brackets indicate that we're opening and
1476: * closing an optional context.
1477: */
1.32 schwarze 1478:
1.13 kristaps 1479: if (SECT_SYNOPSIS == st->sect &&
1480: ('[' == buf[start] ||
1481: ']' == buf[start]) &&
1.32 schwarze 1482: dosynopsisop(st, buf,
1483: &start, end, &opstack))
1.13 kristaps 1484: continue;
1.32 schwarze 1485:
1.42 schwarze 1486: /* Merely buffer non-whitespace. */
1.32 schwarze 1487:
1.31 schwarze 1488: last = buf[start++];
1.44 schwarze 1489: if ( ! isspace(last))
1.37 schwarze 1490: outbuf_addchar(st);
1.44 schwarze 1491: if (start < end &&
1.52 schwarze 1492: ! isspace((unsigned char)buf[start - 1]) &&
1.44 schwarze 1493: ! isspace((unsigned char)buf[start]))
1.37 schwarze 1494: continue;
1495:
1.44 schwarze 1496: /*
1497: * Found the end of a word.
1498: * Rewind trailing delimiters.
1499: */
1500:
1501: eos = noeos = 0;
1502: for (wend = st->outbuflen; wend; wend--)
1503: if ('.' == st->outbuf[wend - 1] ||
1504: '!' == st->outbuf[wend - 1] ||
1505: '?' == st->outbuf[wend - 1])
1506: eos = 1;
1507: else if ('|' == st->outbuf[wend - 1] ||
1508: ',' == st->outbuf[wend - 1] ||
1509: ';' == st->outbuf[wend - 1] ||
1510: ':' == st->outbuf[wend - 1])
1511: noeos = 1;
1512: else if ('\'' != st->outbuf[wend - 1] &&
1513: '"' != st->outbuf[wend - 1] &&
1514: ')' != st->outbuf[wend - 1] &&
1515: ']' != st->outbuf[wend - 1])
1516: break;
1517: eos &= ! noeos;
1518:
1519: /*
1520: * Detect function names.
1521: */
1.42 schwarze 1522:
1.43 schwarze 1523: mtype = MDOC_Fa;
1.49 schwarze 1524: savechar = '\0';
1.44 schwarze 1525: if (wend && ')' == st->outbuf[wend] &&
1526: '(' == st->outbuf[wend - 1]) {
1527: mtype = dict_get(st->outbuf, --wend);
1.49 schwarze 1528: if (MDOC_Dv == mtype)
1529: mtype = MDOC_Fo;
1.43 schwarze 1530: if (MDOC_Fo == mtype || MDOC_MAX == mtype) {
1.44 schwarze 1531: st->outbuflen = wend;
1532: st->outbuf[wend] = '\0';
1.43 schwarze 1533: mdoc_newln(st);
1534: if (MDOC_Fo == mtype)
1.56 schwarze 1535: fputs(".Fn", stdout);
1.43 schwarze 1536: else
1.56 schwarze 1537: fputs(".Xr", stdout);
1.43 schwarze 1538: st->oust = OUST_MAC;
1539: }
1.49 schwarze 1540: } else {
1541: mtype = dict_get(st->outbuf, wend);
1542: if (MDOC_Dv == mtype) {
1543: savechar = st->outbuf[wend];
1544: st->outbuf[wend] = '\0';
1545: mdoc_newln(st);
1.56 schwarze 1546: fputs(".Dv", stdout);
1.49 schwarze 1547: st->oust = OUST_MAC;
1548: } else
1549: mtype = MDOC_Fa;
1.37 schwarze 1550: }
1551:
1.42 schwarze 1552: /*
1553: * On whitespace, flush the output buffer
1554: * and allow breaking to a macro line.
1555: */
1556:
1.37 schwarze 1557: outbuf_flush(st);
1.42 schwarze 1558:
1559: /*
1560: * End macro lines, and
1561: * end text lines at the end of sentences.
1562: */
1563:
1.44 schwarze 1564: if (OUST_MAC == st->oust || (eos && wend > 1 &&
1565: islower((unsigned char)st->outbuf[wend - 1]))) {
1.43 schwarze 1566: if (MDOC_MAX == mtype)
1567: fputs(" 3", stdout);
1.49 schwarze 1568: if (MDOC_Fa != mtype) {
1569: if (MDOC_Dv == mtype)
1570: st->outbuf[wend] = savechar;
1571: else
1572: wend += 2;
1573: while ('\0' != st->outbuf[wend])
1.44 schwarze 1574: printf(" %c",
1.49 schwarze 1575: st->outbuf[wend++]);
1576: }
1.40 schwarze 1577: mdoc_newln(st);
1.43 schwarze 1578: }
1.42 schwarze 1579:
1580: /* Advance to the next word. */
1581:
1.44 schwarze 1582: while ('\n' != buf[start] &&
1583: isspace((unsigned char)buf[start]))
1.42 schwarze 1584: start++;
1585: st->wantws = 1;
1.1 schwarze 1586: }
1587:
1.34 schwarze 1588: if (start < end - 1 && '<' == buf[start + 1] &&
1589: 'A' <= buf[start] && 'Z' >= buf[start]) {
1.32 schwarze 1590: formatcode(st, buf, &start, end, 0, seq);
1591: if (OUST_MAC == st->oust) {
1.30 schwarze 1592: /*
1593: * Let mdoc(7) handle trailing punctuation.
1594: * XXX Some punctuation characters
1595: * are not handled yet.
1596: */
1.51 schwarze 1597: if ((start == end - 1 ||
1598: (start < end - 1 &&
1599: (' ' == buf[start + 1] ||
1600: '\n' == buf[start + 1]))) &&
1601: NULL != strchr("|.,;:?!)]", buf[start])) {
1.16 kristaps 1602: putchar(' ');
1603: putchar(buf[start++]);
1604: }
1.32 schwarze 1605:
1606: if (st->wantws ||
1607: ' ' == buf[start] ||
1608: '\n' == buf[start])
1609: mdoc_newln(st);
1610:
1.30 schwarze 1611: /*
1612: * Consume all whitespace
1613: * so we don't accidentally start
1614: * an implicit literal line.
1615: */
1.32 schwarze 1616:
1.6 kristaps 1617: while (start < end && ' ' == buf[start])
1618: start++;
1.32 schwarze 1619:
1620: /*
1621: * Some text is following.
1622: * Implement requested spacing.
1623: */
1624:
1625: if ( ! st->wantws && start < end &&
1.34 schwarze 1626: ('<' != buf[start + 1] ||
1627: 'A' > buf[start] ||
1628: 'Z' < buf[start])) {
1.56 schwarze 1629: fputs(" Ns", stdout);
1.32 schwarze 1630: st->wantws = 1;
1631: }
1.6 kristaps 1632: }
1.1 schwarze 1633: } else if (start < end && '\n' == buf[start]) {
1.32 schwarze 1634: outbuf_flush(st);
1635: mdoc_newln(st);
1.1 schwarze 1636: if (++start >= end)
1637: continue;
1638: /*
1639: * If we have whitespace next, eat it to prevent
1640: * mdoc(7) from thinking that it's meant for
1641: * verbatim text.
1642: * It is--but if we start with that, we can't
1643: * have a macro subsequent it, which may be
1644: * possible if we have an escape next.
1645: */
1.31 schwarze 1646: if (' ' == buf[start] || '\t' == buf[start])
1.1 schwarze 1647: puts(".br");
1648: for ( ; start < end; start++)
1649: if (' ' != buf[start] && '\t' != buf[start])
1650: break;
1.12 kristaps 1651: }
1.1 schwarze 1652: }
1.32 schwarze 1653: outbuf_flush(st);
1654: mdoc_newln(st);
1.1 schwarze 1655: }
1656:
1657: /*
1658: * There are three kinds of paragraphs: verbatim (starts with whitespace
1659: * of some sort), ordinary (starts without "=" marker), or a command
1660: * (default: starts with "=").
1661: */
1662: static void
1.35 schwarze 1663: dopar(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1664: {
1665:
1.32 schwarze 1666: assert(OUST_NL == st->oust);
1667: assert(st->wantws);
1668:
1.1 schwarze 1669: if (end == start)
1670: return;
1671: if (' ' == buf[start] || '\t' == buf[start])
1672: verbatim(st, buf, start, end);
1673: else if ('=' != buf[start])
1674: ordinary(st, buf, start, end);
1675: else
1676: command(st, buf, start, end);
1677: }
1678:
1679: /*
1680: * Loop around paragraphs within a document, processing each one in the
1681: * POD way.
1682: */
1683: static void
1684: dofile(const struct args *args, const char *fname,
1.35 schwarze 1685: const struct tm *tm, char *buf, size_t sz)
1.1 schwarze 1686: {
1.29 schwarze 1687: char datebuf[64];
1.1 schwarze 1688: struct state st;
1.46 schwarze 1689: const char *fbase, *fext, *section, *date, *format;
1.1 schwarze 1690: char *title, *cp;
1.53 schwarze 1691: size_t cur, end;
1692: int verb;
1.1 schwarze 1693:
1694: if (0 == sz)
1695: return;
1696:
1.29 schwarze 1697: /*
1698: * Parsing the filename is almost always required,
1699: * except when both the title and the section
1700: * are provided on the command line.
1701: */
1702:
1703: if (NULL == args->title || NULL == args->section) {
1704: fbase = strrchr(fname, '/');
1705: if (NULL == fbase)
1706: fbase = fname;
1707: else
1708: fbase++;
1709: fext = strrchr(fbase, '.');
1710: } else
1711: fext = NULL;
1712:
1713: /*
1714: * The title will be converted to uppercase,
1715: * so it needs to be copied.
1716: */
1717:
1718: title = (NULL != args->title) ? strdup(args->title) :
1719: (NULL != fext) ? strndup(fbase, fext - fbase) :
1720: strdup(fbase);
1.1 schwarze 1721:
1722: if (NULL == title) {
1723: perror(NULL);
1724: exit(EXIT_FAILURE);
1725: }
1726:
1727: /* Section is 1 unless suffix is "pm". */
1728:
1.29 schwarze 1729: section = (NULL != args->section) ? args->section :
1730: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1731: PERL_SECTION;
1.1 schwarze 1732:
1733: /* Date. Or the given "tm" if not supplied. */
1734:
1.46 schwarze 1735: date = args->date;
1736: format = (NULL == date) ? "%B %d, %Y" :
1.48 schwarze 1737: strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $";
1.46 schwarze 1738:
1739: if (NULL != format) {
1740: strftime(datebuf, sizeof(datebuf), format, tm);
1.1 schwarze 1741: date = datebuf;
1742: }
1743:
1744: for (cp = title; '\0' != *cp; cp++)
1745: *cp = toupper((int)*cp);
1746:
1747: /* The usual mdoc(7) preamble. */
1748:
1749: printf(".Dd %s\n", date);
1750: printf(".Dt %s %s\n", title, section);
1751: puts(".Os");
1752:
1753: free(title);
1754:
1.37 schwarze 1755: dict_init();
1.1 schwarze 1756: memset(&st, 0, sizeof(struct state));
1.32 schwarze 1757: st.oust = OUST_NL;
1758: st.wantws = 1;
1759:
1.1 schwarze 1760: assert(sz > 0);
1761:
1762: /* Main loop over file contents. */
1763:
1.53 schwarze 1764: cur = 0;
1765: for (;;) {
1766: while (cur < sz && '\n' == buf[cur])
1767: cur++;
1768: if (cur >= sz)
1769: break;
1770:
1771: verb = isspace((unsigned char)buf[cur]);
1772:
1.1 schwarze 1773: /* Read until next paragraph. */
1.53 schwarze 1774:
1775: for (end = cur + 1; end + 1 < sz; end++)
1776: if ('\n' == buf[end] && '\n' == buf[end + 1] &&
1777: !(verb && end + 2 < sz &&
1778: isspace((unsigned char)buf[end + 2])))
1.1 schwarze 1779: break;
1780:
1781: /* Adjust end marker for EOF. */
1.53 schwarze 1782:
1783: if (end < sz && '\n' != buf[end])
1784: end++;
1.1 schwarze 1785:
1786: /* Process paragraph and adjust start. */
1.53 schwarze 1787:
1.1 schwarze 1788: dopar(&st, buf, cur, end);
1.53 schwarze 1789: cur = end + 2;
1.1 schwarze 1790: }
1.37 schwarze 1791: dict_destroy();
1.1 schwarze 1792: }
1793:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The date handed to dofile() is the modification time of the file,
 * or the current time for stdin or if fstat(2) fails.
 * Return 1 on success or 0 if the file cannot be opened or read;
 * running out of memory is fatal.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd, rc;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
	    open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * Consequently, at least one byte beyond
		 * the data read always remains allocated.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}

	/* Report read errors before anything can clobber errno. */

	if (ssz < 0) {
		perror(fname);
		rc = 0;
	} else {
		dofile(args, STDIN_FILENO == fd ?
		    "STDIN" : fname, tm, buf, cur);
		rc = 1;
	}

	/* Release the buffer and the descriptor on every path. */

	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(rc);
}
1860:
1861: int
1862: main(int argc, char *argv[])
1863: {
1864: const char *fname, *name;
1865: struct args args;
1866: int c;
1867:
1868: name = strrchr(argv[0], '/');
1869: if (name == NULL)
1870: name = argv[0];
1871: else
1872: ++name;
1873:
1874: memset(&args, 0, sizeof(struct args));
1875: fname = "-";
1876:
1877: /* Accept no arguments for now. */
1878:
1879: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1880: switch (c) {
1881: case ('h'):
1882: /* FALLTHROUGH */
1883: case ('l'):
1884: /* FALLTHROUGH */
1885: case ('c'):
1886: /* FALLTHROUGH */
1887: case ('o'):
1888: /* FALLTHROUGH */
1889: case ('q'):
1890: /* FALLTHROUGH */
1891: case ('r'):
1892: /* FALLTHROUGH */
1893: case ('u'):
1894: /* FALLTHROUGH */
1895: case ('v'):
1896: /* Ignore these. */
1897: break;
1898: case ('d'):
1899: args.date = optarg;
1900: break;
1901: case ('n'):
1902: args.title = optarg;
1903: break;
1904: case ('s'):
1905: args.section = optarg;
1906: break;
1907: default:
1908: goto usage;
1909: }
1910:
1911: argc -= optind;
1912: argv += optind;
1913:
1914: /* Accept only a single input file. */
1915:
1.25 schwarze 1916: if (argc > 1)
1917: goto usage;
1.1 schwarze 1918: else if (1 == argc)
1919: fname = *argv;
1920:
1921: return(readfile(&args, fname) ?
1922: EXIT_SUCCESS : EXIT_FAILURE);
1923:
1924: usage:
1925: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1926: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1927:
1928: return(EXIT_FAILURE);
1929: }
CVSweb