Annotation of pod2mdoc/pod2mdoc.c, Revision 1.53
1.53 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.52 2015/02/19 11:14:27 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.37 schwarze 4: * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/stat.h>
19: #include <sys/time.h>
20:
21: #include <assert.h>
22: #include <ctype.h>
23: #include <fcntl.h>
24: #include <getopt.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
1.37 schwarze 30: #include "dict.h"
31:
1.10 kristaps 32: /*
1.19 kristaps 33: * In what section can we find Perl module manuals?
34: * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
35: * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
1.10 kristaps 36: */
37: #define PERL_SECTION "3p"
38:
/*
 * Values that replace the ones otherwise written into the
 * "Dd" and "Dt" macros of the generated document.
 */
struct args {
	const char	*title;		/* replaces the "Dt" title */
	const char	*date;		/* replaces the "Dd" date */
	const char	*section;	/* replaces the "Dt" section */
};
44:
/*
 * Kinds of lists we can emit.
 * LIST__MAX doubles as the marker for a list whose kind
 * has not been decided yet.
 */
enum list {
	LIST_BULLET = 0,
	LIST_ENUM = 1,
	LIST_TAG = 2,
	LIST__MAX = 3
};
51:
/* Sections that receive special treatment. */
enum sect {
	SECT_NONE = 0,		/* any other section */
	SECT_NAME = 1,		/* NAME section */
	SECT_SYNOPSIS = 2	/* SYNOPSIS section */
};
57:
/* Where the mdoc(7) output stream currently stands. */
enum outstate {
	OUST_NL = 0,	/* at the very start of a new output line */
	OUST_TXT = 1,	/* in the middle of a text line */
	OUST_MAC = 2	/* in the middle of a macro line */
};
63:
1.1 schwarze 64: struct state {
1.31 schwarze 65: const char *fname; /* file being parsed */
1.1 schwarze 66: int parsing; /* after =cut of before command */
67: int paused; /* in =begin and before =end */
1.11 kristaps 68: enum sect sect; /* which section are we in? */
1.4 schwarze 69: #define LIST_STACKSZ 128
70: enum list lstack[LIST_STACKSZ]; /* open lists */
71: size_t lpos; /* where in list stack */
1.31 schwarze 72: int haspar; /* in paragraph: do we need Pp? */
1.32 schwarze 73: enum outstate oust; /* state of the mdoc output stream */
74: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 75: char *outbuf; /* text buffered for output */
76: size_t outbufsz; /* allocated size of outbuf */
77: size_t outbuflen; /* current length of outbuf */
1.1 schwarze 78: };
79:
/*
 * POD formatting codes, in the same order as the fmts[] table.
 * FMT__MAX is also used to mean "not a known code".
 */
enum fmt {
	FMT_ITALIC = 0,
	FMT_BOLD = 1,
	FMT_CODE = 2,
	FMT_LINK = 3,
	FMT_ESCAPE = 4,
	FMT_FILE = 5,
	FMT_NBSP = 6,
	FMT_INDEX = 7,
	FMT_NULL = 8,
	FMT__MAX = 9
};
92:
/*
 * POD command paragraphs, in the same order as the cmds[] table.
 */
enum cmd {
	CMD_POD = 0,
	CMD_HEAD1 = 1,
	CMD_HEAD2 = 2,
	CMD_HEAD3 = 3,
	CMD_HEAD4 = 4,
	CMD_OVER = 5,
	CMD_ITEM = 6,
	CMD_BACK = 7,
	CMD_BEGIN = 8,
	CMD_END = 9,
	CMD_FOR = 10,
	CMD_ENCODING = 11,
	CMD_CUT = 12,
	CMD__MAX = 13
};
109:
110: static const char *const cmds[CMD__MAX] = {
111: "pod", /* CMD_POD */
112: "head1", /* CMD_HEAD1 */
113: "head2", /* CMD_HEAD2 */
114: "head3", /* CMD_HEAD3 */
115: "head4", /* CMD_HEAD4 */
116: "over", /* CMD_OVER */
117: "item", /* CMD_ITEM */
118: "back", /* CMD_BACK */
119: "begin", /* CMD_BEGIN */
120: "end", /* CMD_END */
121: "for", /* CMD_FOR */
122: "encoding", /* CMD_ENCODING */
123: "cut" /* CMD_CUT */
124: };
125:
126: static const char fmts[FMT__MAX] = {
127: 'I', /* FMT_ITALIC */
128: 'B', /* FMT_BOLD */
129: 'C', /* FMT_CODE */
130: 'L', /* FMT_LINK */
131: 'E', /* FMT_ESCAPE */
132: 'F', /* FMT_FILE */
133: 'S', /* FMT_NBSP */
134: 'X', /* FMT_INDEX */
135: 'Z' /* FMT_NULL */
136: };
137:
/*
 * The character most recently handled by the output routines.
 * It is written by outbuf_addstr(), mdoc_newln(), formatcode() and
 * formatcodeln(); outbuf_addchar() appends it to the output buffer,
 * and the format code parsers test it to decide on whitespace
 * skipping and on escaping macro-like words.
 */
1.42 schwarze 138: static unsigned char last;
1.6 kristaps 139:
1.31 schwarze 140:
141: static void
142: outbuf_grow(struct state *st, size_t by)
143: {
144:
145: st->outbufsz += (by / 128 + 1) * 128;
146: st->outbuf = realloc(st->outbuf, st->outbufsz);
147: if (NULL == st->outbuf) {
148: perror(NULL);
149: exit(EXIT_FAILURE);
150: }
151: }
152:
153: static void
154: outbuf_addchar(struct state *st)
155: {
156:
157: if (st->outbuflen + 2 >= st->outbufsz)
158: outbuf_grow(st, 1);
159: st->outbuf[st->outbuflen++] = last;
160: if ('\\' == last)
161: st->outbuf[st->outbuflen++] = 'e';
162: st->outbuf[st->outbuflen] = '\0';
163: }
164:
165: static void
166: outbuf_addstr(struct state *st, const char *str)
167: {
168: size_t slen;
169:
170: slen = strlen(str);
171: if (st->outbuflen + slen >= st->outbufsz)
172: outbuf_grow(st, slen);
173: memcpy(st->outbuf + st->outbuflen, str, slen+1);
1.33 schwarze 174: st->outbuflen += slen;
1.31 schwarze 175: last = str[slen - 1];
176: }
177:
178: static void
179: outbuf_flush(struct state *st)
180: {
181:
182: if (0 == st->outbuflen)
183: return;
184:
1.40 schwarze 185: if (OUST_TXT == st->oust && st->wantws)
186: putchar(' ');
187:
1.31 schwarze 188: fputs(st->outbuf, stdout);
189: *st->outbuf = '\0';
190: st->outbuflen = 0;
1.32 schwarze 191:
192: if (OUST_NL == st->oust)
193: st->oust = OUST_TXT;
1.31 schwarze 194: }
195:
196: static void
1.32 schwarze 197: mdoc_newln(struct state *st)
1.31 schwarze 198: {
199:
1.32 schwarze 200: if (OUST_NL == st->oust)
1.31 schwarze 201: return;
1.32 schwarze 202:
1.31 schwarze 203: putchar('\n');
204: last = '\n';
1.32 schwarze 205: st->oust = OUST_NL;
206: st->wantws = 1;
1.31 schwarze 207: }
208:
/*
 * Given buf[*start] is at the start of an escape name, read til the end
 * of the escape ('>') then try to do something with it.
 * Sets start to be one after the '>', or to "end" if the input
 * runs out before a '>' is found.
 * Names that do not fit into the local buffer are skipped, including
 * their closing '>', such that the delimiter is neither printed nor
 * mistaken for the end of an enclosing format code.
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read til our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/* Too long... skip til we end, then eat the '>'. */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		if (*start < end)
			(*start)++;
		return;
	} else if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
255:
256: /*
1.9 kristaps 257: * Run some heuristics to intuit a link format.
1.19 kristaps 258: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 259: * that the caller can safely just continue processing.
1.19 kristaps 260: * If this is just an empty tag, I'll return 0.
1.32 schwarze 261: *
262: * Always operates in OUST_MAC mode.
263: * Mode handling is done by the caller.
1.9 kristaps 264: */
265: static int
266: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
267: {
1.21 kristaps 268: size_t linkstart, realend, linkend,
269: i, j, textsz, stack;
1.9 kristaps 270:
271: /*
272: * Scan to the start of the terminus.
273: * This function is more or less replicated in the formatcode()
274: * for null or index formatting codes.
1.23 kristaps 275: * However, we're slightly different because we might have
276: * nested escapes we need to ignore.
1.9 kristaps 277: */
1.21 kristaps 278: stack = 0;
1.19 kristaps 279: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 280: if ('<' == buf[realend])
281: stack++;
1.19 kristaps 282: if ('>' != buf[realend])
1.9 kristaps 283: continue;
1.23 kristaps 284: else if (stack-- > 0)
285: continue;
286: if (dsz == 1)
1.9 kristaps 287: break;
1.19 kristaps 288: assert(realend > 0);
289: if (' ' != buf[realend - 1])
1.9 kristaps 290: continue;
1.19 kristaps 291: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 292: if ('>' != buf[i++])
293: break;
294: if (dsz == j)
295: break;
296: }
1.19 kristaps 297:
298: /* Ignore stubs. */
299: if (realend == end || realend == *start)
1.9 kristaps 300: return(0);
301:
1.19 kristaps 302: /* Set linkend to the end of content. */
303: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 304:
1.19 kristaps 305: /* Re-scan to see if we have a title or section. */
306: for (textsz = *start; textsz < linkend; textsz++)
307: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 308: break;
309:
1.19 kristaps 310: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 311: /* With title: set start, then end at section. */
1.19 kristaps 312: linkstart = textsz + 1;
1.18 kristaps 313: textsz = textsz - *start;
1.19 kristaps 314: for (i = linkstart; i < linkend; i++)
315: if ('/' == buf[i])
316: break;
317: if (i < linkend)
318: linkend = i;
1.20 kristaps 319: } else if (textsz < linkend && '/' == buf[textsz]) {
320: /* With section: set end at section. */
321: linkend = textsz;
322: textsz = 0;
323: } else
324: /* No title, no section. */
1.18 kristaps 325: textsz = 0;
1.19 kristaps 326:
327: *start = realend;
328: j = linkend - linkstart;
329:
1.20 kristaps 330: /* Do we have only subsection material? */
331: if (0 == j && '/' == buf[linkend]) {
332: linkstart = linkend + 1;
333: linkend = dsz > 1 ? realend - 1 : realend;
334: if (0 == (j = linkend - linkstart))
335: return(0);
336: printf("Sx %.*s", (int)j, &buf[linkstart]);
337: return(1);
338: } else if (0 == j)
1.19 kristaps 339: return(0);
340:
341: /* See if we qualify as being a link or not. */
1.20 kristaps 342: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
343: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
344: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
345: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
346: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
347: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
348: /* Gross. */
349: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
350: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 351: return(1);
352: }
353:
354: /* See if we qualify as a mailto. */
1.20 kristaps 355: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 356: printf("Mt %.*s", (int)j, &buf[linkstart]);
357: return(1);
358: }
359:
360: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
361: if ((j > 3 && ')' == buf[linkend - 1]) &&
362: ('(' == buf[linkend - 3])) {
363: printf("Xr %.*s %c", (int)(j - 3),
364: &buf[linkstart], buf[linkend - 2]);
365: return(1);
366: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
367: ('(' == buf[linkend - 4])) {
368: printf("Xr %.*s %.*s", (int)(j - 4),
369: &buf[linkstart], 2, &buf[linkend - 3]);
370: return(1);
371: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
372: ('(' == buf[linkend - 5])) {
373: printf("Xr %.*s %.*s", (int)(j - 5),
374: &buf[linkstart], 3, &buf[linkend - 4]);
375: return(1);
376: }
377:
378: /* Last try: do we have a double-colon? */
379: for (i = linkstart + 1; i < linkend; i++)
380: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 381: break;
1.9 kristaps 382:
1.19 kristaps 383: if (i < linkend)
1.10 kristaps 384: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 385: (int)j, &buf[linkstart]);
1.9 kristaps 386: else
1.19 kristaps 387: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 388:
389: return(1);
390: }
391:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry, buf[*start] must be the '-' and at least one more
 * character must follow before "end"; both are asserted.
 * Prints "Fl name " for each flag found and "Ar " in front of
 * anything else, and advances "start" past what was printed.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t	 i;
again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/*
	 * The <ctype.h> functions need arguments representable
	 * as unsigned char, so never pass them a plain char.
	 */
	if ( ! isalnum((unsigned char)buf[*start + 1]) &&
	    '?' != buf[*start + 1] &&
	    '-' != buf[*start + 1]) {
		/*
		 * NOTE(review): this backs up to the character in
		 * front of the dash; confirm that is intended when
		 * the dash is the first character of the code.
		 */
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Skip the dash and find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++)
		if (isalnum((unsigned char)buf[i]))
			continue;
		else if ('?' == buf[i])
			continue;
		else if ('-' == buf[i])
			continue;
		else if ('_' == buf[i])
			continue;
		else
			break;

	assert(i < end);

	/* A flag name must be followed by a blank or the end marker. */
	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Escape two-letter names that look like mdoc(7) macros. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		/* Another flag follows: handle it the same way. */
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		/* Otherwise, the rest is the flag's argument. */
		printf("Ar ");
		*start = i;
	}
}
460:
1.9 kristaps 461: /*
1.1 schwarze 462: * We're at the character in front of a format code, which is structured
463: * like X<...> and can contain nested format codes.
464: * This consumes the whole format code, and any nested format codes, til
465: * the end of matched production.
1.6 kristaps 466: * If "nomacro", then we don't print any macros, just contained data
467: * (e.g., following "Sh" or "Nm").
1.15 kristaps 468: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
469: * as the first format code on a line (for decoration as an "Nm"),
470: * non-zero otherwise.
1.32 schwarze 471: *
472: * Output mode handling is most complicated here.
473: * We may enter in any mode.
474: * We usually exit in OUST_MAC mode, except when
475: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 476: */
1.33 schwarze 477: static int
1.15 kristaps 478: formatcode(struct state *st, const char *buf, size_t *start,
1.32 schwarze 479: size_t end, int nomacro, int pos)
1.1 schwarze 480: {
1.40 schwarze 481: size_t i, j, dsz;
1.1 schwarze 482: enum fmt fmt;
1.40 schwarze 483: int wantws;
1.39 schwarze 484: unsigned char uc;
1.1 schwarze 485:
486: assert(*start + 1 < end);
487: assert('<' == buf[*start + 1]);
488:
1.6 kristaps 489: /*
490: * First, look up the format code.
1.30 schwarze 491: * If it's not valid, treat it as a NOOP.
1.6 kristaps 492: */
493: for (fmt = 0; fmt < FMT__MAX; fmt++)
494: if (buf[*start] == fmts[fmt])
495: break;
496:
1.5 kristaps 497: /*
498: * Determine whether we're overriding our delimiter.
499: * According to POD, if we have more than one '<' followed by a
500: * space, then we need a space followed by matching '>' to close
501: * the expression.
502: * Otherwise we use the usual '<' and '>' matched pair.
503: */
504: i = *start + 1;
505: while (i < end && '<' == buf[i])
506: i++;
507: assert(i > *start + 1);
508: dsz = i - (*start + 1);
509: if (dsz > 1 && (i >= end || ' ' != buf[i]))
510: dsz = 1;
511:
512: /* Remember, if dsz>1, to jump the trailing space. */
513: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 514:
515: /*
1.6 kristaps 516: * Escapes and ignored codes (NULL and INDEX) don't print macro
517: * sequences, so just output them like normal text before
518: * processing for real macros.
1.1 schwarze 519: */
520: if (FMT_ESCAPE == fmt) {
1.31 schwarze 521: formatescape(st, buf, start, end);
1.33 schwarze 522: return(0);
1.1 schwarze 523: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 524: /*
1.6 kristaps 525: * Just consume til the end delimiter, accounting for
526: * whether it's a custom one.
1.5 kristaps 527: */
528: for ( ; *start < end; (*start)++) {
529: if ('>' != buf[*start])
530: continue;
531: else if (dsz == 1)
532: break;
533: assert(*start > 0);
534: if (' ' != buf[*start - 1])
535: continue;
536: i = *start;
537: for (j = 0; i < end && j < dsz; j++)
538: if ('>' != buf[i++])
539: break;
540: if (dsz != j)
541: continue;
542: (*start) += dsz;
543: break;
544: }
1.24 kristaps 545: if (*start < end) {
546: assert('>' == buf[*start]);
547: (*start)++;
548: }
549: if (isspace(last))
550: while (*start < end && isspace((int)buf[*start]))
551: (*start)++;
1.33 schwarze 552: return(0);
1.1 schwarze 553: }
554:
1.6 kristaps 555: /*
556: * Check whether we're supposed to print macro stuff (this is
557: * suppressed in, e.g., "Nm" and "Sh" macros).
558: */
1.30 schwarze 559: if (FMT__MAX != fmt && !nomacro) {
1.32 schwarze 560:
561: /*
562: * We may already have wantws if there was whitespace
563: * before the code ("text B<text"), but initial
564: * whitespace inside our scope ("textB< text")
565: * allows to break at this point as well.
566: */
567:
1.40 schwarze 568: wantws = ' ' == buf[*start] ||
569: (OUST_MAC == st->oust ? st->wantws : ! st->outbuflen);
1.31 schwarze 570:
1.1 schwarze 571: /*
1.31 schwarze 572: * If we are on a text line and there is no
573: * whitespace before our content, we have to make
574: * the previous word a prefix to the macro line.
1.32 schwarze 575: * In the following, mdoc_newln() must not be used
576: * lest we clobber out output state.
1.1 schwarze 577: */
1.31 schwarze 578:
1.40 schwarze 579: if (OUST_MAC != st->oust && ! wantws) {
1.32 schwarze 580: if (OUST_NL != st->oust)
1.31 schwarze 581: putchar('\n');
582: printf(".Pf ");
1.40 schwarze 583: st->wantws = 0;
1.31 schwarze 584: }
585:
586: outbuf_flush(st);
587:
588: /* Whitespace is easier to suppress on macro lines. */
589:
1.40 schwarze 590: if (OUST_MAC == st->oust && ! wantws)
1.32 schwarze 591: printf(" Ns ");
1.31 schwarze 592:
593: /* Unless we are on a macro line, start one. */
594:
1.40 schwarze 595: if (OUST_MAC != st->oust && wantws) {
1.32 schwarze 596: if (OUST_NL != st->oust)
1.6 kristaps 597: putchar('\n');
1.1 schwarze 598: putchar('.');
1.31 schwarze 599: } else
1.1 schwarze 600: putchar(' ');
1.31 schwarze 601:
1.32 schwarze 602: /*
603: * Print the macro corresponding to this format code,
604: * and update the output state afterwards.
605: */
1.6 kristaps 606:
1.1 schwarze 607: switch (fmt) {
608: case (FMT_ITALIC):
609: printf("Em ");
610: break;
611: case (FMT_BOLD):
1.14 kristaps 612: if (SECT_SYNOPSIS == st->sect) {
613: if (1 == dsz && '-' == buf[*start])
614: dosynopsisfl(buf, start, end);
1.15 kristaps 615: else if (0 == pos)
616: printf("Nm ");
1.14 kristaps 617: else
618: printf("Ar ");
619: break;
1.39 schwarze 620: }
621: i = 0;
622: uc = buf[*start];
623: while (isalnum(uc) || '_' == uc || ' ' == uc)
624: uc = buf[*start + ++i];
625: if ('=' != uc && '>' != uc)
626: i = 0;
627: if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
1.27 schwarze 628: printf("Dv ");
1.38 schwarze 629: break;
630: }
1.39 schwarze 631: switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
632: case MDOC_Fa:
1.38 schwarze 633: printf("Fa ");
1.39 schwarze 634: break;
635: case MDOC_Vt:
636: printf("Vt ");
637: break;
638: default:
1.27 schwarze 639: printf("Sy ");
1.39 schwarze 640: break;
641: }
1.1 schwarze 642: break;
643: case (FMT_CODE):
1.2 schwarze 644: printf("Qo Li ");
1.1 schwarze 645: break;
646: case (FMT_LINK):
1.19 kristaps 647: /* Try to link; use "No" if it's empty. */
1.9 kristaps 648: if ( ! trylink(buf, start, end, dsz))
649: printf("No ");
1.1 schwarze 650: break;
651: case (FMT_FILE):
652: printf("Pa ");
653: break;
654: case (FMT_NBSP):
655: printf("No ");
656: break;
657: default:
658: abort();
659: }
1.32 schwarze 660: st->oust = OUST_MAC;
661: st->wantws = 1;
1.31 schwarze 662: } else
663: outbuf_flush(st);
1.1 schwarze 664:
665: /*
1.6 kristaps 666: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 667: * find a nested format code.
1.1 schwarze 668: * Don't emit any newlines: since we're on a macro line, we
669: * don't want to break the line.
670: */
671: while (*start < end) {
1.5 kristaps 672: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 673: (*start)++;
674: break;
1.5 kristaps 675: } else if ('>' == buf[*start] &&
676: ' ' == buf[*start - 1]) {
677: /*
678: * Handle custom delimiters.
679: * These require a certain number of
680: * space-preceded carrots before we're really at
681: * the end.
682: */
683: i = *start;
684: for (j = 0; i < end && j < dsz; j++)
685: if ('>' != buf[i++])
686: break;
687: if (dsz == j) {
688: *start += dsz;
689: break;
690: }
1.1 schwarze 691: }
1.34 schwarze 692: if (*start + 1 < end && '<' == buf[*start + 1] &&
693: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.40 schwarze 694: if ( ! formatcode(st, buf, start, end, nomacro, 1))
695: st->wantws = 1;
1.1 schwarze 696: continue;
697: }
1.3 schwarze 698:
1.32 schwarze 699: /* Suppress newlines and multiple spaces. */
700:
701: last = buf[(*start)++];
702: if (' ' == last || '\n' == last) {
703: putchar(' ');
704: while (*start < end && ' ' == buf[*start])
705: (*start)++;
706: continue;
707: }
708:
1.33 schwarze 709: if (OUST_MAC == st->oust && FMT__MAX != fmt) {
1.32 schwarze 710: if ( ! st->wantws) {
711: printf(" Ns ");
712: st->wantws = 1;
713: }
714:
715: /*
716: * Escape macro-like words.
717: * This matches "Xx " and "XxEOLN".
718: */
719:
720: if (end - *start > 0 &&
721: isupper((unsigned char)last) &&
722: islower((unsigned char)buf[*start]) &&
723: (end - *start == 1 ||
724: ' ' == buf[*start + 1] ||
725: '>' == buf[*start + 1]))
726: printf("\\&");
727: }
1.3 schwarze 728:
1.32 schwarze 729: putchar(last);
1.4 schwarze 730:
1.8 kristaps 731: /* Protect against character escapes. */
1.32 schwarze 732:
1.8 kristaps 733: if ('\\' == last)
734: putchar('e');
1.1 schwarze 735: }
1.2 schwarze 736:
737: if ( ! nomacro && FMT_CODE == fmt)
738: printf(" Qc ");
1.1 schwarze 739:
1.33 schwarze 740: st->wantws = ' ' == last;
1.40 schwarze 741: return(FMT__MAX != fmt);
1.1 schwarze 742: }
743:
744: /*
745: * Calls formatcode() til the end of a paragraph.
1.32 schwarze 746: * Goes to OUST_MAC mode and stays there when returning,
747: * such that the caller can add arguments to the macro line
748: * before closing it out.
1.1 schwarze 749: */
750: static void
1.32 schwarze 751: formatcodeln(struct state *st, const char *linemac,
752: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 753: {
1.33 schwarze 754: int gotmacro, wantws;
1.1 schwarze 755:
1.32 schwarze 756: assert(OUST_NL == st->oust);
757: assert(st->wantws);
758: printf(".%s ", linemac);
759: st->oust = OUST_MAC;
760:
1.33 schwarze 761: gotmacro = 0;
1.1 schwarze 762: while (*start < end) {
1.33 schwarze 763: wantws = ' ' == buf[*start] || '\n' == buf[*start];
764: if (wantws) {
765: last = ' ';
766: do {
767: (*start)++;
768: } while (*start < end && ' ' == buf[*start]);
769: }
770:
1.34 schwarze 771: if (*start + 1 < end && '<' == buf[*start + 1] &&
772: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.33 schwarze 773: st->wantws |= wantws;
774: gotmacro = formatcode(st, buf,
775: start, end, nomacro, 1);
1.1 schwarze 776: continue;
777: }
1.32 schwarze 778:
1.33 schwarze 779: if (gotmacro) {
780: if (*start < end || st->outbuflen) {
781: if (st->wantws ||
782: (wantws && !st->outbuflen))
783: printf(" No ");
784: else
785: printf(" Ns ");
786: }
787: gotmacro = 0;
788: }
789: outbuf_flush(st);
790: st->wantws = wantws;
791:
792: if (*start >= end)
793: break;
794:
795: if (st->wantws) {
796: putchar(' ');
797: st->wantws = 0;
1.32 schwarze 798: }
799:
1.4 schwarze 800: /*
801: * Since we're already on a macro line, we want to make
802: * sure that we don't inadvertently invoke a macro.
803: * We need to do this carefully because section names
804: * are used in troff and we don't want to escape
805: * something that needn't be escaped.
806: */
807: if (' ' == last && end - *start > 1 &&
1.33 schwarze 808: isupper((unsigned char)buf[*start]) &&
809: islower((unsigned char)buf[*start + 1]) &&
810: (end - *start == 2 || ' ' == buf[*start + 2]))
1.4 schwarze 811: printf("\\&");
812:
1.33 schwarze 813: putchar(last = buf[*start]);
1.8 kristaps 814:
815: /* Protect against character escapes. */
1.33 schwarze 816:
1.8 kristaps 817: if ('\\' == last)
818: putchar('e');
819:
1.1 schwarze 820: (*start)++;
821: }
822: }
823:
824: /*
1.4 schwarze 825: * Guess at what kind of list we are.
826: * These are taken straight from the POD manual.
827: * I don't know what people do in real life.
828: */
829: static enum list
830: listguess(const char *buf, size_t start, size_t end)
831: {
832: size_t len = end - start;
833:
834: assert(end >= start);
835:
836: if (len == 1 && '*' == buf[start])
837: return(LIST_BULLET);
838: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
839: return(LIST_ENUM);
840: else if (len == 1 && '1' == buf[start])
841: return(LIST_ENUM);
842: else
843: return(LIST_TAG);
844: }
845:
846: /*
1.1 schwarze 847: * A command paragraph, as noted in the perlpod manual, just indicates
848: * that we should do something, optionally with some text to print as
849: * well.
1.32 schwarze 850: * From the perspective of external callers,
851: * always stays in OUST_NL/wantws mode,
852: * but its children do use OUST_MAC.
1.1 schwarze 853: */
854: static void
855: command(struct state *st, const char *buf, size_t start, size_t end)
856: {
857: size_t len, csz;
858: enum cmd cmd;
859:
860: assert('=' == buf[start]);
861: start++;
862: len = end - start;
863:
864: for (cmd = 0; cmd < CMD__MAX; cmd++) {
865: csz = strlen(cmds[cmd]);
866: if (len < csz)
867: continue;
868: if (0 == memcmp(&buf[start], cmd[cmds], csz))
869: break;
870: }
871:
872: /* Ignore bogus commands. */
873:
874: if (CMD__MAX == cmd)
875: return;
876:
877: start += csz;
1.8 kristaps 878: while (start < end && ' ' == buf[start])
879: start++;
880:
1.1 schwarze 881: len = end - start;
882:
883: if (st->paused) {
884: st->paused = CMD_END != cmd;
885: return;
886: }
887:
888: switch (cmd) {
889: case (CMD_POD):
890: break;
891: case (CMD_HEAD1):
892: /*
893: * The behaviour of head= follows from a quick glance at
894: * how pod2man handles it.
895: */
1.11 kristaps 896: st->sect = SECT_NONE;
897: if (end - start == 4) {
1.1 schwarze 898: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 899: st->sect = SECT_NAME;
900: } else if (end - start == 8) {
901: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
902: st->sect = SECT_SYNOPSIS;
903: }
1.32 schwarze 904: formatcodeln(st, "Sh", buf, &start, end, 1);
905: mdoc_newln(st);
1.1 schwarze 906: st->haspar = 1;
907: break;
908: case (CMD_HEAD2):
1.32 schwarze 909: formatcodeln(st, "Ss", buf, &start, end, 1);
910: mdoc_newln(st);
1.1 schwarze 911: st->haspar = 1;
912: break;
913: case (CMD_HEAD3):
914: puts(".Pp");
1.32 schwarze 915: formatcodeln(st, "Em", buf, &start, end, 0);
916: mdoc_newln(st);
1.1 schwarze 917: puts(".Pp");
918: st->haspar = 1;
919: break;
920: case (CMD_HEAD4):
921: puts(".Pp");
1.32 schwarze 922: formatcodeln(st, "No", buf, &start, end, 0);
923: mdoc_newln(st);
1.1 schwarze 924: puts(".Pp");
925: st->haspar = 1;
926: break;
927: case (CMD_OVER):
1.4 schwarze 928: /*
929: * If we have an existing list that hasn't had an =item
930: * yet, then make sure that we open it now.
931: * We use the default list type, but that can't be
932: * helped (we haven't seen any items yet).
1.1 schwarze 933: */
1.4 schwarze 934: if (st->lpos > 0)
935: if (LIST__MAX == st->lstack[st->lpos - 1]) {
936: st->lstack[st->lpos - 1] = LIST_TAG;
937: puts(".Bl -tag -width Ds");
938: }
939: st->lpos++;
940: assert(st->lpos < LIST_STACKSZ);
941: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 942: break;
943: case (CMD_ITEM):
1.6 kristaps 944: if (0 == st->lpos) {
945: /*
946: * Bad markup.
947: * Try to compensate.
948: */
949: st->lstack[st->lpos] = LIST__MAX;
950: st->lpos++;
951: }
1.4 schwarze 952: assert(st->lpos > 0);
953: /*
954: * If we're the first =item, guess at what our content
955: * will be: "*" is a bullet list, "1." is a numbered
956: * list, and everything is tagged.
957: */
958: if (LIST__MAX == st->lstack[st->lpos - 1]) {
959: st->lstack[st->lpos - 1] =
960: listguess(buf, start, end);
961: switch (st->lstack[st->lpos - 1]) {
962: case (LIST_BULLET):
963: puts(".Bl -bullet");
964: break;
965: case (LIST_ENUM):
966: puts(".Bl -enum");
967: break;
968: default:
969: puts(".Bl -tag -width Ds");
970: break;
971: }
972: }
973: switch (st->lstack[st->lpos - 1]) {
974: case (LIST_TAG):
1.32 schwarze 975: formatcodeln(st, "It", buf, &start, end, 0);
976: mdoc_newln(st);
1.4 schwarze 977: break;
978: case (LIST_ENUM):
979: /* FALLTHROUGH */
980: case (LIST_BULLET):
981: /*
982: * Abandon the remainder of the paragraph
983: * because we're going to be a bulletted or
984: * numbered list.
985: */
986: puts(".It");
987: break;
988: default:
989: abort();
990: }
1.1 schwarze 991: st->haspar = 1;
992: break;
993: case (CMD_BACK):
1.4 schwarze 994: /* Make sure we don't back over the stack. */
995: if (st->lpos > 0) {
996: st->lpos--;
997: puts(".El");
998: }
1.1 schwarze 999: break;
1000: case (CMD_BEGIN):
1001: /*
1002: * We disregard all types for now.
1003: * TODO: process at least "text" in a -literal block.
1004: */
1005: st->paused = 1;
1006: break;
1007: case (CMD_FOR):
1008: /*
1009: * We ignore all types of encodings and formats
1010: * unilaterally.
1011: */
1012: break;
1013: case (CMD_ENCODING):
1014: break;
1015: case (CMD_CUT):
1016: st->parsing = 0;
1017: return;
1018: default:
1019: abort();
1020: }
1021:
1022: /* Any command (but =cut) makes us start parsing. */
1023: st->parsing = 1;
1024: }
1025:
1026: /*
1.39 schwarze 1027: * Put the type provided as an argument into the dictionary.
1028: */
1029: static void
1030: register_type(const char *ptype)
1031: {
1032: const char *pname, *pend;
1033:
1034: pname = ptype;
1035: while (isalnum((unsigned char)*pname) || '_' == *pname)
1036: pname++;
1037: if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) ||
1038: (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) {
1039: while (' ' == *pname)
1040: pname++;
1041: pend = pname;
1042: while (isalnum((unsigned char)*pend) || '_' == *pend)
1043: pend++;
1044: if (pend > pname)
1045: dict_put(pname, pend - pname, MDOC_Vt);
1046: } else
1047: pend = pname;
1048: if (pend > ptype)
1049: dict_put(ptype, pend - ptype, MDOC_Vt);
1050: }
1051:
1052: /*
1.1 schwarze 1053: * Just pump out the line in a verbatim block.
1.32 schwarze 1054: * From the perspective of external callers,
1055: * always stays in OUST_NL/wantws mode.
1.1 schwarze 1056: */
1057: static void
1.35 schwarze 1058: verbatim(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1059: {
1.36 schwarze 1060: size_t i, ift, ifo, ifa, ifc, inl;
1.38 schwarze 1061: char *cp, *cp2;
1.53 ! schwarze 1062: int indisplay, nopen, wantsp;
1.1 schwarze 1063:
1.53 ! schwarze 1064: if (st->paused || ! st->parsing)
1.1 schwarze 1065: return;
1.53 ! schwarze 1066:
! 1067: indisplay = wantsp = 0;
! 1068:
1.22 kristaps 1069: again:
1.53 ! schwarze 1070: if (start == end) {
! 1071: if (indisplay)
! 1072: puts(".Ed");
! 1073: return;
! 1074: }
! 1075:
! 1076: if ('\n' == buf[start]) {
! 1077: wantsp = 1;
! 1078: start++;
! 1079: goto again;
! 1080: }
! 1081:
1.22 kristaps 1082: /*
1083: * If we're in the SYNOPSIS, see if we're an #include block.
1084: * If we are, then print the "In" macro and re-loop.
1085: * This handles any number of inclusions, but only when they
1086: * come before the remaining parts...
1087: */
1088: if (SECT_SYNOPSIS == st->sect) {
1089: i = start;
1.35 schwarze 1090: while (i < end && buf[i] == ' ')
1091: i++;
1.22 kristaps 1092: if (i == end)
1.53 ! schwarze 1093: goto again;
1.35 schwarze 1094:
1.22 kristaps 1095: /* We're an include block! */
1096: if (end - i > 10 &&
1097: 0 == memcmp(&buf[i], "#include <", 10)) {
1098: start = i + 10;
1099: while (start < end && ' ' == buf[start])
1100: start++;
1.53 ! schwarze 1101: if (indisplay)
! 1102: puts(".Ed");
! 1103: indisplay = wantsp = 0;
1.22 kristaps 1104: fputs(".In ", stdout);
1105: /* Stop til the '>' marker or we hit eoln. */
1106: while (start < end &&
1107: '>' != buf[start] && '\n' != buf[start])
1108: putchar(buf[start++]);
1109: putchar('\n');
1110: if (start < end && '>' == buf[start])
1111: start++;
1112: if (start < end && '\n' == buf[start])
1113: start++;
1.41 schwarze 1114: goto again;
1115: }
1116:
1117: /* Other preprocessor directives. */
1118: if ('#' == buf[i]) {
1.53 ! schwarze 1119: if (indisplay)
! 1120: puts(".Ed");
! 1121: indisplay = wantsp = 0;
1.41 schwarze 1122: fputs(".Fd ", stdout);
1123: start = i;
1124: while(start < end && '\n' != buf[start])
1125: putchar(buf[start++]);
1126: putchar('\n');
1127: if (start < end && '\n' == buf[start])
1128: start++;
1.49 schwarze 1129:
1130: /* Remember #define for Dv or Fn. */
1131:
1132: if (strncmp(buf + i + 1, "define", 6) ||
1133: ! isspace((unsigned char)buf[i + 7]))
1134: goto again;
1135:
1136: ifo = i + 7;
1137: while (ifo < start &&
1138: isspace((unsigned char)buf[ifo]))
1139: ifo++;
1140: ifa = ifo;
1141: while ('_' == buf[ifa] ||
1142: isalnum((unsigned char)buf[ifa]))
1143: ifa++;
1144: dict_put(buf + ifo, ifa - ifo,
1145: '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv);
1146:
1.41 schwarze 1147: goto again;
1.22 kristaps 1148: }
1.35 schwarze 1149:
1150: /* Parse function declaration. */
1151: ifo = ifa = ifc = 0;
1.36 schwarze 1152: inl = end;
1153: nopen = 0;
1154: for (ift = i; i < end; i++) {
1155: if (ifc) {
1156: if (buf[i] != '\n')
1157: continue;
1158: inl = i;
1159: break;
1160: }
1161: switch (buf[i]) {
1.45 schwarze 1162: case '\t':
1163: /* FALLTHROUGH */
1.36 schwarze 1164: case ' ':
1165: if ( ! ifa)
1166: ifo = i;
1167: break;
1168: case '(':
1169: if (ifo) {
1170: nopen++;
1171: if ( ! ifa)
1172: ifa = i;
1173: } else
1174: i = end;
1175: break;
1176: case ')':
1177: switch (nopen) {
1178: case 0:
1179: i = end;
1180: break;
1181: case 1:
1.35 schwarze 1182: ifc = i;
1.36 schwarze 1183: break;
1184: default:
1185: nopen--;
1186: break;
1187: }
1188: break;
1189: default:
1190: break;
1191: }
1.35 schwarze 1192: }
1193:
1194: /* Encode function declaration. */
1195: if (ifc) {
1.36 schwarze 1196: for (i = ifa; i < ifc; i++)
1197: if (buf[i] == '\n')
1198: buf[i] = ' ';
1.35 schwarze 1199: buf[ifo++] = '\0';
1.39 schwarze 1200: register_type(buf + ift);
1.53 ! schwarze 1201: if (indisplay)
! 1202: puts(".Ed");
! 1203: indisplay = wantsp = 0;
1.35 schwarze 1204: printf(".Ft %s", buf + ift);
1205: if (buf[ifo] == '*') {
1206: fputs(" *", stdout);
1207: ifo++;
1208: }
1209: putchar('\n');
1210: buf[ifa++] = '\0';
1211: printf(".Fo %s\n", buf + ifo);
1.39 schwarze 1212: dict_put(buf + ifo, 0, MDOC_Fo);
1.35 schwarze 1213: buf[ifc++] = '\0';
1214: for (;;) {
1215: cp = strchr(buf + ifa, ',');
1.38 schwarze 1216: if (cp != NULL) {
1217: cp2 = cp;
1.36 schwarze 1218: *cp++ = '\0';
1.38 schwarze 1219: } else
1220: cp2 = strchr(buf + ifa, '\0');
1221: while (isalnum((unsigned char)cp2[-1]) ||
1222: '_' == cp2[-1])
1223: cp2--;
1224: if ('\0' != *cp2)
1.39 schwarze 1225: dict_put(cp2, 0, MDOC_Fa);
1226: register_type(buf + ifa);
1.50 schwarze 1227: if (strchr(buf + ifa, ' ') == NULL)
1228: printf(".Fa %s\n", buf + ifa);
1229: else
1230: printf(".Fa \"%s\"\n", buf + ifa);
1.35 schwarze 1231: if (cp == NULL)
1232: break;
1.45 schwarze 1233: while (*cp == ' ' || *cp == '\t')
1.36 schwarze 1234: cp++;
1235: ifa = cp - buf;
1.35 schwarze 1236: }
1237: puts(".Fc");
1238: if (buf[ifc] == ';')
1239: ifc++;
1.36 schwarze 1240: if (ifc < inl) {
1241: buf[inl] = '\0';
1.35 schwarze 1242: puts(buf + ifc);
1243: }
1.53 ! schwarze 1244: start = inl < end ? inl + 1 : end;
! 1245: goto again;
1.35 schwarze 1246: }
1.22 kristaps 1247: }
1.53 ! schwarze 1248:
! 1249: if ( ! indisplay)
! 1250: puts(".Bd -literal");
! 1251: else if (wantsp)
! 1252: putchar('\n');
! 1253: indisplay = 1;
! 1254: wantsp = 0;
! 1255:
! 1256: for (last = '\n'; start < end; start++) {
1.8 kristaps 1257: /*
1258: * Handle accidental macros (newline starting with
1259: * control character) and escapes.
1260: */
1.53 ! schwarze 1261: if ('\n' == last) {
! 1262: if ('\n' == buf[start])
! 1263: goto again;
1.7 kristaps 1264: if ('.' == buf[start] || '\'' == buf[start])
1265: printf("\\&");
1.53 ! schwarze 1266: }
1.8 kristaps 1267: putchar(last = buf[start]);
1268: if ('\\' == buf[start])
1269: printf("e");
1.7 kristaps 1270: }
1.53 ! schwarze 1271: if ('\n' != last)
! 1272: putchar('\n');
! 1273: if (indisplay)
! 1274: puts(".Ed");
1.1 schwarze 1275: }
1276:
/*
 * Scan buf[start..end) for a closing square bracket that is not
 * paired with an opening square bracket inside the scanned range.
 * Return 1 if there is one, 0 otherwise.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth, pos;

	depth = 0;
	pos = start;
	while (pos < end) {
		switch (buf[pos]) {
		case '[':
			depth++;
			break;
		case ']':
			/* At depth zero, this one closes our bracket. */
			if (depth == 0)
				return(1);
			depth--;
			break;
		default:
			break;
		}
		pos++;
	}
	return(0);
}
1294:
1295: /*
1296: * If we're in the SYNOPSIS section and we've encounter braces in an
1297: * ordinary paragraph, then try to see whether we're an [-option].
1298: * Do this, if we're an opening bracket, by first seeing if we have a
1299: * matching end via hasmatch().
1300: * If we're an ending bracket, see if we have a stack already.
1301: */
1302: static int
1.32 schwarze 1303: dosynopsisop(struct state *st, const char *buf,
1304: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1305: {
1306:
1307: assert('[' == buf[*start] || ']' == buf[*start]);
1308:
1309: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 schwarze 1310: mdoc_newln(st);
1.13 kristaps 1311: puts(".Oo");
1312: (*opstack)++;
1313: } else if ('[' == buf[*start])
1314: return(0);
1315:
1316: if (']' == buf[*start] && *opstack > 0) {
1.32 schwarze 1317: mdoc_newln(st);
1.13 kristaps 1318: puts(".Oc");
1319: (*opstack)--;
1320: } else if (']' == buf[*start])
1321: return(0);
1322:
1323: (*start)++;
1.31 schwarze 1324: last = '\n';
1.13 kristaps 1325: while (' ' == buf[*start])
1326: (*start)++;
1327: return(1);
1328: }
1329:
1330: /*
1.17 kristaps 1331: * Format multiple "Nm" manpage names in the NAME section.
1.32 schwarze 1332: * From the perspective of external callers,
1333: * always stays in OUST_NL/wantws mode,
1334: * but its children do use OUST_MAC.
1.17 kristaps 1335: */
1336: static void
1337: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1338: {
1339: size_t word;
1340:
1.32 schwarze 1341: assert(OUST_NL == st->oust);
1342: assert(st->wantws);
1343:
1.47 schwarze 1344: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1345: (*start)++;
1346:
1347: if (end == *start) {
1348: puts(".Nm unknown");
1349: return;
1350: }
1351:
1352: while (*start < end) {
1353: for (word = *start; word < end; word++)
1354: if (',' == buf[word])
1355: break;
1.32 schwarze 1356: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1357: if (*start == end) {
1.32 schwarze 1358: mdoc_newln(st);
1359: break;
1.17 kristaps 1360: }
1361: assert(',' == buf[*start]);
1.32 schwarze 1362: printf(" ,");
1363: mdoc_newln(st);
1.17 kristaps 1364: (*start)++;
1.47 schwarze 1365: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1366: (*start)++;
1367: }
1368: }
1369:
1370: /*
1.1 schwarze 1371: * Ordinary paragraph.
1372: * Well, this is really the hardest--POD seems to assume that, for
1373: * example, a leading space implies a newline, and so on.
1374: * Lots of other snakes in the grass: escaping a newline followed by a
1375: * period (accidental mdoc(7) control), double-newlines after macro
1376: * passages, etc.
1.32 schwarze 1377: *
1378: * Uses formatcode() to go to OUST_MAC mode
1379: * and outbuf_flush() to go to OUST_TXT mode.
1.40 schwarze 1380: * In text mode, wantws requests white space before the text
1381: * currently contained in the outbuf, not before upcoming text.
1.32 schwarze 1382: * Must make sure to go back to OUST_NL/wantws mode before returning.
1.1 schwarze 1383: */
1384: static void
1385: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1386: {
1.44 schwarze 1387: size_t i, j, opstack, wend;
1.43 schwarze 1388: enum mdoc_type mtype;
1.44 schwarze 1389: int eos, noeos, seq;
1.49 schwarze 1390: char savechar;
1.1 schwarze 1391:
1392: if ( ! st->parsing || st->paused)
1393: return;
1394:
1395: /*
1396: * Special-case: the NAME section.
1397: * If we find a "-" when searching from the end, assume that
1398: * we're in "name - description" format.
1399: * To wit, print out a "Nm" and "Nd" in that format.
1400: */
1.11 kristaps 1401: if (SECT_NAME == st->sect) {
1.15 kristaps 1402: for (i = end - 2; i > start; i--)
1.47 schwarze 1403: if ('-' == buf[i] &&
1404: isspace((unsigned char)buf[i + 1]))
1.1 schwarze 1405: break;
1406: if ('-' == buf[i]) {
1407: j = i;
1408: /* Roll over multiple "-". */
1409: for ( ; i > start; i--)
1410: if ('-' != buf[i])
1411: break;
1.17 kristaps 1412: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1413: start = j + 1;
1.47 schwarze 1414: while (start < end &&
1415: isspace((unsigned char)buf[start]))
1.17 kristaps 1416: start++;
1.32 schwarze 1417: formatcodeln(st, "Nd", buf, &start, end, 1);
1418: mdoc_newln(st);
1.1 schwarze 1419: return;
1420: }
1421: }
1422:
1423: if ( ! st->haspar)
1424: puts(".Pp");
1425:
1426: st->haspar = 0;
1427: last = '\n';
1.13 kristaps 1428: opstack = 0;
1.1 schwarze 1429:
1.15 kristaps 1430: for (seq = 0; start < end; seq++) {
1.1 schwarze 1431: /*
1432: * Loop til we get either to a newline or escape.
1433: * Escape initial control characters.
1434: */
1435: while (start < end) {
1.34 schwarze 1436: if (start < end - 1 && '<' == buf[start + 1] &&
1437: 'A' <= buf[start] && 'Z' >= buf[start])
1.1 schwarze 1438: break;
1439: else if ('\n' == buf[start])
1440: break;
1441: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1442: outbuf_addstr(st, "\\&");
1.1 schwarze 1443: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1444: outbuf_addstr(st, "\\&");
1.12 kristaps 1445: /*
1446: * If we're in the SYNOPSIS, have square
1447: * brackets indicate that we're opening and
1448: * closing an optional context.
1449: */
1.32 schwarze 1450:
1.13 kristaps 1451: if (SECT_SYNOPSIS == st->sect &&
1452: ('[' == buf[start] ||
1453: ']' == buf[start]) &&
1.32 schwarze 1454: dosynopsisop(st, buf,
1455: &start, end, &opstack))
1.13 kristaps 1456: continue;
1.32 schwarze 1457:
1.42 schwarze 1458: /* Merely buffer non-whitespace. */
1.32 schwarze 1459:
1.31 schwarze 1460: last = buf[start++];
1.44 schwarze 1461: if ( ! isspace(last))
1.37 schwarze 1462: outbuf_addchar(st);
1.44 schwarze 1463: if (start < end &&
1.52 schwarze 1464: ! isspace((unsigned char)buf[start - 1]) &&
1.44 schwarze 1465: ! isspace((unsigned char)buf[start]))
1.37 schwarze 1466: continue;
1467:
1.44 schwarze 1468: /*
1469: * Found the end of a word.
1470: * Rewind trailing delimiters.
1471: */
1472:
1473: eos = noeos = 0;
1474: for (wend = st->outbuflen; wend; wend--)
1475: if ('.' == st->outbuf[wend - 1] ||
1476: '!' == st->outbuf[wend - 1] ||
1477: '?' == st->outbuf[wend - 1])
1478: eos = 1;
1479: else if ('|' == st->outbuf[wend - 1] ||
1480: ',' == st->outbuf[wend - 1] ||
1481: ';' == st->outbuf[wend - 1] ||
1482: ':' == st->outbuf[wend - 1])
1483: noeos = 1;
1484: else if ('\'' != st->outbuf[wend - 1] &&
1485: '"' != st->outbuf[wend - 1] &&
1486: ')' != st->outbuf[wend - 1] &&
1487: ']' != st->outbuf[wend - 1])
1488: break;
1489: eos &= ! noeos;
1490:
1491: /*
1492: * Detect function names.
1493: */
1.42 schwarze 1494:
1.43 schwarze 1495: mtype = MDOC_Fa;
1.49 schwarze 1496: savechar = '\0';
1.44 schwarze 1497: if (wend && ')' == st->outbuf[wend] &&
1498: '(' == st->outbuf[wend - 1]) {
1499: mtype = dict_get(st->outbuf, --wend);
1.49 schwarze 1500: if (MDOC_Dv == mtype)
1501: mtype = MDOC_Fo;
1.43 schwarze 1502: if (MDOC_Fo == mtype || MDOC_MAX == mtype) {
1.44 schwarze 1503: st->outbuflen = wend;
1504: st->outbuf[wend] = '\0';
1.43 schwarze 1505: mdoc_newln(st);
1506: if (MDOC_Fo == mtype)
1507: fputs(".Fn ", stdout);
1508: else
1509: fputs(".Xr ", stdout);
1510: st->oust = OUST_MAC;
1511: }
1.49 schwarze 1512: } else {
1513: mtype = dict_get(st->outbuf, wend);
1514: if (MDOC_Dv == mtype) {
1515: savechar = st->outbuf[wend];
1516: st->outbuf[wend] = '\0';
1517: mdoc_newln(st);
1518: fputs(".Dv ", stdout);
1519: st->oust = OUST_MAC;
1520: } else
1521: mtype = MDOC_Fa;
1.37 schwarze 1522: }
1523:
1.42 schwarze 1524: /*
1525: * On whitespace, flush the output buffer
1526: * and allow breaking to a macro line.
1527: */
1528:
1.37 schwarze 1529: outbuf_flush(st);
1.42 schwarze 1530:
1531: /*
1532: * End macro lines, and
1533: * end text lines at the end of sentences.
1534: */
1535:
1.44 schwarze 1536: if (OUST_MAC == st->oust || (eos && wend > 1 &&
1537: islower((unsigned char)st->outbuf[wend - 1]))) {
1.43 schwarze 1538: if (MDOC_MAX == mtype)
1539: fputs(" 3", stdout);
1.49 schwarze 1540: if (MDOC_Fa != mtype) {
1541: if (MDOC_Dv == mtype)
1542: st->outbuf[wend] = savechar;
1543: else
1544: wend += 2;
1545: while ('\0' != st->outbuf[wend])
1.44 schwarze 1546: printf(" %c",
1.49 schwarze 1547: st->outbuf[wend++]);
1548: }
1.40 schwarze 1549: mdoc_newln(st);
1.43 schwarze 1550: }
1.42 schwarze 1551:
1552: /* Advance to the next word. */
1553:
1.44 schwarze 1554: while ('\n' != buf[start] &&
1555: isspace((unsigned char)buf[start]))
1.42 schwarze 1556: start++;
1557: st->wantws = 1;
1.1 schwarze 1558: }
1559:
1.34 schwarze 1560: if (start < end - 1 && '<' == buf[start + 1] &&
1561: 'A' <= buf[start] && 'Z' >= buf[start]) {
1.32 schwarze 1562: formatcode(st, buf, &start, end, 0, seq);
1563: if (OUST_MAC == st->oust) {
1.30 schwarze 1564: /*
1565: * Let mdoc(7) handle trailing punctuation.
1566: * XXX Some punctuation characters
1567: * are not handled yet.
1568: */
1.51 schwarze 1569: if ((start == end - 1 ||
1570: (start < end - 1 &&
1571: (' ' == buf[start + 1] ||
1572: '\n' == buf[start + 1]))) &&
1573: NULL != strchr("|.,;:?!)]", buf[start])) {
1.16 kristaps 1574: putchar(' ');
1575: putchar(buf[start++]);
1576: }
1.32 schwarze 1577:
1578: if (st->wantws ||
1579: ' ' == buf[start] ||
1580: '\n' == buf[start])
1581: mdoc_newln(st);
1582:
1.30 schwarze 1583: /*
1584: * Consume all whitespace
1585: * so we don't accidentally start
1586: * an implicit literal line.
1587: */
1.32 schwarze 1588:
1.6 kristaps 1589: while (start < end && ' ' == buf[start])
1590: start++;
1.32 schwarze 1591:
1592: /*
1593: * Some text is following.
1594: * Implement requested spacing.
1595: */
1596:
1597: if ( ! st->wantws && start < end &&
1.34 schwarze 1598: ('<' != buf[start + 1] ||
1599: 'A' > buf[start] ||
1600: 'Z' < buf[start])) {
1.32 schwarze 1601: printf(" Ns ");
1602: st->wantws = 1;
1603: }
1.6 kristaps 1604: }
1.1 schwarze 1605: } else if (start < end && '\n' == buf[start]) {
1.32 schwarze 1606: outbuf_flush(st);
1607: mdoc_newln(st);
1.1 schwarze 1608: if (++start >= end)
1609: continue;
1610: /*
1611: * If we have whitespace next, eat it to prevent
1612: * mdoc(7) from thinking that it's meant for
1613: * verbatim text.
1614: * It is--but if we start with that, we can't
1615: * have a macro subsequent it, which may be
1616: * possible if we have an escape next.
1617: */
1.31 schwarze 1618: if (' ' == buf[start] || '\t' == buf[start])
1.1 schwarze 1619: puts(".br");
1620: for ( ; start < end; start++)
1621: if (' ' != buf[start] && '\t' != buf[start])
1622: break;
1.12 kristaps 1623: }
1.1 schwarze 1624: }
1.32 schwarze 1625: outbuf_flush(st);
1626: mdoc_newln(st);
1.1 schwarze 1627: }
1628:
1629: /*
1630: * There are three kinds of paragraphs: verbatim (starts with whitespace
1631: * of some sort), ordinary (starts without "=" marker), or a command
1632: * (default: starts with "=").
1633: */
1634: static void
1.35 schwarze 1635: dopar(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1636: {
1637:
1.32 schwarze 1638: assert(OUST_NL == st->oust);
1639: assert(st->wantws);
1640:
1.1 schwarze 1641: if (end == start)
1642: return;
1643: if (' ' == buf[start] || '\t' == buf[start])
1644: verbatim(st, buf, start, end);
1645: else if ('=' != buf[start])
1646: ordinary(st, buf, start, end);
1647: else
1648: command(st, buf, start, end);
1649: }
1650:
1651: /*
1652: * Loop around paragraphs within a document, processing each one in the
1653: * POD way.
1654: */
1655: static void
1656: dofile(const struct args *args, const char *fname,
1.35 schwarze 1657: const struct tm *tm, char *buf, size_t sz)
1.1 schwarze 1658: {
1.29 schwarze 1659: char datebuf[64];
1.1 schwarze 1660: struct state st;
1.46 schwarze 1661: const char *fbase, *fext, *section, *date, *format;
1.1 schwarze 1662: char *title, *cp;
1.53 ! schwarze 1663: size_t cur, end;
! 1664: int verb;
1.1 schwarze 1665:
1666: if (0 == sz)
1667: return;
1668:
1.29 schwarze 1669: /*
1670: * Parsing the filename is almost always required,
1671: * except when both the title and the section
1672: * are provided on the command line.
1673: */
1674:
1675: if (NULL == args->title || NULL == args->section) {
1676: fbase = strrchr(fname, '/');
1677: if (NULL == fbase)
1678: fbase = fname;
1679: else
1680: fbase++;
1681: fext = strrchr(fbase, '.');
1682: } else
1683: fext = NULL;
1684:
1685: /*
1686: * The title will be converted to uppercase,
1687: * so it needs to be copied.
1688: */
1689:
1690: title = (NULL != args->title) ? strdup(args->title) :
1691: (NULL != fext) ? strndup(fbase, fext - fbase) :
1692: strdup(fbase);
1.1 schwarze 1693:
1694: if (NULL == title) {
1695: perror(NULL);
1696: exit(EXIT_FAILURE);
1697: }
1698:
1699: /* Section is 1 unless suffix is "pm". */
1700:
1.29 schwarze 1701: section = (NULL != args->section) ? args->section :
1702: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1703: PERL_SECTION;
1.1 schwarze 1704:
1705: /* Date. Or the given "tm" if not supplied. */
1706:
1.46 schwarze 1707: date = args->date;
1708: format = (NULL == date) ? "%B %d, %Y" :
1.48 schwarze 1709: strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $";
1.46 schwarze 1710:
1711: if (NULL != format) {
1712: strftime(datebuf, sizeof(datebuf), format, tm);
1.1 schwarze 1713: date = datebuf;
1714: }
1715:
1716: for (cp = title; '\0' != *cp; cp++)
1717: *cp = toupper((int)*cp);
1718:
1719: /* The usual mdoc(7) preamble. */
1720:
1721: printf(".Dd %s\n", date);
1722: printf(".Dt %s %s\n", title, section);
1723: puts(".Os");
1724:
1725: free(title);
1726:
1.37 schwarze 1727: dict_init();
1.1 schwarze 1728: memset(&st, 0, sizeof(struct state));
1.32 schwarze 1729: st.oust = OUST_NL;
1730: st.wantws = 1;
1731:
1.1 schwarze 1732: assert(sz > 0);
1733:
1734: /* Main loop over file contents. */
1735:
1.53 ! schwarze 1736: cur = 0;
! 1737: for (;;) {
! 1738: while (cur < sz && '\n' == buf[cur])
! 1739: cur++;
! 1740: if (cur >= sz)
! 1741: break;
! 1742:
! 1743: verb = isspace((unsigned char)buf[cur]);
! 1744:
1.1 schwarze 1745: /* Read until next paragraph. */
1.53 ! schwarze 1746:
! 1747: for (end = cur + 1; end + 1 < sz; end++)
! 1748: if ('\n' == buf[end] && '\n' == buf[end + 1] &&
! 1749: !(verb && end + 2 < sz &&
! 1750: isspace((unsigned char)buf[end + 2])))
1.1 schwarze 1751: break;
1752:
1753: /* Adjust end marker for EOF. */
1.53 ! schwarze 1754:
! 1755: if (end < sz && '\n' != buf[end])
! 1756: end++;
1.1 schwarze 1757:
1758: /* Process paragraph and adjust start. */
1.53 ! schwarze 1759:
1.1 schwarze 1760: dopar(&st, buf, cur, end);
1.53 ! schwarze 1761: cur = end + 2;
1.1 schwarze 1762: }
1.37 schwarze 1763: dict_destroy();
1.1 schwarze 1764: }
1765:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing, together with the modification time of the file,
 * or the current time for stdin or when fstat(2) fails.
 * Return 1 on success or 0 if the file cannot be opened or read.
 * Running out of memory is fatal.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
	    open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * Growing as soon as the buffer is full guarantees
		 * that there is always room left behind the data.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first, such that close(2) cannot clobber errno. */
		perror(fname);
		free(buf);
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	dofile(args, STDIN_FILENO == fd ?
	    "STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1832:
1833: int
1834: main(int argc, char *argv[])
1835: {
1836: const char *fname, *name;
1837: struct args args;
1838: int c;
1839:
1840: name = strrchr(argv[0], '/');
1841: if (name == NULL)
1842: name = argv[0];
1843: else
1844: ++name;
1845:
1846: memset(&args, 0, sizeof(struct args));
1847: fname = "-";
1848:
1849: /* Accept no arguments for now. */
1850:
1851: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1852: switch (c) {
1853: case ('h'):
1854: /* FALLTHROUGH */
1855: case ('l'):
1856: /* FALLTHROUGH */
1857: case ('c'):
1858: /* FALLTHROUGH */
1859: case ('o'):
1860: /* FALLTHROUGH */
1861: case ('q'):
1862: /* FALLTHROUGH */
1863: case ('r'):
1864: /* FALLTHROUGH */
1865: case ('u'):
1866: /* FALLTHROUGH */
1867: case ('v'):
1868: /* Ignore these. */
1869: break;
1870: case ('d'):
1871: args.date = optarg;
1872: break;
1873: case ('n'):
1874: args.title = optarg;
1875: break;
1876: case ('s'):
1877: args.section = optarg;
1878: break;
1879: default:
1880: goto usage;
1881: }
1882:
1883: argc -= optind;
1884: argv += optind;
1885:
1886: /* Accept only a single input file. */
1887:
1.25 schwarze 1888: if (argc > 1)
1889: goto usage;
1.1 schwarze 1890: else if (1 == argc)
1891: fname = *argv;
1892:
1893: return(readfile(&args, fname) ?
1894: EXIT_SUCCESS : EXIT_FAILURE);
1895:
1896: usage:
1897: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1898: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1899:
1900: return(EXIT_FAILURE);
1901: }
CVSweb