Annotation of pod2mdoc/pod2mdoc.c, Revision 1.54
1.54 ! schwarze 1: /* $Id: pod2mdoc.c,v 1.53 2015/02/19 13:50:45 schwarze Exp $ */
1.1 schwarze 2: /*
3: * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
1.37 schwarze 4: * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
1.1 schwarze 5: *
6: * Permission to use, copy, modify, and distribute this software for any
7: * purpose with or without fee is hereby granted, provided that the above
8: * copyright notice and this permission notice appear in all copies.
9: *
10: * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11: * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12: * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13: * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14: * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16: * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17: */
18: #include <sys/stat.h>
19: #include <sys/time.h>
20:
21: #include <assert.h>
22: #include <ctype.h>
23: #include <fcntl.h>
24: #include <getopt.h>
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <unistd.h>
29:
1.37 schwarze 30: #include "dict.h"
31:
1.10 kristaps 32: /*
1.19 kristaps 33: * In what section can we find Perl module manuals?
34: * Sometimes (Mac OS X) it's 3pm, sometimes (OpenBSD, etc.) 3p.
35: * XXX IF YOU CHANGE THIS, CHANGE POD2MDOC.1 AS WELL.
1.10 kristaps 36: */
37: #define PERL_SECTION "3p"
38:
1.1 schwarze 39: struct args {
40: const char *title; /* override "Dt" title */
41: const char *date; /* override "Dd" date */
42: const char *section; /* override "Dt" section */
43: };
44:
1.4 schwarze 45: enum list {
46: LIST_BULLET = 0,
47: LIST_ENUM,
48: LIST_TAG,
49: LIST__MAX
50: };
51:
1.11 kristaps 52: enum sect {
53: SECT_NONE = 0,
54: SECT_NAME, /* NAME section */
55: SECT_SYNOPSIS, /* SYNOPSIS section */
56: };
57:
1.32 schwarze 58: enum outstate {
59: OUST_NL = 0, /* just started a new output line */
60: OUST_TXT, /* text line output in progress */
61: OUST_MAC /* macro line output in progress */
62: };
63:
/*
 * Parser and output state threaded through all of the formatting
 * functions in this file.
 */
1.1 schwarze 64: struct state {
1.31 schwarze 65: const char *fname; /* file being parsed */
1.1 schwarze 66: int parsing; /* cleared by =cut, set by any other command */
67: int paused; /* set by =begin, cleared by the next =end */
1.11 kristaps 68: enum sect sect; /* which section are we in? */
1.4 schwarze 69: #define LIST_STACKSZ 128
70: enum list lstack[LIST_STACKSZ]; /* open lists; LIST__MAX = not yet typed */
71: size_t lpos; /* number of entries in use on the list stack */
1.31 schwarze 72: int haspar; /* in paragraph: do we need Pp? */
1.32 schwarze 73: enum outstate oust; /* state of the mdoc output stream */
74: int wantws; /* let mdoc(7) output whitespace here */
1.31 schwarze 75: char *outbuf; /* text buffered for output */
76: size_t outbufsz; /* allocated size of outbuf */
77: size_t outbuflen; /* current length of outbuf, excluding the NUL */
1.1 schwarze 78: };
79:
80: enum fmt {
81: FMT_ITALIC,
82: FMT_BOLD,
83: FMT_CODE,
84: FMT_LINK,
85: FMT_ESCAPE,
86: FMT_FILE,
87: FMT_NBSP,
88: FMT_INDEX,
89: FMT_NULL,
90: FMT__MAX
91: };
92:
93: enum cmd {
94: CMD_POD = 0,
95: CMD_HEAD1,
96: CMD_HEAD2,
97: CMD_HEAD3,
98: CMD_HEAD4,
99: CMD_OVER,
100: CMD_ITEM,
101: CMD_BACK,
102: CMD_BEGIN,
103: CMD_END,
104: CMD_FOR,
105: CMD_ENCODING,
106: CMD_CUT,
107: CMD__MAX
108: };
109:
110: static const char *const cmds[CMD__MAX] = {
111: "pod", /* CMD_POD */
112: "head1", /* CMD_HEAD1 */
113: "head2", /* CMD_HEAD2 */
114: "head3", /* CMD_HEAD3 */
115: "head4", /* CMD_HEAD4 */
116: "over", /* CMD_OVER */
117: "item", /* CMD_ITEM */
118: "back", /* CMD_BACK */
119: "begin", /* CMD_BEGIN */
120: "end", /* CMD_END */
121: "for", /* CMD_FOR */
122: "encoding", /* CMD_ENCODING */
123: "cut" /* CMD_CUT */
124: };
125:
126: static const char fmts[FMT__MAX] = {
127: 'I', /* FMT_ITALIC */
128: 'B', /* FMT_BOLD */
129: 'C', /* FMT_CODE */
130: 'L', /* FMT_LINK */
131: 'E', /* FMT_ESCAPE */
132: 'F', /* FMT_FILE */
133: 'S', /* FMT_NBSP */
134: 'X', /* FMT_INDEX */
135: 'Z' /* FMT_NULL */
136: };
137:
/*
 * The most recent character handed to the output side.
 * It is written by outbuf_addstr(), mdoc_newln(), formatcode() and
 * formatcodeln(), appended to the buffer by outbuf_addchar(), and
 * consulted for whitespace and backslash-escaping decisions.
 */
1.42 schwarze 138: static unsigned char last;
1.6 kristaps 139:
1.31 schwarze 140:
141: static void
142: outbuf_grow(struct state *st, size_t by)
143: {
144:
145: st->outbufsz += (by / 128 + 1) * 128;
146: st->outbuf = realloc(st->outbuf, st->outbufsz);
147: if (NULL == st->outbuf) {
148: perror(NULL);
149: exit(EXIT_FAILURE);
150: }
151: }
152:
153: static void
154: outbuf_addchar(struct state *st)
155: {
156:
157: if (st->outbuflen + 2 >= st->outbufsz)
158: outbuf_grow(st, 1);
159: st->outbuf[st->outbuflen++] = last;
160: if ('\\' == last)
161: st->outbuf[st->outbuflen++] = 'e';
162: st->outbuf[st->outbuflen] = '\0';
163: }
164:
165: static void
166: outbuf_addstr(struct state *st, const char *str)
167: {
168: size_t slen;
169:
170: slen = strlen(str);
171: if (st->outbuflen + slen >= st->outbufsz)
172: outbuf_grow(st, slen);
173: memcpy(st->outbuf + st->outbuflen, str, slen+1);
1.33 schwarze 174: st->outbuflen += slen;
1.31 schwarze 175: last = str[slen - 1];
176: }
177:
178: static void
179: outbuf_flush(struct state *st)
180: {
181:
182: if (0 == st->outbuflen)
183: return;
184:
1.40 schwarze 185: if (OUST_TXT == st->oust && st->wantws)
186: putchar(' ');
187:
1.54 ! schwarze 188: if (OUST_MAC == st->oust && '"' == *st->outbuf)
! 189: printf("\\(dq%s", st->outbuf + 1);
! 190: else
! 191: fputs(st->outbuf, stdout);
! 192:
1.31 schwarze 193: *st->outbuf = '\0';
194: st->outbuflen = 0;
1.32 schwarze 195:
196: if (OUST_NL == st->oust)
197: st->oust = OUST_TXT;
1.31 schwarze 198: }
199:
200: static void
1.32 schwarze 201: mdoc_newln(struct state *st)
1.31 schwarze 202: {
203:
1.32 schwarze 204: if (OUST_NL == st->oust)
1.31 schwarze 205: return;
1.32 schwarze 206:
1.31 schwarze 207: putchar('\n');
208: last = '\n';
1.32 schwarze 209: st->oust = OUST_NL;
210: st->wantws = 1;
1.31 schwarze 211: }
212:
/*
 * Given buf[*start] is at the start of an escape name, read until the
 * end of the escape ('>') and append the matching roff escape to the
 * output buffer.
 * Sets *start to one after the '>', or to "end" if no '>' was found.
 * Names that do not fit the local buffer and names that are not
 * recognised are consumed without producing output.
 *
 * This function does not care about output modes,
 * it merely appends text to the output buffer,
 * which can then be used in any mode.
 */
static void
formatescape(struct state *st, const char *buf, size_t *start, size_t end)
{
	char		 esc[16]; /* no more needed */
	size_t		 i, max;

	max = sizeof(esc) - 1;
	i = 0;
	/* Read until our buffer is full. */
	while (*start < end && '>' != buf[*start] && i < max)
		esc[i++] = buf[(*start)++];
	esc[i] = '\0';

	if (i == max) {
		/*
		 * Too long: skip to the closing '>' and step over it,
		 * so that the caller does not see a stray '>'.
		 */
		while (*start < end && '>' != buf[*start])
			(*start)++;
		if (*start < end)
			(*start)++;
		return;
	}
	if (*start >= end)
		return;

	assert('>' == buf[*start]);
	(*start)++;

	/*
	 * TODO: right now, we only recognise the named escapes.
	 * Just let the rest of them go.
	 */
	if (0 == strcmp(esc, "lt"))
		outbuf_addstr(st, "\\(la");
	else if (0 == strcmp(esc, "gt"))
		outbuf_addstr(st, "\\(ra");
	else if (0 == strcmp(esc, "verbar"))
		outbuf_addstr(st, "\\(ba");
	else if (0 == strcmp(esc, "sol"))
		outbuf_addstr(st, "\\(sl");
}
259:
260: /*
1.9 kristaps 261: * Run some heuristics to intuit a link format.
1.19 kristaps 262: * I set "start" to be the end of the sequence (last right-carrot) so
1.9 kristaps 263: * that the caller can safely just continue processing.
1.19 kristaps 264: * If this is just an empty tag, I'll return 0.
1.32 schwarze 265: *
266: * Always operates in OUST_MAC mode.
267: * Mode handling is done by the caller.
1.9 kristaps 268: */
269: static int
270: trylink(const char *buf, size_t *start, size_t end, size_t dsz)
271: {
1.21 kristaps 272: size_t linkstart, realend, linkend,
273: i, j, textsz, stack;
1.9 kristaps 274:
275: /*
276: * Scan to the start of the terminus.
277: * This function is more or less replicated in the formatcode()
278: * for null or index formatting codes.
1.23 kristaps 279: * However, we're slightly different because we might have
280: * nested escapes we need to ignore.
1.9 kristaps 281: */
1.21 kristaps 282: stack = 0;
1.19 kristaps 283: for (linkstart = realend = *start; realend < end; realend++) {
1.23 kristaps 284: if ('<' == buf[realend])
285: stack++;
1.19 kristaps 286: if ('>' != buf[realend])
1.9 kristaps 287: continue;
1.23 kristaps 288: else if (stack-- > 0)
289: continue;
290: if (dsz == 1)
1.9 kristaps 291: break;
1.19 kristaps 292: assert(realend > 0);
293: if (' ' != buf[realend - 1])
1.9 kristaps 294: continue;
1.19 kristaps 295: for (i = realend, j = 0; i < end && j < dsz; j++)
1.9 kristaps 296: if ('>' != buf[i++])
297: break;
298: if (dsz == j)
299: break;
300: }
1.19 kristaps 301:
302: /* Ignore stubs. */
303: if (realend == end || realend == *start)
1.9 kristaps 304: return(0);
305:
1.19 kristaps 306: /* Set linkend to the end of content. */
307: linkend = dsz > 1 ? realend - 1 : realend;
1.18 kristaps 308:
1.19 kristaps 309: /* Re-scan to see if we have a title or section. */
310: for (textsz = *start; textsz < linkend; textsz++)
311: if ('|' == buf[textsz] || '/' == buf[textsz])
1.18 kristaps 312: break;
313:
1.19 kristaps 314: if (textsz < linkend && '|' == buf[textsz]) {
1.20 kristaps 315: /* With title: set start, then end at section. */
1.19 kristaps 316: linkstart = textsz + 1;
1.18 kristaps 317: textsz = textsz - *start;
1.19 kristaps 318: for (i = linkstart; i < linkend; i++)
319: if ('/' == buf[i])
320: break;
321: if (i < linkend)
322: linkend = i;
1.20 kristaps 323: } else if (textsz < linkend && '/' == buf[textsz]) {
324: /* With section: set end at section. */
325: linkend = textsz;
326: textsz = 0;
327: } else
328: /* No title, no section. */
1.18 kristaps 329: textsz = 0;
1.19 kristaps 330:
331: *start = realend;
332: j = linkend - linkstart;
333:
1.20 kristaps 334: /* Do we have only subsection material? */
335: if (0 == j && '/' == buf[linkend]) {
336: linkstart = linkend + 1;
337: linkend = dsz > 1 ? realend - 1 : realend;
338: if (0 == (j = linkend - linkstart))
339: return(0);
340: printf("Sx %.*s", (int)j, &buf[linkstart]);
341: return(1);
342: } else if (0 == j)
1.19 kristaps 343: return(0);
344:
345: /* See if we qualify as being a link or not. */
1.20 kristaps 346: if ((j > 4 && 0 == memcmp("http:", &buf[linkstart], j)) ||
347: (j > 5 && 0 == memcmp("https:", &buf[linkstart], j)) ||
348: (j > 3 && 0 == memcmp("ftp:", &buf[linkstart], j)) ||
349: (j > 4 && 0 == memcmp("sftp:", &buf[linkstart], j)) ||
350: (j > 3 && 0 == memcmp("smb:", &buf[linkstart], j)) ||
351: (j > 3 && 0 == memcmp("afs:", &buf[linkstart], j))) {
352: /* Gross. */
353: printf("Lk %.*s", (int)((dsz > 1 ? realend - 1 :
354: realend) - linkstart), &buf[linkstart]);
1.19 kristaps 355: return(1);
356: }
357:
358: /* See if we qualify as a mailto. */
1.20 kristaps 359: if (j > 6 && 0 == memcmp("mailto:", &buf[linkstart], j)) {
1.19 kristaps 360: printf("Mt %.*s", (int)j, &buf[linkstart]);
361: return(1);
362: }
363:
364: /* See if we're a foo(5), foo(5x), or foo(5xx) manpage. */
365: if ((j > 3 && ')' == buf[linkend - 1]) &&
366: ('(' == buf[linkend - 3])) {
367: printf("Xr %.*s %c", (int)(j - 3),
368: &buf[linkstart], buf[linkend - 2]);
369: return(1);
370: } else if ((j > 4 && ')' == buf[linkend - 1]) &&
371: ('(' == buf[linkend - 4])) {
372: printf("Xr %.*s %.*s", (int)(j - 4),
373: &buf[linkstart], 2, &buf[linkend - 3]);
374: return(1);
375: } else if ((j > 5 && ')' == buf[linkend - 1]) &&
376: ('(' == buf[linkend - 5])) {
377: printf("Xr %.*s %.*s", (int)(j - 5),
378: &buf[linkstart], 3, &buf[linkend - 4]);
379: return(1);
380: }
381:
382: /* Last try: do we have a double-colon? */
383: for (i = linkstart + 1; i < linkend; i++)
384: if (':' == buf[i] && ':' == buf[i - 1])
1.18 kristaps 385: break;
1.9 kristaps 386:
1.19 kristaps 387: if (i < linkend)
1.10 kristaps 388: printf("Xr %.*s " PERL_SECTION,
1.19 kristaps 389: (int)j, &buf[linkstart]);
1.9 kristaps 390: else
1.19 kristaps 391: printf("Xr %.*s 1", (int)j, &buf[linkstart]);
1.9 kristaps 392:
393: return(1);
394: }
395:
/*
 * Doclifting: if we're a bold "-xx" and we're in the SYNOPSIS section,
 * then it's likely that we're a flag.
 * Our flag might be followed by an argument, so make sure that we're
 * accounting for that, too.
 * If we don't have a flag at all, however, then assume we're an "Ar".
 *
 * On entry buf[*start] must be the '-' and at least one more byte
 * must follow before "end"; both are asserted.
 * Prints "Fl name " for each flag found and "Ar " for what follows,
 * and advances *start past the text it has printed.
 *
 * Always operates in OUST_MAC mode.
 * Mode handling is done by the caller.
 */
static void
dosynopsisfl(const char *buf, size_t *start, size_t end)
{
	size_t		 i;
	unsigned char	 uc;

again:
	assert(*start + 1 < end);
	assert('-' == buf[*start]);

	/*
	 * The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (and EOF), so never pass
	 * them a plain char that may be negative.
	 */
	uc = (unsigned char)buf[*start + 1];
	if ( ! isalnum(uc) && '?' != uc && '-' != uc) {
		/*
		 * NOTE(review): this steps back to the byte before the
		 * dash; the intent is not evident from this function.
		 */
		(*start)--;
		fputs("Ar ", stdout);
		return;
	}

	/* Find the end of the flag name. */
	(*start)++;
	for (i = *start; i < end; i++) {
		uc = (unsigned char)buf[i];
		if ( ! isalnum(uc) && '?' != uc &&
		    '-' != uc && '_' != uc)
			break;
	}

	assert(i < end);

	if ( ! (' ' == buf[i] || '>' == buf[i])) {
		printf("Ar ");
		return;
	}

	printf("Fl ");
	/* Escape flag names that look like a two-letter macro. */
	if (end - *start > 1 &&
	    isupper((unsigned char)buf[*start]) &&
	    islower((unsigned char)buf[*start + 1]) &&
	    (end - *start == 2 ||
	     ' ' == buf[*start + 2]))
		printf("\\&");
	printf("%.*s ", (int)(i - *start), &buf[*start]);
	*start = i;

	if (' ' == buf[i]) {
		while (i < end && ' ' == buf[i])
			i++;
		assert(i < end);
		/* Another flag follows: handle it the same way. */
		if ('-' == buf[i]) {
			*start = i;
			goto again;
		}
		printf("Ar ");
		*start = i;
	}
}
464:
1.9 kristaps 465: /*
1.1 schwarze 466: * We're at the character in front of a format code, which is structured
467: * like X<...> and can contain nested format codes.
468: * This consumes the whole format code, and any nested format codes, til
469: * the end of matched production.
1.6 kristaps 470: * If "nomacro", then we don't print any macros, just contained data
471: * (e.g., following "Sh" or "Nm").
1.15 kristaps 472: * "pos" is only significant in SYNOPSIS, and should be 0 when invoked
473: * as the first format code on a line (for decoration as an "Nm"),
474: * non-zero otherwise.
1.32 schwarze 475: *
476: * Output mode handling is most complicated here.
477: * We may enter in any mode.
478: * We usually exit in OUST_MAC mode, except when
479: * entering without OUST_MAC and the code is invalid.
1.1 schwarze 480: */
1.33 schwarze 481: static int
1.15 kristaps 482: formatcode(struct state *st, const char *buf, size_t *start,
1.32 schwarze 483: size_t end, int nomacro, int pos)
1.1 schwarze 484: {
1.40 schwarze 485: size_t i, j, dsz;
1.1 schwarze 486: enum fmt fmt;
1.39 schwarze 487: unsigned char uc;
1.1 schwarze 488:
489: assert(*start + 1 < end);
490: assert('<' == buf[*start + 1]);
491:
1.6 kristaps 492: /*
493: * First, look up the format code.
1.30 schwarze 494: * If it's not valid, treat it as a NOOP.
1.6 kristaps 495: */
496: for (fmt = 0; fmt < FMT__MAX; fmt++)
497: if (buf[*start] == fmts[fmt])
498: break;
499:
1.5 kristaps 500: /*
501: * Determine whether we're overriding our delimiter.
502: * According to POD, if we have more than one '<' followed by a
503: * space, then we need a space followed by matching '>' to close
504: * the expression.
505: * Otherwise we use the usual '<' and '>' matched pair.
506: */
507: i = *start + 1;
508: while (i < end && '<' == buf[i])
509: i++;
510: assert(i > *start + 1);
511: dsz = i - (*start + 1);
512: if (dsz > 1 && (i >= end || ' ' != buf[i]))
513: dsz = 1;
514:
515: /* Remember, if dsz>1, to jump the trailing space. */
516: *start += dsz + 1 + (dsz > 1 ? 1 : 0);
1.1 schwarze 517:
518: /*
1.6 kristaps 519: * Escapes and ignored codes (NULL and INDEX) don't print macro
520: * sequences, so just output them like normal text before
521: * processing for real macros.
1.1 schwarze 522: */
523: if (FMT_ESCAPE == fmt) {
1.31 schwarze 524: formatescape(st, buf, start, end);
1.33 schwarze 525: return(0);
1.1 schwarze 526: } else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
1.5 kristaps 527: /*
1.6 kristaps 528: * Just consume til the end delimiter, accounting for
529: * whether it's a custom one.
1.5 kristaps 530: */
531: for ( ; *start < end; (*start)++) {
532: if ('>' != buf[*start])
533: continue;
534: else if (dsz == 1)
535: break;
536: assert(*start > 0);
537: if (' ' != buf[*start - 1])
538: continue;
539: i = *start;
540: for (j = 0; i < end && j < dsz; j++)
541: if ('>' != buf[i++])
542: break;
543: if (dsz != j)
544: continue;
545: (*start) += dsz;
546: break;
547: }
1.24 kristaps 548: if (*start < end) {
549: assert('>' == buf[*start]);
550: (*start)++;
551: }
552: if (isspace(last))
553: while (*start < end && isspace((int)buf[*start]))
554: (*start)++;
1.33 schwarze 555: return(0);
1.1 schwarze 556: }
557:
1.6 kristaps 558: /*
559: * Check whether we're supposed to print macro stuff (this is
560: * suppressed in, e.g., "Nm" and "Sh" macros).
561: */
1.30 schwarze 562: if (FMT__MAX != fmt && !nomacro) {
1.32 schwarze 563:
564: /*
1.31 schwarze 565: * If we are on a text line and there is no
566: * whitespace before our content, we have to make
567: * the previous word a prefix to the macro line.
1.1 schwarze 568: */
1.31 schwarze 569:
1.54 ! schwarze 570: if (OUST_MAC != st->oust && ' ' != buf[*start] &&
! 571: st->outbuflen) {
1.32 schwarze 572: if (OUST_NL != st->oust)
1.54 ! schwarze 573: mdoc_newln(st);
1.31 schwarze 574: printf(".Pf ");
1.54 ! schwarze 575: st->oust = OUST_MAC;
! 576: st->wantws = 1;
1.31 schwarze 577: }
578:
579: outbuf_flush(st);
580:
1.54 ! schwarze 581: /*
! 582: * Whitespace is easier to suppress on macro lines.
! 583: * We may already have wantws if there was whitespace
! 584: * before the code ("text B<text"), or there may be
! 585: * whitespace inside our scope ("textB< text").
! 586: */
1.31 schwarze 587:
1.54 ! schwarze 588: if (OUST_MAC == st->oust && ' ' != buf[*start] &&
! 589: ! st->wantws)
! 590: printf(" Ns");
1.31 schwarze 591:
592: /* Unless we are on a macro line, start one. */
593:
1.54 ! schwarze 594: if (OUST_MAC != st->oust) {
1.32 schwarze 595: if (OUST_NL != st->oust)
1.54 ! schwarze 596: mdoc_newln(st);
1.1 schwarze 597: putchar('.');
1.54 ! schwarze 598: st->oust = OUST_MAC;
1.31 schwarze 599: } else
1.1 schwarze 600: putchar(' ');
1.54 ! schwarze 601: st->wantws = 1;
1.31 schwarze 602:
1.32 schwarze 603: /*
604: * Print the macro corresponding to this format code,
605: * and update the output state afterwards.
606: */
1.6 kristaps 607:
1.1 schwarze 608: switch (fmt) {
609: case (FMT_ITALIC):
610: printf("Em ");
611: break;
612: case (FMT_BOLD):
1.14 kristaps 613: if (SECT_SYNOPSIS == st->sect) {
614: if (1 == dsz && '-' == buf[*start])
615: dosynopsisfl(buf, start, end);
1.15 kristaps 616: else if (0 == pos)
617: printf("Nm ");
1.14 kristaps 618: else
619: printf("Ar ");
620: break;
1.39 schwarze 621: }
622: i = 0;
623: uc = buf[*start];
624: while (isalnum(uc) || '_' == uc || ' ' == uc)
625: uc = buf[*start + ++i];
626: if ('=' != uc && '>' != uc)
627: i = 0;
628: if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
1.27 schwarze 629: printf("Dv ");
1.38 schwarze 630: break;
631: }
1.39 schwarze 632: switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
633: case MDOC_Fa:
1.38 schwarze 634: printf("Fa ");
1.39 schwarze 635: break;
636: case MDOC_Vt:
637: printf("Vt ");
638: break;
639: default:
1.27 schwarze 640: printf("Sy ");
1.39 schwarze 641: break;
642: }
1.1 schwarze 643: break;
644: case (FMT_CODE):
1.2 schwarze 645: printf("Qo Li ");
1.1 schwarze 646: break;
647: case (FMT_LINK):
1.19 kristaps 648: /* Try to link; use "No" if it's empty. */
1.9 kristaps 649: if ( ! trylink(buf, start, end, dsz))
650: printf("No ");
1.1 schwarze 651: break;
652: case (FMT_FILE):
653: printf("Pa ");
654: break;
655: case (FMT_NBSP):
656: printf("No ");
657: break;
658: default:
659: abort();
660: }
1.31 schwarze 661: } else
662: outbuf_flush(st);
1.1 schwarze 663:
664: /*
1.6 kristaps 665: * Process until we reach the end marker (e.g., '>') or until we
1.5 kristaps 666: * find a nested format code.
1.1 schwarze 667: * Don't emit any newlines: since we're on a macro line, we
668: * don't want to break the line.
669: */
670: while (*start < end) {
1.5 kristaps 671: if ('>' == buf[*start] && 1 == dsz) {
1.1 schwarze 672: (*start)++;
673: break;
1.5 kristaps 674: } else if ('>' == buf[*start] &&
675: ' ' == buf[*start - 1]) {
676: /*
677: * Handle custom delimiters.
678: * These require a certain number of
679: * space-preceded carrots before we're really at
680: * the end.
681: */
682: i = *start;
683: for (j = 0; i < end && j < dsz; j++)
684: if ('>' != buf[i++])
685: break;
686: if (dsz == j) {
687: *start += dsz;
688: break;
689: }
1.1 schwarze 690: }
1.34 schwarze 691: if (*start + 1 < end && '<' == buf[*start + 1] &&
692: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.40 schwarze 693: if ( ! formatcode(st, buf, start, end, nomacro, 1))
694: st->wantws = 1;
1.1 schwarze 695: continue;
696: }
1.3 schwarze 697:
1.32 schwarze 698: /* Suppress newlines and multiple spaces. */
699:
700: last = buf[(*start)++];
701: if (' ' == last || '\n' == last) {
702: putchar(' ');
703: while (*start < end && ' ' == buf[*start])
704: (*start)++;
705: continue;
706: }
707:
1.33 schwarze 708: if (OUST_MAC == st->oust && FMT__MAX != fmt) {
1.32 schwarze 709: if ( ! st->wantws) {
710: printf(" Ns ");
711: st->wantws = 1;
712: }
713:
714: /*
715: * Escape macro-like words.
716: * This matches "Xx " and "XxEOLN".
717: */
718:
719: if (end - *start > 0 &&
720: isupper((unsigned char)last) &&
721: islower((unsigned char)buf[*start]) &&
722: (end - *start == 1 ||
723: ' ' == buf[*start + 1] ||
724: '>' == buf[*start + 1]))
725: printf("\\&");
726: }
1.3 schwarze 727:
1.32 schwarze 728: putchar(last);
1.4 schwarze 729:
1.8 kristaps 730: /* Protect against character escapes. */
1.32 schwarze 731:
1.8 kristaps 732: if ('\\' == last)
733: putchar('e');
1.1 schwarze 734: }
1.2 schwarze 735:
736: if ( ! nomacro && FMT_CODE == fmt)
737: printf(" Qc ");
1.1 schwarze 738:
1.33 schwarze 739: st->wantws = ' ' == last;
1.40 schwarze 740: return(FMT__MAX != fmt);
1.1 schwarze 741: }
742:
743: /*
744: * Calls formatcode() til the end of a paragraph.
1.32 schwarze 745: * Goes to OUST_MAC mode and stays there when returning,
746: * such that the caller can add arguments to the macro line
747: * before closing it out.
1.1 schwarze 748: */
749: static void
1.32 schwarze 750: formatcodeln(struct state *st, const char *linemac,
751: const char *buf, size_t *start, size_t end, int nomacro)
1.1 schwarze 752: {
1.33 schwarze 753: int gotmacro, wantws;
1.1 schwarze 754:
1.32 schwarze 755: assert(OUST_NL == st->oust);
756: assert(st->wantws);
757: printf(".%s ", linemac);
758: st->oust = OUST_MAC;
759:
1.33 schwarze 760: gotmacro = 0;
1.1 schwarze 761: while (*start < end) {
1.33 schwarze 762: wantws = ' ' == buf[*start] || '\n' == buf[*start];
763: if (wantws) {
764: last = ' ';
765: do {
766: (*start)++;
767: } while (*start < end && ' ' == buf[*start]);
768: }
769:
1.34 schwarze 770: if (*start + 1 < end && '<' == buf[*start + 1] &&
771: 'A' <= buf[*start] && 'Z' >= buf[*start]) {
1.33 schwarze 772: st->wantws |= wantws;
773: gotmacro = formatcode(st, buf,
774: start, end, nomacro, 1);
1.1 schwarze 775: continue;
776: }
1.32 schwarze 777:
1.33 schwarze 778: if (gotmacro) {
779: if (*start < end || st->outbuflen) {
780: if (st->wantws ||
781: (wantws && !st->outbuflen))
782: printf(" No ");
783: else
784: printf(" Ns ");
785: }
786: gotmacro = 0;
787: }
788: outbuf_flush(st);
789: st->wantws = wantws;
790:
791: if (*start >= end)
792: break;
793:
794: if (st->wantws) {
795: putchar(' ');
796: st->wantws = 0;
1.32 schwarze 797: }
798:
1.4 schwarze 799: /*
800: * Since we're already on a macro line, we want to make
801: * sure that we don't inadvertently invoke a macro.
802: * We need to do this carefully because section names
803: * are used in troff and we don't want to escape
804: * something that needn't be escaped.
805: */
806: if (' ' == last && end - *start > 1 &&
1.33 schwarze 807: isupper((unsigned char)buf[*start]) &&
808: islower((unsigned char)buf[*start + 1]) &&
809: (end - *start == 2 || ' ' == buf[*start + 2]))
1.4 schwarze 810: printf("\\&");
811:
1.33 schwarze 812: putchar(last = buf[*start]);
1.8 kristaps 813:
814: /* Protect against character escapes. */
1.33 schwarze 815:
1.8 kristaps 816: if ('\\' == last)
817: putchar('e');
818:
1.1 schwarze 819: (*start)++;
820: }
821: }
822:
823: /*
1.4 schwarze 824: * Guess at what kind of list we are.
825: * These are taken straight from the POD manual.
826: * I don't know what people do in real life.
827: */
828: static enum list
829: listguess(const char *buf, size_t start, size_t end)
830: {
831: size_t len = end - start;
832:
833: assert(end >= start);
834:
835: if (len == 1 && '*' == buf[start])
836: return(LIST_BULLET);
837: if (len == 2 && '1' == buf[start] && '.' == buf[start + 1])
838: return(LIST_ENUM);
839: else if (len == 1 && '1' == buf[start])
840: return(LIST_ENUM);
841: else
842: return(LIST_TAG);
843: }
844:
845: /*
1.1 schwarze 846: * A command paragraph, as noted in the perlpod manual, just indicates
847: * that we should do something, optionally with some text to print as
848: * well.
1.32 schwarze 849: * From the perspective of external callers,
850: * always stays in OUST_NL/wantws mode,
851: * but its children do use OUST_MAC.
1.1 schwarze 852: */
853: static void
854: command(struct state *st, const char *buf, size_t start, size_t end)
855: {
856: size_t len, csz;
857: enum cmd cmd;
858:
859: assert('=' == buf[start]);
860: start++;
861: len = end - start;
862:
863: for (cmd = 0; cmd < CMD__MAX; cmd++) {
864: csz = strlen(cmds[cmd]);
865: if (len < csz)
866: continue;
867: if (0 == memcmp(&buf[start], cmd[cmds], csz))
868: break;
869: }
870:
871: /* Ignore bogus commands. */
872:
873: if (CMD__MAX == cmd)
874: return;
875:
876: start += csz;
1.8 kristaps 877: while (start < end && ' ' == buf[start])
878: start++;
879:
1.1 schwarze 880: len = end - start;
881:
882: if (st->paused) {
883: st->paused = CMD_END != cmd;
884: return;
885: }
886:
887: switch (cmd) {
888: case (CMD_POD):
889: break;
890: case (CMD_HEAD1):
891: /*
892: * The behaviour of head= follows from a quick glance at
893: * how pod2man handles it.
894: */
1.11 kristaps 895: st->sect = SECT_NONE;
896: if (end - start == 4) {
1.1 schwarze 897: if (0 == memcmp(&buf[start], "NAME", 4))
1.11 kristaps 898: st->sect = SECT_NAME;
899: } else if (end - start == 8) {
900: if (0 == memcmp(&buf[start], "SYNOPSIS", 8))
901: st->sect = SECT_SYNOPSIS;
902: }
1.32 schwarze 903: formatcodeln(st, "Sh", buf, &start, end, 1);
904: mdoc_newln(st);
1.1 schwarze 905: st->haspar = 1;
906: break;
907: case (CMD_HEAD2):
1.32 schwarze 908: formatcodeln(st, "Ss", buf, &start, end, 1);
909: mdoc_newln(st);
1.1 schwarze 910: st->haspar = 1;
911: break;
912: case (CMD_HEAD3):
913: puts(".Pp");
1.32 schwarze 914: formatcodeln(st, "Em", buf, &start, end, 0);
915: mdoc_newln(st);
1.1 schwarze 916: puts(".Pp");
917: st->haspar = 1;
918: break;
919: case (CMD_HEAD4):
920: puts(".Pp");
1.32 schwarze 921: formatcodeln(st, "No", buf, &start, end, 0);
922: mdoc_newln(st);
1.1 schwarze 923: puts(".Pp");
924: st->haspar = 1;
925: break;
926: case (CMD_OVER):
1.4 schwarze 927: /*
928: * If we have an existing list that hasn't had an =item
929: * yet, then make sure that we open it now.
930: * We use the default list type, but that can't be
931: * helped (we haven't seen any items yet).
1.1 schwarze 932: */
1.4 schwarze 933: if (st->lpos > 0)
934: if (LIST__MAX == st->lstack[st->lpos - 1]) {
935: st->lstack[st->lpos - 1] = LIST_TAG;
936: puts(".Bl -tag -width Ds");
937: }
938: st->lpos++;
939: assert(st->lpos < LIST_STACKSZ);
940: st->lstack[st->lpos - 1] = LIST__MAX;
1.1 schwarze 941: break;
942: case (CMD_ITEM):
1.6 kristaps 943: if (0 == st->lpos) {
944: /*
945: * Bad markup.
946: * Try to compensate.
947: */
948: st->lstack[st->lpos] = LIST__MAX;
949: st->lpos++;
950: }
1.4 schwarze 951: assert(st->lpos > 0);
952: /*
953: * If we're the first =item, guess at what our content
954: * will be: "*" is a bullet list, "1." is a numbered
955: * list, and everything is tagged.
956: */
957: if (LIST__MAX == st->lstack[st->lpos - 1]) {
958: st->lstack[st->lpos - 1] =
959: listguess(buf, start, end);
960: switch (st->lstack[st->lpos - 1]) {
961: case (LIST_BULLET):
962: puts(".Bl -bullet");
963: break;
964: case (LIST_ENUM):
965: puts(".Bl -enum");
966: break;
967: default:
968: puts(".Bl -tag -width Ds");
969: break;
970: }
971: }
972: switch (st->lstack[st->lpos - 1]) {
973: case (LIST_TAG):
1.32 schwarze 974: formatcodeln(st, "It", buf, &start, end, 0);
975: mdoc_newln(st);
1.4 schwarze 976: break;
977: case (LIST_ENUM):
978: /* FALLTHROUGH */
979: case (LIST_BULLET):
980: /*
981: * Abandon the remainder of the paragraph
982: * because we're going to be a bulletted or
983: * numbered list.
984: */
985: puts(".It");
986: break;
987: default:
988: abort();
989: }
1.1 schwarze 990: st->haspar = 1;
991: break;
992: case (CMD_BACK):
1.4 schwarze 993: /* Make sure we don't back over the stack. */
994: if (st->lpos > 0) {
995: st->lpos--;
996: puts(".El");
997: }
1.1 schwarze 998: break;
999: case (CMD_BEGIN):
1000: /*
1001: * We disregard all types for now.
1002: * TODO: process at least "text" in a -literal block.
1003: */
1004: st->paused = 1;
1005: break;
1006: case (CMD_FOR):
1007: /*
1008: * We ignore all types of encodings and formats
1009: * unilaterally.
1010: */
1011: break;
1012: case (CMD_ENCODING):
1013: break;
1014: case (CMD_CUT):
1015: st->parsing = 0;
1016: return;
1017: default:
1018: abort();
1019: }
1020:
1021: /* Any command (but =cut) makes us start parsing. */
1022: st->parsing = 1;
1023: }
1024:
1025: /*
1.39 schwarze 1026: * Put the type provided as an argument into the dictionary.
1027: */
1028: static void
1029: register_type(const char *ptype)
1030: {
1031: const char *pname, *pend;
1032:
1033: pname = ptype;
1034: while (isalnum((unsigned char)*pname) || '_' == *pname)
1035: pname++;
1036: if ((pname - ptype == 6 && ! strncmp(ptype, "struct", 6)) ||
1037: (pname - ptype == 4 && ! strncmp(ptype, "enum", 4))) {
1038: while (' ' == *pname)
1039: pname++;
1040: pend = pname;
1041: while (isalnum((unsigned char)*pend) || '_' == *pend)
1042: pend++;
1043: if (pend > pname)
1044: dict_put(pname, pend - pname, MDOC_Vt);
1045: } else
1046: pend = pname;
1047: if (pend > ptype)
1048: dict_put(ptype, pend - ptype, MDOC_Vt);
1049: }
1050:
1051: /*
1.1 schwarze 1052: * Just pump out the line in a verbatim block.
1.32 schwarze 1053: * From the perspective of external callers,
1054: * always stays in OUST_NL/wantws mode.
1.1 schwarze 1055: */
1056: static void
1.35 schwarze 1057: verbatim(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1058: {
1.36 schwarze 1059: size_t i, ift, ifo, ifa, ifc, inl;
1.38 schwarze 1060: char *cp, *cp2;
1.53 schwarze 1061: int indisplay, nopen, wantsp;
1.1 schwarze 1062:
1.53 schwarze 1063: if (st->paused || ! st->parsing)
1.1 schwarze 1064: return;
1.53 schwarze 1065:
1066: indisplay = wantsp = 0;
1067:
1.22 kristaps 1068: again:
1.53 schwarze 1069: if (start == end) {
1070: if (indisplay)
1071: puts(".Ed");
1072: return;
1073: }
1074:
1075: if ('\n' == buf[start]) {
1076: wantsp = 1;
1077: start++;
1078: goto again;
1079: }
1080:
1.22 kristaps 1081: /*
1082: * If we're in the SYNOPSIS, see if we're an #include block.
1083: * If we are, then print the "In" macro and re-loop.
1084: * This handles any number of inclusions, but only when they
1085: * come before the remaining parts...
1086: */
1087: if (SECT_SYNOPSIS == st->sect) {
1088: i = start;
1.35 schwarze 1089: while (i < end && buf[i] == ' ')
1090: i++;
1.22 kristaps 1091: if (i == end)
1.53 schwarze 1092: goto again;
1.35 schwarze 1093:
1.22 kristaps 1094: /* We're an include block! */
1095: if (end - i > 10 &&
1096: 0 == memcmp(&buf[i], "#include <", 10)) {
1097: start = i + 10;
1098: while (start < end && ' ' == buf[start])
1099: start++;
1.53 schwarze 1100: if (indisplay)
1101: puts(".Ed");
1102: indisplay = wantsp = 0;
1.22 kristaps 1103: fputs(".In ", stdout);
1104: /* Stop til the '>' marker or we hit eoln. */
1105: while (start < end &&
1106: '>' != buf[start] && '\n' != buf[start])
1107: putchar(buf[start++]);
1108: putchar('\n');
1109: if (start < end && '>' == buf[start])
1110: start++;
1111: if (start < end && '\n' == buf[start])
1112: start++;
1.41 schwarze 1113: goto again;
1114: }
1115:
1116: /* Other preprocessor directives. */
1117: if ('#' == buf[i]) {
1.53 schwarze 1118: if (indisplay)
1119: puts(".Ed");
1120: indisplay = wantsp = 0;
1.41 schwarze 1121: fputs(".Fd ", stdout);
1122: start = i;
1123: while(start < end && '\n' != buf[start])
1124: putchar(buf[start++]);
1125: putchar('\n');
1126: if (start < end && '\n' == buf[start])
1127: start++;
1.49 schwarze 1128:
1129: /* Remember #define for Dv or Fn. */
1130:
1131: if (strncmp(buf + i + 1, "define", 6) ||
1132: ! isspace((unsigned char)buf[i + 7]))
1133: goto again;
1134:
1135: ifo = i + 7;
1136: while (ifo < start &&
1137: isspace((unsigned char)buf[ifo]))
1138: ifo++;
1139: ifa = ifo;
1140: while ('_' == buf[ifa] ||
1141: isalnum((unsigned char)buf[ifa]))
1142: ifa++;
1143: dict_put(buf + ifo, ifa - ifo,
1144: '(' == buf[ifa] ? MDOC_Fo : MDOC_Dv);
1145:
1.41 schwarze 1146: goto again;
1.22 kristaps 1147: }
1.35 schwarze 1148:
1149: /* Parse function declaration. */
1150: ifo = ifa = ifc = 0;
1.36 schwarze 1151: inl = end;
1152: nopen = 0;
1153: for (ift = i; i < end; i++) {
1154: if (ifc) {
1155: if (buf[i] != '\n')
1156: continue;
1157: inl = i;
1158: break;
1159: }
1160: switch (buf[i]) {
1.45 schwarze 1161: case '\t':
1162: /* FALLTHROUGH */
1.36 schwarze 1163: case ' ':
1164: if ( ! ifa)
1165: ifo = i;
1166: break;
1167: case '(':
1168: if (ifo) {
1169: nopen++;
1170: if ( ! ifa)
1171: ifa = i;
1172: } else
1173: i = end;
1174: break;
1175: case ')':
1176: switch (nopen) {
1177: case 0:
1178: i = end;
1179: break;
1180: case 1:
1.35 schwarze 1181: ifc = i;
1.36 schwarze 1182: break;
1183: default:
1184: nopen--;
1185: break;
1186: }
1187: break;
1188: default:
1189: break;
1190: }
1.35 schwarze 1191: }
1192:
1193: /* Encode function declaration. */
1194: if (ifc) {
1.36 schwarze 1195: for (i = ifa; i < ifc; i++)
1196: if (buf[i] == '\n')
1197: buf[i] = ' ';
1.35 schwarze 1198: buf[ifo++] = '\0';
1.39 schwarze 1199: register_type(buf + ift);
1.53 schwarze 1200: if (indisplay)
1201: puts(".Ed");
1202: indisplay = wantsp = 0;
1.35 schwarze 1203: printf(".Ft %s", buf + ift);
1204: if (buf[ifo] == '*') {
1205: fputs(" *", stdout);
1206: ifo++;
1207: }
1208: putchar('\n');
1209: buf[ifa++] = '\0';
1210: printf(".Fo %s\n", buf + ifo);
1.39 schwarze 1211: dict_put(buf + ifo, 0, MDOC_Fo);
1.35 schwarze 1212: buf[ifc++] = '\0';
1213: for (;;) {
1214: cp = strchr(buf + ifa, ',');
1.38 schwarze 1215: if (cp != NULL) {
1216: cp2 = cp;
1.36 schwarze 1217: *cp++ = '\0';
1.38 schwarze 1218: } else
1219: cp2 = strchr(buf + ifa, '\0');
1220: while (isalnum((unsigned char)cp2[-1]) ||
1221: '_' == cp2[-1])
1222: cp2--;
1223: if ('\0' != *cp2)
1.39 schwarze 1224: dict_put(cp2, 0, MDOC_Fa);
1225: register_type(buf + ifa);
1.50 schwarze 1226: if (strchr(buf + ifa, ' ') == NULL)
1227: printf(".Fa %s\n", buf + ifa);
1228: else
1229: printf(".Fa \"%s\"\n", buf + ifa);
1.35 schwarze 1230: if (cp == NULL)
1231: break;
1.45 schwarze 1232: while (*cp == ' ' || *cp == '\t')
1.36 schwarze 1233: cp++;
1234: ifa = cp - buf;
1.35 schwarze 1235: }
1236: puts(".Fc");
1237: if (buf[ifc] == ';')
1238: ifc++;
1.36 schwarze 1239: if (ifc < inl) {
1240: buf[inl] = '\0';
1.35 schwarze 1241: puts(buf + ifc);
1242: }
1.53 schwarze 1243: start = inl < end ? inl + 1 : end;
1244: goto again;
1.35 schwarze 1245: }
1.22 kristaps 1246: }
1.53 schwarze 1247:
1248: if ( ! indisplay)
1249: puts(".Bd -literal");
1250: else if (wantsp)
1251: putchar('\n');
1252: indisplay = 1;
1253: wantsp = 0;
1254:
1255: for (last = '\n'; start < end; start++) {
1.8 kristaps 1256: /*
1257: * Handle accidental macros (newline starting with
1258: * control character) and escapes.
1259: */
1.53 schwarze 1260: if ('\n' == last) {
1261: if ('\n' == buf[start])
1262: goto again;
1.7 kristaps 1263: if ('.' == buf[start] || '\'' == buf[start])
1264: printf("\\&");
1.53 schwarze 1265: }
1.8 kristaps 1266: putchar(last = buf[start]);
1267: if ('\\' == buf[start])
1268: printf("e");
1.7 kristaps 1269: }
1.53 schwarze 1270: if ('\n' != last)
1271: putchar('\n');
1272: if (indisplay)
1273: puts(".Ed");
1.1 schwarze 1274: }
1275:
/*
 * See dosynopsisop().
 * Scan buf[start..end) and return 1 as soon as a closing bracket is
 * found that has no opening bracket before it within the range,
 * or 0 if the range ends first.
 */
static int
hasmatch(const char *buf, size_t start, size_t end)
{
	size_t	 depth, pos;

	depth = 0;
	for (pos = start; pos < end; pos++) {
		switch (buf[pos]) {
		case '[':
			depth++;
			break;
		case ']':
			if (0 == depth)
				return(1);
			depth--;
			break;
		default:
			break;
		}
	}
	return(0);
}
1293:
1294: /*
1295: * If we're in the SYNOPSIS section and we've encounter braces in an
1296: * ordinary paragraph, then try to see whether we're an [-option].
1297: * Do this, if we're an opening bracket, by first seeing if we have a
1298: * matching end via hasmatch().
1299: * If we're an ending bracket, see if we have a stack already.
1300: */
1301: static int
1.32 schwarze 1302: dosynopsisop(struct state *st, const char *buf,
1303: size_t *start, size_t end, size_t *opstack)
1.13 kristaps 1304: {
1305:
1306: assert('[' == buf[*start] || ']' == buf[*start]);
1307:
1308: if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
1.32 schwarze 1309: mdoc_newln(st);
1.13 kristaps 1310: puts(".Oo");
1311: (*opstack)++;
1312: } else if ('[' == buf[*start])
1313: return(0);
1314:
1315: if (']' == buf[*start] && *opstack > 0) {
1.32 schwarze 1316: mdoc_newln(st);
1.13 kristaps 1317: puts(".Oc");
1318: (*opstack)--;
1319: } else if (']' == buf[*start])
1320: return(0);
1321:
1322: (*start)++;
1.31 schwarze 1323: last = '\n';
1.13 kristaps 1324: while (' ' == buf[*start])
1325: (*start)++;
1326: return(1);
1327: }
1328:
1329: /*
1.17 kristaps 1330: * Format multiple "Nm" manpage names in the NAME section.
1.32 schwarze 1331: * From the perspective of external callers,
1332: * always stays in OUST_NL/wantws mode,
1333: * but its children do use OUST_MAC.
1.17 kristaps 1334: */
1335: static void
1336: donamenm(struct state *st, const char *buf, size_t *start, size_t end)
1337: {
1338: size_t word;
1339:
1.32 schwarze 1340: assert(OUST_NL == st->oust);
1341: assert(st->wantws);
1342:
1.47 schwarze 1343: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1344: (*start)++;
1345:
1346: if (end == *start) {
1347: puts(".Nm unknown");
1348: return;
1349: }
1350:
1351: while (*start < end) {
1352: for (word = *start; word < end; word++)
1353: if (',' == buf[word])
1354: break;
1.32 schwarze 1355: formatcodeln(st, "Nm", buf, start, word, 1);
1.17 kristaps 1356: if (*start == end) {
1.32 schwarze 1357: mdoc_newln(st);
1358: break;
1.17 kristaps 1359: }
1360: assert(',' == buf[*start]);
1.32 schwarze 1361: printf(" ,");
1362: mdoc_newln(st);
1.17 kristaps 1363: (*start)++;
1.47 schwarze 1364: while (*start < end && isspace((unsigned char)buf[*start]))
1.17 kristaps 1365: (*start)++;
1366: }
1367: }
1368:
1369: /*
1.1 schwarze 1370: * Ordinary paragraph.
1371: * Well, this is really the hardest--POD seems to assume that, for
1372: * example, a leading space implies a newline, and so on.
1373: * Lots of other snakes in the grass: escaping a newline followed by a
1374: * period (accidental mdoc(7) control), double-newlines after macro
1375: * passages, etc.
1.32 schwarze 1376: *
1377: * Uses formatcode() to go to OUST_MAC mode
1378: * and outbuf_flush() to go to OUST_TXT mode.
1.40 schwarze 1379: * In text mode, wantws requests white space before the text
1380: * currently contained in the outbuf, not before upcoming text.
1.32 schwarze 1381: * Must make sure to go back to OUST_NL/wantws mode before returning.
1.1 schwarze 1382: */
1383: static void
1384: ordinary(struct state *st, const char *buf, size_t start, size_t end)
1385: {
1.44 schwarze 1386: size_t i, j, opstack, wend;
1.43 schwarze 1387: enum mdoc_type mtype;
1.44 schwarze 1388: int eos, noeos, seq;
1.49 schwarze 1389: char savechar;
1.1 schwarze 1390:
1391: if ( ! st->parsing || st->paused)
1392: return;
1393:
1394: /*
1395: * Special-case: the NAME section.
1396: * If we find a "-" when searching from the end, assume that
1397: * we're in "name - description" format.
1398: * To wit, print out a "Nm" and "Nd" in that format.
1399: */
1.11 kristaps 1400: if (SECT_NAME == st->sect) {
1.15 kristaps 1401: for (i = end - 2; i > start; i--)
1.47 schwarze 1402: if ('-' == buf[i] &&
1403: isspace((unsigned char)buf[i + 1]))
1.1 schwarze 1404: break;
1405: if ('-' == buf[i]) {
1406: j = i;
1407: /* Roll over multiple "-". */
1408: for ( ; i > start; i--)
1409: if ('-' != buf[i])
1410: break;
1.17 kristaps 1411: donamenm(st, buf, &start, i + 1);
1.5 kristaps 1412: start = j + 1;
1.47 schwarze 1413: while (start < end &&
1414: isspace((unsigned char)buf[start]))
1.17 kristaps 1415: start++;
1.32 schwarze 1416: formatcodeln(st, "Nd", buf, &start, end, 1);
1417: mdoc_newln(st);
1.1 schwarze 1418: return;
1419: }
1420: }
1421:
1422: if ( ! st->haspar)
1423: puts(".Pp");
1424:
1425: st->haspar = 0;
1426: last = '\n';
1.13 kristaps 1427: opstack = 0;
1.1 schwarze 1428:
1.15 kristaps 1429: for (seq = 0; start < end; seq++) {
1.1 schwarze 1430: /*
1431: * Loop til we get either to a newline or escape.
1432: * Escape initial control characters.
1433: */
1434: while (start < end) {
1.34 schwarze 1435: if (start < end - 1 && '<' == buf[start + 1] &&
1436: 'A' <= buf[start] && 'Z' >= buf[start])
1.1 schwarze 1437: break;
1438: else if ('\n' == buf[start])
1439: break;
1440: else if ('\n' == last && '.' == buf[start])
1.31 schwarze 1441: outbuf_addstr(st, "\\&");
1.1 schwarze 1442: else if ('\n' == last && '\'' == buf[start])
1.31 schwarze 1443: outbuf_addstr(st, "\\&");
1.12 kristaps 1444: /*
1445: * If we're in the SYNOPSIS, have square
1446: * brackets indicate that we're opening and
1447: * closing an optional context.
1448: */
1.32 schwarze 1449:
1.13 kristaps 1450: if (SECT_SYNOPSIS == st->sect &&
1451: ('[' == buf[start] ||
1452: ']' == buf[start]) &&
1.32 schwarze 1453: dosynopsisop(st, buf,
1454: &start, end, &opstack))
1.13 kristaps 1455: continue;
1.32 schwarze 1456:
1.42 schwarze 1457: /* Merely buffer non-whitespace. */
1.32 schwarze 1458:
1.31 schwarze 1459: last = buf[start++];
1.44 schwarze 1460: if ( ! isspace(last))
1.37 schwarze 1461: outbuf_addchar(st);
1.44 schwarze 1462: if (start < end &&
1.52 schwarze 1463: ! isspace((unsigned char)buf[start - 1]) &&
1.44 schwarze 1464: ! isspace((unsigned char)buf[start]))
1.37 schwarze 1465: continue;
1466:
1.44 schwarze 1467: /*
1468: * Found the end of a word.
1469: * Rewind trailing delimiters.
1470: */
1471:
1472: eos = noeos = 0;
1473: for (wend = st->outbuflen; wend; wend--)
1474: if ('.' == st->outbuf[wend - 1] ||
1475: '!' == st->outbuf[wend - 1] ||
1476: '?' == st->outbuf[wend - 1])
1477: eos = 1;
1478: else if ('|' == st->outbuf[wend - 1] ||
1479: ',' == st->outbuf[wend - 1] ||
1480: ';' == st->outbuf[wend - 1] ||
1481: ':' == st->outbuf[wend - 1])
1482: noeos = 1;
1483: else if ('\'' != st->outbuf[wend - 1] &&
1484: '"' != st->outbuf[wend - 1] &&
1485: ')' != st->outbuf[wend - 1] &&
1486: ']' != st->outbuf[wend - 1])
1487: break;
1488: eos &= ! noeos;
1489:
1490: /*
1491: * Detect function names.
1492: */
1.42 schwarze 1493:
1.43 schwarze 1494: mtype = MDOC_Fa;
1.49 schwarze 1495: savechar = '\0';
1.44 schwarze 1496: if (wend && ')' == st->outbuf[wend] &&
1497: '(' == st->outbuf[wend - 1]) {
1498: mtype = dict_get(st->outbuf, --wend);
1.49 schwarze 1499: if (MDOC_Dv == mtype)
1500: mtype = MDOC_Fo;
1.43 schwarze 1501: if (MDOC_Fo == mtype || MDOC_MAX == mtype) {
1.44 schwarze 1502: st->outbuflen = wend;
1503: st->outbuf[wend] = '\0';
1.43 schwarze 1504: mdoc_newln(st);
1505: if (MDOC_Fo == mtype)
1506: fputs(".Fn ", stdout);
1507: else
1508: fputs(".Xr ", stdout);
1509: st->oust = OUST_MAC;
1510: }
1.49 schwarze 1511: } else {
1512: mtype = dict_get(st->outbuf, wend);
1513: if (MDOC_Dv == mtype) {
1514: savechar = st->outbuf[wend];
1515: st->outbuf[wend] = '\0';
1516: mdoc_newln(st);
1517: fputs(".Dv ", stdout);
1518: st->oust = OUST_MAC;
1519: } else
1520: mtype = MDOC_Fa;
1.37 schwarze 1521: }
1522:
1.42 schwarze 1523: /*
1524: * On whitespace, flush the output buffer
1525: * and allow breaking to a macro line.
1526: */
1527:
1.37 schwarze 1528: outbuf_flush(st);
1.42 schwarze 1529:
1530: /*
1531: * End macro lines, and
1532: * end text lines at the end of sentences.
1533: */
1534:
1.44 schwarze 1535: if (OUST_MAC == st->oust || (eos && wend > 1 &&
1536: islower((unsigned char)st->outbuf[wend - 1]))) {
1.43 schwarze 1537: if (MDOC_MAX == mtype)
1538: fputs(" 3", stdout);
1.49 schwarze 1539: if (MDOC_Fa != mtype) {
1540: if (MDOC_Dv == mtype)
1541: st->outbuf[wend] = savechar;
1542: else
1543: wend += 2;
1544: while ('\0' != st->outbuf[wend])
1.44 schwarze 1545: printf(" %c",
1.49 schwarze 1546: st->outbuf[wend++]);
1547: }
1.40 schwarze 1548: mdoc_newln(st);
1.43 schwarze 1549: }
1.42 schwarze 1550:
1551: /* Advance to the next word. */
1552:
1.44 schwarze 1553: while ('\n' != buf[start] &&
1554: isspace((unsigned char)buf[start]))
1.42 schwarze 1555: start++;
1556: st->wantws = 1;
1.1 schwarze 1557: }
1558:
1.34 schwarze 1559: if (start < end - 1 && '<' == buf[start + 1] &&
1560: 'A' <= buf[start] && 'Z' >= buf[start]) {
1.32 schwarze 1561: formatcode(st, buf, &start, end, 0, seq);
1562: if (OUST_MAC == st->oust) {
1.30 schwarze 1563: /*
1564: * Let mdoc(7) handle trailing punctuation.
1565: * XXX Some punctuation characters
1566: * are not handled yet.
1567: */
1.51 schwarze 1568: if ((start == end - 1 ||
1569: (start < end - 1 &&
1570: (' ' == buf[start + 1] ||
1571: '\n' == buf[start + 1]))) &&
1572: NULL != strchr("|.,;:?!)]", buf[start])) {
1.16 kristaps 1573: putchar(' ');
1574: putchar(buf[start++]);
1575: }
1.32 schwarze 1576:
1577: if (st->wantws ||
1578: ' ' == buf[start] ||
1579: '\n' == buf[start])
1580: mdoc_newln(st);
1581:
1.30 schwarze 1582: /*
1583: * Consume all whitespace
1584: * so we don't accidentally start
1585: * an implicit literal line.
1586: */
1.32 schwarze 1587:
1.6 kristaps 1588: while (start < end && ' ' == buf[start])
1589: start++;
1.32 schwarze 1590:
1591: /*
1592: * Some text is following.
1593: * Implement requested spacing.
1594: */
1595:
1596: if ( ! st->wantws && start < end &&
1.34 schwarze 1597: ('<' != buf[start + 1] ||
1598: 'A' > buf[start] ||
1599: 'Z' < buf[start])) {
1.32 schwarze 1600: printf(" Ns ");
1601: st->wantws = 1;
1602: }
1.6 kristaps 1603: }
1.1 schwarze 1604: } else if (start < end && '\n' == buf[start]) {
1.32 schwarze 1605: outbuf_flush(st);
1606: mdoc_newln(st);
1.1 schwarze 1607: if (++start >= end)
1608: continue;
1609: /*
1610: * If we have whitespace next, eat it to prevent
1611: * mdoc(7) from thinking that it's meant for
1612: * verbatim text.
1613: * It is--but if we start with that, we can't
1614: * have a macro subsequent it, which may be
1615: * possible if we have an escape next.
1616: */
1.31 schwarze 1617: if (' ' == buf[start] || '\t' == buf[start])
1.1 schwarze 1618: puts(".br");
1619: for ( ; start < end; start++)
1620: if (' ' != buf[start] && '\t' != buf[start])
1621: break;
1.12 kristaps 1622: }
1.1 schwarze 1623: }
1.32 schwarze 1624: outbuf_flush(st);
1625: mdoc_newln(st);
1.1 schwarze 1626: }
1627:
1628: /*
1629: * There are three kinds of paragraphs: verbatim (starts with whitespace
1630: * of some sort), ordinary (starts without "=" marker), or a command
1631: * (default: starts with "=").
1632: */
1633: static void
1.35 schwarze 1634: dopar(struct state *st, char *buf, size_t start, size_t end)
1.1 schwarze 1635: {
1636:
1.32 schwarze 1637: assert(OUST_NL == st->oust);
1638: assert(st->wantws);
1639:
1.1 schwarze 1640: if (end == start)
1641: return;
1642: if (' ' == buf[start] || '\t' == buf[start])
1643: verbatim(st, buf, start, end);
1644: else if ('=' != buf[start])
1645: ordinary(st, buf, start, end);
1646: else
1647: command(st, buf, start, end);
1648: }
1649:
1650: /*
1651: * Loop around paragraphs within a document, processing each one in the
1652: * POD way.
1653: */
1654: static void
1655: dofile(const struct args *args, const char *fname,
1.35 schwarze 1656: const struct tm *tm, char *buf, size_t sz)
1.1 schwarze 1657: {
1.29 schwarze 1658: char datebuf[64];
1.1 schwarze 1659: struct state st;
1.46 schwarze 1660: const char *fbase, *fext, *section, *date, *format;
1.1 schwarze 1661: char *title, *cp;
1.53 schwarze 1662: size_t cur, end;
1663: int verb;
1.1 schwarze 1664:
1665: if (0 == sz)
1666: return;
1667:
1.29 schwarze 1668: /*
1669: * Parsing the filename is almost always required,
1670: * except when both the title and the section
1671: * are provided on the command line.
1672: */
1673:
1674: if (NULL == args->title || NULL == args->section) {
1675: fbase = strrchr(fname, '/');
1676: if (NULL == fbase)
1677: fbase = fname;
1678: else
1679: fbase++;
1680: fext = strrchr(fbase, '.');
1681: } else
1682: fext = NULL;
1683:
1684: /*
1685: * The title will be converted to uppercase,
1686: * so it needs to be copied.
1687: */
1688:
1689: title = (NULL != args->title) ? strdup(args->title) :
1690: (NULL != fext) ? strndup(fbase, fext - fbase) :
1691: strdup(fbase);
1.1 schwarze 1692:
1693: if (NULL == title) {
1694: perror(NULL);
1695: exit(EXIT_FAILURE);
1696: }
1697:
1698: /* Section is 1 unless suffix is "pm". */
1699:
1.29 schwarze 1700: section = (NULL != args->section) ? args->section :
1701: (NULL == fext || strcmp(fext + 1, "pm")) ? "1" :
1702: PERL_SECTION;
1.1 schwarze 1703:
1704: /* Date. Or the given "tm" if not supplied. */
1705:
1.46 schwarze 1706: date = args->date;
1707: format = (NULL == date) ? "%B %d, %Y" :
1.48 schwarze 1708: strcmp(date, "Mdocdate") ? NULL : "$" "Mdocdate: %B %d %Y $";
1.46 schwarze 1709:
1710: if (NULL != format) {
1711: strftime(datebuf, sizeof(datebuf), format, tm);
1.1 schwarze 1712: date = datebuf;
1713: }
1714:
1715: for (cp = title; '\0' != *cp; cp++)
1716: *cp = toupper((int)*cp);
1717:
1718: /* The usual mdoc(7) preamble. */
1719:
1720: printf(".Dd %s\n", date);
1721: printf(".Dt %s %s\n", title, section);
1722: puts(".Os");
1723:
1724: free(title);
1725:
1.37 schwarze 1726: dict_init();
1.1 schwarze 1727: memset(&st, 0, sizeof(struct state));
1.32 schwarze 1728: st.oust = OUST_NL;
1729: st.wantws = 1;
1730:
1.1 schwarze 1731: assert(sz > 0);
1732:
1733: /* Main loop over file contents. */
1734:
1.53 schwarze 1735: cur = 0;
1736: for (;;) {
1737: while (cur < sz && '\n' == buf[cur])
1738: cur++;
1739: if (cur >= sz)
1740: break;
1741:
1742: verb = isspace((unsigned char)buf[cur]);
1743:
1.1 schwarze 1744: /* Read until next paragraph. */
1.53 schwarze 1745:
1746: for (end = cur + 1; end + 1 < sz; end++)
1747: if ('\n' == buf[end] && '\n' == buf[end + 1] &&
1748: !(verb && end + 2 < sz &&
1749: isspace((unsigned char)buf[end + 2])))
1.1 schwarze 1750: break;
1751:
1752: /* Adjust end marker for EOF. */
1.53 schwarze 1753:
1754: if (end < sz && '\n' != buf[end])
1755: end++;
1.1 schwarze 1756:
1757: /* Process paragraph and adjust start. */
1.53 schwarze 1758:
1.1 schwarze 1759: dopar(&st, buf, cur, end);
1.53 schwarze 1760: cur = end + 2;
1.1 schwarze 1761: }
1.37 schwarze 1762: dict_destroy();
1.1 schwarze 1763: }
1764:
/*
 * Read a single file fully into memory.
 * If the file is "-", do it from stdin.
 * If successfully read, send the input buffer to dofile() for further
 * processing.
 * The time handed to dofile() is the modification time of the file,
 * or the current time for stdin or if fstat(2) fails.
 * Returns 1 on success or 0 if the file cannot be opened or read;
 * exits the program if memory runs out.
 */
static int
readfile(const struct args *args, const char *fname)
{
	int		 fd;
	char		*buf;
	size_t		 bufsz, cur;
	ssize_t		 ssz;
	struct tm	*tm;
	time_t		 ttm;
	struct stat	 st;

	fd = 0 != strcmp("-", fname) ?
	    open(fname, O_RDONLY, 0) : STDIN_FILENO;

	if (-1 == fd) {
		perror(fname);
		return(0);
	}

	if (STDIN_FILENO == fd || -1 == fstat(fd, &st)) {
		ttm = time(NULL);
		tm = localtime(&ttm);
	} else
		tm = localtime(&st.st_mtime);

	/*
	 * Arbitrarily-sized initial buffer.
	 * Should be big enough for most files...
	 */
	cur = 0;
	bufsz = 1 << 14;
	if (NULL == (buf = malloc(bufsz))) {
		perror(NULL);
		exit(EXIT_FAILURE);
	}

	while ((ssz = read(fd, buf + cur, bufsz - cur)) > 0) {
		/*
		 * Double buffer size on fill.
		 * This keeps cur strictly smaller than bufsz.
		 */
		if ((size_t)ssz == bufsz - cur) {
			bufsz *= 2;
			if (NULL == (buf = realloc(buf, bufsz))) {
				perror(NULL);
				exit(EXIT_FAILURE);
			}
		}
		cur += (size_t)ssz;
	}
	if (ssz < 0) {
		/* Report first, cleanup may clobber errno. */
		perror(fname);
		free(buf);
		if (STDIN_FILENO != fd)
			close(fd);
		return(0);
	}

	/*
	 * There is always room for one more byte, see above.
	 * Terminate the data such that looking one byte ahead
	 * at the very end finds a well-defined value.
	 */
	buf[cur] = '\0';

	dofile(args, STDIN_FILENO == fd ?
	    "STDIN" : fname, tm, buf, cur);
	free(buf);
	if (STDIN_FILENO != fd)
		close(fd);
	return(1);
}
1831:
1832: int
1833: main(int argc, char *argv[])
1834: {
1835: const char *fname, *name;
1836: struct args args;
1837: int c;
1838:
1839: name = strrchr(argv[0], '/');
1840: if (name == NULL)
1841: name = argv[0];
1842: else
1843: ++name;
1844:
1845: memset(&args, 0, sizeof(struct args));
1846: fname = "-";
1847:
1848: /* Accept no arguments for now. */
1849:
1850: while (-1 != (c = getopt(argc, argv, "c:d:hln:oq:rs:uv")))
1851: switch (c) {
1852: case ('h'):
1853: /* FALLTHROUGH */
1854: case ('l'):
1855: /* FALLTHROUGH */
1856: case ('c'):
1857: /* FALLTHROUGH */
1858: case ('o'):
1859: /* FALLTHROUGH */
1860: case ('q'):
1861: /* FALLTHROUGH */
1862: case ('r'):
1863: /* FALLTHROUGH */
1864: case ('u'):
1865: /* FALLTHROUGH */
1866: case ('v'):
1867: /* Ignore these. */
1868: break;
1869: case ('d'):
1870: args.date = optarg;
1871: break;
1872: case ('n'):
1873: args.title = optarg;
1874: break;
1875: case ('s'):
1876: args.section = optarg;
1877: break;
1878: default:
1879: goto usage;
1880: }
1881:
1882: argc -= optind;
1883: argv += optind;
1884:
1885: /* Accept only a single input file. */
1886:
1.25 schwarze 1887: if (argc > 1)
1888: goto usage;
1.1 schwarze 1889: else if (1 == argc)
1890: fname = *argv;
1891:
1892: return(readfile(&args, fname) ?
1893: EXIT_SUCCESS : EXIT_FAILURE);
1894:
1895: usage:
1896: fprintf(stderr, "usage: %s [-d date] "
1.25 schwarze 1897: "[-n title] [-s section] [file]\n", name);
1.1 schwarze 1898:
1899: return(EXIT_FAILURE);
1900: }
CVSweb