/* $Id: util.c,v 1.26 2015/03/05 08:35:35 kristaps Exp $ */ /* * Copyright (c) 2015 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" /* * Unmap the top-most file in the stack of files currently opened (that * is, nested calls to parsefile()). */ void texifilepop(struct texi *p) { struct texifile *f; assert(p->filepos > 0); f = &p->files[--p->filepos]; free(f->map); } static void teximacrofree(struct teximacro *p) { size_t i; for (i = 0; i < p->argsz; i++) free(p->args[i]); free(p->args); free(p->key); free(p->value); } static void texivaluefree(struct texivalue *p) { free(p->key); free(p->value); } /* * Unmap all files that we're currently using and free all resources * that we've allocated during the parse. * The utility should exit(...) after this is called. */ void texiexit(struct texi *p) { size_t i; /* Make sure we're newline-terminated. */ if (p->outcol) fputc('\n', p->outfile); if (NULL != p->chapters) teximdocclose(p, 1); /* Unmap all files. 
*/ while (p->filepos > 0) texifilepop(p); for (i = 0; i < p->macrosz; i++) teximacrofree(&p->macros[i]); for (i = 0; i < p->dirsz; i++) free(p->dirs[i]); for (i = 0; i < p->indexsz; i++) free(p->indexs[i]); for (i = 0; i < p->valsz; i++) texivaluefree(&p->vals[i]); free(p->macros); free(p->vals); free(p->indexs); free(p->dirs); free(p->subtitle); free(p->title); free(p->copying); } /* * Fatal error: unmap all files and exit. * The "errstring" is passed to perror(3). */ void texiabort(struct texi *p, const char *errstring) { perror(errstring); texiexit(p); exit(EXIT_FAILURE); } /* * Print a generic warning message (to stderr) tied to our current * location in the parse sequence. */ void texiwarn(const struct texi *p, const char *fmt, ...) { va_list ap; const struct texifile *f; f = &p->files[p->filepos - 1]; if (f->insplice) fprintf(stderr, "%s:%zu:%zu (%zuB left in splice): " "warning: ", f->name, f->line + 1, f->col + 1, f->insplice); else fprintf(stderr, "%s:%zu:%zu: warning: ", f->name, f->line + 1, f->col + 1); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); } /* * Print an error message (to stderr) tied to our current location in * the parse sequence, invoke texiexit(), then die. */ void texierr(struct texi *p, const char *fmt, ...) { va_list ap; struct texifile *f; f = &p->files[p->filepos - 1]; if (f->insplice) fprintf(stderr, "%s:%zu:%zu: (%zuB left in splice): " "error: ", f->name, f->line + 1, f->col + 1, f->insplice); else fprintf(stderr, "%s:%zu:%zu: error: ", f->name, f->line + 1, f->col + 1); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fputc('\n', stderr); texiexit(p); exit(EXIT_FAILURE); } /* * Put a single data character to the output if we're not ignoring. * Escape starting a line with a control character and slashes. */ void texiputchar(struct texi *p, char c) { if (p->ign) return; if ('.' 
== c && 0 == p->outcol) fputs("\\&", p->outfile); if ('\'' == c && 0 == p->outcol) fputs("\\&", p->outfile); if (p->uppercase) fputc(toupper((unsigned int)c), p->outfile); else fputc(c, p->outfile); if ('\\' == c) fputc('e', p->outfile); p->seenvs = 0; if ('\n' == c) { p->outcol = 0; p->seenws = 0; } else p->outcol++; } /* * Put an opaque series of characters. * Characters starting a line with a control character are escaped, but * that's it, so don't use this for non-controlled sequences of text. */ void texiputchars(struct texi *p, const char *s) { if (p->ign) return; if ('.' == *s && 0 == p->outcol) fputs("\\&", p->outfile); if ('\'' == *s && 0 == p->outcol) fputs("\\&", p->outfile); if (p->uppercase) for ( ; '\0' != *s; s++) p->outcol += fputc(toupper ((unsigned int)*s), p->outfile); else p->outcol += fputs(s, p->outfile); p->seenvs = 0; } /* * This puts all characters onto the output stream but makes sure to * escape mdoc(7) slashes. * FIXME: useless. */ void texiputbuf(struct texi *p, size_t start, size_t end) { for ( ; start < end; start++) texiputchar(p, BUF(p)[start]); } /* * Close an mdoc(7) macro opened with teximacroopen(). * If there are no more macros on the line, prints a newline. */ void teximacroclose(struct texi *p) { if (p->ign) return; if (0 == --p->outmacro) { fputc('\n', p->outfile); p->outcol = p->seenws = 0; } } /* * Open a mdoc(7) macro. * This is used for line macros, e.g., Qq [foo bar baz]. * It can be invoked for nested macros, e.g., Qq Li foo . * TODO: flush-right punctuation (e.g., parenthesis). */ void teximacroopen(struct texi *p, const char *s) { int rc; if (p->ign) return; if (p->outcol && 0 == p->outmacro) { fputc('\n', p->outfile); p->outcol = 0; } if (0 == p->outmacro) fputc('.', p->outfile); else fputc(' ', p->outfile); if (EOF != (rc = fputs(s, p->outfile))) p->outcol += rc; fputc(' ', p->outfile); p->outcol++; p->outmacro++; p->seenws = 0; } /* * Put a stadnalone mdoc(7) command with the trailing newline. 
*/ void teximacro(struct texi *p, const char *s) { if (p->ign) return; if (p->outmacro) texierr(p, "\"%s\" in open line scope!?", s); if (p->literal) texierr(p, "\"%s\" in a literal scope!?", s); if (p->outcol) fputc('\n', p->outfile); fputc('.', p->outfile); fputs(s, p->outfile); fputc('\n', p->outfile); p->outcol = p->seenws = 0; } /* * Introduce vertical space during normal (non-macro) input. */ void texivspace(struct texi *p) { if (p->seenvs || TEXILIST_TABLE == p->list) return; teximacro(p, "Pp"); p->seenvs = 1; } /* * Advance by a single byte in the input stream, adjusting our location * in the current input file. */ void advance(struct texi *p, size_t *pos) { struct texifile *f; f = &p->files[p->filepos - 1]; if (0 == f->insplice) { if ('\n' == BUF(p)[*pos]) { f->line++; f->col = 0; } else f->col++; } else { --f->insplice; if (0 == f->insplice) f->depth = 0; } (*pos)++; } /* * It's common to wait punctuation to float on the right side of macro * lines in mdoc(7), e.g., ".Em hello ) ." * This function does so, and should be called before teximacroclose(). * It will detect that it's the last in the nested macros and * appropriately flush-left punctuation alongside the macro. */ void texipunctuate(struct texi *p, size_t *pos) { size_t start, end; if (1 != p->outmacro) return; for (start = end = *pos; end < BUFSZ(p); end++) { switch (BUF(p)[end]) { case (','): case (')'): case ('.'): case ('"'): case (':'): case (';'): case ('!'): case ('?'): continue; default: break; } break; } if (end == *pos) return; if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] || '\n' == BUF(p)[end]) { for ( ; start < end; start++) { texiputchar(p, ' '); texiputchar(p, BUF(p)[start]); advance(p, pos); } } } /* * Advance to the next non-whitespace word in the input stream. * If we're in literal mode, then print all of the whitespace as we're * doing so. 
*/ static size_t advancenext(struct texi *p, size_t *pos) { if (p->literal) { while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) { texiputchar(p, BUF(p)[*pos]); advance(p, pos); } return(*pos); } while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) { p->seenws = 1; /* * If it looks like we've printed a double-line, then * output a paragraph. * FIXME: this is stupid. */ if (*pos && '\n' == BUF(p)[*pos] && '\n' == BUF(p)[*pos - 1]) texivspace(p); advance(p, pos); } return(*pos); } /* * Advance to the EOLN in the input stream. * This will skip over '@' markers in an effort to ignore escaped * newlines. */ size_t advanceeoln(struct texi *p, size_t *pos, int consumenl) { while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) { if ('@' == BUF(p)[*pos]) advance(p, pos); advance(p, pos); } if (*pos < BUFSZ(p) && consumenl) advance(p, pos); return(*pos); } /* * Advance to position "end", which is an absolute position in the * current buffer greater than or equal to the current position. */ void advanceto(struct texi *p, size_t *pos, size_t end) { assert(*pos <= end); while (*pos < end) advance(p, pos); } static void texiexecmacro(struct texi *p, struct teximacro *m, size_t sv, size_t *pos) { size_t valsz, realsz, aasz, asz, ssz, i, j, k, start, end; char *val; char **args; const char *cp; /* Disregard empty macros. */ if (0 == (valsz = realsz = strlen(m->value))) { args = argparse(p, pos, &asz, m->argsz); for (i = 0; i < asz; i++) free(args[i]); free(args); return; } /* * This is important: it protect us from macros that invoke more * macros, possibly going on infinitely. * We use "sv" instead of the current position because we might * be invoked at the end of the macro (i.e., insplice == 0). * The "sv" value was initialised at the start of the macro. */ if (sv > 0) if (++p->files[p->filepos - 1].depth > 64) texierr(p, "maximium recursive depth"); args = argparse(p, pos, &asz, m->argsz); if (asz != m->argsz) texiwarn(p, "invalid macro argument length"); aasz = asz < m->argsz ? 
asz : m->argsz; if (0 == aasz) { texisplice(p, m->value, valsz, *pos); return; } val = strdup(m->value); for (i = j = 0; i < realsz; i++) { /* Parse blindly til the backslash delimiter. */ if ('\\' != m->value[i]) { val[j++] = m->value[i]; val[j] = '\0'; continue; } else if (i == realsz - 1) texierr(p, "trailing argument name delimiter"); /* Double-backslash is escaped. */ if ('\\' == m->value[i + 1]) { val[j++] = m->value[i++]; val[j] = '\0'; continue; } assert('\\' == m->value[i] && i < realsz - 1); /* Parse to terminating delimiter. */ /* FIXME: embedded, escaped delimiters? */ for (start = end = i + 1; end < realsz; end++) if ('\\' == m->value[end]) break; if (end == realsz) texierr(p, "unterminated argument name"); for (k = 0; k < aasz; k++) { if ((ssz = strlen(m->args[k])) != (end - start)) continue; if (strncmp(&m->value[start], m->args[k], ssz)) continue; break; } /* * Argument didn't exist in argument table. * Just ignore it. */ if (k == aasz) { i = end; continue; } if (strlen(args[k]) > ssz) { valsz += strlen(args[k]); val = realloc(val, valsz + 1); if (NULL == val) texiabort(p, NULL); } for (cp = args[k]; '\0' != *cp; cp++) val[j++] = *cp; val[j] = '\0'; i = end; } texisplice(p, val, strlen(val), *pos); for (i = 0; i < asz; i++) free(args[i]); free(args); free(val); } /* * Output a free-form word in the input stream, progressing to the next * command or white-space. * This also will advance the input stream. */ static void parseword(struct texi *p, size_t *pos, char extra) { /* * Some line control: if we (non-macro, non-literal) already * have more than 72 characters written to the screen, then * output a newline before getting started. */ if (p->seenws && 0 == p->outmacro && p->outcol > 72 && 0 == p->literal) texiputchar(p, '\n'); /* Usual padding in the case of seen whitespace. */ if (p->seenws && p->outcol && 0 == p->literal) texiputchar(p, ' '); p->seenws = 0; while (*pos < BUFSZ(p) && ! 
ismspace(BUF(p)[*pos])) { switch (BUF(p)[*pos]) { case ('@'): case ('}'): case ('{'): return; } if ('\0' != extra && BUF(p)[*pos] == extra) return; if (*pos < BUFSZ(p) - 1 && '`' == BUF(p)[*pos] && '`' == BUF(p)[*pos + 1]) { texiputchars(p, "\\(lq"); advance(p, pos); } else if (*pos < BUFSZ(p) - 1 && '\'' == BUF(p)[*pos] && '\'' == BUF(p)[*pos + 1]) { texiputchars(p, "\\(rq"); advance(p, pos); } else texiputchar(p, BUF(p)[*pos]); advance(p, pos); } /* * New sentence, new line:if we (non-macro, non-literal) see a * period at the end of the last printed word, then open a * newline. */ if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p) && '.' == BUF(p)[*pos - 1]) texiputchar(p, '\n'); } /* * Look up the command at position "pos" in the buffer, returning it (or * TEXICMD__MAX if none found) and setting "end" to be the absolute * index after the command name. */ enum texicmd texicmd(const struct texi *p, size_t pos, size_t *end, struct teximacro **macro) { size_t i, len, toksz; assert('@' == BUF(p)[pos]); if (NULL != macro) *macro = NULL; if ((*end = pos) == BUFSZ(p)) return(TEXICMD__MAX); else if ((*end = ++pos) == BUFSZ(p)) return(TEXICMD__MAX); /* Alphabetic commands are special. */ if ( ! isalpha((unsigned int)BUF(p)[pos])) { if ((*end = pos + 1) == BUFSZ(p)) return(TEXICMD__MAX); for (i = 0; i < TEXICMD__MAX; i++) { if (1 != texitoks[i].len) continue; if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], 1)) return(i); } texiwarn(p, "bad command: @%c", BUF(p)[pos]); return(TEXICMD__MAX); } /* Scan to the end of the possible command name. */ for (*end = pos; *end < BUFSZ(p) && ! ismspace(BUF(p)[*end]); (*end)++) if ((*end > pos && ('@' == BUF(p)[*end] || '{' == BUF(p)[*end] || '}' == BUF(p)[*end]))) break; /* Look for the command. */ len = *end - pos; for (i = 0; i < TEXICMD__MAX; i++) { if (len != texitoks[i].len) continue; if (0 == strncmp(texitoks[i].tok, &BUF(p)[pos], len)) return(i); } /* Look for it in our indices. 
*/ for (i = 0; i < p->indexsz; i++) { toksz = strlen(p->indexs[i]); if (len != 5 + toksz) continue; if (strncmp(&BUF(p)[pos], p->indexs[i], toksz)) continue; if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5)) return(TEXICMD_USER_INDEX); } for (i = 0; i < p->macrosz; i++) { if (len != strlen(p->macros[i].key)) continue; if (strncmp(&BUF(p)[pos], p->macros[i].key, len)) continue; if (NULL != macro) *macro = &p->macros[i]; return(TEXICMD__MAX); } texiwarn(p, "bad command: @%.*s", (int)len, &BUF(p)[pos]); return(TEXICMD__MAX); } /* * Parse an argument from a bracketed command, e.g., @url{foo, baz}. * Num should be set to the argument we're currently parsing, although * it suffixes for it to be zero or non-zero. * This will return 1 if there are more arguments, 0 otherwise. * This will stop (returning 0) in the event of EOF or if we're not at a * bracket for the zeroth parse. */ int parsearg(struct texi *p, size_t *pos, size_t num) { size_t end, sv; enum texicmd cmd; struct teximacro *macro; while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) advance(p, pos); if (*pos == BUFSZ(p) || (0 == num && '{' != BUF(p)[*pos])) return(0); if (0 == num) advance(p, pos); while ((*pos = advancenext(p, pos)) < BUFSZ(p)) { switch (BUF(p)[*pos]) { case (','): advance(p, pos); return(1); case ('}'): advance(p, pos); return(0); case ('{'): if (0 == p->ign) texiwarn(p, "unexpected \"{\""); advance(p, pos); continue; case ('@'): break; default: parseword(p, pos, ','); continue; } sv = p->files[p->filepos - 1].insplice; cmd = texicmd(p, *pos, &end, ¯o); advanceto(p, pos, end); if (NULL != macro) texiexecmacro(p, macro, sv, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) (*texitoks[cmd].fp)(p, cmd, pos); } return(0); } /* * Parse until the end of a bracketed statement, e.g., @foo{bar baz}. * This will stop in the event of EOF or if we're not at a bracket. 
*/
void
parsebracket(struct texi *p, size_t *pos, int dostack)
{
	size_t		  end, sv, stack;
	enum texicmd	  cmd;
	struct teximacro *macro;

	while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos]))
		advance(p, pos);

	if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos])
		return;
	advance(p, pos);

	/*
	 * If "dostack" is non-zero, nested braces are echoed to the
	 * output and counted in "stack" so that only the matching
	 * closing brace ends the statement.
	 * Otherwise, a nested opening brace is warned about and
	 * skipped.
	 */
	stack = 0;
	while ((*pos = advancenext(p, pos)) < BUFSZ(p)) {
		switch (BUF(p)[*pos]) {
		case ('}'):
			if (stack > 0) {
				stack--;
				advance(p, pos);
				texiputchar(p, '}');
				continue;
			}
			advance(p, pos);
			return;
		case ('{'):
			if (dostack) {
				stack++;
				advance(p, pos);
				texiputchar(p, '{');
				continue;
			}
			if (0 == p->ign)
				texiwarn(p, "unexpected \"{\"");
			advance(p, pos);
			continue;
		case ('@'):
			break;
		default:
			parseword(p, pos, '\0');
			continue;
		}

		/* Record the splice state before consuming the name. */
		sv = p->files[p->filepos - 1].insplice;
		cmd = texicmd(p, *pos, &end, &macro);
		advanceto(p, pos, end);
		if (NULL != macro)
			texiexecmacro(p, macro, sv, pos);
		if (TEXICMD__MAX == cmd)
			continue;
		if (NULL != texitoks[cmd].fp)
			(*texitoks[cmd].fp)(p, cmd, pos);
	}
}

/*
 * This should be invoked when we're on a macro line and want to process
 * to the end of the current input line, doing all of our macros along
 * the way.
*/ void parseeoln(struct texi *p, size_t *pos) { size_t end, sv; enum texicmd cmd; struct teximacro *macro; while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) { p->seenws = 1; if (p->literal) texiputchar(p, BUF(p)[*pos]); advance(p, pos); } switch (BUF(p)[*pos]) { case ('}'): if (0 == p->ign) texiwarn(p, "unexpected \"}\""); advance(p, pos); continue; case ('{'): if (0 == p->ign) texiwarn(p, "unexpected \"{\""); advance(p, pos); continue; case ('@'): break; default: parseword(p, pos, '\0'); continue; } sv = p->files[p->filepos - 1].insplice; cmd = texicmd(p, *pos, &end, ¯o); advanceto(p, pos, end); if (NULL != macro) texiexecmacro(p, macro, sv, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) (*texitoks[cmd].fp)(p, cmd, pos); } if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos]) advance(p, pos); } /* * Peek to see if there's a command after subsequent whitespace. * If so, return the macro identifier. * This DOES NOT work with user-defined macros. */ enum texicmd peekcmd(const struct texi *p, size_t pos) { size_t end; while (pos < BUFSZ(p) && ismspace(BUF(p)[pos])) pos++; if (pos == BUFSZ(p) || '@' != BUF(p)[pos]) return(TEXICMD__MAX); return(texicmd(p, pos, &end, NULL)); } /* * Parse a single word or command. * This will return immediately at the EOF. 
*/ static void parsesingle(struct texi *p, size_t *pos) { size_t end, sv; enum texicmd cmd; struct teximacro *macro; if ((*pos = advancenext(p, pos)) >= BUFSZ(p)) return; switch (BUF(p)[*pos]) { case ('}'): if (0 == p->ign) texiwarn(p, "unexpected \"}\""); advance(p, pos); return; case ('{'): if (0 == p->ign) texiwarn(p, "unexpected \"{\""); advance(p, pos); return; case ('@'): break; default: parseword(p, pos, '\0'); return; } sv = p->files[p->filepos - 1].insplice; cmd = texicmd(p, *pos, &end, ¯o); advanceto(p, pos, end); if (NULL != macro) texiexecmacro(p, macro, sv, pos); if (TEXICMD__MAX == cmd) return; if (NULL != texitoks[cmd].fp) (*texitoks[cmd].fp)(p, cmd, pos); } /* * This is used in the @deffn type of command. * These have an arbitrary number of line arguments; however, these * arguments may or may not be surrounded by brackets. * In this function, we parse each one as either a bracketed or * non-bracketed argument, returning 0 when we've reached the end of * line or 1 otherwise. */ int parselinearg(struct texi *p, size_t *pos) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) { p->seenws = 1; advance(p, pos); } if (*pos < BUFSZ(p) && '{' == BUF(p)[*pos]) parsebracket(p, pos, 0); else if (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) parsesingle(p, pos); else return(0); return(1); } /* * Parse til the end of the buffer. */ static void parseeof(struct texi *p) { size_t pos; for (pos = 0; pos < BUFSZ(p); ) parsesingle(p, &pos); } void texisplice(struct texi *p, const char *buf, size_t sz, size_t pos) { char *cp; struct texifile *f; assert(p->filepos > 0); f = &p->files[p->filepos - 1]; if (f->mapsz + sz > f->mapmaxsz) { f->mapmaxsz = f->mapsz + sz + 1024; cp = realloc(f->map, f->mapmaxsz); if (NULL == cp) texiabort(p, NULL); f->map = cp; } f->insplice += sz; memmove(f->map + pos + sz, f->map + pos, f->mapsz - pos); memcpy(f->map + pos, buf, sz); f->mapsz += sz; } /* * Parse a block sequence until we have the "@end endtoken" command * invocation. 
* This will return immediately at EOF. */ void parseto(struct texi *p, size_t *pos, const char *endtoken) { size_t end, sv; enum texicmd cmd; size_t endtoksz; struct teximacro *macro; endtoksz = strlen(endtoken); assert(endtoksz > 0); while ((*pos = advancenext(p, pos)) < BUFSZ(p)) { switch (BUF(p)[*pos]) { case ('}'): if (0 == p->ign) texiwarn(p, "unexpected \"}\""); advance(p, pos); continue; case ('{'): if (0 == p->ign) texiwarn(p, "unexpected \"{\""); advance(p, pos); continue; case ('@'): break; default: parseword(p, pos, '\0'); continue; } sv = p->files[p->filepos - 1].insplice; cmd = texicmd(p, *pos, &end, ¯o); advanceto(p, pos, end); if (TEXICMD_END == cmd) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); /* * FIXME: check the full word, not just its * initial substring! */ if (BUFSZ(p) - *pos >= endtoksz && 0 == strncmp (&BUF(p)[*pos], endtoken, endtoksz)) { advanceeoln(p, pos, 0); break; } if (0 == p->ign) texiwarn(p, "unexpected \"end\""); advanceeoln(p, pos, 0); continue; } if (NULL != macro) texiexecmacro(p, macro, sv, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) (*texitoks[cmd].fp)(p, cmd, pos); } } /* * Like parsefile() but used for reading from stdandard input. * This can only be called for the first file! */ void parsestdin(struct texi *p) { struct texifile *f; ssize_t ssz; assert(0 == p->filepos); f = &p->files[p->filepos]; memset(f, 0, sizeof(struct texifile)); f->type = TEXISRC_STDIN; f->name = ""; for (f->mapsz = 0; ; f->mapsz += (size_t)ssz) { if (f->mapsz == f->mapmaxsz) { if (f->mapmaxsz == (1U << 31)) texierr(p, "stdin buffer too long"); f->mapmaxsz = f->mapmaxsz > 65536 / 2 ? 
2 * f->mapmaxsz : 65536; f->map = realloc(f->map, f->mapmaxsz); if (NULL == f->map) texiabort(p, NULL); } ssz = read(STDIN_FILENO, f->map + (int)f->mapsz, f->mapmaxsz - f->mapsz); if (0 == ssz) break; else if (-1 == ssz) texiabort(p, NULL); } p->filepos++; parseeof(p); texifilepop(p); } /* * Memory-map the file "fname" and begin parsing it unless "parse" is * zero, in which case we just dump the file to stdout (making sure it * doesn't trip up mdoc(7) along the way). * This can be called in a nested context. */ void parsefile(struct texi *p, const char *fname, int parse) { struct texifile *f; int fd; struct stat st; size_t i; char *map; if (64 == p->filepos) texierr(p, "too many open files"); f = &p->files[p->filepos]; memset(f, 0, sizeof(struct texifile)); f->type = TEXISRC_FILE; f->name = fname; if (-1 == (fd = open(fname, O_RDONLY, 0))) { texiabort(p, fname); } else if (-1 == fstat(fd, &st)) { close(fd); texiabort(p, fname); } f->mapsz = f->mapmaxsz = st.st_size; map = mmap(NULL, f->mapsz, PROT_READ, MAP_SHARED, fd, 0); close(fd); if (MAP_FAILED == map) texiabort(p, fname); if ( ! parse) { for (i = 0; i < f->mapsz; i++) texiputchar(p, map[i]); if (p->outcol) texiputchar(p, '\n'); munmap(map, f->mapsz); return; } p->filepos++; f->map = malloc(f->mapsz); memcpy(f->map, map, f->mapsz); munmap(map, f->mapsz); parseeof(p); texifilepop(p); } /* * Look up the value to a stored pair's value starting in "buf" from * start to end. * Return the pointer to the value memory, which can be NULL if the * pointer key does not exist. * The pointer can point to NULL if the value has been unset. */ static char ** valuequery(const struct texi *p, size_t start, size_t end) { size_t i, sz, len; assert(end >= start); /* Ignore zero-length. 
*/ if (0 == (len = (end - start))) return(NULL); for (i = 0; i < p->valsz; i++) { sz = strlen(p->vals[i].key); if (sz != len) continue; if (0 == strncmp(p->vals[i].key, &BUF(p)[start], len)) return(&p->vals[i].value); } return(NULL); } /* * Parse a key until the end of line, e.g., @clear foo\n, and return the * pointer to its value via valuequery(). */ static char ** valuelquery(struct texi *p, size_t *pos) { size_t start, end; char **ret; while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); if (*pos == BUFSZ(p)) return(NULL); for (start = end = *pos; end < BUFSZ(p); end++) if ('\n' == BUF(p)[end]) break; advanceto(p, pos, end); if (*pos < BUFSZ(p)) { assert('\n' == BUF(p)[*pos]); advance(p, pos); } if (NULL == (ret = valuequery(p, start, end))) return(NULL); return(ret); } void valuelclear(struct texi *p, size_t *pos) { char **ret; if (NULL == (ret = valuelquery(p, pos))) return; free(*ret); *ret = NULL; } const char * valuellookup(struct texi *p, size_t *pos) { char **ret; if (NULL == (ret = valuelquery(p, pos))) return(NULL); return(*ret); } /* * Parse a key from a bracketed string, e.g., @value{foo}, and return * the pointer to its value. * If the returned pointer is NULL, either there was no string within * the brackets (or no brackets), or the value was not found, or the * value had previously been unset. 
*/ const char * valueblookup(struct texi *p, size_t *pos) { size_t start, end; char **ret; while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); if (*pos == BUFSZ(p) || '{' != BUF(p)[*pos]) return(NULL); advance(p, pos); for (start = end = *pos; end < BUFSZ(p); end++) if ('}' == BUF(p)[end]) break; advanceto(p, pos, end); if (*pos < BUFSZ(p)) { assert('}' == BUF(p)[*pos]); advance(p, pos); } if (NULL == (ret = valuequery(p, start, end))) return(NULL); return(*ret); } void valueadd(struct texi *p, char *key, char *val) { size_t i; assert(NULL != key); assert(NULL != val); for (i = 0; i < p->valsz; i++) if (0 == strcmp(p->vals[i].key, key)) break; if (i < p->valsz) { free(key); free(p->vals[i].value); p->vals[i].value = val; } else { /* FIXME: reallocarray() */ p->vals = realloc(p->vals, (p->valsz + 1) * sizeof(struct texivalue)); if (NULL == p->vals) texiabort(p, NULL); p->vals[p->valsz].key = key; p->vals[p->valsz].value = val; p->valsz++; } } /* * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the * declaration form, @macro foo {arg1, ...}) and textually convert it to * an array of arguments of size "argsz". * These need to be freed individually and as a whole. * NOTE: this will puke on @, or @} macros, which can trick it into * stopping argument parsing earlier. * Ergo, textual: this doesn't interpret the arguments in any way. */ char ** argparse(struct texi *p, size_t *pos, size_t *argsz, size_t hint) { char **args; size_t start, end, stack; while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); args = NULL; *argsz = 0; if (*pos == BUFSZ(p)) return(args); if ('{' != BUF(p)[*pos] && hint) { /* * Special case: if we encounter an unbracketed argument * and we're being invoked with non-zero arguments * (versus being set, i.e., hint>0), then parse until * the end of line. 
*/ *argsz = 1; args = calloc(1, sizeof(char *)); if (NULL == args) texiabort(p, NULL); start = *pos; while (*pos < BUFSZ(p)) { if ('\n' == BUF(p)[*pos]) break; advance(p, pos); } args[0] = malloc(*pos - start + 1); memcpy(args[0], &BUF(p)[start], *pos - start); args[0][*pos - start] = '\0'; if (*pos < BUFSZ(p) && '\n' == BUF(p)[*pos]) advance(p, pos); return(args); } else if ('{' != BUF(p)[*pos]) return(args); assert('{' == BUF(p)[*pos]); /* Parse til the closing '}', putting into the array. */ advance(p, pos); while (*pos < BUFSZ(p)) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); start = *pos; stack = 0; while (*pos < BUFSZ(p)) { /* * According to the manual, commas within * embedded commands are escaped. * We keep track of embedded-ness in the "stack" * state anyway, so this is free. */ if (',' == BUF(p)[*pos] && 0 == stack && 1 != hint) break; else if (0 == stack && '}' == BUF(p)[*pos]) break; else if (0 != stack && '}' == BUF(p)[*pos]) stack--; else if ('{' == BUF(p)[*pos]) stack++; advance(p, pos); } if (stack) texiwarn(p, "unterminated macro " "in macro arguments"); if ((end = *pos) == BUFSZ(p)) break; /* Test for zero-length '{ }'. */ if (start == end && '}' == BUF(p)[*pos] && 0 == *argsz) break; /* FIXME: use reallocarray. */ args = realloc (args, sizeof(char *) * (*argsz + 1)); if (NULL == args) texiabort(p, NULL); args[*argsz] = malloc(end - start + 1); if (NULL == args[*argsz]) texiabort(p, NULL); memcpy(args[*argsz], &BUF(p)[start], end - start); args[*argsz][end - start] = '\0'; (*argsz)++; if ('}' == BUF(p)[*pos]) break; advance(p, pos); } if (*pos == BUFSZ(p)) texierr(p, "unterminated arguments"); assert('}' == BUF(p)[*pos]); advance(p, pos); return(args); } /* * If we're printing chapters, then do some naviation here and then * close our outfile. * I want to call this the SEE ALSO section, but that's not really what * it is: we'll refer to the "initial" (top) node and the next and * previous chapters. 
*/
void
teximdocclose(struct texi *p, int last)
{
	char	 buf[PATH_MAX];

	/* Nothing to do unless we're within a chapter output file. */
	if (NULL == p->chapters || 0 == p->chapnum)
		return;

	teximacro(p, "Sh INFO NAVIGATION");

	/* Print a reference to the "top" node. */
	if (p->chapnum > 1) {
		texiputchars(p, "Top node,");
		snprintf(buf, sizeof(buf), "node1 7");
		teximacroopen(p, "Xr ");
		texiputchars(p, buf);
		texiputchars(p, " ;");
		teximacroclose(p);
	}

	/* Print a reference to the previous node. */
	if (p->chapnum > 2) {
		texiputchars(p, "previous node,");
		snprintf(buf, sizeof(buf),
			"node%zu 7", p->chapnum - 1);
		teximacroopen(p, "Xr ");
		texiputchars(p, buf);
		/* More follows unless this is the last chapter. */
		if ( ! last)
			texiputchars(p, " ;");
		teximacroclose(p);
	}

	/* Print a reference to the next node. */
	if ( ! last) {
		if (1 == p->chapnum)
			texiputchars(p, "Next node,");
		else
			texiputchars(p, "next node,");
		snprintf(buf, sizeof(buf),
			"node%zu 7", p->chapnum + 1);
		teximacroopen(p, "Xr ");
		texiputchars(p, buf);
		teximacroclose(p);
	}

	fclose(p->outfile);
}

/*
 * Open a mdoc(7) context.
 * If we're printing chapters, then open the outfile here, too.
 * Otherwise just print the mdoc(7) prologue.
 * NOTE(review): assumes "p->title" is non-NULL; confirm against the
 * caller that sets it.
 */
void
teximdocopen(struct texi *p, size_t *pos)
{
	const char	*cp;
	time_t		 t;
	char		 date[32];
	char		 fname[PATH_MAX];

	/* Each chapter goes into its own, consecutively numbered file. */
	if (NULL != p->chapters) {
		snprintf(fname, sizeof(fname), "%s/node%zu.7",
			p->chapters, ++p->chapnum);
		p->outfile = fopen(fname, "w");
		if (NULL == p->outfile)
			texiabort(p, fname);
	}

	/*
	 * Here we print our standard mdoc(7) prologue.
	 * We use the title set with @settitle for the `Nd' description
	 * and the source document filename (the first one as invoked on
	 * the command line) for the title.
	 * The date is set to the current date.
	 */
	t = time(NULL);
	strftime(date, sizeof(date), "%F", localtime(&t));

	teximacroopen(p, "Dd");
	texiputchars(p, date);
	teximacroclose(p);

	/*
	 * The ctype functions take an unsigned char value (or EOF):
	 * passing a negative char widened to unsigned int is undefined.
	 */
	teximacroopen(p, "Dt");
	for (cp = p->title; '\0' != *cp; cp++)
		texiputchar(p, toupper((unsigned char)*cp));
	texiputchars(p, " 7");
	teximacroclose(p);

	teximacro(p, "Os");
	teximacro(p, "Sh NAME");

	teximacroopen(p, "Nm");
	for (cp = p->title; '\0' != *cp; cp++)
		texiputchar(p, *cp);
	teximacroclose(p);

	teximacroopen(p, "Nd");
	/*
	 * The subtitle `Nd' can consist of arbitrary macros, so paste
	 * it and parse to the end of the line.
	 */
	if (NULL != p->subtitle) {
		texisplice(p, p->subtitle, strlen(p->subtitle), *pos);
		parseeoln(p, pos);
	} else
		texiputchars(p, "Unknown description");
	teximacroclose(p);

	/* Suppress a paragraph break directly after the prologue. */
	p->seenvs = 1;
}