version 1.27, 2020/01/20 10:37:15 |
version 1.38, 2023/11/24 05:02:18 |
|
|
/* $Id$ */ |
/* $Id$ */ |
/* |
/* |
* Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org> |
* Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023 |
|
* Ingo Schwarze <schwarze@openbsd.org> |
* |
* |
* Permission to use, copy, modify, and distribute this software for any |
* Permission to use, copy, modify, and distribute this software for any |
* purpose with or without fee is hereby granted, provided that the above |
* purpose with or without fee is hereby granted, provided that the above |
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
* |
|
* Functions to tag syntax tree nodes. |
|
* For internal use by mandoc(1) validation modules only. |
*/ |
*/ |
#include "config.h" |
#include "config.h" |
|
|
#include <sys/types.h> |
#include <sys/types.h> |
|
|
#include <assert.h> |
#include <assert.h> |
#include <errno.h> |
|
#include <limits.h> |
#include <limits.h> |
#include <signal.h> |
|
#include <stddef.h> |
#include <stddef.h> |
#include <stdint.h> |
#include <stdint.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
#include <unistd.h> |
|
|
|
#include "mandoc_aux.h" |
#include "mandoc_aux.h" |
#include "mandoc_ohash.h" |
#include "mandoc_ohash.h" |
#include "mandoc.h" |
#include "mandoc.h" |
|
#include "roff.h" |
|
#include "mdoc.h" |
|
#include "roff_int.h" |
#include "tag.h" |
#include "tag.h" |
|
|
struct tag_entry { |
struct tag_entry { |
size_t *lines; |
struct roff_node **nodes; |
size_t maxlines; |
size_t maxnodes; |
size_t nlines; |
size_t nnodes; |
int prio; |
int prio; |
char s[]; |
char s[]; |
}; |
}; |
|
|
static void tag_signal(int) __attribute__((__noreturn__)); |
static void tag_move_href(struct roff_man *, |
|
struct roff_node *, const char *); |
|
static void tag_move_id(struct roff_node *); |
|
|
static struct ohash tag_data; |
static struct ohash tag_data; |
static struct tag_files tag_files; |
|
|
|
|
|
/* |
/* |
* Prepare for using a pager. |
* Set up the ohash table to collect nodes |
* Not all pagers are capable of using a tag file, |
* where various marked-up terms are documented. |
* but for simplicity, create it anyway. |
|
*/ |
*/ |
struct tag_files * |
void |
tag_init(char *tagname) |
tag_alloc(void) |
{ |
{ |
struct sigaction sa; |
mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); |
int ofd; |
} |
|
|
ofd = -1; |
void |
tag_files.tfd = -1; |
tag_free(void) |
tag_files.tcpgid = -1; |
{ |
tag_files.tagname = tagname; |
struct tag_entry *entry; |
|
unsigned int slot; |
|
|
/* Clean up when dying from a signal. */ |
if (tag_data.info.free == NULL) |
|
return; |
memset(&sa, 0, sizeof(sa)); |
entry = ohash_first(&tag_data, &slot); |
sigfillset(&sa.sa_mask); |
while (entry != NULL) { |
sa.sa_handler = tag_signal; |
free(entry->nodes); |
sigaction(SIGHUP, &sa, NULL); |
free(entry); |
sigaction(SIGINT, &sa, NULL); |
entry = ohash_next(&tag_data, &slot); |
sigaction(SIGTERM, &sa, NULL); |
|
|
|
/* |
|
* POSIX requires that a process calling tcsetpgrp(3) |
|
* from the background gets a SIGTTOU signal. |
|
* In that case, do not stop. |
|
*/ |
|
|
|
sa.sa_handler = SIG_IGN; |
|
sigaction(SIGTTOU, &sa, NULL); |
|
|
|
/* Save the original standard output for use by the pager. */ |
|
|
|
if ((tag_files.ofd = dup(STDOUT_FILENO)) == -1) { |
|
mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); |
|
goto fail; |
|
} |
} |
|
ohash_delete(&tag_data); |
/* Create both temporary output files. */ |
tag_data.info.free = NULL; |
|
|
(void)strlcpy(tag_files.ofn, "/tmp/man.XXXXXXXXXX", |
|
sizeof(tag_files.ofn)); |
|
(void)strlcpy(tag_files.tfn, "/tmp/man.XXXXXXXXXX", |
|
sizeof(tag_files.tfn)); |
|
if ((ofd = mkstemp(tag_files.ofn)) == -1) { |
|
mandoc_msg(MANDOCERR_MKSTEMP, 0, 0, |
|
"%s: %s", tag_files.ofn, strerror(errno)); |
|
goto fail; |
|
} |
|
if ((tag_files.tfd = mkstemp(tag_files.tfn)) == -1) { |
|
mandoc_msg(MANDOCERR_MKSTEMP, 0, 0, |
|
"%s: %s", tag_files.tfn, strerror(errno)); |
|
goto fail; |
|
} |
|
if (dup2(ofd, STDOUT_FILENO) == -1) { |
|
mandoc_msg(MANDOCERR_DUP, 0, 0, "%s", strerror(errno)); |
|
goto fail; |
|
} |
|
close(ofd); |
|
|
|
/* |
|
* Set up the ohash table to collect output line numbers |
|
* where various marked-up terms are documented. |
|
*/ |
|
|
|
mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); |
|
return &tag_files; |
|
|
|
fail: |
|
tag_unlink(); |
|
if (ofd != -1) |
|
close(ofd); |
|
if (tag_files.ofd != -1) |
|
close(tag_files.ofd); |
|
if (tag_files.tfd != -1) |
|
close(tag_files.tfd); |
|
*tag_files.ofn = '\0'; |
|
*tag_files.tfn = '\0'; |
|
tag_files.ofd = -1; |
|
tag_files.tfd = -1; |
|
tag_files.tagname = NULL; |
|
return NULL; |
|
} |
} |
|
|
/* |
/* |
* Set the line number where a term is defined, |
* Set a node where a term is defined, |
* unless it is already defined at a lower priority. |
* unless the term is already defined at a lower priority. |
*/ |
*/ |
void |
void |
tag_put(const char *s, int prio, size_t line) |
tag_put(const char *s, int prio, struct roff_node *n) |
{ |
{ |
struct tag_entry *entry; |
struct tag_entry *entry; |
const char *se; |
struct roff_node *nold; |
|
const char *se, *src; |
|
char *cpy; |
size_t len; |
size_t len; |
unsigned int slot; |
unsigned int slot; |
|
int changed; |
|
|
assert(prio <= TAG_FALLBACK); |
assert(prio <= TAG_FALLBACK); |
if (tag_files.tfd <= 0) |
|
|
/* |
|
* If the node is already tagged, the existing tag is |
|
* explicit and we are now about to add an implicit tag. |
|
* Don't do that; just skip implicit tagging if the author |
|
* specified an explicit tag. |
|
*/ |
|
|
|
if (n->flags & NODE_ID) |
return; |
return; |
|
|
if (s[0] == '\\' && (s[1] == '&' || s[1] == 'e')) |
/* Determine the implicit tag. */ |
s += 2; |
|
|
|
|
changed = 1; |
|
if (s == NULL) { |
|
if (n->child == NULL || n->child->type != ROFFT_TEXT) |
|
return; |
|
s = n->child->string; |
|
switch (s[0]) { |
|
case '-': |
|
s++; |
|
break; |
|
case '\\': |
|
switch (s[1]) { |
|
case '&': |
|
case '-': |
|
case 'e': |
|
s += 2; |
|
break; |
|
default: |
|
return; |
|
} |
|
break; |
|
default: |
|
changed = 0; |
|
break; |
|
} |
|
} |
|
|
/* |
/* |
|
* Translate \- and ASCII_HYPH to plain '-'. |
* Skip whitespace and escapes and whatever follows, |
* Skip whitespace and escapes and whatever follows, |
* and if there is any, downgrade the priority. |
* and if there is any, downgrade the priority. |
*/ |
*/ |
|
|
len = strcspn(s, " \t\\"); |
cpy = mandoc_malloc(strlen(s) + 1); |
|
for (src = s, len = 0; *src != '\0'; src++, len++) { |
|
switch (*src) { |
|
case '\t': |
|
case ' ': |
|
changed = 1; |
|
break; |
|
case ASCII_HYPH: |
|
cpy[len] = '-'; |
|
changed = 1; |
|
continue; |
|
case '\\': |
|
if (src[1] != '-') |
|
break; |
|
src++; |
|
changed = 1; |
|
/* FALLTHROUGH */ |
|
default: |
|
cpy[len] = *src; |
|
continue; |
|
} |
|
break; |
|
} |
if (len == 0) |
if (len == 0) |
return; |
goto out; |
|
cpy[len] = '\0'; |
|
|
se = s + len; |
if (*src != '\0' && prio < TAG_WEAK) |
if (*se != '\0' && prio < TAG_WEAK) |
|
prio = TAG_WEAK; |
prio = TAG_WEAK; |
|
|
|
s = cpy; |
|
se = cpy + len; |
slot = ohash_qlookupi(&tag_data, s, &se); |
slot = ohash_qlookupi(&tag_data, s, &se); |
entry = ohash_find(&tag_data, slot); |
entry = ohash_find(&tag_data, slot); |
|
|
if (entry == NULL) { |
/* Build a new entry. */ |
|
|
/* Build a new entry. */ |
if (entry == NULL) { |
|
|
entry = mandoc_malloc(sizeof(*entry) + len + 1); |
entry = mandoc_malloc(sizeof(*entry) + len + 1); |
memcpy(entry->s, s, len); |
memcpy(entry->s, s, len + 1); |
entry->s[len] = '\0'; |
entry->nodes = NULL; |
entry->lines = NULL; |
entry->maxnodes = entry->nnodes = 0; |
entry->maxlines = entry->nlines = 0; |
|
ohash_insert(&tag_data, slot, entry); |
ohash_insert(&tag_data, slot, entry); |
|
} |
|
|
} else { |
/* |
|
* Lower priority numbers take precedence. |
|
* If a better entry is already present, ignore the new one. |
|
*/ |
|
|
/* |
else if (entry->prio < prio) |
* Lower priority numbers take precedence, |
goto out; |
* but TAG_FALLBACK is special. |
|
* A tag with priority TAG_FALLBACK is only used |
|
* if the tag occurs exactly once. |
|
*/ |
|
|
|
|
/* |
|
* If the existing entry is worse, clear it. |
|
* In addition, a tag with priority TAG_FALLBACK |
|
* is only used if the tag occurs exactly once. |
|
*/ |
|
|
|
else if (entry->prio > prio || prio == TAG_FALLBACK) { |
|
while (entry->nnodes > 0) { |
|
nold = entry->nodes[--entry->nnodes]; |
|
nold->flags &= ~NODE_ID; |
|
free(nold->tag); |
|
nold->tag = NULL; |
|
} |
if (prio == TAG_FALLBACK) { |
if (prio == TAG_FALLBACK) { |
if (entry->prio == TAG_FALLBACK) |
entry->prio = TAG_DELETE; |
entry->prio = TAG_DELETE; |
goto out; |
return; |
|
} |
} |
|
|
/* A better entry is already present, ignore the new one. */ |
|
|
|
if (entry->prio < prio) |
|
return; |
|
|
|
/* The existing entry is worse, clear it. */ |
|
|
|
if (entry->prio > prio) |
|
entry->nlines = 0; |
|
} |
} |
|
|
/* Remember the new line. */ |
/* Remember the new node. */ |
|
|
if (entry->maxlines == entry->nlines) { |
if (entry->maxnodes == entry->nnodes) { |
entry->maxlines += 4; |
entry->maxnodes += 4; |
entry->lines = mandoc_reallocarray(entry->lines, |
entry->nodes = mandoc_reallocarray(entry->nodes, |
entry->maxlines, sizeof(*entry->lines)); |
entry->maxnodes, sizeof(*entry->nodes)); |
} |
} |
entry->lines[entry->nlines++] = line; |
entry->nodes[entry->nnodes++] = n; |
entry->prio = prio; |
entry->prio = prio; |
|
n->flags |= NODE_ID; |
|
if (changed) { |
|
assert(n->tag == NULL); |
|
n->tag = mandoc_strndup(s, len); |
|
} |
|
|
|
out: |
|
free(cpy); |
} |
} |
|
|
|
int |
|
tag_exists(const char *tag) |
|
{ |
|
return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; |
|
} |
|
|
/* |
/* |
* Write out the tags file using the previously collected |
* For in-line elements, move the link target |
* information and clear the ohash table while going along. |
* to the enclosing paragraph when appropriate. |
*/ |
*/ |
void |
static void |
tag_write(void) |
tag_move_id(struct roff_node *n) |
{ |
{ |
FILE *stream; |
struct roff_node *np; |
struct tag_entry *entry; |
|
size_t i; |
|
unsigned int slot; |
|
int empty; |
|
|
|
if (tag_files.tfd <= 0) |
np = n; |
return; |
for (;;) { |
if (tag_files.tagname != NULL && ohash_find(&tag_data, |
if (np->prev != NULL) |
ohash_qlookup(&tag_data, tag_files.tagname)) == NULL) { |
np = np->prev; |
mandoc_msg(MANDOCERR_TAG, 0, 0, "%s", tag_files.tagname); |
else if ((np = np->parent) == NULL) |
tag_files.tagname = NULL; |
return; |
} |
switch (np->tok) { |
if ((stream = fdopen(tag_files.tfd, "w")) == NULL) |
case MDOC_It: |
mandoc_msg(MANDOCERR_FDOPEN, 0, 0, "%s", strerror(errno)); |
switch (np->parent->parent->norm->Bl.type) { |
empty = 1; |
case LIST_column: |
entry = ohash_first(&tag_data, &slot); |
/* Target the ROFFT_BLOCK = <tr>. */ |
while (entry != NULL) { |
np = np->parent; |
if (stream != NULL && entry->prio < TAG_DELETE) { |
break; |
for (i = 0; i < entry->nlines; i++) { |
case LIST_diag: |
fprintf(stream, "%s %s %zu\n", |
case LIST_hang: |
entry->s, tag_files.ofn, entry->lines[i]); |
case LIST_inset: |
empty = 0; |
case LIST_ohang: |
|
case LIST_tag: |
|
/* Target the ROFFT_HEAD = <dt>. */ |
|
np = np->parent->head; |
|
break; |
|
default: |
|
/* Target the ROFF_BODY = <li>. */ |
|
break; |
} |
} |
|
/* FALLTHROUGH */ |
|
case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ |
|
if (np->tag == NULL) { |
|
np->tag = mandoc_strdup(n->tag == NULL ? |
|
n->child->string : n->tag); |
|
np->flags |= NODE_ID; |
|
n->flags &= ~NODE_ID; |
|
} |
|
return; |
|
case MDOC_Sh: |
|
case MDOC_Ss: |
|
case MDOC_Bd: |
|
case MDOC_Bl: |
|
case MDOC_D1: |
|
case MDOC_Dl: |
|
case MDOC_Rs: |
|
/* Do not move past major blocks. */ |
|
return; |
|
default: |
|
/* |
|
* Move past in-line content and partial |
|
* blocks, for example .It Xo or .It Bq Er. |
|
*/ |
|
break; |
} |
} |
free(entry->lines); |
|
free(entry); |
|
entry = ohash_next(&tag_data, &slot); |
|
} |
} |
ohash_delete(&tag_data); |
|
if (stream != NULL) |
|
fclose(stream); |
|
else |
|
close(tag_files.tfd); |
|
tag_files.tfd = -1; |
|
if (empty) { |
|
unlink(tag_files.tfn); |
|
*tag_files.tfn = '\0'; |
|
} |
|
} |
} |
|
|
void |
/* |
tag_unlink(void) |
* When a paragraph is tagged and starts with text, |
|
* move the permalink to the first few words. |
|
*/ |
|
static void |
|
tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) |
{ |
{ |
pid_t tc_pgid; |
char *cp; |
|
|
if (tag_files.tcpgid != -1) { |
if (n == NULL || n->type != ROFFT_TEXT || |
tc_pgid = tcgetpgrp(tag_files.ofd); |
*n->string == '\0' || *n->string == ' ') |
if (tc_pgid == tag_files.pager_pid || |
return; |
tc_pgid == getpgid(0) || |
|
getpgid(tc_pgid) == -1) |
cp = n->string; |
(void)tcsetpgrp(tag_files.ofd, tag_files.tcpgid); |
while (cp != NULL && cp - n->string < 5) |
|
cp = strchr(cp + 1, ' '); |
|
|
|
/* If the first text node is longer, split it. */ |
|
|
|
if (cp != NULL && cp[1] != '\0') { |
|
man->last = n; |
|
man->next = ROFF_NEXT_SIBLING; |
|
roff_word_alloc(man, n->line, |
|
n->pos + (cp - n->string), cp + 1); |
|
man->last->flags = n->flags & ~NODE_LINE; |
|
*cp = '\0'; |
} |
} |
if (*tag_files.ofn != '\0') |
|
unlink(tag_files.ofn); |
assert(n->tag == NULL); |
if (*tag_files.tfn != '\0') |
n->tag = mandoc_strdup(tag); |
unlink(tag_files.tfn); |
n->flags |= NODE_HREF; |
} |
} |
|
|
static void |
/* |
tag_signal(int signum) |
* When all tags have been set, decide where to put |
|
* the associated permalinks, and maybe move some tags |
|
* to the beginning of the respective paragraphs. |
|
*/ |
|
void |
|
tag_postprocess(struct roff_man *man, struct roff_node *n) |
{ |
{ |
struct sigaction sa; |
if (n->flags & NODE_ID) { |
|
switch (n->tok) { |
tag_unlink(); |
case MDOC_Pp: |
memset(&sa, 0, sizeof(sa)); |
tag_move_href(man, n->next, n->tag); |
sigemptyset(&sa.sa_mask); |
break; |
sa.sa_handler = SIG_DFL; |
case MDOC_Bd: |
sigaction(signum, &sa, NULL); |
case MDOC_D1: |
kill(getpid(), signum); |
case MDOC_Dl: |
/* NOTREACHED */ |
tag_move_href(man, n->child, n->tag); |
_exit(1); |
break; |
|
case MDOC_Bl: |
|
/* XXX No permalink for now. */ |
|
break; |
|
default: |
|
if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) |
|
tag_move_id(n); |
|
if (n->tok != MDOC_Tg) |
|
n->flags |= NODE_HREF; |
|
else if ((n->flags & NODE_ID) == 0) { |
|
n->flags |= NODE_NOPRT; |
|
free(n->tag); |
|
n->tag = NULL; |
|
} |
|
break; |
|
} |
|
} |
|
for (n = n->child; n != NULL; n = n->next) |
|
tag_postprocess(man, n); |
} |
} |