version 1.1, 2019/03/29 15:55:28 |
version 1.19, 2019/04/12 07:05:19 |
|
|
#include <ctype.h> |
#include <ctype.h> |
#include <err.h> |
#include <err.h> |
#include <fcntl.h> |
#include <fcntl.h> |
|
#include <getopt.h> |
#include <stdio.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdlib.h> |
#include <string.h> |
#include <string.h> |
|
|
* Typical usage: |
* Typical usage: |
* statistics < filenames.txt | sort -n |
* statistics < filenames.txt | sort -n |
* statistics < filenames.txt | grep '\<listitem\>' | sort -n |
* statistics < filenames.txt | grep '\<listitem\>' | sort -n |
|
* |
|
* Relations already fully implemented are excluded by default. |
|
* The option -a shows all relations. |
|
* |
|
* If two arguments (parent and child) are given, a histogram |
|
* of the number of children of the kind in each parent is given |
|
* in addition to the normal output. |
|
* |
|
* Example usage: |
|
* statistics tgroup colspec < filenames.txt | grep colspec |
*/ |
*/ |
|
|
struct entry { |
struct entry { |
Line 51 static char **stack; |
|
Line 62 static char **stack; |
|
static size_t stacksz; |
static size_t stacksz; |
static size_t stacki; |
static size_t stacki; |
|
|
|
/*
 * Number of buckets in the optional parent-child histogram.
 * This is an enumeration constant rather than a const-qualified
 * object because ISO C requires an integer constant expression
 * for the size of an array that is a structure member.
 */
enum { nchildsz = 8 };

/*
 * State of the optional parent-child histogram: how many children
 * of one kind occur in each parent of one kind.
 */
struct nchild {
	char	*parent;		/* Name of the parent element to watch. */
	char	*child;			/* Name of the child element to count. */
	int	 freq[nchildsz];	/* Buckets, indexed by the running count. */
	int	 count;			/* Children seen in the current parent. */
};
|
|
|
static struct nchild nchild; |
|
static char *fname; |
|
|
|
|
/* |
/* |
* Count one instance of a parent-child relation. |
* Count one instance of a parent-child relation. |
|
 * Calls made before the special call table_add(NULL, NULL)

 * mark relations that are not to be counted;

 * in that phase, child may be NULL to act as a wildcard.
*/ |
*/ |
static void |
static void |
table_add(const char *parent, const char *child) |
table_add(const char *parent, const char *child) |
{ |
{ |
size_t i; |
static int init_done; |
|
size_t i; |
|
|
|
if (parent == NULL && child == NULL) { |
|
init_done = 1; |
|
return; |
|
} |
|
|
|
/* Optional parent-child histogram. */ |
|
|
|
if (init_done && parent != NULL && child != NULL && |
|
nchild.parent != NULL && nchild.child != NULL && |
|
strcmp(parent, nchild.parent) == 0 && |
|
strcmp(child, nchild.child) == 0) { |
|
if (nchild.count < nchildsz) { |
|
nchild.freq[nchild.count]++; |
|
if (nchild.count > 0) |
|
nchild.freq[nchild.count - 1]--; |
|
} else if (nchild.count == nchildsz) |
|
puts(fname); |
|
nchild.count++; |
|
} |
|
|
/* If the table entry already exists, increment its count. */ |
/* If the table entry already exists, increment its count. */ |
|
|
for (i = 0; i < tablei; i++) { |
for (i = 0; i < tablei; i++) { |
if (strcmp(parent, table[i].parent) == 0 && |
if (strcmp(parent, table[i].parent) == 0 && |
strcmp(child, table[i].child) == 0) { |
(child == NULL || table[i].child == NULL || |
table[i].count++; |
strcmp(child, table[i].child) == 0)) { |
|
assert(init_done); |
|
if (table[i].count != -1) |
|
table[i].count++; |
return; |
return; |
} |
} |
} |
} |
Line 83 table_add(const char *parent, const char *child) |
|
Line 132 table_add(const char *parent, const char *child) |
|
|
|
if ((table[tablei].parent = strdup(parent)) == NULL) |
if ((table[tablei].parent = strdup(parent)) == NULL) |
err(1, NULL); |
err(1, NULL); |
if ((table[tablei].child = strdup(child)) == NULL) |
if (child == NULL) |
|
table[tablei].child = NULL; |
|
else if ((table[tablei].child = strdup(child)) == NULL) |
err(1, NULL); |
err(1, NULL); |
table[tablei++].count = 1; |
table[tablei++].count = init_done ? 1 : -1; |
} |
} |
|
|
/* |
/* |
Line 94 table_add(const char *parent, const char *child) |
|
Line 145 table_add(const char *parent, const char *child) |
|
static void |
static void |
stack_push(const char *name) |
stack_push(const char *name) |
{ |
{ |
|
if (nchild.parent != NULL && strcmp(name, nchild.parent) == 0) |
|
nchild.count = 0; |
|
|
if (stacki == stacksz) { |
if (stacki == stacksz) { |
stacksz += 8; |
stacksz += 8; |
stack = reallocarray(stack, stacksz, sizeof(*stack)); |
stack = reallocarray(stack, stacksz, sizeof(*stack)); |
|
|
parse_file(int fd, char *fname) |
parse_file(int fd, char *fname) |
{ |
{ |
char b[4096]; |
char b[4096]; |
|
char *cp; |
ssize_t rsz; /* Return value from read(2). */ |
ssize_t rsz; /* Return value from read(2). */ |
size_t rlen; /* Number of bytes in b[]. */ |
size_t rlen; /* Number of bytes in b[]. */ |
size_t poff; /* Parse offset in b[]. */ |
size_t poff; /* Parse offset in b[]. */ |
Line 170 parse_file(int fd, char *fname) |
|
Line 225 parse_file(int fd, char *fname) |
|
continue; |
continue; |
} |
} |
if (in_arg) { |
if (in_arg) { |
if (in_quotes == 0 && b[pend] == '"') { |
if (in_quotes == 0 && |
in_quotes = 1; |
(b[pend] == '\'' || b[pend] == '"')) { |
|
in_quotes = b[pend] == '"' ? 2 : 1; |
pend++; |
pend++; |
continue; |
continue; |
} |
} |
if (advance(b, rlen, &pend, |
if (advance(b, rlen, &pend, |
in_quotes ? "\"" : " >") && rsz > 0) |
in_quotes == 2 ? "\"" : |
|
in_quotes == 1 ? "'" : " >") && rsz > 0) |
break; |
break; |
in_arg = in_quotes = elem_end = 0; |
in_arg = in_quotes = elem_end = 0; |
if (b[pend] == '>') { |
if (b[pend] == '>') { |
Line 217 parse_file(int fd, char *fname) |
|
Line 274 parse_file(int fd, char *fname) |
|
} else if (b[poff] == '<') { |
} else if (b[poff] == '<') { |
if (advance(b, rlen, &pend, " >") && rsz > 0) |
if (advance(b, rlen, &pend, " >") && rsz > 0) |
break; |
break; |
|
if (pend > poff + 3 && |
|
strncmp(b + poff, "<!--", 4) == 0) { |
|
/* Skip a comment. */ |
|
cp = strstr(b + pend - 2, "-->"); |
|
if (cp == NULL) { |
|
pend = rlen; |
|
if (rsz > 0) |
|
break; |
|
} else |
|
pend = cp + 3 - b; |
|
continue; |
|
} |
elem_end = 0; |
elem_end = 0; |
if (b[pend] != '>') |
if (b[pend] != '>') |
in_tag = 1; |
in_tag = 1; |
Line 232 parse_file(int fd, char *fname) |
|
Line 301 parse_file(int fd, char *fname) |
|
poff++; |
poff++; |
} else if (b[poff] != '!' && b[poff] != '?') { |
} else if (b[poff] != '!' && b[poff] != '?') { |
table_add(stacki > 0 ? |
table_add(stacki > 0 ? |
stack[stacki - 1] : "", |
stack[stacki - 1] : "ROOT", |
b + poff); |
b + poff); |
stack_push(b + poff); |
stack_push(b + poff); |
} |
} |
Line 255 parse_file(int fd, char *fname) |
|
Line 324 parse_file(int fd, char *fname) |
|
int |
int |
main(int argc, char *argv[]) |
main(int argc, char *argv[]) |
{ |
{ |
char *fname; |
|
size_t fsz, i; |
size_t fsz, i; |
ssize_t rsz; |
ssize_t rsz; |
int fd; |
int ch, fd, show_all; |
|
|
fd = -1; |
show_all = 0; |
fname = NULL; |
while ((ch = getopt(argc, argv, "a")) != -1) { |
|
switch (ch) { |
|
case 'a': |
|
show_all = 1; |
|
break; |
|
default: |
|
return 1; |
|
} |
|
} |
|
argc -= optind; |
|
argv += optind; |
|
|
|
if (argc > 1) { |
|
nchild.parent = argv[0]; |
|
nchild.child = argv[1]; |
|
} |
|
|
|
/* Exclude relations that are already fully implemented. */ |
|
if (show_all == 0) { |
|
table_add("ROOT", "refentry"); |
|
table_add("acronym", "TEXT"); |
|
table_add("appendix", NULL); |
|
table_add("article", NULL); |
|
table_add("blockquote", NULL); |
|
table_add("book", NULL); |
|
table_add("chapter", NULL); |
|
table_add("code", "TEXT"); |
|
table_add("computeroutput", "TEXT"); |
|
table_add("constant", "TEXT"); |
|
table_add("emphasis", "TEXT"); |
|
table_add("entry", NULL); |
|
table_add("errorname", "TEXT"); |
|
table_add("filename", "TEXT"); |
|
table_add("funcdef", "function"); |
|
table_add("funcdef", "TEXT"); |
|
table_add("funcprototype", "funcdef"); |
|
table_add("funcprototype", "paramdef"); |
|
table_add("funcsynopsis", "funcprototype"); |
|
table_add("funcsynopsis", "funcsynopsisinfo"); |
|
table_add("funcsynopsisinfo", "TEXT"); |
|
table_add("function", "TEXT"); |
|
table_add("glossary", "glossdiv"); |
|
table_add("glossary", "glossentry"); |
|
table_add("glossdef", "para"); |
|
table_add("glossdiv", "glossentry"); |
|
table_add("glossentry", "glossdef"); |
|
table_add("glossentry", "glossterm"); |
|
table_add("glossentry", "indexterm"); |
|
table_add("glosslist", "glossentry"); |
|
table_add("glossterm", "TEXT"); |
|
table_add("indexterm", "primary"); |
|
table_add("indexterm", "secondary"); |
|
table_add("informaltable", "tgroup"); |
|
table_add("itemizedlist", "listitem"); |
|
table_add("legalnotice", NULL); |
|
table_add("link", NULL); |
|
table_add("listitem", NULL); |
|
table_add("literal", "TEXT"); |
|
table_add("literallayout", NULL); |
|
table_add("markup", "TEXT"); |
|
table_add("member", "TEXT"); |
|
table_add("note", NULL); |
|
table_add("orderedlist", "listitem"); |
|
table_add("para", NULL); |
|
table_add("paramdef", "parameter"); |
|
table_add("paramdef", "TEXT"); |
|
table_add("parameter", "TEXT"); |
|
table_add("primary", NULL); |
|
table_add("programlisting", NULL); |
|
table_add("refentry", "refmeta"); |
|
table_add("refentry", "refnamediv"); |
|
table_add("refentry", "refsect1"); |
|
table_add("refentry", "refsynopsisdiv"); |
|
table_add("refmeta", "manvolnum"); |
|
table_add("refmeta", "refentrytitle"); |
|
table_add("refname", "TEXT"); |
|
table_add("refnamediv", "refname"); |
|
table_add("refnamediv", "refpurpose"); |
|
table_add("refpurpose", "TEXT"); |
|
table_add("refsect1", NULL); |
|
table_add("refsect2", NULL); |
|
table_add("refsynopsisdiv", "funcsynopsis"); |
|
table_add("row", "entry"); |
|
table_add("screen", NULL); |
|
table_add("secondary", NULL); |
|
table_add("section", NULL); |
|
table_add("sect1", NULL); |
|
table_add("sect2", NULL); |
|
table_add("sect3", NULL); |
|
table_add("sect4", NULL); |
|
table_add("sgmltag", "TEXT"); |
|
table_add("simpara", NULL); |
|
table_add("simplelist", "member"); |
|
table_add("structfield", "TEXT"); |
|
table_add("structname", "TEXT"); |
|
table_add("symbol", "TEXT"); |
|
table_add("table", "tgroup"); |
|
table_add("table", "title"); |
|
table_add("tbody", "row"); |
|
table_add("term", NULL); |
|
table_add("tgroup", "colspec"); |
|
table_add("tgroup", "tbody"); |
|
table_add("tgroup", "thead"); |
|
table_add("thead", "row"); |
|
table_add("title", "TEXT"); |
|
table_add("type", "TEXT"); |
|
table_add("ulink", NULL); |
|
table_add("userinput", "TEXT"); |
|
table_add("variablelist", "varlistentry"); |
|
table_add("varlistentry", "listitem"); |
|
table_add("varlistentry", "term"); |
|
} |
|
table_add(NULL, NULL); |
|
|
/* Loop over input files. */ |
/* Loop over input files. */ |
|
fd = -1; |
|
fname = NULL; |
while ((rsz = getline(&fname, &fsz, stdin)) != -1) { |
while ((rsz = getline(&fname, &fsz, stdin)) != -1) { |
if (fname[rsz - 1] == '\n') |
if (fname[rsz - 1] == '\n') |
fname[--rsz] = '\0'; |
fname[--rsz] = '\0'; |
Line 282 main(int argc, char *argv[]) |
|
Line 464 main(int argc, char *argv[]) |
|
|
|
/* Dump results. */ |
/* Dump results. */ |
for (i = 0; i < tablei; i++) |
for (i = 0; i < tablei; i++) |
printf("%d\t%s\t%s\n", table[i].count, |
if (table[i].count != -1) |
table[i].parent, table[i].child); |
printf("%d\t%s\t%s\n", table[i].count, |
|
table[i].parent, table[i].child); |
|
|
|
/* Optional parent-child histogram. */ |
|
if (nchild.parent != NULL) { |
|
printf("%s %s", nchild.parent, nchild.child); |
|
for (i = 0; i < nchildsz; i++) |
|
printf(" %d", nchild.freq[i]); |
|
putchar('\n'); |
|
} |
return 0; |
return 0; |
} |
} |