===================================================================
RCS file: /cvs/docbook2mdoc/statistics.c,v
retrieving revision 1.24
retrieving revision 1.26
diff -u -p -r1.24 -r1.26
--- docbook2mdoc/statistics.c	2019/04/14 14:00:17	1.24
+++ docbook2mdoc/statistics.c	2019/04/14 16:26:34	1.26
@@ -1,4 +1,4 @@
-/* $Id: statistics.c,v 1.24 2019/04/14 14:00:17 schwarze Exp $ */
+/* $Id: statistics.c,v 1.26 2019/04/14 16:26:34 schwarze Exp $ */
 /*
  * Copyright (c) 2019 Ingo Schwarze
  *
@@ -46,6 +46,8 @@
  *
  * Example usage:
  * statistics tgroup colspec < filenames.txt | grep colspec
+ *
+ * Synchronized with parse.c up to rev. 1.42.
  */
 
 struct entry {
@@ -209,10 +211,10 @@ parse_file(int fd, char *fname)
 	size_t rlen; /* Number of bytes in b[]. */
 	size_t poff; /* Parse offset in b[]. */
 	size_t pend; /* Offset of the end of the current word. */
-	int in_tag, in_arg, in_quotes, elem_end;
+	int in_tag, in_arg, in_quotes, in_doctype, elem_end;
 
 	rlen = 0;
-	in_tag = in_arg = in_quotes = 0;
+	in_tag = in_arg = in_quotes = in_doctype = 0;
 	while ((rsz = read(fd, b + rlen, sizeof(b) - rlen - 1)) >= 0) {
 		if ((rlen += rsz) == 0)
 			break;
@@ -249,6 +251,11 @@ parse_file(int fd, char *fname)
 				if (elem_end)
 					stack_pop(NULL);
 			} else if (in_tag) {
+				if (in_doctype && b[pend] == '[') {
+					in_tag = in_doctype = 0;
+					pend++;
+					continue;
+				}
 				if (advance(b, rlen, &pend, " =>") && rsz > 0)
 					break;
 				elem_end = 0;
@@ -299,11 +306,16 @@ parse_file(int fd, char *fname)
 				if (b[++poff] == '/') {
 					elem_end = 1;
 					poff++;
+				} else if (strcasecmp(b + poff,
+				    "!DOCTYPE") == 0) {
+					in_doctype = 1;
 				} else if (b[poff] != '!' && b[poff] != '?') {
 					table_add(stacki > 0 ?
 					    stack[stacki - 1] : "ROOT",
 					    b + poff);
 					stack_push(b + poff);
+					if (strcmp(b + poff, "sbr") == 0)
+						elem_end = 1;
 				}
 				if (elem_end)
 					stack_pop(b + poff);
@@ -314,8 +326,8 @@ parse_file(int fd, char *fname)
 			}
 		}
 		assert(poff > 0);
-		memmove(b, b + poff, rlen - poff);
 		rlen -= poff;
+		memmove(b, b + poff, rlen);
 	}
 	if (rsz < 0)
 		perror(fname);