=================================================================== RCS file: /cvs/mandoc/main.c,v retrieving revision 1.96 retrieving revision 1.99 diff -u -p -r1.96 -r1.99 --- mandoc/main.c 2010/07/02 12:54:33 1.96 +++ mandoc/main.c 2010/07/20 14:56:42 1.99 @@ -1,6 +1,7 @@ -/* $Id: main.c,v 1.96 2010/07/02 12:54:33 kristaps Exp $ */ +/* $Id: main.c,v 1.99 2010/07/20 14:56:42 kristaps Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons + * Copyright (c) 2010 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -22,6 +23,7 @@ #include #include +#include #include #include #include @@ -30,7 +32,6 @@ #include #include "mandoc.h" -#include "regs.h" #include "main.h" #include "mdoc.h" #include "man.h" @@ -110,6 +111,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = "list type must come first", "bad standard", "bad library", + "tab in non-literal context", "bad escape sequence", "unterminated quoted string", "argument requires the width argument", @@ -491,6 +493,26 @@ fdesc(struct curparse *curp) ++lnn; break; } + + /* + * Warn about bogus characters. If you're using + * non-ASCII encoding, you're screwing your + * readers. Since I'd rather this not happen, + * I'll be helpful and drop these characters so + * we don't display gibberish. Note to manual + * writers: use special characters. + */ + + if ( ! isgraph((u_char)blk.buf[i]) && + ! isblank((u_char)blk.buf[i])) { + if ( ! mmsg(MANDOCERR_BADCHAR, curp, + lnn_start, pos, + "ignoring byte")) + goto bailout; + i++; + continue; + } + /* Trailing backslash is like a plain character. */ if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { if (pos >= (int)ln.sz)