=================================================================== RCS file: /cvs/mandoc/mdoc.h,v retrieving revision 1.19 retrieving revision 1.48 diff -u -p -r1.19 -r1.48 --- mandoc/mdoc.h 2009/01/12 10:31:53 1.19 +++ mandoc/mdoc.h 2009/03/16 22:19:19 1.48 @@ -1,4 +1,4 @@ -/* $Id: mdoc.h,v 1.19 2009/01/12 10:31:53 kristaps Exp $ */ +/* $Id: mdoc.h,v 1.48 2009/03/16 22:19:19 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -19,8 +19,18 @@ #ifndef MDOC_H #define MDOC_H -#define MDOC_LINEARG_MAX 12 +#include <time.h> +/* + * This library implements a validating scanner/parser for ``mdoc'' roff + * macro documents, a.k.a. BSD manual page documents. The mdoc.c file + * drives the parser, while macro.c describes the macro ontologies. + * validate.c pre- and post-validates parsed macros, and action.c + * performs actions on parsed and validated macros. + */ + +/* What follows is a list of ALL possible macros. */ + #define MDOC___ 0 #define MDOC_Dd 1 #define MDOC_Dt 2 @@ -127,8 +137,21 @@ #define MDOC_Hf 103 #define MDOC_Fr 104 #define MDOC_Ud 105 -#define MDOC_MAX 106 +#define MDOC_Lb 106 +#define MDOC_Ap 107 +#define MDOC_Lp 108 +#define MDOC_Lk 109 +#define MDOC_Mt 110 +#define MDOC_Brq 111 +#define MDOC_Bro 112 +#define MDOC_Brc 113 +#define MDOC__C 114 +#define MDOC_Es 115 +#define MDOC_En 116 +#define MDOC_MAX 117 +/* What follows is a list of ALL possible macro arguments. 
*/ + #define MDOC_Split 0 #define MDOC_Nosplit 1 #define MDOC_Ragged 2 @@ -150,124 +173,20 @@ #define MDOC_Width 18 #define MDOC_Compact 19 #define MDOC_Std 20 -#define MDOC_p1003_1_88 21 -#define MDOC_p1003_1_90 22 -#define MDOC_p1003_1_96 23 -#define MDOC_p1003_1_2001 24 -#define MDOC_p1003_1_2004 25 -#define MDOC_p1003_1 26 -#define MDOC_p1003_1b 27 -#define MDOC_p1003_1b_93 28 -#define MDOC_p1003_1c_95 29 -#define MDOC_p1003_1g_2000 30 -#define MDOC_p1003_2_92 31 -#define MDOC_p1387_2_95 32 -#define MDOC_p1003_2 33 -#define MDOC_p1387_2 34 -#define MDOC_isoC_90 35 -#define MDOC_isoC_amd1 36 -#define MDOC_isoC_tcor1 37 -#define MDOC_isoC_tcor2 38 -#define MDOC_isoC_99 39 -#define MDOC_ansiC 40 -#define MDOC_ansiC_89 41 -#define MDOC_ansiC_99 42 -#define MDOC_ieee754 43 -#define MDOC_iso8802_3 44 -#define MDOC_xpg3 45 -#define MDOC_xpg4 46 -#define MDOC_xpg4_2 47 -#define MDOC_xpg4_3 48 -#define MDOC_xbd5 49 -#define MDOC_xcu5 50 -#define MDOC_xsh5 51 -#define MDOC_xns5 52 -#define MDOC_xns5_2d2_0 53 -#define MDOC_xcurses4_2 54 -#define MDOC_susv2 55 -#define MDOC_susv3 56 -#define MDOC_svid4 57 -#define MDOC_Filled 58 -#define MDOC_Words 59 -#define MDOC_Emphasis 60 -#define MDOC_Symbolic 61 -#define MDOC_ARG_MAX 62 +#define MDOC_Filled 21 +#define MDOC_Words 22 +#define MDOC_Emphasis 23 +#define MDOC_Symbolic 24 +#define MDOC_Nested 25 +#define MDOC_ARG_MAX 26 -enum mdoc_err { - ERR_SYNTAX_QUOTE, /* NOTUSED */ - ERR_SYNTAX_UNQUOTE, - ERR_SYNTAX_NOPUNCT, - ERR_SYNTAX_WS, - ERR_SYNTAX_ARG, - ERR_SYNTAX_ARGFORM, - ERR_SYNTAX_ARGVAL, - ERR_SYNTAX_ARGBAD, - ERR_SYNTAX_ARGMISS, - ERR_SYNTAX_ARGMANY, - ERR_MACRO_NOTSUP, - ERR_MACRO_NOTCALL, - ERR_SCOPE_BREAK, - ERR_SCOPE_NOCTX, - ERR_SCOPE_NONEST, - ERR_SEC_PROLOGUE, - ERR_SEC_NPROLOGUE, - ERR_SEC_PROLOGUE_OO, - ERR_SEC_PROLOGUE_REP, - ERR_SEC_NAME, - ERR_ARGS_EQ0, - ERR_ARGS_EQ1, - ERR_ARGS_GE1, - ERR_ARGS_LE2, - ERR_ARGS_LE8, - ERR_ARGS_MANY, - ERR_SYNTAX_CHILDBAD, - ERR_SYNTAX_CHILDHEAD, - ERR_SYNTAX_CHILDBODY, - 
ERR_SYNTAX_EMPTYBODY, - ERR_SYNTAX_EMPTYHEAD, - ERR_SYNTAX_NOTEXT -}; - -enum mdoc_att { - ATT_DEFAULT = 0, - ATT_v1, - ATT_v2, - ATT_v3, - ATT_v4, - ATT_v5, - ATT_v6, - ATT_v7, - ATT_32v, - ATT_V1, - ATT_V2, - ATT_V3, - ATT_V4 -}; - +/* Warnings are either syntax or groff-compatibility. */ enum mdoc_warn { - WARN_SYNTAX_WS_EOLN, - WARN_SYNTAX_MACLIKE, - WARN_SYNTAX_ARGLIKE, - WARN_SYNTAX_QUOTED, - WARN_SYNTAX_EMPTYBODY, - WARN_IGN_AFTER_BLK, - WARN_IGN_BEFORE_BLK, - WARN_IGN_OBSOLETE, - WARN_SEC_OO, - WARN_SEC_REP, - WARN_ARGS_GE1, - WARN_ARGS_EQ0, - WARN_COMPAT_TROFF + WARN_SYNTAX, + WARN_COMPAT }; -struct mdoc_arg { - int arg; - int line; - int pos; - size_t sz; - char **value; -}; - +/* Type of a syntax node. */ enum mdoc_type { MDOC_TEXT, MDOC_ELEM, @@ -278,141 +197,58 @@ enum mdoc_type { MDOC_ROOT }; -enum mdoc_msec { - MSEC_DEFAULT = 0, - MSEC_1, - MSEC_2, - MSEC_3, - MSEC_3f, - MSEC_3p, - MSEC_4, - MSEC_5, - MSEC_6, - MSEC_7, - MSEC_8, - MSEC_9, - MSEC_X11, - MSEC_X11R6, - MSEC_local, - MSEC_n, - MSEC_unass, - MSEC_draft, - MSEC_paper -}; - +/* Section (named/unnamed) of `Sh'. 
*/ enum mdoc_sec { - SEC_PROLOGUE = 0, - SEC_BODY, - SEC_NAME, - SEC_SYNOPSIS, - SEC_DESCRIPTION, - SEC_RETURN_VALUES, - SEC_ENVIRONMENT, - SEC_FILES, - SEC_EXAMPLES, - SEC_DIAGNOSTICS, - SEC_ERRORS, - SEC_SEE_ALSO, - SEC_STANDARDS, - SEC_HISTORY, - SEC_AUTHORS, - SEC_CAVEATS, - SEC_BUGS, + SEC_PROLOGUE = 0, + SEC_BODY = 1, + SEC_NAME = 2, + SEC_LIBRARY = 3, + SEC_SYNOPSIS = 4, + SEC_DESCRIPTION = 5, + SEC_IMPLEMENTATION = 6, + SEC_RETURN_VALUES = 7, + SEC_ENVIRONMENT = 8, + SEC_FILES = 9, + SEC_EXAMPLES = 10, + SEC_DIAGNOSTICS = 11, + SEC_COMPATIBILITY = 12, + SEC_ERRORS = 13, + SEC_SEE_ALSO = 14, + SEC_STANDARDS = 15, + SEC_HISTORY = 16, + SEC_AUTHORS = 17, + SEC_CAVEATS = 18, + SEC_BUGS = 19, SEC_CUSTOM }; -enum mdoc_vol { - VOL_DEFAULT = 0, - VOL_AMD, - VOL_IND, - VOL_KM, - VOL_LOCAL, - VOL_PRM, - VOL_PS1, - VOL_SMM, - VOL_URM, - VOL_USD -}; - -enum mdoc_arch { - ARCH_DEFAULT = 0, - ARCH_alpha, - ARCH_amd64, - ARCH_amiga, - ARCH_arc, - ARCH_armish, - ARCH_aviion, - ARCH_hp300, - ARCH_hppa, - ARCH_hppa64, - ARCH_i386, - ARCH_landisk, - ARCH_luna88k, - ARCH_mac68k, - ARCH_macppc, - ARCH_mvme68k, - ARCH_mvme88k, - ARCH_mvmeppc, - ARCH_pmax, - ARCH_sgi, - ARCH_socppc, - ARCH_sparc, - ARCH_sparc64, - ARCH_sun3, - ARCH_vax, - ARCH_zaurus -}; - +/* Information from prologue. */ struct mdoc_meta { - enum mdoc_msec msec; - enum mdoc_vol vol; - enum mdoc_arch arch; + int msec; + char *vol; + char *arch; time_t date; -#define META_TITLE_SZ (64) - char title[META_TITLE_SZ]; -#define META_OS_SZ (64) - char os[META_OS_SZ]; + char *title; + char *os; + char *name; }; -struct mdoc_text { - char *string; +/* An argument to a macro (multiple values = `It -column'). 
*/ +struct mdoc_argv { + int arg; + int line; + int pos; + size_t sz; + char **value; }; -struct mdoc_block { - int tok; +struct mdoc_arg { size_t argc; - struct mdoc_arg *argv; + struct mdoc_argv *argv; + unsigned int refcnt; }; -struct mdoc_head { - int tok; -}; - -struct mdoc_tail { - int tok; -}; - -struct mdoc_body { - int tok; -}; - -struct mdoc_elem { - size_t sz; - char **args; - int tok; - size_t argc; - struct mdoc_arg *argv; -}; - -union mdoc_data { - struct mdoc_text text; - struct mdoc_elem elem; - struct mdoc_body body; - struct mdoc_head head; - struct mdoc_tail tail; - struct mdoc_block block; -}; - +/* Node in AST. */ struct mdoc_node { struct mdoc_node *parent; struct mdoc_node *child; @@ -420,29 +256,71 @@ struct mdoc_node { struct mdoc_node *prev; int line; int pos; + int tok; + int flags; +#define MDOC_VALID (1 << 0) +#define MDOC_ACTED (1 << 1) enum mdoc_type type; - union mdoc_data data; + enum mdoc_sec sec; + + /* FIXME: union/struct this with #defines. */ + struct mdoc_arg *args; /* BLOCK/ELEM */ + struct mdoc_node *head; /* BLOCK */ + struct mdoc_node *body; /* BLOCK */ + struct mdoc_node *tail; /* BLOCK */ + char *string; /* TEXT */ }; +#define MDOC_IGN_SCOPE (1 << 0) /* Ignore scope violations. */ +#define MDOC_IGN_ESCAPE (1 << 1) /* Ignore bad escape sequences. */ +#define MDOC_IGN_MACRO (1 << 2) /* Ignore unknown macros. */ + +/* Call-backs for parse messages. */ struct mdoc_cb { - int (*mdoc_err)(void *, int, int, enum mdoc_err); - int (*mdoc_warn)(void *, int, int, enum mdoc_warn); void (*mdoc_msg)(void *, int, int, const char *); + int (*mdoc_err)(void *, int, int, const char *); + int (*mdoc_warn)(void *, int, int, + enum mdoc_warn, const char *); }; +/* Global table of macro names (`Bd', `Ed', etc.). */ extern const char *const *mdoc_macronames; + +/* Global table of argument names (`column', `tag', etc.). */ extern const char *const *mdoc_argnames; __BEGIN_DECLS struct mdoc; +/* Free memory allocated with mdoc_alloc. 
*/ void mdoc_free(struct mdoc *); -struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *); + +/* Allocate a new parser instance. */ +struct mdoc *mdoc_alloc(void *, int, const struct mdoc_cb *); + +/* Set parse options. */ +void mdoc_setflags(struct mdoc *, int); + +/* Parse a single line in a stream (boolean retval). */ int mdoc_parseln(struct mdoc *, int, char *buf); -const struct mdoc_node - *mdoc_result(struct mdoc *); + +/* Get result first node (after mdoc_endparse!). */ +const struct mdoc_node *mdoc_node(const struct mdoc *); + +/* Get result meta-information (after mdoc_endparse!). */ +const struct mdoc_meta *mdoc_meta(const struct mdoc *); + +/* Signal end of parse sequence (boolean retval). */ int mdoc_endparse(struct mdoc *); + +/* The following are utility functions. */ + +const char *mdoc_a2att(const char *); +const char *mdoc_a2lib(const char *); +const char *mdoc_a2st(const char *); + +int mdoc_isdelim(const char *); __END_DECLS