=================================================================== RCS file: /cvs/mandoc/mdoc.h,v retrieving revision 1.1 retrieving revision 1.33 diff -u -p -r1.1 -r1.33 --- mandoc/mdoc.h 2008/12/15 01:54:58 1.1 +++ mandoc/mdoc.h 2009/02/23 09:33:34 1.33 @@ -1,4 +1,4 @@ -/* $Id: mdoc.h,v 1.1 2008/12/15 01:54:58 kristaps Exp $ */ +/* $Id: mdoc.h,v 1.33 2009/02/23 09:33:34 kristaps Exp $ */ /* * Copyright (c) 2008 Kristaps Dzonsons * @@ -19,8 +19,16 @@ #ifndef MDOC_H #define MDOC_H -#define MDOC_LINEARG_MAX 9 +/* + * This library implements a validating scanner/parser for ``mdoc'' roff + * macro documents, a.k.a. BSD manual page documents. The mdoc.c file + * drives the parser, while macro.c describes the macro ontologies. + * validate.c pre- and post-validates parsed macros, and action.c + * performs actions on parsed and validated macros. + */ +/* What follows is a list of ALL possible macros. */ + #define MDOC___ 0 #define MDOC_Dd 1 #define MDOC_Dt 2 @@ -129,6 +137,8 @@ #define MDOC_Ud 105 #define MDOC_MAX 106 +/* What follows is a list of ALL possible macro arguments. */ + #define MDOC_Split 0 #define MDOC_Nosplit 1 #define MDOC_Ragged 2 @@ -189,100 +199,241 @@ #define MDOC_svid4 57 #define MDOC_Filled 58 #define MDOC_Words 59 -#define MDOC_ARG_MAX 60 +#define MDOC_Emphasis 60 +#define MDOC_Symbolic 61 +#define MDOC_ARG_MAX 62 -enum mdoc_err { - ERR_SYNTAX_QUOTE, - ERR_SYNTAX_WS, - ERR_MACRO_NOTSUP, - ERR_MACRO_NOTCALL, - ERR_SCOPE_BREAK, - ERR_ARGS_GE1 +/* Warnings are either syntax or groff-compatibility. */ +enum mdoc_warn { + WARN_SYNTAX, + WARN_COMPAT }; -enum mdoc_warn { - WARN_SYNTAX_WS_EOLN, - WARN_SYNTAX_MACLIKE, - WARN_ARGS_GE1 +/* Possible values for the `At' macro. */ +enum mdoc_att { + ATT_DEFAULT = 0, + ATT_v1, + ATT_v2, + ATT_v3, + ATT_v4, + ATT_v5, + ATT_v6, + ATT_v7, + ATT_32v, + ATT_V1, + ATT_V2, + ATT_V3, + ATT_V4 }; +/* An argument to a macro (multiple values = `It -column'). */ struct mdoc_arg { int arg; + int line; + int pos; size_t sz; char **value; }; +/* Type of a syntax node. */ enum mdoc_type { MDOC_TEXT, MDOC_ELEM, MDOC_HEAD, + MDOC_TAIL, MDOC_BODY, - MDOC_BLOCK + MDOC_BLOCK, + MDOC_ROOT }; +/* Manual section. */ +enum mdoc_msec { + MSEC_DEFAULT = 0, + MSEC_1, + MSEC_2, + MSEC_3, + MSEC_3f, + MSEC_3p, + MSEC_4, + MSEC_5, + MSEC_6, + MSEC_7, + MSEC_8, + MSEC_9, + MSEC_X11, + MSEC_X11R6, + MSEC_local, + MSEC_n, + MSEC_unass, + MSEC_draft, + MSEC_paper +}; + +/* Section (named/unnamed) of `Ss'. */ +enum mdoc_sec { + SEC_PROLOGUE = 0, + SEC_BODY, + SEC_NAME, + SEC_SYNOPSIS, + SEC_DESCRIPTION, + SEC_RETURN_VALUES, + SEC_ENVIRONMENT, + SEC_FILES, + SEC_EXAMPLES, + SEC_DIAGNOSTICS, + SEC_ERRORS, + SEC_SEE_ALSO, + SEC_STANDARDS, + SEC_HISTORY, + SEC_AUTHORS, + SEC_CAVEATS, + SEC_BUGS, + SEC_CUSTOM +}; + +/* Volume of `Dt'. */ +enum mdoc_vol { + VOL_DEFAULT = 0, + VOL_AMD, + VOL_IND, + VOL_KM, + VOL_LOCAL, + VOL_PRM, + VOL_PS1, + VOL_SMM, + VOL_URM, + VOL_USD +}; + +/* Architecture of `Dt'. */ +enum mdoc_arch { + ARCH_DEFAULT = 0, + ARCH_alpha, + ARCH_amd64, + ARCH_amiga, + ARCH_arc, + ARCH_arm, + ARCH_armish, + ARCH_aviion, + ARCH_hp300, + ARCH_hppa, + ARCH_hppa64, + ARCH_i386, + ARCH_landisk, + ARCH_luna88k, + ARCH_mac68k, + ARCH_macppc, + ARCH_mvme68k, + ARCH_mvme88k, + ARCH_mvmeppc, + ARCH_pmax, + ARCH_sgi, + ARCH_socppc, + ARCH_sparc, + ARCH_sparc64, + ARCH_sun3, + ARCH_vax, + ARCH_zaurus +}; + +/* Information from prologue. */ +struct mdoc_meta { + enum mdoc_msec msec; + enum mdoc_vol vol; + enum mdoc_arch arch; + time_t date; + char *title; + char *os; + char *name; +}; + +/* Text-only node. */ struct mdoc_text { char *string; }; +/* Block (scoped) node. */ struct mdoc_block { - int tok; size_t argc; struct mdoc_arg *argv; + struct mdoc_node *head; + struct mdoc_node *body; + struct mdoc_node *tail; }; -struct mdoc_head { - size_t sz; - char **args; - int tok; -}; - -struct mdoc_body { - int tok; -}; - +/* In-line element node. */ struct mdoc_elem { size_t sz; char **args; - int tok; size_t argc; struct mdoc_arg *argv; }; +/* Typed nodes of an AST node. */ union mdoc_data { struct mdoc_text text; struct mdoc_elem elem; - struct mdoc_body body; - struct mdoc_head head; struct mdoc_block block; }; +/* Node in AST. */ struct mdoc_node { struct mdoc_node *parent; struct mdoc_node *child; struct mdoc_node *next; + struct mdoc_node *prev; + int line; + int pos; + int tok; + int flags; +#define MDOC_VALID (1 << 0) +#define MDOC_ACTED (1 << 1) enum mdoc_type type; union mdoc_data data; + enum mdoc_sec sec; }; +/* Call-backs for parse messages. */ struct mdoc_cb { - int (*mdoc_err)(void *, int, int, enum mdoc_err); - int (*mdoc_warn)(void *, int, int, enum mdoc_warn); - void (*mdoc_msg)(void *, int, const char *); + void (*mdoc_msg)(void *, int, int, const char *); + int (*mdoc_err)(void *, int, int, const char *); + int (*mdoc_warn)(void *, int, int, + enum mdoc_warn, const char *); }; +/* Global table of macro names (`Bd', `Ed', etc.). */ extern const char *const *mdoc_macronames; + +/* Global table of argument names (`column', `tag', etc.). */ extern const char *const *mdoc_argnames; __BEGIN_DECLS struct mdoc; +/* Free memory allocated with mdoc_alloc. */ void mdoc_free(struct mdoc *); + +/* Allocate a new parser instance. */ struct mdoc *mdoc_alloc(void *data, const struct mdoc_cb *); -int mdoc_parseln(struct mdoc *, char *buf); -const struct mdoc_node - *mdoc_result(struct mdoc *); +/* Parse a single line in a stream (boolean retval). */ +int mdoc_parseln(struct mdoc *, int, char *buf); + +/* Get result first node (after mdoc_endparse!). */ +const struct mdoc_node *mdoc_node(const struct mdoc *); + +/* Get result meta-information (after mdoc_endparse!). */ +const struct mdoc_meta *mdoc_meta(const struct mdoc *); + +/* Signal end of parse sequence (boolean retval). */ +int mdoc_endparse(struct mdoc *); + +/* The following are utility functions. */ +const char *mdoc_arch2a(enum mdoc_arch); +const char *mdoc_vol2a(enum mdoc_vol); +const char *mdoc_msec2a(enum mdoc_msec); +int mdoc_isdelim(const char *); __END_DECLS