=================================================================== RCS file: /cvs/mandoc/mdoc.h,v retrieving revision 1.89 retrieving revision 1.109 diff -u -p -r1.89 -r1.109 --- mandoc/mdoc.h 2010/06/13 21:02:48 1.89 +++ mandoc/mdoc.h 2010/12/22 11:15:16 1.109 @@ -1,6 +1,6 @@ -/* $Id: mdoc.h,v 1.89 2010/06/13 21:02:48 kristaps Exp $ */ +/* $Id: mdoc.h,v 1.109 2010/12/22 11:15:16 kristaps Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,16 +17,9 @@ #ifndef MDOC_H #define MDOC_H -/* - * This library implements a validating scanner/parser for ``mdoc'' roff - * macro documents, a.k.a. BSD manual page documents. The mdoc.c file - * drives the parser, while macro.c describes the macro ontologies. - * validate.c pre- and post-validates parsed macros, and action.c - * performs actions on parsed and validated macros. +/* + * What follows is a list of ALL possible macros. */ - -/* What follows is a list of ALL possible macros. */ - enum mdoct { MDOC_Ap = 0, MDOC_Dd, @@ -153,38 +146,43 @@ enum mdoct { MDOC_MAX }; -/* What follows is a list of ALL possible macro arguments. */ +/* + * What follows is a list of ALL possible macro arguments. + */ +enum mdocargt { + MDOC_Split, + MDOC_Nosplit, + MDOC_Ragged, + MDOC_Unfilled, + MDOC_Literal, + MDOC_File, + MDOC_Offset, + MDOC_Bullet, + MDOC_Dash, + MDOC_Hyphen, + MDOC_Item, + MDOC_Enum, + MDOC_Tag, + MDOC_Diag, + MDOC_Hang, + MDOC_Ohang, + MDOC_Inset, + MDOC_Column, + MDOC_Width, + MDOC_Compact, + MDOC_Std, + MDOC_Filled, + MDOC_Words, + MDOC_Emphasis, + MDOC_Symbolic, + MDOC_Nested, + MDOC_Centred, + MDOC_ARG_MAX +}; -#define MDOC_Split 0 -#define MDOC_Nosplit 1 -#define MDOC_Ragged 2 -#define MDOC_Unfilled 3 -#define MDOC_Literal 4 -#define MDOC_File 5 -#define MDOC_Offset 6 -#define MDOC_Bullet 7 -#define MDOC_Dash 8 -#define MDOC_Hyphen 9 -#define MDOC_Item 10 -#define MDOC_Enum 11 -#define MDOC_Tag 12 -#define MDOC_Diag 13 -#define MDOC_Hang 14 -#define MDOC_Ohang 15 -#define MDOC_Inset 16 -#define MDOC_Column 17 -#define MDOC_Width 18 -#define MDOC_Compact 19 -#define MDOC_Std 20 -#define MDOC_Filled 21 -#define MDOC_Words 22 -#define MDOC_Emphasis 23 -#define MDOC_Symbolic 24 -#define MDOC_Nested 25 -#define MDOC_Centred 26 -#define MDOC_ARG_MAX 27 - -/* Type of a syntax node. */ +/* + * Type of a syntax node. + */ enum mdoc_type { MDOC_TEXT, MDOC_ELEM, @@ -195,9 +193,12 @@ enum mdoc_type { MDOC_ROOT }; -/* Section (named/unnamed) of `Sh'. */ +/* + * Section (named/unnamed) of `Sh'. Note that these appear in the + * conventional order imposed by mdoc.7. + */ enum mdoc_sec { - SEC_NONE, /* No section, yet. */ + SEC_NONE = 0, /* No section, yet. */ SEC_NAME, SEC_LIBRARY, SEC_SYNOPSIS, @@ -218,36 +219,58 @@ enum mdoc_sec { SEC_CAVEATS, SEC_BUGS, SEC_SECURITY, - SEC_CUSTOM, /* User-defined. */ + SEC_CUSTOM, /* User-defined. */ SEC__MAX }; -/* Information from prologue. */ +/* + * Information from prologue. + */ struct mdoc_meta { - char *msec; - char *vol; - char *arch; - time_t date; - char *title; - char *os; - char *name; + char *msec; /* `Dt' section (1, 3p, etc.) */ + char *vol; /* `Dt' volume (implied) */ + char *arch; /* `Dt' arch (i386, etc.) */ + time_t date; /* `Dd' normalised date */ + char *title; /* `Dt' title (FOO, etc.) */ + char *os; /* `Os' system (OpenBSD, etc.) */ + char *name; /* leading `Nm' name */ }; -/* An argument to a macro (multiple values = `It -column'). */ +/* + * An argument to a macro (multiple values = `-column xxx yyy'). + */ struct mdoc_argv { - int arg; + enum mdocargt arg; /* type of argument */ int line; int pos; - size_t sz; - char **value; + size_t sz; /* elements in "value" */ + char **value; /* argument strings */ }; +/* + * Reference-counted macro arguments. These are refcounted because + * blocks have multiple instances of the same arguments spread across + * the HEAD, BODY, TAIL, and BLOCK node types. + */ struct mdoc_arg { size_t argc; struct mdoc_argv *argv; unsigned int refcnt; }; +/* + * Indicates that a BODY's formatting has ended, but the scope is still + * open. Used for syntax-broken blocks. + */ +enum mdoc_endbody { + ENDBODY_NOT = 0, + ENDBODY_SPACE, /* is broken: append a space */ + ENDBODY_NOSPACE /* is broken: don't append a space */ +}; + +/* + * Normalised `Bl' list type. + */ enum mdoc_list { LIST__NONE = 0, LIST_bullet, @@ -260,9 +283,13 @@ enum mdoc_list { LIST_inset, LIST_item, LIST_ohang, - LIST_tag + LIST_tag, + LIST_MAX }; +/* + * Normalised `Bd' display type. + */ enum mdoc_disp { DISP__NONE = 0, DISP_centred, @@ -272,23 +299,88 @@ enum mdoc_disp { DISP_literal }; +/* + * Normalised `An' splitting argument. + */ +enum mdoc_auth { + AUTH__NONE = 0, + AUTH_split, + AUTH_nosplit +}; + +/* + * Normalised `Bf' font type. + */ +enum mdoc_font { + FONT__NONE = 0, + FONT_Em, + FONT_Li, + FONT_Sy +}; + +/* + * Normalised arguments for `Bd'. + */ struct mdoc_bd { const char *offs; /* -offset */ enum mdoc_disp type; /* -ragged, etc. */ int comp; /* -compact */ }; +/* + * Normalised arguments for `Bl'. + */ struct mdoc_bl { const char *width; /* -width */ const char *offs; /* -offset */ enum mdoc_list type; /* -tag, -enum, etc. */ int comp; /* -compact */ + size_t ncols; /* -column arg count */ + const char **cols; /* -column val ptr */ }; -/* Node in AST. */ +/* + * Normalised arguments for `Bf'. + */ +struct mdoc_bf { + enum mdoc_font font; /* font */ +}; + +/* + * Normalised arguments for `An'. + */ +struct mdoc_an { + enum mdoc_auth auth; /* -split, etc. */ +}; + +/* + * Consists of normalised node arguments. These should be used instead + * of iterating through the mdoc_arg pointers of a node: defaults are + * provided, etc. + */ +union mdoc_data { + struct mdoc_an An; + struct mdoc_bd Bd; + struct mdoc_bf Bf; + struct mdoc_bl Bl; +}; + +/* + * Reference-counted structure for containing normalised arguments of + * certain macros (those listed in union mdoc_data). + */ +struct mdoc_norm { + int refcnt; + union mdoc_data d; +}; + +/* + * Single node in tree-linked AST. + */ struct mdoc_node { struct mdoc_node *parent; /* parent AST node */ struct mdoc_node *child; /* first child AST node */ + struct mdoc_node *last; /* last child AST node */ struct mdoc_node *next; /* sibling AST node */ struct mdoc_node *prev; /* prior sibling AST node */ int nchild; /* number children */ @@ -300,40 +392,40 @@ struct mdoc_node { #define MDOC_ACTED (1 << 1) /* has been acted upon */ #define MDOC_EOS (1 << 2) /* at sentence boundary */ #define MDOC_LINE (1 << 3) /* first macro/text on line */ +#define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting */ +#define MDOC_ENDED (1 << 5) /* rendering has been ended */ enum mdoc_type type; /* AST node type */ enum mdoc_sec sec; /* current named section */ - struct mdoc_arg *args; /* BLOCK/ELEM */ -#ifdef UGLY - struct mdoc_node *pending; /* BLOCK */ -#endif - struct mdoc_node *head; /* BLOCK */ - struct mdoc_node *body; /* BLOCK */ - struct mdoc_node *tail; /* BLOCK */ - char *string; /* TEXT */ - - union { - struct mdoc_bl Bl; - struct mdoc_bd Bd; - } data; + struct mdoc_norm *norm; /* ref-counted, normalised args */ + /* FIXME: these can be union'd to shave a few bytes. */ + struct mdoc_arg *args; /* BLOCK/ELEM */ + struct mdoc_node *pending; /* BLOCK */ + struct mdoc_node *head; /* BLOCK */ + struct mdoc_node *body; /* BLOCK */ + struct mdoc_node *tail; /* BLOCK */ + char *string; /* TEXT */ + enum mdoc_endbody end; /* BODY */ }; -#define MDOC_IGN_SCOPE (1 << 0) /* Ignore scope violations. */ -#define MDOC_IGN_ESCAPE (1 << 1) /* Ignore bad escape sequences. */ -#define MDOC_IGN_MACRO (1 << 2) /* Ignore unknown macros. */ - -/* See mdoc.3 for documentation. */ - +/* + * Names of macros. Index is enum mdoct. Indexing into this returns + * the normalised name, e.g., mdoc_macronames[MDOC_Sh] -> "Sh". + */ extern const char *const *mdoc_macronames; + +/* + * Names of macro args. Index is enum mdocargt. Indexing into this + * returns the normalised name, e.g., mdoc_argnames[MDOC_File] -> + * "file". + */ extern const char *const *mdoc_argnames; __BEGIN_DECLS struct mdoc; -/* See mdoc.3 for documentation. */ - void mdoc_free(struct mdoc *); -struct mdoc *mdoc_alloc(void *, int, mandocmsg); +struct mdoc *mdoc_alloc(struct regset *, void *, mandocmsg); void mdoc_reset(struct mdoc *); int mdoc_parseln(struct mdoc *, int, char *, int); const struct mdoc_node *mdoc_node(const struct mdoc *);