=================================================================== RCS file: /cvs/mandoc/mdoc.h,v retrieving revision 1.96 retrieving revision 1.113 diff -u -p -r1.96 -r1.113 --- mandoc/mdoc.h 2010/07/01 22:56:17 1.96 +++ mandoc/mdoc.h 2010/12/26 14:44:13 1.113 @@ -1,6 +1,6 @@ -/* $Id: mdoc.h,v 1.96 2010/07/01 22:56:17 kristaps Exp $ */ +/* $Id: mdoc.h,v 1.113 2010/12/26 14:44:13 kristaps Exp $ */ /* - * Copyright (c) 2008, 2009 Kristaps Dzonsons + * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,16 +17,9 @@ #ifndef MDOC_H #define MDOC_H -/* - * This library implements a validating scanner/parser for ``mdoc'' roff - * macro documents, a.k.a. BSD manual page documents. The mdoc.c file - * drives the parser, while macro.c describes the macro ontologies. - * validate.c pre- and post-validates parsed macros, and action.c - * performs actions on parsed and validated macros. +/* + * What follows is a list of ALL possible macros. */ - -/* What follows is a list of ALL possible macros. */ - enum mdoct { MDOC_Ap = 0, MDOC_Dd, @@ -153,39 +146,43 @@ enum mdoct { MDOC_MAX }; -/* What follows is a list of ALL possible macro arguments. */ +/* + * What follows is a list of ALL possible macro arguments. + */ +enum mdocargt { + MDOC_Split, + MDOC_Nosplit, + MDOC_Ragged, + MDOC_Unfilled, + MDOC_Literal, + MDOC_File, + MDOC_Offset, + MDOC_Bullet, + MDOC_Dash, + MDOC_Hyphen, + MDOC_Item, + MDOC_Enum, + MDOC_Tag, + MDOC_Diag, + MDOC_Hang, + MDOC_Ohang, + MDOC_Inset, + MDOC_Column, + MDOC_Width, + MDOC_Compact, + MDOC_Std, + MDOC_Filled, + MDOC_Words, + MDOC_Emphasis, + MDOC_Symbolic, + MDOC_Nested, + MDOC_Centred, + MDOC_ARG_MAX +}; -/* FIXME: make this into an enum. */ -#define MDOC_Split 0 -#define MDOC_Nosplit 1 -#define MDOC_Ragged 2 -#define MDOC_Unfilled 3 -#define MDOC_Literal 4 -#define MDOC_File 5 -#define MDOC_Offset 6 -#define MDOC_Bullet 7 -#define MDOC_Dash 8 -#define MDOC_Hyphen 9 -#define MDOC_Item 10 -#define MDOC_Enum 11 -#define MDOC_Tag 12 -#define MDOC_Diag 13 -#define MDOC_Hang 14 -#define MDOC_Ohang 15 -#define MDOC_Inset 16 -#define MDOC_Column 17 -#define MDOC_Width 18 -#define MDOC_Compact 19 -#define MDOC_Std 20 -#define MDOC_Filled 21 -#define MDOC_Words 22 -#define MDOC_Emphasis 23 -#define MDOC_Symbolic 24 -#define MDOC_Nested 25 -#define MDOC_Centred 26 -#define MDOC_ARG_MAX 27 - -/* Type of a syntax node. */ +/* + * Type of a syntax node. + */ enum mdoc_type { MDOC_TEXT, MDOC_ELEM, @@ -196,9 +193,12 @@ enum mdoc_type { MDOC_ROOT }; -/* Section (named/unnamed) of `Sh'. */ +/* + * Section (named/unnamed) of `Sh'. Note that these appear in the + * conventional order imposed by mdoc.7. + */ enum mdoc_sec { - SEC_NONE, /* No section, yet. */ + SEC_NONE = 0, /* No section, yet. */ SEC_NAME, SEC_LIBRARY, SEC_SYNOPSIS, @@ -219,42 +219,58 @@ enum mdoc_sec { SEC_CAVEATS, SEC_BUGS, SEC_SECURITY, - SEC_CUSTOM, /* User-defined. */ + SEC_CUSTOM, /* User-defined. */ SEC__MAX }; -/* Information from prologue. */ +/* + * Information from prologue. + */ struct mdoc_meta { - char *msec; - char *vol; - char *arch; - time_t date; - char *title; - char *os; - char *name; + char *msec; /* `Dt' section (1, 3p, etc.) */ + char *vol; /* `Dt' volume (implied) */ + char *arch; /* `Dt' arch (i386, etc.) */ + time_t date; /* `Dd' normalised date */ + char *title; /* `Dt' title (FOO, etc.) */ + char *os; /* `Os' system (OpenBSD, etc.) */ + char *name; /* leading `Nm' name */ }; -/* An argument to a macro (multiple values = `It -column'). */ +/* + * An argument to a macro (multiple values = `-column xxx yyy'). + */ struct mdoc_argv { - int arg; + enum mdocargt arg; /* type of argument */ int line; int pos; - size_t sz; - char **value; + size_t sz; /* elements in "value" */ + char **value; /* argument strings */ }; +/* + * Reference-counted macro arguments. These are refcounted because + * blocks have multiple instances of the same arguments spread across + * the HEAD, BODY, TAIL, and BLOCK node types. + */ struct mdoc_arg { size_t argc; struct mdoc_argv *argv; unsigned int refcnt; }; +/* + * Indicates that a BODY's formatting has ended, but the scope is still + * open. Used for syntax-broken blocks. + */ enum mdoc_endbody { ENDBODY_NOT = 0, - ENDBODY_SPACE, - ENDBODY_NOSPACE, + ENDBODY_SPACE, /* is broken: append a space */ + ENDBODY_NOSPACE /* is broken: don't append a space */ }; +/* + * Normalised `Bl' list type. + */ enum mdoc_list { LIST__NONE = 0, LIST_bullet, @@ -267,9 +283,13 @@ enum mdoc_list { LIST_inset, LIST_item, LIST_ohang, - LIST_tag + LIST_tag, + LIST_MAX }; +/* + * Normalised `Bd' display type. + */ enum mdoc_disp { DISP__NONE = 0, DISP_centred, @@ -279,23 +299,84 @@ enum mdoc_disp { DISP_literal }; +/* + * Normalised `An' splitting argument. + */ +enum mdoc_auth { + AUTH__NONE = 0, + AUTH_split, + AUTH_nosplit +}; + +/* + * Normalised `Bf' font type. + */ +enum mdoc_font { + FONT__NONE = 0, + FONT_Em, + FONT_Li, + FONT_Sy +}; + +/* + * Normalised arguments for `Bd'. + */ struct mdoc_bd { const char *offs; /* -offset */ enum mdoc_disp type; /* -ragged, etc. */ int comp; /* -compact */ }; +/* + * Normalised arguments for `Bl'. + */ struct mdoc_bl { const char *width; /* -width */ const char *offs; /* -offset */ enum mdoc_list type; /* -tag, -enum, etc. */ int comp; /* -compact */ + size_t ncols; /* -column arg count */ + const char **cols; /* -column val ptr */ }; -/* Node in AST. */ +/* + * Normalised arguments for `Bf'. + */ +struct mdoc_bf { + enum mdoc_font font; /* font */ +}; + +/* + * Normalised arguments for `An'. + */ +struct mdoc_an { + enum mdoc_auth auth; /* -split, etc. */ +}; + +struct mdoc_rs { + struct mdoc_node *child_J; /* pointer to %J */ +}; + +/* + * Consists of normalised node arguments. These should be used instead + * of iterating through the mdoc_arg pointers of a node: defaults are + * provided, etc. + */ +union mdoc_data { + struct mdoc_an An; + struct mdoc_bd Bd; + struct mdoc_bf Bf; + struct mdoc_bl Bl; + struct mdoc_rs Rs; +}; + +/* + * Single node in tree-linked AST. + */ struct mdoc_node { struct mdoc_node *parent; /* parent AST node */ struct mdoc_node *child; /* first child AST node */ + struct mdoc_node *last; /* last child AST node */ struct mdoc_node *next; /* sibling AST node */ struct mdoc_node *prev; /* prior sibling AST node */ int nchild; /* number children */ @@ -304,45 +385,42 @@ struct mdoc_node { enum mdoct tok; /* tok or MDOC__MAX if none */ int flags; #define MDOC_VALID (1 << 0) /* has been validated */ -#define MDOC_ACTED (1 << 1) /* has been acted upon */ #define MDOC_EOS (1 << 2) /* at sentence boundary */ #define MDOC_LINE (1 << 3) /* first macro/text on line */ #define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting */ #define MDOC_ENDED (1 << 5) /* rendering has been ended */ enum mdoc_type type; /* AST node type */ enum mdoc_sec sec; /* current named section */ + union mdoc_data *norm; /* normalised args */ /* FIXME: these can be union'd to shave a few bytes. */ - struct mdoc_arg *args; /* BLOCK/ELEM */ - struct mdoc_node *pending; /* BLOCK */ - struct mdoc_node *head; /* BLOCK */ - struct mdoc_node *body; /* BLOCK */ - struct mdoc_node *tail; /* BLOCK */ - char *string; /* TEXT */ - enum mdoc_endbody end; /* BODY */ - - union { - struct mdoc_bl *Bl; - struct mdoc_bd *Bd; - } data; + struct mdoc_arg *args; /* BLOCK/ELEM */ + struct mdoc_node *pending; /* BLOCK */ + struct mdoc_node *head; /* BLOCK */ + struct mdoc_node *body; /* BLOCK */ + struct mdoc_node *tail; /* BLOCK */ + char *string; /* TEXT */ + enum mdoc_endbody end; /* BODY */ }; -#define MDOC_IGN_SCOPE (1 << 0) /* Ignore scope violations. */ -#define MDOC_IGN_ESCAPE (1 << 1) /* Ignore bad escape sequences. */ -#define MDOC_IGN_MACRO (1 << 2) /* Ignore unknown macros. */ - -/* See mdoc.3 for documentation. */ - +/* + * Names of macros. Index is enum mdoct. Indexing into this returns + * the normalised name, e.g., mdoc_macronames[MDOC_Sh] -> "Sh". + */ extern const char *const *mdoc_macronames; + +/* + * Names of macro args. Index is enum mdocargt. Indexing into this + * returns the normalised name, e.g., mdoc_argnames[MDOC_File] -> + * "file". + */ extern const char *const *mdoc_argnames; __BEGIN_DECLS struct mdoc; -/* See mdoc.3 for documentation. */ - void mdoc_free(struct mdoc *); -struct mdoc *mdoc_alloc(struct regset *, void *, int, mandocmsg); +struct mdoc *mdoc_alloc(struct regset *, void *, mandocmsg); void mdoc_reset(struct mdoc *); int mdoc_parseln(struct mdoc *, int, char *, int); const struct mdoc_node *mdoc_node(const struct mdoc *);