=================================================================== RCS file: /cvs/mandoc/mdoc_markdown.c,v retrieving revision 1.2 retrieving revision 1.31 diff -u -p -r1.2 -r1.31 --- mandoc/mdoc_markdown.c 2017/03/04 21:41:29 1.2 +++ mandoc/mdoc_markdown.c 2019/07/01 22:56:24 1.31 @@ -1,6 +1,6 @@ -/* $Id: mdoc_markdown.c,v 1.2 2017/03/04 21:41:29 schwarze Exp $ */ +/* $Id: mdoc_markdown.c,v 1.31 2019/07/01 22:56:24 schwarze Exp $ */ /* - * Copyright (c) 2017 Ingo Schwarze + * Copyright (c) 2017, 2018 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "mandoc_aux.h" @@ -43,14 +44,17 @@ static void md_rawword(const char *); static void md_word(const char *); static void md_named(const char *); static void md_char(unsigned char); +static void md_uri(const char *); static int md_cond_head(struct roff_node *); static int md_cond_body(struct roff_node *); +static int md_pre_abort(struct roff_node *); static int md_pre_raw(struct roff_node *); static int md_pre_word(struct roff_node *); static int md_pre_skip(struct roff_node *); static void md_pre_syn(struct roff_node *); +static int md_pre_An(struct roff_node *); static int md_pre_Ap(struct roff_node *); static int md_pre_Bd(struct roff_node *); static int md_pre_Bk(struct roff_node *); @@ -66,6 +70,7 @@ static int md_pre_Fo(struct roff_node *); static int md_pre_In(struct roff_node *); static int md_pre_It(struct roff_node *); static int md_pre_Lk(struct roff_node *); +static int md_pre_Mt(struct roff_node *); static int md_pre_Nd(struct roff_node *); static int md_pre_Nm(struct roff_node *); static int md_pre_No(struct roff_node *); @@ -89,6 +94,7 @@ static void md_post_En(struct roff_node *); static void md_post_Eo(struct roff_node *); static void md_post_Fa(struct roff_node *); static void md_post_Fd(struct roff_node *); +static void 
md_post_Fl(struct roff_node *); static void md_post_Fn(struct roff_node *); static void md_post_Fo(struct roff_node *); static void md_post_In(struct roff_node *); @@ -99,8 +105,7 @@ static void md_post_Pf(struct roff_node *); static void md_post_Vt(struct roff_node *); static void md_post__T(struct roff_node *); -static const struct md_act md_acts[MDOC_MAX + 1] = { - { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */ +static const struct md_act md_acts[MDOC_MAX - MDOC_Dd] = { { NULL, NULL, NULL, NULL, NULL }, /* Dd */ { NULL, NULL, NULL, NULL, NULL }, /* Dt */ { NULL, NULL, NULL, NULL, NULL }, /* Os */ @@ -115,7 +120,8 @@ static const struct md_act md_acts[MDOC_MAX + 1] = { { NULL, NULL, NULL, NULL, NULL }, /* El */ { NULL, md_pre_It, md_post_It, NULL, NULL }, /* It */ { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ad */ - { NULL, NULL, NULL, NULL, NULL }, /* An */ + { NULL, md_pre_An, NULL, NULL, NULL }, /* An */ + { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */ { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ar */ { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cd */ { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cm */ @@ -125,16 +131,16 @@ static const struct md_act md_acts[MDOC_MAX + 1] = { { NULL, NULL, NULL, NULL, NULL }, /* Ex */ { NULL, md_pre_Fa, md_post_Fa, NULL, NULL }, /* Fa */ { NULL, md_pre_Fd, md_post_Fd, "**", "**" }, /* Fd */ - { NULL, md_pre_raw, md_post_raw, "**-", "**" }, /* Fl */ + { NULL, md_pre_raw, md_post_Fl, "**-", "**" }, /* Fl */ { NULL, md_pre_Fn, md_post_Fn, NULL, NULL }, /* Fn */ { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ft */ { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ic */ - { NULL, md_pre_In, md_post_In, "*", "*" }, /* In */ + { NULL, md_pre_In, md_post_In, NULL, NULL }, /* In */ { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Li */ { md_cond_head, md_pre_Nd, NULL, NULL, NULL }, /* Nd */ { NULL, md_pre_Nm, md_post_Nm, "**", "**" }, /* Nm */ { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Op */ - { NULL, 
md_pre_Fd, md_post_raw, "*", "*" }, /* Ot */ + { NULL, md_pre_abort, NULL, NULL, NULL }, /* Ot */ { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Pa */ { NULL, NULL, NULL, NULL, NULL }, /* Rv */ { NULL, NULL, NULL, NULL, NULL }, /* St */ @@ -207,9 +213,9 @@ static const struct md_act md_acts[MDOC_MAX + 1] = { { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Fr */ { NULL, NULL, NULL, NULL, NULL }, /* Ud */ { NULL, NULL, md_post_Lb, NULL, NULL }, /* Lb */ - { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Lp */ + { NULL, md_pre_abort, NULL, NULL, NULL }, /* Lp */ { NULL, md_pre_Lk, NULL, NULL, NULL }, /* Lk */ - { NULL, md_pre_raw, md_post_raw, "<", ">" }, /* Mt */ + { NULL, md_pre_Mt, NULL, NULL, NULL }, /* Mt */ { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Brq */ { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Bro */ { NULL, NULL, NULL, NULL, NULL }, /* Brc */ @@ -218,13 +224,10 @@ static const struct md_act md_acts[MDOC_MAX + 1] = { { md_cond_body, md_pre_En, md_post_En, NULL, NULL }, /* En */ { NULL, NULL, NULL, NULL, NULL }, /* Dx */ { NULL, NULL, md_post_pc, NULL, NULL }, /* %Q */ - { NULL, md_pre_br, NULL, NULL, NULL }, /* br */ - { NULL, md_pre_Pp, NULL, NULL, NULL }, /* sp */ - { NULL, NULL, md_post_pc, NULL, NULL }, /* %U */ + { NULL, md_pre_Lk, md_post_pc, NULL, NULL }, /* %U */ { NULL, NULL, NULL, NULL, NULL }, /* Ta */ - { NULL, NULL, NULL, NULL, NULL }, /* ll */ - { NULL, NULL, NULL, NULL, NULL }, /* ROOT */ }; +static const struct md_act *md_act(enum roff_tok); static int outflags; #define MD_spc (1 << 0) /* Blank character before next word. */ @@ -235,34 +238,44 @@ static int outflags; #define MD_sp (1 << 5) /* Insert a paragraph break. */ #define MD_Sm (1 << 6) /* Horizontal spacing mode. */ #define MD_Bk (1 << 7) /* Word keep mode. */ +#define MD_An_split (1 << 8) /* Author mode is "split". */ +#define MD_An_nosplit (1 << 9) /* Author mode is "nosplit". 
*/ static int escflags; /* Escape in generated markdown code: */ #define ESC_BOL (1 << 0) /* "#*+-" near the beginning of a line. */ #define ESC_NUM (1 << 1) /* "." after a leading number. */ #define ESC_HYP (1 << 2) /* "(" immediately after "]". */ -#define ESC_PAR (1 << 3) /* ")" when "(" is open. */ #define ESC_SQU (1 << 4) /* "]" when "[" is open. */ #define ESC_FON (1 << 5) /* "*" immediately after unrelated "*". */ +#define ESC_EOL (1 << 6) /* " " at the end of a line. */ static int code_blocks, quote_blocks, list_blocks; static int outcount; + +static const struct md_act * +md_act(enum roff_tok tok) +{ + assert(tok >= MDOC_Dd && tok <= MDOC_MAX); + return md_acts + (tok - MDOC_Dd); +} + void -markdown_mdoc(void *arg, const struct roff_man *mdoc) +markdown_mdoc(void *arg, const struct roff_meta *mdoc) { outflags = MD_Sm; - md_word(mdoc->meta.title); - if (mdoc->meta.msec != NULL) { + md_word(mdoc->title); + if (mdoc->msec != NULL) { outflags &= ~MD_spc; md_word("("); - md_word(mdoc->meta.msec); + md_word(mdoc->msec); md_word(")"); } md_word("-"); - md_word(mdoc->meta.vol); - if (mdoc->meta.arch != NULL) { + md_word(mdoc->vol); + if (mdoc->arch != NULL) { md_word("("); - md_word(mdoc->meta.arch); + md_word(mdoc->arch); md_word(")"); } outflags |= MD_sp; @@ -270,9 +283,9 @@ markdown_mdoc(void *arg, const struct roff_man *mdoc) md_nodelist(mdoc->first->child); outflags |= MD_sp; - md_word(mdoc->meta.os); + md_word(mdoc->os); md_word("-"); - md_word(mdoc->meta.date); + md_word(mdoc->date); putchar('\n'); } @@ -291,7 +304,7 @@ md_node(struct roff_node *n) const struct md_act *act; int cond, process_children; - if (n->flags & NODE_NOPRT) + if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) return; if (outflags & MD_nonl) @@ -304,8 +317,7 @@ md_node(struct roff_node *n) process_children = 1; n->flags &= ~NODE_ENDED; - switch (n->type) { - case ROFFT_TEXT: + if (n->type == ROFFT_TEXT) { if (n->flags & NODE_DELIMC) outflags &= ~(MD_spc | MD_spc_force); else if 
(outflags & MD_Sm) @@ -315,14 +327,24 @@ md_node(struct roff_node *n) outflags &= ~(MD_spc | MD_spc_force); else if (outflags & MD_Sm) outflags |= MD_spc; - break; - default: - act = md_acts + n->tok; + } else if (n->tok < ROFF_MAX) { + switch (n->tok) { + case ROFF_br: + process_children = md_pre_br(n); + break; + case ROFF_sp: + process_children = md_pre_Pp(n); + break; + default: + process_children = 0; + break; + } + } else { + act = md_act(n->tok); cond = act->cond == NULL || (*act->cond)(n); if (cond && act->pre != NULL && (n->end == ENDBODY_NOT || n->child != NULL)) process_children = (*act->pre)(n); - break; } if (process_children && n->child != NULL) @@ -370,37 +392,43 @@ md_stack(char c) static void md_preword(void) { + const char *cp; + /* * If a list block is nested inside a code block or a blockquote, * blank lines for paragraph breaks no longer work; instead, * they terminate the list. Work around this markdown issue * by using mere line breaks instead. */ + if (list_blocks && outflags & MD_sp) { outflags &= ~MD_sp; outflags |= MD_br; } - /* End the old line if requested. */ + /* + * End the old line if requested. + * Escape whitespace at the end of the markdown line + * such that it won't look like an output line break. + */ if (outflags & MD_sp) putchar('\n'); else if (outflags & MD_br) { putchar(' '); putchar(' '); -#ifdef DEBUG - putchar(':'); - putchar(':'); - putchar(' '); - putchar(' '); -#endif - } + } else if (outflags & MD_nl && escflags & ESC_EOL) + md_named("zwnj"); /* Start a new line if necessary. 
*/ if (outflags & (MD_nl | MD_br | MD_sp)) { putchar('\n'); - fputs(md_stack('\0'), stdout); + for (cp = md_stack('\0'); *cp != '\0'; cp++) { + putchar(*cp); + if (*cp == '>') + putchar(' '); + } outflags &= ~(MD_nl | MD_br | MD_sp); escflags = ESC_BOL; outcount = 0; @@ -433,7 +461,7 @@ md_rawword(const char *s) { md_preword(); - if (*s == 0) + if (*s == '\0') return; if (escflags & ESC_FON) { @@ -444,12 +472,6 @@ md_rawword(const char *s) while (*s != '\0') { switch(*s) { - case '(': - escflags |= ESC_PAR; - break; - case ')': - escflags |= ~ESC_PAR; - break; case '*': if (s[1] == '\0') escflags |= ESC_FON; @@ -466,6 +488,10 @@ md_rawword(const char *s) } md_char(*s++); } + if (s[-1] == ' ') + escflags |= ESC_EOL; + else + escflags &= ~ESC_EOL; } /* @@ -476,7 +502,7 @@ md_word(const char *s) { const char *seq, *prevfont, *currfont, *nextfont; char c; - int bs, sz, uc; + int bs, sz, uc, breakline; /* No spacing before closing delimiters. */ if (s[0] != '\0' && s[1] == '\0' && @@ -486,10 +512,14 @@ md_word(const char *s) md_preword(); + if (*s == '\0') + return; + /* No spacing after opening delimiters. 
*/ if ((s[0] == '(' || s[0] == '[') && s[1] == '\0') outflags &= ~MD_spc; + breakline = 0; prevfont = currfont = ""; while ((c = *s++) != '\0') { bs = 0; @@ -517,7 +547,7 @@ md_word(const char *s) bs = escflags & ESC_HYP && !code_blocks; break; case ')': - bs = escflags & ESC_PAR && !code_blocks; + bs = escflags & ESC_NUM && !code_blocks; break; case '*': case '[': @@ -559,6 +589,12 @@ md_word(const char *s) case ESCAPE_SPECIAL: uc = mchars_spec2cp(seq, sz); break; + case ESCAPE_UNDEF: + uc = *seq; + break; + case ESCAPE_DEVICE: + md_rawword("markdown"); + continue; case ESCAPE_FONTBOLD: nextfont = "**"; break; @@ -569,12 +605,16 @@ md_word(const char *s) nextfont = "***"; break; case ESCAPE_FONT: + case ESCAPE_FONTCW: case ESCAPE_FONTROMAN: nextfont = ""; break; case ESCAPE_FONTPREV: nextfont = prevfont; break; + case ESCAPE_BREAK: + breakline = 1; + break; case ESCAPE_NOSPACE: case ESCAPE_SKIPCHAR: case ESCAPE_OVERSTRIKE: @@ -622,11 +662,21 @@ md_word(const char *s) if (bs) putchar('\\'); md_char(c); + if (breakline && + (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) { + printf(" \n"); + breakline = 0; + while (*s == ' ' || *s == ASCII_NBRSP) + s++; + } } if (*currfont != '\0') { outflags &= ~MD_spc; md_rawword(currfont); - } + } else if (s[-2] == ' ') + escflags |= ESC_EOL; + else + escflags &= ~ESC_EOL; } /* @@ -636,7 +686,7 @@ static void md_named(const char *s) { printf("&%s;", s); - escflags &= ~ESC_FON; + escflags &= ~(ESC_FON | ESC_EOL); outcount++; } @@ -678,13 +728,21 @@ md_cond_body(struct roff_node *n) } static int +md_pre_abort(struct roff_node *n) +{ + abort(); +} + +static int md_pre_raw(struct roff_node *n) { const char *prefix; - if ((prefix = md_acts[n->tok].prefix) != NULL) { + if ((prefix = md_act(n->tok)->prefix) != NULL) { md_rawword(prefix); outflags &= ~MD_spc; + if (*prefix == '`') + code_blocks++; } return 1; } @@ -694,9 +752,11 @@ md_post_raw(struct roff_node *n) { const char *suffix; - if ((suffix = md_acts[n->tok].suffix) != NULL) { + 
if ((suffix = md_act(n->tok)->suffix) != NULL) { outflags &= ~(MD_spc | MD_nl); md_rawword(suffix); + if (*suffix == '`') + code_blocks--; } } @@ -705,7 +765,7 @@ md_pre_word(struct roff_node *n) { const char *prefix; - if ((prefix = md_acts[n->tok].prefix) != NULL) { + if ((prefix = md_act(n->tok)->prefix) != NULL) { md_word(prefix); outflags &= ~MD_spc; } @@ -717,7 +777,7 @@ md_post_word(struct roff_node *n) { const char *suffix; - if ((suffix = md_acts[n->tok].suffix) != NULL) { + if ((suffix = md_act(n->tok)->suffix) != NULL) { outflags &= ~(MD_spc | MD_nl); md_word(suffix); } @@ -782,6 +842,28 @@ md_pre_syn(struct roff_node *n) } static int +md_pre_An(struct roff_node *n) +{ + switch (n->norm->An.auth) { + case AUTH_split: + outflags &= ~MD_An_nosplit; + outflags |= MD_An_split; + return 0; + case AUTH_nosplit: + outflags &= ~MD_An_split; + outflags |= MD_An_nosplit; + return 0; + default: + if (outflags & MD_An_split) + outflags |= MD_br; + else if (n->sec == SEC_AUTHORS && + ! (outflags & MD_An_nosplit)) + outflags |= MD_An_split; + return 1; + } +} + +static int md_pre_Ap(struct roff_node *n) { outflags &= ~MD_spc; @@ -931,21 +1013,17 @@ md_pre_Eo(struct roff_node *n) static void md_post_Eo(struct roff_node *n) { - int body, tail; - if (n->end != ENDBODY_NOT) { outflags |= MD_spc; return; } - body = n->child != NULL || n->parent->head->child != NULL; - tail = n->parent->tail != NULL && n->parent->tail->child != NULL; + if (n->child == NULL && n->parent->head->child == NULL) + return; - if (body && tail) + if (n->parent->tail != NULL && n->parent->tail->child != NULL) outflags &= ~MD_spc; - else if ( ! (body || tail)) - md_preword(); - else if ( ! 
tail) + else outflags |= MD_spc; } @@ -993,6 +1071,15 @@ md_post_Fd(struct roff_node *n) outflags |= MD_br; } +static void +md_post_Fl(struct roff_node *n) +{ + md_post_raw(n); + if (n->child == NULL && n->next != NULL && + n->next->type != ROFFT_TEXT && !(n->next->flags & NODE_LINE)) + outflags &= ~MD_spc; +} + static int md_pre_Fn(struct roff_node *n) { @@ -1067,16 +1154,15 @@ md_pre_In(struct roff_node *n) { if (n->flags & NODE_SYNPRETTY) { md_pre_syn(n); - md_pre_raw(n); - md_rawword("*"); + md_rawword("**"); outflags &= ~MD_spc; md_word("#include <"); - outflags &= ~MD_spc; } else { md_word("<"); outflags &= ~MD_spc; - md_pre_raw(n); + md_rawword("*"); } + outflags &= ~MD_spc; return 1; } @@ -1085,13 +1171,11 @@ md_post_In(struct roff_node *n) { if (n->flags & NODE_SYNPRETTY) { outflags &= ~MD_spc; - md_rawword(">*"); - md_post_raw(n); + md_rawword(">**"); outflags |= MD_nl; } else { - md_post_raw(n); outflags &= ~MD_spc; - md_rawword(">"); + md_rawword("*>"); } } @@ -1106,7 +1190,8 @@ md_pre_It(struct roff_node *n) case ROFFT_HEAD: bln = n->parent->parent; - if (bln->norm->Bl.comp == 0) + if (bln->norm->Bl.comp == 0 && + bln->norm->Bl.type != LIST_column) outflags |= MD_sp; outflags |= MD_nl; @@ -1132,9 +1217,14 @@ md_pre_It(struct roff_node *n) break; case LIST_enum: md_preword(); - printf("%d.\t", ++bln->norm->Bl.count); + if (bln->norm->Bl.count < 99) + bln->norm->Bl.count++; + printf("%d.\t", bln->norm->Bl.count); escflags &= ~ESC_FON; break; + case LIST_column: + outflags |= MD_br; + return 0; default: return 0; } @@ -1200,7 +1290,7 @@ md_post_It(struct roff_node *n) while ((n = n->prev) != NULL && n->type != ROFFT_HEAD) i++; - /* + /* * If a width was specified for this column, * subtract what printed, and * add the same spacing as in mdoc_term.c. 
@@ -1231,34 +1321,85 @@ md_post_Lb(struct roff_node *n) outflags |= MD_br; } +static void +md_uri(const char *s) +{ + while (*s != '\0') { + if (strchr("%()<>", *s) != NULL) { + printf("%%%2.2hhX", *s); + outcount += 3; + } else { + putchar(*s); + outcount++; + } + s++; + } +} + static int md_pre_Lk(struct roff_node *n) { - const struct roff_node *link, *descr; + const struct roff_node *link, *descr, *punct; if ((link = n->child) == NULL) return 0; - if ((descr = link->next) != NULL) { - md_rawword("["); - outflags &= ~MD_spc; - while (descr != NULL) { - md_word(descr->string); - descr = descr->next; - } - outflags &= ~MD_spc; - md_rawword("]("); - } else - md_rawword("<"); + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + /* Link text. */ + descr = link->next; + if (descr == punct) + descr = link; /* no text */ + md_rawword("["); outflags &= ~MD_spc; - md_word(link->string); + do { + md_word(descr->string); + descr = descr->next; + } while (descr != punct); outflags &= ~MD_spc; - md_rawword(link->next == NULL ? ">" : ")"); + + /* Link target. */ + md_rawword("]("); + md_uri(link->string); + outflags &= ~MD_spc; + md_rawword(")"); + + /* Trailing punctuation. 
*/ + while (punct != NULL) { + md_word(punct->string); + punct = punct->next; + } return 0; } static int +md_pre_Mt(struct roff_node *n) +{ + const struct roff_node *nch; + + md_rawword("["); + outflags &= ~MD_spc; + for (nch = n->child; nch != NULL; nch = nch->next) + md_word(nch->string); + outflags &= ~MD_spc; + md_rawword("](mailto:"); + for (nch = n->child; nch != NULL; nch = nch->next) { + md_uri(nch->string); + if (nch->next != NULL) { + putchar(' '); + outcount++; + } + } + outflags &= ~MD_spc; + md_rawword(")"); + return 0; +} + +static int md_pre_Nd(struct roff_node *n) { outflags &= ~MD_nl; @@ -1341,6 +1482,10 @@ static int md_pre_Sh(struct roff_node *n) { switch (n->type) { + case ROFFT_BLOCK: + if (n->sec == SEC_AUTHORS) + outflags &= ~(MD_An_split | MD_An_nosplit); + break; case ROFFT_HEAD: outflags |= MD_sp; md_rawword(n->tok == MDOC_Sh ? "#" : "##");