💾 Archived View for gmi.noulin.net › gitRepositories › md4c › file › md2html › render_html.c.gmi captured on 2024-08-18 at 18:50:18. Gemini links have been rewritten to link to archived content
⬅️ Previous capture (2023-01-29)
-=-=-=-=-=-=-
render_html.c (16141B)
1 /* 2 * MD4C: Markdown parser for C 3 * (http://github.com/mity/md4c) 4 * 5 * Copyright (c) 2016-2017 Martin Mitas 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 * IN THE SOFTWARE. 24 */ 25 26 #include <stdio.h> 27 #include <string.h> 28 29 #include "render_html.h" 30 #include "entity.h" 31 32 33 #ifdef _MSC_VER 34 /* MSVC does not understand "inline" when building as pure C (not C++). 35 * However it understands "__inline" */ 36 #ifndef __cplusplus 37 #define inline __inline 38 #endif 39 #endif 40 41 #ifdef _WIN32 42 #define snprintf _snprintf 43 #endif 44 45 46 47 typedef struct MD_RENDER_HTML_tag MD_RENDER_HTML; 48 struct MD_RENDER_HTML_tag { 49 void (*process_output)(const MD_CHAR*, MD_SIZE, void*); 50 void* userdata; 51 unsigned flags; 52 int image_nesting_level; 53 }; 54 55 56 /***************************************** 57 *** HTML rendering helper functions *** 58 *****************************************/ 59 60 #define ISDIGIT(ch) ('0' <= (ch) && (ch) <= '9') 61 #define ISLOWER(ch) ('a' <= (ch) && (ch) <= 'z') 62 #define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z') 63 #define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch)) 64 65 66 static inline void 67 render_text(MD_RENDER_HTML* r, const MD_CHAR* text, MD_SIZE size) 68 { 69 r->process_output(text, size, r->userdata); 70 } 71 72 #define RENDER_LITERAL(r, literal) render_text((r), (literal), strlen(literal)) 73 74 75 static void 76 render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size) 77 { 78 MD_OFFSET beg = 0; 79 MD_OFFSET off = 0; 80 81 /* Some characters need to be escaped in normal HTML text. */ 82 #define HTML_NEED_ESCAPE(ch) \ 83 ((ch) == '&' || (ch) == '<' || (ch) == '>' || (ch) == '"') 84 85 while(1) { 86 while(off < size && !HTML_NEED_ESCAPE(data[off])) 87 off++; 88 if(off > beg) 89 render_text(r, data + beg, off - beg); 90 91 if(off < size) { 92 switch(data[off]) { 93 case '&': RENDER_LITERAL(r, "&"); break; 94 case '<': RENDER_LITERAL(r, "<"); break; 95 case '>': RENDER_LITERAL(r, ">"); break; 96 case '"': RENDER_LITERAL(r, """); break; 97 } 98 off++; 99 } else { 100 break; 101 } 102 beg = off; 103 } 104 } 105 106 static void 107 render_url_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size) 108 { 109 static const MD_CHAR hex_chars[] = "0123456789ABCDEF"; 110 MD_OFFSET beg = 0; 111 MD_OFFSET off = 0; 112 113 #define URL_NEED_ESCAPE(ch) \ 114 (!ISALNUM(ch) && strchr("-_.+!*'(),%#@?=;:/,+$", ch) == NULL) 115 116 while(1) { 117 while(off < size && !URL_NEED_ESCAPE(data[off])) 118 off++; 119 if(off > beg) 120 render_text(r, data + beg, off - beg); 121 122 if(off < size) { 123 char hex[3]; 124 125 switch(data[off]) { 126 case '&': RENDER_LITERAL(r, "&"); break; 127 case '\'': RENDER_LITERAL(r, "'"); break; 128 default: 129 hex[0] = '%'; 130 hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf]; 131 hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf]; 132 render_text(r, hex, 3); 133 break; 134 } 135 off++; 136 } else { 137 break; 138 } 139 140 beg = off; 141 } 142 } 143 144 static unsigned 145 hex_val(char ch) 146 { 147 if('0' <= ch && ch <= '9') 148 return ch - '0'; 149 if('A' <= ch && ch <= 'Z') 150 return ch - 'A' + 10; 151 else 152 return ch - 'a' + 10; 153 } 154 155 static void 156 render_utf8_codepoint(MD_RENDER_HTML* r, unsigned codepoint, 157 void (*fn_append)(MD_RENDER_HTML*, const MD_CHAR*, MD_SIZE)) 158 { 159 static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd }; 160 161 unsigned char utf8[4]; 162 size_t n; 163 164 if(codepoint <= 0x7f) { 165 n = 1; 166 utf8[0] = codepoint; 167 } else if(codepoint <= 0x7ff) { 168 n = 2; 169 utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); 170 utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); 171 } else if(codepoint <= 0xffff) { 172 n = 3; 173 utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); 174 utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); 175 utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); 176 } else { 177 n = 4; 178 utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); 179 utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); 180 utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); 181 utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); 182 } 183 184 if(0 < codepoint && codepoint <= 0x10ffff) 185 fn_append(r, (char*)utf8, n); 186 else 187 fn_append(r, utf8_replacement_char, 3); 188 } 189 190 /* Translate entity to its UTF-8 equivalent, or output the verbatim one 191 * if such entity is unknown (or if the translation is disabled). */ 192 static void 193 render_entity(MD_RENDER_HTML* r, const MD_CHAR* text, MD_SIZE size, 194 void (*fn_append)(MD_RENDER_HTML*, const MD_CHAR*, MD_SIZE)) 195 { 196 if(r->flags & MD_RENDER_FLAG_VERBATIM_ENTITIES) { 197 fn_append(r, text, size); 198 return; 199 } 200 201 /* We assume UTF-8 output is what is desired. */ 202 if(size > 3 && text[1] == '#') { 203 unsigned codepoint = 0; 204 205 if(text[2] == 'x' || text[2] == 'X') { 206 /* Hexadecimal entity (e.g. "�")). */ 207 MD_SIZE i; 208 for(i = 3; i < size-1; i++) 209 codepoint = 16 * codepoint + hex_val(text[i]); 210 } else { 211 /* Decimal entity (e.g. "&1234;") */ 212 MD_SIZE i; 213 for(i = 2; i < size-1; i++) 214 codepoint = 10 * codepoint + (text[i] - '0'); 215 } 216 217 render_utf8_codepoint(r, codepoint, fn_append); 218 return; 219 } else { 220 /* Named entity (e.g. " "). */ 221 const struct entity* ent; 222 223 ent = entity_lookup(text, size); 224 if(ent != NULL) { 225 render_utf8_codepoint(r, ent->codepoints[0], fn_append); 226 if(ent->codepoints[1]) 227 render_utf8_codepoint(r, ent->codepoints[1], fn_append); 228 return; 229 } 230 } 231 232 fn_append(r, text, size); 233 } 234 235 static void 236 render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr, 237 void (*fn_append)(MD_RENDER_HTML*, const MD_CHAR*, MD_SIZE)) 238 { 239 int i; 240 241 for(i = 0; attr->substr_offsets[i] < attr->size; i++) { 242 MD_TEXTTYPE type = attr->substr_types[i]; 243 MD_OFFSET off = attr->substr_offsets[i]; 244 MD_SIZE size = attr->substr_offsets[i+1] - off; 245 const MD_CHAR* text = attr->text + off; 246 247 switch(type) { 248 case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_text); break; 249 case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break; 250 default: fn_append(r, text, size); break; 251 } 252 } 253 } 254 255 256 static void 257 render_open_ol_block(MD_RENDER_HTML* r, const MD_BLOCK_OL_DETAIL* det) 258 { 259 char buf[64]; 260 261 if(det->start == 1) { 262 RENDER_LITERAL(r, "<ol>\n"); 263 return; 264 } 265 266 snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start); 267 RENDER_LITERAL(r, buf); 268 } 269 270 static void 271 render_open_code_block(MD_RENDER_HTML* r, const MD_BLOCK_CODE_DETAIL* det) 272 { 273 RENDER_LITERAL(r, "<pre><code"); 274 275 /* If known, output the HTML 5 attribute class="language-LANGNAME". */ 276 if(det->lang.text != NULL) { 277 RENDER_LITERAL(r, " class=\"language-"); 278 render_attribute(r, &det->lang, render_html_escaped); 279 RENDER_LITERAL(r, "\""); 280 } 281 282 RENDER_LITERAL(r, ">"); 283 } 284 285 static void 286 render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det) 287 { 288 RENDER_LITERAL(r, "<"); 289 RENDER_LITERAL(r, cell_type); 290 291 switch(det->align) { 292 case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break; 293 case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break; 294 case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break; 295 default: RENDER_LITERAL(r, ">"); break; 296 } 297 } 298 299 static void 300 render_open_a_span(MD_RENDER_HTML* r, const MD_SPAN_A_DETAIL* det) 301 { 302 RENDER_LITERAL(r, "<a href=\""); 303 render_attribute(r, &det->href, render_url_escaped); 304 305 if(det->title.text != NULL) { 306 RENDER_LITERAL(r, "\" title=\""); 307 render_attribute(r, &det->title, render_html_escaped); 308 } 309 310 RENDER_LITERAL(r, "\">"); 311 } 312 313 static void 314 render_open_img_span(MD_RENDER_HTML* r, const MD_SPAN_IMG_DETAIL* det) 315 { 316 RENDER_LITERAL(r, "<img src=\""); 317 render_attribute(r, &det->src, render_url_escaped); 318 319 RENDER_LITERAL(r, "\" alt=\""); 320 321 r->image_nesting_level++; 322 } 323 324 static void 325 render_close_img_span(MD_RENDER_HTML* r, const MD_SPAN_IMG_DETAIL* det) 326 { 327 if(det->title.text != NULL) { 328 RENDER_LITERAL(r, "\" title=\""); 329 render_attribute(r, &det->title, render_html_escaped); 330 } 331 332 RENDER_LITERAL(r, "\">"); 333 334 r->image_nesting_level--; 335 } 336 337 338 /************************************** 339 *** HTML renderer implementation *** 340 **************************************/ 341 342 static int 343 enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) 344 { 345 static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" }; 346 MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; 347 348 switch(type) { 349 case MD_BLOCK_DOC: /* noop */ break; 350 case MD_BLOCK_QUOTE: RENDER_LITERAL(r, "<blockquote>\n"); break; 351 case MD_BLOCK_UL: RENDER_LITERAL(r, "<ul>\n"); break; 352 case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break; 353 case MD_BLOCK_LI: RENDER_LITERAL(r, "<li>"); break; 354 case MD_BLOCK_HR: RENDER_LITERAL(r, "<hr>\n"); break; 355 case MD_BLOCK_H: RENDER_LITERAL(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; 356 case MD_BLOCK_CODE: render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail); break; 357 case MD_BLOCK_HTML: /* noop */ break; 358 case MD_BLOCK_P: RENDER_LITERAL(r, "<p>"); break; 359 case MD_BLOCK_TABLE: RENDER_LITERAL(r, "<table>\n"); break; 360 case MD_BLOCK_THEAD: RENDER_LITERAL(r, "<thead>\n"); break; 361 case MD_BLOCK_TBODY: RENDER_LITERAL(r, "<tbody>\n"); break; 362 case MD_BLOCK_TR: RENDER_LITERAL(r, "<tr>\n"); break; 363 case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break; 364 case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break; 365 } 366 367 return 0; 368 } 369 370 static int 371 leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) 372 { 373 static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" }; 374 MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; 375 376 switch(type) { 377 case MD_BLOCK_DOC: /*noop*/ break; 378 case MD_BLOCK_QUOTE: RENDER_LITERAL(r, "</blockquote>\n"); break; 379 case MD_BLOCK_UL: RENDER_LITERAL(r, "</ul>\n"); break; 380 case MD_BLOCK_OL: RENDER_LITERAL(r, "</ol>\n"); break; 381 case MD_BLOCK_LI: RENDER_LITERAL(r, "</li>\n"); break; 382 case MD_BLOCK_HR: /*noop*/ break; 383 case MD_BLOCK_H: RENDER_LITERAL(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; 384 case MD_BLOCK_CODE: RENDER_LITERAL(r, "</code></pre>\n"); break; 385 case MD_BLOCK_HTML: /* noop */ break; 386 case MD_BLOCK_P: RENDER_LITERAL(r, "</p>\n"); break; 387 case MD_BLOCK_TABLE: RENDER_LITERAL(r, "</table>\n"); break; 388 case MD_BLOCK_THEAD: RENDER_LITERAL(r, "</thead>\n"); break; 389 case MD_BLOCK_TBODY: RENDER_LITERAL(r, "</tbody>\n"); break; 390 case MD_BLOCK_TR: RENDER_LITERAL(r, "</tr>\n"); break; 391 case MD_BLOCK_TH: RENDER_LITERAL(r, "</th>\n"); break; 392 case MD_BLOCK_TD: RENDER_LITERAL(r, "</td>\n"); break; 393 } 394 395 return 0; 396 } 397 398 static int 399 enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) 400 { 401 MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; 402 403 if(r->image_nesting_level > 0) { 404 /* We are inside an image, i.e. rendering the ALT attribute of 405 * <IMG> tag. */ 406 return 0; 407 } 408 409 switch(type) { 410 case MD_SPAN_EM: RENDER_LITERAL(r, "<em>"); break; 411 case MD_SPAN_STRONG: RENDER_LITERAL(r, "<strong>"); break; 412 case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break; 413 case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; 414 case MD_SPAN_CODE: RENDER_LITERAL(r, "<code>"); break; 415 case MD_SPAN_DEL: RENDER_LITERAL(r, "<del>"); break; 416 } 417 418 return 0; 419 } 420 421 static int 422 leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) 423 { 424 MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; 425 426 if(r->image_nesting_level > 0) { 427 /* We are inside an image, i.e. rendering the ALT attribute of 428 * <IMG> tag. */ 429 if(r->image_nesting_level == 1 && type == MD_SPAN_IMG) 430 render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); 431 return 0; 432 } 433 434 switch(type) { 435 case MD_SPAN_EM: RENDER_LITERAL(r, "</em>"); break; 436 case MD_SPAN_STRONG: RENDER_LITERAL(r, "</strong>"); break; 437 case MD_SPAN_A: RENDER_LITERAL(r, "</a>"); break; 438 case MD_SPAN_IMG: /*noop, handled above*/ break; 439 case MD_SPAN_CODE: RENDER_LITERAL(r, "</code>"); break; 440 case MD_SPAN_DEL: RENDER_LITERAL(r, "</del>"); break; 441 } 442 443 return 0; 444 } 445 446 static int 447 text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) 448 { 449 MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; 450 451 switch(type) { 452 case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_text); break; 453 case MD_TEXT_BR: RENDER_LITERAL(r, (r->image_nesting_level == 0 ? "<br>\n" : " ")); break; 454 case MD_TEXT_SOFTBR: RENDER_LITERAL(r, (r->image_nesting_level == 0 ? "\n" : " ")); break; 455 case MD_TEXT_HTML: render_text(r, text, size); break; 456 case MD_TEXT_ENTITY: render_entity(r, text, size, render_html_escaped); break; 457 default: render_html_escaped(r, text, size); break; 458 } 459 460 return 0; 461 } 462 463 static void 464 debug_log_callback(const char* msg, void* userdata) 465 { 466 MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; 467 if(r->flags & MD_RENDER_FLAG_DEBUG) 468 fprintf(stderr, "MD4C: %s\n", msg); 469 } 470 471 int 472 md_render_html(const MD_CHAR* input, MD_SIZE input_size, 473 void (*process_output)(const MD_CHAR*, MD_SIZE, void*), 474 void* userdata, unsigned parser_flags, unsigned renderer_flags) 475 { 476 MD_RENDER_HTML render = { process_output, userdata, renderer_flags, 0 }; 477 478 MD_RENDERER renderer = { 479 enter_block_callback, 480 leave_block_callback, 481 enter_span_callback, 482 leave_span_callback, 483 text_callback, 484 debug_log_callback, 485 parser_flags 486 }; 487 488 return md_parse(input, input_size, &renderer, (void*) &render); 489 } 490