💾 Archived View for gmi.noulin.net › gitRepositories › md4c › file › md2html › md2html.c.gmi captured on 2024-09-29 at 01:14:38. Gemini links have been rewritten to link to archived content
⬅️ Previous capture (2023-01-29)
-=-=-=-=-=-=-
md2html.c (11657B)
1 /* 2 * MD4C: Markdown parser for C 3 * (http://github.com/mity/md4c) 4 * 5 * Copyright (c) 2016-2017 Martin Mitas 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 * IN THE SOFTWARE. 24 */ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "render_html.h" 32 #include "cmdline.h" 33 34 35 36 /* Global options. */ 37 static unsigned parser_flags = 0; 38 static unsigned renderer_flags = MD_RENDER_FLAG_DEBUG; 39 static int want_fullhtml = 0; 40 static int want_stat = 0; 41 42 43 /********************************* 44 *** Simple grow-able buffer *** 45 *********************************/ 46 47 /* We render to a memory buffer instead of directly outputting the rendered 48 * documents, as this allows using this utility for evaluating performance 49 * of MD4C (--stat option). This allows us to measure just time of the parser, 50 * without the I/O. 51 */ 52 53 struct membuffer { 54 char* data; 55 MD_SIZE asize; 56 MD_SIZE size; 57 }; 58 59 static void 60 membuf_init(struct membuffer* buf, MD_SIZE new_asize) 61 { 62 buf->size = 0; 63 buf->asize = new_asize; 64 buf->data = malloc(buf->asize); 65 if(buf->data == NULL) { 66 fprintf(stderr, "membuf_init: malloc() failed."); 67 exit(1); 68 } 69 } 70 71 static void 72 membuf_fini(struct membuffer* buf) 73 { 74 if(buf->data) 75 free(buf->data); 76 } 77 78 static void 79 membuf_grow(struct membuffer* buf, MD_SIZE new_asize) 80 { 81 buf->data = realloc(buf->data, new_asize); 82 if(buf->data == NULL) { 83 fprintf(stderr, "membuf_grow: realloc() failed."); 84 exit(1); 85 } 86 buf->asize = new_asize; 87 } 88 89 static void 90 membuf_append(struct membuffer* buf, const char* data, MD_SIZE size) 91 { 92 if(buf->asize < buf->size + size) 93 membuf_grow(buf, (buf->size + size) * 2); 94 memcpy(buf->data + buf->size, data, size); 95 buf->size += size; 96 } 97 98 99 /********************** 100 *** Main program *** 101 **********************/ 102 103 static void 104 process_output(const MD_CHAR* text, MD_SIZE size, void* userdata) 105 { 106 membuf_append((struct membuffer*) userdata, text, size); 107 } 108 109 static int 110 process_file(FILE* in, FILE* out) 111 { 112 MD_SIZE n; 113 struct membuffer buf_in = {0}; 114 struct membuffer buf_out = {0}; 115 int ret = -1; 116 clock_t t0, t1; 117 118 membuf_init(&buf_in, 32 * 1024); 119 120 /* Read the input file into a buffer. */ 121 while(1) { 122 if(buf_in.size >= buf_in.asize) 123 membuf_grow(&buf_in, 2 * buf_in.asize); 124 125 n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in); 126 if(n == 0) 127 break; 128 buf_in.size += n; 129 } 130 131 /* Input size is good estimation of output size. Add some more reserve to 132 * deal with the HTML header/footer and tags. */ 133 membuf_init(&buf_out, buf_in.size + buf_in.size/8 + 64); 134 135 /* Parse the document. This shall call our callbacks provided via the 136 * md_renderer_t structure. */ 137 t0 = clock(); 138 139 ret = md_render_html(buf_in.data, buf_in.size, process_output, 140 (void*) &buf_out, parser_flags, renderer_flags); 141 142 t1 = clock(); 143 if(ret != 0) { 144 fprintf(stderr, "Parsing failed.\n"); 145 goto out; 146 } 147 148 /* Write down the document in the HTML format. */ 149 if(want_fullhtml) { 150 fprintf(out, "<html>\n"); 151 fprintf(out, "<head>\n"); 152 fprintf(out, "<title></title>\n"); 153 fprintf(out, "<meta name=\"generator\" content=\"md2html\">\n"); 154 fprintf(out, "</head>\n"); 155 fprintf(out, "<body>\n"); 156 } 157 158 fwrite(buf_out.data, 1, buf_out.size, out); 159 160 if(want_fullhtml) { 161 fprintf(out, "</body>\n"); 162 fprintf(out, "</html>\n"); 163 } 164 165 if(want_stat) { 166 if(t0 != (clock_t)-1 && t1 != (clock_t)-1) { 167 double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC; 168 if (elapsed < 1) 169 fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3); 170 else 171 fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed); 172 } 173 } 174 175 /* Success if we have reached here. */ 176 ret = 0; 177 178 out: 179 membuf_fini(&buf_in); 180 membuf_fini(&buf_out); 181 182 return ret; 183 } 184 185 186 #define OPTION_ARG_NONE 0 187 #define OPTION_ARG_REQUIRED 1 188 #define OPTION_ARG_OPTIONAL 2 189 190 static const option cmdline_options[] = { 191 { "output", 'o', 'o', OPTION_ARG_REQUIRED }, 192 { "full-html", 'f', 'f', OPTION_ARG_NONE }, 193 { "stat", 's', 's', OPTION_ARG_NONE }, 194 { "help", 'h', 'h', OPTION_ARG_NONE }, 195 { "version", 'v', 'v', OPTION_ARG_NONE }, 196 { "commonmark", 0, 'c', OPTION_ARG_NONE }, 197 { "github", 0, 'g', OPTION_ARG_NONE }, 198 { "fverbatim-entities", 0, 'E', OPTION_ARG_NONE }, 199 { "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE }, 200 { "fpermissive-url-autolinks", 0, 'U', OPTION_ARG_NONE }, 201 { "fpermissive-www-autolinks", 0, '.', OPTION_ARG_NONE }, 202 { "fpermissive-email-autolinks", 0, '@', OPTION_ARG_NONE }, 203 { "fpermissive-autolinks", 0, 'V', OPTION_ARG_NONE }, 204 { "fno-indented-code", 0, 'I', OPTION_ARG_NONE }, 205 { "fno-html-blocks", 0, 'F', OPTION_ARG_NONE }, 206 { "fno-html-spans", 0, 'G', OPTION_ARG_NONE }, 207 { "fno-html", 0, 'H', OPTION_ARG_NONE }, 208 { "fcollapse-whitespace", 0, 'W', OPTION_ARG_NONE }, 209 { "ftables", 0, 'T', OPTION_ARG_NONE }, 210 { "fstrikethrough", 0, 'S', OPTION_ARG_NONE }, 211 { 0 } 212 }; 213 214 static void 215 usage(void) 216 { 217 printf( 218 "Usage: md2html [OPTION]... [FILE]\n" 219 "Convert input FILE (or standard input) in Markdown format to HTML.\n" 220 "\n" 221 "General options:\n" 222 " -o --output=FILE Output file (default is standard output)\n" 223 " -f, --full-html Generate full HTML document, including header\n" 224 " -s, --stat Measure time of input parsing\n" 225 " -h, --help Display this help and exit\n" 226 " -v, --version Display version and exit\n" 227 "\n" 228 "Markdown dialect options:\n" 229 "(note these are equivalent to some combinations of flags below)\n" 230 " --commonmark CommonMark (this is default)\n" 231 " --github Github Flavored Markdown\n" 232 "\n" 233 "Markdown extension options:\n" 234 " --fcollapse-whitespace\n" 235 " Collapse non-trivial whitespace\n" 236 " --fverbatim-entities\n" 237 " Do not translate entities\n" 238 " --fpermissive-atx-headers\n" 239 " Allow ATX headers without delimiting space\n" 240 " --fpermissive-url-autolinks\n" 241 " Allow URL autolinks without '<', '>'\n" 242 " --fpermissive-www-autolinks\n" 243 " Allow WWW autolinks without any scheme (e.g. 'www.example.com')\n" 244 " --fpermissive-email-autolinks \n" 245 " Allow e-mail autolinks without '<', '>' and 'mailto:'\n" 246 " --fpermissive-autolinks\n" 247 " Same as --fpermissive-url-autolinks --fpermissive-www-autolinks\n" 248 " --fpermissive-email-autolinks\n" 249 " --fno-indented-code\n" 250 " Disable indented code blocks\n" 251 " --fno-html-blocks\n" 252 " Disable raw HTML blocks\n" 253 " --fno-html-spans\n" 254 " Disable raw HTML spans\n" 255 " --fno-html Same as --fno-html-blocks --fno-html-spans\n" 256 " --ftables Enable tables\n" 257 " --fstrikethrough Enable strikethrough spans\n" 258 ); 259 } 260 261 static void 262 version(void) 263 { 264 printf("%d.%d.%d\n", MD_VERSION_MAJOR, MD_VERSION_MINOR, MD_VERSION_RELEASE); 265 } 266 267 static const char* input_path = NULL; 268 static const char* output_path = NULL; 269 270 static int 271 cmdline_callback(int opt, char const* value, void* data) 272 { 273 switch(opt) { 274 case 0: 275 if(input_path) { 276 fprintf(stderr, "Too many arguments. Only one input file can be specified.\n"); 277 fprintf(stderr, "Use --help for more info.\n"); 278 exit(1); 279 } 280 input_path = value; 281 break; 282 283 case 'o': output_path = value; break; 284 case 'f': want_fullhtml = 1; break; 285 case 's': want_stat = 1; break; 286 case 'h': usage(); exit(0); break; 287 case 'v': version(); exit(0); break; 288 289 case 'c': parser_flags = MD_DIALECT_COMMONMARK; break; 290 case 'g': parser_flags = MD_DIALECT_GITHUB; break; 291 292 case 'E': renderer_flags |= MD_RENDER_FLAG_VERBATIM_ENTITIES; break; 293 case 'A': parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break; 294 case 'I': parser_flags |= MD_FLAG_NOINDENTEDCODEBLOCKS; break; 295 case 'F': parser_flags |= MD_FLAG_NOHTMLBLOCKS; break; 296 case 'G': parser_flags |= MD_FLAG_NOHTMLSPANS; break; 297 case 'H': parser_flags |= MD_FLAG_NOHTML; break; 298 case 'W': parser_flags |= MD_FLAG_COLLAPSEWHITESPACE; break; 299 case 'U': parser_flags |= MD_FLAG_PERMISSIVEURLAUTOLINKS; break; 300 case '.': parser_flags |= MD_FLAG_PERMISSIVEWWWAUTOLINKS; break; 301 case '@': parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break; 302 case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break; 303 case 'T': parser_flags |= MD_FLAG_TABLES; break; 304 case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break; 305 306 default: 307 fprintf(stderr, "Illegal option: %s\n", value); 308 fprintf(stderr, "Use --help for more info.\n"); 309 exit(1); 310 break; 311 } 312 313 return 0; 314 } 315 316 int 317 main(int argc, char** argv) 318 { 319 FILE* in = stdin; 320 FILE* out = stdout; 321 int ret = 0; 322 323 if(readoptions(cmdline_options, argc, argv, cmdline_callback, NULL) < 0) { 324 usage(); 325 exit(1); 326 } 327 328 if(input_path != NULL && strcmp(input_path, "-") != 0) { 329 in = fopen(input_path, "rb"); 330 if(in == NULL) { 331 fprintf(stderr, "Cannot open %s.\n", input_path); 332 exit(1); 333 } 334 } 335 if(output_path != NULL && strcmp(output_path, "-") != 0) { 336 out = fopen(output_path, "wt"); 337 if(out == NULL) { 338 fprintf(stderr, "Cannot open %s.\n", input_path); 339 exit(1); 340 } 341 } 342 343 ret = process_file(in, out); 344 if(in != stdin) 345 fclose(in); 346 if(out != stdout) 347 fclose(out); 348 349 return ret; 350 }