💾 Archived View for gmi.noulin.net › gitRepositories › md2html › file › md2html.c.gmi captured on 2023-01-29 at 13:24:44. Gemini links have been rewritten to link to archived content

View Raw

More Information

-=-=-=-=-=-=-

md2html

Log

Files

Refs

README

md2html.c (11657B)

     1 /*
     2  * MD4C: Markdown parser for C
     3  * (http://github.com/mity/md4c)
     4  *
     5  * Copyright (c) 2016-2017 Martin Mitas
     6  *
     7  * Permission is hereby granted, free of charge, to any person obtaining a
     8  * copy of this software and associated documentation files (the "Software"),
     9  * to deal in the Software without restriction, including without limitation
    10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    11  * and/or sell copies of the Software, and to permit persons to whom the
    12  * Software is furnished to do so, subject to the following conditions:
    13  *
    14  * The above copyright notice and this permission notice shall be included in
    15  * all copies or substantial portions of the Software.
    16  *
    17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    23  * IN THE SOFTWARE.
    24  */
    25 
    26 #include <stdio.h>
    27 #include <stdlib.h>
    28 #include <string.h>
    29 #include <time.h>
    30 
    31 #include "render_html.h"
    32 #include "cmdline.h"
    33 
    34 
    35 
    36 /* Global options. */
    37 static unsigned parser_flags = 0;
    38 static unsigned renderer_flags = MD_RENDER_FLAG_DEBUG;
    39 static int want_fullhtml = 0;
    40 static int want_stat = 0;
    41 
    42 
    43 /*********************************
    44  ***  Simple grow-able buffer  ***
    45  *********************************/
    46 
    47 /* We render to a memory buffer instead of directly outputting the rendered
    48  * documents, as this allows using this utility for evaluating performance
    49  * of MD4C (--stat option). This allows us to measure just time of the parser,
    50  * without the I/O.
    51  */
    52 
    53 struct membuffer {
    54     char* data;
    55     MD_SIZE asize;
    56     MD_SIZE size;
    57 };
    58 
    59 static void
    60 membuf_init(struct membuffer* buf, MD_SIZE new_asize)
    61 {
    62     buf->size = 0;
    63     buf->asize = new_asize;
    64     buf->data = malloc(buf->asize);
    65     if(buf->data == NULL) {
    66         fprintf(stderr, "membuf_init: malloc() failed.");
    67         exit(1);
    68     }
    69 }
    70 
    71 static void
    72 membuf_fini(struct membuffer* buf)
    73 {
    74     if(buf->data)
    75         free(buf->data);
    76 }
    77 
    78 static void
    79 membuf_grow(struct membuffer* buf, MD_SIZE new_asize)
    80 {
    81     buf->data = realloc(buf->data, new_asize);
    82     if(buf->data == NULL) {
    83         fprintf(stderr, "membuf_grow: realloc() failed.");
    84         exit(1);
    85     }
    86     buf->asize = new_asize;
    87 }
    88 
    89 static void
    90 membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
    91 {
    92     if(buf->asize < buf->size + size)
    93         membuf_grow(buf, (buf->size + size) * 2);
    94     memcpy(buf->data + buf->size, data, size);
    95     buf->size += size;
    96 }
    97 
    98 
    99 /**********************
   100  ***  Main program  ***
   101  **********************/
   102 
   103 static void
   104 process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
   105 {
   106     membuf_append((struct membuffer*) userdata, text, size);
   107 }
   108 
   109 static int
   110 process_file(FILE* in, FILE* out)
   111 {
   112     MD_SIZE n;
   113     struct membuffer buf_in = {0};
   114     struct membuffer buf_out = {0};
   115     int ret = -1;
   116     clock_t t0, t1;
   117 
   118     membuf_init(&buf_in, 32 * 1024);
   119 
   120     /* Read the input file into a buffer. */
   121     while(1) {
   122         if(buf_in.size >= buf_in.asize)
   123             membuf_grow(&buf_in, 2 * buf_in.asize);
   124 
   125         n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
   126         if(n == 0)
   127             break;
   128         buf_in.size += n;
   129     }
   130 
   131     /* Input size is good estimation of output size. Add some more reserve to
   132      * deal with the HTML header/footer and tags. */
   133     membuf_init(&buf_out, buf_in.size + buf_in.size/8 + 64);
   134 
   135     /* Parse the document. This shall call our callbacks provided via the
   136      * md_renderer_t structure. */
   137     t0 = clock();
   138 
   139     ret = md_render_html(buf_in.data, buf_in.size, process_output,
   140                 (void*) &buf_out, parser_flags, renderer_flags);
   141 
   142     t1 = clock();
   143     if(ret != 0) {
   144         fprintf(stderr, "Parsing failed.\n");
   145         goto out;
   146     }
   147 
   148     /* Write down the document in the HTML format. */
   149     if(want_fullhtml) {
   150         fprintf(out, "<html>\n");
   151         fprintf(out, "<head>\n");
   152         fprintf(out, "<title></title>\n");
   153         fprintf(out, "<meta name=\"generator\" content=\"md2html\">\n");
   154         fprintf(out, "</head>\n");
   155         fprintf(out, "<body>\n");
   156     }
   157 
   158     fwrite(buf_out.data, 1, buf_out.size, out);
   159 
   160     if(want_fullhtml) {
   161         fprintf(out, "</body>\n");
   162         fprintf(out, "</html>\n");
   163     }
   164 
   165     if(want_stat) {
   166         if(t0 != (clock_t)-1  &&  t1 != (clock_t)-1) {
   167             double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
   168             if (elapsed < 1)
   169                 fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
   170             else
   171                 fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
   172         }
   173     }
   174 
   175     /* Success if we have reached here. */
   176     ret = 0;
   177 
   178 out:
   179     membuf_fini(&buf_in);
   180     membuf_fini(&buf_out);
   181 
   182     return ret;
   183 }
   184 
   185 
   186 #define OPTION_ARG_NONE         0
   187 #define OPTION_ARG_REQUIRED     1
   188 #define OPTION_ARG_OPTIONAL     2
   189 
   190 static const option cmdline_options[] = {
   191     { "output",                     'o', 'o', OPTION_ARG_REQUIRED },
   192     { "full-html",                  'f', 'f', OPTION_ARG_NONE },
   193     { "stat",                       's', 's', OPTION_ARG_NONE },
   194     { "help",                       'h', 'h', OPTION_ARG_NONE },
   195     { "version",                    'v', 'v', OPTION_ARG_NONE },
   196     { "commonmark",                  0,  'c', OPTION_ARG_NONE },
   197     { "github",                      0,  'g', OPTION_ARG_NONE },
   198     { "fverbatim-entities",          0,  'E', OPTION_ARG_NONE },
   199     { "fpermissive-atx-headers",     0,  'A', OPTION_ARG_NONE },
   200     { "fpermissive-url-autolinks",   0,  'U', OPTION_ARG_NONE },
   201     { "fpermissive-www-autolinks",   0,  '.', OPTION_ARG_NONE },
   202     { "fpermissive-email-autolinks", 0,  '@', OPTION_ARG_NONE },
   203     { "fpermissive-autolinks",       0,  'V', OPTION_ARG_NONE },
   204     { "fno-indented-code",           0,  'I', OPTION_ARG_NONE },
   205     { "fno-html-blocks",             0,  'F', OPTION_ARG_NONE },
   206     { "fno-html-spans",              0,  'G', OPTION_ARG_NONE },
   207     { "fno-html",                    0,  'H', OPTION_ARG_NONE },
   208     { "fcollapse-whitespace",        0,  'W', OPTION_ARG_NONE },
   209     { "ftables",                     0,  'T', OPTION_ARG_NONE },
   210     { "fstrikethrough",              0,  'S', OPTION_ARG_NONE },
   211     { 0 }
   212 };
   213 
   214 static void
   215 usage(void)
   216 {
   217     printf(
   218         "Usage: md2html [OPTION]... [FILE]\n"
   219         "Convert input FILE (or standard input) in Markdown format to HTML.\n"
   220         "\n"
   221         "General options:\n"
   222         "  -o  --output=FILE    Output file (default is standard output)\n"
   223         "  -f, --full-html      Generate full HTML document, including header\n"
   224         "  -s, --stat           Measure time of input parsing\n"
   225         "  -h, --help           Display this help and exit\n"
   226         "  -v, --version        Display version and exit\n"
   227         "\n"
   228         "Markdown dialect options:\n"
   229         "(note these are equivalent to some combinations of flags below)\n"
   230         "      --commonmark     CommonMark (this is default)\n"
   231         "      --github         Github Flavored Markdown\n"
   232         "\n"
   233         "Markdown extension options:\n"
   234         "      --fcollapse-whitespace\n"
   235         "                       Collapse non-trivial whitespace\n"
   236         "      --fverbatim-entities\n"
   237         "                       Do not translate entities\n"
   238         "      --fpermissive-atx-headers\n"
   239         "                       Allow ATX headers without delimiting space\n"
   240         "      --fpermissive-url-autolinks\n"
   241         "                       Allow URL autolinks without '<', '>'\n"
   242         "      --fpermissive-www-autolinks\n"
   243         "                       Allow WWW autolinks without any scheme (e.g. 'www.example.com')\n"
   244         "      --fpermissive-email-autolinks  \n"
   245         "                       Allow e-mail autolinks without '<', '>' and 'mailto:'\n"
   246         "      --fpermissive-autolinks\n"
   247         "                       Same as --fpermissive-url-autolinks --fpermissive-www-autolinks\n"
   248         "                       --fpermissive-email-autolinks\n"
   249         "      --fno-indented-code\n"
   250         "                       Disable indented code blocks\n"
   251         "      --fno-html-blocks\n"
   252         "                       Disable raw HTML blocks\n"
   253         "      --fno-html-spans\n"
   254         "                       Disable raw HTML spans\n"
   255         "      --fno-html       Same as --fno-html-blocks --fno-html-spans\n"
   256         "      --ftables        Enable tables\n"
   257         "      --fstrikethrough Enable strikethrough spans\n"
   258     );
   259 }
   260 
   261 static void
   262 version(void)
   263 {
   264     printf("%d.%d.%d\n", MD_VERSION_MAJOR, MD_VERSION_MINOR, MD_VERSION_RELEASE);
   265 }
   266 
   267 static const char* input_path = NULL;
   268 static const char* output_path = NULL;
   269 
   270 static int
   271 cmdline_callback(int opt, char const* value, void* data)
   272 {
   273     switch(opt) {
   274         case 0:
   275             if(input_path) {
   276                 fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
   277                 fprintf(stderr, "Use --help for more info.\n");
   278                 exit(1);
   279             }
   280             input_path = value;
   281             break;
   282 
   283         case 'o':   output_path = value; break;
   284         case 'f':   want_fullhtml = 1; break;
   285         case 's':   want_stat = 1; break;
   286         case 'h':   usage(); exit(0); break;
   287         case 'v':   version(); exit(0); break;
   288 
   289         case 'c':   parser_flags = MD_DIALECT_COMMONMARK; break;
   290         case 'g':   parser_flags = MD_DIALECT_GITHUB; break;
   291 
   292         case 'E':   renderer_flags |= MD_RENDER_FLAG_VERBATIM_ENTITIES; break;
   293         case 'A':   parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
   294         case 'I':   parser_flags |= MD_FLAG_NOINDENTEDCODEBLOCKS; break;
   295         case 'F':   parser_flags |= MD_FLAG_NOHTMLBLOCKS; break;
   296         case 'G':   parser_flags |= MD_FLAG_NOHTMLSPANS; break;
   297         case 'H':   parser_flags |= MD_FLAG_NOHTML; break;
   298         case 'W':   parser_flags |= MD_FLAG_COLLAPSEWHITESPACE; break;
   299         case 'U':   parser_flags |= MD_FLAG_PERMISSIVEURLAUTOLINKS; break;
   300         case '.':   parser_flags |= MD_FLAG_PERMISSIVEWWWAUTOLINKS; break;
   301         case '@':   parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break;
   302         case 'V':   parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
   303         case 'T':   parser_flags |= MD_FLAG_TABLES; break;
   304         case 'S':   parser_flags |= MD_FLAG_STRIKETHROUGH; break;
   305 
   306         default:
   307             fprintf(stderr, "Illegal option: %s\n", value);
   308             fprintf(stderr, "Use --help for more info.\n");
   309             exit(1);
   310             break;
   311     }
   312 
   313     return 0;
   314 }
   315 
   316 int
   317 main(int argc, char** argv)
   318 {
   319     FILE* in = stdin;
   320     FILE* out = stdout;
   321     int ret = 0;
   322 
   323     if(readoptions(cmdline_options, argc, argv, cmdline_callback, NULL) < 0) {
   324         usage();
   325         exit(1);
   326     }
   327 
   328     if(input_path != NULL && strcmp(input_path, "-") != 0) {
   329         in = fopen(input_path, "rb");
   330         if(in == NULL) {
   331             fprintf(stderr, "Cannot open %s.\n", input_path);
   332             exit(1);
   333         }
   334     }
   335     if(output_path != NULL && strcmp(output_path, "-") != 0) {
   336         out = fopen(output_path, "wt");
   337         if(out == NULL) {
   338             fprintf(stderr, "Cannot open %s.\n", input_path);
   339             exit(1);
   340         }
   341     }
   342 
   343     ret = process_file(in, out);
   344     if(in != stdin)
   345         fclose(in);
   346     if(out != stdout)
   347         fclose(out);
   348 
   349     return ret;
   350 }