💾 Archived View for republic.circumlunar.space › users › grimmware › to_gem › to_gem.c captured on 2024-12-17 at 11:03:48.

View Raw

More Information

⬅️ Previous capture (2021-12-04)

-=-=-=-=-=-=-

/* to_gem
 * # What
 * Convert regular markdown to `text/gemini` in an opinionated manner
 *
 * # How
 * Reads from STDIN and writes to STDOUT by default, alternatively the input
 * file can be supplied as the first argument and the output file as the
 * second. In both positions if you supply "-" then the default will be used so
 * you can (e.g.) specify stdin and an outfile thusly: `to_gem - outfile`
 *
 * Text reflow is considered out of scope (for now at least), and you should
 * check the output manually for errors
 *
 * # Why
 * tl;dr I wanted the practice and to have fun!
 *
 * I write a lot of notes in markdown and I'd like to be able to trivially
 * publish them in `text/gemini` on my capsule.
 *
 * This is very much "artisanal" C, in that it targets minimalism and
 * terseness even though for the specified use case it's more in the spirit of
 * low resource usage and minimalism of gemini and anachronistic byte-shaving
 * than is actually sensible for the problem - text conversion is not an easy
 * problem in C compared to other languages and the bytes shaved are very much
 * a drop in the ocean for such a low-resource problem.
 *
 * But sometimes you just like to express how you wish the world could be :)
 *
 * # Opinions
 * The opinion is that for every link we should replace it with a numbered
 * reference to a link below the block of text or whatever.
 *
 * To do this, we need to iteratively read through text, and for everything
 * that looks like a markdown link we'll replace it with `[link text][N]` where
 * N will be the number of the link in the file. Then when we reach the next
 * block we'll insert the link lines *first* and then move on.
 *
 * To get the full lowdown on intended behaviour, compare testfile.md and
 * testfile.gmi
 *
 * Format rules:
 * - No closing square brackets in a link name - we could accommodate this
 *   or it could just be sensible markdown :P
 * - Links in blocks will be ignored
 * - Link references should already be URI escaped - otherwise this is
 *   undefined behaviour. I've added code to URI escape newlines and spaces to
 *   URI encoded spaces (%20) because my own markdown was riddled with this
 *   problem, but that's it
 * - Alt-text is not supported so it will cause a breakage due to the last
 *   point - this is due to ease of implementation rather than philosophy, as
 *   I'd prefer that alt-text *was* supported for accessibility.
 *
 * # Contributions
 * Contributions should aim to use minimal memory and avoid reading through the
 * input stream more than once. `valgrind` should be used to ensure that all
 * memory is freed, and all memory allocations should aim to be reasonably
 * dynamic so as to not hog resources unnecessarily.
 *
 * If you don't know where else to reach me, oholiab at grimmwa.re is the place
 * to email! I'll walk you through `git send-email` or other diff sending
 * workflows if you've not done it before :)
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size in bytes of the initial read buffer and of each growth step
#define CHUNK 128
// True while some collected links have not been printed yet. This expands in
// place, so `last_printed` and a non-NULL `linknames` must be in scope
// wherever it is used
#define lines_to_print last_printed < linknames->size
// Message reported when growing or shrinking a List fails
#define realloc_error "ERROR: Could not allocate memory! Exiting\n"

// Home-grown boolean: `false` is 0 and `true` is 1
enum boolean {false, true};

// Parser state, toggled each time a line starts with a ``` fence
enum modes {
  plaintext, // ordinary text: `[` starts a candidate link name
  block,     // between fences: `[` is copied through untouched
};

// Growable list of individually malloc'd, null terminated strings
typedef struct {
  char** strings; // the entries, in insertion order
  size_t size;    // number of entries currently held
} List;

static List* initList(const char* string, size_t size){
  /* Initialize a List holding a copy of `string` and return a pointer to it
   *
   * string: First entry as a null terminated string
   * size: Size of the string including its terminator (NOT of the intended
   *       list, it grows dynamically). If it is too small to hold `string`
   *       it is raised to fit.
   *
   * return: Pointer to a new List with exactly one entry. Exits with status 1
   *         if memory cannot be allocated. */
  size_t needed = strlen(string) + 1;
  if(size < needed) size = needed; // the strcpy below must never overrun

  List* l = malloc(sizeof *l);
  char** strings = malloc(sizeof *strings);
  char* first = malloc(size);
  if(l == NULL || strings == NULL || first == NULL){
    fputs("ERROR: Could not allocate memory! Exiting\n", stderr);
    exit(1);
  }

  strcpy(first, string);
  strings[0] = first;
  l->strings = strings;
  l->size = 1;
  return l;
}

static void freeList(List* l){
  /* Free a List together with every string it holds
   *
   * l: Pointer to a list. NULL is accepted and ignored, mirroring `free`, so
   *    callers need not check whether a list was ever created. */
  if(l == NULL) return;
  for(size_t i = 0; i < l->size; i++){
    free(l->strings[i]);
  }
  free(l->strings);
  free(l);
}

static void push(List* l, const char* string, size_t size){
  /* Push a copy of a string onto the end of the list
   *
   * l: Pointer to a list
   * string: Null terminated string to add to the list
   * size: Size of the string including its terminator. If it is too small to
   *       hold `string` it is raised to fit.
   *
   * Exits with status 1 if memory cannot be allocated. */
  size_t needed = strlen(string) + 1;
  if(size < needed) size = needed; // the strcpy below must never overrun

  char** tmp = realloc(l->strings, (l->size + 1) * sizeof *tmp);
  char* copy = malloc(size);
  if(tmp == NULL || copy == NULL){
    // fputs rather than fwrite+sizeof, which would also emit the trailing \0
    fputs(realloc_error, stderr);
    exit(1);
  }

  strcpy(copy, string);
  tmp[l->size] = copy;
  l->strings = tmp;
  l->size++;
}

static void pop(List* l){
  /* Pop the last item from the list and free it. The item is not returned.
   *
   * l: Pointer to a list. NULL and empty lists are left untouched. */
  if(l == NULL || l->size == 0) return;

  l->size--;
  free(l->strings[l->size]);
  l->strings[l->size] = NULL;

  // Keep the one-slot array when the list becomes empty: realloc to zero
  // bytes is not portable, and `push` regrows the array as needed
  if(l->size == 0) return;

  // Shrinking is only an optimisation; if it fails the old array is still
  // valid, so carry on with it
  char** tmp = realloc(l->strings, l->size * sizeof *tmp);
  if(tmp != NULL) l->strings = tmp;
}

static size_t print_from(List* names, List* refs, size_t from, FILE* out){
  /* Print link lines (`=> ref name [N]`) from a pair of lists, starting at
   * index `from` and running to the end
   *
   * names: pointer to a list of link names
   * refs: pointer to a list of link references
   * from: index to print from
   * out: output stream
   *
   * Only entries that have both a name and a reference are printed; if either
   * list is NULL nothing is printed. Spaces in a reference are written as
   * `%20`. N is the 1-based position of the link, matching the `[N]` written
   * inline from the reference count at the time the link was read.
   *
   * returns: index one past the last printed item, i.e. the value to pass as
   *          `from` next time */
  if(names == NULL || refs == NULL) return from;

  size_t end = names->size < refs->size ? names->size : refs->size;
  for(; from < end; from++){
    const char* ref = refs->strings[from];
    fputs("=> ", out);
    for(size_t i = 0; ref[i] != '\0'; i++){
      if(ref[i] == ' ') fputs("%20", out); else fputc(ref[i], out);
    }
    fprintf(out, " %s [%zu]\n", names->strings[from], from + 1);
  }
  return from;
}

static FILE* safe_open(const char* filepath, const char* mode) {
  /* `fopen` wrapper that never hands back NULL
   *
   * filepath: path to the file
   * mode: `fopen` format file mode
   *
   * returns: the stream from `fopen`. If the file cannot be opened a message
   *          is written to stderr and the program exits with status 1. */
  FILE* handle = fopen(filepath, mode);
  if(handle != NULL) return handle;

  fprintf(stderr, "Could not open file %s in mode %s\n", filepath, mode);
  exit(1);
}

static size_t read_until(FILE* in, char until, char** ptr){
  /* Read input filestream until char `until` into a freshly allocated buffer,
   * or bail out if EOF is reached first. The buffer starts at `CHUNK` bytes
   * and grows in increments of `CHUNK`. Newlines are stored as spaces and the
   * `until` character itself is consumed but not stored.
   *
   * in: pointer to input file stream
   * until: the target character to stop reading at
   * ptr: receives the null terminated buffer; the caller must `free` it
   *
   * returns: number of bytes stored, including the terminating \0. Exits with
   *          status 1 on EOF or if memory cannot be allocated. */
  size_t bufsize = CHUNK;
  char* buf = malloc(bufsize);
  if(buf == NULL){
    fprintf(stderr, "Could not allocate memory!\n");
    exit(1);
  }

  size_t i = 0;
  int c; // int, not char, so that EOF cannot be confused with the byte 0xFF
  while((c = fgetc(in)) != EOF && (char) c != until){
    if(i + 2 >= bufsize){ // Always leave space for \0
      char* newbuf = realloc(buf, bufsize + CHUNK);
      if(newbuf == NULL){
        fprintf(stderr, "Could not allocate memory!\n");
        exit(1);
      }
      buf = newbuf;
      bufsize += CHUNK;
    }
    buf[i++] = c == '\n' ? ' ' : (char) c;
  }

  if(c == EOF){
    free(buf);
    fprintf(stderr, "ERROR: Reached EOF whilst looking for matching `%c`\n", until);
    exit(1);
  }

  buf[i] = 0;
  *ptr = buf;
  return(i+1);
}

static void read_push_until(FILE* in, char until, List** l, FILE* out){
  /* Program-specific glue around `read_until`: the text read up to `until`
   * is stored as a new entry of `*l` (the list is created on first use) and,
   * when an output stream is given, echoed to it followed by the `until`
   * character itself
   *
   * in: pointer to input stream
   * until: char to stop reading at
   * l: address of the list pointer to append to; `*l` may be NULL
   * out: output stream to echo to, or NULL for no echo
   */
  char* captured = NULL;
  size_t length = read_until(in, until, &captured);

  if(*l != NULL){
    push(*l, captured, length);
  } else {
    *l = initList(captured, length);
  }

  if(out != NULL){
    // `length` counts the terminating \0, which must not be echoed
    fwrite(captured, 1, length - 1, out);
    fputc(until, out);
  }

  free(captured);
}

int main(int argc, char* argv[]){
  /* Convert markdown from the input stream to `text/gemini` on the output
   * stream, one character at a time
   *
   * argv[1]: optional input path; "-" or absent means stdin
   * argv[2]: optional output path; "-" or absent means stdout
   *
   * returns: 0 on success, 1 if the output stream could not be closed
   *          cleanly. The helpers called from here exit with status 1 on
   *          their own errors. */
  FILE* in = stdin;
  FILE* out = stdout;
  if(argc > 1 && strcmp(argv[1], "-") != 0) in = safe_open(argv[1], "r");
  if(argc > 2 && strcmp(argv[2], "-") != 0) out = safe_open(argv[2], "w");
  int c; // int, not char, so that EOF cannot be confused with the byte 0xFF
  char last = 0;
  size_t last_printed = 0;
  enum modes mode = plaintext; // Links are not respected in `block` mode
  enum boolean likely_link = false; // Used to indicate whether a `(` is
                                    // expected to be a link reference
  List* linknames = NULL;
  List* linkrefs = NULL;
  while((c = fgetc(in)) != EOF){
    if(likely_link && (c != '(')) {
      likely_link = false;
      // If we just parsed what we thought was a link name but it wasn't
      // trailed by a ref then it wasn't a link, so we pop it back off again
      pop(linknames);
    }
    switch(c){
      case '[':
        fputc(c, out);
        if(mode == plaintext && last != '\\'){
          read_push_until(in, ']', &linknames, out);
          likely_link = true;
        }
        break;
      case '(':
        if(likely_link) {
          read_push_until(in, ')', &linkrefs, NULL);
          // The reference number is the count of links collected so far
          fprintf(out, "[%zu]", linkrefs->size);
          likely_link = false;
          break;
        }
        fputc(c, out);
        break;
      case '`': {
        fputc(c, out);
        if(last != '\n') break;
        // A line starting with three backticks is a fence. Echo each backtick
        // as it is read so none is lost when fewer than three turn up.
        int ticks = 1;
        while(ticks < 3 && (c = fgetc(in)) == '`'){
          fputc(c, out);
          ticks++;
        }
        if(ticks == 3){
          mode = (mode == block) ? plaintext : block;
        } else if(c != EOF){
          // The character that ended the run is copied through as-is; it
          // does not get the special handling of the other cases
          fputc(c, out);
        }
        break;
      }
      case '#':
        if(last == '\n') {
          // A heading starts a new section: flush pending links before it
          if (linknames != NULL && lines_to_print) {
            fputc('\n', out);
            last_printed = print_from(linknames, linkrefs, last_printed, out);
            fputc('\n', out);
          }
        }
        fputc(c, out);
        break;
      case '\n':
        // A blank line ends a block of text: flush pending links after it
        if(last == '\n' && linknames != NULL && lines_to_print) {
          fputc(c, out);
          last_printed = print_from(linknames, linkrefs, last_printed, out);
        }
        fputc(c, out);
        break;
      default:
        fputc(c, out);
    }
    if(c == EOF) break; // input ran out in the middle of a fence check
    last = (char) c;
  }

  // Input ended straight after a `[...]` that never got its `(...)`, so the
  // name has no matching reference and must not be printed as a link
  if(likely_link) pop(linknames);

  if(linknames != NULL && linkrefs != NULL){
    if(lines_to_print) fputc('\n', out);
    print_from(linknames, linkrefs, last_printed, out);
  }

  int status = 0;
  fclose(in);
  if(fclose(out) != 0){ // closing flushes, so this is where write errors show
    fprintf(stderr, "ERROR: Could not finish writing output\n");
    status = 1;
  }
  // Either list is still NULL if the input never produced an entry for it
  if(linknames != NULL) freeList(linknames);
  if(linkrefs != NULL) freeList(linkrefs);
  return status;
}