💾 Archived View for gmi.noulin.net › gitRepositories › emoji › file › generateDataForC.c.gmi captured on 2023-01-29 at 13:16:17. Gemini links have been rewritten to link to archived content
-=-=-=-=-=-=-
generateDataForC.c (5005B)
1 #! /usr/bin/env sheepy 2 /* or direct path to sheepy: #! /usr/local/bin/sheepy */ 3 4 /* Libsheepy documentation: http://spartatek.se/libsheepy/ */ 5 #include "libsheepyObject.h" 6 7 #define lv logVarG 8 9 #define gG(e,i) getG(e, rtChar, i) 10 11 int argc; char **argv; 12 13 /* enable/disable logging */ 14 /* #undef pLog */ 15 /* #define pLog(...) */ 16 17 int main(int ARGC, char** ARGV) { 18 19 argc = ARGC; argv = ARGV; 20 21 initLibsheepy(ARGV[0]); 22 setLogMode(LOG_DATE); 23 24 25 createSmallArray(html); 26 createSmallJson(j); 27 createSmallArray(emojiList); 28 // emojiList elements: strings 29 // [utf8 code point string, c hex string, name lower case, NAME UPPER CASE, bighead, mediumhead] 30 enum {UTF8, CHEX, LOWERC, UPPERC, BIGHEAD, MEDIUMHEAD}; 31 32 readFileG(&html, "full-emoji-list-nobin.html"); 33 34 35 // Generate emoji list 36 37 char *bighead = NULL; 38 char *mediumhead = NULL; 39 const char *stS[] = {"start", "head", "codes", "name"}; 40 typ enum {start, head, codes, name} statust; 41 statust status = start; 42 statust statusCode = start; 43 createSmallArray(elem); 44 45 iter(&html, L) { 46 castS(l,L); 47 /* lv(l); */ 48 /* lv(stS[status]); */ 49 /* lv(stS[statusCode]); */ 50 if (status == codes) { 51 if (statusCode == name and hasG(l, "td class='name'")) { 52 var l_l = extractG(l, ">", "<"); 53 var text = getNDupG(l_l, rtSmallStringt, 0); 54 terminateG(l_l); 55 replaceManyG(text, " ", "_", 56 ":", "", 57 "&", "and", 58 ".", "", 59 "-", "", 60 "(", "", 61 ")", "", 62 "é", "e", 63 "ç", "c", 64 "’", "_", 65 "Å", "A", 66 "⊛", "", 67 "“", "", 68 "”", "", 69 "!", "", 70 "*", "x", 71 ",", "", 72 "ô", "o", 73 "ã", "a", 74 "í", "i", 75 "#", "number_sign"); 76 lowerG(text); 77 pushNFreeG(&elem, dupG(text)); 78 //lv(text); 79 upperG(text); 80 //lv(text); 81 pushNFreeG(&elem, text); 82 pushG(&elem, bighead); 83 pushG(&elem, mediumhead); 84 pushG(&emojiList, &elem); 85 } 86 if (statusCode == codes and hasG(l, "td class='chars'")) { 87 initiateG(&elem); 88 var l_l = extractG(l, ">", "<"); 89 //lv(l_l); 90 char *utf8 = getG(l_l, rtChar, 0); 91 char *chex = toHexHeadSepS(utf8, strlen(utf8), "\\x", ""); 92 pushG(&elem, utf8); 93 pushNFreeG(&elem, chex); 94 terminateG(l_l); 95 statusCode = name; 96 } 97 if (hasG(l, "td class='rchars'")) 98 statusCode = codes; 99 if (hasG(l, "class='bighead'") or hasG(l, "class='mediumhead'")) { 100 status = head; 101 statusCode = start; 102 } 103 } 104 if (status == head) { 105 if (hasG(l, "class='bighead'")) { 106 free(bighead); 107 var l_l = extractG(l, "name='", "'"); 108 bighead = getNDupG(l_l, rtChar, 0); 109 terminateG(l_l); 110 replaceG(&bighead, "&", "and", 1); 111 //lv(bighead); 112 } 113 elif (hasG(l, "class='mediumhead'")) { 114 free(mediumhead); 115 var l_l = extractG(l, "name='", "'"); 116 mediumhead= getNDupG(l_l, rtChar, 0); 117 terminateG(l_l); 118 replaceG(&mediumhead, "&", "and", 1); 119 //lv(mediumhead); 120 } 121 elif (hasG(l, "td class='rchars'")) { 122 status = codes; 123 statusCode = codes; 124 } 125 } 126 if (status == start) { 127 if (hasG(l, "<table")) 128 status = head; 129 } 130 } 131 132 freen(bighead); 133 freen(mediumhead); 134 freeG(&html); 135 //lv(&emojiList); 136 137 // generate h header for c 138 createSmallArray(hheader); 139 createSmallArray(lookupList); 140 // lookupList elememts: strings 141 // [name lower case, UTF8] 142 143 iter(&emojiList, E) { 144 cast(smallArrayt*, e, E); 145 initiateG(&elem); 146 pushG(&elem, gG(e, LOWERC)); 147 pushG(&elem, gG(e, UTF8)); 148 pushG(&lookupList, &elem); 149 if (!eqG(bighead, gG(e, BIGHEAD))) { 150 bighead = gG(e, BIGHEAD); 151 pushNFreeG(&hheader, formatS("\n\n// Category: %s", bighead)); 152 } 153 if (!eqG(mediumhead, gG(e, MEDIUMHEAD))) { 154 mediumhead = gG(e, MEDIUMHEAD); 155 pushNFreeG(&hheader, formatS("\n// Subcategory: %s", mediumhead)); 156 } 157 pushNFreeG(&hheader, formatS("#define EM_%s \"%s\"", gG(e,UPPERC), gG(e, CHEX))); 158 } 159 160 var s = toStringG(&lookupList); 161 replaceG(&s, "\"", "\\\"", 0); 162 163 pushG(&hheader, "\n\nextern smallJsont *emList;"); 164 pushG(&hheader, "bool allocEmList(void);"); 165 pushG(&hheader, "void freeEmList(void);"); 166 pushNFreeG(&hheader, formatS("\n#define EM_LIST \"%s\"", s)); 167 free(s); 168 169 170 writeFileG(&hheader, "emoji.h"); 171 freeManyG(&hheader, &lookupList); 172 173 freeG(&emojiList); 174 175 logI("*"); 176 } 177 // vim: set expandtab ts=2 sw=2: