💾 Archived View for gmi.noulin.net › gitRepositories › morph › file › morph.c.gmi captured on 2024-07-09 at 02:34:36. Gemini links have been rewritten to link to archived content
⬅️ Previous capture (2023-01-29)
-=-=-=-=-=-=-
morph.c (9191B)
1 #!/usr/bin/env sheepy 2 3 // search and replace 4 // input: 5 // file.txt 6 // replace_configuration.txt 7 // the files to process are listed in file.txt in current directory. 8 // Use # to comment lines in files.txt. 9 // replace_configuration.txt holds the strings to search and replace in the files listed in file.txt 10 // 11 // replace_configuration.txt format: 12 // - line 1: separetor in line 2 and 3 13 // - line 2: strings to search separeted with the characters in line 1 14 // - line 3: replace strings separeted with the characters in line 1 15 16 #include "libsheepyObject.h" 17 #include "pcre.h" 18 19 #define internal static 20 21 #include <stdlib.h> 22 #include <stdbool.h> 23 #include <string.h> 24 #include <stdio.h> 25 26 void replace(); 27 void replaceWithPcre(char **l, pcre *reCompiled, pcre_extra *pcreExtra, char *func_search, char *replaceWith, smallArrayt *mappingNParams); 28 int MAIN(int ARGC, char** ARGV); 29 30 int argc; char **argv; 31 32 // get function name 33 smallStringt *funcName(char *e) { 34 smallStringt *s = allocG(e); 35 smallArrayt *spl = extractG(s, "*", "("); 36 terminateG(s); 37 s = getNDupG(spl, rtSmallStringt, 0); 38 terminateG(spl); 39 return s; 40 } 41 42 // extract params 43 smallArrayt *funcParams(char *e) { 44 smallStringt *s = allocG(e); 45 smallArrayt *spl = extractG(s, "(", ")"); 46 terminateG(s); 47 s = getG(spl, rtSmallStringt, 0); 48 smallArrayt *params = splitG(s, ","); 49 terminateG(spl); 50 enumerateSmallArray(params, P, i) { 51 castS(p, P); 52 trimG(p); 53 setPG(params, i, p); 54 finishG(p); 55 } 56 return params; 57 } 58 59 void replace() { 60 char **replace_configuration = NULL; 61 char **search_what = NULL; 62 char **replace_with = NULL; 63 char **config = NULL; 64 65 // Steps 66 // load config 67 // create string regex 68 // compile regexes 69 // optimize the regex 70 // create regex for each search string 71 // for each file in files.txt, search and replace 72 73 printf("\nLoading config...\n"); 74 printf("\n"); 75 76 // load config 77 replace_configuration = readText("morph_configuration.txt"); 78 79 exitFailure(replace_configuration); 80 81 search_what = split(replace_configuration[1], replace_configuration[0]); 82 replace_with = split(replace_configuration[2], replace_configuration[0]); 83 84 if (!search_what || !replace_with) { 85 printf("wrong replace_configuration.txt"); 86 printf("\n"); 87 XFAILURE; 88 } 89 90 createAllocateSmallArray(a); 91 createAllocateSmallArray(func_search_what); 92 createAllocateSmallArray(func_replace_with); 93 94 // create string regex 95 char **strRegex = NULL; 96 enumerateCharP(search_what, e, i) { 97 printf("Replace: "); 98 printf("\n"); 99 // char *strLCpy(char *dst, const char *src, size_t dstSize) 100 // get function name 101 pushNFreeG(func_search_what, trimG(funcName(*e))); 102 smallArrayt *params = funcParams(*e); 103 printf("%s\n", *e); 104 printf("With:"); 105 printf("\n"); 106 // get replace function name 107 pushNFreeG(func_replace_with, trimG(funcName(replace_with[i]))); 108 // get replace params 109 smallArrayt *replace_params = funcParams(replace_with[i]); 110 // map old->new params 111 createAllocateSmallArray(param_mapping); 112 enumerateSmallArray(replace_params, P, j) { 113 enumerateSmallArray(params, RP, k) { 114 if (eqG(P, RP)) { 115 pushG(param_mapping, k); 116 } 117 finishG(RP); 118 } 119 finishG(P); 120 } 121 terminateG(replace_params); 122 // save info in array a 123 createAllocateSmallArray(mappingNParams); 124 pushNFreeG(mappingNParams, params); 125 pushNFreeG(mappingNParams, param_mapping); 126 pushNFreeG(a, mappingNParams); 127 printf("%s\n", replace_with[i]); 128 char *reg = catS("\\b", getG(func_search_what, rtChar, i), "\\b"); 129 iListPushS(&strRegex, reg); 130 } 131 132 //logVarG(a); 133 //listPrintS(strRegex); 134 135 // compile regexes 136 // optimize the regex 137 size_t nSearch = lenG(func_search_what); 138 pcre **reCompiled = malloc((nSearch+1) * sizeof(pcre *)); 139 reCompiled[nSearch] = NULL; 140 pcre_extra **pcreExtra = malloc((nSearch+1) * sizeof(pcre_extra *)); 141 pcreExtra[nSearch] = NULL; 142 143 const char *pcreErrorStr; 144 int pcreErrorOffset; 145 146 // create regex for each search string 147 enumerateCharP(strRegex, e, j) { 148 reCompiled[j] = pcre_compile(*e, 0, &pcreErrorStr, &pcreErrorOffset, NULL); 149 if (!reCompiled[j]) { 150 printf("ERROR: Could not study >%s<: %s", *e, pcreErrorStr); 151 printf("\n"); 152 XFAILURE; 153 } 154 155 pcreExtra[j] = pcre_study(reCompiled[j], 0, &pcreErrorStr); 156 if (!pcreExtra) { 157 printf("ERROR: Could not study >%s<: %s", *e, pcreErrorStr); 158 printf("\n"); 159 XFAILURE; 160 } 161 } 162 163 164 printf("\n\nProcess files...\n"); 165 printf("\n"); 166 167 config = readText("files.txt"); 168 exitFailure(config); 169 170 // for each file in files.txt, search and replace 171 forEachCharP(config, f) { 172 if (!isBlankS(*f)) { 173 if (*f[0] != '#') { 174 iTrimS(f); 175 char **src = readText(*f); 176 if (!src) { 177 printf("Cant read file %s", *f); 178 printf("\n"); 179 continue; 180 } 181 182 // scan lines 183 forEachCharP(src, l) { 184 range(i, nSearch) { 185 //iReplaceS_max(l, *a, func_replace_with[i]); 186 replaceWithPcre(l, reCompiled[i], pcreExtra[i], getG(func_search_what, rtChar, i), getG(func_replace_with, rtChar, i), getG(a, rtSmallArrayt, i)); 187 } 188 } 189 190 bool status = writeText(*f, src); 191 //bool status = false; 192 if (status) { 193 printf("CHANGED: %s", *f); 194 printf("\n"); 195 } 196 else { 197 printf("DIDNT CHANGE: %s", *f); 198 printf("\n"); 199 } 200 } 201 } 202 } 203 204 listFreeManyS(replace_configuration, search_what, replace_with, config, strRegex); 205 terminateManyG(func_search_what, func_replace_with, a); 206 207 forEachType(pcre, reCompiled, e) { 208 pcre_free(*e); 209 } 210 free(reCompiled); 211 212 if (pcreExtra[0]) { 213 forEachType(pcre_extra, pcreExtra, e) { 214 #ifdef PCRE_CONFIG_JIT 215 pcre_free_study(*e); 216 #else 217 pcre_free(*e); 218 #endif 219 } 220 } 221 free(pcreExtra); 222 } 223 224 void replaceWithPcre(char **l, pcre *reCompiled, pcre_extra *pcreExtra, char *func_search, char *replaceWith, smallArrayt *mappingNParams) { 225 int r; 226 int subStrVec[30]; 227 size_t len; 228 size_t offset = 0;; 229 int allMatch[100]; 230 size_t nMatch = 0;; 231 232 len = strlen(*l); 233 r = pcre_exec(reCompiled, pcreExtra, *l, len, 0, 0, subStrVec, 30); 234 235 while ((offset < len) && (r >= 0)) { 236 237 if (r == 0) { 238 printf("But too many substrings were found to fit in subStrVec!\n"); 239 r = 30/3; 240 } 241 242 for (int j=0; j < r; j++) { 243 allMatch[nMatch] = subStrVec[j*2]; 244 allMatch[nMatch+1] = subStrVec[j*2+1]; 245 nMatch += 2; 246 } 247 248 offset = subStrVec[1]; 249 r = pcre_exec(reCompiled, pcreExtra, *l, len, offset, 0, subStrVec, 30); 250 } 251 252 if (!nMatch) { 253 return; 254 } 255 256 /* logVarG(*l); */ 257 for (int i = nMatch ; i > 0 ; i-=2) { 258 /* logVarG(*l); */ 259 /* logVarG(func_search); */ 260 /* logVarG(mappingNParams); */ 261 // split and change order 262 //puts(*l+allMatch[i-1]); 263 smallStringt *s = allocG(*l+allMatch[i-1]); 264 smallArrayt *extract = extractG(s, "(", ")"); 265 terminateG(s); 266 /* logVarG(extract); */ 267 if (extract) { 268 s = getG(extract, rtSmallStringt, 0); 269 /* logVarG(s); */ 270 smallArrayt *spl = splitG(s, ","); 271 smallArrayt *mapping = getG(mappingNParams, rtSmallArrayt, 1); 272 createAllocateSmallArray(newPara); 273 range(p, lenG(mapping)) { 274 pushG(newPara, getG(spl, rtSmallStringt, getG(mapping, rtI64, p))); 275 } 276 smallStringt *s2 = joinG(newPara, ","); 277 prependG(s, "("); 278 appendG(s, ")"); 279 prependG(s2, "("); 280 appendG(s2, ")"); 281 //logVarG(s); 282 //logVarG(s2); 283 replaceG(l, ssGet(s), ssGet(s2), 0); 284 smashG(extract); 285 finishManyG(mapping, spl); 286 terminateManyG(s, s2); 287 } 288 289 // replace word 290 char *sBefore = emptySF();; 291 292 if (allMatch[i-2]) { 293 // 0 means end of string 294 sBefore = sliceS(*l, 0, allMatch[i-2]); 295 } 296 297 char *sAfter = sliceS(*l, allMatch[i-1], 0); 298 free(*l); 299 *l = catS(sBefore, replaceWith, sAfter); 300 freeManyS(sBefore, sAfter); 301 } 302 finishG(mappingNParams); 303 /* logVarG(*l); */ 304 } 305 306 // ------------------------------------------------------------------------------------- 307 308 #ifndef unitTest 309 // Remove main when running the unit tests 310 #define MAIN main 311 #endif 312 int MAIN(int ARGC, char** ARGV) { 313 int dum UNUSED; 314 315 argc = ARGC; argv = ARGV;;// 316 317 replace(); 318 319 XSUCCESS; 320 }