💾 Archived View for gmi.noulin.net › gitRepositories › sort › file › main.c.gmi captured on 2024-08-18 at 18:52:40. Gemini links have been rewritten to link to archived content

View Raw

More Information

⬅️ Previous capture (2023-01-29)

-=-=-=-=-=-=-

sort

Log

Files

Refs

README

LICENSE

main.c (28748B)

     1 #! /usr/bin/env sheepy
     2 /* or direct path to sheepy: #! /usr/local/bin/sheepy */
     3 
     4 /* Libsheepy documentation: http://spartatek.se/libsheepy/ */
     5 #include "libsheepyObject.h"
     6 #include "sort.h"
     7 
     8 int argc; char **argv;
     9 
    10 
    11 /**
    12  * the advanced examples are in the trials function
    13  * (called from main)
    14  *
    15  * - float, double array
    16  * - pointer array
    17  * - struct array
    18  * - i32, i64, u64 keys
    19  */
    20 
    21 
    22 /* enable/disable logging */
    23 /* #undef pLog */
    24 /* #define pLog(...) */
    25 
    26 /**
    27  * tests
    28  * these functions have limited input
    29  */
    30 
    31 void myswap(int *a, int *b)
    32 {
    33     int temp = *a;
    34     *a = *b;
    35     *b = temp;
    36 }
    37 
    38 void flashSort_i32ArrayM(int *arr, size_t size, size_t m) {
    39   // Phase 1
    40   int min = arr[0];
    41   int max = arr[0];
    42   rangeFrom(i, 1, size) {
    43     min = MIN(arr[i], min);
    44     max = MAX(arr[i], max);
    45   }
    46 
    47   /* logVarG(min); */
    48   /* logVarG(max); */
    49 
    50   if (min == max)
    51     ret;
    52 
    53   // Divide to m class
    54   float c;
    55   if (max > 0 and min < 0)
    56     c = (float)(m - 1) / ((u32)max + (u32)(-min));
    57   else
    58     c = (float)(m - 1) / (max - min);
    59   /* logVarG(m); */
    60   /* logVarG((double)c); */
    61 
    62   size_t *L  = callocArray(L, m + 1);
    63   range(i, size) {
    64     size_t k;
    65     /* #define flashsortSubKMacro(num)\ */
    66     /*   if (num > 0 and min < 0) {\ */
    67     /*     k = c * ((u32)(num) + (u32)(-min)) + 1;\ */
    68     /*     #<{(|logVarG(num);\ */
    69     /*     logVarG(min);\ */
    70     /*     logVarG((u32)(num) + (u32)(-min));\ */
    71     /*     logVarG(k);|)}>#\ */
    72     /*   }\ */
    73     /*   else {\ */
    74     /*     k = c * ((num) - min) + 1;\ */
    75     /*     #<{(|logVarG(num);\ */
    76     /*     logVarG(min);\ */
    77     /*     logVarG((num) -min);\ */
    78     /*     logVarG(k);|)}>#\ */
    79     /*   } */
    80     flashsortSubKMacro(arr[i]);
    81     // L[k]: the number of class K
    82     L[k]++;
    83   }
    84   rangeFrom(k, 2, m+1) {
    85     // L[k]: upper bound of class K
    86     L[k] += L[k - 1];
    87   }
    88 
    89   // Phase 2
    90   // number of move
    91   size_t move = 0;
    92   size_t j    = 0;
    93   size_t k    = m;
    94   // Only move size - 1 step
    95   while (move < size - 1)
    96   {
    97     // arr[j] is in right class
    98     while (j >= L[k])
    99     {
   100       j++;
   101       flashsortSubKMacro(arr[j]);
   102     }
   103     // temp to hold the value
   104     int flash = arr[j];
   105     // stop when arr[j] is changed
   106     while (j < L[k])
   107     {
   108       flashsortSubKMacro(flash);
   109       myswap(&flash, &arr[--L[k]]);
   110       move++;
   111     }
   112   }
   113 
   114   // Phase 3
   115   // Use Insertion sort for every class
   116   // Don't sort the m class,
   117   // Because the m class is all max
   118   rangeFrom(k, 1, m) {
   119     rangeFrom(i, L[k] + 1, L[k + 1]) {
   120       int key = arr[i];
   121       i64 j   = i - 1;
   122       while (j >= 0 and arr[j] > key)
   123       {
   124         arr[j + 1] = arr[j];
   125         j--;
   126       }
   127       arr[j + 1] = key;
   128     }
   129   }
   130   free(L);
   131 }
   132 
   133 typ int (*flashSortGetFt)(const void *elemPtr);
   134 
   135 void flashSort_i32AM(void *arr, size_t size, size_t m, size_t elemSize, flashSortGetFt flashSortGet) {
   136   // Phase 1
   137   #define elemAddr(idx) (arr + (elemSize * (idx)))
   138   #define eGet(idx) flashSortGet(elemAddr(idx))
   139   int min = eGet(0);
   140   int max = eGet(0);
   141   rangeFrom(i, 1, size) {
   142     min = MIN(eGet(i), min);
   143     max = MAX(eGet(i), max);
   144   }
   145 
   146   if (min == max)
   147     ret;
   148 
   149   // Divide to m class
   150   float c;
   151   if (max > 0 and min < 0)
   152     c = (float)(m - 1) / ((u32)max + (u32)(-min));
   153   else
   154     c = (float)(m - 1) / (max - min);
   155 
   156   // stack allocated has no effect on speed
   157   size_t *L  = callocArray(L, m + 1);
   158   range(i, size) {
   159     size_t k;
   160     flashsortSubKMacro(eGet(i));
   161     // L[k]: the number of class K
   162     L[k]++;
   163   }
   164   rangeFrom(k, 2, m+1) {
   165     // L[k]: upper bound of class K
   166     L[k] += L[k - 1];
   167   }
   168 
   169   // Phase 2
   170   #define cpe(dst, src) memcpy(dst, src, elemSize);
   171   // number of move
   172   size_t move = 0;
   173   size_t j    = 0;
   174   size_t k    = m;
   175   // Only move size - 1 step
   176   while (move < size - 1)
   177   {
   178     // eGet(j) is in right class
   179     while (j >= L[k])
   180     {
   181       j++;
   182       flashsortSubKMacro(eGet(j));
   183     }
   184     // temp to hold the value
   185     int  flash = eGet(j);
   186     char flashElem[elemSize];
   187     cpe(flashElem, elemAddr(j));
   188 
   189     // stop when eGet(j) is changed
   190     while (j < L[k])
   191     {
   192       flashsortSubKMacro(flash);
   193       char tmpElem[elemSize];
   194       cpe(tmpElem, flashElem);
   195       cpe(flashElem, elemAddr(--L[k]));
   196       cpe(elemAddr(L[k]), tmpElem);
   197       flash = flashSortGet(flashElem);
   198       move++;
   199     }
   200   }
   201 
   202   // Phase 3
   203   // Use Insertion sort for every class
   204   // Don't sort the m class,
   205   // Because the m class is all max
   206   rangeFrom(k, 1, m) {
   207     rangeFrom(i, L[k] + 1, L[k + 1]) {
   208       int key = eGet(i);
   209       char keyElem[elemSize];
   210       cpe(keyElem, elemAddr(i));
   211 
   212       i64 j   = i - 1;
   213       while (j >= 0 and eGet(j) > key)
   214       {
   215         cpe(elemAddr(j+1), elemAddr(j));
   216         j--;
   217       }
   218       cpe(elemAddr(j+1), keyElem);
   219     }
   220   }
   221   free(L);
   222 }
   223 
   224 void flashSort_i32Array(int *arr, size_t size) {
   225   flashSort_i32ArrayM(arr, size, 0.1 * size);
   226 }
   227 
   228 void flashSort_i32A(void *arr, size_t size, size_t elemSize, flashSortGetFt flashSortGet) {
   229   flashSort_i32ArrayM(arr, size, 0.1 * size);
   230 }
   231 
   232 
   233 
   234 //radix bsort
   235 
   236 //#define RADIX_LEVEL 3
   237 #define RADIX_LEVEL 40
   238 
   239 internal inline void shellsort(u32 *arr, size_t size) {
   240   u32 tmp; // tmp record
   241 
   242   rangeFrom(i, 3, size) {
   243     tmp = arr[i];
   244     size_t j;
   245     for(j = i ; j >= 3 && arr[j-3] > tmp; j-=3) {
   246       arr[j] = arr[j-3];
   247     }
   248     arr[j] = tmp;
   249   }
   250 
   251   rangeFrom(i, 1, size) {
   252     tmp = arr[i];
   253     size_t j;
   254     for(j = i ; j >= 1 && arr[j-1] > tmp; j--) {
   255       arr[j] = arr[j-1];
   256     }
   257     arr[j] = tmp;
   258   }
   259 }
   260 
   261 void radixyOrig(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff) {
   262   #define stop 256
   263   u8 *a = (u8*) arr;
   264   size_t counts[stop];
   265   size_t offsets[stop];
   266   size_t starts[stop];
   267   size_t ends[stop];
   268   size_t digt       = sizeof(u32) - digit - 1;
   269 
   270   zeroBuf(counts, sizeof(counts));
   271 
   272   // compute counts
   273   range(x, size) {
   274     u8 c = a[x * sizeof(u32) + digt];
   275     counts[c]++;
   276   }
   277 
   278   // compute offsets
   279   offsets[0] = 0;
   280   starts[0]  = 0;
   281   rangeFrom(x, 1, stop) {
   282     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
   283   }
   284   ends[stop-1] = size;
   285 
   286 
   287   // loop on digit values
   288   range(x, stop) {
   289     // move records according to digit value
   290     while(offsets[x] < ends[x]) {
   291       u8 target = a[offsets[x] * sizeof(u32) + digt];
   292       if (target == x) {
   293         // this record already placed in the correct bucket
   294         offsets[x]++;
   295       }
   296       else {
   297         size_t stackIdx = 0;
   298         size_t stack[stackSize];
   299         stack[stackIdx++] = offsets[x];
   300         while(target != x && stackIdx < stackSize) {
   301           stack[stackIdx] = offsets[target]++;
   302           target          = a[stack[stackIdx] * sizeof(u32) + digt];
   303           stackIdx++;
   304         }
   305         if (stackIdx != stackSize) {
   306           // found an element to store a offsets[x]
   307           offsets[x]++;
   308         }
   309         stackIdx--;
   310         u32 tmp = arr[stack[stackIdx]];
   311         while(stackIdx) {
   312           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
   313           stackIdx--;
   314         }
   315         arr[stack[0]] = tmp;
   316       }
   317     }
   318   }
   319 
   320   // cutoff to shellsort
   321   if (digit < cutoff) {
   322     // loop on digit values to sort the buckets
   323     range(x, stop) {
   324       if (counts[x] > RADIX_LEVEL) {
   325         radixyOrig(&arr[starts[x]], counts[x], digit+1, stackSize, cutoff);
   326       }
   327       else {
   328         if (counts[x] < 2) continue;
   329         shellsort(&arr[starts[x]], counts[x]);
   330       }
   331     }
   332   }
   333   else {
   334     range(x, stop) {
   335       if (counts[x] > 1) {
   336         shellsort(&arr[starts[x]], counts[x]);
   337       }
   338     }
   339   }
   340   #undef stop
   341 }
   342 
   343 void radixy(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff);
   344 
   345 typ struct {u32 *arr; size_t size; size_t digit; size_t stackSize;  size_t cutoff;} radixyArgst;
   346 
   347 typ struct {u32 *arr; size_t size;} shellsortArgst;
   348 
   349 void radixyThread(void *args) {
   350   cast(radixyArgst*, p, args);
   351   radixy(p->arr, p->size, p->digit, p->stackSize, p->cutoff);
   352 }
   353 
   354 void shellsortThread(void *args) {
   355   cast(shellsortArgst *, p, args);
   356   shellsort(p->arr, p->size);
   357 }
   358 
   359 // parallel radix sort for u32 array
   360 void radixy(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff) {
   361   #define stop 256
   362   u8 *a = (u8*) arr;
   363   size_t counts[stop];
   364   size_t offsets[stop];
   365   size_t starts[stop];
   366   size_t ends[stop];
   367   size_t digt       = sizeof(u32) - digit - 1;
   368   radixyArgst rdxArgs[stop];
   369   size_t rdxArgsIdx = 0;
   370 
   371   zeroBuf(counts, sizeof(counts));
   372 
   373   // compute counts
   374   range(x, size) {
   375     u8 c = a[x * sizeof(u32) + digt];
   376     counts[c]++;
   377   }
   378 
   379   // compute offsets
   380   offsets[0] = 0;
   381   starts[0]  = 0;
   382   rangeFrom(x, 1, stop) {
   383     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
   384   }
   385   ends[stop-1] = size;
   386 
   387 
   388   // loop on digit values
   389   range(x, stop) {
   390     // move records according to digit value
   391     while(offsets[x] < ends[x]) {
   392       u8 target = a[offsets[x] * sizeof(u32) + digt];
   393       if (target == x) {
   394         // this record already placed in the correct bucket
   395         offsets[x]++;
   396       }
   397       else {
   398         size_t stackIdx = 0;
   399         size_t stack[stackSize];
   400         stack[stackIdx++] = offsets[x];
   401         while(target != x && stackIdx < stackSize) {
   402           stack[stackIdx] = offsets[target]++;
   403           target          = a[stack[stackIdx] * sizeof(u32) + digt];
   404           stackIdx++;
   405         }
   406         if (stackIdx != stackSize) {
   407           // found an element to store a offsets[x]
   408           offsets[x]++;
   409         }
   410         stackIdx--;
   411         u32 tmp = arr[stack[stackIdx]];
   412         while(stackIdx) {
   413           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
   414           stackIdx--;
   415         }
   416         arr[stack[0]] = tmp;
   417       }
   418     }
   419   }
   420 
   421   // cutoff to shellsort
   422   if (digit < cutoff) {
   423     // loop on digit values to sort the buckets
   424     range(x, stop) {
   425       if (counts[x] > RADIX_LEVEL) {
   426         if (!digit) {
   427           rdxArgs[rdxArgsIdx] = (radixyArgst){.arr=&arr[starts[x]], .size=counts[x], .digit=digit+1, .stackSize=stackSize, .cutoff=cutoff};
   428           tpoolAdd(radixyThread, (void*)&rdxArgs[rdxArgsIdx]);
   429           rdxArgsIdx++;
   430         }
   431         else
   432           radixy(&arr[starts[x]], counts[x], digit+1, stackSize, cutoff);
   433       }
   434       else {
   435         if (counts[x] < 2) continue;
   436         /* ssArgs[ssArgsIdx] = (shellsortArgst){.arr=&arr[starts[x]], .size=counts[x]}; */
   437         /* tpoolAdd(shellsortThread, (void*)&ssArgs[ssArgsIdx]); */
   438         /* ssArgsIdx++; */
   439         shellsort(&arr[starts[x]], counts[x]);
   440       }
   441     }
   442   }
   443   else {
   444     range(x, stop) {
   445       if (counts[x] > 1) {
   446         shellsort(&arr[starts[x]], counts[x]);
   447       }
   448     }
   449   }
   450   #undef stop
   451 
   452   if (!digit) {
   453     tpoolWait;
   454   }
   455 }
   456 
   457 void radixyMacro(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff) {
   458   #define stop 256
   459   u32 *ar[sizeof(u32)];
   460   size_t sizes[sizeof(u32)];
   461   u8 *a[sizeof(u32)];
   462   size_t counts[sizeof(u32)][stop];
   463   size_t offsets[sizeof(u32)][stop];
   464   size_t starts[sizeof(u32)][stop];
   465   size_t ends[sizeof(u32)][stop];
   466   jmp_buf jumpBuffers[sizeof(u32)];
   467 
   468   zeroBuf(counts, sizeof(counts));
   469 
   470   ar[0]    = arr;
   471   sizes[0] = size;
   472 
   473   radixStart:;
   474   a[0]              = (u8*) ar[0];
   475   size_t digt       = sizeof(u32) - digit - 1;
   476   // compute counts
   477   range(x, sizes[0]) {
   478     u8 c = a[0][x * sizeof(u32) + digt];
   479     counts[0][c]++;
   480   }
   481 
   482   // compute offsets
   483   offsets[0][0] = 0;
   484   starts[0][0]  = 0;
   485   rangeFrom(x, 1, stop) {
   486     offsets[0][x] = starts[0][x] = ends[0][x-1] = offsets[0][x-1] + counts[0][x-1];
   487   }
   488   ends[0][stop-1] = sizes[0];
   489 
   490 
   491   // loop on digit values
   492   range(x, stop) {
   493     // move records according to digit value
   494     while(offsets[0][x] < ends[0][x]) {
   495       u8 target = a[0][offsets[0][x] * sizeof(u32) + digt];
   496       if (target == x) {
   497         // this record already placed in the correct bucket
   498         offsets[0][x]++;
   499       }
   500       else {
   501         size_t stackIdx = 0;
   502         size_t stack[stackSize];
   503         stack[stackIdx++] = offsets[0][x];
   504         while(target != x && stackIdx < stackSize) {
   505           stack[stackIdx] = offsets[0][target]++;
   506           target          = a[0][stack[stackIdx] * sizeof(u32) + digt];
   507           stackIdx++;
   508         }
   509         if (stackIdx != stackSize) {
   510           // found an element to store a offsets[0][x]
   511           offsets[0][x]++;
   512         }
   513         stackIdx--;
   514         u32 tmp = ar[0][stack[stackIdx]];
   515         while(stackIdx) {
   516           ar[0][stack[stackIdx]] = ar[0][stack[stackIdx-1]];
   517           stackIdx--;
   518         }
   519         ar[0][stack[0]] = tmp;
   520       }
   521     }
   522   }
   523 
   524   // cutoff to shellsort
   525   if (digit < cutoff) {
   526     // loop on digit values to sort the buckets
   527     range(x, stop) {
   528       if (counts[0][x] > RADIX_LEVEL) {
   529         radixy(&ar[0][starts[0][x]], counts[0][x], digit+1, stackSize, cutoff);
   530       }
   531       else {
   532         if (counts[0][x] < 2) continue;
   533         shellsort(&ar[0][starts[0][x]], counts[0][x]);
   534       }
   535     }
   536   }
   537   else {
   538     range(x, stop) {
   539       if (counts[0][x] > 1) {
   540         shellsort(&ar[0][starts[0][x]], counts[0][x]);
   541       }
   542     }
   543   }
   544   #undef stop
   545 }
   546 
   547 
   548 // radixy from strings
   549 internal inline void shellsortS(char **arr, size_t size) {
   550   char *tmp; // tmp record
   551 
   552   rangeFrom(i, 3, size) {
   553     tmp = arr[i];
   554     size_t j;
   555     for(j = i ; j >= 3 && strcmp(arr[j-3],tmp) > 0; j-=3) {
   556       arr[j] = arr[j-3];
   557     }
   558     arr[j] = tmp;
   559   }
   560 
   561   rangeFrom(i, 1, size) {
   562     tmp = arr[i];
   563     size_t j;
   564     for(j = i ; j >= 1 && strcmp(arr[j-1],tmp) > 0; j--) {
   565       arr[j] = arr[j-1];
   566     }
   567     arr[j] = tmp;
   568   }
   569 }
   570 
   571 /**
   572  * radixyS doesn't accept empty strings
   573  * it segfaults when there is an empty string in the array
   574  */
   575 void radixySOrig(char **arr, size_t size, size_t digit, u8 charStart, u16 charStop, size_t stackSize,  size_t cutoff) {
   576   size_t counts[charStop+1];
   577   size_t offsets[charStop+1];
   578   size_t starts[charStop+1];
   579   size_t ends[charStop+1];
   580   size_t digt       = digit;
   581 
   582   zeroBuf(counts, sizeof(counts));
   583 
   584   // compute counts
   585   range(x, size) {
   586     u8 c = *(arr[x]+digt);
   587     counts[c]++;
   588   }
   589 
   590   // compute offsets
   591   offsets[0] = 0;
   592   starts[0]  = 0;
   593   offsets[charStart] = starts[charStart] = ends[0] = offsets[0] + counts[0];
   594   rangeFrom(x, charStart+1, charStop+1) {
   595     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
   596   }
   597   ends[charStop] = size;
   598 
   599   // sort 0 end of string
   600   // move records according to digit value
   601   while(offsets[0] < ends[0]) {
   602     u8 target = *(arr[offsets[0]] + digt);
   603     if (target == 0) {
   604       // this record already placed in the correct bucket
   605       offsets[0]++;
   606     }
   607     else {
   608       size_t stackIdx = 0;
   609       size_t stack[stackSize];
   610       stack[stackIdx++] = offsets[0];
   611       while(target != 0 && stackIdx < stackSize) {
   612         stack[stackIdx] = offsets[target]++;
   613         target          = *(arr[stack[stackIdx]] + digt);
   614         stackIdx++;
   615       }
   616       if (stackIdx != stackSize) {
   617         // found an element to store a offsets[0]
   618         offsets[0]++;
   619       }
   620       stackIdx--;
   621       char *tmp = arr[stack[stackIdx]];
   622       while(stackIdx) {
   623         arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
   624         stackIdx--;
   625       }
   626       arr[stack[0]] = tmp;
   627     }
   628   }
   629   // loop on digit values
   630   rangeFrom(x, charStart, charStop+1) {
   631     // move records according to digit value
   632     while(offsets[x] < ends[x]) {
   633       u8 target = *(arr[offsets[x]] + digt);
   634       if (target == x) {
   635         // this record already placed in the correct bucket
   636         offsets[x]++;
   637       }
   638       else {
   639         size_t stackIdx = 0;
   640         size_t stack[stackSize];
   641         stack[stackIdx++] = offsets[x];
   642         while(target != x && stackIdx < stackSize) {
   643           stack[stackIdx] = offsets[target]++;
   644           target          = *(arr[stack[stackIdx]] + digt);
   645           stackIdx++;
   646         }
   647         if (stackIdx != stackSize) {
   648           // found an element to store a offsets[x]
   649           offsets[x]++;
   650         }
   651         stackIdx--;
   652         char *tmp = arr[stack[stackIdx]];
   653         while(stackIdx) {
   654           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
   655           stackIdx--;
   656         }
   657         arr[stack[0]] = tmp;
   658       }
   659     }
   660   }
   661 
   662   // cutoff to shellsort
   663   if (digit < cutoff) {
   664     // sort 0 end of string
   665     if (counts[0] > RADIX_LEVEL) {
   666       radixySOrig(&arr[starts[0]], counts[0], digit+1, charStart, charStop, stackSize, cutoff);
   667     }
   668     else {
   669       if (counts[0] > 1) shellsortS(&arr[starts[0]], counts[0]);
   670     }
   671     // loop on digit values to sort the buckets
   672     rangeFrom(x, charStart, charStop+1) {
   673       if (counts[x] > RADIX_LEVEL) {
   674         radixySOrig(&arr[starts[x]], counts[x], digit+1, charStart, charStop, stackSize, cutoff);
   675       }
   676       else {
   677         if (counts[x] < 2) continue;
   678         shellsortS(&arr[starts[x]], counts[x]);
   679       }
   680     }
   681   }
   682   else {
   683     if (counts[0] > 1) {
   684       shellsortS(&arr[starts[0]], counts[0]);
   685     }
   686     rangeFrom(x, charStart, charStop+1) {
   687       if (counts[x] > 1) {
   688         shellsortS(&arr[starts[x]], counts[x]);
   689       }
   690     }
   691   }
   692 }
   693 
   694 void radixyS(char **arr, size_t size, size_t digit, u8 charStart, u16 charStop, size_t stackSize,  size_t cutoff);
   695 
   696 typ struct {char **arr; size_t size; size_t digit; u8 charStart; u16 charStop; size_t stackSize;  size_t cutoff;} radixySArgst;
   697 
   698 void radixySThread(void *args) {
   699   cast(radixySArgst*, p, args);
   700   radixyS(p->arr, p->size, p->digit, p->charStart, p->charStop, p->stackSize, p->cutoff);
   701 }
   702 
   703 // parallel radix sort for string (char*) arrays
   704 void radixyS(char **arr, size_t size, size_t digit, u8 charStart, u16 charStop, size_t stackSize,  size_t cutoff) {
   705   size_t counts[charStop+1];
   706   size_t offsets[charStop+1];
   707   size_t starts[charStop+1];
   708   size_t ends[charStop+1];
   709   size_t digt       = digit;
   710   radixySArgst rdxArgs[charStop+1];
   711   size_t rdxArgsIdx = 0;
   712 
   713   zeroBuf(counts, sizeof(counts));
   714 
   715   // compute counts
   716   range(x, size) {
   717     u8 c = *(arr[x]+digt);
   718     counts[c]++;
   719   }
   720 
   721   // compute offsets
   722   offsets[0] = 0;
   723   starts[0]  = 0;
   724   offsets[charStart] = starts[charStart] = ends[0] = offsets[0] + counts[0];
   725   rangeFrom(x, charStart+1, charStop+1) {
   726     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
   727   }
   728   ends[charStop] = size;
   729 
   730   // sort 0 end of string
   731   // move records according to digit value
   732   while(offsets[0] < ends[0]) {
   733     u8 target = *(arr[offsets[0]] + digt);
   734     if (target == 0) {
   735       // this record already placed in the correct bucket
   736       offsets[0]++;
   737     }
   738     else {
   739       size_t stackIdx = 0;
   740       size_t stack[stackSize];
   741       stack[stackIdx++] = offsets[0];
   742       while(target != 0 && stackIdx < stackSize) {
   743         stack[stackIdx] = offsets[target]++;
   744         target          = *(arr[stack[stackIdx]] + digt);
   745         stackIdx++;
   746       }
   747       if (stackIdx != stackSize) {
   748         // found an element to store a offsets[0]
   749         offsets[0]++;
   750       }
   751       stackIdx--;
   752       char *tmp = arr[stack[stackIdx]];
   753       while(stackIdx) {
   754         arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
   755         stackIdx--;
   756       }
   757       arr[stack[0]] = tmp;
   758     }
   759   }
   760   // loop on digit values
   761   rangeFrom(x, charStart, charStop+1) {
   762     // move records according to digit value
   763     while(offsets[x] < ends[x]) {
   764       u8 target = *(arr[offsets[x]] + digt);
   765       if (target == x) {
   766         // this record already placed in the correct bucket
   767         offsets[x]++;
   768       }
   769       else {
   770         size_t stackIdx = 0;
   771         size_t stack[stackSize];
   772         stack[stackIdx++] = offsets[x];
   773         while(target != x && stackIdx < stackSize) {
   774           stack[stackIdx] = offsets[target]++;
   775           target          = *(arr[stack[stackIdx]] + digt);
   776           stackIdx++;
   777         }
   778         if (stackIdx != stackSize) {
   779           // found an element to store a offsets[x]
   780           offsets[x]++;
   781         }
   782         stackIdx--;
   783         char *tmp = arr[stack[stackIdx]];
   784         while(stackIdx) {
   785           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
   786           stackIdx--;
   787         }
   788         arr[stack[0]] = tmp;
   789       }
   790     }
   791   }
   792 
   793   // cutoff to shellsort
   794   if (digit < cutoff) {
   795     // sort 0 end of string
   796     if (counts[0] > RADIX_LEVEL) {
   797       if (!digit) {
   798         rdxArgs[rdxArgsIdx] = (radixySArgst){.arr=&arr[starts[0]], .size=counts[0], .digit=digit+1, .charStart=charStart, .charStop=charStop, .stackSize=stackSize, .cutoff=cutoff};
   799         tpoolAdd(radixySThread, (void*)&rdxArgs[rdxArgsIdx]);
   800         rdxArgsIdx++;
   801       }
   802       else
   803         radixyS(&arr[starts[0]], counts[0], digit+1, charStart, charStop, stackSize, cutoff);
   804     }
   805     else {
   806       if (counts[0] > 1) shellsortS(&arr[starts[0]], counts[0]);
   807     }
   808     // loop on digit values to sort the buckets
   809     rangeFrom(x, charStart, charStop+1) {
   810       if (counts[x] > RADIX_LEVEL) {
   811         if (!digit) {
   812           rdxArgs[rdxArgsIdx] = (radixySArgst){.arr=&arr[starts[x]], .size=counts[x], .digit=digit+1, .charStart=charStart, .charStop=charStop, .stackSize=stackSize, .cutoff=cutoff};
   813           tpoolAdd(radixySThread, (void*)&rdxArgs[rdxArgsIdx]);
   814           rdxArgsIdx++;
   815         }
   816         else
   817           radixyS(&arr[starts[x]], counts[x], digit+1, charStart, charStop, stackSize, cutoff);
   818       }
   819       else {
   820         if (counts[x] < 2) continue;
   821         shellsortS(&arr[starts[x]], counts[x]);
   822       }
   823     }
   824   }
   825   else {
   826     if (counts[0] > 1) {
   827       shellsortS(&arr[starts[0]], counts[0]);
   828     }
   829     rangeFrom(x, charStart, charStop+1) {
   830       if (counts[x] > 1) {
   831         shellsortS(&arr[starts[x]], counts[x]);
   832       }
   833     }
   834   }
   835 
   836   if (!digit) {
   837     tpoolWait;
   838   }
   839 }
   840 
   841 
   842 // u32 array
   843 #define radixsortAt(ptr) *(ptr)
   844 radixyUintDef(/*scope*/,radi/*name*/,u32/*elemType*/,6/*stackSize*/,4/*cutoff*/,40/*RADIX_LEVEL*/);
   845 #undef radixsortAt
   846 //#define radi
   847 
   848 
   849 // u64 array
   850 typ struct {u64 a; int b;} rdxElemt;
   851 #define radixsortAt(ptr) (ptr)->a
   852 radixyUintThreadDef(/*scope*/,radi64/*name*/,rdxElemt/*elemType*/,6/*stackSize*/,4/*cutoff*/,40/*RADIX_LEVEL*/);
   853 #undef radixsortAt
   854 
   855 // array with string keys
   856 typ struct {char *a; int b;} skElemt;
   857 
   858 #define radixsortAt(ptr) (ptr)->a
   859 radixyStringThreadDef(/*scope*/,radiS/*name*/,skElemt/*elemType*/,6/*stackSize*/,4/*cutoff*/, 40/*RADIX_LEVEL*/);
   860 #undef radixsortAt
   861 
   862 
   863 // end radix sort
   864 
   865 void trials(void) {
   866 
   867   typ struct {i32 a; int b;} elemt;
   868 
   869   //#define sz 600000
   870   #define sz 60
   871   //goto rnd;
   872 
   873   // radix sort struct array
   874   rdxElemt radA[sz];
   875   skElemt radS[sz];
   876 
   877 
   878 
   879   srandom(10);
   880   range(i, ARRAY_SIZE(radA)) {
   881     radA[i].a = (u64)random() << 27;
   882     radA[i].b = i;
   883     radS[i].a = randomS(random()%15+1);
   884     radS[i].b = i;
   885     /* printf("(%lu,%d) ", radA[i].a, radA[i].b); */
   886     printf("(%s,%d) ", radS[i].a, radS[i].b);
   887   }
   888   put;put;
   889 
   890   radi64(radA, sz);
   891   //radiS(radS, sz);
   892   if (!radiSSafe(radS, sz)) {
   893     logE("Empty strings are not allowed");
   894   }
   895 
   896   //XSUCCESS;
   897 
   898   range(i, ARRAY_SIZE(radA)) {
   899     /* printf("(%lu,%d) ", radA[i].a, radA[i].b); */
   900     printf("(%s,%d) ", radS[i].a, radS[i].b);
   901   }
   902   put;
   903   XSUCCESS;
   904 
   905   // --------------------------------
   906   // float key array
   907   //
   908   // intel skylake cpu: it takes less time sorting float array than sorting int array
   909 
   910   float  fa[sz];
   911   double da[sz];
   912 
   913   srandom(10);
   914   range(i, ARRAY_SIZE(fa)) {
   915     fa[i] = (float)((random() - 0x4fffffff) * 2) / 10000000000;
   916     da[i] = (double)((random() - 0x4fffffff) * 2) / 10000000000;
   917     /* printf("%f ", fa[i]); */
   918     /* printf("%f ", da[i]); */
   919   }
   920 
   921   stopwatchStart;
   922 
   923   #define flashsortGet(ptr) (*(ptr))
   924   flashsortM(fa, ARRAY_SIZE(fa), 0.2 * ARRAY_SIZE(fa));
   925   /* flashsortM(da, ARRAY_SIZE(da), 0.2 * ARRAY_SIZE(da)); */
   926   #undef flashsortGet
   927 
   928   stopwatchLogUs;
   929 
   930   //XSUCCESS;
   931 
   932   range(i, ARRAY_SIZE(fa)) {
   933       printf("%f ", fa[i]);
   934       /* printf("%f ", da[i]); */
   935   }
   936   put;
   937 
   938   XSUCCESS;
   939 
   940   // --------------------------------
   941   // all int keys
   942 
   943   elemt arr[sz];
   944 
   945   range(i, ARRAY_SIZE(arr)) {
   946     arr[i].b = i;
   947   }
   948 
   949   srandom(10);
   950   range(i, ARRAY_SIZE(arr)) {
   951     //arr[i].a = ((int)randomWordFromHW());
   952     //arr[i].a = absV((int)randomWordFromHW()) % (ARRAY_SIZE(arr) *2);
   953     //arr[i].a = random() % (ARRAY_SIZE(arr) *2);
   954     //arr[i].a = random();
   955     arr[i].a = (random() - 0x4fffffff) * 2;
   956     //printf("%d ", arr[i]);
   957     //printf("%lu ", arr[i]);
   958     //printf("%d %d\n", arr[i].a, arr[i].b);
   959   }
   960 
   961 
   962   goto arrr;
   963 
   964 
   965 
   966   // pointer array (move pointer to struct, to be used when the elements have a lot of data)
   967 
   968   elemt *parr[sz];
   969 
   970   range(i, ARRAY_SIZE(arr)) {
   971     parr[i] = &arr[i];
   972   }
   973 
   974   stopwatchStart;
   975 
   976   #define flashsortGet(ptr) (*(ptr))->a
   977   flashsortM(parr, ARRAY_SIZE(parr), 0.1 * ARRAY_SIZE(parr));
   978   #undef flashsortGet
   979 
   980   stopwatchLogUs;
   981 
   982   /* range(i, ARRAY_SIZE(arr)) { */
   983   /*     printf("%d %d\n", parr[i]->a,parr[i]->b); */
   984   /* } */
   985   /* put; */
   986 
   987   XSUCCESS;
   988 
   989 
   990 
   991 
   992 
   993   // struct int array (move data)
   994 
   995 
   996 arrr:;
   997 
   998   /* int get(const void *e) { */
   999   /*   ret ((elemt*)e)->a; */
  1000   /* } */
  1001 
  1002   stopwatchStart;
  1003 
  1004   //flashSort_i32AM(arr, ARRAY_SIZE(arr), 0.14 * ARRAY_SIZE(arr), sizeof(elemt), get);
  1005   #define flashsortGet(ptr) (ptr)->a
  1006   flashsortM(arr, ARRAY_SIZE(arr), 0.2 * ARRAY_SIZE(arr));
  1007   //flashsort(arr, ARRAY_SIZE(arr));
  1008   #undef flashsortGet
  1009 
  1010   int cmpArr(const void *a, const void *b) {
  1011     ret CMP(((elemt*)a)->a, ((elemt*)b)->a);
  1012     //wrong overflow - ret ( ((elemt*)a)->a - ((elemt*)b)->a );
  1013   }
  1014 
  1015   //qsort(arr, ARRAY_SIZE(arr), sizeof(elemt), cmpArr);
  1016 
  1017   stopwatchLogUs;
  1018 
  1019   range(i, ARRAY_SIZE(arr)) {
  1020       printf("%d ", arr[i].a);
  1021       /* printf("%lu ", arr[i].a); */
  1022       /* printf("%d %d\n", arr[i].a, arr[i].b); */
  1023   }
  1024   put;
  1025 
  1026   XSUCCESS;
  1027 
  1028 
  1029 
  1030   // int array
  1031 
  1032 rnd:;
  1033   int rnd[sz];
  1034   u32 urnd[sz];
  1035   i64 rnd64[sz];
  1036   u64 urnd64[sz];
  1037   char *list[sz+1];
  1038   list[sz] = NULL;
  1039   char **lst = list;
  1040 
  1041   srandom(10);
  1042   range(i, ARRAY_SIZE(rnd)) {
  1043     //rnd[i] = ((int)randomWordFromHW());
  1044     //rnd[i] = absV((int)randomWordFromHW()) % (ARRAY_SIZE(rnd) *2);
  1045     //rnd[i] = random() % (ARRAY_SIZE(rnd) *2);
  1046     rnd[i]    = (random() - 0x4fffffff) * 2;
  1047     //urnd[i]   = (random() * 2);
  1048     urnd[i]   = random();
  1049     // string representing a number
  1050     /* sprintf((char*)&urnd[i], "%d", ARRAY_SIZE(rnd)-i-1); */
  1051     /* sprintf((char*)&urnd[i], "%c", '0'+ARRAY_SIZE(rnd)-i-1); */
  1052     rnd64[i]  = random() << 33 + random();
  1053     urnd64[i] = random() << 33 + random();
  1054     /* list[i]   = intToS(random()); */
  1055     list[i]   = randomS(random()%15+1);
  1056     //printf("%d ", rnd[i]);
  1057     //printf("%u ", urnd[i]);
  1058     //printf("%08x ", urnd[i]);
  1059     //printf("%ld ", rnd64[i]);
  1060   }
  1061 
  1062   /* logVarG(lst); */
  1063   put;put
  1064 
  1065   stopwatchStart;
  1066 
  1067   /* range(i, 100000) { */
  1068   /*   flashSort_i32Array(rnd, ARRAY_SIZE(rnd)); */
  1069   /* } */
  1070 
  1071   //flashSort_i32Array(rnd, ARRAY_SIZE(rnd));
  1072   //flashSort_i32ArrayM(rnd, ARRAY_SIZE(rnd), 0.2 * ARRAY_SIZE(rnd));
  1073   #define flashsortGet(ptr) (*(ptr))
  1074   //flashsortM(rnd, ARRAY_SIZE(rnd), 0.2 * ARRAY_SIZE(rnd));
  1075   //flashsortM(urnd, ARRAY_SIZE(urnd), 0.2 * ARRAY_SIZE(urnd));
  1076   //flashsortM(rnd64, ARRAY_SIZE(rnd64), 0.2 * ARRAY_SIZE(rnd64));
  1077   //flashsortM(urnd64, ARRAY_SIZE(urnd64), 0.2 * ARRAY_SIZE(urnd64));
  1078   //flashsort(rnd, ARRAY_SIZE(rnd));
  1079   //int *A = rnd;
  1080   //flashsort(A, ARRAY_SIZE(rnd));
  1081 
  1082   //RadixSort(urnd, sz);
  1083   //radixSort(urnd, sz);
  1084   //radixy(urnd, sz, 0 /*digit*/, 6 /*stackSize*/, 4 /*cutoff*/);
  1085   radi(urnd, sz);
  1086   //radixyS(lst, sz, 0 /*digit*/, 44 /*start*/, 'z' /*stop*/, 5 /*stackSize*/, 4 /*cutoff*/);
  1087   //radixyStrTODO(urnd, sz, 0 /* digit */);
  1088   //sortG(&lst);
  1089 
  1090   int cmp(const void *a, const void *b) {
  1091     ret CMP(*(int*)a, *(int*)b);
  1092     //ret ( *(int*)a - *(int*)b );
  1093   }
  1094   //qsort(rnd, ARRAY_SIZE(rnd), sizeof(int), cmp);
  1095   int ucmp(const void *a, const void *b) {
  1096     ret CMP(*(u32*)a, *(u32*)b);
  1097     //ret ( *(u32*)a - *(u32*)b );
  1098   }
  1099   //qsort(urnd, ARRAY_SIZE(urnd), sizeof(int), ucmp);
  1100 
  1101   stopwatchLogUs;
  1102 
  1103   // check
  1104   range(i, sz-1) {
  1105     if (urnd[i+1] < urnd[i]) {
  1106       logVarG(i);
  1107       logVarG(urnd[i]);
  1108       logVarG(urnd[i+1]);
  1109       logE("wrong order");
  1110       /* XFAILURE; */
  1111     }
  1112   }
  1113 
  1114   /* // check */
  1115   /* range(i, sz-1) { */
  1116   /*   if (strcmp(lst[i+1], lst[i]) < 0) { */
  1117   /*     logVarG(i); */
  1118   /*     logVarG(lst[i]); */
  1119   /*     logVarG(lst[i+1]); */
  1120   /*     logE("wrong order"); */
  1121   /*   } */
  1122   /* } */
  1123   //XSUCCESS;
  1124 
  1125   range(i, ARRAY_SIZE(rnd)) {
  1126       /* printf("%d ", rnd[i]); */
  1127       /* printf("%ld ", rnd64[i]); */
  1128       printf("%u ", urnd[i]);
  1129       /* printf("%08x ", urnd[i]); */
  1130       /* printf("%s ", &urnd[i]); */
  1131       /* printf("%lu ", urnd64[i]); */
  1132   }
  1133   put;
  1134 
  1135   /* logVarG(lst); */
  1136   // TODO free lst strings
  1137 
  1138   /* typeof(convertToUnsignedType(rnd[0])) a = 1; */
  1139   /* typeof(convertToUnsignedType(rnd[0])) b = -1; */
  1140   /* toUnsignedType((rnd[0])) a = 1; */
  1141   /* toUnsignedType((rnd[0])) b = -1; */
  1142   /* typeof((rnd[0])) a = 1; */
  1143   /* typeof((rnd[0])) b = -1; */
  1144   /* logVarG(MAX(a,b)); */
  1145 }
  1146 
  1147 int main(int ARGC, char** ARGV) {
  1148 
  1149   argc = ARGC; argv = ARGV;
  1150 
  1151   initLibsheepy(ARGV[0]);
  1152   setLogMode(LOG_FUNC);
  1153   setLogSymbols(LOG_UTF8);
  1154   setStackLimit(-1);
  1155 
  1156   trials();
  1157 
  1158 }
  1159 // vim: set expandtab ts=2 sw=2: