💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 9596ba2808107bc59ffff8effdbff… captured on 2024-02-05 at 10:04:31. Gemini links have been rewritten to link to archived content
-=-=-=-=-=-=-
0 /*
1 * MIT License
2 * Copyright (c) 2010-2020 nsf <no.smile.face@gmail.com>
3 * 2015-2022 Adam Saponara <as@php.net>
4 * 2023-2024 RMF <rawmonk@rmf-dev.com>
5 */
6 #include <stdio.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include "wcwidth.h"
10
/*
 * Sequence length lookup, indexed by the value of the first byte of a
 * sequence. Bytes 0x80-0xbf as well as 0xfe and 0xff map to a length of 1.
 */
static const unsigned char utf8_length[256] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/*
 * Mask selecting the payload bits of the first byte of a sequence,
 * indexed by (sequence length - 1).
 */
static const unsigned char utf8_mask[6] = {
	0x7f, /* length 1 */
	0x1f, /* length 2 */
	0x0f, /* length 3 */
	0x07, /* length 4 */
	0x03, /* length 5 */
	0x01  /* length 6 */
};
25
26 int utf8_char_length(char c) {
27 return utf8_length[(unsigned char)c];
28 }
29
/*
 * Return the number of bytes (1 to 6) used to encode the code point `c`.
 */
int utf8_unicode_length(uint32_t c) {
	if (c >= 0x4000000) return 6;
	if (c >= 0x200000) return 5;
	if (c >= 0x10000) return 4;
	if (c >= 0x800) return 3;
	if (c >= 0x80) return 2;
	return 1;
}
47
48 int utf8_char_to_unicode(uint32_t *out, const char *c) {
49
50 int i;
51 unsigned char len, mask;
52 uint32_t result;
53
54 len = utf8_char_length(*c);
55 mask = utf8_mask[len - 1];
56 result = c[0] & mask;
57 for (i = 1; i < len; ++i) {
58 result <<= 6;
59 result |= c[i] & 0x3f;
60 }
61
62 *out = result;
63 return (int)len;
64 }
65
/*
 * Encode the code point `c` into `out` (1 to 6 bytes).
 *
 * No terminating NUL byte is written. Returns the number of bytes stored.
 */
int utf8_unicode_to_char(char *out, uint32_t c) {
	int lead, count, pos;

	/* pick the sequence length and the marker bits of the first byte */
	if (c >= 0x4000000) {
		lead = 0xfc;
		count = 6;
	} else if (c >= 0x200000) {
		lead = 0xf8;
		count = 5;
	} else if (c >= 0x10000) {
		lead = 0xf0;
		count = 4;
	} else if (c >= 0x800) {
		lead = 0xe0;
		count = 3;
	} else if (c >= 0x80) {
		lead = 0xc0;
		count = 2;
	} else {
		lead = 0;
		count = 1;
	}

	/* fill the trailing bytes from last to first, 6 bits at a time */
	pos = count;
	while (--pos > 0) {
		out[pos] = (c & 0x3f) | 0x80;
		c >>= 6;
	}
	/* whatever is left goes in the first byte next to the marker bits */
	out[0] = c | lead;

	return count;
}
99
/*
 * Advance `*ptr` past the character it currently points to and return
 * the updated pointer.
 */
const char *utf8_next(const char **ptr) {
	const char *cursor = *ptr;
	cursor += utf8_char_length(*cursor);
	*ptr = cursor;
	return cursor;
}
105
/*
 * Starting from byte offset `i` in `ptr`, step back one byte (unless `i`
 * is 0) and then keep stepping back while the byte is a continuation
 * byte (10xxxxxx). Returns the resulting offset.
 */
int utf8_previous(const char *ptr, int i) {
	if (i != 0) --i;
	for (; i > 0; --i) {
		if ((ptr[i] & 0xC0) != 0x80) break;
	}
	return i;
}
111
/*
 * Sum the mk_wcwidth() value of every character decoded from the first
 * `length` bytes of `ptr`, stopping early at a NUL character.
 */
int utf8_width(const char *ptr, size_t length) {

	size_t offset = 0;
	int total = 0;

	while (offset < length) {
		uint32_t cp;
		offset += utf8_char_to_unicode(&cp, ptr + offset);
		if (cp == 0) break;
		total += mk_wcwidth(cp);
	}
	return total;
}
126
/*
 * Copy `src` into `dst` without splitting a multi-byte character.
 *
 * At most `length` bytes are written to `dst`, terminating NUL included.
 * Copying stops at the end of `src`, or before the first character that
 * would not leave room for the terminator. Nothing is written when
 * `length` is 0.
 *
 * Returns the number of bytes copied, not counting the terminating NUL.
 */
int utf8_cpy(char *dst, const char *src, size_t length) {
	size_t i = 0;

	if (length == 0) return 0;

	/* stop at the source terminator instead of reading past it */
	while (src[i] != '\0') {
		size_t len = utf8_char_length(src[i]);
		/* keep one byte available for the terminating NUL */
		if (i + len >= length) break;
		/*
		 * The inner NUL check covers a source ending in the middle
		 * of a sequence.
		 */
		while (len-- && src[i] != '\0') {
			dst[i] = src[i];
			i++;
		}
	}
	dst[i] = '\0';
	return i;
}
142
/*
 * Read one character from `f` and store its code point in `*out`.
 *
 * A byte whose table length is 1 is stored as is; otherwise the remaining
 * bytes of the sequence are read and the whole sequence is decoded.
 *
 * Returns 0 on success, EOF if the end of the stream is reached (also in
 * the middle of a sequence; the bytes already read are consumed), or -1
 * if the sequence would not fit in the local buffer.
 */
int utf8_fgetc(FILE *f, uint32_t *out) {

	char buf[32];
	uint32_t decoded;
	int ch, len, pos;

	ch = fgetc(f);
	/* test for EOF before using the value as a byte */
	if (ch == EOF) return EOF;

	len = utf8_char_length(ch);
	if (len <= 1) {
		*out = ch;
		return 0;
	}
	if ((unsigned)len >= sizeof(buf)) return -1;

	buf[0] = ch;
	for (pos = 1; pos < len; pos++) {
		ch = fgetc(f);
		if (ch == EOF) return EOF;
		buf[pos] = ch;
	}
	buf[pos] = 0;

	/* decode into a real uint32_t rather than through a casted int */
	utf8_char_to_unicode(&decoded, buf);
	*out = decoded;

	return 0;
}
167
/*
 * Return a byte offset into `ptr` that falls on a character boundary.
 *
 * Characters are walked until a NUL byte or the `length` limit. If a NUL
 * is found before the limit, its offset is returned. If the walk reaches
 * or passes the limit instead, the offset of the last character visited
 * is returned, so that character is left out. Returns 0 if no character
 * was visited.
 */
int utf8_len(const char *ptr, size_t length) {
	const char *const start = ptr;
	const char *const end = ptr + length;
	const char *last = NULL;

	while (ptr < end && *ptr != '\0') {
		last = ptr;
		ptr += utf8_char_length(*ptr);
	}
	/* limit reached: fall back to the start of the last character */
	if (ptr >= end) {
		ptr = last;
	}
	if (ptr == NULL) {
		return 0;
	}
	return ptr - start;
}
175
/*
 * Write `buf` to `f`, limited to the byte count computed by utf8_len()
 * for `length`. Returns the value returned by fwrite().
 */
int utf8_fprintf(FILE *f, const char *buf, size_t length) {
	const int bytes = utf8_len(buf, length);
	return fwrite(buf, sizeof(char), bytes, f);
}
180