💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › ceb653e644195c18ba4c273c2ff97… captured on 2023-12-28 at 15:46:04. Gemini links have been rewritten to link to archived content
-=-=-=-=-=-=-
0 /*
1 * MIT License
2 * Copyright (c) 2010-2020 nsf <no.smile.face@gmail.com>
3 * 2015-2022 Adam Saponara <as@php.net>
4 */
5 #include <stdio.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 #include "wcwidth.h"
9
/*
 * Length in bytes of a UTF-8 sequence, indexed by the value of its first
 * byte. Bytes 0x80-0xbf and 0xfe-0xff map to 1, so they are treated as
 * one-byte sequences. The 5- and 6-byte entries cover the lead bytes
 * 0xf8-0xfd.
 */
static const unsigned char utf8_length[256] = {
	/* 0x00-0x0f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10-0x1f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20-0x2f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40-0x4f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50-0x5f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60-0x6f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70-0x7f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80-0x8f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90-0x9f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xa0-0xaf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xb0-0xbf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xc0-0xcf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xd0-0xdf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xe0-0xef */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xf0-0xff */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/*
 * Mask selecting the payload bits of a sequence's first byte, indexed by
 * (sequence length - 1).
 */
static const unsigned char utf8_mask[6] = {
	0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
};
24
25 int utf8_char_length(char c) {
26 return utf8_length[(unsigned char)c];
27 }
28
/*
 * Return how many bytes are needed to encode code point `c`:
 * 1 below 0x80, 2 below 0x800, 3 below 0x10000, 4 below 0x200000,
 * 5 below 0x4000000 and 6 for anything larger.
 */
int utf8_unicode_length(uint32_t c) {
	if (c < 0x80)
		return 1;
	if (c < 0x800)
		return 2;
	if (c < 0x10000)
		return 3;
	if (c < 0x200000)
		return 4;
	if (c < 0x4000000)
		return 5;
	return 6;
}
46
47 int utf8_char_to_unicode(uint32_t *out, const char *c) {
48
49 int i;
50 unsigned char len, mask;
51 uint32_t result;
52
53 len = utf8_char_length(*c);
54 mask = utf8_mask[len - 1];
55 result = c[0] & mask;
56 for (i = 1; i < len; ++i) {
57 result <<= 6;
58 result |= c[i] & 0x3f;
59 }
60
61 *out = result;
62 return (int)len;
63 }
64
/*
 * Encode code point `c` as a UTF-8 sequence written to `out`.
 *
 * out: destination buffer; receives between 1 and 6 bytes and is NOT
 *      NUL-terminated by this function.
 * c:   code point to encode.
 *
 * Returns the number of bytes written.
 */
int utf8_unicode_to_char(char *out, uint32_t c) {
	/* Exclusive upper bound of the code points that fit in 1..5 bytes. */
	static const uint32_t limit[5] = {
		0x80, 0x800, 0x10000, 0x200000, 0x4000000
	};
	/* Marker bits of the first byte, indexed by (length - 1). */
	static const unsigned char lead[6] = {
		0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
	};
	int len = 1;
	int pos;

	while (len < 6 && c >= limit[len - 1])
		len++;

	/* Fill the continuation bytes from the last one backwards. */
	pos = len;
	while (--pos > 0) {
		out[pos] = (char)((c & 0x3f) | 0x80);
		c >>= 6;
	}
	/* Whatever is left of the code point goes into the first byte. */
	out[0] = (char)(c | lead[len - 1]);

	return len;
}
98
/*
 * Advance `*ptr` past the UTF-8 sequence it currently points at, using the
 * length reported by utf8_char_length() for its first byte.
 *
 * Returns the updated value of `*ptr`.
 */
const char *utf8_next(const char **ptr) {
	const char *cursor = *ptr;

	cursor += utf8_char_length(*cursor);
	*ptr = cursor;
	return cursor;
}
104
/*
 * Find the byte offset of the character that precedes offset `i` in `ptr`.
 *
 * Steps back one byte (unless `i` is already 0), then keeps stepping back
 * while the byte under the cursor is a continuation byte (10xxxxxx) and the
 * start of the buffer has not been reached.
 *
 * Returns the resulting offset.
 */
int utf8_previous(const char *ptr, int i) {
	int pos = i;

	if (pos != 0)
		--pos;
	for (; pos > 0; --pos) {
		if ((ptr[pos] & 0xC0) != 0x80)
			break;
	}
	return pos;
}
110
/*
 * Sum the mk_wcwidth() widths of the characters in the first `length` bytes
 * of `ptr`.
 *
 * ptr:    UTF-8 text.
 * length: maximum number of bytes to examine.
 *
 * Scanning stops at the first character that decodes to 0 (e.g. a NUL
 * terminator), or when the next character's encoded length would extend
 * beyond `length`, so no byte outside ptr[0..length-1] is ever decoded.
 *
 * Returns the accumulated width.
 */
int utf8_width(const char *ptr, size_t length) {

	int width = 0;
	size_t i = 0;

	while (i < length) {
		uint32_t ch;
		size_t need = (size_t)utf8_char_length(ptr[i]);

		/* Do not decode a sequence that crosses the end of the buffer. */
		if (need > length - i) break;
		i += utf8_char_to_unicode(&ch, &ptr[i]);
		if (!ch) break;
		width += mk_wcwidth(ch);
	}
	return width;
}
125
/*
 * Copy the NUL-terminated UTF-8 string `src` into `dst` without splitting a
 * multi-byte character.
 *
 * dst:    destination buffer of at least `length` bytes.
 * src:    source string.
 * length: capacity of `dst` in bytes, terminator included.
 *
 * Characters are copied whole. As soon as the next character plus the
 * terminator would not fit, `dst` is NUL-terminated at that point and
 * copying stops. Copying also stops right after the source terminator has
 * been copied, so `src` is never read past its end. Nothing is written when
 * `length` is 0.
 *
 * Always returns 0.
 */
int utf8_cpy(char *dst, const char *src, size_t length) {
	size_t i = 0;

	while (i < length) {
		size_t len = (size_t)utf8_char_length(src[i]);
		if (i + len >= length) {
			/* No room left for this character and a terminator. */
			dst[i] = '\0';
			break;
		}
		while (len--) {
			dst[i] = src[i];
			/* End of source, possibly in the middle of a sequence. */
			if (src[i] == '\0') return 0;
			i++;
		}
	}
	return 0;
}
141
/*
 * Read one UTF-8 encoded character from stream `f`.
 *
 * f:   stream to read from.
 * out: receives the decoded code point; for a byte whose table length is 1
 *      it receives the raw byte value.
 *
 * Returns 0 on success, EOF if fgetc() reports EOF before or in the middle
 * of a sequence (`*out` is left untouched in that case), and -1 if the
 * sequence length would not fit the local buffer (not reachable with the
 * current length table, whose maximum is 6).
 */
int utf8_fgetc(FILE *f, uint32_t *out) {

	char buf[8]; /* longest sequence is 6 bytes plus the terminator */
	uint32_t code;
	int ch, len, pos;

	ch = fgetc(f);
	/* Test for EOF before treating the value as a byte. */
	if (ch == EOF) return EOF;

	len = utf8_char_length((char)ch);
	if (len <= 1) {
		*out = (uint32_t)ch;
		return 0;
	}
	if ((size_t)len >= sizeof(buf)) return -1;

	buf[0] = (char)ch;
	for (pos = 1; pos < len; pos++) {
		ch = fgetc(f);
		if (ch == EOF) return EOF;
		buf[pos] = (char)ch;
	}
	buf[pos] = '\0';

	/* Decode into a real uint32_t rather than aliasing an int. */
	utf8_char_to_unicode(&code, buf);
	*out = code;

	return 0;
}
166