💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 915f7fe756b16fab380b4b848c90f… captured on 2023-09-08 at 16:24:30. Gemini links have been rewritten to link to archived content
⬅️ Previous capture (2023-03-20)
-=-=-=-=-=-=-
0 /* See LICENSE file for copyright and license details. */
1 #include <stdlib.h>
2 #include <termbox.h>
3 #include <ctype.h>
4 #include "str.h"
5 #include "punycode.h"
6 #include "wcwidth.h"
7 #include "url.h"
8
/*
 * Sum the mk_wcwidth() values of the first `to` UTF-8 characters of `ptr`.
 *
 * ptr: UTF-8 text; scanning stops at a NUL byte.
 * len: maximum number of bytes of `ptr` that may be examined.
 * to:  maximum number of characters to measure.
 *
 * Returns the accumulated width.
 */
int utf8_width_to(char* ptr, size_t len, size_t to) {
	int width = 0;
	char* max = ptr + len;
	/* Check the bound before dereferencing so ptr[len] is never read. */
	for (size_t i = 0; ptr < max && *ptr && i < to; i++) {
		uint32_t c;
		ptr += tb_utf8_char_to_unicode(&c, ptr);
		width += mk_wcwidth(c);
	}
	return width;
}
19
/*
 * Sum the mk_wcwidth() values of every UTF-8 character in `ptr`.
 *
 * ptr: UTF-8 text; scanning stops at a NUL byte.
 * len: maximum number of bytes of `ptr` that may be examined.
 *
 * Returns the accumulated width.
 */
int utf8_width(char* ptr, size_t len) {
	int width = 0;
	char* max = ptr + len;
	/* Check the bound before dereferencing so ptr[len] is never read. */
	while (ptr < max && *ptr) {
		uint32_t c;
		ptr += tb_utf8_char_to_unicode(&c, ptr);
		width += mk_wcwidth(c);
	}
	return width;
}
30
/*
 * Count the UTF-8 characters in `ptr`.
 *
 * ptr: UTF-8 text; scanning stops at a NUL byte.
 * len: maximum number of bytes of `ptr` that may be examined.
 *
 * Returns the number of characters stepped over; each step advances by
 * tb_utf8_char_length() of the leading byte.
 */
int utf8_len(char* ptr, size_t len) {
	int ret = 0;
	char* max = ptr + len;
	/* Check the bound before dereferencing so ptr[len] is never read. */
	while (ptr < max && *ptr) {
		ptr += tb_utf8_char_length(*ptr);
		ret++;
	}
	return ret;
}
41
/*
 * Find how many bytes of `ptr` are needed to reach a given width.
 *
 * ptr:      UTF-8 text; scanning stops at a NUL byte.
 * len:      maximum number of bytes of `ptr` that may be examined.
 * to_width: target width, measured as the running sum of mk_wcwidth().
 *
 * Returns the byte offset just after the character whose width brings the
 * running total to at least `to_width`, or the number of bytes scanned when
 * the text ends first.
 */
int utf8_len_to(char* ptr, size_t len, size_t to_width) {
	char* start = ptr;
	char* max = ptr + len;
	size_t width = 0;
	/* Check the bound before dereferencing so ptr[len] is never read. */
	while (ptr < max && *ptr) {
		uint32_t c;
		ptr += tb_utf8_char_to_unicode(&c, ptr);
		width += mk_wcwidth(c);
		if (width >= to_width)
			break;
	}
	return ptr - start;
}
55
56 void parse_relative(const char* urlbuf, int host_len, char* buf) {
57 int j = 0;
58 for (size_t i = 0; i < MAX_URL; i++) {
59 if (j + 1 >= MAX_URL) {
60 buf[j] = '\0';
61 break;
62 }
63 buf[j] = urlbuf[i];
64 j++;
65 if (urlbuf[i] == '\0') break;
66 if (i > 0 && i + 2 < MAX_URL &&
67 urlbuf[i - 1] == '/' && urlbuf[i + 0] == '.' &&
68 (urlbuf[i + 1] == '/' || urlbuf[i + 1] == '\0')) {
69 i += 1;
70 j--;
71 continue;
72 }
73 if (!(i > 0 && i + 3 < MAX_URL &&
74 urlbuf[i - 1] == '/' &&
75 urlbuf[i + 0] == '.' && urlbuf[i + 1] == '.' &&
76 (urlbuf[i + 2] == '/' || urlbuf[i + 2] == '\0')))
77 continue;
78 int k = j - 3;
79 i += 2;
80 if (k <= (int)host_len) {
81 j = k + 2;
82 buf[j] = '\0';
83 continue;
84 }
85 for (; k >= host_len && buf[k] != '/'; k--) ;
86 j = k + 1;
87 buf[k + 1] = '\0';
88 }
89 }
90
91 int parse_url(const char* url, char* host, int host_len, char* buf,
92 int url_len, unsigned short* port) {
93 char urlbuf[MAX_URL];
94 int proto = PROTO_GEMINI;
95 char* proto_ptr = strstr(url, "://");
96 char* ptr = (char*)url;
97 if (!proto_ptr) {
98 goto skip_proto;
99 }
100 char proto_buf[16];
101 for(; proto_ptr!=ptr; ptr++) {
102 if (!((*ptr > 'a' && *ptr < 'z') ||
103 (*ptr > 'A' && *ptr < 'Z')))
104 goto skip_proto;
105 if (ptr - url >= (signed)sizeof(proto_buf)) goto skip_proto;
106 proto_buf[ptr-url] = tolower(*ptr);
107 }
108 proto_buf[ptr-url] = '\0';
109 ptr+=3;
110 proto_ptr+=3;
111 if (!strcmp(proto_buf,"gemini")) goto skip_proto;
112 else if (!strcmp(proto_buf,"http")) proto = PROTO_HTTP;
113 else if (!strcmp(proto_buf,"https")) proto = PROTO_HTTPS;
114 else if (!strcmp(proto_buf,"gopher")) proto = PROTO_GOPHER;
115 else if (!strcmp(proto_buf,"file")) proto = PROTO_FILE;
116 else return -1; // unknown protocol
117 skip_proto:;
118 if (port && proto == PROTO_GEMINI) *port = 1965;
119 if (!proto_ptr) proto_ptr = ptr;
120 char* host_ptr = strchr(ptr, '/');
121 if (!host_ptr) host_ptr = ptr+strnlen(ptr, MAX_URL);
122 char* port_ptr = strchr(ptr, ':');
123 if (port_ptr && port_ptr < host_ptr) {
124 port_ptr++;
125 char c = *host_ptr;
126 *host_ptr = '\0';
127 if (port) {
128 *port = atoi(port_ptr);
129 if (*port < 1)
130 return -1; // invalid port
131 }
132 *host_ptr = c;
133 host_ptr = port_ptr - 1;
134 }
135 int utf8 = 0;
136 for(; host_ptr!=ptr; ptr++) {
137 if (host_len <= host_ptr-ptr) {
138 return -1;
139 }
140 host[ptr - proto_ptr] = *ptr;
141 if (utf8) {
142 utf8--;
143 continue;
144 }
145 utf8 += tb_utf8_char_length(*ptr) - 1;
146 if (utf8) continue;
147 if (*ptr < 32) {
148 host[ptr - proto_ptr] = '?';
149 continue;
150 }
151 }
152 host[ptr-proto_ptr] = '\0';
153 if (!buf) return proto;
154 if (url_len < 16) return -1; // buffer too small
155 unsigned int len = 0;
156 switch (proto) {
157 case PROTO_GEMINI:
158 len = strlcpy(urlbuf, "gemini://", url_len);
159 break;
160 case PROTO_HTTP:
161 len = strlcpy(urlbuf, "http://", url_len);
162 break;
163 case PROTO_HTTPS:
164 len = strlcpy(urlbuf, "https://", url_len);
165 break;
166 case PROTO_GOPHER:
167 len = strlcpy(urlbuf, "gopher://", url_len);
168 break;
169 case PROTO_FILE:
170 len = strlcpy(urlbuf, "file://", url_len);
171 break;
172 default:
173 return -1;
174 }
175 size_t l = strlcpy(urlbuf + len, host, sizeof(urlbuf) - len);
176 if (l >= url_len - len) {
177 goto parseurl_overflow;
178 }
179 len += l;
180 if (host_ptr &&
181 strlcpy(urlbuf + len, host_ptr, url_len - len) >=
182 url_len - len)
183 goto parseurl_overflow;
184 if (buf)
185 parse_relative(urlbuf, len + 1, buf);
186 return proto;
187 parseurl_overflow:
188 return -2;
189 }
190
/*
 * Tell whether `c` may be copied as is.
 *
 * ASCII letters, digits and the characters . / : - _ ~ are always accepted;
 * '?' is accepted only while `inquery` is zero.
 *
 * Returns 1 when the character is accepted, 0 otherwise.
 */
int isCharValid(char c, int inquery) {
	if ('a' <= c && c <= 'z') return 1;
	if ('A' <= c && c <= 'Z') return 1;
	if ('0' <= c && c <= '9') return 1;
	switch (c) {
	case '.':
	case '/':
	case ':':
	case '-':
	case '_':
	case '~':
		return 1;
	case '?':
		return inquery ? 0 : 1;
	default:
		return 0;
	}
}
200
201 int parse_query(const char* url, int len, char* buf, int llen) {
202 char urlbuf[1024];
203 parse_relative(url, 0, urlbuf);
204 url = urlbuf;
205 int j = 0;
206 int inquery = 0;
207 for (int i = 0; j < llen && i < len && url[i]; i++) {
208 if (url[i] == '/') inquery = 0;
209 if (!inquery || isCharValid(url[i], inquery)) {
210 if (url[i] == '?') inquery = 1;
211 buf[j] = url[i];
212 j++;
213 continue;
214 }
215 char format[8];
216 snprintf(format, sizeof(format),
217 "%%%x", (unsigned char)url[i]);
218 buf[j] = '\0';
219 j = strlcat(buf, format, llen);
220 }
221 if (j >= llen) j = llen - 1;
222 buf[j] = '\0';
223 return j;
224 }
225
226 int idn_to_ascii(const char* domain, size_t dlen, char* out, size_t outlen) {
227 const char* ptr = domain;
228 uint32_t part[1024];
229 memset(part, 0, sizeof(part));
230 size_t pos = 0;
231 int n = 0;
232 int unicode = 0;
233 for (size_t i = 0; i < sizeof(part) && i < dlen; i++) {
234 if (*ptr && *ptr != '.') {
235 if (*ptr & 128)
236 unicode = 1;
237 ptr += tb_utf8_char_to_unicode(&part[i], ptr);
238 continue;
239 }
240 uint32_t len = outlen - pos;
241 if (unicode) {
242 pos += strlcpy(&out[pos], "xn--", sizeof(out) - pos);
243 if (punycode_encode(i - n, &part[n],
244 NULL, &len, &out[pos]) !=
245 punycode_success)
246 return -1;
247 pos += len;
248 } else {
249 for (size_t j = n; j < i; j++) {
250 out[pos] = part[j];
251 pos++;
252 }
253 }
254 unicode = 0;
255 n = i + 1;
256 if (*ptr == '.') {
257 out[pos] = '.';
258 pos++;
259 ptr++;
260 }
261
262 if (!*ptr) {
263 out[pos] = '\0';
264 break;
265 }
266 }
267 return 0;
268 }
269
/*
 * Measure a link target inside `data` and neutralise control characters.
 *
 * data: text to scan; ASCII control bytes met on the way are overwritten
 *       with '?'.
 * len:  number of bytes of `data` that may be examined.
 *
 * The index is advanced by one UTF-8 character before each check, so the
 * byte at index 0 is never examined.
 *
 * Returns the index of the first newline, carriage return, space, tab or
 * NUL found, or `len` when none is met within bounds (0 when `len` is not
 * positive).
 */
int parse_link(char* data, int len) {
	int i = 0;
	while (i < len) {
		i += tb_utf8_char_length(data[i]);
		/* the step may have left the buffer: do not read data[i] */
		if (i >= len)
			return len;
		char c = data[i];
		if (c == '\n' || c == '\0' ||
		    c == '\r' || c == ' ' || c == '\t')
			return i;
		/* bytes >= 0x80 belong to UTF-8 sequences and are kept */
		if ((unsigned char)c < 32)
			data[i] = '?';
	}
	return i;
}
282