💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 959b56e199abadca65ba5d0744bce… captured on 2023-12-28 at 15:45:55. Gemini links have been rewritten to link to archived content
-=-=-=-=-=-=-
0 /*
1 * ISC License
2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>
3 */
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <stdint.h>
7 #include <string.h>
8 #include <stddef.h>
9 #include "macro.h"
10 #include "utf8.h"
11 #include "url.h"
12 #include "punycode.h"
13 #include "page.h"
14 #include "request.h"
15 #include "error.h"
16 #include "strnstr.h"
17 #include "strlcpy.h"
18
19 int idn_to_ascii(const char* domain, size_t dlen, char* out, size_t outlen) {
20
21 const char* ptr = domain;
22 uint32_t part[1024] = {0};
23 size_t pos = 0;
24 int n = 0;
25 int unicode = 0;
26 size_t i;
27
28 for (i = 0; i < sizeof(part) && i < dlen; i++) {
29 uint32_t len;
30 if (*ptr && *ptr != '.') {
31 if (*ptr & 128)
32 unicode = 1;
33 ptr += utf8_char_to_unicode(&part[i], ptr);
34 continue;
35 }
36 len = outlen - pos;
37 if (unicode) {
38 int ret;
39 pos += strlcpy(&out[pos], "xn--", sizeof(out) - pos);
40 ret = punycode_encode(i - n, &part[n],
41 NULL, &len, &out[pos]);
42 if (ret != punycode_success)
43 return -1;
44 pos += len;
45 } else {
46 size_t j;
47 for (j = n; j < i; j++) {
48 out[pos] = part[j];
49 pos++;
50 }
51 }
52 unicode = 0;
53 n = i + 1;
54 if (*ptr == '.') {
55 out[pos] = '.';
56 pos++;
57 ptr++;
58 }
59
60 if (!*ptr) {
61 out[pos] = '\0';
62 break;
63 }
64 }
65 return 0;
66 }
67
68 int servername_from_url(const char *url, char* out, size_t len) {
69
70 const char *start, *port, *end;
71
72 start = strnstr(url, "://", len);
73 if (!start) start = url;
74 else start += sizeof("://") - 1;
75
76 port = strchr(start, ':');
77 end = strchr(start, '/');
78 if (!end || (port && port < end)) end = port;
79 if (!end) end = start + strlen(url);
80
81 if ((size_t)(end - start) >= len) return ERROR_BUFFER_OVERFLOW;
82
83 strlcpy(out, start, end - start + 1);
84 return 0;
85 }
86
87 int protocol_from_url(const char *url) {
88 if (!memcmp(url, V("mailto:") - 1)) return PROTOCOL_MAIL;
89 if (!strnstr(url, "://", MAX_URL)) return PROTOCOL_NONE; /* default */
90 if (!memcmp(url, V("gemini://") - 1)) return PROTOCOL_GEMINI;
91 if (!memcmp(url, V("http://") - 1)) return PROTOCOL_HTTP;
92 if (!memcmp(url, V("https://") - 1)) return PROTOCOL_HTTPS;
93 if (!memcmp(url, V("gopher://") - 1)) return PROTOCOL_GOPHER;
94 return PROTOCOL_UNKNOWN;
95 }
96
97 int port_from_url(const char *url) {
98
99 const char *start, *end;
100 char buf[MAX_URL];
101 int port;
102
103 start = strnstr(url, "://", MAX_URL);
104 if (!start) start = url;
105 end = strchr(start + sizeof("://"), '/');
106 start = strchr(start + sizeof("://"), ':');
107 if (!start || (end && end < start)) return 0;
108 start++;
109 end = strchr(start, '/') + 1;
110 if (!end) end = start + strlen(start);
111 strlcpy(buf, start, end - start);
112 port = atoi(buf);
113 if (!port) return ERROR_INVALID_PORT;
114 return port;
115 }
116
117 int url_parse(struct request* request, const char *url) {
118
119 int protocol, port, ret;
120 char buf[MAX_URL];
121
122 memset(request, 0, sizeof(*request));
123
124 if ((ret = servername_from_url(url, V(request->name)))) return ret;
125
126 protocol = protocol_from_url(url);
127 if (protocol == PROTOCOL_UNKNOWN) return ERROR_UNKNOWN_PROTOCOL;
128 if (protocol == PROTOCOL_NONE) {
129 size_t length = STRLCPY(buf, "gemini://");
130 int i;
131 i = strlcpy(&buf[length], url, sizeof(buf) - length);
132 i += length;
133 buf[i] = '/';
134 buf[i + 1] = '\0';
135 protocol = PROTOCOL_GEMINI;
136 } else STRLCPY(buf, url);
137
138 port = port_from_url(url);
139 if (port < 0) return port;
140 if (!port) {
141 switch (protocol) {
142 case PROTOCOL_GEMINI: port = 1965; break;
143 }
144 }
145
146 request->protocol = protocol;
147 request->port = port;
148 STRLCPY(request->url, buf);
149
150 return 0;
151 }
152
153 int url_parse_idn(const char *in, char *out, size_t out_length) {
154 char host[256] = {0}, buf[256] = {0}, *ptr, *end;
155 size_t offset;
156 ptr = out;
157 end = out + out_length;
158 while (*ptr && ptr < end) {
159 if (utf8_char_length(*ptr) != 1) {
160 ptr = NULL;
161 break;
162 }
163 ptr++;
164 }
165 if (ptr) {
166 strlcpy(out, in, out_length);
167 return 0;
168 }
169 servername_from_url(in, V(buf));
170 if (idn_to_ascii(V(buf), V(host)))
171 return ERROR_INVALID_URL;
172 strlcpy(out, in, out_length);
173 ptr = strnstr(out, buf, out_length);
174 if (!ptr) return ERROR_INVALID_URL;
175 offset = (ptr - out) + strnlen(V(buf));
176 ptr += strlcpy(ptr, host, out_length - (ptr - out));
177 strlcpy(ptr, &in[offset], out_length - (ptr - out));
178 return 0;
179 }
180
/*
 * Copy `url` to `out` with the content of every query hidden.
 *
 * Characters are copied until a '?' is met; the '?' is kept and followed by
 * the marker "<*>", then everything up to the next '/' is dropped. Copying
 * resumes at that '/'.
 *
 * url:    NUL-terminated URL; at most `length` bytes of it are read
 * out:    destination buffer, NUL-terminated whenever length is non-zero
 * length: size of `out` in bytes
 *
 * Returns 0 on success, -1 if the result had to be cut short because it did
 * not fit in `out`.
 */
int url_hide_query(const char *url, char *out, size_t length) {
	size_t i = 0, j = 0;
	int inquery = 0;

	if (!length) return -1;

	while (i < length) {
		uint32_t ch;
		char encoded[8];
		size_t need;
		int size, k;

		i += utf8_char_to_unicode(&ch, &url[i]);
		if (!ch) break;
		if (ch == '/' && inquery) inquery = 0;
		if (inquery) continue;

		/*
		 * Encode into a scratch buffer first so the space required is
		 * known before anything is written to `out`.
		 */
		size = utf8_unicode_to_char(encoded, ch);
		if (size < 0) {
			out[j] = 0;
			return -1;
		}
		need = (size_t)size + (ch == '?' ? sizeof("<*>") - 1 : 0);
		/* keep one byte for the terminator */
		if (need >= length - j) {
			out[j] = 0;
			return -1;
		}
		for (k = 0; k < size; k++)
			out[j++] = encoded[k];
		if (ch == '?') {
			out[j++] = '<';
			out[j++] = '*';
			out[j++] = '>';
			inquery = 1;
		}
	}
	out[j] = 0;
	return 0;
}
201
/*
 * Tell whether `c` may appear in a converted URL without being escaped.
 *
 * Accepted: ASCII letters, every character from '!' to ';' except '"' and
 * '%', and the three characters '=', '~' and '_'.
 *
 * Returns 1 if `c` is accepted, 0 otherwise.
 */
static int valid_char(char c) {
	switch (c) {
	case '"':
	case '%':
		/* inside the '!'..';' range but always escaped */
		return 0;
	case '=':
	case '~':
	case '_':
		return 1;
	default:
		break;
	}
	if (c >= '!' && c <= ';') return 1;
	if (c >= 'a' && c <= 'z') return 1;
	return c >= 'A' && c <= 'Z';
}
207
/*
 * Percent-encode `url` into `out`.
 *
 * Everything up to and including the third '/' is copied as decoded by
 * utf8_char_to_unicode() and re-encoded by utf8_unicode_to_char(). After
 * that, single-byte characters accepted by valid_char() are copied and
 * every other character is written as one "%XX" group per byte, using
 * upper-case hexadecimal digits.
 *
 * url:    NUL-terminated URL
 * out:    destination buffer, NUL-terminated whenever length is non-zero
 * length: size of `out` in bytes
 *
 * Returns 0 when the whole URL was converted, -1 when the output was cut
 * short because it did not fit (a character is never written partially).
 */
int url_convert(const char *url, char *out, size_t length) {
	static const char hex[] = "0123456789ABCDEF";
	size_t i = 0, j = 0;
	int slash = 0;

	if (!length) return -1;

	/* invariant: i < length, so out[i] can always take the terminator */
	for (;;) {
		uint32_t ch;
		int len, k;
		len = utf8_char_to_unicode(&ch, &url[j]);
		if (!ch) {
			out[i] = 0;
			return 0;
		}
		/* a decoder that makes no progress would loop forever */
		if (len < 1) break;
		if (slash < 3) {
			if ((size_t)len >= length - i) break;
			slash += ch == '/';
			utf8_unicode_to_char(&out[i], ch);
			i += len;
			j += len;
			continue;
		}
		if (len == 1 && valid_char(ch)) {
			if (length - i < 2) break;
			out[i++] = url[j++];
			continue;
		}
		/* three output bytes per input byte, plus the terminator */
		if ((size_t)len * 3 >= length - i) break;
		for (k = 0; k < len; k++) {
			/* unsigned so bytes >= 0x80 are not sign-extended */
			unsigned char byte = (unsigned char)url[j++];
			out[i++] = '%';
			out[i++] = hex[byte >> 4];
			out[i++] = hex[byte & 0x0F];
		}
	}
	out[i] = 0;
	return -1;
}
239
240 int url_is_absolute(const char *url) {
241 return !!strnstr(url, "://", MAX_URL) ||
242 !memcmp(url, V("mailto:") - 1);
243 }
244