0 /*
1 * ISC License
2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>
3 */
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <stdint.h>
7 #include <string.h>
8 #include <stddef.h>
9 #include "macro.h"
10 #include "utf8.h"
11 #include "url.h"
12 #include "punycode.h"
13 #include "page.h"
14 #include "request.h"
15 #include "error.h"
16 #include "strnstr.h"
17 #include "strlcpy.h"
18
19 int idn_to_ascii(const char* domain, size_t dlen, char* out, size_t outlen) {
20
21 const char* ptr = domain;
22 uint32_t part[1024] = {0};
23 size_t pos = 0;
24 int n = 0;
25 int unicode = 0;
26 size_t i;
27
28 for (i = 0; i < sizeof(part) && i < dlen; i++) {
29 uint32_t len;
30 if (*ptr && *ptr != '.') {
31 if (*ptr & 128)
32 unicode = 1;
33 ptr += utf8_char_to_unicode(&part[i], ptr);
34 continue;
35 }
36 len = outlen - pos;
37 if (unicode) {
38 int ret;
39 pos += strlcpy(&out[pos], "xn--", sizeof(out) - pos);
40 ret = punycode_encode(i - n, &part[n],
41 NULL, &len, &out[pos]);
42 if (ret != punycode_success)
43 return -1;
44 pos += len;
45 } else {
46 size_t j;
47 for (j = n; j < i; j++) {
48 out[pos] = part[j];
49 pos++;
50 }
51 }
52 unicode = 0;
53 n = i + 1;
54 if (*ptr == '.') {
55 out[pos] = '.';
56 pos++;
57 ptr++;
58 }
59
60 if (!*ptr) {
61 out[pos] = '\0';
62 break;
63 }
64 }
65 return 0;
66 }
67
68 int servername_from_url(const char *url, char* out, size_t len) {
69
70 const char *start, *port, *end;
71
72 start = strnstr(url, "://", len);
73 if (!start) start = url;
74 else start += sizeof("://") - 1;
75
76 port = strchr(start, ':');
77 end = strchr(start, '/');
78 if (!end || (port && port < end)) end = port;
79 if (!end) end = start + strlen(url);
80
81 if ((size_t)(end - start) >= len) return ERROR_BUFFER_OVERFLOW;
82
83 strlcpy(out, start, end - start + 1);
84 return 0;
85 }
86
87 int protocol_from_url(const char *url) {
88 if (!memcmp(url, V("mailto:") - 1)) return PROTOCOL_MAIL;
89 if (!strnstr(url, "://", MAX_URL)) return PROTOCOL_NONE; /* default */
90 if (!memcmp(url, V("gemini://") - 1)) return PROTOCOL_GEMINI;
91 if (!memcmp(url, V("http://") - 1)) return PROTOCOL_HTTP;
92 if (!memcmp(url, V("https://") - 1)) return PROTOCOL_HTTPS;
93 if (!memcmp(url, V("gopher://") - 1)) return PROTOCOL_GOPHER;
94 return PROTOCOL_UNKNOWN;
95 }
96
97 int port_from_url(const char *url) {
98
99 const char *start, *end;
100 char buf[MAX_URL];
101 int port;
102
103 start = strnstr(url, "://", MAX_URL);
104 if (!start) start = url;
105 end = strchr(start + sizeof("://"), '/');
106 start = strchr(start + sizeof("://"), ':');
107 if (!start || (end && end < start)) return 0;
108 start++;
109 end = strchr(start, '/') + 1;
110 if (!end) end = start + strlen(start);
111 strlcpy(buf, start, end - start);
112 port = atoi(buf);
113 if (!port) return ERROR_INVALID_PORT;
114 return port;
115 }
116
117 int url_parse(struct request* request, const char *url) {
118
119 int protocol, port, ret;
120 char buf[MAX_URL];
121
122 memset(request, 0, sizeof(*request));
123
124 if ((ret = servername_from_url(url, V(request->name)))) return ret;
125
126 protocol = protocol_from_url(url);
127 if (protocol == PROTOCOL_UNKNOWN) return ERROR_UNKNOWN_PROTOCOL;
128 if (protocol == PROTOCOL_NONE) {
129 size_t length = STRLCPY(buf, "gemini://");
130 int i;
131 i = strlcpy(&buf[length], url, sizeof(buf) - length);
132 i += length;
133 buf[i] = '\0';
134 protocol = PROTOCOL_GEMINI;
135 } else STRLCPY(buf, url);
136
137 port = port_from_url(url);
138 if (port < 0) return port;
139 if (!port) {
140 switch (protocol) {
141 case PROTOCOL_GEMINI: port = 1965; break;
142 }
143 }
144
145 request->protocol = protocol;
146 request->port = port;
147 STRLCPY(request->url, buf);
148
149 return 0;
150 }
151
152 int url_parse_idn(const char *in, char *out, size_t out_length) {
153 char host[256] = {0}, buf[256] = {0}, *ptr, *end;
154 size_t offset;
155 ptr = out;
156 end = out + out_length;
157 while (*ptr && ptr < end) {
158 if (utf8_char_length(*ptr) != 1) {
159 ptr = NULL;
160 break;
161 }
162 ptr++;
163 }
164 if (ptr) {
165 strlcpy(out, in, out_length);
166 return 0;
167 }
168 servername_from_url(in, V(buf));
169 if (idn_to_ascii(V(buf), V(host)))
170 return ERROR_INVALID_URL;
171 strlcpy(out, in, out_length);
172 ptr = strnstr(out, buf, out_length);
173 if (!ptr) return ERROR_INVALID_URL;
174 offset = (ptr - out) + strnlen(V(buf));
175 ptr += strlcpy(ptr, host, out_length - (ptr - out));
176 strlcpy(ptr, &in[offset], out_length - (ptr - out));
177 return 0;
178 }
179
/*
 * Copy url into out with the content of every query string masked.
 *
 * Each '?' is copied and followed by "<*>"; the characters after it are
 * dropped until the next '/' or the end of the string.
 *
 * url:    NUL-terminated source; at most length bytes are examined.
 * out:    destination buffer.
 * length: NOTE(review): treated here as the capacity of out as well as
 *         the read limit on url -- confirm against the callers.
 *
 * Returns 0 on success. Returns -1 when out is too small; out then holds
 * a NUL-terminated prefix of the result (nothing is written when length
 * is 0).
 */
int url_hide_query(const char *url, char *out, size_t length) {
	static const char mask[] = "<*>";
	size_t i = 0, j = 0; /* read offset in url, write offset in out */
	int inquery = 0;

	if (!length) return -1;

	while (i < length) {
		uint32_t ch;
		char enc[8]; /* scratch space for one encoded character */
		size_t n, need;

		i += utf8_char_to_unicode(&ch, &url[i]);
		if (!ch) break;
		/* a '/' ends the masked part and is itself copied */
		if (ch == '/' && inquery) inquery = 0;
		if (inquery) continue;

		/* encode aside first so the size is known before writing */
		n = utf8_unicode_to_char(enc, ch);
		need = n;
		if (ch == '?') need += sizeof(mask) - 1;
		/* keep one byte for the terminator */
		if (need >= length - j) {
			out[j] = 0;
			return -1;
		}
		memcpy(&out[j], enc, n);
		j += n;
		if (ch == '?') {
			memcpy(&out[j], mask, sizeof(mask) - 1);
			j += sizeof(mask) - 1;
			inquery = 1;
		}
	}
	out[j] = 0;
	return 0;
}
200
/*
 * Tell whether c may be written to a URL as is.
 *
 * Returns 1 for ASCII letters, for every character from '!' to ';'
 * except '"' and '%', and for '=', '~' and '_'; returns 0 otherwise.
 */
static int valid_char(char c) {
	const int letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	const int in_range = c >= '!' && c <= ';' && c != '"' && c != '%';
	const int extra = c == '_' || c == '~' || c == '=';

	return letter || in_range || extra;
}
206
/*
 * Percent-encode url into out.
 *
 * Everything up to and including the third '/' (the "scheme://host/"
 * part) is copied byte for byte. After that, single-byte characters
 * accepted by valid_char() are copied and every other byte is written as
 * "%XX" in uppercase hexadecimal.
 *
 * url:    NUL-terminated source.
 * out:    destination buffer.
 * length: capacity of out in bytes.
 *
 * Returns 0 when the whole url was converted. Returns -1 when out is too
 * small; out then holds a NUL-terminated prefix that never ends in the
 * middle of a character (nothing is written when length is 0).
 */
int url_convert(const char *url, char *out, size_t length) {
	size_t i = 0, j = 0; /* write offset in out, read offset in url */
	int slash = 0;

	if (!length) return -1;

	/* every branch keeps i < length, so out[i] is always writable */
	for (;;) {
		uint32_t ch;
		int len, k;

		len = utf8_char_to_unicode(&ch, &url[j]);
		if (!ch) {
			out[i] = 0;
			return 0;
		}
		/*
		 * defensive: what the decoder returns on malformed input is
		 * not visible here; a length below 1 would never advance
		 */
		if (len < 1) break;
		if (slash < 3) {
			if (i + (size_t)len >= length) break;
			slash += ch == '/';
			memcpy(&out[i], &url[j], (size_t)len);
			i += (size_t)len;
			j += (size_t)len;
			continue;
		}
		if (len == 1 && valid_char(ch)) {
			if (i + 1 >= length) break;
			out[i++] = url[j++];
			continue;
		}
		/* three output bytes per input byte, plus the terminator */
		if (i + 3 * (size_t)len >= length) break;
		for (k = 0; k < len; k++) {
			/*
			 * go through unsigned char: a plain char may be
			 * negative and would be printed sign-extended
			 */
			snprintf(&out[i], length - i, "%%%02X",
				 (unsigned char)url[j++]);
			i += 3;
		}
	}
	out[i] = 0;
	return -1;
}
238
239 int url_is_absolute(const char *url) {
240 return !!strnstr(url, "://", MAX_URL) ||
241 !memcmp(url, V("mailto:") - 1);
242 }
243