💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 959b56e199abadca65ba5d0744bce… captured on 2023-12-28 at 15:45:55. Gemini links have been rewritten to link to archived content

View Raw

More Information

-=-=-=-=-=-=-

Go Back

0 /*

1 * ISC License

2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>

3 */

4 #include <stdio.h>

5 #include <stdlib.h>

6 #include <stdint.h>

7 #include <string.h>

8 #include <stddef.h>

9 #include "macro.h"

10 #include "utf8.h"

11 #include "url.h"

12 #include "punycode.h"

13 #include "page.h"

14 #include "request.h"

15 #include "error.h"

16 #include "strnstr.h"

17 #include "strlcpy.h"

18

19 int idn_to_ascii(const char* domain, size_t dlen, char* out, size_t outlen) {

20

21 const char* ptr = domain;

22 uint32_t part[1024] = {0};

23 size_t pos = 0;

24 int n = 0;

25 int unicode = 0;

26 size_t i;

27

28 for (i = 0; i < sizeof(part) && i < dlen; i++) {

29 uint32_t len;

30 if (*ptr && *ptr != '.') {

31 if (*ptr & 128)

32 unicode = 1;

33 ptr += utf8_char_to_unicode(&part[i], ptr);

34 continue;

35 }

36 len = outlen - pos;

37 if (unicode) {

38 int ret;

39 pos += strlcpy(&out[pos], "xn--", sizeof(out) - pos);

40 ret = punycode_encode(i - n, &part[n],

41 NULL, &len, &out[pos]);

42 if (ret != punycode_success)

43 return -1;

44 pos += len;

45 } else {

46 size_t j;

47 for (j = n; j < i; j++) {

48 out[pos] = part[j];

49 pos++;

50 }

51 }

52 unicode = 0;

53 n = i + 1;

54 if (*ptr == '.') {

55 out[pos] = '.';

56 pos++;

57 ptr++;

58 }

59

60 if (!*ptr) {

61 out[pos] = '\0';

62 break;

63 }

64 }

65 return 0;

66 }

67

68 int servername_from_url(const char *url, char* out, size_t len) {

69

70 const char *start, *port, *end;

71

72 start = strnstr(url, "://", len);

73 if (!start) start = url;

74 else start += sizeof("://") - 1;

75

76 port = strchr(start, ':');

77 end = strchr(start, '/');

78 if (!end || (port && port < end)) end = port;

79 if (!end) end = start + strlen(url);

80

81 if ((size_t)(end - start) >= len) return ERROR_BUFFER_OVERFLOW;

82

83 strlcpy(out, start, end - start + 1);

84 return 0;

85 }

86

87 int protocol_from_url(const char *url) {

88 if (!memcmp(url, V("mailto:") - 1)) return PROTOCOL_MAIL;

89 if (!strnstr(url, "://", MAX_URL)) return PROTOCOL_NONE; /* default */

90 if (!memcmp(url, V("gemini://") - 1)) return PROTOCOL_GEMINI;

91 if (!memcmp(url, V("http://") - 1)) return PROTOCOL_HTTP;

92 if (!memcmp(url, V("https://") - 1)) return PROTOCOL_HTTPS;

93 if (!memcmp(url, V("gopher://") - 1)) return PROTOCOL_GOPHER;

94 return PROTOCOL_UNKNOWN;

95 }

96

97 int port_from_url(const char *url) {

98

99 const char *start, *end;

100 char buf[MAX_URL];

101 int port;

102

103 start = strnstr(url, "://", MAX_URL);

104 if (!start) start = url;

105 end = strchr(start + sizeof("://"), '/');

106 start = strchr(start + sizeof("://"), ':');

107 if (!start || (end && end < start)) return 0;

108 start++;

109 end = strchr(start, '/') + 1;

110 if (!end) end = start + strlen(start);

111 strlcpy(buf, start, end - start);

112 port = atoi(buf);

113 if (!port) return ERROR_INVALID_PORT;

114 return port;

115 }

116

117 int url_parse(struct request* request, const char *url) {

118

119 int protocol, port, ret;

120 char buf[MAX_URL];

121

122 memset(request, 0, sizeof(*request));

123

124 if ((ret = servername_from_url(url, V(request->name)))) return ret;

125

126 protocol = protocol_from_url(url);

127 if (protocol == PROTOCOL_UNKNOWN) return ERROR_UNKNOWN_PROTOCOL;

128 if (protocol == PROTOCOL_NONE) {

129 size_t length = STRLCPY(buf, "gemini://");

130 int i;

131 i = strlcpy(&buf[length], url, sizeof(buf) - length);

132 i += length;

133 buf[i] = '/';

134 buf[i + 1] = '\0';

135 protocol = PROTOCOL_GEMINI;

136 } else STRLCPY(buf, url);

137

138 port = port_from_url(url);

139 if (port < 0) return port;

140 if (!port) {

141 switch (protocol) {

142 case PROTOCOL_GEMINI: port = 1965; break;

143 }

144 }

145

146 request->protocol = protocol;

147 request->port = port;

148 STRLCPY(request->url, buf);

149

150 return 0;

151 }

152

153 int url_parse_idn(const char *in, char *out, size_t out_length) {

154 char host[256] = {0}, buf[256] = {0}, *ptr, *end;

155 size_t offset;

156 ptr = out;

157 end = out + out_length;

158 while (*ptr && ptr < end) {

159 if (utf8_char_length(*ptr) != 1) {

160 ptr = NULL;

161 break;

162 }

163 ptr++;

164 }

165 if (ptr) {

166 strlcpy(out, in, out_length);

167 return 0;

168 }

169 servername_from_url(in, V(buf));

170 if (idn_to_ascii(V(buf), V(host)))

171 return ERROR_INVALID_URL;

172 strlcpy(out, in, out_length);

173 ptr = strnstr(out, buf, out_length);

174 if (!ptr) return ERROR_INVALID_URL;

175 offset = (ptr - out) + strnlen(V(buf));

176 ptr += strlcpy(ptr, host, out_length - (ptr - out));

177 strlcpy(ptr, &in[offset], out_length - (ptr - out));

178 return 0;

179 }

180

/*
 * Copy `url` to `out` with every query string masked.
 *
 * Everything between a '?' and the next '/' (or the end of the url) is
 * replaced by "<*>"; the '?' itself is kept.
 *
 * url     NUL-terminated url; at most `length` bytes of it are read
 * out     destination buffer, always NUL-terminated when length > 0
 * length  treated as the capacity of `out` in bytes, terminator included
 *
 * The output is silently truncated when it does not fit. Always returns 0.
 */
int url_hide_query(const char *url, char *out, size_t length) {
	static const char mask[] = "<*>";
	size_t i = 0, j = 0;
	int inquery = 0;

	if (!length) return 0;
	while (i < length) {
		char encoded[8]; /* one encoded character, with headroom */
		uint32_t ch;
		size_t size, need;

		i += utf8_char_to_unicode(&ch, &url[i]);
		if (!ch) break;
		if (ch == '/') inquery = 0;
		if (inquery) continue;

		/* encode aside first so the size is known before writing */
		size = utf8_unicode_to_char(encoded, ch);
		need = size;
		if (ch == '?') need += sizeof(mask) - 1;
		/* keep one byte for the terminator */
		if (size > sizeof(encoded) || need >= length - j) break;

		memcpy(&out[j], encoded, size);
		j += size;
		if (ch == '?') {
			memcpy(&out[j], mask, sizeof(mask) - 1);
			j += sizeof(mask) - 1;
			inquery = 1;
		}
	}
	out[j] = 0;
	return 0;
}

201

/*
 * Tell whether `c` may be written to a url as is.
 *
 * Returns 1 for ASCII letters, for the characters from '!' to ';'
 * (digits included) except '"' and '%', and for '=', '~' and '_'.
 * Returns 0 for everything else.
 */
static int valid_char(char c) {
	const int upper = c >= 'A' && c <= 'Z';
	const int lower = c >= 'a' && c <= 'z';
	const int bang_to_semicolon = c >= '!' && c <= ';';

	switch (c) {
	case '"':
	case '%':
		/* inside the '!'..';' range but never allowed */
		return 0;
	case '=':
	case '~':
	case '_':
		return 1;
	default:
		return upper || lower || bang_to_semicolon;
	}
}

207

/*
 * Percent-encode a NUL-terminated url into `out`.
 *
 * Everything up to and including the third '/' is copied as is. After
 * that, single-byte characters accepted by valid_char() are copied and
 * every other byte is written as '%' followed by two uppercase hex digits.
 *
 * url     NUL-terminated UTF-8 url
 * out     destination buffer, NUL-terminated whenever length > 0
 * length  capacity of `out` in bytes, terminator included
 *
 * Returns 0 on success, -1 when the result does not fit in `out` (the
 * output is then truncated on a character boundary) or when the decoder
 * reports a character of no length.
 */
int url_convert(const char *url, char *out, size_t length) {
	static const char hex[] = "0123456789ABCDEF";
	size_t i = 0, j = 0;
	int slash = 0;

	if (!length) return -1;
	while (i < length) {
		uint32_t ch;
		int len, k;

		len = utf8_char_to_unicode(&ch, &url[j]);
		if (!ch) {
			out[i] = 0;
			return 0;
		}
		/* refuse to go on rather than loop without progress */
		if (len < 1) break;
		if (slash < 3) {
			if (i + (size_t)len >= length) break;
			slash += ch == '/';
			utf8_unicode_to_char(&out[i], ch);
			i += len;
			j += len;
			continue;
		}
		if (len == 1 && valid_char(ch)) {
			out[i++] = url[j++];
			continue;
		}
		/* the whole character must fit: stopping halfway would */
		/* leave a partial escape and no progress on the next pass */
		if (i + 3 * (size_t)len >= length) break;
		for (k = 0; k < len; k++) {
			/* unsigned, so that bytes above 127 yield two digits */
			unsigned char byte = (unsigned char)url[j++];
			out[i++] = '%';
			out[i++] = hex[byte >> 4];
			out[i++] = hex[byte & 15];
		}
	}
	out[i < length ? i : length - 1] = 0;
	return -1;
}

239

240 int url_is_absolute(const char *url) {

241 return !!strnstr(url, "://", MAX_URL) ||

242 !memcmp(url, V("mailto:") - 1);

243 }

244