Go Back

0 /*

1 * ISC License

2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>

3 */

4 #include <stdio.h>

5 #include <stdlib.h>

6 #include <stdint.h>

7 #include <string.h>

8 #include <stddef.h>

9 #include "macro.h"

10 #include "utf8.h"

11 #include "url.h"

12 #include "punycode.h"

13 #include "page.h"

14 #include "request.h"

15 #include "error.h"

16 #include "strnstr.h"

17 #include "strlcpy.h"

18

19 int idn_to_ascii(const char* domain, size_t dlen, char* out, size_t outlen) {

20

21 const char* ptr = domain;

22 uint32_t part[1024] = {0};

23 size_t pos = 0;

24 int n = 0;

25 int unicode = 0;

26 size_t i;

27

28 for (i = 0; i < sizeof(part) && i < dlen; i++) {

29 uint32_t len;

30 if (*ptr && *ptr != '.') {

31 if (*ptr & 128)

32 unicode = 1;

33 ptr += utf8_char_to_unicode(&part[i], ptr);

34 continue;

35 }

36 len = outlen - pos;

37 if (unicode) {

38 int ret;

39 pos += strlcpy(&out[pos], "xn--", sizeof(out) - pos);

40 ret = punycode_encode(i - n, &part[n],

41 NULL, &len, &out[pos]);

42 if (ret != punycode_success)

43 return -1;

44 pos += len;

45 } else {

46 size_t j;

47 for (j = n; j < i; j++) {

48 out[pos] = part[j];

49 pos++;

50 }

51 }

52 unicode = 0;

53 n = i + 1;

54 if (*ptr == '.') {

55 out[pos] = '.';

56 pos++;

57 ptr++;

58 }

59

60 if (!*ptr) {

61 out[pos] = '\0';

62 break;

63 }

64 }

65 return 0;

66 }

67

68 int servername_from_url(const char *url, char* out, size_t len) {

69

70 const char *start, *port, *end;

71

72 start = strnstr(url, "://", len);

73 if (!start) start = url;

74 else start += sizeof("://") - 1;

75

76 port = strchr(start, ':');

77 end = strchr(start, '/');

78 if (!end || (port && port < end)) end = port;

79 if (!end) end = start + strlen(url);

80

81 if ((size_t)(end - start) >= len) return ERROR_BUFFER_OVERFLOW;

82

83 strlcpy(out, start, end - start + 1);

84 return 0;

85 }

86

87 int protocol_from_url(const char *url) {

88 if (!memcmp(url, V("mailto:") - 1)) return PROTOCOL_MAIL;

89 if (!strnstr(url, "://", MAX_URL)) return PROTOCOL_NONE; /* default */

90 if (!memcmp(url, V("gemini://") - 1)) return PROTOCOL_GEMINI;

91 if (!memcmp(url, V("http://") - 1)) return PROTOCOL_HTTP;

92 if (!memcmp(url, V("https://") - 1)) return PROTOCOL_HTTPS;

93 if (!memcmp(url, V("gopher://") - 1)) return PROTOCOL_GOPHER;

94 return PROTOCOL_UNKNOWN;

95 }

96

97 int port_from_url(const char *url) {

98

99 const char *start, *end;

100 char buf[MAX_URL];

101 int port;

102

103 start = strnstr(url, "://", MAX_URL);

104 if (!start) start = url;

105 end = strchr(start + sizeof("://"), '/');

106 start = strchr(start + sizeof("://"), ':');

107 if (!start || (end && end < start)) return 0;

108 start++;

109 end = strchr(start, '/') + 1;

110 if (!end) end = start + strlen(start);

111 strlcpy(buf, start, end - start);

112 port = atoi(buf);

113 if (!port) return ERROR_INVALID_PORT;

114 return port;

115 }

116

117 int url_parse(struct request* request, const char *url) {

118

119 int protocol, port, ret;

120 char buf[MAX_URL];

121

122 memset(request, 0, sizeof(*request));

123

124 if ((ret = servername_from_url(url, V(request->name)))) return ret;

125

126 protocol = protocol_from_url(url);

127 if (protocol == PROTOCOL_UNKNOWN) return ERROR_UNKNOWN_PROTOCOL;

128 if (protocol == PROTOCOL_NONE) {

129 size_t length = STRLCPY(buf, "gemini://");

130 int i;

131 i = strlcpy(&buf[length], url, sizeof(buf) - length);

132 i += length;

133 buf[i] = '\0';

134 protocol = PROTOCOL_GEMINI;

135 } else STRLCPY(buf, url);

136

137 port = port_from_url(url);

138 if (port < 0) return port;

139 if (!port) {

140 switch (protocol) {

141 case PROTOCOL_GEMINI: port = 1965; break;

142 }

143 }

144

145 request->protocol = protocol;

146 request->port = port;

147 STRLCPY(request->url, buf);

148

149 return 0;

150 }

151

152 int url_parse_idn(const char *in, char *out, size_t out_length) {

153 char host[256] = {0}, buf[256] = {0}, *ptr, *end;

154 size_t offset;

155 ptr = out;

156 end = out + out_length;

157 while (*ptr && ptr < end) {

158 if (utf8_char_length(*ptr) != 1) {

159 ptr = NULL;

160 break;

161 }

162 ptr++;

163 }

164 if (ptr) {

165 strlcpy(out, in, out_length);

166 return 0;

167 }

168 servername_from_url(in, V(buf));

169 if (idn_to_ascii(V(buf), V(host)))

170 return ERROR_INVALID_URL;

171 strlcpy(out, in, out_length);

172 ptr = strnstr(out, buf, out_length);

173 if (!ptr) return ERROR_INVALID_URL;

174 offset = (ptr - out) + strnlen(V(buf));

175 ptr += strlcpy(ptr, host, out_length - (ptr - out));

176 strlcpy(ptr, &in[offset], out_length - (ptr - out));

177 return 0;

178 }

179

/*
 * Copy a URL to out with the content of its query hidden.
 *
 * Everything is copied up to and including a '?'; the marker "<*>" is
 * then written and the following characters are dropped until the next
 * '/', from which copying resumes.
 *
 * url     NUL-terminated URL; at most length bytes are examined
 * out     destination buffer, always NUL-terminated when length > 0
 * length  size of out in bytes
 *
 * Returns 0 on success, -1 when the result had to be truncated to fit in
 * out (or when length is 0).
 */
int url_hide_query(const char *url, char *out, size_t length) {
	size_t i = 0, j = 0;
	int inquery = 0;

	if (!length) return -1;

	while (i < length) {
		uint32_t ch;
		char enc[8]; /* scratch space for one encoded character */
		size_t n, need;

		i += utf8_char_to_unicode(&ch, &url[i]);
		if (!ch) break;
		if (ch == '/' && inquery) inquery = 0;
		if (inquery) continue;

		/* encode aside first so the size is known before out is touched */
		n = utf8_unicode_to_char(enc, ch);
		need = n + (ch == '?' ? sizeof("<*>") - 1 : 0);
		if (n > sizeof(enc) || j + need >= length) {
			out[j] = 0;
			return -1;
		}
		memcpy(&out[j], enc, n);
		j += n;
		if (ch == '?') {
			memcpy(&out[j], "<*>", sizeof("<*>") - 1);
			j += sizeof("<*>") - 1;
			inquery = 1;
		}
	}
	out[j] = 0;
	return 0;
}

200

/*
 * Tell whether a byte may appear unescaped in a URL path.
 *
 * Accepted: ASCII letters, every character from '!' to ';' except '"'
 * and '%', plus '=', '~' and '_'. Returns 1 when accepted, 0 otherwise.
 */
static int valid_char(char c) {
	switch (c) {
	case '"':
	case '%':
		return 0;
	case '=':
	case '~':
	case '_':
		return 1;
	default:
		break;
	}
	if ('!' <= c && c <= ';') return 1;
	if ('A' <= c && c <= 'Z') return 1;
	return 'a' <= c && c <= 'z';
}

206

/*
 * Percent-encode a URL.
 *
 * Everything up to and including the third '/' is copied unchanged.
 * After that, single-byte characters accepted by valid_char are copied
 * and every other character is written as one "%XX" group per byte.
 *
 * url     NUL-terminated URL
 * out     destination buffer, always NUL-terminated when length > 0
 * length  size of out in bytes
 *
 * Returns 0 on success, -1 when the result does not fit in out; in that
 * case out holds the part converted so far, cut on a character boundary.
 */
int url_convert(const char *url, char *out, size_t length) {
	size_t i = 0, j = 0;
	int slash = 0;

	if (!length) return -1;

	while (i < length) {
		uint32_t ch;
		int len, k;

		len = utf8_char_to_unicode(&ch, &url[j]);
		if (!ch) {
			out[i] = 0;
			return 0;
		}
		/* defensive: a non-positive length would never advance */
		if (len < 1) break;

		if (slash < 3) {
			if (i + (size_t)len >= length) break;
			slash += ch == '/';
			utf8_unicode_to_char(&out[i], ch);
			i += len;
			j += len;
			continue;
		}
		if (len == 1 && valid_char(ch)) {
			if (i + 1 >= length) break;
			out[i++] = url[j++];
			continue;
		}
		/* the whole character must fit: three bytes per source byte,
		 * plus the terminator written by snprintf */
		if (i + 3 * (size_t)len >= length) break;
		for (k = 0; k < len; k++) {
			/* unsigned char: a signed char >= 0x80 would print as
			 * FFFFFFxx */
			snprintf(&out[i], length - i, "%%%02X",
					(unsigned char)url[j]);
			i += 3;
			j++;
		}
	}
	/* every write above keeps i < length */
	out[i] = 0;
	return -1;
}

238

239 int url_is_absolute(const char *url) {

240 return !!strnstr(url, "://", MAX_URL) ||

241 !memcmp(url, V("mailto:") - 1);

242 }

243