💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 915f7fe756b16fab380b4b848c90f… captured on 2023-05-24 at 18:13:23. Gemini links have been rewritten to link to archived content

View Raw

More Information

⬅️ Previous capture (2023-03-20)

➡️ Next capture (2023-09-08)

-=-=-=-=-=-=-

0 /* See LICENSE file for copyright and license details. */

1 #include <stdlib.h>

2 #include <termbox.h>

3 #include <ctype.h>

4 #include "str.h"

5 #include "punycode.h"

6 #include "wcwidth.h"

7 #include "url.h"

8

/*
 * Sum the mk_wcwidth() of at most `to` code points taken from the start of
 * the UTF-8 string `ptr`.
 *
 * ptr: UTF-8 text; scanning stops at a NUL byte or after `len` bytes.
 * len: maximum number of bytes that may be read from `ptr`.
 * to:  maximum number of code points to measure.
 *
 * Returns the accumulated width. mk_wcwidth() results are added as-is, so
 * any negative value it reports lowers the total.
 */
int utf8_width_to(char* ptr, size_t len, size_t to) {
	int width = 0;
	char* max = ptr + len;
	/* Check the bound before dereferencing: ptr[len] may not be readable. */
	for (size_t i = 0; ptr < max && *ptr && i < to; i++) {
		uint32_t c;
		/* A sequence cut off by `len` must not be decoded past `max`. */
		if ((size_t)tb_utf8_char_length(*ptr) > (size_t)(max - ptr))
			break;
		ptr += tb_utf8_char_to_unicode(&c, ptr);
		width += mk_wcwidth(c);
	}
	return width;
}

19

/*
 * Sum the mk_wcwidth() of every code point in the UTF-8 string `ptr`.
 *
 * ptr: UTF-8 text; scanning stops at a NUL byte or after `len` bytes.
 * len: maximum number of bytes that may be read from `ptr`.
 *
 * Returns the accumulated width. mk_wcwidth() results are added as-is, so
 * any negative value it reports lowers the total.
 */
int utf8_width(char* ptr, size_t len) {
	int width = 0;
	char* max = ptr + len;
	/* Check the bound before dereferencing: ptr[len] may not be readable. */
	while (ptr < max && *ptr) {
		uint32_t c;
		/* A sequence cut off by `len` must not be decoded past `max`. */
		if ((size_t)tb_utf8_char_length(*ptr) > (size_t)(max - ptr))
			break;
		ptr += tb_utf8_char_to_unicode(&c, ptr);
		width += mk_wcwidth(c);
	}
	return width;
}

30

/*
 * Count the UTF-8 encoded characters in `ptr`.
 *
 * ptr: UTF-8 text; scanning stops at a NUL byte or after `len` bytes.
 * len: maximum number of bytes that may be read from `ptr`.
 *
 * Each step advances by tb_utf8_char_length() of the lead byte, so a
 * character whose encoding starts before the `len` limit is counted even
 * if it would extend beyond it.
 *
 * Returns the number of characters found.
 */
int utf8_len(char* ptr, size_t len) {
	int ret = 0;
	char* max = ptr + len;
	/* Check the bound before dereferencing: ptr[len] may not be readable. */
	while (ptr < max && *ptr) {
		ptr += tb_utf8_char_length(*ptr);
		ret++;
	}
	return ret;
}

41

/*
 * Return how many bytes of the UTF-8 string `ptr` are needed for the summed
 * mk_wcwidth() of its code points to reach `to_width`.
 *
 * ptr:      UTF-8 text; scanning stops at a NUL byte or after `len` bytes.
 * len:      maximum number of bytes that may be read from `ptr`.
 * to_width: width to reach.
 *
 * Returns the byte offset just past the code point that reached `to_width`,
 * or the number of bytes scanned if the text ends first.
 */
int utf8_len_to(char* ptr, size_t len, size_t to_width) {
	char* start = ptr;
	char* max = ptr + len;
	/*
	 * Signed accumulator: mk_wcwidth() can report a negative width, which
	 * would wrap an unsigned total to a huge value and end the scan early.
	 */
	long width = 0;
	/* Check the bound before dereferencing: ptr[len] may not be readable. */
	while (ptr < max && *ptr) {
		uint32_t c;
		/* A sequence cut off by `len` must not be decoded past `max`. */
		if ((size_t)tb_utf8_char_length(*ptr) > (size_t)(max - ptr))
			break;
		ptr += tb_utf8_char_to_unicode(&c, ptr);
		width += mk_wcwidth(c);
		if (width >= 0 && (size_t)width >= to_width)
			break;
	}
	return (int)(ptr - start);
}

55

56 void parse_relative(const char* urlbuf, int host_len, char* buf) {

57 int j = 0;

58 for (size_t i = 0; i < MAX_URL; i++) {

59 if (j + 1 >= MAX_URL) {

60 buf[j] = '\0';

61 break;

62 }

63 buf[j] = urlbuf[i];

64 j++;

65 if (urlbuf[i] == '\0') break;

66 if (i > 0 && i + 2 < MAX_URL &&

67 urlbuf[i - 1] == '/' && urlbuf[i + 0] == '.' &&

68 (urlbuf[i + 1] == '/' || urlbuf[i + 1] == '\0')) {

69 i += 1;

70 j--;

71 continue;

72 }

73 if (!(i > 0 && i + 3 < MAX_URL &&

74 urlbuf[i - 1] == '/' &&

75 urlbuf[i + 0] == '.' && urlbuf[i + 1] == '.' &&

76 (urlbuf[i + 2] == '/' || urlbuf[i + 2] == '\0')))

77 continue;

78 int k = j - 3;

79 i += 2;

80 if (k <= (int)host_len) {

81 j = k + 2;

82 buf[j] = '\0';

83 continue;

84 }

85 for (; k >= host_len && buf[k] != '/'; k--) ;

86 j = k + 1;

87 buf[k + 1] = '\0';

88 }

89 }

90

91 int parse_url(const char* url, char* host, int host_len, char* buf,

92 int url_len, unsigned short* port) {

93 char urlbuf[MAX_URL];

94 int proto = PROTO_GEMINI;

95 char* proto_ptr = strstr(url, "://");

96 char* ptr = (char*)url;

97 if (!proto_ptr) {

98 goto skip_proto;

99 }

100 char proto_buf[16];

101 for(; proto_ptr!=ptr; ptr++) {

102 if (!((*ptr > 'a' && *ptr < 'z') ||

103 (*ptr > 'A' && *ptr < 'Z')))

104 goto skip_proto;

105 if (ptr - url >= (signed)sizeof(proto_buf)) goto skip_proto;

106 proto_buf[ptr-url] = tolower(*ptr);

107 }

108 proto_buf[ptr-url] = '\0';

109 ptr+=3;

110 proto_ptr+=3;

111 if (!strcmp(proto_buf,"gemini")) goto skip_proto;

112 else if (!strcmp(proto_buf,"http")) proto = PROTO_HTTP;

113 else if (!strcmp(proto_buf,"https")) proto = PROTO_HTTPS;

114 else if (!strcmp(proto_buf,"gopher")) proto = PROTO_GOPHER;

115 else if (!strcmp(proto_buf,"file")) proto = PROTO_FILE;

116 else return -1; // unknown protocol

117 skip_proto:;

118 if (port && proto == PROTO_GEMINI) *port = 1965;

119 if (!proto_ptr) proto_ptr = ptr;

120 char* host_ptr = strchr(ptr, '/');

121 if (!host_ptr) host_ptr = ptr+strnlen(ptr, MAX_URL);

122 char* port_ptr = strchr(ptr, ':');

123 if (port_ptr && port_ptr < host_ptr) {

124 port_ptr++;

125 char c = *host_ptr;

126 *host_ptr = '\0';

127 if (port) {

128 *port = atoi(port_ptr);

129 if (*port < 1)

130 return -1; // invalid port

131 }

132 *host_ptr = c;

133 host_ptr = port_ptr - 1;

134 }

135 int utf8 = 0;

136 for(; host_ptr!=ptr; ptr++) {

137 if (host_len <= host_ptr-ptr) {

138 return -1;

139 }

140 host[ptr - proto_ptr] = *ptr;

141 if (utf8) {

142 utf8--;

143 continue;

144 }

145 utf8 += tb_utf8_char_length(*ptr) - 1;

146 if (utf8) continue;

147 if (*ptr < 32) {

148 host[ptr - proto_ptr] = '?';

149 continue;

150 }

151 }

152 host[ptr-proto_ptr] = '\0';

153 if (!buf) return proto;

154 if (url_len < 16) return -1; // buffer too small

155 unsigned int len = 0;

156 switch (proto) {

157 case PROTO_GEMINI:

158 len = strlcpy(urlbuf, "gemini://", url_len);

159 break;

160 case PROTO_HTTP:

161 len = strlcpy(urlbuf, "http://", url_len);

162 break;

163 case PROTO_HTTPS:

164 len = strlcpy(urlbuf, "https://", url_len);

165 break;

166 case PROTO_GOPHER:

167 len = strlcpy(urlbuf, "gopher://", url_len);

168 break;

169 case PROTO_FILE:

170 len = strlcpy(urlbuf, "file://", url_len);

171 break;

172 default:

173 return -1;

174 }

175 size_t l = strlcpy(urlbuf + len, host, sizeof(urlbuf) - len);

176 if (l >= url_len - len) {

177 goto parseurl_overflow;

178 }

179 len += l;

180 if (host_ptr &&

181 strlcpy(urlbuf + len, host_ptr, url_len - len) >=

182 url_len - len)

183 goto parseurl_overflow;

184 if (buf)

185 parse_relative(urlbuf, len + 1, buf);

186 return proto;

187 parseurl_overflow:

188 return -2;

189 }

190

/*
 * Tell whether `c` may be copied into a URL without percent-encoding.
 *
 * ASCII letters and digits and the characters . / : - _ ~ are always
 * accepted. '?' is accepted only when `inquery` is zero.
 *
 * Returns 1 if the character is accepted, 0 otherwise.
 */
int isCharValid(char c, int inquery) {
	if (c >= 'a' && c <= 'z') return 1;
	if (c >= 'A' && c <= 'Z') return 1;
	if (c >= '0' && c <= '9') return 1;

	switch (c) {
	case '?':
		return !inquery;
	case '.':
	case '/':
	case ':':
	case '-':
	case '_':
	case '~':
		return 1;
	default:
		return 0;
	}
}

200

201 int parse_query(const char* url, int len, char* buf, int llen) {

202 char urlbuf[1024];

203 parse_relative(url, 0, urlbuf);

204 url = urlbuf;

205 int j = 0;

206 int inquery = 0;

207 for (int i = 0; j < llen && i < len && url[i]; i++) {

208 if (url[i] == '/') inquery = 0;

209 if (!inquery || isCharValid(url[i], inquery)) {

210 if (url[i] == '?') inquery = 1;

211 buf[j] = url[i];

212 j++;

213 continue;

214 }

215 char format[8];

216 snprintf(format, sizeof(format),

217 "%%%x", (unsigned char)url[i]);

218 buf[j] = '\0';

219 j = strlcat(buf, format, llen);

220 }

221 if (j >= llen) j = llen - 1;

222 buf[j] = '\0';

223 return j;

224 }

225

226 int idn_to_ascii(const char* domain, size_t dlen, char* out, size_t outlen) {

227 const char* ptr = domain;

228 uint32_t part[1024];

229 memset(part, 0, sizeof(part));

230 size_t pos = 0;

231 int n = 0;

232 int unicode = 0;

233 for (size_t i = 0; i < sizeof(part) && i < dlen; i++) {

234 if (*ptr && *ptr != '.') {

235 if (*ptr & 128)

236 unicode = 1;

237 ptr += tb_utf8_char_to_unicode(&part[i], ptr);

238 continue;

239 }

240 uint32_t len = outlen - pos;

241 if (unicode) {

242 pos += strlcpy(&out[pos], "xn--", sizeof(out) - pos);

243 if (punycode_encode(i - n, &part[n],

244 NULL, &len, &out[pos]) !=

245 punycode_success)

246 return -1;

247 pos += len;

248 } else {

249 for (size_t j = n; j < i; j++) {

250 out[pos] = part[j];

251 pos++;

252 }

253 }

254 unicode = 0;

255 n = i + 1;

256 if (*ptr == '.') {

257 out[pos] = '.';

258 pos++;

259 ptr++;

260 }

261

262 if (!*ptr) {

263 out[pos] = '\0';

264 break;

265 }

266 }

267 return 0;

268 }

269

/*
 * Find where a link ends inside `data`, sanitising control characters.
 *
 * data: text that starts at the link; modified in place.
 * len:  number of bytes available in `data`.
 *
 * The scan advances one UTF-8 character at a time and stops at the first
 * newline, carriage return, space, tab or NUL. ASCII control characters met
 * on the way are overwritten with '?'.
 *
 * NOTE(review): the first character is stepped over before anything is
 * inspected, so data[0] is never tested or sanitised. This looks
 * intentional (the caller is presumably already on the link) but should be
 * confirmed against the call site.
 *
 * Returns the offset of the terminating character, or `len` if none is
 * found.
 */
int parse_link(char* data, int len) {
	int i = 0;
	while (i < len) {
		i += tb_utf8_char_length(data[i]);
		/* Never read or write beyond the `len` bytes given. */
		if (i >= len) {
			i = len;
			break;
		}
		char c = data[i];
		if (c == '\n' || c == '\0' ||
		    c == '\r' || c == ' ' || c == '\t')
			return i;
		/* Values below 32 as unsigned are the ASCII control characters. */
		if ((unsigned char)c < 32)
			data[i] = '?';
	}
	return i;
}

282