💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › be085323fa168f81aebbb3e1f5fce… captured on 2024-02-05 at 10:00:59. Gemini links have been rewritten to link to archived content
-=-=-=-=-=-=-
0 /*
1 * ISC License
2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>
3 */
4 #include <stdint.h>
5 #include <string.h>
6 #include <unistd.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include "macro.h"
10 #include "strlcpy.h"
11 #include "strnstr.h"
12 #include "utf8.h"
13 #include "url.h"
14 #include "error.h"
15 #include "page.h"
16 #include "request.h"
17 #define PARSER_INTERNAL
18 #include "parser.h"
19
/*
 * Copy `link` into `out`, resolving relative path segments on the way.
 *
 * The input is walked one UTF-8 sequence at a time, as decoded by
 * utf8_char_to_unicode():
 *  - a "." followed by '/' is dropped when it starts the link or follows
 *    a '/' (or a control character, which resets the tracking state);
 *  - a ".." followed by '/' in the same position rewinds the output to
 *    the previous '/', discarding the last segment already copied;
 *  - everything else is copied verbatim.
 * Copying stops at the end of `link`, or as soon as the next sequence
 * would reach `length` input bytes or leave no room for the terminator
 * in `out`.
 * When the result starts with "gemini://" and has no '/' after the
 * scheme, a trailing '/' is appended if it fits.
 *
 * link        NUL-terminated input
 * length      input limit in bytes
 * out         destination buffer, always NUL-terminated on return
 *             unless out_length is 0
 * out_length  size of `out` in bytes
 *
 * Returns the number of bytes stored in `out`, terminator excluded.
 */
int format_link(const char *link, size_t length,
		char *out, size_t out_length) {
	const size_t scheme_length = sizeof("gemini://") - 1;
	int i = 0, j = 0;
	uint32_t prev = 0;

	/* not even the terminator fits */
	if (!out_length) return 0;

	while (link[i]) {
		uint32_t ch;
		int len;

		len = utf8_char_to_unicode(&ch, &link[i]);
		if ((prev == '/' || prev == 0) && ch == '.') {
			if (link[i + len] == '/') {
				if (j > 0) {
					/*
					 * step back over the previous byte,
					 * the '/' that follows the dot is
					 * copied in its place
					 */
					j -= 1;
					i += len;
				} else {
					/*
					 * nothing was copied yet: skip the
					 * whole "./" instead of rewinding
					 * before the start of `out`
					 */
					i += len + 1;
				}
				continue;
			} else if (link[i + len] == '.' &&
					link[i + len + 1] == '/') {
				j -= 2;
				if (j < 0) j = 0;
				/* check j first so out[0] is not read when empty */
				while (j && out[j] != '/')
					j = utf8_previous(out, j);
				/* leave i on the '/' that follows ".." */
				i += len + 1;
				continue;
			}
		}
		if ((size_t)(i + len) >= length ||
				(size_t)(j + len) >= out_length)
			break;
		/* control characters are copied but reset the segment state */
		if (ch < ' ') ch = '\0';
		memcpy(&out[j], &link[i], len);
		i += len;
		j += len;
		prev = ch;
	}
	out[j] = '\0';

	/* a gemini URL without any path gets the root path */
	if (!strncmp(out, "gemini://", scheme_length) &&
			!strchr(&out[scheme_length], '/') &&
			(size_t)j + 1 < out_length) {
		out[j++] = '/';
		out[j] = '\0';
	}
	return j;
}
63
64 int parse_links(int in, size_t length, int out) {
65
66 int newline, link, header, ignore, ignore_mode;
67 size_t i, pos;
68 char title[1024] = {0};
69
70 pos = link = header = ignore_mode = ignore = 0;
71 newline = 1;
72 link = 0;
73 for (i = 0; i < length; ) {
74
75 uint32_t ch;
76
77 if (readnext(in, &ch, &i, length)) return -1;
78 if (newline && ch == '`') {
79 ignore = 1;
80 newline = 0;
81 continue;
82 }
83 if (ignore) {
84 if (ch == '`') {
85 if (++ignore < 2) continue;
86 ignore_mode = !ignore_mode;
87 ignore = 0;
88 continue;
89 }
90 ignore = 0;
91 }
92 if (ignore_mode) {
93 if (ch == '\n') newline = 1;
94 continue;
95 }
96 if (header == 2) {
97 if (pos + utf8_unicode_length(ch) >= sizeof(title)) {
98 header = 0;
99 continue;
100 }
101 if (ch == '\n') {
102 header = 0;
103 newline = 1;
104 continue;
105 }
106 if (ch == '\t') ch = ' ';
107 if (renderable(ch))
108 pos += utf8_unicode_to_char(&title[pos], ch);
109 }
110 if (header == 1 && WHITESPACE(ch)) {
111 header++;
112 }
113 if (!(link && ch == '>')) {
114 if (ch == '\n') {
115 newline = 1;
116 link = 0;
117 continue;
118 }
119 if (!newline) {
120 link = 0;
121 continue;
122 }
123 if (ch == '=') {
124 link = 1;
125 }
126 if (!pos && ch == '#') {
127 header = 1;
128 }
129 newline = 0;
130 continue;
131 }
132
133 while (i < length) {
134 if (readnext(in, &ch, &i, length)) return -1;
135 if (!WHITESPACE(ch)) break;
136 }
137
138 link = 0;
139 header = 0;
140
141 if (i >= length) break;
142 if (ch == '\n') {
143 newline = 1;
144 continue;
145 }
146
147 {
148 char link[MAX_URL] = {0};
149 char buf[MAX_URL];
150 size_t link_length;
151 link_length = utf8_unicode_to_char(link, ch);
152
153 while (i < length) {
154 size_t next;
155 if (readnext(in, &ch, &i, length)) return -1;
156 if (SEPARATOR(ch)) break;
157 next = link_length + utf8_unicode_length(ch);
158 if (next >= sizeof(link)) {
159 link_length = next;
160 break;
161 }
162 utf8_unicode_to_char(&link[link_length], ch);
163 link_length = next;
164 }
165 link_length++;
166 /* ignore links above the length limit */
167 if (link_length > sizeof(link)) {
168 newline = ch == '\n';
169 continue;
170 }
171
172 format_link(link, link_length, V(buf));
173 url_parse_idn(buf, V(link));
174 url_convert(link, V(buf));
175 link_length = strnlen(V(buf));
176 write(out, P(link_length));
177 write(out, buf, link_length);
178 }
179
180 newline = 1;
181
182 }
183 i = -1;
184 write(out, P(i));
185 write(out, V(title));
186 return 0;
187 }
188