💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 83776a0171e08161784a5c68a813d… captured on 2023-12-28 at 15:44:24. Gemini links have been rewritten to link to archived content
-=-=-=-=-=-=-
0 /*
1 * ISC License
2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>
3 */
4 #include <stdint.h>
5 #include <string.h>
6 #include <unistd.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include "macro.h"
10 #include "strlcpy.h"
11 #include "strnstr.h"
12 #include "utf8.h"
13 #include "url.h"
14 #include "error.h"
15 #include "page.h"
16 #include "request.h"
17 #define PARSER_INTERNAL
18 #include "parser.h"
19
/*
 * Copy `link` into `out` while collapsing "./" and "../" path segments,
 * then make sure a "gemini://host" URL without any path ends with '/'.
 *
 * link       - NUL-terminated input string
 * length     - size in bytes of the input buffer; copying stops before a
 *              character would reach this limit
 * out        - destination buffer, always NUL-terminated on return
 *              (when out_length is not zero)
 * out_length - size in bytes of `out`
 *
 * Returns the number of bytes stored in `out`, terminator excluded.
 */
int format_link(const char *link, size_t length,
		char *out, size_t out_length) {
	int i = 0, j = 0;
	uint32_t prev = 0;

	/* no room even for the terminator */
	if (!out_length) return 0;

	while (link[i]) {
		uint32_t ch;
		int len;
		len = utf8_char_to_unicode(&ch, &link[i]);
		if ((prev == '/' || prev == 0) && ch == '.') {
			if (link[i + len] == '/') {
				if (j > 0) {
					/*
					 * drop the byte emitted last; the '/'
					 * that follows the dot takes its place
					 */
					j -= 1;
					i += len;
				} else {
					/*
					 * nothing emitted yet: skip the whole
					 * "./" instead of moving j to -1 and
					 * writing in front of `out`
					 */
					i += len + 1;
				}
				continue;
			} else if (link[i + len] == '.' &&
				   link[i + len + 1] == '/') {
				/* rewind the output to the previous '/' */
				j -= 2;
				if (j < 0) j = 0;
				/* test j first so out[0] is never read unset */
				while (j && out[j] != '/')
					j = utf8_previous(out, j);
				/* skip both dots, the '/' is copied next */
				i += len + 1;
				continue;
			}
		}
		if (i + len >= (ssize_t)length ||
		    j + len >= (ssize_t)out_length) {
			out[j] = '\0';
			break;
		}
		/*
		 * control characters are still copied, but they count as a
		 * segment start for the dot handling above
		 */
		if (ch < ' ') ch = '\0';
		memcpy(&out[j], &link[i], len);
		i += len;
		j += len;
		prev = ch;
	}
	out[j] = '\0';
	if (strncmp(out, "gemini://", sizeof("gemini://") - 1) == 0) {
		/*
		 * The search historically starts one byte after the first
		 * host byte (sizeof counts the terminator); keep that offset
		 * but never start beyond the end of the string.
		 */
		size_t start = sizeof("gemini://");
		if ((size_t)j < start) start = (size_t)j;
		/* only append when '/' and the terminator both fit */
		if (!strchr(&out[start], '/') &&
		    (size_t)j + 1 < out_length) {
			out[j++] = '/';
			out[j] = '\0';
		}
	}
	return j;
}
63
64 int parse_links(int in, size_t length, int out) {
65
66 int newline, link, header;
67 size_t i, pos;
68 char title[1024] = {0};
69
70 header = 0;
71 newline = 1;
72 link = 0;
73 pos = 0;
74 for (i = 0; i < length; ) {
75
76 uint32_t ch;
77
78 if (readnext(in, &ch, &i)) return -1;
79 if (header == 2) {
80 if (ch == '\n') {
81 newline = 1;
82 header = 0;
83 continue;
84 }
85 if (ch == '\t') ch = ' ';
86 if (ch >= ' ')
87 pos += utf8_unicode_to_char(&title[pos], ch);
88 }
89 if (header == 1 && WHITESPACE(ch)) {
90 header++;
91 }
92 if (!(link && ch == '>')) {
93 if (ch == '\n') {
94 newline = 1;
95 link = 0;
96 continue;
97 }
98 if (!newline) {
99 link = 0;
100 continue;
101 }
102 if (ch == '=') {
103 link = 1;
104 }
105 if (!pos && ch == '#') {
106 header = 1;
107 }
108 newline = 0;
109 continue;
110 }
111
112 while (i < length) {
113 if (readnext(in, &ch, &i)) return -1;
114 if (!WHITESPACE(ch)) break;
115 }
116
117 link = 0;
118 header = 0;
119
120 if (i >= length) continue;
121 if (ch == '\n') {
122 newline = 1;
123 continue;
124 }
125
126 {
127 char link[MAX_URL] = {0};
128 char buf[MAX_URL];
129 size_t link_length;
130 link_length = utf8_unicode_to_char(link, ch);
131
132 while (i < length && link_length < sizeof(link)) {
133 if (readnext(in, &ch, &i)) return -1;
134 if (SEPARATOR(ch)) break;
135 link_length += utf8_unicode_to_char(
136 &link[link_length], ch);
137 }
138 link_length++;
139
140 format_link(link, link_length, V(buf));
141 url_parse_idn(buf, V(link));
142 url_convert(link, V(buf));
143 link_length = strnlen(V(buf));
144 write(out, P(link_length));
145 write(out, buf, link_length);
146 }
147
148 newline = 1;
149
150 }
151 i = -1;
152 write(out, P(i));
153 write(out, V(title));
154 return 0;
155 }
156