💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › be085323fa168f81aebbb3e1f5fce… captured on 2024-02-05 at 10:00:59. Gemini links have been rewritten to link to archived content

View Raw

More Information

-=-=-=-=-=-=-

Go Back

0 /*

1 * ISC License

2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>

3 */

4 #include <stdint.h>

5 #include <string.h>

6 #include <unistd.h>

7 #include <stdio.h>

8 #include <stdlib.h>

9 #include "macro.h"

10 #include "strlcpy.h"

11 #include "strnstr.h"

12 #include "utf8.h"

13 #include "url.h"

14 #include "error.h"

15 #include "page.h"

16 #include "request.h"

17 #define PARSER_INTERNAL

18 #include "parser.h"

19

/*
 * Copy `link` into `out`, collapsing "./" and "../" path segments on the way.
 *
 * link        NUL-terminated input string.
 * length      upper bound on the input: copying stops as soon as the end of
 *             the next character would reach this offset.
 * out         destination buffer, always NUL-terminated on return when
 *             out_length is non-zero.
 * out_length  capacity of `out` in bytes, terminator included.
 *
 * A '.' is only treated as a path segment when it follows a '/', sits at the
 * very beginning, or follows a control character (see `prev` below).
 * If the result starts with "gemini://" and has no '/' after the scheme, a
 * trailing '/' is appended when there is room for it.
 *
 * Returns the number of bytes stored in `out`, not counting the terminator.
 *
 * NOTE(review): utf8_char_to_unicode() is assumed to return the byte length
 * of the character it decodes, and utf8_previous() the offset of the
 * character preceding the given offset - confirm against utf8.h.
 */
int format_link(const char *link, size_t length,
		char *out, size_t out_length) {

	static const char scheme[] = "gemini://";
	int i = 0, j = 0;
	uint32_t prev = 0;

	/* without room for the terminator nothing can be written safely */
	if (!out_length) return 0;

	while (link[i]) {
		uint32_t ch;
		int len;

		len = utf8_char_to_unicode(&ch, &link[i]);
		if ((prev == '/' || prev == 0) && ch == '.') {
			if (link[i + len] == '/') {
				if (j > 0) {
					/*
					 * step back onto the byte written
					 * last, the '/' that follows the '.'
					 * will overwrite it
					 */
					j -= 1;
					i += len;
				} else {
					/*
					 * nothing was written yet: drop the
					 * whole "./" instead of moving `j`
					 * in front of the buffer
					 */
					i += len + 1;
					prev = '/';
				}
				continue;
			} else if (link[i + len] == '.' &&
					link[i + len + 1] == '/') {
				/* rewind to the '/' opening the previous segment */
				j -= 2;
				if (j < 0) j = 0;
				/* test `j` first so out[0] is never read unset */
				while (j && out[j] != '/')
					j = utf8_previous(out, j);
				/* skip "..", the '/' after it is copied next */
				i += len + 1;
				continue;
			}
		}
		if (i + len >= (ssize_t)length ||
				j + len >= (ssize_t)out_length) {
			break;
		}
		/*
		 * control characters are copied as they are, but recorded
		 * as 0 so that a following '.' is handled like one at the
		 * beginning of the link
		 */
		if (ch < ' ') ch = '\0';
		memcpy(&out[j], &link[i], len);
		i += len;
		j += len;
		prev = ch;
	}
	out[j] = '\0';

	/* "gemini://host" becomes "gemini://host/" when the buffer allows it */
	if (!strncmp(out, scheme, sizeof(scheme) - 1) &&
			!strchr(&out[sizeof(scheme) - 1], '/') &&
			(size_t)j + 1 < out_length) {
		out[j++] = '/';
		out[j] = '\0';
	}
	return j;
}

63

64 int parse_links(int in, size_t length, int out) {

65

66 int newline, link, header, ignore, ignore_mode;

67 size_t i, pos;

68 char title[1024] = {0};

69

70 pos = link = header = ignore_mode = ignore = 0;

71 newline = 1;

72 link = 0;

73 for (i = 0; i < length; ) {

74

75 uint32_t ch;

76

77 if (readnext(in, &ch, &i, length)) return -1;

78 if (newline && ch == '`') {

79 ignore = 1;

80 newline = 0;

81 continue;

82 }

83 if (ignore) {

84 if (ch == '`') {

85 if (++ignore < 2) continue;

86 ignore_mode = !ignore_mode;

87 ignore = 0;

88 continue;

89 }

90 ignore = 0;

91 }

92 if (ignore_mode) {

93 if (ch == '\n') newline = 1;

94 continue;

95 }

96 if (header == 2) {

97 if (pos + utf8_unicode_length(ch) >= sizeof(title)) {

98 header = 0;

99 continue;

100 }

101 if (ch == '\n') {

102 header = 0;

103 newline = 1;

104 continue;

105 }

106 if (ch == '\t') ch = ' ';

107 if (renderable(ch))

108 pos += utf8_unicode_to_char(&title[pos], ch);

109 }

110 if (header == 1 && WHITESPACE(ch)) {

111 header++;

112 }

113 if (!(link && ch == '>')) {

114 if (ch == '\n') {

115 newline = 1;

116 link = 0;

117 continue;

118 }

119 if (!newline) {

120 link = 0;

121 continue;

122 }

123 if (ch == '=') {

124 link = 1;

125 }

126 if (!pos && ch == '#') {

127 header = 1;

128 }

129 newline = 0;

130 continue;

131 }

132

133 while (i < length) {

134 if (readnext(in, &ch, &i, length)) return -1;

135 if (!WHITESPACE(ch)) break;

136 }

137

138 link = 0;

139 header = 0;

140

141 if (i >= length) break;

142 if (ch == '\n') {

143 newline = 1;

144 continue;

145 }

146

147 {

148 char link[MAX_URL] = {0};

149 char buf[MAX_URL];

150 size_t link_length;

151 link_length = utf8_unicode_to_char(link, ch);

152

153 while (i < length) {

154 size_t next;

155 if (readnext(in, &ch, &i, length)) return -1;

156 if (SEPARATOR(ch)) break;

157 next = link_length + utf8_unicode_length(ch);

158 if (next >= sizeof(link)) {

159 link_length = next;

160 break;

161 }

162 utf8_unicode_to_char(&link[link_length], ch);

163 link_length = next;

164 }

165 link_length++;

166 /* ignore links above the length limit */

167 if (link_length > sizeof(link)) {

168 newline = ch == '\n';

169 continue;

170 }

171

172 format_link(link, link_length, V(buf));

173 url_parse_idn(buf, V(link));

174 url_convert(link, V(buf));

175 link_length = strnlen(V(buf));

176 write(out, P(link_length));

177 write(out, buf, link_length);

178 }

179

180 newline = 1;

181

182 }

183 i = -1;

184 write(out, P(i));

185 write(out, V(title));

186 return 0;

187 }

188