💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › 83776a0171e08161784a5c68a813d… captured on 2023-12-28 at 15:44:24. Gemini links have been rewritten to link to archived content

View Raw

More Information

-=-=-=-=-=-=-

Go Back

0 /*

1 * ISC License

2 * Copyright (c) 2023 RMF <rawmonk@firemail.cc>

3 */

4 #include <stdint.h>

5 #include <string.h>

6 #include <unistd.h>

7 #include <stdio.h>

8 #include <stdlib.h>

9 #include "macro.h"

10 #include "strlcpy.h"

11 #include "strnstr.h"

12 #include "utf8.h"

13 #include "url.h"

14 #include "error.h"

15 #include "page.h"

16 #include "request.h"

17 #define PARSER_INTERNAL

18 #include "parser.h"

19

/*
 * Normalise `link` into `out`.
 *
 * The input is copied one UTF-8 character at a time. A "./" segment found at
 * the start of the link or right after a '/' is dropped, and a "../" segment
 * additionally rewinds the output to the previous '/'. Finally, a result that
 * starts with "gemini://" and has no '/' after the scheme gets a trailing '/'
 * appended when there is room for it.
 *
 * link       - NUL-terminated input string.
 * length     - input bound: copying stops before the character whose end
 *              would reach it (parse_links passes the byte count + 1).
 * out        - destination buffer; NUL-terminated on return unless
 *              out_length is 0, in which case it is left untouched.
 * out_length - size of `out` in bytes.
 *
 * Returns the number of bytes stored in `out`, terminator excluded.
 */
int format_link(const char *link, size_t length,
		char *out, size_t out_length) {
	/* sizeof counts the terminating NUL of the literal, hence the -1 */
	const size_t scheme_length = sizeof("gemini://") - 1;
	int i = 0, j = 0;
	uint32_t prev = 0;

	if (!out_length) return 0;
	while (link[i]) {
		uint32_t ch;
		int len;
		len = utf8_char_to_unicode(&ch, &link[i]);
		if ((prev == '/' || prev == 0) && ch == '.') {
			if (link[i + len] == '/') {
				if (j > 0) {
					/*
					 * step back over the last byte written;
					 * the '/' after the dot is copied on the
					 * next iteration and takes its place
					 */
					j -= 1;
					i += len;
				} else {
					/*
					 * nothing written yet: skip the whole
					 * "./" instead of moving j below zero
					 */
					i += len + 1;
				}
				continue;
			} else if (link[i + len] == '.' &&
				   link[i + len + 1] == '/') {
				j -= 2;
				if (j < 0) j = 0;
				/* test j first so out[0] is never read unwritten */
				while (j && out[j] != '/')
					j = utf8_previous(out, j);
				/* leave i on the '/' so it is copied next */
				i += len + 1;
				continue;
			}
		}
		if (i + len >= (ssize_t)length ||
		    j + len >= (ssize_t)out_length) {
			out[j] = '\0';
			break;
		}
		/* control characters are still copied but reset `prev` to 0 */
		if (ch < ' ') ch = '\0';
		memcpy(&out[j], &link[i], len);
		i += len;
		j += len;
		prev = ch;
	}
	out[j] = '\0';
	if (strncmp(out, "gemini://", scheme_length) == 0 &&
	    !strchr(&out[scheme_length], '/') &&
	    (size_t)j + 1 < out_length) {
		/* both the '/' and the new terminator fit in the buffer */
		out[j++] = '/';
		out[j] = '\0';
	}
	return j;
}

63

64 int parse_links(int in, size_t length, int out) {

65

66 int newline, link, header;

67 size_t i, pos;

68 char title[1024] = {0};

69

70 header = 0;

71 newline = 1;

72 link = 0;

73 pos = 0;

74 for (i = 0; i < length; ) {

75

76 uint32_t ch;

77

78 if (readnext(in, &ch, &i)) return -1;

79 if (header == 2) {

80 if (ch == '\n') {

81 newline = 1;

82 header = 0;

83 continue;

84 }

85 if (ch == '\t') ch = ' ';

86 if (ch >= ' ')

87 pos += utf8_unicode_to_char(&title[pos], ch);

88 }

89 if (header == 1 && WHITESPACE(ch)) {

90 header++;

91 }

92 if (!(link && ch == '>')) {

93 if (ch == '\n') {

94 newline = 1;

95 link = 0;

96 continue;

97 }

98 if (!newline) {

99 link = 0;

100 continue;

101 }

102 if (ch == '=') {

103 link = 1;

104 }

105 if (!pos && ch == '#') {

106 header = 1;

107 }

108 newline = 0;

109 continue;

110 }

111

112 while (i < length) {

113 if (readnext(in, &ch, &i)) return -1;

114 if (!WHITESPACE(ch)) break;

115 }

116

117 link = 0;

118 header = 0;

119

120 if (i >= length) continue;

121 if (ch == '\n') {

122 newline = 1;

123 continue;

124 }

125

126 {

127 char link[MAX_URL] = {0};

128 char buf[MAX_URL];

129 size_t link_length;

130 link_length = utf8_unicode_to_char(link, ch);

131

132 while (i < length && link_length < sizeof(link)) {

133 if (readnext(in, &ch, &i)) return -1;

134 if (SEPARATOR(ch)) break;

135 link_length += utf8_unicode_to_char(

136 &link[link_length], ch);

137 }

138 link_length++;

139

140 format_link(link, link_length, V(buf));

141 url_parse_idn(buf, V(link));

142 url_convert(link, V(buf));

143 link_length = strnlen(V(buf));

144 write(out, P(link_length));

145 write(out, buf, link_length);

146 }

147

148 newline = 1;

149

150 }

151 i = -1;

152 write(out, P(i));

153 write(out, V(title));

154 return 0;

155 }

156