💾 Archived View for gemini.rmf-dev.com › repo › Vaati › Vgmi › files › ceb653e644195c18ba4c273c2ff97… captured on 2023-12-28 at 15:46:04. Gemini links have been rewritten to link to archived content

View Raw

More Information

-=-=-=-=-=-=-

Go Back

0 /*

1 * MIT License

2 * Copyright (c) 2010-2020 nsf <no.smile.face@gmail.com>

3 * 2015-2022 Adam Saponara <as@php.net>

4 */

5 #include <stdio.h>

6 #include <stddef.h>

7 #include <stdint.h>

8 #include "wcwidth.h"

9

/*
 * Length in bytes of a UTF-8 sequence, indexed by the value of its first
 * byte. Bytes that cannot start a sequence (0x80-0xBF, 0xFE, 0xFF) map to 1.
 */
static const unsigned char utf8_length[256] = {
	/* 0x00 - 0xBF: single byte */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xC0 - 0xDF: two bytes */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 - 0xEF: three bytes */
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xF0 - 0xF7: four, 0xF8 - 0xFB: five, 0xFC - 0xFD: six, 0xFE - 0xFF: one */
	4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

22

/*
 * Mask selecting the payload bits of a lead byte, indexed by
 * (sequence length - 1).
 */
static const unsigned char utf8_mask[6] = {
	0x7f, /* 1 byte:  0xxxxxxx */
	0x1f, /* 2 bytes: 110xxxxx */
	0x0f, /* 3 bytes: 1110xxxx */
	0x07, /* 4 bytes: 11110xxx */
	0x03, /* 5 bytes: 111110xx */
	0x01  /* 6 bytes: 1111110x */
};

24

25 int utf8_char_length(char c) {

26 return utf8_length[(unsigned char)c];

27 }

28

/*
 * Returns how many bytes are needed to encode code point `c`
 * (1 to 6, using the extended 5- and 6-byte forms for large values).
 */
int utf8_unicode_length(uint32_t c) {
	if (c < 0x80)
		return 1;
	if (c < 0x800)
		return 2;
	if (c < 0x10000)
		return 3;
	if (c < 0x200000)
		return 4;
	if (c < 0x4000000)
		return 5;
	return 6;
}

46

47 int utf8_char_to_unicode(uint32_t *out, const char *c) {

48

49 int i;

50 unsigned char len, mask;

51 uint32_t result;

52

53 len = utf8_char_length(*c);

54 mask = utf8_mask[len - 1];

55 result = c[0] & mask;

56 for (i = 1; i < len; ++i) {

57 result <<= 6;

58 result |= c[i] & 0x3f;

59 }

60

61 *out = result;

62 return (int)len;

63 }

64

/*
 * Encodes code point `c` as UTF-8 into `out`.
 *
 * out: destination buffer; up to 6 bytes are written and no terminator is
 *      appended.
 * c:   value to encode.
 *
 * Returns the number of bytes written (1 to 6).
 */
int utf8_unicode_to_char(char *out, uint32_t c) {
	int lead_bits;
	int len;
	int pos;

	/* choose the sequence length and the marker bits of the first byte */
	if (c >= 0x4000000) {
		lead_bits = 0xfc;
		len = 6;
	} else if (c >= 0x200000) {
		lead_bits = 0xf8;
		len = 5;
	} else if (c >= 0x10000) {
		lead_bits = 0xf0;
		len = 4;
	} else if (c >= 0x800) {
		lead_bits = 0xe0;
		len = 3;
	} else if (c >= 0x80) {
		lead_bits = 0xc0;
		len = 2;
	} else {
		lead_bits = 0;
		len = 1;
	}

	/* fill continuation bytes from the tail, 6 payload bits at a time */
	pos = len;
	while (--pos > 0) {
		out[pos] = (c & 0x3f) | 0x80;
		c >>= 6;
	}
	/* whatever is left of c goes into the first byte */
	out[0] = c | lead_bits;

	return len;
}

98

/*
 * Advances `*ptr` by the length of the UTF-8 sequence it currently points at
 * and returns the new position.
 */
const char *utf8_next(const char **ptr) {
	const char *cur = *ptr;

	cur += utf8_char_length(*cur);
	*ptr = cur;
	return cur;
}

104

/*
 * Given byte offset `i` into `ptr`, steps back one byte and then keeps
 * stepping back over continuation bytes (10xxxxxx), never going below 0.
 * Returns the resulting offset; an offset of 0 is returned unchanged.
 */
int utf8_previous(const char *ptr, int i) {
	if (i == 0)
		return 0;

	for (--i; i > 0; --i) {
		/* stop at the first byte that is not a continuation byte */
		if ((ptr[i] & 0xC0) != 0x80)
			break;
	}
	return i;
}

110

/*
 * Sums the display width of the UTF-8 text in `ptr`.
 *
 * ptr:    start of the text.
 * length: number of bytes available in `ptr`.
 *
 * Decoding stops at the first code point equal to 0, when `length` bytes have
 * been consumed, or when the next lead byte announces a sequence that would
 * extend past `length` (such a truncated trailing sequence is not counted and
 * is never read beyond the buffer).
 *
 * Returns the sum of mk_wcwidth() over the decoded code points.
 * NOTE(review): the mk_wcwidth() result is added as-is; if it can be negative
 * for some characters the total shrinks accordingly -- confirm against
 * wcwidth.h.
 */
int utf8_width(const char *ptr, size_t length) {
	int width = 0;
	size_t i = 0;

	while (i < length) {
		uint32_t ch;
		size_t need = (size_t)utf8_char_length(ptr[i]);

		/* the sequence would run past the end of the buffer */
		if (need > length - i) break;

		i += utf8_char_to_unicode(&ch, &ptr[i]);
		if (!ch) break;
		width += mk_wcwidth(ch);
	}
	return width;
}

125

/*
 * Copies the UTF-8 string `src` into `dst` without splitting a multi-byte
 * sequence.
 *
 * dst:    destination buffer of at least `length` bytes.
 * src:    NUL-terminated source string.
 * length: capacity of `dst` in bytes, terminator included.
 *
 * Copying ends as soon as the source terminator has been copied, so `src` is
 * never read beyond its end. If the next sequence would not leave room for
 * the terminator, the copy is cut before that sequence and `dst` is
 * terminated there. Nothing is written when `length` is 0.
 *
 * Always returns 0.
 */
int utf8_cpy(char *dst, const char *src, size_t length) {
	size_t i = 0;

	while (i < length) {
		size_t len = (size_t)utf8_char_length(src[i]);

		/* no room for this sequence plus the terminator: truncate here */
		if (i + len >= length) {
			dst[i] = '\0';
			break;
		}
		while (len--) {
			dst[i] = src[i];
			/* terminator copied: the string is complete */
			if (src[i] == '\0')
				return 0;
			i++;
		}
	}
	return 0;
}

141

/*
 * Reads one UTF-8 encoded character from stream `f`.
 *
 * f:   stream to read from.
 * out: receives the decoded code point; for a single-byte sequence this is
 *      the byte value itself.
 *
 * Returns 0 on success and EOF when the stream ends, including in the middle
 * of a multi-byte sequence (in which case `*out` is left untouched). Returns
 * -1 if the announced sequence length does not fit the local buffer.
 */
int utf8_fgetc(FILE *f, uint32_t *out) {
	char buf[8];
	uint32_t decoded;
	int ch, len, pos;

	ch = fgetc(f);
	if (ch == EOF) return EOF;

	len = utf8_char_length(ch);
	if (len <= 1) {
		*out = ch;
		return 0;
	}
	/* leave room for the terminator appended below */
	if ((size_t)len >= sizeof(buf)) return -1;

	buf[0] = ch;
	for (pos = 1; pos < len; pos++) {
		ch = fgetc(f);
		if (ch == EOF) return EOF;
		buf[pos] = ch;
	}
	buf[pos] = '\0';

	/* decode into a real uint32_t rather than through a cast int pointer */
	utf8_char_to_unicode(&decoded, buf);
	*out = decoded;

	return 0;
}

166