Changeset b31323f in mainline for uspace/lib/c/test/str.c
- Timestamp:
- 2025-04-17T14:29:23Z (5 days ago)
- Branches:
- master
- Children:
- ae787807
- Parents:
- 65bf084
- git-author:
- Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-17 11:01:00)
- git-committer:
- Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-17 14:29:23)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/test/str.c
--- uspace/lib/c/test/str.c	(r65bf084)
+++ uspace/lib/c/test/str.c	(rb31323f)
@@ -28,4 +28,6 @@
 
 #include "pcut/asserts.h"
+#include <assert.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <str.h>
@@ -48,4 +50,18 @@
 }
 
+/* Helper to display string contents for debugging */
+static void print_string_hex(char *out, const char *s, size_t len)
+{
+	*out++ = '"';
+	for (size_t i = 0; i < len && s[i]; i++) {
+		if (s[i] >= 32 && s[i] <= 126)
+			*out++ = s[i];
+		else
+			out += snprintf(out, 5, "\\x%02x", (uint8_t) s[i]);
+	}
+	*out++ = '"';
+	*out++ = 0;
+}
+
 PCUT_TEST(rtrim)
 {
@@ -119,11 +135,11 @@
 {
 	/* Overlong zero. */
-	const char overlong1[] = { 0b11000000, 0b10000000, 0 };
-	const char overlong2[] = { 0b11100000, 0b10000000, 0 };
-	const char overlong3[] = { 0b11110000, 0b10000000, 0 };
-
-	const char overlong4[] = { 0b11000001, 0b10111111, 0 };
-	const char overlong5[] = { 0b11100000, 0b10011111, 0b10111111, 0 };
-	const char overlong6[] = { 0b11110000, 0b10001111, 0b10111111, 0b10111111, 0 };
+	const char overlong1[] = "\xC0\x80";
+	const char overlong2[] = "\xE0\x80\x80";
+	const char overlong3[] = "\xF0\x80\x80\x80";
+
+	const char overlong4[] = "\xC1\xBF";
+	const char overlong5[] = "\xE0\x9F\xBF";
+	const char overlong6[] = "\xF0\x8F\xBF\xBF";
 
 	size_t offset = 0;
@@ -139,18 +155,107 @@
 	offset = 0;
 	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));
-
-	char sanitized[sizeof(overlong6)];
-	str_cpy(sanitized, STR_NO_LIMIT, overlong1);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong2);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong3);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong4);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong5);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong6);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
+}
+
+struct sanitize_test {
+	const char *input;
+	const char *output;
+};
+
+static const struct sanitize_test sanitize_tests[] = {
+	// Empty string
+	{ "", "" },
+	// ASCII only
+	{ "Hello, world!", "Hello, world!" },
+	// Valid multi-byte sequences
+	{ "Aπ你🐱", "Aπ你🐱" },
+	// U+D7FF is last valid before surrogates
+	{ "A\xED\x9F\xBFZ", "A\xED\x9F\xBFZ" },
+	// 0x10FFFF is the highest legal code point
+	{ "A\xF4\x8F\xBF\xBFZ", "A\xF4\x8F\xBF\xBFZ" },
+
+	// Missing continuation byte
+	{ "A\xC2Z", "A?Z" },
+	// Truncated multi-byte at buffer end
+	{ "A\xE2\x82", "A??" },
+	// Continuation byte without leading byte (0x80-0xBF are never valid first bytes)
+	{ "A\x80Y\xBFZ", "A?Y?Z" },
+
+	// 'A' (U+0041) normally encoded as 0x41
+	// Overlong 2-byte encoding: 0xC1 0x81
+	{ "\xC1\x81X", "??X" },
+
+	// ¢ (U+00A2) normally encoded as 0xC2 0xA2
+	// Overlong 3-byte encoding: 0xE0 0x82 0xA2
+	{ "\xE0\x82\xA2X", "???X" },
+
+	// ¢ (U+00A2) normally encoded as 0xC2 0xA2
+	// Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2
+	{ "\xF0\x80\x82\xA2X", "????X" },
+
+	// € (U+20AC) normally encoded as 0xE2 0x82 0xAC
+	// Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC
+	{ "\xF0\x82\x82\xACX", "????X" },
+
+	// Using 0xC0 0x80 as overlong encoding for NUL (which should be just 0x00)
+	{ "\xC0\x80X", "??X" },
+
+	// 0xED 0xA0 0x80 encodes a surrogate half (U+D800), not allowed in UTF-8
+	{ "A\xED\xA0\x80Z", "A???Z" },
+
+	// 0x110000 is not a legal code point
+	{ "A\xF4\x90\x80\x80Z", "A????Z" },
+
+	// Mix of valid and invalid sequences
+	{ "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z", "A©?(?😁?Z" },
+};
+
+static size_t count_diff(const char *a, const char *b, size_t n)
+{
+	size_t count = 0;
+
+	for (size_t i = 0; i < n; i++) {
+		if (a[i] != b[i])
+			count++;
+	}
+
+	return count;
+}
+
+PCUT_TEST(str_sanitize)
+{
+	char replacement = '?';
+	char buffer2[255];
+
+	for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {
+		const char *in = sanitize_tests[i].input;
+		const char *out = sanitize_tests[i].output;
+		size_t n = str_size(in) + 1;
+		assert(str_size(out) + 1 == n);
+
+		memcpy(buffer, in, n);
+		size_t replaced = str_sanitize(buffer, n, replacement);
+		if (memcmp(buffer, out, n) != 0) {
+			print_string_hex(buffer2, buffer, n);
+			print_string_hex(buffer, out, n);
+			PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
+		}
+
+		size_t expect_replaced = count_diff(buffer, in, n);
+		PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);
+	}
+
+	// Test with n smaller than string length - truncated valid encoding for €
+	const char *in = "ABC€";
+	const char *out = "ABC??\xAC";
+	size_t n = str_size(in) + 1;
+	memcpy(buffer, in, n);
+	size_t replaced = str_sanitize(buffer, 5, replacement);
+	if (memcmp(buffer, out, n) != 0) {
+		print_string_hex(buffer2, buffer, n);
+		print_string_hex(buffer, out, n);
+		PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
+	}
+
+	PCUT_ASSERT_INT_EQUALS(2, replaced);
 }
 
Note:
See TracChangeset
for help on using the changeset viewer.