Changes in / [1d3ae66:f444633] in mainline
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
common/include/str.h
r1d3ae66 rf444633 162 162 extern void bin_order_suffix(const uint64_t, uint64_t *, const char **, bool); 163 163 164 extern size_t str_sanitize(char *str, size_t n, uint8_t replacement);165 166 164 /* 167 165 * TODO: Get rid of this. -
common/str.c
r1d3ae66 rf444633 234 234 } 235 235 236 static bool _is_surrogate(const mbstate_t *mb, uint8_t b)237 {238 return (mb->state == 0b1111110000001101 && b >= 0xa0);239 }240 241 236 #define _likely(expr) __builtin_expect((expr), true) 242 237 #define _unlikely(expr) __builtin_expect((expr), false) … … 304 299 return CHAR_INVALID; 305 300 306 /* Reject surrogates */307 if (_unlikely(ch >= 0xD800 && ch < 0xE000))308 return CHAR_INVALID;309 310 301 return ch; 311 302 } … … 332 323 return CHAR_INVALID; 333 324 334 /* Reject out-of-range characters. */335 if (_unlikely(ch >= 0x110000))336 return CHAR_INVALID;337 338 325 return ch; 339 326 } … … 352 339 uint8_t b = s[*offset]; 353 340 354 if (!_is_continuation(b) || _is_non_shortest(mb, b) || _is_surrogate(mb, b)) {341 if (!_is_continuation(b) || _is_non_shortest(mb, b)) { 355 342 mb->state = 0; 356 343 return CHAR_INVALID; … … 536 523 } 537 524 538 /* Convert in place any bytes that don't form a valid character into replacement. */539 static size_t _str_sanitize(char *str, size_t n, uint8_t replacement)525 /* Convert in place any bytes that don't form a valid character into U_SPECIAL. */ 526 static void _sanitize_string(char *str, size_t n) 540 527 { 541 528 uint8_t *b = (uint8_t *) str; 542 size_t count = 0; 543 544 for (; n > 0 && b[0]; b++, n--) { 529 530 for (; *b && n > 0; b++, n--) { 545 531 int cont = _continuation_bytes(b[0]); 546 532 if (__builtin_expect(cont, 0) == 0) … … 548 534 549 535 if (cont < 0 || n <= (size_t) cont) { 550 b[0] = replacement; 551 count++; 536 b[0] = U_SPECIAL; 552 537 continue; 553 538 } 554 539 555 540 /* Check continuation bytes. 
*/ 556 bool valid = true;557 541 for (int i = 1; i <= cont; i++) { 558 542 if (!_is_continuation(b[i])) { 559 valid = false;560 break;543 b[0] = U_SPECIAL; 544 continue; 561 545 } 562 }563 564 if (!valid) {565 b[0] = replacement;566 count++;567 continue;568 546 } 569 547 … … 573 551 */ 574 552 575 /* 0b110!!!!x 0b10xxxxxx */ 576 if (cont == 1 && !(b[0] & 0b00011110)) { 577 b[0] = replacement; 578 count++; 553 switch (cont) { 554 case 1: 555 /* 0b110!!!!x 0b10xxxxxx */ 556 if (!(b[0] & 0b00011110)) 557 b[0] = U_SPECIAL; 558 559 continue; 560 case 2: 561 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */ 562 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000)) 563 b[0] = U_SPECIAL; 564 565 continue; 566 case 3: 567 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 568 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000)) 569 b[0] = U_SPECIAL; 570 579 571 continue; 580 572 } 581 582 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */ 583 if (cont == 2 && !(b[0] & 0b00001111) && !(b[1] & 0b00100000)) { 584 b[0] = replacement; 585 count++; 586 continue; 587 } 588 589 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 590 if (cont == 3 && !(b[0] & 0b00000111) && !(b[1] & 0b00110000)) { 591 b[0] = replacement; 592 count++; 593 continue; 594 } 595 596 /* Check for surrogate character encoding. */ 597 if (cont == 2 && b[0] == 0xED && b[1] >= 0xA0) { 598 b[0] = replacement; 599 count++; 600 continue; 601 } 602 603 /* Check for out-of-range code points. */ 604 if (cont == 3 && (b[0] > 0xF4 || (b[0] == 0xF4 && b[1] >= 0x90))) { 605 b[0] = replacement; 606 count++; 607 continue; 608 } 609 610 b += cont; 611 n -= cont; 612 } 613 614 return count; 615 } 616 617 size_t str_sanitize(char *str, size_t n, uint8_t replacement) 618 { 619 return _str_sanitize(str, n, replacement); 573 } 620 574 } 621 575 … … 1176 1130 1177 1131 /* In-place translate invalid bytes to U_SPECIAL. 
*/ 1178 _str_sanitize(dest, size, U_SPECIAL);1132 _sanitize_string(dest, size); 1179 1133 } 1180 1134 … … 1205 1159 1206 1160 /* In-place translate invalid bytes to U_SPECIAL. */ 1207 _str_sanitize(dest, size, U_SPECIAL);1161 _sanitize_string(dest, size); 1208 1162 } 1209 1163 … … 1229 1183 if (dstr_size < size) { 1230 1184 _str_cpyn(dest + dstr_size, size - dstr_size, src); 1231 _str_sanitize(dest + dstr_size, size - dstr_size, U_SPECIAL);1185 _sanitize_string(dest + dstr_size, size - dstr_size); 1232 1186 } 1233 1187 } … … 1808 1762 1809 1763 memcpy(dest, src, size); 1810 _str_sanitize(dest, size, U_SPECIAL);1764 _sanitize_string(dest, size); 1811 1765 return dest; 1812 1766 } … … 1841 1795 1842 1796 memcpy(dest, src, size); 1843 _str_sanitize(dest, size, U_SPECIAL);1797 _sanitize_string(dest, size); 1844 1798 dest[size] = 0; 1845 1799 return dest; -
uspace/lib/c/arch/arm32/src/atomic.c
r1d3ae66 rf444633 38 38 volatile unsigned *ras_page; 39 39 40 unsigned long long __atomic_load_8(const volatile void *mem0, int model)41 {42 const volatile unsigned long long *mem = mem0;43 44 (void) model;45 46 unsigned long long ret;47 48 /*49 * The following instructions between labels 1 and 2 constitute a50 * Restartable Atomic Sequence. Should the sequence be non-atomic,51 * the kernel will restart it.52 */53 asm volatile (54 "1:\n"55 " adr %[ret], 1b\n"56 " str %[ret], %[rp0]\n"57 " adr %[ret], 2f\n"58 " str %[ret], %[rp1]\n"59 60 " ldrd %[ret], %[addr]\n"61 "2:\n"62 : [ret] "=&r" (ret),63 [rp0] "=m" (ras_page[0]),64 [rp1] "=m" (ras_page[1])65 : [addr] "m" (*mem)66 );67 68 ras_page[0] = 0;69 ras_page[1] = 0xffffffff;70 71 return ret;72 }73 74 void __atomic_store_8(volatile void *mem0, unsigned long long val, int model)75 {76 volatile unsigned long long *mem = mem0;77 78 (void) model;79 80 /* scratch register */81 unsigned tmp;82 83 /*84 * The following instructions between labels 1 and 2 constitute a85 * Restartable Atomic Sequence. Should the sequence be non-atomic,86 * the kernel will restart it.87 */88 asm volatile (89 "1:\n"90 " adr %[tmp], 1b\n"91 " str %[tmp], %[rp0]\n"92 " adr %[tmp], 2f\n"93 " str %[tmp], %[rp1]\n"94 95 " strd %[imm], %[addr]\n"96 "2:\n"97 : [tmp] "=&r" (tmp),98 [rp0] "=m" (ras_page[0]),99 [rp1] "=m" (ras_page[1]),100 [addr] "=m" (*mem)101 : [imm] "r" (val)102 );103 104 ras_page[0] = 0;105 ras_page[1] = 0xffffffff;106 }107 108 40 bool __atomic_compare_exchange_4(volatile void *mem0, void *expected0, 109 41 unsigned desired, bool weak, int success, int failure) -
uspace/lib/c/test/str.c
r1d3ae66 rf444633 28 28 29 29 #include "pcut/asserts.h" 30 #include <assert.h>31 #include <stdint.h>32 30 #include <stdio.h> 33 31 #include <str.h> … … 48 46 { 49 47 memset(buffer, 0, BUFFER_SIZE); 50 }51 52 /* Helper to display string contents for debugging */53 static void print_string_hex(char *out, const char *s, size_t len)54 {55 *out++ = '"';56 for (size_t i = 0; i < len && s[i]; i++) {57 if (s[i] >= 32 && s[i] <= 126)58 *out++ = s[i];59 else60 out += snprintf(out, 5, "\\x%02x", (uint8_t) s[i]);61 }62 *out++ = '"';63 *out++ = 0;64 48 } 65 49 … … 135 119 { 136 120 /* Overlong zero. */ 137 const char overlong1[] = "\xC0\x80";138 const char overlong2[] = "\xE0\x80\x80";139 const char overlong3[] = "\xF0\x80\x80\x80";121 const char overlong1[] = { 0b11000000, 0b10000000, 0 }; 122 const char overlong2[] = { 0b11100000, 0b10000000, 0 }; 123 const char overlong3[] = { 0b11110000, 0b10000000, 0 }; 140 124 141 const char overlong4[] = "\xC1\xBF";142 const char overlong5[] = "\xE0\x9F\xBF";143 const char overlong6[] = "\xF0\x8F\xBF\xBF";125 const char overlong4[] = { 0b11000001, 0b10111111, 0 }; 126 const char overlong5[] = { 0b11100000, 0b10011111, 0b10111111, 0 }; 127 const char overlong6[] = { 0b11110000, 0b10001111, 0b10111111, 0b10111111, 0 }; 144 128 145 129 size_t offset = 0; … … 155 139 offset = 0; 156 140 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6))); 157 }158 141 159 struct sanitize_test { 160 const char *input; 161 const char *output; 162 }; 163 164 static const struct sanitize_test sanitize_tests[] = { 165 // Empty string 166 { "", "" }, 167 // ASCII only 168 { "Hello, world!", "Hello, world!" 
}, 169 // Valid multi-byte sequences 170 { "Aπ你🐱", "Aπ你🐱" }, 171 // U+D7FF is last valid before surrogates 172 { "A\xED\x9F\xBFZ", "A\xED\x9F\xBFZ" }, 173 // 0x10FFFF is the highest legal code point 174 { "A\xF4\x8F\xBF\xBFZ", "A\xF4\x8F\xBF\xBFZ" }, 175 176 // Missing continuation byte 177 { "A\xC2Z", "A?Z" }, 178 // Truncated multi-byte at buffer end 179 { "A\xE2\x82", "A??" }, 180 // Continuation byte without leading byte (0x80-0xBF are never valid first bytes) 181 { "A\x80Y\xBFZ", "A?Y?Z" }, 182 183 // 'A' (U+0041) normally encoded as 0x41 184 // Overlong 2-byte encoding: 0xC1 0x81 185 { "\xC1\x81X", "??X" }, 186 187 // ¢ (U+00A2) normally encoded as 0xC2 0xA2 188 // Overlong 3-byte encoding: 0xE0 0x82 0xA2 189 { "\xE0\x82\xA2X", "???X" }, 190 191 // ¢ (U+00A2) normally encoded as 0xC2 0xA2 192 // Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2 193 { "\xF0\x80\x82\xA2X", "????X" }, 194 195 // € (U+20AC) normally encoded as 0xE2 0x82 0xAC 196 // Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC 197 { "\xF0\x82\x82\xACX", "????X" }, 198 199 // Using 0xC0 0x80 as overlong encoding for NUL (which should be just 0x00) 200 { "\xC0\x80X", "??X" }, 201 202 // 0xED 0xA0 0x80 encodes a surrogate half (U+D800), not allowed in UTF-8 203 { "A\xED\xA0\x80Z", "A???Z" }, 204 205 // 0x110000 is not a legal code point 206 { "A\xF4\x90\x80\x80Z", "A????Z" }, 207 208 // Mix of valid and invalid sequences 209 { "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z", "A©?(?😁?Z" }, 210 }; 211 212 static size_t count_diff(const char *a, const char *b, size_t n) 213 { 214 size_t count = 0; 215 216 for (size_t i = 0; i < n; i++) { 217 if (a[i] != b[i]) 218 count++; 219 } 220 221 return count; 222 } 223 224 PCUT_TEST(str_sanitize) 225 { 226 char replacement = '?'; 227 char buffer2[255]; 228 229 for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) { 230 const char *in = sanitize_tests[i].input; 231 const char *out = sanitize_tests[i].output; 232 size_t n = str_size(in) + 1; 233 
assert(str_size(out) + 1 == n); 234 235 memcpy(buffer, in, n); 236 size_t replaced = str_sanitize(buffer, n, replacement); 237 if (memcmp(buffer, out, n) != 0) { 238 print_string_hex(buffer2, buffer, n); 239 print_string_hex(buffer, out, n); 240 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2); 241 } 242 243 size_t expect_replaced = count_diff(buffer, in, n); 244 PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced); 245 } 246 247 // Test with n smaller than string length - truncated valid encoding for € 248 const char *in = "ABC€"; 249 const char *out = "ABC??\xAC"; 250 size_t n = str_size(in) + 1; 251 memcpy(buffer, in, n); 252 size_t replaced = str_sanitize(buffer, 5, replacement); 253 if (memcmp(buffer, out, n) != 0) { 254 print_string_hex(buffer2, buffer, n); 255 print_string_hex(buffer, out, n); 256 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2); 257 } 258 259 PCUT_ASSERT_INT_EQUALS(2, replaced); 142 char sanitized[sizeof(overlong6)]; 143 str_cpy(sanitized, STR_NO_LIMIT, overlong1); 144 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]); 145 str_cpy(sanitized, STR_NO_LIMIT, overlong2); 146 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]); 147 str_cpy(sanitized, STR_NO_LIMIT, overlong3); 148 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]); 149 str_cpy(sanitized, STR_NO_LIMIT, overlong4); 150 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]); 151 str_cpy(sanitized, STR_NO_LIMIT, overlong5); 152 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]); 153 str_cpy(sanitized, STR_NO_LIMIT, overlong6); 154 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]); 260 155 } 261 156
Note: See TracChangeset for help on using the changeset viewer.