Changes in common/str.c [b31323f:65bf084] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
common/str.c
rb31323f r65bf084 234 234 } 235 235 236 static bool _is_surrogate(const mbstate_t *mb, uint8_t b)237 {238 return (mb->state == 0b1111110000001101 && b >= 0xa0);239 }240 241 236 #define _likely(expr) __builtin_expect((expr), true) 242 237 #define _unlikely(expr) __builtin_expect((expr), false) … … 304 299 return CHAR_INVALID; 305 300 306 /* Reject surrogates */307 if (_unlikely(ch >= 0xD800 && ch < 0xE000))308 return CHAR_INVALID;309 310 301 return ch; 311 302 } … … 332 323 return CHAR_INVALID; 333 324 334 /* Reject out-of-range characters. */335 if (_unlikely(ch >= 0x110000))336 return CHAR_INVALID;337 338 325 return ch; 339 326 } … … 352 339 uint8_t b = s[*offset]; 353 340 354 if (!_is_continuation(b) || _is_non_shortest(mb, b) || _is_surrogate(mb, b)) {341 if (!_is_continuation(b) || _is_non_shortest(mb, b)) { 355 342 mb->state = 0; 356 343 return CHAR_INVALID; … … 536 523 } 537 524 538 /* Convert in place any bytes that don't form a valid character into replacement. */539 static size_t _str_sanitize(char *str, size_t n, uint8_t replacement)525 /* Convert in place any bytes that don't form a valid character into U_SPECIAL. */ 526 static void _sanitize_string(char *str, size_t n) 540 527 { 541 528 uint8_t *b = (uint8_t *) str; 542 size_t count = 0; 543 544 for (; n > 0 && b[0]; b++, n--) { 529 530 for (; *b && n > 0; b++, n--) { 545 531 int cont = _continuation_bytes(b[0]); 546 532 if (__builtin_expect(cont, 0) == 0) … … 548 534 549 535 if (cont < 0 || n <= (size_t) cont) { 550 b[0] = replacement; 551 count++; 536 b[0] = U_SPECIAL; 552 537 continue; 553 538 } 554 539 555 540 /* Check continuation bytes. 
*/ 556 bool valid = true;557 541 for (int i = 1; i <= cont; i++) { 558 542 if (!_is_continuation(b[i])) { 559 valid = false;560 break;543 b[0] = U_SPECIAL; 544 continue; 561 545 } 562 }563 564 if (!valid) {565 b[0] = replacement;566 count++;567 continue;568 546 } 569 547 … … 573 551 */ 574 552 575 /* 0b110!!!!x 0b10xxxxxx */ 576 if (cont == 1 && !(b[0] & 0b00011110)) { 577 b[0] = replacement; 578 count++; 553 switch (cont) { 554 case 1: 555 /* 0b110!!!!x 0b10xxxxxx */ 556 if (!(b[0] & 0b00011110)) 557 b[0] = U_SPECIAL; 558 559 continue; 560 case 2: 561 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */ 562 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000)) 563 b[0] = U_SPECIAL; 564 565 continue; 566 case 3: 567 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 568 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000)) 569 b[0] = U_SPECIAL; 570 579 571 continue; 580 572 } 581 582 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */ 583 if (cont == 2 && !(b[0] & 0b00001111) && !(b[1] & 0b00100000)) { 584 b[0] = replacement; 585 count++; 586 continue; 587 } 588 589 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 590 if (cont == 3 && !(b[0] & 0b00000111) && !(b[1] & 0b00110000)) { 591 b[0] = replacement; 592 count++; 593 continue; 594 } 595 596 /* Check for surrogate character encoding. */ 597 if (cont == 2 && b[0] == 0xED && b[1] >= 0xA0) { 598 b[0] = replacement; 599 count++; 600 continue; 601 } 602 603 /* Check for out-of-range code points. */ 604 if (cont == 3 && (b[0] > 0xF4 || (b[0] == 0xF4 && b[1] >= 0x90))) { 605 b[0] = replacement; 606 count++; 607 continue; 608 } 609 610 b += cont; 611 n -= cont; 612 } 613 614 return count; 615 } 616 617 size_t str_sanitize(char *str, size_t n, uint8_t replacement) 618 { 619 return _str_sanitize(str, n, replacement); 573 } 620 574 } 621 575 … … 1176 1130 1177 1131 /* In-place translate invalid bytes to U_SPECIAL. 
*/ 1178 _str_sanitize(dest, size, U_SPECIAL);1132 _sanitize_string(dest, size); 1179 1133 } 1180 1134 … … 1205 1159 1206 1160 /* In-place translate invalid bytes to U_SPECIAL. */ 1207 _str_sanitize(dest, size, U_SPECIAL);1161 _sanitize_string(dest, size); 1208 1162 } 1209 1163 … … 1229 1183 if (dstr_size < size) { 1230 1184 _str_cpyn(dest + dstr_size, size - dstr_size, src); 1231 _str_sanitize(dest + dstr_size, size - dstr_size, U_SPECIAL);1185 _sanitize_string(dest + dstr_size, size - dstr_size); 1232 1186 } 1233 1187 } … … 1808 1762 1809 1763 memcpy(dest, src, size); 1810 _str_sanitize(dest, size, U_SPECIAL);1764 _sanitize_string(dest, size); 1811 1765 return dest; 1812 1766 } … … 1841 1795 1842 1796 memcpy(dest, src, size); 1843 _str_sanitize(dest, size, U_SPECIAL);1797 _sanitize_string(dest, size); 1844 1798 dest[size] = 0; 1845 1799 return dest;
Note: See TracChangeset for help on using the changeset viewer.