Changeset 0600976 in mainline for common/str.c
- Timestamp: 2025-04-14T11:23:38Z (5 days ago)
- Branches: master
- Children: 5d2bdaa
- Parents: 11782da
- git-author: Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-14 10:57:38)
- git-committer: Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-14 11:23:38)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
common/str.c
r11782da r0600976 210 210 char32_t str_decode(const char *str, size_t *offset, size_t size) 211 211 { 212 if (*offset + 1 >size)212 if (*offset >= size) 213 213 return 0; 214 214 … … 235 235 236 236 /* Decode continuation bytes */ 237 while (cbytes > 0) {237 for (int i = 0; i < cbytes; i++) { 238 238 uint8_t b = (uint8_t) str[*offset]; 239 239 … … 245 245 /* Shift data bits to ch */ 246 246 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS)); 247 cbytes--; 248 } 247 } 248 249 /* 250 * Reject non-shortest form encodings. 251 * See https://www.unicode.org/versions/corrigendum1.html 252 */ 253 if (cbytes != _char_continuation_bytes(ch)) 254 return U_SPECIAL; 249 255 250 256 return ch; … … 350 356 351 357 /* Convert in place any bytes that don't form a valid character into U_SPECIAL. */ 352 static void _repair_string(char *str, size_t n) 353 { 354 for (; *str && n > 0; str++, n--) { 355 int cont = _continuation_bytes(*str); 356 if (cont == 0) 358 static void _sanitize_string(char *str, size_t n) 359 { 360 uint8_t *b = (uint8_t *) str; 361 362 for (; *b && n > 0; b++, n--) { 363 int cont = _continuation_bytes(b[0]); 364 if (__builtin_expect(cont, 0) == 0) 357 365 continue; 358 366 359 367 if (cont < 0 || n <= (size_t) cont) { 360 *str= U_SPECIAL;368 b[0] = U_SPECIAL; 361 369 continue; 362 370 } 363 371 372 /* Check continuation bytes. */ 364 373 for (int i = 1; i <= cont; i++) { 365 if (!_is_continuation_byte( str[i])) {366 *str= U_SPECIAL;374 if (!_is_continuation_byte(b[i])) { 375 b[0] = U_SPECIAL; 367 376 continue; 368 377 } 378 } 379 380 /* 381 * Check for non-shortest form encoding. 382 * See https://www.unicode.org/versions/corrigendum1.html 383 */ 384 385 switch (cont) { 386 case 1: 387 /* 0b110!!!!x 0b10xxxxxx */ 388 if (!(b[0] & 0b00011110)) 389 b[0] = U_SPECIAL; 390 391 continue; 392 case 2: 393 /* 0b1110!!!! 
0b10!xxxxx 0b10xxxxxx */ 394 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000)) 395 b[0] = U_SPECIAL; 396 397 continue; 398 case 3: 399 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 400 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000)) 401 b[0] = U_SPECIAL; 402 403 continue; 369 404 } 370 405 } … … 886 921 static void _str_cpyn(char *dest, size_t size, const char *src) 887 922 { 923 assert(dest && src && size); 924 925 if (!dest || !src || !size) 926 return; 927 928 if (size == STR_NO_LIMIT) 929 return _str_cpy(dest, src); 930 888 931 char *dest_top = dest + size - 1; 932 assert(size == 1 || dest < dest_top); 889 933 890 934 while (*src && dest < dest_top) … … 912 956 assert(src != NULL); 913 957 assert(dest != NULL); 958 assert(size == STR_NO_LIMIT || dest + size > dest); 914 959 915 960 /* Copy data. */ … … 917 962 918 963 /* In-place translate invalid bytes to U_SPECIAL. */ 919 _ repair_string(dest, size);964 _sanitize_string(dest, size); 920 965 } 921 966 … … 946 991 947 992 /* In-place translate invalid bytes to U_SPECIAL. 
*/ 948 _ repair_string(dest, size);993 _sanitize_string(dest, size); 949 994 } 950 995 … … 965 1010 assert(dest != NULL); 966 1011 assert(size > 0); 1012 assert(size == STR_NO_LIMIT || dest + size > dest); 967 1013 968 1014 size_t dstr_size = _str_nsize(dest, size); 969 _str_cpyn(dest + dstr_size, size - dstr_size, src); 970 _repair_string(dest + dstr_size, size - dstr_size); 1015 if (dstr_size < size) { 1016 _str_cpyn(dest + dstr_size, size - dstr_size, src); 1017 _sanitize_string(dest + dstr_size, size - dstr_size); 1018 } 971 1019 } 972 1020 … … 1545 1593 return NULL; 1546 1594 1547 _str_cpy(dest, src);1548 _ repair_string(dest, size);1595 memcpy(dest, src, size); 1596 _sanitize_string(dest, size); 1549 1597 return dest; 1550 1598 } … … 1572 1620 char *str_ndup(const char *src, size_t n) 1573 1621 { 1574 size_t size = _str_nsize(src, n) + 1;1575 1576 char *dest = malloc(size );1622 size_t size = _str_nsize(src, n); 1623 1624 char *dest = malloc(size + 1); 1577 1625 if (!dest) 1578 1626 return NULL; 1579 1627 1580 _str_cpyn(dest, size, src); 1581 _repair_string(dest, size); 1628 memcpy(dest, src, size); 1629 _sanitize_string(dest, size); 1630 dest[size] = 0; 1582 1631 return dest; 1583 1632 }
Note: See TracChangeset for help on using the changeset viewer.