Changes in common/str.c [28c39f3:0600976] in mainline
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
common/str.c
r28c39f3 r0600976 156 156 static inline int _char_continuation_bytes(char32_t c) 157 157 { 158 if ((c & ~LO_MASK_32(7)) == 0) 159 return 0; 160 158 161 if ((c & ~LO_MASK_32(11)) == 0) 159 162 return 1; … … 207 210 char32_t str_decode(const char *str, size_t *offset, size_t size) 208 211 { 209 if (*offset + 1 >size)212 if (*offset >= size) 210 213 return 0; 211 214 … … 223 226 /* Determine code length */ 224 227 225 unsignedint cbytes = _continuation_bytes(b0);226 unsignedint b0_bits = 6 - cbytes; /* Data bits in first byte */227 228 if ( *offset + cbytes > size)228 int cbytes = _continuation_bytes(b0); 229 int b0_bits = 6 - cbytes; /* Data bits in first byte */ 230 231 if (cbytes < 0 || *offset + cbytes > size) 229 232 return U_SPECIAL; 230 233 … … 232 235 233 236 /* Decode continuation bytes */ 234 while (cbytes > 0) {235 uint8_t b = (uint8_t) str[ (*offset)++];237 for (int i = 0; i < cbytes; i++) { 238 uint8_t b = (uint8_t) str[*offset]; 236 239 237 240 if (!_is_continuation_byte(b)) 238 241 return U_SPECIAL; 239 242 243 (*offset)++; 244 240 245 /* Shift data bits to ch */ 241 246 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS)); 242 cbytes--; 243 } 247 } 248 249 /* 250 * Reject non-shortest form encodings. 251 * See https://www.unicode.org/versions/corrigendum1.html 252 */ 253 if (cbytes != _char_continuation_bytes(ch)) 254 return U_SPECIAL; 244 255 245 256 return ch; … … 345 356 346 357 /* Convert in place any bytes that don't form a valid character into U_SPECIAL. 
*/ 347 static void _repair_string(char *str, size_t n) 348 { 349 for (; *str && n > 0; str++, n--) { 350 int cont = _continuation_bytes(*str); 351 if (cont == 0) 358 static void _sanitize_string(char *str, size_t n) 359 { 360 uint8_t *b = (uint8_t *) str; 361 362 for (; *b && n > 0; b++, n--) { 363 int cont = _continuation_bytes(b[0]); 364 if (__builtin_expect(cont, 0) == 0) 352 365 continue; 353 366 354 367 if (cont < 0 || n <= (size_t) cont) { 355 *str= U_SPECIAL;368 b[0] = U_SPECIAL; 356 369 continue; 357 370 } 358 371 372 /* Check continuation bytes. */ 359 373 for (int i = 1; i <= cont; i++) { 360 if (!_is_continuation_byte( str[i])) {361 *str= U_SPECIAL;374 if (!_is_continuation_byte(b[i])) { 375 b[0] = U_SPECIAL; 362 376 continue; 363 377 } 378 } 379 380 /* 381 * Check for non-shortest form encoding. 382 * See https://www.unicode.org/versions/corrigendum1.html 383 */ 384 385 switch (cont) { 386 case 1: 387 /* 0b110!!!!x 0b10xxxxxx */ 388 if (!(b[0] & 0b00011110)) 389 b[0] = U_SPECIAL; 390 391 continue; 392 case 2: 393 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */ 394 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000)) 395 b[0] = U_SPECIAL; 396 397 continue; 398 case 3: 399 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 400 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000)) 401 b[0] = U_SPECIAL; 402 403 continue; 364 404 } 365 405 } … … 881 921 static void _str_cpyn(char *dest, size_t size, const char *src) 882 922 { 923 assert(dest && src && size); 924 925 if (!dest || !src || !size) 926 return; 927 928 if (size == STR_NO_LIMIT) 929 return _str_cpy(dest, src); 930 883 931 char *dest_top = dest + size - 1; 932 assert(size == 1 || dest < dest_top); 884 933 885 934 while (*src && dest < dest_top) … … 907 956 assert(src != NULL); 908 957 assert(dest != NULL); 958 assert(size == STR_NO_LIMIT || dest + size > dest); 909 959 910 960 /* Copy data. */ … … 912 962 913 963 /* In-place translate invalid bytes to U_SPECIAL. 
*/ 914 _ repair_string(dest, size);964 _sanitize_string(dest, size); 915 965 } 916 966 … … 941 991 942 992 /* In-place translate invalid bytes to U_SPECIAL. */ 943 _ repair_string(dest, size);993 _sanitize_string(dest, size); 944 994 } 945 995 … … 960 1010 assert(dest != NULL); 961 1011 assert(size > 0); 1012 assert(size == STR_NO_LIMIT || dest + size > dest); 962 1013 963 1014 size_t dstr_size = _str_nsize(dest, size); 964 _str_cpyn(dest + dstr_size, size - dstr_size, src); 965 _repair_string(dest + dstr_size, size - dstr_size); 1015 if (dstr_size < size) { 1016 _str_cpyn(dest + dstr_size, size - dstr_size, src); 1017 _sanitize_string(dest + dstr_size, size - dstr_size); 1018 } 966 1019 } 967 1020 … … 1540 1593 return NULL; 1541 1594 1542 _str_cpy(dest, src);1543 _ repair_string(dest, size);1595 memcpy(dest, src, size); 1596 _sanitize_string(dest, size); 1544 1597 return dest; 1545 1598 } … … 1567 1620 char *str_ndup(const char *src, size_t n) 1568 1621 { 1569 size_t size = _str_nsize(src, n) + 1;1570 1571 char *dest = malloc(size );1622 size_t size = _str_nsize(src, n); 1623 1624 char *dest = malloc(size + 1); 1572 1625 if (!dest) 1573 1626 return NULL; 1574 1627 1575 _str_cpyn(dest, size, src); 1576 _repair_string(dest, size); 1628 memcpy(dest, src, size); 1629 _sanitize_string(dest, size); 1630 dest[size] = 0; 1577 1631 return dest; 1578 1632 }
Note: See TracChangeset for help on using the changeset viewer.