Changes in common/str.c [0600976:28c39f3] in mainline
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
common/str.c
r0600976 r28c39f3 156 156 static inline int _char_continuation_bytes(char32_t c) 157 157 { 158 if ((c & ~LO_MASK_32(7)) == 0)159 return 0;160 161 158 if ((c & ~LO_MASK_32(11)) == 0) 162 159 return 1; … … 210 207 char32_t str_decode(const char *str, size_t *offset, size_t size) 211 208 { 212 if (*offset >=size)209 if (*offset + 1 > size) 213 210 return 0; 214 211 … … 226 223 /* Determine code length */ 227 224 228 int cbytes = _continuation_bytes(b0);229 int b0_bits = 6 - cbytes; /* Data bits in first byte */230 231 if ( cbytes < 0 ||*offset + cbytes > size)225 unsigned int cbytes = _continuation_bytes(b0); 226 unsigned int b0_bits = 6 - cbytes; /* Data bits in first byte */ 227 228 if (*offset + cbytes > size) 232 229 return U_SPECIAL; 233 230 … … 235 232 236 233 /* Decode continuation bytes */ 237 for (int i = 0; i < cbytes; i++) {238 uint8_t b = (uint8_t) str[ *offset];234 while (cbytes > 0) { 235 uint8_t b = (uint8_t) str[(*offset)++]; 239 236 240 237 if (!_is_continuation_byte(b)) 241 238 return U_SPECIAL; 242 239 243 (*offset)++;244 245 240 /* Shift data bits to ch */ 246 241 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS)); 247 } 248 249 /* 250 * Reject non-shortest form encodings. 251 * See https://www.unicode.org/versions/corrigendum1.html 252 */ 253 if (cbytes != _char_continuation_bytes(ch)) 254 return U_SPECIAL; 242 cbytes--; 243 } 255 244 256 245 return ch; … … 356 345 357 346 /* Convert in place any bytes that don't form a valid character into U_SPECIAL. 
*/ 358 static void _sanitize_string(char *str, size_t n) 359 { 360 uint8_t *b = (uint8_t *) str; 361 362 for (; *b && n > 0; b++, n--) { 363 int cont = _continuation_bytes(b[0]); 364 if (__builtin_expect(cont, 0) == 0) 347 static void _repair_string(char *str, size_t n) 348 { 349 for (; *str && n > 0; str++, n--) { 350 int cont = _continuation_bytes(*str); 351 if (cont == 0) 365 352 continue; 366 353 367 354 if (cont < 0 || n <= (size_t) cont) { 368 b[0]= U_SPECIAL;355 *str = U_SPECIAL; 369 356 continue; 370 357 } 371 358 372 /* Check continuation bytes. */373 359 for (int i = 1; i <= cont; i++) { 374 if (!_is_continuation_byte( b[i])) {375 b[0]= U_SPECIAL;360 if (!_is_continuation_byte(str[i])) { 361 *str = U_SPECIAL; 376 362 continue; 377 363 } 378 }379 380 /*381 * Check for non-shortest form encoding.382 * See https://www.unicode.org/versions/corrigendum1.html383 */384 385 switch (cont) {386 case 1:387 /* 0b110!!!!x 0b10xxxxxx */388 if (!(b[0] & 0b00011110))389 b[0] = U_SPECIAL;390 391 continue;392 case 2:393 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */394 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000))395 b[0] = U_SPECIAL;396 397 continue;398 case 3:399 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */400 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000))401 b[0] = U_SPECIAL;402 403 continue;404 364 } 405 365 } … … 921 881 static void _str_cpyn(char *dest, size_t size, const char *src) 922 882 { 923 assert(dest && src && size);924 925 if (!dest || !src || !size)926 return;927 928 if (size == STR_NO_LIMIT)929 return _str_cpy(dest, src);930 931 883 char *dest_top = dest + size - 1; 932 assert(size == 1 || dest < dest_top);933 884 934 885 while (*src && dest < dest_top) … … 956 907 assert(src != NULL); 957 908 assert(dest != NULL); 958 assert(size == STR_NO_LIMIT || dest + size > dest);959 909 960 910 /* Copy data. */ … … 962 912 963 913 /* In-place translate invalid bytes to U_SPECIAL. 
*/ 964 _ sanitize_string(dest, size);914 _repair_string(dest, size); 965 915 } 966 916 … … 991 941 992 942 /* In-place translate invalid bytes to U_SPECIAL. */ 993 _ sanitize_string(dest, size);943 _repair_string(dest, size); 994 944 } 995 945 … … 1010 960 assert(dest != NULL); 1011 961 assert(size > 0); 1012 assert(size == STR_NO_LIMIT || dest + size > dest);1013 962 1014 963 size_t dstr_size = _str_nsize(dest, size); 1015 if (dstr_size < size) { 1016 _str_cpyn(dest + dstr_size, size - dstr_size, src); 1017 _sanitize_string(dest + dstr_size, size - dstr_size); 1018 } 964 _str_cpyn(dest + dstr_size, size - dstr_size, src); 965 _repair_string(dest + dstr_size, size - dstr_size); 1019 966 } 1020 967 … … 1593 1540 return NULL; 1594 1541 1595 memcpy(dest, src, size);1596 _ sanitize_string(dest, size);1542 _str_cpy(dest, src); 1543 _repair_string(dest, size); 1597 1544 return dest; 1598 1545 } … … 1620 1567 char *str_ndup(const char *src, size_t n) 1621 1568 { 1622 size_t size = _str_nsize(src, n) ;1623 1624 char *dest = malloc(size + 1);1569 size_t size = _str_nsize(src, n) + 1; 1570 1571 char *dest = malloc(size); 1625 1572 if (!dest) 1626 1573 return NULL; 1627 1574 1628 memcpy(dest, src, size); 1629 _sanitize_string(dest, size); 1630 dest[size] = 0; 1575 _str_cpyn(dest, size, src); 1576 _repair_string(dest, size); 1631 1577 return dest; 1632 1578 }
Note: See TracChangeset for help on using the changeset viewer.