Changes in common/str.c [65bf084:fdfb24e] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
common/str.c
r65bf084 rfdfb24e 5 5 * Copyright (c) 2011 Martin Sucha 6 6 * Copyright (c) 2011 Oleg Romanenko 7 * Copyright (c) 2025 Jiří Zárevúcky8 7 * All rights reserved. 9 8 * … … 55 54 * are valid 56 55 * 57 * Note that Unicode characters do not match58 * one-to-one with displayed characters or glyphs on59 * screen. For that level of precision, look up60 * Grapheme Clusters.61 *62 56 * ASCII character 7 bit encoded ASCII character, stored in char 63 57 * (usually signed 8 bit integer), code points 0 .. 127 … … 77 71 * [wide] string width number of display cells on a monospace display taken 78 72 * by a [wide] string, size_t 79 *80 * This is virtually impossible to determine exactly for81 * all strings without knowing specifics of the display82 * device, due to various factors affecting text output.83 * If you have the option to query the terminal for84 * position change caused by outputting the string,85 * it is preferrable to determine width that way.86 73 * 87 74 * … … 121 108 #include <str.h> 122 109 123 #include <align.h>124 110 #include <assert.h> 125 111 #include <ctype.h> 126 112 #include <errno.h> 127 #include <limits.h>128 #include <macros.h>129 #include <mem.h>130 113 #include <stdbool.h> 131 114 #include <stddef.h> 132 115 #include <stdint.h> 133 116 #include <stdlib.h> 134 #include <uchar.h> 135 136 #if __STDC_HOSTED__ 137 #include <fibril.h> 138 #endif 139 140 static void _set_ilseq() 141 { 142 #ifdef errno 143 errno = EILSEQ; 144 #endif 145 } 117 118 #include <align.h> 119 #include <mem.h> 146 120 147 121 /** Byte mask consisting of lowest @n bits (out of 8) */ … … 156 130 /** Number of data bits in a UTF-8 continuation byte */ 157 131 #define CONT_BITS 6 158 159 #define UTF8_MASK_INITIAL2 0b00011111160 #define UTF8_MASK_INITIAL3 0b00001111161 #define UTF8_MASK_INITIAL4 0b00000111162 #define UTF8_MASK_CONT 0b00111111163 164 #define CHAR_INVALID ((char32_t) UINT_MAX)165 166 static inline bool _is_ascii(uint8_t b)167 {168 return b < 0x80;169 }170 171 static inline bool _is_continuation(uint8_t b)172 {173 return (b & 0xC0) == 0x80;174 }175 176 static inline bool _is_2_byte(uint8_t c)177 {178 return (c & 0xE0) == 0xC0;179 }180 181 static inline bool _is_3_byte(uint8_t c)182 {183 return (c & 0xF0) == 0xE0;184 }185 186 static inline bool _is_4_byte(uint8_t c)187 {188 return (c & 0xF8) == 0xF0;189 }190 191 static inline int _char_continuation_bytes(char32_t c)192 {193 if ((c & ~LO_MASK_32(7)) == 0)194 return 0;195 196 if ((c & ~LO_MASK_32(11)) == 0)197 return 1;198 199 if ((c & ~LO_MASK_32(16)) == 0)200 return 2;201 202 if ((c & ~LO_MASK_32(21)) == 0)203 return 3;204 205 /* Codes longer than 21 bits are not supported */206 return -1;207 }208 209 static inline int _continuation_bytes(uint8_t b)210 {211 /* 0xxxxxxx */212 if (_is_ascii(b))213 return 0;214 215 /* 110xxxxx 10xxxxxx */216 if (_is_2_byte(b))217 return 1;218 219 /* 1110xxxx 10xxxxxx 10xxxxxx */220 if (_is_3_byte(b))221 return 2;222 223 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */224 if (_is_4_byte(b))225 return 3;226 227 return -1;228 }229 230 static bool _is_non_shortest(const mbstate_t *mb, uint8_t b)231 {232 return (mb->state == 0b1111110000000000 && !(b & 0b00100000)) ||233 (mb->state == 0b1111111111110000 && !(b & 0b00110000));234 }235 236 #define _likely(expr) __builtin_expect((expr), true)237 #define _unlikely(expr) __builtin_expect((expr), false)238 239 #define FAST_PATHS 1240 241 static char32_t _str_decode(const char *s, size_t *offset, size_t size, mbstate_t *mb)242 {243 assert(s);244 assert(offset);245 assert(*offset <= size);246 assert(size == STR_NO_LIMIT || s + size >= s);247 assert(mb);248 249 if (*offset == size)250 return 0;251 252 if (_likely(!mb->state)) {253 /* Clean slate, read initial byte. */254 uint8_t b = s[(*offset)++];255 256 /* Fast exit for the most common case. */257 if (_likely(_is_ascii(b)))258 return b;259 260 /* unexpected continuation byte */261 if (_unlikely(_is_continuation(b)))262 return CHAR_INVALID;263 264 /*265 * The value stored into `continuation` is designed to have266 * just enough leading ones that after shifting in one less than267 * the expected number of continuation bytes, the most significant268 * bit becomes zero. (The field is 16b wide.)269 */270 271 if (_is_2_byte(b)) {272 /* Reject non-shortest form. */273 if (_unlikely(!(b & 0b00011110)))274 return CHAR_INVALID;275 276 #if FAST_PATHS277 /* We can usually take this exit. */278 if (_likely(*offset < size && _is_continuation(s[*offset])))279 return (b & UTF8_MASK_INITIAL2) << 6 |280 (s[(*offset)++] & UTF8_MASK_CONT);281 #endif282 283 /* 2 byte continuation 110xxxxx */284 mb->state = b ^ 0b0000000011000000;285 286 } else if (_is_3_byte(b)) {287 #if FAST_PATHS288 /* We can usually take this exit. */289 if (_likely(*offset + 1 < size && _is_continuation(s[*offset]) && _is_continuation(s[*offset + 1]))) {290 291 char32_t ch = (b & UTF8_MASK_INITIAL3) << 12 |292 (s[(*offset)] & UTF8_MASK_CONT) << 6 |293 (s[(*offset) + 1] & UTF8_MASK_CONT);294 295 *offset += 2;296 297 /* Reject non-shortest form. */298 if (_unlikely(!(ch & 0xFFFFF800)))299 return CHAR_INVALID;300 301 return ch;302 }303 #endif304 305 /* 3 byte continuation 1110xxxx */306 mb->state = b ^ 0b1111110011100000;307 308 } else if (_is_4_byte(b)) {309 #if FAST_PATHS310 /* We can usually take this exit. */311 if (_likely(*offset + 2 < size && _is_continuation(s[*offset]) &&312 _is_continuation(s[*offset + 1]) && _is_continuation(s[*offset + 2]))) {313 314 char32_t ch = (b & UTF8_MASK_INITIAL4) << 18 |315 (s[(*offset)] & UTF8_MASK_CONT) << 12 |316 (s[(*offset) + 1] & UTF8_MASK_CONT) << 6 |317 (s[(*offset) + 2] & UTF8_MASK_CONT);318 319 *offset += 3;320 321 /* Reject non-shortest form. */322 if (_unlikely(!(ch & 0xFFFF0000)))323 return CHAR_INVALID;324 325 return ch;326 }327 #endif328 329 /* 4 byte continuation 11110xxx */330 mb->state = b ^ 0b1111111100000000;331 } else {332 return CHAR_INVALID;333 }334 }335 336 /* Deal with the remaining edge and invalid cases. */337 for (; *offset < size; (*offset)++) {338 /* Read continuation bytes. */339 uint8_t b = s[*offset];340 341 if (!_is_continuation(b) || _is_non_shortest(mb, b)) {342 mb->state = 0;343 return CHAR_INVALID;344 }345 346 /* Top bit becomes zero when shifting in the second to last byte. */347 if (!(mb->state & 0x8000)) {348 char32_t c = ((char32_t) mb->state) << 6 | (b & UTF8_MASK_CONT);349 mb->state = 0;350 (*offset)++;351 return c;352 }353 354 mb->state = mb->state << 6 | (b & UTF8_MASK_CONT);355 }356 357 /* Incomplete character. */358 assert(mb->state);359 return 0;360 }361 362 /** Standard <uchar.h> function since C11. */363 size_t mbrtoc32(char32_t *c, const char *s, size_t n, mbstate_t *mb)364 {365 #if __STDC_HOSTED__366 static fibril_local mbstate_t global_state = { };367 368 if (!mb)369 mb = &global_state;370 #endif371 372 if (!s) {373 /* Equivalent to mbrtoc32(NULL, "", 1, mb); */374 c = NULL;375 s = "";376 n = 1;377 }378 379 size_t offset = 0;380 char32_t ret = _str_decode(s, &offset, n, mb);381 if (ret == CHAR_INVALID) {382 assert(!mb->state);383 _set_ilseq();384 return UCHAR_ILSEQ;385 }386 if (mb->state) {387 assert(ret == 0);388 return UCHAR_INCOMPLETE;389 }390 391 if (c)392 *c = ret;393 return ret ? offset : 0;394 }395 132 396 133 /** Decode a single character from a string. … … 411 148 char32_t str_decode(const char *str, size_t *offset, size_t size) 412 149 { 413 mbstate_t mb = { }; 414 char32_t ch = _str_decode(str, offset, size, &mb); 415 416 if (ch == CHAR_INVALID) 150 if (*offset + 1 > size) 151 return 0; 152 153 /* First byte read from string */ 154 uint8_t b0 = (uint8_t) str[(*offset)++]; 155 156 /* Determine code length */ 157 158 unsigned int b0_bits; /* Data bits in first byte */ 159 unsigned int cbytes; /* Number of continuation bytes */ 160 161 if ((b0 & 0x80) == 0) { 162 /* 0xxxxxxx (Plain ASCII) */ 163 b0_bits = 7; 164 cbytes = 0; 165 } else if ((b0 & 0xe0) == 0xc0) { 166 /* 110xxxxx 10xxxxxx */ 167 b0_bits = 5; 168 cbytes = 1; 169 } else if ((b0 & 0xf0) == 0xe0) { 170 /* 1110xxxx 10xxxxxx 10xxxxxx */ 171 b0_bits = 4; 172 cbytes = 2; 173 } else if ((b0 & 0xf8) == 0xf0) { 174 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 175 b0_bits = 3; 176 cbytes = 3; 177 } else { 178 /* 10xxxxxx -- unexpected continuation byte */ 417 179 return U_SPECIAL; 418 419 if (mb.state) 180 } 181 182 if (*offset + cbytes > size) 420 183 return U_SPECIAL; 184 185 char32_t ch = b0 & LO_MASK_8(b0_bits); 186 187 /* Decode continuation bytes */ 188 while (cbytes > 0) { 189 uint8_t b = (uint8_t) str[(*offset)++]; 190 191 /* Must be 10xxxxxx */ 192 if ((b & 0xc0) != 0x80) 193 return U_SPECIAL; 194 195 /* Shift data bits to ch */ 196 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS)); 197 cbytes--; 198 } 421 199 422 200 return ch; … … 443 221 return 0; 444 222 445 int cbytes= 0;223 size_t processed = 0; 446 224 /* Continue while continuation bytes found */ 447 while (*offset > 0 && cbytes< 4) {225 while (*offset > 0 && processed < 4) { 448 226 uint8_t b = (uint8_t) str[--(*offset)]; 449 227 450 if (_is_continuation(b)) { 451 cbytes++; 452 continue; 228 if (processed == 0 && (b & 0x80) == 0) { 229 /* 0xxxxxxx (Plain ASCII) */ 230 return b & 0x7f; 231 } else if ((b & 0xe0) == 0xc0 || (b & 0xf0) == 0xe0 || 232 (b & 0xf8) == 0xf0) { 233 /* Start byte */ 234 size_t start_offset = *offset; 235 return str_decode(str, &start_offset, size); 236 } else if ((b & 0xc0) != 0x80) { 237 /* Not a continuation byte */ 238 return U_SPECIAL; 453 239 } 454 455 /* Reject non-shortest form encoding. */ 456 if (cbytes != _continuation_bytes(b)) 457 return U_SPECIAL; 458 459 /* Start byte */ 460 size_t start_offset = *offset; 461 return str_decode(str, &start_offset, size); 462 } 463 240 processed++; 241 } 464 242 /* Too many continuation bytes */ 465 243 return U_SPECIAL; … … 481 259 * code was invalid. 482 260 */ 483 errno_t chr_encode(char32_t ch, char *str, size_t *offset, size_t size) 484 { 485 // TODO: merge with c32rtomb() 486 261 errno_t chr_encode(const char32_t ch, char *str, size_t *offset, size_t size) 262 { 487 263 if (*offset >= size) 488 264 return EOVERFLOW; 489 265 490 /* Fast exit for the most common case. */491 if (ch < 0x80) {492 str[(*offset)++] = (char) ch;493 return EOK;494 }495 496 /* Codes longer than 21 bits are not supported */497 266 if (!chr_check(ch)) 498 267 return EINVAL; 499 268 269 /* 270 * Unsigned version of ch (bit operations should only be done 271 * on unsigned types). 272 */ 273 uint32_t cc = (uint32_t) ch; 274 500 275 /* Determine how many continuation bytes are needed */ 501 276 502 unsigned int cbytes = _char_continuation_bytes(ch); 503 unsigned int b0_bits = 6 - cbytes; /* Data bits in first byte */ 277 unsigned int b0_bits; /* Data bits in first byte */ 278 unsigned int cbytes; /* Number of continuation bytes */ 279 280 if ((cc & ~LO_MASK_32(7)) == 0) { 281 b0_bits = 7; 282 cbytes = 0; 283 } else if ((cc & ~LO_MASK_32(11)) == 0) { 284 b0_bits = 5; 285 cbytes = 1; 286 } else if ((cc & ~LO_MASK_32(16)) == 0) { 287 b0_bits = 4; 288 cbytes = 2; 289 } else if ((cc & ~LO_MASK_32(21)) == 0) { 290 b0_bits = 3; 291 cbytes = 3; 292 } else { 293 /* Codes longer than 21 bits are not supported */ 294 return EINVAL; 295 } 504 296 505 297 /* Check for available space in buffer */ … … 510 302 unsigned int i; 511 303 for (i = cbytes; i > 0; i--) { 512 str[*offset + i] = 0x80 | (c h& LO_MASK_32(CONT_BITS));513 c h >>=CONT_BITS;304 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS)); 305 cc = cc >> CONT_BITS; 514 306 } 515 307 516 308 /* Encode first byte */ 517 str[*offset] = (c h& LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);309 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1); 518 310 519 311 /* Advance offset */ … … 523 315 } 524 316 525 /* Convert in place any bytes that don't form a valid character into U_SPECIAL. */ 526 static void _sanitize_string(char *str, size_t n) 527 { 528 uint8_t *b = (uint8_t *) str; 529 530 for (; *b && n > 0; b++, n--) { 531 int cont = _continuation_bytes(b[0]); 532 if (__builtin_expect(cont, 0) == 0) 533 continue; 534 535 if (cont < 0 || n <= (size_t) cont) { 536 b[0] = U_SPECIAL; 537 continue; 538 } 539 540 /* Check continuation bytes. */ 541 for (int i = 1; i <= cont; i++) { 542 if (!_is_continuation(b[i])) { 543 b[0] = U_SPECIAL; 544 continue; 545 } 546 } 547 548 /* 549 * Check for non-shortest form encoding. 550 * See https://www.unicode.org/versions/corrigendum1.html 551 */ 552 553 switch (cont) { 554 case 1: 555 /* 0b110!!!!x 0b10xxxxxx */ 556 if (!(b[0] & 0b00011110)) 557 b[0] = U_SPECIAL; 558 559 continue; 560 case 2: 561 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */ 562 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000)) 563 b[0] = U_SPECIAL; 564 565 continue; 566 case 3: 567 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */ 568 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000)) 569 b[0] = U_SPECIAL; 570 571 continue; 572 } 573 } 574 } 575 576 static size_t _str_size(const char *str) 317 /** Get size of string. 318 * 319 * Get the number of bytes which are used by the string @a str (excluding the 320 * NULL-terminator). 321 * 322 * @param str String to consider. 323 * 324 * @return Number of bytes used by the string 325 * 326 */ 327 size_t str_size(const char *str) 577 328 { 578 329 size_t size = 0; … … 582 333 583 334 return size; 584 }585 586 /** Get size of string.587 *588 * Get the number of bytes which are used by the string @a str (excluding the589 * NULL-terminator).590 *591 * @param str String to consider.592 *593 * @return Number of bytes used by the string594 *595 */596 size_t str_size(const char *str)597 {598 return _str_size(str);599 335 } 600 336 … … 642 378 } 643 379 644 static size_t _str_nsize(const char *str, size_t max_size) 380 /** Get size of string with size limit. 381 * 382 * Get the number of bytes which are used by the string @a str 383 * (excluding the NULL-terminator), but no more than @max_size bytes. 384 * 385 * @param str String to consider. 386 * @param max_size Maximum number of bytes to measure. 387 * 388 * @return Number of bytes used by the string 389 * 390 */ 391 size_t str_nsize(const char *str, size_t max_size) 645 392 { 646 393 size_t size = 0; … … 650 397 651 398 return size; 652 }653 654 /** Get size of string with size limit.655 *656 * Get the number of bytes which are used by the string @a str657 * (excluding the NULL-terminator), but no more than @max_size bytes.658 *659 * @param str String to consider.660 * @param max_size Maximum number of bytes to measure.661 *662 * @return Number of bytes used by the string663 *664 */665 size_t str_nsize(const char *str, size_t max_size)666 {667 return _str_nsize(str, max_size);668 399 } 669 400 … … 851 582 int str_cmp(const char *s1, const char *s2) 852 583 { 853 /* 854 * UTF-8 has the nice property that lexicographic ordering on bytes is 855 * the same as the lexicographic ordering of the character sequences. 856 */ 857 while (*s1 == *s2 && *s1 != 0) { 858 s1++; 859 s2++; 860 } 861 862 if (*s1 == *s2) 863 return 0; 864 865 return (*s1 < *s2) ? -1 : 1; 584 char32_t c1 = 0; 585 char32_t c2 = 0; 586 587 size_t off1 = 0; 588 size_t off2 = 0; 589 590 while (true) { 591 c1 = str_decode(s1, &off1, STR_NO_LIMIT); 592 c2 = str_decode(s2, &off2, STR_NO_LIMIT); 593 594 if (c1 < c2) 595 return -1; 596 597 if (c1 > c2) 598 return 1; 599 600 if (c1 == 0 || c2 == 0) 601 break; 602 } 603 604 return 0; 866 605 } 867 606 … … 942 681 int str_casecmp(const char *s1, const char *s2) 943 682 { 944 // FIXME: doesn't work for non-ASCII caseful characters945 946 683 char32_t c1 = 0; 947 684 char32_t c2 = 0; … … 992 729 int str_lcasecmp(const char *s1, const char *s2, size_t max_len) 993 730 { 994 // FIXME: doesn't work for non-ASCII caseful characters995 996 731 char32_t c1 = 0; 997 732 char32_t c2 = 0; … … 1025 760 } 1026 761 1027 static bool _test_prefix(const char *s, const char *p)1028 {1029 while (*s == *p && *s != 0) {1030 s++;1031 p++;1032 }1033 1034 return *p == 0;1035 }1036 1037 762 /** Test whether p is a prefix of s. 1038 763 * … … 1048 773 bool str_test_prefix(const char *s, const char *p) 1049 774 { 1050 return _test_prefix(s, p); 775 char32_t c1 = 0; 776 char32_t c2 = 0; 777 778 size_t off1 = 0; 779 size_t off2 = 0; 780 781 while (true) { 782 c1 = str_decode(s, &off1, STR_NO_LIMIT); 783 c2 = str_decode(p, &off2, STR_NO_LIMIT); 784 785 if (c2 == 0) 786 return true; 787 788 if (c1 != c2) 789 return false; 790 791 if (c1 == 0) 792 break; 793 } 794 795 return false; 1051 796 } 1052 797 … … 1075 820 1076 821 return s + off; 1077 }1078 1079 /** Copy string as a sequence of bytes. */1080 static void _str_cpy(char *dest, const char *src)1081 {1082 while (*src)1083 *(dest++) = *(src++);1084 1085 *dest = 0;1086 }1087 1088 /** Copy string as a sequence of bytes. */1089 static void _str_cpyn(char *dest, size_t size, const char *src)1090 {1091 assert(dest && src && size);1092 1093 if (!dest || !src || !size)1094 return;1095 1096 if (size == STR_NO_LIMIT)1097 return _str_cpy(dest, src);1098 1099 char *dest_top = dest + size - 1;1100 assert(size == 1 || dest < dest_top);1101 1102 while (*src && dest < dest_top)1103 *(dest++) = *(src++);1104 1105 *dest = 0;1106 822 } 1107 823 … … 1123 839 assert(size > 0); 1124 840 assert(src != NULL); 1125 assert(dest != NULL); 1126 assert(size == STR_NO_LIMIT || dest + size > dest); 1127 1128 /* Copy data. */ 1129 _str_cpyn(dest, size, src); 1130 1131 /* In-place translate invalid bytes to U_SPECIAL. */ 1132 _sanitize_string(dest, size); 841 842 size_t src_off = 0; 843 size_t dest_off = 0; 844 845 char32_t ch; 846 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) { 847 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) 848 break; 849 } 850 851 dest[dest_off] = '\0'; 1133 852 } 1134 853 … … 1153 872 /* There must be space for a null terminator in the buffer. */ 1154 873 assert(size > 0); 1155 assert(src != NULL); 1156 1157 /* Copy data. */ 1158 _str_cpyn(dest, min(size, n + 1), src); 1159 1160 /* In-place translate invalid bytes to U_SPECIAL. */ 1161 _sanitize_string(dest, size); 874 875 size_t src_off = 0; 876 size_t dest_off = 0; 877 878 char32_t ch; 879 while ((ch = str_decode(src, &src_off, n)) != 0) { 880 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) 881 break; 882 } 883 884 dest[dest_off] = '\0'; 1162 885 } 1163 886 … … 1175 898 void str_append(char *dest, size_t size, const char *src) 1176 899 { 1177 assert(src != NULL); 1178 assert(dest != NULL); 1179 assert(size > 0); 1180 assert(size == STR_NO_LIMIT || dest + size > dest); 1181 1182 size_t dstr_size = _str_nsize(dest, size); 1183 if (dstr_size < size) { 1184 _str_cpyn(dest + dstr_size, size - dstr_size, src); 1185 _sanitize_string(dest + dstr_size, size - dstr_size); 1186 } 900 size_t dstr_size; 901 902 dstr_size = str_size(dest); 903 if (dstr_size >= size) 904 return; 905 906 str_cpy(dest + dstr_size, size - dstr_size, src); 1187 907 } 1188 908 … … 1213 933 errno_t spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n) 1214 934 { 1215 size_t len = 0; 1216 1217 /* Determine the length of the source string. */ 1218 for (size_t i = 0; i < n; i++) { 1219 if (src[i] == 0) 1220 break; 1221 1222 if (src[i] != ' ') 1223 len = i + 1; 1224 } 1225 1226 errno_t result = EOK; 1227 size_t out_len = min(len, size - 1); 1228 1229 /* Copy characters */ 1230 for (size_t i = 0; i < out_len; i++) { 1231 dest[i] = src[i]; 1232 1233 if (dest[i] < 0) { 1234 dest[i] = U_SPECIAL; 935 size_t sidx; 936 size_t didx; 937 size_t dlast; 938 uint8_t byte; 939 errno_t rc; 940 errno_t result; 941 942 /* There must be space for a null terminator in the buffer. */ 943 assert(size > 0); 944 result = EOK; 945 946 didx = 0; 947 dlast = 0; 948 for (sidx = 0; sidx < n; ++sidx) { 949 byte = src[sidx]; 950 if (!ascii_check(byte)) { 951 byte = U_SPECIAL; 1235 952 result = EIO; 1236 953 } 1237 } 1238 1239 dest[out_len] = 0; 1240 1241 if (out_len < len) 1242 return EOVERFLOW; 1243 954 955 rc = chr_encode(byte, dest, &didx, size - 1); 956 if (rc != EOK) { 957 assert(rc == EOVERFLOW); 958 dest[didx] = '\0'; 959 return rc; 960 } 961 962 /* Remember dest index after last non-empty character */ 963 if (byte != 0x20) 964 dlast = didx; 965 } 966 967 /* Terminate string after last non-empty character */ 968 dest[dlast] = '\0'; 1244 969 return result; 1245 970 } … … 1482 1207 } 1483 1208 1484 static char *_strchr(const char *str, char c)1485 {1486 while (*str != 0 && *str != c)1487 str++;1488 1489 return (*str == c) ? (char *) str : NULL;1490 }1491 1492 1209 /** Find first occurence of character in string. 1493 1210 * … … 1499 1216 char *str_chr(const char *str, char32_t ch) 1500 1217 { 1501 /* Fast path for an ASCII character. */ 1502 if (ascii_check(ch)) 1503 return _strchr(str, ch); 1504 1505 /* Convert character to UTF-8. */ 1506 char utf8[STR_BOUNDS(1) + 1]; 1507 size_t offset = 0; 1508 1509 if (chr_encode(ch, utf8, &offset, sizeof(utf8)) != EOK || offset == 0) 1510 return NULL; 1511 1512 utf8[offset] = '\0'; 1513 1514 /* Find the first byte, then check if all of them are correct. */ 1515 while (*str != 0) { 1516 str = _strchr(str, utf8[0]); 1517 if (!str) 1518 return NULL; 1519 1520 if (_test_prefix(str, utf8)) 1521 return (char *) str; 1522 1523 str++; 1218 char32_t acc; 1219 size_t off = 0; 1220 size_t last = 0; 1221 1222 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) { 1223 if (acc == ch) 1224 return (char *) (str + last); 1225 last = off; 1524 1226 } 1525 1227 … … 1536 1238 char *str_str(const char *hs, const char *n) 1537 1239 { 1538 size_t hsize = _str_size(hs); 1539 size_t nsize = _str_size(n); 1540 1541 while (hsize >= nsize) { 1542 if (_test_prefix(hs, n)) 1543 return (char *) hs; 1544 1545 hs++; 1546 hsize--; 1240 size_t off = 0; 1241 1242 if (str_lcmp(hs, n, str_length(n)) == 0) 1243 return (char *)hs; 1244 1245 while (str_decode(hs, &off, STR_NO_LIMIT) != 0) { 1246 if (str_lcmp(hs + off, n, str_length(n)) == 0) 1247 return (char *)(hs + off); 1547 1248 } 1548 1249 1549 1250 return NULL; 1550 }1551 1552 static void _str_rtrim(char *str, char c)1553 {1554 char *last = str;1555 1556 while (*str) {1557 if (*str != c)1558 last = str;1559 1560 str++;1561 }1562 1563 /* Truncate string. */1564 last[1] = 0;1565 1251 } 1566 1252 … … 1572 1258 void str_rtrim(char *str, char32_t ch) 1573 1259 { 1574 /* Fast path for the ASCII case. */1575 if (ascii_check(ch)) {1576 _str_rtrim(str, ch);1577 return;1578 }1579 1580 1260 size_t off = 0; 1581 1261 size_t pos = 0; … … 1599 1279 } 1600 1280 1601 static void _str_ltrim(char *str, char c)1602 {1603 char *p = str;1604 1605 while (*p == c)1606 p++;1607 1608 if (str != p)1609 _str_cpy(str, p);1610 }1611 1612 1281 /** Removes specified leading characters from a string. 1613 1282 * … … 1617 1286 void str_ltrim(char *str, char32_t ch) 1618 1287 { 1619 /* Fast path for the ASCII case. */1620 if (ascii_check(ch)) {1621 _str_ltrim(str, ch);1622 return;1623 }1624 1625 1288 char32_t acc; 1626 1289 size_t off = 0; … … 1642 1305 } 1643 1306 1644 static char *_str_rchr(const char *str, char c)1645 {1646 const char *last = NULL;1647 1648 while (*str) {1649 if (*str == c)1650 last = str;1651 1652 str++;1653 }1654 1655 return (char *) last;1656 }1657 1658 1307 /** Find last occurence of character in string. 1659 1308 * … … 1665 1314 char *str_rchr(const char *str, char32_t ch) 1666 1315 { 1667 if (ascii_check(ch))1668 return _str_rchr(str, ch);1669 1670 1316 char32_t acc; 1671 1317 size_t off = 0; … … 1756 1402 char *str_dup(const char *src) 1757 1403 { 1758 size_t size = _str_size(src) + 1;1404 size_t size = str_size(src) + 1; 1759 1405 char *dest = malloc(size); 1760 1406 if (!dest) 1761 1407 return NULL; 1762 1408 1763 memcpy(dest, src, size); 1764 _sanitize_string(dest, size); 1409 str_cpy(dest, size, src); 1765 1410 return dest; 1766 1411 } … … 1788 1433 char *str_ndup(const char *src, size_t n) 1789 1434 { 1790 size_t size = _str_nsize(src, n); 1435 size_t size = str_size(src); 1436 if (size > n) 1437 size = n; 1791 1438 1792 1439 char *dest = malloc(size + 1); … … 1794 1441 return NULL; 1795 1442 1796 memcpy(dest, src, size); 1797 _sanitize_string(dest, size); 1798 dest[size] = 0; 1443 str_ncpy(dest, size + 1, src, size); 1799 1444 return dest; 1800 1445 }
Note:
See TracChangeset
for help on using the changeset viewer.