Changes in uspace/lib/posix/fnmatch.c [9b1503e:a43fbc95] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/posix/fnmatch.c
r9b1503e ra43fbc95 1 1 /* 2 * Copyright (c) 2011 Petr Koupy2 * Copyright (c) 2011 Jiri Zarevucky 3 3 * All rights reserved. 4 4 * … … 33 33 */ 34 34 35 // TODO: clean this up a bit 36 37 #include "stdbool.h" 38 #include "ctype.h" 39 #include "string.h" 40 #include "stdlib.h" 41 #include "assert.h" 42 35 43 #define LIBPOSIX_INTERNAL 36 44 … … 38 46 #include "fnmatch.h" 39 47 48 #define INVALID_PATTERN -1 49 50 /* Type for collating element, simple identity with characters now, 51 * but may be extended for better locale support. 52 */ 53 typedef int _coll_elm_t; 54 55 #define COLL_ELM_INVALID -1 56 57 /** Get collating element matching a string. 58 * 59 * @param str 60 * @return 61 */ 62 static _coll_elm_t _coll_elm_get(const char* str) 63 { 64 if (str[0] == '\0' || str[1] != '\0') { 65 return COLL_ELM_INVALID; 66 } 67 return str[0]; 68 } 69 70 /** Get collating element matching a single character. 71 * 72 * @param c 73 * @return 74 */ 75 static _coll_elm_t _coll_elm_char(int c) 76 { 77 return c; 78 } 79 80 /** Match collating element with a beginning of a string. 81 * 82 * @param elm 83 * @param str 84 * @return 0 if the element doesn't match, or the number of characters matched. 85 */ 86 static int _coll_elm_match(_coll_elm_t elm, const char *str) 87 { 88 return elm == *str; 89 } 90 91 static int _coll_elm_between(_coll_elm_t first, _coll_elm_t second, 92 const char *str) 93 { 94 return *str >= first && *str <= second; 95 } 96 97 /** Read a string delimited by [? and ?]. 98 * 99 * @param pattern Pointer to the string to read from. Its position is moved 100 * to the first character after the closing ]. 101 * @param seq The character on the inside of brackets. 102 * @param buf Read buffer. 103 * @param buf_sz Read buffer's size. If the buffer is not large enough for 104 * the entire string, the string is cut with no error indication. 105 * @return 106 */ 107 static bool _get_delimited( 108 const char **pattern, int seq, 109 char *buf, size_t buf_sz, int flags) 110 { 111 const bool noescape = (flags & FNM_NOESCAPE) != 0; 112 const bool pathname = (flags & FNM_PATHNAME) != 0; 113 114 const char *p = *pattern; 115 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */); 116 p += 2; 117 118 while (true) { 119 if (*p == seq && *(p + 1) == ']') { 120 /* String properly ended, return. */ 121 *pattern = p + 2; 122 *buf = '\0'; 123 return true; 124 } 125 if (!noescape && *p == '\\') { 126 p++; 127 } 128 if (*p == '\0') { 129 /* String not ended properly, invalid pattern. */ 130 return false; 131 } 132 if (pathname && *p == '/') { 133 /* Slash in a pathname pattern is invalid. */ 134 return false; 135 } 136 if (buf_sz > 1) { 137 /* Only add to the buffer if there is space. */ 138 *buf = *p; 139 buf++; 140 buf_sz--; 141 } 142 p++; 143 } 144 } 145 146 /************** CHARACTER CLASSES ****************/ 147 148 #define MAX_CLASS_OR_COLL_LEN 6 149 150 struct _char_class { 151 const char *name; 152 int (*func) (int); 153 }; 154 155 /* List of supported character classes. */ 156 static const struct _char_class _char_classes[] = { 157 { "alnum", isalnum }, 158 { "alpha", isalpha }, 159 { "blank", isblank }, 160 { "cntrl", iscntrl }, 161 { "digit", isdigit }, 162 { "graph", isgraph }, 163 { "lower", islower }, 164 { "print", isprint }, 165 { "punct", ispunct }, 166 { "space", isspace }, 167 { "upper", isupper }, 168 { "xdigit", isxdigit } 169 }; 170 171 static int _class_compare(const void *key, const void *elem) 172 { 173 const struct _char_class *class = elem; 174 return strcmp((const char *) key, class->name); 175 } 176 177 static bool _is_in_class (const char *cname, int c) 178 { 179 /* Search for class in the array of supported character classes. */ 180 const struct _char_class *class = bsearch(cname, _char_classes, 181 sizeof(_char_classes) / sizeof(struct _char_class), 182 sizeof(struct _char_class), _class_compare); 183 184 if (class == NULL) { 185 /* No such class supported - treat as an empty class. */ 186 return false; 187 } else { 188 /* Class matched. */ 189 return class->func(c); 190 } 191 } 192 193 static int _match_char_class(const char **pattern, const char *str, int flags) 194 { 195 char class[MAX_CLASS_OR_COLL_LEN + 1]; 196 197 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) { 198 return INVALID_PATTERN; 199 } 200 201 return _is_in_class(class, *str); 202 } 203 204 /************** END CHARACTER CLASSES ****************/ 205 206 static _coll_elm_t _next_coll_elm(const char **pattern, int flags) 207 { 208 const char *p = *pattern; 209 const bool noescape = (flags & FNM_NOESCAPE) != 0; 210 const bool pathname = (flags & FNM_PATHNAME) != 0; 211 212 if (*p == '[') { 213 if (*(p + 1) == '.') { 214 char buf[MAX_CLASS_OR_COLL_LEN + 1]; 215 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) { 216 return COLL_ELM_INVALID; 217 } 218 return _coll_elm_get(buf); 219 } 220 221 if (*(p + 1) == '=') { 222 char buf[MAX_CLASS_OR_COLL_LEN + 1]; 223 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) { 224 return COLL_ELM_INVALID; 225 } 226 return _coll_elm_get(buf); 227 } 228 } 229 230 if (!noescape && *p == '\\') { 231 p++; 232 } 233 if (pathname && *p == '/') { 234 return COLL_ELM_INVALID; 235 } 236 237 *pattern = p + 1; 238 return _coll_elm_char(*p); 239 } 240 40 241 /** 41 * 242 * 243 */ 244 static int _match_bracket_expr(const char **pattern, const char *str, int flags) 245 { 246 const bool pathname = (flags & FNM_PATHNAME) != 0; 247 const bool special_period = (flags & FNM_PERIOD) != 0; 248 const char *p = *pattern; 249 bool negative = false; 250 int matched = 0; 251 252 #define _matched(match) { \ 253 int _match = match; \ 254 if (_match < 0) { \ 255 /* Invalid pattern */ \ 256 return _match; \ 257 } else if (matched == 0 && _match > 0) { \ 258 /* First match */ \ 259 matched = _match; \ 260 } \ 261 } 262 263 assert(*p == '['); /* calling code should ensure this */ 264 p++; 265 266 if (*str == '\0' || (pathname && *str == '/') || 267 (pathname && special_period && *str == '.' && *(str - 1) == '/')) { 268 /* No bracket expression matches end of string, 269 * slash in pathname match or initial period with FNM_PERIOD 270 * option. 271 */ 272 return 0; 273 } 274 275 if (*p == '^' || *p == '!') { 276 negative = true; 277 p++; 278 } 279 280 if (*p == ']') { 281 /* When ']' is first, treat it as a normal character. */ 282 _matched(*str == ']'); 283 p++; 284 } 285 286 _coll_elm_t current_elm = COLL_ELM_INVALID; 287 288 while (*p != ']') { 289 if (*p == '-' && *(p + 1) != ']' && 290 current_elm != COLL_ELM_INVALID) { 291 /* Range expression. */ 292 p++; 293 _coll_elm_t end_elm = _next_coll_elm(&p, flags); 294 if (end_elm == COLL_ELM_INVALID) { 295 return INVALID_PATTERN; 296 } 297 _matched(_coll_elm_between(current_elm, end_elm, str)); 298 continue; 299 } 300 301 if (*p == '[' && *(p + 1) == ':') { 302 current_elm = COLL_ELM_INVALID; 303 _matched(_match_char_class(&p, str, flags)); 304 continue; 305 } 306 307 current_elm = _next_coll_elm(&p, flags); 308 if (current_elm == COLL_ELM_INVALID) { 309 return INVALID_PATTERN; 310 } 311 _matched(_coll_elm_match(current_elm, str)); 312 } 313 314 /* No error occured - update pattern pointer. */ 315 *pattern = p + 1; 316 317 if (matched == 0) { 318 /* No match found */ 319 return negative; 320 } else { 321 /* Matched 'match' characters. */ 322 return negative ? 0 : matched; 323 } 324 325 #undef _matched 326 } 327 328 /** 329 * 330 */ 331 static bool _partial_match(const char **pattern, const char **string, int flags) 332 { 333 /* Only a single *-delimited subpattern is matched here. 334 * So in this function, '*' is understood as the end of pattern. 335 */ 336 337 const bool pathname = (flags & FNM_PATHNAME) != 0; 338 const bool special_period = (flags & FNM_PERIOD) != 0; 339 const bool noescape = (flags & FNM_NOESCAPE) != 0; 340 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0; 341 342 const char *s = *string; 343 const char *p = *pattern; 344 345 while (*p != '*') { 346 /* Bracket expression. */ 347 if (*p == '[') { 348 int matched = _match_bracket_expr(&p, s, flags); 349 if (matched == 0) { 350 /* Doesn't match. */ 351 return false; 352 } 353 if (matched != INVALID_PATTERN) { 354 s += matched; 355 continue; 356 } 357 358 assert(matched == INVALID_PATTERN); 359 /* Fall through to match [ as an ordinary character. */ 360 } 361 362 /* Wildcard match. */ 363 if (*p == '?') { 364 if (*s == '\0') { 365 /* No character to match. */ 366 return false; 367 } 368 if (pathname && *s == '/') { 369 /* Slash must be matched explicitly. */ 370 return false; 371 } 372 if (special_period && pathname && 373 *s == '.' && *(s - 1) == '/') { 374 /* Initial period must be matched explicitly. */ 375 return false; 376 } 377 378 /* None of the above, match anything else. */ 379 p++; 380 s++; 381 continue; 382 } 383 384 if (!noescape && *p == '\\') { 385 /* Escaped character. */ 386 p++; 387 } 388 389 if (*p == '\0') { 390 /* End of pattern, must match end of string or 391 * an end of subdirectory name (optional). 392 */ 393 394 if (*s == '\0' || (leading_dir && *s == '/')) { 395 break; 396 } 397 398 return false; 399 } 400 401 if (*p == *s) { 402 /* Exact match. */ 403 p++; 404 s++; 405 continue; 406 } 407 408 /* Nothing matched. */ 409 return false; 410 } 411 412 /* Entire sub-pattern matched. */ 413 414 /* postconditions */ 415 assert(*p == '\0' || *p == '*'); 416 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/')); 417 418 *pattern = p; 419 *string = s; 420 return true; 421 } 422 423 static bool _full_match(const char *pattern, const char *string, int flags) 424 { 425 const bool pathname = (flags & FNM_PATHNAME) != 0; 426 const bool special_period = (flags & FNM_PERIOD) != 0; 427 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0; 428 429 if (special_period && *string == '.') { 430 /* Initial dot must be matched by an explicit dot in pattern. */ 431 if (*pattern != '.') { 432 return false; 433 } 434 pattern++; 435 string++; 436 } 437 438 if (*pattern != '*') { 439 if (!_partial_match(&pattern, &string, flags)) { 440 /* The initial match must succeed. */ 441 return false; 442 } 443 } 444 445 while (*pattern != '\0') { 446 assert(*pattern == '*'); 447 pattern++; 448 449 bool matched = false; 450 451 const char *end; 452 if (pathname && special_period && 453 *string == '.' && *(string - 1) == '/') { 454 end = string; 455 } else { 456 end= strchrnul(string, pathname ? '/' : '\0'); 457 } 458 459 /* Try to match every possible offset. */ 460 while (string <= end) { 461 if (_partial_match(&pattern, &string, flags)) { 462 matched = true; 463 break; 464 } 465 string++; 466 } 467 468 if (matched) { 469 continue; 470 } 471 472 return false; 473 } 474 475 return *string == '\0' || (leading_dir && *string == '/'); 476 } 477 478 static char *_casefold(const char *s) 479 { 480 char *result = strdup(s); 481 for (char *i = result; *i != '\0'; ++i) { 482 *i = tolower(*i); 483 } 484 return result; 485 } 486 487 /** 488 * Filename pattern matching. 489 * 42 490 * @param pattern 43 491 * @param string … … 47 495 int posix_fnmatch(const char *pattern, const char *string, int flags) 48 496 { 49 // TODO 50 not_implemented(); 51 } 497 // TODO: don't fold everything in advance, but only when needed 498 499 if ((flags & FNM_CASEFOLD) != 0) { 500 /* Just fold the entire pattern and string. */ 501 pattern = _casefold(pattern); 502 string = _casefold(string); 503 } 504 505 bool result = _full_match(pattern, string, flags); 506 507 if ((flags & FNM_CASEFOLD) != 0) { 508 free((char *) pattern); 509 free((char *) string); 510 } 511 512 return result ? 0 : FNM_NOMATCH; 513 } 514 515 // FIXME: put the testcases somewhere else 516 517 #if 0 518 519 #include <stdio.h> 520 521 void __posix_fnmatch_test() 522 { 523 int fail = 0; 524 525 #undef assert 526 #define assert(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } } 527 #define match(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == 0) 528 #define nomatch(s1, s2, flags) assert(posix_fnmatch(s1, s2, flags) == FNM_NOMATCH) 529 530 assert(FNM_PATHNAME == FNM_FILE_NAME); 531 match("", "", 0); 532 match("*", "hello", 0); 533 match("hello", "hello", 0); 534 match("hello*", "hello", 0); 535 nomatch("hello?", "hello", 0); 536 match("*hello", "prdel hello", 0); 537 match("he[sl]lo", "hello", 0); 538 match("he[sl]lo", "heslo", 0); 539 nomatch("he[sl]lo", "heblo", 0); 540 nomatch("he[^sl]lo", "hello", 0); 541 nomatch("he[^sl]lo", "heslo", 0); 542 match("he[^sl]lo", "heblo", 0); 543 nomatch("he[!sl]lo", "hello", 0); 544 nomatch("he[!sl]lo", "heslo", 0); 545 match("he[!sl]lo", "heblo", 0); 546 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0); 547 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0); 548 match("[a-ce-z]", "a", 0); 549 match("[a-ce-z]", "c", 0); 550 nomatch("[a-ce-z]", "d", 0); 551 match("[a-ce-z]", "e", 0); 552 match("[a-ce-z]", "z", 0); 553 nomatch("[^a-ce-z]", "a", 0); 554 nomatch("[^a-ce-z]", "c", 0); 555 match("[^a-ce-z]", "d", 0); 556 nomatch("[^a-ce-z]", "e", 0); 557 nomatch("[^a-ce-z]", "z", 0); 558 match("helen??", "helenos", 0); 559 match("****booo****", "booo", 0); 560 561 match("hello[[:space:]]world", "hello world", 0); 562 nomatch("hello[[:alpha:]]world", "hello world", 0); 563 564 match("/hoooo*", "/hooooooo/hooo", 0); 565 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME); 566 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME); 567 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME); 568 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME); 569 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR); 570 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR); 571 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR); 572 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR); 573 574 match("*", "hell", 0); 575 match("*?", "hell", 0); 576 match("?*?", "hell", 0); 577 match("?*??", "hell", 0); 578 match("??*??", "hell", 0); 579 nomatch("???*??", "hell", 0); 580 581 nomatch("", "hell", 0); 582 nomatch("?", "hell", 0); 583 nomatch("??", "hell", 0); 584 nomatch("???", "hell", 0); 585 match("????", "hell", 0); 586 587 match("*", "h.ello", FNM_PERIOD); 588 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD); 589 nomatch("*", ".hello", FNM_PERIOD); 590 match("h?ello", "h.ello", FNM_PERIOD); 591 nomatch("?hello", ".hello", FNM_PERIOD); 592 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD); 593 match("/home/user/*", "/home/user/.hello", FNM_PERIOD); 594 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD); 595 596 nomatch("HeLlO", "hello", 0); 597 match("HeLlO", "hello", FNM_CASEFOLD); 598 599 printf("Failed: %d\n", fail); 600 } 601 602 #endif 52 603 53 604 /** @} 54 605 */ 606
Note:
See TracChangeset
for help on using the changeset viewer.