Changes in uspace/app/bdsh/tok.c [f41682c:36ab7c7] in mainline
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/app/bdsh/tok.c
rf41682c r36ab7c7 42 42 static bool tok_pending_chars(tokenizer_t *); 43 43 static int tok_finish_string(tokenizer_t *); 44 static void tok_start_token(tokenizer_t *, token_type_t);45 44 46 45 /** Initialize the token parser … … 51 50 * @param max_tokens number of elements of the out_tokens array 52 51 */ 53 int tok_init(tokenizer_t *tok, char *input, token_t*out_tokens,52 int tok_init(tokenizer_t *tok, char *input, char **out_tokens, 54 53 size_t max_tokens) 55 54 { 56 55 tok->in = input; 57 56 tok->in_offset = 0; 58 tok->last_in_offset = 0;59 tok->in_char_offset = 0;60 tok->last_in_char_offset = 0;61 57 62 58 tok->outtok = out_tokens; 63 59 tok->outtok_offset = 0; 64 tok->outtok_size = max_tokens; 60 /* Leave one slot for a null terminator */ 61 assert(max_tokens > 0); 62 tok->outtok_size = max_tokens - 1; 65 63 66 64 /* Prepare a buffer where all the token strings will be stored */ … … 89 87 90 88 /** Tokenize the input string into the tokens */ 91 int tok_tokenize(tokenizer_t *tok , size_t *tokens_length)89 int tok_tokenize(tokenizer_t *tok) 92 90 { 93 91 int rc; 94 wchar_t next_char;92 wchar_t cur_char; 95 93 96 94 /* Read the input line char by char and append tokens */ 97 while ((next_char = tok_look_char(tok)) != 0) { 98 if (next_char == ' ') { 99 /* Push the token if there is any. 95 while ((cur_char = tok_get_char(tok)) != 0) { 96 if (cur_char == ' ') { 97 /* Spaces delimit tokens, but are not processed in any way 98 * Push the token if there is any. 100 99 * There may not be any pending char for a token in case 101 100 * there are several spaces in the input. 
… … 107 106 } 108 107 } 109 tok_start_token(tok, TOKTYPE_SPACE); 110 /* Eat all the spaces */ 111 while (tok_look_char(tok) == ' ') { 112 tok_push_char(tok, tok_get_char(tok)); 113 } 114 tok_push_token(tok); 115 116 } 117 else if (next_char == '|') { 118 /* Pipes are tokens that are delimiters and should be 119 * output as a separate token 108 } 109 else if (cur_char == '|') { 110 /* Pipes are tokens that are delimiters and should be output 111 * as a separate token 120 112 */ 121 113 if (tok_pending_chars(tok)) { … … 126 118 } 127 119 128 tok_start_token(tok, TOKTYPE_PIPE); 129 130 rc = tok_push_char(tok, tok_get_char(tok)); 120 rc = tok_push_char(tok, '|'); 131 121 if (rc != EOK) { 132 122 return rc; … … 138 128 } 139 129 } 140 else if ( next_char == '\'') {130 else if (cur_char == '\'') { 141 131 /* A string starts with a quote (') and ends again with a quote. 142 132 * A literal quote is written as '' 143 133 */ 144 tok_start_token(tok, TOKTYPE_TEXT);145 /* Eat the quote */146 tok_get_char(tok);147 134 rc = tok_finish_string(tok); 148 135 if (rc != EOK) { … … 151 138 } 152 139 else { 153 if (!tok_pending_chars(tok)) {154 tok_start_token(tok, TOKTYPE_TEXT);155 }156 140 /* If we are handling any other character, just append it to 157 141 * the current token. 
158 142 */ 159 rc = tok_push_char(tok, tok_get_char(tok));143 rc = tok_push_char(tok, cur_char); 160 144 if (rc != EOK) { 161 145 return rc; … … 172 156 } 173 157 174 *tokens_length = tok->outtok_offset; 158 /* We always have a space for the terminator, as we 159 * reserved it in tok_init */ 160 tok->outtok[tok->outtok_offset] = 0; 175 161 176 162 return EOK; … … 181 167 { 182 168 int rc; 183 wchar_t next_char; 184 185 while ((next_char = tok_look_char(tok)) != 0) { 186 if (next_char == '\'') { 187 /* Eat the quote */ 188 tok_get_char(tok); 169 wchar_t cur_char; 170 171 while ((cur_char = tok_get_char(tok)) != 0) { 172 if (cur_char == '\'') { 189 173 if (tok_look_char(tok) == '\'') { 190 174 /* Encode a single literal quote */ … … 203 187 } 204 188 else { 205 rc = tok_push_char(tok, tok_get_char(tok));189 rc = tok_push_char(tok, cur_char); 206 190 if (rc != EOK) { 207 191 return rc; … … 217 201 wchar_t tok_get_char(tokenizer_t *tok) 218 202 { 219 tok->in_char_offset++;220 203 return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT); 221 204 } … … 225 208 { 226 209 size_t old_offset = tok->in_offset; 227 size_t old_char_offset = tok->in_char_offset;228 210 wchar_t ret = tok_get_char(tok); 229 211 tok->in_offset = old_offset; 230 tok->in_char_offset = old_char_offset;231 212 return ret; 232 213 } … … 238 219 } 239 220 240 void tok_start_token(tokenizer_t *tok, token_type_t type)241 {242 tok->current_type = type;243 }244 245 221 /** Push the current token to the output array */ 246 222 int tok_push_token(tokenizer_t *tok) … … 255 231 256 232 tok->outbuf[tok->outbuf_offset++] = 0; 257 token_t *tokinfo = &tok->outtok[tok->outtok_offset++]; 258 tokinfo->type = tok->current_type; 259 tokinfo->text = tok->outbuf + tok->outbuf_last_start; 260 tokinfo->byte_start = tok->last_in_offset; 261 tokinfo->byte_length = tok->in_offset - tok->last_in_offset; 262 tokinfo->char_start = tok->last_in_char_offset; 263 tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset; 
233 tok->outtok[tok->outtok_offset++] = tok->outbuf + tok->outbuf_last_start; 264 234 tok->outbuf_last_start = tok->outbuf_offset; 265 266 /* We have consumed the first char of the next token already */267 tok->last_in_offset = tok->in_offset;268 tok->last_in_char_offset = tok->in_char_offset;269 235 270 236 return EOK;
Note: See TracChangeset for help on using the changeset viewer.