Changeset 925a21e in mainline for uspace/app/bdsh/tok.c
- Timestamp:
- 2011-09-24T14:20:29Z (13 years ago)
- Branches:
- lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
- Children:
- 5bf76c1
- Parents:
- 867e2555 (diff), 1ab4aca (diff)
Note: this is a merge changeset; the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/app/bdsh/tok.c
r867e2555 r925a21e 42 42 static bool tok_pending_chars(tokenizer_t *); 43 43 static int tok_finish_string(tokenizer_t *); 44 static void tok_start_token(tokenizer_t *, token_type_t); 44 45 45 46 /** Initialize the token parser … … 50 51 * @param max_tokens number of elements of the out_tokens array 51 52 */ 52 int tok_init(tokenizer_t *tok, char *input, char **out_tokens,53 int tok_init(tokenizer_t *tok, char *input, token_t *out_tokens, 53 54 size_t max_tokens) 54 55 { 55 56 tok->in = input; 56 57 tok->in_offset = 0; 58 tok->last_in_offset = 0; 59 tok->in_char_offset = 0; 60 tok->last_in_char_offset = 0; 57 61 58 62 tok->outtok = out_tokens; 59 63 tok->outtok_offset = 0; 60 /* Leave one slot for a null terminator */ 61 assert(max_tokens > 0); 62 tok->outtok_size = max_tokens - 1; 64 tok->outtok_size = max_tokens; 63 65 64 66 /* Prepare a buffer where all the token strings will be stored */ … … 87 89 88 90 /** Tokenize the input string into the tokens */ 89 int tok_tokenize(tokenizer_t *tok )91 int tok_tokenize(tokenizer_t *tok, size_t *tokens_length) 90 92 { 91 93 int rc; 92 wchar_t cur_char;94 wchar_t next_char; 93 95 94 96 /* Read the input line char by char and append tokens */ 95 while ((cur_char = tok_get_char(tok)) != 0) { 96 if (cur_char == ' ') { 97 /* Spaces delimit tokens, but are not processed in any way 98 * Push the token if there is any. 97 while ((next_char = tok_look_char(tok)) != 0) { 98 if (next_char == ' ') { 99 /* Push the token if there is any. 99 100 * There may not be any pending char for a token in case 100 101 * there are several spaces in the input. 
… … 106 107 } 107 108 } 108 } 109 else if (cur_char == '|') { 110 /* Pipes are tokens that are delimiters and should be output 111 * as a separate token 109 tok_start_token(tok, TOKTYPE_SPACE); 110 /* Eat all the spaces */ 111 while (tok_look_char(tok) == ' ') { 112 tok_push_char(tok, tok_get_char(tok)); 113 } 114 tok_push_token(tok); 115 116 } 117 else if (next_char == '|') { 118 /* Pipes are tokens that are delimiters and should be 119 * output as a separate token 112 120 */ 113 121 if (tok_pending_chars(tok)) { … … 118 126 } 119 127 120 rc = tok_push_char(tok, '|'); 128 tok_start_token(tok, TOKTYPE_PIPE); 129 130 rc = tok_push_char(tok, tok_get_char(tok)); 121 131 if (rc != EOK) { 122 132 return rc; … … 128 138 } 129 139 } 130 else if ( cur_char == '\'') {140 else if (next_char == '\'') { 131 141 /* A string starts with a quote (') and ends again with a quote. 132 142 * A literal quote is written as '' 133 143 */ 144 tok_start_token(tok, TOKTYPE_TEXT); 145 /* Eat the quote */ 146 tok_get_char(tok); 134 147 rc = tok_finish_string(tok); 135 148 if (rc != EOK) { … … 138 151 } 139 152 else { 153 if (!tok_pending_chars(tok)) { 154 tok_start_token(tok, TOKTYPE_TEXT); 155 } 140 156 /* If we are handling any other character, just append it to 141 157 * the current token. 
142 158 */ 143 rc = tok_push_char(tok, cur_char);159 rc = tok_push_char(tok, tok_get_char(tok)); 144 160 if (rc != EOK) { 145 161 return rc; … … 156 172 } 157 173 158 /* We always have a space for the terminator, as we 159 * reserved it in tok_init */ 160 tok->outtok[tok->outtok_offset] = 0; 174 *tokens_length = tok->outtok_offset; 161 175 162 176 return EOK; … … 167 181 { 168 182 int rc; 169 wchar_t cur_char; 170 171 while ((cur_char = tok_get_char(tok)) != 0) { 172 if (cur_char == '\'') { 183 wchar_t next_char; 184 185 while ((next_char = tok_look_char(tok)) != 0) { 186 if (next_char == '\'') { 187 /* Eat the quote */ 188 tok_get_char(tok); 173 189 if (tok_look_char(tok) == '\'') { 174 190 /* Encode a single literal quote */ … … 187 203 } 188 204 else { 189 rc = tok_push_char(tok, cur_char);205 rc = tok_push_char(tok, tok_get_char(tok)); 190 206 if (rc != EOK) { 191 207 return rc; … … 201 217 wchar_t tok_get_char(tokenizer_t *tok) 202 218 { 219 tok->in_char_offset++; 203 220 return str_decode(tok->in, &tok->in_offset, STR_NO_LIMIT); 204 221 } … … 208 225 { 209 226 size_t old_offset = tok->in_offset; 227 size_t old_char_offset = tok->in_char_offset; 210 228 wchar_t ret = tok_get_char(tok); 211 229 tok->in_offset = old_offset; 230 tok->in_char_offset = old_char_offset; 212 231 return ret; 213 232 } … … 219 238 } 220 239 240 void tok_start_token(tokenizer_t *tok, token_type_t type) 241 { 242 tok->current_type = type; 243 } 244 221 245 /** Push the current token to the output array */ 222 246 int tok_push_token(tokenizer_t *tok) … … 231 255 232 256 tok->outbuf[tok->outbuf_offset++] = 0; 233 tok->outtok[tok->outtok_offset++] = tok->outbuf + tok->outbuf_last_start; 257 token_t *tokinfo = &tok->outtok[tok->outtok_offset++]; 258 tokinfo->type = tok->current_type; 259 tokinfo->text = tok->outbuf + tok->outbuf_last_start; 260 tokinfo->byte_start = tok->last_in_offset; 261 tokinfo->byte_length = tok->in_offset - tok->last_in_offset; 262 tokinfo->char_start = 
tok->last_in_char_offset; 263 tokinfo->char_length = tok->in_char_offset - tok->last_in_char_offset; 234 264 tok->outbuf_last_start = tok->outbuf_offset; 265 266 /* We have consumed the first char of the next token already */ 267 tok->last_in_offset = tok->in_offset; 268 tok->last_in_char_offset = tok->in_char_offset; 235 269 236 270 return EOK;
Note:
See TracChangeset
for help on using the changeset viewer.