def get_default_tokenizer(cls):
    return tokenizers.create_shlex_tokenizer(
        with_bof=True,
        with_eof=True,
        wordchars=string.ascii_uppercase + string.ascii_lowercase
        + string.digits + '_.',
    )
def test_basic_tokenizer_yields_token_and_lookahead():
    tokenizer = tokenizers.create_shlex_tokenizer()

    # An empty input produces no (token, lookahead) pairs.
    tokens = list(ShiftReduceParser.tokenize_with_lookahead(tokenizer, ''))
    assert len(tokens) == 0

    # Each token is paired with the token that follows it; the last
    # token has no successor, so its lookahead is None.
    tokens = list(ShiftReduceParser.tokenize_with_lookahead(
        tokenizer, 'a b c'))
    assert len(tokens) == 3
    assert tokens[0] == ('a', 'b')
    assert tokens[1] == ('b', 'c')
    assert tokens[2] == ('c', None)
def test_tokenizer_with_bof_yields_token_and_lookahead():
    tokenizer = tokenizers.create_shlex_tokenizer(with_bof=True)

    # With with_bof=True, even an empty input yields the BOF marker.
    tokens = list(ShiftReduceParser.tokenize_with_lookahead(tokenizer, ''))
    assert len(tokens) == 1
    assert tokens[0] == (BOF_VALUE, None)

    # The BOF marker is prepended to the stream, so it is paired with
    # the first real token as its lookahead.
    tokens = list(ShiftReduceParser.tokenize_with_lookahead(
        tokenizer, 'a b c'))
    assert len(tokens) == 4
    assert tokens[0] == (BOF_VALUE, 'a')
    assert tokens[1] == ('a', 'b')
    assert tokens[2] == ('b', 'c')
    assert tokens[3] == ('c', None)
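# The two tests above pin down tokenize_with_lookahead: each token is
# paired with its successor, and the final token is paired with None.
# Below is a minimal sketch of an implementation consistent with those
# assertions. It assumes the tokenizer is a callable that maps a string
# to an iterable of token strings; the parser's actual method may differ.
import itertools

def tokenize_with_lookahead(tokenizer, text):
    # Duplicate the token stream, advance one copy by a single position,
    # and zip so every token is yielded with its lookahead (None at the end).
    tokens, ahead = itertools.tee(tokenizer(text))
    next(ahead, None)
    return zip(tokens, itertools.chain(ahead, [None]))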
def get_default_tokenizer(cls):
    return create_shlex_tokenizer(
        wordchars=string.ascii_uppercase + string.ascii_lowercase
        + string.digits + '_.',
    )
def get_default_tokenizer(cls):
    return tokenizers.create_shlex_tokenizer(
        with_bof=True,
        with_eof=True,
        wordchars=string.digits + '.',
    )
def get_default_tokenizer(cls):
    return create_shlex_tokenizer(wordchars=string.digits + '.')
def get_default_tokenizer(cls):
    return tokenizers.create_shlex_tokenizer()
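# For context, a minimal sketch of what create_shlex_tokenizer could look
# like, built on the standard-library shlex module. Only the parameter
# names (with_bof, with_eof, wordchars) come from the calls above; the
# factory shape, the sentinel definitions, and the shlex wiring are
# assumptions, not the library's actual implementation.
import shlex

BOF_VALUE = '<BOF>'  # hypothetical sentinels; the real values live in the library
EOF_VALUE = '<EOF>'

def create_shlex_tokenizer(with_bof=False, with_eof=False, wordchars=None):
    def tokenize(text):
        lexer = shlex.shlex(text)
        if wordchars is not None:
            # Replace shlex's default word characters so tokens may only
            # contain the characters the caller allows (e.g. digits + '.').
            lexer.wordchars = wordchars
        if with_bof:
            yield BOF_VALUE   # beginning-of-input marker
        yield from lexer      # a shlex lexer yields one token string at a time
        if with_eof:
            yield EOF_VALUE   # end-of-input marker
    return tokenize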