def test_token_label(self):
    """A label may precede both an empty token and a literal token."""
    observed = list(Lexer('abc:[] abc:"我們"').generate_tokens())
    expected = [
        Token(TokenType.TOKEN_LABEL, 'abc'),
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.TOKEN_LABEL, 'abc'),
        Token(TokenType.DEFAULT_TOKEN, '我們'),
    ]
    self.assertEqual(observed, expected)
def test_group(self):
    """Parenthesized groups tokenize as LPAREN/RPAREN around their members."""
    observed = list(Lexer('([] "a")+').generate_tokens())
    expected = [
        Token(TokenType.LPAREN),
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.DEFAULT_TOKEN, "a"),
        Token(TokenType.RPAREN),
        Token(TokenType.TOKEN_QUANTIFIER, (1, 'inf')),
    ]
    self.assertEqual(observed, expected)
def test_escape(self):
    """Escaped characters (tab, quote) survive inside attribute values."""
    observed = list(Lexer('[word="\t"] [word!="\""]').generate_tokens())
    expected = [
        Token(TokenType.ATTR_NAME, "word"),
        Token(TokenType.ATTR_RELATION, "is"),
        Token(TokenType.ATTR_VALUE, "\t"),
        Token(TokenType.ATTR_NAME, "word"),
        Token(TokenType.ATTR_RELATION, "is_not"),
        Token(TokenType.ATTR_VALUE, '"'),
    ]
    self.assertEqual(observed, expected)
def test_words(self):
    """Parser builds assign/quantify/label nodes from a mixed query."""
    tokens = list(Lexer('[x="z"] [x="z"]{1,2} lab:[]*').generate_tokens())
    tree = Parser(tokens).parse()
    expected = [
        AssignAttrNode(AttrNameNode("x"), "is", AttrValueNode("z")),
        QuantifyNode(
            AssignAttrNode(AttrNameNode("x"), "is", AttrValueNode("z")),
            (1, 2)),
        LabelNode(QuantifyNode(EmptyTokenNode(), (0, 'inf')), 'lab'),
    ]
    self.assertEqual(tree, expected)
def test_attr(self):
    """Conjoined attribute constraints tokenize with an ATTR_AND between them."""
    observed = list(Lexer('[word="aaa" & pos!="N.*"]').generate_tokens())
    expected = [
        Token(TokenType.ATTR_NAME, "word"),
        Token(TokenType.ATTR_RELATION, "is"),
        Token(TokenType.ATTR_VALUE, "aaa"),
        Token(TokenType.ATTR_AND),
        Token(TokenType.ATTR_NAME, "pos"),
        Token(TokenType.ATTR_RELATION, "is_not"),
        Token(TokenType.ATTR_VALUE, "N.*"),
    ]
    self.assertEqual(observed, expected)
# NOTE(review): method name misspells "quantifier"; it is also duplicated at
# another definition in this file — if both live in one TestCase, the later
# definition shadows this one. Kept as-is to avoid changing the test interface.
def test_token_quatifier(self):
    """Every quantifier form on a literal token maps to its (min, max) pair."""
    observed = list(
        Lexer('"我們"{1,2} "我們"{2} "我們"? "我們"* "我們"+').generate_tokens())
    expected = [
        Token(TokenType.DEFAULT_TOKEN, "我們"),
        Token(TokenType.TOKEN_QUANTIFIER, (1, 2)),
        Token(TokenType.DEFAULT_TOKEN, "我們"),
        Token(TokenType.TOKEN_QUANTIFIER, (2, 2)),
        Token(TokenType.DEFAULT_TOKEN, "我們"),
        Token(TokenType.TOKEN_QUANTIFIER, (0, 1)),
        Token(TokenType.DEFAULT_TOKEN, "我們"),
        Token(TokenType.TOKEN_QUANTIFIER, (0, 'inf')),
        Token(TokenType.DEFAULT_TOKEN, "我們"),
        Token(TokenType.TOKEN_QUANTIFIER, (1, 'inf')),
    ]
    self.assertEqual(observed, expected)
# NOTE(review): same misspelled name as the literal-token variant in this file;
# if both are in one TestCase only one will run. Kept to preserve the interface.
def test_token_quatifier(self):
    """Every quantifier form on an empty token maps to its (min, max) pair."""
    observed = list(Lexer('[]{1,2} []{2} []? []* []+').generate_tokens())
    expected = [
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.TOKEN_QUANTIFIER, (1, 2)),
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.TOKEN_QUANTIFIER, (2, 2)),
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.TOKEN_QUANTIFIER, (0, 1)),
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.TOKEN_QUANTIFIER, (0, 'inf')),
        Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN"),
        Token(TokenType.TOKEN_QUANTIFIER, (1, 'inf')),
    ]
    self.assertEqual(observed, expected)
def test_wordGroups(self):
    """A labeled, quantified group parses into nested Label/Quantify nodes."""
    tokens = list(
        Lexer('lab:([x="z" & word!="b"] [x="z"])?').generate_tokens())
    tree = Parser(tokens).parse()
    group = [
        ConjoinAttrNode(
            AssignAttrNode(AttrNameNode("x"), "is", AttrValueNode("z")),
            AssignAttrNode(AttrNameNode("word"), "is_not", AttrValueNode("b"))),
        AssignAttrNode(AttrNameNode("x"), "is", AttrValueNode("z")),
    ]
    expected = [LabelNode(QuantifyNode(group, (0, 1)), 'lab')]
    self.assertEqual(tree, expected)
def generate_queries(cql: str):
    """Lex a CQL string, expand its quantifiers, and interpret each variant.

    Open-ended quantifiers are expanded into at most MAX_QUANT concrete
    repetitions; each expanded token sequence is parsed and interpreted
    independently.

    Parameters:
        cql: The CQL query string.

    Returns:
        A list of interpreter outputs, one per expanded query that produced
        a non-empty value.
    """
    tokens = list(Lexer(cql).generate_tokens())
    # Bug fix: the original built `Parser(tokens)` here and never used it —
    # the parser is (re)constructed per expanded query inside the loop.
    queries = expand_quantifiers(tokens, MAX_QUANT)
    values = []
    for query in queries:
        parser = Parser(query)
        tree = parser.parse()
        interpreter = Interpreter(default_attrname=DFT_ATTR)
        value = interpreter.visit(tree)
        if value:  # keep only queries that yielded matches
            values.append(value)
    return values
def test_all(self):
    """End-to-end lexing of a realistic multi-token query with labels."""
    query = ('[word="把" & pos="P"] '
             '[pos!="N[abcd].*|COMMACATEGORY|PERIODCATEGORY"]* '
             'obj:[pos="N[abcd].*"] v:[pos="V.*"]')
    observed = list(Lexer(query).generate_tokens())
    expected = [
        Token(TokenType.ATTR_NAME, "word"),
        Token(TokenType.ATTR_RELATION, 'is'),
        Token(TokenType.ATTR_VALUE, '把'),
        Token(TokenType.ATTR_AND),
        Token(TokenType.ATTR_NAME, "pos"),
        Token(TokenType.ATTR_RELATION, 'is'),
        Token(TokenType.ATTR_VALUE, 'P'),
        Token(TokenType.ATTR_NAME, "pos"),
        Token(TokenType.ATTR_RELATION, 'is_not'),
        Token(TokenType.ATTR_VALUE, 'N[abcd].*|COMMACATEGORY|PERIODCATEGORY'),
        Token(TokenType.TOKEN_QUANTIFIER, (0, 'inf')),
        Token(TokenType.TOKEN_LABEL, 'obj'),
        Token(TokenType.ATTR_NAME, "pos"),
        Token(TokenType.ATTR_RELATION, 'is'),
        Token(TokenType.ATTR_VALUE, 'N[abcd].*'),
        Token(TokenType.TOKEN_LABEL, 'v'),
        Token(TokenType.ATTR_NAME, "pos"),
        Token(TokenType.ATTR_RELATION, 'is'),
        Token(TokenType.ATTR_VALUE, 'V.*'),
    ]
    self.assertEqual(observed, expected)
def test_empty(self):
    """Parsing an empty query yields no tree (None)."""
    tokens = list(Lexer("").generate_tokens())
    tree = Parser(tokens).parse()
    self.assertEqual(tree, None)
def test_empty(self):
    """Lexing an empty string yields no tokens."""
    observed = list(Lexer("").generate_tokens())
    self.assertEqual(observed, [])
def test_escape(self):
    """Escaped quotes read from a fixture file lex to a single quote token."""
    with open("escape_quotes.txt") as f:
        source = f.read()
    observed = list(Lexer(source).generate_tokens())
    expected = [Token(TokenType.DEFAULT_TOKEN, '"')]
    self.assertEqual(observed, expected)
def test_default_token(self):
    """A bare quoted word lexes to one DEFAULT_TOKEN."""
    observed = list(Lexer('"我們"').generate_tokens())
    expected = [Token(TokenType.DEFAULT_TOKEN, "我們")]
    self.assertEqual(observed, expected)
def test_empty_token(self):
    """Bare brackets lex to one EMPTY_TOKEN."""
    observed = list(Lexer('[]').generate_tokens())
    expected = [Token(TokenType.EMPTY_TOKEN, "EMPTY_TOKEN")]
    self.assertEqual(observed, expected)
from cqls.lexer import Lexer
from cqls.parser import Parser
from cqls.interpreter import Interpreter
from cqls.expand_quantifiers import expand_quantifiers

# Upper bound when expanding open-ended quantifiers into concrete lengths.
MAX_QUANT = 15
# Attribute name assumed when a constraint names none.
DEFAULT_ATTR_NAME = 'w'

# Interactive loop: read one CQL query per prompt, then print the token
# stream, the parse tree of every quantifier expansion, and the collected
# non-empty interpreter values.
while True:
    text = input('CQL > ')

    lex = Lexer(text)
    tokens = list(lex.generate_tokens())
    print(f"tokens : {tokens}")
    print()

    expanded = expand_quantifiers(tokens, MAX_QUANT)
    values = []
    for query in expanded:
        tree = Parser(query).parse()
        print(f"parser : {tree}")
        print()
        result = Interpreter(default_attrname=DEFAULT_ATTR_NAME).visit(tree)
        if len(result) > 0:
            values.append(result)
    print(f"value : {values}")