def test_parse_error_on_range(self):
    """Parsing ``[foo TO ]`` must raise ParseSyntaxError with a precise message."""
    expected_message = "Syntax error in input : unexpected ']' at position 8!"
    with self.assertRaises(ParseSyntaxError) as ctx:
        parser.parse('[foo TO ]')
    self.assertEqual(str(ctx.exception), expected_message)
def test_reserved_ko(self):
    """Reserved words used where a term is expected must fail to parse.

    (The original note says these words hurt Lucene as well.)
    """
    # ``foo:NOT`` is only checked against the beginning of the message.
    with self.assertRaises(ParseSyntaxError) as ctx:
        parser.parse('foo:NOT')
    message = str(ctx.exception)
    self.assertTrue(
        message.startswith("Syntax error in input : unexpected end of expr"))

    # The remaining inputs are checked against the complete message.
    exact_cases = (
        ('foo:AND', "Syntax error in input : unexpected 'AND' at position 4!"),
        ('foo:OR', "Syntax error in input : unexpected 'OR' at position 4!"),
        ('OR', "Syntax error in input : unexpected 'OR' at position 0!"),
        ('AND', "Syntax error in input : unexpected 'AND' at position 0!"),
    )
    for query, expected_message in exact_cases:
        with self.assertRaises(ParseSyntaxError) as ctx:
            parser.parse(query)
        self.assertEqual(str(ctx.exception), expected_message)
def test_illegal_character_exception(self):
    """A lone backslash must raise IllegalCharacterError at position 0."""
    lone_backslash = '\\'
    with self.assertRaises(IllegalCharacterError) as ctx:
        parser.parse(lone_backslash)
    self.assertEqual(
        str(ctx.exception), "Illegal character '\\' at position 0")
def validate_lucene_query(value):
    """Check that ``value`` can be parsed as a Lucene query.

    The empty string is accepted without invoking the parser.

    :param value: query string to validate
    :return: ``None`` when the value is empty or parses successfully
    :raises errors.bad_request.InvalidLuceneSyntax: when the parser raises
        ``ParseError``; the parser error is passed as ``error`` and also
        attached as the explicit cause of the new exception
    """
    if value == "":
        return
    try:
        parser.parse(value)
    except ParseError as e:
        # Chain explicitly so the traceback shows the parser error as the cause
        # rather than as an error that happened "during handling".
        raise errors.bad_request.InvalidLuceneSyntax(error=e) from e
def test_datemath_in_field(self):
    """Date-math style values after ``foo:`` parse into a single Word."""
    cases = (
        (r'foo:2015-12-19||+2\d', r"2015-12-19||+2\d"),
        (r'foo:now+2h+20m\h', r"now+2h+20m\h"),
    )
    for query, word_value in cases:
        expected = SearchField("foo", Word(word_value))
        actual = parser.parse(query)
        self.assertEqual(str(expected), str(actual))
        self.assertEqual(expected, actual)
def parse(self, query_str):
    """Build the search query object for ``query_str``.

    The Lucene syntax is checked in Python first, so we know whether the
    syntax is correct before the query is executed in Elasticsearch.

    :param query_str: raw query string
    :return: a ``query_string`` query when the string parses, otherwise a
        ``multi_match`` query; both receive ``self.extra_params``
    """
    try:
        luqum_parser.parse(query_str)
    except ParseError:
        # Not valid Lucene syntax: fall back to a multi-match query.
        query_type = 'multi_match'
    else:
        query_type = 'query_string'
    return Q(query_type, query=query_str, **self.extra_params)
def test_incomplete_object_field_raise(self):
    """The strict checker rejects these field paths and names them in the error."""
    cases = (
        ('collection.keywords.more_info:"foo"',
         '"collection.keywords.more_info"'),
        ('author:birth:"foo"', '"author.birth"'),
    )
    for query, quoted_path in cases:
        tree = parser.parse(query)
        with self.assertRaises(NestedSearchFieldException) as ctx:
            self.strict_checker(tree)
        self.assertIn(quoted_path, str(ctx.exception))
def _parse(text_query, query_params, user):
    """Parse the text query and pull out filters and sorts.

    :param text_query: the raw text query
    :param query_params: request parameters, scanned for additional text
        queries via ``_handle_params``
    :param user: requesting user; unauthenticated users get the parse tree
        rewritten by ``AnonymousTransformer``
    :return: a tuple ``(new_query, filters, sort, escaped, use_hl)``

        - new_query: text query with filters and sorts removed
        - filters: filters collected by the ``FilterExtractor`` (per the
          original note, passed to solr as ``fq`` field queries)
        - sort: sort collected by the ``FilterExtractor`` (per the original
          note, a string from the SORT_MAP), or ``None``
        - escaped: ``True`` when the query only parsed after escaping
        - use_hl: whether highlighting should be used
    """
    if not text_query.strip():
        # special case for empty query
        new_query = ""
        filters = QueryDict(mutable=True)
        sort = None
        escaped = False
        use_hl = False
    else:
        try:
            tree = parser.parse(text_query)
        except (ParseError, TypeError):
            # retry with the whole query escaped
            tree = parser.parse(escape(text_query))
            escaped = True
        else:
            escaped = False

        # check for boolean expressions to determine if we should pull out
        # all filters or only sort filters
        is_boolean = any(BooleanDetector().visit(tree))
        # detect fuzzy searches to disable highlighting
        is_fuzzy = any(FuzzyDetector().visit(tree))

        filter_extractor = FilterExtractor(sort_only=is_boolean)
        tree = filter_extractor.visit(tree)
        if not user.is_authenticated:
            tree = AnonymousTransformer().visit(tree)

        if tree is not None:
            new_query = str(tree)
        else:
            new_query = ""
        filters = filter_extractor.filters
        sort = filter_extractor.sort
        # only use highlighting for queries with no fuzzy searches and
        # which do not explicitly turn it off
        use_hl = not is_fuzzy and filter_extractor.use_hl

    # pull text queries from the parameters into the text query
    additional_text = _handle_params(
        query_params, TEXT_FIELDS, DYNAMIC_TEXT_FIELDS)
    if additional_text:
        new_query = "{} {}".format(new_query, " ".join(additional_text))

    # if nothing is left in the query after pulling out filters, default to
    # *:* which matches everything
    if not new_query:
        new_query = "*:*"

    return new_query, filters, sort, escaped, use_hl
def test_minus(self):
    """``-`` prefixes parse to Prohibit nodes and ``NOT`` to a Not node."""
    expected = AndOperation(
        Prohibit(Word("test", tail=" ")),
        Prohibit(Word("foo", tail=" "), head=" "),
        Not(Word("bar", head=" "), head=" "),
    )
    actual = parser.parse("-test AND -foo AND NOT bar")
    self.assertEqual(str(actual), str(expected))
    self.assertEqual(actual, expected)
def test_complex(self):
    """A query with mixed whitespace is reproduced exactly when printed."""
    # The point of head / tail management is to be able to keep the
    # original structure even after a tree transformation or so.
    source = "\rfoo AND bar \nAND \t(\rbaz OR spam\rOR ham\t\t)\r"
    parsed = parser.parse(source)
    self.assertEqual(str(parsed), source)
    self.assertEqual(parsed.__str__(head_tail=True), source)
def default_query_parser(q: str, taxonomy_code=None) -> TaxonomyQuery:
    """
    A parser for the query language

    :param q: the query in stringified form
    :param taxonomy_code: set to taxonomy code if terms are searched for.
                          Left None if taxonomies are searched for
    :return: an instance of TaxonomyQuery; ``is_simple`` is True when the
             parsed query is a Word, an UnknownOperation or a Phrase
    :raises TaxonomyQueryNotSupported: when parsing fails for any reason; the
             underlying exception is attached as the explicit cause
    """
    try:
        parsed_query = parser.parse(q)
    except Exception as e:
        # Chain explicitly so the original parser failure stays visible.
        raise TaxonomyQueryNotSupported(str(e)) from e

    if isinstance(parsed_query, (Word, UnknownOperation)):
        return TaxonomyQuery(is_simple=True, query=q, taxonomy_code=taxonomy_code)

    if isinstance(parsed_query, Phrase):
        # Pass the phrase on without surrounding double or single quotes.
        unquoted = q.strip('"').strip("'")
        return TaxonomyQuery(is_simple=True, query=unquoted,
                             taxonomy_code=taxonomy_code)

    return TaxonomyQuery(is_simple=False, query=parsed_query,
                         taxonomy_code=taxonomy_code)
def handleReq(self, session):
    """Run the search query from ``self.params["query"]`` and build the response.

    The query is parsed, processed by ``parsePass1`` and ``parsePass2``, and
    the resulting tags are emitted in artist / album / song order with
    duplicates (same ``id`` within a group) removed.

    :param session: passed through to ``parsePass2``
    :return: the value of ``self.makeResp(child=result)``
    :raises MissingParam: re-raised from the parse passes, or raised (chained
        to the original error) when any other exception occurs while parsing
    :raises NotFound: re-raised from the parse passes, or raised when
        ``parsePass2`` returns ``None``
    """
    query = self.params["query"]
    parsed_result = None
    result = ET.Element(self.search_param)

    try:
        tree = LQParser.parse(query)
        self.parsePass1(tree, QueryContext(), None)
        parsed_result = self.parsePass2(tree, session)
    except (MissingParam, NotFound):
        # Bare raise keeps the original traceback untouched.
        raise
    except Exception as e:
        log.error(f"Failed to parse search query: {query}", exc_info=e)
        raise MissingParam("Invalid search query") from e

    if parsed_result is None:
        raise NotFound("No results found")

    # Sort into artist/album/song order and dedup (later tags with the same
    # id replace earlier ones within a group).
    artists = {}
    albums = {}
    songs = {}
    for tag in parsed_result:
        if tag.tag == "artist":
            artists[tag.get("id")] = tag
        elif tag.tag == "album":
            albums[tag.get("id")] = tag
        else:
            songs[tag.get("id")] = tag

    for group in (artists, albums, songs):
        result.extend(group.values())

    return self.makeResp(child=result)
def test_integration(self):
    """Check ``span()`` with and without head/tail on every node of a query."""

    def check_span(node, bare, with_head_tail):
        self.assertEqual(node.span(), bare)
        self.assertEqual(node.span(head_tail=True), with_head_tail)

    tree = parser.parse(
        " foo:bar OR baz OR ([20 TO 2000] AND more:(yee AND yii)) ")
    check_span(tree, (0, 57), (0, 57))

    foo, baz, group = tree.children
    check_span(foo, (1, 9), (0, 9))
    check_span(baz, (12, 15), (11, 16))
    check_span(group, (19, 56), (18, 57))

    bar, = foo.children
    check_span(bar, (5, 8), (5, 9))

    andop, = group.children
    check_span(andop, (20, 55), (20, 55))

    range_, more = andop.children
    check_span(range_, (20, 32), (20, 33))
    check_span(more, (37, 55), (36, 55))

    field_group, = more.children
    check_span(field_group, (42, 55), (42, 55))

    and_op2, = field_group.children
    yee, yii = and_op2.children
    check_span(yee, (43, 46), (43, 47))
    check_span(yii, (51, 54), (50, 54))
def build_query(self, query: str, queried_type: Optional[Type[Object]] = None):
    """Turn a query string into a filtered database query.

    :param query: query string handed to ``parser.parse``
    :param queried_type: optional type stored in the builder context
    :return: ``db.session.query(...)`` on the context's ``queried_type``,
        filtered by the condition produced from visiting the parse tree
    """
    ctx = SQLQueryBuilderContext(queried_type=queried_type)
    parsed = parser.parse(query)
    where_clause = self.visit(parsed, context=ctx)
    # Read queried_type back from the context only after the visit.
    base_query = db.session.query(ctx.queried_type)
    return base_query.filter(where_clause)
def test_complex_mix(self):
    """A query mixing nesting styles passes both the strict and the plain checker."""
    query = (
        'collection:(title:"foo" AND keywords.more_info:(linked.key:"bar" revision:"test"))'
    )
    parsed = parser.parse(query)
    self.strict_checker(parsed)
    self.checker(parsed)
    self.assertIsNotNone(parsed)
def test_plus(self):
    """``+`` prefixes parse to Plus nodes inside an AND operation."""
    expected = AndOperation(
        Plus(Word("test", tail=" ")),
        Word("foo", head=" ", tail=" "),
        Plus(Word("bar"), head=" "),
    )
    actual = parser.parse("+test AND foo AND +bar")
    self.assertEqual(str(actual), str(expected))
    self.assertEqual(actual, expected)
def test_or_operation(self):
    """Check the ok/ko HTML for an OR query against books BB1, HP8 and HP7."""
    ltree = parser.parse("n_pages:360 OR edition:Lumos")
    names = auto_name(ltree)
    query = self.es_builder(ltree)

    def render_for(ref):
        # Fetch the single book with this ref, expect exactly one matched
        # query name, and render the tree with ok/ko markup.
        book, = list(
            self.search.filter(query).filter("term", ref=ref).execute())
        self.assertEqual(len(book.meta.matched_queries), 1)
        paths_ok, paths_ko = self.propagate_matching(
            ltree,
            *matching_from_names(book.meta.matched_queries, names),
        )
        return self.make_html(ltree, paths_ok, paths_ko)

    # the one matching Lumos
    self.assertEqual(
        render_for("BB1"),
        '<span class="ok"><span class="ko">n_pages:360 </span>OR edition:Lumos</span>',
    )

    # the one matching n_pages
    self.assertEqual(
        render_for("HP8"),
        '<span class="ok">n_pages:360 OR<span class="ko"> edition:Lumos</span></span>',
    )

    # matching None
    no_match_search = self.search.filter(Q(query) | Q("match_all")).filter(
        Q("term", ref="HP7"))
    book, = list(no_match_search.execute())
    self.assertFalse(hasattr(book.meta, "matched_queries"))
def _nested_test(self, query, html, ref, num_match=1):
    """Run a matching scenario that takes nested queries into account.

    :param str query: the Lucene query to parse and run
    :param str html: expected result of ``self.make_html``
    :param str ref: ref of the expected matching book
    :param int num_match: expected total number of matched query names
    :return: tuple ``(matched_queries, html)``
    """
    ltree = parser.parse(query)
    names = auto_name(ltree)
    es_query = self.es_builder(ltree)
    all_queries = [es_query] + extract_nested_queries(es_query)

    matched_queries = []
    # we have to force book matching by adding a condition on its ref
    for sub_query in all_queries:
        forced = Q(sub_query) | Q("term", ref=ref)
        search = self.search.filter(forced).filter("term", ref=ref)
        book, = list(search.execute())
        self.assertEqual(book.ref, ref)
        matched_queries.extend(getattr(book.meta, "matched_queries", []))
    self.assertEqual(len(matched_queries), num_match)

    paths_ok, paths_ko = self.propagate_matching(
        ltree,
        *matching_from_names(matched_queries, names),
    )
    rendered = self.make_html(ltree, paths_ok, paths_ko)
    self.assertEqual(rendered, html)
    return matched_queries, html
def test_name_index_nested2(self):
    """``name_index`` works on an expression whose outer node has no name."""
    expr = '(objet:(bar OR (foo AND "foo bar")))'
    tree = parser.parse(expr)
    self.assertEqual(str(tree), expr)  # needs to be sure

    # Descend through group, field and field group to reach the OR node.
    or_op = tree.expr.expr.expr
    bar = or_op.operands[0]
    and_op = or_op.operands[1].expr
    foo = and_op.operands[0]
    foo_bar = and_op.operands[1]

    named_nodes = (
        ("or_op", or_op),
        ("bar", bar),
        ("and_op", and_op),
        ("foo", foo),
        ("foo_bar", foo_bar),
    )
    for name, node in named_nodes:
        set_name(node, name)

    result = name_index(tree)

    expected_text = (
        ("or_op", 'bar OR (foo AND "foo bar")'),
        ("bar", "bar"),
        ("and_op", 'foo AND "foo bar"'),
        ("foo", "foo"),
        ("foo_bar", '"foo bar"'),
    )
    for name, text in expected_text:
        self.assertEqual(extract(expr, name, result), text)
def test_real_situation_7(self):
    """A nested ``thes`` group with OR and NOT becomes should / must_not bools."""

    def term(field, value):
        return {'term': {field: {'value': value}}}

    tree = parser.parse(
        "pays:FR AND "
        "type:AO AND "
        "thes:(("
        "SI_FM_GC_RC_Relation_client_commerciale_courrier OR "
        "SI_FM_GC_Gestion_Projet_Documents OR "
        "SI_FM_GC_RC_Mailing_prospection_Enquete_Taxe_apprentissage OR "
        "SI_FM_GC_RC_Site_web OR "
        "SI_FM_GC_RH OR SI_FM_GC_RH_Paye OR "
        "SI_FM_GC_RH_Temps) OR NOT C91_Etranger)"
    )
    result = self.transformer.visit(tree).json

    thes_values = (
        'SI_FM_GC_RC_Relation_client_commerciale_courrier',
        'SI_FM_GC_Gestion_Projet_Documents',
        'SI_FM_GC_RC_Mailing_prospection_Enquete_Taxe_apprentissage',
        'SI_FM_GC_RC_Site_web',
        'SI_FM_GC_RH',
        'SI_FM_GC_RH_Paye',
        'SI_FM_GC_RH_Temps',
    )
    expected = {'bool': {'must': [
        term('pays', 'FR'),
        term('type', 'AO'),
        {'bool': {'should': [
            {'bool': {'should': [term('thes', value) for value in thes_values]}},
            {'bool': {'must_not': [term('thes', 'C91_Etranger')]}},
        ]}},
    ]}}
    self.assertDictEqual(result, expected)
def get_sql(self, query):
    """Translate ``query`` into a SQL string, printing debug traces on the way.

    The query is parsed, passed through ``resolver`` and visited; joins,
    extra ``where`` clauses, ``having`` and ``group_by`` stored on ``self``
    are then applied to the select.

    :param query: query string handed to ``parser.parse``
    :return: the statement compiled with literal binds for the sqlite dialect
    """
    print("QUERY:", query)
    self._set_fields()

    parsed = parser.parse(query)
    rtree = resolver(parsed)
    print("REPR:", repr(rtree))

    visited = self.visit(rtree)
    print("VISITED:", visited)

    self.make_joins()
    columns = self.get_columns()
    print("COLUMNS:", columns)

    s = select(columns)
    for join in self.joins:
        s = s.select_from(join)

    # NOTE(review): nesting reconstructed from a collapsed source line —
    # confirm the extra-where loop belongs under this condition.
    if self.where:
        print("WHERE:", self.where)
        for where in self.where:
            s = s.where(where)
    s = s.where(visited)

    if self.having is not None:
        s = s.having(self.having)
    if self.group_by is not None:
        s = s.group_by(self.group_by)

    print(s)
    print(s.compile().params)

    compiled = s.compile(
        compile_kwargs={"literal_binds": True}, dialect=sqlite.dialect())
    return str(compiled)
def test_double_negation(self):
    """Check the ok/ko HTML for ``NOT (... AND - ...)`` against BB1 and HP8."""
    ltree = parser.parse("NOT (n_pages:360 AND - edition:Lumos) AND ref:*")
    names = auto_name(ltree)
    query = self.es_builder(ltree)

    def render(search):
        # Expect one book with exactly two matched query names, then render
        # the tree with ok/ko markup.
        book, = list(search.execute())
        self.assertEqual(len(book.meta.matched_queries), 2)
        paths_ok, paths_ko = self.propagate_matching(
            ltree,
            *matching_from_names(book.meta.matched_queries, names),
        )
        return self.make_html(ltree, paths_ok, paths_ko)

    # matching Lumos double negation
    matching = self.search.filter(query).filter("term", ref="BB1")
    self.assertEqual(
        render(matching),
        '<span class="ok">NOT'
        '<span class="ko"> (n_pages:360 AND -<span class="ok"> edition:Lumos</span>) </span>'
        'AND ref:*</span>',
    )

    # not matching Lumos double negation
    not_matching = self.search.filter(
        Q(query) | Q("term", ref="HP8")).filter("term", ref="HP8")
    self.assertEqual(
        render(not_matching),
        '<span class="ko">NOT'
        '<span class="ok"> (n_pages:360 AND -<span class="ko"> edition:Lumos</span>) </span>'
        'AND<span class="ok"> ref:*</span></span>',
    )
def _parse_query(self, query: str, ambiguous_action: str) -> None:
    """
    Parse the query, replace any ambiguous (unknown) parts with the correct
    operation and check for unsupported operations.

    The parsed (and possibly resolved) tree is stored on ``self._tree``.

    Args:
        query: A Lucene style query.
        ambiguous_action: The action to use for ambiguous queries, for
            example "field1:value1 field2:value2". Must be a key of
            ``self.ambiguous_actions``, or the string "Exception" to leave
            unknown operations unresolved.

    Raises:
        ValueError: If the query is None, empty or only whitespace.
        QueryException: If the query cannot be parsed (chained to the
            parser error). NOTE(review): ``_check_tree`` presumably raises
            it too for unsupported terms — confirm.
    """
    if query is None or query.strip() == "":
        raise ValueError("Need a valid query")

    try:
        self._tree = parser.parse(query)
    except ParseError as exc:
        message = "Could not parse the query, error: {}".format(str(exc))
        raise QueryException(message) from exc

    # Replace any UnknownOperation with the chosen action
    if ambiguous_action != "Exception":
        operation = self.ambiguous_actions[ambiguous_action]
        resolver = UnknownOperationResolver(resolve_to=operation)
        self._tree = resolver(self._tree)

    # Raise a QueryException if the user has passed unsupported search terms
    self._check_tree(self._tree)
def test_flavours(self):
    """Date-math bounds inside a range on a field parse as plain Words."""
    low = Word("now/d-1d+7H", tail=" ")
    high = Word("now/d+7H", head=" ")
    expected = SearchField("somedate", Range(low, high, True, True))
    actual = parser.parse('somedate:[now/d-1d+7H TO now/d+7H]')
    self.assertEqual(str(actual), str(expected))
    self.assertEqual(actual, expected)
def test_escaping_word(self):
    """Backslash-escaped special characters stay inside a single Word."""
    escaped = r'test\+\-\&\&\|\|\!\(\)\{\}\[\]\^\"\~\*\?\:\\test'
    expected_tree = Word(escaped)
    expected_unescaped = r'test+-&&||!(){}[]^"~*?:\test'

    actual = parser.parse(escaped)

    self.assertEqual(str(actual), escaped)
    self.assertEqual(actual, expected_tree)
    self.assertEqual(actual.unescaped_value, expected_unescaped)
def test_real_situation_4(self):
    """OR of a field match and a ``{2 TO 4]`` range gives a should clause."""
    parsed = parser.parse("spam:eggs OR monty:{2 TO 4]")
    actual = self.transformer.visit(parsed).json

    spam_match = {'match': {'spam': {
        'query': 'eggs', 'type': 'phrase', 'zero_terms_query': 'all'}}}
    # ``{`` yields 'gt' and ``]`` yields 'lte' in the expected output.
    monty_range = {'range': {'monty': {'lte': '4', 'gt': '2'}}}
    expected = {'bool': {'should': [spam_match, monty_range]}}

    self.assertDictEqual(actual, expected)
def test_real_situation_2(self):
    """AND of two fields gives a must clause with a term and a match."""
    parsed = parser.parse("pays:FR AND monty:python")
    actual = self.transformer.visit(parsed).json

    pays_term = {'term': {'pays': {'value': 'FR'}}}
    monty_match = {'match': {'monty': {
        'query': 'python', 'type': 'phrase', 'zero_terms_query': 'all'}}}
    expected = {'bool': {'must': [pays_term, monty_match]}}

    self.assertDictEqual(actual, expected)
def test_phrase(self):
    """Quoted phrases keep their content verbatim, operators included."""
    first = Phrase('"a phrase (AND a complicated~ one)"', tail=" ")
    second = Phrase('"Another one"', head=" ")
    expected = AndOperation(first, second)
    actual = parser.parse(
        '"a phrase (AND a complicated~ one)" AND "Another one"')
    self.assertEqual(str(actual), str(expected))
    self.assertEqual(actual, expected)
def test_date_in_field(self):
    """Date and datetime values after ``foo:`` parse into a single Word."""
    values = (
        "2015-12-19",
        "2015-12-19T22:30",
        "2015-12-19T22:30:45",
        "2015-12-19T22:30:45.234Z",
    )
    for value in values:
        expected = SearchField("foo", Word(value))
        actual = parser.parse('foo:' + value)
        self.assertEqual(str(expected), str(actual))
        self.assertEqual(expected, actual)
def test_regex(self):
    """Slash-delimited regexes parse into Regex nodes, content untouched."""
    first = Regex('/a regex (with some.*match+ing)?/', tail=" ")
    second = Regex('/Another one/', head=" ")
    expected = AndOperation(first, second)
    actual = parser.parse(
        '/a regex (with some.*match+ing)?/ AND /Another one/')
    self.assertEqual(str(actual), str(expected))
    self.assertEqual(actual, expected)
def test_real_situation_5(self):
    """OR of a term field and a ``{2 TO 4]`` range gives a should clause."""
    parsed = parser.parse("pays:FR OR objet:{2 TO 4]")
    actual = self.transformer.visit(parsed).json

    pays_term = {'term': {'pays': {'value': 'FR'}}}
    objet_range = {'range': {'objet': {'lte': '4', 'gt': '2'}}}
    expected = {'bool': {'should': [pays_term, objet_range]}}

    self.assertDictEqual(actual, expected)
def test_escaping_single_column(self):
    """Escaped colons stay inside one Word (non regression for issue #30)."""
    escaped = r'1000\:1000\:\:1/24'
    expected_tree = Word(r'1000\:1000\:\:1/24')

    actual = parser.parse(escaped)

    self.assertEqual(actual, expected_tree)
    self.assertEqual(str(actual), escaped)
    self.assertEqual(actual.unescaped_value, "1000:1000::1/24")
def __init__(self, query, default_operator=None,
             default_operator_in_search_field=None):
    """Store the query, its operator defaults and its parse tree.

    :param query: query string; parsed immediately with ``parser.parse``
    :param default_operator: falls back to ``'and'`` when falsy
    :param default_operator_in_search_field: falls back to ``'or'`` when falsy
    """
    if not default_operator:
        default_operator = 'and'
    if not default_operator_in_search_field:
        default_operator_in_search_field = 'or'

    self._query = query
    self.default_operator = default_operator
    self.default_operator_in_search_field = default_operator_in_search_field
    self._tree = parser.parse(query)
def test_extract_filter(self, query, new_query, filters, sort_only, sort):
    """Check the remaining query, filters and sort produced by FilterExtractor."""
    parsed = parser.parse(query)
    extractor = FilterExtractor(sort_only=sort_only)
    remaining = extractor.visit(parsed)

    # A falsy tree after extraction is compared as the empty string.
    remaining_text = str(remaining) if remaining else ""

    assert new_query == remaining_text
    assert extractor.filters == QueryDict(filters)
    assert extractor.sort == sort
def test_real_situation_6(self):
    """Three OR operands, the last without a field, give one should clause."""
    parsed = parser.parse("pays:FR OR monty:{2 TO 4] OR python")
    actual = self.transformer.visit(parsed).json

    pays_term = {'term': {'pays': {'value': 'FR'}}}
    monty_range = {'range': {'monty': {'lte': '4', 'gt': '2'}}}
    # The bare word is expected to be matched against the 'text' field.
    bare_word_match = {'match': {'text': {
        'query': 'python', 'type': 'phrase', 'zero_terms_query': 'all'}}}
    expected = {'bool': {'should': [pays_term, monty_range, bare_word_match]}}

    self.assertDictEqual(actual, expected)
def test_real_situation_9(self):
    """Carriage return and new line in a phrase are replaced by a normal space."""
    parsed = parser.parse('spam:"monthy\r\n python"')
    actual = self.transformer.visit(parsed).json
    expected = {'match_phrase': {'spam': {'query': 'monthy python'}}}
    self.assertDictEqual(actual, expected)
def test_real_situation_3(self):
    """AND with a parenthesised OR gives a must clause holding a should clause."""

    def phrase_match(field, value):
        return {'match': {field: {
            'query': value, 'type': 'phrase', 'zero_terms_query': 'all'}}}

    parsed = parser.parse("spam:eggs AND (monty:python OR life:bryan)")
    actual = self.transformer.visit(parsed).json

    expected = {'bool': {'must': [
        phrase_match('spam', 'eggs'),
        {'bool': {'should': [
            phrase_match('monty', 'python'),
            phrase_match('life', 'bryan'),
        ]}},
    ]}}
    self.assertDictEqual(actual, expected)
def test_real_situation_1(self):
    """A single ``field:word`` query becomes a phrase-type match."""
    parsed = parser.parse("spam:eggs")
    actual = self.transformer.visit(parsed).json
    expected = {'match': {'spam': {
        'query': 'eggs',
        'type': 'phrase',
        'zero_terms_query': 'all',
    }}}
    self.assertDictEqual(actual, expected)
def test_real_situation_8(self):
    """Visiting this mixed OR / implicit-operator query raises OrAndAndOnSameLevel."""
    query = (
        'objet:(accessibilite OR diagnosti* OR adap OR "ad ap" -(travaux OR amiante OR "hors voirie"))'
    )
    parsed = parser.parse(query)
    with self.assertRaises(OrAndAndOnSameLevel):
        self.transformer.visit(parsed).json