def test_convert_simple_value_boolean_query_to_and_boolean_queries():
    """Check that a boolean query over simple values is spread over its keyword.

    The input is one ``author`` keyword query whose value is the boolean
    expression ``foo and (bar or not foobar)``. After restructuring, the test
    expects each simple value to sit in its own ``author`` KeywordOp, combined
    with AndOp / OrOp / NotOp nodes mirroring the original boolean structure.
    """
    # Input tree, built piece by piece in the same order as a nested literal.
    author_keyword = parser.InspireKeyword('author')
    foo = parser.SimpleValue('foo')
    conjunction = parser.And()
    bar = parser.SimpleValue('bar')
    disjunction = parser.Or()
    not_foobar = parser.SimpleValueNegation(parser.SimpleValue('foobar'))
    bar_or_not_foobar = parser.SimpleValueBooleanQuery(
        bar, disjunction, not_foobar)
    boolean_value = parser.SimpleValueBooleanQuery(
        foo, conjunction, bar_or_not_foobar)
    parse_tree = parser.SimpleQuery(
        parser.SpiresKeywordQuery(author_keyword, parser.Value(boolean_value)))

    def author_op(text):
        # Every leaf of the expected tree is an 'author' keyword query.
        return KeywordOp(Keyword('author'), Value(text))

    foo_op = author_op('foo')
    bar_op = author_op('bar')
    negated_foobar_op = NotOp(author_op('foobar'))
    expected_parse_tree = AndOp(foo_op, OrOp(bar_op, negated_foobar_op))

    restructuring_visitor = RestructuringVisitor()
    restructured_tree = parse_tree.accept(restructuring_visitor)

    assert restructured_tree == expected_parse_tree
def test_foo_bar():
    """Check that a negated volume following a journal is dropped from the tree.

    Parses ``find j Nucl.Phys. and not vol A531 and a ellis`` and expects the
    restructured tree to be just the conjunction of the journal keyword query
    and the author keyword query, with no volume node left.
    """
    query_str = 'find j Nucl.Phys. and not vol A531 and a ellis'
    print("Parsing: " + query_str)

    stateful_parser = StatefulParser()
    restructuring_visitor = RestructuringVisitor()

    # The first element of the parse result is not needed by this test.
    _ignored, raw_tree = stateful_parser.parse(query_str, parser.Query)
    restructured_tree = raw_tree.accept(restructuring_visitor)

    journal_op = KeywordOp(Keyword('journal'), Value('Nucl.Phys.'))
    author_op = KeywordOp(Keyword('author'), Value('ellis'))
    expected_parse_tree = AndOp(journal_op, author_op)

    assert restructured_tree == expected_parse_tree
def visit_boolean_query(self, node):
    """Turn a boolean query node into an AndOp or OrOp node.

    Both operands are visited first. When the visited left operand is a
    KeywordOp on the 'journal' keyword, the pair is handed to
    ``_restructure_if_volume_follows_journal``; a truthy result from that
    helper is returned instead of a fresh boolean node.

    Args:
        node: The boolean query node, exposing ``left``, ``right`` and
            ``bool_op``.

    Returns:
        The restructured journal subtree when the helper produced one,
        otherwise ``AndOp(left, right)`` if ``node.bool_op`` is an ``And``
        instance and ``OrOp(left, right)`` for anything else.
    """
    lhs = node.left.accept(self)
    rhs = node.right.accept(self)

    if isinstance(lhs, KeywordOp) and lhs.left == Keyword('journal'):
        # NOTE(review): this merge is attempted regardless of whether bool_op
        # is And or Or -- confirm that is intended.
        merged = _restructure_if_volume_follows_journal(lhs, rhs)
        if merged:
            return merged

    if isinstance(node.bool_op, And):
        return AndOp(lhs, rhs)
    return OrOp(lhs, rhs)
def _restructure_if_volume_follows_journal(left, right):
    """Fold a volume node that directly follows a journal into the journal node.

    Args:
        left (ast.ASTElement): The journal KeywordOp node. Its value is updated
            in place when a volume value is merged into it.
        right (ast.ASTElement): The rest of the tree to be restructured.

    Returns:
        (ast.ASTElement): The restructured tree, with the volume node removed,
        or None when ``right`` is neither a (possibly negated) volume KeywordOp
        nor an AndOp whose left child is one; nothing is modified in that case.

    Notes:
        This happens to support queries like "journal Phys.Rev. and vol d85".
        Appends the value of the KeywordOp with Keyword 'volume' to the journal
        value, separated by a comma, and discards the 'volume' KeywordOp node
        from the tree. A negated volume node is discarded without changing the
        journal value.
    """
    def _is_volume_keyword_op(node):
        # The isinstance check keeps us from reading `.left` on nodes that do
        # not have it (e.g. a NotOp, which exposes `.op`).
        return isinstance(node, KeywordOp) and node.left == Keyword('volume')

    def _is_negated_volume(node):
        return isinstance(node, NotOp) and _is_volume_keyword_op(node.op)

    def _get_volume_keyword_op_and_remaining_subtree(right_subtree):
        # Returns (volume_node, remaining_subtree), where either item may be
        # None, or plain None when no volume node leads the subtree.
        if _is_negated_volume(right_subtree):
            return None, None
        if isinstance(right_subtree, AndOp):
            if _is_negated_volume(right_subtree.left):
                return None, right_subtree.right
            if _is_volume_keyword_op(right_subtree.left):
                return right_subtree.left, right_subtree.right
            return None
        if _is_volume_keyword_op(right_subtree):
            return right_subtree, None
        return None

    volume_and_remaining_subtree = _get_volume_keyword_op_and_remaining_subtree(right)
    # Compare against None explicitly: the (None, None) result for a lone
    # negated volume is a real match and must not be mistaken for "no match".
    if volume_and_remaining_subtree is None:
        return None

    volume_node, remaining_subtree = volume_and_remaining_subtree
    if volume_node:
        left.right.value = ','.join([left.right.value, volume_node.right.value])

    return AndOp(left, remaining_subtree) if remaining_subtree else left
def visit_boolean_query(self, node):
    """Turn a boolean query node into an AndOp or OrOp node.

    Args:
        node: The boolean query node, exposing ``left``, ``right`` and
            ``bool_op``.

    Returns:
        ``AndOp`` over the two visited operands when ``node.bool_op`` is an
        ``And`` instance, ``OrOp`` over them for anything else.
    """
    lhs = node.left.accept(self)
    rhs = node.right.accept(self)

    if isinstance(node.bool_op, And):
        return AndOp(lhs, rhs)
    return OrOp(lhs, rhs)
RestructuringVisitor @pytest.mark.parametrize( ['query_str', 'expected_parse_tree'], [ # Find keyword combined with other production rules ('FIN author:\'ellis\'', KeywordOp(Keyword('author'), PartialMatchValue('ellis'))), ('Find author "ellis"', KeywordOp(Keyword('author'), ExactMatchValue('ellis'))), ('f author ellis', KeywordOp(Keyword('author'), Value('ellis'))), # Invenio like search ('author:ellis and title:boson', AndOp(KeywordOp(Keyword('author'), Value('ellis')), KeywordOp(Keyword('title'), Value('boson')))), ('unknown_keyword:\'bar\'', KeywordOp(Keyword('unknown_keyword'), PartialMatchValue('bar'))), ('dotted.keyword:\'bar\'', KeywordOp(Keyword('dotted.keyword'), PartialMatchValue('bar'))), # Boolean operator testing (And/Or) ('author ellis and title \'boson\'', AndOp(KeywordOp(Keyword('author'), Value('ellis')), KeywordOp(Keyword('title'), PartialMatchValue('boson')))), ('f a appelquist and date 1983', AndOp(KeywordOp(Keyword('author'), Value('appelquist')), KeywordOp(Keyword('date'), Value('1983')))), ('fin a henneaux and citedby a nicolai', AndOp( KeywordOp(Keyword('author'), Value('henneaux')),