def _parse_query(query_str):
    """Parse ``query_str`` and run the resulting tree through both visitors.

    The tree produced by ``StatefulParser`` is first passed to a
    ``RestructuringVisitor``; whatever that returns is then passed to an
    ``ElasticSearchVisitor``, and the latter's result is returned.
    """
    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()
    es_translator = ElasticSearchVisitor()

    # Only the second element (the parse tree) of the parser's result is used.
    _leftover, raw_tree = query_parser.parse(query_str, parser.Query)
    restructured_tree = raw_tree.accept(restructurer)
    return restructured_tree.accept(es_translator)
def test_restructuring_visitor_functionality(query_str, expected_parse_tree):
    """Check that the restructured tree for ``query_str`` equals ``expected_parse_tree``."""
    print("Parsing: " + query_str)
    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()

    # The unrecognized-text half of the parser result is not checked here.
    _leftover, raw_tree = query_parser.parse(query_str, parser.Query)
    restructured_tree = raw_tree.accept(restructurer)

    assert restructured_tree == expected_parse_tree
def test_parsing_output_with_inspire_next_tests(query_str,
                                                expected_parse_tree):
    """Parse and restructure ``query_str``; the result must equal ``expected_parse_tree``."""
    print("Parsing: " + query_str)
    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()

    # Only the parse tree is inspected; leftover text is ignored.
    _leftover, raw_tree = query_parser.parse(query_str, parser.Query)
    restructured_tree = raw_tree.accept(restructurer)

    assert restructured_tree == expected_parse_tree
Пример #4
0
def print_query_and_parse_tree(query_str):
    """Print the query, its restructured parse tree and a separator line.

    The query line and the tree are wrapped in ANSI escape sequences
    (``94``/``1`` for the query, ``92`` for the tree, ``0`` to reset).
    """
    blue, bold, green, reset = '\033[94m', '\033[1m', '\033[92m', '\033[0m'

    stateful_parser = StatefulParser()
    print(blue + "Parsing " + bold + query_str + reset)

    _leftover, tree = stateful_parser.parse(query_str, Query)
    restructured_tree = tree.accept(RestructuringVisitor())
    print(green + emit_tree_format(restructured_tree) + reset)

    separator = "————————————————————————————————————————————————————————————————————————————————"
    print(separator)
Пример #5
0
def test_that_parse_terminal_token_does_accept_keywords_if_parsing_parenthesized_terminal_flag_is_on():
    """With ``_parsing_parenthesized_terminal`` set, ``'and'`` is returned as the token."""
    keyword_text = 'and'

    stateful_parser = StatefulParser()
    # Flip the parser flag this test is about before calling the terminal parser.
    stateful_parser._parsing_parenthesized_terminal = True

    leftover, token = SimpleValueUnit.parse_terminal_token(
        stateful_parser, keyword_text)

    # Nothing may be left unrecognized and the token must be the input itself.
    assert leftover == ''
    assert token == keyword_text
def test_foo_bar():
    """Parse and restructure a fixed query and compare it with a hand-built tree."""
    query_str = 'find j Nucl.Phys. and not vol A531 and a ellis'
    print("Parsing: " + query_str)

    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()
    _leftover, raw_tree = query_parser.parse(query_str, parser.Query)
    restructured_tree = raw_tree.accept(restructurer)

    # NOTE(review): the expected tree has no node for the `not vol A531`
    # clause of the query -- confirm this is intended.
    journal_clause = KeywordOp(Keyword('journal'), Value('Nucl.Phys.'))
    author_clause = KeywordOp(Keyword('author'), Value('ellis'))
    expected_tree = AndOp(journal_clause, author_clause)

    assert restructured_tree == expected_tree
Пример #7
0
def test_that_parse_terminal_token_does_not_accept_token_followed_by_colon():
    """``'title:'`` must come back as a ``SyntaxError`` with the input left unrecognized."""
    colon_terminated = 'title:'
    stateful_parser = StatefulParser()

    leftover, outcome = SimpleValueUnit.parse_terminal_token(
        stateful_parser, colon_terminated)

    assert isinstance(outcome, SyntaxError)
    # The whole input must be handed back untouched.
    assert leftover == colon_terminated
Пример #8
0
def test_that_parse_terminal_token_does_not_accept_non_shortened_inspire_keywords():
    """``"exact-author"`` must come back as a ``SyntaxError`` with the input left unrecognized."""
    long_keyword = "exact-author"
    stateful_parser = StatefulParser()

    leftover, outcome = SimpleValueUnit.parse_terminal_token(
        stateful_parser, long_keyword)

    assert isinstance(outcome, SyntaxError)
    # The whole input must be handed back untouched.
    assert leftover == long_keyword
Пример #9
0
def test_simple_value_accepted_tokens(query_str, unrecognized_text, result):
    """Check what ``SimpleValue.parse`` returns for ``query_str``.

    Args:
        query_str: the text handed to ``SimpleValue.parse``.
        unrecognized_text: the leftover text the parser is expected to return.
        result: the expected parse result. When it is a ``SyntaxError``, the
            returned value must also be a ``SyntaxError`` carrying the same
            ``msg``; otherwise the returned value must compare equal to it.
    """
    parser = StatefulParser()

    returned_unrecognised_text, returned_result = SimpleValue.parse(
        parser, query_str, None)

    # The leftover text is checked the same way in both outcomes.
    assert returned_unrecognised_text == unrecognized_text

    if isinstance(result, SyntaxError):
        assert isinstance(returned_result, SyntaxError)
        # Compare the *returned* error's message with the expected one; the
        # previous `result.msg == result.msg` was always true.
        assert returned_result.msg == result.msg
    else:
        assert returned_result == result
Пример #10
0
def test_parser_functionality(query_str, expected_parse_tree):
    """Check that parsing ``query_str`` as a ``Query`` yields ``expected_parse_tree``."""
    print("Parsing: " + query_str)
    stateful_parser = StatefulParser()

    # Only the tree half of the parser result is compared.
    _leftover, actual_tree = stateful_parser.parse(query_str, Query)

    assert actual_tree == expected_parse_tree
Пример #11
0
def parse_query(query_str):
    """
    Drives the whole logic, by parsing, restructuring and finally, generating an ElasticSearch query.

    Args:
        query_str (six.text_type): the given query to be translated to an ElasticSearch query. A value that is
            not ``six.text_type`` is decoded as UTF-8 first.

    Returns:
        The query produced by ``ElasticSearchVisitor`` for the restructured parse tree, or the fallback
        ``multi_match`` dict described in the notes.

    Notes:
        A ``multi_match`` query over ``_all`` (with colons stripped from the query string) is returned instead
        when the parser recognizes nothing, when it raises ``SyntaxError``, when either visitor raises, or when
        the generated ElasticSearch query is empty.
    """
    def _generate_match_all_fields_query():
        # Strip colon character (special character for ES)
        stripped_query_str = ' '.join(query_str.replace(':', ' ').split())
        return {
            'multi_match': {
                'query': stripped_query_str,
                'fields': ['_all'],
                'zero_terms_query': 'all'
            }
        }

    def _crash_message(visitor_cls, error):
        # Always keep the "<Visitor> crashed" prefix. Previously the conditional expression swallowed the whole
        # concatenation, so an exception with an empty message was logged as just '.'.
        error_text = six.text_type(error)
        suffix = ": " + error_text + "." if error_text else "."
        return visitor_cls.__name__ + " crashed" + suffix

    if not isinstance(query_str, six.text_type):
        query_str = six.text_type(query_str.decode('utf-8'))

    logger.info('Parsing: "' + query_str + '".')

    parser = StatefulParser()
    rst_visitor = RestructuringVisitor()
    es_visitor = ElasticSearchVisitor()

    try:
        unrecognized_text, parse_tree = parser.parse(query_str, Query)

        if unrecognized_text:  # Usually, should never happen.
            msg = 'Parser returned unrecognized text: "' + unrecognized_text + \
                  '" for query: "' + query_str + '".'

            if query_str == unrecognized_text and parse_tree is None:
                # Didn't recognize anything.
                logger.warning(msg)
                return _generate_match_all_fields_query()

            # Part of the query was recognized: carry on with what was parsed.
            logger.warning(msg + 'Continuing with recognized parse tree.')

    except SyntaxError as e:
        logger.warning('Parser syntax error (' + six.text_type(e) +
                       ') with query: "' + query_str +
                       '". Continuing with a match_all with the given query.')
        return _generate_match_all_fields_query()

    # Try-Catch-all exceptions for visitors, so that search functionality never fails for the user.
    try:
        restructured_parse_tree = parse_tree.accept(rst_visitor)
        logger.debug('Parse tree: \n' +
                     emit_tree_format(restructured_parse_tree))

    except Exception as e:
        logger.exception(_crash_message(RestructuringVisitor, e))
        return _generate_match_all_fields_query()

    try:
        es_query = restructured_parse_tree.accept(es_visitor)
    except Exception as e:
        logger.exception(_crash_message(ElasticSearchVisitor, e))
        return _generate_match_all_fields_query()

    if not es_query:
        # Case where an empty query was generated (i.e. date query with malformed date, e.g. "d < 200").
        return _generate_match_all_fields_query()

    return es_query