Пример #1
0
class SpiresOrQuery(UnaryRule):
    """Operand introduced by ``or`` in a SPIRES-style query.

    The case-insensitive ``or`` token is consumed and discarded; the operand
    that follows is stored in ``op``. The alternatives are tried in order.
    """

    grammar = (
        omit(re.compile(r"or", re.IGNORECASE)),
        [
            (
                omit(Whitespace),
                attr('op', SpiresSimpleQuery),
            ),
            (
                omit(_),
                attr('op', SpiresParenthesizedQuery),
            ),
            (
                omit(Whitespace),
                attr('op', SpiresValueQuery),
            ),
            # Last resort: discard whatever ".*" matches and store an
            # EmptyQueryRule in ``op``.
            (
                omit(re.compile(r".*", re.IGNORECASE)),
                attr('op', EmptyQueryRule),
            ),
        ],
    )
Пример #2
0
class BetweenDateExpression(Expression):
    """Date range bounded by two ``Date`` values.

    The ``symbols['between']`` and ``symbols['and']`` markers are parsed and
    discarded; the bounds are stored as ``min`` and ``max``.
    """

    grammar = (
        omit(symbols['between']),
        attr('min', Date),
        omit(symbols['and']),
        attr('max', Date),
    )

    def build(self, builder):
        """Add the range condition, passing both bounds as extra arguments."""
        lower_bound, upper_bound = self.min, self.max
        builder.add("(date >= ? AND date <= ?)", lower_bound, upper_bound)
Пример #3
0
class LessEqualOp(UnaryRule):
    """Rule for the "less than or equal to" comparison.

    Two spellings are accepted: a ``<=`` prefix (``date <= 10-2000``) and a
    trailing dash after a number (``author-count 100-``).
    """

    grammar = [
        # Prefix form: drop the "<=" literal, keep the value after it.
        (
            omit(Literal("<=")),
            attr('op', SimpleValue),
        ),
        # Suffix form: digits, optionally continued by "/"- or "-"-separated
        # digit groups, immediately followed by a "-". That dash is consumed
        # only when whitespace, ")" or end of input comes next, so a value
        # such as "1-e" is not accepted.
        (
            attr('op', re.compile(r"\d+([/-]\d+)*(?=-)")),
            omit(re.compile(r'-(?=\s|\)|$)')),
        ),
    ]
Пример #4
0
class SpiresNotQuery(UnaryRule):
    """Negated operand in a SPIRES-style query.

    The operand may be introduced by ``and not`` or by a bare ``not``
    (both case-insensitive); the introducer is discarded and the operand is
    stored in ``op``.
    """

    grammar = (
        # The longer "and not" form is listed ahead of the bare "not".
        [
            omit(re.compile(r"and\s+not", re.IGNORECASE)),
            omit(re.compile(r"not", re.IGNORECASE)),
        ],
        [
            (
                omit(Whitespace),
                attr('op', SpiresSimpleQuery),
            ),
            (
                omit(_),
                attr('op', SpiresParenthesizedQuery),
            ),
            (
                omit(Whitespace),
                attr('op', SpiresValueQuery),
            ),
        ],
    )
Пример #5
0
class Value(UnaryRule):
    """Wrapper around every kind of value phrase the grammar recognizes.

    Whichever listed alternative matches first is stored in ``op``.
    """

    grammar = attr(
        'op',
        [
            # A leading "=" is tolerated (and dropped) before a range.
            (optional(omit(Literal("="))), RangeOp),
            GreaterEqualOp,
            LessEqualOp,
            GreaterThanOp,
            LessThanOp,
            # The same optional "=" may precede the remaining value kinds.
            (
                optional(omit(Literal("="))),
                [
                    ComplexValue,
                    ParenthesizedSimpleValues,
                    SimpleValueBooleanQuery,
                    SimpleValue,
                ],
            ),
        ],
    )
class LessEqualOp(UnaryRule):
    """Less than or Equal to operator.

    Supports queries like date <= 10-2000 or author-count 100-.
    """
    grammar = [
        # Prefix form: "<=" (discarded) followed by a simple value.
        (omit(Literal("<=")), attr('op', SimpleValue)),
        # Suffix form: a number, or any run of characters other than whitespace, "(", ")" and ":", followed by
        # a "-". The "-" must itself be followed by \s or ) or end of input so that a value like 1-e is not
        # accepted.
        (attr('op', re.compile(r"\d+")), omit(re.compile(r'-(?=\s|\)|$)'))),
        # The lookahead below only admits a value that is followed by "-" (optionally after one space), so the
        # terminator consumed afterwards has to be "-" too; with "\+" this alternative could never match.
        (attr('op', re.compile(r"[^\s():]+(?=( -|-))")),
         omit(re.compile(r'-(?=\s|\)|$)'))),
    ]
Пример #7
0
class Main(UnaryRule):
    """Top-level rule: a whitespace-padded ``Query`` or an ``EmptyQueryRule``."""

    # Class-wide flag so that the keyword grammar is built only once.
    initialized = False

    grammar = [
        (
            omit(_),
            attr('op', Query),
            omit(_),
        ),
        attr('op', EmptyQueryRule),
    ]

    def __init__(self):
        """Build the valid-keywords grammar the first time an instance is made."""
        if Main.initialized:
            return

        from invenio_query_parser.utils import build_valid_keywords_grammar
        build_valid_keywords_grammar()
        Main.initialized = True
Пример #8
0
class OrQuery(UnaryRule):
    """Operand of a disjunction; the operator token itself is discarded.

    Either an ``Or`` token followed by a simple or parenthesized query, or
    the literal ``|`` followed by a simple query. The operand lands in ``op``.
    """

    grammar = [
        # Form introduced by the ``Or`` rule.
        (omit(Or), [
            (omit(Whitespace), attr('op', SimpleQuery)),
            (omit(_), attr('op', ParenthesizedQuery)),
        ]),
        # Symbolic form: "|" followed by a simple query.
        (omit(Literal('|')), attr('op', SimpleQuery)),
    ]
Пример #9
0
class AndQuery(UnaryRule):
    """Operand of a conjunction; the operator token itself is discarded.

    Either an ``And`` token followed by a negated, simple or parenthesized
    query, or the literal ``+`` followed by a simple query. The operand
    lands in ``op``.
    """

    grammar = [
        # Form introduced by the ``And`` rule.
        (omit(And), [
            (omit(Whitespace), attr('op', NotQuery)),
            (omit(Whitespace), attr('op', SimpleQuery)),
            (omit(_), attr('op', ParenthesizedQuery)),
        ]),
        # Symbolic form: "+" followed by a simple query.
        (omit(Literal('+')), attr('op', SimpleQuery)),
    ]
Пример #10
0
class NotExpression(Expression, List):
    """Negation: the ``symbols['not']`` marker followed by ``expressions``."""

    grammar = (omit(symbols['not']), expressions)

    def build(self, builder):
        """Emit ``NOT (`` ... ``)`` around the output of the first child."""
        builder.add('NOT (')
        negated = self[0]
        negated.build(builder)
        builder.add(')')
Пример #11
0
class LessThanOp(UnaryRule):
    """Strict "less than" comparison.

    Written either as ``<`` or as the word ``before`` (case-insensitive),
    e.g. ``author-count < 100`` or ``date before 1984``. The operator is
    discarded and the value is stored in ``op``.
    """

    grammar = (
        omit(re.compile(r"before|<", re.IGNORECASE)),
        attr('op', SimpleValue),
    )
Пример #12
0
class GreaterThanOp(UnaryRule):
    """Strict "greater than" comparison.

    Written either as ``>`` or as the word ``after`` (case-insensitive),
    e.g. ``author-count > 2000`` or ``date after 10-2000``. The operator is
    discarded and the value is stored in ``op``.
    """

    grammar = (
        omit(re.compile(r"after|>", re.IGNORECASE)),
        attr('op', SimpleValue),
    )
Пример #13
0
class Main(UnaryRule):
    """Top-level rule: a padded ``FindQuery``/``Query`` or an ``EmptyQueryRule``."""

    # Class-wide flag so that the keyword grammar is built only once.
    initialized = False

    grammar = [
        (
            omit(_),
            attr('op', [FindQuery, Query]),
            omit(_),
        ),
        attr('op', EmptyQueryRule),
    ]

    def __init__(self):
        """Build the valid-keywords grammar the first time an instance is made.

        The allowed keywords are read from the Flask application config key
        ``SEARCH_ALLOWED_KEYWORDS`` (an empty list when the key is absent).
        """
        if Main.initialized:
            return

        from invenio_query_parser.utils import build_valid_keywords_grammar
        from flask import current_app

        allowed_keywords = current_app.config.get('SEARCH_ALLOWED_KEYWORDS', [])
        build_valid_keywords_grammar(allowed_keywords)
        Main.initialized = True
Пример #14
0
class ParenthesizedSimpleValues(UnaryRule):
    """Simple values, possibly combined with boolean operators, inside ``( )``."""

    grammar = (
        omit(Literal("(")),
        [
            SimpleValueBooleanQuery,
            SimpleValueNegation,
            SimpleValue,
        ],
        omit(Literal(")")),
    )

    @classmethod
    def parse(cls, parser, text, pos):
        """Parse ``cls.grammar`` with a marker flag raised on the parser.

        ``parser._parsing_parenthesized_simple_values_expression`` is True
        while the grammar is being parsed and is always set back to False
        afterwards. A ``SyntaxError`` is not propagated; it is returned
        together with the untouched ``text``.
        """
        try:
            parser._parsing_parenthesized_simple_values_expression = True
            rest, tokens = parser.parse(text, cls.grammar)
            outcome = rest, tokens
        except SyntaxError as error:
            outcome = text, error
        finally:
            parser._parsing_parenthesized_simple_values_expression = False
        return outcome
Пример #15
0
class EmptyQuery(LeafRule):
    """Leaf rule whose grammar is optional, discarded whitespace."""

    grammar = omit(optional(whitespace))

    def __init__(self):
        """Create the node with its ``value`` set to ``None``."""
        self.value = None

    def __repr__(self):
        """Return the class name followed by empty parentheses."""
        return '{}()'.format(self.__class__.__name__)
Пример #16
0
    def parse(cls, parser, text, pos):
        """Parse a boolean expression whose right operand must be a simple value.

        ``cls.grammar`` is indexed positionally: ``[0]`` is parsed as the left
        operand, ``[1]`` as the boolean operator and ``[2]`` as the right
        operand.

        Args:
            parser: object whose ``parse(text, grammar)`` is called for each part.
            text: the input to parse.
            pos: not used by this method.

        Returns:
            ``(remaining_text, SimpleValueBooleanQuery)`` on success, otherwise
            ``(text, SyntaxError)`` with the original, unconsumed ``text``.
        """
        # Both stay None until their parse step succeeds; the except branch below uses them to tell
        # how far parsing got before a SyntaxError was raised.
        left_operand, operator = None, None
        try:
            # Parse left operand
            text_after_left_op, left_operand = parser.parse(text, cls.grammar[0])

            # Parse boolean operators
            text_after_bool_op, operator = parser.parse(text_after_left_op, cls.grammar[1])
            if not operator:  # Implicit AND at terminals level
                operator = And(BooleanOperator.AND)

            # Parse right operand.
            # We don't want to eagerly recognize anything else other than a SimpleValue.
            # So we attempt to recognize the more specific rules, and if we do, then we need to stop identifying this
            # rule.
            # The result of this probe is discarded: only whether it raises SyntaxError matters.
            parser.parse(
                text_after_bool_op,
                [
                    (
                        omit(optional(Not)),
                        [
                            InvenioKeywordQuery,
                            SpiresKeywordQuery,
                        ]
                     ),
                    [
                        RangeOp,
                        GreaterEqualOp,
                        LessEqualOp,
                        GreaterThanOp,
                        LessThanOp,
                        ComplexValue
                    ]
                ]
            )

            # Identified something other than a SimpleValue, stop parsing this rule.
            result = text, SyntaxError("expected simple value related rule as right operand of a " +
                                       cls.__name__)

        except SyntaxError as e:
            # Default outcome: report the failure with the input left unconsumed.
            result = text, e

            # Reached both when the probe above failed (the wanted case) and when an earlier step failed;
            # the guard is only true when the left operand and the operator were both obtained.
            if left_operand and operator:
                # Attempt to parse a right operand
                try:
                    remaining_text, right_operand = parser.parse(text_after_bool_op, cls.grammar[2])
                    result = remaining_text, SimpleValueBooleanQuery(
                        left_operand,
                        bool_op=operator,
                        right=right_operand
                    )
                except SyntaxError as e:  # Actual failure of parsing boolean query at terminals level
                    return text, e

        return result
Пример #17
0
class Query(ListRule):
    """Grammar entry point.

    A leading ``find`` keyword (or its prefixes ``fin``, ``fi``, ``f``) is
    accepted and dropped: the grammar augments both SPIRES and Invenio style
    syntaxes, and the keyword is kept only for backward compatibility with
    SPIRES.
    """

    grammar = [
        # Optional "find " prefix, then a statement and any malformed trailing words.
        (
            omit(optional(re.compile(r"(find|fin|fi|f)\s", re.IGNORECASE))),
            (
                Statement,
                maybe_some(MalformedQueryWords),
            ),
        ),
        MalformedQueryWords,
        EmptyQuery,
    ]
Пример #18
0
class RangeOp(BinaryRule):
    """Range between two values joined by ``->``.

    Each side may independently be a complex or a simple range value, e.g.::

        muon decay year:1983->1992
        author:"Ellis, J"->"Ellis, Qqq"
        author:"Ellis, J"->Ellis, M

    Sides of different kinds are accepted here and dealt with in a later
    phase.
    """

    grammar = (
        attr('left', [ComplexValue, SimpleRangeValue]),
        omit(Literal("->")),
        attr('right', [ComplexValue, SimpleRangeValue]),
    )
Пример #19
0
class InvenioKeywordQuery(BinaryRule):
    """Invenio-style keyword query: ``keyword:value``.

    Examples: ``author: ellis``, ``title: boson``, ``unknown_keyword: foo``.

    Unlike SPIRES keyword queries, any terminal is accepted as the keyword
    here. The generic pattern refuses text starting with ``arxiv`` because
    ``arxiv:arxiv_identifier`` contains a ":" yet is a special case of
    SimpleValue.
    """

    grammar = (
        attr('left', [InspireKeyword, re.compile(r"(?!arxiv)[^\s:]+")]),
        omit(':'),
        attr('right', Value),
    )
Пример #20
0
class RelativeDateExpression(Expression):
    """Date lower bound given as a length and a time unit back from now.

    The ``symbols['since']`` marker is discarded; the amount is stored in
    ``length`` and the unit in ``unit``.
    """

    grammar = (
        omit(symbols['since']),
        attr('length', number),
        attr('unit', TimeUnit),
    )

    def build(self, builder):
        """Add a ``date >= DATE('now', ...)`` condition with a negative offset."""
        span_unit = self.unit
        offset = -int(self.length)

        # Weeks are rewritten as the equivalent number of days.
        if span_unit == 'week':
            offset, span_unit = offset * 7, 'day'

        condition = "date >= DATE('now', '{} {}')".format(offset, span_unit)
        builder.add(condition)
    def parse(cls, parser, text, pos):
        """Parse a boolean expression between terminals, refusing keyword queries as right operand.

        ``cls.grammar`` is indexed positionally: ``[0]`` is parsed as the left
        operand, ``[1]`` as the boolean operator and ``[2]`` as the right
        operand.

        Args:
            parser: object whose ``parse(text, grammar)`` is called for each part.
            text: the input to parse.
            pos: not used by this method.

        Returns:
            ``(remaining_text, SimpleValueBooleanQuery)`` on success, otherwise
            ``(text, SyntaxError)`` with the original, unconsumed ``text``.
        """
        # Both stay None until their parse step succeeds; the except branch below uses them to tell
        # how far parsing got before a SyntaxError was raised.
        left_operand, operator = None, None
        try:
            # Parse left operand
            text_after_left_op, left_operand = parser.parse(
                text, cls.grammar[0])

            # Parse boolean operators
            text_after_bool_op, operator = parser.parse(
                text_after_left_op, cls.grammar[1])

            # Parse right operand.
            # We don't want to eagerly recognize keyword queries as SimpleValues.
            # So we first probe for the more specific rules (keyword queries and their negation); only when
            # that probe fails is a SimpleValue attempted, in the except branch below.
            # The probe's result is discarded: only whether it raises SyntaxError matters.
            _ = parser.parse(text_after_bool_op, (omit(
                optional(Not)), [InvenioKeywordQuery, SpiresKeywordQuery]))

            # Keyword query parsing succeeded, stop boolean_op among terminals recognition.
            result = text, SyntaxError(
                "found keyword query at terminals level")

        except SyntaxError as e:
            # Default outcome: report the failure with the input left unconsumed.
            result = text, e

            # Only true when the left operand and the operator were both obtained (and are truthy).
            if left_operand and operator:
                # Attempt to parse a right operand
                try:
                    t, right_operand = parser.parse(text_after_bool_op,
                                                    cls.grammar[2])
                    result = t, SimpleValueBooleanQuery(
                        [left_operand, operator, right_operand])
                except SyntaxError as e:  # Actual failure of parsing boolean query at terminals level
                    return text, e

        return result
Пример #22
0
class LowerQuery(UnaryRule):
    """"Lower than" comparison written as ``<`` or ``before`` (case-insensitive).

    The operator and the whitespace rule after it are discarded; the value
    is stored in ``op``.
    """

    grammar = (
        omit(
            [Literal('<'), re.compile('before', re.IGNORECASE)],
            _,
        ),
        attr('op', SpiresValue),
    )
Пример #23
0
        omit(re.compile(r"or", re.I)),
        [
            (omit(Whitespace), attr('op', SpiresSimpleQuery)),
            (omit(_), attr('op', SpiresParenthesizedQuery)),
                (omit(Whitespace), attr('op', SpiresValueQuery)),
        ]
    )


# A SPIRES query is one leading (parenthesized or simple) query followed by
# any number of not/and/or clauses; everything is collected in ``children``.
SpiresQuery.grammar = attr(
    'children',
    (
        [SpiresParenthesizedQuery, SpiresSimpleQuery],
        maybe_some((
            omit(_),
            [SpiresNotQuery, SpiresAndQuery, SpiresOrQuery],
        )),
    ),
)


class NestableKeyword(LeafRule):
    """Leaf holding ``refersto`` or ``citedby`` (case-insensitive) in ``value``."""

    grammar = attr(
        'value',
        [
            re.compile('refersto', re.IGNORECASE),
            re.compile('citedby', re.IGNORECASE),
        ],
    )
Пример #24
0
 def runTest(self):
     """Parsing a word through an ``omit`` grammar must yield ``None``."""
     parsed = pypeg2.parse("hello", pypeg2.omit(pypeg2.word))
     # Identity check: the result has to be None itself, not merely an
     # object that compares equal to None.
     self.assertIsNone(parsed)
Пример #25
0
 def runTest(self):
     """Composing through an ``omit`` grammar must yield an empty string."""
     composed = pypeg2.compose('hello', pypeg2.omit(pypeg2.word))
     self.assertEqual(composed, "")
Пример #26
0
    ParenthesizedQuery,
    SimpleQuery,
])


# Nested keyword query: one of the citation keywords (case-insensitive), an
# optional ":" that is discarded, then the expression the keyword applies to.
NestedKeywordQuery.grammar = (
    attr('left', [
        # Longer keywords are listed before the shorter ones they start with,
        # so the most specific regex is tried first.
        re.compile(r'citedbyexcludingselfcites', re.IGNORECASE),
        re.compile(r'citedbyx', re.IGNORECASE),
        re.compile(r'citedby', re.IGNORECASE),
        re.compile(r'referstoexcludingselfcites', re.IGNORECASE),
        re.compile(r'referstox', re.IGNORECASE),
        re.compile(r'refersto', re.IGNORECASE),
    ]),
    optional(omit(":")),
    attr('right', Expression),
)


class BooleanQuery(BooleanRule):
    """Boolean query: an expression, an operator slot and a statement.

    The operator slot is ``And``, ``Or`` or ``None``.
    """

    grammar = (
        Expression,
        [And, Or, None],
        Statement,
    )


# ########################

# #### Main productions ####
# A statement is a boolean query when one can be parsed, else a lone expression.
Statement.grammar = attr(
    'op',
    [BooleanQuery, Expression],
)
Пример #27
0
class SimpleValueNegation(UnaryRule):
    """Negation whose operand can only be a SimpleValue.

    The ``Not`` token is discarded and the value is stored in ``op``.
    """

    grammar = (
        omit(Not),
        attr('op', SimpleValue),
    )
Пример #28
0
 class Rule(object):
     """Keep a leading SPIRES simple value in ``value`` and discard what ``.*`` matches after it."""

     grammar = (
         attr('value', SpiresSimpleValue),
         omit(re.compile(".*")),
     )
Пример #29
0

class ValueQuery(UnaryRule):
    """Query made of a single ``Value``, stored in ``op``."""

    grammar = attr(
        "op",
        Value,
    )


class Query(ListRule):
    """List rule declared without a grammar of its own in this class body."""


class KeywordQuery(BinaryRule):
    """Binary rule declared empty; ``grammar`` is attached after the class body."""


# Assigned after the class statement because the first alternative refers to
# KeywordQuery itself. Every alternative is "keyword : right-hand side", with
# the colon and the whitespace around it discarded.
KeywordQuery.grammar = [
    # Right-hand side is another keyword query.
    (
        attr("left", KeywordRule),
        omit(_, Literal(":"), _),
        attr("right", KeywordQuery),
    ),
    # Right-hand side is a value.
    (
        attr("left", KeywordRule),
        omit(_, Literal(":"), _),
        attr("right", Value),
    ),
    # Right-hand side is a query.
    (
        attr("left", KeywordRule),
        omit(_, Literal(":"), _),
        attr("right", Query),
    ),
]


class SimpleQuery(UnaryRule):
    """A keyword query or, failing that, a value query; stored in ``op``."""

    grammar = attr(
        "op",
        [KeywordQuery, ValueQuery],
    )


class ParenthesizedQuery(UnaryRule):
    """A ``Query`` wrapped in parentheses.

    The parentheses and the whitespace rule just inside them are discarded;
    the query is stored in ``op``.
    """

    grammar = (
        omit(Literal("("), _),
        attr("op", Query),
        omit(_, Literal(")")),
    )


class NotQuery(UnaryRule):
    grammar = [
Пример #30
0
class FindQuery(UnaryRule):
    """A ``SpiresQuery`` introduced by ``Find`` and whitespace (both discarded)."""

    grammar = (
        omit(Find, Whitespace),
        attr('op', SpiresQuery),
    )
                              "|".join([x + ":"
                                        for x in SPIRES_KEYWORDS.keys()])))


class KeywordQuery(BinaryRule):
    """Binary rule declared empty; ``grammar`` is attached after the class body."""


class EmptyQueryRule(LeafRule):
    """Leaf matching a possibly empty run of whitespace, kept in ``value``."""

    grammar = attr(
        'value',
        re.compile(r'\s*'),
    )


KeywordQuery.grammar = [
    (
        attr('left', KeywordRule),
        omit(_, Literal(':'), _),
        # FIXME: This should be replaced with KeywordQuery to restore
        # intended functionality.
        # Also NestedKeywordsRule class should be removed from
        # this file, ./ast.py and ./walkers/pypeg_to_ast.py.
        attr('right', NestedKeywordsRule)
    ),
    (
        attr('left', KeywordRule),
        omit(_, Literal(':'), _),
        attr('right', Value)
    ),
    (
        attr('left', KeywordRule),
        omit(_, Literal(':'), _),
        attr('right', Query)
Пример #32
0
class NotQuery(UnaryRule):
    """Negated expression: ``Not`` is discarded, the expression goes in ``op``."""

    grammar = (
        omit(Not),
        attr('op', Expression),
    )
Пример #33
0
class ParenthesizedQuery(UnaryRule):
    """A ``Statement`` wrapped in parentheses, used to denote precedence.

    Both parentheses are discarded; the statement is stored in ``op``.
    """

    grammar = (
        omit(Literal('(')),
        attr('op', Statement),
        omit(Literal(')')),
    )
Пример #34
0
class LowerEqualQuery(UnaryRule):
    """"Lower than or equal" comparison, in prefix or suffix form."""

    grammar = [
        # Prefix form: "<=" and the whitespace rule after it are discarded.
        (
            omit(Literal('<='), _),
            attr('op', SpiresValue),
        ),
        # Suffix form: a Number followed by "-", where the dash is consumed
        # only if whitespace, ")" or end of input comes next.
        (
            attr('op', Number),
            omit(re.compile(r'\-(?=\s|\)|$)')),
        ),
    ]
Пример #35
0
class SpiresParenthesizedQuery(UnaryRule):
    """A ``SpiresQuery`` wrapped in parentheses.

    The parentheses and the whitespace rule just inside them are discarded;
    the query is stored in ``op``.
    """

    grammar = (omit(Literal('('), _),
               attr('op', SpiresQuery),
               omit(_, Literal(')')))
Пример #36
0
    grammar = re.compile(r'[^{]+')


class CommentStatement(LeafRule):
    """Leaf for a ``{* ... *}`` comment.

    The match is non-greedy and, because of DOTALL, may span several lines.
    """

    grammar = re.compile(
        r"{\*.*?\*}",
        re.DOTALL,
    )


class LiteralStatement(LeafRule):
    """Leaf for a ``{literal}...{/literal}`` section.

    The match is non-greedy and, because of DOTALL, may span several lines.
    """

    grammar = re.compile(
        r"{literal}.*?{/literal}",
        re.DOTALL,
    )


class Whitespace(object):
    """Zero or more space, newline or tab characters."""

    grammar = maybe_some([
        Literal(' '),
        Literal('\n'),
        Literal('\t'),
    ])


# Shorthand used inside grammars: parse the Whitespace rule and discard it.
_ = omit(Whitespace)


class Identifier(LeafRule):
    """Leaf for an identifier.

    Any run of word characters and ``-``, ``+``, ``*``, ``/`` is accepted as
    long as the final character is a word character.
    """

    grammar = re.compile(
        r'[\w\-\+\*\/]*\w'
    )


"""
Logical Operators.
"""

class AndOperator(EmptyLeafRule):
    """Logical conjunction, spelled ``and`` or ``&&``."""

    grammar = [
        Literal('and'),
        Literal('&&'),
    ]


class OrOperator(EmptyLeafRule):