Example #1
def mlsqlparser():
    #Define all keywords
    LOAD = define_load()
    READ = define_read()
    SPLIT = define_split()
    REGRESS = define_regress()
    CLASSIFY = define_classify()
    CLUSTER = define_cluster()
    REPLACE = define_replace()
    SAVE = define_save()

    #Define comment
    comment = _define_comment()

    #Combine keywords into progressively larger clauses for combined use
    read_split = READ + SPLIT
    read_split_classify = READ + SPLIT + CLASSIFY
    read_split_classify_regress = READ + SPLIT + CLASSIFY + REGRESS
    read_replace_split_classify_regress = READ + REPLACE + SPLIT + CLASSIFY + REGRESS
    read_replace_split_classify_regress_cluster = READ + REPLACE + SPLIT + CLASSIFY + REGRESS + CLUSTER
    read_replace_split_classify_regress_cluster_save = READ + REPLACE + SPLIT + CLASSIFY + REGRESS + CLUSTER + SAVE

    load_read_replace_split_classify_regress_cluster_save = MatchFirst([read_replace_split_classify_regress_cluster_save, LOAD])

    return load_read_replace_split_classify_regress_cluster_save.ignore(comment)
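
A minimal sketch of the same MatchFirst dispatch, with hypothetical CaselessKeyword stand-ins for the define_* helpers (which are not shown above):

from pyparsing import CaselessKeyword, MatchFirst

# Hypothetical stand-ins for the define_* helpers above.
LOAD = CaselessKeyword("LOAD")
READ = CaselessKeyword("READ")
SPLIT = CaselessKeyword("SPLIT")

# MatchFirst tries the alternatives in order and takes the first that matches.
clause = MatchFirst([READ + SPLIT, LOAD])
print(clause.parseString("READ SPLIT"))  # -> ['READ', 'SPLIT']
print(clause.parseString("LOAD"))        # -> ['LOAD']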
Example #2
def parse_select_columns(string):
    """Parse a select query and return the columns

    Args:
        string(str): Input string to be parsed

    Returns:
        result(list of str): List of columns
    """

    if string == "":
        return list()

    if string.upper().startswith("WITH"):
        suppressor = _with + delimitedList(_db_name + _as + subquery)
        string = suppressor.suppress().transformString(string)

    # Suppress everything after the first FROM
    suppressor = MatchFirst(_from) + restOfLine
    string = suppressor.suppress().transformString(string)

    parser = _select + delimitedList(field_parser).setResultsName("columns")
    output = parser.parseString(string).columns.asList()

    # Strip extra whitespace from each column
    return [column.strip() for column in output]
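
The helper parsers (_with, _from, field_parser, subquery) are defined elsewhere in the module; the FROM-suppression step itself can be sketched standalone:

from pyparsing import CaselessKeyword, MatchFirst, restOfLine

# Minimal sketch of the suppression step above; `_from` is assumed here to be
# a list of FROM-like keywords.
_from = [CaselessKeyword("FROM")]
suppressor = MatchFirst(_from) + restOfLine
print(suppressor.suppress().transformString("SELECT a, b FROM t"))
# -> 'SELECT a, b '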
Example #3
def get_match_first(lits, parseAction=None):
    el = MatchFirst(NoMatch())
    for lit in lits:
        el |= lit
    if parseAction:
        el.setParseAction(parseAction)
    return el
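
Since MatchFirst accepts an iterable of expressions, the same element can usually be built in one call; a sketch under that assumption:

from pyparsing import Literal, MatchFirst, NoMatch

def get_match_first_direct(lits, parseAction=None):
    # NoMatch() keeps the behavior for an empty `lits`.
    el = MatchFirst(lits) if lits else NoMatch()
    if parseAction:
        el.setParseAction(parseAction)
    return el

print(get_match_first_direct([Literal("a"), Literal("b")]).parseString("b"))
# -> ['b']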
Example #4
def define_simple_literals(literal_list, parseAction=None):
    l = MatchFirst([CaselessKeyword(k) for k in literal_list])

    if parseAction:
        l = l.setParseAction(parseAction)

    return l
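
A usage sketch, assuming the function above is in scope:

# CaselessKeyword matches case-insensitively but returns the keyword as defined.
select_or_from = define_simple_literals(["SELECT", "FROM"])
print(select_or_from.parseString("select"))  # -> ['SELECT']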
Example #5
    def _handle_define(self, line, token):
        """define macro function"""
        if self.suppress:
            return

        if token.args:
            args = token.args[0]
            keywords = MatchFirst([Keyword('$' + x).setResultsName(x) for x in args])
            body = self._recurisve_expand(token.body)
            macros = self.function_class(args, body, list(keywords.scanString(body)))
            if token.name in self.functions:
                warnings.warn('%d: macro %s already defined!' % (line, token.name))
            self.functions[token.name] = macros
        else:
            if token.name in self.variables:
                warnings.warn('%d: macro %s already defined!' % (line, token.name))

            value = self.variables[token.name] = self._recurisve_expand(token.value)
            if not token.name.startswith("_"):
                self.on_constant(token.name, value)
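
The keyword scan at the core of this handler can be exercised on its own; a sketch with hypothetical macro arguments:

from pyparsing import Keyword, MatchFirst

# Find $-prefixed argument references in a macro body.
args = ['x', 'y']
keywords = MatchFirst([Keyword('$' + x).setResultsName(x) for x in args])
body = '$x + $y * $x'
print([(tokens.asList(), start, end)
       for tokens, start, end in keywords.scanString(body)])
# -> [(['$x'], 0, 2), (['$y'], 5, 7), (['$x'], 10, 12)]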
Example #6
def targetComponentsForOperatorsInString(operatorNames, codeBlock):
    """
    Return a list of tuples of operator names, their targets, and their locations in
    `codeBlock.codeString`. The valid operator names searched for are `operatorNames`.
    For example, if 'L' is in `operatorNames`, then for the code ``L[phi]`` the return
    value would include ``('L', 'phi', slice(firstCharacterIndex, lastCharacterIndex))``.
    """
    parser = MatchFirst(Keyword(operatorName) for operatorName in operatorNames).setResultsName('name') \
                + Optional(nestedExpr('[', ']', baseExpr, ignoreExpr).setResultsName('target'))
    parser.ignore(cppStyleComment.copy())
    parser.ignore(quotedString.copy())
    results = []
    for tokens, start, end in parser.scanString(codeBlock.codeString):
        if 'target' in tokens:
            results.append((tokens.name, ''.join(tokens.target.asList()[0]), slice(start, end)))
        else:
            raise CodeParserException(codeBlock, start, "Invalid use of '%s' operator in code block." % tokens.name)
    return results
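
`baseExpr` and `ignoreExpr` come from the surrounding module; the scan itself reduces to a small standalone pattern:

from pyparsing import Keyword, MatchFirst, Optional, nestedExpr

# Standalone sketch: find operator names followed by a bracketed target.
operatorNames = ['L', 'H']
parser = (MatchFirst(Keyword(name) for name in operatorNames)('name')
          + Optional(nestedExpr('[', ']')('target')))
for tokens, start, end in parser.scanString("L[phi] + H[psi]"):
    print(tokens.name, ''.join(tokens.target.asList()[0]), slice(start, end))
# -> L phi slice(0, 6)
#    H psi slice(9, 15)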
Example #7
    def __init__(
        self,
        manager,
        namespace_to_term_to_encoding: Optional[
            NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        default_namespace: Optional[Set[str]] = None,
        allow_redefinition: bool = False,
        skip_validation: bool = False,
        upgrade_urls: bool = False,
    ) -> None:
        """Build a metadata parser.

        :param manager: A cache manager
        :param namespace_to_term_to_encoding:
          An enumerated namespace mapping from {namespace keyword: {(identifier, name): encoding}}
        :param namespace_to_pattern:
          A regular expression namespace mapping from {namespace keyword: regex string}
        :param annotation_to_term: Enumerated annotation mapping from {annotation keyword: set of valid values}
        :param annotation_to_pattern: Regular expression annotation mapping from {annotation keyword: regex string}
        :param default_namespace: A set of strings that can be used without a namespace
        :param skip_validation: If true, don't download and cache namespaces/annotations
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager
        self.disallow_redefinition = not allow_redefinition
        self.skip_validation = skip_validation
        self.upgrade_urls = upgrade_urls

        #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}}
        self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {}
        #: A set of URLs for namespaces that can't be cached
        self.uncachable_namespaces = set()
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_to_pattern = namespace_to_pattern or {}
        #: A set of names that can be used without a namespace
        self.default_namespace = set(
            default_namespace) if default_namespace is not None else None

        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_term = annotation_to_term or {}
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_to_pattern = annotation_to_pattern or {}
        #: A dictionary of locally-defined {annotation keyword: set of values}
        self.annotation_to_local = annotation_to_local or {}

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}

        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ])

        namespace_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'), as_tag
        ])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_pattern = And(
            [namespace_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        annotation_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'), as_tag
        ])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_list = And(
            [annotation_tag, list_tag,
             delimited_quoted_list('values')])
        self.annotation_pattern = And(
            [annotation_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        self.language = MatchFirst([
            self.document,
            self.namespace_url,
            self.annotation_url,
            self.annotation_list,
            self.annotation_pattern,
            self.namespace_pattern,
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)
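
Each metadata line parser above follows the same shape: suppressed tags plus named captures. A minimal sketch with hypothetical stand-ins for set_tag, word, and qid:

from pyparsing import And, CaselessKeyword, Suppress, Word, alphanums, quotedString

# Hypothetical stand-ins; the real tags come from the surrounding module.
set_tag = Suppress(CaselessKeyword('SET'))
word = Word(alphanums)
qid = quotedString

document = And([set_tag, Suppress(CaselessKeyword('DOCUMENT')),
                word('key'), Suppress('='), qid('value')])
res = document.parseString('SET DOCUMENT Name = "Example"')
print(res['key'], res['value'])  # -> Name "Example"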
Example #8
 def keywords(klass):
     kws = sorted(klass.keyword_aliases.keys())
     return MatchFirst([Keyword(kw) for kw in kws])
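
A usage sketch, assuming a class that carries a keyword_aliases mapping:

from pyparsing import Keyword, MatchFirst

class Units:
    keyword_aliases = {'second': 's', 'meter': 'm'}  # hypothetical aliases

kws = sorted(Units.keyword_aliases.keys())
matcher = MatchFirst([Keyword(kw) for kw in kws])
print(matcher.parseString('second'))  # -> ['second']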
Example #9
    "THEN",
    "UNION",
    "UNION_ALL",
    "USING",
    "WITH",
    "WHEN",
    "WHERE",
]

reserved_keywords = []
for name in sql_reserved_words:
    n = name.lower().replace("_", " ")
    value = locals()[name] = (Keyword(
        n, caseless=True).setName(n).setDebugActions(*debug))
    reserved_keywords.append(value)
RESERVED = MatchFirst(reserved_keywords)

join_keywords = {
    "join",
    "full join",
    "cross join",
    "inner join",
    "left join",
    "right join",
    "full outer join",
    "right outer join",
    "left outer join",
}

unary_ops = {"-": "neg", "~": "binary_not"}
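
Condensed, the reserved-word table above turns each name into a caseless Keyword with "_" mapped to a space; note that longer alternatives must come before their prefixes in a MatchFirst:

from pyparsing import Keyword, MatchFirst

names = ["UNION_ALL", "UNION", "WHERE"]  # longer alternatives first
RESERVED = MatchFirst([
    Keyword(name.lower().replace("_", " "), caseless=True) for name in names
])
print(RESERVED.parseString("Union All"))  # -> ['union all']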
Example #10
    def parse_algebra(self):
        """
        Parse an algebraic expression into a tree.

        Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to
        reflect parentheses and order of operations. Leave all operators in the
        tree and do not parse any strings of numbers into their float versions.

        Adding the groups and result names makes the `repr()` of the result
        really gross. For debugging, use something like
          print(OBJ.tree.asXML())
        """
        # 0.33 or 7 or .34 or 16.
        number_part = Word(nums)
        inner_number = (number_part +
                        Optional("." + Optional(number_part))) | ("." +
                                                                  number_part)
        # pyparsing allows spaces between tokens--`Combine` prevents that.
        inner_number = Combine(inner_number)

        # Apply suffixes
        number_suffix = MatchFirst(Literal(k) for k in self.suffixes.keys())

        # 0.33k or 17
        plus_minus = Literal('+') | Literal('-')
        number = Group(
            Optional(plus_minus) + inner_number + Optional(
                CaselessLiteral("E") + Optional(plus_minus) + number_part) +
            Optional(number_suffix))
        number = number("number")

        # Predefine recursive variables.
        expr = Forward()

        # Handle variables passed in. Variables may be either of two forms:
        #   1. front + subscripts + tail
        #   2. front + lower_indices + upper_indices + tail
        # where:
        #   front (required):
        #       starts with alpha, followed by alphanumeric
        #   subscripts (optional):
        #       any combination of alphanumeric and underscores
        #   lower_indices (optional):
        #       Of form "_{<alaphnumeric>}"
        #   upper_indices (optional):
        #       Of form "^{<alaphnumeric>}"
        #   tail:
        #       any number of primes
        front = Word(alphas, alphanums)
        subscripts = Word(alphanums + '_') + ~FollowedBy('{')
        lower_indices = Literal("_{") + Word(alphanums) + Literal("}")
        upper_indices = Literal("^{") + Word(alphanums) + Literal("}")
        tail = ZeroOrMore("'")
        inner_varname = Combine(front + Optional(subscripts | (
            Optional(lower_indices) + Optional(upper_indices))) +
                                tail  # optional already by ZeroOrMore
                                )
        varname = Group(inner_varname)("variable")
        varname.setParseAction(self.variable_parse_action)

        # Same thing for functions
        # Allow primes (apostrophes) at the end of function names, useful for
        # indicating derivatives. Eg, f'(x), g''(x)
        function = Group(inner_varname + Suppress("(") + expr +
                         Suppress(")"))("function")
        function.setParseAction(self.function_parse_action)

        atom = number | function | varname | "(" + expr + ")"
        atom = Group(atom)("atom")

        # Do the following in the correct order to preserve order of operation.
        pow_term = atom + ZeroOrMore("^" + atom)
        pow_term = Group(pow_term)("power")

        par_term = pow_term + ZeroOrMore('||' + pow_term)  # 5k || 4k
        par_term = Group(par_term)("parallel")

        prod_term = par_term + ZeroOrMore(
            (Literal('*') | Literal('/')) + par_term)  # 7 * 5 / 4
        prod_term = Group(prod_term)("product")

        sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(
            plus_minus + prod_term)  # -5 + 4 - 3
        sum_term = Group(sum_term)("sum")

        # Finish the recursion.
        expr << sum_term  # pylint: disable=pointless-statement
        self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
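
The numeric sub-grammar is self-contained; a standalone sketch, assuming a suffix table like self.suffixes:

from pyparsing import (CaselessLiteral, Combine, Group, Literal, MatchFirst,
                       Optional, Word, nums)

suffixes = {'k': 1e3, 'M': 1e6}  # hypothetical suffix table
number_part = Word(nums)
inner_number = Combine((number_part + Optional("." + Optional(number_part)))
                       | ("." + number_part))
number_suffix = MatchFirst(Literal(k) for k in suffixes)
plus_minus = Literal('+') | Literal('-')
number = Group(Optional(plus_minus) + inner_number
               + Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part)
               + Optional(number_suffix))
print(number.parseString("-0.33k"))  # -> [['-', '0.33', 'k']]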
Example #11
 # Threat
 LABEL + IS_A + IMPACT + PROBABILITY + THREAT
 + Optional(DESCRIBED + AS + DESCRIPTION),
 # Security Measure
 LABEL + IS_A + CAPABILITY + MEASURE + AGAINST + THREAT_LIST
 + Optional(DESCRIBED + AS + DESCRIPTION),
 # Label list or alias
 LABEL + IS_A + LABEL_LIST,
 # Component modification
 LABEL + IS_NOW_A + MatchFirst([
     LABELED + NEW_NAME,
     DESCRIBED + AS + DESCRIPTION,
     MatchFirst([
         Or(all_combinations([PROFILE, ROLE, IN + GROUP])),
         CLASSIFICATION + DATUM,
         Or(all_combinations([IMPACT, PROBABILITY])) + THREAT,
         Or([
             CAPABILITY + MEASURE,
             MEASURE + AGAINST + THREAT_LIST,
             CAPABILITY + MEASURE + AGAINST + THREAT_LIST,
         ]),
     ]) + Optional(DESCRIBED + AS + DESCRIPTION)
 ]),
 # These are negative assumptions: anti-patterns which must be disproven.
 # E.g., disprove "lack of transport security".
 # Negative assumptions which have not been disproven should incur risk.
 DISPROVE + ASSUMPTIONS,
 # Interaction
 Optional(ORDINAL) + SUBJECT + Optional(LATERALLY) + ACTION + EFFECT_LIST
 + Optional(TO_FROM + OBJECT)
 + Optional(Optional(BROADLY) + RISKING + THREAT_LIST)
 + Optional(WITH_NOTES + NOTES),
Example #12
def _tdb_grammar():  #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # matching float w/ regex is ugly but is recommended by pyparsing
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)),
                      ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = Optional(float_number) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas + '/-', min=1, max=2)
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                    cmd_typedef | \
                    cmd_function | \
                    cmd_ass_sys | \
                    cmd_defsysdef | \
                    cmd_defcmd | \
                    cmd_lor | \
                    cmd_phase | \
                    cmd_constituent | \
                    cmd_parameter
    return all_commands
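
TCCommand is a project-specific helper; the regex-based float parser, though, is plain pyparsing:

from pyparsing import Regex

float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \
    .setParseAction(lambda t: [float(t[0])])
print(float_number.parseString('-1.5e3'))  # -> [-1500.0]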
Example #13
)

from . import elements as ste

__all__ = ["parse"]


def unpack(results: ParseResults) -> tuple:
    """Unpack the members of a :py:class:`~.ParseResults` to a :py:class:`tuple`"""
    return tuple(item[0] for item in results)


#: Words that are not valid identifiers
KEYWORDS = Forward()
KEYWORDS.setName("KEYWORD")
KEYWORDS << MatchFirst(tuple(map(Keyword, kwlist)))

#: literal `...`, e.g. in `typing.Tuple[int, ...]`, not an `Ellipsis`
DOTS = Literal("...").setParseAction(lambda: ste.Dots())
KEYWORDS << MatchFirst((*KEYWORDS.expr.exprs, DOTS))

#: any valid typing or stenotype expression, such as `List`, `typing.List`, `?List`, ...
TYPE = Forward()
TYPE.setName("TYPE")
TYPE_exclude_UNION = Forward()
TYPE_exclude_UNION.setName("TYPE_exclude_UNION")

# typing expressions
# ==================

#: a direct or nested reference, such as `List` or `typing.List`
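
The re-binding through KEYWORDS.expr.exprs works because Forward stores its bound expression on .expr and MatchFirst keeps its alternatives in .exprs; a condensed check:

from keyword import kwlist
from pyparsing import Forward, Keyword, Literal, MatchFirst

KEYWORDS = Forward()
KEYWORDS << MatchFirst(tuple(map(Keyword, kwlist)))
# Extend the alternatives after the fact, as the module above does.
KEYWORDS << MatchFirst((*KEYWORDS.expr.exprs, Literal("...")))
print(KEYWORDS.parseString("..."))  # -> ['...']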
Example #14
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""
    def __init__(
        self,
        graph,
        namespace_to_term_to_encoding: Optional[
            NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        disallow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term_to_encoding: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_nested: If true, turn on nested statement failures
        :param disallow_unqualified_translocations: If true, disallow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.metagraph = set()

        self.disallow_nested = disallow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.concept_parser = ConceptParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.concept_parser = ConceptParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term_to_encoding=namespace_to_term_to_encoding,
                namespace_to_pattern=namespace_to_pattern,
            )

        self.control_parser.get_line_number = self.get_line_number
        self.concept_parser.get_line_number = self.get_line_number

        concept = Group(self.concept_parser.language)(CONCEPT)

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(
            self.concept_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.concept_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(
            self.concept_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.concept_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(concept +
                                                               opt_location)

        self.gene_modified = concept + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS), )

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(concept, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion,
                self.gene_fusion_legacy,
                self.gene_modified,
            ]) + opt_location, )

        self.mirna_modified = concept + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS), ) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = concept + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS, ), )

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(concept, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location, )

        self.rna_modified = concept + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(get_legacy_fusion_langauge(
            concept, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location, )

        self.single_abundance = MatchFirst([
            self.general_abundance,
            self.gene,
            self.mirna,
            self.protein,
            self.rna,
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(concept + opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location, )

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location, )

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default, )

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.concept_parser.language, )

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(concept)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(concept)

        self.population = population_tag + nest(concept)

        self.bp_path = self.biological_process | self.pathology | self.population
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)), )

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(concept(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(concept(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT), )

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(concept(FROM_LOC) + WCW + concept(TO_LOC))(EFFECT), )

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified,
            self.translocation_standard,
            self.translocation_legacy,
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion,
            self.cell_surface_expression,
            self.translocation,
            self.degradation,
            self.reaction,
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            correlation_tag,
            no_correlation_tag,
            binds_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag,
            self.biological_process,
        )

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag,
            self.process,
        )

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.abundance,
            has_component_tag,
            self.abundance,
        )

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])
        self.relation.setParseAction(self._handle_relation_harness)

        self.inverted_unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
        ])
        self.inverted_unqualified_relation.setParseAction(
            self.handle_inverse_unqualified_relation)

        self.normal_unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
            self.has_variant_relation,
            self.part_of_reaction,
        ])
        self.normal_unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested.
        causal_relation_tags = MatchFirst([
            increases_tag,
            decreases_tag,
            directly_decreases_tag,
            directly_increases_tag,
        ])

        self.nested_causal_relationship = triple(
            self.bel_term,
            causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)),
        )

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.inverted_unqualified_relation,
            self.normal_unqualified_relation,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.concept_parser.namespace_to_name_to_encoding

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.concept_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int,
                               tokens: ParseResults):
        """Handle nested statements.

        If :code:`self.disallow_nested` is True, raises a ``NestedRelationWarning``.

        :raises: NestedRelationWarning
        """
        if self.disallow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        subject_hash = self._handle_relation_checked(
            line,
            position,
            {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT],
            },
        )

        object_hash = self._handle_relation_checked(
            line,
            position,
            {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT],
            },
        )
        self.metagraph.add((subject_hash, object_hash))
        return tokens

    def check_function_semantics(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidFunctionSemantic
        """
        concept = tokens.get(CONCEPT)
        if not self._namespace_dict or concept is None:
            return tokens

        namespace, name = concept[NAMESPACE], concept[NAME]

        if namespace in self.concept_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and namespace == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(itt.chain.from_iterable(
            belns_encodings.get(encoding, set())
            for encoding in self._namespace_dict[namespace][name]
        ))

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position,
                                namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                func=tokens[FUNCTION],
                namespace=namespace,
                name=name,
                allowed_functions=valid_functions,
            )

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle BEL terms (the subject and object of BEL relations)."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults,
                            relation: str) -> ParseResults:
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        parent_node_dsl = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            # Note that the polarity is switched since this is just for hasMembers
            # and hasComponents, which are both deprecated as of BEL v2.2
            self.graph.add_unqualified_edge(child_node_dsl, parent_node_dsl,
                                            relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, IS_A)

    def handle_has_components(self, _, __,
                              tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, PART_OF)

    def _add_qualified_edge_helper(self, *, u, u_modifier, relation, v,
                                   v_modifier, annotations) -> str:
        """Add a qualified edge from the internal aspects of the parser."""
        m = {
            BINDS: self.graph.add_binds,
        }
        adder = m.get(relation)
        d = dict(
            evidence=self.control_parser.evidence,
            citation=self.control_parser.get_citation(),
            annotations=annotations,
            subject_modifier=u_modifier,
            object_modifier=v_modifier,
            **{LINE: self.get_line_number()},
        )
        if adder is not None:
            return adder(u=u, v=v, **d)
        else:
            return self.graph.add_qualified_edge(u=u,
                                                 v=v,
                                                 relation=relation,
                                                 **d)

    def _add_qualified_edge(self, *, u, u_modifier, relation, v, v_modifier,
                            annotations) -> str:
        """Add an edge, then adds the opposite direction edge if it should."""
        d = dict(
            relation=relation,
            annotations=annotations,
        )
        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(u=v,
                                            u_modifier=v_modifier,
                                            v=u,
                                            v_modifier=u_modifier,
                                            **d)
        return self._add_qualified_edge_helper(u=u,
                                               u_modifier=u_modifier,
                                               v=v,
                                               v_modifier=v_modifier,
                                               **d)

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation."""
        u = self.ensure_node(tokens[SUBJECT])
        u_modifier = modifier_po_to_dict(tokens[SUBJECT])
        relation = tokens[RELATION]
        v = self.ensure_node(tokens[OBJECT])
        v_modifier = modifier_po_to_dict(tokens[OBJECT])

        annotations = self._get_prepared_annotations()

        return self._add_qualified_edge(
            u=u,
            u_modifier=u_modifier,
            relation=relation,
            v=v,
            v_modifier=v_modifier,
            annotations=annotations,
        )

    def _get_prepared_annotations(self):
        return {
            annotation_name: (
                {ae: True for ae in annotation_entry}
                if isinstance(annotation_entry, set)
                else {annotation_entry: True}
            )
            for annotation_name, annotation_entry in
            self.control_parser.annotations.items()
        }

    def _handle_relation_harness(
            self, line: str, position: int,
            tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!
        """
        self._handle_relation_checked(line, position, tokens)
        return tokens

    def _handle_relation_checked(self, line, position, tokens):
        if not self.control_parser.citation_is_set:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing_required_annotations = (
            self.control_parser.get_missing_required_annotations())
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.get_line_number(), line,
                                           position,
                                           missing_required_annotations)

        return self._handle_relation(tokens)

    def handle_unqualified_relation(self, _, __,
                                    tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl,
                                        relation)
        return tokens

    def handle_inverse_unqualified_relation(
            self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations that should go reverse."""
        u = self.ensure_node(tokens[SUBJECT])
        v = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(v, u, relation)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Turn parsed tokens into canonical node name and makes sure its in the graph."""
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int,
                                     tokens: ParseResults) -> None:
        """Handle a malformed translocation."""
        raise MalformedTranslocationWarning(self.get_line_number(), line,
                                            position, tokens)
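# A minimal standalone sketch (not pybel itself) of the nested-causal pattern
# used above: triple(subject, relation, nest(triple(...))) lets a statement
# like "A increases (B decreases C)" be split into two edges, as
# handle_nested_relation does. All names here are illustrative stand-ins.
from pyparsing import Group, Keyword, MatchFirst, Suppress, Word, alphas


def triple(subject, relation, obj):
    return Group(subject)('subject') + relation('relation') + Group(obj)('object')


def nest(expr):
    return Suppress('(') + expr + Suppress(')')


term = Word(alphas)
causal = MatchFirst([Keyword('increases'), Keyword('decreases')])
nested = triple(term, causal, nest(triple(term, causal, term)))

toks = nested.parseString('A increases (B decreases C)')
print(toks['subject'][0], toks['relation'])                       # A increases
print(toks['object']['subject'][0], toks['object']['relation'])   # B decreases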
Example #16
0
from pyparsing import pythonStyleComment, Keyword, Regex, OneOrMore, Word, hexnums, nums
from pyparsing import Suppress, MatchFirst, ZeroOrMore, Group, Combine, Optional, Literal

from .ops import A, K

OPERATOR = Literal('+') | Literal('-')
DECIMAL = Combine(Optional(OPERATOR) +
                  Word(nums)).setParseAction(lambda t: int(t[0]))
HEXADECIMAL = Suppress("0x") + Word(hexnums).setParseAction(
    lambda t: int(t[0], 16))
VALUE = DECIMAL ^ HEXADECIMAL

A_ADDRESS = (Suppress('[') + VALUE +
             Suppress(']')).setParseAction(lambda t: A(int(t[0])))
K_ADDRESS = (Suppress('#') + VALUE).setParseAction(lambda t: K(int(t[0])))
ADDRESS = MatchFirst((A_ADDRESS, K_ADDRESS))

STATEMENT_OP = MatchFirst([
    Keyword("ld"),
    Keyword("ldh"),
    Keyword("ldb"),
    Keyword("ldx"),
    Keyword("ldxb"),
    Keyword("st"),
    Keyword("stx"),
    Keyword("add"),
    Keyword("sub"),
    Keyword("mul"),
    Keyword("div"),
    Keyword("mod"),
    Keyword("neg"),
Example #17
0
    def __init__(self, elems, precedence=4):
        MatchFirst.__init__(self, concatenate(elems))
        BaseType.__init__(self, precedence)
Example #18
0
    def __init__(self, parse_method=None, precedence=2):
        MatchFirst.__init__(self, [QString('"'), QString("'")])
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
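# QString and BaseType come from the surrounding project; the MatchFirst of
# two QString instances behaves like this plain-pyparsing equivalent, which
# accepts a string quoted with either delimiter:
from pyparsing import MatchFirst, QuotedString

either_quoted = MatchFirst([QuotedString('"'), QuotedString("'")])
print(either_quoted.parseString('"abc"'))  # ['abc']
print(either_quoted.parseString("'abc'"))  # ['abc']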
Example #19
0
) = map(
    CaselessKeyword,
    """UNION, ALL, AND, OR, INTERSECT, INTERSECTION, EXCEPT, COLLATE, ASC, DESC, ON,
 NOT, SELECT, DISTINCT, FROM, WHERE, BY, ORDER, BY, LIMIT, EVERY""".replace(
        ",", "").split())

(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS,
 COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE,
 CURRENT_TIMESTAMP) = map(
     CaselessKeyword, """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
 END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE,
 CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",", "").split())

keyword = MatchFirst(
    (UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, NOT, SELECT,
     DISTINCT, FROM, WHERE, BY, EVERY, ORDER, BY, LIMIT, CAST, ISNULL, NOTNULL,
     NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE,
     GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE,
     CURRENT_TIMESTAMP))


def regex_from_like(s):
    return str(s).replace('*', r'\*').replace('_', '.').replace('%', '.*')


def lookup(id):
    return


def lookup_every(id):
    return
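# regex_from_like above rewrites a SQL LIKE pattern as a regular expression
# ('_' -> '.', '%' -> '.*'); for example:
import re

pattern = regex_from_like('Jo_n%')            # 'Jo.n.*'
print(bool(re.match(pattern, 'John Smith')))  # True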
Example #20
0
    LEXER,
    PARSER,
    GRAMMAR,
    TREE,
    CATCH,
    FINALLY,
    THROWS,
    PROTECTED,
    PUBLIC,
    PRIVATE,
) = map(
    Keyword,
    """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
       public private """.split(),
)
KEYWORD = MatchFirst(keywords)

# Tokens
EOL = Suppress(LineEnd())  # $
SGL_PRINTABLE = Char(printables)
singleTextString = originalTextFor(
    ZeroOrMore(~EOL + (White(" \t") | Word(printables)))
).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
ESC = BSLASH + (
    oneOf(list(r"nrtbf\">" + "'")) | ("u" + Word(hexnums, exact=4)) | SGL_PRINTABLE
)
LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE
CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
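# APOS and BSLASH are defined in the truncated top of this file; assuming the
# obvious single-quote and backslash literals, the rules above parse escaped
# character sequences, e.g. in this standalone re-statement:
from pyparsing import (Char, Combine, Literal, OneOrMore, Word, hexnums,
                       oneOf, printables)

APOS, BSLASH = Literal("'"), Literal('\\')
SGL = Char(printables)
ESC = BSLASH + (oneOf(list(r'nrtbf\">' + "'")) | ('u' + Word(hexnums, exact=4)) | SGL)
LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL
STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
print(STRING_LITERAL.parseString(r"'ab\n'"))  # ["'", 'ab\\n', "'"]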
Example #21
0
    def __init__(self, parse_method=None, precedence=2):
        MatchFirst.__init__(self, [QString('"'), QString("'")])
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
Example #22
0
parse_ipv6_snooping = Suppress('snooping ') + restOfLine
parse_ipv6_raguard = Suppress('nd raguard ') + restOfLine
parse_ipv6_destinationguard = Suppress('destination-guard ') + restOfLine
parse_ipv6_dhcpguard = Suppress('dhcp guard ') + restOfLine
parse_lldp = Suppress('lldp ') + restOfLine
parse_username = Suppress('username ') + restOfLine
parse_aaa = Suppress('aaa ') + restOfLine
parse_stp = Suppress('spanning-tree ') + restOfLine
# parse_vtp                   = Suppress('vtp ')                         + restOfLine
parse_line = Suppress('line ') + restOfLine
parse_ip_ssh = Suppress('ip ssh ') + restOfLine
parse_arp_proxy = Suppress('ip arp proxy ') + restOfLine
parse_vstack = Suppress('no') + 'vstack'

parse_enable_password = Suppress('enable') + MatchFirst([
    'secret', 'password'
]) + Optional(Word(nums) + Suppress(White(exact=1))) + Suppress(restOfLine)
parse_ip_dhcp = NotAny(White()) + Suppress('ip dhcp snooping') + Optional(
    Suppress('vlan') + Word(nums) + ZeroOrMore(Suppress(',') + Word(nums)))
parse_ip_arp = NotAny(White()) + Suppress('ip arp inspection') + Suppress(
    'vlan') + Word(nums) + ZeroOrMore(Suppress(',') + Word(nums))
parse_ip_service = NotAny(White()) + Suppress('ip') + MatchFirst(
    ['finger', 'identd', 'source-route', 'bootp server'])
parse_ip_http = NotAny(White()) + Suppress('ip http ') + restOfLine

# aaa_authorization  = Suppress('authorization ')  + restOfLine
aaa_authentication = Suppress('authentication ') + restOfLine
aaa_accounting = Suppress('accounting ') + restOfLine
aaa_groups = Suppress('group server ') + restOfLine

utill = lambda parse_meth, featur_str: parse_meth.parseString(featur_str
Example #23
0
def _interfaceParse___iface_attributes(config, check_disabled):
    iface_list = util.get_attributes(config)[0]

    # only parse when the iface has attributes; otherwise it is disabled and unused
    if iface_list:
        iface_dict = {
            'shutdown': 'no',
            'vlans': [],
            'cdp': 'yes',
            'dhcp_snoop': {
                'mode': 'untrust'
            },
            'arp_insp': {
                'mode': 'untrust'
            },
            'storm control': {},
            'port-security': {},
            'ipv6': {}
        }

        vlan_num = Word(nums +
                        '-') + ZeroOrMore(Suppress(',') + Word(nums + '-'))

        parse_description = Suppress('description ') + restOfLine
        parse_type = Suppress('switchport mode ') + restOfLine
        parse_port_sec = Suppress('switchport port-security ') + restOfLine
        parse_stp_port = Suppress('spanning-tree ') + restOfLine
        parse_dhcp_snoop = Suppress('ip dhcp snooping ') + restOfLine
        parse_arp_insp = Suppress('ip arp inspection ') + restOfLine
        parse_source_guard = Suppress('ip verify source ') + restOfLine
        parse_arp_proxy_iface = Optional(
            Word(alphas)) + Suppress('ip proxy-arp')

        # Match either an access-vlan line or a trunk-allowed-vlan line (the
        # two alternatives are separate entries for MatchFirst).
        parse_vlans = Suppress('switchport ') + Suppress(MatchFirst([
            'access vlan ',
            'trunk allowed vlan ' + Optional('add '),
        ])) + vlan_num

        class Storm:
            def __init__(self):
                self.dct = {'type': []}

            def new_line(self, line):
                parse_storm = Suppress('storm-control ') + restOfLine

                try:
                    self.storm_line = parse_storm.parseString(
                        line).asList()[-1]
                    self.level_info()
                    self.action_info()
                    self.type_info()
                except ParseException:
                    pass

            @catch_exception
            def parse_level(self):
                parse_level = Word(alphas) + Suppress('level ') + restOfLine
                value = parse_level.parseString(self.storm_line).asList()
                if 'level' in self.dct:
                    self.dct['level'].append(value)
                else:
                    self.dct['level'] = [value]

            @catch_exception
            def parse_action(self):
                action = Suppress('action ') + Word(alphas)
                self.action = utill(action, self.storm_line)

            @catch_exception
            def parse_type(self):
                type = Word(alphas) + Suppress(
                    Optional("include")) + Word(alphas)
                self.type = utill(type, self.storm_line)

            @catch_exception1
            def action_info(self):
                self.parse_action()
                self.dct['action'] = self.action

            @catch_exception1
            def type_info(self):
                self.parse_type()

                for each in self.type:

                    if each not in self.dct['type'] and each in [
                            'broadcast', 'multicast', 'unicast'
                    ]:
                        self.dct['type'].append(each)

            @catch_exception1
            def level_info(self):
                self.parse_level()

        cl_storm = Storm()

        # Iterate over the options in reverse: 'shutdown' usually sits near the
        # end of the list, so reversing lets the loop break early (unless
        # check_disabled is set) when the interface is shut down, which speeds
        # the function up.
        for option in iface_list[::-1]:
            cl_storm.new_line(option)
            iface_dict['storm control'] = cl_storm.dct
            if option == 'shutdown':
                if check_disabled:
                    iface_dict['shutdown'] = 'yes'
                else:
                    iface_dict = {'shutdown': 'yes'}
                    break
            if option == 'switchport nonegotiate':
                iface_dict['dtp'] = 'no'
                continue
            if option == 'no cdp enable':
                iface_dict['cdp'] = 'no'
                continue
            if option == 'no mop enabled':
                iface_dict['mop'] = 'no'
                continue
            elif option == 'mop enabled':
                iface_dict['mop'] = 'yes'
                continue
            try:
                vlan_add = parse_vlans.parseString(option).asList()
                # Expand ranges like '1-3'; do not mutate vlan_add while
                # iterating over it.
                for unit in vlan_add:
                    if '-' in unit:
                        first, last = unit.split('-')
                        iface_dict['vlans'].extend(
                            range(int(first), int(last) + 1))
                    else:
                        iface_dict['vlans'].append(int(unit))
                continue
            except ParseException:
                pass
            try:
                iface_dict['description'] = parse_description.parseString(
                    option).asList()[-1]
                continue
            except ParseException:
                pass
            try:
                iface_dict['type'] = parse_type.parseString(
                    option).asList()[-1]
                continue
            except ParseException:
                pass

            try:
                port_sec = parse_port_sec.parseString(option).asList()[-1]
                iface_dict[
                    'port-security'] = parsing_checks.port_security.__ifaceAttributes___port_sec_parse(
                        port_sec, iface_dict['port-security'])
                continue
            except ParseException:
                pass
            try:
                dhcp_snoop = parse_dhcp_snoop.parseString(option).asList()[-1]
                iface_dict[
                    'dhcp_snoop'] = parsing_checks.ip_iface.__ifaceAttributes___ip_parse(
                        dhcp_snoop, iface_dict['dhcp_snoop'])
                continue
            except ParseException:
                pass
            try:
                arp_insp = parse_arp_insp.parseString(option).asList()[-1]
                iface_dict[
                    'arp_insp'] = parsing_checks.ip_iface.__ifaceAttributes___ip_parse(
                        arp_insp, iface_dict['arp_insp'])
                continue
            except ParseException:
                pass
            try:
                stp_port = parse_stp_port.parseString(option).asList()[-1]
                iface_dict['stp'] = stp_port
                continue
            except ParseException:
                pass
            try:
                source_guard = parse_source_guard.parseString(
                    option).asList()[-1]
                iface_dict['source_guard'] = source_guard
                continue
            except ParseException:
                pass
            try:
                ipv6 = parse_ipv6.parseString(option).asList()[-1]
                __ifaceAttributes___ipv6_parse(ipv6, iface_dict['ipv6'])
                continue
            except ParseException:
                pass
            try:
                arp_proxy_iface = parse_arp_proxy_iface.parseString(
                    option).asList()[-1]
                iface_dict['arp_proxy'] = arp_proxy_iface
                continue
            except ParseException:
                pass

        return iface_dict
    else:
        return {'unknown_iface': 1}
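# The vlan-range branch above expands entries like '1-3' in place; the same
# logic as a small standalone helper:
def expand_vlans(units):
    """Expand ['1-3', '5'] into [1, 2, 3, 5]."""
    vlans = []
    for unit in units:
        if '-' in unit:
            first, last = unit.split('-')
            vlans.extend(range(int(first), int(last) + 1))
        else:
            vlans.append(int(unit))
    return vlans


print(expand_vlans(['1-3', '5']))  # [1, 2, 3, 5]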
Example #24
0
def evaluator(variables, functions, string, cs=False):
    """
    Evaluate an expression. Variables are passed as a dictionary
    from string to value. Unary functions are passed as a dictionary
    from string to function. Variables must be floats.
    cs: Case sensitive

    """

    all_variables = copy.copy(DEFAULT_VARIABLES)
    all_functions = copy.copy(DEFAULT_FUNCTIONS)
    all_variables.update(variables)
    all_functions.update(functions)

    if not cs:
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal

    check_variables(string_cs, set(all_variables) | set(all_functions))

    if string.strip() == "":
        return float('nan')

    # SI suffixes and percent
    number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()])
    plus_minus = Literal('+') | Literal('-')
    times_div = Literal('*') | Literal('/')

    number_part = Word(nums)

    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional("." + Optional(number_part))) | (
        "." + number_part)
    # by default pyparsing allows spaces between tokens--Combine prevents that
    inner_number = Combine(inner_number)

    # 0.33k or -17
    number = (
        inner_number +
        Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) +
        Optional(number_suffix))
    number.setParseAction(number_parse_action)  # Convert to number

    # Predefine recursive variables
    expr = Forward()

    # Handle variables passed in.
    #  E.g. if we have {'R':0.5}, we make the substitution.
    # We sort the list so that var names (like "e2") match before
    # mathematical constants (like "e"). This is kind of a hack.
    all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True)
    varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys])
    varnames.setParseAction(lambda x: [all_variables[k] for k in x])

    # if all_variables were empty, then pyparsing wants
    # varnames = NoMatch()
    # this is not the case, as all_variables contains the defaults

    # Same thing for functions.
    all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True)
    funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys])
    function = funcnames + Suppress("(") + expr + Suppress(")")
    function.setParseAction(lambda x: [all_functions[x[0]](x[1])])

    atom = number | function | varnames | Suppress("(") + expr + Suppress(")")

    # Do the following in the correct order to preserve order of operation
    pow_term = atom + ZeroOrMore(Suppress("^") + atom)
    pow_term.setParseAction(exp_parse_action)  # 7^6
    par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term)  # 5k || 4k
    par_term.setParseAction(parallel)
    prod_term = par_term + ZeroOrMore(times_div + par_term)  # 7 * 5 / 4 - 3
    prod_term.setParseAction(prod_parse_action)
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(
        plus_minus + prod_term)  # -5 + 4 - 3
    sum_term.setParseAction(sum_parse_action)
    expr << sum_term  # finish the recursion
    return (expr + stringEnd).parseString(string)[0]
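# The layered ZeroOrMore chain above is what enforces operator precedence; a
# trimmed-down, self-contained sketch of the same pattern (plus/minus and
# times/divide only, integers instead of floats, names are illustrative):
import operator

from pyparsing import Forward, Literal, Suppress, Word, ZeroOrMore, nums, stringEnd


def fold_left(ops):
    def action(tokens):
        result = tokens[0]
        for op, value in zip(tokens[1::2], tokens[2::2]):
            result = ops[op](result, value)
        return [result]
    return action


mini_expr = Forward()
mini_atom = Word(nums).setParseAction(lambda t: [int(t[0])]) | \
    Suppress('(') + mini_expr + Suppress(')')
mini_product = mini_atom + ZeroOrMore((Literal('*') | Literal('/')) + mini_atom)
mini_product.setParseAction(fold_left({'*': operator.mul, '/': operator.truediv}))
mini_sum = mini_product + ZeroOrMore((Literal('+') | Literal('-')) + mini_product)
mini_sum.setParseAction(fold_left({'+': operator.add, '-': operator.sub}))
mini_expr << mini_sum

print((mini_expr + stringEnd).parseString('2+3*(4-1)')[0])  # 11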
Example #25
0
def _tdb_grammar():  #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    pos_neg_int_number = Word('+-' + nums).setParseAction(
        lambda t: [int(t[0])])  # '+3' or '-2' are examples
    # matching float w/ regex is ugly but is recommended by pyparsing
    regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)'
    float_number = Regex(r'[-+]?([0-9]+\.(?!([0-9]|[eE])))|{0}'.format(regex_after_decimal)) \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    ref_phase_name = symbol_name = Word(alphanums + '_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)),
                      ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + Optional(Suppress(ref_phase_name)) + \
        Optional(Suppress(OneOrMore(float_number))) + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + Group(
        OneOrMore(
            Word(alphas, min=1, max=2) + Optional(float_number, default=1.0))
    ) + Optional(Suppress('/') + pos_neg_int_number) + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                    cmd_species | \
                    cmd_typedef | \
                    cmd_function | \
                    cmd_ass_sys | \
                    cmd_defsysdef | \
                    cmd_defcmd | \
                    cmd_database_info | \
                    cmd_version_date | \
                    cmd_reference_file | \
                    cmd_add_ref | \
                    cmd_lor | \
                    cmd_templim | \
                    cmd_phase | \
                    cmd_constituent | \
                    cmd_parameter
    return all_commands
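# The float_number regex above accepts Thermo-Calc style numbers, including a
# bare trailing decimal point; a quick standalone check (positive forms shown):
from pyparsing import Regex

regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)'
float_number = Regex(r'[-+]?([0-9]+\.(?!([0-9]|[eE])))|{0}'.format(regex_after_decimal)) \
    .setParseAction(lambda t: [float(t[0])])

for text in ['298.15', '2.', '1.5E-3', '.25']:
    print(float_number.parseString(text)[0])  # 298.15, 2.0, 0.0015, 0.25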
Example #26
0
    {"op": "and", "name": "and"},
    {"op": "or", "name": "or"}
]

locs = locals()
reserved = []
for k in keywords:
    name, value = k.upper().replace(" ", ""), Keyword(k, caseless=True)
    locs[name] = value
    reserved.append(value)
for o in KNOWN_OPS:
    name = o['op'].upper()
    value = locs[name] = o['literal'] = CaselessLiteral(o['op'])
    reserved.append(value)

RESERVED = MatchFirst(reserved)


def to_json_operator(instring, tokensStart, retTokens):
    # ARRANGE INTO {op: params} FORMAT
    tok = retTokens[0]
    op = next(o for o in KNOWN_OPS if o['op'] == tok[1])['name']
    return {op: [tok[i * 2] for i in range(int((len(tok) + 1) / 2))]}


def to_json_call(instring, tokensStart, retTokens):
    # ARRANGE INTO {op: params} FORMAT
    tok = retTokens
    op = tok.op
    params = tok.params[0]
    if not params:
Example #27
0
                   | QuotedString('"', escChar='\\', unquoteResults=False))
INT_CONSTANT = Regex(r'-?\d+(?!\.)')
FLOAT_CONSTANT = Regex(r'-?\d*\.\d+')

CONSTANT = STRING_CONSTANT | FLOAT_CONSTANT | INT_CONSTANT

# All constants should be interpreted in Python format
CONSTANT.setParseAction(lambda toks: ast.literal_eval(toks[0]))

COMMA = Literal(',')

LIST = Literal('[') + CONSTANT + ZeroOrMore(COMMA + CONSTANT) + Optional(
    COMMA) + Literal(']')

IDENTIFIER = MatchFirst(
    [Word(alphas, alphanums),
     QuotedString('[', endQuoteChar=']')])
IDENTIFIER.setParseAction(lambda toks: Where(toks[0]))

VALUE = CONSTANT | IDENTIFIER

OPERATOR = MatchFirst([
    Regex(r'==?'),
    Literal('!='),
    Literal('<>'),
    Literal('>='),
    Literal('<='),
    Literal('>'),
    Literal('<'),
    CaselessKeyword('like'),
])
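# A usage sketch for the fragment above (Where is project-specific, so a
# trivial stand-in is used here to make the snippet self-contained; the
# _demo names are illustrative):
from pyparsing import (CaselessKeyword, Literal, MatchFirst, QuotedString,
                       Regex, Word, alphanums, alphas)


class Where:  # hypothetical stand-in for the project's column wrapper
    def __init__(self, name):
        self.name = name


IDENT_DEMO = MatchFirst([Word(alphas, alphanums),
                         QuotedString('[', endQuoteChar=']')])
IDENT_DEMO.setParseAction(lambda toks: Where(toks[0]))

OP_DEMO = MatchFirst([Regex(r'==?'), Literal('!='), Literal('<>'),
                      Literal('>='), Literal('<='), Literal('>'),
                      Literal('<'), CaselessKeyword('like')])

condition = IDENT_DEMO + OP_DEMO + QuotedString("'")
toks = condition.parseString("[first name] like 'Bob%'")
print(toks[0].name, toks[1], toks[2])  # first name like Bob%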
Example #28
0
    def __init__(self, elems, precedence=4):
        MatchFirst.__init__(self, concatenate(elems))
        BaseType.__init__(self, precedence)
Example #29
0
operand.setName('r-value')

op = operatorPrecedence
# op  = myOperatorPrecedence
rvalue << op(operand, [
    ('-', 1, opAssoc.RIGHT, Unary.parse_action),
    ('*', 2, opAssoc.LEFT, Binary.parse_action),
    ('-', 2, opAssoc.LEFT, Binary.parse_action),
    ('+', 2, opAssoc.LEFT, Binary.parse_action),
])

# I want
# - BindVariable to have precedence to EqualTo(VariableRef)
# but I also want:
# - Arithmetic to have precedence w.r.t BindVariable
# last is variables
add_contract(misc_variables_contract)
add_contract(int_variables_contract)

add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

hardwired = MatchFirst(ParsingTmp.contract_types)
hardwired.setName('Predefined contract expression')

simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

any_contract = composite_contract | simple_contract
any_contract.setName('Any simple or composite contract')
contract_expression << (any_contract)  # Parentheses before << !!
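# operatorPrecedence is the old pyparsing name for infixNotation; the same
# precedence table without the project's Unary/Binary parse actions simply
# yields nested groups (names here are illustrative):
from pyparsing import Word, infixNotation, nums, opAssoc

operand_demo = Word(nums).setParseAction(lambda t: [int(t[0])])
rvalue_demo = infixNotation(operand_demo, [
    ('-', 1, opAssoc.RIGHT),
    ('*', 2, opAssoc.LEFT),
    ('-', 2, opAssoc.LEFT),
    ('+', 2, opAssoc.LEFT),
])
print(rvalue_demo.parseString('1+2*3'))  # [[1, '+', [2, '*', 3]]]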
Example #30
0
import logging
import re

# This fragment uses re and a module-level log; a plausible setup is assumed.
log = logging.getLogger(__name__)


def valid_address(addr):
    """Address validator/parser."""
    # Sanitize-ish: collapse runs of whitespace to single spaces
    multispace = re.compile(r"\s+")
    addr = multispace.sub(" ", addr)

    addr_split = [
        x for x in addr.title().split(" ") if x not in ["", '', None]
    ]

    # Upper-case two-letter tokens such as state abbreviations and
    # N/S/E/W directionals.
    sane_addr = []
    for w in addr_split:
        if len(w) == 2:
            w = w.upper()
        sane_addr.append(w)

    addr = " ".join(sane_addr)

    log.debug("Address: %s" % addr)

    from pyparsing import (
        oneOf, CaselessLiteral, Optional, originalTextFor, Combine, Word, nums,
        alphas, White, FollowedBy, MatchFirst, Keyword, OneOrMore, Regex,
        alphanums, Suppress
    )

    # define number as a set of words
    units = oneOf(
        "Zero One Two Three Four Five Six Seven Eight Nine Ten "
        "Eleven Twelve Thirteen Fourteen Fifteen Sixteen Seventeen "
        "Eighteen Nineteen",
        caseless=True
    )
    tens = oneOf(
        "Ten Twenty Thirty Forty Fourty Fifty Sixty Seventy Eighty Ninety",
        caseless=True
    )
    hundred = CaselessLiteral("Hundred")
    thousand = CaselessLiteral("Thousand")
    OPT_DASH = Optional("-")
    numberword = (((
        units + OPT_DASH + Optional(thousand) + OPT_DASH +
        Optional(units + OPT_DASH + hundred) + OPT_DASH + Optional(tens)
    ) ^ tens) + OPT_DASH + Optional(units))

    # number can be any of the forms 123, 21B, 222-A or 23 1/2
    housenumber = originalTextFor(
        numberword | Combine(
            Word(nums) +
            Optional(OPT_DASH + oneOf(list(alphas))+FollowedBy(White()))
        ) + Optional(OPT_DASH + "1/2")
    )
    numberSuffix = oneOf("st th nd rd", caseless=True).setName("numberSuffix")
    streetnumber = originalTextFor(
        Word(nums) + Optional(OPT_DASH + "1/2") + Optional(numberSuffix)
    )

    # just a basic word of alpha characters, Maple, Main, etc.
    name = ~numberSuffix + Word(alphas)

    # types of streets - extend as desired
    type_ = Combine(MatchFirst(map(
        Keyword,
        "Street St ST Boulevard Blvd Lane Ln LN Road Rd RD Avenue Ave AVE "
        " Circle Cir Cove Cv Drive Dr DR Parkway Pkwy PKWY Court Ct Square Sq "
        "Loop Lp LP".split()
    )) + Optional(".").suppress())

    # street name
    nsew = Combine(
        oneOf("N S E W North South East West NW NE SW SE", caseless=True) +
        Optional(".")
    )
    streetName = (
        Combine(
            Optional(nsew) + streetnumber + Optional("1/2") +
            Optional(numberSuffix), joinString=" ", adjacent=False
        ) ^ Combine(
            ~numberSuffix + OneOrMore(~type_ + Combine(
                Word(alphas) + Optional(".") + Optional(",")
            )),
            joinString=" ",
            adjacent=False
        ) ^ Combine("Avenue" + Word(alphas), joinString=" ", adjacent=False)
    ).setName("streetName")

    # PO Box handling
    acronym = lambda s: Regex(r"\.?\s*".join(s)+r"\.?")
    poBoxRef = (
        (acronym("PO") | acronym("APO") | acronym("AFP")) +
        Optional(CaselessLiteral("BOX"))
    ) + Word(alphanums)("boxnumber")

    # basic street address
    streetReference = \
        streetName.setResultsName("name") + \
        Optional(type_).setResultsName("type")
    direct = housenumber.setResultsName("number") + streetReference
    intersection = (
        streetReference.setResultsName("crossStreet") +
        ('@' | Keyword("and", caseless=True)) +
        streetReference.setResultsName("street")
    )
    suiteRef = (
        oneOf("Suite Ste Apt Apartment Room Rm #", caseless=True) +
        Optional(".") +
        Word(alphanums+'-')
    )
    streetAddress = ((
        poBoxRef("street")
        ^ (direct + Optional(suiteRef)).setResultsName("street")
        ^ (streetReference + Optional(suiteRef)).setResultsName("street")
        ^ intersection
    ) + Optional(Suppress(',') + Optional(Suppress('.')))
    ).setResultsName("streetAddress")

    city = (
        OneOrMore(Word(alphas)) + Optional(Suppress(","))
    ).setResultsName("city")

    states_abbr = oneOf(
        "AL AK AZ AR CA CO CT DE FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN "
        "MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA "
        "WA WV WI WY",
        caseless=True
    )
    state_names = oneOf(
        ["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
         "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
         "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
         "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
         "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "Ohio",
         "Oklahoma", "Oregon", "Pennsylvania", "Tennessee", "Texas", "Utah",
         "Vermont", "Virginia", "Washington", "Wisconsin", "Wyoming",
         "New Hampshire", "New Jersey", "New Mexico", "New York",
         "North Carolina", "North Dakota", "Rhode Island", "South Carolina",
         "South Dakota", "West Virginia"],
        caseless=True
    )
    state = (
        states_abbr.setResultsName("state")
        ^ state_names.setResultsName("state")
    ) + Optional(".") + Optional(",")
    zipCode = Word(nums).setResultsName("zip")

    us_address = (
        streetAddress + city + state + zipCode
    ).parseString(addr)
    log.debug("Parsed address: %s" % us_address)

    return us_address
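# A quick smoke test of the parser above; parseString raises ParseException
# for address layouts the grammar does not cover, so real inputs may fail:
result = valid_address("102 North Main Street, Houston, TX 77002")
print(result.get("state"), result.get("zip"))  # expected: TX 77002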
Example #31
0
plusPairTokens = ['f', 'x']
for pt in plusPairTokens:
    knownTokens.append(usfmTokenValue(pt, plus))
    knownTokens.append(usfmEndToken(pt))

phrasePairTokens = ['fr']
for pt in phrasePairTokens:
    knownTokens.append(usfmTokenValue(pt, phrase))
    knownTokens.append(usfmEndToken(pt))

knownTokens.append(usfmBackslashToken("\\\\"))
knownTokens.append(textBlock)
knownTokens.append(unknown)

usfm = OneOrMore(MatchFirst(knownTokens))

#
# PARSING
#


def parseString(unicodeString):
    try:
        s = clean(unicodeString)
        tokens = usfm.parseString(s, parseAll=True)
    except Exception as e:
        print(e)
        print(repr(unicodeString[:50]))
        sys.exit()
    return [createToken(t) for t in tokens]
Example #32
0
    def __init__(self):

        self.json_query = {'query': {}, 'and': [], 'or': []}
        self.tokens = None
        #--------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        #--------------------------------------------------------------------------------------
        integer = Regex(r'-?[0-9]+')  # Word matches space for some reason
        double = Regex(r'-?[0-9]+\.?[0-9]*')
        number = double | integer

        #--------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 0..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        #--------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(
            r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(
                removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(
            removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral(
            "LON") + number
        units = CaselessLiteral('km') | CaselessLiteral('mi') | CaselessLiteral('nm')
        distance = number + units
        distance.setParseAction(lambda x: self.frame.update({
            'dist': float(x[0]),
            'units': x[1]
        }))

        #--------------------------------------------------------------------------------------
        # Date
        #--------------------------------------------------------------------------------------
        date = python_string

        #--------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        #--------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(
            lambda x: self.frame.update({'filter': x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x: self.frame.update({'index': x[0]}))
        resource_id = Regex(
            r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')'
        ).setParseAction(removeQuotes)
        collection_id = resource_id

        #--------------------------------------------------------------------------------------
        # <from-statement> ::= "FROM" <number>
        # <to-statement>   ::= "TO" <number>
        #--------------------------------------------------------------------------------------
        from_statement = CaselessLiteral("FROM") + number
        from_statement.setParseAction(
            lambda x: self.frame.update({'from': x[1]}))
        to_statement = CaselessLiteral("TO") + number
        to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))

        #--------------------------------------------------------------------------------------
        # <date-from-statement> ::= "FROM" <date>
        # <date-to-statement>   ::= "TO" <date>
        #--------------------------------------------------------------------------------------
        date_from_statement = CaselessLiteral("FROM") + date
        date_from_statement.setParseAction(
            lambda x: self.frame.update({'from': x[1]}))
        date_to_statement = CaselessLiteral("TO") + date
        date_to_statement.setParseAction(
            lambda x: self.frame.update({'to': x[1]}))

        #--------------------------------------------------------------------------------------
        # <time-query> ::= "TIME FROM" <date> "TO" <date>
        #--------------------------------------------------------------------------------------
        time_query = CaselessLiteral("TIME") + Optional(
            date_from_statement) + Optional(date_to_statement)
        time_query.setParseAction(lambda x: self.time_frame())
        # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))

        #--------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES" [<from-statement>] [<to-statement>]
        #--------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + Optional(
            from_statement) + Optional(to_statement)
        # Add the range to the frame object
        range_query.setParseAction(lambda x: self.range_frame())

        #--------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        #--------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral(
            "DISTANCE") + distance + CaselessLiteral("FROM") + coords
        geo_distance.setParseAction(
            lambda x: self.frame.update({
                'lat': float(x[5]),
                'lon': float(x[7])
            }))
        geo_bbox = CaselessLiteral("BOX") + CaselessLiteral(
            "TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        geo_bbox.setParseAction(lambda x: self.frame.update({
            'top_left': [float(x[5]), float(x[3])],
            'bottom_right': [float(x[10]), float(x[8])]
        }))

        #--------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <fuzzy-query>  ::= "LIKE" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        #--------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        term_query.setParseAction(lambda x: self.frame.update({'value': x[1]}))

        geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)

        fuzzy_query = CaselessLiteral("LIKE") + field_query
        fuzzy_query.setParseAction(
            lambda x: self.frame.update({'fuzzy': x[1]}))

        #--------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <order-paramater> | <limit-parameter>
        #--------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(
            lambda x: self.frame.update({'limit': int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(
            lambda x: self.frame.update({'depth': int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral(
            "BY") + limited_string
        order_parameter.setParseAction(
            lambda x: self.frame.update({'order': {
                x[2]: 'asc'
            }}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(
            lambda x: self.frame.update({'offset': int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        #--------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <time-query> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
        #--------------------------------------------------------------------------------------
        search_query = CaselessLiteral("SEARCH") + field + (
            range_query | term_query | fuzzy_query | time_query | geo_query
        ) + CaselessLiteral("FROM") + index_name + query_parameter * (0, None)
        # Add the field to the frame object
        search_query.setParseAction(
            lambda x: self.frame.update({'field': x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(
            lambda x: self.frame.update({'collection': x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral(
            "TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(
            lambda x: self.frame.update({'association': x[2]}))
        owner_query = CaselessLiteral("HAS") + resource_id + Optional(
            depth_parameter)
        owner_query.setParseAction(
            lambda x: self.frame.update({'owner': x[1]}))
        query = search_query | association_query | collection_query | owner_query

        #--------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        #--------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x: self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x: self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x: self.or_frame())

        self.sentence = primary_query + (intersection ^ union) * (0, None)
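# The class above accumulates results into a mutable frame dict through parse
# actions; the core of that pattern in isolation (names are illustrative):
from pyparsing import CaselessLiteral, Regex

frame = {}
integer_demo = Regex(r'-?[0-9]+')
limit_demo = CaselessLiteral('LIMIT') + integer_demo
limit_demo.setParseAction(lambda x: frame.update({'limit': int(x[1])}))
limit_demo.parseString('LIMIT 10')
print(frame)  # {'limit': 10}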
Example #33
0
from .grammer import *
from pyparsing import delimitedList, MatchFirst, Word, Regex

#Define list of column numbers in brackets or single column number
column = Word(numbers)
_columns = delimitedList(column, combine=True)
_list_columns = openBracket + _columns + closeBracket
choice_columns = MatchFirst([column, _list_columns])

#Define a numerical values
decimal = Regex(r'\d*\.?\d*')
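# The fragment imports its bracket tokens and digit set from .grammer; with
# plausible stand-ins, choice_columns accepts either a single column number
# or a bracketed list (the _demo names are illustrative):
from pyparsing import MatchFirst, Suppress, Word, delimitedList, nums

column_demo = Word(nums)
columns_demo = delimitedList(column_demo, combine=True)
list_columns_demo = Suppress('[') + columns_demo + Suppress(']')
choice_columns_demo = MatchFirst([column_demo, list_columns_demo])
print(choice_columns_demo.parseString('[1,2,3]'))  # ['1,2,3']
print(choice_columns_demo.parseString('7'))        # ['7']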
Example #34
0
from pyparsing import (And, Keyword, MatchFirst, NotAny, Optional, Or,
                       ParseResults, Regex, Suppress, Word, alphanums, alphas)


class DnstestParser:
    """
    Parses natural-language-like grammar describing DNS changes
    """

    grammar_strings = []

    # implement my grammar
    word = Word(alphas)
    value = Word(alphanums).setResultsName("value")
    add_op = Keyword("add").setResultsName("operation")
    rm_op = Keyword("remove").setResultsName("operation")
    rename_op = Keyword("rename").setResultsName("operation")
    change_op = Keyword("change").setResultsName("operation")
    confirm_op = Keyword("confirm").setResultsName("operation")
    rec_op = Or([Keyword("record"), Keyword("entry"), Keyword("name")])
    val_op = Optional(Keyword("with")) + Or(
        [Keyword("value"),
         Keyword("address"),
         Keyword("target")])

    fqdn = Regex(
        r"(([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9])(\.([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9]))*)"
    )
    ipaddr = Regex(
        r"((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}(1[0-9]{2}|2[0-4][0-9]|25[0-5]|[1-9][0-9]|[0-9]))"
    )
    hostname = Regex(r"([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9])")
    hostname_or_fqdn = And([NotAny(ipaddr), MatchFirst([fqdn, hostname])])
    hostname_fqdn_or_ip = MatchFirst([ipaddr, fqdn, hostname])

    grammar_strings.append(
        'add (record|name|entry)? <hostname_or_fqdn> (with ?)(value|address|target)? <hostname_fqdn_or_ip>'
    )
    cmd_add = add_op + Optional(rec_op) + hostname_or_fqdn.setResultsName(
        "hostname") + Suppress(val_op) + hostname_fqdn_or_ip.setResultsName(
            'value')

    grammar_strings.append('remove (record|name|entry)? <hostname_or_fqdn>')
    cmd_remove = rm_op + Optional(rec_op) + hostname_fqdn_or_ip.setResultsName(
        "hostname")

    grammar_strings.append(
        'rename (record|name|entry)? <hostname_or_fqdn> (with ?)(value ?) <value> to <hostname_or_fqdn>'
    )
    cmd_rename = rename_op + Suppress(Optional(
        rec_op)) + hostname_or_fqdn.setResultsName("hostname") + Suppress(
            Optional(val_op)) + hostname_fqdn_or_ip.setResultsName(
                'value') + Suppress(
                    Keyword("to")) + hostname_or_fqdn.setResultsName('newname')

    grammar_strings.append(
        'change (record|name|entry)? <hostname_or_fqdn> to <hostname_fqdn_or_ip>'
    )
    cmd_change = change_op + Suppress(Optional(
        rec_op)) + hostname_or_fqdn.setResultsName("hostname") + Suppress(
            Keyword("to")) + hostname_fqdn_or_ip.setResultsName('value')

    grammar_strings.append('confirm (record|name|entry)? <hostname_or_fqdn>')
    cmd_confirm = confirm_op + Suppress(
        Optional(rec_op)) + hostname_or_fqdn.setResultsName("hostname")

    line_parser = Or(
        [cmd_confirm, cmd_add, cmd_remove, cmd_rename, cmd_change])

    def __init__(self):
        pass

    def parse_line(self, line):
        res = self.line_parser.parseString(line, parseAll=True)
        d = res.asDict()
        # hostname_or_fqdn using And and NotAny now returns a ParseResults object instead of a string,
        # we need to convert that to a string to just take the first value
        for i in d:
            if isinstance(d[i], ParseResults):
                d[i] = d[i][0]
        return d

    def get_grammar(self):
        """ return a list of possible grammar options """
        return self.grammar_strings
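# A usage example for the parser above:
parser = DnstestParser()
print(parser.parse_line('add record foo.example.com with address 10.0.0.1'))
# -> {'operation': 'add', 'hostname': 'foo.example.com', 'value': '10.0.0.1'}
#    (key order may differ)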
Example #35
0
    def parse_algebra(self):
        """
        Parse an algebraic expression into a tree.

        Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to
        reflect parenthesis and order of operations. Leave all operators in the
        tree and do not parse any strings of numbers into their float versions.

        Adding the groups and result names makes the `repr()` of the result
        really gross. For debugging, use something like
          print OBJ.tree.asXML()
        """
        # 0.33 or 7 or .34 or 16.
        number_part = Word(nums)
        inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part)
        # pyparsing allows spaces between tokens--`Combine` prevents that.
        inner_number = Combine(inner_number)

        # SI suffixes and percent.
        number_suffix = MatchFirst(Literal(k) for k in SUFFIXES.keys())

        # 0.33k or 17
        plus_minus = Literal('+') | Literal('-')
        number = Group(
            Optional(plus_minus) +
            inner_number +
            Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) +
            Optional(number_suffix)
        )
        number = number("number")

        # Predefine recursive variables.
        expr = Forward()

        # Handle variables passed in. They must start with letters/underscores
        # and may contain numbers afterward.
        inner_varname = Word(alphas + "_", alphanums + "_")
        varname = Group(inner_varname)("variable")
        varname.setParseAction(self.variable_parse_action)

        # Same thing for functions.
        function = Group(inner_varname + Suppress("(") + expr + Suppress(")"))("function")
        function.setParseAction(self.function_parse_action)

        atom = number | function | varname | "(" + expr + ")"
        atom = Group(atom)("atom")

        # Do the following in the correct order to preserve order of operation.
        pow_term = atom + ZeroOrMore("^" + atom)
        pow_term = Group(pow_term)("power")

        par_term = pow_term + ZeroOrMore('||' + pow_term)  # 5k || 4k
        par_term = Group(par_term)("parallel")

        prod_term = par_term + ZeroOrMore((Literal('*') | Literal('/')) + par_term)  # 7 * 5 / 4
        prod_term = Group(prod_term)("product")

        sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(plus_minus + prod_term)  # -5 + 4 - 3
        sum_term = Group(sum_term)("sum")

        # Finish the recursion.
        expr << sum_term  # pylint: disable=W0104
        self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
Example #36
0
from pyparsing import pyparsing_common as ppc
from pyparsing import Word, alphas, alphanums, CaselessKeyword
from pyparsing import MatchFirst, Forward, ZeroOrMore

#>>>>>>>>>>>>>>> BASIC DEFINITIONS >>>>>>>>>>>>>>>>>>>>>>>

#Define the reserved words given in the language description
PROGRAMA, CAR, INT, RETORNE = map(CaselessKeyword,
                                  "programa car int retorne".split())
ESCREVA, NOVALINHA, SE, ENTAO = map(CaselessKeyword,
                                    "escreva novalinha se entao".split())
SENAO, ENQUANTO, EXECUTE, LEIA, TERMINATOR = map(
    CaselessKeyword, "senao enquanto execute leia ;".split())

keywords = MatchFirst(
    (PROGRAMA, CAR, INT, RETORNE, ESCREVA, NOVALINHA, SE, ENTAO, SENAO,
     ENQUANTO, EXECUTE, LEIA)).setName("Reserved Words")

#Define the Terminator character
TERMINATOR = Word(";").setName("Terminator")

#Define the numbers
realNum = ppc.real().setName("Real Number")
intNum = ppc.signed_integer().setName("Integer Number")

#Define the identifier
identifier = Word(alphas, alphanums + "_$").setName("Identifier")

#Types Definition
Type = (INT | CAR).setName("Type")
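# Quick checks of the pieces above: reserved words match caselessly, and
# identifiers allow digits, underscores, and dollar signs after the first
# letter:
print(keywords.parseString('ENQUANTO'))   # ['enquanto']
print(identifier.parseString('soma_1$'))  # ['soma_1$']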
Example #37
0
    def __init__(
        self,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        citation_clearing: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Initialize the control statement parser.

        :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
        :param annotation_to_pattern: A dictionary of {annotation: regular expression string}
        :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
        :param required_annotations: Annotations that are required
        """
        self.citation_clearing = citation_clearing

        self.annotation_to_term = annotation_to_term or {}
        self.annotation_to_pattern = annotation_to_pattern or {}
        self.annotation_to_local = annotation_to_local or {}

        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations = {}
        self.required_annotations = required_annotations or []

        annotation_key = ppc.identifier('key').setParseAction(
            self.handle_annotation_key)

        self.set_statement_group = set_statement_group_stub().setParseAction(
            self.handle_set_statement_group)
        self.set_citation = set_citation_stub.setParseAction(
            self.handle_set_citation)
        self.set_evidence = set_evidence_stub.setParseAction(
            self.handle_set_evidence)

        set_command_prefix = And([annotation_key('key'), Suppress('=')])
        self.set_command = set_command_prefix + qid('value')
        self.set_command.setParseAction(self.handle_set_command)

        self.set_command_list = set_command_prefix + delimited_quoted_list(
            'values')
        self.set_command_list.setParseAction(self.handle_set_command_list)

        self.unset_command = annotation_key('key')
        self.unset_command.addParseAction(self.handle_unset_command)

        self.unset_evidence = supporting_text_tags(EVIDENCE)
        self.unset_evidence.setParseAction(self.handle_unset_evidence)

        self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
        self.unset_citation.setParseAction(self.handle_unset_citation)

        self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
        self.unset_statement_group.setParseAction(
            self.handle_unset_statement_group)

        self.unset_list = delimited_unquoted_list('values')
        self.unset_list.setParseAction(self.handle_unset_list)

        self.unset_all = unset_all.setParseAction(self.handle_unset_all)

        self.set_statements = set_tag('action') + MatchFirst([
            self.set_statement_group,
            self.set_citation,
            self.set_evidence,
            self.set_command,
            self.set_command_list,
        ])

        self.unset_statements = unset_tag('action') + MatchFirst([
            self.unset_all,
            self.unset_citation,
            self.unset_evidence,
            self.unset_statement_group,
            self.unset_command,
            self.unset_list,
        ])

        self.language = self.set_statements | self.unset_statements

        super(ControlParser, self).__init__(self.language)
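
# A reduced, self-contained sketch of the SET-command core handled above
# (qid, set_tag and the other stubs are defined elsewhere in the original
# module; this mirrors only the key = "value" shape):
from pyparsing import And, Suppress, quotedString, removeQuotes
from pyparsing import pyparsing_common as ppc

key = ppc.identifier('key')
value = quotedString.copy().setParseAction(removeQuotes)('value')
set_command = And([key, Suppress('='), value])
print(set_command.parseString('Species = "9606"').asDict())
# -> {'key': 'Species', 'value': '9606'}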
Example #38
0
operand = number | int_variables_ref | misc_variables_ref
operand.setName('r-value')


rvalue << myOperatorPrecedence(operand, [
             ('-', 1, opAssoc.RIGHT, Unary.parse_action),
             ('*', 2, opAssoc.LEFT, Binary.parse_action),
             ('-', 2, opAssoc.LEFT, Binary.parse_action),
             ('+', 2, opAssoc.LEFT, Binary.parse_action),
          ])

# We want:
# - BindVariable to take precedence over EqualTo(VariableRef),
# - but arithmetic to take precedence over BindVariable.
# Plain variables are tried last.
add_contract(misc_variables_contract)
add_contract(int_variables_contract)
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

hardwired = MatchFirst(ParsingTmp.contract_types)
hardwired.setName('Predefined contract expression')

simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

any_contract = composite_contract | simple_contract
any_contract.setName('Any simple or composite contract')
contract_expression << (any_contract) # Parentheses before << !!
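
# For reference (not part of the contracts grammar itself): the stock
# pyparsing counterpart of myOperatorPrecedence is infixNotation, which
# builds a similar precedence layering; parse actions are omitted here.
from pyparsing import infixNotation, oneOf, opAssoc, pyparsing_common

arith = infixNotation(pyparsing_common.number, [
    ('-', 1, opAssoc.RIGHT),          # unary minus binds tightest
    (oneOf('* /'), 2, opAssoc.LEFT),  # then multiplication and division
    (oneOf('+ -'), 2, opAssoc.LEFT),  # then addition and subtraction
])
print(arith.parseString("1 - 2 * -3"))  # -> [[1, '-', [2, '*', ['-', 3]]]]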

Example #39
0
    def create(cls, base_shader_path, base_texture_path):
        """
        Create a Stanford polygon file parser (PLY).

        :param base_shader_path:
        :param base_texture_path:
        :return:
        """
        # Define the base patterns for parsing
        real = pyparsing_common.real()
        integer = pyparsing_common.integer()

        # Define how the header portion begins and ends
        start_keyword = cls._or(cls.begin_header_keyword, suppress=True)
        stop_keyword = cls._or(cls.end_header_keyword, suppress=True)

        # Define the grammar of a comment statement
        comment_keyword = cls._or(cls.comment_keyword, suppress=True)
        vertex_shader_comment = Group(
            comment_keyword + Suppress(CaselessKeyword("VertexShaderFile")) +
            Word(alphanums + ".-_"))("vertex_shader_file")
        fragment_shader_comment = Group(
            comment_keyword + Suppress(CaselessKeyword("FragmentShaderFile")) +
            Word(alphanums + ".-_"))("fragment_shader_file")
        texture_comment = Group(comment_keyword +
                                Suppress(CaselessKeyword("TextureFile")) +
                                Word(alphanums + ".-_"))("texture_file")
        other_comment = comment_keyword + NotAny("TextureFile") + Word(
            printables + " ")

        # Define the grammar of a format statement
        format_keyword = cls._or(cls.format_keyword, suppress=True)
        format_type = cls._or(cls.format_type_map)
        format_expr = Group(format_keyword + format_type("file_type") +
                            real("version"))("format")

        # Define the grammar of properties
        property_keyword = cls._or(cls.property_keyword, suppress=True)
        list_keyword = cls._or(cls.list_keyword, suppress=True)
        property_type = cls._or(cls.data_type_map)
        psp = property_keyword + property_type("data_type")

        position_keywords = [cls._or(k) for k in ("x", "y", "z")]
        property_position = cls._aggregate_property("position", psp,
                                                    *position_keywords)

        property_color = Group(
            And([
                Group(psp + MatchFirst((CaselessKeyword("r"),
                                        CaselessKeyword("red")))("name")),
                Group(psp + MatchFirst((CaselessKeyword("g"),
                                        CaselessKeyword("green")))("name")),
                Group(psp + MatchFirst((CaselessKeyword("b"),
                                        CaselessKeyword("blue")))("name")),
                Optional(
                    Group(psp +
                          MatchFirst((CaselessKeyword("a"),
                                      CaselessKeyword("alpha")))("name")), )
            ]))("color")

        ambient_keywords = [
            cls._or(k) for k in ("ambient_red", "ambient_green",
                                 "ambient_blue", "ambient_alpha")
        ]
        property_ambient_color = cls._aggregate_property(
            "ambient_color", psp, *ambient_keywords)

        diffuse_keywords = [
            cls._or(k) for k in ("diffuse_red", "diffuse_green",
                                 "diffuse_blue", "diffuse_alpha")
        ]
        property_diffuse_color = cls._aggregate_property(
            "diffuse_color", psp, *diffuse_keywords)

        specular_keywords = [
            cls._or(k) for k in ("specular_red", "specular_green",
                                 "specular_blue", "specular_alpha")
        ]
        property_specular_color = cls._aggregate_property(
            "specular_color", psp, *specular_keywords)

        texture_keywords = [
            cls._or(*k) for k in (("s", "u", "tx"), ("t", "v", "ty"))
        ]
        property_texture = cls._aggregate_property("texture", psp,
                                                   *texture_keywords)

        normal_keywords = [cls._or(k) for k in ("nx", "ny", "nz")]
        property_normal = cls._aggregate_property("normal", psp,
                                                  *normal_keywords)

        power_keywords = [CaselessKeyword("specular_power")]
        property_specular_power = cls._aggregate_property(
            "specular_power", psp, *power_keywords)

        opacity_keywords = [CaselessKeyword("opacity")]
        property_opacity = cls._aggregate_property("opacity", psp,
                                                   *opacity_keywords)

        plp = property_keyword + list_keyword + property_type(
            "index_type") + property_type("data_type")

        vertex_index_keywords = [cls._or("vertex_index", "vertex_indices")]
        property_vertex_index = cls._aggregate_property(
            "vertex_index", plp, *vertex_index_keywords)

        material_index_keywords = [
            cls._or("material_index", "material_indices")
        ]
        property_material_index = cls._aggregate_property(
            "material_index", plp, *material_index_keywords)

        # Define the grammar of elements
        element_keyword = cls._or(cls.element_keyword, suppress=True)

        element_vertex = Group(
            element_keyword + CaselessKeyword("vertex")("name") +
            integer("count") + Group(
                OneOrMore(property_position | property_color
                          | property_ambient_color | property_diffuse_color
                          | property_specular_color | property_texture
                          | property_normal | property_specular_power
                          | property_opacity))("properties"))

        element_face = Group(element_keyword +
                             CaselessKeyword("face")("name") +
                             integer("count") +
                             Group(property_vertex_index
                                   | property_material_index)("properties"))

        element_group = element_vertex | element_face

        declarations = (
            format_expr
            + Group(ZeroOrMore(vertex_shader_comment | fragment_shader_comment
                               | texture_comment | other_comment))("comments")
            + Group(OneOrMore(element_group))("elements")
        )

        header_grammar = start_keyword + declarations + stop_keyword

        return cls(header_grammar, base_shader_path, base_texture_path)
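
# A minimal, self-contained sketch of one comment form the header grammar
# above accepts (cls._or and cls._aggregate_property are helpers defined
# elsewhere in the original class):
from pyparsing import CaselessKeyword, Suppress, Word, alphanums

texture_line = (Suppress(CaselessKeyword("comment"))
                + Suppress(CaselessKeyword("TextureFile"))
                + Word(alphanums + ".-_")("texture_file"))
print(texture_line.parseString("comment TextureFile brick_wall.png"))
# -> ['brick_wall.png']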
Example #40
0
def evaluator(variables, functions, string, cs=False):
    """
    Evaluate an expression. Variables are passed as a dictionary
    from string to value. Unary functions are passed as a dictionary
    from string to function. Variables must be floats.
    cs: Case sensitive

    """

    all_variables = copy.copy(DEFAULT_VARIABLES)
    all_functions = copy.copy(DEFAULT_FUNCTIONS)
    all_variables.update(variables)
    all_functions.update(functions)

    if not cs:
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal

    check_variables(string_cs, set(all_variables) | set(all_functions))

    if string.strip() == "":
        return float('nan')

    # SI suffixes and percent
    number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()])
    plus_minus = Literal('+') | Literal('-')
    times_div = Literal('*') | Literal('/')

    number_part = Word(nums)

    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part)
    # by default pyparsing allows spaces between tokens--Combine prevents that
    inner_number = Combine(inner_number)

    # 0.33k or 17 (the sign is handled at the sum level)
    number = (inner_number
              + Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part)
              + Optional(number_suffix))
    number.setParseAction(number_parse_action)  # Convert to number

    # Predefine recursive variables
    expr = Forward()

    # Handle variables passed in.
    #  E.g. if we have {'R':0.5}, we make the substitution.
    # We sort the list so that var names (like "e2") match before
    # mathematical constants (like "e"). This is kind of a hack.
    all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True)
    varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys])
    varnames.setParseAction(
        lambda x: [all_variables[k] for k in x]
    )

    # if all_variables were empty, then pyparsing wants
    # varnames = NoMatch()
    # this is not the case, as all_variables contains the defaults

    # Same thing for functions.
    all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True)
    funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys])
    function = funcnames + Suppress("(") + expr + Suppress(")")
    function.setParseAction(
        lambda x: [all_functions[x[0]](x[1])]
    )

    atom = number | function | varnames | Suppress("(") + expr + Suppress(")")

    # Do the following in the correct order to preserve order of operation
    pow_term = atom + ZeroOrMore(Suppress("^") + atom)
    pow_term.setParseAction(exp_parse_action)  # 7^6
    par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term)  # 5k || 4k
    par_term.setParseAction(parallel)
    prod_term = par_term + ZeroOrMore(times_div + par_term)  # 7 * 5 / 4
    prod_term.setParseAction(prod_parse_action)
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(plus_minus + prod_term)  # -5 + 4 - 3
    sum_term.setParseAction(sum_parse_action)
    expr << sum_term  # finish the recursion
    return (expr + stringEnd).parseString(string)[0]
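
# Illustrative calls (assuming the module-level names referenced above, e.g.
# DEFAULT_VARIABLES, DEFAULT_FUNCTIONS, SUFFIXES and the *_parse_action
# helpers, are defined earlier in this file):
if __name__ == '__main__':
    print(evaluator({}, {}, "2^3 + 1"))      # 9.0
    print(evaluator({'R': 0.5}, {}, "R*2"))  # 1.0
    print(evaluator({}, {}, "1k || 1k"))     # 500.0, the parallel combination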
Example #41
0
    def __init__(self):


        self.json_query = {'query':{}, 'and': [], 'or': []}
        self.tokens = None
        #--------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        #--------------------------------------------------------------------------------------
        integer = Regex(r'-?[0-9]+') # Word matches space for some reason
        double = Regex(r'-?[0-9]+\.?[0-9]*')  # escape the dot; unescaped '.' matches any character
        number = double | integer

        #--------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 0..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        #--------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
        units = CaselessLiteral('km') | CaselessLiteral('mi') | CaselessLiteral('nm')
        distance = number + units
        distance.setParseAction( lambda x : self.frame.update({'dist' : float(x[0]), 'units' : x[1]}))


        #--------------------------------------------------------------------------------------
        # Date
        #--------------------------------------------------------------------------------------
        date = python_string
        
        #--------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        #--------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(lambda x : self.frame.update({'filter' : x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x : self.frame.update({'index' : x[0]}))
        resource_id = Regex(r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
        collection_id = resource_id


        #--------------------------------------------------------------------------------------
        # <from-statement> ::= "FROM" <number> 
        # <to-statement>   ::= "TO" <number>
        #--------------------------------------------------------------------------------------
        from_statement = CaselessLiteral("FROM") + number
        from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
        to_statement = CaselessLiteral("TO") + number
        to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))


        #--------------------------------------------------------------------------------------
        # <date-from-statement> ::= "FROM" <date> 
        # <date-to-statement>   ::= "TO" <date>
        #--------------------------------------------------------------------------------------
        date_from_statement = CaselessLiteral("FROM") + date
        date_from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
        date_to_statement = CaselessLiteral("TO") + date
        date_to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))


        #--------------------------------------------------------------------------------------
        # <time-query> ::= "TIME FROM" <date> "TO" <date>
        #--------------------------------------------------------------------------------------
        time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
        time_query.setParseAction(lambda x : self.time_frame())

        #--------------------------------------------------------------------------------------
        # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
        time_bounds.setParseAction(lambda x : self.time_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>        
        #--------------------------------------------------------------------------------------
        vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
        vertical_bounds.setParseAction(lambda x : self.vertical_bounds_frame())
        
        #--------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES" [<from-statement>] [<to-statement>]
        #--------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
        # Add the range to the frame object
        range_query.setParseAction(lambda x : self.range_frame())

        #--------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        #--------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
        geo_distance.setParseAction(lambda x : self.frame.update({'lat': float(x[5]), 'lon':float(x[7])}))
        geo_bbox = CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        geo_bbox.setParseAction(lambda x : self.frame.update({'top_left':[float(x[5]),float(x[3])], 'bottom_right':[float(x[10]),float(x[8])]}))

        #--------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <fuzzy-query>  ::= "LIKE" <field-query>
        # <match-query>  ::= "MATCH" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        #--------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        term_query.setParseAction(lambda x : self.frame.update({'value':x[1]}))
        
        geo_query = CaselessLiteral("GEO") + ( geo_distance | geo_bbox )

        fuzzy_query = CaselessLiteral("LIKE") + field_query
        fuzzy_query.setParseAction(lambda x : self.frame.update({'fuzzy':x[1]}))
        match_query = CaselessLiteral("MATCH") + field_query
        match_query.setParseAction(lambda x : self.frame.update({'match':x[1]}))

        #--------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <limit-parameter> | <order-parameter> | <offset-parameter>
        #--------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(lambda x: self.json_query.update({'limit' : int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(lambda x: self.frame.update({'depth' : int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
        order_parameter.setParseAction(lambda x: self.json_query.update({'order' : {x[2] : 'asc'}}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(lambda x : self.json_query.update({'skip' : int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        #--------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name>
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
        #--------------------------------------------------------------------------------------
        search_query = CaselessLiteral("SEARCH") + field + (range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name
        # Add the field to the frame object
        search_query.setParseAction(lambda x : self.frame.update({'field' : x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(lambda x : self.frame.update({'collection': x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(lambda x : self.frame.update({'association':x[2]}))
        owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
        owner_query.setParseAction(lambda x : self.frame.update({'owner':x[1]}))
        query = search_query | association_query | collection_query | owner_query

        #--------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]* [<query-parameter>]*
        #--------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x : self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x : self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x : self.or_frame())

        self.sentence = primary_query + (intersection ^ union)*(0,None) + query_parameter*(0,None)
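
# Note on the trailing repetition (illustrative aside): in pyparsing,
# expr*(0, None) is equivalent to ZeroOrMore(expr). A tiny demonstration:
from pyparsing import CaselessLiteral, Word, alphas

item = Word(alphas)
conj = CaselessLiteral("AND") + item
print((item + conj * (0, None)).parseString("a AND b AND c"))
# -> ['a', 'AND', 'b', 'AND', 'c']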
Example #42
0
    def __init__(self):

        self.json_query = {"query": {}, "and": [], "or": []}
        self.tokens = None
        # --------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        # --------------------------------------------------------------------------------------
        integer = Regex(r"-?[0-9]+")  # Word matches space for some reason
        double = Regex(r"-?[0-9]+\.?[0-9]*")  # escape the dot; unescaped '.' matches any character
        number = double | integer

        # --------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 0..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        # --------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
        units = CaselessLiteral("km") | CaselessLiteral("nm") | CaselessLiteral("mi")
        distance = number + units
        distance.setParseAction(lambda x: self.frame.update({"dist": float(x[0]), "units": x[1]}))

        # --------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 '"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        # --------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(lambda x: self.frame.update({"filter": x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x: self.frame.update({"index": x[0]}))
        resource_id = Regex(r'("(?:[a-zA-Z0-9])*"|\'(?:[a-zA-Z0-9]*)\')').setParseAction(removeQuotes)
        collection_id = resource_id

        # --------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES FROM" <number> "TO" <number>
        # --------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + CaselessLiteral("FROM") + number + CaselessLiteral("TO") + number
        # Add the range to the frame object
        range_query.setParseAction(lambda x: self.frame.update({"range": {"from": float(x[2]), "to": float(x[4])}}))

        # --------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        # --------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
        geo_distance.setParseAction(lambda x: self.frame.update({"lat": float(x[5]), "lon": float(x[7])}))
        geo_bbox = (
            CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        )
        geo_bbox.setParseAction(
            lambda x: self.frame.update(
                {"top_left": [float(x[5]), float(x[3])], "bottom_right": [float(x[10]), float(x[8])]}
            )
        )

        # --------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        # --------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        # Add the term to the frame object
        term_query.setParseAction(lambda x: self.frame.update({"value": x[1]}))
        geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)

        # --------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <limit-parameter> | <order-parameter> | <offset-parameter>
        # --------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(lambda x: self.frame.update({"limit": int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(lambda x: self.frame.update({"depth": int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
        order_parameter.setParseAction(lambda x: self.frame.update({"order": {x[2]: "asc"}}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(lambda x: self.frame.update({"offset": int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        # --------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query>
        # --------------------------------------------------------------------------------------
        search_query = (
            CaselessLiteral("SEARCH")
            + field
            + (range_query | term_query | geo_query)
            + CaselessLiteral("FROM")
            + index_name
            + query_parameter * (0, None)
        )
        # Add the field to the frame object
        search_query.setParseAction(lambda x: self.frame.update({"field": x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(lambda x: self.frame.update({"collection": x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(lambda x: self.frame.update({"association": x[2]}))
        query = search_query | association_query | collection_query

        # --------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        # --------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x: self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x: self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x: self.or_frame())

        self.sentence = primary_query + (intersection ^ union) * (0, None)