Пример #1
0
def get_match_first(lits, parseAction=None):
    """Combine *lits* into a single ``MatchFirst`` alternation.

    :param lits: iterable of parser elements / literals, tried in order
    :param parseAction: optional parse action attached to the combined element
    :return: the combined ``MatchFirst`` expression
    """
    # Seed with NoMatch so an empty *lits* still yields a valid element.
    el = MatchFirst(NoMatch())
    for lit in lits:
        # Idiomatic in-place alternation; the original called el.__ior__(lit)
        # directly, which is equivalent but non-idiomatic.
        el |= lit
    if parseAction:
        el.setParseAction(parseAction)
    return el
Пример #2
0
def get_match_first(lits, parseAction=None):
    """Build a ``MatchFirst`` over *lits*, optionally attaching a parse action.

    :param lits: iterable of parser elements / literals, tried in order
    :param parseAction: optional parse action attached to the combined element
    :return: the combined ``MatchFirst`` expression
    """
    # NoMatch seed keeps the result well-formed even for an empty iterable.
    el = MatchFirst(NoMatch())
    for lit in lits:
        el |= lit  # replaces the non-idiomatic direct el.__ior__(lit) call
    if parseAction:
        el.setParseAction(parseAction)
    return el
Пример #3
0
def define_simple_literals(literal_list, parseAction=None):
    """Build a ``MatchFirst`` of caseless keywords, one per entry in *literal_list*.

    :param literal_list: iterable of keyword strings
    :param parseAction: optional parse action attached to the result
    :return: the combined parser element
    """
    keywords = [CaselessKeyword(word) for word in literal_list]
    element = MatchFirst(keywords)
    if parseAction:
        element = element.setParseAction(parseAction)
    return element
Пример #4
0
def define_simple_literals(literal_list, parseAction=None):
    """Return a ``MatchFirst`` alternation of caseless keywords.

    Each string in *literal_list* becomes a ``CaselessKeyword``; if
    *parseAction* is truthy it is attached to the combined element.
    """
    alternation = MatchFirst(
        [CaselessKeyword(entry) for entry in literal_list]
    )
    if parseAction:
        alternation = alternation.setParseAction(parseAction)
    return alternation
Пример #5
0
def _get_requirements_build_gradle(path: str) -> list:
    """
    Get list of requirements from Maven project.

    Files supported are build.gradle

    :param path: Project path
    :return: list of ``(file_path, name, version)`` tuples; ``version`` is
        ``None`` when the dependency string carries no version
    """
    # The grammar does not depend on the file being scanned, so build it once
    # instead of rebuilding it for every build.gradle in the tree.
    string = MatchFirst([quotedString('"'), quotedString("'")])
    string.setParseAction(lambda x: [x[0][1:-1]])  # strip surrounding quotes

    grammars: list = [
        # compile 'group:name:version'  /  compile('package')
        Suppress(Keyword('compile') + Optional('(')) +
        string.copy()('package'),
        # compile group: '...', name: '...', version: '...'
        Suppress(Keyword('compile') + Optional('(')) +
        Suppress(Keyword('group') + ':') + string.copy()('group') +
        Suppress(',') + Suppress(Keyword('name') + ':') +
        string.copy()('name') + Suppress(',') +
        Suppress(Keyword('version') + ':') + string.copy()('version'),
    ]

    reqs = []
    for file_path in full_paths_in_dir(path):
        if not file_path.endswith('build.gradle'):
            continue

        with open(file_path, encoding='latin-1') as file_fd:
            file_content = file_fd.read()

        for grammar in grammars:
            for tokens, _, _ in grammar.scanString(file_content):
                matches = tokens.asDict()
                if 'package' in matches:
                    # Single-string form: optionally split off the version.
                    if ':' in matches['package']:
                        name, version = matches['package'].rsplit(':', 1)
                    else:
                        name, version = matches['package'], None
                    reqs.append((file_path, name, version))
                else:
                    # Keyword form: record both "group:name" and bare group.
                    reqs.append(
                        (file_path, f"{matches['group']}:{matches['name']}",
                         matches['version']))
                    reqs.append(
                        (file_path, matches['group'], matches['version']))
    return reqs
Пример #6
0
    def __init__(self):
        """Build the pyparsing grammar for the search query language.

        Initializes ``self.json_query`` (the accumulated query structure)
        and ``self.tokens``, then defines the grammar bottom-up.  Parse
        actions attached to the sub-expressions populate ``self.frame`` and
        ``self.json_query`` as input is parsed; the completed grammar is
        exposed as ``self.sentence``.

        NOTE(review): ``self.frame`` is referenced by many parse actions but
        is never initialized in this constructor — presumably it is created
        elsewhere (e.g. per-parse); verify before relying on it.
        """

        self.json_query = {'query':{}, 'and': [], 'or': []}
        self.tokens = None
        #--------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        #--------------------------------------------------------------------------------------
        integer = Regex(r'-?[0-9]+') # Word matches space for some reason
        # NOTE(review): the '.' below is unescaped, so this also matches e.g.
        # '1x5'; r'-?[0-9]+\.?[0-9]*' was likely intended — confirm.
        double = Regex(r'-?[0-9]+.?[0-9]*')
        number = double | integer

        #--------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        #--------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
        # NOTE(review): only 'km' and 'mi' are accepted here although the BNF
        # comment above also lists 'nm' — confirm which is intended.
        units = CaselessLiteral('km') | CaselessLiteral('mi')
        distance = number + units
        distance.setParseAction( lambda x : self.frame.update({'dist' : float(x[0]), 'units' : x[1]}))


        #--------------------------------------------------------------------------------------
        # Date
        #--------------------------------------------------------------------------------------
        date = python_string

        #--------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        #--------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(lambda x : self.frame.update({'filter' : x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x : self.frame.update({'index' : x[0]}))
        resource_id = Regex(r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
        collection_id = resource_id


        #--------------------------------------------------------------------------------------
        # <from-statement> ::= "FROM" <number> 
        # <to-statement>   ::= "TO" <number>
        #--------------------------------------------------------------------------------------
        from_statement = CaselessLiteral("FROM") + number
        from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
        to_statement = CaselessLiteral("TO") + number
        to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))


        #--------------------------------------------------------------------------------------
        # <date-from-statement> ::= "FROM" <date> 
        # <date-to-statement>   ::= "TO" <date>
        #--------------------------------------------------------------------------------------
        date_from_statement = CaselessLiteral("FROM") + date
        date_from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
        date_to_statement = CaselessLiteral("TO") + date
        date_to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))


        #--------------------------------------------------------------------------------------
        # <time-query> ::= "TIME FROM" <date> "TO" <date>
        #--------------------------------------------------------------------------------------
        time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
        time_query.setParseAction(lambda x : self.time_frame())
           # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))

        #--------------------------------------------------------------------------------------
        # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
        time_bounds.setParseAction(lambda x : self.time_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>        
        #--------------------------------------------------------------------------------------
        vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
        vertical_bounds.setParseAction(lambda x : self.vertical_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES" [<from-statement>] [<to-statement>]
        #--------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
        # Add the range to the frame object
        range_query.setParseAction(lambda x : self.range_frame())

        #--------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        #--------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
        geo_distance.setParseAction(lambda x : self.frame.update({'lat': float(x[5]), 'lon':float(x[7])}))
        geo_bbox = CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        geo_bbox.setParseAction(lambda x : self.frame.update({'top_left':[float(x[5]),float(x[3])], 'bottom_right':[float(x[10]),float(x[8])]}))

        #--------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <fuzzy-query>  ::= "LIKE" <field-query>
        # <match-query>  ::= "MATCH" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        #--------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        term_query.setParseAction(lambda x : self.frame.update({'value':x[1]}))

        geo_query = CaselessLiteral("GEO") + ( geo_distance | geo_bbox )

        fuzzy_query = CaselessLiteral("LIKE") + field_query
        fuzzy_query.setParseAction(lambda x : self.frame.update({'fuzzy':x[1]}))
        match_query = CaselessLiteral("MATCH") + field_query
        match_query.setParseAction(lambda x : self.frame.update({'match':x[1]}))

        #--------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <order-paramater> | <limit-parameter>
        #--------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(lambda x: self.json_query.update({'limit' : int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(lambda x: self.frame.update({'depth' : int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
        order_parameter.setParseAction(lambda x: self.json_query.update({'order' : {x[2] : 'asc'}}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(lambda x : self.json_query.update({'skip' : int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        #--------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
        #--------------------------------------------------------------------------------------
        search_query = CaselessLiteral("SEARCH") + field + (range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name
        # Add the field to the frame object
        search_query.setParseAction(lambda x : self.frame.update({'field' : x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(lambda x : self.frame.update({'collection': x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(lambda x : self.frame.update({'association':x[2]}))
        owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
        owner_query.setParseAction(lambda x : self.frame.update({'owner':x[1]}))
        query = search_query | association_query | collection_query | owner_query

        #--------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        #--------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x : self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x : self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x : self.or_frame())

        # Full sentence: one primary query, any number of AND/OR clauses,
        # then any number of trailing query parameters (LIMIT/ORDER/SKIP).
        self.sentence = primary_query + (intersection ^ union)*(0,None) + query_parameter*(0,None)
Пример #7
0
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""
    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param skip_validation: If true, build the control and identifier sub-parsers without the
         namespace/annotation data, so no validation against them is performed
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph

        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        # Share this parser's line-number accessor with the sub-parsers.
        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(
            self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(
            self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        # NOTE: this deliberately rebinds self.relation to the combined
        # alternation that includes the MatchFirst built above.
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.identifier_parser.namespace_to_terms

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.identifier_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int,
                               tokens: ParseResults):
        """Expand a nested statement into its two component relations.

        Only permitted when ``allow_nested`` was set at construction time.

        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        inner = tokens[OBJECT]

        # First, the relation between the outer subject and the inner subject.
        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[SUBJECT],
            RELATION: tokens[RELATION],
            OBJECT: inner[SUBJECT],
        })

        # Then the inner relation itself.
        self._handle_relation_harness(line, position, {
            SUBJECT: inner[SUBJECT],
            RELATION: inner[RELATION],
            OBJECT: inner[OBJECT],
        })

        return tokens

    def check_function_semantics(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Raise when the BEL function applied to the tokens is not allowed for the entity's encodings.

        :raises: InvalidFunctionSemantic
        """
        # Nothing to validate without namespace data or a namespaced identifier.
        if not self._namespace_dict or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        # Regex-defined namespaces carry no encodings, so they cannot be checked.
        if namespace in self.identifier_parser.namespace_to_pattern:
            return tokens

        # Don't check dirty names in lenient mode.
        if self._allow_naked_names and tokens[NAMESPACE] == DIRTY:
            return tokens

        valid_functions = set()
        for encoding in self._namespace_dict[namespace][name]:
            valid_functions.update(belns_encodings.get(encoding, set()))

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position,
                                namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.get_line_number(), line,
                                          position, tokens[FUNCTION],
                                          namespace, name, valid_functions)

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Register a parsed BEL term (the subject or object of a relation) in the graph."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults,
                            relation: str) -> ParseResults:
        """Add one unqualified edge per child of a list-style relation (hasMembers/hasComponents)."""
        parent = self.ensure_node(tokens[0])

        # tokens[2] holds the grouped members of the list on the right-hand side.
        for child in tokens[2]:
            self.graph.add_unqualified_edge(parent, self.ensure_node(child),
                                            relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Distribute ``p(X) hasMembers list(p(Y), p(Z), ...)`` into one hasMember edge per member."""
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, _, __,
                              tokens: ParseResults) -> ParseResults:
        """Distribute ``p(X) hasComponents list(p(Y), p(Z), ...)`` into one hasComponent edge per member."""
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier) -> str:
        """Add one qualified edge built from the parser's current citation/evidence state."""
        # LINE is a constant key, hence the dict-splat instead of a literal keyword.
        extra = {LINE: self.get_line_number()}
        return self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **extra)

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier) -> str:
        """Add a qualified edge, mirroring it in the reverse direction for two-way relations."""
        sha512 = self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        # Symmetric relations also get the reverse edge, with the modifiers swapped.
        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                subject_modifier=object_modifier,
                object_modifier=subject_modifier,
            )

        return sha512

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Assemble and add a qualified edge from a parsed relation's tokens."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        # Normalize each annotation entry (single value or set of values)
        # to a {value: True} dictionary.
        annotations = {}
        for annotation_name, annotation_entry in self.control_parser.annotations.items():
            if isinstance(annotation_entry, set):
                annotations[annotation_name] = {entry: True for entry in annotation_entry}
            else:
                annotations[annotation_name] = {annotation_entry: True}

        return self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(
            self, line: str, position: int,
            tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Validate citation, evidence, and required annotations, then handle the relation.

        The policy is fixed at instantiation and cannot be changed afterwards.
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing = self.control_parser.get_missing_required_annotations()
        if missing:
            raise MissingAnnotationWarning(self.get_line_number(), line,
                                           position, missing)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, _, __,
                                    tokens: ParseResults) -> ParseResults:
        """Add an unqualified edge for relations that need no citation or evidence."""
        source = self.ensure_node(tokens[SUBJECT])
        target = self.ensure_node(tokens[OBJECT])
        self.graph.add_unqualified_edge(source, target, tokens[RELATION])
        return tokens

    def handle_label_relation(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Handle statements like ``p(X) labeled "Label for X"``.

        Refuses to overwrite an existing description.

        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            raise RelabelWarning(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                # Report the node being relabeled; previously this passed the
                # graph's whole node view (self.graph.node), which is not a node.
                node=subject_node_dsl,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description)

        self.graph.set_node_description(subject_node_dsl, description)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Convert parsed tokens to a canonical DSL node, add it to the graph, and return it."""
        # Unwrap modifier functions (activity, degradation, ...) down to their target.
        while MODIFIER in tokens:
            tokens = tokens[TARGET]

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int,
                                     tokens: ParseResults) -> None:
        """Raise for a translocation that lacks its TO/FROM clauses.

        :raises: MalformedTranslocationWarning
        """
        raise MalformedTranslocationWarning(self.get_line_number(), line,
                                            position, tokens)
# Example #8
class BelParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces"""
    def __init__(self,
                 graph,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 allow_naked_names=False,
                 allow_nested=False,
                 allow_unqualified_translocations=False,
                 citation_clearing=True,
                 no_identifier_validation=False,
                 autostreamline=True,
                 required_annotations=None):
        """
        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_dict: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_dict: Optional[dict[str,dict[str,str]]]
        :param annotation_dict: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_dict: Optional[dict[str,set[str]]]
        :param namespace_regex: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_regex: Optional[dict[str,str]]
        :param annotation_regex: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_regex: Optional[dict[str,str]]
        :param bool allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param bool allow_nested: If true, turn off nested statement failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param bool allow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param bool no_identifier_validation: If true, skip validation of identifiers against namespaces/regexes
        :param bool autostreamline: Should the parser be streamlined on instantiation?
        :param Optional[list[str]] required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested

        self.control_parser = ControlParser(
            annotation_dict=annotation_dict,
            annotation_regex=annotation_regex,
            citation_clearing=citation_clearing,
            required_annotations=required_annotations,
        )

        # Identifier validation is skipped entirely when requested by omitting
        # the namespace data from the identifier parser.
        if no_identifier_validation:
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_dict=namespace_dict,
                namespace_regex=namespace_regex,
            )

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = ProteinModificationParser(self.identifier_parser).language

        #: `2.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_variant_var>`_
        self.variant = VariantParser().language

        #: `2.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_proteolytic_fragments>`_
        self.fragment = FragmentParser().language

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = LocationParser(self.identifier_parser).language
        opt_location = Optional(WCW + self.location)

        #: DEPRECATED: `2.2.X Amino Acid Substitutions <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_amino_acid_substitutions>`_
        self.psub = ProteinSubstitutionParser().language

        #: DEPRECATED: `2.2.X Sequence Variations <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_sequence_variations>`_
        self.gsub = GeneSubstitutionParser().language

        #: DEPRECATED
        #: `Truncated proteins <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_truncated_proteins>`_
        self.trunc = TruncationParser().language

        #: PyBEL BEL Specification variant
        self.gmod = GeneModificationParser(
        ).language  # FIXME add identifier parser to this

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = FusionParser(self.identifier_parser).language

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant | self.gsub | self.gmod))
            (VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(build_legacy_fusion(identifier,
                                                            'c'))(FUSION)

        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)
        """`2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_"""

        self.mirna_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant))(VARIANTS)) + opt_location

        self.mirna = mirna_tag + nest(self.mirna_modified)
        """`2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_"""

        self.protein_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(
                Group(
                    MatchFirst([
                        self.pmod, self.variant, self.fragment, self.psub,
                        self.trunc
                    ])))(VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(build_legacy_fusion(
            identifier, 'p'))(FUSION)

        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)
        """`2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_"""

        self.rna_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(build_legacy_fusion(identifier,
                                                           'r'))(FUSION)

        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)
        """`2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_"""

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        self.activity = self.activity_standard | self.activity_legacy
        """`2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_"""

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        # Unqualified translocations are errors unless explicitly allowed.
        if not allow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])
        """`2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_"""

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        # A BEL term is any of the three top-level grammatical categories.
        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        # BEL Term to BEL Term Relationships

        #: `3.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xincreases>`_
        increases_tag = oneOf(['->', '→', 'increases'
                               ]).setParseAction(replaceWith(INCREASES))

        #: `3.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdIncreases>`_
        directly_increases_tag = one_of_tags(['=>', '⇒', 'directlyIncreases'],
                                             DIRECTLY_INCREASES)

        #: `3.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xdecreases>`_
        decreases_tag = one_of_tags(['-|', 'decreases'], DECREASES)

        #: `3.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdDecreases>`_
        directly_decreases_tag = one_of_tags(['=|', 'directlyDecreases'],
                                             DIRECTLY_DECREASES)

        #: `3.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_analogous>`_
        analogous_tag = one_of_tags(['analogousTo'], ANALOGOUS_TO)

        #: `3.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xcnc>`_
        causes_no_change_tag = one_of_tags(['cnc', 'causesNoChange'],
                                           CAUSES_NO_CHANGE)

        #: `3.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_regulates_reg>`_
        regulates_tag = one_of_tags(['reg', 'regulates'], REGULATES)

        #: `3.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XnegCor>`_
        negative_correlation_tag = one_of_tags(['neg', 'negativeCorrelation'],
                                               NEGATIVE_CORRELATION)

        #: `3.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XposCor>`_
        positive_correlation_tag = one_of_tags(['pos', 'positiveCorrelation'],
                                               POSITIVE_CORRELATION)

        #: `3.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xassociation>`_
        association_tag = one_of_tags(['--', 'association'], ASSOCIATION)

        #: `3.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_orthologous>`_
        orthologous_tag = one_of_tags(['orthologous'], ORTHOLOGOUS)

        #: `3.4.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_isa>`_
        is_a_tag = Keyword(IS_A)

        #: PyBEL Variants
        equivalent_tag = one_of_tags(['eq', EQUIVALENT_TO], EQUIVALENT_TO)
        partof_tag = Keyword(PART_OF)

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        rate_limit_tag = oneOf(['rateLimitingStepOf']).setParseAction(
            replaceWith(RATE_LIMITING_STEP_OF))
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        subprocess_of_tag = oneOf(['subProcessOf']).setParseAction(
            replaceWith(SUBPROCESS_OF))
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        transcribed_tag = oneOf([':>', 'transcribedTo'
                                 ]).setParseAction(replaceWith(TRANSCRIBED_TO))
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        translated_tag = oneOf(['>>', 'translatedTo'
                                ]).setParseAction(replaceWith(TRANSLATED_TO))
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        has_member_tag = oneOf(['hasMember'
                                ]).setParseAction(replaceWith(HAS_MEMBER))
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        has_members_tag = oneOf(['hasMembers'])
        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        has_components_tag = oneOf(['hasComponents'])
        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        has_component_tag = oneOf(['hasComponent']).setParseAction(
            replaceWith(HAS_COMPONENT))
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        #: `3.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biomarkerfor>`_
        biomarker_tag = oneOf(['biomarkerFor'
                               ]).setParseAction(replaceWith(BIOMARKER_FOR))

        #: `3.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_prognosticbiomarkerfor>`_
        prognostic_biomarker_tag = oneOf([
            'prognosticBiomarkerFor'
        ]).setParseAction(replaceWith(PROGONSTIC_BIOMARKER_FOR))

        biomarker_tags = biomarker_tag | prognostic_biomarker_tag

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        has_variant_tags = oneOf(['hasVariant'
                                  ]).setParseAction(replaceWith(HAS_VARIANT))
        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)

        has_reactant_tags = oneOf(['hasReactant'
                                   ]).setParseAction(replaceWith(HAS_REACTANT))
        has_product_tags = oneOf(['hasProduct'
                                  ]).setParseAction(replaceWith(HAS_PRODUCT))
        part_of_reaction_tags = has_reactant_tags | has_product_tags
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        # Qualified relations: these require citation/evidence and go through
        # the relation harness.
        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        # Unqualified relations: structural edges that need no citation/evidence.
        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BelParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def namespace_dict(self):
        """Mapping of {namespace: {name: encoding}} exposed by the internal identifier parser.

        :rtype: dict[str,dict[str,str]]
        """
        parser = self.identifier_parser
        return parser.namespace_dict

    @property
    def namespace_regex(self):
        """Mapping of {namespace keyword: compiled regular expression} from the internal identifier parser.

        :rtype: dict[str,re]
        """
        parser = self.identifier_parser
        return parser.namespace_regex_compiled

    @property
    def annotation_dict(self):
        """Mapping of each annotation to its set of allowed values, held by the control parser.

        :rtype: dict[str,set[str]]
        """
        control = self.control_parser
        return control.annotation_dict

    @property
    def annotation_regex(self):
        """Mapping of regex-defined annotations: {annotation keyword: string regular expression}, from the control parser.

        :rtype: dict[str,str]
        """
        control = self.control_parser
        return control.annotation_regex

    @property
    def allow_naked_names(self):
        """Whether the identifier parser accepts names without a namespace instead of raising.

        :rtype: bool
        """
        parser = self.identifier_parser
        return parser.allow_naked_names

    def get_annotations(self):
        """Return the annotations currently active in the control parser.

        :rtype: dict
        """
        control = self.control_parser
        return control.get_annotations()

    def clear(self):
        """Reset parser state: wipe the graph, then the control parser's data (current citation, annotations, and statement group)."""
        for component in (self.graph, self.control_parser):
            component.clear()

    def handle_nested_relation(self, line, position, tokens):
        """Unroll a nested statement ``a rel (b rel c)`` into two plain relations.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: NestedRelationWarning if :code:`allow_nested` is False
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.line_number, line, position)

        nested = tokens[OBJECT]

        # Outer relation: its object is the subject of the inner statement
        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[SUBJECT],
            RELATION: tokens[RELATION],
            OBJECT: nested[SUBJECT],
        })

        # Inner relation, emitted as-is
        self._handle_relation_harness(line, position, {
            SUBJECT: nested[SUBJECT],
            RELATION: nested[RELATION],
            OBJECT: nested[OBJECT],
        })
        return tokens

    def check_function_semantics(self, line, position, tokens):
        """Raise if the BEL function applied to the identifier is not permitted by its namespace encoding.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: InvalidFunctionSemantic
        """
        # Nothing to validate without namespace data or a namespace on the tokens
        if self.namespace_dict is None or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        # Regex-defined namespaces have no encoding table to check against
        if namespace in self.namespace_regex:
            return tokens

        # Lenient mode: dirty (naked) names are never semantically checked
        if self.allow_naked_names and tokens[NAMESPACE] == DIRTY:
            return tokens

        valid_functions = set()
        for encoding in self.namespace_dict[namespace][name]:
            valid_functions.update(belns_encodings[encoding])

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.line_number, line, position,
                                          tokens[FUNCTION], namespace, name,
                                          valid_functions)

        return tokens

    def handle_term(self, line, position, tokens):
        """Handles BEL terms (the subject and object of BEL relations) by making sure their node is in the graph.

        The tokens are returned unchanged so parsing can continue.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens, relation):
        """Shared implementation of :meth:`handle_has_members` and :meth:`handle_has_components`.

        Adds an unqualified edge with *relation* from the parent term
        (``tokens[0]``) to every child term in the list (``tokens[2]``).
        """
        parent_tuple, _ = self.ensure_node(tokens[0])

        for child in tokens[2]:
            child_tuple, _ = self.ensure_node(child)
            self.graph.add_unqualified_edge(parent_tuple, child_tuple,
                                            relation)

        return tokens

    def handle_has_members(self, line, position, tokens):
        """Handles list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``

        Delegates to :meth:`_handle_list_helper` with the ``hasMember`` relation.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, line, position, tokens):
        """Handles list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``

        Delegates to :meth:`_handle_list_helper` with the ``hasComponent`` relation.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier):
        """Insert a single qualified edge using the control parser's current citation and evidence."""
        control = self.control_parser
        self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=control.evidence,
            # Copy so later citation changes don't retroactively alter this edge
            citation=control.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.line_number})

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier):
        """Add a qualified edge, mirroring it in the reverse direction for symmetric relations."""
        self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        if relation not in TWO_WAY_RELATIONS:
            return

        # Symmetric relation: add the mirror edge with the modifiers swapped
        self._add_qualified_edge_helper(
            v,
            u,
            relation=relation,
            annotations=annotations,
            subject_modifier=object_modifier,
            object_modifier=subject_modifier,
        )

    def _handle_relation(self, tokens):
        """Store a qualified relation; every annotation value is normalized to a {value: True} mapping.

        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        source_tuple, _ = self.ensure_node(tokens[SUBJECT])
        target_tuple, _ = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        annotations = {}
        for annotation_name, annotation_entry in self.control_parser.annotations.items():
            if isinstance(annotation_entry, set):
                annotations[annotation_name] = {value: True for value in annotation_entry}
            else:
                # Single values get the same shape as multi-valued annotations
                annotations[annotation_name] = {annotation_entry: True}

        self._add_qualified_edge(
            source_tuple,
            target_tuple,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(self, line, position, tokens):
        """Validate required metadata, then delegate to :meth:`_handle_relation`.

        The relation policy is fixed when the parser is instantiated and
        cannot be changed afterwards.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: MissingCitationException, MissingSupportWarning, MissingAnnotationWarning
        """
        control = self.control_parser

        if not control.citation:
            raise MissingCitationException(self.line_number, line, position)

        if not control.evidence:
            raise MissingSupportWarning(self.line_number, line, position)

        missing = control.get_missing_required_annotations()
        if missing:
            raise MissingAnnotationWarning(self.line_number, line, position,
                                           missing)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, line, position, tokens):
        """Add an unqualified edge (no citation/evidence required) for the parsed relation.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        source_tuple, _ = self.ensure_node(tokens[SUBJECT])
        target_tuple, _ = self.ensure_node(tokens[OBJECT])
        self.graph.add_unqualified_edge(source_tuple, target_tuple,
                                        tokens[RELATION])

    def handle_label_relation(self, line, position, tokens):
        """Attach a description to a node for statements like ``p(X) label "Label for X"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RelabelWarning if the node was already given a description
        """
        subject_tuple, _ = self.ensure_node(tokens[SUBJECT])
        new_label = tokens[OBJECT]

        # Refuse to silently overwrite an existing label
        if self.graph.has_node_description(subject_tuple):
            raise RelabelWarning(
                line_number=self.line_number,
                line=line,
                position=position,
                node=self.graph.node,
                old_label=self.graph.get_node_description(subject_tuple),
                new_label=new_label)

        self.graph.set_node_description(subject_tuple, new_label)

    def ensure_node(self, tokens):
        """Canonicalize parsed tokens into a node and make sure it exists in the graph.

        :param pyparsing.ParseResult tokens: Tokens from PyParsing
        :return: A pair of the PyBEL node tuple and the PyBEL node data dictionary
        :rtype: tuple[tuple, dict]
        """
        # Modified terms wrap the real node under TARGET; recurse on the
        # wrapped term so the node itself (not the modifier) is stored.
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        attr_dict = po_to_dict(tokens)
        return self.graph.add_node_from_data(attr_dict), attr_dict

    def handle_translocation_illegal(self, line, position, tokens):
        """Raise a :exc:`MalformedTranslocationWarning` for a malformed translocation statement.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: MalformedTranslocationWarning
        """
        raise MalformedTranslocationWarning(self.line_number, line, position,
                                            tokens)
Пример #9
0
    def __init__(self):
        """Build the pyparsing grammar for the search-query language.

        The parse actions attached below translate the query into
        ``self.json_query`` by mutating parser state as a side effect of
        parsing, so an instance accumulates one query at a time via
        :attr:`sentence`.

        NOTE(review): ``self.frame`` is read/updated by many parse actions but
        is never initialised in this method -- presumably it is created by
        ``push_frame``/related helpers elsewhere in the class; confirm.
        """
        self.json_query = {'query': {}, 'and': [], 'or': []}
        self.tokens = None
        #--------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        #--------------------------------------------------------------------------------------
        integer = Regex(r'-?[0-9]+')  # Word matches space for some reason
        # Fixed: the dot was previously unescaped (r'-?[0-9]+.?[0-9]*'), which
        # let ANY character follow the integer part (e.g. '1x2' matched); the
        # BNF comment above shows a literal '.' was intended.
        double = Regex(r'-?[0-9]+\.?[0-9]*')
        number = double | integer

        #--------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        #--------------------------------------------------------------------------------------
        # NOTE(review): setParseAction here mutates the shared module-level
        # ``quotedString`` element, not a copy.
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(
            r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(
                removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(
            removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral(
            "LON") + number
        # NOTE(review): the BNF above also lists 'nm', but it is not implemented here
        units = CaselessLiteral('km') | CaselessLiteral('mi')
        distance = number + units
        distance.setParseAction(lambda x: self.frame.update({
            'dist': float(x[0]),
            'units': x[1]
        }))

        #--------------------------------------------------------------------------------------
        # Date
        #--------------------------------------------------------------------------------------
        date = python_string

        #--------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        #--------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(
            lambda x: self.frame.update({'filter': x[1]}))
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x: self.frame.update({'index': x[0]}))
        resource_id = Regex(
            r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')'
        ).setParseAction(removeQuotes)
        collection_id = resource_id

        #--------------------------------------------------------------------------------------
        # <from-statement> ::= "FROM" <number>
        # <to-statement>   ::= "TO" <number>
        #--------------------------------------------------------------------------------------
        from_statement = CaselessLiteral("FROM") + number
        from_statement.setParseAction(
            lambda x: self.frame.update({'from': x[1]}))
        to_statement = CaselessLiteral("TO") + number
        to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))

        #--------------------------------------------------------------------------------------
        # <date-from-statement> ::= "FROM" <date>
        # <date-to-statement>   ::= "TO" <date>
        #--------------------------------------------------------------------------------------
        date_from_statement = CaselessLiteral("FROM") + date
        date_from_statement.setParseAction(
            lambda x: self.frame.update({'from': x[1]}))
        date_to_statement = CaselessLiteral("TO") + date
        date_to_statement.setParseAction(
            lambda x: self.frame.update({'to': x[1]}))

        #--------------------------------------------------------------------------------------
        # <time-query> ::= "TIME FROM" <date> "TO" <date>
        #--------------------------------------------------------------------------------------
        time_query = CaselessLiteral("TIME") + Optional(
            date_from_statement) + Optional(date_to_statement)
        time_query.setParseAction(lambda x: self.time_frame())
        # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))

        #--------------------------------------------------------------------------------------
        # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        time_bounds = CaselessLiteral(
            "TIMEBOUNDS") + date_from_statement + date_to_statement
        time_bounds.setParseAction(lambda x: self.time_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
        #--------------------------------------------------------------------------------------
        vertical_bounds = CaselessLiteral(
            "VERTICAL") + from_statement + to_statement
        vertical_bounds.setParseAction(lambda x: self.vertical_bounds_frame())

        #--------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES" [<from-statement>] [<to-statement>]
        #--------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + Optional(
            from_statement) + Optional(to_statement)
        # Add the range to the frame object
        range_query.setParseAction(lambda x: self.range_frame())

        #--------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        #--------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral(
            "DISTANCE") + distance + CaselessLiteral("FROM") + coords
        # Positional indexes x[5]/x[7] pick the LAT and LON numbers out of the
        # flat token stream produced above.
        geo_distance.setParseAction(
            lambda x: self.frame.update({
                'lat': float(x[5]),
                'lon': float(x[7])
            }))
        geo_bbox = CaselessLiteral("BOX") + CaselessLiteral(
            "TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        geo_bbox.setParseAction(lambda x: self.frame.update({
            'top_left': [float(x[5]), float(x[3])],
            'bottom_right': [float(x[10]), float(x[8])]
        }))

        #--------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <fuzzy-query>  ::= "LIKE" <field-query>
        # <match-query>  ::= "MATCH" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        #--------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        term_query.setParseAction(lambda x: self.frame.update({'value': x[1]}))

        geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)

        fuzzy_query = CaselessLiteral("LIKE") + field_query
        fuzzy_query.setParseAction(
            lambda x: self.frame.update({'fuzzy': x[1]}))
        match_query = CaselessLiteral("MATCH") + field_query
        match_query.setParseAction(
            lambda x: self.frame.update({'match': x[1]}))

        #--------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <order-paramater> | <limit-parameter>
        #--------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(
            lambda x: self.json_query.update({'limit': int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(
            lambda x: self.frame.update({'depth': int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral(
            "BY") + limited_string
        order_parameter.setParseAction(
            lambda x: self.json_query.update({'order': {
                x[2]: 'asc'
            }}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(
            lambda x: self.json_query.update({'skip': int(x[1])}))
        query_parameter = limit_parameter | order_parameter | offset_parameter

        #--------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
        #--------------------------------------------------------------------------------------
        search_query = CaselessLiteral("SEARCH") + field + (
            range_query | term_query | fuzzy_query | match_query
            | vertical_bounds | time_bounds | time_query
            | geo_query) + CaselessLiteral("FROM") + index_name
        # Add the field to the frame object
        search_query.setParseAction(
            lambda x: self.frame.update({'field': x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(
            lambda x: self.frame.update({'collection': x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral(
            "TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(
            lambda x: self.frame.update({'association': x[2]}))
        owner_query = CaselessLiteral("HAS") + resource_id + Optional(
            depth_parameter)
        owner_query.setParseAction(
            lambda x: self.frame.update({'owner': x[1]}))
        query = search_query | association_query | collection_query | owner_query

        #--------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        #--------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x: self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x: self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x: self.or_frame())

        self.sentence = primary_query + (intersection ^ union) * (
            0, None) + query_parameter * (0, None)
Пример #10
0
def evaluator(variables, functions, string, cs=False):
    """
    Evaluate an expression. Variables are passed as a dictionary
    from string to value. Unary functions are passed as a dictionary
    from string to function. Variables must be floats.
    cs: Case sensitive

    """

    all_variables = copy.copy(DEFAULT_VARIABLES)
    all_functions = copy.copy(DEFAULT_FUNCTIONS)
    all_variables.update(variables)
    all_functions.update(functions)

    if not cs:
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal

    # Fixed: ``all_variables.keys() + all_functions.keys()`` only works on
    # Python 2 (where keys() returns a list); Python 3 dict views can't be
    # concatenated with '+'.  A set union is equivalent on both versions.
    check_variables(string_cs, set(all_variables) | set(all_functions))

    if string.strip() == "":
        return float('nan')

    # SI suffixes and percent
    number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()])
    plus_minus = Literal('+') | Literal('-')
    times_div = Literal('*') | Literal('/')

    number_part = Word(nums)

    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional("." + Optional(number_part))) | (
        "." + number_part)
    # by default pyparsing allows spaces between tokens--Combine prevents that
    inner_number = Combine(inner_number)

    # 0.33k or -17
    number = (
        inner_number +
        Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) +
        Optional(number_suffix))
    number.setParseAction(number_parse_action)  # Convert to number

    # Predefine recursive variables
    expr = Forward()

    # Handle variables passed in.
    #  E.g. if we have {'R':0.5}, we make the substitution.
    # We sort the list so that var names (like "e2") match before
    # mathematical constants (like "e"). This is kind of a hack.
    all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True)
    varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys])
    varnames.setParseAction(lambda x: [all_variables[k] for k in x])

    # if all_variables were empty, then pyparsing wants
    # varnames = NoMatch()
    # this is not the case, as all_variables contains the defaults

    # Same thing for functions.
    all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True)
    funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys])
    function = funcnames + Suppress("(") + expr + Suppress(")")
    function.setParseAction(lambda x: [all_functions[x[0]](x[1])])

    atom = number | function | varnames | Suppress("(") + expr + Suppress(")")

    # Do the following in the correct order to preserve order of operation
    pow_term = atom + ZeroOrMore(Suppress("^") + atom)
    pow_term.setParseAction(exp_parse_action)  # 7^6
    par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term)  # 5k || 4k
    par_term.setParseAction(parallel)
    prod_term = par_term + ZeroOrMore(times_div + par_term)  # 7 * 5 / 4 - 3
    prod_term.setParseAction(prod_parse_action)
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(
        plus_minus + prod_term)  # -5 + 4 - 3
    sum_term.setParseAction(sum_parse_action)
    expr << sum_term  # finish the recursion
    return (expr + stringEnd).parseString(string)[0]
Пример #11
0
# Decimal literals like '3.14', '-.5', '10.0': the integer part is optional,
# but the decimal point and fractional digits are required, so plain integers
# fall through to INT_CONSTANT.
FLOAT_CONSTANT = Regex(r'-?\d*\.\d+')

# Alternatives are tried left to right, so FLOAT_CONSTANT is attempted before
# INT_CONSTANT and '1.5' is not half-consumed as the integer '1'.
CONSTANT = STRING_CONSTANT | FLOAT_CONSTANT | INT_CONSTANT

# All constants should be interpreted in Python format
CONSTANT.setParseAction(lambda toks: ast.literal_eval(toks[0]))

COMMA = Literal(',')

# Bracketed list of one or more constants; a single trailing comma is allowed.
LIST = Literal('[') + CONSTANT + ZeroOrMore(COMMA + CONSTANT) + Optional(
    COMMA) + Literal(']')

# A field reference: either a bare word (letter followed by alphanumerics) or
# an arbitrary name wrapped in square brackets, e.g. [some name].
IDENTIFIER = MatchFirst(
    [Word(alphas, alphanums),
     QuotedString('[', endQuoteChar=']')])
IDENTIFIER.setParseAction(lambda toks: Where(toks[0]))

VALUE = CONSTANT | IDENTIFIER

# Comparison operators.  r'==?' accepts both '=' and '=='; the two-character
# operators are listed before bare '<'/'>' so MatchFirst prefers the longer
# match.
OPERATOR = MatchFirst([
    Regex(r'==?'),
    Literal('!='),
    Literal('<>'),
    Literal('>='),
    Literal('<='),
    Literal('>'),
    Literal('<'),
    CaselessKeyword('like'),
])

Пример #12
0
def evaluator(variables, functions, string, cs=False):
    """
    Evaluate an expression. Variables are passed as a dictionary
    from string to value. Unary functions are passed as a dictionary
    from string to function. Variables must be floats.
    cs: Case sensitive

    """

    all_variables = copy.copy(DEFAULT_VARIABLES)
    all_functions = copy.copy(DEFAULT_FUNCTIONS)
    all_variables.update(variables)
    all_functions.update(functions)

    if not cs:
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal

    # Fixed: ``all_variables.keys() + all_functions.keys()`` only works on
    # Python 2 (where keys() returns a list); Python 3 dict views can't be
    # concatenated with '+'.  A set union is equivalent on both versions.
    check_variables(string_cs, set(all_variables) | set(all_functions))

    if string.strip() == "":
        return float('nan')

    # SI suffixes and percent
    number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()])
    plus_minus = Literal('+') | Literal('-')
    times_div = Literal('*') | Literal('/')

    number_part = Word(nums)

    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part)
    # by default pyparsing allows spaces between tokens--Combine prevents that
    inner_number = Combine(inner_number)

    # 0.33k or -17
    number = (inner_number
              + Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part)
              + Optional(number_suffix))
    number.setParseAction(number_parse_action)  # Convert to number

    # Predefine recursive variables
    expr = Forward()

    # Handle variables passed in.
    #  E.g. if we have {'R':0.5}, we make the substitution.
    # We sort the list so that var names (like "e2") match before
    # mathematical constants (like "e"). This is kind of a hack.
    all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True)
    varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys])
    varnames.setParseAction(
        lambda x: [all_variables[k] for k in x]
    )

    # if all_variables were empty, then pyparsing wants
    # varnames = NoMatch()
    # this is not the case, as all_variables contains the defaults

    # Same thing for functions.
    all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True)
    funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys])
    function = funcnames + Suppress("(") + expr + Suppress(")")
    function.setParseAction(
        lambda x: [all_functions[x[0]](x[1])]
    )

    atom = number | function | varnames | Suppress("(") + expr + Suppress(")")

    # Do the following in the correct order to preserve order of operation
    pow_term = atom + ZeroOrMore(Suppress("^") + atom)
    pow_term.setParseAction(exp_parse_action)  # 7^6
    par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term)  # 5k || 4k
    par_term.setParseAction(parallel)
    prod_term = par_term + ZeroOrMore(times_div + par_term)  # 7 * 5 / 4 - 3
    prod_term.setParseAction(prod_parse_action)
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(plus_minus + prod_term)  # -5 + 4 - 3
    sum_term.setParseAction(sum_parse_action)
    expr << sum_term  # finish the recursion
    return (expr + stringEnd).parseString(string)[0]
Пример #13
0
def _build_field_expr(field_names):
    """Build a parser element matching any of *field_names* (case-insensitively) and wrapping the match in ``Field``."""
    keywords = [CaselessKeyword(name) for name in field_names]
    expr = MatchFirst(keywords)
    expr.setParseAction(lambda toks: Field(toks[0]))
    return expr
Пример #14
0
# // BATTLE VERBS
# oneOf positional arguments here: caseless=True, useRegex=False
RUN_AWAY = oneOf(['run', 'run away', 'flee'], True, False)
ENGAGE = oneOf(['attack', 'pounce', 'fight', 'engage'], True, False)
BLOCK = oneOf(['dodge', 'parry', 'block'], True, False)


def set_target(t):
    """Wrap the parsed tokens *t* in a one-element ``target`` result list.

    The result is printed (debug output) and returned unchanged.
    """
    result = [{'target': list(t)}]
    print(result)
    return result


# // NORMALIZE OUR DIRECTIONS
# Replace each matched direction token with a canonical dict carrying the
# direction name and a numeric key (0=NORTH, 1=EAST, 2=SOUTH, ...), so
# downstream code need not care which spelling the player typed.
# NORTH/EAST/SOUTH are parser elements defined elsewhere in this file.
NORTH.setParseAction(lambda: [
    {
        'direction': 'NORTH',
        'key': 0
    },
])
EAST.setParseAction(lambda: [
    {
        'direction': 'EAST',
        'key': 1
    },
])
SOUTH.setParseAction(lambda: [
    {
        'direction': 'SOUTH',
        'key': 2
    },
])
WEST.setParseAction(lambda: [
Пример #15
0
    def __init__(self):
        """Build the pyparsing grammar for this search-query language.

        The grammar's parse actions work by side effect: they accumulate
        key/value pairs into ``self.frame`` and then fold completed frames
        into ``self.json_query`` via ``push_frame`` / ``and_frame`` /
        ``or_frame``.  The parser is therefore stateful and not re-entrant.
        NOTE(review): ``self.frame`` is never initialized in the visible
        code -- presumably ``push_frame`` or a superclass resets it; verify.
        """

        self.json_query = {"query": {}, "and": [], "or": []}
        self.tokens = None
        # --------------------------------------------------------------------------------------
        # <integer> ::= 0-9
        # <double>  ::= 0-9 ('.' 0-9)
        # <number>  ::= <integer> | <double>
        # --------------------------------------------------------------------------------------
        integer = Regex(r"-?[0-9]+")  # Word matches space for some reason
        # NOTE(review): the '.' below is unescaped, so it matches ANY single
        # character (e.g. "1x5" parses as a double); likely intended to be
        # r"-?[0-9]+\.?[0-9]*".  Left unchanged to preserve behavior.
        double = Regex(r"-?[0-9]+.?[0-9]*")
        # double must be tried first so "1.5" is not truncated to "1".
        number = double | integer

        # --------------------------------------------------------------------------------------
        # <python-string>   ::= (String surrounded by double-quotes)
        # <wildcard-string> ::= <python-string>
        # <limited-string>  ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
        # <field>           ::= <limited-string> | "*"
        # <coords>          ::= "LAT" <number> "LON" <number>
        # <units>           ::= ('km' | 'mi' | 'nm')
        # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
        # --------------------------------------------------------------------------------------
        python_string = quotedString.setParseAction(removeQuotes)
        wildcard_string = python_string
        limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
        field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
        coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
        units = CaselessLiteral("km") | CaselessLiteral("nm") | CaselessLiteral("mi")
        distance = number + units
        # x[0] is the number token, x[1] the unit keyword.
        distance.setParseAction(lambda x: self.frame.update({"dist": float(x[0]), "units": x[1]}))

        # --------------------------------------------------------------------------------------
        # <query-filter> ::= "FILTER" <python-string>
        # <index-name>   ::= <python-string>
        # <resource-id>  ::= '"' a..z A..Z 0..9 '"' (alpha nums surrounded by double quotes)
        # <collection-id> ::= <resource-id>
        # --------------------------------------------------------------------------------------
        query_filter = CaselessLiteral("FILTER") + python_string
        # Add the filter to the frame object
        query_filter.setParseAction(lambda x: self.frame.update({"filter": x[1]}))
        # MatchFirst with a single alternative creates a NEW element, so the
        # parse action below does not clobber python_string's own action --
        # presumably that is why it is wrapped; verify.
        index_name = MatchFirst(python_string)
        # Add the index to the frame object
        index_name.setParseAction(lambda x: self.frame.update({"index": x[0]}))
        resource_id = Regex(r'("(?:[a-zA-Z0-9])*"|\'(?:[a-zA-Z0-9]*)\')').setParseAction(removeQuotes)
        collection_id = resource_id

        # --------------------------------------------------------------------------------------
        # <range-query>  ::= "VALUES FROM" <number> "TO" <number>
        # --------------------------------------------------------------------------------------
        range_query = CaselessLiteral("VALUES") + CaselessLiteral("FROM") + number + CaselessLiteral("TO") + number
        # Add the range to the frame object.  Flat token indices: x[2] and
        # x[4] are the two numbers ("VALUES"=0, "FROM"=1, "TO"=3).
        range_query.setParseAction(lambda x: self.frame.update({"range": {"from": float(x[2]), "to": float(x[4])}}))

        # --------------------------------------------------------------------------------------
        # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
        # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
        # --------------------------------------------------------------------------------------
        geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
        # x[5]/x[7] are the LAT/LON numbers in the flattened token list.
        geo_distance.setParseAction(lambda x: self.frame.update({"lat": float(x[5]), "lon": float(x[7])}))
        geo_bbox = (
            CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
        )
        # Corner coordinates are stored [lon, lat] -- note the swapped order
        # relative to the LAT/LON input (GeoJSON-style ordering, presumably).
        geo_bbox.setParseAction(
            lambda x: self.frame.update(
                {"top_left": [float(x[5]), float(x[3])], "bottom_right": [float(x[10]), float(x[8])]}
            )
        )

        # --------------------------------------------------------------------------------------
        # <field-query>  ::= <wildcard-string>
        # <term-query>   ::= "IS" <field-query>
        # <geo-query>    ::= "GEO" ( <geo-distance> | <geo-bbox> )
        # --------------------------------------------------------------------------------------
        field_query = wildcard_string
        term_query = CaselessLiteral("IS") + field_query
        # Add the term to the frame object
        term_query.setParseAction(lambda x: self.frame.update({"value": x[1]}))
        geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)

        # --------------------------------------------------------------------------------------
        # <limit-parameter>  ::= "LIMIT" <integer>
        # <depth-parameter>  ::= "DEPTH" <integer>
        # <order-parameter>  ::= "ORDER" "BY" <limited-string>
        # <offset-parameter> ::= "SKIP" <integer>
        # <query-parameter>  ::= <order-paramater> | <limit-parameter>
        # --------------------------------------------------------------------------------------
        limit_parameter = CaselessLiteral("LIMIT") + integer
        limit_parameter.setParseAction(lambda x: self.frame.update({"limit": int(x[1])}))
        depth_parameter = CaselessLiteral("DEPTH") + integer
        depth_parameter.setParseAction(lambda x: self.frame.update({"depth": int(x[1])}))
        order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
        # Ordering is always ascending on the named field.
        order_parameter.setParseAction(lambda x: self.frame.update({"order": {x[2]: "asc"}}))
        offset_parameter = CaselessLiteral("SKIP") + integer
        offset_parameter.setParseAction(lambda x: self.frame.update({"offset": int(x[1])}))
        # NOTE(review): depth_parameter is deliberately excluded here -- it is
        # only valid inside association_query below.
        query_parameter = limit_parameter | order_parameter | offset_parameter

        # --------------------------------------------------------------------------------------
        # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
        # <collection-query>  ::= "IN <collection-id>"
        # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
        # <query>             ::= <search-query> | <association-query> | <collection-query>
        # --------------------------------------------------------------------------------------
        search_query = (
            CaselessLiteral("SEARCH")
            + field
            + (range_query | term_query | geo_query)
            + CaselessLiteral("FROM")
            + index_name
            # "expr * (0, None)" is pyparsing for ZeroOrMore(expr).
            + query_parameter * (0, None)
        )
        # Add the field to the frame object
        search_query.setParseAction(lambda x: self.frame.update({"field": x[1]}))
        collection_query = CaselessLiteral("IN") + collection_id
        collection_query.setParseAction(lambda x: self.frame.update({"collection": x[1]}))
        association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
        # Add the association to the frame object
        association_query.setParseAction(lambda x: self.frame.update({"association": x[2]}))
        query = search_query | association_query | collection_query

        # --------------------------------------------------------------------------------------
        # <primary-query>  ::= <query> [<query-filter>]
        # <atom>           ::= <query>
        # <intersection>   ::= "AND" <atom>
        # <union>          ::= "OR" <atom>
        # <sentence>       ::= <primary-query> [<intersection>]* [<union>]*
        # --------------------------------------------------------------------------------------
        primary_query = query + Optional(query_filter)
        # Set the primary query on the json_query to the frame and clear the frame
        primary_query.setParseAction(lambda x: self.push_frame())
        atom = query
        intersection = CaselessLiteral("AND") + atom
        # Add an AND operation to the json_query and clear the frame
        intersection.setParseAction(lambda x: self.and_frame())
        union = CaselessLiteral("OR") + atom
        # Add an OR operation to the json_query and clear the frame
        union.setParseAction(lambda x: self.or_frame())

        # Top-level rule: one primary query followed by any mix of AND/OR clauses.
        self.sentence = primary_query + (intersection ^ union) * (0, None)