def mlsqlparser():
    """Build the top-level MLSQL grammar.

    A statement is either a bare LOAD command or the full
    READ REPLACE SPLIT CLASSIFY REGRESS CLUSTER SAVE pipeline;
    comments are ignored everywhere in the input.
    """
    # Keyword sub-grammars
    LOAD = define_load()
    READ = define_read()
    SPLIT = define_split()
    REGRESS = define_regress()
    CLASSIFY = define_classify()
    CLUSTER = define_cluster()
    REPLACE = define_replace()
    SAVE = define_save()

    # Comment grammar, suppressed from the parse
    comment = _define_comment()

    # Full pipeline clause: every stage, in order.
    pipeline = (READ + REPLACE + SPLIT + CLASSIFY + REGRESS
                + CLUSTER + SAVE)

    # Try the full pipeline first, then fall back to LOAD.
    grammar = MatchFirst([pipeline, LOAD])
    return grammar.ignore(comment)
def parse_select_columns(string):
    """Parse a select query and return its column expressions.

    Args:
        string(str): Input string to be parsed

    Returns:
        result(list of str): List of columns
    """
    if string == "":
        return list()

    # A leading WITH clause contributes no output columns; strip it first.
    if string.upper().startswith("WITH"):
        with_clause = _with + delimitedList(_db_name + _as + subquery)
        string = with_clause.suppress().transformString(string)

    # Everything from the first FROM onwards is irrelevant for columns.
    from_tail = MatchFirst(_from) + restOfLine
    string = from_tail.suppress().transformString(string)

    column_grammar = _select + delimitedList(field_parser).setResultsName("columns")
    columns = column_grammar.parseString(string).columns.asList()

    # Normalise surrounding whitespace on each column expression.
    return [col.strip() for col in columns]
def get_match_first(lits, parseAction=None):
    """Build a ``MatchFirst`` alternation over *lits*.

    :param lits: iterable of parser elements / literals, tried in order
    :param parseAction: optional parse action attached to the combined element
    :return: a ``MatchFirst`` expression matching the first successful literal
    """
    # Seed with NoMatch so an empty `lits` yields a never-matching element,
    # then fold each literal in with the idiomatic `|=` operator instead of
    # calling the `__ior__` dunder directly as the original did.
    el = MatchFirst(NoMatch())
    for lit in lits:
        el |= lit
    if parseAction:
        el.setParseAction(parseAction)
    return el
def define_simple_literals(literal_list, parseAction=None):
    """Create a caseless-keyword alternation for every word in *literal_list*.

    :param literal_list: iterable of keyword strings
    :param parseAction: optional parse action to attach to the alternation
    :return: ``MatchFirst`` of ``CaselessKeyword`` elements
    """
    keyword_alternation = MatchFirst([CaselessKeyword(word) for word in literal_list])
    if parseAction:
        keyword_alternation = keyword_alternation.setParseAction(parseAction)
    return keyword_alternation
def _handle_define(self, line, token):
    """Handle a ``#define`` directive (macro function or constant).

    :param line: line number, used only for warning messages
    :param token: parsed directive with ``name``, optional ``args``,
        and a ``body`` (function-like) or ``value`` (object-like)
    """
    # Inside a suppressed region (e.g. a false conditional): ignore entirely.
    if self.suppress:
        return
    if token.args:
        # Function-like macro: #define NAME(a, b) body
        args = token.args[0]
        # Grammar matching each '$arg' placeholder in the body, with the
        # match tagged by the argument's name for later substitution.
        keywords = MatchFirst([Keyword('$' + x).setResultsName(x) for x in args])
        # Expand macros already used inside the body before storing it.
        # NOTE(review): '_recurisve_expand' is the (misspelled) helper
        # defined elsewhere in this class; renaming it here would break it.
        body = self._recurisve_expand(token.body)
        macros = self.function_class(args, body, list(keywords.scanString(body)))
        if token.name in self.functions:
            warnings.warn('%d: macros %s already defined!' % (line, token.name))
        self.functions[token.name] = macros
    else:
        # Object-like macro: #define NAME value
        if token.name in self.variables:
            warnings.warn('%d: macros %s already defined!' % (line, token.name))
        value = self.variables[token.name] = self._recurisve_expand(token.value)
        # Names without a leading underscore are reported as public constants.
        if not token.name.startswith("_"):
            self.on_constant(token.name, value)
def targetComponentsForOperatorsInString(operatorNames, codeBlock):
    """
    Return a list of ``(operatorName, target, slice)`` tuples for each
    operator usage found in ``codeBlock.codeString``. The valid operator
    names searched for are `operatorNames`. For example, if 'L' is in
    `operatorNames`, then in the code ``L[phi]`` the return value would be
    ``('L', 'phi', slice(firstCharacterIndex, lastCharacterIndex))``.

    Raises ``CodeParserException`` if an operator name appears without a
    bracketed target.
    """
    # Operator name followed by an optional bracketed target expression.
    parser = MatchFirst(Keyword(operatorName) for operatorName in operatorNames).setResultsName('name') \
             + Optional(nestedExpr('[', ']', baseExpr, ignoreExpr).setResultsName('target'))
    # Skip matches occurring inside comments or string literals.
    parser.ignore(cppStyleComment.copy())
    parser.ignore(quotedString.copy())
    results = []
    for tokens, start, end in parser.scanString(codeBlock.codeString):
        if 'target' in tokens:
            results.append((tokens.name, ''.join(tokens.target.asList()[0]), slice(start, end)))
        else:
            # An operator with no [target] is a usage error in the code block.
            raise CodeParserException(codeBlock, start, "Invalid use of '%s' operator in code block." % tokens.name)
    return results
def __init__( self, manager, namespace_to_term_to_encoding: Optional[ NamespaceTermEncodingMapping] = None, namespace_to_pattern: Optional[Mapping[str, Pattern]] = None, annotation_to_term: Optional[Mapping[str, Set[str]]] = None, annotation_to_pattern: Optional[Mapping[str, Pattern]] = None, annotation_to_local: Optional[Mapping[str, Set[str]]] = None, default_namespace: Optional[Set[str]] = None, allow_redefinition: bool = False, skip_validation: bool = False, upgrade_urls: bool = False, ) -> None: """Build a metadata parser. :param manager: A cache manager :param namespace_to_term_to_encoding: An enumerated namespace mapping from {namespace keyword: {(identifier, name): encoding}} :param namespace_to_pattern: A regular expression namespace mapping from {namespace keyword: regex string} :param annotation_to_term: Enumerated annotation mapping from {annotation keyword: set of valid values} :param annotation_to_pattern: Regular expression annotation mapping from {annotation keyword: regex string} :param default_namespace: A set of strings that can be used without a namespace :param skip_validation: If true, don't download and cache namespaces/annotations """ #: This metadata parser's internal definition cache manager self.manager = manager self.disallow_redefinition = not allow_redefinition self.skip_validation = skip_validation self.upgrade_urls = upgrade_urls #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}} self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {} #: A set of namespaces's URLs that can't be cached self.uncachable_namespaces = set() #: A dictionary of {namespace keyword: regular expression string} self.namespace_to_pattern = namespace_to_pattern or {} #: A set of names that can be used without a namespace self.default_namespace = set( default_namespace) if default_namespace is not None else None #: A dictionary of cached {annotation keyword: set of values} self.annotation_to_term = annotation_to_term or 
{} #: A dictionary of {annotation keyword: regular expression string} self.annotation_to_pattern = annotation_to_pattern or {} #: A dictionary of cached {annotation keyword: set of values} self.annotation_to_local = annotation_to_local or {} #: A dictionary containing the document metadata self.document_metadata = {} #: A dictionary from {namespace keyword: BEL namespace URL} self.namespace_url_dict = {} #: A dictionary from {annotation keyword: BEL annotation URL} self.annotation_url_dict = {} self.document = And([ set_tag, Suppress(BEL_KEYWORD_DOCUMENT), word('key'), Suppress('='), qid('value'), ]) namespace_tag = And([ define_tag, Suppress(BEL_KEYWORD_NAMESPACE), ppc.identifier('name'), as_tag ]) self.namespace_url = And([namespace_tag, url_tag, quote('url')]) self.namespace_pattern = And( [namespace_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')]) annotation_tag = And([ define_tag, Suppress(BEL_KEYWORD_ANNOTATION), ppc.identifier('name'), as_tag ]) self.annotation_url = And([annotation_tag, url_tag, quote('url')]) self.annotation_list = And( [annotation_tag, list_tag, delimited_quoted_list('values')]) self.annotation_pattern = And( [annotation_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')]) self.document.setParseAction(self.handle_document) self.namespace_url.setParseAction(self.handle_namespace_url) self.namespace_pattern.setParseAction(self.handle_namespace_pattern) self.annotation_url.setParseAction(self.handle_annotations_url) self.annotation_list.setParseAction(self.handle_annotation_list) self.annotation_pattern.setParseAction(self.handle_annotation_pattern) self.language = MatchFirst([ self.document, self.namespace_url, self.annotation_url, self.annotation_list, self.annotation_pattern, self.namespace_pattern, ]).setName('BEL Metadata') super(MetadataParser, self).__init__(self.language)
def keywords(klass):
    """Return a ``MatchFirst`` over *klass*'s keyword aliases, in sorted order."""
    alias_names = sorted(klass.keyword_aliases.keys())
    return MatchFirst([Keyword(alias) for alias in alias_names])
    "THEN",
    "UNION",
    "UNION_ALL",
    "USING",
    "WITH",
    "WHEN",
    "WHERE",
]

reserved_keywords = []
# Build a caseless Keyword for each reserved word and bind it to a
# module-level name (underscores become spaces, e.g. UNION_ALL -> "union all").
# NOTE: assigning through locals() only works because this runs at module
# scope, where locals() is the module's globals() dict.
for name in sql_reserved_words:
    n = name.lower().replace("_", " ")
    value = locals()[name] = (Keyword(
        n, caseless=True).setName(n).setDebugActions(*debug))
    reserved_keywords.append(value)

# Matches any single reserved keyword.
RESERVED = MatchFirst(reserved_keywords)

# All recognised JOIN clause spellings (lowercase, space-separated).
join_keywords = {
    "join",
    "full join",
    "cross join",
    "inner join",
    "left join",
    "right join",
    "full outer join",
    "right outer join",
    "left outer join",
}

# Prefix operators mapped to their internal operation names.
unary_ops = {"-": "neg", "~": "binary_not"}
def parse_algebra(self): """ Parse an algebraic expression into a tree. Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to reflect parenthesis and order of operations. Leave all operators in the tree and do not parse any strings of numbers into their float versions. Adding the groups and result names makes the `repr()` of the result really gross. For debugging, use something like print OBJ.tree.asXML() """ # 0.33 or 7 or .34 or 16. number_part = Word(nums) inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part) # pyparsing allows spaces between tokens--`Combine` prevents that. inner_number = Combine(inner_number) # Apply suffixes number_suffix = MatchFirst(Literal(k) for k in self.suffixes.keys()) # 0.33k or 17 plus_minus = Literal('+') | Literal('-') number = Group( Optional(plus_minus) + inner_number + Optional( CaselessLiteral("E") + Optional(plus_minus) + number_part) + Optional(number_suffix)) number = number("number") # Predefine recursive variables. expr = Forward() # Handle variables passed in. Variables may be either of two forms: # 1. front + subscripts + tail # 2. 
front + lower_indices + upper_indices + tail # where: # front (required): # starts with alpha, followed by alphanumeric # subscripts (optional): # any combination of alphanumeric and underscores # lower_indices (optional): # Of form "_{<alaphnumeric>}" # upper_indices (optional): # Of form "^{<alaphnumeric>}" # tail: # any number of primes front = Word(alphas, alphanums) subscripts = Word(alphanums + '_') + ~FollowedBy('{') lower_indices = Literal("_{") + Word(alphanums) + Literal("}") upper_indices = Literal("^{") + Word(alphanums) + Literal("}") tail = ZeroOrMore("'") inner_varname = Combine(front + Optional(subscripts | ( Optional(lower_indices) + Optional(upper_indices))) + tail # optional already by ZeroOrMore ) varname = Group(inner_varname)("variable") varname.setParseAction(self.variable_parse_action) # Same thing for functions # Allow primes (apostrophes) at the end of function names, useful for # indicating derivatives. Eg, f'(x), g''(x) function = Group(inner_varname + Suppress("(") + expr + Suppress(")"))("function") function.setParseAction(self.function_parse_action) atom = number | function | varname | "(" + expr + ")" atom = Group(atom)("atom") # Do the following in the correct order to preserve order of operation. pow_term = atom + ZeroOrMore("^" + atom) pow_term = Group(pow_term)("power") par_term = pow_term + ZeroOrMore('||' + pow_term) # 5k || 4k par_term = Group(par_term)("parallel") prod_term = par_term + ZeroOrMore( (Literal('*') | Literal('/')) + par_term) # 7 * 5 / 4 prod_term = Group(prod_term)("product") sum_term = Optional(plus_minus) + prod_term + ZeroOrMore( plus_minus + prod_term) # -5 + 4 - 3 sum_term = Group(sum_term)("sum") # Finish the recursion. expr << sum_term # pylint: disable=pointless-statement self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
# Threat LABEL + IS_A + IMPACT + PROBABILITY + THREAT + Optional(DESCRIBED + AS + DESCRIPTION), # Security Measure LABEL + IS_A + CAPABILITY + MEASURE + AGAINST + THREAT_LIST + Optional(DESCRIBED + AS + DESCRIPTION), # Label list or alias LABEL + IS_A + LABEL_LIST, # Component modification LABEL + IS_NOW_A + MatchFirst([ LABELED + NEW_NAME, DESCRIBED + AS + DESCRIPTION, MatchFirst([ Or(all_combinations([PROFILE, ROLE, IN + GROUP])), CLASSIFICATION + DATUM, Or(all_combinations([IMPACT, PROBABILITY])) + THREAT, Or([ CAPABILITY + MEASURE, MEASURE + AGAINST + THREAT_LIST, CAPABILITY + MEASURE + AGAINST + THREAT_LIST, ]), ]) + Optional(DESCRIBED + AS + DESCRIPTION) ]), # These are negative assumptions: anti-patterns which must be disproven. # E.g., disprove "lack of transport security". # Negative assumptions which have not been disproven should incur risk. DISPROVE + ASSUMPTIONS, # Interaction Optional(ORDINAL) + SUBJECT + Optional(LATERALLY) + ACTION + EFFECT_LIST + Optional(TO_FROM + OBJECT) + Optional(Optional(BROADLY) + RISKING + THREAT_LIST) + Optional(WITH_NOTES + NOTES),
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Returns a grammar matching any single TDB command (ELEMENT,
    TYPE_DEFINITION, FUNCTION, PHASE, CONSTITUENT, PARAMETER, ...).
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # matching float w/ regex is ugly but is recommended by pyparsing
    float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    # Each piece ends at ';' followed by an optional temperature bound and
    # a Y/N continuation flag (or whitespace).
    func_expr = Optional(float_number) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas + '/-', min=1, max=2)
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
        cmd_typedef | \
        cmd_function | \
        cmd_ass_sys | \
        cmd_defsysdef | \
        cmd_defcmd | \
        cmd_lor | \
        cmd_phase | \
        cmd_constituent | \
        cmd_parameter
    return all_commands
)

from . import elements as ste

__all__ = ["parse"]


def unpack(results: ParseResults) -> tuple:
    """Unpack the members of a :py:class:`~.ParseResults` to a :py:class:`tuple`"""
    return tuple(item[0] for item in results)


#: Words that are not valid identifiers
KEYWORDS = Forward()
KEYWORDS.setName("KEYWORD")
KEYWORDS << MatchFirst(tuple(map(Keyword, kwlist)))

#: literal `...`, e.g. in `typing.Tuple[int, ...]`, not an `Ellipsis`
DOTS = Literal("...").setParseAction(lambda: ste.Dots())
# Re-bind KEYWORDS so the `...` literal is also treated as a reserved word.
KEYWORDS << MatchFirst((*KEYWORDS.expr.exprs, DOTS))

#: any valid typing or stenotype expression, such as `List`, `typing.List`, `?List`, ...
TYPE = Forward()
TYPE.setName("TYPE")
TYPE_exclude_UNION = Forward()
TYPE_exclude_UNION.setName("TYPE_exclude_UNION")

# typing expressions
# ==================

#: a direct or nested reference, such as `List` or `typing.List`
def __init__(
        self,
        graph,
        namespace_to_term_to_encoding: Optional[NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        disallow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
) -> None:
    """Build a BEL parser.

    :param pybel.BELGraph graph: The BEL Graph to use to store the network
    :param namespace_to_term_to_encoding: A dictionary of {namespace: {name: encoding}}. Delegated to
     :class:`pybel.parser.parse_identifier.IdentifierParser`
    :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
     :class:`pybel.parser.parse_identifier.IdentifierParser`
    :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
     :class:`pybel.parser.ControlParser`
    :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
     :class:`pybel.parser.ControlParser`
    :param annotation_to_local: A dictionary of {annotation: set of values}. Delegated to
     :class:`pybel.parser.ControlParser`
    :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
     :class:`pybel.parser.parse_identifier.IdentifierParser`
    :param disallow_nested: If true, turn on nested statement failures. Delegated to
     :class:`pybel.parser.parse_identifier.IdentifierParser`
    :param disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
    :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
     Delegated to :class:`pybel.parser.ControlParser`
    :param skip_validation: If true, build the control and concept parsers without the
     namespace/annotation mappings, so terms are not validated against them
    :param autostreamline: Should the parser be streamlined on instantiation?
    :param required_annotations: Optional list of required annotations
    """
    self.graph = graph
    self.metagraph = set()

    self.disallow_nested = disallow_nested
    self.disallow_unqualified_translocations = disallow_unqualified_translocations

    # When skipping validation, the sub-parsers get no term/pattern mappings.
    if skip_validation:
        self.control_parser = ControlParser(
            citation_clearing=citation_clearing,
            required_annotations=required_annotations,
        )
        self.concept_parser = ConceptParser(
            allow_naked_names=allow_naked_names,
        )
    else:
        self.control_parser = ControlParser(
            annotation_to_term=annotation_to_term,
            annotation_to_pattern=annotation_to_pattern,
            annotation_to_local=annotation_to_local,
            citation_clearing=citation_clearing,
            required_annotations=required_annotations,
        )
        self.concept_parser = ConceptParser(
            allow_naked_names=allow_naked_names,
            namespace_to_term_to_encoding=namespace_to_term_to_encoding,
            namespace_to_pattern=namespace_to_pattern,
        )

    # Let the sub-parsers report line numbers through this parser.
    self.control_parser.get_line_number = self.get_line_number
    self.concept_parser.get_line_number = self.get_line_number

    concept = Group(self.concept_parser.language)(CONCEPT)

    # 2.2 Abundance Modifier Functions

    #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
    self.pmod = get_protein_modification_language(
        self.concept_parser.identifier_qualified)

    #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
    self.location = get_location_language(self.concept_parser.language)
    opt_location = pyparsing.Optional(WCW + self.location)

    #: PyBEL BEL Specification variant
    self.gmod = get_gene_modification_language(
        self.concept_parser.identifier_qualified)

    # 2.6 Other Functions

    #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
    self.fusion = get_fusion_language(self.concept_parser.language)

    # 2.1 Abundance Functions

    #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
    self.general_abundance = general_abundance_tags + nest(concept + opt_location)

    self.gene_modified = concept + pyparsing.Optional(
        WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS),
    )

    self.gene_fusion = Group(self.fusion)(FUSION)
    self.gene_fusion_legacy = Group(
        get_legacy_fusion_langauge(concept, 'c'))(FUSION)

    #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
    self.gene = gene_tag + nest(
        MatchFirst([
            self.gene_fusion,
            self.gene_fusion_legacy,
            self.gene_modified,
        ]) + opt_location,
    )

    self.mirna_modified = concept + pyparsing.Optional(
        WCW + delimitedList(Group(variant))(VARIANTS),
    ) + opt_location

    #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
    self.mirna = mirna_tag + nest(self.mirna_modified)

    self.protein_modified = concept + pyparsing.Optional(
        WCW + delimitedList(
            Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
        (VARIANTS, ),
    )

    self.protein_fusion = Group(self.fusion)(FUSION)
    self.protein_fusion_legacy = Group(
        get_legacy_fusion_langauge(concept, 'p'))(FUSION)

    #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
    self.protein = protein_tag + nest(
        MatchFirst([
            self.protein_fusion,
            self.protein_fusion_legacy,
            self.protein_modified,
        ]) + opt_location,
    )

    self.rna_modified = concept + pyparsing.Optional(
        WCW + delimitedList(Group(variant))(VARIANTS))

    self.rna_fusion = Group(self.fusion)(FUSION)
    self.rna_fusion_legacy = Group(get_legacy_fusion_langauge(
        concept, 'r'))(FUSION)

    #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
    self.rna = rna_tag + nest(
        MatchFirst([
            self.rna_fusion,
            self.rna_fusion_legacy,
            self.rna_modified,
        ]) + opt_location,
    )

    self.single_abundance = MatchFirst([
        self.general_abundance,
        self.gene,
        self.mirna,
        self.protein,
        self.rna,
    ])

    #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
    self.complex_singleton = complex_tag + nest(concept + opt_location)

    self.complex_list = complex_tag + nest(
        delimitedList(Group(self.single_abundance
                            | self.complex_singleton))(MEMBERS) + opt_location,
    )

    self.complex_abundances = self.complex_list | self.complex_singleton

    # Definition of all simple abundances that can be used in a composite abundance
    self.simple_abundance = self.complex_abundances | self.single_abundance
    self.simple_abundance.setParseAction(self.check_function_semantics)

    #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
    self.composite_abundance = composite_abundance_tag + nest(
        delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location,
    )

    self.abundance = self.simple_abundance | self.composite_abundance

    # 2.4 Process Modifier Function
    # backwards compatibility with BEL v1.0
    molecular_activity_default = oneOf(list(
        language.activity_labels)).setParseAction(
            handle_molecular_activity_default,
        )

    #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
    self.molecular_activity = molecular_activity_tags + nest(
        molecular_activity_default | self.concept_parser.language,
    )

    # 2.3 Process Functions

    #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
    self.biological_process = biological_process_tag + nest(concept)

    #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
    self.pathology = pathology_tag + nest(concept)

    self.population = population_tag + nest(concept)

    self.bp_path = self.biological_process | self.pathology | self.population
    self.bp_path.setParseAction(self.check_function_semantics)

    self.activity_standard = activity_tag + nest(
        Group(self.simple_abundance)(TARGET) +
        pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)),
    )

    activity_legacy_tags = oneOf(language.activities)(MODIFIER)
    self.activity_legacy = activity_legacy_tags + nest(
        Group(self.simple_abundance)(TARGET))
    self.activity_legacy.setParseAction(handle_activity_legacy)

    #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
    self.activity = self.activity_standard | self.activity_legacy

    self.process = self.bp_path | self.activity

    # 2.5 Transformation Functions
    from_loc = Suppress(FROM_LOC) + nest(concept(FROM_LOC))
    to_loc = Suppress(TO_LOC) + nest(concept(TO_LOC))

    self.cell_secretion = cell_secretion_tag + nest(
        Group(self.simple_abundance)(TARGET))

    self.cell_surface_expression = cell_surface_expression_tag + nest(
        Group(self.simple_abundance)(TARGET))

    self.translocation_standard = nest(
        Group(self.simple_abundance)(TARGET) + WCW +
        Group(from_loc + WCW + to_loc)(EFFECT),
    )

    self.translocation_legacy = nest(
        Group(self.simple_abundance)(TARGET) + WCW +
        Group(concept(FROM_LOC) + WCW + concept(TO_LOC))(EFFECT),
    )

    self.translocation_legacy.addParseAction(handle_legacy_tloc)
    self.translocation_unqualified = nest(
        Group(self.simple_abundance)(TARGET))

    # Optionally reject translocations that lack FROM/TO qualification.
    if self.disallow_unqualified_translocations:
        self.translocation_unqualified.setParseAction(
            self.handle_translocation_illegal)

    #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
    self.translocation = translocation_tag + MatchFirst([
        self.translocation_unqualified,
        self.translocation_standard,
        self.translocation_legacy,
    ])

    #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
    self.degradation = degradation_tags + nest(
        Group(self.simple_abundance)(TARGET))

    #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
    self.reactants = Suppress(REACTANTS) + nest(
        delimitedList(Group(self.simple_abundance)))
    self.products = Suppress(PRODUCTS) + nest(
        delimitedList(Group(self.simple_abundance)))

    self.reaction = reaction_tags + nest(
        Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

    self.transformation = MatchFirst([
        self.cell_secretion,
        self.cell_surface_expression,
        self.translocation,
        self.degradation,
        self.reaction,
    ])

    # 3 BEL Relationships

    self.bel_term = MatchFirst(
        [self.transformation, self.process, self.abundance]).streamline()

    self.bel_to_bel_relations = [
        association_tag,
        increases_tag,
        decreases_tag,
        positive_correlation_tag,
        negative_correlation_tag,
        correlation_tag,
        no_correlation_tag,
        binds_tag,
        causes_no_change_tag,
        orthologous_tag,
        is_a_tag,
        equivalent_tag,
        partof_tag,
        directly_increases_tag,
        directly_decreases_tag,
        analogous_tag,
        regulates_tag,
    ]
    self.bel_to_bel = triple(self.bel_term,
                             MatchFirst(self.bel_to_bel_relations),
                             self.bel_term)

    # Mixed Relationships

    #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
    self.rate_limit = triple(
        MatchFirst(
            [self.biological_process, self.activity, self.transformation]),
        rate_limit_tag,
        self.biological_process,
    )

    #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
    self.subprocess_of = triple(
        MatchFirst([self.process, self.activity, self.transformation]),
        subprocess_of_tag,
        self.process,
    )

    #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
    self.transcribed = triple(self.gene, transcribed_tag, self.rna)

    #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
    self.translated = triple(self.rna, translated_tag, self.protein)

    #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
    self.has_member = triple(self.abundance, has_member_tag, self.abundance)

    #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
    self.abundance_list = Suppress('list') + nest(
        delimitedList(Group(self.abundance)))

    self.has_members = triple(self.abundance, has_members_tag, self.abundance_list)
    self.has_members.setParseAction(self.handle_has_members)

    self.has_components = triple(self.abundance, has_components_tag,
                                 self.abundance_list)
    self.has_components.setParseAction(self.handle_has_components)

    self.has_list = self.has_members | self.has_components

    # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
    self.has_component = triple(
        self.abundance,
        has_component_tag,
        self.abundance,
    )

    self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

    self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                       self.abundance)
    self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                   self.abundance)

    # NOTE: the commented-out entries below are deliberately disabled;
    # they are handled by the unqualified-relation parsers further down.
    self.relation = MatchFirst([
        self.bel_to_bel,
        # self.has_member,
        # self.has_component,
        self.subprocess_of,
        self.rate_limit,
        self.biomarker,
        self.transcribed,
        self.translated,
        # self.has_variant_relation,
        # self.part_of_reaction,
    ])
    self.relation.setParseAction(self._handle_relation_harness)

    self.inverted_unqualified_relation = MatchFirst([
        self.has_member,
        self.has_component,
    ])
    self.inverted_unqualified_relation.setParseAction(
        self.handle_inverse_unqualified_relation)

    self.normal_unqualified_relation = MatchFirst([
        self.has_member,
        self.has_component,
        self.has_variant_relation,
        self.part_of_reaction,
    ])
    self.normal_unqualified_relation.setParseAction(
        self.handle_unqualified_relation)

    #: 3.1 Causal Relationships - nested.
    causal_relation_tags = MatchFirst([
        increases_tag,
        decreases_tag,
        directly_decreases_tag,
        directly_increases_tag,
    ])

    self.nested_causal_relationship = triple(
        self.bel_term,
        causal_relation_tags,
        nest(triple(self.bel_term, causal_relation_tags, self.bel_term)),
    )

    self.nested_causal_relationship.setParseAction(
        self.handle_nested_relation)

    # has_members is handled differently from all other relations because it gets distributed
    self.relation = MatchFirst([
        self.has_list,
        self.nested_causal_relationship,
        self.relation,
        self.inverted_unqualified_relation,
        self.normal_unqualified_relation,
    ])

    self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
        self.handle_term)

    self.statement = self.relation | self.singleton_term
    self.language = self.control_parser.language | self.statement
    self.language.setName('BEL')

    super(BELParser, self).__init__(self.language,
                                    streamline=autostreamline)
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""

    def __init__(
        self,
        graph,
        namespace_to_term_to_encoding: Optional[NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        disallow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term_to_encoding: A dictionary of {namespace: {name: encoding}}.
            Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}.
            Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}.
            Delegated to :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}.
            Delegated to :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}.
            Delegated to :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures.
            Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_nested: If true, turn on nested statement failures.
            Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
            Delegated to :class:`pybel.parser.ControlParser`
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph
        # Edges between relation hashes produced by nested statements (see handle_nested_relation).
        self.metagraph = set()

        self.disallow_nested = disallow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        # skip_validation builds sub-parsers without vocabulary tables, so
        # names/annotations are accepted without being checked.
        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.concept_parser = ConceptParser(
                allow_naked_names=allow_naked_names,
            )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.concept_parser = ConceptParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term_to_encoding=namespace_to_term_to_encoding,
                namespace_to_pattern=namespace_to_pattern,
            )

        # Sub-parsers report errors with this parser's line counter.
        self.control_parser.get_line_number = self.get_line_number
        self.concept_parser.get_line_number = self.get_line_number

        concept = Group(self.concept_parser.language)(CONCEPT)

        # 2.2 Abundance Modifier Functions

        #: BEL 2.0 spec 2.2.1 — protein modifications, e.g. pmod(...)
        self.pmod = get_protein_modification_language(
            self.concept_parser.identifier_qualified)

        #: BEL 2.0 spec 2.2.4 — cellular location
        self.location = get_location_language(self.concept_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant — gene modification
        self.gmod = get_gene_modification_language(
            self.concept_parser.identifier_qualified)

        # 2.6 Other Functions

        #: BEL 2.0 spec 2.6.1 — fusion, fus(...)
        self.fusion = get_fusion_language(self.concept_parser.language)

        # 2.1 Abundance Functions

        #: BEL 2.0 spec 2.1.1 — abundance, a(...)
        self.general_abundance = general_abundance_tags + nest(concept + opt_location)

        self.gene_modified = concept + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS),
        )

        self.gene_fusion = Group(self.fusion)(FUSION)
        # NOTE: get_legacy_fusion_langauge is the helper's actual (misspelled) name.
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(concept, 'c'))(FUSION)

        #: BEL 2.0 spec 2.1.4 — gene abundance, g(...)
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion,
                self.gene_fusion_legacy,
                self.gene_modified,
            ]) + opt_location,
        )

        self.mirna_modified = concept + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS),
        ) + opt_location

        #: BEL 2.0 spec 2.1.5 — microRNA abundance, m(...)
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = concept + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS,),
        )

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(concept, 'p'))(FUSION)

        #: BEL 2.0 spec 2.1.6 — protein abundance, p(...)
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location,
        )

        self.rna_modified = concept + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(get_legacy_fusion_langauge(
            concept, 'r'))(FUSION)

        #: BEL 2.0 spec 2.1.7 — RNA abundance, r(...)
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location,
        )

        self.single_abundance = MatchFirst([
            self.general_abundance,
            self.gene,
            self.mirna,
            self.protein,
            self.rna,
        ])

        #: BEL 2.0 spec 2.1.2 — complex abundance, complex(...)
        self.complex_singleton = complex_tag + nest(concept + opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance | self.complex_singleton))(MEMBERS) + opt_location,
        )

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: BEL 2.0 spec 2.1.3 — composite abundance, composite(...)
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location,
        )

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function

        # backwards compatibility with BEL v1.0
        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default,
        )

        #: BEL 2.0 spec 2.4.1 — molecular activity, ma(...)
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.concept_parser.language,
        )

        # 2.3 Process Functions

        #: BEL 2.0 spec 2.3.1 — biologicalProcess, bp(...)
        self.biological_process = biological_process_tag + nest(concept)

        #: BEL 2.0 spec 2.3.2 — pathology, path(...)
        self.pathology = pathology_tag + nest(concept)

        self.population = population_tag + nest(concept)

        self.bp_path = self.biological_process | self.pathology | self.population
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)),
        )

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: BEL 2.0 spec 2.3.3 — activity, act(...)
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(concept(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(concept(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))
        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT),
        )

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(concept(FROM_LOC) + WCW + concept(TO_LOC))(EFFECT),
        )
        self.translocation_legacy.addParseAction(handle_legacy_tloc)

        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))
        # Optionally reject tloc(...) without FROM/TO clauses.
        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: BEL 2.0 spec 2.5.1 — translocation, tloc(...)
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified,
            self.translocation_standard,
            self.translocation_legacy,
        ])

        #: BEL 2.0 spec 2.5.2 — degradation, deg(...)
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: BEL 2.0 spec 2.5.3 — reaction, rxn(reactants(...), products(...))
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion,
            self.cell_surface_expression,
            self.translocation,
            self.degradation,
            self.reaction,
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            correlation_tag,
            no_correlation_tag,
            binds_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: BEL 2.0 spec 3.1.5 — rateLimitingStepOf
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag,
            self.biological_process,
        )

        #: BEL 2.0 spec 3.4.6 — subProcessOf
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag,
            self.process,
        )

        #: BEL 2.0 spec 3.3.2 — transcribedTo
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: BEL 2.0 spec 3.3.3 — translatedTo
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: BEL 2.0 spec 3.4.1 — hasMember
        self.has_member = triple(self.abundance, has_member_tag, self.abundance)

        #: BEL 2.0 spec 3.4.2 — hasMembers (right side is a list(...))
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # BEL 2.0 spec 3.4.3 — hasComponent
        self.has_component = triple(
            self.abundance,
            has_component_tag,
            self.abundance,
        )

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        # First layer: relations that carry full qualification (citation,
        # evidence, annotations) via _handle_relation_harness.
        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])
        self.relation.setParseAction(self._handle_relation_harness)

        self.inverted_unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
        ])
        self.inverted_unqualified_relation.setParseAction(
            self.handle_inverse_unqualified_relation)

        self.normal_unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
            self.has_variant_relation,
            self.part_of_reaction,
        ])
        self.normal_unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested.
        causal_relation_tags = MatchFirst([
            increases_tag,
            decreases_tag,
            directly_decreases_tag,
            directly_increases_tag,
        ])

        self.nested_causal_relationship = triple(
            self.bel_term,
            causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)),
        )
        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        # has_members is handled differently from all other relations because it gets distributed.
        # NOTE: self.relation is deliberately reassigned here — the final grammar
        # wraps the qualified-relation layer built above.
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.inverted_unqualified_relation,
            self.normal_unqualified_relation,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.concept_parser.namespace_to_name_to_encoding

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.concept_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int, tokens: ParseResults):
        """Handle nested statements.

        If :code:`self.disallow_nested` is True, raises a ``NestedRelationWarning``.

        Otherwise a nested statement ``A rel1 (B rel2 C)`` is flattened into the
        two edges ``A rel1 B`` and ``B rel2 C``, and the pair of resulting edge
        hashes is recorded in :data:`metagraph`.

        :raises: NestedRelationWarning
        """
        if self.disallow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        subject_hash = self._handle_relation_checked(
            line, position, {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT],
            },
        )
        object_hash = self._handle_relation_checked(
            line, position, {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT],
            },
        )
        self.metagraph.add((subject_hash, object_hash))
        return tokens

    def check_function_semantics(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidFunctionSemantic
        """
        concept = tokens.get(CONCEPT)
        # Nothing to validate without a vocabulary or a parsed concept.
        if not self._namespace_dict or concept is None:
            return tokens

        namespace, name = concept[NAMESPACE], concept[NAME]

        # Regex-defined namespaces carry no per-name encodings to check.
        if namespace in self.concept_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and namespace == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(
            itt.chain.from_iterable(
                belns_encodings.get(encoding, set())
                for encoding in self._namespace_dict[namespace][name]),
        )

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position,
                                namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                func=tokens[FUNCTION],
                namespace=namespace,
                name=name,
                allowed_functions=valid_functions,
            )

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle BEL terms (the subject and object of BEL relations)."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults, relation: str) -> ParseResults:
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        parent_node_dsl = self.ensure_node(tokens[0])
        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            # Note that the polarity is switched since this is just for hasMembers
            # and hasComponents, which are both deprecated as of BEL v2.2
            self.graph.add_unqualified_edge(child_node_dsl, parent_node_dsl,
                                            relation)
        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, IS_A)

    def handle_has_components(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, PART_OF)

    def _add_qualified_edge_helper(self, *, u, u_modifier, relation, v,
                                   v_modifier, annotations) -> str:
        """Add a qualified edge from the internal aspects of the parser.

        Returns the hash/key of the added edge (as produced by the graph).
        """
        # Relation-specific adders; everything else falls through to
        # add_qualified_edge with an explicit relation keyword.
        m = {
            BINDS: self.graph.add_binds,
        }
        adder = m.get(relation)
        d = dict(
            evidence=self.control_parser.evidence,
            citation=self.control_parser.get_citation(),
            annotations=annotations,
            subject_modifier=u_modifier,
            object_modifier=v_modifier,
            **{LINE: self.get_line_number()},
        )
        if adder is not None:
            return adder(u=u, v=v, **d)
        else:
            return self.graph.add_qualified_edge(u=u, v=v, relation=relation,
                                                 **d)

    def _add_qualified_edge(self, *, u, u_modifier, relation, v, v_modifier,
                            annotations) -> str:
        """Add an edge, then adds the opposite direction edge if it should."""
        d = dict(
            relation=relation,
            annotations=annotations,
        )
        # Symmetric relations get a mirrored edge first; only the forward
        # edge's key is returned.
        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(u=v, u_modifier=v_modifier, v=u,
                                            v_modifier=u_modifier, **d)
        return self._add_qualified_edge_helper(u=u, u_modifier=u_modifier,
                                               v=v, v_modifier=v_modifier,
                                               **d)

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation: materialize both endpoint nodes and add the qualified edge."""
        u = self.ensure_node(tokens[SUBJECT])
        u_modifier = modifier_po_to_dict(tokens[SUBJECT])
        relation = tokens[RELATION]
        v = self.ensure_node(tokens[OBJECT])
        v_modifier = modifier_po_to_dict(tokens[OBJECT])
        annotations = self._get_prepared_annotations()
        return self._add_qualified_edge(
            u=u,
            u_modifier=u_modifier,
            relation=relation,
            v=v,
            v_modifier=v_modifier,
            annotations=annotations,
        )

    def _get_prepared_annotations(self):
        # Normalize each annotation entry to a {value: True} mapping,
        # whether the control parser stored a set of values or a single value.
        return {
            annotation_name: ({ae: True
                               for ae in annotation_entry} if isinstance(
                                   annotation_entry, set) else {
                                       annotation_entry: True,
            })
            for annotation_name, annotation_entry in
            self.control_parser.annotations.items()
        }

    def _handle_relation_harness(
            self, line: str, position: int,
            tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!
        """
        self._handle_relation_checked(line, position, tokens)
        return tokens

    def _handle_relation_checked(self, line, position, tokens):
        # Enforce that a citation, evidence, and all required annotations are
        # set before any qualified relation is accepted.
        if not self.control_parser.citation_is_set:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations()
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.get_line_number(), line,
                                           position,
                                           missing_required_annotations)

        return self._handle_relation(tokens)

    def handle_unqualified_relation(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl,
                                        relation)
        return tokens

    def handle_inverse_unqualified_relation(
            self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations that should go reverse."""
        u = self.ensure_node(tokens[SUBJECT])
        v = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        # Edge is added object -> subject on purpose (inverted semantics).
        self.graph.add_unqualified_edge(v, u, relation)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Turn parsed tokens into a canonical node and make sure it is in the graph."""
        # A modified term (e.g. act(...)) resolves to its inner target node.
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int,
                                     tokens: ParseResults) -> None:
        """Handle a malformed translocation."""
        raise MalformedTranslocationWarning(self.get_line_number(), line,
                                            position, tokens)
from pyparsing import pythonStyleComment, Keyword, Regex, OneOrMore, Word, hexnums, nums
from pyparsing import Suppress, MatchFirst, ZeroOrMore, Group, Combine, Optional, Literal

from .ops import A, K

# Optional sign used in front of decimal immediates.
OPERATOR = Literal('+') | Literal('-')

# Signed decimal number; Combine forbids whitespace between sign and digits.
DECIMAL = Combine(Optional(OPERATOR) + Word(nums)).setParseAction(lambda t: int(t[0]))

# Hexadecimal number with a suppressed "0x" prefix.
HEXADECIMAL = Suppress("0x") + Word(hexnums).setParseAction(
    lambda t: int(t[0], 16))

# A numeric value in either base; ^ (Or) picks the longest match.
VALUE = DECIMAL ^ HEXADECIMAL

# [n] — memory-style address wrapped in the project's A type.
A_ADDRESS = (Suppress('[') + VALUE + Suppress(']')).setParseAction(lambda t: A(int(t[0])))
# #n — immediate-style address wrapped in the project's K type.
K_ADDRESS = (Suppress('#') + VALUE).setParseAction(lambda t: K(int(t[0])))
ADDRESS = MatchFirst((A_ADDRESS, K_ADDRESS))

# Instruction mnemonics (BPF-style opcodes).
# NOTE(review): this list is truncated in this chunk of the file — the
# remaining Keyword(...) entries and the closing bracket continue beyond
# this view.
STATEMENT_OP = MatchFirst([
    Keyword("ld"),
    Keyword("ldh"),
    Keyword("ldb"),
    Keyword("ldx"),
    Keyword("ldxb"),
    Keyword("st"),
    Keyword("stx"),
    Keyword("add"),
    Keyword("sub"),
    Keyword("mul"),
    Keyword("div"),
    Keyword("mod"),
    Keyword("neg"),
def __init__(self, elems, precedence=4):
    """Initialize a first-match alternative over *elems* with the given precedence.

    # NOTE(review): `concatenate` is a helper defined elsewhere in this file;
    # presumably it expands/joins `elems` into the list of alternatives that
    # MatchFirst expects — confirm its contract before relying on it.
    """
    # Explicit base-class chaining (not super()): this type multiply
    # inherits from pyparsing's MatchFirst and the project's BaseType,
    # and each base is initialized with its own arguments.
    MatchFirst.__init__(self, concatenate(elems))
    BaseType.__init__(self, precedence)
def __init__(self, parse_method=None, precedence=2):
    """Initialize a quoted-string type accepting single- or double-quoted text.

    :param parse_method: optional callable attached as a pyparsing parse
        action to post-process matched tokens
    :param precedence: precedence value forwarded to BaseType
    """
    # Explicit base-class chaining (not super()) — multiple inheritance of
    # MatchFirst and the project's BaseType.
    MatchFirst.__init__(self, [QString('"'), QString("'")])
    BaseType.__init__(self, precedence)
    if parse_method:
        self.addParseAction(parse_method)
) = map( CaselessKeyword, """UNION, ALL, AND, OR, INTERSECT, INTERSECTION, EXCEPT, COLLATE, ASC, DESC, ON, NOT, SELECT, DISTINCT, FROM, WHERE, BY, ORDER, BY, LIMIT, EVERY""".replace( ",", "").split()) (CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP) = map( CaselessKeyword, """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",", "").split()) keyword = MatchFirst( (UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, NOT, SELECT, DISTINCT, FROM, WHERE, BY, EVERY, ORDER, BY, LIMIT, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP)) def regex_from_like(s): return str(s).replace('*', '\*').replace('_', '.').replace('%', '.*') def lookup(id): return def lookup_every(id): return
LEXER, PARSER, GRAMMAR, TREE, CATCH, FINALLY, THROWS, PROTECTED, PUBLIC, PRIVATE,
) = map(
    Keyword,
    """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected public private """.split(),
)
# NOTE(review): the head of the tuple above (its opening parenthesis and the
# first few names — presumably SRC, SCOPE, OPTIONS, TOKENS, FRAGMENT, ID)
# lies before this chunk of the file.

KEYWORD = MatchFirst(keywords)

# Tokens
EOL = Suppress(LineEnd())  # $

# Any single printable character.
SGL_PRINTABLE = Char(printables)

# Everything up to (but not including) the end of the line, whitespace kept.
singleTextString = originalTextFor(
    ZeroOrMore(~EOL + (White(" \t") | Word(printables)))
).leaveWhitespace()

XDIGIT = hexnums
INT = Word(nums)

# Backslash escape: a known escape letter, a \uXXXX unicode escape, or any
# single printable character.
ESC = BSLASH + (
    oneOf(list(r"nrtbf\">" + "'")) | ("u" + Word(hexnums, exact=4)) | SGL_PRINTABLE
)

# One character inside a quoted literal: an escape, or anything that is not
# an apostrophe or a backslash.
LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE

CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
# Parsers for global Cisco IOS configuration lines: each suppresses the fixed
# command prefix and captures the remainder of the line.
parse_ipv6_snooping = Suppress('snooping ') + restOfLine
parse_ipv6_raguard = Suppress('nd raguard ') + restOfLine
parse_ipv6_destinationguard = Suppress('destination-guard ') + restOfLine
parse_ipv6_dhcpguard = Suppress('dhcp guard ') + restOfLine
parse_lldp = Suppress('lldp ') + restOfLine
parse_username = Suppress('username ') + restOfLine
parse_aaa = Suppress('aaa ') + restOfLine
parse_stp = Suppress('spanning-tree ') + restOfLine
# parse_vtp = Suppress('vtp ') + restOfLine
parse_line = Suppress('line ') + restOfLine
parse_ip_ssh = Suppress('ip ssh ') + restOfLine
parse_arp_proxy = Suppress('ip arp proxy ') + restOfLine

# Matches "no vstack" (the literal 'vstack' is kept as a token).
parse_vstack = Suppress('no') + 'vstack'

# "enable secret|password [<encryption-level>] ..." — the trailing secret
# itself is suppressed.
parse_enable_password = Suppress('enable') + MatchFirst([
    'secret', 'password'
]) + Optional(Word(nums) + Suppress(White(exact=1))) + Suppress(restOfLine)

# NotAny(White()) anchors these to column 0 (global config, not interface
# sub-config).
parse_ip_dhcp = NotAny(White()) + Suppress('ip dhcp snooping') + Optional(
    Suppress('vlan') + Word(nums) + ZeroOrMore(Suppress(',') + Word(nums)))
parse_ip_arp = NotAny(White()) + Suppress('ip arp inspection') + Suppress(
    'vlan') + Word(nums) + ZeroOrMore(Suppress(',') + Word(nums))
parse_ip_service = NotAny(White()) + Suppress('ip') + MatchFirst(
    ['finger', 'identd', 'source-route', 'bootp server'])
parse_ip_http = NotAny(White()) + Suppress('ip http ') + restOfLine

# AAA sub-command parsers.
# aaa_authorization = Suppress('authorization ') + restOfLine
aaa_authentication = Suppress('authentication ') + restOfLine
aaa_accounting = Suppress('accounting ') + restOfLine
aaa_groups = Suppress('group server ') + restOfLine

# Helper: run a parser over a feature string and return the parse result.
# NOTE(review): this lambda is truncated in this chunk of the file — the
# closing parentheses (and presumably .asList() or similar) continue beyond
# this view.
utill = lambda parse_meth, featur_str: parse_meth.parseString(featur_str
def _interfaceParse___iface_attributes(config, check_disabled):
    """Parse one interface's config block into a dict of its attributes.

    :param config: raw interface configuration handed to util.get_attributes
    :param check_disabled: when truthy, keep parsing a shutdown interface
        (recording shutdown='yes'); when falsy, stop at 'shutdown' and return
        only {'shutdown': 'yes'}
    :returns: dict of parsed attributes, or {'unknown_iface': 1} when the
        interface has no attributes
    """
    iface_list = util.get_attributes(config)[0]
    # if iface isn`t enable and unused
    if iface_list:
        # Defaults assume an untrusted, CDP-enabled, non-shutdown port.
        iface_dict = {
            'shutdown': 'no',
            'vlans': [],
            'cdp': 'yes',
            'dhcp_snoop': {
                'mode': 'untrust'
            },
            'arp_insp': {
                'mode': 'untrust'
            },
            'storm control': {},
            'port-security': {},
            'ipv6': {}
        }
        # VLAN ids and ranges, e.g. "1,5,10-20".
        vlan_num = Word(nums + '-') + ZeroOrMore(Suppress(',') + Word(nums + '-'))
        parse_description = Suppress('description ') + restOfLine
        parse_type = Suppress('switchport mode ') + restOfLine
        parse_port_sec = Suppress('switchport port-security ') + restOfLine
        parse_stp_port = Suppress('spanning-tree ') + restOfLine
        parse_dhcp_snoop = Suppress('ip dhcp snooping ') + restOfLine
        parse_arp_insp = Suppress('ip arp inspection ') + restOfLine
        parse_source_guard = Suppress('ip verify source ') + restOfLine
        parse_arp_proxy_iface = Optional(
            Word(alphas)) + Suppress('ip proxy-arp')
        # NOTE(review): MatchFirst is given a single combined expression here,
        # not a list of alternatives — this looks like it was meant to be
        # MatchFirst(['access vlan ', 'trunk allowed vlan ' + Optional('add ')]);
        # confirm against the intended config lines before changing.
        parse_vlans = Suppress('switchport ') + Suppress(
            MatchFirst('access vlan ' +
                       ('trunk allowed vlan ' + Optional('add ')))) + vlan_num

        # Accumulates storm-control sub-options across all lines of this
        # interface; defined per call so state never leaks between interfaces.
        class Storm:
            def __init__(self):
                self.dct = {'type': []}

            def new_line(self, line):
                """Feed one config line; silently ignore non-storm-control lines."""
                parse_storm = Suppress('storm-control ') + restOfLine
                try:
                    self.storm_line = parse_storm.parseString(
                        line).asList()[-1]
                    self.level_info()
                    self.action_info()
                    self.type_info()
                except ParseException:
                    pass

            @catch_exception
            def parse_level(self):
                # e.g. "broadcast level 10.00" -> ['broadcast', '10.00']
                parse_level = Word(alphas) + Suppress('level ') + restOfLine
                value = parse_level.parseString(self.storm_line).asList()
                if 'level' in self.dct:
                    self.dct['level'].append(value)
                else:
                    self.dct['level'] = [value]

            @catch_exception
            def parse_action(self):
                action = Suppress('action ') + Word(alphas)
                self.action = utill(action, self.storm_line)

            @catch_exception
            def parse_type(self):
                # NOTE: shadows the builtin `type` locally (kept as-is).
                type = Word(alphas) + Suppress(
                    Optional("include")) + Word(alphas)
                self.type = utill(type, self.storm_line)

            @catch_exception1
            def action_info(self):
                self.parse_action()
                self.dct['action'] = self.action

            @catch_exception1
            def type_info(self):
                self.parse_type()
                # Only record known traffic types, once each.
                for each in self.type:
                    if each not in self.dct['type'] and each in [
                            'broadcast', 'multicast', 'unicast'
                    ]:
                        self.dct['type'].append(each)

            @catch_exception1
            def level_info(self):
                self.parse_level()

        cl_storm = Storm()
        # Reserved options list is using due to 'shutdown' option is usually located at the end of the list, so it breaks cycle if interface is shutdown and function speed increases
        for option in iface_list[::-1]:
            cl_storm.new_line(option)
            iface_dict['storm control'] = cl_storm.dct
            if option == 'shutdown':
                if check_disabled:
                    iface_dict['shutdown'] = 'yes'
                    pass
                else:
                    # Interface is administratively down and we don't care
                    # about its other attributes: discard and stop.
                    iface_dict = {'shutdown': 'yes'}
                    break
            if option == 'switchport nonegotiate':
                iface_dict['dtp'] = 'no'
                continue
            if option == 'no cdp enable':
                iface_dict['cdp'] = 'no'
                continue
            if option == 'no mop enabled':
                iface_dict['mop'] = 'no'
                continue
            elif option == 'mop enabled':
                iface_dict['mop'] = 'yes'
                continue
            # Each parser below is tried in turn; ParseException means
            # "this line is not that kind of option" and falls through.
            try:
                vlan_add = parse_vlans.parseString(option).asList()
                for unit in vlan_add:
                    if '-' in unit:
                        # Expand "10-20" into individual VLAN ids.
                        range_units = unit.split('-')
                        range_list = [
                            i for i in range(int(range_units[0]),
                                             int(range_units[1]) + 1)
                        ]
                        vlan_add.remove(unit)
                        iface_dict['vlans'].extend(range_list)
                    else:
                        iface_dict['vlans'].append(int(unit))
                continue
            except ParseException:
                pass
            try:
                iface_dict['description'] = parse_description.parseString(
                    option).asList()[-1]
                continue
            except ParseException:
                pass
            try:
                iface_dict['type'] = parse_type.parseString(
                    option).asList()[-1]
                continue
            except ParseException:
                pass
            try:
                port_sec = parse_port_sec.parseString(option).asList()[-1]
                iface_dict[
                    'port-security'] = parsing_checks.port_security.__ifaceAttributes___port_sec_parse(
                        port_sec, iface_dict['port-security'])
                continue
            except ParseException:
                pass
            try:
                dhcp_snoop = parse_dhcp_snoop.parseString(option).asList()[-1]
                iface_dict[
                    'dhcp_snoop'] = parsing_checks.ip_iface.__ifaceAttributes___ip_parse(
                        dhcp_snoop, iface_dict['dhcp_snoop'])
                continue
            except ParseException:
                pass
            try:
                arp_insp = parse_arp_insp.parseString(option).asList()[-1]
                iface_dict[
                    'arp_insp'] = parsing_checks.ip_iface.__ifaceAttributes___ip_parse(
                        arp_insp, iface_dict['arp_insp'])
                continue
            except ParseException:
                pass
            try:
                stp_port = parse_stp_port.parseString(option).asList()[-1]
                iface_dict['stp'] = stp_port
                continue
            except ParseException:
                pass
            try:
                source_guard = parse_source_guard.parseString(
                    option).asList()[-1]
                iface_dict['source_guard'] = source_guard
                continue
            except ParseException:
                pass
            try:
                # NOTE(review): `parse_ipv6` is not defined in this module's
                # visible globals (only parse_ipv6_snooping / _raguard / etc.
                # exist) — this branch would raise NameError, not
                # ParseException, if ever reached. Confirm the intended parser.
                ipv6 = parse_ipv6.parseString(option).asList()[-1]
                __ifaceAttributes___ipv6_parse(ipv6, iface_dict['ipv6'])
                continue
            except ParseException:
                pass
            try:
                arp_proxy_iface = parse_arp_proxy_iface.parseString(
                    option).asList()[-1]
                iface_dict['arp_proxy'] = arp_proxy_iface
                continue
            except ParseException:
                pass
        return iface_dict
    else:
        return {'unknown_iface': 1}
def evaluator(variables, functions, string, cs=False):
    """
    Evaluate a mathematical expression string and return its numeric value.

    variables: dict mapping name -> float value; merged over DEFAULT_VARIABLES.
    functions: dict mapping name -> unary function; merged over DEFAULT_FUNCTIONS.
    string: the expression to evaluate.
    cs: case sensitive matching of variable/function names when True.

    Returns float('nan') for an empty/whitespace-only input.
    NOTE(review): Python 2 code — `dict.keys() + dict.keys()` below concatenates
    lists, which raises TypeError on Python 3.
    """
    all_variables = copy.copy(DEFAULT_VARIABLES)
    all_functions = copy.copy(DEFAULT_FUNCTIONS)
    all_variables.update(variables)
    all_functions.update(functions)
    if not cs:
        # Case-insensitive mode: lowercase the lookup tables and match with
        # CaselessLiteral so the original string can still be parsed as-is.
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal
    # Flag any identifiers in the input that are not known variables/functions.
    check_variables(string_cs, set(all_variables.keys() + all_functions.keys()))
    if string.strip() == "":
        return float('nan')
    # SI suffixes and percent
    number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()])
    plus_minus = Literal('+') | Literal('-')
    times_div = Literal('*') | Literal('/')
    number_part = Word(nums)
    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part)
    # by default pyparsing allows spaces between tokens -- Combine prevents that
    inner_number = Combine(inner_number)
    # 0.33k or -17: mantissa, optional exponent, optional SI suffix
    number = (inner_number + Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) + Optional(number_suffix))
    number.setParseAction(number_parse_action)  # Convert to number
    # Predefine recursive variables
    expr = Forward()
    # Handle variables passed in. E.g. if we have {'R':0.5}, we make the
    # substitution. We sort by length so that longer names (like "e2") match
    # before mathematical constants (like "e"). This is kind of a hack.
    all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True)
    varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys])
    varnames.setParseAction(lambda x: [all_variables[k] for k in x])
    # if all_variables were empty, pyparsing would want varnames = NoMatch();
    # this is not the case, as all_variables contains the defaults.
    # Same thing for functions.
    all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True)
    funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys])
    function = funcnames + Suppress("(") + expr + Suppress(")")
    function.setParseAction(lambda x: [all_functions[x[0]](x[1])])
    atom = number | function | varnames | Suppress("(") + expr + Suppress(")")
    # Build precedence levels bottom-up: ^, then ||, then * /, then + -.
    pow_term = atom + ZeroOrMore(Suppress("^") + atom)
    pow_term.setParseAction(exp_parse_action)  # 7^6
    par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term)  # 5k || 4k
    par_term.setParseAction(parallel)
    prod_term = par_term + ZeroOrMore(times_div + par_term)  # 7 * 5 / 4 - 3
    prod_term.setParseAction(prod_parse_action)
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(plus_minus + prod_term)  # -5 + 4 - 3
    sum_term.setParseAction(sum_parse_action)
    expr << sum_term  # finish the recursion
    return (expr + stringEnd).parseString(string)[0]
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Returns a MatchFirst-style alternation over all supported TDB commands
    (ELEMENT, SPECIES, TYPE_DEFINITION, FUNCTION, PHASE, CONSTITUENT,
    PARAMETER, plus several skip-to-end-of-line informational commands).
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    pos_neg_int_number = Word('+-' + nums).setParseAction(lambda t: [int(t[0])])  # '+3' or '-2' are examples
    # matching float w/ regex is ugly but is recommended by pyparsing
    regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)'
    float_number = Regex(r'[-+]?([0-9]+\.(?!([0-9]|[eE])))|{0}'.format(regex_after_decimal)) \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # NOTE(review): this rebinds symbol_name — the first Word above is dead;
    # both names end up pointing at the wider character class. Confirm intent.
    ref_phase_name = symbol_name = Word(alphanums + '_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + Optional(Suppress(ref_phase_name)) + \
        Optional(Suppress(OneOrMore(float_number))) + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + Group(
        OneOrMore(
            Word(alphas, min=1, max=2) + Optional(float_number, default=1.0))
    ) + Optional(Suppress('/') + pos_neg_int_number) + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
        cmd_species | \
        cmd_typedef | \
        cmd_function | \
        cmd_ass_sys | \
        cmd_defsysdef | \
        cmd_defcmd | \
        cmd_database_info | \
        cmd_version_date | \
        cmd_reference_file | \
        cmd_add_ref | \
        cmd_lor | \
        cmd_templim | \
        cmd_phase | \
        cmd_constituent | \
        cmd_parameter
    return all_commands
{"op": "and", "name": "and"}, {"op": "or", "name": "or"} ] locs = locals() reserved = [] for k in keywords: name, value = k.upper().replace(" ", ""), Keyword(k, caseless=True) locs[name] = value reserved.append(value) for o in KNOWN_OPS: name = o['op'].upper() value = locs[name] = o['literal'] = CaselessLiteral(o['op']) reserved.append(value) RESERVED = MatchFirst(reserved) def to_json_operator(instring, tokensStart, retTokens): # ARRANGE INTO {op: params} FORMAT tok = retTokens[0] op = filter(lambda o: o['op'] == tok[1], KNOWN_OPS)[0]['name'] return {op: [tok[i * 2] for i in range(int((len(tok) + 1) /2))]} def to_json_call(instring, tokensStart, retTokens): # ARRANGE INTO {op: params} FORMAT tok = retTokens op = tok.op params = tok.params[0] if not params:
| QuotedString('"', escChar='\\', unquoteResults=False))
# NOTE(review): the line above is the tail of the STRING_CONSTANT definition,
# whose beginning lies outside this chunk.
# Integer must not be followed by '.' so that floats are not half-consumed.
INT_CONSTANT = Regex(r'-?\d+(?!\.)')
FLOAT_CONSTANT = Regex(r'-?\d*\.\d+')
# Order matters: try strings first, then floats before ints.
CONSTANT = STRING_CONSTANT | FLOAT_CONSTANT | INT_CONSTANT
# All constants should be interpreted in Python format
CONSTANT.setParseAction(lambda toks: ast.literal_eval(toks[0]))
COMMA = Literal(',')
# Bracketed constant list; trailing comma is tolerated.
LIST = Literal('[') + CONSTANT + ZeroOrMore(COMMA + CONSTANT) + Optional(
    COMMA) + Literal(']')
# Identifier: bare word or '[quoted]' form; both wrapped in a Where node.
IDENTIFIER = MatchFirst(
    [Word(alphas, alphanums), QuotedString('[', endQuoteChar=']')])
IDENTIFIER.setParseAction(lambda toks: Where(toks[0]))
VALUE = CONSTANT | IDENTIFIER
# Comparison operators; '=' and '==' are both accepted via the regex.
OPERATOR = MatchFirst([
    Regex(r'==?'),
    Literal('!='),
    Literal('<>'),
    Literal('>='),
    Literal('<='),
    Literal('>'),
    Literal('<'),
    CaselessKeyword('like'),
])
# Wire up the r-value expression grammar and register contract alternatives.
# NOTE(review): `operand`, `rvalue`, `simple_contract` etc. are Forward
# declarations created earlier in the enclosing scope (outside this chunk).
operand.setName('r-value')
op = operatorPrecedence
# op = myOperatorPrecedence
rvalue << op(operand, [
    ('-', 1, opAssoc.RIGHT, Unary.parse_action),
    ('*', 2, opAssoc.LEFT, Binary.parse_action),
    ('-', 2, opAssoc.LEFT, Binary.parse_action),
    ('+', 2, opAssoc.LEFT, Binary.parse_action),
])
# I want
# - BindVariable to have precedence to EqualTo(VariableRef)
# but I also want:
# - Arithmetic to have precedence w.r.t BindVariable
# last is variables
add_contract(misc_variables_contract)
add_contract(int_variables_contract)
# Any r-value used where a contract is expected becomes an EqualTo check.
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))
hardwired = MatchFirst(ParsingTmp.contract_types)
hardwired.setName('Predefined contract expression')
simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')
any_contract = composite_contract | simple_contract
any_contract.setName('Any simple or composite contract')
contract_expression << (any_contract)  # Parentheses before << !!
def valid_address(addr):
    """Address validator/parser.

    Normalizes whitespace and capitalization in *addr*, then parses it with a
    pyparsing grammar into street / city / state / zip components.

    :param addr: free-form US street address string
    :returns: the pyparsing ParseResults for the address
    :raises pyparsing.ParseException: if the string does not match the grammar

    Bug fix: the adjacent string literals building ``states_abbr`` lacked
    separating spaces, fusing "MI MN"+"MS MO" into "MNMS" and "VT VA"+"WA WV"
    into "VAWA", so the abbreviations MN, MS, VA and WA could never match.
    """
    # Sanitize-ish: collapse runs of whitespace and title-case each word.
    multispace = re.compile("\s+")
    addr = multispace.sub(" ", addr)
    addr_split = [
        x for x in addr.title().split(" ") if x not in ["", '', None]
    ]
    sane_addr = []
    for w in addr_split:
        if len(w) == 2:
            w = w.upper()
            # NOTE(review): two-letter compass directions are dropped here
            # even though the grammar below (nsew) can parse them — confirm
            # this is intentional.
            if w not in ["N", "S", "E", "W", "NW", "NE", "SW", "SE"]:
                sane_addr.append(w)
        else:
            sane_addr.append(w)
    addr = " ".join(sane_addr)
    log.debug("Address: %s" % addr)
    from pyparsing import (
        oneOf, CaselessLiteral, Optional, originalTextFor, Combine, Word,
        nums, alphas, White, FollowedBy, MatchFirst, Keyword, OneOrMore,
        Regex, alphanums, Suppress
    )
    # Spelled-out house numbers ("One", "Twenty-Two", ...).
    units = oneOf(
        "Zero One Two Three Four Five Six Seven Eight Nine Ten "
        "Eleven Twelve Thirteen Fourteen Fifteen Sixteen Seventeen "
        "Eighteen Nineteen", caseless=True
    )
    tens = oneOf(
        "Ten Twenty Thirty Forty Fourty Fifty Sixty Seventy Eighty Ninety",
        caseless=True
    )
    hundred = CaselessLiteral("Hundred")
    thousand = CaselessLiteral("Thousand")
    OPT_DASH = Optional("-")
    numberword = (((
        units + OPT_DASH + Optional(thousand) + OPT_DASH +
        Optional(units + OPT_DASH + hundred) + OPT_DASH + Optional(tens)
    ) ^ tens) + OPT_DASH + Optional(units))
    # number can be any of the forms 123, 21B, 222-A or 23 1/2
    housenumber = originalTextFor(
        numberword | Combine(
            Word(nums) +
            Optional(OPT_DASH + oneOf(list(alphas)) + FollowedBy(White()))
        ) + Optional(OPT_DASH + "1/2")
    )
    numberSuffix = oneOf("st th nd rd", caseless=True).setName("numberSuffix")
    streetnumber = originalTextFor(
        Word(nums) + Optional(OPT_DASH + "1/2") + Optional(numberSuffix)
    )
    # just a basic word of alpha characters, Maple, Main, etc.
    name = ~numberSuffix + Word(alphas)
    # types of streets - extend as desired
    type_ = Combine(MatchFirst(map(
        Keyword,
        "Street St ST Boulevard Blvd Lane Ln LN Road Rd RD Avenue Ave AVE "
        " Circle Cir Cove Cv Drive Dr DR Parkway Pkwy PKWY Court Ct Square Sq "
        "Loop Lp LP".split()
    )) + Optional(".").suppress())
    # street direction prefix/suffix, optionally followed by a period
    nsew = Combine(
        oneOf("N S E W North South East West NW NE SW SE", caseless=True) +
        Optional(".")
    )
    streetName = (
        Combine(
            Optional(nsew) + streetnumber + Optional("1/2") +
            Optional(numberSuffix), joinString=" ", adjacent=False
        )
        ^ Combine(
            ~numberSuffix + OneOrMore(~type_ + Combine(
                Word(alphas) + Optional(".") + Optional(",")
            )), joinString=" ", adjacent=False
        )
        ^ Combine("Avenue" + Word(alphas), joinString=" ", adjacent=False)
    ).setName("streetName")
    # PO Box handling: "P.O.", "PO", "APO", "AFP" with optional periods/spaces
    acronym = lambda s: Regex(r"\.?\s*".join(s)+r"\.?")
    poBoxRef = (
        (acronym("PO") | acronym("APO") | acronym("AFP")) +
        Optional(CaselessLiteral("BOX"))
    ) + Word(alphanums)("boxnumber")
    # basic street address
    streetReference = \
        streetName.setResultsName("name") + \
        Optional(type_).setResultsName("type")
    direct = housenumber.setResultsName("number") + streetReference
    intersection = (
        streetReference.setResultsName("crossStreet") +
        ('@' | Keyword("and", caseless=True)) +
        streetReference.setResultsName("street")
    )
    suiteRef = (
        oneOf("Suite Ste Apt Apartment Room Rm #", caseless=True) +
        Optional(".") + Word(alphanums+'-')
    )
    streetAddress = ((
        poBoxRef("street")
        ^ (direct + Optional(suiteRef)).setResultsName("street")
        ^ (streetReference + Optional(suiteRef)).setResultsName("street")
        ^ intersection
    ) + Optional(Suppress(',') + Optional(Suppress('.')))
    ).setResultsName("streetAddress")
    city = (
        OneOrMore(Word(alphas)) + Optional(Suppress(","))
    ).setResultsName("city")
    # Two-letter USPS abbreviations.  The trailing spaces on each literal are
    # required — without them adjacent literals fuse ("...MI MNMS MO...").
    states_abbr = oneOf(
        "AL AK AZ AR CA CO CT DE FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN "
        "MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA "
        "WA WV WI WY",
        caseless=True
    )
    # NOTE(review): this list omits several states (e.g. New York is present
    # but North/South pairs are incomplete relative to the 50 states) — extend
    # if full coverage is needed.
    state_names = oneOf(
        ["Alabama",
         "Alaska", "Arizona", "Arkansas", "California", "Colorado",
         "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
         "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
         "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
         "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "Ohio",
         "Oklahoma", "Oregon", "Pennsylvania", "Tennessee", "Texas", "Utah",
         "Vermont", "Virginia", "Washington", "Wisconsin", "Wyoming",
         "New Hampshire", "New Jersey", "New Mexico", "New York",
         "North Carolina", "North Dakota", "Rhode Island", "South Carolina",
         "South Dakota", "West Virginia"],
        caseless=True
    )
    state = (
        states_abbr.setResultsName("state")
        ^ state_names.setResultsName("state")
    ) + Optional(".") + Optional(",")
    zipCode = Word(nums).setResultsName("zip")
    us_address = (
        streetAddress + city + state + zipCode
    ).parseString(addr)
    log.debug("Parsed address: %s" % us_address)
    return us_address
# Markers that occur as open/close pairs and take a '+'-joined value.
plusPairTokens = ['f', 'x']
for pt in plusPairTokens:
    knownTokens.append(usfmTokenValue(pt, plus))
    knownTokens.append(usfmEndToken(pt))
# Markers that occur as pairs and take a free-text phrase value.
phrasePairTokens = ['fr']
for pt in phrasePairTokens:
    knownTokens.append(usfmTokenValue(pt, phrase))
    knownTokens.append(usfmEndToken(pt))
# Fallbacks: escaped backslash, plain text runs, then anything else.
knownTokens.append(usfmBackslashToken("\\\\"))
knownTokens.append(textBlock)
knownTokens.append(unknown)
# A USFM document is one or more of any known token, first match wins.
usfm = OneOrMore(MatchFirst(knownTokens))


#
# PARSING
#


def parseString(unicodeString):
    """Tokenize a USFM string and return a list of token objects.

    NOTE(review): Python 2 code (`print e`).  On any parse failure the whole
    process exits via sys.exit() — deliberate for this tool, but hostile as a
    library function.
    """
    try:
        s = clean(unicodeString)
        tokens = usfm.parseString(s, parseAll=True)
    except Exception as e:
        print e
        print repr(unicodeString[:50])
        sys.exit()
    return [createToken(t) for t in tokens]
def __init__(self):
    """Build the pyparsing grammar for the search query language.

    Parse actions mutate ``self.frame`` (a per-clause scratch dict) as side
    effects while parsing; frame-management helpers (``push_frame``,
    ``and_frame``, ``or_frame``, ``time_frame``, ``range_frame``) fold frames
    into ``self.json_query``.  The entry point grammar is ``self.sentence``.
    """
    self.json_query = {'query': {}, 'and': [], 'or': []}
    self.tokens = None
    #--------------------------------------------------------------------------------------
    # <integer> ::= 0-9
    # <double>  ::= 0-9 ('.' 0-9)
    # <number>  ::= <integer> | <double>
    #--------------------------------------------------------------------------------------
    integer = Regex(r'-?[0-9]+')  # Word matches space for some reason
    # NOTE(review): the '.' here is unescaped, so this also matches e.g.
    # "1x2" — presumably r'-?[0-9]+\.?[0-9]*' was intended; confirm.
    double = Regex(r'-?[0-9]+.?[0-9]*')
    number = double | integer
    #--------------------------------------------------------------------------------------
    # <python-string>   ::= (String surrounded by double-quotes)
    # <wildcard-string> ::= <python-string>
    # <limited-string>  ::= '"' a..z A..Z 0..9 _ . '"'
    # <field>           ::= <limited-string> | "*"
    # <coords>          ::= "LAT" <number> "LON" <number>
    # <units>           ::= ('km' | 'mi')
    # <distance>        ::= <number> <units>
    #--------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(
        r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(
            removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(
        removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral(
        "LON") + number
    units = CaselessLiteral('km') | CaselessLiteral('mi')
    distance = number + units
    distance.setParseAction(lambda x: self.frame.update({
        'dist': float(x[0]),
        'units': x[1]
    }))
    #--------------------------------------------------------------------------------------
    # Date — just a quoted string; interpretation happens in time_frame().
    #--------------------------------------------------------------------------------------
    date = python_string
    #--------------------------------------------------------------------------------------
    # <query-filter>  ::= "FILTER" <python-string>
    # <index-name>    ::= <python-string>
    # <resource-id>   ::= quoted a..z A..Z 0..9 $ _ -
    # <collection-id> ::= <resource-id>
    #--------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(
        lambda x: self.frame.update({'filter': x[1]}))
    # NOTE(review): MatchFirst of a single expression is a no-op wrapper;
    # presumably just used to get a distinct element for the parse action.
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x: self.frame.update({'index': x[0]}))
    resource_id = Regex(
        r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')'
    ).setParseAction(removeQuotes)
    collection_id = resource_id
    #--------------------------------------------------------------------------------------
    # <from-statement> ::= "FROM" <number>
    # <to-statement>   ::= "TO" <number>
    #--------------------------------------------------------------------------------------
    from_statement = CaselessLiteral("FROM") + number
    from_statement.setParseAction(
        lambda x: self.frame.update({'from': x[1]}))
    to_statement = CaselessLiteral("TO") + number
    to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))
    #--------------------------------------------------------------------------------------
    # <date-from-statement> ::= "FROM" <date>
    # <date-to-statement>   ::= "TO" <date>
    #--------------------------------------------------------------------------------------
    date_from_statement = CaselessLiteral("FROM") + date
    date_from_statement.setParseAction(
        lambda x: self.frame.update({'from': x[1]}))
    date_to_statement = CaselessLiteral("TO") + date
    date_to_statement.setParseAction(
        lambda x: self.frame.update({'to': x[1]}))
    #--------------------------------------------------------------------------------------
    # <time-query> ::= "TIME" ["FROM" <date>] ["TO" <date>]
    #--------------------------------------------------------------------------------------
    time_query = CaselessLiteral("TIME") + Optional(
        date_from_statement) + Optional(date_to_statement)
    time_query.setParseAction(lambda x: self.time_frame())
    # (dead code, kept for reference)
    # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))
    #--------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>]
    #--------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + Optional(
        from_statement) + Optional(to_statement)
    # Add the range to the frame object
    range_query.setParseAction(lambda x: self.range_frame())
    #--------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    #--------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral(
        "DISTANCE") + distance + CaselessLiteral("FROM") + coords
    geo_distance.setParseAction(
        lambda x: self.frame.update({
            'lat': float(x[5]),
            'lon': float(x[7])
        }))
    geo_bbox = CaselessLiteral("BOX") + CaselessLiteral(
        "TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
    geo_bbox.setParseAction(lambda x: self.frame.update({
        'top_left': [float(x[5]), float(x[3])],
        'bottom_right': [float(x[10]), float(x[8])]
    }))
    #--------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query>  ::= "IS" <field-query>
    # <fuzzy-query> ::= "LIKE" <field-query>
    # <geo-query>   ::= "GEO" ( <geo-distance> | <geo-bbox> )
    #--------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    term_query.setParseAction(lambda x: self.frame.update({'value': x[1]}))
    geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)
    fuzzy_query = CaselessLiteral("LIKE") + field_query
    fuzzy_query.setParseAction(
        lambda x: self.frame.update({'fuzzy': x[1]}))
    #--------------------------------------------------------------------------------------
    # <limit-parameter>  ::= "LIMIT" <integer>
    # <depth-parameter>  ::= "DEPTH" <integer>
    # <order-parameter>  ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter>  ::= <limit-parameter> | <order-parameter> | <offset-parameter>
    #--------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(
        lambda x: self.frame.update({'limit': int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(
        lambda x: self.frame.update({'depth': int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral(
        "BY") + limited_string
    order_parameter.setParseAction(
        lambda x: self.frame.update({'order': {
            x[2]: 'asc'
        }}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(
        lambda x: self.frame.update({'offset': int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter
    #--------------------------------------------------------------------------------------
    # <search-query>      ::= "SEARCH" <field> (<range-query>|<term-query>|<fuzzy-query>|<time-query>|<geo-query>) "FROM" <index-name> [<query-parameter>]*
    # <collection-query>  ::= "IN" <collection-id>
    # <association-query> ::= "BELONGS TO" <resource-id> [<depth-parameter>]
    # <owner-query>       ::= "HAS" <resource-id> [<depth-parameter>]
    # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
    #--------------------------------------------------------------------------------------
    search_query = CaselessLiteral("SEARCH") + field + (
        range_query | term_query | fuzzy_query | time_query | geo_query
    ) + CaselessLiteral("FROM") + index_name + query_parameter * (0, None)
    # Add the field to the frame object
    search_query.setParseAction(
        lambda x: self.frame.update({'field': x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(
        lambda x: self.frame.update({'collection': x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral(
        "TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(
        lambda x: self.frame.update({'association': x[2]}))
    owner_query = CaselessLiteral("HAS") + resource_id + Optional(
        depth_parameter)
    owner_query.setParseAction(
        lambda x: self.frame.update({'owner': x[1]}))
    query = search_query | association_query | collection_query | owner_query
    #--------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom>          ::= <query>
    # <intersection>  ::= "AND" <atom>
    # <union>         ::= "OR" <atom>
    # <sentence>      ::= <primary-query> [<intersection>]* [<union>]*
    #--------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x: self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x: self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x: self.or_frame())
    self.sentence = primary_query + (intersection ^ union) * (0, None)
from .grammer import *
from pyparsing import delimitedList, MatchFirst, Word, Regex

# A column reference: either a single column number, or a bracketed,
# comma-separated list of column numbers (combined into one string).
column = Word(numbers)
_columns = delimitedList(column, combine=True)
_list_columns = openBracket + _columns + closeBracket
choice_columns = MatchFirst([column, _list_columns])

# A (possibly fractional) numeric literal.
# Bug fix: the previous pattern r'\d*\.?\d*' could match the empty string
# (and a lone '.'), permitting zero-width matches; the pattern now requires
# at least one digit while still accepting '5', '5.', '5.5' and '.5'.
decimal = Regex(r'\d+\.?\d*|\.\d+')
class DnstestParser:
    """Parses natural-language-like grammar describing DNS changes.

    Class-level attributes build the pyparsing grammar once at class creation;
    ``line_parser`` is the combined entry point used by :meth:`parse_line`.
    """

    # human-readable descriptions of each supported command, built up below
    grammar_strings = []

    # implement my grammar
    word = Word(alphas)
    value = Word(alphanums).setResultsName("value")
    # operation keywords; each tags the result as "operation"
    add_op = Keyword("add").setResultsName("operation")
    rm_op = Keyword("remove").setResultsName("operation")
    rename_op = Keyword("rename").setResultsName("operation")
    change_op = Keyword("change").setResultsName("operation")
    confirm_op = Keyword("confirm").setResultsName("operation")
    # optional noise words: "record"/"entry"/"name", "with value/address/target"
    rec_op = Or([Keyword("record"), Keyword("entry"), Keyword("name")])
    val_op = Optional(Keyword("with")) + Or(
        [Keyword("value"), Keyword("address"), Keyword("target")])
    fqdn = Regex(
        "(([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9])(\.([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9]))*)"
    )
    ipaddr = Regex(
        "((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}(1[0-9]{2}|2[0-4][0-9]|25[0-5]|[1-9][0-9]|[0-9]))"
    )
    hostname = Regex("([a-zA-Z0-9_\-]{0,62}[a-zA-Z0-9])")
    # a hostname/FQDN that is explicitly NOT an IP address
    hostname_or_fqdn = And([NotAny(ipaddr), MatchFirst([fqdn, hostname])])
    hostname_fqdn_or_ip = MatchFirst([ipaddr, fqdn, hostname])
    grammar_strings.append(
        'add (record|name|entry)? <hostname_or_fqdn> (with ?)(value|address|target)? <hostname_fqdn_or_ip>'
    )
    cmd_add = add_op + Optional(rec_op) + hostname_or_fqdn.setResultsName(
        "hostname") + Suppress(val_op) + hostname_fqdn_or_ip.setResultsName(
            'value')
    grammar_strings.append('remove (record|name|entry)? <hostname_or_fqdn>')
    cmd_remove = rm_op + Optional(rec_op) + hostname_fqdn_or_ip.setResultsName(
        "hostname")
    grammar_strings.append(
        'rename (record|name|entry)? <hostname_or_fqdn> (with ?)(value ?) <value> to <hostname_or_fqdn>'
    )
    cmd_rename = rename_op + Suppress(Optional(
        rec_op)) + hostname_or_fqdn.setResultsName("hostname") + Suppress(
            Optional(val_op)) + hostname_fqdn_or_ip.setResultsName(
                'value') + Suppress(
                    Keyword("to")) + hostname_or_fqdn.setResultsName('newname')
    grammar_strings.append(
        'change (record|name|entry)? <hostname_or_fqdn> to <hostname_fqdn_or_ip>'
    )
    cmd_change = change_op + Suppress(Optional(
        rec_op)) + hostname_or_fqdn.setResultsName("hostname") + Suppress(
            Keyword("to")) + hostname_fqdn_or_ip.setResultsName('value')
    grammar_strings.append('confirm (record|name|entry)? <hostname_or_fqdn>')
    cmd_confirm = confirm_op + Suppress(
        Optional(rec_op)) + hostname_or_fqdn.setResultsName("hostname")
    # Or() tries all alternatives and keeps the longest match
    line_parser = Or(
        [cmd_confirm, cmd_add, cmd_remove, cmd_rename, cmd_change])

    def __init__(self):
        pass

    def parse_line(self, line):
        """Parse one command line; return a flat dict of named results.

        :raises pyparsing.ParseException: if the line matches no command
        """
        res = self.line_parser.parseString(line, parseAll=True)
        d = res.asDict()
        # hostname_or_fqdn using And and NotAny now returns a ParseResults object instead of a string,
        # we need to convert that to a string to just take the first value
        for i in d:
            if isinstance(d[i], ParseResults):
                d[i] = d[i][0]
        return d

    def get_grammar(self):
        """ return a list of possible grammar options """
        return self.grammar_strings
def parse_algebra(self):
    """
    Parse an algebraic expression into a tree.

    Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to
    reflect parenthesis and order of operations.

    Leave all operators in the tree and do not parse any strings of numbers
    into their float versions.

    Adding the groups and result names makes the `repr()` of the result
    really gross. For debugging, use something like
      print OBJ.tree.asXML()
    """
    # 0.33 or 7 or .34 or 16.
    number_part = Word(nums)
    inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part)
    # pyparsing allows spaces between tokens--`Combine` prevents that.
    inner_number = Combine(inner_number)
    # SI suffixes and percent.
    number_suffix = MatchFirst(Literal(k) for k in SUFFIXES.keys())
    # 0.33k or 17
    plus_minus = Literal('+') | Literal('-')
    number = Group(
        Optional(plus_minus) +
        inner_number +
        Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) +
        Optional(number_suffix)
    )
    number = number("number")
    # Predefine recursive variables.
    expr = Forward()
    # Handle variables passed in. They must start with letters/underscores
    # and may contain numbers afterward.
    inner_varname = Word(alphas + "_", alphanums + "_")
    varname = Group(inner_varname)("variable")
    varname.setParseAction(self.variable_parse_action)
    # Same thing for functions.
    function = Group(inner_varname + Suppress("(") + expr + Suppress(")"))("function")
    function.setParseAction(self.function_parse_action)
    atom = number | function | varname | "(" + expr + ")"
    atom = Group(atom)("atom")
    # Do the following in the correct order to preserve order of operation:
    # ^ binds tightest, then ||, then * /, then + -.
    pow_term = atom + ZeroOrMore("^" + atom)
    pow_term = Group(pow_term)("power")
    par_term = pow_term + ZeroOrMore('||' + pow_term)  # 5k || 4k
    par_term = Group(par_term)("parallel")
    prod_term = par_term + ZeroOrMore((Literal('*') | Literal('/')) + par_term)  # 7 * 5 / 4
    prod_term = Group(prod_term)("product")
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(plus_minus + prod_term)  # -5 + 4 - 3
    sum_term = Group(sum_term)("sum")
    # Finish the recursion.
    expr << sum_term  # pylint: disable=W0104
    self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
from pyparsing import pyparsing_common as ppc
from pyparsing import Word, alphas, alphanums, CaselessKeyword
from pyparsing import MatchFirst, Forward, ZeroOrMore

#>>>>>>>>>>>>>>> BASICS DEFINITIONS>>>>>>>>>>>>>>>>>>>>>>>
# Defining the reserved words of the language description.
PROGRAMA, CAR, INT, RETORNE = map(CaselessKeyword,
                                  "programa car int retorne".split())
ESCREVA, NOVALINHA, SE, ENTAO = map(CaselessKeyword,
                                    "escreva novalinha se entao".split())
# Bug fix: ';' was previously included in this list and unpacked into
# TERMINATOR as a CaselessKeyword — the wrong construct for punctuation —
# and that binding was immediately shadowed by Word(';') below anyway.
SENAO, ENQUANTO, EXECUTE, LEIA = map(CaselessKeyword,
                                     "senao enquanto execute leia".split())

keywords = MatchFirst(
    (PROGRAMA, CAR, INT, RETORNE, ESCREVA, NOVALINHA, SE, ENTAO, SENAO,
     ENQUANTO, EXECUTE, LEIA)).setName("Reserved Words")

# Statement terminator character.
TERMINATOR = Word(";").setName("Terminator")

# Numeric literals.
realNum = ppc.real().setName("Real Number")
intNum = ppc.signed_integer().setName("Integer Number")

# Identifiers: leading letter, then letters/digits/underscore/dollar.
identifier = Word(alphas, alphanums + "_$").setName("Identifier")

# Type names of the language.
Type = (INT | CAR).setName("Type")
def __init__(
    self,
    annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
    annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
    citation_clearing: bool = True,
    required_annotations: Optional[List[str]] = None,
) -> None:
    """Initialize the control statement parser.

    Builds the pyparsing grammar for SET/UNSET control statements; each
    sub-expression is given a parse action (handler method) that mutates
    this parser's state.

    :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
    :param annotation_to_pattern: A dictionary of {annotation: regular expression string}
    :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
    :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
    :param required_annotations: Annotations that are required
    """
    self.citation_clearing = citation_clearing

    # Normalize None to empty mappings so lookups never need a None check.
    self.annotation_to_term = annotation_to_term or {}
    self.annotation_to_pattern = annotation_to_pattern or {}
    self.annotation_to_local = annotation_to_local or {}

    # Mutable parse state, filled in / cleared by the handle_* callbacks.
    self.statement_group = None
    self.citation_db = None
    self.citation_db_id = None
    self.evidence = None
    self.annotations = {}
    self.required_annotations = required_annotations or []

    # An annotation key is an identifier; validated by its handler.
    annotation_key = ppc.identifier('key').setParseAction(
        self.handle_annotation_key)

    # --- SET statement alternatives -----------------------------------
    self.set_statement_group = set_statement_group_stub().setParseAction(
        self.handle_set_statement_group)
    self.set_citation = set_citation_stub.setParseAction(
        self.handle_set_citation)
    self.set_evidence = set_evidence_stub.setParseAction(
        self.handle_set_evidence)

    # SET <key> = <value>  /  SET <key> = {<values>, ...}
    set_command_prefix = And([annotation_key('key'), Suppress('=')])
    self.set_command = set_command_prefix + qid('value')
    self.set_command.setParseAction(self.handle_set_command)
    self.set_command_list = set_command_prefix + delimited_quoted_list(
        'values')
    self.set_command_list.setParseAction(self.handle_set_command_list)

    # --- UNSET statement alternatives ---------------------------------
    self.unset_command = annotation_key('key')
    # addParseAction (not set) keeps annotation_key's validation action too.
    self.unset_command.addParseAction(self.handle_unset_command)
    self.unset_evidence = supporting_text_tags(EVIDENCE)
    self.unset_evidence.setParseAction(self.handle_unset_evidence)
    self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
    self.unset_citation.setParseAction(self.handle_unset_citation)
    self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
    self.unset_statement_group.setParseAction(
        self.handle_unset_statement_group)
    self.unset_list = delimited_unquoted_list('values')
    self.unset_list.setParseAction(self.handle_unset_list)
    self.unset_all = unset_all.setParseAction(self.handle_unset_all)

    # Full statements: the SET/UNSET tag followed by the first matching
    # alternative. Order matters: specific forms come before the generic
    # key(/value) forms.
    self.set_statements = set_tag('action') + MatchFirst([
        self.set_statement_group,
        self.set_citation,
        self.set_evidence,
        self.set_command,
        self.set_command_list,
    ])
    self.unset_statements = unset_tag('action') + MatchFirst([
        self.unset_all,
        self.unset_citation,
        self.unset_evidence,
        self.unset_statement_group,
        self.unset_command,
        self.unset_list,
    ])
    self.language = self.set_statements | self.unset_statements
    super(ControlParser, self).__init__(self.language)
operand = number | int_variables_ref | misc_variables_ref operand.setName('r-value') rvalue << myOperatorPrecedence(operand, [ ('-', 1, opAssoc.RIGHT, Unary.parse_action), ('*', 2, opAssoc.LEFT, Binary.parse_action), ('-', 2, opAssoc.LEFT, Binary.parse_action), ('+', 2, opAssoc.LEFT, Binary.parse_action), ]) # I want # - BindVariable to have precedence to EqualTo(VariableRef) # but I also want: # - Arithmetic to have precedence w.r.t BindVariable # last is variables add_contract(misc_variables_contract) add_contract(int_variables_contract) add_contract(rvalue.copy().setParseAction(EqualTo.parse_action)) hardwired = MatchFirst(ParsingTmp.contract_types) hardwired.setName('Predefined contract expression') simple_contract << (hardwired | identifier_contract) simple_contract.setName('simple contract expression') any_contract = composite_contract | simple_contract any_contract.setName('Any simple or composite contract') contract_expression << (any_contract) # Parentheses before << !!
def create(cls, base_shader_path, base_texture_path):
    """
    Create a Stanford polygon file parser (PLY).

    Builds the pyparsing grammar for a PLY header (format statement,
    comment lines, element/property declarations) and returns a new
    instance of ``cls`` wrapping that grammar.

    :param base_shader_path: base path for shader files named in header
        comments -- presumably resolved by the instance later; TODO confirm.
    :param base_texture_path: base path for texture files named in header
        comments -- presumably resolved by the instance later; TODO confirm.
    :return: a new ``cls`` instance built from the header grammar.
    """
    # Define the base patterns for parsing
    real = pyparsing_common.real()
    integer = pyparsing_common.integer()

    # Define how the header portion begins and ends
    start_keyword = cls._or(cls.begin_header_keyword, suppress=True)
    stop_keyword = cls._or(cls.end_header_keyword, suppress=True)

    # Define the grammar of a comment statement. Special comments carry
    # shader/texture file names; everything else is an "other" comment.
    comment_keyword = cls._or(cls.comment_keyword, suppress=True)
    vertex_shader_comment = Group(
        comment_keyword + Suppress(CaselessKeyword("VertexShaderFile")) +
        Word(alphanums + ".-_"))("vertex_shader_file")
    fragment_shader_comment = Group(
        comment_keyword + Suppress(CaselessKeyword("FragmentShaderFile")) +
        Word(alphanums + ".-_"))("fragment_shader_file")
    texture_comment = Group(comment_keyword +
                            Suppress(CaselessKeyword("TextureFile")) +
                            Word(alphanums + ".-_"))("texture_file")
    # NOTE(review): only "TextureFile" is excluded here; the shader comments
    # are kept out of this branch solely by alternation order below -- confirm.
    other_comment = comment_keyword + NotAny("TextureFile") + Word(
        printables + " ")

    # Define the grammar of a format statement
    format_keyword = cls._or(cls.format_keyword, suppress=True)
    format_type = cls._or(cls.format_type_map)
    format_expr = Group(format_keyword + format_type("file_type") +
                        real("version"))("format")

    # Define the grammar of properties. `psp` is the shared
    # "property <data_type>" scalar-property prefix.
    property_keyword = cls._or(cls.property_keyword, suppress=True)
    list_keyword = cls._or(cls.list_keyword, suppress=True)
    property_type = cls._or(cls.data_type_map)
    psp = property_keyword + property_type("data_type")

    # Vertex position: x, y, z.
    position_keywords = [cls._or(k) for k in ("x", "y", "z")]
    property_position = cls._aggregate_property("position", psp,
                                                *position_keywords)

    # Vertex color: r/g/b (long or short names), with optional alpha.
    property_color = Group(
        And([
            Group(psp + MatchFirst((CaselessKeyword("r"),
                                    CaselessKeyword("red")))("name")),
            Group(psp + MatchFirst((CaselessKeyword("g"),
                                    CaselessKeyword("green")))("name")),
            Group(psp + MatchFirst((CaselessKeyword("b"),
                                    CaselessKeyword("blue")))("name")),
            Optional(
                Group(psp + MatchFirst((CaselessKeyword("a"),
                                        CaselessKeyword("alpha")))("name")),
            )
        ]))("color")

    # Material color channels (ambient/diffuse/specular), each RGBA.
    ambient_keywords = [
        cls._or(k) for k in ("ambient_red", "ambient_green", "ambient_blue",
                             "ambient_alpha")
    ]
    property_ambient_color = cls._aggregate_property(
        "ambient_color", psp, *ambient_keywords)

    diffuse_keywords = [
        cls._or(k) for k in ("diffuse_red", "diffuse_green", "diffuse_blue",
                             "diffuse_alpha")
    ]
    property_diffuse_color = cls._aggregate_property(
        "diffuse_color", psp, *diffuse_keywords)

    specular_keywords = [
        cls._or(k) for k in ("specular_red", "specular_green",
                             "specular_blue", "specular_alpha")
    ]
    property_specular_color = cls._aggregate_property(
        "specular_color", psp, *specular_keywords)

    # Texture coordinates: each axis has several accepted aliases.
    texture_keywords = [
        cls._or(*k) for k in (("s", "u", "tx"), ("t", "v", "ty"))
    ]
    property_texture = cls._aggregate_property("texture", psp,
                                               *texture_keywords)

    # Vertex normals: nx, ny, nz.
    normal_keywords = [cls._or(k) for k in ("nx", "ny", "nz")]
    property_normal = cls._aggregate_property("normal", psp,
                                              *normal_keywords)

    # Scalar material properties.
    power_keywords = [CaselessKeyword("specular_power")]
    property_specular_power = cls._aggregate_property(
        "specular_power", psp, *power_keywords)

    opacity_keywords = [CaselessKeyword("opacity")]
    property_opacity = cls._aggregate_property("opacity", psp,
                                               *opacity_keywords)

    # `plp` is the list-property prefix: "property list <index> <data>".
    plp = property_keyword + list_keyword + property_type(
        "index_type") + property_type("data_type")

    vertex_index_keywords = [cls._or("vertex_index", "vertex_indices")]
    property_vertex_index = cls._aggregate_property(
        "vertex_index", plp, *vertex_index_keywords)

    material_index_keywords = [
        cls._or("material_index", "material_indices")
    ]
    property_material_index = cls._aggregate_property(
        "material_index", plp, *material_index_keywords)

    # Define the grammar of elements: "element vertex N" / "element face N"
    # followed by that element's property declarations.
    element_keyword = cls._or(cls.element_keyword, suppress=True)
    element_vertex = Group(
        element_keyword + CaselessKeyword("vertex")("name") +
        integer("count") + Group(
            OneOrMore(property_position | property_color |
                      property_ambient_color | property_diffuse_color |
                      property_specular_color | property_texture |
                      property_normal | property_specular_power |
                      property_opacity))("properties"))

    element_face = Group(element_keyword + CaselessKeyword("face")("name") +
                         integer("count") +
                         Group(property_vertex_index |
                               property_material_index)("properties"))

    element_group = element_vertex | element_face

    # Full header: format, then comments, then one or more elements.
    declarations = format_expr + \
        Group(ZeroOrMore(vertex_shader_comment | fragment_shader_comment |
                         texture_comment | other_comment))("comments") + \
        Group(OneOrMore(element_group))("elements")

    header_grammar = start_keyword + declarations + stop_keyword

    return cls(header_grammar, base_shader_path, base_texture_path)
def evaluator(variables, functions, string, cs=False):
    """
    Evaluate an expression.

    Variables are passed as a dictionary from string to value; unary
    functions as a dictionary from string to function. Variables must be
    floats. Both are merged over DEFAULT_VARIABLES / DEFAULT_FUNCTIONS.

    :param variables: dict of {name: float} merged over the defaults.
    :param functions: dict of {name: unary callable} merged over the defaults.
    :param string: the expression to evaluate.
    :param cs: case sensitive name matching when True.
    :return: the numeric result, or float('nan') for a blank expression.
    """
    all_variables = copy.copy(DEFAULT_VARIABLES)
    all_functions = copy.copy(DEFAULT_FUNCTIONS)
    all_variables.update(variables)
    all_functions.update(functions)

    if not cs:
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal

    # BUG FIX: `set(a.keys() + b.keys())` raises TypeError on Python 3
    # because dict views do not support `+`. The union below is equivalent
    # on Python 2 and correct on Python 3.
    check_variables(string_cs, set(all_variables) | set(all_functions))

    if string.strip() == "":
        return float('nan')

    # SI suffixes and percent
    number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()])
    plus_minus = Literal('+') | Literal('-')
    times_div = Literal('*') | Literal('/')

    number_part = Word(nums)

    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part)
    # by default pyparsing allows spaces between tokens--Combine prevents that
    inner_number = Combine(inner_number)

    # 0.33k or -17
    number = (inner_number
              + Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part)
              + Optional(number_suffix))
    number.setParseAction(number_parse_action)  # Convert to number

    # Predefine recursive variables
    expr = Forward()

    # Handle variables passed in.
    # E.g. if we have {'R':0.5}, we make the substitution.
    # We sort the list so that var names (like "e2") match before
    # mathematical constants (like "e"). This is kind of a hack.
    all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True)
    varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys])
    varnames.setParseAction(
        lambda x: [all_variables[k] for k in x]
    )
    # if all_variables were empty, then pyparsing wants
    #   varnames = NoMatch()
    # this is not the case, as all_variables contains the defaults

    # Same thing for functions.
    all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True)
    funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys])
    function = funcnames + Suppress("(") + expr + Suppress(")")
    function.setParseAction(
        lambda x: [all_functions[x[0]](x[1])]
    )

    atom = number | function | varnames | Suppress("(") + expr + Suppress(")")

    # Do the following in the correct order to preserve order of operation
    pow_term = atom + ZeroOrMore(Suppress("^") + atom)
    pow_term.setParseAction(exp_parse_action)  # 7^6
    par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term)  # 5k || 4k
    par_term.setParseAction(parallel)
    prod_term = par_term + ZeroOrMore(times_div + par_term)  # 7 * 5 / 4 - 3
    prod_term.setParseAction(prod_parse_action)
    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(plus_minus + prod_term)  # -5 + 4 - 3
    sum_term.setParseAction(sum_parse_action)
    expr << sum_term  # finish the recursion
    return (expr + stringEnd).parseString(string)[0]
def __init__(self):
    """Build the search-query grammar.

    Parse actions accumulate intermediate state into ``self.frame`` and the
    final structure into ``self.json_query``. ``self.frame`` and the
    ``*_frame``/``push_frame`` helpers are defined elsewhere on this class.
    """
    self.json_query = {'query':{}, 'and': [], 'or': []}
    self.tokens = None
    #--------------------------------------------------------------------------------------
    # <integer> ::= 0-9
    # <double> ::= 0-9 ('.' 0-9)
    # <number> ::= <integer> | <double>
    #--------------------------------------------------------------------------------------
    integer = Regex(r'-?[0-9]+') # Word matches space for some reason
    double = Regex(r'-?[0-9]+.?[0-9]*')
    number = double | integer
    #--------------------------------------------------------------------------------------
    # <python-string> ::= (String surrounded by double-quotes)
    # <wildcard-string> ::= <python-string>
    # <limited-string> ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
    # <field> ::= <limited-string> | "*"
    # <coords> ::= "LAT" <number> "LON" <number>
    # <units> ::= ('km' | 'mi' | 'nm')
    # <distance> ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
    #--------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
    # BUG FIX: the BNF above allows 'nm' but it was missing from the parser
    # (the newer rewrite of this grammar includes all three units).
    units = CaselessLiteral('km') | CaselessLiteral('nm') | CaselessLiteral('mi')
    distance = number + units
    distance.setParseAction( lambda x : self.frame.update({'dist' : float(x[0]), 'units' : x[1]}))
    #--------------------------------------------------------------------------------------
    # Date
    #--------------------------------------------------------------------------------------
    date = python_string
    #--------------------------------------------------------------------------------------
    # <query-filter> ::= "FILTER" <python-string>
    # <index-name> ::= <python-string>
    # <resource-id> ::= '"' a..z A..Z 0..9 $ _ -'"' (alpha nums surrounded by double quotes)
    # <collection-id> ::= <resource-id>
    #--------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(lambda x : self.frame.update({'filter' : x[1]}))
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x : self.frame.update({'index' : x[0]}))
    resource_id = Regex(r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
    collection_id = resource_id
    #--------------------------------------------------------------------------------------
    # <from-statement> ::= "FROM" <number>
    # <to-statement> ::= "TO" <number>
    #--------------------------------------------------------------------------------------
    from_statement = CaselessLiteral("FROM") + number
    from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
    to_statement = CaselessLiteral("TO") + number
    to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))
    #--------------------------------------------------------------------------------------
    # <date-from-statement> ::= "FROM" <date>
    # <date-to-statement> ::= "TO" <date>
    #--------------------------------------------------------------------------------------
    date_from_statement = CaselessLiteral("FROM") + date
    date_from_statement.setParseAction(lambda x : self.frame.update({'from' : x[1]}))
    date_to_statement = CaselessLiteral("TO") + date
    date_to_statement.setParseAction(lambda x : self.frame.update({'to' : x[1]}))
    #--------------------------------------------------------------------------------------
    # <time-query> ::= "TIME FROM" <date> "TO" <date>
    #--------------------------------------------------------------------------------------
    time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
    time_query.setParseAction(lambda x : self.time_frame())
    #--------------------------------------------------------------------------------------
    # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
    time_bounds.setParseAction(lambda x : self.time_bounds_frame())
    #--------------------------------------------------------------------------------------
    # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
    vertical_bounds.setParseAction(lambda x : self.vertical_bounds_frame())
    #--------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>]
    #--------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
    # Add the range to the frame object
    range_query.setParseAction(lambda x : self.range_frame())
    #--------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox> ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    #--------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
    geo_distance.setParseAction(lambda x : self.frame.update({'lat': float(x[5]), 'lon':float(x[7])}))
    geo_bbox = CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
    geo_bbox.setParseAction(lambda x : self.frame.update({'top_left':[float(x[5]),float(x[3])], 'bottom_right':[float(x[10]),float(x[8])]}))
    #--------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query> ::= "IS" <field-query>
    # <fuzzy-query> ::= "LIKE" <field-query>
    # <match-query> ::= "MATCH" <field-query>
    # <geo-query> ::= "GEO" ( <geo-distance> | <geo-bbox> )
    #--------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    term_query.setParseAction(lambda x : self.frame.update({'value':x[1]}))
    geo_query = CaselessLiteral("GEO") + ( geo_distance | geo_bbox )
    fuzzy_query = CaselessLiteral("LIKE") + field_query
    fuzzy_query.setParseAction(lambda x : self.frame.update({'fuzzy':x[1]}))
    match_query = CaselessLiteral("MATCH") + field_query
    match_query.setParseAction(lambda x : self.frame.update({'match':x[1]}))
    #--------------------------------------------------------------------------------------
    # <limit-parameter> ::= "LIMIT" <integer>
    # <depth-parameter> ::= "DEPTH" <integer>
    # <order-parameter> ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter> ::= <order-paramater> | <limit-parameter>
    #--------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(lambda x: self.json_query.update({'limit' : int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(lambda x: self.frame.update({'depth' : int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
    order_parameter.setParseAction(lambda x: self.json_query.update({'order' : {x[2] : 'asc'}}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(lambda x : self.json_query.update({'skip' : int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter
    #--------------------------------------------------------------------------------------
    # <search-query> ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
    # <collection-query> ::= "IN <collection-id>"
    # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
    # <owner-query> ::= "HAS" <resource-id> [ <depth-parameter> ]
    # <query> ::= <search-query> | <association-query> | <collection-query> | <owner-query>
    #--------------------------------------------------------------------------------------
    search_query = CaselessLiteral("SEARCH") + field + (range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name
    # Add the field to the frame object
    search_query.setParseAction(lambda x : self.frame.update({'field' : x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(lambda x : self.frame.update({'collection': x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(lambda x : self.frame.update({'association':x[2]}))
    owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
    owner_query.setParseAction(lambda x : self.frame.update({'owner':x[1]}))
    query = search_query | association_query | collection_query | owner_query
    #--------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom> ::= <query>
    # <intersection> ::= "AND" <atom>
    # <union> ::= "OR" <atom>
    # <sentence> ::= <primary-query> [<intersection>]* [<union>]*
    #--------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x : self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x : self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x : self.or_frame())
    self.sentence = primary_query + (intersection ^ union)*(0,None) + query_parameter*(0,None)
def __init__(self):
    """Build the search-query grammar.

    Parse actions accumulate intermediate state into ``self.frame`` and the
    final structure into ``self.json_query``. ``self.frame`` and the
    ``*_frame``/``push_frame`` helpers are defined elsewhere on this class.
    """
    self.json_query = {"query": {}, "and": [], "or": []}
    self.tokens = None
    # --------------------------------------------------------------------------------------
    # <integer> ::= 0-9
    # <double> ::= 0-9 ('.' 0-9)
    # <number> ::= <integer> | <double>
    # --------------------------------------------------------------------------------------
    integer = Regex(r"-?[0-9]+")  # Word matches space for some reason
    # NOTE(review): the '.' in this regex is unescaped, so it matches any
    # character, not only a decimal point -- confirm intent.
    double = Regex(r"-?[0-9]+.?[0-9]*")
    number = double | integer
    # --------------------------------------------------------------------------------------
    # <python-string> ::= (String surrounded by double-quotes)
    # <wildcard-string> ::= <python-string>
    # <limited-string> ::= '"' a..z A..Z 9..9 _ . '"' (alpha nums and ._ surrounded by double quotes)
    # <field> ::= <limited-string> | "*"
    # <coords> ::= "LAT" <number> "LON" <number>
    # <units> ::= ('km' | 'mi' | 'nm')
    # <distance> ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
    # --------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
    units = CaselessLiteral("km") | CaselessLiteral("nm") | CaselessLiteral("mi")
    distance = number + units
    distance.setParseAction(lambda x: self.frame.update({"dist": float(x[0]), "units": x[1]}))
    # --------------------------------------------------------------------------------------
    # <query-filter> ::= "FILTER" <python-string>
    # <index-name> ::= <python-string>
    # <resource-id> ::= '"' a..z A..Z 0..9 '"' (alpha nums surrounded by double quotes)
    # <collection-id> ::= <resource-id>
    # --------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(lambda x: self.frame.update({"filter": x[1]}))
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x: self.frame.update({"index": x[0]}))
    resource_id = Regex(r'("(?:[a-zA-Z0-9])*"|\'(?:[a-zA-Z0-9]*)\')').setParseAction(removeQuotes)
    collection_id = resource_id
    # --------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES FROM" <number> "TO" <number>
    # --------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + CaselessLiteral("FROM") + number + CaselessLiteral("TO") + number
    # Add the range to the frame object
    range_query.setParseAction(lambda x: self.frame.update({"range": {"from": float(x[2]), "to": float(x[4])}}))
    # --------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox> ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    # --------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
    geo_distance.setParseAction(lambda x: self.frame.update({"lat": float(x[5]), "lon": float(x[7])}))
    geo_bbox = (
        CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords
    )
    geo_bbox.setParseAction(
        lambda x: self.frame.update(
            {"top_left": [float(x[5]), float(x[3])], "bottom_right": [float(x[10]), float(x[8])]}
        )
    )
    # --------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query> ::= "IS" <field-query>
    # <geo-query> ::= "GEO" ( <geo-distance> | <geo-bbox> )
    # --------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    # Add the term to the frame object
    term_query.setParseAction(lambda x: self.frame.update({"value": x[1]}))
    geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)
    # --------------------------------------------------------------------------------------
    # <limit-parameter> ::= "LIMIT" <integer>
    # <depth-parameter> ::= "DEPTH" <integer>
    # <order-parameter> ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter> ::= <order-paramater> | <limit-parameter>
    # --------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(lambda x: self.frame.update({"limit": int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(lambda x: self.frame.update({"depth": int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
    order_parameter.setParseAction(lambda x: self.frame.update({"order": {x[2]: "asc"}}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(lambda x: self.frame.update({"offset": int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter
    # --------------------------------------------------------------------------------------
    # <search-query> ::= "SEARCH" <field> (<range-query> | <term-query> | <geo-query>) "FROM" <index-name> [<query-parameter>]*
    # <collection-query> ::= "IN <collection-id>"
    # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
    # <query> ::= <search-query> | <association-query> | <collection-query>
    # --------------------------------------------------------------------------------------
    search_query = (
        CaselessLiteral("SEARCH")
        + field
        + (range_query | term_query | geo_query)
        + CaselessLiteral("FROM")
        + index_name
        + query_parameter * (0, None)
    )
    # Add the field to the frame object
    search_query.setParseAction(lambda x: self.frame.update({"field": x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(lambda x: self.frame.update({"collection": x[1]}))
    association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter)
    # Add the association to the frame object
    association_query.setParseAction(lambda x: self.frame.update({"association": x[2]}))
    query = search_query | association_query | collection_query
    # --------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom> ::= <query>
    # <intersection> ::= "AND" <atom>
    # <union> ::= "OR" <atom>
    # <sentence> ::= <primary-query> [<intersection>]* [<union>]*
    # --------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x: self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x: self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x: self.or_frame())
    self.sentence = primary_query + (intersection ^ union) * (0, None)