def _define_read_options():
    """Grammar for the optional argument list of the READ keyword.

    :returns: pyparsing object matching ``(header=..., separator=..., types=[...])``
    """
    none_literal = oneOf('None')

    # header = <int> | True | False | None
    header_key = (Literal("header") + Literal("=")).suppress()
    header_value = MatchFirst(
        [Word(numbers), bool_true, bool_false, none_literal]
    ).setResultsName("header")
    header = header_key + header_value

    # separator = "<chars>"  (quoted; the separator may contain whitespace)
    separator_key = (Literal("separator") + Literal("=")).suppress()
    separator_value = (Quote
                       + Word(everythingWOQuotes + whitespace).setResultsName("sep")
                       + Quote)
    separator = separator_key + separator_value

    # types = [<index and type list>]
    dtypes_key = (Literal("types") + Literal("=")).suppress()
    dtypes_value = index_and_type_list.setResultsName("dtypes")
    dtypes = dtypes_key + openBracket + dtypes_value + closeBracket

    # options may appear in any order, comma separated, inside parentheses
    option = MatchFirst([header, dtypes, separator])
    return openParen + delimitedList(option, delim=',') + closeParen
def define_cluster():
    """Grammar for the CLUSTER keyword and its option list.

    :returns: pyparsing object
    """
    # algorithm = <kmeans sub-grammar>
    algo_key = (Literal("algorithm") + Literal("=")).suppress()
    algo = algo_key + MatchFirst([kmeans.define_kmeans()]).setResultsName("algorithm")

    # feature = False | AUTO | RFE  (the "feature=" prefix itself is optional)
    # NOTE(review): `feature` is built here but never added to the option list
    # below — mirroring the original code; confirm whether it should be wired in.
    feature = (Optional(CaselessLiteral("feature") + Literal("=")).suppress()
               + oneOf(["False", "AUTO", "RFE"]).setResultsName("feature"))

    # caseless keyword so multiple spellings of CLUSTER are accepted
    cluster_kw = Keyword("cluster", caseless=True).setResultsName("cluster")

    # predictors = <columns>
    preds = ((Literal("predictors") + Literal("=")).suppress()
             + choice_columns.setResultsName("predictors"))

    # label = <columns>
    labels = ((Literal("label") + Literal("=")).suppress()
              + choice_columns.setResultsName("label"))

    options = delimitedList(MatchFirst([preds, labels, algo]), delim=',')
    return cluster_kw + openParen + Optional(options) + closeParen
def _create_parser() -> ParserElement:
    """Build the full boolean filter-expression grammar.

    Terms are optionally-prefixed filter strings (``tag:x``, ``ingr:x``,
    ``unit:x``, or bare) combined with not/and/xor/or operators; adjacency
    is treated as an implicit AND.
    """
    # operators in the format later used by infixNotation
    # (None, 2, ...) is the implicit operator: two adjacent terms are AND-ed
    operator_list = [
        (None, 2, opAssoc.LEFT, BooleanAndOperation._create_from_implicit_tokens),
        (CaselessKeyword('not') | "~" | "!", 1, opAssoc.RIGHT,
         BooleanNotOperation._create_from_tokens),
        (CaselessKeyword('and') | "&", 2, opAssoc.LEFT,
         BooleanAndOperation._create_from_tokens),
        (CaselessKeyword('xor') | "^", 2, opAssoc.LEFT,
         BooleanXorOperation._create_from_tokens),
        (CaselessKeyword('or') | "|", 2, opAssoc.LEFT,
         BooleanOrOperation._create_from_tokens),
    ]
    # terms (atoms) that will be combined with the boolean operators;
    # (None, ...) is the catch-all term with no "prefix:" part
    term_list = [
        (CaselessKeyword('tag'), TagFilterTerm._create_from_tokens),
        (CaselessKeyword('ingr'), IngredientFilterTerm._create_from_tokens),
        (CaselessKeyword('unit'), UnitFilterTerm._create_from_tokens),
        (None, AnyFilterTerm._create_from_tokens),
    ]

    # extract keywords that are reserved (may not be used as bare filter words)
    operator_expressions = [om[0] for om in operator_list if om[0] is not None]
    term_expressions = [tm[0] for tm in term_list if tm[0] is not None]
    reserved_expressions = operator_expressions + term_expressions

    # quoted string indicates exact match
    quoted_filter_string = (QuotedString('"', escChar='\\')
                            | QuotedString("'", escChar='\\')).setResultsName('string')
    # quoted_filter_string.setDebug(True)
    quoted_filter_string.setName("quoted_filter_string")
    quoted_filter_string.setParseAction(ExactFilterString._create_from_tokens)

    # unquoted string is an inexact (fuzzy) match; it can't contain
    # whitespace or parentheses and must not be a reserved keyword
    unquoted_filter_string = ~MatchFirst(reserved_expressions) + Regex(
        r'[^\s\(\)]+', flags=re.U).setResultsName('string')
    # unquoted_filter_string.setDebug(True)
    unquoted_filter_string.setName("unquoted_filter_string")
    unquoted_filter_string.setParseAction(FuzzyFilterString._create_from_tokens)

    # regular expressions aren't parsed in the grammar but delegated to
    # python re.compile in the parser action
    regex_filter_string = QuotedString('/', escChar='\\')
    regex_filter_string.setName("regex_filter_string")
    regex_filter_string.setParseAction(RegexFilterString._create_from_tokens)

    # unquoted_filter_string must be last, so that initial quotes are handled correctly
    filter_string = regex_filter_string | quoted_filter_string | unquoted_filter_string
    filter_string.setParseAction(lambda toks: toks[0])

    # build one term per entry in term_list: "prefix:string" or a bare string
    filter_terms = []
    for prefix_expression, term_action in term_list:
        if prefix_expression is not None:
            filter_term = Combine(prefix_expression + ':'
                                  + filter_string.setResultsName("filter_string"))
            filter_term.setName("filter_term_" + str(prefix_expression.match))
        else:
            filter_term = filter_string.setResultsName("filter_string")
            filter_term.setName("filter_term_None")
        # filter_term.setDebug(True)
        filter_term.addParseAction(term_action)
        filter_terms.append(filter_term)
    filter_term = MatchFirst(filter_terms)

    filter_expr = infixNotation(filter_term, operator_list)
    return filter_expr
def __init__(self):
    """Build the pyparsing grammar for a SELECT statement.

    Wires up expressions, the result-column list, FROM/WHERE and the
    optional ORDER BY / LIMIT tail into ``self.select_stmt``.
    """
    self.select_stmt = Forward().setName("select statement")

    # itemName() and count(*) are matched as single atomic keywords
    self.itemName = MatchFirst(Keyword("itemName()")).setParseAction(
        self.ItemName)
    self.count = MatchFirst(Keyword("count(*)")).setParseAction(self.Count)

    # an identifier is a non-keyword word or a backtick-quoted string
    self.identifier = ((~keyword + Word(alphas, alphanums + "_"))
                       | QuotedString("`"))
    self.column_name = (self.itemName | self.identifier.copy())
    self.table_name = self.identifier.copy()
    self.function_name = self.identifier.copy()

    # expression
    self.expr = Forward().setName("expression")
    self.integer = Regex(r"[+-]?\d+")
    self.string_literal = QuotedString("'")
    self.literal_value = self.string_literal
    # atomic expression: item name, function call, literal, NULL,
    # identifier, EVERY(identifier), or a parenthesised value list
    self.expr_term = (
        self.itemName
        | self.function_name + LPAR + Optional(delimitedList(self.expr)) + RPAR
        | self.literal_value.setParseAction(self.Literal)
        | NULL.setParseAction(self.Null)
        | self.identifier.setParseAction(self.Identifier)
        | (EVERY + LPAR + self.identifier.setParseAction(self.Identifier)
           + RPAR).setParseAction(self.EveryIdentifier)
        | (LPAR + Optional(
            delimitedList(self.literal_value.setParseAction(
                self.Literal))) + RPAR).setParseAction(self.ValueList))
    # operator precedence, tightest first; the trailing parse action
    # rejects expressions that aren't comparisons
    self.expr << (operatorPrecedence(self.expr_term, [
        (NOT, UNARY, opAssoc.RIGHT, self.BoolNot),
        (oneOf('< <= > >='), BINARY, opAssoc.LEFT,
         self.BinaryComparisonOperator),
        (oneOf('= == != <>') | Group(IS + NOT) | IS | IN | LIKE, BINARY,
         opAssoc.LEFT, self.BinaryComparisonOperator),
        ((BETWEEN, AND), TERNARY, opAssoc.LEFT, self.BetweenXAndY),
        (OR, BINARY, opAssoc.LEFT, self.BoolOr),
        (AND, BINARY, opAssoc.LEFT, self.BoolAnd),
        (INTERSECTION, BINARY, opAssoc.LEFT, self.Intersection),
    ])).setParseAction(self.dont_allow_non_comparing_terms)

    self.ordering_term = (self.itemName
                          | self.identifier) + Optional(ASC | DESC)
    self.single_source = self.table_name("table")
    # result columns: *, count(*), or a comma-separated column list
    self.result_column = Group(
        "*" | self.count | delimitedList(self.column_name))("columns")
    self.select_core = (SELECT + self.result_column + FROM
                        + self.single_source
                        + Optional(WHERE + self.expr("where_expr")))
    self.select_stmt << \
        (self.select_core + Optional(ORDER + BY + Group(
            delimitedList(self.ordering_term))).setParseAction(
                self.OrderByTerms)("order_by_terms")
         + Optional(LIMIT + self.integer)("limit_terms"))
def _handle_define(self, line, token):
    """Handle a macro definition; function-like when *token* carries args."""
    # definitions inside a suppressed region are ignored entirely
    if self.suppress:
        return
    duplicate_msg = '%d: macros %s already defined!' % (line, token.name)
    if token.args:
        # function-like macro: scan its expanded body for $arg placeholders
        arg_names = token.args[0]
        placeholders = MatchFirst(
            [Keyword('$' + name).setResultsName(name) for name in arg_names])
        expanded_body = self._recurisve_expand(token.body)  # sic: project spelling
        macro = self.function_class(
            arg_names, expanded_body,
            list(placeholders.scanString(expanded_body)))
        if token.name in self.functions:
            warnings.warn(duplicate_msg)
        self.functions[token.name] = macro
    else:
        # object-like macro: expand once and store as a plain variable
        if token.name in self.variables:
            warnings.warn(duplicate_msg)
        value = self.variables[token.name] = self._recurisve_expand(
            token.value)
        # names starting with "_" are private and not reported as constants
        if not token.name.startswith("_"):
            self.on_constant(token.name, value)
def _declares_catch_for_exceptions(
        java_dest: str,
        exceptions_list: list,
        open_msg: str,
        closed_msg: str,
        exclude: list = None) -> bool:
    """Search for the declaration of catch for the given exceptions."""
    wanted_exception = MatchFirst(
        [Keyword(exception) for exception in exceptions_list])
    # Java multi-catch: ExcA | ExcB | ... inside the parentheses
    exception_group = delimitedList(expr=L_VAR_CHAIN_NAME, delim='|')
    exception_group.addCondition(
        # Ensure that at least one exception in the group is the provided one
        lambda tokens: any(wanted_exception.matches(tok) for tok in tokens))
    grammar = Suppress(Keyword('catch')) + nestedExpr(
        opener='(', closer=')',
        content=(exception_group + Suppress(Optional(L_VAR_NAME))))
    # skip comments and string/char literals so they can't fake a match
    for ignorable in (javaStyleComment, L_STRING, L_CHAR):
        grammar.ignore(ignorable)
    try:
        matches = lang.path_contains_grammar(grammar, java_dest,
                                             LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        # falls through returning None, as the original control flow did
        show_unknown('File does not exist',
                     details=dict(code_dest=java_dest))
    else:
        if matches:
            show_open(open_msg, details=dict(matched=matches))
            return True
        show_close(closed_msg, details=dict(code_dest=java_dest))
        return False
def define_read():
    """Grammar for READ "<filename>" with optional (separator=..., header=...)."""
    read_kw = Keyword("read", caseless=True).setResultsName("read")
    filename = Word(everythingWOQuotes).setResultsName("filename")

    # header = <int> | True | False | None   (optional)
    header = Optional(
        (Literal("header") + Literal("=")).suppress()
        + MatchFirst([Word(numbers), bool_true, bool_false,
                      oneOf('None')]).setResultsName("header"))

    # separator = "<chars>"   (optional; defaults to a comma)
    separator = Optional(
        (Literal("separator") + Literal("=")).suppress()
        + Quote
        + Word(everythingWOQuotes + whitespace).setResultsName("sep")
        + Quote,
        default=",")

    # the whole option group is itself optional
    read_options = Optional(openParen + separator + ocomma + header + closeParen)
    return read_kw + Quote + filename + Quote + read_options
def header_line() -> ParserElement:
    """Match the report header line in either column layout.

    :returns: parser yielding named results ``type`` and ``trs``
    """
    type_field = Or(TYPES).setResultsName("type")
    trs_field = Integer.setResultsName("trs")
    # labels and values interleaved: Type : <type> Trs# : <trs>
    interleaved = (Literal("Type :") + type_field
                   + Literal("Trs# :") + trs_field)
    # both labels first, then both values (split column layout)
    split = (Literal("Type :") + Literal("Trs# :")
             + type_field + trs_field)
    return MatchFirst([interleaved, split])
def has_dos_dow_sqlcod(rpg_dest: str, exclude: list = None) -> bool:
    r"""
    Search for DoS for using ``DoW SQLCOD = <ZERO>``\ .

    :param rpg_dest: Path to a RPG source or directory.
    :param exclude: Paths that contains any string from this list are ignored.
    """
    # zero may be written literally or as the RPG figurative constant *zeros
    zero = MatchFirst([Literal('0'), CaselessKeyword('*zeros')])
    dos_grammar = (CaselessKeyword('dow') + CaselessKeyword('sqlcod')
                   + Literal('=') + zero)
    result = False
    try:
        matches = lang.check_grammar(dos_grammar, rpg_dest,
                                     LANGUAGE_SPECS, exclude)
        if not matches:
            show_close('Code does not have DoS for using "DoW SQLCOD = 0"',
                       details=dict(code_dest=rpg_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=rpg_dest))
        return False
    else:
        result = True
        show_open('Code has DoS for using "DoW SQLCOD = 0"',
                  details=dict(matched=matches, total_vulns=len(matches)))
    return result
def mlsqlparser():
    """Build the top-level ML-SQL grammar.

    A statement is either the full pipeline clause
    READ → REPLACE → SPLIT → CLASSIFY → REGRESS → CLUSTER → SAVE
    or a standalone LOAD. Comments are ignored everywhere.

    :returns: pyparsing object for a complete ML-SQL statement
    """
    # Keyword sub-grammars
    LOAD = define_load()
    READ = define_read()
    SPLIT = define_split()
    REGRESS = define_regress()
    CLASSIFY = define_classify()
    CLUSTER = define_cluster()
    REPLACE = define_replace()
    SAVE = define_save()

    # Comment grammar, ignored during parsing
    comment = _define_comment()

    # Full pipeline clause. (The original built five shorter partial
    # combinations here as well, but none were ever used — removed.)
    pipeline = (READ + REPLACE + SPLIT + CLASSIFY + REGRESS
                + CLUSTER + SAVE)

    return MatchFirst([pipeline, LOAD]).ignore(comment)
def _define_split_options():
    """Grammar for the optional argument list of the SPLIT keyword."""
    def _fraction(key, result_name):
        # <key> = <decimal>, with the "<key> =" part suppressed
        return ((CaselessLiteral(key) + Literal("=")).suppress()
                + decimal.setResultsName(result_name))

    training = _fraction("train", "train_split")
    testing = _fraction("test", "test_split")
    val = _fraction("validation", "validation_split")

    # persist = [name, name, ...]  (wrapped in Optional, as in the original)
    persist = Optional(
        (CaselessLiteral("persist") + Literal("=")).suppress()
        + openBracket
        + word_to_word_list.setResultsName("persist_names_split")
        + closeBracket)

    option = MatchFirst([training, testing, val, persist])
    return openParen + delimitedList(option, delim=',') + closeParen
def uses_sha1_hash(csharp_dest: str, exclude: list = None) -> bool:
    """
    Check if code uses SHA1 as hashing algorithm.

    See `REQ.150 <https://fluidattacks.com/web/rules/150/>`_.

    :param csharp_dest: Path to a C# source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    """
    method = "new SHA1CryptoServiceProvider(), new SHA1Managed()"
    # either of the two SHA1 classes, instantiated via "new ...(...)"
    sha1_class = MatchFirst([CaselessKeyword('SHA1CryptoServiceProvider'),
                             CaselessKeyword('SHA1Managed')])
    call_function = CaselessKeyword('new') + sha1_class + nestedExpr()
    result = False
    try:
        matches = lang.check_grammar(call_function, csharp_dest,
                                     LANGUAGE_SPECS, exclude)
        if not matches:
            show_close('Code does not use {} method'.format(method),
                       details=dict(code_dest=csharp_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=csharp_dest))
        return False
    else:
        result = True
        show_open('Code uses {} method'.format(method),
                  details=dict(matched=matches, total_vulns=len(matches)))
    return result
def llt_parser():
    """Creates a parser for STL over atomic expressions of the type x_i ~ pi.

    Note that it is not restricted to depth-2 formulas.
    """
    atomic = expr_parser()
    return MatchFirst(stl.stl_parser(atomic))
def swallows_exceptions(js_dest: str, exclude: list = None) -> bool:
    """
    Search for ``catch`` blocks that are empty or only have comments.

    See `REQ.161 <https://fluidattacks.com/web/rules/161/>`_.
    See `CWE-391 <https://cwe.mitre.org/data/definitions/391.html>`_.

    :param js_dest: Path to a JavaScript source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    """
    # ~Empty() as content means the nested expression must contain nothing
    empty_braces = nestedExpr(opener='{', closer='}', content=~Empty())
    empty_parens = nestedExpr(opener='(', closer=')', content=~Empty())
    any_parens = nestedExpr(opener='(', closer=')')
    # statement form: catch (e) { }
    classic = Suppress(Keyword('catch')) + any_parens + empty_braces
    # promise form: .catch()
    modern = Suppress('.' + Keyword('catch')) + empty_parens
    grammar = MatchFirst([classic, modern])
    grammar.ignore(cppStyleComment)
    try:
        matches = lang.path_contains_grammar(grammar, js_dest,
                                             LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        # falls through returning None, as the original control flow did
        show_unknown('File does not exist', details=dict(code_dest=js_dest))
    else:
        if matches:
            show_open('Code has empty "catch" blocks',
                      details=dict(matched=matches))
            return True
        show_close('Code does not have empty "catch" blocks',
                   details=dict(code_dest=js_dest))
        return False
def _compile_grammar(self):
    # type: () -> ParserElement
    """
    Takes the individual grammars from each registered directive and compiles
    them into a full test fixture grammar whose callback methods are the bound
    methods on this class instance.

    :return: The full PyParsing grammar for test fixture files.
    """
    # a blank line finalizes the test case currently being built
    blank_line = (LineEnd().suppress()).setParseAction(
        functools.partial(self._finalize_test_case))
    grammars = [blank_line]
    # one line-bounded grammar per registered directive, each routed to
    # _ingest_directive with its directive class bound in
    grammars.extend(
        LineStart()
        + directive_class.get_full_grammar().setParseAction(
            functools.partial(self._ingest_directive, directive_class))
        + LineEnd()
        for directive_class in get_all_directives())
    return StringStart() + OneOrMore(MatchFirst(grammars)) + StringEnd()
def get_match_first(lits, parseAction=None):
    """Build a MatchFirst over *lits*, optionally attaching a parse action.

    :param lits: iterable of literals / parser elements, tried in order
    :param parseAction: optional parse action to set on the combined element
    :returns: pyparsing MatchFirst element
    """
    # Seed with NoMatch so an empty *lits* yields a grammar that simply
    # never matches rather than raising at construction time.
    el = MatchFirst(NoMatch())
    for lit in lits:
        el |= lit  # idiomatic operator form instead of calling __ior__ directly
    if parseAction:
        el.setParseAction(parseAction)
    return el
def define_classify():
    """Grammar for the optional CLASSIFY keyword clause."""
    # algorithm = <one of the classifier sub-grammars>
    algo = ((Literal("algorithm") + Literal("=")).suppress()
            + MatchFirst([svm.define_svm(),
                          logistic.define_logistic(),
                          forest.define_forest(),
                          bayes.define_bayes(),
                          knn.define_knn()]).setResultsName("algorithm"))

    # accept both spellings of the keyword
    classify_kw = oneOf(["Classify", "CLASSIFY"]).setResultsName("classify")

    # predictors = <columns>
    preds = ((Literal("predictors") + Literal("=")).suppress()
             + choice_columns.setResultsName("predictors"))
    # label = <column>
    labels = ((Literal("label") + Literal("=")).suppress()
              + column.setResultsName("label"))

    # the whole clause is optional; argument order is fixed
    return Optional(classify_kw + openParen + preds + ocomma
                    + labels + ocomma + algo + closeParen)
def define_replace():
    """Grammar for the optional REPLACE keyword clause."""
    # accept both spellings of the keyword
    replace_kw = oneOf(["Replace", "REPLACE"]).setResultsName("replace")

    # columns whose values need replacing
    replace_cols = choice_columns.setResultsName("replaceColumns")

    # quoted token identifying the value that marks "missing"
    replace_missing = (Quote
                       + Word(everythingWOQuotes).setResultsName("replaceIdentifier")
                       + Quote)

    # replacement: a known strategy word, or a quoted literal value
    value = Quote + Word(everythingWOQuotes) + Quote
    replacements = MatchFirst(
        _replace_options() + [value]).setResultsName("replaceValue")

    # full per-column form (built but unused in the original as well)
    single_replacement = (openParen + replace_cols + ocomma + replace_missing
                          + ocomma + replacements + closeParen)
    group_replacements = delimitedList(single_replacement)

    # temporary for a single demo (please remove later)
    temp_replacement = (openParen + replace_missing + ocomma
                        + replacements + closeParen)

    return Optional(replace_kw + temp_replacement)
def define_regress():
    """Grammar for the optional REGRESS keyword clause."""
    # algorithm = <one of the regression sub-grammars>
    algo = ((Literal("algorithm") + Literal("=")).suppress()
            + MatchFirst([simple.define_simple(),
                          lasso.define_lasso(),
                          ridge.define_ridge(),
                          elastic.define_elastic()]).setResultsName("algorithm"))

    # caseless keyword so multiple spellings of REGRESS are accepted
    regress_kw = Keyword("regress", caseless=True).setResultsName("regress")

    # predictors = <columns>
    preds = ((Literal("predictors") + Literal("=")).suppress()
             + choice_columns.setResultsName("predictors"))
    # label = <column>
    labels = ((Literal("label") + Literal("=")).suppress()
              + column.setResultsName("label"))

    # the whole clause is optional; argument order is fixed
    return Optional(regress_kw + openParen + preds + ocomma
                    + labels + ocomma + algo + closeParen)
def __init__(self, identifier_parser=None):
    """
    :param IdentifierParser identifier_parser: An identifier parser for checking the 3P and 5P partners
    """
    self.identifier_parser = identifier_parser if identifier_parser is not None else IdentifierParser(
    )

    # names from the default pmod namespace table
    pmod_default_ns = oneOf(list(
        language.pmod_namespace.keys())).setParseAction(
            self.handle_pmod_default_ns)
    # legacy labels kept for backward compatibility
    pmod_legacy_ns = oneOf(list(
        language.pmod_legacy_labels.keys())).setParseAction(
            self.handle_pmod_legacy_ns)

    # a pmod identifier: fully qualified, default-namespace, or legacy
    pmod_identifier = MatchFirst([
        Group(self.identifier_parser.identifier_qualified),
        Group(pmod_default_ns),
        Group(pmod_legacy_ns)
    ])

    # pmod(<identifier> [, <amino acid code> [, <position>]])
    self.language = pmod_tag + nest(
        pmod_identifier(IDENTIFIER) + Optional(
            WCW + amino_acid(PMOD_CODE) +
            Optional(WCW + ppc.integer(PMOD_POSITION))))
    super(ProteinModificationParser, self).__init__(self.language)
def parse_select_columns(string):
    """Parse a select query and return the columns

    Args:
        string(str): Input string to be parsed

    Returns:
        result(list of str): List of columns
    """
    if string == '':
        return []

    # strip a leading WITH <name> AS (<subquery>)[, ...] prefix
    if string.upper().startswith('WITH'):
        with_clause = _with + delimitedList(_db_name + _as + subquery)
        string = with_clause.suppress().transformString(string)

    # drop everything from the first FROM onwards
    from_onwards = MatchFirst(_from) + restOfLine
    string = from_onwards.suppress().transformString(string)

    parser = _select + delimitedList(field_parser).setResultsName('columns')
    columns = parser.parseString(string).columns.asList()

    # strip extra whitespace from each column expression
    return [col.strip() for col in columns]
def get_gene_modification_language(
        concept_qualified: ParserElement) -> ParserElement:
    """Build a gene modification parser."""
    # a concept is either fully qualified or from the default gmod namespace
    concept = MatchFirst([concept_qualified, gmod_default_ns])
    return gmod_tag + nest(Group(concept)(CONCEPT))
def _globalParse___aaa_attributes(line, type, count_aaa):
    """Parse one ``aaa`` config line into a dict keyed by ``login<count_aaa>``.

    :param line: the raw aaa configuration line to parse
    :param type: which aaa family the line belongs to; only 'authentication'
        and 'accounting' are handled here (authorization is commented out)
    :param count_aaa: counter used to build the 'login<N>' dict key
    :returns: dict describing the parsed aaa entry
    """
    aaa_dict = {}

    # authentication: "login <list-name> [group] <method> [...]"
    authentication_list = (Suppress('login') + Word(printables))('authent_list')
    authentication_groups = (
        OneOrMore(Optional(Suppress('group')) + Word(printables)))('authent_methods')
    parse_authentication = authentication_list + authentication_groups

    # parse_authorization_options = MatchFirst(['exec', 'login']) + Word(printables) + OneOrMore(Optional(Suppress('group')) + Word(printables))

    # accounting: "<exec|network|connection|commands> [level] <list> <record> [group] <method> [...]"
    accounting_login = (MatchFirst(
        ['exec', 'network', 'connection', 'commands']))('acc_login')
    accounting_list = (Optional(Word(nums)) + Word(printables))('acc_list')
    accounting_record = (MatchFirst(['start-stop', 'stop-only',
                                     'stop']))('acc_record')
    accounting_methods = (
        OneOrMore(Optional(Suppress('group')) + Word(printables)))('acc_methods')
    parse_accounting = accounting_login + accounting_list + accounting_record + accounting_methods

    if type == 'authentication':
        result = parse_authentication.parseString(line)
        aaa_dict.update({'login' + str(count_aaa): {}})
        aaa_dict['login' + str(count_aaa)]['list'] = result.authent_list[0]
        aaa_dict['login' + str(count_aaa)]['methods'] = result.authent_methods.asList()
    # elif type == 'authorization':
    #     result = parse_authorization_options.parseString(line)
    #     aaa_dict.update({'login' + str(count_aaa): {}})
    #     aaa_dict['login' + str(count_aaa)]['login'] = result.pop(0)
    #     aaa_dict['login' + str(count_aaa)]['list'] = result.pop(0)
    #     aaa_dict['login' + str(count_aaa)]['methods'] = result.asList()
    elif type == 'accounting':
        result = parse_accounting.parseString(line)
        aaa_dict.update({'login' + str(count_aaa): {}})
        aaa_dict['login' + str(count_aaa)]['login'] = result.acc_login
        aaa_dict['login' + str(count_aaa)]['list'] = result.acc_list.asList()
        aaa_dict['login' + str(count_aaa)]['record'] = result.acc_record
        aaa_dict['login' + str(count_aaa)]['methods'] = result.acc_methods.asList()

    return aaa_dict
def _define_encode_options():
    """Grammar for the optional argument list of the ENCODE keyword."""
    # strategy = "<one of the known strategies>"   (quoted)
    strategy = ((CaselessLiteral('strategy') + Literal('=')).suppress()
                + Quote
                + MatchFirst(
                    _define_encode_strategies()).setResultsName('encodeStrategy')
                + Quote)

    # persist = "<value>"   (quoted; wrapped in Optional, as in the original)
    persist = Optional(
        (CaselessLiteral('persist') + Literal('=')).suppress()
        + Quote
        + Word(everythingWOQuotes).setResultsName('encodePersist')
        + Quote)

    option = MatchFirst([strategy, persist])
    return openParen + delimitedList(option, delim=',') + closeParen
def define_simple_literals(literal_list, parseAction=None):
    """Build a MatchFirst of caseless keywords, optionally with a parse action."""
    matcher = MatchFirst([CaselessKeyword(word) for word in literal_list])
    if parseAction:
        matcher = matcher.setParseAction(parseAction)
    return matcher
def date_line() -> ParserElement:
    """Match the invoice date line in either column layout.

    :returns: parser yielding named results ``date`` and ``invoice``
    """
    date_field = Word("1234567890-").setResultsName("date")
    invoice_field = Integer.setResultsName("invoice")
    # labels and values interleaved: Date : <date> Invoice# : <invoice>
    date_single = (Literal("Date :") + date_field
                   + Literal("Invoice# :") + invoice_field)
    # both labels first, then both values (split column layout)
    date_split = (Literal("Date :") + Literal("Invoice# :")
                  + date_field + invoice_field)
    # The trailing .setDebug() was leftover debug instrumentation (it prints
    # every match attempt to stderr); sibling header_line() has none.
    return MatchFirst([date_single, date_split])
def get_gene_modification_language(
        identifier_qualified: ParserElement) -> ParserElement:
    """Build a gene modification parser."""
    # an identifier is either fully qualified or from the default gmod namespace
    identifier = MatchFirst([identifier_qualified, gmod_default_ns])
    return gmod_tag + nest(Group(identifier)(IDENTIFIER))
def _BNF(self):
    """
    Build (once) and return the arithmetic-expression grammar.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    # the compiled grammar is cached on self.bnf after the first call
    if not self.bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        # floating point number with optional sign, fraction and exponent
        fnumber = Combine(
            Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        # comma = Literal( "," ).suppress()
        comma = Literal(",")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        # one Literal per known variable name
        var_list = [Literal(i) for i in self.var_names]
        expr = Forward()
        arg_func = Forward()
        or_vars = MatchFirst(var_list)
        # atom = (Optional("-") + ( pi | e | fnumber | ident + lpar + delimitedList(Group(expr)) + rpar | or_vars ).setParseAction( self._pushFirst ) | ( lpar + delimitedList(Group(expr)).suppress() + rpar ) ).setParseAction(self._pushUMinus)
        # an atom is an optionally negated constant, number, function call,
        # variable, or parenthesised expression; operands are pushed onto
        # the evaluation stack via _pushFirst / _pushUMinus
        atom = ((Optional("-") + (
            pi | e | fnumber | ident + lpar + arg_func + rpar
            | or_vars).setParseAction(self._pushFirst)) |
                (Optional("-") + (lpar + arg_func.suppress() + rpar))
                ).setParseAction(self._pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore(
            (expop + factor).setParseAction(self._pushFirst))
        term = factor + ZeroOrMore(
            (multop + factor).setParseAction(self._pushFirst))
        expr << term + ZeroOrMore(
            (addop + term).setParseAction(self._pushFirst))
        # function arguments: expr [ "," expr ]*
        arg_func << expr + ZeroOrMore(
            (comma + expr).setParseAction(self._pushFirst))
        self.bnf = expr
    return self.bnf
def parseImpl(self, instring, loc, doActions=True):
    """Match an indentation prefix and record the new depth.

    Candidate literals are built from each token repeated from
    ``indent + 1`` down to (but not including) ``max(0, indent - 2)``
    repetitions, each followed by a single space — so one level deeper
    through roughly two levels shallower than the current depth is
    accepted; presumably deeper alternatives are tried first so the
    longest indent wins (TODO confirm against the token set used).
    """
    exprs = []
    for token in self.tokens:
        for indent in range(self.indent_state.indent + 1,
                            max(0, self.indent_state.indent - 2), -1):
            exprs.append(Literal(token * indent + " "))
    loc, result = MatchFirst(exprs).parseImpl(instring, loc, doActions)
    # matched text is "<token*depth> "; subtract the trailing space to
    # store the observed depth for the next call
    self.indent_state.indent = len(result[0]) - 1
    return loc, result
def define_regress():
    ''' Algorithm Definition of Regress Keyword

    :returns pyparsing object
    '''
    # algorithm = <one of the regression sub-grammars>
    algo = ((Literal("algorithm") + Literal("=")).suppress()
            + MatchFirst([simple.define_simple(),
                          lasso.define_lasso(),
                          ridge.define_ridge(),
                          elastic.define_elastic()]).setResultsName("algorithm"))

    # feature = False | AUTO | RFE  (the "feature=" prefix itself is optional)
    # NOTE(review): `feature` is built here but never added to the option list
    # below — mirroring the original code; confirm whether it should be wired in.
    feature = (Optional(CaselessLiteral("feature") + Literal("=")).suppress()
               + oneOf(["False", "AUTO", "RFE"]).setResultsName("feature"))

    # caseless keyword so multiple spellings of REGRESS are accepted
    regress_kw = Keyword("regress", caseless=True).setResultsName("regress")

    # predictors = <columns>
    preds = ((Literal("predictors") + Literal("=")).suppress()
             + choice_columns.setResultsName("predictors"))
    # label = <column>
    labels = ((Literal("label") + Literal("=")).suppress()
              + column.setResultsName("label"))

    options = delimitedList(MatchFirst([preds, labels, algo]), delim=',')
    return regress_kw + openParen + Optional(options) + closeParen