def parse_expression_from_equation(self, equation):
    """Scan ``equation`` for method expressions and index the matches.

    The grammar is built from ``self.all_methods`` via
    ``self.get_expression`` and wrapped in ``Combine`` so every match is
    reported under the results name ``expression``.

    Returns a dict with one entry per match: the key is
    ``str(tokens[0][0])`` (the first element of the first token; for a
    combined string token that is its first character), the value is the
    list ``[cleaned methods, start offset, stop offset]``.  A later match
    with the same key overwrites an earlier one.
    """
    # Build the scanning grammar from the configured method list.
    grammar = Combine(self.get_expression(self.all_methods))
    named_grammar = grammar.setResultsName('expression')

    parsed_equation = {}
    for tokens, begin, finish in named_grammar.scanString(equation):
        cleaned = self.clean_methods(tokens.expression)
        key = str(tokens[0][0])
        parsed_equation[key] = [cleaned, begin, finish]
    return parsed_equation
def parse_ssn(input_string: str):
    """Print every SSN-shaped match found in ``input_string``.

    Grammar actually matched::

        ssn  ::= nums{3} '-' nums{2} '-' nums{4}
        nums ::= '0'..'9'

    For each match the parse result and its start and stop offsets are
    printed; nothing is returned.
    """
    separator = '-'
    first_part = Word(nums, exact=3)
    second_part = Word(nums, exact=2)
    third_part = Word(nums, exact=4)
    ssn_parser = Combine(
        first_part + separator + second_part + separator + third_part)

    for found in ssn_parser.scanString(input_string):
        tokens, begin, finish = found
        print(tokens, begin, finish)
def mwgtofasta(wiki, evt):
    """Replace the selected primer text in the active editor with FASTA.

    Steps, as performed on the active editor's current selection:

    1. Look for the first ``>NUMBER_`` header in the selected text.  If one
       is found, numbering continues at ``NUMBER + 1`` and the selection is
       shrunk to the text in front of that header.  If none is found,
       numbering starts at 1 and the selection is left covering the same
       length as before.
    2. Scan the originally selected text for primer entries: a name at the
       start of a line, followed on a later line by a sequence enclosed in
       ``5'`` ... ``3'``.
    3. Emit one FASTA record per entry.  Numbers are handed out in
       descending order, so the last entry receives the continuation
       number and the first entry the highest one.
    4. Replace the selection with the generated text and select it.

    ``evt`` is accepted for the caller's signature and is not used.
    """
    from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine, nums

    editor = wiki.getActiveEditor()
    raw_string = editor.GetSelectedText()
    start, end = editor.GetSelection()

    fastaheader = Combine(
        Literal(">").suppress()
        + Word(nums).setResultsName("number")
        + Literal("_").suppress())
    try:
        # Use the next() builtin: the generator method ``.next()`` exists
        # only on Python 2 and raises AttributeError on Python 3.
        data, dataStart, _ = next(fastaheader.scanString(raw_string))
    except StopIteration:
        # No existing FASTA header in the selection.
        number = 1
        dataStart = end - start
    else:
        number = int(data.number) + 1
    editor.SetSelectionByCharPos(start, start + dataStart)

    name = Word(printables).setResultsName("name")
    seq_start = Literal("5'").suppress()
    seq_stop = Literal("3'").suppress()
    sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
    mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

    seqlist = [tokens for tokens, _, _ in mwg_primer.scanString(raw_string)]
    number += len(seqlist)

    records = []
    for data in seqlist:
        number -= 1
        # Strip dashes from both ends first, then drop line breaks and blanks.
        s = data.seq.strip("-").replace("\n", "").replace(" ", "")
        records.append(">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
            number=number, name=data.name, length=len(s), seq=s))
    fasta_string = "".join(records)

    editor.ReplaceSelection(fasta_string)
    editor.SetSelectionByCharPos(start, start + len(fasta_string))
class SearchRestrictionParser(object):
    """
    Defines the grammar for simple search restriction expressions.

    The parsers of the different terms of these restriction expressions
    are provided by this class.
    """

    def __init__(self):
        """ Constructor. Declares all parser attributes and builds the grammar. """

        self.__literalExpression = None
        self.__keywordExpression = None
        self.__propertyNameExpression = None
        self.__comparisonExpression = None
        self.__conditionExpression = None
        self.__conjunctionExpression = None
        self.__restrictionExpression = None
        self.__dateExpression = None
        self.__numberExpression = None
        self.__conjunctionTokens = None
        self.__comparisonTokens = None
        self.__andKeyword = None
        self.__orKeyword = None
        self.__notKeyword = None
        self.__quotedStringCharacters = ["\"", "'"]

        self.__initSearchRestrictionParser()

    def __initSearchRestrictionParser(self):
        """
        Initializes the parser expressions for the search restrictions.

        The expressions are stored on the instance; nothing is returned.
        """

        unicodeUmlaut = (unicodedata.lookup("LATIN CAPITAL LETTER A WITH DIAERESIS")
                         + unicodedata.lookup("LATIN SMALL LETTER A WITH DIAERESIS")
                         + unicodedata.lookup("LATIN CAPITAL LETTER O WITH DIAERESIS")
                         + unicodedata.lookup("LATIN SMALL LETTER O WITH DIAERESIS")
                         + unicodedata.lookup("LATIN CAPITAL LETTER U WITH DIAERESIS")
                         + unicodedata.lookup("LATIN SMALL LETTER U WITH DIAERESIS")
                         + unicodedata.lookup("LATIN SMALL LETTER SHARP S"))

        # define property name
        firstPropertyNameCharacter = alphas + unicodeUmlaut + "_"
        propertyCharacter = firstPropertyNameCharacter + nums + ".-"
        self.__propertyNameExpression = Word(firstPropertyNameCharacter, propertyCharacter)

        # define literal
        # Raw strings keep the backslashes for the regex engine without
        # relying on invalid string escape sequences such as "\d".
        day = Regex(r"(0[1-9]|[12][0-9]|3[01])")
        month = Regex(r"(0[1-9]|1[012])")
        year = Regex(r"((?:19|20)\d\d)")
        hour = Regex(r"([01][0-9]|2[0-3])")
        minute = Regex(r"([0-5][0-9])")
        second = minute
        self.__dateExpression = Combine(day + "." + month + "." + year + White()
                                        + hour + ":" + minute + ":" + second)
        self.__numberExpression = Regex(r"[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?")
        self.__literalExpression = QuotedString(self.__quotedStringCharacters[0])
        for quotedStringCharacter in self.__quotedStringCharacters[1:]:
            self.__literalExpression |= QuotedString(quotedStringCharacter)
        self.__literalExpression.setParseAction(self.__handleLiteral)

        # define keywords
        notKeyword = Keyword(NOT_OPERATOR, caseless=True)
        andKeyword = Keyword(AND_OPERATOR, caseless=True)
        orKeyword = Keyword(OR_OPERATOR, caseless=True)

        gteKeyword = Keyword(GTE_OPERATOR)
        lteKeyword = Keyword(LTE_OPERATOR)
        equalKeyword = Keyword(EQUAL_OPERATOR)
        gtKeyword = Keyword(GT_OPERATOR)
        ltKeyword = Keyword(LT_OPERATOR)
        likeKeyword = Keyword(LIKE_OPERATOR, caseless=True)
        comparisonKeyword = gteKeyword | lteKeyword | equalKeyword | gtKeyword | ltKeyword | likeKeyword

        existsKeyword = Keyword(EXISTS_OPERATOR, caseless=True)
        contentContainsKeyword = Keyword(CONTENT_CONTAINS_OPERATOR, caseless=True)
        isCollectionKeyword = Keyword(IS_COLLECTION_OPERATOR, caseless=True)

        self.__keywordExpression = notKeyword | andKeyword | orKeyword | comparisonKeyword | existsKeyword | \
                                   contentContainsKeyword | isCollectionKeyword | "(" | ")"

        # definition of condition terms
        comparisonCondition = Group(self.__propertyNameExpression + comparisonKeyword + self.__literalExpression)
        existsCondition = Group(existsKeyword + self.__propertyNameExpression)
        contentContainsCondition = Group(contentContainsKeyword + self.__literalExpression)
        isCollectionCondition = isCollectionKeyword
        self.__conditionExpression = comparisonCondition | existsCondition | contentContainsCondition | isCollectionCondition
        self.__conditionExpression.setParseAction(self.__handleConditionTerm)

        # definition of restriction expressions (operators to combine the condition terms)
        self.__restrictionExpression = operatorPrecedence(
            self.__conditionExpression,
            [(notKeyword, 1, opAssoc.RIGHT),
             (andKeyword, 2, opAssoc.LEFT),
             (orKeyword, 2, opAssoc.LEFT)]) + StringEnd()

        # definition of comparison expression
        self.__comparisonExpression = comparisonKeyword

        self.__andKeyword = andKeyword
        self.__orKeyword = orKeyword
        self.__notKeyword = notKeyword

        # definition of conjunction expression
        self.__conjunctionExpression = andKeyword | orKeyword

    def registerPropertyParseAction(self, parseAction):
        """
        Sets the parsing action that is run when a property name is matched.

        The action is installed with C{setParseAction}.
        NOTE(review): per the pyparsing documentation C{setParseAction}
        replaces previously set actions rather than appending - confirm
        this is the intended behavior.
        """

        self.__propertyNameExpression.setParseAction(parseAction)

    def registerLiteralParseAction(self, parseAction):
        """
        Sets the parsing action that is run when a literal is matched.

        The action is installed with C{setParseAction} on the same expression
        that received the date / number conversion action during
        initialization.
        NOTE(review): per the pyparsing documentation this replaces that
        conversion action - confirm this is the intended behavior.
        """

        self.__literalExpression.setParseAction(parseAction)

    def registerConjunctionParseAction(self, parseAction):
        """
        Sets the parsing action that is run when one of the and / or / not
        keywords is matched.
        """

        self.__andKeyword.setParseAction(parseAction)
        self.__orKeyword.setParseAction(parseAction)
        self.__notKeyword.setParseAction(parseAction)

    def registerComparisonParseAction(self, parseAction):
        """ Sets the parsing action that is run when a comparison keyword is matched. """

        self.__comparisonExpression.setParseAction(parseAction)

    @staticmethod
    def _convertNumber(numberString):
        """
        Converts the text of a matched number into C{int} or C{float}.

        @param numberString: Text matched by the number expression.
        @type numberString: C{str}

        @return: Integer value if the text is a plain integer, otherwise the float value.
        @rtype: C{int} or C{float}
        """

        # SECURITY / ROBUSTNESS: this replaces a former eval() call on text
        # taken from the search string. Explicit conversion never executes
        # code and also accepts leading zeros ("08"), which eval() rejects
        # or interprets as octal depending on the Python version.
        try:
            return int(numberString)
        except ValueError:
            return float(numberString)

    def __handleLiteral(self, _, __, tokenList):
        """
        Evaluates the content of the quoted string.

        If the content contains exactly one date match, the corresponding
        C{time.struct_time} is returned. Otherwise the number matches are
        added to the collected matches and, if exactly one match exists in
        total, its numeric value is returned. In every other case nothing is
        returned.
        """

        unquotedString = tokenList[0]
        result = list(self.__dateExpression.scanString(unquotedString))
        if len(result) == 1:
            return time.strptime(str(result[0][0][0]), "%d.%m.%Y %H:%M:%S")

        # Date matches stay in the list on purpose: the single-match check
        # below counts date and number matches together, as before.
        result.extend(self.__numberExpression.scanString(unquotedString))
        if len(result) == 1:
            return self._convertNumber(str(result[0][0][0]))

    def parseString(self, inputString):
        """
        Parses the string and returns the result.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @raise ParseException: Signals an error parsing the given string.
        """

        return self.__restrictionExpression.parseString(inputString)

    @staticmethod
    def __handleConditionTerm(_, __, tokens):
        """
        Extracts operator, literal and property name from the parsed condition term.

        Three tokens are read as (property name, operator, literal). Two
        tokens are read as (operator, property name) for the exists operator
        and as (operator, literal) otherwise. Any other length yields just
        the operator taken from the first token.

        @return: Tuple of property name, operator, literal; missing parts are C{None}.
        @rtype: C{tuple}
        """

        operator = propertyName = literal = None
        tokenList = list(list(tokens)[0])

        if len(tokenList) == 3:
            operator = tokenList[1]
            propertyName = tokenList[0]
            literal = tokenList[2]
        elif len(tokenList) == 2:
            operator = tokenList[0]
            if operator == EXISTS_OPERATOR:
                propertyName = tokenList[1]
            else:
                literal = tokenList[1]
        else:
            # Ungrouped single keyword: the first token is the operator itself.
            operator = tokens[0]
        return (propertyName, operator, literal)

    def matchKeyword(self, inputString):
        """
        Returns all matches of keywords.
        NOTE(review): earlier documentation stated that keywords in literals
        are ignored; this method only scans with the keyword expression - confirm.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """

        return self._matchWrapper(inputString, self.__keywordExpression)

    def matchPropertyName(self, inputString):
        """
        Returns all matches of property names.
        NOTE(review): earlier documentation stated that keywords and property
        names in literals are ignored; this method only scans with the
        property name expression - confirm.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """

        return self._matchWrapper(inputString, self.__propertyNameExpression)

    def matchLiteral(self, inputString):
        """
        Returns all matches of literals.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time} or C{int} or C{float}, C{int}, C{int})
        """

        return self._matchWrapper(inputString, self.__literalExpression)

    def matchComparison(self, inputString):
        """
        Returns all matches of comparison operators.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple}
        """

        return self._matchWrapper(inputString, self.__comparisonExpression)

    def matchConjunction(self, inputString):
        """
        Returns all matches of conjunction operators.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple}
        """

        return self._matchWrapper(inputString, self.__conjunctionExpression)

    def matchConditionTerm(self, inputString):
        """
        Returns all matches of condition terms.
        NOTE(review): earlier documentation stated that condition terms in
        literals are ignored; this method only scans with the condition
        expression - confirm.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of matched expression, start index, end index.
        @rtype: C{list} of C{tuple}
        """

        return self._matchWrapper(inputString, self.__conditionExpression)

    @property
    def comparisonTokens(self):
        """
        Returns a list of strings representing the comparison operators.
        The list is determined on first access and cached afterwards.
        """

        if self.__comparisonTokens is None:
            self.__comparisonTokens = self._walkKeywordTree(self.__comparisonExpression)
        return self.__comparisonTokens

    @property
    def conjunctionTokens(self):
        """
        Returns a list of strings representing the conjunction keywords.
        The list is determined on first access and cached afterwards.
        """

        if self.__conjunctionTokens is None:
            self.__conjunctionTokens = self._walkKeywordTree(self.__conjunctionExpression)
        return self.__conjunctionTokens

    @property
    def quotedStringCharacters(self):
        """ Returns a list of strings representing the quoted string characters. """

        return self.__quotedStringCharacters

    def _walkKeywordTree(self, rootNode):
        """
        Walks through a MatchFirst object and returns possible matches as a string list.

        The walk descends into C{exprs[0]} until a node without C{exprs} is
        reached (whose C{match} starts the list) and appends the C{match} of
        C{exprs[1]} on every level on the way back.
        NOTE(review): this assumes each level holds exactly two alternatives
        nested to the left - confirm for the pyparsing version in use.
        """

        try:
            nextRoot = rootNode.exprs[0]
        except AttributeError:
            return [rootNode.match]
        else:
            result = self._walkKeywordTree(nextRoot)
            result.append(rootNode.exprs[1].match)
            return result

    @staticmethod
    def _matchWrapper(inputString, expression):
        """
        Calls scanString with given input and parse expression and returns the result.

        @return: List of tuples consisting of the first token of every match, its start index and its end index.
        @rtype: C{list} of C{tuple}
        """

        result = list()
        for matchedTokens, startIndex, endIndex in expression.scanString(inputString):
            result.append((matchedTokens[0], startIndex, endIndex))
        return result