def songs_pyparsing(fh):
    r"""Parse an extended M3U playlist from file object *fh*.

    Returns a list of Song(title, seconds, filename) records, or an empty
    list if the input cannot be parsed.

    NOTE: the second doctest batch previously called songs_ply() by mistake;
    it now exercises this function.

    >>> import os
    >>> filename = os.path.dirname(__file__)
    >>> filename = os.path.join(filename, "data/Various-Pop.m3u")
    >>> with open(filename, "rt", encoding="utf8") as fh:
    ...     songs = songs_pyparsing(fh)
    >>> songs[0].title, songs[0].seconds, songs[0].filename
    ('Various - Two Tribes', 236, 'Various\\Frankie Goes To Hollywood\\02-Two Tribes.ogg')
    >>> songs[-1].title, songs[-1].seconds, songs[-1].filename
    ('The Police - Walking On The Moon', 303, 'Various\\Sting & The Police 1997\\06-Walking On The Moon.ogg')
    >>> lines = []
    >>> lines.append("#EXTM3U")
    >>> lines.append("#EXTINF:140,The Beatles - Love Me Do")
    >>> lines.append("Beatles\\Greatest Hits\\01-Love Me Do.ogg")
    >>> lines.append("#EXTINF:-1,The Beatles - From Me To You")
    >>> lines.append("Beatles\\Greatest Hits\\02-From Me To You.ogg")
    >>> import io
    >>> data = io.StringIO("\n".join(lines))
    >>> songs = songs_pyparsing(data)
    >>> len(songs) == 2
    True
    >>> songs[0].title, songs[0].seconds
    ('The Beatles - Love Me Do', 140)
    >>> songs[1].title, songs[1].seconds
    ('The Beatles - From Me To You', -1)
    """
    def add_song(tokens):
        # Parse action: collect each successfully parsed entry.
        songs.append(Song(tokens.title, tokens.seconds, tokens.filename))

    songs = []
    title = restOfLine("title")
    filename = restOfLine("filename")
    # Duration may be negative: -1 means "unknown length" in extended M3U.
    seconds = Combine(Optional("-") + Word(nums)).setParseAction(
        lambda tokens: int(tokens[0]))("seconds")
    info = Suppress("#EXTINF:") + seconds + Suppress(",") + title
    entry = info + LineEnd() + filename + LineEnd()
    entry.setParseAction(add_song)
    parser = Suppress("#EXTM3U") + OneOrMore(entry)
    try:
        parser.parseFile(fh)
    except ParseException as err:
        # Best-effort: report the parse error and return an empty playlist.
        print("parse error: {0}".format(err))
        return []
    return songs
^ (pyp.CaselessKeyword("b")+braddress) \ ^ (pyp.CaselessKeyword("bl")+register+pyp.Suppress(",")+braddress).setName('instruction') \ ^ (pyp.CaselessKeyword("inc")+register) \ ^ (pyp.CaselessKeyword("dec")+register) smwpseud = pyp.CaselessKeyword("smw") + intvalue rmwpseud = pyp.CaselessKeyword("rmw") + intvalue pushpseud = pyp.CaselessKeyword("push") + register pushpseud.setParseAction(expandPush) poppseud = pyp.CaselessKeyword("pop") + register poppseud.setParseAction(expandPop) pseudoinst = smwpseud \ ^ rmwpseud \ ^ pushpseud \ ^ poppseud label = pyp.Group("@" + tag) comment = pyp.Suppress("%") + pyp.Suppress(pyp.restOfLine("")) codeline = pyp.Group( pyp.Optional(label) + pyp.Group(instruction ^ pseudoinst) + pyp.Optional(comment)) line = (comment ^ codeline) program = pyp.ZeroOrMore(line) # opcodes for the instructions opcodes = { 'ldr': 0, # load register from memory 'str': 1, # store register to memory 'mov': 2, # move register/immediate value to register 'add': 3, # add register and register/immediate value, store result in register 'sub': 4, # subtract register/immediate value from register, store result in register
def args(cls, player):
    """Argument grammar for this command: the rest of the input line,
    captured under the results name 'line'."""
    rest_of_line = pyparsing.restOfLine("line")
    return rest_of_line
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # A shell-style variable reference, e.g. $host (stops at delimiters).
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # "~*" is tried before "~" so the longer alternative wins.
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine()

    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal("charset_map") + space + value + space + value

    map_statement = space + Literal("map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    #  - I can neither prove nor disprove that it is correct wrt all escaped
    #    semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() +
        right_bracket)

    # Blocks may nest, hence the Forward declaration resolved below.
    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)
    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()

    block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block) + space).leaveWhitespace()

    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    # Keep tabs and whitespace verbatim so the config round-trips faithfully.
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        # source: the raw nginx configuration text to parse.
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
def args(cls, player):
    """Argument grammar for this command: the rest of the input line,
    captured under the results name 'command'."""
    rest_of_line = pyparsing.restOfLine("command")
    return rest_of_line
import pyparsing as pp

MAX_NUM_ARGS = 1000000000  # max of 1 billion arguments for any function (relation constant)

# function constants are usually lowercase, but haven't found that as a hard requirement in the spec
function_constant = pp.Word(pp.srange("[A-Za-z]"), pp.srange("[a-zA-Z0-9_]"))
identifier = pp.Word(pp.srange("[A-Za-z]"), pp.srange("[a-zA-Z0-9_]"))
# A comment is one or more ';' characters (suppressed) plus the rest of the line.
comment = pp.OneOrMore(pp.Word(';').suppress()) + pp.restOfLine('comment')

# GDL keywords ("Relation Constants")
role = pp.Keyword('role')  # role(p) means that p is a player name/side in the game.
inpt = pp.Keyword('input')  # input(t) means that t is a base proposition in the game.
base = pp.Keyword('base')  # base(a) means that a is an action in the game, the outcome of a turn.
init = pp.Keyword('init')  # init(p) means that the datum p is true in the initial state of the game.
# NOTE(review): this binding shadows the builtin `next` for the rest of the module.
next = pp.Keyword('next')  # next(p) means that the datum p is true in the next state of the game.
does = pp.Keyword('does')  # does(r, a) means that player r performs action a in the current state.
legal = pp.Keyword('legal')  # legal(r, a) means it is legal for r to play a in the current state.
goal = pp.Keyword('goal')  # goal(r, n) means that the current state has utility n for player r. n must be an integer from 0 through 100.
terminal = pp.Keyword('terminal')  # terminal(d) means that if the datum d is true, the game has ended and no player actions are legal.
distinct = pp.Keyword('distinct')  # distinct(x, y) means that the values of x and y are different.
true = pp.Keyword('true')  # true(p) means that the datum p is true in the current state.

# GDL-II Relation Constants
sees = pp.Keyword('sees')  # The predicate sees(?r,?p) means that role ?r perceives ?p in the next game state.
random = pp.Keyword('random')  # A predefined player that chooses legal moves randomly

# GDL-I and GDL-II Relation Constants
relation_constant = role | inpt | base | init | next | does | legal | goal | terminal | distinct | true | sees | random

# TODO: DRY this up
# functions (keywords that should be followed by the number of arguments indicated)
# Dotted-quad IPv4 address: four digit runs joined by '.'.
ipV4Address = Combine(Word(nums) + ('.' + Word(nums))*3)
# Prefix length: 1-2 digits (value range 0-32 is not checked here).
ipv4_prefixlen = Word(nums, min=1, max=2)
# fix this - combine?
ipAddressWithMask = Combine(Word(nums) + ('.' + Word(nums))*3 + "/" + ipv4_prefixlen)
integer = Word(nums)
# Router-config comments: '!' or '#' to end of line.
comment = Group("!" + restOfLine)
hash_comment = Group("#" + restOfLine)
router_id = (ipV4Address | integer)
word_param = Word(alphanums)
interface_id = Word(alphanums + ":")
# TODO: make a function to build these per-line grammars
# (NOTE(review): original comment here was garbled -- "thesline_ip_addresse").
password = "******" + word_param
enable_password = "******" + word_param
banner_motd = "banner motd " + word_param("type") + restOfLine("path")
# Interface sub-commands.
line_ip_address = "ip address" + ipAddressWithMask("ip")
line_description = "description" + restOfLine("description")
line_ip_ospf_cost = "ip ospf cost" + integer("cost")
interface_properties = OneOrMore(
    line_ip_address("ip address") |
    line_description("description") |
    line_ip_ospf_cost("ospf cost") |
    comment | hash_comment
)
# NOTE(review): `indentStack` and `indentedBlock` must be defined elsewhere in
# this module; they are not visible in this excerpt.
interface_indent = indentedBlock(interface_properties, indentStack, True)("indent")
def args(cls, player):
    """Argument grammar for this command: the rest of the input line,
    captured under the results name 'name'."""
    rest_of_line = pyp.restOfLine("name")
    return rest_of_line
# Line-oriented building blocks.
EOL = LineEnd().suppress()
SOL = LineStart().leaveWhitespace()
blankline = SOL + LineEnd()
# `ws` (the set of indent characters) is defined elsewhere in this module.
noIndentation = SOL + ~Word(ws).leaveWhitespace().suppress()
indentation = SOL + Word(ws).leaveWhitespace().suppress()

# Single statements: "<keyword> <value to end of line>".
keyword = Word(alphanums)
value = restOfLine
# Strip surrounding whitespace from the captured value.
value.setParseAction(lambda tokens: tokens[0].strip())
oneLineStatement = keyword("keyword") + value("value") + EOL

# If statements: an "if" header with conditions, then an indented body.
nonIndentedLine = noIndentation + restOfLine() + EOL
indentedLine = indentation + Group(oneLineStatement)
indentedBody = OneOrMore(indentedLine)

# Conditions start on the "if" line and may continue on following
# non-indented lines; empty tokens are filtered out.
ifConditions = (restOfLine() + EOL + ZeroOrMore(nonIndentedLine))
ifConditions.setParseAction(lambda tokens: [t for t in tokens if t])
ifStatement = ("if" + Group(ifConditions)("conditions") + indentedBody("body"))

# Main parser: any mix of if-statements and one-line statements.
body = OneOrMore(Group(ifStatement | oneLineStatement | EOL))
parser = body + StringEnd()
parser.ignore(blankline)
def make_parser(self):
    """Build the full statement grammar for this programming language.

    Extends the base grammar with assignment, control flow (if/while/
    break/continue/pass), print, return, function definition and load
    statements, attaching a parse action to each. Populates
    self.program, self.statements and self.comment as side effects.
    """
    super(ProgrammingGrammarParser, self).make_parser()
    variable = self.variables[0]['token']
    expression = self.expression
    # parser for program
    self.program = pp.Forward()
    # Variant of the program grammar in which break/continue are legal,
    # i.e. inside a loop body.
    programWithControl = pp.Forward()
    expressionStatement = expression + SEMICOLON
    assignmentStatement = variable('variable') + pp.Suppress(
        '=') + expression('expression') + SEMICOLON
    assignmentStatement.setParseAction(AssignmentAction)
    # define if while break pass statements
    # Keywords = {'if':'if', 'while':'while', 'break':'break', 'pass':'******', 'def':'def'}
    breakStatement = self.keywords['break']('keyword') + SEMICOLON
    breakStatement.setParseAction(BreakAction)
    continueStatement = self.keywords['continue']('keyword') + SEMICOLON
    continueStatement.setParseAction(ContinueAction)
    passStatement = self.keywords['pass']('keyword') + SEMICOLON
    passStatement.setParseAction(PassAction)
    printStatement = self.keywords['print']('keyword') + pp.delimitedList(
        expression)('args') + SEMICOLON
    printStatement.setParseAction(PrintAction)
    returnStatement = self.keywords['return']('keyword') + expression(
        'retval') + SEMICOLON
    returnStatement.setParseAction(ReturnAction)
    # atomicStatement = assignmentStatement | breakStatement | continueStatement | passStatement | printStatement | returnStatement
    # block = atomicStatement | LBRACE + self.program + RBRACE
    ifStatement = self.keywords['if']('keyword') + expression(
        'condition') + LBRACE + self.program('program') + RBRACE
    ifStatement.setParseAction(IfAction)
    ifStatementWithControl = self.keywords['if']('keyword') + expression(
        'condition') + LBRACE + programWithControl('program') + RBRACE
    ifStatementWithControl.setParseAction(IfAction)
    # if condition {program} pp.ZeroOrMore(elif condition {program}) else {program}
    # IfelseAction
    whileStatement = self.keywords['while']('keyword') + expression(
        'condition') + LBRACE + programWithControl('program') + RBRACE
    whileStatement.setParseAction(WhileAction)
    # Function definition: either name(args) or a custom punctuation form
    # PUNC('left') args PUNC('right').
    defStatement = self.keywords['def']('keyword') + (
        variable('function') + LPAREN + pp.delimitedList(variable)('args') +
        RPAREN
        | PUNC('left') + pp.delimitedList(variable)('args') + PUNC('right')
    ) + LBRACE + self.program('program') + RBRACE
    defStatement.setParseAction(DefAction)
    self.statements = [
        ifStatement, whileStatement, defStatement, returnStatement,
        passStatement, printStatement, assignmentStatement,
        expressionStatement, LBRACE + self.program + RBRACE
    ]
    statement = pp.MatchFirst(self.statements)
    controlStatements = [
        breakStatement, continueStatement, ifStatementWithControl,
        LBRACE + programWithControl + RBRACE
    ]
    statementWithControl = pp.MatchFirst(self.statements + controlStatements)
    programWithControl <<= pp.OneOrMore(
        statementWithControl).setParseAction(ProgramSequenceAction)
    # Optional leading 'load <path>' directives, then the program body.
    loadStatement = pp.Keyword('load')(
        'keyword').suppress() + pp.restOfLine('path')
    self.program <<= pp.ZeroOrMore(loadStatement)(
        'loading') + pp.OneOrMore(statement).setParseAction(
        ProgramSequenceAction)
    self.comment = pp.pythonStyleComment
    self.program.ignore(self.comment)
def d12_geometry_parser():
    """Geometry block parser: the title is the rest of the current line."""
    title_expr = pp.restOfLine()('title')
    return title_expr
# NOTE(review): PP_UNQUOTED_EXPR references PP_KEYWORDS, which is (re)bound
# further down in this excerpt; this only executes if PP_KEYWORDS (and
# PP_ENDOFLINE, PP_BRACES, PP_COMMENTSTART, PP_ANYCHAR) are already defined
# earlier in the full module -- confirm ordering against the complete file.
PP_UNQUOTED_EXPR = pp.Combine(
    pp.OneOrMore(~PP_KEYWORDS + ~PP_ENDOFLINE + ~PP_BRACES +
                 ~PP_COMMENTSTART + PP_ANYCHAR)).setResultsName(
    "ue", listAllMatches=True)
# Parenthesised expressions may nest, hence the Forward declaration.
PP_BRACED_EXPR = pp.Forward().setResultsName("be", listAllMatches=True)
PP_BRACE_PAIR = pp.Literal("(") + pp.OneOrMore(
    PP_BRACED_EXPR | PP_UNQUOTED_EXPR | PP_KEYWORDS) + pp.Literal(")")
PP_BRACED_EXPR << PP_BRACE_PAIR
PP_EXPRESSION = pp.Group(
    pp.Combine(pp.OneOrMore(PP_UNQUOTED_EXPR | PP_BRACED_EXPR)))

# VHDL keywords that terminate an unquoted expression.
kw = ["to", "downto", "entity", "port", "generic", "end", "is"]
PP_KEYWORDS = pp.MatchFirst(kw)
PP_IDENTIFIER = pp.Word(pp.alphanums + "_")
PP_INTEGER = pp.Word(pp.nums)
# VHDL comment: '--' to end of line.
PP_COMMENT = pp.Group(pp.Literal("--") + pp.restOfLine("text"))
PP_VALUE = pp.Regex(r"[a-zA-Z0-9\"'_#]*")
PP_RANGEDIR = (pp.CaselessKeyword("to") | pp.CaselessKeyword("downto"))
PP_DIRECTION = (pp.CaselessKeyword("in") | pp.CaselessKeyword("out") |
                pp.CaselessKeyword("inout") | pp.CaselessKeyword("buffer"))


def PrToStr(pr: pp.ParseResults):
    """Flatten a (possibly nested) ParseResults into a space-joined string.

    String tokens are stripped; nested results are flattened recursively.
    """
    strings = []
    for r in pr:
        if type(r) is str:
            strings.append(r.strip())
        else:
            strings.append(PrToStr(r))
    return " ".join(strings)
alphanums,
    nums,
    restOfLine,
)  # NOTE(review): tail of an import statement that begins above this excerpt
import six

from pysoa.common.types import (  # noqa F401
    ActionResponse,
    JobResponse,
)
from pysoa.test.plan.grammar.tools import recursive_parse_expr_repr

# Directive registries: entry-point-discovered vs. explicitly registered.
ENTRY_POINT_DIRECTIVES = []
REGISTERED_DIRECTIVES = []

# Variable names may contain letters, digits, '-', '_', '.', '{' and '}'.
VarNameGrammar = Word(alphanums + '-_.{}')('variable_name')
# A value is the rest of the line with surrounding spaces/tabs stripped.
VarValueGrammar = restOfLine('value').setParseAction(
    lambda s, l, t: t[0].strip(' \t'))


def get_all_directives():
    """Return all registered directives plus entry-point directives.

    Entry points under 'pysoa.test.plan.grammar.directives' are loaded
    lazily on first call and cached in ENTRY_POINT_DIRECTIVES; import
    failures are reported to stderr and skipped.
    """
    if not ENTRY_POINT_DIRECTIVES:
        for entry_point in pkg_resources.iter_entry_points(
                'pysoa.test.plan.grammar.directives'):
            try:
                directive_class = entry_point.load(require=False)
                ENTRY_POINT_DIRECTIVES.append(directive_class)
            except ImportError:
                sys.stderr.write(
                    'Warning: could not load {}\n'.format(entry_point))
    return REGISTERED_DIRECTIVES + ENTRY_POINT_DIRECTIVES
matrix_row = Group( fnumber + fnumber + fnumber + fnumber ) prob_matrix = Group( Optional(K('alength=') + Word(nums)('ALENGTH')) + Optional(K('w=') + Word(nums)('W')) + Optional(K('nsites=') + Word(nums)('NSITES')) + Optional(K('E=') + fnumber('E')) + Group(OneOrMore(matrix_row))('ROWS') ) letter_probs = K('letter-probability matrix:') + prob_matrix('LETTER_PROBS') log_odds = K('log-odds matrix:') + prob_matrix('LOG_ODDS') prob_matrix = letter_probs | log_odds url = Optional(K('URL') + Word(printables)('URL')) motif = Group( K('MOTIF') + Word(printables)('NAME') + Optional(restOfLine('ALTNAME')) + comment + Group(OneOrMore(prob_matrix))('MATRICES') + comment + url ) meme_format = \ comment \ + version \ + comment \ + Optional(alphabet) \ + comment \ + Optional(strands) \ + comment \ + Optional(background_freqs) \ + comment \
def args(cls, player):
    """Argument grammar for this command: the rest of the input line,
    captured under the results name 'text'."""
    rest_of_line = pyp.restOfLine("text")
    return rest_of_line
class APTHistoryLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parses for Advanced Packaging Tool (APT) History log files."""

    NAME = 'apt_history'
    DATA_FORMAT = 'Advanced Packaging Tool (APT) History log file'

    # APT History log lines can be very long.
    MAX_LINE_LENGTH = 65536

    _ENCODING = 'utf-8'

    _HYPHEN = text_parser.PyparsingConstants.HYPHEN
    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Date-time stamp of the form "YYYY-MM-DD  HH:MM:SS" (separators ':'
    # suppressed; the grouped digits are unpacked in _BuildDateTime).
    _APTHISTORY_DATE_TIME = pyparsing.Group(
        _FOUR_DIGITS + _HYPHEN +
        _TWO_DIGITS + _HYPHEN +
        _TWO_DIGITS +
        _TWO_DIGITS + pyparsing.Suppress(':') +
        _TWO_DIGITS + pyparsing.Suppress(':') +
        _TWO_DIGITS)

    _RECORD_START = (
        # APT History logs may start with empty lines
        pyparsing.ZeroOrMore(pyparsing.lineEnd()) +
        pyparsing.Literal('Start-Date:') +
        _APTHISTORY_DATE_TIME.setResultsName('start_date') +
        pyparsing.lineEnd())

    _RECORD_BODY = (pyparsing.MatchFirst([
        pyparsing.Literal('Commandline:'),
        pyparsing.Literal('Downgrade:'),
        pyparsing.Literal('Error:'),
        pyparsing.Literal('Install:'),
        pyparsing.Literal('Purge:'),
        pyparsing.Literal('Remove:'),
        pyparsing.Literal('Requested-By:'),
        pyparsing.Literal('Upgrade:')]) +
        pyparsing.restOfLine())

    _RECORD_END = (
        pyparsing.Literal('End-Date:') +
        _APTHISTORY_DATE_TIME.setResultsName('end_date') +
        pyparsing.OneOrMore(pyparsing.lineEnd()))

    LINE_STRUCTURES = [
        ('record_start', _RECORD_START),
        ('record_body', _RECORD_BODY),
        ('record_end', _RECORD_END)]

    def __init__(self):
        """Initializes an APT History parser."""
        super(APTHistoryLogParser, self).__init__()
        # Per-record state accumulated between record_start and record_end.
        self._date_time = None
        self._event_data = None
        self._downgrade = None
        self._install = None
        self._purge = None
        self._remove = None
        self._upgrade = None

    @staticmethod
    def _BuildDateTime(time_elements_structure):
        """Builds time elements from an APT History time stamp.

        Args:
          time_elements_structure (pyparsing.ParseResults): structure of
              tokens derived from an APT History time stamp.

        Returns:
          dfdatetime.TimeElements: date and time extracted from the structure
              or None if the structure does not represent a valid string.
        """
        # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
        # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
        # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
        # overriding __getattr__ with a function that returns an empty string when
        # named token does not exists.
        try:
            year, month, day_of_month, hours, minutes, seconds = (
                time_elements_structure)

            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=(
                    year, month, day_of_month, hours, minutes, seconds))

            # APT History logs store date and time values in local time.
            date_time.is_local_time = True
            return date_time
        except (TypeError, ValueError):
            return None

    def _ParseRecordStart(self, parser_mediator, structure):
        """Parses the first line of a log record.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a log entry.
        """
        self._date_time = self._BuildDateTime(structure.get(
            'start_date', None))
        if not self._date_time:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(self._date_time))
            return

        self._event_data = APTHistoryLogEventData()
        return

    def _ParseRecordBody(self, structure):
        """Parses a line from the body of a log record.

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived from
              a log entry.

        Raises:
          ParseError: when the date and time value is missing.
        """
        if not self._date_time:
            raise errors.ParseError('Missing date time value.')

        # Command data
        if structure[0] == 'Commandline:':
            self._event_data.command = ''.join(structure)

        elif structure[0] == 'Error:':
            self._event_data.error = ''.join(structure)

        elif structure[0] == 'Requested-By:':
            self._event_data.requester = ''.join(structure)

        # Package lists
        elif structure[0] == 'Downgrade:':
            self._downgrade = ''.join(structure)

        elif structure[0] == 'Install:':
            self._install = ''.join(structure)

        elif structure[0] == 'Purge:':
            self._purge = ''.join(structure)

        elif structure[0] == 'Remove:':
            self._remove = ''.join(structure)

        elif structure[0] == 'Upgrade:':
            self._upgrade = ''.join(structure)

    def _ParseRecordEnd(self, parser_mediator):
        """Parses the last line of a log record.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.

        Raises:
          ParseError: when the date and time value is missing.
        """
        if not self._date_time:
            raise errors.ParseError('Missing date time value.')

        # Create relevant events for record
        if self._downgrade:
            self._event_data.packages = self._downgrade
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_DOWNGRADE,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._install:
            self._event_data.packages = self._install
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_INSTALLATION,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._purge:
            self._event_data.packages = self._purge
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_DELETED,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._remove:
            self._event_data.packages = self._remove
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_DELETED,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._upgrade:
            self._event_data.packages = self._upgrade
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_UPDATE,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

    def _ResetState(self):
        """Resets stored values in the parser."""
        self._date_time = None
        self._downgrade = None
        self._event_data = None
        self._install = None
        self._purge = None
        self._remove = None
        self._upgrade = None

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived from
              a log entry.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key == 'record_start':
            self._ParseRecordStart(parser_mediator, structure)
            return

        if key == 'record_body':
            self._ParseRecordBody(structure)
            return

        if key == 'record_end':
            self._ParseRecordEnd(parser_mediator)
            # Reset for next record.
            self._ResetState()
            return

        raise errors.ParseError(
            'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is an APT History log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): single line from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        try:
            self._RECORD_START.parseString(line)
            # Reset stored values for parsing a new file.
            self._ResetState()
        except pyparsing.ParseException as exception:
            logger.debug(
                'Not an APT History log file: {0!s}'.format(exception))
            return False

        return True
def get_full_grammar(cls):
    """Extend the parent grammar with a ", message=<text>" clause.

    The message text runs to the end of the line and is captured as
    'error_message' with surrounding spaces/tabs stripped.
    """
    base_grammar = super(
        ActionExpectsFieldMessageErrorsDirective, cls).get_full_grammar()
    message_value = restOfLine('error_message').setParseAction(
        lambda s, l, t: t[0].strip(' \t'))
    return base_grammar + ',' + Literal('message') + '=' + message_value
def parse(string=None, filename=None, token=None, lang=None):
    """
    Parse a token stream from a string or file, or raise a SyntaxError

    This function includes the parser grammar.
    """

    if not lang:
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    #   | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        # Translate a two-character escape sequence into its literal value.
        token = tokens[0]

        if token == r'\|':
            return u'|'
        elif token == r'\n':
            return u'\n'
        elif token == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % token)

    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])
    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """
        Clean a multiline string

        The indent level of a multiline string is the indent level of the
        triple-". We have to derive this by walking backwards from the
        location of the quoted string token to the newline before it.

        We also want to remove the leading and trailing newline if they exist.

        FIXME: assumes UNIX newlines
        """

        def remove_indent(multiline, indent):
            """
            Generate the lines removing the indent
            """
            for line in multiline.splitlines():
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines
        if multiline[0] == '\n':
            multiline = multiline[1:]

        if multiline[-1] == '\n':
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And They can
    # contain an optional inline comment, although it's possible to encapsulate
    # it in a string. Finally they can contain a table or a multiline 'Python'
    # string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Try parsing the string
    #
    if not token:
        token = FEATURE
    else:
        token = locals()[token]

    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # NOTE(review): the builtin open() takes buffering, not an
            # encoding, as its third positional argument -- this call only
            # works if `open` here is codecs.open (or similar) imported
            # elsewhere in this module; confirm against the full file.
            with open(filename, 'r', 'utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)
"""
pyparsing parser definition to parse STRIPS and PDDL files
for AI Planning class (coursera)
"""

from traceback import print_exc

from pyparsing import Optional, Keyword, Literal, Word
from pyparsing import Combine, Group, OneOrMore, restOfLine, dictOf
from pyparsing import alphas, alphanums
from pyparsing import nestedExpr, Forward

MAX_NUM_ARGS = 1000000000  # max of 1 billion arguments for any function (relation constant)

# function constants are usually lowercase, that's not a firm requirement in the spec
identifier = Word(alphas, alphanums + "-_")
# A PDDL variable: '?' followed by an identifier, e.g. ?x.
variable = Combine(Literal('?') + Word(alphas, alphanums + '_'))
# Comments: one or more ';' characters then the rest of the line; suppressed.
comment = Optional(OneOrMore(Word(';').suppress()) + restOfLine('comment')).suppress()
# typ = Literal('-').suppress() + Optional(Literal(' ').suppress()) + identifier

# All mean the same thing: ground predicate, ground atom, ground_literal
# Any formula whose arguments are all ground terms (literals = non-variables)
ground_predicate = Literal('(').suppress() + Group(OneOrMore(identifier)) + Literal(')').suppress() + comment

arguments = sequence_of_variables = Literal('(').suppress() + Group(OneOrMore(variable)) + Literal(')').suppress()

# Norvig/Russel tend to call this a "fluent"
predicate = Literal('(').suppress() + Group(identifier + OneOrMore(variable)) + Literal(')').suppress()
notted_predicate = Literal('(').suppress() + Keyword('not') + predicate + Literal(')').suppress()

# a set of ground atoms/predicates is a state, they are all presumed to be ANDed together (conjunction)
state_conjunction_implicit = OneOrMore(ground_predicate)
state_conjunction_explicit = (Literal('(') + Keyword('and')).suppress() + state_conjunction_implicit + Literal(')').suppress()
state = state_conjunction_explicit | state_conjunction_implicit
def __discard_pattern(self):
    """Pattern that consumes the remainder of the current line."""
    discard = pyp.restOfLine()
    return discard
fin = open('uData.txt', 'r') data = fin.read() fin.close() fout = open('sData.txt', 'w') NL = LineEnd().suppress() # LineEnd.suppress() gender = oneOf("M F") # Sets the possible genders integer = Word(nums) # Define what integer is date = Combine(integer + '/' + integer + '/' + integer) # Define the line definitions gender_line = gender("sex") + NL dob_line = date("DOB") + NL name_line = restOfLine("name") + NL id_line = Word(alphanums + '-')("ID") + NL recnum_line = integer("recnum") + NL # Define forms of address lines first_addr_line = Suppress('.') + empty + restOfLine + NL # Subsequent address line is not gender subsq_addr_line = ~(gender_line) + restOfLine + NL # a line with a name and a recnum combined, if no ID name_recnum_line = originalTextFor(OneOrMore(Word(alphas + ',')))("name") + \ integer("recnum") + NL # Defining the form of an overall record, either with or without an ID record = Group((first_addr_line + ZeroOrMore(subsq_addr_line))("address") + \ gender_line + dob_line + ((name_line + id_line + recnum_line) | \
def addToAnnotationDict(annotateName, annotate): s = annotate[1] # first remove the quotes if s.startswith("'''"): s = s[3:-3] if s.startswith('"'): s = s[1:-1] annotateDict[annotate['name']] = s comment = '## ' + restOfLine #comment = '#' + restOfLine CMNT = Optional(cStyleComment("comment")) CMNT2 = Optional( (Suppress('//') + restOfLine("comment2"))) #Optional(cppStyleComment("comment2")) STRQ3 = QuotedString("'''", multiline=True) ANNOTSTR = (QuotedString("'''", multiline=True) | quotedString) #IDENTIFIER = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*') #INTEGER = Regex(r'([+-]?(([1-9][0-9]*)|0+))') #IDENTIFIER = Word(alphas+"_", alphas+nums+"_" ) INT_DECI = Regex('([+-]?(([1-9][0-9]*)|0+))') INT_OCT = Regex('(0[0-7]*)') INT_HEX = Regex('(0[xX][0-9a-fA-F]*)') INT = INT_HEX | INT_OCT | INT_DECI FLOAT = Regex( '[+-]?(((\d+\.\d*)|(\d*\.\d+))([eE][-+]?\d+)?)|(\d*[eE][+-]?\d+)') SIZE = INT #VARNAME = IDENTIFIER ##ident = Word(alphas+"_",alphanums+"_").setName("identifier") IDENT = Word(alphas + "_", alphanums + "_")("name")