Пример #1
0
def songs_pyparsing(fh):
    r"""Parse an extended M3U playlist (#EXTM3U) into a list of Song objects.

    Each entry is a "#EXTINF:<seconds>,<title>" line followed by a filename
    line.  Returns a list of Song named tuples in playlist order; on a parse
    error a message is printed and an empty list is returned.

    >>> import os
    >>> filename = os.path.dirname(__file__)
    >>> filename = os.path.join(filename, "data/Various-Pop.m3u")
    >>> with open(filename, "rt", encoding="utf8") as fh:
    ...     songs = songs_pyparsing(fh)
    >>> songs[0].title, songs[0].seconds, songs[0].filename
    ('Various - Two Tribes', 236, 'Various\\Frankie Goes To Hollywood\\02-Two Tribes.ogg')
    >>> songs[-1].title, songs[-1].seconds, songs[-1].filename
    ('The Police - Walking On The Moon', 303, 'Various\\Sting & The Police 1997\\06-Walking On The Moon.ogg')
    >>> lines = []
    >>> lines.append("#EXTM3U")
    >>> lines.append("#EXTINF:140,The Beatles - Love Me Do")
    >>> lines.append("Beatles\\Greatest Hits\\01-Love Me Do.ogg")
    >>> lines.append("#EXTINF:-1,The Beatles - From Me To You")
    >>> lines.append("Beatles\\Greatest Hits\\02-From Me To You.ogg")
    >>> import io
    >>> data = io.StringIO("\n".join(lines))
    >>> songs = songs_pyparsing(data)
    >>> len(songs) == 2
    True
    >>> songs[0].title, songs[0].seconds
    ('The Beatles - Love Me Do', 140)
    >>> songs[1].title, songs[1].seconds
    ('The Beatles - From Me To You', -1)
    """

    def add_song(tokens):
        # Parse action fired once per matched entry; accumulates results.
        songs.append(Song(tokens.title, tokens.seconds,
                          tokens.filename))

    songs = []
    title = restOfLine("title")
    filename = restOfLine("filename")
    # Duration may be negative (-1 means "unknown" in M3U); convert to int
    # at parse time so Song.seconds is already numeric.
    seconds = Combine(Optional("-") + Word(nums)).setParseAction(
            lambda tokens: int(tokens[0]))("seconds")
    info = Suppress("#EXTINF:") + seconds + Suppress(",") + title
    entry = info + LineEnd() + filename + LineEnd()
    entry.setParseAction(add_song)
    parser = Suppress("#EXTM3U") + OneOrMore(entry)
    try:
        parser.parseFile(fh)
    except ParseException as err:
        print("parse error: {0}".format(err))
        return []
    return songs
Пример #2
0
            ^ (pyp.CaselessKeyword("b")+braddress) \
            ^ (pyp.CaselessKeyword("bl")+register+pyp.Suppress(",")+braddress).setName('instruction') \
            ^ (pyp.CaselessKeyword("inc")+register) \
            ^ (pyp.CaselessKeyword("dec")+register)
# Pseudo-instruction grammar: assembler conveniences that expand into real
# instructions at parse time.
# NOTE(review): intvalue, register, tag and instruction are defined earlier
# in the file (outside this excerpt) -- confirm their definitions there.
smwpseud = pyp.CaselessKeyword("smw") + intvalue
rmwpseud = pyp.CaselessKeyword("rmw") + intvalue
pushpseud = pyp.CaselessKeyword("push") + register
pushpseud.setParseAction(expandPush)  # expand "push r" during parsing
poppseud = pyp.CaselessKeyword("pop") + register
poppseud.setParseAction(expandPop)  # expand "pop r" during parsing
# Longest-match alternation ('^') over all pseudo-instructions.
pseudoinst = smwpseud \
           ^ rmwpseud \
           ^ pushpseud \
           ^ poppseud
# A label is '@' followed by a tag name.
label = pyp.Group("@" + tag)
# '%' starts a comment running to end of line; fully suppressed.
comment = pyp.Suppress("%") + pyp.Suppress(pyp.restOfLine(""))
# A code line: optional label, one (pseudo-)instruction, optional comment.
codeline = pyp.Group(
    pyp.Optional(label) + pyp.Group(instruction ^ pseudoinst) +
    pyp.Optional(comment))
line = (comment ^ codeline)
program = pyp.ZeroOrMore(line)

# opcodes for the instructions
opcodes = {
    'ldr': 0,  # load register from memory
    'str': 1,  # store register to memory
    'mov': 2,  # move register/immediate value to register
    'add':
    3,  # add register and register/immediate value, store result in register
    'sub':
    4,  # subtract register/immediate value from register, store result in register
Пример #3
0
 def args(cls, player):
     """Argument grammar for this command: the rest of the line, named "line"."""
     rest = pyparsing.restOfLine("line")
     return rest
Пример #4
0
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    # leaveWhitespace so whitespace before '}' stays with the innards group.
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # "~*" must be tried before "~" so the longer literal wins.
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine()

    # "key value;" -- default=None distinguishes "break;" from "key val;".
    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal("charset_map") + space + value + space + value

    map_statement = space + Literal("map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    #    - I can neither prove nor disprove that it is correct wrt all escaped
    #      semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() +
        right_bracket)

    # Forward declaration: blocks nest recursively inside block_innards.
    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)

    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()

    block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block)
                          + space).leaveWhitespace()

    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    # Preserve tabs and leading whitespace so the config round-trips exactly.
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        """Store the raw nginx configuration text to be parsed."""
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
Пример #5
0
 def args(cls, player):
     """Argument grammar: the remainder of the line, captured as "command"."""
     command_text = pyparsing.restOfLine("command")
     return command_text
Пример #6
0
import pyparsing as pp

MAX_NUM_ARGS = 1000000000  # max of 1 billion arguments for any function (relation constant)

# function constants are usually lowercase, but haven't found that as a hard requirement in the spec
function_constant = pp.Word(pp.srange("[A-Za-z]"), pp.srange("[a-zA-Z0-9_]"))
identifier = pp.Word(pp.srange("[A-Za-z]"), pp.srange("[a-zA-Z0-9_]"))
# A comment is one or more ';' runs (suppressed) followed by the line's text.
comment = pp.OneOrMore(pp.Word(';').suppress()) + pp.restOfLine('comment')

# GDL keywords ("Relation Constants")
role = pp.Keyword('role')  # role(p) means that p is a player name/side in the game.
# NOTE(review): the next two comments look swapped -- in standard GDL,
# base() describes state propositions and input() describes actions; confirm.
inpt = pp.Keyword('input') # input(t) means that t is a base proposition in the game.
base = pp.Keyword('base')  # base(a) means that a is an action in the game, the outcome of a turn.
init = pp.Keyword('init')  # init(p) means that the datum p is true in the initial state of the game.
# NOTE(review): 'next' and 'true' below shadow Python builtins; left as-is
# since other modules may reference these module-level names.
next = pp.Keyword('next')  # next(p) means that the datum p is true in the next state of the game.
does = pp.Keyword('does')  # does(r, a) means that player r performs action a in the current state.
legal = pp.Keyword('legal')  # legal(r, a) means it is legal for r to play a in the current state.
goal = pp.Keyword('goal')  # goal(r, n) means that the current state has utility n for player r. n must be an integer from 0 through 100.
terminal = pp.Keyword('terminal')  # terminal(d) means that if the datum d is true, the game has ended and no player actions are legal.
distinct = pp.Keyword('distinct')  # distinct(x, y) means that the values of x and y are different.
true = pp.Keyword('true')  # true(p) means that the datum p is true in the current state.

# GDL-II Relation Constants
sees = pp.Keyword('sees')  # The predicate sees(?r,?p) means that role ?r perceives ?p in the next game state.
random = pp.Keyword('random')  # A predefined player that chooses legal moves randomly

# GDL-I and GDL-II Relation Constants
relation_constant = role | inpt | base | init | next | does | legal | goal | terminal | distinct | true | sees | random

# TODO: DRY this up
# functions (keywords that should be followed by the number of arguments indicated)
Пример #7
0
# Dotted-quad IPv4 address: four digit runs joined by '.'.
ipV4Address = Combine(Word(nums) + ('.' + Word(nums))*3)
ipv4_prefixlen = Word(nums, min=1, max=2)
# fix this - combine? (could reuse ipV4Address: Combine(ipV4Address + "/" + ipv4_prefixlen))
ipAddressWithMask = Combine(Word(nums) + ('.' + Word(nums))*3 + "/" + ipv4_prefixlen)
integer = Word(nums)
comment = Group("!" + restOfLine)       # IOS-style '!' comment
hash_comment = Group("#" + restOfLine)  # '#' comment

router_id = (ipV4Address | integer)
word_param  = Word(alphanums)
interface_id  = Word(alphanums + ":")

#TODO: make function to return thesline_ip_addresse
# NOTE(review): the "******" literals below look like redacted keywords
# (probably "password" / "enable password") -- confirm against the original.
password = "******" + word_param
enable_password = "******" + word_param
banner_motd = "banner motd " + word_param("type") + restOfLine("path")


line_ip_address = "ip address" + ipAddressWithMask("ip")
line_description = "description" + restOfLine("description")
line_ip_ospf_cost = "ip ospf cost" + integer("cost")

# One or more interface sub-commands in any order, plus comments.
interface_properties = OneOrMore(
    line_ip_address("ip address") |
    line_description("description") |
    line_ip_ospf_cost("ospf cost") |
    comment|
    hash_comment
    )

# NOTE(review): indentStack is defined elsewhere in the file -- confirm it is
# initialized (typically [1]) before this statement runs.
interface_indent = indentedBlock(interface_properties, indentStack, True)("indent")
Пример #8
0
 def args(cls, player):
     """Argument grammar: capture the rest of the line under the name "name"."""
     name_expr = pyp.restOfLine("name")
     return name_expr
Пример #9
0
EOL = LineEnd().suppress()
SOL = LineStart().leaveWhitespace()
blankline = SOL + LineEnd()

# Lines with / without leading whitespace distinguish indented bodies.
noIndentation = SOL + ~Word(ws).leaveWhitespace().suppress()
indentation = SOL + Word(ws).leaveWhitespace().suppress()

# Single statements
keyword = Word(alphanums)
# Use a private copy: the previous code did `value = restOfLine` and then
# called setParseAction on it, which attached the action to the shared
# module-level pyparsing.restOfLine singleton and leaked the stripping
# behavior into every other user of restOfLine in the process.
value = restOfLine.copy()
value.setParseAction(lambda tokens: tokens[0].strip())
oneLineStatement = keyword("keyword") + value("value") + EOL

# If statements
# value.copy() keeps the stripping parse action these expressions previously
# inherited from the mutated restOfLine singleton, so behavior is unchanged.
nonIndentedLine = noIndentation + value.copy() + EOL
indentedLine = indentation + Group(oneLineStatement)
indentedBody = OneOrMore(indentedLine)

ifConditions = (value.copy() + EOL +
                ZeroOrMore(nonIndentedLine))
# Drop empty condition tokens.
ifConditions.setParseAction(lambda tokens: [t for t in tokens if t])

ifStatement = ("if" +
               Group(ifConditions)("conditions") +
               indentedBody("body"))

# Main parser
body = OneOrMore(Group(ifStatement | oneLineStatement | EOL))
parser = body + StringEnd()
parser.ignore(blankline)
Пример #10
0
 def args(cls, player):
     """Argument grammar: everything remaining on the line, named "command"."""
     remainder = pyparsing.restOfLine("command")
     return remainder
Пример #11
0
    def make_parser(self):
        """Build the statement-level grammar of the toy language.

        Extends the parent's expression grammar with assignment, control
        flow (if/while/break/continue/pass), print/return, function
        definitions and a leading 'load' directive, wiring everything
        into self.program.
        """
        super(ProgrammingGrammarParser, self).make_parser()
        variable = self.variables[0]['token']
        expression = self.expression
        # parser for program
        self.program = pp.Forward()
        # Variant of the program that additionally accepts break/continue
        # (only legal inside loop bodies).
        programWithControl = pp.Forward()
        expressionStatement = expression + SEMICOLON
        assignmentStatement = variable('variable') + pp.Suppress(
            '=') + expression('expression') + SEMICOLON
        assignmentStatement.setParseAction(AssignmentAction)
        # define if while break pass statements
        # Keywords = {'if':'if', 'while':'while', 'break':'break', 'pass':'******', 'def':'def'}
        breakStatement = self.keywords['break']('keyword') + SEMICOLON
        breakStatement.setParseAction(BreakAction)
        continueStatement = self.keywords['continue']('keyword') + SEMICOLON
        continueStatement.setParseAction(ContinueAction)
        passStatement = self.keywords['pass']('keyword') + SEMICOLON
        passStatement.setParseAction(PassAction)
        printStatement = self.keywords['print']('keyword') + pp.delimitedList(
            expression)('args') + SEMICOLON
        printStatement.setParseAction(PrintAction)
        returnStatement = self.keywords['return']('keyword') + expression(
            'retval') + SEMICOLON
        returnStatement.setParseAction(ReturnAction)

        # atomicStatement = assignmentStatement | breakStatement | continueStatement | passStatement | printStatement | returnStatement
        # block = atomicStatement | LBRACE + self.program + RBRACE

        ifStatement = self.keywords['if']('keyword') + expression(
            'condition') + LBRACE + self.program('program') + RBRACE
        ifStatement.setParseAction(IfAction)
        # Same as ifStatement but its body may contain break/continue.
        ifStatementWithControl = self.keywords['if']('keyword') + expression(
            'condition') + LBRACE + programWithControl('program') + RBRACE
        ifStatementWithControl.setParseAction(IfAction)
        # if condition {program} pp.ZeroOrMore(elif condition {program}) else {program}
        # IfelseAction
        whileStatement = self.keywords['while']('keyword') + expression(
            'condition') + LBRACE + programWithControl('program') + RBRACE
        whileStatement.setParseAction(WhileAction)
        # Function definition: either name(args) or an operator-style form
        # delimited by PUNC tokens on the left and right.
        defStatement = self.keywords['def']('keyword') + (
            variable('function') + LPAREN + pp.delimitedList(variable)('args')
            + RPAREN | PUNC('left') + pp.delimitedList(variable)('args') +
            PUNC('right')) + LBRACE + self.program('program') + RBRACE
        defStatement.setParseAction(DefAction)
        self.statements = [
            ifStatement, whileStatement, defStatement, returnStatement,
            passStatement, printStatement, assignmentStatement,
            expressionStatement, LBRACE + self.program + RBRACE
        ]
        statement = pp.MatchFirst(self.statements)
        controlStatements = [
            breakStatement, continueStatement, ifStatementWithControl,
            LBRACE + programWithControl + RBRACE
        ]
        statementWithControl = pp.MatchFirst(self.statements +
                                             controlStatements)
        programWithControl <<= pp.OneOrMore(
            statementWithControl).setParseAction(ProgramSequenceAction)
        # 'load <path>' directives may only appear at the start of a program.
        loadStatement = pp.Keyword('load')(
            'keyword').suppress() + pp.restOfLine('path')
        self.program <<= pp.ZeroOrMore(loadStatement)(
            'loading') + pp.OneOrMore(statement).setParseAction(
                ProgramSequenceAction)
        self.comment = pp.pythonStyleComment
        self.program.ignore(self.comment)
Пример #12
0
def d12_geometry_parser():
    """Geometry block parser: the rest of the line, captured as 'title'."""
    return pp.restOfLine()('title')
Пример #13
0
# NOTE(review): PP_KEYWORDS, PP_ENDOFLINE, PP_BRACES, PP_COMMENTSTART and
# PP_ANYCHAR are referenced here before the definitions visible below --
# presumably they are defined earlier in the full file or this excerpt is
# reordered; confirm statement order in the original module.
PP_UNQUOTED_EXPR = pp.Combine(
    pp.OneOrMore(~PP_KEYWORDS + ~PP_ENDOFLINE + ~PP_BRACES + ~PP_COMMENTSTART +
                 PP_ANYCHAR)).setResultsName("ue", listAllMatches=True)
# Forward-declared so parenthesized expressions can nest recursively.
PP_BRACED_EXPR = pp.Forward().setResultsName("be", listAllMatches=True)
PP_BRACE_PAIR = pp.Literal("(") + pp.OneOrMore(PP_BRACED_EXPR
                                               | PP_UNQUOTED_EXPR
                                               | PP_KEYWORDS) + pp.Literal(")")
PP_BRACED_EXPR << PP_BRACE_PAIR
PP_EXPRESSION = pp.Group(
    pp.Combine(pp.OneOrMore(PP_UNQUOTED_EXPR | PP_BRACED_EXPR)))

# HDL keywords treated specially while scanning expressions.
kw = ["to", "downto", "entity", "port", "generic", "end", "is"]
PP_KEYWORDS = pp.MatchFirst(kw)
PP_IDENTIFIER = pp.Word(pp.alphanums + "_")
PP_INTEGER = pp.Word(pp.nums)
# "--" comment running to end of line (VHDL-style).
PP_COMMENT = pp.Group(pp.Literal("--") + pp.restOfLine("text"))
PP_VALUE = pp.Regex(r"[a-zA-Z0-9\"'_#]*")
PP_RANGEDIR = (pp.CaselessKeyword("to") | pp.CaselessKeyword("downto"))
PP_DIRECTION = (pp.CaselessKeyword("in") | pp.CaselessKeyword("out")
                | pp.CaselessKeyword("inout") | pp.CaselessKeyword("buffer"))

def PrToStr(pr: pp.ParseResults):
    """Flatten a (possibly nested) parse result into one space-joined string.

    String tokens are stripped; nested results are flattened recursively.
    """
    return " ".join(
        item.strip() if type(item) is str else PrToStr(item)
        for item in pr)
Пример #14
0
    alphanums,
    nums,
    restOfLine,
)
import six

from pysoa.common.types import (  # noqa F401
    ActionResponse, JobResponse,
)
from pysoa.test.plan.grammar.tools import recursive_parse_expr_repr

# Directive classes discovered via entry points (filled lazily) and those
# registered directly in code.
ENTRY_POINT_DIRECTIVES = []
REGISTERED_DIRECTIVES = []

# A variable name: alphanumerics plus '-', '_', '.', '{', '}'.
VarNameGrammar = Word(alphanums + '-_.{}')('variable_name')
# A variable value: rest of the line with surrounding spaces/tabs stripped.
VarValueGrammar = restOfLine('value').setParseAction(
    lambda s, l, t: t[0].strip(' \t'))


def get_all_directives():
    """Return all known directive classes.

    Directives registered in code come first, followed by directives
    discovered through the 'pysoa.test.plan.grammar.directives' entry
    point.  Entry-point discovery runs once and is cached at module level.
    """
    if not ENTRY_POINT_DIRECTIVES:
        discovered = pkg_resources.iter_entry_points(
            'pysoa.test.plan.grammar.directives')
        for candidate in discovered:
            try:
                loaded_class = candidate.load(require=False)
            except ImportError:
                sys.stderr.write(
                    'Warning: could not load {}\n'.format(candidate))
            else:
                ENTRY_POINT_DIRECTIVES.append(loaded_class)

    return REGISTERED_DIRECTIVES + ENTRY_POINT_DIRECTIVES
Пример #15
0
# One row of a MEME motif matrix: four numbers (presumably one per
# nucleotide A/C/G/T -- confirm against the MEME format).
matrix_row = Group(
    fnumber + fnumber + fnumber + fnumber
)
# Optional header key=value pairs followed by the matrix rows themselves.
# NOTE(review): K, fnumber and comment come from earlier in the file.
prob_matrix = Group(
    Optional(K('alength=') + Word(nums)('ALENGTH'))
    + Optional(K('w=') + Word(nums)('W'))
    + Optional(K('nsites=') + Word(nums)('NSITES'))
    + Optional(K('E=') + fnumber('E'))
    + Group(OneOrMore(matrix_row))('ROWS')
)
letter_probs = K('letter-probability matrix:') + prob_matrix('LETTER_PROBS')
log_odds = K('log-odds matrix:') + prob_matrix('LOG_ODDS')
# NOTE(review): prob_matrix is rebound here; letter_probs/log_odds already
# captured the original object above so this works, but the shadowing is
# confusing -- a distinct name (e.g. any_matrix) would be clearer.
prob_matrix = letter_probs | log_odds
url = Optional(K('URL') + Word(printables)('URL'))
# A whole MOTIF section: name, optional alternate name, matrices, URL.
motif = Group(
    K('MOTIF') + Word(printables)('NAME') + Optional(restOfLine('ALTNAME'))
    + comment
    + Group(OneOrMore(prob_matrix))('MATRICES')
    + comment
    + url
)
meme_format = \
    comment \
    + version \
    + comment \
    + Optional(alphabet) \
    + comment \
    + Optional(strands) \
    + comment \
    + Optional(background_freqs) \
    + comment \
Пример #16
0
 def args(cls, player):
     """Argument grammar: the rest of the line, captured under "text"."""
     text_expr = pyp.restOfLine("text")
     return text_expr
Пример #17
0
class APTHistoryLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parses for Advanced Packaging Tool (APT) History log files."""

    NAME = 'apt_history'

    DATA_FORMAT = 'Advanced Packaging Tool (APT) History log file'

    # APT History log lines can be very long.
    MAX_LINE_LENGTH = 65536

    _ENCODING = 'utf-8'

    _HYPHEN = text_parser.PyparsingConstants.HYPHEN

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Date and time: 4-digit year, '-', 2-digit month, '-', 2-digit day,
    # then 2-digit hours, minutes and seconds separated by ':' (suppressed).
    _APTHISTORY_DATE_TIME = pyparsing.Group(_FOUR_DIGITS + _HYPHEN +
                                            _TWO_DIGITS + _HYPHEN +
                                            _TWO_DIGITS + _TWO_DIGITS +
                                            pyparsing.Suppress(':') +
                                            _TWO_DIGITS +
                                            pyparsing.Suppress(':') +
                                            _TWO_DIGITS)

    _RECORD_START = (
        # APT History logs may start with empty lines
        pyparsing.ZeroOrMore(pyparsing.lineEnd()) +
        pyparsing.Literal('Start-Date:') +
        _APTHISTORY_DATE_TIME.setResultsName('start_date') +
        pyparsing.lineEnd())

    # Any known body key ("Commandline:", "Install:", ...) plus its value.
    _RECORD_BODY = (pyparsing.MatchFirst([
        pyparsing.Literal('Commandline:'),
        pyparsing.Literal('Downgrade:'),
        pyparsing.Literal('Error:'),
        pyparsing.Literal('Install:'),
        pyparsing.Literal('Purge:'),
        pyparsing.Literal('Remove:'),
        pyparsing.Literal('Requested-By:'),
        pyparsing.Literal('Upgrade:')
    ]) + pyparsing.restOfLine())

    _RECORD_END = (pyparsing.Literal('End-Date:') +
                   _APTHISTORY_DATE_TIME.setResultsName('end_date') +
                   pyparsing.OneOrMore(pyparsing.lineEnd()))

    # Structure keys here are dispatched on in ParseRecord below.
    LINE_STRUCTURES = [('record_start', _RECORD_START),
                       ('record_body', _RECORD_BODY),
                       ('record_end', _RECORD_END)]

    def __init__(self):
        """Initializes an APT History parser."""
        super(APTHistoryLogParser, self).__init__()
        # Per-record parse state, populated by _ParseRecordStart /
        # _ParseRecordBody and cleared by _ResetState.
        self._date_time = None
        self._event_data = None
        self._downgrade = None
        self._install = None
        self._purge = None
        self._remove = None
        self._upgrade = None

    @staticmethod
    def _BuildDateTime(time_elements_structure):
        """Builds time elements from an APT History time stamp.

    Args:
      time_elements_structure (pyparsing.ParseResults): structure of tokens
          derived from an APT History time stamp.

    Returns:
      dfdatetime.TimeElements: date and time extracted from the structure or
          None if the structure does not represent a valid string.
    """
        # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
        # copy.deepcopy() of the dfDateTime object will fail on Python 3.8 with:
        # "TypeError: 'str' object is not callable" due to pyparsing.ParseResults
        # overriding __getattr__ with a function that returns an empty string when
        # named token does not exists.
        try:
            year, month, day_of_month, hours, minutes, seconds = (
                time_elements_structure)

            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=(year, month, day_of_month, hours, minutes,
                                     seconds))

            # APT History logs store date and time values in local time.
            date_time.is_local_time = True
            return date_time
        except (TypeError, ValueError):
            return None

    def _ParseRecordStart(self, parser_mediator, structure):
        """Parses the first line of a log record.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.
    """
        self._date_time = self._BuildDateTime(structure.get(
            'start_date', None))
        if not self._date_time:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(self._date_time))
            return

        self._event_data = APTHistoryLogEventData()
        return

    def _ParseRecordBody(self, structure):
        """Parses a line from the body of a log record.

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.

    Raises:
      ParseError: when the date and time value is missing.
    """
        if not self._date_time:
            raise errors.ParseError('Missing date time value.')

        # Command data
        if structure[0] == 'Commandline:':
            self._event_data.command = ''.join(structure)

        elif structure[0] == 'Error:':
            self._event_data.error = ''.join(structure)

        elif structure[0] == 'Requested-By:':
            self._event_data.requester = ''.join(structure)

        # Package lists
        elif structure[0] == 'Downgrade:':
            self._downgrade = ''.join(structure)

        elif structure[0] == 'Install:':
            self._install = ''.join(structure)

        elif structure[0] == 'Purge:':
            self._purge = ''.join(structure)

        elif structure[0] == 'Remove:':
            self._remove = ''.join(structure)

        elif structure[0] == 'Upgrade:':
            self._upgrade = ''.join(structure)

    def _ParseRecordEnd(self, parser_mediator):
        """Parses the last line of a log record.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.

    Raises:
      ParseError: when the date and time value is missing.
    """
        if not self._date_time:
            raise errors.ParseError('Missing date time value.')

        # Create relevant events for record
        if self._downgrade:
            self._event_data.packages = self._downgrade
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_DOWNGRADE,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._install:
            self._event_data.packages = self._install
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_INSTALLATION,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._purge:
            self._event_data.packages = self._purge
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_DELETED,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._remove:
            self._event_data.packages = self._remove
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_DELETED,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

        if self._upgrade:
            self._event_data.packages = self._upgrade
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                definitions.TIME_DESCRIPTION_UPDATE,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

    def _ResetState(self):
        """Resets stored values in the parser."""
        self._date_time = None
        self._downgrade = None
        self._event_data = None
        self._install = None
        self._purge = None
        self._remove = None
        self._upgrade = None

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a log entry.

    Raises:
      ParseError: when the structure type is unknown.
    """
        if key == 'record_start':
            self._ParseRecordStart(parser_mediator, structure)
            return

        if key == 'record_body':
            self._ParseRecordBody(structure)
            return

        if key == 'record_end':
            self._ParseRecordEnd(parser_mediator)
            # Reset for next record.
            self._ResetState()
            return

        raise errors.ParseError(
            'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is an APT History log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): single line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
        try:
            self._RECORD_START.parseString(line)
            # Reset stored values for parsing a new file.
            self._ResetState()
        except pyparsing.ParseException as exception:
            logger.debug(
                'Not an APT History log file: {0!s}'.format(exception))
            return False

        return True
Пример #18
0
 def get_full_grammar(cls):
     """Extend the parent grammar with ", message = <rest of line>".

     The message value is the remainder of the line with surrounding
     spaces and tabs stripped, captured as 'error_message'.
     """
     base_grammar = super(ActionExpectsFieldMessageErrorsDirective,
                          cls).get_full_grammar()
     message_value = restOfLine('error_message').setParseAction(
         lambda s, l, t: t[0].strip(' \t'))
     return base_grammar + ',' + Literal('message') + '=' + message_value
Пример #19
0
def parse(string=None, filename=None, token=None, lang=None):
    """
    Parse a token stream from a string or file, or raise a syntax error.

    This function includes the parser grammar.

    Args:
        string: feature text to parse; takes precedence over filename.
        filename: path of a UTF-8 encoded feature file to parse.
        token: name of a local grammar element to use as the entry point;
            defaults to the complete FEATURE grammar.
        lang: language definition; guessed from the input when omitted.

    Returns:
        The pyparsing ParseResults for the requested token.

    Raises:
        RuntimeError: when neither string nor filename is supplied.
        LettuceSyntaxError: when the input does not match the grammar.
    """

    if not lang:
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    #   | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        token = tokens[0]

        if token == r'\|':
            return u'|'
        elif token == r'\n':
            return u'\n'
        elif token == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % token)

    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])
    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """
        Clean a multiline string

        The indent level of a multiline string is the indent level of the
        triple-". We have to derive this by walking backwards from the
        location of the quoted string token to the newline before it.

        We also want to remove the leading and trailing newline if they exist.

        FIXME: assumes UNIX newlines
        """

        def remove_indent(multiline, indent):
            """
            Generate the lines removing the indent
            """

            for line in multiline.splitlines():
                # NOTE(review): when indent == 0 the sliced prefix is '' and
                # ''.isspace() is False, so every non-blank line warns even
                # though nothing is truncated -- confirm this is intended.
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines; startswith/endswith also
        # cover the empty string, where indexing would raise IndexError
        if multiline.startswith('\n'):
            multiline = multiline[1:]

        if multiline.endswith('\n'):
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And They can
    # contain an optional inline comment, although it's possible to encapsulate
    # it in a string. Finally they can contain a table or a multiline 'Python'
    # string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Try parsing the string
    #

    if not token:
        token = FEATURE
    else:
        token = locals()[token]

    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # BUG FIX: the third positional argument of built-in open() is the
            # integer buffering size, not the encoding; passing 'utf-8'
            # positionally raises TypeError.  Pass it as the encoding keyword.
            with open(filename, 'r', encoding='utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)
Пример #20
0
"""
pyparsing parser definition to parse STRIPS and PDDL files for AI Planning class (coursera)
"""
from traceback import print_exc

from pyparsing import Optional, Keyword, Literal, Word
from pyparsing import Combine, Group, OneOrMore, restOfLine, dictOf
from pyparsing import alphas, alphanums
from pyparsing import nestedExpr, Forward

MAX_NUM_ARGS = 1000000000  # max of 1 billion arguments for any function (relation constant)

# function constants are usually lowercase, that's not a firm requirement in the spec
identifier = Word( alphas, alphanums + "-_" )
# A variable is a '?' glued directly to a name, e.g. '?x'; Combine forbids
# whitespace between the '?' and the word.
variable   = Combine(Literal('?') + Word(alphas, alphanums + '_'))
# A ';'-introduced line comment; the whole construct is suppressed from results.
comment    = Optional(OneOrMore(Word(';').suppress()) + restOfLine('comment')).suppress()
# typ        = Literal('-').suppress() + Optional(Literal(' ').suppress()) + identifier

# All mean the same thing: ground predicate, ground atom, ground_literal
# Any formula whose arguments are all ground terms (literals = non-variables)
ground_predicate = Literal('(').suppress() + Group(OneOrMore(identifier)) + Literal(')').suppress() + comment
# '( ?v1 ?v2 ... )' -- a parenthesised list of variables only; parens suppressed.
arguments = sequence_of_variables = Literal('(').suppress() + Group(OneOrMore(variable)) + Literal(')').suppress()
# Norvig/Russel tend to call this a "fluent"
predicate        = Literal('(').suppress() + Group(identifier + OneOrMore(variable)) + Literal(')').suppress()
# '(not <predicate>)' -- negated predicate; the 'not' keyword stays in the results.
notted_predicate = Literal('(').suppress() + Keyword('not') + predicate + Literal(')').suppress()

# a set of ground atoms/predicates is a state, they are all presumed to be ANDed together (conjunction)
state_conjunction_implicit = OneOrMore(ground_predicate)
# '(and <ground predicates ...>)' -- the same conjunction written explicitly.
state_conjunction_explicit = (Literal('(') + Keyword('and')).suppress() + state_conjunction_implicit + Literal(')').suppress()
state = state_conjunction_explicit | state_conjunction_implicit
 def __discard_pattern(self):
     """Return a rest-of-line matcher element.

     The name suggests the matched text is discarded by the caller --
     presumably used to skip trailing line content (confirm at call site).
     """
     rest = pyp.restOfLine()
     return rest
Пример #22
0
# Read the unsorted input file; 'with' guarantees the handle is closed even
# if read() fails (replaces the manual open/read/close sequence).
with open('uData.txt', 'r') as fin:
    data = fin.read()

# Output file for the processed data.  Deliberately NOT a context manager:
# code further below (outside this section) writes to fout and closes it.
fout = open('sData.txt', 'w')


NL = LineEnd().suppress()  # LineEnd.suppress()
gender = oneOf("M F")      # Sets the possible genders
integer = Word(nums)       # Define what integer is
date = Combine(integer + '/' + integer + '/' + integer)

# Define the line definitions
gender_line = gender("sex") + NL
dob_line = date("DOB") + NL
name_line = restOfLine("name") + NL
id_line = Word(alphanums + '-')("ID") + NL
recnum_line = integer("recnum") + NL

# Define forms of address lines
first_addr_line = Suppress('.') + empty + restOfLine + NL
# Subsequent address line is not gender
subsq_addr_line = ~(gender_line) + restOfLine + NL

# a line with a name and a recnum combined, if no ID
name_recnum_line = originalTextFor(OneOrMore(Word(alphas + ',')))("name") + \
    integer("recnum") + NL

# Defining the form of an overall record, either with or without an ID
record = Group((first_addr_line + ZeroOrMore(subsq_addr_line))("address") + \
    gender_line + dob_line + ((name_line + id_line + recnum_line) | \
Пример #23
0

def addToAnnotationDict(annotateName, annotate):
    """Store an annotation's text in the module-level annotateDict.

    The raw text in annotate[1] is stripped of surrounding ''' or "
    quotes, then stored under the annotation's 'name' token.
    (annotateName is accepted but unused -- kept for the caller's signature.)
    """
    text = annotate[1]
    # first remove the quotes
    if text.startswith("'''"):
        text = text[3:-3]
    if text.startswith('"'):
        text = text[1:-1]
    annotateDict[annotate['name']] = text


# Shared grammar fragments: comments, quoted strings and literal tokens.
comment = '## ' + restOfLine
#comment       = '#' + restOfLine
CMNT = Optional(cStyleComment("comment"))
CMNT2 = Optional(
    (Suppress('//') +
     restOfLine("comment2")))  #Optional(cppStyleComment("comment2"))
STRQ3 = QuotedString("'''", multiline=True)
ANNOTSTR = (QuotedString("'''", multiline=True) | quotedString)
#IDENTIFIER = Regex(r'[a-zA-Z_][a-zA-Z_0-9]*')
#INTEGER    = Regex(r'([+-]?(([1-9][0-9]*)|0+))')
#IDENTIFIER       = Word(alphas+"_", alphas+nums+"_" )
# Integer literals: decimal, octal and hexadecimal.  Raw strings are used for
# all regex patterns below -- '\d' in a plain string only works via the
# unrecognized-escape passthrough and raises SyntaxWarning on modern Python.
INT_DECI = Regex(r'([+-]?(([1-9][0-9]*)|0+))')
INT_OCT = Regex(r'(0[0-7]*)')
INT_HEX = Regex(r'(0[xX][0-9a-fA-F]*)')
# Hex/octal must be tried before decimal so the '0x'/'0' prefixes win.
INT = INT_HEX | INT_OCT | INT_DECI
FLOAT = Regex(
    r'[+-]?(((\d+\.\d*)|(\d*\.\d+))([eE][-+]?\d+)?)|(\d*[eE][+-]?\d+)')
SIZE = INT
#VARNAME    = IDENTIFIER
##ident = Word(alphas+"_",alphanums+"_").setName("identifier")
IDENT = Word(alphas + "_", alphanums + "_")("name")
Пример #24
0
 def args(cls, player):
     """Grammar for this command's arguments: the rest of the line,
     captured under the results name 'name'."""
     name_arg = pyp.restOfLine("name")
     return name_arg
Пример #25
0
 def args(cls, player):
     """Grammar for this command's arguments: the rest of the line,
     captured under the results name 'text'."""
     text_arg = pyp.restOfLine("text")
     return text_arg