def parser_factory(styler):
    """Builds the JSON parser."""
    LBRK, RBRK, LBRC, RBRC, COLON, DQUO = map(pp.Suppress, '[]{}:"')
    DQUO = styler('class:string', DQUO)
    control_chars = ''.join(map(chr, range(32))) + '\x7f'
    normal_chars = pp.CharsNotIn(control_chars + '\\"')
    s_quo = pp.Literal('\\"').addParseAction(pp.replaceWith('"'))
    s_sol = pp.Literal('\\/').addParseAction(pp.replaceWith('/'))
    s_rsol = pp.Literal('\\\\').addParseAction(pp.replaceWith('\\'))
    s_back = pp.Literal('\\b').addParseAction(pp.replaceWith('\b'))
    s_form = pp.Literal('\\f').addParseAction(pp.replaceWith('\f'))
    s_nl = pp.Literal('\\n').addParseAction(pp.replaceWith('\n'))
    s_ret = pp.Literal('\\r').addParseAction(pp.replaceWith('\r'))
    s_tab = pp.Literal('\\t').addParseAction(pp.replaceWith('\t'))
    s_unicode = pp.Suppress('\\u') + pp.Word(pp.hexnums, exact=4)
    s_unicode.addParseAction(lambda t: chr(int(t[0], 16)))
    escape_seqs = (s_quo | s_sol | s_rsol | s_back | s_form
                   | s_nl | s_ret | s_tab | s_unicode)
    chars = (styler('class:string', normal_chars)
             | styler('class:escape', escape_seqs))
    skip_white = pp.Optional(pp.Suppress(pp.White()))
    string = skip_white + DQUO - pp.Combine(pp.ZeroOrMore(chars)) + DQUO
    string.leaveWhitespace()
    string.setName('string')
    value = pp.Forward()
    pair = string + COLON + value
    pair.addParseAction(tuple)
    obj = LBRC - pp.Optional(pp.delimitedList(pair)) + pp.NotAny(',') + RBRC
    obj.addParseAction(lambda t: {k: v for k, v in t})
    obj.setName('object')
    array = LBRK - pp.Optional(pp.delimitedList(value)) + pp.NotAny(',') + RBRK
    array.addParseAction(lambda t: [list(t)])
    array.setName('array')
    true = pp.Literal('true').addParseAction(pp.replaceWith(True))
    false = pp.Literal('false').addParseAction(pp.replaceWith(False))
    null = pp.Literal('null').addParseAction(pp.replaceWith(None))
    constant = styler('class:constant', true | false | null)
    value <<= obj | array | string | styler('class:number', ppc.number) | constant
    value.parseWithTabs()
    value.setName('JSON value')
    return value
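# A minimal usage sketch for the factory above. Assumptions on my part: `pp`
# is `import pyparsing as pp` and `ppc` is `pp.pyparsing_common`, which is
# what the factory references. A pass-through styler that ignores the style
# class is enough to exercise the grammar without any highlighting machinery.
import pyparsing as pp
from pyparsing import pyparsing_common as ppc

json_value = parser_factory(lambda style, expr: expr)
tokens = json_value.parseString('{"a": [1, 2.5, true, null]}')
print(tokens[0])  # -> {'a': [1, 2.5, True, None]}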
class Parser(object):
    comment_def = "--" + pyparsing.NotAny(
        '-' + pyparsing.CaselessKeyword('begin')) + pyparsing.ZeroOrMore(
            pyparsing.CharsNotIn("\n"))

    def __init__(self, scanner, retainSeparator=True):
        self.scanner = scanner
        self.scanner.ignore(pyparsing.sglQuotedString)
        self.scanner.ignore(pyparsing.dblQuotedString)
        self.scanner.ignore(self.comment_def)
        self.scanner.ignore(pyparsing.cStyleComment)
        self.retainSeparator = retainSeparator

    def separate(self, txt):
        itms = []
        for (sqlcommand, start, end) in self.scanner.scanString(txt):
            if sqlcommand:
                if type(sqlcommand[0]) == pyparsing.ParseResults:
                    if self.retainSeparator:
                        itms.append("".join(sqlcommand[0]))
                    else:
                        itms.append(sqlcommand[0][0])
                else:
                    if sqlcommand[0]:
                        itms.append(sqlcommand[0])
        return itms
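# Usage sketch for the statement separator above. The scanner grammar is not
# shown in this excerpt, so this assumes a simple one-statement-per-';'
# scanner; the exact output shape depends on the scanner actually passed in.
import pyparsing

statement = pyparsing.Group(pyparsing.SkipTo(';', include=True))
sql_parser = Parser(statement)
print(sql_parser.separate("SELECT 1; SELECT 2;"))
# e.g. ['SELECT 1;', ' SELECT 2;'] with the separators retained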
class PortWithProfile(Node):
    """
    Variant of :class:`Port` that is used by "card" records inside the
    "Ports" property. It differs from the normal port syntax by having
    different entries inside the last section. Availability is not listed
    here, only priority. Priority does not have a colon before the actual
    number. This port is followed by profile assignment.
    """

    __fragments__ = {
        'name': 'port-name',
        'label': 'port-label',
        'priority': 'port-priority',
        'latency_offset': 'port-latency-offset',
        'availability': 'port-availability',
        'properties': lambda t: t['port-properties'].asList(),
        'profile_list': lambda t: t['port-profile-list'].asList(),
    }

    __syntax__ = (
        p.Word(p.alphanums + "-;").setResultsName('port-name')
        + p.Suppress(':')
        # This part was very tricky to write. The label is basically
        # arbitrary localized Unicode text. We want to grab all of it in one
        # go but without consuming the final '(' character or the space that
        # comes immediately before it.
        #
        # The syntax here combines a sequence of words, as defined by
        # anything other than a space and '(', delimited by a single
        # whitespace.
        + p.Combine(
            p.OneOrMore(
                ~p.FollowedBy(p.Regex(r'\(.+?\)') + p.LineEnd())
                + p.Regex(r'[^ \n]+')
                + p.White().suppress()),
            ' ').setResultsName('port-label')
        + p.Suppress('(')
        + p.Keyword('priority').suppress()
        + p.Optional(p.Suppress(':'))
        + p.Word(p.nums).setParseAction(
            lambda t: int(t[0])).setResultsName('port-priority')
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Keyword('latency offset:').suppress()
                + p.Word(p.nums).setParseAction(lambda t: int(t[0]))
                + p.Literal("usec").suppress(),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-latency-offset'))
        + p.Optional(
            p.MatchFirst([
                p.Suppress(',') + p.Literal('not available'),
                p.Suppress(',') + p.Literal('available'),
                p.Empty().setParseAction(lambda t: '')
            ]).setResultsName('port-availability'))
        + p.Suppress(')')
        + p.LineEnd().suppress()
        + p.Optional(
            p.MatchFirst([
                p.LineStart().suppress()
                + p.NotAny(p.White(' '))
                + p.White('\t').suppress()
                + p.Keyword('Properties:').suppress()
                + p.LineEnd().suppress()
                + PropertyAttributeValue,
                p.Empty().setParseAction(lambda t: [])
            ]).setResultsName('port-properties'))
        + p.White('\t', max=3).suppress()
        + p.Literal("Part of profile(s)").suppress()
        + p.Suppress(":")
        + p.delimitedList(
            p.Word(p.alphanums + "+-:"), ", "
        ).setResultsName("port-profile-list")
    ).setResultsName("port")
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
    """Parses events from Bash history files."""

    NAME = u'bash'
    DESCRIPTION = u'Parser for Bash history files'

    _ENCODING = u'utf-8'

    _TIMESTAMP = pyparsing.Suppress(u'#') + pyparsing.Word(
        pyparsing.nums, min=9, max=10).setParseAction(
            text_parser.PyParseIntCast).setResultsName(u'timestamp')

    _COMMAND = pyparsing.Regex(
        r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName(u'command')

    _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

    _VERIFICATION_GRAMMAR = (
        pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) + _TIMESTAMP +
        pyparsing.NotAny(pyparsing.pythonStyleComment))

    LINE_STRUCTURES = [(u'log_entry', _LINE_GRAMMAR)]

    def ParseRecord(self, mediator, key, structure):
        """Parses a record and produces a Bash history event.

        Args:
          mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort
              signals.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          UnableToParseFile: if an unsupported key is provided.
        """
        if key != u'log_entry':
            raise errors.UnableToParseFile(
                u'Unsupported key: {0:s}'.format(key))

        event = BashHistoryEvent(structure.timestamp, structure.command)
        mediator.ProduceEvent(event)

    def VerifyStructure(self, unused_mediator, line):
        """Verifies that this is a bash history file.

        Args:
          mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort
              signals.
          line (str): single line from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        match_generator = self._VERIFICATION_GRAMMAR.scanString(
            line, maxMatches=1)
        return bool(list(match_generator))
class GenericListAttribute(Node):

    __fragments__ = {
        'name': 'attribute-name',
        'value': lambda t: t['attribute-value'].asList()
    }

    __syntax__ = (
        p.LineStart().suppress()
        + p.NotAny(p.White(' '))
        + p.Optional(p.White('\t')).suppress()
        + AttributeName
        + p.Literal(':').suppress()
        + p.LineEnd().suppress()
        + GenericListAttributeValue
    ).setResultsName("attribute")
def craft_parse(text):
    """Parses Craft source text and returns the walked program tree."""
    LineComment = pyp.Combine(pyp.Literal('::') + pyp.restOfLine).suppress()
    BlockComment = pyp.Combine(
        pyp.Literal(':>') + pyp.SkipTo(pyp.Literal('<:')) +
        pyp.Literal('<:')).suppress()
    Comment = BlockComment | LineComment
    BlockComment = pyp.Combine(
        pyp.Literal(':<') + pyp.Combine(
            pyp.NotAny(pyp.Literal(':>')) +
            pyp.Word(pyp.printables + ' ')) + pyp.Literal('>:'))
    Identifier = pyp.Word(pyp.alphanums + '!#$%&()*+,./;<=>?@\\^-_`{|}~')
    Value = (Comment | pyp.QuotedString('"') | pyp.QuotedString("'")
             | Identifier.addParseAction(_type_cast_value))

    LBRACKET, RBRACKET, COLON = map(pyp.Suppress, '[]:')
    Function = pyp.Forward()
    List = pyp.Forward()
    Function << pyp.Dict(
        pyp.Group(Identifier + pyp.Literal(':') +
                  pyp.Group(LBRACKET +
                            pyp.ZeroOrMore(Comment | Function | List | Value) +
                            RBRACKET)))
    List << pyp.Group(LBRACKET +
                      pyp.ZeroOrMore(Comment | Function | List | Value) +
                      RBRACKET)
    Program = pyp.OneOrMore(Comment | Function)

    # Validate for syntax error messages:
    validator = SourceValidator()
    Value.setParseAction(validator.validate)
    List.setParseAction(validator.validate)
    Identifier.addParseAction(validator.validate)
    #Comment.setParseAction(validator.validate)
    Function.setParseAction(validator.validate)
    Program.setParseAction(validator.validate)

    syntax_error = None
    try:
        return __walk(Program.parseString(text)[0])
    except Exception as e:
        syntax_error = validator.panic()
    # Now raise the exception with a clean stack trace
    raise syntax_error
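# For reference, the Function/List rules above accept nested source such as
#   song:[ title:[ "Example" ] tracks:[ 1 2 3 ] ]
# A stripped-down sketch of the same shape, without the validator and
# type-casting hooks used above (those are defined elsewhere in the module):
import pyparsing as pyp

_Ident = pyp.Word(pyp.alphanums + '_')
_LBRK, _RBRK = map(pyp.Suppress, '[]')
_item = pyp.Forward()
_func = pyp.Group(_Ident + pyp.Suppress(':') +
                  pyp.Group(_LBRK + pyp.ZeroOrMore(_item) + _RBRK))
_item <<= _func | pyp.QuotedString('"') | _Ident
print(_func.parseString('point:[ x 1 y 2 ]').asList())
# -> [['point', ['x', '1', 'y', '2']]]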
class Record(Node):
    """
    Single standalone entry of `pactl list`.

    The record is composed of a name and a list of attributes. Pulseaudio
    exposes objects such as cards, sinks and sources as separate records.
    Each attribute may be of a different type. Some attributes are simple
    values while others have finer structure, including lists and even
    additional recursive attributes.
    """

    __fragments__ = {
        'name': 'record-name',
        'attribute_list': lambda t: t['record-attributes'].asList(),
        'attribute_map': lambda t: OrderedDict(
            (attr.name, attr)
            for attr in t['record-attributes'].asList()),
    }

    __syntax__ = (
        p.LineStart()
        + p.NotAny(p.White(' \t'))
        + p.Regex("[A-Z][a-zA-Z ]+ #[0-9]+").setResultsName("record-name")
        + p.LineEnd().suppress()
        + p.OneOrMore(
            p.Or([
                GenericListAttribute.Syntax,
                GenericSimpleAttribute.Syntax,
            ])
        ).setResultsName("record-attributes")
    ).setResultsName("record")

    def as_json(self):
        return {
            'name': self.name,
            'attribute_list': self.attribute_list,
        }

    def __repr__(self):
        # Custom __repr__ that skips attribute_map
        return "{}({})".format(
            type(self).__name__,
            ", ".join([
                "{}={!r}".format(attr, getattr(self, attr))
                for attr in ['name', 'attribute_list']]))
# ------
# define base parser for between expression
between_cond = pp.Group(
    name + pp.CaselessLiteral('between').setResultsName('operator') +
    value.setResultsName('value1') + pp.CaselessLiteral('and') +
    value.setResultsName('value2')).setResultsName('between_condition')

# -------
# define base parser for functions
ppc = pp.pyparsing_common

# parentheses
LPAR = pp.Suppress('(')
RPAR = pp.Suppress(')')

# function arguments
arglist = pp.delimitedList(
    number | (pp.Word(pp.alphanums + '-_') + pp.NotAny('=')))
args = pp.Group(arglist).setResultsName('args')

# function keyword arguments
key = ppc.identifier() + pp.Suppress('=')
values = (number | pp.Word(pp.alphas))
keyval = pp.dictOf(key, values)
kwarglist = pp.delimitedList(keyval)
kwargs = pp.Group(kwarglist).setResultsName('kwargs')

# build generic function
fxn_args = (args + ',' + kwargs
            | pp.Optional(args, default='') + pp.Optional(kwargs, default=''))
fxn_name = (pp.Word(pp.alphas)).setResultsName('name')
fxn = pp.Group(fxn_name + LPAR + fxn_args + RPAR).setResultsName('function')

# fxn condition
fxn_cond = pp.Group(
    fxn + operator + value).setResultsName('function_condition')
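# Sketch: with the explicit comma between the positional and keyword blocks,
# the generic function grammar parses mixed calls. This assumes the `number`,
# `operator` and `value` elements defined in the companion fragment below.
tokens = fxn.parseString('haslines(1.5, name=OII)')
print(tokens.function.name)             # -> haslines
print(tokens.function.args.asList())    # -> ['1.5']
print(tokens.function.kwargs.asDict())  # e.g. {'name': 'OII'}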
p.Literal("/") + p.Word(p.nums, exact=4)) postcode_district = p.Combine( p.Word(p.alphas, min=1, max=2) + ((p.Word(p.nums, exact=1) + p.Word(p.alphas, exact=1)) | p.Word(p.nums, min=1, max=2))) + p.Suppress(p.White()) offence_code = p.Combine(p.Word(p.alphas, exact=2) + p.Word(p.nums, min=3)) + p.Suppress(p.White()) printed_by_line = p.Group(p.LineStart() + p.Literal("Printed By") + p.SkipTo(p.Literal("Page No.:")) + p.SkipTo(p.LineEnd())).setResultsName("printed_by") first_case_line = p.Group( p.LineStart() + p.NotAny(p.White()) + p.Word(p.nums) + p.SkipTo(p.LineEnd())).setResultsName("first_case_line") heading_block = p.Group( p.SkipTo(p.LineStart() + p.NotAny(p.White()) + p.Literal("Block:")) + p.SkipTo(p.LineEnd())).setResultsName("heading_block") document = heading_block + p.OneOrMore( p.Group(p.SkipTo(first_case_line | printed_by_line)).setResultsName("main_body") + (first_case_line | (printed_by_line + p.Optional(heading_block)))) def parse_court_docs(data): case_data = []
number = pp.Regex(r"[+\-~]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
name = pp.Word(pp.alphas + '._',
               pp.alphanums + '._').setResultsName('parameter')
#operator = pp.Regex("==|!=|<=|>=|<|>|=|&|~|||").setResultsName('operator')
operator = pp.oneOf(['==', '<=', '<', '>', '>=', '=', '!=', '&', '|']
                    ).setResultsName('operator')
value = (pp.Word(pp.alphanums + '-_.*') | pp.QuotedString('"')
         | number).setResultsName('value')

# list of numbers
nl = pp.delimitedList(number, combine=True)
narr = pp.Combine('[' + nl + ']')

# function arguments
arglist = pp.delimitedList(
    number | (pp.Word(pp.alphanums + '-_') + pp.NotAny('=')) | narr)
args = pp.Group(arglist).setResultsName('args')

# function keyword arguments
key = pp.Word(pp.alphas) + pp.Suppress('=')
values = (number | pp.Word(pp.alphas))
keyval = pp.dictOf(key, values)
kwarglist = pp.delimitedList(keyval)
kwargs = pp.Group(kwarglist).setResultsName('kwargs')

# build generic function
fxn_args = pp.Optional(args) + pp.Optional(kwargs)
fxn_name = (pp.Word(pp.alphas)).setResultsName('fxn')
fxn = pp.Group(fxn_name + LPAR + fxn_args + RPAR)

# overall (recursive) where clause
whereexp = pp.Forward()
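# Sketch: the condition pieces above combine as name + operator + value; the
# named results make the parsed condition easy to inspect.
cond = name + operator + value
print(cond.parseString('nsa.z < 0.1').asDict())
# e.g. {'parameter': 'nsa.z', 'operator': '<', 'value': '0.1'}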
def parse_specialnets(self):
    EOL = pp.LineEnd().suppress()
    linebreak = pp.Suppress(";" + pp.LineEnd())
    identifier = pp.Word(
        pp.alphanums + '._"!<>/[]$#$%&\'*+,/:<=>?@[\\]^_`{|}~'
    )  # CONFLICT with '();'
    number = pp.pyparsing_common.number
    word = pp.Word(pp.alphas)
    LPAR = pp.Suppress('(')
    RPAR = pp.Suppress(')')
    ORIENT = (pp.Keyword('N') | pp.Keyword('S') | pp.Keyword('E')
              | pp.Keyword('W') | pp.Keyword('FN') | pp.Keyword('FS')
              | pp.Keyword('FE') | pp.Keyword('FW'))
    pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR  # pair of x,y

    specialnets_id = pp.Suppress(pp.Keyword('SPECIALNETS'))
    end_specialnets_id = pp.Keyword("END SPECIALNETS").suppress()
    begin_specialnet = pp.Suppress(pp.Keyword('-'))
    ws_snet = pp.Suppress(pp.Keyword('+'))  # parameter division in NETS

    # netName
    netName_1 = pp.Group(
        LPAR + identifier('compName') + identifier('pinName') +
        pp.Optional(ws_snet + pp.Keyword('SYNTHESIZED'))('SYNTHESIZED') +
        RPAR)
    netName = identifier('netName') + pp.ZeroOrMore(
        netName_1).setResultsName('nets')

    # MASK
    MASK = pp.Group(pp.Keyword('MASK') +
                    number('maskNum')).setResultsName('MASK')
    MASK_id = pp.Keyword('MASK')
    RECT_id = pp.Keyword('RECT')
    VIRTUAL_id = pp.Keyword('VIRTUAL')

    routingPoints_1 = (pp.Optional(MASK('MASK') + number('maskNum')) +
                       pp.Group(pt))
    routingPoints_2 = (
        pp.Optional(MASK_id('MASK') + number('viaMaskNum')) +
        pp.NotAny(pp.Keyword('NEW') | pp.Keyword('RECT')) +
        identifier('viaName') + pp.Optional(ORIENT('orient')) +
        pp.Optional(
            pp.Suppress(pp.Keyword('DO')) + number('numX') +
            pp.Suppress(pp.Keyword('BY')) + number('numY') +
            pp.Suppress(pp.Keyword('STEP')) + number('stepX') +
            number('stepY')))
    routingPoints = (pp.Group(pt) +
                     pp.OneOrMore(routingPoints_1 | routingPoints_2))

    specialWiring_placement = (ws_snet + (
        (pp.Keyword('COVER')('PLACEMENT')) |
        (pp.Keyword('FIXED')('PLACEMENT')) |
        (pp.Keyword('ROUTED')('PLACEMENT')) |
        (pp.Keyword('SHIELD')('PLACEMENT') + identifier('shieldNetName'))))

    specialWiring_1 = (
        pp.Optional(specialWiring_placement) +
        pp.Optional(ws_snet + pp.Keyword('SHAPE') + identifier('shapeType')) +
        pp.Optional(ws_snet + pp.Keyword('MASK') + number('maskNum')) +
        ((ws_snet + pp.Keyword('POLYGON') + identifier('layerName') +
          pp.OneOrMore(pt)) |
         (ws_snet + pp.Keyword('RECT') + identifier('layerName') + pt + pt) |
         (ws_snet + pp.Keyword('VIA') + identifier('viaName') +
          pp.Optional(ORIENT('orient')) + pp.OneOrMore(pt))))

    SHAPE_elems = (pp.Keyword('RING') | pp.Keyword('PADRING') |
                   pp.Keyword('BLOCKRING') | pp.Keyword('STRIPE') |
                   pp.Keyword('FOLLOWPIN') | pp.Keyword('IOWIRE') |
                   pp.Keyword('COREWIRE') | pp.Keyword('BLOCKWIRE') |
                   pp.Keyword('BLOCKAGEWIRE') | pp.Keyword('FILLWIRE') |
                   pp.Keyword('FILLWIREOPC') | pp.Keyword('DRCFILL'))

    specialWiring_2 = (
        specialWiring_placement + identifier('layerName') +
        number('routeWidth') +
        pp.Optional(ws_snet + pp.Keyword('SHAPE') + SHAPE_elems('SHAPE')) +
        pp.Optional(ws_snet + pp.Keyword('STYLE') + number('styleNum')) +
        routingPoints('routingPoints') + pp.Group(
            pp.ZeroOrMore(
                pp.Group(
                    pp.Keyword('NEW') + identifier('layerName') +
                    number('routeWidth') +
                    pp.Optional(ws_snet + pp.Keyword('SHAPE') +
                                SHAPE_elems('SHAPE')) +
                    pp.Optional(ws_snet + pp.Keyword('STYLE') +
                                identifier('styleNum')) +
                    routingPoints('routingPoints'))))('NEW'))

    specialWiring = pp.Group(
        pp.OneOrMore(specialWiring_1 | specialWiring_2))('specialWiring')

    VOLTAGE = ws_snet + pp.Keyword('VOLTAGE') + number('VOLTAGE')
    SOURCE = ws_snet + pp.Keyword('SOURCE') + (
        pp.Keyword('DIST') | pp.Keyword('NETLIST') | pp.Keyword('TIMING') |
        pp.Keyword('USER'))
    FIXEDBUMP = ws_snet + pp.Keyword('FIXEDBUMP')('FIXEDBUMP')
    ORIGINAL = ws_snet + pp.Keyword('ORIGINAL') + identifier(
        'ORIGINAL_netName')
    USE_ids = (pp.Keyword('ANALOG') | pp.Keyword('CLOCK') |
               pp.Keyword('GROUND') | pp.Keyword('POWER') |
               pp.Keyword('RESET') | pp.Keyword('SCAN') |
               pp.Keyword('SIGNAL') | pp.Keyword('TIEOFF'))
    USE = ws_snet + pp.Keyword('USE') + USE_ids('USE')
    PATTERN_ids = (pp.Keyword('BALANCED') | pp.Keyword('STEINER') |
                   pp.Keyword('TRUNK') | pp.Keyword('WIREDLOGIC'))
    PATTERN = ws_snet + pp.Keyword('PATTERN') + PATTERN_ids('PATTERN')
    ESTCAP = ws_snet + pp.Keyword('ESTCAP') + number(
        'ESTCAP_wireCapacitance')
    WEIGHT = ws_snet + pp.Keyword('WEIGHT') + number('WEIGHT')
    PROPERTY = pp.Group(ws_snet + pp.Keyword('PROPERTY') + pp.OneOrMore(
        identifier('propName') + number('propVal')))('PROPERTY')

    specialnet = pp.Group(
        begin_specialnet + netName + pp.Optional(VOLTAGE) +
        pp.ZeroOrMore(specialWiring) + pp.Optional(SOURCE) +
        pp.Optional(FIXEDBUMP) + pp.Optional(ORIGINAL) + pp.Optional(USE) +
        pp.Optional(PATTERN) + pp.Optional(ESTCAP) + pp.Optional(WEIGHT) +
        pp.ZeroOrMore(PROPERTY) + linebreak).setResultsName(
            'specialnets', listAllMatches=True)

    specialnets = pp.Group(specialnets_id + number('numNets') + linebreak +
                           pp.ZeroOrMore(specialnet) +
                           pp.Suppress(end_specialnets_id)).setResultsName(
                               'SPECIALNETS')

    return specialnets
class SkyDriveOldLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parse SkyDrive old log files."""

    NAME = 'skydrive_log_old'
    DESCRIPTION = 'Parser for OneDrive (or SkyDrive) old log files.'

    _ENCODING = 'utf-8'

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Common pyparsing objects.
    _COLON = pyparsing.Literal(':')
    _EXCLAMATION = pyparsing.Literal('!')

    # Date and time format used in the header is: DD-MM-YYYY hhmmss.###
    # For example: 08-01-2013 21:22:28.999
    _DATE_TIME = pyparsing.Group(
        _TWO_DIGITS.setResultsName('month') + pyparsing.Suppress('-') +
        _TWO_DIGITS.setResultsName('day_of_month') + pyparsing.Suppress('-') +
        _FOUR_DIGITS.setResultsName('year') +
        text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS).setResultsName(
            'date_time')

    _SOURCE_CODE = pyparsing.Combine(
        pyparsing.CharsNotIn(':') + _COLON +
        text_parser.PyparsingConstants.INTEGER + _EXCLAMATION +
        pyparsing.Word(pyparsing.printables)).setResultsName('source_code')

    _LOG_LEVEL = (
        pyparsing.Literal('(').suppress() +
        pyparsing.SkipTo(')').setResultsName('log_level') +
        pyparsing.Literal(')').suppress())

    _LINE = (
        _DATE_TIME + _SOURCE_CODE + _LOG_LEVEL + _COLON +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    # Sometimes the timestamped log line is followed by an empty line,
    # then by a file name plus other data and finally by another empty
    # line. It could happen that a logline is split in two parts.
    # These lines will not be discarded and an event will be generated
    # ad-hoc (see source), based on the last one if available.
    _NO_HEADER_SINGLE_LINE = (
        pyparsing.NotAny(_DATE_TIME) +
        pyparsing.Optional(pyparsing.Literal('->').suppress()) +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    # Define the available log line structures.
    LINE_STRUCTURES = [
        ('logline', _LINE),
        ('no_header_single_line', _NO_HEADER_SINGLE_LINE),
    ]

    def __init__(self):
        """Initializes a parser object."""
        super(SkyDriveOldLogParser, self).__init__()
        self._last_date_time = None
        self._last_event_data = None
        self.offset = 0

    def _ParseLogline(self, parser_mediator, structure):
        """Parse a logline and store appropriate attributes.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.
        """
        # TODO: Verify if date and time value is locale dependent.
        month, day_of_month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        time_elements_tuple = (
            year, month, day_of_month, hours, minutes, seconds, milliseconds)

        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(structure.date_time))
            return

        event_data = SkyDriveOldLogEventData()
        event_data.log_level = structure.log_level
        event_data.offset = self.offset
        event_data.source_code = structure.source_code
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        self._last_date_time = date_time
        self._last_event_data = event_data

    def _ParseNoHeaderSingleLine(self, parser_mediator, structure):
        """Parse an isolated header line and store appropriate attributes.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.
        """
        if not self._last_event_data:
            logger.debug('SkyDrive, found isolated line with no previous events')
            return

        event_data = SkyDriveOldLogEventData()
        event_data.offset = self._last_event_data.offset
        event_data.text = structure.text

        event = time_events.DateTimeValuesEvent(
            self._last_date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

        # TODO think to a possible refactoring for the non-header lines.
        self._last_date_time = None
        self._last_event_data = None

    def ParseRecord(self, parser_mediator, key, structure):
        """Parse each record structure and return an EventObject if applicable.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in ('logline', 'no_header_single_line'):
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        if key == 'logline':
            self._ParseLogline(parser_mediator, structure)
        elif key == 'no_header_single_line':
            self._ParseNoHeaderSingleLine(parser_mediator, structure)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a SkyDrive old log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from a text file.

        Returns:
          bool: True if the line is in the expected format, False if not.
        """
        try:
            structure = self._LINE.parseString(line)
        except pyparsing.ParseException:
            logger.debug('Not a SkyDrive old log file')
            return False

        day_of_month, month, year, hours, minutes, seconds, milliseconds = (
            structure.date_time)

        time_elements_tuple = (
            year, month, day_of_month, hours, minutes, seconds, milliseconds)

        try:
            dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            logger.debug(
                'Not a SkyDrive old log file, invalid date and time: '
                '{0!s}'.format(structure.date_time))
            return False

        return True
def __init__(self, base_freq=440.0, amplitude=.5, max_gain=10.,
             min_gain=-200., new_scale='C/a', clef='violin'):
    # an important constant value for the conversion of musical half tone
    # steps to frequency values is the twelfth root of 2
    self.__root__ = 1.0594630943592952645618252949463  # (2 ** (1 / 12))

    # *** parser definitions ***
    # helper
    no_whites = pp.NotAny(pp.White())
    tok_end = (pp.StringEnd() | pp.LineEnd()).suppress()

    # numbers
    real = pp.Combine(
        pp.Word(pp.nums) + pp.Optional(pp.Char(',.') + pp.Word(pp.nums))
    ).setParseAction(lambda t: float(t[0].replace(',', '.')))
    integer = (pp.Optional(pp.Literal('-')) +
               pp.Word(pp.nums)).setParseAction(
                   lambda t: int(t[0] + t[1]) if len(t) > 1 else int(t[0]))

    # signs
    must_sign = pp.Char('+-').setParseAction(lambda t: float(t[0] + '1'))
    may_sign = pp.Optional(pp.Char('+-')).setParseAction(
        lambda t: float(t[0] + '1' if len(t) > 0 else '1'))

    # note value cents
    cent = (must_sign + no_whites + real).setParseAction(
        lambda t: t[0] * t[1] / 100)

    # helpers for the note name parser
    note_name_offset = {
        'C': -9, 'D': -7, 'E': -5, 'F': -4, 'G': -2, 'A': 0, 'B': 2,
    }
    note_name = pp.Char('CDEFGABcdefgab').setParseAction(
        lambda t: note_name_offset[t[0] if t[0] in 'CDEFGAB'
                                   else t[0].upper()])
    flat_sharp = pp.Char('#b').setParseAction(
        lambda t: 1 if t[0] == '#' else -1)
    octave = pp.Char('0123456789').setParseAction(
        lambda t: (int(t[0]) - 4) * 12)
    full_note = (note_name + no_whites +
                 pp.Optional(pp.FollowedBy(flat_sharp) + flat_sharp) +
                 no_whites +
                 pp.FollowedBy(octave) + octave).setParseAction(
                     lambda t: sum(t))

    self.note_name_parser = (
        full_note + pp.Optional(pp.White()).suppress() + pp.Optional(cent) +
        tok_end
    ).setParseAction(lambda t: float(sum(t))).setResultsName('note_value')

    # frequency parsers
    hertz = real + pp.Literal('Hz').suppress()
    self.frequency_parser = (hertz + tok_end).setParseAction(
        lambda t: float(t[0])).setResultsName('frequency')
    self.base_freq_parser = (
        full_note + pp.Literal('=').suppress() + hertz + tok_end
    ).setParseAction(
        lambda t: t[1] * (1.0594630943592952645618252949463 ** -t[0])
    ).setResultsName('base_freq')

    # parses a string like "sc -7:b" into a musical half tone step
    # (using the MusicConverter.set method)
    sign = (pp.Keyword('##') | pp.Keyword('bb') | pp.Keyword('#') |
            pp.Keyword('b') | pp.Keyword('n') | pp.Keyword('_'))
    self.score_parser = (integer + pp.Literal(':').suppress() + sign +
                         tok_end).setResultsName('notation')

    # amplitude parser
    self.amp_parser = (
        real + pp.Literal('%').suppress() + tok_end
    ).setParseAction(lambda t: float(t[0])).setResultsName('amplitude')
    self.gain_parser = (
        may_sign + real + pp.Literal('dB').suppress() + tok_end
    ).setParseAction(lambda t: float(t[0] * t[1])).setResultsName('gain')

    # clef parser
    self.clef_parser = (pp.Keyword('violin') | pp.Keyword('alto') |
                        pp.Keyword('bass')).setResultsName('clef')

    # key parser
    key_token = pp.NoMatch()
    for key in self.keys:
        key_token = key_token | pp.Keyword(key)
    self.key_parser = (key_token).setResultsName('key')

    # complete parser
    self.input_parser = (self.note_name_parser |
                         self.frequency_parser |
                         self.base_freq_parser |
                         self.amp_parser |
                         self.gain_parser |
                         self.clef_parser |
                         self.key_parser |
                         self.score_parser)

    # *** initializations ***
    self.__note_value__ = 0.
    self.__base_freq__ = 440.
    self.base_freq = base_freq
    self.key = new_scale

    self.__names__ = 'C D EF G A B'
    self.clef = clef
    self.__clef__ = 'violin'

    self.max_gain = max_gain
    self.min_gain = min_gain
    self.amplitude = amplitude
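# Quick sanity check of the numeric sub-grammar in isolation (a sketch; the
# full parsers above live on the converter instance). Note that `pp.Char`
# requires pyparsing >= 2.4.
import pyparsing as pp

real_number = pp.Combine(
    pp.Word(pp.nums) + pp.Optional(pp.Char(',.') + pp.Word(pp.nums))
).setParseAction(lambda t: float(t[0].replace(',', '.')))
hertz = real_number + pp.Literal('Hz').suppress()
print(hertz.parseString('442,5Hz')[0])  # -> 442.5 (comma decimals accepted)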
import pyparsing as pp

########## Finding Identifiers

RESERVED_WORDS = [
    "abstract", "baremodule", "begin", "bitstype", "break", "catch",
    "ccall", "const", "continue", "do", "else", "elseif", "end", "export",
    "finally", "for", "function", "global", "if", "immutable", "import",
    "importall", "in", "let", "local", "macro", "module", "quote",
    "return", "try", "type", "typealias", "using", "while"]

pp_reserved_word = pp.Or([pp.Literal(ww) for ww in RESERVED_WORDS])
pp_identifier = (pp.NotAny(pp_reserved_word) +
                 pp.Word(pp.alphanums + "@" + "!" + "_"))

TRANSPERENT_PREFIXES = ["@inline", "const"]
pp_transperent_prefix = pp.Optional(pp.Or(
    [pp.Literal(ww) for ww in TRANSPERENT_PREFIXES])).suppress()


def _matched_only(matched):
    return [match for matchgrp in matched for match in matchgrp[0]]


def get_exports(raw_text):
    pp_exports = (pp.Literal("export").suppress() +
                  pp.delimitedList(pp_identifier))
    parsed_exports = pp_exports.scanString(raw_text)
    return _matched_only(parsed_exports)

# TODO: Use actual scoping to determine what is at global scope, rather than
# looking for things that are not indented.
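# Usage sketch for get_exports above:
sample = """
module Foo
export bar, baz!
end
"""
print(get_exports(sample))  # -> ['bar', 'baz!']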
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
    """Parses events from Bash history files."""

    NAME = 'bash'
    DESCRIPTION = 'Parser for Bash history files'

    _ENCODING = 'utf-8'

    _TIMESTAMP = pyparsing.Suppress('#') + pyparsing.Word(
        pyparsing.nums, min=9, max=10).setParseAction(
            text_parser.PyParseIntCast).setResultsName('timestamp')

    _COMMAND = pyparsing.Regex(
        r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName('command')

    _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

    _VERIFICATION_GRAMMAR = (
        pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) + _TIMESTAMP +
        pyparsing.NotAny(pyparsing.pythonStyleComment))

    LINE_STRUCTURES = [('log_entry', _LINE_GRAMMAR)]

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a record and produces a Bash history event.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key != 'log_entry':
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        event_data = BashHistoryEventData()
        event_data.command = self._GetValueFromStructure(structure, 'command')

        timestamp = self._GetValueFromStructure(structure, 'timestamp')
        date_time = dfdatetime_posix_time.PosixTime(timestamp=timestamp)
        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_MODIFICATION)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    # pylint: disable=unused-argument
    def VerifyStructure(self, parser_mediator, lines):
        """Verifies that this is a bash history file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          lines (str): one or more lines from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        match_generator = self._VERIFICATION_GRAMMAR.scanString(
            lines, maxMatches=1)
        return bool(list(match_generator))
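# The timestamp/command grammar above can be exercised on its own; a sketch
# (without the plaso text_parser int-cast helper, so the timestamp stays a
# string here):
import re
import pyparsing

_ts = pyparsing.Suppress('#') + pyparsing.Word(
    pyparsing.nums, min=9, max=10).setResultsName('timestamp')
_cmd = pyparsing.Regex(
    r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName('command')

tokens = (_ts + _cmd).parseString('#1472083516\nls -la')
print(tokens.timestamp, tokens.command)  # -> 1472083516 ls -la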
StartTimeLineComment = (
    StartTimeComment + pp.Combine(
        pp.Combine(
            pp.Word(pp.nums) + pp.Literal("-") + pp.Word(pp.nums) +
            pp.Literal("-") + pp.Word(pp.nums)) +
        pp.Combine(
            pp.Word(pp.nums) + pp.Literal(":") + pp.Word(pp.nums) +
            pp.Literal(":") + pp.Word(pp.nums) + pp.Literal(".") +
            pp.Word(pp.nums) + pp.Literal(".") + pp.Word(pp.nums)),
        joinString=" ",  # TODO: Fix this hack
        adjacent=False,
    ).setResultsName("StartTimeLineComment") + pp.LineEnd())

LineComment = pp.Group(
    pp.NotAny(pp.Or(FileVersion ^ StartTime ^ Columns ^
                    StartTimeLineComment)) +
    pp.Literal(";") + pp.Regex(r".*") +
    pp.LineEnd()).setResultsName("LineComment", listAllMatches=True)

Header = FileVersion + StartTime + Columns

# [N],O,T,[B],I,d,[R],l/L,D
ColumnBusNumber = pp.Or(
    pp.Literal("1") ^ pp.Literal("2") ^ pp.Literal("3") ^ pp.Literal("4") ^
    pp.Literal("5") ^ pp.Literal("6") ^ pp.Literal("7") ^ pp.Literal("8") ^
    pp.Literal("9") ^ pp.Literal("10") ^ pp.Literal("11") ^
    pp.Literal("12") ^ pp.Literal("13") ^ pp.Literal("14") ^
    pp.Literal("15") ^ pp.Literal("16") ^ pp.Literal("-"))
ColumnDirection = pp.Or(pp.Literal("Rx") ^ pp.Literal("Tx"))
import pyparsing as pp

pp_identifier = (
    # keywords are not identifiers
    pp.NotAny(
        pp.Keyword("void") | pp.Keyword("unsigned") | pp.Keyword("signed") |
        pp.Keyword("int") | pp.Keyword("float") | pp.Keyword("const") |
        pp.Keyword("volatile") | pp.Keyword("extern") | pp.Keyword("static"))
    + pp.Word(pp.alphas + "_", pp.alphanums + "_", asKeyword=True))
#pp_identifier = pp.Word(pp.alphas+"_", pp.alphanums+"_")

pp_semicolon = pp.Literal(";")


def get_type_spec(p):
    print("get_type_spec: " + str(p))


pp_type_spec = (pp.Keyword("void") |
                (pp.Optional(pp.Keyword("unsigned") | pp.Keyword("signed")) +
                 pp.Keyword("int")) |
                pp.Keyword("float") |
                pp_identifier).setParseAction(get_type_spec)

pp_type_qual = (pp.Keyword("const") | pp.Keyword("volatile"))
pp_strage_spec = (pp.Keyword("extern") | pp.Keyword("static"))

decl_spec = (pp.Optional(pp_type_qual)
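# Sketch: the NotAny guard above rejects the listed keywords while ordinary
# names still parse.
print(pp_identifier.parseString('counter')[0])  # -> counter
try:
    pp_identifier.parseString('static')
except pp.ParseException:
    print('keyword rejected')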
from pathlib import Path
from sys import stderr, exit

import pyparsing as pp
from pyparsing import (Suppress, OneOrMore, Forward, Word, alphanums, Group,
                       SkipTo, Keyword, alphas, Combine, Optional, Literal,
                       delimitedList, ZeroOrMore, infixNotation, opAssoc,
                       oneOf, FollowedBy, pythonStyleComment, ungroup,
                       ParserElement, ParseResults, printables)
from pyparsing import pyparsing_common as ppc

ParserElement.enablePackrat()

(LBRACE, RBRACE, LBRACK, RBRACK, EQ, COLON,
 SEMICOLON, COMMA, LPAR, RPAR, RAWPREFIX) = map(Suppress, "{}[]=:;,()$")

NoWhite = pp.NotAny(pp.White())
kws = oneOf("and or not id true false")
VARNAME = Word(alphas.lower(), alphanums + "_").ignore(kws)
# TODO reserve keywords in IDENTIFIER
IDENTIFIER = Word(alphas.upper(), alphanums).ignore(Keyword("Skip"))
EXTERN = Combine(Literal("_") + VARNAME)


def konst(val):
    def f(*args, **kwargs):
        return val
    return f
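# Sketch: EXTERN combines a leading underscore with a lowercase variable
# name into a single token.
print(EXTERN.parseString('_speed')[0])  # -> _speed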
def parse(s):
    orig_s = s
    rules_to_execute = []

    def mk_agent_set(s):
        return ('agent', s)

    # given a regex and an input, return True or False if the regex accepts
    # the input
    def mkroot(regex, y):
        if re.compile("^" + regex + "$", flags=re.DOTALL).match(y) is not None:
            return True
        else:
            return False
        #return x == y

    assert (mkroot("abc", "abc") == True)
    assert (mkroot("abc", "abcd") == False)
    assert (mkroot("abc.*", "abcd") == True)
    assert (mkroot("abc", "ab") == False)
    assert (mkroot("/g/comp/105/.*", "/g/comp/105/") == True)
    assert (mkroot("/g/comp/105/.*", "/g/comp/160/") == False)

    # given a path object, return a path function: string -> boolean
    def mkf(x):
        path = x["path"][0]
        return lambda y: mkroot(path, y)

    assert (mkf({'path': ['abc']})('abc') == True)
    assert (mkf({'path': ['abc']})('abcd') == False)
    assert (mkf({'path': ['abc']})('ab') == False)
    assert (mkf({'path': ["/g/comp/105/.*"]})("/g/comp/105/foo") == True)

    # given a list of path objects, return a list of path functions
    def mkchain(xs):
        if type(xs) != type([]):
            xs = [xs]
        fs = map(mkf, xs)
        return fs

    # given a list of path objects, return a disjunctive path function
    # list(paths) -> function: string -> bool
    def mk(xs):
        fs = mkchain(xs)
        return lambda path: True in map(lambda g: g(path), fs)

    assert (mk([{'path': ['abc']}])("abc") == True)
    assert (mk([{'path': ['abc']}])("abcd") == False)
    assert (mk([{'path': ['abc']}, {'path': ['abcd']}])("abcd") == True)

    # given a pair of functions (a, b), return a function which returns True
    # if either (a or b) returns True
    def mk_or(a, b):
        # or two functions
        def f_or(x, a, b):
            if a(x):
                return True
            else:
                return b(x)
        return lambda y: f_or(y, a, b)

    # given a pair of functions (a, b), return a function which returns True
    # if both (a and b) return True
    def mk_and(a, b):
        # and two functions
        def f_and(x, a, b):
            if a(x):
                return b(x)
            else:
                return False
        return lambda y: f_and(y, a, b)

    # given a fragment of the AST, return a function.
    def path2function(r):
        #print("rec",r)
        if "pathset" in r:
            v = r["pathset"][0]
            #print(v)
            return mk(v)
        elif "pathvar" in r:
            key = r["pathvar"][0]
            #print("key",key,env[key])
            v = env[key]
            return v
        elif "agentvar" in r:
            #print("path2function, agentvar",r)
            #key = r["agentvar"][0]
            key = r["agentvar"]
            #print("key",key,env["agent_"+key])
            v = env["agent_" + key]
            return v
        elif "agentset" in r:
            #print("path2function agentset",r)
            #v = r["agentset"][0]
            v = r["agentset"]
            #print("agentset..",v)
            return v
        elif "agentapplication" in r:
            a = r["agentapplication"]
            lhs = a[0]["lhs"][0]
            v1 = path2function(lhs)
            #print("LHS of agent application",lhs,path2function(lhs))
            # case where we just have a single operand and parens
            if len(a) == 1:
                return v1  # in this case just get the value
            rhs = a[2]["rhs"][0]
            #print("RHS of agent application",rhs,path2function(rhs))
            op = a[1]["op"][0]
            v2 = path2function(rhs)
            if op == "or":
                return v1.union(v2)
            elif op == "and":
                return v1.intersection(v2)
            elif op == "minus":
                return v1.difference(v2)
            else:
                return lambda x: None
            print("error")
            1 / 0
        else:
            if "application" in r:
                a = r["application"]
                lhs = a[0]["lhs"][0]
                v1 = path2function(lhs)
                # case where we just have a single operand and parens
                if len(a) == 1:
                    return v1
                rhs = a[2]["rhs"][0]
                op = a[1]["op"][0]
                v2 = path2function(rhs)
                if op == "or":
                    return mk_or(v1, v2)
                elif op == "and":
                    return mk_and(v1, v2)
                elif op == "minus":
                    return mk_and(v1, lambda x: not v2(x))
                else:
                    return lambda x: None
            else:
                return lambda x: None

    def agent2f(n):
        #print("making " + str(n) + " into a f")
        #return n
        return "making " + str(n) + " into a f"

    # ===========================
    def parseToDict(x, lbl):
        r = x.asList()
        return {lbl: r}

    def f(lbl):
        return lambda x: parseToDict(x, lbl)

    def fprim(lbl):
        return lambda x: {lbl: x.asList()[0]}

    PATH = pp.Word(pp.alphanums + "+-./*?")("PATH")
    PATH.setParseAction(f("path"))
    LP = pp.Literal("(")("LP")
    RP = pp.Literal(")")("RP")
    LBRACE = pp.Word("{")("{")
    RBRACE = pp.Word("}")("}")
    LBRACK = pp.Word("[")("[")
    RBRACK = pp.Word("]")("]")
    EQ = pp.Word("=")
    OPS = pp.Word("and") | pp.Word("or") | pp.Word("minus")
    OP = OPS("OP")
    OP.setParseAction(f("op"))

    PATHBODY = pp.delimitedList(PATH, delim=',')("PATHS")
    PATHBODY.setParseAction(lambda s, l, t: t)
    PATHOPTION = pp.Group(pp.Suppress(LBRACE) + PATHBODY + pp.Suppress(RBRACE))
    PATHOPTION.setParseAction(lambda s, l, t: t[0])
    PATHSET = pp.Group(pp.Or([PATHOPTION, PATH]))
    #PATHSET.setParseAction(lambda x: x.asList())
    PATHSET.setParseAction(f("pathset"))

    GROUP = pp.Suppress(pp.Word("users")) + pp.Suppress(
        pp.Word("in")) + pp.Word(pp.alphanums)  # .setResultsName("GROUP")
    GROUP.setParseAction(lambda x: ('group', x.asList()[0]))
    ABSTRACT = pp.Suppress(LBRACK) + pp.Word(
        pp.alphanums)("absuser") + pp.Suppress(pp.Word(",")) + pp.Optional(
            pp.Word(pp.alphanums + ","))("absgroups") + pp.Suppress(RBRACK)
    ABSTRACT.setParseAction(lambda x: ('abstract', x.asDict()))
    USER = pp.NotAny(pp.Keyword("agent")) + pp.Word(pp.alphanums)
    USER.setParseAction(lambda x: ('user', x.asList()[0]))
    AGENT = pp.Group(pp.Or([USER, GROUP]))
    AGENT.setParseAction(lambda x: x.asList()[0])
    AGENTBODY = pp.delimitedList(AGENT, delim=',')("AGENTS")
    AGENTBODY.setParseAction(lambda s, l, t: t)
    AGENTOPTION = pp.Group(
        pp.Suppress(LBRACE) + AGENTBODY + pp.Suppress(RBRACE))
    AGENTOPTION.setParseAction(lambda s, l, t: t[0])
    AGENTSET = pp.Group(pp.Or([AGENTOPTION, AGENT]))
    AGENTSET.setParseAction(
        lambda x: {"agentset": set(agentset2set(x.asList()[0]))})

    def agentset2set(xs):
        base = []
        for x in xs:
            if x[0] == 'user':
                base.append(x)
            else:
                base = base + agent.mkGroup(x[1])
        return base

    IDENTIFIER = pp.Word(pp.alphanums)("ID")

    # agentexp
    AGENTEXP = pp.Forward()  # -- should change AGENTEXP to AGENTATOM
    AGENTATOMPAIR = pp.Suppress(LP) + AGENTEXP + pp.Suppress(RP)
    AGENTATOMPAIR.setParseAction(f("agentapplication"))
    # used for creating vars and dereferencing vars
    AGENTVAR = pp.Suppress(pp.Word("agent")) + IDENTIFIER
    #AGENTVAR.setParseAction(f("agentvar"))
    AGENTVAR.setParseAction(lambda x: {"agentvar": x.asList()[0]})
    AGENTATOM = AGENTATOMPAIR | AGENTSET | AGENTVAR
    AGENTATOML = AGENTATOM("LEFT")
    AGENTATOML.setParseAction(f("lhs"))
    AGENTATOMR = AGENTATOM("RIGHT")
    AGENTATOMR.setParseAction(f("rhs"))
    AGENTEXP << AGENTATOML + pp.Optional(OP + AGENTATOMR)
    AGENTASSIGNMENT = AGENTVAR + pp.Suppress(EQ) + AGENTATOM
    AGENTASSIGNMENT.setParseAction(f("agentassignment"))

    # pathexp
    PATHEXP = pp.Forward()
    PATHATOMPAIR = pp.Suppress(LP) + PATHEXP + pp.Suppress(RP)
    PATHATOMPAIR.setParseAction(f("application"))
    # used for creating vars and dereferencing vars
    PATHVAR = pp.Suppress(pp.Word("path")) + IDENTIFIER
    PATHVAR.setParseAction(f("pathvar"))
    PATHATOM = PATHATOMPAIR | PATHVAR | PATHSET
    PATHATOML = PATHATOM("LEFT")
    PATHATOML.setParseAction(f("lhs"))
    PATHATOMR = PATHATOM("RIGHT")
    PATHATOMR.setParseAction(f("rhs"))
    PATHEXP << PATHATOML + pp.Optional(OP + PATHATOMR)
    PATHASSIGNMENT = PATHVAR + pp.Suppress(EQ) + PATHATOM
    PATHASSIGNMENT.setParseAction(f("pathassignment"))

    CAN = pp.Word("cannot") | pp.Word("can")
    CAN.setParseAction(f("bool"))
    READ = pp.Word("read")
    WRITE = pp.Word("write")
    EXECUTE = pp.Word("execute")
    TRAVERSE = pp.Word("traverse")
    DISCOVER = pp.Word("discover")
    EDIT = pp.Word("edit")
    PERMISSION = pp.Word("permission")
    PERMS = pp.Or(
        [READ, WRITE, EXECUTE, TRAVERSE, PERMISSION, EXECUTE, DISCOVER, EDIT])
    AND = pp.Word("and")
    PERMLIST = pp.delimitedList(PERMS, delim=AND).setResultsName("PERMS")
    PERMLIST.setParseAction(lambda x: x.asDict())

    # RULEATOM = ATOM
    # RULEATOM.setParseAction(lambda x: path2function(x))
    #ONLY = pp.Optional(pp.Word("only"))("ONLY")
    RULE = AGENTATOM + CAN + PERMLIST + pp.Word("in") + PATHATOM
    RULE.setParseAction(f("rule"))
    ONLYRULE = pp.Suppress(pp.Word(
        "only")) + AGENTATOM + CAN + PERMLIST + pp.Word("in") + PATHATOM
    ONLYRULE.setParseAction(f("onlyrule"))
    COMMENT = pp.Suppress(pp.Word("#") + pp.Word(pp.alphanums + "+-./*? "))

    ATOMS = pp.ZeroOrMore(AGENTASSIGNMENT + pp.Optional(COMMENT) |
                          PATHASSIGNMENT + pp.Optional(COMMENT) |
                          ONLYRULE + pp.Optional(COMMENT) |
                          RULE + pp.Optional(COMMENT) |
                          PATHATOM + pp.Optional(COMMENT) |
                          COMMENT)

    def parseStatement(statement):
        #print("parseStatement asked to parse",str(statement))
        if "pathassignment" in statement:
            s = statement["pathassignment"]
            # get the id to which the result is to be assigned
            id = s[0]["pathvar"][0]
            v = path2function(s[1])
            env_set(id, v)
            printenv(env)
            return s
        elif "agentassignment" in statement:
            s = statement["agentassignment"]
            # get the id to which the result is to be assigned
            id = s[0]["agentvar"][0]
            id = s[0]["agentvar"]
            #print("...agentassignment",s[1])
            v = path2function(s[1])
            #v = s[1]
            env_set("agent_" + id, v)
            # used to show state of environment
            printenv(env)
            return s
        elif "onlyrule" in statement:
            #print("this is a rule")
            s = statement["onlyrule"]
            #print("s**",s)
            #print("rule agent", s[0])
            agent = path2function(s[0])
            #print("the agent is: " + str(agent))
            #print("agentinvoke",agent(1))
            s[0] = agent
            cantype = s[1]['bool'][0] + "only"
            s[1]["only"] = True
            #print(cantype)
            perms = s[2]['PERMS']
            path = s[4]
            #print(s[4])
            f = path2function(s[4])
            #print("the path '" + str(s[4]) + "' is represented by a function: " + str(f))
            s[4] = f
            #print(f)
            #print(s)
            #printenv(env)
            rules_to_execute.append((s, cantype, perms, agent, f))
            return s
        elif "rule" in statement:
            #print("this is a rule")
            s = statement["rule"]
            #print("s**",s)
            #print("rule agent", s[0])
            agent = path2function(s[0])
            #print("the agent is: " + str(agent))
            #print("agentinvoke",agent(1))
            s[0] = agent
            cantype = s[1]['bool'][0]
            #print(cantype)
            perms = s[2]['PERMS']
            path = s[4]
            #print(s[4])
            f = path2function(s[4])
            s[4] = f
            #print(f)
            #print(s)
            #printenv(env)
            rules_to_execute.append((s, cantype, perms, agent, f))
            return s

    #print(AGENTATOM.parseString(s.strip()))
    print("=" * 80)
    statements = ATOMS.parseString(s.strip())
    for statement in statements:
        r = parseStatement(statement)

    # debugging stuff
    # print("-->user", USER.parseString("user1")[0])
    # print("--->group", GROUP.parseString("users in comp105")[0])
    # print("--->agent", AGENT.parseString("users in comp105")[0])
    # print("--->agent", AGENT.parseString("users1")[0])
    # print("--->agentset", AGENTSET.parseString("{user1,user2,users in comp105}")[0])
    # print("--->agentvar", AGENTVAR.parseString("agent xyz")[0])
    # print("--->agentOP", AGENTATOMPAIR.parseString("(agent xyz and {user1,users in groupxxx,user3})")[0])
    return rules_to_execute
REGEX_ACTION = (FACT | VAR) + pp.Group(REGEX_OP + (VAR | REGEX))

#TODO: EL_ARRAY -> SEQUENCE
# a basic array of values in EL: [ e1, e2 ... en ]
EL_ARRAY = array_template(CONDITION | FACT | (EL_COMPARISON ^ ARITH_FACT)
                          | REGEX_ACTION | ELEMENT)

#TODO: Other Actions? Stack/Queue/sample_from?
#TODO: add a negated path var fact special case.
# (ie: {.a.b.$x? -> [email protected] }
ACTION_ARRAY = array_template(ARITH_FACT | REGEX_ACTION | FACT,
                              brackets_optional=True)

# Fact Components, [Root ... pairs ... terminal]
# Core part of a fact: a.b!c => (a,DOT),(b.EX)
EL_PAIR = ELEMENT + pp.NotAny(pp.LineEnd()) + (DOT | EX)
EL_FACT_ROOT = pp.Group(((VAR | DBL_VLINE) + (DOT | EX))
                        | DOT).setResultsName(str(PARSENAMES.ROOT))
EL_FACT_TERMINAL = ELEMENT | pp.Group(EL_ARRAY)

# An entire sequence; note the stopOn to not continue over lines
FACT << op(NOT).setResultsName(str(PARSENAMES.NOT)) + \
    EL_FACT_ROOT + \
    pp.Group(pp.ZeroOrMore(EL_PAIR)).setResultsName(str(PARSENAMES.BASEFACT)) + \
    pp.Group(EL_FACT_TERMINAL).setResultsName(str(PARSENAMES.TERMINAL))

BIND_STATEMENT = VAR + s(BIND) + op(FACT)

# Execute Statements?

# The entire grammar:
ROOT = pp.OneOrMore((BIND_STATEMENT | CONDITION | FACT) + \
def ParseCode(codeDefn, filename):
    # The file name is used when printing error messages
    global CurrentFileName
    CurrentFileName = filename

    funcExpr = MakeFuncExpr()
    handlerExpr = MakeHandlerExpr()
    eventExpr = MakeEventExpr()
    refExpr = MakeRefExpr()
    defineExpr = MakeDefineExpr()
    enumExpr = MakeEnumExpr()
    bitMaskExpr = MakeBitMaskExpr()
    importExpr = MakeImportExpr()

    # Define an expression containing all keywords that can be preceded by a
    # doxygen-style comment.
    # todo: There is probably a better way to do this with the expressions
    #       above, rather than defining 'keywords' here, but it would
    #       probably require changes to the expression definitions, so this
    #       is good enough for now.
    keywords = (KeywordFunction | KeywordHandler | KeywordEvent |
                KeywordReference | KeywordDefine | KeywordEnum |
                KeywordBitMask)

    # The expressions are applied in the order listed, so give the more
    # common expressions first.
    allcode = (pyparsing.ZeroOrMore(pyparsing.cStyleComment +
                                    pyparsing.NotAny(keywords)) +
               pyparsing.ZeroOrMore(funcExpr | handlerExpr | eventExpr |
                                    refExpr | defineExpr | enumExpr |
                                    bitMaskExpr | importExpr))

    # Pre-process to remove all comments except for doxygen comments.
    pyparsing.cppStyleComment.setParseAction(ProcessDoxygen)
    codeDefn = pyparsing.cppStyleComment.transformString(codeDefn)

    # Error handling is done in FailFunc() now. However, just in case a
    # parser exception slips through, handle it here, although the error
    # message and/or location may not be as accurate as when handled by
    # FailFunc(). In the rare case that another, unexpected, exception
    # happens, then just let it crash the program so that we get a traceback.
    try:
        resultList = allcode.parseString(codeDefn, parseAll=True)
    except pyparsing.ParseException as error:
        print("** Unexpected ParseException occurred **")
        PrintErrorMessage(codeDefn, error.lineno, error.col, error.msg)
        sys.exit(1)

    # Need to separate the header comments from the code in the raw
    # resultList, and return a dictionary with the appropriate sections.
    headerList = []
    codeList = []
    importList = []

    for r in resultList:
        if isinstance(r, str):
            headerList.append(r)
        elif isinstance(r, codeTypes.ImportData):
            importList.append(r)
        else:
            codeList.append(r)

    resultData = dict(headerList=headerList,
                      codeList=codeList,
                      importList=importList)
    return resultData
class MacWifiLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parse text based on wifi.log file."""

    NAME = 'macwifi'
    DESCRIPTION = 'Parser for MacOS wifi.log files.'

    _ENCODING = 'utf-8'

    THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
    THREE_LETTERS = text_parser.PyparsingConstants.THREE_LETTERS

    # Regular expressions for known actions.
    _CONNECTED_RE = re.compile(r'Already\sassociated\sto\s(.*)\.\sBailing')
    _WIFI_PARAMETERS_RE = re.compile(
        r'\[ssid=(.*?), bssid=(.*?), security=(.*?), rssi=')

    _KNOWN_FUNCTIONS = [
        'airportdProcessDLILEvent',
        '_doAutoJoin',
        '_processSystemPSKAssoc']

    _AGENT = (
        pyparsing.Literal('<') +
        pyparsing.Combine(
            pyparsing.Literal('airportd') + pyparsing.CharsNotIn('>'),
            joinString='', adjacent=True).setResultsName('agent') +
        pyparsing.Literal('>'))

    _DATE_TIME = pyparsing.Group(
        THREE_LETTERS.setResultsName('day_of_week') +
        THREE_LETTERS.setResultsName('month') +
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            'day') +
        text_parser.PyparsingConstants.TIME_ELEMENTS +
        pyparsing.Suppress('.') +
        THREE_DIGITS.setResultsName('milliseconds'))

    # Log line with a known function name.
    _MAC_WIFI_KNOWN_FUNCTION_LINE = (
        _DATE_TIME.setResultsName('date_time') + _AGENT +
        pyparsing.oneOf(_KNOWN_FUNCTIONS).setResultsName('function') +
        pyparsing.Literal(':') +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    # Log line with an unknown function name.
    _MAC_WIFI_LINE = (
        _DATE_TIME.setResultsName('date_time') +
        pyparsing.NotAny(
            _AGENT +
            pyparsing.oneOf(_KNOWN_FUNCTIONS) +
            pyparsing.Literal(':')) +
        pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))

    _MAC_WIFI_HEADER = (
        _DATE_TIME.setResultsName('date_time') +
        pyparsing.Literal('***Starting Up***').setResultsName('text'))

    _DATE_TIME_TURNED_OVER_HEADER = pyparsing.Group(
        text_parser.PyparsingConstants.MONTH.setResultsName('month') +
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            'day') +
        text_parser.PyparsingConstants.TIME_ELEMENTS)

    _MAC_WIFI_TURNED_OVER_HEADER = (
        _DATE_TIME_TURNED_OVER_HEADER.setResultsName('date_time') +
        pyparsing.Combine(
            pyparsing.Word(pyparsing.printables) +
            pyparsing.Word(pyparsing.printables) +
            pyparsing.Literal('logfile turned over') +
            pyparsing.LineEnd(),
            joinString=' ', adjacent=False).setResultsName('text'))

    # Define the available log line structures.
    LINE_STRUCTURES = [
        ('header', _MAC_WIFI_HEADER),
        ('turned_over_header', _MAC_WIFI_TURNED_OVER_HEADER),
        ('known_function_logline', _MAC_WIFI_KNOWN_FUNCTION_LINE),
        ('logline', _MAC_WIFI_LINE)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser object."""
        super(MacWifiLogParser, self).__init__()
        self._last_month = 0
        self._year_use = 0

    def _GetAction(self, action, text):
        """Parse the well known actions for easy reading.

        Args:
          action (str): the function or action called by the agent.
          text (str): mac Wifi log text.

        Returns:
          str: a formatted string representing the known (or common) action.
              If the action is not known the original log text is returned.
        """
        # TODO: replace "x in y" checks by startswith if possible.
        if 'airportdProcessDLILEvent' in action:
            interface = text.split()[0]
            return 'Interface {0:s} turn up.'.format(interface)

        if 'doAutoJoin' in action:
            match = self._CONNECTED_RE.match(text)
            if match:
                ssid = match.group(1)[1:-1]
            else:
                ssid = 'Unknown'
            return 'Wifi connected to SSID {0:s}'.format(ssid)

        if 'processSystemPSKAssoc' in action:
            wifi_parameters = self._WIFI_PARAMETERS_RE.search(text)
            if wifi_parameters:
                ssid = wifi_parameters.group(1)
                bssid = wifi_parameters.group(2)
                security = wifi_parameters.group(3)
                if not ssid:
                    ssid = 'Unknown'
                if not bssid:
                    bssid = 'Unknown'
                if not security:
                    security = 'Unknown'
                return ('New wifi configured. BSSID: {0:s}, SSID: {1:s}, '
                        'Security: {2:s}.').format(bssid, ssid, security)

        return text

    def _GetTimeElementsTuple(self, key, structure):
        """Retrieves a time elements tuple from the structure.

        Args:
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.

        Returns:
          tuple: containing:
            year (int): year.
            month (int): month, where 1 represents January.
            day_of_month (int): day of month, where 1 is the first day of
                the month.
            hours (int): hours.
            minutes (int): minutes.
            seconds (int): seconds.
            milliseconds (int): milliseconds.
        """
        if key == 'turned_over_header':
            month, day, hours, minutes, seconds = structure.date_time
            milliseconds = 0
        else:
            _, month, day, hours, minutes, seconds, milliseconds = (
                structure.date_time)

        # Note that dfdatetime_time_elements.TimeElements will raise
        # ValueError for an invalid month.
        month = timelib.MONTH_DICT.get(month.lower(), 0)

        if month != 0 and month < self._last_month:
            # Gap detected between years.
            self._year_use += 1

        return self._year_use, month, day, hours, minutes, seconds, milliseconds

    def _ParseLogLine(self, parser_mediator, key, structure):
        """Parse a single log line and produce an event object.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.
        """
        time_elements_tuple = self._GetTimeElementsTuple(key, structure)

        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(structure.date_time))
            return

        self._last_month = time_elements_tuple[1]

        event_data = MacWifiLogEventData()
        event_data.agent = structure.agent
        # Due to the use of CharsNotIn pyparsing structure contains
        # whitespaces that need to be removed.
        event_data.function = structure.function.strip()
        event_data.text = structure.text

        if key == 'known_function_logline':
            event_data.action = self._GetAction(
                event_data.function, event_data.text)

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        self._ParseLogLine(parser_mediator, key, structure)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a Mac Wifi log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from a text file.

        Returns:
          bool: True if the line is in the expected format, False if not.
        """
        self._last_month = 0
        self._year_use = parser_mediator.GetEstimatedYear()

        key = 'header'

        try:
            structure = self._MAC_WIFI_HEADER.parseString(line)
        except pyparsing.ParseException:
            structure = None

        if not structure:
            key = 'turned_over_header'

            try:
                structure = self._MAC_WIFI_TURNED_OVER_HEADER.parseString(line)
            except pyparsing.ParseException:
                structure = None

        if not structure:
            logger.debug('Not a Mac Wifi log file')
            return False

        time_elements_tuple = self._GetTimeElementsTuple(key, structure)

        try:
            dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_elements_tuple)
        except ValueError:
            logger.debug(
                'Not a Mac Wifi log file, invalid date and time: '
                '{0!s}'.format(structure.date_time))
            return False

        self._last_month = time_elements_tuple[1]

        return True
def _generate_grammar(self): # Define grammar: pp.ParserElement.setDefaultWhitespaceChars(" \t") def add_element(name: str, value: pp.ParserElement): nonlocal self if self.debug: value.setName(name) value.setDebug() return value EOL = add_element("EOL", pp.Suppress(pp.LineEnd())) Else = add_element("Else", pp.Keyword("else")) Identifier = add_element( "Identifier", pp.Word(f"{pp.alphas}_", bodyChars=pp.alphanums + "_-./")) BracedValue = add_element( "BracedValue", pp.nestedExpr(ignoreExpr=pp.quotedString | pp.QuotedString(quoteChar="$(", endQuoteChar=")", escQuote="\\", unquoteResults=False)). setParseAction(lambda s, l, t: ["(", *t[0], ")"]), ) Substitution = add_element( "Substitution", pp.Combine( pp.Literal("$") + (((pp.Literal("$") + Identifier + pp.Optional(pp.nestedExpr())) | (pp.Literal("(") + Identifier + pp.Literal(")")) | (pp.Literal("{") + Identifier + pp.Literal("}")) | (pp.Literal("$") + pp.Literal("{") + Identifier + pp.Optional(pp.nestedExpr()) + pp.Literal("}")) | (pp.Literal("$") + pp.Literal("[") + Identifier + pp.Literal("]"))))), ) LiteralValuePart = add_element( "LiteralValuePart", pp.Word(pp.printables, excludeChars="$#{}()")) SubstitutionValue = add_element( "SubstitutionValue", pp.Combine( pp.OneOrMore(Substitution | LiteralValuePart | pp.Literal("$"))), ) FunctionValue = add_element( "FunctionValue", pp.Group( pp.Suppress(pp.Literal("$") + pp.Literal("$")) + Identifier + pp.nestedExpr( ) # .setParseAction(lambda s, l, t: ['(', *t[0], ')']) ).setParseAction(lambda s, l, t: handle_function_value(*t)), ) Value = add_element( "Value", pp.NotAny(Else | pp.Literal("}") | EOL) + (pp.QuotedString(quoteChar='"', escChar="\\") | FunctionValue | SubstitutionValue | BracedValue), ) Values = add_element("Values", pp.ZeroOrMore(Value)("value")) Op = add_element( "OP", pp.Literal("=") | pp.Literal("-=") | pp.Literal("+=") | pp.Literal("*=") | pp.Literal("~="), ) Key = add_element("Key", Identifier) Operation = add_element( "Operation", Key("key") + pp.locatedExpr(Op)("operation") + Values("value")) CallArgs = add_element("CallArgs", pp.nestedExpr()) def parse_call_args(results): out = "" for item in chain(*results): if isinstance(item, str): out += item else: out += "(" + parse_call_args(item) + ")" return out CallArgs.setParseAction(parse_call_args) Load = add_element("Load", pp.Keyword("load") + CallArgs("loaded")) Include = add_element( "Include", pp.Keyword("include") + pp.locatedExpr(CallArgs)("included")) Option = add_element("Option", pp.Keyword("option") + CallArgs("option")) RequiresCondition = add_element("RequiresCondition", pp.originalTextFor(pp.nestedExpr())) def parse_requires_condition(s, l_unused, t): # The following expression unwraps the condition via the additional info # set by originalTextFor. condition_without_parentheses = s[t._original_start + 1:t._original_end - 1] # And this replaces the colons with '&&' similar how it's done for 'Condition'. condition_without_parentheses = ( condition_without_parentheses.strip().replace( ":", " && ").strip(" && ")) return condition_without_parentheses RequiresCondition.setParseAction(parse_requires_condition) Requires = add_element( "Requires", pp.Keyword("requires") + RequiresCondition("project_required_condition")) FunctionArgumentsAsString = add_element( "FunctionArgumentsAsString", pp.originalTextFor(pp.nestedExpr())) QtNoMakeTools = add_element( "QtNoMakeTools", pp.Keyword("qtNomakeTools") + FunctionArgumentsAsString("qt_no_make_tools_arguments"), ) # ignore the whole thing... 
DefineTestDefinition = add_element( "DefineTestDefinition", pp.Suppress( pp.Keyword("defineTest") + CallArgs + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd()) ), ) # ignore the whole thing... ForLoop = add_element( "ForLoop", pp.Suppress( pp.Keyword("for") + CallArgs + pp.nestedExpr(opener="{", closer="}", ignoreExpr=pp.LineEnd()) ), ) # ignore the whole thing... ForLoopSingleLine = add_element( "ForLoopSingleLine", pp.Suppress( pp.Keyword("for") + CallArgs + pp.Literal(":") + pp.SkipTo(EOL)), ) # ignore the whole thing... FunctionCall = add_element("FunctionCall", pp.Suppress(Identifier + pp.nestedExpr())) Scope = add_element("Scope", pp.Forward()) Statement = add_element( "Statement", pp.Group(Load | Include | Option | Requires | QtNoMakeTools | ForLoop | ForLoopSingleLine | DefineTestDefinition | FunctionCall | Operation), ) StatementLine = add_element("StatementLine", Statement + (EOL | pp.FollowedBy("}"))) StatementGroup = add_element( "StatementGroup", pp.ZeroOrMore(StatementLine | Scope | pp.Suppress(EOL))) Block = add_element( "Block", pp.Suppress("{") + pp.Optional(EOL) + StatementGroup + pp.Optional(EOL) + pp.Suppress("}") + pp.Optional(EOL), ) ConditionEnd = add_element( "ConditionEnd", pp.FollowedBy( (pp.Optional(pp.White()) + (pp.Literal(":") | pp.Literal("{") | pp.Literal("|")))), ) ConditionPart1 = add_element( "ConditionPart1", (pp.Optional("!") + Identifier + pp.Optional(BracedValue))) ConditionPart2 = add_element("ConditionPart2", pp.CharsNotIn("#{}|:=\\\n")) ConditionPart = add_element( "ConditionPart", (ConditionPart1 ^ ConditionPart2) + ConditionEnd) ConditionOp = add_element("ConditionOp", pp.Literal("|") ^ pp.Literal(":")) ConditionWhiteSpace = add_element( "ConditionWhiteSpace", pp.Suppress(pp.Optional(pp.White(" ")))) ConditionRepeated = add_element( "ConditionRepeated", pp.ZeroOrMore(ConditionOp + ConditionWhiteSpace + ConditionPart)) Condition = add_element("Condition", pp.Combine(ConditionPart + ConditionRepeated)) Condition.setParseAction( lambda x: " ".join(x).strip().replace(":", " && ").strip(" && ")) # Weird thing like write_file(a)|error() where error() is the alternative condition # which happens to be a function call. In this case there is no scope, but our code expects # a scope with a list of statements, so create a fake empty statement. ConditionEndingInFunctionCall = add_element( "ConditionEndingInFunctionCall", pp.Suppress(ConditionOp) + FunctionCall + pp.Empty().setParseAction(lambda x: [[]]).setResultsName( "statements"), ) SingleLineScope = add_element( "SingleLineScope", pp.Suppress(pp.Literal(":")) + pp.Group(Block | (Statement + EOL))("statements"), ) MultiLineScope = add_element("MultiLineScope", Block("statements")) SingleLineElse = add_element( "SingleLineElse", pp.Suppress(pp.Literal(":")) + (Scope | Block | (Statement + pp.Optional(EOL))), ) MultiLineElse = add_element("MultiLineElse", Block) ElseBranch = add_element( "ElseBranch", pp.Suppress(Else) + (SingleLineElse | MultiLineElse)) # Scope is already add_element'ed in the forward declaration above. Scope <<= pp.Group( Condition("condition") + (SingleLineScope | MultiLineScope | ConditionEndingInFunctionCall) + pp.Optional(ElseBranch)("else_statements")) Grammar = StatementGroup("statements") Grammar.ignore(pp.pythonStyleComment()) return Grammar
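# A standalone sketch of the Operation rule above (assumption: pyparsing 2.x;
# the names here are illustrative and not part of the original grammar):
import pyparsing as pp

_key = pp.Word(pp.alphas + "_", pp.alphanums + "_.")
_op = pp.oneOf("= += -= *= ~=")        # oneOf tries the longest match first
_val = pp.Word(pp.printables)
_operation = _key("key") + _op("operation") + pp.ZeroOrMore(_val)("value")

# _operation.parseString("SOURCES += main.cpp util.cpp") yields
# key='SOURCES', operation='+=', value=['main.cpp', 'util.cpp']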
parameter_name = (pp.White(' ', exact=2).suppress() + pp.Word('-').suppress() + word).setResultsName('name') metavar = pp.Combine( pp.ZeroOrMore(pp.White(' ').suppress() + word).setResultsName('metavar')) parameter = pp.Group(parameter_name + metavar + nl + desc) block_name = pp.Or( (pp.Literal('SYNOPSIS'), pp.Literal('USAGE'), pp.Literal('DESCRIPTION'), pp.Literal('OPTIONS'), pp.Literal('AUTHOR'), pp.Literal('COPYRIGHT'), pp.Literal('REFERENCES'))).setResultsName('block_name') parameter_comment = ( pp.NotAny(block_name) + pp.OneOrMore(word + pp.Optional(space)).setResultsName('parameter_comment') + nl + empty_line) parser = pp.ZeroOrMore(parameter) title = (pp.Literal('MRtrix') + line + nl + indent(5) + pp.Word(pp.alphas).setResultsName('cmd') + pp.Literal(':') + line + nl) synopsis = (pp.Literal('SYNOPSIS').suppress() + nl + pp.Combine(pp.OneOrMore(desc_line)).setResultsName('synopsis')) usage = ( pp.Literal('USAGE').suppress() + nl + nl + indent(5) + word + space + pp.Literal('[ parameters ]').suppress() + pp.OneOrMore(space.suppress() + word) + nl + nl + pp.OneOrMore( indent(8) + word + indent(6) + pp.OneOrMore(space + word) + nl + nl))
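# The pp.Or over Literals above can be written more compactly with pp.oneOf;
# a standalone equivalent (assumption: both forms match the same headers):
import pyparsing as pp

_block_name = pp.oneOf(
    'SYNOPSIS USAGE DESCRIPTION OPTIONS AUTHOR COPYRIGHT REFERENCES'
).setResultsName('block_name')

# _block_name.parseString('OPTIONS')['block_name'] -> 'OPTIONS'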
def parse_nets(self): EOL = pp.LineEnd().suppress() linebreak = pp.Suppress(";" + pp.LineEnd()) identifier = pp.Word( pp.alphanums + '._"!<>/[]$#$%&\'*+,/:<=>?@[\\]^_`{|}~') # CONFLICT with '();' number = pp.pyparsing_common.number word = pp.Word(pp.alphas) LPAR = pp.Suppress('(') RPAR = pp.Suppress(')') ORIENT = (pp.Keyword('N') | pp.Keyword('S') | pp.Keyword('E') | pp.Keyword('W') | pp.Keyword('FN') | pp.Keyword('FS') | pp.Keyword('FE') | pp.Keyword('FW')) pt = LPAR + pp.OneOrMore(number | pp.Keyword('*')) + RPAR # pair of x,y nets_id = pp.Keyword('NETS') end_nets_id = pp.Keyword("END NETS").suppress() begin_net = pp.Keyword('-') ws_net = pp.Suppress(pp.Keyword('+')) # parameter division in NETS # netName netName_1 = pp.Group( LPAR + identifier('compName') + identifier('pinName') + pp.Optional(ws_net + pp.Keyword('SYNTHESIZED'))('SYNTHESIZED') + RPAR).setResultsName('netName') netName_2 = pp.Group( pp.Keyword('MUSTJOIN') + LPAR + identifier('compName') + identifier('pinName') + RPAR).setResultsName('MUSTJOIN') netName = ( identifier('netName') + pp.OneOrMore(netName_1 | netName_2)).setResultsName('netName') # SHIELDNET SHIELDNET = pp.Group(ws_net + pp.Keyword('SHIELDNET') + identifier('shieldNetName')).setResultsName( 'SHIELDNET') # VPIN VPIN_PLACEMENT_ids = (pp.Keyword('PLACED') | pp.Keyword('FIXED') | pp.Keyword('COVER')) VPIN_PLACEMENT = (VPIN_PLACEMENT_ids('PLACEMENT') + pp.Group(pt)('pt') + pp.ZeroOrMore(word('orient'))) VPIN_LAYER = pp.Keyword('LAYER') + identifier('layerName') VPIN = pp.Group(ws_net + pp.Keyword('VPIN') + identifier('vpinName') + pp.Optional(VPIN_LAYER) + pp.Group(pt)('pt1') + pp.Group(pt)('pt2') + pp.Optional(pp.Group(VPIN_PLACEMENT)('PLACEMENT')))( 'VPIN') # routingPoints (used by regularWiring) MASK_id = pp.Keyword('MASK')('MASK') RECT_id = pp.Keyword('RECT')('RECT') VIRTUAL_id = pp.Keyword('VIRTUAL')('VIRTUAL') routingPoints_1 = (pp.Optional(MASK_id + number('maskNum')) + pp.Group(pt)) routingPoints_2 = (pp.Optional(MASK_id + number('viaMaskNum')) + pp.NotAny(pp.Keyword('NEW') | pp.Keyword('RECT')) + identifier('viaName') + pp.Optional(ORIENT('orient'))) routingPoints_3 = (pp.Optional(MASK_id + number('maskNum')) + RECT_id + pp.Group(pt)) routingPoints_4 = (VIRTUAL_id + pp.Group(pt)) routingPoints = (pp.Group(pt) + pp.OneOrMore(routingPoints_1 | routingPoints_2 | routingPoints_3 | routingPoints_4)) # regularWiring regularWiring_ids = (pp.Keyword('COVER') | pp.Keyword('FIXED') | pp.Keyword('ROUTED') | pp.Keyword('NOSHIELD')) TAPER_RULE = ((pp.Keyword('TAPER') | pp.Keyword('TAPERRULE')) + identifier('ruleName')) STYLE = (pp.Keyword('STYLE') + identifier('layerName') + pp.OneOrMore(pt)) regularWiring_Head = pp.Group( regularWiring_ids('WIRING_id') + identifier('layerName') + pp.Optional(TAPER_RULE)('TAPER_RULE') + pp.Optional(STYLE)('STYLE') + pp.OneOrMore(routingPoints)('routingPoints')) NEW_WIRING = pp.Group( pp.Keyword('NEW')('WIRING_id') + identifier('layerName') + pp.Optional(TAPER_RULE)('TAPER_RULE') + pp.Optional(STYLE)('STYLE') + pp.OneOrMore(routingPoints)('routingPoints')) regularWiring = pp.Group( ws_net + pp.Group(regularWiring_Head)('WIRING_Head') + pp.Group(pp.ZeroOrMore(NEW_WIRING))('NEW_WIRING'))('WIRING') # SUBNET SUBNET_regularWiring = pp.Group( pp.Group(regularWiring_Head)('WIRING_Head') + pp.Group(pp.ZeroOrMore(NEW_WIRING))('NEW_WIRING'))('WIRING') SUBNET_NONDEFAULTRULE = (pp.Keyword('NONDEFAULTRULE') + identifier('NONDEFAULTRULE_ruleName')) SUBNET_pin_type = (pp.Keyword('VPIN')('VPIN') | pp.Keyword('PIN')('PIN') | identifier('compName')) SUBNET 
= pp.Group(ws_net + pp.Keyword('SUBNET') + identifier('subnetName') + pp.ZeroOrMore(LPAR + SUBNET_pin_type + identifier('pinName') + RPAR) + pp.Optional(SUBNET_NONDEFAULTRULE) + pp.ZeroOrMore(SUBNET_regularWiring))('SUBNET') # XTALK XTALK = (ws_net + pp.Keyword('XTALK') + number('XTALK_class')) # NONDEFAULTRULE NONDEFAULTRULE = (ws_net + pp.Keyword('NONDEFAULTRULE') + identifier('NONDEFAULTRULE_ruleName')) # SOURCE SOURCE = (ws_net + pp.Keyword('SOURCE') + (pp.Keyword('DIST') | pp.Keyword('NETLIST') | pp.Keyword('TEST') | pp.Keyword('TIMING') | pp.Keyword('USER'))('SOURCE')) # FIXEDBUMP FIXEDBUMP = (ws_net + pp.Keyword('FIXEDBUMP')('FIXEDBUMP')) # FREQUENCY FREQUENCY = (ws_net + pp.Keyword('FREQUENCY') + number('FREQUENCY')) # ORIGINAL ORIGINAL = (ws_net + pp.Keyword('ORIGINAL') + identifier('ORIGINAL_netName')) # USE > USE_ids USE_ids = (pp.Keyword('ANALOG') | pp.Keyword('CLOCK') | pp.Keyword('GROUND') | pp.Keyword('POWER') | pp.Keyword('RESET') | pp.Keyword('SCAN') | pp.Keyword('SIGNAL') | pp.Keyword('TIEOFF')) # USE USE = ws_net + pp.Keyword('USE') + USE_ids('USE') # PATTERN PATTERN_ids = (pp.Keyword('BALANCED') | pp.Keyword('STEINER') | pp.Keyword('TRUNK') | pp.Keyword('WIREDLOGIC')) PATTERN = (ws_net + pp.Keyword('PATTERN') + PATTERN_ids('PATTERN')) # ESTCAP ESTCAP = (ws_net + pp.Keyword('ESTCAP') + number('ESTCAP_wireCap')) # WEIGHT WEIGHT = (ws_net + pp.Keyword('WEIGHT') + number('WEIGHT')) # PROPERTY PROPERTY = pp.Group(ws_net + pp.Keyword('PROPERTY') + pp.OneOrMore( identifier('propName') + number('propVal')))('PROPERTY') # Refactor this!? if self.ignore_nets_route: regularWiring = pp.SkipTo((EOL + ws_net) | linebreak) net = pp.Group( pp.Suppress(begin_net) + netName + pp.Optional(SHIELDNET) + pp.ZeroOrMore(VPIN) + pp.ZeroOrMore(SUBNET) + pp.Optional(XTALK) + pp.Optional(NONDEFAULTRULE) + pp.ZeroOrMore(regularWiring) + pp.Optional(SOURCE) + pp.Optional(FIXEDBUMP) + pp.Optional(FREQUENCY) + pp.Optional(ORIGINAL) + pp.Optional(USE) + pp.Optional(PATTERN) + pp.Optional(ESTCAP) + pp.Optional(WEIGHT) + pp.ZeroOrMore(PROPERTY) + linebreak).setResultsName( 'net', listAllMatches=True) nets = pp.Group( pp.Suppress(nets_id) + number('numNets') + linebreak + pp.ZeroOrMore(net) + pp.Suppress(end_nets_id)).setResultsName('NETS') return nets
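# A standalone sketch of the netName rule above on a toy DEF fragment
# (assumption: the identifier charset is simplified for illustration):
import pyparsing as pp

_LPAR, _RPAR = map(pp.Suppress, '()')
_ident = pp.Word(pp.alphanums + '._/[]')
_pin = pp.Group(_LPAR + _ident('compName') + _ident('pinName') + _RPAR)
_net_name = _ident('netName') + pp.OneOrMore(_pin)

# _net_name.parseString('net13 ( u1 A ) ( u2 Z )') yields netName='net13'
# plus two (compName, pinName) groups.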
import pyparsing as pp import operator import math expop = pp.Literal('**') plus = pp.Literal('+') minus = pp.Literal('-') div = pp.Literal('/') mult = (pp.Literal('*') + pp.NotAny("*")) dot = pp.Literal(".").setName('dot').suppress() bitOr = pp.Literal('|') bitAnd = pp.Literal('&') bitXor = pp.Literal('^') bitNot = pp.Literal('~') lShift = pp.Literal("<<") rShift = pp.Literal(">>") multop = div ^ mult addop = plus ^ minus rPar = pp.Literal(')').suppress() lPar = pp.Literal('(').suppress() lBrac = pp.Literal('[').suppress() rBrac = pp.Literal(']').suppress() lCur = pp.Literal("{").suppress() rCur = pp.Literal("}").suppress() and_ = pp.Keyword('AND') ^ bitAnd orXor_ = pp.Keyword('OR') ^ bitOr ^ bitXor not_ = pp.Keyword('NOT') qStr = pp.QuotedString(quoteChar='"', unquoteResults=False) eq = pp.Literal('=').suppress() neq = pp.Literal('<>') lessEq = pp.Literal('<=')
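# One way to wire the tokens above into an arithmetic grammar, using
# pp.infixNotation (assumption: the original module continues differently;
# this only illustrates expop/multop/addop precedence):
operand = pp.pyparsing_common.number
arith = pp.infixNotation(operand, [
    (expop, 2, pp.opAssoc.RIGHT),   # '**' binds tightest, right-associative
    (multop, 2, pp.opAssoc.LEFT),
    (addop, 2, pp.opAssoc.LEFT),
])
# arith.parseString('2 ** 3 * 4 + 1')[0] groups as ((2 ** 3) * 4) + 1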
class sparc_syntax: divide = False noprefix = False comment = pp.Regex(r'\#.*') symbol = pp.Regex(r'[A-Za-z_.$][A-Za-z0-9_.$]*').setParseAction( lambda r: env.ext(r[0], size=32)) mnemo = pp.LineStart() + symbol + pp.Optional(pp.Literal(',a')) mnemo.setParseAction(lambda r: r[0].ref.lower() + ''.join(r[1:])) integer = pp.Regex(r'[1-9][0-9]*').setParseAction(lambda r: int(r[0], 10)) hexa = pp.Regex(r'0[xX][0-9a-fA-F]+').setParseAction( lambda r: int(r[0], 16)) octa = pp.Regex(r'0[0-7]*').setParseAction(lambda r: int(r[0], 8)) bina = pp.Regex(r'0[bB][01]+').setParseAction(lambda r: int(r[0], 2)) char = pp.Regex(r"('.)|('\\\\)").setParseAction(lambda r: ord(r[0][1]))  # the token keeps its leading quote, so take the character after it number = integer | hexa | octa | bina | char number.setParseAction(lambda r: env.cst(r[0], 32)) term = symbol | number exp = pp.Forward() op_one = pp.oneOf("- ~") op_sig = pp.oneOf("+ -") op_mul = pp.oneOf("* /") op_cmp = pp.oneOf("== != <= >= < > <>") op_bit = pp.oneOf("^ && || & |") operators = [ (op_one, 1, pp.opAssoc.RIGHT), (op_sig, 2, pp.opAssoc.LEFT), (op_mul, 2, pp.opAssoc.LEFT), (op_cmp, 2, pp.opAssoc.LEFT), (op_bit, 2, pp.opAssoc.LEFT), ] reg = pp.Suppress('%') + pp.NotAny(pp.oneOf('hi lo')) + symbol hilo = pp.oneOf('%hi %lo') + pp.Suppress('(') + exp + pp.Suppress(')') exp << pp.operatorPrecedence(term | reg | hilo, operators) adr = pp.Suppress('[') + exp + pp.Suppress(']') mem = adr #+pp.Optional(symbol|imm) mem.setParseAction(lambda r: env.mem(r[0])) opd = exp | mem | reg opds = pp.Group(pp.delimitedList(opd)) instr = mnemo + pp.Optional(opds) + pp.Optional(comment) def action_reg(toks): rname = toks[0] if rname.ref.startswith('asr'): return env.reg(rname.ref) return env.__dict__[rname.ref] def action_hilo(toks): v = toks[1] return env.hi(v) if toks[0] == '%hi' else env.lo(v).zeroextend(32) def action_exp(toks): tok = toks[0] if isinstance(tok, env.exp): return tok if len(tok) == 2: op = tok[0] r = tok[1] if isinstance(r, list): r = action_exp(r) return env.oper(op, r) elif len(tok) == 3: op = tok[1] l = tok[0] r = tok[2] if isinstance(l, list): l = action_exp(l) if isinstance(r, list): r = action_exp(r) return env.oper(op, l, r) else: return tok def action_instr(toks): i = instruction('') i.mnemonic = toks[0] if len(toks) > 1: i.operands = toks[1][0:] return asmhelper(i) # actions: reg.setParseAction(action_reg) hilo.setParseAction(action_hilo) exp.setParseAction(action_exp) instr.setParseAction(action_instr)
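# A standalone sketch of the literal alternatives above, without the amoco
# 'env' wrappers (assumption: plain ints suffice here; note the prefixed
# forms are tried before the octal rule, since '0[0-7]*' also matches the
# lone '0' that starts '0x..' and '0b..'):
import pyparsing as pp

_integer = pp.Regex(r'[1-9][0-9]*').setParseAction(lambda t: int(t[0], 10))
_hexa = pp.Regex(r'0[xX][0-9a-fA-F]+').setParseAction(lambda t: int(t[0], 16))
_bina = pp.Regex(r'0[bB][01]+').setParseAction(lambda t: int(t[0], 2))
_octa = pp.Regex(r'0[0-7]*').setParseAction(lambda t: int(t[0], 8))
_number = _integer | _hexa | _bina | _octa

# _number.parseString('0x1f') -> [31]; _number.parseString('017') -> [15]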
def TmxParser(): def MatchKeywords(keywords): return pp.Or( map(lambda x: pp.Keyword(x).setResultsName('type'), keywords)) # Build grammar KEYWORDS_3ARG = 'step' KEYWORDS_4ARG_WRITE = 'write write2' KEYWORDS_4ARG = 'wind wind2' KEYWORDS_45ARG = 'shuffle shuffle2' KEYWORDS_5ARG = 'fill fill2' KEYWORDS_6ARG = 'copy' KEYWORD_MATCH = 'match' KEYWORD_ENDMATCH = 'endmatch' # list(...): a bare map() iterator would be exhausted by its first pp.Or() # use below, leaving later uses with an empty keyword set (Python 3). KEYWORDS = list(map(pp.Keyword, (' '.join([ KEYWORDS_4ARG, KEYWORDS_4ARG_WRITE, KEYWORDS_45ARG, KEYWORDS_5ARG, KEYWORD_MATCH, KEYWORD_ENDMATCH ])).split())) RESERVED_STATES = list(map(pp.Keyword, 'ACCEPT ERROR HALT REJECT OUT'.split())) COMMENT_CHAR = '%%' WHITESPACE = ' \t\r' STEP_DIRECTION = 'L R' pp.ParserElement.setDefaultWhitespaceChars(WHITESPACE) # Comments and blank lines whitespace = pp.White(ws=WHITESPACE).suppress() comment = pp.Word(COMMENT_CHAR) + pp.ZeroOrMore( pp.Word(pp.printables, excludeChars='\n')) newlines = pp.Group( pp.OneOrMore((comment + pp.LineEnd()) | pp.LineEnd())).setName('new line(s)').suppress() # State tags state_tag = pp.NotAny(pp.Or(KEYWORDS)) + pp.Word( pp.printables, min=2, excludeChars=':\n') state_tag_named = pp.NotAny(pp.Or(KEYWORDS)) + pp.Word( pp.printables, min=2, excludeChars=':\n').setResultsName('name') state_definition_tag = pp.NotAny(pp.Or(RESERVED_STATES)) + state_tag_named # Words and characters in alphabet language_word = pp.Word(pp.printables, excludeChars='\n') language_character = pp.Word(pp.printables, exact=1, excludeChars='\n') # Direction indicator direction_character = pp.oneOf(STEP_DIRECTION, asKeyword=True).setName( 'direction indicator').setResultsName('direction') # Direction arrow for match match_arrow = pp.oneOf('-> -<', asKeyword=True) # 3-argument single line commands single_line_command_keyword_3 = MatchKeywords(KEYWORDS_3ARG.split()) # 4-argument single line commands single_line_command_keyword_4 = MatchKeywords(KEYWORDS_4ARG.split()) # 4-argument single line commands which accept a WORD as 3rd arg single_line_command_keyword_4_write = MatchKeywords( KEYWORDS_4ARG_WRITE.split()) single_line_command_keyword_45 = MatchKeywords(KEYWORDS_45ARG.split()) # 5-argument single line commands single_line_command_keyword_5 = MatchKeywords(KEYWORDS_5ARG.split()) # 6-argument single line commands single_line_command_keyword_6 = MatchKeywords(KEYWORDS_6ARG.split()) jump_target = pp.Forward().setResultsName("jump_target") # Single line expression single_line_expr = pp.Group(( (single_line_command_keyword_3 + direction_character) | (single_line_command_keyword_4 + direction_character + language_character.setResultsName("stop")) | (single_line_command_keyword_4_write + direction_character + language_word.setResultsName("write") + whitespace) | (single_line_command_keyword_45 + direction_character + language_character.setResultsName("stop") + whitespace + pp.Optional(language_character.setResultsName("write") + whitespace)) | (single_line_command_keyword_5 + direction_character + language_character.setResultsName("stop") + whitespace + language_character.setResultsName("write") + whitespace) | (single_line_command_keyword_6 + direction_character + language_character.setResultsName("start") + whitespace + language_character.setResultsName("target") + whitespace + language_character.setResultsName("stop") + whitespace)) + jump_target) jump_target << (single_line_expr | state_tag) match_target = pp.Group( language_word.setResultsName("match_string") + match_arrow.setResultsName("direction") + pp.Optional(language_character.setResultsName("write") + ';') + jump_target) match_expr = pp.Group( 
pp.Keyword(KEYWORD_MATCH).setResultsName("type") + direction_character + pp.Optional(jump_target) + newlines + pp.ZeroOrMore(match_target + newlines).setResultsName("match_targets") + pp.Keyword(KEYWORD_ENDMATCH)) state_definition = pp.Group( (newlines + state_definition_tag.setName('state tag') + ':' + newlines + (single_line_expr.setResultsName("expr") | match_expr.setResultsName("expr")).setName('state definition') )).setName('state') alphabet = language_word.setName('alphabet').setResultsName("alphabet") document = alphabet + pp.ZeroOrMore(state_definition).setResultsName( "states") + pp.ZeroOrMore(newlines) return document
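# Usage sketch (assumption: 'machine.tmx' is a hypothetical TMX program
# whose first line is the alphabet, as the grammar above requires):
#
#   grammar = TmxParser()
#   doc = grammar.parseString(open('machine.tmx').read(), parseAll=True)
#   print(doc.alphabet, len(doc.states))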
] # temporary modifier temp_modifier = dice_bonus.setResultsName('temp_modifier') # verb phrases o = L('[') t = L(']') wordchars = P.alphanums + string.punctuation.replace(']', '') v_word = P.Word(wordchars) v_words = P.OneOrMore(v_word).setResultsName('verbs') v_word_nonterminal = v_word + P.NotAny(t) v_words_nonterminal = P.OneOrMore(v_word_nonterminal).setResultsName('verbs') # FIXME - [d20 1d10] should be an error v_content = P.Optional(v_words_nonterminal) + (temp_modifier | dice) | v_words verb_phrase = Sup(o) + v_content + Sup(t) verb_phrase = verb_phrase.setResultsName('verb_phrase') _test_verb_phrases = [ ("[]", P.ParseException), ("[star]", "['star']"), ("[rock star]", "['rock', 'star']"), ("[woo 1d20+1]", "['woo', 1, 20, 1]"), ("[woo +2]", "['woo', 2]"), ("[woo -2]", "['woo', -2]"), ("[1d20+1]", "[1, 20, 1]"),
class SQLParser(object): """SQL Parser""" def _LogStart(self, instring, loc, expr): logging.debug('Start: base_loc: %d, loc: %d, expr: %s', self._base_loc, loc, expr.name) def _LogSuccess(self, instring, start, loc, expr, tokens): logging.debug('Success: base_loc: %d, loc: %d, expr: %s, tokens: %s', self._base_loc, loc, expr.name, tokens) tokens['loc'] = self._base_loc + loc def _LogFailure(self, instring, start, expr, err): logging.debug('Failure: base_loc: %d, loc: %d, expr: %s, err: %s', self._base_loc, err.loc, expr.name, err) def __init__(self, progress_callback=None): """Constructor. Args: progress_callback: If specified, called with the character location of the end of the last-yielded statement. """ # Get all the class variables that match _*_TOKEN keywords = list(SQLParser.__dict__[k] for k in SQLParser.__dict__ if re.match(r'^_([_\w])+_TOKEN$', k)) # Fill the grammar rule _KEYWORDS with all possible keywords SQLParser.__dict__['_KEYWORDS'] << pyp.MatchFirst(keywords) self._loc = 0 # Last yielded line end self._base_loc = 0 # Start of this statement self._callback = progress_callback for key in dir(self): grammar_rule = getattr(self, key) if isinstance(grammar_rule, pyp.ParserElement): grammar_rule.setName(key) grammar_rule.setDebugActions( self._LogStart, self._LogSuccess, self._LogFailure) def _OnNewLine(self, loc): self._loc = loc def ParseString(self, string): logging.debug('Parsing: %r', string) try: for statement in db.XCombineSQL(db.XSplit(string, '\n', callback=self._OnNewLine)): yield self._QUERY.parseString(statement)[0] if self._callback: self._callback(self._loc) self._base_loc = self._loc + len(statement) + 1 except pyp.ParseException as e: raise ParseError(e.msg, self._base_loc + e.loc) except db.InputRemaining as e: raise ParseError('Input remaining: %s' % e, self._base_loc + self._loc) # DISCARDED _COMMENT_START = pyp.Keyword( '--', identChars=pyp.Keyword.DEFAULT_KEYWORD_CHARS + '-') _COMMENT_LINE = _COMMENT_START + pyp.restOfLine _COMMENT_BLOCK = pyp.Regex(r'/\*(?=[^!])(?:[^*]*\*+)+?/') # TERMINALS _LINE_DELIMITER = pyp.Suppress(';').setName(';') _ALTER_TOKEN = pyp.CaselessKeyword('alter') _SELECT_TOKEN = pyp.CaselessKeyword('select') _CREATE_TOKEN = pyp.CaselessKeyword('create') _UPDATE_TOKEN = pyp.CaselessKeyword('update') _INSERT_TOKEN = pyp.CaselessKeyword('insert') _REPLACE_TOKEN = pyp.CaselessKeyword('replace') _DELETE_TOKEN = pyp.CaselessKeyword('delete') _MODIFY_TOKEN = pyp.CaselessKeyword('modify') _ADD_TOKEN = pyp.CaselessKeyword('add') _CHANGE_TOKEN = pyp.CaselessKeyword('change') _DROP_TOKEN = pyp.CaselessKeyword('drop') _CONVERT_TOKEN = pyp.CaselessKeyword('convert') _TO_TOKEN = pyp.CaselessKeyword('to') _ALL_TOKEN = pyp.CaselessKeyword('all') _DISTINCT_TOKEN = pyp.CaselessKeyword('distinct') _DISTINCTROW_TOKEN = pyp.CaselessKeyword('distinctrow') _FROM_TOKEN = pyp.CaselessKeyword('from').suppress() _WHERE_TOKEN = pyp.CaselessKeyword('where').suppress() _ORDER_TOKEN = pyp.CaselessKeyword('order').suppress() _GROUP_TOKEN = pyp.CaselessKeyword('group').suppress() _HAVING_TOKEN = pyp.CaselessKeyword('having').suppress() _LIMIT_TOKEN = pyp.CaselessKeyword('limit').suppress() _BY_TOKEN = pyp.CaselessKeyword('by').suppress() _AS_TOKEN = pyp.CaselessKeyword('as').suppress() _INTO_TOKEN = pyp.CaselessKeyword('into').suppress() _VALUES_TOKEN = pyp.CaselessKeyword('values').suppress() _IS_TOKEN = pyp.CaselessKeyword('is') _NOT_TOKEN = pyp.CaselessKeyword('not') _NULL_TOKEN = pyp.CaselessKeyword('null') _TRUE_TOKEN = pyp.CaselessKeyword('true') 
_FALSE_TOKEN = pyp.CaselessKeyword('false') _UNKNOWN_TOKEN = pyp.CaselessKeyword('unknown') _IN_TOKEN = pyp.CaselessKeyword('in') _CASE_TOKEN = pyp.CaselessKeyword('case') _WHEN_TOKEN = pyp.CaselessKeyword('when') _THEN_TOKEN = pyp.CaselessKeyword('then') _ELSE_TOKEN = pyp.CaselessKeyword('else') _START_TOKEN = pyp.CaselessKeyword('start') _END_TOKEN = pyp.CaselessKeyword('end') _JOIN_TOKEN = pyp.CaselessKeyword('join') _LEFT_TOKEN = pyp.CaselessKeyword('left') _RIGHT_TOKEN = pyp.CaselessKeyword('right') _CROSS_TOKEN = pyp.CaselessKeyword('cross') _INNER_TOKEN = pyp.CaselessKeyword('inner') _OUTER_TOKEN = pyp.CaselessKeyword('outer') _NATURAL_TOKEN = pyp.CaselessKeyword('natural') _ON_TOKEN = pyp.CaselessKeyword('on') _USING_TOKEN = pyp.CaselessKeyword('using') _STRAIGHT_JOIN_TOKEN = pyp.CaselessKeyword('straight_join') _LIKE_TOKEN = pyp.CaselessKeyword('like') _ENGINE_TOKEN = pyp.CaselessKeyword('engine') _IF_TOKEN = pyp.CaselessKeyword('if').suppress() _EXISTS_TOKEN = pyp.CaselessKeyword('exists').suppress() _CHARSET_TOKEN = pyp.CaselessKeyword('charset') _CHARACTER_TOKEN = pyp.CaselessKeyword('character') _NAMES_TOKEN = pyp.CaselessKeyword('names') _COLLATE_TOKEN = pyp.CaselessKeyword('collate') _INTERVAL_TOKEN = pyp.CaselessKeyword('interval') _DATABASE_TOKEN = pyp.CaselessKeyword('database') _TABLE_TOKEN = pyp.CaselessKeyword('table').suppress() _COLUMN_TOKEN = pyp.CaselessKeyword('column').suppress() _INDEX_TOKEN = pyp.CaselessKeyword('index') _PRIMARY_TOKEN = pyp.CaselessKeyword('primary') _KEY_TOKEN = pyp.CaselessKeyword('key') _UNIQUE_TOKEN = pyp.CaselessKeyword('unique') _DUPLICATE_TOKEN = pyp.CaselessKeyword('duplicate').suppress() _AUTO_INCREMENT_TOKEN = pyp.CaselessKeyword('auto_increment') _DEFAULT_TOKEN = pyp.CaselessKeyword('default').suppress() _USE_TOKEN = pyp.CaselessKeyword('use') _IGNORE_TOKEN = pyp.CaselessKeyword('ignore') _FORCE_TOKEN = pyp.CaselessKeyword('force') _CONSTRAINT_TOKEN = pyp.CaselessKeyword('constraint') _FOREIGN_TOKEN = pyp.CaselessKeyword('foreign') _RESTRICT_TOKEN = pyp.CaselessKeyword('restrict') _CASCADE_TOKEN = pyp.CaselessKeyword('cascade') _NO_TOKEN = pyp.CaselessKeyword('no') _ACTION_TOKEN = pyp.CaselessKeyword('action') _REFERENCES_TOKEN = pyp.CaselessKeyword('references') _TINYINT_TOKEN = pyp.CaselessKeyword('tinyint') _SMALLINT_TOKEN = pyp.CaselessKeyword('smallint') _MEDIUMINT_TOKEN = pyp.CaselessKeyword('mediumint') _INT_TOKEN = pyp.CaselessKeyword('int') _INTEGER_TOKEN = pyp.CaselessKeyword('integer') _BIGINT_TOKEN = pyp.CaselessKeyword('bigint') _UNSIGNED_TOKEN = pyp.CaselessKeyword('unsigned') _DECIMAL_TOKEN = pyp.CaselessKeyword('decimal') _DEC_TOKEN = pyp.CaselessKeyword('dec') _FIXED_TOKEN = pyp.CaselessKeyword('fixed') _FLOAT_TOKEN = pyp.CaselessKeyword('float') _DOUBLE_TOKEN = pyp.CaselessKeyword('double') _PRECISION_TOKEN = pyp.CaselessKeyword('precision') _DATE_TOKEN = pyp.CaselessKeyword('date') _DATETIME_TOKEN = pyp.CaselessKeyword('datetime') _TIMESTAMP_TOKEN = pyp.CaselessKeyword('timestamp') _TIME_TOKEN = pyp.CaselessKeyword('time') _YEAR_TOKEN = pyp.CaselessKeyword('year') _CHAR_TOKEN = pyp.CaselessKeyword('char') _VARCHAR_TOKEN = pyp.CaselessKeyword('varchar') _BINARY_TOKEN = pyp.CaselessKeyword('binary') _VARBINARY_TOKEN = pyp.CaselessKeyword('varbinary') _TINYBLOB_TOKEN = pyp.CaselessKeyword('tinyblob') _BLOB_TOKEN = pyp.CaselessKeyword('blob') _MEDIUMBLOB_TOKEN = pyp.CaselessKeyword('mediumblob') _LONGBLOB_TOKEN = pyp.CaselessKeyword('longblob') _TINYTEXT_TOKEN = pyp.CaselessKeyword('tinytext') _TEXT_TOKEN = 
pyp.CaselessKeyword('text') _MEDIUMTEXT_TOKEN = pyp.CaselessKeyword('mediumtext') _LONGTEXT_TOKEN = pyp.CaselessKeyword('longtext') _ENUM_TOKEN = pyp.CaselessKeyword('enum') _SET_TOKEN = pyp.CaselessKeyword('set') _BIT_TOKEN = pyp.CaselessKeyword('bit') _FIRST_TOKEN = pyp.CaselessKeyword('first') _BEFORE_TOKEN = pyp.CaselessKeyword('before') _AFTER_TOKEN = pyp.CaselessKeyword('after') _CURRENT_TIMESTAMP_TOKEN = pyp.CaselessKeyword('current_timestamp') _BEGIN_TOKEN = pyp.CaselessKeyword('begin') _TRANSACTION_TOKEN = pyp.CaselessKeyword('transaction') _COMMIT_TOKEN = pyp.CaselessKeyword('commit') _ROLLBACK_TOKEN = pyp.CaselessKeyword('rollback') _LOCAL_TOKEN = pyp.CaselessKeyword('local') _SESSION_TOKEN = pyp.CaselessKeyword('session') _GLOBAL_TOKEN = pyp.CaselessKeyword('global') ## IDENTIFIER _KEYWORDS = pyp.Forward() # list of keywords, defined by __init__() _IDENTIFIER = pyp.Group(pyp.Word(pyp.alphas, pyp.alphanums + '_$') | pyp.QuotedString('`', multiline=True, escChar='\\')) _CHARSET = '_' + pyp.Word(pyp.alphanums).setResultsName('character_set') _STRING = (pyp.Optional(_CHARSET) + (pyp.QuotedString('\'', multiline=True, escChar='\\') | pyp.QuotedString('\"', multiline=True, escChar='\\'))) _NUMBER = pyp.Word(pyp.nums) _ARITH_SIGN = pyp.Word('+-', exact=1) _E = pyp.CaselessLiteral('E') _REAL_NUMBER = pyp.Combine(pyp.Optional(_ARITH_SIGN) + pyp.Optional(_NUMBER) + '.' + _NUMBER + pyp.Optional(_E + pyp.Optional(_ARITH_SIGN) + _NUMBER)) _INT_NUMBER = pyp.Combine(pyp.Optional(_ARITH_SIGN) + _NUMBER + pyp.Optional(_E + pyp.Optional('+') + _NUMBER)) _HEX = ((pyp.CaselessLiteral('0x').suppress() + pyp.Word(pyp.hexnums)) | pyp.Regex(r"x'(?:[0-9a-fA-F])+'")) _VAL = pyp.Group( _HEX | pyp.OneOrMore(_STRING) | _REAL_NUMBER | _INT_NUMBER | _NULL_TOKEN | _TRUE_TOKEN | _FALSE_TOKEN).setResultsName('val') ## TYPES _FIELD_LIST = pyp.Group(pyp.Suppress('(') + pyp.delimitedList(_IDENTIFIER) + pyp.Suppress(')') ).setResultsName('fields') _STRING_LIST = pyp.Group(pyp.Suppress('(') + pyp.delimitedList(_STRING) + pyp.Suppress(')') ).setResultsName('values') _TYPE_SIZE = (pyp.Suppress('(') + _NUMBER.setName('type_size') + pyp.Suppress(')')) _TYPE_PRECISION = (pyp.Suppress('(') + _NUMBER.setName('type_precision') + pyp.Suppress(',') + _NUMBER.setName('type_scale') + pyp.Suppress(')')) # Types that don't take arguments. 
_SIMPLE_TYPE = (_DATE_TOKEN | _DATETIME_TOKEN | _TIMESTAMP_TOKEN | _TIME_TOKEN | _YEAR_TOKEN | _TINYTEXT_TOKEN | _TEXT_TOKEN | _MEDIUMTEXT_TOKEN | _LONGTEXT_TOKEN | _TINYBLOB_TOKEN | _BLOB_TOKEN | _MEDIUMBLOB_TOKEN | _LONGBLOB_TOKEN).setResultsName('type_type') _BIT = (_BIT_TOKEN.setResultsName('type_type') + pyp.Optional(_TYPE_SIZE)) _ENUM = (_ENUM_TOKEN.setResultsName('type_type') + _STRING_LIST) _SET_TYPE = (_SET_TOKEN.setResultsName('type_type') + _STRING_LIST) _INTS = ((_TINYINT_TOKEN | _SMALLINT_TOKEN | _MEDIUMINT_TOKEN | _INT_TOKEN | _INTEGER_TOKEN | _BIGINT_TOKEN).setResultsName('type_type') + pyp.Optional(_TYPE_SIZE) + pyp.Optional(_UNSIGNED_TOKEN)) _REALS = ((_DECIMAL_TOKEN | _DEC_TOKEN | _FIXED_TOKEN | _FLOAT_TOKEN | _DOUBLE_TOKEN + pyp.Optional(_PRECISION_TOKEN) ).setResultsName('type_type') + pyp.Optional(_TYPE_PRECISION)) _CHARS = ((_VARCHAR_TOKEN | _CHAR_TOKEN | _BINARY_TOKEN | _VARBINARY_TOKEN).setResultsName('type_type') + pyp.Optional(_TYPE_SIZE) + pyp.Optional(_BINARY_TOKEN)) _TYPE = pyp.Group(_BIT | _ENUM | _SET_TYPE | _INTS | _REALS | _CHARS | _SIMPLE_TYPE ).setResultsName('type') ## GRAMMAR # COMMONS _DB_NAME = _IDENTIFIER.setResultsName('database') _TABLE_NAME_ONLY = _IDENTIFIER.setResultsName('table') _TABLE_NAME = pyp.Group((_DB_NAME + '.' + _TABLE_NAME_ONLY) | _TABLE_NAME_ONLY).setResultsName('table_spec') _COLUMN_NAME_WILD = (_IDENTIFIER | '*').setResultsName('column') _COLUMN_NAME = pyp.Group( (_DB_NAME + '.' + _TABLE_NAME_ONLY + '.' + _COLUMN_NAME_WILD) | (_TABLE_NAME_ONLY + '.' + _COLUMN_NAME_WILD) | _COLUMN_NAME_WILD).setResultsName('column_spec') _INDEX_NAME = _IDENTIFIER.setResultsName('index') _COLUMN_LIST = pyp.Group(pyp.Suppress('(') + pyp.delimitedList(_COLUMN_NAME) + pyp.Suppress(')') ).setResultsName('columns') # DATA DEFINITION COMMONS _DEFAULT_VAL = (_DEFAULT_TOKEN + pyp.Group(_NULL_TOKEN | _VAL | _CURRENT_TIMESTAMP_TOKEN ).setResultsName('default')) _COLUMN_CONSTRAINT = pyp.Group(pyp.Optional(_NOT_TOKEN) + _NULL_TOKEN ).setResultsName('constraint') _POSITIONAL = pyp.Group(_FIRST_TOKEN | ((_BEFORE_TOKEN | _AFTER_TOKEN) + _COLUMN_NAME) ).setResultsName('position') # Optional column flags: # - CHARSET <charset> # - CHARACTER SET <charset> # - COLLATE <collate name> # - DEFAULT '<value>' # - AUTO_INCREMENT # - NOT NULL # - ON UPDATE CURRENT_TIMESTAMP _COLUMN_FLAGS = pyp.Group( (_CHARSET_TOKEN + _IDENTIFIER.setResultsName('charset')) | (_CHARACTER_TOKEN + _SET_TOKEN + _IDENTIFIER.setResultsName('charset')) | (_COLLATE_TOKEN + _IDENTIFIER.setResultsName('collate')) | _COLUMN_CONSTRAINT | _DEFAULT_VAL | _AUTO_INCREMENT_TOKEN.setResultsName('option') | (_ON_TOKEN + _UPDATE_TOKEN + _CURRENT_TIMESTAMP_TOKEN) ).setResultsName('column_flags') _COLUMN_DEFINITION = pyp.Group(_TYPE + pyp.ZeroOrMore(_COLUMN_FLAGS) ).setResultsName('column_definition') _KEY_DEFINITION = pyp.Group( (((pyp.Optional(_UNIQUE_TOKEN).setResultsName('key_option') + (_INDEX_TOKEN | _KEY_TOKEN).setResultsName('key_type')) | _UNIQUE_TOKEN.setResultsName('key_type')) + pyp.Optional(_IDENTIFIER).setResultsName('key_name') + _FIELD_LIST) | ((_PRIMARY_TOKEN + _KEY_TOKEN).setResultsName('key_type') + _FIELD_LIST) ).setResultsName('key_definition') # ALTER STATEMENTS # ADD COLUMN columnname TYPE [BEFORE | AFTER ...] # ADD COLUMN (columnname TYPE, ...) [BEFORE | AFTER ...] 
_ALTER_TABLE_ADD_COLUMN = pyp.Group( _ADD_TOKEN + pyp.Optional(_COLUMN_TOKEN) + ((_COLUMN_NAME + _COLUMN_DEFINITION) | (pyp.Suppress('(') + pyp.delimitedList(_COLUMN_NAME + _COLUMN_DEFINITION) + pyp.Suppress(')'))) + pyp.ZeroOrMore(_COLUMN_FLAGS) + pyp.Optional(_PRIMARY_TOKEN + _KEY_TOKEN) + pyp.Optional(_POSITIONAL) ).setResultsName('add_column') _REFERENCE_OPTION = pyp.Group( _RESTRICT_TOKEN | _CASCADE_TOKEN | (_SET_TOKEN + _NULL_TOKEN) | (_NO_TOKEN + _ACTION_TOKEN) ).setResultsName('reference_option') _CONSTRAINT_DEFINITION = pyp.Group( pyp.Optional( _CONSTRAINT_TOKEN + pyp.Optional(_IDENTIFIER).setResultsName('constraint_name') ) + _FOREIGN_TOKEN + _KEY_TOKEN + pyp.Optional(_IDENTIFIER).setResultsName('key_name') + _FIELD_LIST + _REFERENCES_TOKEN + _TABLE_NAME + _FIELD_LIST + pyp.Optional(_ON_TOKEN + _DELETE_TOKEN + _REFERENCE_OPTION) + pyp.Optional(_ON_TOKEN + _UPDATE_TOKEN + _REFERENCE_OPTION) ) _ALTER_TABLE_ADD_CONSTRAINT = pyp.Group( _ADD_TOKEN + _CONSTRAINT_DEFINITION ).setResultsName('add_constraint') _ALTER_TABLE_DROP_FOREIGN_KEY = pyp.Group( _DROP_TOKEN + _FOREIGN_TOKEN + _KEY_TOKEN + _IDENTIFIER.setResultsName('constraint_name') ).setResultsName('drop_foreign_key') # ADD [UNIQUE] INDEX | KEY ... # ADD UNIQUE ... _ALTER_TABLE_ADD_INDEX = pyp.Group( _ADD_TOKEN + ((pyp.Optional(_UNIQUE_TOKEN).setResultsName('key_option') + (_INDEX_TOKEN | _KEY_TOKEN)) | (_UNIQUE_TOKEN).setResultsName('key_type')) + pyp.Optional(_IDENTIFIER).setResultsName('key_name') + _FIELD_LIST ).setResultsName('add_index') _ALTER_TABLE_ADD_PRIMARY_KEY = pyp.Group( _ADD_TOKEN + _PRIMARY_TOKEN + _KEY_TOKEN + _FIELD_LIST ).setResultsName('add_primary_key') _ALTER_TABLE_ALTER = pyp.Group( _ALTER_TOKEN + pyp.Optional(_COLUMN_TOKEN) + _COLUMN_NAME + ((_SET_TOKEN + _DEFAULT_VAL) | (_DROP_TOKEN + _DEFAULT_TOKEN)) ).setResultsName('alter_column') _ALTER_TABLE_MODIFY = pyp.Group( _MODIFY_TOKEN + pyp.Optional(_COLUMN_TOKEN) + (_COLUMN_NAME + _COLUMN_DEFINITION) + pyp.Optional(_POSITIONAL) ).setResultsName('modify_column') _ALTER_TABLE_CHANGE = pyp.Group( _CHANGE_TOKEN + pyp.Optional(_COLUMN_TOKEN) + _COLUMN_NAME + _COLUMN_NAME.setResultsName('column_spec_new') + _COLUMN_DEFINITION ).setResultsName('change_column') _ALTER_TABLE_DROP_COLUMN = pyp.Group( _DROP_TOKEN + pyp.Optional(_COLUMN_TOKEN) + _COLUMN_NAME ).setResultsName('drop_column') _ALTER_TABLE_DROP_PRIMARY_KEY = pyp.Group( _DROP_TOKEN + _PRIMARY_TOKEN + _KEY_TOKEN ).setResultsName('drop_primary_key') _ALTER_TABLE_DROP_INDEX = pyp.Group( _DROP_TOKEN + (_INDEX_TOKEN | _KEY_TOKEN) + _IDENTIFIER.setResultsName('key_name') ).setResultsName('drop_index') _ALTER_TABLE_CONVERT = pyp.Group( _CONVERT_TOKEN + _TO_TOKEN + _CHARACTER_TOKEN + _SET_TOKEN + _IDENTIFIER.setResultsName('character_set') ).setResultsName('convert') _ALTER_CHARACTER_SET = pyp.Group( _CHARACTER_TOKEN + _SET_TOKEN + _IDENTIFIER.setResultsName('character_set') ).setResultsName('alter_charset') # The various ALTER TABLE operations supported: # - ADD PRIMARY KEY # - ADD INDEX # - ADD COLUMN # - CHANGE # - DROP # - ALTER _ALTER_TABLE_OPERATIONS = pyp.Group( _ALTER_TABLE_MODIFY | _ALTER_TABLE_ADD_PRIMARY_KEY | _ALTER_TABLE_ADD_CONSTRAINT | _ALTER_TABLE_DROP_FOREIGN_KEY | _ALTER_TABLE_ADD_INDEX | _ALTER_TABLE_ADD_COLUMN | _ALTER_TABLE_CHANGE | _ALTER_TABLE_DROP_PRIMARY_KEY | _ALTER_TABLE_DROP_INDEX | _ALTER_TABLE_DROP_COLUMN | _ALTER_TABLE_ALTER | _ALTER_TABLE_CONVERT | _ALTER_CHARACTER_SET ).setResultsName('operations') _ALTER_TABLE_SQL = pyp.Group(_ALTER_TOKEN + _TABLE_TOKEN + _TABLE_NAME + 
pyp.delimitedList(_ALTER_TABLE_OPERATIONS) ).setResultsName('alter') _ALTER_DATABASE_OPERATIONS = pyp.Group( _ALTER_CHARACTER_SET ).setResultsName('operations') _ALTER_DATABASE_SQL = pyp.Group( _ALTER_TOKEN + _DATABASE_TOKEN + _DB_NAME + pyp.delimitedList(_ALTER_DATABASE_OPERATIONS) ).setResultsName('alter_db') # CREATE STATEMENTS _CREATE_DEFINITION = pyp.Group(_KEY_DEFINITION | _CONSTRAINT_DEFINITION | (_COLUMN_NAME + _COLUMN_DEFINITION) ).setResultsName('operation') # Match on IF NOT EXISTS _CREATE_NO_OVERWRITE = _IF_TOKEN + _NOT_TOKEN + _EXISTS_TOKEN _CREATE_OPERATIONS = pyp.Group(pyp.delimitedList(_CREATE_DEFINITION) ).setResultsName('operations') # CREATE TABLE table options can come in any order. There may be # zero or many of them _TABLE_FLAGS = pyp.Group(_ENGINE_TOKEN | (_DEFAULT_TOKEN + _CHARSET_TOKEN) | _CHARSET_TOKEN | (_CHARACTER_TOKEN + _SET_TOKEN) | (_DEFAULT_TOKEN + _CHARACTER_TOKEN + _SET_TOKEN) | _COLLATE_TOKEN ).setResultsName('table_flags_type') # CREATE TABLE table options are always of the format: FLAG=VALUE _TABLE_FLAGS_DEF = pyp.Group( _TABLE_FLAGS + pyp.Optional(pyp.Suppress('=')) + _IDENTIFIER.setResultsName('table_flags_identifier') ).setResultsName('table_flags_definition') _CREATE_TABLE_SQL = pyp.Group( _CREATE_TOKEN + _TABLE_TOKEN + pyp.Optional(_CREATE_NO_OVERWRITE) + _TABLE_NAME + pyp.Suppress('(') + _CREATE_OPERATIONS + pyp.Suppress(')') + pyp.ZeroOrMore(_TABLE_FLAGS_DEF).setResultsName('table_flags') ).setResultsName('create_table') _CREATE_TABLE_LIKE_SQL = pyp.Group( _CREATE_TOKEN + _TABLE_TOKEN + pyp.Optional(_CREATE_NO_OVERWRITE) + _TABLE_NAME + _LIKE_TOKEN + _TABLE_NAME ).setResultsName('create_table_like') # DROP TABLE [IF EXISTS] table _DROP_TABLE_SQL = pyp.Group(_DROP_TOKEN + _TABLE_TOKEN + pyp.Optional(_IF_TOKEN + _EXISTS_TOKEN) + pyp.delimitedList(_TABLE_NAME) ).setResultsName('drop_table') # CREATE DATABASE dbname _CREATE_DATABASE_SQL = pyp.Group(_CREATE_TOKEN + _DATABASE_TOKEN + pyp.Optional(_CREATE_NO_OVERWRITE) + _DB_NAME ).setResultsName('create_database') # DROP DATABASE dbname _DROP_DATABASE_SQL = pyp.Group(_DROP_TOKEN + _DATABASE_TOKEN + pyp.Optional(_IF_TOKEN + _EXISTS_TOKEN) + _DB_NAME ).setResultsName('drop_database') # CREATE INDEX idx ON table (column, ...) 
_CREATE_INDEX_SQL = ( _CREATE_TOKEN + pyp.Optional(_UNIQUE_TOKEN).setResultsName('key_option') + _INDEX_TOKEN + _INDEX_NAME.setResultsName('key_name') + _ON_TOKEN + _TABLE_NAME + _COLUMN_LIST) # EXPRESSIONS _BINOP1 = pyp.oneOf("* / %") _BINOP2 = pyp.oneOf("+ - << >> | &") _BINOP3 = pyp.oneOf(":= = != <> < > >= <=") _BINOP4 = pyp.oneOf("like between regexp", caseless=True) # optional "NOT" _BINOP5 = pyp.oneOf("and", caseless=True) _BINOP6 = pyp.oneOf("or", caseless=True) _EXPRESSION = pyp.Forward() # _EXPRESSION is recursive _DATE_FUNCTION_NAME = pyp.oneOf("date_add date_sub", caseless=True ).setResultsName('function_name') _INTERVAL_UNIT = pyp.oneOf( "microsecond second minute hour day week month quarter year " "second_microsecond minute_microsecond minute_second hour_microsecond " "hour_second hour_minute day_microsecond day_second day_minute " "day_hour year_month", caseless=True ).setResultsName('interval_unit') _DATE_FUNCTION = pyp.Group( _DATE_FUNCTION_NAME + pyp.Suppress('(') + _EXPRESSION.setResultsName('arg') + pyp.Suppress(',') + _INTERVAL_TOKEN + _EXPRESSION.setResultsName('interval_val') + _INTERVAL_UNIT + pyp.Suppress(')') ).setResultsName('function') _FUNCTION_NAME = (_IDENTIFIER ).setResultsName('function_name') _ARG_LIST = pyp.Group( pyp.Suppress('(') + pyp.Optional(pyp.delimitedList(_EXPRESSION.setResultsName('arg'))) + pyp.Suppress(')') ).setResultsName('args') _FUNCTION = pyp.Group( _FUNCTION_NAME + _ARG_LIST ).setResultsName('function') _VARIABLE = pyp.Group( pyp.Group(pyp.Literal('@@') | pyp.Literal('@') ).setResultsName('scope') + _IDENTIFIER.setResultsName('variable')) _LVAL = ((pyp.Suppress('(') + _EXPRESSION + pyp.Suppress(')')) | _VAL | _FUNCTION | _DATE_FUNCTION | _COLUMN_NAME + pyp.Optional( _COLLATE_TOKEN + _IDENTIFIER.setResultsName('collate')) | _VARIABLE) _IN_EXPRESSION = pyp.Group( _LVAL + pyp.Optional(_NOT_TOKEN) + _IN_TOKEN + pyp.Suppress('(') + pyp.delimitedList(_VAL) + pyp.Suppress(')') ).setResultsName('in') _IS_EXPRESSION = pyp.Group( _LVAL + _IS_TOKEN + pyp.Optional(_NOT_TOKEN) + (_NULL_TOKEN | _TRUE_TOKEN | _FALSE_TOKEN | _UNKNOWN_TOKEN) ).setResultsName('is') _CASES_LIST = ( pyp.OneOrMore(_WHEN_TOKEN + _EXPRESSION + _THEN_TOKEN + _EXPRESSION) + pyp.Optional(_ELSE_TOKEN + _EXPRESSION)) _CASE_EXPRESSION = pyp.Group( _CASE_TOKEN + (_CASES_LIST | (_EXPRESSION + _CASES_LIST)) + _END_TOKEN).setResultsName('case') _UNARY = ( _NOT_TOKEN | '!' 
| '-') _EXPRESSION0 = ( _IS_EXPRESSION | _IN_EXPRESSION | _CASE_EXPRESSION | (pyp.Optional(_UNARY) + _LVAL)) _EXPRESSION1 = ( pyp.Group(_EXPRESSION0 + pyp.ZeroOrMore(_BINOP1 + _EXPRESSION0)).setResultsName('ex')) _EXPRESSION2 = ( pyp.Group(_EXPRESSION1 + pyp.ZeroOrMore(_BINOP2 + _EXPRESSION1)).setResultsName('ex')) _EXPRESSION3 = ( pyp.Group(_EXPRESSION2 + pyp.ZeroOrMore(_BINOP3 + _EXPRESSION2)).setResultsName('ex')) _EXPRESSION4 = ( pyp.Group(_EXPRESSION3 + pyp.ZeroOrMore( pyp.Optional(_NOT_TOKEN) + _BINOP4 + _EXPRESSION3) ).setResultsName('ex')) _EXPRESSION5 = ( pyp.Group(_EXPRESSION4 + pyp.ZeroOrMore(_BINOP5 + _EXPRESSION4)).setResultsName('ex')) _EXPRESSION << ( pyp.Group(_EXPRESSION5 + pyp.ZeroOrMore(_BINOP6 + _EXPRESSION5)).setResultsName('ex')) # SET STATEMENT _SET_VARIABLE = ( pyp.Optional( _LOCAL_TOKEN | _SESSION_TOKEN | _GLOBAL_TOKEN | pyp.Literal('@@') | pyp.Literal('@') ).setResultsName('scope') + _IDENTIFIER.setResultsName('variable') + pyp.Literal('=') + _EXPRESSION) _SET_CHARSET = ( _CHARACTER_TOKEN + _SET_TOKEN + _EXPRESSION) _SET_NAMES = ( _NAMES_TOKEN + _EXPRESSION) _SET_SQL = pyp.Group( _SET_TOKEN + pyp.delimitedList(_SET_VARIABLE | _SET_CHARSET | _SET_NAMES)) # TABLE REFERENCE _INDEX_HINT = ((_USE_TOKEN | _IGNORE_TOKEN | _FORCE_TOKEN) + (_INDEX_TOKEN | _KEY_TOKEN) + pyp.Suppress('(') + pyp.delimitedList(_IDENTIFIER) + pyp.Suppress(')')) _ALIAS = (pyp.Optional(_AS_TOKEN) + pyp.NotAny(_KEYWORDS) + _IDENTIFIER.setResultsName('alias')) _TABLE = (pyp.Group(_TABLE_NAME + pyp.Optional(_ALIAS)).setResultsName('table_alias') + pyp.Optional(pyp.delimitedList(_INDEX_HINT))) _JOIN_CONDITION = ((_ON_TOKEN + _EXPRESSION) | pyp.Group(_USING_TOKEN + _COLUMN_LIST).setResultsName('using')) _JOIN_LEFT_RIGHT = ((_LEFT_TOKEN | _RIGHT_TOKEN) + pyp.Optional(_OUTER_TOKEN)) _JOIN_SIDE = pyp.Group((_INNER_TOKEN | _CROSS_TOKEN) |(_NATURAL_TOKEN + pyp.Optional(_JOIN_LEFT_RIGHT)) | _JOIN_LEFT_RIGHT ).setResultsName('join_side') _TABLE_JOIN = pyp.Group( pyp.Optional(_JOIN_SIDE) + (_JOIN_TOKEN | _STRAIGHT_JOIN_TOKEN) + _TABLE + pyp.Optional(_JOIN_CONDITION)).setResultsName('tablejoin') _TABLE_REFERENCE = _TABLE + pyp.ZeroOrMore(_TABLE_JOIN) _TABLE_REFERENCES = pyp.Group(pyp.delimitedList(_TABLE_REFERENCE)) # DATA MANIPULATION COMMONS _EXPRESSION_LIST = pyp.Group(pyp.delimitedList(_EXPRESSION)) _WHERE = (_WHERE_TOKEN + _EXPRESSION_LIST.setResultsName('where')) _ORDER_BY = (_ORDER_TOKEN + _BY_TOKEN + _EXPRESSION_LIST.setResultsName('order_by')) _GROUP_BY = (_GROUP_TOKEN + _BY_TOKEN + _EXPRESSION_LIST.setResultsName('group_by')) _HAVING = (_HAVING_TOKEN + _EXPRESSION_LIST.setResultsName('having')) _LIMIT = (_LIMIT_TOKEN + _NUMBER.setResultsName('limit')) _SET_VALUE = pyp.Group(_COLUMN_NAME + pyp.Suppress('=') + _EXPRESSION.setResultsName('set_value') ).setResultsName('set') _SET_VALUE_LIST = pyp.Group(pyp.delimitedList(_SET_VALUE) ).setResultsName('sets') _SET = (_SET_TOKEN.suppress() + _SET_VALUE_LIST) # SELECT STATEMENTS _SELECT_EXPRESSION = (pyp.Group( _EXPRESSION.setResultsName('select_expression') + pyp.Optional(_AS_TOKEN + _IDENTIFIER.setResultsName('alias'))) | pyp.Suppress('*')) _SELECT_FROM = pyp.Group(_FROM_TOKEN + _TABLE_REFERENCES).setResultsName('select_from') _SELECT_SQL_2 = (_SELECT_FROM + pyp.Optional(_WHERE) + pyp.Optional(_GROUP_BY) + pyp.Optional(_HAVING) + pyp.Optional(_ORDER_BY) + pyp.Optional(_LIMIT)) _SELECT_OPTIONS = (_ALL_TOKEN | _DISTINCT_TOKEN | _DISTINCTROW_TOKEN) _SELECT_SQL = pyp.Group(_SELECT_TOKEN + pyp.Optional(_SELECT_OPTIONS) + pyp.delimitedList(_SELECT_EXPRESSION) 
.setResultsName('select_expressions') + pyp.Optional(_SELECT_SQL_2) ).setResultsName('select') # UPDATE STATEMENTS _UPDATE_TABLE = (_TABLE_NAME + _SET + pyp.Optional(_WHERE) + pyp.Optional(_ORDER_BY) + pyp.Optional(_LIMIT)) _UPDATE_TABLE_REFERENCE = (_TABLE_REFERENCES + _SET + pyp.Optional(_WHERE)) _UPDATE_SQL = pyp.Group(_UPDATE_TOKEN + (_UPDATE_TABLE | _UPDATE_TABLE_REFERENCE) ).setResultsName('update') # INSERT/REPLACE STATEMENTS _VALUES = pyp.Group(pyp.Suppress('(') + pyp.delimitedList(_EXPRESSION) + pyp.Suppress(')') ).setResultsName('vals') _INSERT_VALUES = (pyp.Optional(_COLUMN_LIST) + _VALUES_TOKEN + pyp.delimitedList(_VALUES)) _INSERT_SET = _SET _INSERT_SELECT = (pyp.Optional(_COLUMN_LIST) + pyp.Optional(pyp.Suppress('(')) + pyp.Group(_SELECT_SQL).setResultsName('source_select') + pyp.Optional(pyp.Suppress(')'))) _ON_DUPLICATE_KEY_UPDATE = (_ON_TOKEN + _DUPLICATE_TOKEN + _KEY_TOKEN + _UPDATE_TOKEN + _SET_VALUE_LIST) _INSERT_SQL = pyp.Group(_INSERT_TOKEN + pyp.Optional(_IGNORE_TOKEN) + pyp.Optional(_INTO_TOKEN) + _TABLE_NAME + (_INSERT_VALUES | _INSERT_SET | _INSERT_SELECT) + pyp.Optional(_ON_DUPLICATE_KEY_UPDATE) ).setResultsName('insert') _REPLACE_SQL = pyp.Group(_REPLACE_TOKEN + pyp.Optional(_INTO_TOKEN) + _TABLE_NAME + (_INSERT_VALUES | _INSERT_SET | _INSERT_SELECT) ).setResultsName('replace') # DELETE STATEMENTS # DELETE FROM table WHERE ... [ORDER BY ...] [LIMIT ...] # WHERE ... is not optional because sql.par demands its existence # in this statement type. _DELETE_SIMPLE_SQL = pyp.Group(_DELETE_TOKEN + _FROM_TOKEN + _TABLE_NAME + pyp.Optional(_WHERE) + pyp.Optional(_ORDER_BY) + pyp.Optional(_LIMIT) ).setResultsName('delete') # DELETE table FROM table_references [WHERE ...] _DELETE_MULTI_SQL = pyp.Group(_DELETE_TOKEN + pyp.delimitedList(_TABLE_NAME + pyp.Optional('.*')) + _FROM_TOKEN + _TABLE_REFERENCES.setResultsName('exclude') + (pyp.Group(pyp.Optional(_WHERE)) .setResultsName('exclude')) ).setResultsName('delete') # DELETE FROM table USING table_references [WHERE ...] _DELETE_MULTI_SQL2 = pyp.Group(_DELETE_TOKEN + _FROM_TOKEN + pyp.delimitedList(_TABLE_NAME + pyp.Optional('.*')) + _USING_TOKEN + _TABLE_REFERENCES.setResultsName('exclude') + (pyp.Group(pyp.Optional(_WHERE)) .setResultsName('exclude')) ).setResultsName('delete') # TRANSACTIONS _START_TRANSACTION_SQL = pyp.Group((_START_TOKEN + _TRANSACTION_TOKEN) | _BEGIN_TOKEN ).setResultsName('start_transaction') _END_TRANSACTION_SQL = pyp.Group(_COMMIT_TOKEN | _ROLLBACK_TOKEN ).setResultsName('end_transaction') # UNSUPPORTED QUERIES _RENAME_TABLE_SQL = (pyp.CaselessKeyword('rename') + pyp.SkipTo(_LINE_DELIMITER).suppress()) _TRUNCATE_SQL = (pyp.CaselessKeyword('truncate') + pyp.SkipTo(_LINE_DELIMITER).suppress()) # VERSIONED COMMENTS _STATEMENT = pyp.Forward() _VERSIONED_COMMENT = (pyp.Literal('/*!') + pyp.Optional(_NUMBER.setResultsName('min_version')) + _STATEMENT + pyp.Literal('*/')) # MAIN _STATEMENT << pyp.Group(_ALTER_TABLE_SQL | _ALTER_DATABASE_SQL | _CREATE_TABLE_SQL | _CREATE_TABLE_LIKE_SQL | _DROP_TABLE_SQL | _RENAME_TABLE_SQL | _SELECT_SQL | _UPDATE_SQL | _INSERT_SQL | _REPLACE_SQL | _DELETE_MULTI_SQL | _DELETE_MULTI_SQL2 | _DELETE_SIMPLE_SQL | _TRUNCATE_SQL | _START_TRANSACTION_SQL | _END_TRANSACTION_SQL | _CREATE_DATABASE_SQL | _DROP_DATABASE_SQL | _CREATE_INDEX_SQL | _SET_SQL | _VERSIONED_COMMENT ).setResultsName('statement') _QUERY = pyp.Group(_STATEMENT + _LINE_DELIMITER).setResultsName('query') _QUERY.ignore(_COMMENT_LINE) _QUERY.ignore(_COMMENT_BLOCK)
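# Usage sketch (assumption: the 'db' helper module, logging configuration
# and ParseError class are available as in the original project; every
# statement must be terminated by ';'):
#
#   parser = SQLParser()
#   for query in parser.ParseString('SELECT a FROM t WHERE a = 1;'):
#       print(query)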