class LSBInitLexer(lexer.Lexer):
  """Parse out upstart configurations from init scripts.

  Runlevels in /etc/init.d are defined in stanzas like:

  ### BEGIN INIT INFO
  # Provides:          sshd
  # Required-Start:    $remote_fs $syslog
  # Required-Stop:     $remote_fs $syslog
  # Default-Start:     2 3 4 5
  # Default-Stop:      1
  # Short-Description: OpenBSD Secure Shell server
  ### END INIT INFO
  """

  tokens = [
      lexer.Token("INITIAL", r"### BEGIN INIT INFO", None, "UPSTART"),
      lexer.Token("UPSTART", r"### END INIT INFO", "Finish", "INITIAL"),
      lexer.Token("UPSTART", r"#\s+([-\w]+):\s+([^#\n]*)", "StoreEntry", None),
      lexer.Token("UPSTART", r"\n\s*\w+", "Finish", None),
      lexer.Token(".*", ".", None, None)
  ]

  # Stanza keys that must all be present for the result to be returned.
  required = {"provides", "default-start"}

  def __init__(self):
    super(LSBInitLexer, self).__init__()
    # Maps lowercased stanza keys to their raw values.
    self.entries = {}

  def StoreEntry(self, match, **_):
    """Record one `# Key: value` line from the INIT INFO stanza."""
    raw_key, value = match.groups()
    normalized_key = raw_key.strip().lower()
    if normalized_key:
      self.entries[normalized_key] = value

  def Finish(self, **_):
    """Stop scanning by discarding whatever remains in the buffer."""
    self.buffer = []

  def ParseEntries(self, data):
    """Extract the INIT INFO entries from an init script.

    Args:
      data: the init script text to scan.

    Returns:
      A dict of stanza entries if every required key was found,
      otherwise None.
    """
    precondition.AssertType(data, Text)
    self.entries = {}
    self.Reset()
    self.Feed(data)
    self.Close()
    # dict iteration yields keys, so issubset checks the found key names.
    if self.required.issubset(self.entries):
      return self.entries
    return None
class Parser(lexer.SearchParser):
  """Parses and generates an AST for a query written in the described language.

  Examples of valid syntax:
    size is 40
    (name contains "Program Files" AND hash.md5 is "123abc")
    @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow")
  """

  expression_cls = BasicExpression
  binary_expression_cls = BinaryExpression
  context_cls = ContextExpression
  identity_expression_cls = IdentityExpression

  # Accumulates the members of a bracketed [...] list argument. This is a
  # class-level default; ListStart rebinds it to a fresh list per use.
  list_args = []

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                  "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),
      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),
      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),
      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),
      # List processing.
      lexer.Token("LIST_ARG", r"]", "PopState,ListFinish", None),
      lexer.Token("LIST_ARG", r"(\d+\.\d+)", "InsertFloatArg", "LIST_ARG"),
      lexer.Token("LIST_ARG", r"(0x[a-f\d]+)", "InsertInt16Arg", "LIST_ARG"),
      lexer.Token("LIST_ARG", r"(\d+)", "InsertIntArg", "LIST_ARG"),
      lexer.Token("LIST_ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("LIST_ARG", "'", "PushState,StringStart", "SQ_STRING"),
      lexer.Token("LIST_ARG", r",", None, None),
      # Basic expression
      lexer.Token("ATTRIBUTE", r"[\w._0-9]+", "StoreAttribute", "OPERATOR"),
      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      lexer.Token("ARG", r"(0x[a-f\d]+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", r"\[", "PushState,ListStart", "LIST_ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the last parameter from arg_list has been pushed
      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                  "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token("BINARY", ".", "PushBack,PopState", None),
      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def InsertArg(self, string="", **_):
    """Insert an arg to the current expression.

    Args:
      string: The argument value (already converted to its final type).

    Returns:
      "BINARY" to transition the lexer when the current expression became
      complete, otherwise None (list members are just accumulated).
    """
    if self.state == "LIST_ARG":
      self.list_args.append(string)
    elif self.current_expression.AddArg(string):
      # This expression is complete
      self.stack.append(self.current_expression)
      self.current_expression = self.expression_cls()
      # We go to the BINARY state, to find if there's an AND or OR operator
      return "BINARY"

  def InsertFloatArg(self, string="", **_):
    """Inserts a Float argument."""
    try:
      float_value = float(string)
      return self.InsertArg(float_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid float." % string)

  def InsertIntArg(self, string="", **_):
    """Inserts an Integer argument."""
    try:
      int_value = int(string)
      return self.InsertArg(int_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid integer." % string)

  def InsertInt16Arg(self, string="", **_):
    """Inserts an Integer in base16 argument."""
    try:
      int_value = int(string, 16)
      return self.InsertArg(int_value)
    except (TypeError, ValueError):
      raise ParseError("%s is not a valid base16 integer." % string)

  def ListStart(self, **_):
    """Begin a fresh [...] list argument."""
    self.list_args = []

  def ListFinish(self, **_):
    """Close the current list and insert it as a single argument."""
    return self.InsertArg(string=self.list_args)

  def StringFinish(self, **_):
    """Complete a quoted string as an attribute or an argument."""
    if self.state == "ATTRIBUTE":
      return self.StoreAttribute(string=self.string)
    # BUG FIX: the original condition was `self.state == "ARG" or "LIST_ARG"`,
    # which is always true because the non-empty literal "LIST_ARG" is truthy.
    # The intended check is membership in either argument state.
    elif self.state in ("ARG", "LIST_ARG"):
      return self.InsertArg(string=self.string)

  def StringEscape(self, string, match, **_):
    r"""Escape backslashes found inside a string quote.

    Backslashes followed by anything other than [\'"rnbt] will raise an Error.

    Args:
      string: The string that matched.
      match: The match object (m.group(1) is the escaped code)

    Raises:
      ParseError: For strings other than those used to define a regexp, raise
        an error if the escaped string is not one of [\'"rnbt].
    """
    precondition.AssertType(string, Text)

    # Allow unfiltered strings for regexp operations so that escaped special
    # characters (e.g. \*) or special sequences (e.g. \w) can be used in
    # objectfilter.
    if self.current_expression.operator == "regexp":
      self.string += compatibility.UnescapeString(string)
    elif match.group(1) in "\\'\"rnbt":
      self.string += compatibility.UnescapeString(string)
    else:
      raise ParseError("Invalid escape character %s." % string)

  def HexEscape(self, string, match, **_):
    """Converts a hex escaped string."""
    hex_string = match.group(1)
    try:
      self.string += binascii.unhexlify(hex_string).decode("utf-8")
    # TODO: In Python 2 `binascii` throws `TypeError` for invalid
    # input values (for whathever reason). This behaviour is fixed in Python 3
    # where `binascii.Error` (a subclass of `ValueError`) is raised. Once we do
    # not have to support Python 2 anymore, this `TypeError` catch should be
    # removed.
    except (binascii.Error, TypeError) as error:
      raise ParseError("Invalid hex escape '{}': {}".format(hex_string, error))

  def ContextOperator(self, string="", **_):
    """Push a context expression named after the @-prefixed token."""
    self.stack.append(self.context_cls(string[1:]))

  def Reduce(self):
    """Reduce the token stack into an AST.

    Returns:
      The root expression of the parsed query.

    Raises:
      ParseError: (via Error) if the expression ended prematurely or cannot
        be reduced to a single root.
    """
    # Check for sanity
    if self.state != "INITIAL" and self.state != "BINARY":
      self.Error("Premature end of expression")

    length = len(self.stack)
    while length > 1:
      # Precedence order.
      # NOTE(review): _CombineParenthesis is not defined in this section of
      # the file — presumably provided elsewhere in the class; verify.
      self._CombineParenthesis()
      self._CombineBinaryExpressions("and")
      self._CombineBinaryExpressions("or")
      self._CombineContext()

      # No change
      if len(self.stack) == length:
        break
      length = len(self.stack)

    if length != 1:
      self.Error("Illegal query expression")

    return self.stack[0]

  def Error(self, message=None, _=None):
    """Raise a ParseError annotated with the current parse position."""
    raise ParseError("%s in position %s: %s <----> %s )" %
                     (message, len(self.processed_buffer),
                      self.processed_buffer, self.buffer))

  def _CombineBinaryExpressions(self, operator):
    """Fold `<expr> <operator> <expr>` triples on the stack into one node."""
    for i in range(1, len(self.stack) - 1):
      item = self.stack[i]
      if (isinstance(item, lexer.BinaryExpression) and
          item.operator.lower() == operator.lower() and
          isinstance(self.stack[i - 1], lexer.Expression) and
          isinstance(self.stack[i + 1], lexer.Expression)):
        lhs = self.stack[i - 1]
        rhs = self.stack[i + 1]

        self.stack[i].AddOperands(lhs, rhs)  # pytype: disable=attribute-error
        self.stack[i - 1] = None
        self.stack[i + 1] = None

    self.stack = list(filter(None, self.stack))

  def _CombineContext(self):
    """Attach each context node to the expression that follows it."""
    # Context can merge from item 0
    for i in range(len(self.stack) - 1, 0, -1):
      item = self.stack[i - 1]
      if (isinstance(item, ContextExpression) and
          isinstance(self.stack[i], lexer.Expression)):
        expression = self.stack[i]
        item.SetExpression(expression)
        self.stack[i] = None

    self.stack = list(filter(None, self.stack))
def _AddToken(self, state_regex, regex, actions, next_state):
  """Append one additional token definition to this parser's token table."""
  token = lexer.Token(state_regex, regex, actions, next_state)
  self._tokens.append(token)
class PlistFilterParser(objectfilter.Parser):
  """Plist specific filter parser.

  Because we will be filtering dictionaries and the path components will be
  matched against dictionary keys, we must be more permissive with attribute
  names. This parser allows path components to be enclosed in double quotes to
  allow for spaces, dots or even raw hex-escaped data in them, such as:

    "My\x20first\x20path component".2nd."TH.IRD" contains "Google"

  We store the attribute name as a list of paths into the object instead of as
  a simple string that will be chunked in objectfilter.
  """

  tokens = [
      # Operators and related tokens
      lexer.Token("INITIAL", r"\@[\w._0-9]+", "ContextOperator,PushState",
                  "CONTEXTOPEN"),
      lexer.Token("INITIAL", r"[^\s\(\)]", "PushState,PushBack", "ATTRIBUTE"),
      lexer.Token("INITIAL", r"\(", "PushState,BracketOpen", None),
      lexer.Token("INITIAL", r"\)", "BracketClose", "BINARY"),
      # Context
      lexer.Token("CONTEXTOPEN", r"\(", "BracketOpen", "INITIAL"),
      # Double quoted string
      lexer.Token("STRING", "\"", "PopState,StringFinish", None),
      lexer.Token("STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("STRING", r"[^\\\"]+", "StringInsert", None),
      # Single quoted string
      lexer.Token("SQ_STRING", "'", "PopState,StringFinish", None),
      lexer.Token("SQ_STRING", r"\\x(..)", "HexEscape", None),
      lexer.Token("SQ_STRING", r"\\(.)", "StringEscape", None),
      lexer.Token("SQ_STRING", r"[^\\']+", "StringInsert", None),
      # Basic expression
      lexer.Token("ATTRIBUTE", r"\.", "AddAttributePath", "ATTRIBUTE"),
      lexer.Token("ATTRIBUTE", r"\s+", "AddAttributePath", "OPERATOR"),
      lexer.Token("ATTRIBUTE", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ATTRIBUTE", r"[\w_0-9\-]+", "StringStart,StringInsert",
                  "ATTRIBUTE"),
      lexer.Token("OPERATOR", r"(\w+|[<>!=]=?)", "StoreOperator", "ARG"),
      lexer.Token("ARG", r"(\d+\.\d+)", "InsertFloatArg", "ARG"),
      # BUG FIX: this pattern was r"(0x\d+)", which cannot match hex digits
      # a-f (e.g. "0xff" failed to tokenize). Widened to match the identical
      # token in the Parser class above; strictly accepts more inputs.
      lexer.Token("ARG", r"(0x[a-f\d]+)", "InsertInt16Arg", "ARG"),
      lexer.Token("ARG", r"(\d+)", "InsertIntArg", "ARG"),
      lexer.Token("ARG", "\"", "PushState,StringStart", "STRING"),
      lexer.Token("ARG", "'", "PushState,StringStart", "SQ_STRING"),
      # When the last parameter from arg_list has been pushed
      # State where binary operators are supported (AND, OR)
      lexer.Token("BINARY", r"(?i)(and|or|\&\&|\|\|)", "BinaryOperator",
                  "INITIAL"),
      # - We can also skip spaces
      lexer.Token("BINARY", r"\s+", None, None),
      # - But if it's not "and" or just spaces we have to go back
      lexer.Token("BINARY", ".", "PushBack,PopState", None),
      # Skip whitespace.
      lexer.Token(".", r"\s+", None, None),
  ]

  def StringFinish(self, **_):
    """StringFinish doesn't act on ATTRIBUTEs here.

    Quoted path components are handled by AddAttributePath instead, so a
    finished string is only inserted when we are collecting an argument.
    """
    if self.state == "ARG":
      return self.InsertArg(string=self.string)

  def AddAttributePath(self, **_):
    """Adds a path component to the current attribute.

    The attribute is stored as a list of path components rather than a
    single dotted string (see class docstring).
    """
    attribute_path = self.current_expression.attribute
    if not attribute_path:
      attribute_path = []

    attribute_path.append(self.string)
    self.current_expression.SetAttribute(attribute_path)