def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
  """Fake parser: emits two packages, a stat entry and an anomaly."""
  del cmd, args, stdout, stderr, return_val, time_taken, knowledge_base
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  specs = [("Package1", "Desc1", "amd64"), ("Package2", "Desc2", "i386")]
  packages = [
      rdf_client.SoftwarePackage(
          name=name,
          description=desc,
          version="1",
          architecture=arch,
          install_state=installed) for name, desc, arch in specs
  ]
  yield rdf_client.SoftwarePackages(packages=packages)
  # Also yield something random so we can test return type filtering.
  yield rdf_client_fs.StatEntry()
  # Also yield an anomaly to test that.
  yield rdf_anomaly.Anomaly(
      type="PARSER_ANOMALY", symptom="could not parse gremlins.")
def testDpkgCmdParser(self):
  """Ensure we can extract packages from dpkg output."""
  parser = linux_cmd_parser.DpkgCmdParser()
  # FIX: use a context manager instead of open(...).read() so the fixture
  # file handle is closed deterministically rather than leaking until GC.
  path = os.path.join(self.base_path, "checks/data/dpkg.out")
  with open(path, "rb") as filedesc:
    content = filedesc.read()
  out = list(parser.Parse("/usr/bin/dpkg", ["--list"], content, b"", 0, None))
  self.assertLen(out, 1)
  package_list = out[0]
  self.assertLen(package_list.packages, 181)
  self.assertEqual(
      package_list.packages[0],
      rdf_client.SoftwarePackage(
          name="acpi-support-base",
          description="scripts for handling base ACPI events such as the power button",
          version="0.140-5",
          architecture="all",
          install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED))
  self.assertEqual(
      package_list.packages[22],
      rdf_client.SoftwarePackage(
          name="diffutils",
          description=None,  # Test package with empty description.
          version="1:3.2-6",
          architecture="amd64",
          install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED))
def testDpkgCmdParserPrecise(self):
  """Ensure we can extract packages from dpkg output on ubuntu precise."""
  parser = linux_cmd_parser.DpkgCmdParser()
  # FIX: use a context manager instead of open(...).read() so the fixture
  # file handle is closed deterministically rather than leaking until GC.
  path = os.path.join(self.base_path, "checks/data/dpkg.precise.out")
  with open(path, "rb") as filedesc:
    content = filedesc.read()
  out = list(parser.Parse("/usr/bin/dpkg", ["--list"], content, b"", 0, None))
  self.assertLen(out, 1)
  package_list = out[0]
  self.assertLen(package_list.packages, 30)
  self.assertEqual(
      package_list.packages[0],
      rdf_client.SoftwarePackage(
          name="adduser",
          description="add and remove users and groups",
          version="3.113ubuntu2",
          architecture=None,
          install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED))
  self.assertEqual(
      package_list.packages[12],
      rdf_client.SoftwarePackage(
          name="diffutils",
          description=None,  # Test package with empty description.
          version="1:3.2-1ubuntu1",
          architecture=None,
          install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED))
def ParseFile(self, knowledge_base, pathspec, filedesc):
  """Parses a macOS InstallHistory plist into SoftwarePackages."""
  del knowledge_base  # Unused.
  del pathspec  # Unused.
  plist = biplist.readPlist(filedesc)
  if not isinstance(plist, list):
    raise parser.ParseError(
        "InstallHistory plist is a '%s', expecting a list" % type(plist))
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  packages = [
      rdf_client.SoftwarePackage(
          name=entry.get("displayName"),
          version=entry.get("displayVersion"),
          description=",".join(entry.get("packageIdentifiers")),
          # TODO(hanuszczak): make installed_on an RDFDatetime
          installed_on=_DateToEpoch(entry.get("date")),
          install_state=installed) for entry in plist
  ]
  if packages:
    yield rdf_client.SoftwarePackages(packages=packages)
def Parse(self, cmd, args, stdout, stderr, return_val, knowledge_base):
  """Parse the dpkg output.

  Args:
    cmd: The command that was run (used in error messages).
    args: Command arguments (unused).
    stdout: Raw bytes written by dpkg to stdout.
    stderr: Raw stderr bytes (unused).
    return_val: Exit code, validated via self.CheckReturn.
    knowledge_base: Client knowledge base (unused).

  Yields:
    A SoftwarePackages message holding all parsed packages, if any.

  Raises:
    parsers.ParseError: If the "+++-" header line is malformed.
    ValueError: If the header declares an unexpected number of columns.
  """
  _ = stderr, args, knowledge_base  # Unused.
  self.CheckReturn(cmd, return_val)
  lines = stdout.decode("utf-8").splitlines()
  num_columns = 0
  i = 0
  packages = []
  for i, line in enumerate(lines):
    if line.startswith("+++-"):
      # This is a special header line that determines column size.
      columns = line.split("-")
      num_columns = len(columns)
      for col in columns[1:]:
        # BUG FIX: re.match("=*", col) always succeeds (it matches the empty
        # prefix of any string), so this malformed-header check could never
        # fire. fullmatch requires the whole column to be "=" characters.
        if not re.fullmatch("=*", col):
          raise parsers.ParseError("Invalid header parsing for %s at line "
                                   "%s" % (cmd, i))
      break
  if num_columns == 0:
    # No header line was found; nothing to parse.
    return
  elif num_columns not in [4, 5]:
    raise ValueError(
        "Bad number of columns ({}) in dpkg --list output:\n{}\n...".format(
            num_columns, "\n".join(lines[:10])))
  for line in lines[i + 1:]:
    # Robustness: skip blank lines instead of crashing while unpacking.
    if not line.strip():
      continue
    # Split the line at whitespace into at most `num_columns` columns.
    columns = line.split(None, num_columns - 1)
    # If the last column (description) is empty, pad it with None.
    if len(columns) == num_columns - 1:
      columns.append(None)
    if num_columns == 5:
      # Installed, Name, Version, Architecture, Description
      status, name, version, arch, desc = columns
    else:  # num_columns is 4
      # Older versions of dpkg don't print Architecture
      status, name, version, desc = columns
      arch = None
    # Status is potentially 3 columns, but always at least two, desired and
    # actual state. We only care about actual state.
    if status[1] == "i":
      status = rdf_client.SoftwarePackage.InstallState.INSTALLED
    else:
      status = rdf_client.SoftwarePackage.InstallState.UNKNOWN
    packages.append(
        rdf_client.SoftwarePackage(
            name=name,
            description=desc,
            version=version,
            architecture=arch,
            install_state=status))
  if packages:
    yield rdf_client.SoftwarePackages(packages=packages)
def ParseFile(
    self,
    knowledge_base: rdf_client.KnowledgeBase,
    pathspec: rdf_paths.PathSpec,
    filedesc: IO[bytes],
) -> Iterator[rdf_client.SoftwarePackages]:
  """Parses a dpkg status file into installed SoftwarePackages."""
  del knowledge_base  # Unused.
  del pathspec  # Unused.
  collected = []
  status_text = utils.ReadFileBytesAsUnicode(filedesc)
  try:
    paragraphs = self._deb822.Packages.iter_paragraphs(
        status_text.splitlines())
    for paragraph in paragraphs:
      if not self.installed_re.match(paragraph["Status"]):
        continue
      collected.append(
          rdf_client.SoftwarePackage(
              name=paragraph["Package"],
              description=paragraph["Description"],
              version=paragraph["Version"],
              architecture=paragraph["Architecture"],
              publisher=paragraph["Maintainer"],
              install_state="INSTALLED"))
  except SystemError:
    yield rdf_anomaly.Anomaly(
        type="PARSER_ANOMALY", symptom="Invalid dpkg status file")
  finally:
    # Emit whatever was successfully collected, even after an anomaly.
    if collected:
      yield rdf_client.SoftwarePackages(packages=collected)
def Parse(self, result):
  """Parse the WMI packages output."""
  yield rdf_client.SoftwarePackage(
      name=result["Name"],
      description=result["Description"],
      version=result["Version"],
      install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED)
def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
  """Fake parser: yields one package whose description echoes stdout."""
  _ = cmd, args, stderr, return_val, time_taken, knowledge_base  # Unused.
  yield rdf_client.SoftwarePackage(
      name="Package",
      description=stdout,
      version="1",
      architecture="amd64",
      install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED)
def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
  """Parse the dpkg output.

  Args:
    cmd: The command that was run (used in error messages).
    args: Command arguments (unused).
    stdout: Raw bytes written by dpkg to stdout.
    stderr: Raw stderr bytes (unused).
    return_val: Exit code, validated via self.CheckReturn.
    time_taken: Command runtime (unused).
    knowledge_base: Client knowledge base (unused).

  Yields:
    A SoftwarePackages message holding all parsed packages, if any.

  Raises:
    parser.ParseError: If the "+++-" header line is malformed.
    ValueError: If the header declares an unexpected number of columns.
  """
  _ = stderr, time_taken, args, knowledge_base  # Unused.
  self.CheckReturn(cmd, return_val)
  # BUG FIX: decode once and reuse. The original decoded stdout for the
  # header scan but then re-split the raw bytes (stdout.splitlines()) for
  # the package loop; on Python 3, bytes columns made `status[1] == "i"`
  # compare an int to a str, so every package was marked UNKNOWN.
  lines = stdout.decode("utf-8").splitlines()
  column_lengths = []
  i = 0
  for i, line in enumerate(lines):
    if line.startswith("+++-"):
      # This is a special header line that determines column size.
      for col in line.split("-")[1:]:
        # BUG FIX: re.match("=*", col) matches the empty prefix of any
        # string, so the malformed-header check was unreachable; fullmatch
        # requires the whole column to be "=" characters.
        if not re.fullmatch("=*", col):
          raise parser.ParseError("Invalid header parsing for %s at line "
                                  "%s" % (cmd, i))
        column_lengths.append(len(col))
      break
  if not column_lengths:
    # No header line was found; nothing to parse.
    return
  packages = []
  for line in lines[i + 1:]:
    cols = line.split(None, len(column_lengths))
    # If the last column (description) is empty, pad it with None so the
    # unpacking below cannot fail (consistent with the newer DpkgCmdParser).
    if len(cols) == len(column_lengths):
      cols.append(None)
    # The status column is ignored in column_lengths.
    if len(column_lengths) == 4:
      # Installed, Name, Version, Architecture, Description
      status, name, version, arch, desc = cols
    elif len(column_lengths) == 3:
      # Older versions of dpkg don't print Architecture
      status, name, version, desc = cols
      arch = None
    else:
      raise ValueError("Bad number of columns in dpkg --list output: %s" %
                       len(column_lengths))
    # Status is potentially 3 columns, but always at least two, desired and
    # actual state. We only care about actual state.
    if status[1] == "i":
      status = rdf_client.SoftwarePackage.InstallState.INSTALLED
    else:
      status = rdf_client.SoftwarePackage.InstallState.UNKNOWN
    packages.append(
        rdf_client.SoftwarePackage(
            name=name,
            description=desc,
            version=version,
            architecture=arch,
            install_state=status))
  if packages:
    yield rdf_client.SoftwarePackages(packages=packages)
def ParseMultiple(self, result_dicts):
  """Parse the WMI packages output."""
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  packages = [
      rdf_client.SoftwarePackage(
          name=entry["Name"],
          description=entry["Description"],
          version=entry["Version"],
          install_state=installed) for entry in result_dicts
  ]
  if packages:
    yield rdf_client.SoftwarePackages(packages=packages)
def Parse(self, result):
  """Parse the WMI packages output."""
  data = result.ToDict()
  # InstalledOn comes back in a godawful format such as '7/10/2013'.
  installed_on = self.AmericanDateToEpoch(data.get("InstalledOn", ""))
  yield rdf_client.SoftwarePackage(
      name=data.get("HotFixID"),
      description=data.get("Caption"),
      installed_by=data.get("InstalledBy"),
      install_state=rdf_client.SoftwarePackage.InstallState.INSTALLED,
      installed_on=installed_on)
def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
  """Parse the yum output."""
  _ = stderr, time_taken, args, knowledge_base  # Unused.
  self.CheckReturn(cmd, return_val)
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  # The first line is a header; everything after is one package per line.
  for line in stdout.splitlines()[1:]:
    name_arch, version, source = line.split()
    # Package names are suffixed with ".<arch>".
    name, arch = name_arch.split(".")
    yield rdf_client.SoftwarePackage(
        name=name,
        publisher=source,
        version=version,
        architecture=arch,
        install_state=installed)
def Parse(self, stat, file_object, knowledge_base):
  """Parse the status file."""
  del stat, knowledge_base  # Unused.
  try:
    contents = file_object.read()
    for paragraph in deb822.Packages.iter_paragraphs(contents.splitlines()):
      if not self.installed_re.match(paragraph["Status"]):
        continue
      yield rdf_client.SoftwarePackage(
          name=paragraph["Package"],
          description=paragraph["Description"],
          version=paragraph["Version"],
          architecture=paragraph["Architecture"],
          publisher=paragraph["Maintainer"],
          install_state="INSTALLED")
  except SystemError:
    yield rdf_anomaly.Anomaly(
        type="PARSER_ANOMALY", symptom="Invalid dpkg status file")
def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
  """Parse the rpm -qa output."""
  _ = time_taken, args, knowledge_base  # Unused.
  self.CheckReturn(cmd, return_val)
  # Each line looks like "<name>-<version...>"; the version starts at the
  # first "-<digit>" boundary.
  package_re = re.compile(r"^(\w[-\w\+]+?)-(\d.*)$")
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  for line in stdout.splitlines():
    match = package_re.match(line.strip())
    if not match:
      continue
    name, version = match.groups()
    yield rdf_client.SoftwarePackage(
        name=name, version=version, install_state=installed)
  # A corrupt rpm database announces itself on stderr; report it once.
  if any("error: rpmdbNextIterator: skipping h#" in line
         for line in stderr.splitlines()):
    yield rdf_anomaly.Anomaly(
        type="PARSER_ANOMALY", symptom="Broken rpm database.")
def Parse(self, statentry, file_object, knowledge_base):
  """Parse the Plist file."""
  plist = binplist.readPlist(file_object)
  if not isinstance(plist, list):
    raise parser.ParseError(
        "InstallHistory plist is a '%s', expecting a list" % type(plist))
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  for entry in plist:
    yield rdf_client.SoftwarePackage(
        name=entry.get("displayName"),
        version=entry.get("displayVersion"),
        description=",".join(entry.get("packageIdentifiers")),
        # TODO(hanuszczak): make installed_on an RDFDatetime
        installed_on=_DateToEpoch(entry.get("date")),
        install_state=installed)
def ParseMultiple(self, result_dicts):
  """Parse the WMI packages output."""
  installed = rdf_client.SoftwarePackage.InstallState.INSTALLED
  packages = []
  for raw in result_dicts:
    data = raw.ToDict()
    # InstalledOn comes back in a godawful format such as '7/10/2013'.
    when = self.AmericanDateToEpoch(data.get("InstalledOn", ""))
    packages.append(
        rdf_client.SoftwarePackage(
            name=data.get("HotFixID"),
            description=data.get("Caption"),
            installed_by=data.get("InstalledBy"),
            install_state=installed,
            installed_on=when))
  if packages:
    yield rdf_client.SoftwarePackages(packages=packages)
def Parse(self, stat, file_object, knowledge_base):
  """Parse the status file."""
  del stat, knowledge_base  # Unused.
  found = []
  contents = utils.ReadFileBytesAsUnicode(file_object)
  try:
    for paragraph in self._deb822.Packages.iter_paragraphs(
        contents.splitlines()):
      if not self.installed_re.match(paragraph["Status"]):
        continue
      found.append(
          rdf_client.SoftwarePackage(
              name=paragraph["Package"],
              description=paragraph["Description"],
              version=paragraph["Version"],
              architecture=paragraph["Architecture"],
              publisher=paragraph["Maintainer"],
              install_state="INSTALLED"))
  except SystemError:
    yield rdf_anomaly.Anomaly(
        type="PARSER_ANOMALY", symptom="Invalid dpkg status file")
  finally:
    # Emit whatever was successfully collected, even after an anomaly.
    if found:
      yield rdf_client.SoftwarePackages(packages=found)
def testBigQueryPluginFallbackToAFF4(self):
  """Verifies that upload failures fall back to writing AFF4 files.

  The BigQuery client mock always raises BigQueryJobUploadError, so with
  BigQuery.max_upload_failures set to 2 the plugin should stop calling
  InsertData after two failures and instead stream results into per-type
  gzipped .data/.schema files under base_urn.
  """
  plugin_args = bigquery_plugin.BigQueryOutputPluginArgs()
  # Three distinct result types (plus a duplicate Process) so we exercise
  # one upload attempt per exported type.
  responses = [
      rdf_client.StatEntry(
          pathspec=rdf_paths.PathSpec(path="/中国新闻网新闻中", pathtype="OS")),
      rdf_client.Process(pid=42),
      rdf_client.Process(pid=43),
      rdf_client.SoftwarePackage(name="test.deb")
  ]
  plugin = bigquery_plugin.BigQueryOutputPlugin(
      source_urn=self.results_urn,
      output_base_urn=self.base_urn,
      args=plugin_args,
      token=self.token)
  plugin.InitializeState()
  messages = []
  for response in responses:
    messages.append(
        rdf_flows.GrrMessage(source=self.client_id, payload=response))
  # Fixed time so the fallback file names (which embed the timestamp) are
  # predictable below.
  with test_lib.FakeTime(1445995873):
    with mock.patch.object(bigquery, "GetBigQueryClient") as mock_bigquery:
      # Every InsertData call fails, forcing the AFF4 fallback path.
      mock_bigquery.return_value.configure_mock(**{
          "InsertData.side_effect": bigquery.BigQueryJobUploadError()
      })
      with test_lib.ConfigOverrider({"BigQuery.max_upload_failures": 2}):
        for message in messages:
          plugin.ProcessResponses([message])
        plugin.Flush()
        # We have 3 output types but a limit of 2 upload failures, so we
        # shouldn't try the third one.
        self.assertEqual(mock_bigquery.return_value.InsertData.call_count, 2)
  # We should have written a data file and a schema file for each type.
  for output_name in [
      "ExportedFile", "ExportedProcess", "AutoExportedSoftwarePackage"
  ]:
    schema_fd = aff4.FACTORY.Open(
        self.base_urn.Add(
            "C-1000000000000000_Results_%s_1445995873.schema" % output_name),
        token=self.token)
    data_fd = aff4.FACTORY.Open(
        self.base_urn.Add(
            "C-1000000000000000_Results_%s_1445995873.data" % output_name),
        token=self.token)
    # The .data files are gzip-compressed newline-delimited JSON.
    actual_fd = gzip.GzipFile(None, "r", 9, data_fd)
    if output_name == "ExportedFile":
      self.CompareSchemaToKnownGood(json.load(schema_fd))
      self.assertEqual(
          json.load(actual_fd)["urn"],
          self.client_id.Add("/fs/os/中国新闻网新闻中"))
    elif output_name == "ExportedProcess":
      self.assertEqual(json.load(schema_fd)[1]["name"], "pid")
      expected_pids = ["42", "43"]
      for i, line in enumerate(actual_fd):
        self.assertEqual(json.loads(line)["pid"], expected_pids[i])
    else:
      self.assertEqual(json.load(schema_fd)[1]["name"], "name")
      self.assertEqual(json.load(actual_fd)["name"], "test.deb")
  # Process the same messages to make sure we're re-using the filehandles.
  with test_lib.FakeTime(1445995878):
    with mock.patch.object(bigquery, "GetBigQueryClient") as mock_bigquery:
      mock_bigquery.return_value.configure_mock(**{
          "InsertData.side_effect": bigquery.BigQueryJobUploadError()
      })
      with test_lib.ConfigOverrider({"BigQuery.max_upload_failures": 2}):
        for message in messages:
          plugin.ProcessResponses([message])
        plugin.Flush()
        # We shouldn't call insertdata at all because we have passed max
        # failures already
        self.assertEqual(mock_bigquery.return_value.InsertData.call_count, 0)
  # Since the second pass reuses the files created at 1445995873, each
  # per-type data file should now contain twice the original line count.
  expected_line_counts = {
      "ExportedFile": 2,
      "ExportedProcess": 4,
      "AutoExportedSoftwarePackage": 2
  }
  for output_name in [
      "ExportedFile", "ExportedProcess", "AutoExportedSoftwarePackage"
  ]:
    data_fd = aff4.FACTORY.Open(
        self.base_urn.Add(
            "C-1000000000000000_Results_%s_1445995873.data" % output_name),
        token=self.token)
    actual_fd = gzip.GzipFile(None, "r", 9, data_fd)
    self.assertEqual(
        sum(1 for line in actual_fd), expected_line_counts[output_name])