def testMissingFilesAreListedInManifest(self):
  """Checks that unresolvable client paths are reported as missing."""
  zip_generator = archive_generator.FlowArchiveGenerator(
      self.flow, archive_generator.ArchiveFormat.ZIP)
  # One path that exists in the file store and one that is guaranteed not to.
  unknown_path = db.ClientPath.OS(self.client_id, ["non", "existing"])
  mappings = [
      flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
      flow_base.ClientPathArchiveMapping(unknown_path, "foo/bar/file"),
  ]
  archive_path = self._GenerateArchive(zip_generator.Generate(mappings))
  zip_fd = zipfile.ZipFile(archive_path)
  entry_names = [str(entry) for entry in sorted(zip_fd.namelist())]

  # Expecting in the archive: 1 file (the other shouldn't be found)
  # and a manifest.
  self.assertLen(entry_names, 2)

  file_contents = zip_fd.read(os.path.join(zip_generator.prefix, "foo", "file"))
  self.assertEqual(file_contents, b"hello1")

  manifest = yaml.safe_load(
      zip_fd.read(os.path.join(zip_generator.prefix, "MANIFEST")))
  self.assertCountEqual(manifest["processed_files"].items(),
                        [(self.path1.vfs_path, "foo/file")])
  self.assertCountEqual(manifest["missing_files"], ["fs/os/non/existing"])
def testCreatesZipContainingTwoMappedFilesAndManifest(self):
  """Checks that ZIP generation places both mapped files plus a MANIFEST."""
  zip_generator = archive_generator.FlowArchiveGenerator(
      self.flow, archive_generator.ArchiveFormat.ZIP)
  mappings = [
      flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
      flow_base.ClientPathArchiveMapping(self.path2, "foo/bar/file"),
  ]
  archive_path = self._GenerateArchive(zip_generator.Generate(mappings))
  zip_fd = zipfile.ZipFile(archive_path)
  entry_names = [str(entry) for entry in sorted(zip_fd.namelist())]

  # Expecting in the archive: 2 files and a manifest.
  self.assertLen(entry_names, 3)

  self.assertEqual(
      zip_fd.read(os.path.join(zip_generator.prefix, "foo", "file")),
      b"hello1")
  self.assertEqual(
      zip_fd.read(os.path.join(zip_generator.prefix, "foo", "bar", "file")),
      b"hello2")

  manifest = yaml.safe_load(
      zip_fd.read(os.path.join(zip_generator.prefix, "MANIFEST")))
  self.assertCountEqual(manifest["processed_files"].items(),
                        [(self.path1.vfs_path, "foo/file"),
                         (self.path2.vfs_path, "foo/bar/file")])
  self.assertCountEqual(manifest["missing_files"], [])
  self.assertEqual(manifest["client_id"], self.client_id)
  self.assertEqual(manifest["flow_id"], self.flow_id)
def testCreatesTarContainingTwoMappedFilesAndManifest(self):
  """Checks that TAR_GZ generation places both mapped files plus a MANIFEST."""
  tar_generator = archive_generator.FlowArchiveGenerator(
      self.flow, archive_generator.ArchiveFormat.TAR_GZ)
  mappings = [
      flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
      flow_base.ClientPathArchiveMapping(self.path2, "foo/bar/file"),
  ]
  archive_path = self._GenerateArchive(tar_generator.Generate(mappings))

  with tarfile.open(archive_path, encoding="utf-8") as tar_fd:
    # Expecting in the archive: 2 files and a manifest.
    self.assertLen(tar_fd.getnames(), 3)

    first_entry = os.path.join(tar_generator.prefix, "foo", "file")
    self.assertEqual(tar_fd.extractfile(first_entry).read(), b"hello1")

    second_entry = os.path.join(tar_generator.prefix, "foo", "bar", "file")
    self.assertEqual(tar_fd.extractfile(second_entry).read(), b"hello2")

    manifest = yaml.safe_load(
        tar_fd.extractfile(os.path.join(tar_generator.prefix,
                                        "MANIFEST")).read())
    self.assertCountEqual(manifest["processed_files"].items(),
                          [(self.path1.vfs_path, "foo/file"),
                           (self.path2.vfs_path, "foo/bar/file")])
    self.assertCountEqual(manifest["missing_files"], [])
    self.assertEqual(manifest["client_id"], self.client_id)
    self.assertEqual(manifest["flow_id"], self.flow_id)
def testCorrectlyGeneratesArchiveMappings(self):
  """Checks archive mappings for a Chrome + Safari history collection."""
  with mock.patch.object(collectors, "ArtifactCollectorFlow",
                         MockArtifactCollectorFlow):
    flow_id, _, _ = self._RunCollectBrowserHistory(browsers=[
        webhistory.Browser.CHROME,
        webhistory.Browser.SAFARI,
    ])
    flow = flow_base.FlowBase.CreateFlowInstance(
        flow_test_lib.GetFlowObj(self.client_id, flow_id))
    results = flow_test_lib.GetRawFlowResults(self.client_id, flow_id)

    mappings = flow.GetFilesArchiveMappings(results)

    # Each browser's history file is expected under its own subdirectory.
    chrome_mapping = flow_base.ClientPathArchiveMapping(
        db.ClientPath.OS(self.client_id, ("home", "foo", "ChromeHistory")),
        "chrome/ChromeHistory",
    )
    safari_mapping = flow_base.ClientPathArchiveMapping(
        db.ClientPath.OS(self.client_id, ("home", "foo", "SafariHistory")),
        "safari/SafariHistory",
    )
    self.assertCountEqual(mappings, [chrome_mapping, safari_mapping])
def testPropagatesStreamingExceptions(self):
  """Checks that errors raised while streaming chunks reach the caller."""
  tar_generator = archive_generator.FlowArchiveGenerator(
      self.flow, archive_generator.ArchiveFormat.TAR_GZ)
  mappings = [
      flow_base.ClientPathArchiveMapping(self.path1, "foo/file"),
      flow_base.ClientPathArchiveMapping(self.path2, "foo/bar/file"),
  ]
  # Force the file store to blow up mid-stream and make sure the exception
  # is not swallowed by the generator.
  stream_patcher = mock.patch.object(
      file_store, "StreamFilesChunks", side_effect=Exception("foobar"))
  with stream_patcher:
    with self.assertRaises(Exception) as context:
      self._GenerateArchive(tar_generator.Generate(mappings))
    self.assertEqual(str(context.exception), "foobar")
def testArchiveMappingsForDuplicateFilesInResult(self):
  """Checks that repeated files get distinct "-N"-suffixed archive paths."""
  with temp.AutoTempFilePath() as temp_file_path:
    with io.open(temp_file_path, mode="w", encoding="utf-8") as fd:
      fd.write("Just sample text to put in the file.")

    table = f"""
    [
      {{ "collect_column": "{temp_file_path}" }}
    ]
    """
    with osquery_test_lib.FakeOsqueryiOutput(stdout=table, stderr=""):
      flow_id = self._InitializeFlow(
          file_collection_columns=["collect_column"])

    flow = flow_base.FlowBase.CreateFlowInstance(
        flow_test_lib.GetFlowObj(self.client_id, flow_id))
    results = list(flow_test_lib.GetRawFlowResults(self.client_id, flow_id))
    # This is how we emulate duplicate filenames in the results
    duplicated_results = results + results + results

    mappings = list(flow.GetFilesArchiveMappings(iter(duplicated_results)))

    # All three results point at the same client path; only the archive
    # paths are expected to differ.
    client_path = db.ClientPath.OS(self.client_id,
                                   temp_file_path.split("/")[1:])
    self.assertCountEqual(mappings, [
        flow_base.ClientPathArchiveMapping(
            client_path, f"osquery_collected_files{temp_file_path}"),
        flow_base.ClientPathArchiveMapping(
            client_path, f"osquery_collected_files{temp_file_path}-1"),
        flow_base.ClientPathArchiveMapping(
            client_path, f"osquery_collected_files{temp_file_path}-2"),
    ])
def testArchiveMappingsForMultipleFiles(self):
  """Checks that every distinct collected file gets its own archive path."""
  with temp.AutoTempDirPath(remove_non_empty=True) as temp_dir_path:
    first_file = os.path.join(temp_dir_path, "foo")
    second_file = os.path.join(temp_dir_path, "bar")
    with io.open(first_file, mode="w", encoding="utf-8") as fd:
      fd.write("Just sample text to put in the file 1.")
    with io.open(second_file, mode="w", encoding="utf-8") as fd:
      fd.write("Just sample text to put in the file 2.")

    table = f"""
    [
      {{ "collect_column": "{first_file}" }},
      {{ "collect_column": "{second_file}" }}
    ]
    """
    with osquery_test_lib.FakeOsqueryiOutput(stdout=table, stderr=""):
      flow_id = self._InitializeFlow(
          file_collection_columns=["collect_column"])

    flow = flow_base.FlowBase.CreateFlowInstance(
        flow_test_lib.GetFlowObj(self.client_id, flow_id))
    results = flow_test_lib.GetRawFlowResults(self.client_id, flow_id)

    mappings = list(flow.GetFilesArchiveMappings(iter(results)))

    self.assertCountEqual(mappings, [
        flow_base.ClientPathArchiveMapping(
            db.ClientPath.OS(self.client_id, first_file.split("/")[1:]),
            f"osquery_collected_files{first_file}"),
        flow_base.ClientPathArchiveMapping(
            db.ClientPath.OS(self.client_id, second_file.split("/")[1:]),
            f"osquery_collected_files{second_file}"),
    ])
def GetFilesArchiveMappings(
    self, flow_results: Iterator[rdf_flow_objects.FlowResult]
) -> Iterator[flow_base.ClientPathArchiveMapping]:
  """Yields archive mappings for collected browser-history files.

  Each result's file is placed under "<browser>/<basename>". Duplicate
  target paths are disambiguated with an "_<n>" suffix before the file
  extension.

  Args:
    flow_results: Results of this flow; payloads are expected to be
      CollectBrowserHistoryResult values.

  Yields:
    A ClientPathArchiveMapping per result, with a unique archive path.
  """
  path_counters = collections.Counter()
  for r in flow_results:
    p = cast(CollectBrowserHistoryResult, r.payload)
    client_path = db.ClientPath.FromPathSpec(self.client_id,
                                             p.stat_entry.pathspec)
    target_path = os.path.join(p.browser.name.lower(),
                               p.stat_entry.pathspec.Basename())
    # Count occurrences keyed by the ORIGINAL target path, and bump the
    # counter before renaming. The previous code incremented the counter
    # of the already-renamed path, so the original path's count never got
    # past 1 and a third duplicate collided with the second (both were
    # suffixed "_1").
    duplicate_index = path_counters[target_path]
    path_counters[target_path] += 1
    if duplicate_index > 0:
      fname, ext = os.path.splitext(target_path)
      target_path = f"{fname}_{duplicate_index}{ext}"
    yield flow_base.ClientPathArchiveMapping(client_path, target_path)
def GetFilesArchiveMappings(
    self, flow_results: Iterator[rdf_flow_objects.FlowResult],
) -> Iterator[flow_base.ClientPathArchiveMapping]:
  """Yields archive mappings for files collected by this osquery flow.

  Results that do not carry a collected file are skipped. Archive path
  uniqueness is delegated to _UniquePathGenerator.
  """
  unique_paths = _UniquePathGenerator()
  for flow_result in flow_results:
    try:
      collected_file = _ExtractFileInfo(flow_result)
    except _ResultNotRelevantError:
      # Not every result corresponds to a collected file — ignore those.
      continue

    pathspec = collected_file.stat_entry.pathspec
    yield flow_base.ClientPathArchiveMapping(
        client_path=db.ClientPath.FromPathSpec(self.client_id, pathspec),
        archive_path=unique_paths.GeneratePath(pathspec))
def testGeneratesTarGzArchiveForFlowWithCustomMappings(self):
  """Checks that custom archive mappings are honored in TAR_GZ output."""
  components = self.base_path.lstrip("/").split("/") + ["test.plist"]
  path = abstract_db.ClientPath.OS(self.client_id, components)
  custom_mappings = [
      flow_base.ClientPathArchiveMapping(path, "foo/file"),
  ]
  # Substitute the flow's own mapping logic with our fixed mapping.
  with mock.patch.object(file_finder.FileFinder, "GetFilesArchiveMappings",
                         return_value=custom_mappings):
    result = self.handler.Handle(
        flow_plugin.ApiGetFlowFilesArchiveArgs(
            client_id=self.client_id,
            flow_id=self.flow_id,
            archive_format="TAR_GZ"),
        token=self.token)

    manifest = self._GetTarGzManifest(result)
    self.assertEqual(manifest["client_id"], self.client_id)
    self.assertEqual(manifest["flow_id"], self.flow_id)
    self.assertEqual(manifest["processed_files"], {path.vfs_path: "foo/file"})
    self.assertEmpty(manifest["missing_files"])