def testFirefoxHistoryFetch(self):
  """Test that downloading the Firefox history works."""
  with self.MockClientRawDevWithImage():
    # Run the flow in the simulated way.
    session_id = flow_test_lib.TestFlowHelper(
        webhistory.FirefoxHistory.__name__,
        self.client_mock,
        check_flow_errors=False,
        client_id=self.client_id,
        username="******",
        token=self.token,
        pathtype=rdf_paths.PathSpec.PathType.TSK)

  # Now check that the right files were downloaded.
  fs_path = "/home/test/.mozilla/firefox/adts404t.default/places.sqlite"

  # Check if the History file is created.
  output_path = self.client_id.Add("fs/tsk").Add("/".join(
      [self.base_path.replace("\\", "/"), "test_img.dd"])).Add(
          fs_path.replace("\\", "/"))
  fd = aff4.FACTORY.Open(output_path, token=self.token)
  self.assertGreater(fd.size, 20000)
  self.assertEqual(fd.read(15), "SQLite format 3")

  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.TSK(self.client_id.Basename(),
                           tuple(output_path.Split()[3:]))
    rel_fd = file_store.OpenLatestFileVersion(cp)
    self.assertEqual(rel_fd.read(15), "SQLite format 3")

  # Check for analysis file.
  fd = flow.GRRFlow.ResultCollectionForFID(session_id)
  self.assertGreater(len(fd), 3)
  data = "\n".join(map(unicode, fd.GenerateItems()))
  self.assertIn("Welcome to Firefox", data)
  self.assertIn("sport.orf.at", data)
def CheckFilesDownloaded(self, fnames):
  for fname in fnames:
    if data_store.RelationalDBReadEnabled(category="vfs"):
      path_info = data_store.REL_DB.ReadPathInfo(
          self.client_id.Basename(),
          rdf_objects.PathInfo.PathType.OS,
          components=self.FilenameToPathComponents(fname))
      size = path_info.stat_entry.st_size
    else:
      file_urn = self.FileNameToURN(fname)
      with aff4.FACTORY.Open(file_urn, token=self.token) as fd:
        size = fd.Get(fd.Schema.SIZE)

    self.assertGreater(size, 100)

    if data_store.RelationalDBReadEnabled(category="filestore"):
      fd = file_store.OpenLatestFileVersion(
          db.ClientPath(
              self.client_id.Basename(),
              rdf_objects.PathInfo.PathType.OS,
              components=self.FilenameToPathComponents(fname)))
      # Make sure we can actually read the file.
      self.assertEqual(len(fd.read()), size)
def testOpensFileWithTwoPathInfosWhereNewestHasHash(self):
  # Oldest.
  data_store.REL_DB.WritePathInfos(self.client_id, [self._PathInfo()])
  # Newest.
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.hash_id)])
  fd = file_store.OpenLatestFileVersion(self.client_path)
  self.assertEqual(fd.read(), self.data)
def testOpensLatestVersionForPathWithTwoPathInfosWithHashes(self):
  # Oldest.
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.other_hash_id)])
  # Newest.
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.hash_id)])
  fd = file_store.OpenLatestFileVersion(self.client_path)
  self.assertEqual(fd.read(), self.data)
def testMultiGetFileSizeLimit(self):
  client_mock = action_mocks.MultiGetFileClientMock()
  image_path = os.path.join(self.base_path, "test_img.dd")
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=image_path)

  # Read a bit more than one chunk (600 * 1024).
  expected_size = 750 * 1024
  args = transfer.MultiGetFileArgs(
      pathspecs=[pathspec], file_size=expected_size)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  expected_data = open(image_path, "rb").read(expected_size)

  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenLatestFileVersion(cp)

    self.assertEqual(fd_rel_db.size, expected_size)

    data = fd_rel_db.read(2 * expected_size)
    self.assertEqual(len(data), expected_size)

    d = hashlib.sha256()
    d.update(expected_data)
    self.assertEqual(fd_rel_db.hash_id.AsBytes(), d.digest())

    # Check that the SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                    cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256,
                     fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    urn = pathspec.AFF4Path(self.client_id)
    blobimage = aff4.FACTORY.Open(urn, token=self.token)
    # Make sure a VFSBlobImage got written.
    self.assertTrue(isinstance(blobimage, aff4_grr.VFSBlobImage))

    self.assertEqual(len(blobimage), expected_size)
    data = blobimage.read(100 * expected_size)
    self.assertEqual(len(data), expected_size)
    self.assertEqual(data, expected_data)

    hash_obj = data_store_utils.GetFileHashEntry(blobimage)
    d = hashlib.sha1()
    d.update(expected_data)
    self.assertEqual(hash_obj.sha1, d.hexdigest())
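# The SHA256/SHA1/MD5 history assertions above recur in most of the
# MultiGetFile tests below. A minimal sketch of a helper that could factor
# them out; this helper is not part of the original suite and only assumes
# the same REL_DB and file_store objects already used in these tests.
def _AssertLatestHashEntryConsistent(self, cp, fd_rel_db):
  """Checks that the newest PathInfo hash entry matches the file store hash."""
  history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id, cp.path_type,
                                                  cp.components)
  # The SHA256 recorded in the path history must equal the content hash.
  self.assertEqual(history[-1].hash_entry.sha256, fd_rel_db.hash_id.AsBytes())
  # SHA1 and MD5 are expected to be populated as well.
  self.assertIsNotNone(history[-1].hash_entry.sha1)
  self.assertIsNotNone(history[-1].hash_entry.md5)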
def testExistingChunks(self):
  client_mock = action_mocks.MultiGetFileClientMock()

  # Make a file to download that is three chunks long.
  # For the second run, we change the middle chunk. This will lead to a
  # different hash for the whole file and three chunks to download, of which
  # we already have two.
  chunk_size = transfer.MultiGetFile.CHUNK_SIZE
  for data in [
      "A" * chunk_size + "B" * chunk_size + "C" * 100,
      "A" * chunk_size + "X" * chunk_size + "C" * 100
  ]:
    path = os.path.join(self.temp_dir, "test.txt")
    with open(path, "wb") as fd:
      fd.write(data)

    pathspec = rdf_paths.PathSpec(
        pathtype=rdf_paths.PathSpec.PathType.OS, path=path)
    args = transfer.MultiGetFileArgs(pathspecs=[pathspec])
    flow_test_lib.TestFlowHelper(
        transfer.MultiGetFile.__name__,
        client_mock,
        token=self.token,
        client_id=self.client_id,
        args=args)

    if data_store.RelationalDBReadEnabled(category="filestore"):
      cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
      fd_rel_db = file_store.OpenLatestFileVersion(cp)

      self.assertEqual(fd_rel_db.size, len(data))
      self.assertEqual(fd_rel_db.read(), data)

      # Check that the SHA256 hash of the file matches the contents
      # hash and that MD5 and SHA1 are set.
      history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                      cp.path_type,
                                                      cp.components)
      self.assertEqual(history[-1].hash_entry.sha256,
                       fd_rel_db.hash_id.AsBytes())
      self.assertIsNotNone(history[-1].hash_entry.sha1)
      self.assertIsNotNone(history[-1].hash_entry.md5)
    else:
      urn = pathspec.AFF4Path(self.client_id)
      blobimage = aff4.FACTORY.Open(urn)
      self.assertEqual(blobimage.size, len(data))
      self.assertEqual(blobimage.read(blobimage.size), data)

  # Three chunks to get for the first file, only one for the second.
  self.assertEqual(client_mock.action_counts["TransferBuffer"], 4)
def testMultiGetFile(self):
  """Test MultiGetFile."""
  client_mock = action_mocks.MultiGetFileClientMock()
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "test_img.dd"))

  args = transfer.MultiGetFileArgs(pathspecs=[pathspec, pathspec])
  with test_lib.Instrument(transfer.MultiGetFileMixin,
                           "StoreStat") as storestat_instrument:
    flow_test_lib.TestFlowHelper(
        transfer.MultiGetFile.__name__,
        client_mock,
        token=self.token,
        client_id=self.client_id,
        args=args)

    # We should only have called StoreStat once because the two paths
    # requested were identical.
    self.assertEqual(len(storestat_instrument.args), 1)

  # Fix path for Windows testing.
  pathspec.path = pathspec.path.replace("\\", "/")

  fd2 = open(pathspec.path, "rb")

  # Test the AFF4 file that was created.
  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenLatestFileVersion(cp)
    self.CompareFDs(fd2, fd_rel_db)

    # Check that the SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                    cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256,
                     fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    urn = pathspec.AFF4Path(self.client_id)
    fd1 = aff4.FACTORY.Open(urn, token=self.token)
    fd2.seek(0, 2)
    self.assertEqual(fd2.tell(), int(fd1.Get(fd1.Schema.SIZE)))
    self.CompareFDs(fd1, fd2)
def testGetFilePathCorrection(self):
  """Tests that the pathspec returned is used for the aff4path."""
  client_mock = action_mocks.GetFileClientMock()
  # Deliberately using the wrong casing.
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "TEST_IMG.dd"))

  session_id = flow_test_lib.TestFlowHelper(
      transfer.GetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      pathspec=pathspec)

  results = flow_test_lib.GetFlowResults(self.client_id.Basename(),
                                         session_id)
  self.assertEqual(len(results), 1)
  res_pathspec = results[0].pathspec

  # Fix path for Windows testing.
  pathspec.path = pathspec.path.replace("\\", "/")

  fd2 = open(res_pathspec.path, "rb")
  fd2.seek(0, 2)

  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), res_pathspec)
    fd_rel_db = file_store.OpenLatestFileVersion(cp)
    self.CompareFDs(fd2, fd_rel_db)

    # Only the SHA256 hash of the contents should have been calculated,
    # since that is all that is needed to put the file contents into the
    # file store.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                    cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256,
                     fd_rel_db.hash_id.AsBytes())
    self.assertIsNone(history[-1].hash_entry.sha1)
    self.assertIsNone(history[-1].hash_entry.md5)
  else:
    # Test the AFF4 file that was created.
    urn = res_pathspec.AFF4Path(self.client_id)
    fd1 = aff4.FACTORY.Open(urn, token=self.token)

    self.assertEqual(fd2.tell(), int(fd1.Get(fd1.Schema.SIZE)))
    self.CompareFDs(fd1, fd2)
def testMultiGetFileSetsFileHashAttributeWhenMultipleChunksDownloaded(self):
  client_mock = action_mocks.MultiGetFileClientMock()
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path=os.path.join(self.base_path, "test_img.dd"))

  args = transfer.MultiGetFileArgs(pathspecs=[pathspec])
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  h = hashlib.sha256()
  with open(os.path.join(self.base_path, "test_img.dd"), "rb") as model_fd:
    h.update(model_fd.read())

  if not data_store.RelationalDBReadEnabled(category="filestore"):
    # Fix path for Windows testing.
    pathspec.path = pathspec.path.replace("\\", "/")
    # Test the AFF4 file that was created.
    urn = pathspec.AFF4Path(self.client_id)
    fd_hash = data_store_utils.GetUrnHashEntry(urn)

    self.assertTrue(fd_hash)
    self.assertEqual(fd_hash.sha256, h.digest())

  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenLatestFileVersion(cp)
    self.assertEqual(fd_rel_db.hash_id.AsBytes(), h.digest())

    # Check that the SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                    cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256,
                     fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
def testMultiGetFileDeduplication(self):
  client_mock = action_mocks.MultiGetFileClientMock()

  pathspecs = []
  # Make 10 files to download.
  for i in range(10):
    path = os.path.join(self.temp_dir, "test_%s.txt" % i)
    with open(path, "wb") as fd:
      fd.write("Hello")

    pathspecs.append(
        rdf_paths.PathSpec(
            pathtype=rdf_paths.PathSpec.PathType.OS, path=path))

  # All those files are the same so the individual chunks should
  # only be downloaded once. By forcing maximum_pending_files=1,
  # there should only be a single TransferBuffer call.
  args = transfer.MultiGetFileArgs(
      pathspecs=pathspecs, maximum_pending_files=1)
  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  self.assertEqual(client_mock.action_counts["TransferBuffer"], 1)

  if data_store.RelationalDBReadEnabled(category="filestore"):
    for pathspec in pathspecs:
      # Check that each referenced file can be read.
      cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
      fd_rel_db = file_store.OpenLatestFileVersion(cp)
      self.assertEqual("Hello", fd_rel_db.read())

      # Check that the SHA256 hash of the file matches the contents
      # hash and that MD5 and SHA1 are set.
      history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                      cp.path_type,
                                                      cp.components)
      self.assertEqual(history[-1].hash_entry.sha256,
                       fd_rel_db.hash_id.AsBytes())
      self.assertIsNotNone(history[-1].hash_entry.sha1)
      self.assertIsNotNone(history[-1].hash_entry.md5)
def testClientFileFinderUpload(self):
  paths = [os.path.join(self.base_path, "{**,.}/*.plist")]
  action = rdf_file_finder.FileFinderAction.Download()

  session_id = self._RunClientFileFinder(paths, action)
  collection = flow.GRRFlow.ResultCollectionForFID(session_id)
  results = list(collection)
  self.assertEqual(len(results), 4)
  relpaths = [
      os.path.relpath(p.stat_entry.pathspec.path, self.base_path)
      for p in results
  ]
  self.assertItemsEqual(relpaths, [
      "History.plist", "History.xml.plist", "test.plist",
      "parser_test/com.google.code.grr.plist"
  ])

  for r in results:
    aff4_obj = aff4.FACTORY.Open(
        r.stat_entry.pathspec.AFF4Path(self.client_id), token=self.token)
    data = open(r.stat_entry.pathspec.path, "rb").read()
    self.assertEqual(aff4_obj.Read(100), data[:100])

    if data_store.RelationalDBReadEnabled(category="filestore"):
      fd = file_store.OpenLatestFileVersion(
          db.ClientPath.FromPathSpec(self.client_id.Basename(),
                                     r.stat_entry.pathspec))
      self.assertEqual(fd.read(100), data[:100])

      self.assertEqual(fd.hash_id.AsBytes(), hashlib.sha256(data).digest())
    else:
      hash_obj = data_store_utils.GetFileHashEntry(aff4_obj)
      self.assertEqual(hash_obj.sha1, hashlib.sha1(data).hexdigest())
      self.assertEqual(hash_obj.sha256, hashlib.sha256(data).hexdigest())
      self.assertEqual(hash_obj.md5, hashlib.md5(data).hexdigest())
def testMultiGetFileMultiFiles(self):
  """Test MultiGetFile downloading many files at once."""
  client_mock = action_mocks.MultiGetFileClientMock()

  pathspecs = []
  # Make 30 files to download.
  for i in range(30):
    path = os.path.join(self.temp_dir, "test_%s.txt" % i)
    with open(path, "wb") as fd:
      fd.write("Hello")

    pathspecs.append(
        rdf_paths.PathSpec(
            pathtype=rdf_paths.PathSpec.PathType.OS, path=path))

  args = transfer.MultiGetFileArgs(
      pathspecs=pathspecs, maximum_pending_files=10)
  session_id = flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      client_id=self.client_id,
      args=args)

  if data_store.RelationalDBReadEnabled(category="filestore"):
    # Now open each file and make sure the data is there.
    for pathspec in pathspecs:
      cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
      fd_rel_db = file_store.OpenLatestFileVersion(cp)
      self.assertEqual("Hello", fd_rel_db.read())

      # Check that the SHA256 hash of the file matches the contents
      # hash and that MD5 and SHA1 are set.
      history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                      cp.path_type,
                                                      cp.components)
      self.assertEqual(history[-1].hash_entry.sha256,
                       fd_rel_db.hash_id.AsBytes())
      self.assertIsNotNone(history[-1].hash_entry.sha1)
      self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    # Check up on the internal flow state.
    flow_state = self._GetFlowState(self.client_id, session_id)
    # All the pathspecs should be in this list.
    self.assertEqual(len(flow_state.indexed_pathspecs), 30)

    # At any one time, there should not be more than 10 files or hashes
    # pending.
    self.assertLessEqual(len(flow_state.pending_files), 10)
    self.assertLessEqual(len(flow_state.pending_hashes), 10)

    # When we finish, there should be no pathspecs stored in the flow state.
    for flow_pathspec in flow_state.indexed_pathspecs:
      self.assertIsNone(flow_pathspec)
    for flow_request_data in flow_state.request_data_list:
      self.assertIsNone(flow_request_data)

    for pathspec in pathspecs:
      urn = pathspec.AFF4Path(self.client_id)
      fd = aff4.FACTORY.Open(urn, token=self.token)
      self.assertEqual("Hello", fd.read())
def testMultiGetFileOfSpecialFiles(self):
  """Test that special /proc/ files are handled correctly.

  /proc/ files have the property that they are non-seekable from their end
  (i.e. seeking them relative to the end is not supported). They also report
  an st_size of 0. For example:

  $ stat /proc/self/maps
  File: '/proc/self/maps'
  Size: 0   Blocks: 0   IO Block: 1024   regular empty file

  $ head /proc/self/maps
  00400000-00409000 r-xp 00000000 fc:01 9180740   /usr/bin/head
  00608000-00609000 r--p 00008000 fc:01 9180740   /usr/bin/head
  ...

  When the MultiGetFile flow deduplicates such a file, it believes the file
  has zero size, so it does not download the content and instead copies the
  existing zero-size file into place.
  """
  client_mock = action_mocks.MultiGetFileClientMock()

  # Create a zero-sized file.
  zero_sized_filename = os.path.join(self.temp_dir, "zero_size")
  with open(zero_sized_filename, "wb") as fd:
    pass

  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path=zero_sized_filename)

  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      file_size="1MiB",
      client_id=self.client_id,
      pathspecs=[pathspec])

  # Now if we try to fetch a real /proc/ filename, this will fail because the
  # filestore already contains the zero length file
  # aff4:/files/nsrl/da39a3ee5e6b4b0d3255bfef95601890afd80709.
  pathspec = rdf_paths.PathSpec(
      pathtype=rdf_paths.PathSpec.PathType.OS, path="/proc/self/environ")

  flow_test_lib.TestFlowHelper(
      transfer.MultiGetFile.__name__,
      client_mock,
      token=self.token,
      file_size=1024 * 1024,
      client_id=self.client_id,
      pathspecs=[pathspec])

  data = open(pathspec.last.path, "rb").read()

  if data_store.RelationalDBReadEnabled(category="filestore"):
    cp = db.ClientPath.FromPathSpec(self.client_id.Basename(), pathspec)
    fd_rel_db = file_store.OpenLatestFileVersion(cp)

    self.assertEqual(fd_rel_db.size, len(data))
    self.assertMultiLineEqual(fd_rel_db.read(), data)

    # Check that the SHA256 hash of the file matches the contents
    # hash and that MD5 and SHA1 are set.
    history = data_store.REL_DB.ReadPathInfoHistory(cp.client_id,
                                                    cp.path_type,
                                                    cp.components)
    self.assertEqual(history[-1].hash_entry.sha256,
                     fd_rel_db.hash_id.AsBytes())
    self.assertIsNotNone(history[-1].hash_entry.sha1)
    self.assertIsNotNone(history[-1].hash_entry.md5)
  else:
    # Test the AFF4 file that was created - it should be empty since by
    # default we judge the file size based on its stat.st_size.
    urn = pathspec.AFF4Path(self.client_id)
    fd = aff4.FACTORY.Open(urn, token=self.token)
    self.assertEqual(fd.size, len(data))
    self.assertMultiLineEqual(fd.read(len(data)), data)
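# A standalone sketch (not part of the test suite) of the procfs quirk the
# docstring above describes: stat() reports st_size == 0 even though reading
# the file returns data, so any flow that sizes files via st_size alone would
# fetch nothing. Linux-only; it relies on the module-level os import and
# assumes /proc/self/environ exists.
def _ProcSizeVsContent(path="/proc/self/environ"):
  reported = os.stat(path).st_size  # Typically 0 for procfs entries.
  with open(path, "rb") as fd:
    actual = len(fd.read())  # The real amount of readable content.
  return reported, actual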
def testRaisesForFileWithSinglePathInfoWithUnknownHash(self):
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.invalid_hash_id)])
  with self.assertRaises(file_store.FileHasNoContent):
    file_store.OpenLatestFileVersion(self.client_path)
def testOpensFileWithSinglePathInfoWithHash(self):
  data_store.REL_DB.WritePathInfos(self.client_id,
                                   [self._PathInfo(self.hash_id)])
  fd = file_store.OpenLatestFileVersion(self.client_path)
  self.assertEqual(fd.read(), self.data)