def testMultiWriteHistoryTwoPaths(self):
  client_id = self.InitializeClient()

  path_info_foo = rdf_objects.PathInfo.OS(
      components=["foo"],
      timestamp=rdfvalue.RDFDatetime.FromHumanReadable("2010-10-10"))
  path_info_bar = rdf_objects.PathInfo.OS(
      components=["bar"],
      timestamp=rdfvalue.RDFDatetime.FromHumanReadable("2011-11-11"))
  self.db.WritePathInfos(client_id, [path_info_foo, path_info_bar])

  hash_entries = {
      path_info_foo: rdf_crypto.Hash(md5=b"foo"),
      path_info_bar: rdf_crypto.Hash(md5=b"bar"),
  }
  self.db.MultiWritePathHistory(client_id, {}, hash_entries)

  path_info = self.db.FindPathInfoByPathID(client_id,
                                           rdf_objects.PathInfo.PathType.OS,
                                           rdf_objects.PathID(["foo"]))
  self.assertEqual(path_info.hash_entry.md5, b"foo")
  self.assertEqual(path_info.last_hash_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2010-10-10"))

  path_info = self.db.FindPathInfoByPathID(client_id,
                                           rdf_objects.PathInfo.PathType.OS,
                                           rdf_objects.PathID(["bar"]))
  self.assertEqual(path_info.hash_entry.md5, b"bar")
  self.assertEqual(path_info.last_hash_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2011-11-11"))

def testFindPathInfoByPathIDTimestampStatAndHashEntry(self):
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=["foo"])
  path_id = rdf_objects.PathID(["foo"])

  path_info.stat_entry = rdf_client.StatEntry(st_mode=42)
  path_info.hash_entry = None
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_1 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = None
  path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_2 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = rdf_client.StatEntry(st_mode=1337)
  path_info.hash_entry = None
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_3 = rdfvalue.RDFDatetime.Now()

  path_info.stat_entry = rdf_client.StatEntry(st_mode=4815162342)
  path_info.hash_entry = rdf_crypto.Hash(md5=b"norf")
  self.db.WritePathInfos(client_id, [path_info])
  timestamp_4 = rdfvalue.RDFDatetime.Now()

  path_info_1 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=timestamp_1)
  self.assertEqual(path_info_1.stat_entry.st_mode, 42)
  self.assertFalse(path_info_1.HasField("hash_entry"))

  path_info_2 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=timestamp_2)
  self.assertEqual(path_info_2.stat_entry.st_mode, 42)
  self.assertEqual(path_info_2.hash_entry.md5, b"quux")

  path_info_3 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=timestamp_3)
  self.assertEqual(path_info_3.stat_entry.st_mode, 1337)
  self.assertEqual(path_info_3.hash_entry.md5, b"quux")

  path_info_4 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=timestamp_4)
  self.assertEqual(path_info_4.stat_entry.st_mode, 4815162342)
  self.assertEqual(path_info_4.hash_entry.md5, b"norf")

def Start(self):
  hash_result = rdf_crypto.Hash(
      sha256=("9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578"
              "e4f06017acdb5").decode("hex"),
      sha1="6dd6bee591dfcb6d75eb705405302c3eab65e21a".decode("hex"),
      md5="8b0a15eefe63fd41f8dc9dee01c5cf9a".decode("hex"))
  self.SendReply(hash_result)

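# Note (added): the `.decode("hex")` calls above are the Python 2 idiom for
# turning a hex string into raw bytes; `str` has no such method on Python 3.
# A minimal sketch of the same Hash construction using binascii.unhexlify
# (available on both Python 2 and 3) follows. It assumes the `rdf_crypto`
# alias already imported by the surrounding module.
import binascii

hash_result = rdf_crypto.Hash(
    sha256=binascii.unhexlify(
        "9e8dc93e150021bb4752029ebbff51394aa36f069cf19901578"
        "e4f06017acdb5"),
    sha1=binascii.unhexlify("6dd6bee591dfcb6d75eb705405302c3eab65e21a"),
    md5=binascii.unhexlify("8b0a15eefe63fd41f8dc9dee01c5cf9a"))
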
def testWritePathInfoHashAndStatEntrySeparateWrites(self):
  client_id = self.InitializeClient()

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  stat_entry_path_info = rdf_objects.PathInfo.OS(
      components=["foo"], stat_entry=stat_entry)

  stat_entry_timestamp = rdfvalue.RDFDatetime.Now()
  self.db.WritePathInfos(client_id, [stat_entry_path_info])

  hash_entry = rdf_crypto.Hash(sha256=hashlib.sha256("foo").digest())
  hash_entry_path_info = rdf_objects.PathInfo.OS(
      components=["foo"], hash_entry=hash_entry)

  hash_entry_timestamp = rdfvalue.RDFDatetime.Now()
  self.db.WritePathInfos(client_id, [hash_entry_path_info])

  result = self.db.FindPathInfoByPathID(client_id,
                                        rdf_objects.PathInfo.PathType.OS,
                                        rdf_objects.PathID(["foo"]))

  now = rdfvalue.RDFDatetime.Now()

  self.assertEqual(result.components, ["foo"])
  self.assertTrue(result.HasField("stat_entry"))
  self.assertTrue(result.HasField("hash_entry"))
  self.assertEqual(result.stat_entry, stat_entry)
  self.assertEqual(result.hash_entry, hash_entry)
  self.assertGreater(result.last_stat_entry_timestamp, stat_entry_timestamp)
  self.assertLess(result.last_stat_entry_timestamp, hash_entry_timestamp)
  self.assertGreater(result.last_hash_entry_timestamp, hash_entry_timestamp)
  self.assertLess(result.last_hash_entry_timestamp, now)

def GetHashObject(self):
  """Returns a `Hash` object with appropriate fields filled-in."""
  hash_object = rdf_crypto.Hash()
  hash_object.num_bytes = self._bytes_read
  for algorithm in self._hashers:
    setattr(hash_object, algorithm, self._hashers[algorithm].digest())
  return hash_object

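# Sketch (added, not GRR code): a self-contained illustration of the hasher
# dictionary that GetHashObject() iterates over, using hashlib and a plain
# container class in place of rdf_crypto.Hash. The names StreamHasher,
# _HashResult and Update() are hypothetical stand-ins, not GRR identifiers.
import hashlib


class _HashResult(object):
  """Plain attribute container standing in for rdf_crypto.Hash."""


class StreamHasher(object):
  """Feeds data chunks to several hash algorithms at once."""

  def __init__(self, algorithms=("md5", "sha1", "sha256")):
    self._hashers = {name: hashlib.new(name) for name in algorithms}
    self._bytes_read = 0

  def Update(self, chunk):
    self._bytes_read += len(chunk)
    for hasher in self._hashers.values():
      hasher.update(chunk)

  def GetHashObject(self):
    # Mirrors the method above: one attribute per algorithm plus num_bytes.
    hash_object = _HashResult()
    hash_object.num_bytes = self._bytes_read
    for algorithm in self._hashers:
      setattr(hash_object, algorithm, self._hashers[algorithm].digest())
    return hash_object


# Usage: hash a byte string in two chunks and read the digests back.
hasher = StreamHasher()
hasher.Update(b"foo")
hasher.Update(b"bar")
assert hasher.GetHashObject().sha256 == hashlib.sha256(b"foobar").digest()
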
def ReceiveFileHash(self, responses):
  """Add hash digest to tracker and check with filestore."""
  # Support old clients which may not have the new client action in place yet.
  # TODO(user): Deprecate once all clients have the HashFile action.

  if not responses.success and responses.request.request.name == "HashFile":
    logging.debug(
        "HashFile action not available, falling back to FingerprintFile.")
    self.CallClient(
        server_stubs.FingerprintFile,
        responses.request.request.payload,
        next_state="ReceiveFileHash",
        request_data=responses.request_data)
    return

  index = responses.request_data["index"]
  if not responses.success:
    self.Log("Failed to hash file: %s", responses.status)
    self.state.pending_hashes.pop(index, None)
    # Report the error.
    self._FileFetchFailed(index, responses.request.request.name)
    return

  self.state.files_hashed += 1
  response = responses.First()
  if response.HasField("hash"):
    hash_obj = response.hash
  else:
    # Deprecate this method of returning hashes.
    hash_obj = rdf_crypto.Hash()

    if len(response.results) < 1 or response.results[0]["name"] != "generic":
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

    result = response.results[0]

    try:
      for hash_type in ["md5", "sha1", "sha256"]:
        value = result.GetItem(hash_type)
        setattr(hash_obj, hash_type, value)
    except AttributeError:
      self.Log("Failed to hash file: %s", self.state.indexed_pathspecs[index])
      self.state.pending_hashes.pop(index, None)
      return

  try:
    tracker = self.state.pending_hashes[index]
  except KeyError:
    # Hashing the file failed, but we did stat it.
    self._FileFetchFailed(index, responses.request.request.name)
    return

  tracker["hash_obj"] = hash_obj
  tracker["bytes_read"] = response.bytes_read

  self.state.files_hashed_since_check += 1
  if self.state.files_hashed_since_check >= self.MIN_CALL_TO_FILE_STORE:
    self._CheckHashesWithFileStore()

def Handle(self, args, token=None):
  ValidateVfsPath(args.file_path)

  if args.timestamp:
    age = args.timestamp
  else:
    age = aff4.ALL_TIMES

  file_obj = aff4.FACTORY.Open(
      args.client_id.ToClientURN().Add(args.file_path),
      mode="r",
      age=age,
      token=token)

  if data_store.RelationalDBReadEnabled(category="vfs"):
    # These are not really "files" so they cannot be stored in the database
    # but they still can be queried so we need to return something. Sometimes
    # they contain a trailing slash so we need to take care of that.
    #
    # TODO(hanuszczak): Require VFS paths to be normalized so that trailing
    # slash is either forbidden or mandatory.
    if args.file_path.endswith("/"):
      args.file_path = args.file_path[:-1]
    if args.file_path in ["fs", "registry", "temp", "fs/os", "fs/tsk"]:
      api_file = ApiFile()
      api_file.name = api_file.path = args.file_path
      api_file.is_directory = True
      return ApiGetFileDetailsResult(file=api_file)

    path_type, components = rdf_objects.ParseCategorizedPath(args.file_path)

    # TODO(hanuszczak): The tests passed even without support for timestamp
    # filtering. The test suite should probably be improved in that regard.
    path_id = rdf_objects.PathID(components)
    path_info = data_store.REL_DB.FindPathInfoByPathID(
        str(args.client_id), path_type, path_id, timestamp=args.timestamp)

    if path_info:
      stat_entry = path_info.stat_entry
      hash_entry = path_info.hash_entry
    else:
      stat_entry = rdf_client.StatEntry()
      hash_entry = rdf_crypto.Hash()
  else:
    stat_entry = None
    hash_entry = None

  return ApiGetFileDetailsResult(
      file=ApiFile().InitFromAff4Object(
          file_obj,
          stat_entry=stat_entry,
          hash_entry=hash_entry,
          with_details=True))

def testWriteHashHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable

  client_id = self.InitializeClient()
  path_info = rdf_objects.PathInfo.OS(components=["foo", "bar"])

  hash_entries = {
      datetime("2000-01-01"): rdf_crypto.Hash(md5=b"quux"),
      datetime("2000-02-01"): rdf_crypto.Hash(md5=b"norf"),
      datetime("2000-03-01"): rdf_crypto.Hash(md5=b"thud"),
  }

  self.db.WritePathInfos(client_id, [path_info])
  self.db.WritePathHashHistory(client_id, path_info, hash_entries)

  path_info_1 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_info.GetPathID(),
      timestamp=datetime("2000-01-20"))
  self.assertEqual(path_info_1.hash_entry.md5, b"quux")

  path_info_2 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_info.GetPathID(),
      timestamp=datetime("2000-02-20"))
  self.assertEqual(path_info_2.hash_entry.md5, b"norf")

  path_info_3 = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_info.GetPathID(),
      timestamp=datetime("2000-03-20"))
  self.assertEqual(path_info_3.hash_entry.md5, b"thud")

  path_info = self.db.FindPathInfoByPathID(client_id,
                                           rdf_objects.PathInfo.PathType.OS,
                                           path_info.GetPathID())
  self.assertEqual(path_info.hash_entry.md5, b"thud")
  self.assertEqual(path_info.last_hash_entry_timestamp,
                   datetime("2000-03-01"))

def testHashHistory(self):
  datetime = rdfvalue.RDFDatetime.FromHumanReadable

  client_urn = self.SetupClient(0)
  file_urn = client_urn.Add("fs/os").Add("bar")

  with test_lib.FakeTime(datetime("2010-01-01")):
    with self._Aff4Open(file_urn) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"quux"))

  with test_lib.FakeTime(datetime("2020-01-01")):
    with self._Aff4Open(file_urn) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"norf"))

  with test_lib.FakeTime(datetime("2030-01-01")):
    with self._Aff4Open(file_urn) as fd:
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(md5=b"blargh"))

  data_migration.MigrateClientVfs(client_urn)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("bar",),
      timestamp=datetime("2010-12-31"))
  self.assertEqual(path_info.hash_entry.md5, b"quux")

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("bar",),
      timestamp=datetime("2020-12-31"))
  self.assertEqual(path_info.hash_entry.md5, b"norf")

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("bar",),
      timestamp=datetime("2030-12-31"))
  self.assertEqual(path_info.hash_entry.md5, b"blargh")

def testFindPathInfoByPathIDTimestampHashEntry(self):
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=["foo"])
  path_id = rdf_objects.PathID(["foo"])

  path_info.hash_entry = rdf_crypto.Hash(md5=b"bar")
  self.db.WritePathInfos(client_id, [path_info])
  bar_timestamp = rdfvalue.RDFDatetime.Now()

  path_info.hash_entry = rdf_crypto.Hash(md5=b"baz")
  self.db.WritePathInfos(client_id, [path_info])
  baz_timestamp = rdfvalue.RDFDatetime.Now()

  path_info.hash_entry = rdf_crypto.Hash(md5=b"quux")
  self.db.WritePathInfos(client_id, [path_info])
  quux_timestamp = rdfvalue.RDFDatetime.Now()

  bar_path_info = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=bar_timestamp)
  self.assertEqual(bar_path_info.hash_entry.md5, b"bar")

  baz_path_info = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=baz_timestamp)
  self.assertEqual(baz_path_info.hash_entry.md5, b"baz")

  quux_path_info = self.db.FindPathInfoByPathID(
      client_id,
      rdf_objects.PathInfo.PathType.OS,
      path_id,
      timestamp=quux_timestamp)
  self.assertEqual(quux_path_info.hash_entry.md5, b"quux")

def testHashEntryFromSimpleFile(self):
  client_urn = self.SetupClient(0)

  with self._Aff4Open(client_urn.Add("fs/os").Add("foo")) as fd:
    hash_entry = rdf_crypto.Hash(md5=b"bar", sha256=b"baz")
    fd.Set(fd.Schema.HASH, hash_entry)

  data_migration.MigrateClientVfs(client_urn)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",))
  self.assertEqual(path_info.hash_entry.md5, b"bar")
  self.assertEqual(path_info.hash_entry.sha256, b"baz")

def testStatAndHashEntryFromSimpleFile(self):
  client_urn = self.SetupClient(0)

  with self._Aff4Open(client_urn.Add("fs/os").Add("foo")) as fd:
    stat_entry = rdf_client.StatEntry(st_mode=108)
    fd.Set(fd.Schema.STAT, stat_entry)

    hash_entry = rdf_crypto.Hash(sha256=b"quux")
    fd.Set(fd.Schema.HASH, hash_entry)

  data_migration.MigrateClientVfs(client_urn)

  path_info = data_store.REL_DB.ReadPathInfo(
      client_id=client_urn.Basename(),
      path_type=rdf_objects.PathInfo.PathType.OS,
      components=("foo",))
  self.assertEqual(path_info.stat_entry.st_mode, 108)
  self.assertEqual(path_info.hash_entry.sha256, b"quux")

def _CreateFile(self, path, content, hashing=False):
  with aff4.FACTORY.Create(
      path, aff4.AFF4MemoryStream, token=self.token) as fd:
    fd.Write(content)

    if hashing:
      digest = hashlib.sha256(content).digest()
      fd.Set(fd.Schema.HASH, rdf_crypto.Hash(sha256=digest))

      if data_store.RelationalDBWriteEnabled():
        client_id, vfs_path = path.Split(2)
        path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

        path_info = rdf_objects.PathInfo()
        path_info.path_type = path_type
        path_info.components = components
        path_info.hash_entry.sha256 = digest

        data_store.REL_DB.WritePathInfos(client_id, [path_info])

def testWritePathInfosHashAndStatEntry(self):
  client_id = self.InitializeClient()

  stat_entry = rdf_client.StatEntry(st_mode=1337)
  hash_entry = rdf_crypto.Hash(md5=hashlib.md5("foo").digest())

  path_info = rdf_objects.PathInfo.OS(
      components=["foo", "bar", "baz"],
      stat_entry=stat_entry,
      hash_entry=hash_entry)
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.FindPathInfoByPathID(
      client_id, rdf_objects.PathInfo.PathType.OS,
      rdf_objects.PathID(["foo", "bar", "baz"]))

  self.assertEqual(result.components, ["foo", "bar", "baz"])
  self.assertTrue(result.HasField("stat_entry"))
  self.assertTrue(result.HasField("hash_entry"))
  self.assertEqual(result.stat_entry, stat_entry)
  self.assertEqual(result.hash_entry, hash_entry)

def testMultiWriteHistoryStatAndHash(self):
  client_id = self.InitializeClient()

  path_info = rdf_objects.PathInfo.OS(components=["foo"])
  path_info.timestamp = rdfvalue.RDFDatetime.FromHumanReadable("2000-01-01")

  self.db.WritePathInfos(client_id, [path_info])

  stat_entries = {path_info: rdf_client.StatEntry(st_size=42)}
  hash_entries = {path_info: rdf_crypto.Hash(md5=b"quux")}

  self.db.MultiWritePathHistory(client_id, stat_entries, hash_entries)

  result_path_info = self.db.FindPathInfoByPathID(
      client_id, rdf_objects.PathInfo.PathType.OS, path_info.GetPathID())
  self.assertEqual(result_path_info.stat_entry.st_size, 42)
  self.assertEqual(result_path_info.hash_entry.md5, b"quux")
  self.assertEqual(result_path_info.last_stat_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2000-01-01"))
  self.assertEqual(result_path_info.last_hash_entry_timestamp,
                   rdfvalue.RDFDatetime.FromHumanReadable("2000-01-01"))

def testWritePathInfosHashEntry(self):
  client_id = self.InitializeClient()

  hash_entry = rdf_crypto.Hash()
  hash_entry.sha256 = hashlib.sha256("foo").digest()
  hash_entry.md5 = hashlib.md5("foo").digest()
  hash_entry.num_bytes = len("foo")

  path_info = rdf_objects.PathInfo.OS(
      components=["foo", "bar", "baz"], hash_entry=hash_entry)
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.FindPathInfoByPathID(
      client_id, rdf_objects.PathInfo.PathType.OS,
      rdf_objects.PathID(["foo", "bar", "baz"]))

  self.assertEqual(result.components, ["foo", "bar", "baz"])
  self.assertTrue(result.HasField("hash_entry"))
  self.assertFalse(result.HasField("stat_entry"))
  self.assertEqual(result.hash_entry.sha256, hashlib.sha256("foo").digest())
  self.assertEqual(result.hash_entry.md5, hashlib.md5("foo").digest())
  self.assertEqual(result.hash_entry.num_bytes, len("foo"))

def testWritePathInfosMetadataTimestampUpdate(self):
  now = rdfvalue.RDFDatetime.Now

  client_id = self.InitializeClient()

  timestamp_0 = now()

  self.db.WritePathInfos(client_id,
                         [rdf_objects.PathInfo.OS(components=["foo"])])

  result = self.db.FindPathInfoByPathID(client_id,
                                        rdf_objects.PathInfo.PathType.OS,
                                        rdf_objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertGreater(result.timestamp, timestamp_0)
  self.assertLess(result.timestamp, now())
  self.assertEqual(result.last_stat_entry_timestamp, None)
  self.assertEqual(result.last_hash_entry_timestamp, None)

  timestamp_1 = now()

  stat_entry = rdf_client.StatEntry(st_mode=42)
  self.db.WritePathInfos(
      client_id,
      [rdf_objects.PathInfo.OS(components=["foo"], stat_entry=stat_entry)])

  result = self.db.FindPathInfoByPathID(client_id,
                                        rdf_objects.PathInfo.PathType.OS,
                                        rdf_objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.stat_entry.st_mode, 42)
  self.assertGreater(result.timestamp, timestamp_1)
  self.assertLess(result.timestamp, now())
  self.assertGreater(result.last_stat_entry_timestamp, timestamp_1)
  self.assertLess(result.last_stat_entry_timestamp, now())

  timestamp_2 = now()

  hash_entry = rdf_crypto.Hash(md5=b"foo")
  self.db.WritePathInfos(
      client_id,
      [rdf_objects.PathInfo.OS(components=["foo"], hash_entry=hash_entry)])

  result = self.db.FindPathInfoByPathID(client_id,
                                        rdf_objects.PathInfo.PathType.OS,
                                        rdf_objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.hash_entry.md5, b"foo")
  self.assertGreater(result.timestamp, timestamp_2)
  self.assertLess(result.timestamp, now())
  self.assertGreater(result.last_hash_entry_timestamp, timestamp_2)
  self.assertLess(result.last_hash_entry_timestamp, now())

  timestamp_3 = now()

  self.db.WritePathInfos(
      client_id,
      [rdf_objects.PathInfo.OS(components=["foo"], directory=True)])

  result = self.db.FindPathInfoByPathID(client_id,
                                        rdf_objects.PathInfo.PathType.OS,
                                        rdf_objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.stat_entry.st_mode, 42)
  self.assertEqual(result.hash_entry.md5, b"foo")
  self.assertTrue(result.directory)
  self.assertGreater(result.timestamp, timestamp_3)
  self.assertLess(result.timestamp, now())
  self.assertGreater(result.last_stat_entry_timestamp, timestamp_1)
  self.assertLess(result.last_stat_entry_timestamp, timestamp_2)
  self.assertGreater(result.last_hash_entry_timestamp, timestamp_2)
  self.assertLess(result.last_hash_entry_timestamp, timestamp_3)

  timestamp_4 = now()

  path_info = rdf_objects.PathInfo.OS(components=["foo"])
  path_info.stat_entry.st_mode = 108
  path_info.hash_entry.sha256 = b"norf"
  self.db.WritePathInfos(client_id, [path_info])

  result = self.db.FindPathInfoByPathID(client_id,
                                        rdf_objects.PathInfo.PathType.OS,
                                        rdf_objects.PathID(["foo"]))
  self.assertEqual(result.components, ["foo"])
  self.assertEqual(result.stat_entry.st_mode, 108)
  self.assertEqual(result.hash_entry.sha256, b"norf")
  self.assertGreater(result.timestamp, timestamp_4)
  self.assertGreater(result.last_stat_entry_timestamp, timestamp_4)
  self.assertGreater(result.last_hash_entry_timestamp, timestamp_4)
  self.assertLess(result.timestamp, now())
  self.assertLess(result.last_stat_entry_timestamp, now())
  self.assertLess(result.last_hash_entry_timestamp, now())