def ReadBuffer(self, args):
    """Answer a read request with a slice of `self.mbr`.

    Args:
        args: Request object providing the `offset` and `length` to slice.

    Returns:
        A single-element list with a BufferReference whose `length` is the
        number of bytes actually sliced.
    """
    start = args.offset
    chunk = self.mbr[start:start + args.length]
    reply = rdf_client.BufferReference(
        data=chunk, offset=start, length=len(chunk))
    return [reply]
def testPasswdBufferParser(self):
    """Ensure we can extract users from a passwd file."""
    parser = linux_file_parser.PasswdBufferParser()
    buf1 = rdf_client.BufferReference(data="user1:x:1000:1000:User1"
                                      " Name,,,:/home/user1:/bin/bash\n")
    buf2 = rdf_client.BufferReference(data="user2:x:1000:1000:User2"
                                      " Name,,,:/home/user2:/bin/bash\n")
    ff_result = rdf_file_finder.FileFinderResult(matches=[buf1, buf2])

    out = list(parser.Parse(ff_result, None))
    self.assertEqual(len(out), 2)

    # Check the type of *both* results; the first entry used to be skipped
    # because index 1 was asserted twice.
    self.assertIsInstance(out[0], rdf_client.User)
    self.assertIsInstance(out[1], rdf_client.User)

    self.assertEqual(out[0].username, "user1")
    self.assertEqual(out[0].full_name, "User1 Name,,,")
class ReadBuffer(actions.ActionPlugin):
    """Reads a buffer from a file and returns it to a server callback."""
    in_rdfvalue = rdf_client.BufferReference
    out_rdfvalues = [rdf_client.BufferReference]

    def Run(self, args):
        """Reads a buffer on the client and sends it to the server.

        Args:
            args: A BufferReference naming the pathspec, offset and length
                to read.

        Raises:
            RuntimeError: The requested length exceeds
                constants.CLIENT_MAX_BUFFER_SIZE.
        """
        # Make sure we limit the size of our output.
        if args.length > constants.CLIENT_MAX_BUFFER_SIZE:
            raise RuntimeError("Can not read buffers this large.")

        try:
            fd = vfs.VFSOpen(args.pathspec, progress_callback=self.Progress)

            fd.Seek(args.offset)
            # Report the offset the handle says it is at after seeking,
            # rather than echoing the requested one.
            offset = fd.Tell()

            data = fd.Read(args.length)
        except (IOError, OSError) as e:
            # "as" form is valid on Python 2.6+ and Python 3; the old
            # comma form is a syntax error on Python 3.
            self.SetStatus(rdf_flows.GrrStatus.ReturnedStatus.IOERROR, e)
            return

        # Now return the data to the server.
        self.SendReply(
            rdf_client.BufferReference(
                offset=offset,
                data=data,
                length=len(data),
                pathspec=fd.pathspec))
def _StoreDataAndHash(self, data: AnyStr, offset: int) -> None:
    """Ship `data` as a compressed blob and report its hash to the flow.

    Args:
        data: Bytes to be stored as a blob.
        offset: Offset where the data was read from.
    """
    compressed = zlib.compress(data)
    zcompression = rdf_protodict.DataBlob.CompressionType.ZCOMPRESSION
    blob = rdf_protodict.DataBlob(data=compressed, compression=zcompression)

    # Count the uncompressed size against this response (network send limit)
    # before anything is sent.
    self.ChargeBytesToSession(len(data))

    # The blob itself goes to the well-known TransferStore flow.
    transfer_store = rdfvalue.SessionID(flow_name="TransferStore")
    self.grr_worker.SendReply(blob, session_id=transfer_store)

    # Our own flow only receives the blob's hash plus offset and length.
    reference = rdf_client.BufferReference(
        offset=offset,
        length=len(data),
        data=hashlib.sha256(data).digest())

    # Fold this chunk into the running hash and report its current value.
    self._partial_file_hash.update(data)
    accumulated = self._partial_file_hash.digest()

    result = rdf_read_low_level.ReadLowLevelResult(
        blob=reference, accumulated_hash=accumulated)
    self.SendReply(result)
def Start(self):
    """Schedules the ReadBuffer client action.

    Splits the requested `self.args.length` bytes into reads of at most
    constants.CLIENT_MAX_BUFFER_SIZE and issues one client call per read;
    every reply is routed to the StoreMBR state.
    """
    self.state.bytes_downloaded = 0
    # Buffers are simply collected in a list. Not very efficient, but MBR
    # data should be kept short so this is not a big deal.
    self.state.buffers = []

    drive = rdf_paths.PathSpec(
        path="\\\\.\\PhysicalDrive0\\",
        pathtype=rdf_paths.PathSpec.PathType.OS,
        path_options=rdf_paths.PathSpec.Options.CASE_LITERAL)

    chunk = constants.CLIENT_MAX_BUFFER_SIZE
    total = self.args.length
    # Ceiling division: a partial trailing chunk needs its own request.
    request_count = total // chunk + (1 if total % chunk else 0)

    for index in range(request_count):
        start = index * chunk
        remaining = total - start
        request = rdf_client.BufferReference(
            pathspec=drive, offset=start, length=min(remaining, chunk))
        self.CallClient(
            server_stubs.ReadBuffer,
            request,
            next_state=compatibility.GetName(self.StoreMBR))
def Scan(self, fd, matcher):
    """Scans given file searching for occurrences of given pattern.

    Args:
        fd: A file descriptor of the file that needs to be searched.
        matcher: A matcher object specifying a pattern to search for.

    Yields:
        `BufferReference` objects pointing to file parts with matching
        content.
    """
    params = self.params
    chunks = streaming.Streamer(
        chunk_size=self.CHUNK_SIZE,
        overlap_size=self.OVERLAP_SIZE,
    ).StreamFile(fd, offset=params.start_offset, amount=params.length)

    for chunk in chunks:
        for hit in chunk.Scan(matcher):
            # Widen the hit by the requested context, clamped to the chunk.
            lo = max(hit.begin - params.bytes_before, 0)
            hi = min(hit.end + params.bytes_after, len(chunk.data))
            context = chunk.data[lo:hi]

            yield rdf_client.BufferReference(
                offset=chunk.offset + lo, length=len(context), data=context)

            # In FIRST_HIT mode nothing beyond the first match is wanted.
            if params.mode == params.Mode.FIRST_HIT:
                return
def Start(self):
    """Initialises read state and requests the first chunk from the client.

    Opens the sparse image named by `self.args.file_urn`, aligns the
    requested range via `self.AlignToChunks`, and issues a TransferBuffer
    call for the first chunk.
    """
    image = aff4.FACTORY.Open(
        self.args.file_urn,
        token=self.token,
        aff4_type=standard.AFF4SparseImage,
        mode="rw")

    # Use the object's chunk size, in case it's different to the class-wide
    # chunk size.
    chunksize = image.chunksize

    self.state.pathspec = image.Get(image.Schema.PATHSPEC)
    self.state.chunksize = chunksize
    self.state.blobs = []

    # Make sure we always read a whole number of chunks, and remember where
    # we're up to in the file and how much is left to read.
    (self.state.bytes_left_to_read,
     self.state.current_offset) = self.AlignToChunks(
         self.args.length, self.args.offset, chunksize)

    # Always read one chunk at a time.
    first_request = rdf_client.BufferReference(
        pathspec=self.state.pathspec,
        length=self.state.chunksize,
        offset=self.state.current_offset)

    # Account for the chunk we are about to read before issuing the call.
    self.state.bytes_left_to_read -= chunksize
    self.state.current_offset += chunksize

    self.CallClient(
        server_stubs.TransferBuffer,
        first_request,
        next_state="TransferBuffer")
def GetBufferForChunk(self, chunk):
    """Build the read request covering chunk number `chunk`.

    Args:
        chunk: Chunk index; multiplied by the stored chunk size to get the
            byte offset.

    Returns:
        A BufferReference for one chunk of `self.state.pathspec`.
    """
    state = self.state
    size = state.chunksize
    return rdf_client.BufferReference(
        pathspec=state.pathspec, length=size, offset=chunk * size)
def Run(self, args):
    """Reads a buffer on the client and sends it to the server.

    The compressed data is sent to the TransferStore flow; the calling flow
    receives only the SHA-256 digest together with offset and length.

    Args:
        args: Request naming the pathspec, offset and length to read.

    Raises:
        RuntimeError: The requested length exceeds
            constants.CLIENT_MAX_BUFFER_SIZE.
    """
    too_large = args.length > constants.CLIENT_MAX_BUFFER_SIZE
    if too_large:
        # Make sure we limit the size of our output.
        raise RuntimeError("Can not read buffers this large.")

    data = vfs.ReadVFS(
        args.pathspec,
        args.offset,
        args.length,
        progress_callback=self.Progress)

    blob = rdf_protodict.DataBlob(
        data=zlib.compress(data),
        compression=rdf_protodict.DataBlob.CompressionType.ZCOMPRESSION)
    sha256_digest = hashlib.sha256(data).digest()

    # Ensure that the buffer is counted against this response. Check network
    # send limit.
    self.ChargeBytesToSession(len(data))

    # The data itself goes to the special TransferStore well known flow.
    transfer_store = rdfvalue.SessionID(flow_name="TransferStore")
    self.grr_worker.SendReply(blob, session_id=transfer_store)

    # Our flow gets the hash of this blob as well as the offset and length.
    reference = rdf_client.BufferReference(
        offset=args.offset, length=len(data), data=sha256_digest)
    self.SendReply(reference)
def TransferBuffer(self, args):
    """TransferBuffer action mock.

    Args:
        args: Request whose `offset` selects where in `self.data` to start.

    Returns:
        A one-element list with a copy of the request carrying everything
        in `self.data` from the (clamped) offset onwards.
    """
    reply = rdf_client.BufferReference(args)
    # Clamp so an offset past the end yields an empty tail.
    start = min(args.offset, len(self.data))
    tail = self.data[start:]
    reply.data = tail
    reply.length = len(tail)
    return [reply]
def setUp(self):
    """Prepare a buffer request and stub out `standard.vfs.ReadVFS`.

    The stub always returns `self.data`, whatever is requested.
    """
    super(TestNetworkByteLimits, self).setUp()
    pathspec = rdf_paths.PathSpec(
        path="/nothing", pathtype=rdf_paths.PathSpec.PathType.OS)
    self.buffer_ref = rdf_client.BufferReference(
        pathspec=pathspec, length=5000)
    self.data = "X" * 500

    # Kept on the instance for any code that restores the original by hand.
    self.old_read = standard.vfs.ReadVFS
    # Register the restoration before patching: cleanups run even when a
    # later setUp step fails, so the stub cannot leak into other tests.
    # Restoring twice (here and in a tearDown) is harmless.
    self.addCleanup(setattr, standard.vfs, "ReadVFS", self.old_read)
    standard.vfs.ReadVFS = lambda x, y, z, progress_callback=None: self.data

    self.transfer_buf = action_mocks.ActionMock(standard.TransferBuffer)
def testFileFinderResultExportConverterConvertsBufferRefsWithoutPathspecs(
        self):
    """Matches lacking a pathspec are exported with the stat entry's URN."""
    pathspec = rdf_paths.PathSpec(
        path="/some/path", pathtype=rdf_paths.PathSpec.PathType.OS)
    stat_entry = rdf_client_fs.StatEntry(
        pathspec=pathspec,
        st_mode=33184,
        st_ino=1063090,
        st_atime=1336469177,
        st_mtime=1336129892,
        st_ctime=1336129892,
        st_btime=1313131313)

    # Neither match carries its own pathspec.
    matches = [
        rdf_client.BufferReference(offset=42, length=43, data=b"somedata1"),
        rdf_client.BufferReference(offset=44, length=45, data=b"somedata2"),
    ]
    file_finder_result = rdf_file_finder.FileFinderResult(
        stat_entry=stat_entry, matches=matches)

    converter = file.FileFinderResultConverter()
    results = list(converter.Convert(self.metadata, file_finder_result))

    # We expect 2 ExportedMatch instances in the results.
    exported_matches = sorted(
        (item for item in results
         if isinstance(item, buffer_reference.ExportedMatch)),
        key=lambda x: x.offset)
    self.assertLen(exported_matches, 2)

    expected_urn = "aff4:/%s/fs/os/some/path" % self.client_id
    expectations = [(42, 43, b"somedata1"), (44, 45, b"somedata2")]
    for exported, (offset, length, data) in zip(exported_matches,
                                                expectations):
        self.assertEqual(exported.offset, offset)
        self.assertEqual(exported.length, length)
        self.assertEqual(exported.data, data)
        self.assertEqual(exported.urn, expected_urn)
def testReadBuffer(self):
    """Test reading a buffer."""
    target = rdf_paths.PathSpec(
        path=os.path.join(self.base_path, "morenumbers.txt"),
        pathtype=rdf_paths.PathSpec.PathType.OS)
    request = rdf_client.BufferReference(
        pathspec=target, offset=100, length=10)

    replies = self.RunAction(standard.ReadBuffer, request)
    result = replies[0]

    self.assertEqual(result.offset, 100)
    self.assertEqual(result.length, 10)
    self.assertEqual(result.data, "7\n38\n39\n40")
def setUp(self):
    """Prepare a buffer request and patch `standard.vfs.ReadVFS`.

    The patched function returns `self.data` for every call; the patch is
    undone through a registered cleanup.
    """
    super(TestNetworkByteLimits, self).setUp()

    self.data = b"X" * 500
    self.buffer_ref = rdf_client.BufferReference(
        pathspec=rdf_paths.PathSpec(
            path="/nothing", pathtype=rdf_paths.PathSpec.PathType.OS),
        length=5000)

    read_vfs_patch = mock.patch.object(
        standard.vfs, "ReadVFS", return_value=self.data)
    read_vfs_patch.start()
    self.addCleanup(read_vfs_patch.stop)

    self.transfer_buf = action_mocks.ActionMock(standard.TransferBuffer)
def TransferBuffer(self, args):
    """TransferBuffer action mock.

    Args:
        args: Request whose `offset` selects where in `self.data` to start.

    Returns:
        A one-element list with a copy of the request whose `data` is the
        SHA-256 digest of the served bytes and whose `length` is their
        size. The bytes themselves are written to `data_store.BLOBS`.
    """
    reply = rdf_client.BufferReference(args)
    # Clamp so an offset past the end yields an empty tail.
    start = min(args.offset, len(self.data))
    tail = self.data[start:]

    reply.data = hashlib.sha256(tail).digest()
    reply.length = len(tail)

    data_store.BLOBS.WriteBlobWithUnknownHash(tail)
    return [reply]
def Run(self, args):
    """Reads a buffer on the client and sends it to the server.

    Only the SHA-256 digest of the bytes read is replied, together with
    the offset and the number of bytes read.

    Args:
        args: Request naming the pathspec, offset and length to read.

    Raises:
        RuntimeError: The requested length exceeds
            constants.CLIENT_MAX_BUFFER_SIZE.
    """
    limit = constants.CLIENT_MAX_BUFFER_SIZE
    if args.length > limit:
        # Make sure we limit the size of our output.
        raise RuntimeError("Can not read buffers this large.")

    data = vfs.ReadVFS(args.pathspec, args.offset, args.length)
    sha256_digest = hashlib.sha256(data).digest()

    # Report the hash of this blob to our flow as well as the offset and
    # length.
    reference = rdf_client.BufferReference(
        offset=args.offset, length=len(data), data=sha256_digest)
    self.SendReply(reference)
def testCSVPluginWritesBytesValuesCorrectly(self):
    """Byte payloads appear in the CSV `data` column as escaped text."""
    pathspec = rdf_paths.PathSpec.OS(path="/żółta/gęśla/jaźń")
    buffers = [
        rdf_client.BufferReference(data=payload, pathspec=pathspec)
        for payload in (b"\xff\x00\xff", b"\xfa\xfb\xfc")
    ]

    zip_fd, prefix = self.ProcessValuesToZip(
        {rdf_client.BufferReference: buffers})

    manifest_path = "{}/MANIFEST".format(prefix)
    data_path = "{}/ExportedMatch/from_BufferReference.csv".format(prefix)
    self.assertCountEqual(zip_fd.namelist(), [manifest_path, data_path])

    with zip_fd.open(data_path) as csv_file:
        csv_text = csv_file.read().decode("utf-8")
    rows = list(compat_csv.Reader(csv_text))
    self.assertLen(rows, 3)

    # First row is the header; the remaining two are the exported values.
    header, first, second = rows
    column = header.index("data")
    self.assertEqual(first[column], "\\xff\\x00\\xff")
    self.assertEqual(second[column], "\\xfa\\xfb\\xfc")
def FetchWindow(self, number_of_chunks_to_readahead):
    """Read ahead a number of buffers to fill the window.

    Args:
        number_of_chunks_to_readahead: Maximum number of chunk requests to
            issue; fewer are sent once the end of the file is reached.
    """
    issued = 0
    while issued < number_of_chunks_to_readahead:
        chunk_start = self.state.current_chunk_number * self.CHUNK_SIZE
        # Do not read past the end of file.
        if chunk_start >= self.state.file_size:
            return

        self.CallClient(
            server_stubs.TransferBuffer,
            rdf_client.BufferReference(
                pathspec=self.args.pathspec,
                offset=chunk_start,
                length=self.CHUNK_SIZE),
            next_state=compatibility.GetName(self.ReadBuffer))

        self.state.current_chunk_number += 1
        issued += 1
def TransferBuffer(self, responses):
    """Record one transferred chunk, then request the next or commit.

    While bytes remain, another TransferBuffer call is issued. Once
    nothing is left, every collected blob is added to the sparse image and
    the collected list is dropped from state.

    Args:
        responses: Client responses for the previous TransferBuffer call.

    Raises:
        IOError: The client call did not succeed.
    """
    # Did it work?
    if not responses.success:
        raise IOError("Error running TransferBuffer: %s" % responses.status)

    reply = responses.First()
    self.state.blobs.append([reply.offset // self.state.chunksize, reply])

    length_to_read = min(self.state.chunksize, self.state.bytes_left_to_read)

    if not length_to_read:
        # Everything has been read: write the collected blobs to the image.
        with aff4.FACTORY.Open(
                self.args.file_urn,
                token=self.token,
                aff4_type=standard.AFF4SparseImage,
                mode="rw") as image:
            for number, blob_ref in self.state.blobs:
                image.AddBlob(
                    blob_hash=rdf_objects.BlobID.FromBytes(blob_ref.data),
                    length=blob_ref.length,
                    chunk_number=number)

            del self.state.blobs
        return

    # TODO(user): Again, this is going to be too slow, since we're
    # waiting for a client response every time we request a buffer. We need
    # to queue up multiple reads.
    self.CallClient(
        server_stubs.TransferBuffer,
        rdf_client.BufferReference(
            pathspec=self.state.pathspec,
            length=length_to_read,
            offset=self.state.current_offset),
        next_state="TransferBuffer")

    # Move our offset along the file by how much we read.
    self.state.current_offset += length_to_read
    # Remember how much more we need to read.
    self.state.bytes_left_to_read = max(
        0, self.state.bytes_left_to_read - length_to_read)
def testBasicConversion(self):
    """A BufferReference converts to one match with the same fields."""
    reference = rdf_client.BufferReference(
        length=123,
        offset=456,
        data=b"somedata",
        pathspec=rdf_paths.PathSpec(
            path="/some/path", pathtype=rdf_paths.PathSpec.PathType.OS),
    )

    converter = buffer_reference.BufferReferenceToExportedMatchConverter()
    results = list(converter.Convert(self.metadata, reference))

    self.assertLen(results, 1)
    exported = results[0]
    self.assertEqual(exported.length, 123)
    self.assertEqual(exported.offset, 456)
    self.assertEqual(exported.data, b"somedata")
def testNestedProtobufAssignment(self):
    """Check that we can assign a nested protobuf."""
    test_path = "C:\\test"
    container = rdf_client.BufferReference()
    pathspec = rdf_paths.PathSpec(path=test_path, pathtype=1)

    # Should raise - incompatible RDFType.
    wrong_rdf_type = rdfvalue.RDFString("hello")
    with self.assertRaises(ValueError):
        container.pathspec = wrong_rdf_type

    # Should raise - incompatible RDFProto type.
    wrong_proto_type = rdf_client_fs.StatEntry(st_size=5)
    with self.assertRaises(ValueError):
        container.pathspec = wrong_proto_type

    # NOTE(review): the assignments below use `device`, while the checks
    # before and after use `pathspec`. The final HasField("pathspec") may
    # therefore hold trivially - confirm whether `pathspec` was intended.

    # Assign directly.
    container.device = pathspec
    self.assertEqual(container.device.path, test_path)

    # Clear the field.
    container.device = None

    # Check the protobuf does not have the field set at all.
    self.assertFalse(container.HasField("pathspec"))
def testFileFinderResultExportConverter(self):
    """FileFinderResult converts to files, matches and registry keys."""
    pathspec = rdf_paths.PathSpec(
        path="/some/path", pathtype=rdf_paths.PathSpec.PathType.OS)
    matches = [
        rdf_client.BufferReference(
            offset=42, length=43, data=b"somedata1", pathspec=pathspec),
        rdf_client.BufferReference(
            offset=44, length=45, data=b"somedata2", pathspec=pathspec),
    ]
    stat_entry = rdf_client_fs.StatEntry(
        pathspec=pathspec,
        st_mode=33184,
        st_ino=1063090,
        st_atime=1336469177,
        st_mtime=1336129892,
        st_ctime=1336129892,
        st_btime=1313131313)
    file_finder_result = rdf_file_finder.FileFinderResult(
        stat_entry=stat_entry, matches=matches)

    converter = file.FileFinderResultConverter()
    results = list(converter.Convert(self.metadata, file_finder_result))

    file_urn = "aff4:/%s/fs/os/some/path" % self.client_id

    # We expect 1 ExportedFile instance in the results.
    exported_files = [
        item for item in results if isinstance(item, file.ExportedFile)
    ]
    self.assertLen(exported_files, 1)
    exported_file = exported_files[0]

    self.assertEqual(exported_file.basename, "path")
    self.assertEqual(exported_file.urn, file_urn)

    expected_stat = (
        ("st_mode", 33184),
        ("st_ino", 1063090),
        ("st_atime", 1336469177),
        ("st_mtime", 1336129892),
        ("st_ctime", 1336129892),
        ("st_btime", 1313131313),
    )
    for field_name, expected_value in expected_stat:
        self.assertEqual(getattr(exported_file, field_name), expected_value)

    # None of the content or hash fields should be set.
    unset_fields = ("content", "content_sha256", "hash_md5", "hash_sha1",
                    "hash_sha256")
    for field_name in unset_fields:
        self.assertFalse(exported_file.HasField(field_name))

    # We expect 2 ExportedMatch instances in the results.
    exported_matches = sorted(
        (item for item in results
         if isinstance(item, buffer_reference.ExportedMatch)),
        key=lambda x: x.offset)
    self.assertLen(exported_matches, 2)

    expected_matches = [(42, 43, b"somedata1"), (44, 45, b"somedata2")]
    for exported, (offset, length, data) in zip(exported_matches,
                                                expected_matches):
        self.assertEqual(exported.offset, offset)
        self.assertEqual(exported.length, length)
        self.assertEqual(exported.data, data)
        self.assertEqual(exported.urn, file_urn)

    # Also test registry entries.
    registry_data = rdf_protodict.DataBlob()
    registry_data.SetValue(b"testdata")
    registry_stat = rdf_client_fs.StatEntry(
        registry_type="REG_SZ",
        registry_data=registry_data,
        pathspec=rdf_paths.PathSpec(
            path="HKEY_USERS/S-1-1-1-1/Software", pathtype="REGISTRY"))
    registry_result = rdf_file_finder.FileFinderResult(
        stat_entry=registry_stat)

    registry_converter = file.FileFinderResultConverter()
    registry_results = list(
        registry_converter.Convert(self.metadata, registry_result))

    self.assertLen(registry_results, 1)
    registry_key = registry_results[0]
    self.assertIsInstance(registry_key, file.ExportedRegistryKey)
    self.assertEqual(registry_key.data, b"testdata")
    self.assertEqual(
        registry_key.urn,
        "aff4:/%s/registry/HKEY_USERS/S-1-1-1-1/Software" % self.client_id)
def Run(self, args):
    """Search the file for the pattern.

    This implements the grep algorithm used to scan files. It reads
    the data in chunks of BUFF_SIZE (10 MB currently) and can use
    different functions to search for matching patterns. In every
    step, a buffer that is a bit bigger than the block size is used in
    order to return all the requested results. Specifically, a
    preamble is used in order to not miss any patterns that start in
    one block of data and end in the next and also a postscript buffer
    is kept such that the algorithm can return bytes trailing the
    pattern even if the pattern is at the end of one block.

    One block:

    -----------------------------
    | Pre | Data         | Post |
    -----------------------------
    Searching the pattern is done here:
    <------------------->

    The following block is constructed like this:

    -----------------------------
    | Pre | Data         | Post |
    -----------------------------
                         |
                   -----------------------------
                   | Pre | Data         | Post |
                   -----------------------------

    The preamble is filled from Data so every hit that happens to fall
    entirely into the preamble has to be discarded since it has
    already been discovered in the step before.

    Grepping for memory

    If this action is used to grep the memory of a client machine
    using one of the GRR memory acquisition drivers, we have to be
    very careful not to have any hits in the GRR process memory space
    itself. Therefore, if the input is a literal, it is XOR encoded
    and only visible in memory when the pattern is matched. This is
    done using bytearrays which guarantees in place updates and no
    leaking patterns. Also the returned data is encoded using a
    different XOR 'key'.

    This should guarantee that there are no hits when the pattern is
    not present in memory. However, since the data will be copied to
    the preamble and the postscript, a single pattern might in some
    cases produce multiple hits.

    Each hit is replied as a BufferReference whose data is XORed with
    `args.xor_out_key`. The action returns early after the first hit in
    FIRST_HIT mode, or after `self.HIT_LIMIT` hits (in which case one
    extra reply carrying an explanatory message is sent).

    Args:
      args: A protobuf describing the grep request.

    Raises:
      RuntimeError: No search pattern has been given in the request.
    """
    fd = vfs.VFSOpen(args.target, progress_callback=self.Progress)
    fd.Seek(args.start_offset)
    # File offset corresponding to the start of the "Data" part of `data`.
    base_offset = args.start_offset

    self.xor_in_key = args.xor_in_key
    self.xor_out_key = args.xor_out_key

    # A regex takes precedence over a literal when both are supplied.
    if args.regex:
        find_func = functools.partial(self.FindRegex, args.regex.AsBytes())
    elif args.literal:
        find_func = functools.partial(self.FindLiteral, args.literal.AsBytes())
    else:
        raise RuntimeError("Grep needs a regex or a literal.")

    preamble_size = 0
    postscript_size = 0
    hits = 0
    data = b""
    while fd.Tell() < args.start_offset + args.length:

        # Base size to read is at most the buffer size.
        to_read = min(args.length, self.BUFF_SIZE,
                      args.start_offset + args.length - fd.Tell())
        # Read some more data for the snippet.
        to_read += self.ENVELOPE_SIZE - postscript_size
        read_data = fd.Read(to_read)

        # Carry over the tail of the previous buffer (its postscript plus
        # one envelope) and append the newly read bytes.
        data = data[-postscript_size - self.ENVELOPE_SIZE:] + read_data

        # A short read shrinks the postscript by the number of missing bytes.
        postscript_size = max(0, self.ENVELOPE_SIZE - (to_read - len(read_data)))
        data_size = len(data) - preamble_size - postscript_size

        # Nothing left to search in this buffer.
        if data_size == 0 and postscript_size == 0:
            break

        for (start, end) in find_func(data):
            # Ignore hits in the preamble.
            if end <= preamble_size:
                continue

            # Ignore hits in the postscript.
            if end > preamble_size + data_size:
                continue

            # Offset of file in the end after length.
            if end + base_offset - preamble_size > args.start_offset + args.length:
                break

            # Widen the hit by the requested context, clamped to the buffer.
            data_start = max(0, start - args.bytes_before)
            data_end = min(len(data), end + args.bytes_after)
            out_data = utils.Xor(data[data_start:data_end], self.xor_out_key)

            hits += 1
            self.SendReply(
                rdf_client.BufferReference(offset=base_offset + start -
                                           preamble_size,
                                           data=out_data,
                                           length=len(out_data),
                                           pathspec=fd.pathspec))

            if args.mode == rdf_client_fs.GrepSpec.Mode.FIRST_HIT:
                return

            if hits >= self.HIT_LIMIT:
                # The notice is XORed with the output key like regular hits.
                msg = utils.Xor(
                    b"This Grep has reached the maximum number of hits"
                    b" (%d)." % self.HIT_LIMIT, self.xor_out_key)
                self.SendReply(
                    rdf_client.BufferReference(offset=0, data=msg, length=len(msg)))
                return

        self.Progress()

        base_offset += data_size

        # Allow for overlap with previous matches.
        preamble_size = min(len(data), self.ENVELOPE_SIZE)