def Start(self):
  """Replies with a fixed MemoryInformation backed by the test auth.log.

  The device points at "searching/auth.log" under the configured
  Test.data_dir and is described by two hard-coded runs.
  """
  log_path = os.path.join(config_lib.CONFIG["Test.data_dir"],
                          "searching/auth.log")
  device = rdf_paths.PathSpec(
      path=log_path, pathtype=rdf_paths.PathSpec.PathType.OS)
  runs = [
      rdf_client.BufferReference(length=638976, offset=5),
      rdf_client.BufferReference(length=145184, offset=643074),
  ]
  self.SendReply(rdf_rekall_types.MemoryInformation(device=device, runs=runs))
def testNetgroupBufferParser(self):
  """Ensure we can extract users from a netgroup file."""
  parser = linux_file_parser.NetgroupBufferParser()
  netgroup_lines = [
      "group1 (-,user1,) (-,user2,) (-,user3,)\n",
      "super_group3 (-,user5,) (-,user6,) group1 group2\n",
  ]
  matches = [rdf_client.BufferReference(data=line) for line in netgroup_lines]
  ff_result = file_finder.FileFinderResult(matches=matches)

  # user2 and user3 are blacklisted, so only the remaining users are expected.
  config_lib.CONFIG.Set("Artifacts.netgroup_user_blacklist",
                        ["user2", "user3"])

  usernames = [user.username for user in parser.Parse(ff_result, None)]
  self.assertItemsEqual(usernames, [u"user1", u"user5", u"user6"])
def TransferBuffer(self, responses):
  """Stores one transferred chunk and requests the next one, if any is left.

  Args:
    responses: Responses of the TransferBuffer client action. The first
      response supplies the offset, the length and the blob hash (in its
      `data` field) of the chunk that was read.

  Raises:
    IOError: If the client action did not succeed.
  """
  # Did it work?
  if not responses.success:
    raise IOError("Error running TransferBuffer: %s" % responses.status)

  response = responses.First()

  # Write the data we got from the client to the file.
  sparse_image = self.state.fd
  # Floor division keeps the chunk index an integer even when true division
  # is in effect (Python 3 or `from __future__ import division`).
  chunk_number = response.offset // sparse_image.chunksize
  sparse_image.AddBlob(blob_hash=response.data,
                       length=response.length,
                       chunk_number=chunk_number)
  sparse_image.Flush()

  length_to_read = min(self.state.chunksize, self.state.bytes_left_to_read)
  if length_to_read:
    request = rdf_client.BufferReference(pathspec=self.state.pathspec,
                                         length=length_to_read,
                                         offset=self.state.current_offset)
    # TODO(user): Again, this is going to be too slow, since we're
    # waiting for a client response every time we request a buffer. We need to
    # queue up multiple reads.
    self.CallClient("TransferBuffer", request, next_state="TransferBuffer")

    # Move our offset along the file by how much we read.
    self.state.current_offset += length_to_read
    # Remember how much more we need to read.
    self.state.bytes_left_to_read = max(
        0, self.state.bytes_left_to_read - length_to_read)
def Start(self):
  """Schedules the ReadBuffer client action.

  The requested length is split into reads of at most
  CLIENT_MAX_BUFFER_SIZE bytes; each read is answered in the "StoreMBR"
  state.
  """
  pathspec = rdf_paths.PathSpec(
      path="\\\\.\\PhysicalDrive0\\",
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path_options=rdf_paths.PathSpec.Options.CASE_LITERAL)

  self.state.bytes_downloaded = 0
  # An array to collect buffers. This is not very efficient, MBR
  # data should be kept short though so this is not a big deal.
  self.state.buffers = []

  buffer_size = constants.CLIENT_MAX_BUFFER_SIZE
  total_length = self.args.length

  # Stepping over the offsets directly avoids computing the buffer count with
  # `/`, which only yields an integer under Python 2 classic division.
  for offset in range(0, total_length, buffer_size):
    request = rdf_client.BufferReference(
        pathspec=pathspec,
        offset=offset,
        # The last buffer may be shorter than a full one.
        length=min(total_length - offset, buffer_size))
    self.CallClient(server_stubs.ReadBuffer, request, next_state="StoreMBR")
def Run(self, args):
  """Reads a buffer on the client and sends it to the server.

  The compressed data goes to the "TransferStore" session, while the calling
  flow only receives the SHA-256 digest together with offset and length.

  Args:
    args: The request naming the pathspec, offset and length to read.

  Raises:
    RuntimeError: If more than CLIENT_MAX_BUFFER_SIZE bytes are requested.
  """
  # Refuse requests that would exceed the maximum size of a single buffer.
  if args.length > constants.CLIENT_MAX_BUFFER_SIZE:
    raise RuntimeError("Can not read buffers this large.")

  data = vfs.ReadVFS(args.pathspec, args.offset, args.length,
                     progress_callback=self.Progress)
  compressed_blob = rdf_protodict.DataBlob(
      data=zlib.compress(data),
      compression=rdf_protodict.DataBlob.CompressionType.ZCOMPRESSION)
  digest = hashlib.sha256(data).digest()

  # Count the buffer against this session before anything is sent, so that
  # the network send limit is checked first.
  self.ChargeBytesToSession(len(data))

  # The payload itself is delivered to the TransferStore well known flow.
  transfer_store = rdfvalue.SessionID(flow_name="TransferStore")
  self.grr_worker.SendReply(compressed_blob, session_id=transfer_store)

  # Our own flow only learns the hash of the blob plus where it came from.
  reference = rdf_client.BufferReference(
      offset=args.offset, length=len(data), data=digest)
  self.SendReply(reference)
def testPasswdBufferParser(self):
  """Ensure we can extract users from a passwd file."""
  parser = linux_file_parser.PasswdBufferParser()
  buf1 = rdf_client.BufferReference(data="user1:x:1000:1000:User1"
                                    " Name,,,:/home/user1:/bin/bash\n")
  buf2 = rdf_client.BufferReference(data="user2:x:1000:1000:User2"
                                    " Name,,,:/home/user2:/bin/bash\n")
  ff_result = file_finder.FileFinderResult(matches=[buf1, buf2])

  out = list(parser.Parse(ff_result, None))
  self.assertEqual(len(out), 2)
  # Check the type of every parsed entry; the previous version asserted on
  # out[1] twice and never looked at out[0].
  for user in out:
    self.assertIsInstance(user, rdf_client.User)
  self.assertEqual(out[0].username, "user1")
  self.assertEqual(out[0].full_name, "User1 Name,,,")
def ReadBuffer(self, args):
  """Returns the complete `mbr` data as one buffer, ignoring the request."""
  del args  # Unused: the whole blob is always returned from offset 0.
  reference = rdf_client.BufferReference(data=mbr, offset=0, length=len(mbr))
  return [reference]
def Start(self):
  """Requests the first chunk of the range to fetch for the sparse image.

  The requested offset/length pair is aligned to whole chunks via
  AlignToChunks and the read position is tracked in the flow state.
  """
  fd = aff4.FACTORY.Open(
      self.args.file_urn,
      token=self.token,
      aff4_type=standard.AFF4SparseImage,
      mode="rw")
  self.state.pathspec = fd.Get(fd.Schema.PATHSPEC)

  # The chunk size of this particular object is used, since it may differ
  # from the class-wide default.
  chunksize = fd.chunksize
  self.state.chunksize = chunksize
  self.state.blobs = []

  # Only whole chunks are ever read.
  aligned_length, aligned_offset = self.AlignToChunks(
      self.args.length, self.args.offset, chunksize)

  # The first request always covers exactly one chunk.
  request = rdf_client.BufferReference(
      pathspec=self.state.pathspec, length=chunksize, offset=aligned_offset)

  # Record the position after that first chunk and how much is still to come.
  self.state.bytes_left_to_read = aligned_length - chunksize
  self.state.current_offset = aligned_offset + chunksize

  self.CallClient(
      server_stubs.TransferBuffer, request, next_state="TransferBuffer")
def Scan(self, path, matcher):
  """Scans given file searching for occurrences of given pattern.

  Args:
    path: A path to the file that needs to be searched.
    matcher: A matcher object specifying a pattern to search for.

  Yields:
    `BufferReference` objects pointing to file parts with matching content.
  """
  streamer = streaming.Streamer(
      chunk_size=self.CHUNK_SIZE, overlap_size=self.OVERLAP_SIZE)
  chunks = streamer.StreamFilePath(
      path, offset=self.params.start_offset, amount=self.params.length)

  for chunk in chunks:
    for span in chunk.Scan(matcher):
      # Widen the hit by the requested context, clamped to the chunk bounds.
      first = max(span.begin - self.params.bytes_before, 0)
      last = min(span.end + self.params.bytes_after, len(chunk.data))
      context = chunk.data[first:last]

      yield rdf_client.BufferReference(
          offset=chunk.offset + first, length=len(context), data=context)

      # In first-hit mode a single result is enough.
      if self.params.mode == self.params.Mode.FIRST_HIT:
        return
def GetBufferForChunk(self, chunk):
  """Builds the read request that covers the chunk with the given index.

  Args:
    chunk: Index of the chunk; multiplied by the chunk size stored in the
      flow state to obtain the byte offset.

  Returns:
    A BufferReference of one chunk size starting at the chunk's offset.
  """
  chunksize = self.state.chunksize
  return rdf_client.BufferReference(
      pathspec=self.state.pathspec,
      length=chunksize,
      offset=chunk * chunksize)
def ReadBuffer(self, args):
  """Returns the requested window of `self.mbr` as a single buffer.

  Args:
    args: Request whose `offset` and `length` select the slice to return.

  Returns:
    A one-element list holding the BufferReference with the sliced data.
  """
  start = args.offset
  window = self.mbr[start:start + args.length]
  # The reported length is that of the slice, which may be shorter than the
  # requested length.
  reference = rdf_client.BufferReference(
      data=window, offset=start, length=len(window))
  return [reference]
class ReadBuffer(actions.ActionPlugin):
  """Reads a buffer from a file and returns it to a server callback."""
  in_rdfvalue = rdf_client.BufferReference
  out_rdfvalues = [rdf_client.BufferReference]

  def Run(self, args):
    """Reads a buffer on the client and sends it to the server.

    Args:
      args: A BufferReference naming the pathspec, offset and length to read.

    Raises:
      RuntimeError: If more than CLIENT_MAX_BUFFER_SIZE bytes are requested.
    """
    # Make sure we limit the size of our output
    if args.length > constants.CLIENT_MAX_BUFFER_SIZE:
      raise RuntimeError("Can not read buffers this large.")

    try:
      fd = vfs.VFSOpen(args.pathspec, progress_callback=self.Progress)

      fd.Seek(args.offset)
      # Report the position the file object says it is at, rather than
      # echoing the requested offset.
      offset = fd.Tell()

      data = fd.Read(args.length)

    # `except ... as e` is valid on Python 2.6+ and Python 3, unlike the
    # comma form.
    except (IOError, OSError) as e:
      # I/O failures are reported through the status instead of raising.
      self.SetStatus(rdf_flows.GrrStatus.ReturnedStatus.IOERROR, e)
      return

    # Now return the data to the server
    self.SendReply(
        rdf_client.BufferReference(offset=offset,
                                   data=data,
                                   length=len(data),
                                   pathspec=fd.pathspec))
def TransferBuffer(self, args):
  """TransferBuffer action mock.

  Args:
    args: The request; the response is constructed from it.

  Returns:
    A one-element list with a response carrying all of `self.data` from the
    requested offset onwards.
  """
  response = rdf_client.BufferReference(args)
  # Offsets beyond the end of the data are clamped, giving an empty tail.
  start = min(args.offset, len(self.data))
  tail = self.data[start:]
  response.data = tail
  response.length = len(tail)
  return [response]
def setUp(self):
  """Prepares a buffer request and replaces vfs.ReadVFS with canned data."""
  super(TestNetworkByteLimits, self).setUp()

  self.buffer_ref = rdf_client.BufferReference(
      pathspec=rdf_paths.PathSpec(
          path="/nothing", pathtype=rdf_paths.PathSpec.PathType.OS),
      length=5000)
  self.data = "X" * 500

  def _FakeReadVFS(x, y, z, progress_callback=None):
    """Ignores all arguments and returns the canned test data."""
    del x, y, z, progress_callback  # Unused.
    return self.data

  # Keep the real implementation around before swapping in the fake.
  self.old_read = standard.vfs.ReadVFS
  standard.vfs.ReadVFS = _FakeReadVFS

  self.transfer_buf = action_mocks.ActionMock(standard.TransferBuffer)
def Start(self):
  """Schedules the ReadBuffer client action.

  A single read of `self.args.length` bytes from offset 0 of the first
  physical drive is requested; the answer is handled in "StoreMBR".
  """
  drive = rdf_paths.PathSpec(
      path="\\\\.\\PhysicalDrive0\\",
      pathtype=rdf_paths.PathSpec.PathType.OS,
      path_options=rdf_paths.PathSpec.Options.CASE_LITERAL)
  read_request = rdf_client.BufferReference(
      pathspec=drive, offset=0, length=self.args.length)
  self.CallClient("ReadBuffer", read_request, next_state="StoreMBR")
def testReadBuffer(self):
  """Test reading a buffer."""
  pathspec = rdf_paths.PathSpec(
      path=os.path.join(self.base_path, "morenumbers.txt"),
      pathtype=rdf_paths.PathSpec.PathType.OS)
  request = rdf_client.BufferReference(
      pathspec=pathspec, offset=100, length=10)

  responses = self.RunAction(standard.ReadBuffer, request)
  result = responses[0]

  # Ten bytes starting at offset 100 of the fixture file are expected back.
  self.assertEqual(result.offset, 100)
  self.assertEqual(result.length, 10)
  self.assertEqual(result.data, "7\n38\n39\n40")
def FetchWindow(self, number_of_chunks_to_readahead):
  """Read ahead a number of buffers to fill the window.

  Args:
    number_of_chunks_to_readahead: Upper bound on how many chunk reads are
      scheduled by this call.
  """
  for _ in range(number_of_chunks_to_readahead):
    next_chunk = self.state.current_chunk_number

    # Stop as soon as the next chunk would lie past the end of the file.
    if next_chunk > self.state.max_chunk_number:
      break

    request = rdf_client.BufferReference(
        pathspec=self.args.pathspec,
        offset=next_chunk * self.CHUNK_SIZE,
        length=self.CHUNK_SIZE)
    self.CallClient("TransferBuffer", request, next_state="ReadBuffer")
    self.state.current_chunk_number += 1
def Run(self, args):
  """Reads a buffer on the client and sends its hash to the server.

  Args:
    args: The request naming the pathspec, offset and length to read.

  Raises:
    RuntimeError: If more than CLIENT_MAX_BUFFER_SIZE bytes are requested.
  """
  # Refuse requests that would exceed the maximum size of a single buffer.
  if args.length > constants.CLIENT_MAX_BUFFER_SIZE:
    raise RuntimeError("Can not read buffers this large.")

  data = vfs.ReadVFS(args.pathspec, args.offset, args.length)

  # Only the SHA-256 digest of the data is reported back, along with the
  # offset and the number of bytes that were actually read.
  reference = rdf_client.BufferReference(
      offset=args.offset,
      length=len(data),
      data=hashlib.sha256(data).digest())
  self.SendReply(reference)
def TransferBuffer(self, responses):
  """Records a transferred chunk, then reads on or writes out all chunks.

  While bytes remain, the next chunk is requested from the client. Once
  nothing is left, every collected blob is added to the sparse image at
  `self.args.file_urn` and the collected list is dropped from the state.

  Args:
    responses: Responses of the TransferBuffer client action. The first
      response supplies the offset, the length and the blob hash (in its
      `data` field) of the chunk that was read.

  Raises:
    IOError: If the client action did not succeed.
  """
  # Did it work?
  if not responses.success:
    raise IOError("Error running TransferBuffer: %s" % responses.status)

  response = responses.First()

  # Floor division keeps the chunk index an integer even when true division
  # is in effect (Python 3 or `from __future__ import division`).
  chunk_number = response.offset // self.state.chunksize
  self.state.blobs.append([chunk_number, response])

  length_to_read = min(self.state.chunksize, self.state.bytes_left_to_read)

  if length_to_read:
    request = rdf_client.BufferReference(
        pathspec=self.state.pathspec,
        length=length_to_read,
        offset=self.state.current_offset)
    # TODO(user): Again, this is going to be too slow, since we're
    # waiting for a client response every time we request a buffer. We need to
    # queue up multiple reads.
    self.CallClient(server_stubs.TransferBuffer, request,
                    next_state="TransferBuffer")

    # Move our offset along the file by how much we read.
    self.state.current_offset += length_to_read
    # Remember how much more we need to read.
    self.state.bytes_left_to_read = max(
        0, self.state.bytes_left_to_read - length_to_read)

  else:
    # Everything has been read: write the collected chunks to the image.
    with aff4.FACTORY.Open(self.args.file_urn,
                           token=self.token,
                           aff4_type=standard.AFF4SparseImage,
                           mode="rw") as fd:
      for stored_chunk_number, stored_response in self.state.blobs:
        fd.AddBlob(blob_hash=stored_response.data,
                   length=stored_response.length,
                   chunk_number=stored_chunk_number)

    del self.state.blobs
def _ScanForMatches(params, path, matching_func, matches):
  """Scans a file for hits and appends them, with context, to `matches`.

  Args:
    params: Scan parameters; `start_offset`, `length`, `bytes_before`,
      `bytes_after` and `mode` are read.
    path: Path of the file to scan.
    matching_func: Callable taking `(chunk, start_position)` and returning a
      `(position, match_length)` pair, with `position` being None when
      there is no further hit in the chunk.
    matches: Collection the resulting BufferReference objects are appended
      to. It is only modified when at least one hit was found.

  Returns:
    True if at least one hit was found, False otherwise (including the case
    where the file could not be opened).
  """
  try:
    fd = open(path, mode="rb")
  except IOError:
    return False

  findings = []
  # The context manager guarantees the file is closed on every exit path;
  # previously the handle was leaked.
  with fd:
    current_offset = params.start_offset
    for chunk in FileFinderOS._StreamFile(fd, current_offset, params.length):
      pos, match_length = matching_func(chunk, 0)
      while pos is not None:
        # A hit lying inside the leading overlap region was already reported
        # while processing the previous chunk.
        already_processed = (
            len(chunk) > FileFinderOS.OVERLAP_SIZE and
            pos + match_length < FileFinderOS.OVERLAP_SIZE)

        if not already_processed:
          context_start = max(pos - params.bytes_before, 0)
          # This might cut off some data if the hit is at the chunk border.
          context_end = min(pos + match_length + params.bytes_after,
                            len(chunk))
          data = chunk[context_start:context_end]
          findings.append(
              rdf_client.BufferReference(
                  offset=current_offset + context_start,
                  length=len(data),
                  data=data,
              ))

          if params.mode == params.Mode.FIRST_HIT:
            for finding in findings:
              matches.append(finding)
            return True

        pos, match_length = matching_func(chunk, pos + 1)

      current_offset += len(chunk) - FileFinderOS.OVERLAP_SIZE

  if not findings:
    return False

  for finding in findings:
    matches.append(finding)
  return True
def Grep(self, responses):
  """Forwards the found files to "WriteHits", grepping them if requested.

  Nothing happens when the responses report a failure.

  Args:
    responses: The responses to process; each one supplies a `pathspec`
      and, when grepping, an `st_mode`.
  """
  if not responses.success:
    return

  if not self.args.grep:
    # Without a grep specification every response counts as a hit.
    msgs = [
        rdf_client.BufferReference(pathspec=r.pathspec) for r in responses
    ]
    self.CallStateInline(messages=msgs, next_state="WriteHits")
    return

  # A grep specification was given, so the client has to grep the files.
  for response in responses:
    # Directories are skipped.
    if stat.S_ISDIR(response.st_mode):
      continue

    # Turn the BareGrepSpec into a full GrepSpec targeting this file.
    request = rdf_client.GrepSpec(
        target=response.pathspec, **self.args.grep.AsDict())
    self.CallClient(
        "Grep",
        request=request,
        next_state="WriteHits",
        request_data={"pathspec": response.pathspec})
def Run(self, args):
  """Search the file for the pattern.

  This implements the grep algorithm used to scan files. It reads the data in
  chunks of BUFF_SIZE (10 MB currently) and can use different functions to
  search for matching patterns. In every step, a buffer that is a bit bigger
  than the block size is used in order to return all the requested results.
  Specifically, a preamble is used in order to not miss any patterns that
  start in one block of data and end in the next and also a postscript buffer
  is kept such that the algorithm can return bytes trailing the pattern even
  if the pattern is at the end of one block.

  One block:

  -----------------------------
  | Pre | Data         | Post |
  -----------------------------
  Searching the pattern is done here:
  <------------------->

  The following block is constructed like this:

  -----------------------------
  | Pre | Data         | Post |
  -----------------------------
                       |
                 -----------------------------
                 | Pre | Data         | Post |
                 -----------------------------

  The preamble is filled from Data so every hit that happens to fall entirely
  into the preamble has to be discarded since it has already been discovered
  in the step before.

  Grepping for memory

  If this action is used to grep the memory of a client machine using one of
  the GRR memory acquisition drivers, we have to be very careful not to have
  any hits in the GRR process memory space itself. Therefore, if the input is
  a literal, it is XOR encoded and only visible in memory when the pattern is
  matched. This is done using bytearrays which guarantees in place updates
  and no leaking patterns. Also the returned data is encoded using a
  different XOR 'key'.

  This should guarantee that there are no hits when the pattern is not
  present in memory. However, since the data will be copied to the preamble
  and the postscript, a single pattern might in some cases produce multiple
  hits.

  Args:
    args: A protobuf describing the grep request.

  Raises:
    RuntimeError: No search pattern has been given in the request.
  """
  fd = vfs.VFSOpen(args.target, progress_callback=self.Progress)
  fd.Seek(args.start_offset)
  base_offset = args.start_offset

  self.xor_in_key = args.xor_in_key
  self.xor_out_key = args.xor_out_key

  if args.regex:
    find_func = functools.partial(self.FindRegex, args.regex)
  elif args.literal:
    find_func = functools.partial(
        self.FindLiteral, bytearray(utils.SmartStr(args.literal)))
  else:
    raise RuntimeError("Grep needs a regex or a literal.")

  preamble_size = 0
  postscript_size = 0
  hits = 0
  data = ""
  while fd.Tell() < args.start_offset + args.length:

    # Base size to read is at most the buffer size.
    to_read = min(args.length, self.BUFF_SIZE,
                  args.start_offset + args.length - fd.Tell())
    # Read some more data for the snippet.
    to_read += self.ENVELOPE_SIZE - postscript_size
    read_data = fd.Read(to_read)

    # Keep the tail of the previous buffer in front of the new data.
    data = data[-postscript_size - self.ENVELOPE_SIZE:] + read_data

    # A short read shrinks the postscript accordingly.
    postscript_size = max(
        0, self.ENVELOPE_SIZE - (to_read - len(read_data)))
    data_size = len(data) - preamble_size - postscript_size

    if data_size == 0 and postscript_size == 0:
      break

    for (start, end) in find_func(data):
      # Ignore hits in the preamble.
      if end <= preamble_size:
        continue

      # Ignore hits in the postscript.
      if end > preamble_size + data_size:
        continue

      # Offset of file in the end after length.
      if end + base_offset - preamble_size > args.start_offset + args.length:
        break

      # XOR-encode the hit plus the requested context. Joining once over the
      # slice avoids the quadratic cost of per-character concatenation.
      snippet_start = max(0, start - args.bytes_before)
      snippet_end = min(len(data), end + args.bytes_after)
      out_data = "".join(
          chr(ord(char) ^ self.xor_out_key)
          for char in data[snippet_start:snippet_end])

      hits += 1
      self.SendReply(
          rdf_client.BufferReference(offset=base_offset + start -
                                     preamble_size,
                                     data=out_data,
                                     length=len(out_data),
                                     pathspec=fd.pathspec))

      if args.mode == rdf_client.GrepSpec.Mode.FIRST_HIT:
        return

      if hits >= self.HIT_LIMIT:
        # Tell the server that the result list was cut off at the limit.
        msg = utils.Xor(
            "This Grep has reached the maximum number of hits"
            " (%d)." % self.HIT_LIMIT, self.xor_out_key)
        self.SendReply(
            rdf_client.BufferReference(offset=0, data=msg, length=len(msg)))
        return

    self.Progress()

    base_offset += data_size

    # Allow for overlap with previous matches.
    preamble_size = min(len(data), self.ENVELOPE_SIZE)