def testFindActionCrossDev(self):
  """Test that device boundaries don't get crossed, also by default."""
  pathspec = rdfvalue.PathSpec(path="/mock2/",
                               pathtype=rdfvalue.PathSpec.PathType.OS)

  # (extra FindSpec kwargs, expected number of hits). Omitting cross_devs
  # entirely must behave exactly like cross_devs=False.
  scenarios = [
      (dict(cross_devs=True), 9),
      (dict(cross_devs=False), 7),
      (dict(), 7),
  ]

  for extra_kwargs, expected_hits in scenarios:
    request = rdfvalue.FindSpec(pathspec=pathspec, path_regex=".",
                                **extra_kwargs)
    request.iterator.number = 200
    results = self.RunAction("Find", request)
    hits = [r.hit for r in results if isinstance(r, rdfvalue.FindSpec)]
    self.assertEqual(len(hits), expected_hits)
def testFindAction(self):
  """Test the find action."""
  # Retrieve every hit with a single large request first.
  pathspec = rdfvalue.PathSpec(path="/mock2/",
                               pathtype=rdfvalue.PathSpec.PathType.OS)
  request = rdfvalue.FindSpec(pathspec=pathspec, path_regex=".")
  request.iterator.number = 200
  result = self.RunAction("Find", request)
  all_files = [r.hit for r in result if isinstance(r, rdfvalue.FindSpec)]

  # Now page through the same hits one at a time via the iterator.
  paged_hits = []
  request = rdfvalue.FindSpec(pathspec=pathspec, path_regex=".")
  request.iterator.number = 1
  while True:
    result = self.RunAction("Find", request)
    if request.iterator.state == rdfvalue.Iterator.State.FINISHED:
      break

    # Each page carries exactly one hit plus the updated iterator.
    self.assertEqual(len(result), 2)
    self.assertTrue(isinstance(result[0], rdfvalue.FindSpec))
    self.assertTrue(isinstance(result[1], rdfvalue.Iterator))
    paged_hits.append(result[0].hit)
    request.iterator = result[1].Copy()

  for bulk_hit, paged_hit in zip(all_files, paged_hits):
    self.assertRDFValueEqual(bulk_hit, paged_hit)

  # Make sure the iterator is finished.
  self.assertEqual(request.iterator.state, rdfvalue.Iterator.State.FINISHED)

  # Ensure we remove old states from client_state.
  self.assertEqual(len(request.iterator.client_state.dat), 0)
def GetFindSpecs(self):
  """Determine the Find specifications.

  Yields:
    A path specification to search

  Raises:
    OSError: If the client operating system is not supported.
  """
  # Search the infected-files quarantine directory (any entry matches).
  infected_pathspec = rdfvalue.PathSpec(
      path=self.GetSophosAVInfectedPath(), pathtype=self.args.pathtype)
  yield rdfvalue.FindSpec(pathspec=infected_pathspec,
                          path_regex=".*",
                          max_depth=1)

  # Search the log directory, restricted to the log filename pattern.
  logs_pathspec = rdfvalue.PathSpec(
      path=self.GetSophosAVLogsPath(), pathtype=self.args.pathtype)
  yield rdfvalue.FindSpec(pathspec=logs_pathspec,
                          path_regex=self.GetSophosAVLogsPathRegex(),
                          max_depth=1)
def Start(self):
  """Determine the Firefox history directory."""
  self.state.Register("hist_count", 0)
  self.state.Register("history_paths", [])

  # An explicit path from the args wins; otherwise guess per-user locations.
  if self.args.history_path:
    self.state.history_paths.append(self.args.history_path)
  else:
    self.state.history_paths = self.GuessHistoryPaths(self.args.username)

  if not self.state.history_paths:
    raise flow.FlowError("Could not find valid History paths.")

  if self.runner.output:
    self.runner.output = aff4.FACTORY.Create(
        self.runner.output.urn, "VFSAnalysisFile", token=self.token)

  history_filename = "places.sqlite"
  for history_dir in self.state.history_paths:
    findspec = rdfvalue.FindSpec(max_depth=2,
                                 path_regex="^%s$" % history_filename)
    findspec.pathspec.path = history_dir
    findspec.pathspec.pathtype = self.args.pathtype

    self.CallFlow("FetchFiles", findspec=findspec, next_state="ParseFiles")
def testFindWithMaxFiles(self):
  """Test that the Find flow works when specifying proto directly."""
  client_mock = action_mocks.ActionMock("Find")
  output_path = "analysis/FindFlowTest4"

  # A findspec matching everything under the OS root.
  findspec = rdfvalue.FindSpec(
      path_regex=".*",
      pathspec=rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS))

  flow_iter = test_lib.TestFlowHelper("FindFiles", client_mock,
                                      client_id=self.client_id,
                                      token=self.token, findspec=findspec,
                                      iteration_count=3,
                                      output=output_path, max_results=7)
  for _ in flow_iter:
    pass

  # Check the output file is created and was capped at max_results hits.
  fd = aff4.FACTORY.Open(self.client_id.Add(output_path), token=self.token)
  self.assertEqual(len(fd), 7)
def testFindDirectories(self):
  """Test that the Find flow works with directories."""
  client_mock = action_mocks.ActionMock("Find")
  output_path = "analysis/FindFlowTest2"

  # Prepare a findspec matching names containing "bin".
  findspec = rdfvalue.FindSpec(
      path_regex="bin",
      pathspec=rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS))

  for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                   client_id=self.client_id,
                                   token=self.token, output=output_path,
                                   findspec=findspec):
    pass

  # Check the output file is created.
  fd = aff4.FACTORY.Open(self.client_id.Add(output_path), token=self.token)

  # Make sure that bin is a directory.
  self.assertEqual(len(fd), 2)
  for hit in fd:
    self.assertTrue("bin" in utils.SmartStr(hit.aff4path))
    self.assertEqual(hit.__class__.__name__, "StatEntry")
def runTest(self):
  """Launch our flows."""
  # Run a plain directory listing and a Find over the same registry key.
  # NOTE: the loop variable is deliberately NOT named "flow" so it does
  # not shadow the flow module.
  flow_requests = [
      ("ListDirectory", {
          "pathspec": rdfvalue.PathSpec(
              pathtype=rdfvalue.PathSpec.PathType.REGISTRY,
              path=self.reg_path)}),
      ("FindFiles", {
          "findspec": rdfvalue.FindSpec(
              pathspec=rdfvalue.PathSpec(
                  path=self.reg_path,
                  pathtype=rdfvalue.PathSpec.PathType.REGISTRY),
              path_regex="ProfileImagePath"),
          "output": self.output_path}),
  ]

  for flow_name, flow_args in flow_requests:
    if self.local_worker:
      self.session_id = debugging.StartFlowAndWorker(
          self.client_id, flow_name, **flow_args)
    else:
      self.session_id = flow_utils.StartFlowAndWait(self.client_id,
                                                    flow_name=flow_name,
                                                    token=self.token,
                                                    **flow_args)
    self.CheckFlow()
def Start(self):
  """Determine the Chrome directory."""
  self.state.Register("hist_count", 0)
  # List of paths where history files are located.
  self.state.Register("history_paths", [])

  if self.state.args.history_path:
    self.state.history_paths.append(self.state.args.history_path)

  if self.runner.output:
    self.runner.output = aff4.FACTORY.Create(
        self.runner.output.urn, "VFSAnalysisFile", token=self.token)

  # Fall back to guessing per-user locations if no path was supplied.
  if not self.state.history_paths:
    self.state.history_paths = self.GuessHistoryPaths(
        self.state.args.username)
  if not self.state.history_paths:
    raise flow.FlowError("Could not find valid History paths.")

  history_filenames = ["History"]
  if self.state.args.get_archive:
    history_filenames.append("Archived History")

  for history_dir in self.state.history_paths:
    for history_filename in history_filenames:
      findspec = rdfvalue.FindSpec(
          max_depth=1,
          path_regex="^{0}$".format(history_filename),
          pathspec=rdfvalue.PathSpec(pathtype=self.state.args.pathtype,
                                     path=history_dir))
      self.CallFlow("FetchFiles", findspec=findspec,
                    next_state="ParseFiles")
def testFindFilesWithGlob(self):
  """Test that the Find flow works with glob."""
  client_mock = action_mocks.ActionMock("Find")
  output_path = "analysis/FindFlowTest1"

  # Prepare a findspec using a glob rather than a regex.
  findspec = rdfvalue.FindSpec(
      path_glob="bash*",
      pathspec=rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS))

  for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                   client_id=self.client_id,
                                   token=self.token, output=output_path,
                                   findspec=findspec):
    pass

  # Check the output file is created.
  fd = aff4.FACTORY.Open(self.client_id.Add(output_path), token=self.token)

  # Make sure that bash is a file.
  matches = set([hit.aff4path.Basename() for hit in fd])
  self.assertEqual(sorted(matches), ["bash"])

  self.assertEqual(len(fd), 2)
  for hit in fd:
    self.assertTrue(utils.SmartStr(hit.aff4path).endswith("bash"))
    self.assertEqual(hit.__class__.__name__, "StatEntry")
def testNoFilters(self):
  """Test that we get all files with no filters in place."""
  # With no filter criteria (only cross_devs) every entry under /mock2/
  # is a hit.
  request = rdfvalue.FindSpec(
      pathspec=rdfvalue.PathSpec(path="/mock2/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS),
      cross_devs=True)
  request.iterator.number = 200

  hits = [r.hit for r in self.RunAction("Find", request)
          if isinstance(r, rdfvalue.FindSpec)]
  self.assertEqual(len(hits), 9)
class TestFindTSKLinux(TestListDirectoryTSKLinux):
  """Tests if the find flow works on Linux and Darwin using Sleuthkit."""
  flow = "FindFiles"

  # Match every entry under /bin via the TSK (Sleuthkit) pathtype.
  args = {
      "findspec": rdfvalue.FindSpec(
          pathspec=rdfvalue.PathSpec(
              path="/bin/", pathtype=rdfvalue.PathSpec.PathType.TSK),
          path_regex=".")
  }
class TestFindOSLinuxDarwin(TestListDirectoryOSLinuxDarwin):
  """Tests if the find flow works on Linux and Darwin."""
  flow = "FindFiles"

  # Match every entry under /bin via the native OS pathtype.
  args = {
      "findspec": rdfvalue.FindSpec(
          pathspec=rdfvalue.PathSpec(
              path="/bin/", pathtype=rdfvalue.PathSpec.PathType.OS),
          path_regex=".")
  }
def testFindAction2(self):
  """Test the find action path regex."""
  request = rdfvalue.FindSpec(
      pathspec=rdfvalue.PathSpec(path="/mock2/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS),
      path_regex=".*mp3")
  request.iterator.number = 200

  hits = [r.hit for r in self.RunAction("Find", request)
          if isinstance(r, rdfvalue.FindSpec)]

  # Exactly one file in the fixture matches the mp3 regex.
  self.assertEqual(len(hits), 1)
  self.assertEqual(hits[0].pathspec.Basename(), "file.mp3")
def ProcessEntry(self, responses):
  """Process the responses from the client."""
  if not responses.success:
    return

  # If we get a response with an unfinished iterator then we missed some
  # files. Call Find on the client until we're done.
  if (responses.iterator and
      responses.iterator.state != responses.iterator.State.FINISHED):
    findspec = rdfvalue.FindSpec(responses.request.request.args)
    findspec.iterator = responses.iterator
    self.CallClient("Find", findspec,
                    next_state="ProcessEntry",
                    request_data=responses.request_data)

  # The Find client action does not return a StatEntry but a
  # FindSpec. Normalize to a StatEntry.
  stat_responses = [
      r.hit if isinstance(r, rdfvalue.FindSpec) else r for r in responses]

  # If this was a pure path matching call without any regex / recursion, we
  # know exactly which node in the component tree we have to process next and
  # get it from the component_path. If this was a regex match though, we
  # sent the client a combined regex that matches all nodes in order to save
  # round trips and client processing time. In that case we only get the
  # base node and have to check for all subnodes if the response actually
  # matches that subnode before we continue processing.
  component_path = responses.request_data.get("component_path")
  if component_path is not None:
    for response in stat_responses:
      self._ProcessResponse(response, [component_path])
  else:
    # This is a combined match.
    base_path = responses.request_data["base_path"]
    base_node = self.FindNode(base_path)
    for response in stat_responses:
      matching_components = []
      # Test each child of the base node against the response; each child
      # that matches extends the base path by one component.
      for next_node in base_node.keys():
        pathspec = rdfvalue.PathSpec(next_node)

        if self._MatchPath(pathspec, response):
          matching_path = base_path + [next_node]
          matching_components.append(matching_path)

      if matching_components:
        self._ProcessResponse(response, matching_components,
                              base_wildcard=True)
def testFindAction3(self):
  """Test the find action data regex."""
  # Ask for all files whose content matches the data regex.
  request = rdfvalue.FindSpec(
      pathspec=rdfvalue.PathSpec(path="/mock2/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS),
      data_regex="Secret", cross_devs=True)
  request.iterator.number = 200

  hits = [r.hit for r in self.RunAction("Find", request)
          if isinstance(r, rdfvalue.FindSpec)]

  self.assertEqual(len(hits), 2)
  self.assertEqual(hits[0].pathspec.Basename(), "file1.txt")
  self.assertEqual(hits[1].pathspec.Basename(), "long_file.text")
def Start(self):
  """Call the find flow to get the MRU data for each user."""
  client_fd = aff4.FACTORY.Open(self.client_id, mode="r", token=self.token)
  for user in client_fd.Get(client_fd.Schema.USER):
    # Per-user OpenSavePidlMRU registry key.
    mru_path = ("HKEY_USERS/%s/Software/Microsoft/Windows"
                "/CurrentVersion/Explorer/ComDlg32"
                "/OpenSavePidlMRU" % user.sid)

    findspec = rdfvalue.FindSpec(max_depth=2, path_regex=".")
    findspec.pathspec.path = mru_path
    findspec.pathspec.pathtype = rdfvalue.PathSpec.PathType.REGISTRY
    findspec.iterator.number = 1000

    self.CallFlow("FindFiles", findspec=findspec, output=None,
                  next_state="StoreMRUs",
                  request_data=dict(username=user.username))
def testDownloadDirectorySub(self):
  """Test a FetchFiles flow with depth=5."""
  vfs.VFS_HANDLERS[
      rdfvalue.PathSpec.PathType.OS] = test_lib.ClientVFSHandlerFixture

  # Mock the client actions FetchFiles uses.
  client_mock = test_lib.ActionMock("HashFile", "HashBuffer", "StatFile",
                                    "Find", "TransferBuffer")

  downloads_pathspec = rdfvalue.PathSpec(
      path="/c/Downloads", pathtype=rdfvalue.PathSpec.PathType.OS)

  for _ in test_lib.TestFlowHelper("FetchFiles", client_mock,
                                   client_id=self.client_id,
                                   findspec=rdfvalue.FindSpec(
                                       max_depth=5,
                                       pathspec=downloads_pathspec,
                                       path_glob="*"),
                                   token=self.token):
    pass

  # Check if the base path was created.
  output_path = self.client_id.Add("fs/os/c/Downloads")
  output_fd = aff4.FACTORY.Open(output_path, token=self.token)
  children = list(output_fd.OpenChildren())

  # There should be 5 children: a.txt, b.txt, c.txt, d.txt, sub1.
  self.assertEqual(len(children), 5)
  self.assertEqual("a.txt b.txt c.txt d.txt sub1".split(),
                   sorted(child.urn.Basename() for child in children))

  # Find the child named: sub1.
  for child in children:
    if child.urn.Basename() == "sub1":
      break

  children = list(child.OpenChildren())
  # There should be 4 children: a.txt, b.txt, c.txt, d.txt.
  self.assertEqual(len(children), 4)
  self.assertEqual("a.txt b.txt c.txt d.txt".split(),
                   sorted(child.urn.Basename() for child in children))
def Start(self, responses):
  """Start."""
  client_id = responses.request.client_id

  for registry_path in self.state.paths:
    findspec = rdfvalue.FindSpec()
    findspec.pathspec.path = registry_path
    findspec.pathspec.pathtype = rdfvalue.PathSpec.PathType.REGISTRY
    # Hard coded limit so this does not get too big.
    findspec.iterator.number = 10000

    if self.state.max_depth:
      findspec.max_depth = self.state.max_depth

    self.CallClient("Find", findspec, client_id=client_id,
                    next_state="StoreResults")
def testFindSizeLimits(self):
  """Test the find action size limits."""
  # Ask for files between 4 and 15 bytes in one request.
  request = rdfvalue.FindSpec(min_file_size=4, max_file_size=15,
                              cross_devs=True)
  request.pathspec.Append(path="/mock2/",
                          pathtype=rdfvalue.PathSpec.PathType.OS)
  request.iterator.number = 200

  hit_names = [r.hit.pathspec.Basename()
               for r in self.RunAction("Find", request)
               if isinstance(r, rdfvalue.FindSpec)]

  self.assertEqual(len(hit_names), 5)

  # Our mock filesize is the length of the base filename, check all the
  # files we got match the size criteria.
  for filename in hit_names:
    self.assertTrue(4 <= len(filename) <= 15)
def StartRequests(self):
  """Generate and send the Find requests."""
  client = aff4.FACTORY.Open(self.client_id, token=self.token)

  if self.runner.output:
    self.runner.output.Set(
        self.runner.output.Schema.DESCRIPTION("CacheGrep for {0}".format(
            self.args.data_regex)))

  # Build DOMAIN\username strings; strip the backslash when no domain.
  usernames = ["%s\\%s" % (u.domain, u.username) for u in self.state.users]
  usernames = [u.lstrip("\\") for u in usernames]

  for path_template in self.state.all_paths:
    expanded_paths = flow_utils.InterpolatePath(path_template, client,
                                                users=usernames)
    for expanded_path in expanded_paths:
      findspec = rdfvalue.FindSpec(data_regex=self.args.data_regex)
      findspec.pathspec.path = expanded_path
      findspec.pathspec.pathtype = self.args.pathtype
      findspec.iterator.number = 800

      self.CallFlow("FetchFiles", findspec=findspec,
                    next_state="HandleResults")
def testCollectionOverwriting(self):
  """Test we overwrite the collection every time the flow is executed."""
  client_mock = action_mocks.ActionMock("Find")
  output_path = "analysis/FindFlowTest5"

  # Prepare a findspec.
  findspec = rdfvalue.FindSpec(
      path_regex="bin",
      pathspec=rdfvalue.PathSpec(path="/",
                                 pathtype=rdfvalue.PathSpec.PathType.OS))

  for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                   client_id=self.client_id,
                                   token=self.token, findspec=findspec,
                                   output=output_path):
    pass

  # Check the output file with the right number of results.
  fd = aff4.FACTORY.Open(self.client_id.Add(output_path), token=self.token)
  self.assertEqual(len(fd), 2)

  # Now find a new result, should overwrite the collection.
  findspec.path_regex = "dd"
  for _ in test_lib.TestFlowHelper("FindFiles", client_mock,
                                   client_id=self.client_id,
                                   token=self.token, findspec=findspec,
                                   output=output_path, max_results=1):
    pass

  fd = aff4.FACTORY.Open(self.client_id.Add(output_path), token=self.token)
  self.assertEqual(len(fd), 1)
def Iterate(self, request, client_state):
  """Iterates through the directory, resuming from the request's iterator."""
  self.request = request
  limit = request.iterator.number

  # TODO(user): What is a reasonable measure of work here?
  for index, entry in enumerate(
      self.ListDirectory(request.pathspec, client_state)):
    # Only send the reply if the file matches all criteria.
    if self.FilterFile(entry):
      self.SendReply(rdfvalue.FindSpec(hit=entry))

    # We only check a limited number of files in each iteration. This might
    # result in returning an empty response - but the iterator is not yet
    # complete. Flows must check the state of the iterator explicitly.
    if index >= limit - 1:
      logging.debug("Processed %s entries, quitting", index)
      return

  # Walked the whole directory - end this iterator.
  request.iterator.state = rdfvalue.Iterator.State.FINISHED
def testPermissionFilter(self):
  """Test filtering based on file/folder permission happens correctly."""
  pathspec = rdfvalue.PathSpec(path="/mock2/",
                               pathtype=rdfvalue.PathSpec.PathType.OS)

  def RunPermFind(perm_mode, perm_mask=None):
    """Run a Find with the given permission filter and return the hits."""
    kwargs = dict(pathspec=pathspec, path_regex=".", perm_mode=perm_mode,
                  cross_devs=True)
    if perm_mask is not None:
      kwargs["perm_mask"] = perm_mask
    request = rdfvalue.FindSpec(**kwargs)
    request.iterator.number = 200
    result = self.RunAction("Find", request)
    return [r.hit for r in result if isinstance(r, rdfvalue.FindSpec)]

  # Look for files that match exact permissions.
  hits = RunPermFind(0o644)
  self.assertEqual(len(hits), 2)
  self.assertEqual(
      [(h.pathspec.Dirname().Basename(), h.pathspec.Basename())
       for h in hits],
      [("directory2", "file.jpg"), ("directory2", "file.mp3")])

  # Look for files/folders where 'others' have 'write' permission. All other
  # attributes don't matter. Setuid bit must also be set and guid or sticky
  # bit must not be set.
  hits = RunPermFind(0o4002, 0o7002)
  self.assertEqual(len(hits), 2)
  self.assertEqual(
      [(h.pathspec.Dirname().Basename(), h.pathspec.Basename())
       for h in hits],
      [("directory1", "file1.txt"), ("directory1", "file2.txt")])

  # Look for files where 'others' have 'execute' permission. All other
  # attributes don't matter. Only look for 'regular' files.
  hits = RunPermFind(0o10001, 0o10001)
  self.assertEqual(len(hits), 2)
  self.assertEqual(
      [(h.pathspec.Dirname().Basename(), h.pathspec.Basename())
       for h in hits],
      [("directory3", "file1.txt"), ("directory3", "long_file.text")])

  # Look for folders where 'group' have 'execute' permission. All other
  # attributes don't matter. Only look for folders.
  hits = RunPermFind(0o40010, 0o40010)
  self.assertEqual(len(hits), 3)
  self.assertEqual([h.pathspec.Basename() for h in hits],
                   ["directory2", "directory1", "directory3"])
def _ProcessResponse(self, response, component_paths, base_wildcard=False):
  """Process one response against the given prefix-tree component paths.

  Args:
    response: A StatEntry-like response (or None) to evaluate.
    component_paths: List of component paths (lists of component strings)
      identifying nodes in the prefix tree to continue from.
    base_wildcard: True when this call came from a combined wildcard/regex
      match rather than a literal path component.
  """
  for component_path in component_paths:
    regexes_to_get = []
    recursions_to_get = {}

    node = self.FindNode(component_path)

    if not node:
      # Node is empty representing a leaf node - we found a hit - report it.
      self.GlobReportMatch(response)
      return

    # There are further components in the tree - iterate over them.
    for component_str, next_node in node.items():
      component = rdfvalue.PathSpec(component_str)
      next_component = component_path + [component_str]

      # If we reach this point, we are instructed to go deeper into the
      # directory structure. We only want to actually do this if
      # - the last response was a proper directory,
      # - or it was a file (an image) that was explicitly given meaning
      #   no wildcards or groupings,
      # - or no_file_type_check was set.
      #
      # This reduces the number of TSK opens on the client that may
      # sometimes lead to instabilities due to bugs in the library.
      if response and (
          not (stat.S_ISDIR(response.st_mode) or
               not base_wildcard or
               self.state.no_file_type_check)):
        continue

      if component.path_options == component.Options.RECURSIVE:
        # Group recursive components by their depth so one Find request can
        # cover each depth.
        recursions_to_get.setdefault(
            component.recursion_depth, []).append(component)
      elif component.path_options == component.Options.REGEX:
        regexes_to_get.append(component)

      elif component.path_options == component.Options.CASE_INSENSITIVE:
        # Here we need to create the next pathspec by appending the current
        # component to what we already have. If we don't have anything yet, we
        # fall back to the root path. If there is no root path either, the
        # current component becomes the new base.
        base_pathspec = self._GetBasePathspec(response)
        if base_pathspec:
          pathspec = base_pathspec.Append(component)
        else:
          pathspec = component

        if not next_node:
          # Check for the existence of the last node.
          request = rdfvalue.ListDirRequest(pathspec=pathspec)

          if (response is None or (response and
                                   (response.st_mode == 0 or
                                    not stat.S_ISREG(response.st_mode)))):
            # If next node is empty, this node is a leaf node, we therefore
            # must stat it to check that it is there. There is a special case
            # here where this pathspec points to a file/directory in the root
            # directory. In this case, response will be None but we still need
            # to stat it.
            self.CallClient(
                "StatFile", request, next_state="ProcessEntry",
                request_data=dict(component_path=next_component))
        else:
          # There is no need to go back to the client for intermediate
          # paths in the prefix tree, just emulate this by recursively
          # calling this state inline.
          self.CallStateInline(
              [rdfvalue.StatEntry(pathspec=pathspec)],
              next_state="ProcessEntry",
              request_data=dict(component_path=next_component))

    if recursions_to_get or regexes_to_get:
      # Recursions or regexes need a base pathspec to operate on. If we
      # have neither a response or a root path, we send a default pathspec
      # that opens the root with pathtype "OS".
      base_pathspec = self._GetBasePathspec(response)
      if not base_pathspec:
        base_pathspec = rdfvalue.PathSpec(path="/", pathtype="OS")

      for depth, recursions in recursions_to_get.iteritems():
        # Combine all components at this depth into a single alternation
        # regex to save client round trips.
        path_regex = "(?i)^" + "$|^".join(
            set([c.path for c in recursions])) + "$"

        findspec = rdfvalue.FindSpec(pathspec=base_pathspec,
                                     cross_devs=True,
                                     max_depth=depth,
                                     path_regex=path_regex)

        findspec.iterator.number = self.FILE_MAX_PER_DIR
        self.CallClient("Find", findspec,
                        next_state="ProcessEntry",
                        request_data=dict(base_path=component_path))

      if regexes_to_get:
        path_regex = "(?i)^" + "$|^".join(
            set([c.path for c in regexes_to_get])) + "$"

        findspec = rdfvalue.FindSpec(pathspec=base_pathspec,
                                     max_depth=1,
                                     path_regex=path_regex)

        findspec.iterator.number = self.FILE_MAX_PER_DIR
        self.CallClient("Find", findspec,
                        next_state="ProcessEntry",
                        request_data=dict(base_path=component_path))
def ProcessEntry(self, responses):
  """Process the responses from the client."""
  if not responses.success:
    return

  component_path = responses.request_data["component_path"]
  node = self.FindNode(component_path)

  # If we get a response with an unfinished iterator then we missed some
  # files. Call Find on the client until we're done.
  if (responses.iterator and
      responses.iterator.state != responses.iterator.State.FINISHED):
    findspec = rdfvalue.FindSpec(responses.request.request.args)
    findspec.iterator = responses.iterator
    self.CallClient("Find", findspec,
                    next_state="ProcessEntry",
                    request_data=responses.request_data)

  regexes_to_get = []
  recursions_to_get = {}

  for response in responses:
    # The Find client action does not return a StatEntry but a
    # FindSpec. Normalize to a StatEntry.
    if isinstance(response, rdfvalue.FindSpec):
      response = response.hit

    if node:
      # There are further components in the tree - iterate over them.
      for component_str, next_node in node.items():
        component = rdfvalue.PathSpec(component_str)
        next_component = component_path + [component_str]

        # Use the pathtype from the flow args.
        component.pathtype = self.state.args.pathtype
        if component.path_options == component.Options.RECURSIVE:
          # Group recursive components by depth so one Find request covers
          # each depth.
          recursions_to_get.setdefault(component.recursion_depth,
                                       []).append(component)
        elif component.path_options == component.Options.REGEX:
          regexes_to_get.append(component)

        elif component.path_options == component.Options.CASE_INSENSITIVE:
          # Check for the existence of the last node.
          if not next_node:
            pathspec = response.pathspec.Copy().AppendPath(component.path)
            request = rdfvalue.ListDirRequest(pathspec=pathspec)

            if (response.st_mode == 0 or
                not stat.S_ISREG(response.st_mode)):
              # If next node is empty, this node is a leaf node, we therefore
              # must stat it to check that it is there.
              self.CallClient(
                  "StatFile", request, next_state="ProcessEntry",
                  request_data=dict(component_path=next_component))
          else:
            pathspec = response.pathspec.Copy().AppendPath(component.path)

            # There is no need to go back to the client for intermediate paths
            # in the prefix tree, just emulate this by recursively calling
            # this state inline.
            self.CallStateInline(
                [rdfvalue.StatEntry(pathspec=pathspec)],
                next_state="ProcessEntry",
                request_data=dict(component_path=next_component))

      if recursions_to_get:
        for depth, recursions in recursions_to_get.iteritems():
          # Combine all components at this depth into one alternation regex
          # to save client round trips.
          path_regex = "(?i)^" + "$|^".join(
              set([c.path for c in recursions])) + "$"

          findspec = rdfvalue.FindSpec(pathspec=response.pathspec,
                                       cross_devs=True,
                                       max_depth=depth,
                                       path_regex=path_regex)

          findspec.iterator.number = self.FILE_MAX_PER_DIR
          self.CallClient("Find", findspec,
                          next_state="ProcessEntry",
                          request_data=dict(component_path=next_component))

      if regexes_to_get:
        path_regex = "(?i)^" + "$|^".join(
            set([c.path for c in regexes_to_get])) + "$"

        findspec = rdfvalue.FindSpec(pathspec=response.pathspec,
                                     max_depth=1,
                                     path_regex=path_regex)

        findspec.iterator.number = self.FILE_MAX_PER_DIR
        self.CallClient("Find", findspec,
                        next_state="ProcessEntry",
                        request_data=dict(component_path=next_component))

    else:
      # Node is empty representing a leaf node - we found a hit - report it.
      self.ReportMatch(response)
def testFetchFilesFlow(self):
  """End-to-end FetchFiles run over a disk image with hash-store batching."""
  # Very small chunks to stress test this flow.
  with test_lib.MultiStubber(
      (transfer.MultiGetFile, "CHUNK_SIZE", self.chunk_size),
      (transfer.MultiGetFile, "MIN_CALL_TO_FILE_STORE", 10)):
    with test_lib.Instrument(filestore.FileStore,
                             "CheckHashes") as check_hashes_instrument:
      path = os.path.join(self.base_path, "winexec_img.dd")

      # Find all .exe / .sys files inside the TSK view of the image.
      self.findspec = rdfvalue.FindSpec(path_regex=r"\.(exe|sys)$")
      self.findspec.pathspec.path = path
      self.findspec.pathspec.pathtype = rdfvalue.PathSpec.PathType.OS
      self.findspec.pathspec.Append(
          path="/", pathtype=rdfvalue.PathSpec.PathType.TSK)

      self.base_pathspec = self.findspec.pathspec.Copy()

      # First create some existing files in the VFS so we can ensure they
      # get updated.
      inspect_path = self.base_pathspec.Copy()
      inspect_path.AppendPath("Ext2IFS_1_10b.exe")

      urn = aff4.AFF4Object.VFSGRRClient.PathspecToURN(
          inspect_path, self.client_id)

      fd = aff4.FACTORY.Create(urn, "AFF4MemoryStream", token=self.token)
      fd.Write("hello")
      fd.Close()

      # Now run the fetch all files.
      client_mock = test_lib.ActionMock("TransferBuffer", "StatFile",
                                        "Find", "HashFile", "HashBuffer")

      for _ in test_lib.TestFlowHelper("FetchFiles", client_mock,
                                       token=self.token,
                                       client_id=self.client_id,
                                       findspec=self.findspec):
        pass

      self.CheckFindExeFiles()
      self.CheckPresenceOfSignedData()
      self.CheckIndexLookup()

      pathlist = ["/a/b/c/g/f/pciide.sys", "pciide.sys",
                  "/a/b/c/g/h/pciide.sys", "/a/b/c/g/pciide.sys"]
      self.CheckExistingFile(pathlist)

      # In this test we limit the maximum number of times the filestore
      # check hashes is called to 10. There are 23 hits in the test data,
      # so we expect 3 calls, of 10, 10, and 3:
      self.assertEqual(len(check_hashes_instrument.args), 3)
      self.assertEqual(len(check_hashes_instrument.args[0][1]), 10)
      self.assertEqual(len(check_hashes_instrument.args[1][1]), 10)
      self.assertEqual(len(check_hashes_instrument.args[2][1]), 3)

      # All 23 hits must have ended up in the output collection.
      fd = aff4.FACTORY.Open(
          self.client_id.Add("analysis/FetchFiles"), token=self.token)
      collection = list(fd.OpenChildren())[0]
      self.assertEqual(len(collection), 23)