def _DownloadFilesFromGCS(self, timeline_label, gs_paths): """Downloads files stored in Google Cloud Storage to the local filesystem. Args: timeline_label (str): Label to use to construct the path list. gs_paths (str): gs:// URI to files that need to be downloaded from GS. Returns: list(str): A list of local paths were GS files have been copied to. """ # TODO: Externalize fetching files from GCS buckets to a different module. local_paths = [] for path in gs_paths: local_path = None try: output_writer = output_manager.GCSOutputWriter( path, local_output_dir=self._output_path) local_path = output_writer.copy_from(path) except TurbiniaException as exception: # Don't add a critical error for now, until we start raising errors # instead of returning manually each self.state.AddError(exception, critical=False) if local_path: local_paths.append((timeline_label, local_path)) return local_paths
def process(self):
  """Processes a GCE persistent disk with Turbinia.

  Sends a Turbinia request for the configured disk, waits for it to
  complete, then gathers any resulting .plaso files (either local, or
  copied back from GCS) into self.state.output as
  (timeline_label, local_path) tuples.

  On any Turbinia error, records a critical error on self.state and
  returns early.
  """
  log_file_path = os.path.join(self._output_path, 'turbinia.log')
  print('Turbinia log file: {0:s}'.format(log_file_path))

  evidence_ = evidence.GoogleCloudDisk(
      disk_name=self.disk_name, project=self.project,
      zone=self.turbinia_zone)
  request = TurbiniaRequest()
  request.evidence.append(evidence_)

  try:
    print('Creating Turbinia request {0:s} with Evidence {1:s}'.format(
        request.request_id, evidence_.name))
    self.client.send_request(request)
    print('Waiting for Turbinia request {0:s} to complete'.format(
        request.request_id))
    self.client.wait_for_request(
        instance=self.instance, project=self.project,
        region=self.turbinia_region, request_id=request.request_id)
    task_data = self.client.get_task_data(
        instance=self.instance, project=self.project,
        region=self.turbinia_region, request_id=request.request_id)
    print(self.client.format_task_status(
        instance=self.instance, project=self.project,
        region=self.turbinia_region, request_id=request.request_id,
        all_fields=True))
  except TurbiniaException as e:
    self.state.add_error(e, critical=True)
    return

  # This finds all .plaso files in the Turbinia output, and determines if they
  # are local or remote (it's possible this will be running against a local
  # instance of Turbinia).
  local_paths = []
  gs_paths = []
  timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name)
  for task in task_data:
    # saved_paths may be present but set to None for tasks that produced no
    # output; `task.get('saved_paths', [])` would still return None in that
    # case, so guard with `or []` instead.
    for path in task.get('saved_paths') or []:
      if path.startswith('/') and path.endswith('.plaso'):
        local_paths.append(path)
      if path.startswith('gs://') and path.endswith('.plaso'):
        gs_paths.append(path)

  if not local_paths and not gs_paths:
    self.state.add_error(
        'No .plaso files found in Turbinia output.', critical=True)
    return

  # Any local .plaso files that exist we can add immediately to the output
  self.state.output = [(timeline_label, p) for p in local_paths
                       if os.path.exists(p)]

  # For files remote in GCS we copy each plaso file back from GCS and then add
  # to output paths
  # TODO: Externalize fetching files from GCS buckets to a different module.
  for path in gs_paths:
    local_path = None
    try:
      output_writer = output_manager.GCSOutputWriter(
          path, local_output_dir=self._output_path)
      local_path = output_writer.copy_from(path)
    except TurbiniaException as e:
      self.state.add_error(e, critical=True)
      return
    if local_path:
      self.state.output.append((timeline_label, local_path))

  if not self.state.output:
    self.state.add_error('No .plaso files could be found.', critical=True)
def Process(self):
  """Process files with Turbinia.

  Sends the configured GCE disk to Turbinia as evidence, waits for the
  request to complete, stores the full task status as a markdown Report
  container, and collects resulting .plaso files (local or copied back
  from GCS) into self.state.output as (timeline_label, local_path)
  tuples. Records a critical error on self.state and returns early on
  any Turbinia failure or when no .plaso files are produced.
  """
  log_file_path = os.path.join(self._output_path, 'turbinia.log')
  print('Turbinia log file: {0:s}'.format(log_file_path))

  # No disk name given explicitly: fall back to the first disk passed in
  # by a previous collector module in the pipeline.
  if self.state.input and not self.disk_name:
    _, disk = self.state.input[0]
    self.disk_name = disk.name
    print('Using disk {0:s} from previous collector'.format(
        self.disk_name))

  evidence_ = evidence.GoogleCloudDisk(
      disk_name=self.disk_name, project=self.project,
      zone=self.turbinia_zone)
  # Validate before sending so a misconfigured disk fails fast here
  # rather than inside Turbinia.
  try:
    evidence_.validate()
  except TurbiniaException as exception:
    self.state.AddError(exception, critical=True)
    return

  request = TurbiniaRequest(requester=getpass.getuser())
  request.evidence.append(evidence_)
  if self.sketch_id:
    request.recipe['sketch_id'] = self.sketch_id
  # Unless the user asked for all jobs, skip StringsJob (presumably too
  # slow/noisy for the default workflow — confirm with Turbinia docs).
  if not self.run_all_jobs:
    request.recipe['jobs_blacklist'] = ['StringsJob']

  # Get threat intelligence data from any modules that have stored some.
  # In this case, observables is a list of containers.ThreatIntelligence
  # objects.
  threatintel = self.state.GetContainers(containers.ThreatIntelligence)
  if threatintel:
    print(
        'Sending {0:d} threatintel to Turbinia GrepWorkers...'.format(
            len(threatintel)))
    indicators = [item.indicator for item in threatintel]
    request.recipe['filter_patterns'] = indicators

  # Common kwargs shared by the Turbinia client calls below.
  request_dict = {
      'instance': self.instance,
      'project': self.project,
      'region': self.turbinia_region,
      'request_id': request.request_id
  }

  try:
    print('Creating Turbinia request {0:s} with Evidence {1!s}'.format(
        request.request_id, evidence_.name))
    self.client.send_request(request)
    print('Waiting for Turbinia request {0:s} to complete'.format(
        request.request_id))
    self.client.wait_for_request(**request_dict)
    task_data = self.client.get_task_data(**request_dict)
  except TurbiniaException as exception:
    # TODO: determine if exception should be converted into a string as
    # elsewhere in the codebase.
    self.state.AddError(exception, critical=True)
    return

  # Full report goes into a container for reporting modules; the short
  # version is printed to the console for the operator.
  message = self.client.format_task_status(**request_dict, full_report=True)
  short_message = self.client.format_task_status(**request_dict)
  print(short_message)

  # Store the message for consumption by any reporting modules.
  report = containers.Report(
      module_name='TurbiniaProcessor', text=message, text_format='markdown')
  self.state.StoreContainer(report)

  # This finds all .plaso files in the Turbinia output, and determines if they
  # are local or remote (it's possible this will be running against a local
  # instance of Turbinia).
  local_paths = []
  gs_paths = []
  timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name)
  for task in task_data:
    # saved_paths may be set to None
    for path in task.get('saved_paths') or []:
      if path.startswith('/') and path.endswith('.plaso'):
        local_paths.append(path)
      if path.startswith('gs://') and path.endswith('.plaso'):
        gs_paths.append(path)

  if not local_paths and not gs_paths:
    self.state.AddError(
        'No .plaso files found in Turbinia output.', critical=True)
    return

  # Any local .plaso files that exist we can add immediately to the output
  self.state.output = [(timeline_label, p) for p in local_paths
                       if os.path.exists(p)]

  # For files remote in GCS we copy each plaso file back from GCS and then add
  # to output paths
  # TODO: Externalize fetching files from GCS buckets to a different module.
  for path in gs_paths:
    local_path = None
    try:
      output_writer = output_manager.GCSOutputWriter(
          path, local_output_dir=self._output_path)
      local_path = output_writer.copy_from(path)
    except TurbiniaException as exception:
      # TODO: determine if exception should be converted into a string as
      # elsewhere in the codebase.
      self.state.AddError(exception, critical=True)
      return
    if local_path:
      self.state.output.append((timeline_label, local_path))

  if not self.state.output:
    self.state.AddError('No .plaso files could be found.', critical=True)
def process(self):
  """Runs a disk through Turbinia and collects the resulting timelines.

  Builds a GoogleCloudDisk evidence object (reusing a disk from a
  previous collector when none is configured), submits it to Turbinia,
  prints a human-readable summary of the completed tasks, stores that
  summary as a Report container, and places every resulting .plaso file
  into self.state.output as (timeline_label, local_path) tuples.
  """
  log_file_path = os.path.join(self._output_path, 'turbinia.log')
  print('Turbinia log file: {0:s}'.format(log_file_path))

  # Reuse a disk handed over by an upstream collector when none was set.
  if self.state.input and not self.disk_name:
    _, previous_disk = self.state.input[0]
    self.disk_name = previous_disk.name
    print('Using disk {0:s} from previous collector'.format(
        self.disk_name))

  disk_evidence = evidence.GoogleCloudDisk(
      disk_name=self.disk_name, project=self.project,
      zone=self.turbinia_zone)
  request = TurbiniaRequest()
  request.evidence.append(disk_evidence)

  # Get threat intelligence data from any modules that have stored some.
  # In this case, observables is a list of containers.ThreatIntelligence
  # objects.
  threatintel = self.state.get_containers(containers.ThreatIntelligence)
  if threatintel:
    print(
        'Sending {0:d} threatintel to Turbinia GrepWorkers...'.format(
            len(threatintel)))
    request.recipe['filter_patterns'] = [
        item.indicator for item in threatintel]

  turbinia_args = {
      'instance': self.instance,
      'project': self.project,
      'region': self.turbinia_region,
      'request_id': request.request_id
  }

  try:
    print('Creating Turbinia request {0:s} with Evidence {1!s}'.format(
        request.request_id, disk_evidence.name))
    self.client.send_request(request)
    print('Waiting for Turbinia request {0:s} to complete'.format(
        request.request_id))
    self.client.wait_for_request(**turbinia_args)
    task_data = self.client.get_task_data(**turbinia_args)
  except TurbiniaException as e:
    self.state.add_error(e, critical=True)
    return

  # Turbinia run complete, build a human-readable message of results.
  report_lines = ['Completed {0:d} Turbinia tasks\n'.format(len(task_data))]
  for task in task_data:
    report_lines.append('{0!s} ({1!s}): {2!s}\n'.format(
        task.get('name'), task.get('id'),
        task.get('status', 'No task status')))
    # saved_paths may be set to None
    for path in task.get('saved_paths') or []:
      # Noise filtering: drop worker logs, per-task logs, and any path
      # local to the Turbinia worker.
      is_noise = (
          path.endswith('worker-log.txt') or
          path.endswith('{0!s}.log'.format(task.get('id'))) or
          path.startswith('/'))
      if not is_noise:
        report_lines.append(' {0:s}\n'.format(path))
  message = ''.join(report_lines)
  print(message)

  # Store the message for consumption by any reporting modules.
  report = containers.Report(module_name='TurbiniaProcessor', text=message)
  self.state.store_container(report)

  # This finds all .plaso files in the Turbinia output, and determines if they
  # are local or remote (it's possible this will be running against a local
  # instance of Turbinia).
  saved = [
      path
      for task in task_data
      # saved_paths may be set to None
      for path in (task.get('saved_paths') or [])
      if path.endswith('.plaso')
  ]
  local_paths = [path for path in saved if path.startswith('/')]
  gs_paths = [path for path in saved if path.startswith('gs://')]
  timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name)

  if not local_paths and not gs_paths:
    self.state.add_error(
        'No .plaso files found in Turbinia output.', critical=True)
    return

  # Any local .plaso files that exist we can add immediately to the output
  self.state.output = [(timeline_label, path) for path in local_paths
                       if os.path.exists(path)]

  # For files remote in GCS we copy each plaso file back from GCS and then add
  # to output paths
  # TODO: Externalize fetching files from GCS buckets to a different module.
  for gs_path in gs_paths:
    fetched_path = None
    try:
      writer = output_manager.GCSOutputWriter(
          gs_path, local_output_dir=self._output_path)
      fetched_path = writer.copy_from(gs_path)
    except TurbiniaException as e:
      self.state.add_error(e, critical=True)
      return
    if fetched_path:
      self.state.output.append((timeline_label, fetched_path))

  if not self.state.output:
    self.state.add_error('No .plaso files could be found.', critical=True)