def copy_to(self, source_path):
    """Upload a local file into this object's GCS bucket.

    Args:
        source_path (str): Path of the local file to upload.

    Returns:
        str: The gs:// path of the uploaded object, or None when the local
            file is empty and nothing was uploaded.

    Raises:
        TurbiniaException: If the upload raises a GoogleCloudError.
    """
    # Empty files are skipped rather than uploaded.
    if not os.path.getsize(source_path):
        message = (
            'Local source file {0:s} is empty. Not uploading to GCS'.format(
                source_path))
        log.warning(message)
        return None

    bucket = self.client.get_bucket(self.bucket)
    file_name = os.path.basename(source_path)
    destination_path = os.path.join(
        self.base_output_dir, self.unique_dir, file_name)
    log.info(
        'Writing {0:s} to GCS path {1:s}'.format(
            source_path, destination_path))

    try:
        blob = storage.Blob(
            destination_path, bucket, chunk_size=self.CHUNK_SIZE)
        blob.upload_from_filename(source_path, client=self.client)
    except exceptions.GoogleCloudError as exception:
        message = 'File upload to GCS failed: {0!s}'.format(exception)
        log.error(message)
        raise TurbiniaException(message)

    return os.path.join('gs://', self.bucket, destination_path)
def __init__(self, *_, **__):
    """Initialization for PSQ Worker.

    Loads the config, creates the PSQ queue backed by Datastore storage,
    checks the configured directories and creates the PSQ worker. Positional
    and keyword arguments are accepted and ignored.

    Raises:
        TurbiniaException: If creating the PSQ queue raises a
            GoogleCloudError.
    """
    config.LoadConfig()
    publisher = pubsub.PublisherClient()
    subscriber = pubsub.SubscriberClient()
    ds_client = datastore.Client(project=config.TURBINIA_PROJECT)

    try:
        self.psq = psq.Queue(
            publisher, subscriber, config.TURBINIA_PROJECT,
            name=config.PSQ_TOPIC,
            storage=psq.DatastoreStorage(ds_client))
    except exceptions.GoogleCloudError as e:
        msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
        log.error(msg)
        raise TurbiniaException(msg)

    # Each directory setting is read and checked in turn.
    for setting in ('MOUNT_DIR_PREFIX', 'OUTPUT_DIR', 'TMP_DIR'):
        check_directory(getattr(config, setting))

    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
def GetInstance(self, instance_name, zone=None):
    """Look up an instance of this project by name.

    Args:
        instance_name: The instance name.
        zone: The zone for the instance. When not given, the zone recorded
            for the instance in self.ListInstances() is used.

    Returns:
        A Google Compute Instance object (instance of GoogleComputeInstance).

    Raises:
        TurbiniaException: If instance does not exist.
    """
    known_instances = self.ListInstances()
    try:
        details = known_instances[instance_name]
        zone = zone or details['zone']
        return GoogleComputeInstance(
            project=self, zone=zone, name=instance_name)
    except KeyError:
        raise TurbiniaException('Unknown instance')
def _collect_windows_files(self, evidence):
    """Extract the WindowsSystemRegistryFiles artifacts from the evidence.

    Args:
        evidence (Evidence object): The evidence to process

    Returns:
        location (str): The directory of the first extracted artifact.
        number of artifacts (int): The number of files extracted.

    Raises:
        TurbiniaException: If extract_artifacts raises a TurbiniaException.
    """
    try:
        artifacts = extract_artifacts(
            artifact_names=['WindowsSystemRegistryFiles'],
            disk_path=evidence.local_path,
            output_dir=self.output_dir,
            credentials=evidence.credentials)
    except TurbiniaException as e:
        raise TurbiniaException(
            'artifact extraction failed: {}'.format(str(e)))

    # The base dir is taken from the first collected artifact.
    base_dir = os.path.dirname(artifacts[0])
    return (base_dir, len(artifacts))
def _collect_wordpress_file(self, evidence):
    """Extract the file named _WP_DB_NAME from the evidence.

    Files are extracted into the 'artifacts' directory under
    self.output_dir.

    Args:
        evidence (Evidence object): The evidence to process

    Returns:
        location (str): The directory of the first extracted file.
        number of artifacts (int): The number of files extracted.

    Raises:
        TurbiniaException: If extract_files raises a TurbiniaException.
    """
    artifacts_dir = os.path.join(self.output_dir, 'artifacts')
    try:
        artifacts = extract_files(
            file_name=_WP_DB_NAME,
            disk_path=evidence.local_path,
            output_dir=artifacts_dir)
    except TurbiniaException as e:
        raise TurbiniaException(
            'artifact extraction failed: {0:s}'.format(str(e)))

    # The base dir is taken from the first collected artifact.
    base_dir = os.path.dirname(artifacts[0])
    return (base_dir, len(artifacts))
def GetFilesystem(path):
    """Uses lsblk to detect the filesystem of a partition block device.

    If lsblk reports nothing on the first attempt, the command is run once
    more after a two second pause.

    Args:
        path(str): the full path to the block device.

    Returns:
        str: the filesystem detected (for example: 'ext4')

    Raises:
        TurbiniaException: If lsblk does not report exactly one filesystem
            type.
    """
    lsblk_cmd = ['lsblk', path, '-f', '-o', 'FSTYPE', '-n']
    log.info('Running {0!s}'.format(lsblk_cmd))

    detected = subprocess.check_output(lsblk_cmd).split()
    if not detected:
        # Lets wait a bit for any previous blockdevice operation to settle
        time.sleep(2)
        detected = subprocess.check_output(lsblk_cmd).split()

    found = len(detected)
    if found != 1:
        raise TurbiniaException(
            '{0:s} should contain exactly one partition, found {1:d}'.format(
                path, found))

    return detected[0].decode('utf-8').strip()
def ParseDependencies():
    """Parses the config file DEPENDENCIES variable.

    Each entry of CONFIG.DEPENDENCIES is keyed by its lower-cased 'job'
    value. 'programs' is required; 'docker_image' and 'timeout' default to
    None when absent.

    Raises:
        TurbiniaException: If bad config file.

    Returns:
        dependencies(dict): The parsed dependency values.
    """
    parsed = {}
    try:
        for entry in CONFIG.DEPENDENCIES:
            job_name = entry['job'].lower()
            parsed[job_name] = {
                'programs': entry['programs'],
                'docker_image': entry.get('docker_image'),
                'timeout': entry.get('timeout'),
            }
    except (KeyError, TypeError) as exception:
        raise TurbiniaException(
            'An issue has occurred while parsing the '
            'dependency config: {0!s}'.format(exception))
    return parsed
def setup(self, task): """Handles initializing task based attributes, after object creation. Args: task (TurbiniaTask): The calling Task object Raises: TurbiniaException: If the Output Manager is not setup. """ self.task_id = task.id self.task_name = task.name self.requester = task.requester if not self.no_state_manager: self.state_manager = state_manager.get_state_manager() if not self.no_output_manager: if task.output_manager.is_setup: ldirs = task.output_manager.get_local_output_dirs() _, self.output_dir = ldirs else: raise TurbiniaException('Output Manager is not setup yet.')
def setup(self, jobs_denylist=None, jobs_allowlist=None, *args, **kwargs):
    """Does setup of Task manager and its dependencies.

    Args:
        jobs_denylist (list): Jobs that will be excluded from running
        jobs_allowlist (list): The only Jobs will be included to run
        *args: Passed through to self._backend_setup().
        **kwargs: Passed through to self._backend_setup().

    Raises:
        TurbiniaException: If a requested job is not a registered job.
    """
    self._backend_setup(*args, **kwargs)
    job_names = jobs_manager.JobsManager.GetJobNames()

    if jobs_denylist or jobs_allowlist:
        selected_jobs = jobs_denylist or jobs_allowlist
        for job in selected_jobs:
            if job.lower() not in job_names:
                msg = (
                    'Error creating server. Job {0!s} is not found in '
                    'registered jobs {1!s}.'.format(job, job_names))
                log.error(msg)
                raise TurbiniaException(msg)
        log.info(
            'Filtering Jobs with allowlist {0!s} and denylist {1!s}'.format(
                jobs_allowlist, jobs_denylist))
        job_names = jobs_manager.JobsManager.FilterJobNames(
            job_names, jobs_denylist, jobs_allowlist)

    # Disable any jobs from the config that were not previously allowlisted.
    disabled_jobs = [j.lower() for j in (config.DISABLED_JOBS or [])]
    if jobs_allowlist:
        # Job names are matched case-insensitively above, so the allowlist
        # must be lower-cased before it is compared with the lower-cased
        # disabled jobs. Filtering with a list comprehension also keeps the
        # configured order.
        allowlisted = {job.lower() for job in jobs_allowlist}
        disabled_jobs = [j for j in disabled_jobs if j not in allowlisted]
    if disabled_jobs:
        log.info(
            'Disabling non-allowlisted jobs configured to be disabled in the '
            'config file: {0:s}'.format(', '.join(disabled_jobs)))
        job_names = jobs_manager.JobsManager.FilterJobNames(
            job_names, disabled_jobs, [])

    self.jobs = [
        job for _, job in jobs_manager.JobsManager.GetJobs(job_names)
    ]
    log.debug('Registered job list: {0:s}'.format(str(job_names)))
def _preprocess(self, _, required_states):
    """Mount the parent evidence and, if needed, attach the embedded disk.

    The parent is mounted with PreprocessMountPartition when it has no
    partition paths, otherwise with PreprocessMountDisk. The embedded raw
    disk is attached through losetup only when the ATTACHED state is
    required or this evidence has child evidence.

    Args:
        _: Unused.
        required_states: Collection of EvidenceState values requested.

    Raises:
        TurbiniaException: If the embedded raw disk image is not found under
            the parent mount path.
    """
    parent = self.parent_evidence

    # Need to mount parent disk
    if parent.partition_paths:
        partition_paths = parent.partition_paths
        parent.mount_path = mount_local.PreprocessMountDisk(
            partition_paths, parent.mount_partition)
    else:
        parent.mount_path = mount_local.PreprocessMountPartition(
            parent.device_path)
    parent.local_path = parent.mount_path
    parent.state[EvidenceState.MOUNTED] = True

    needs_attach = (
        EvidenceState.ATTACHED in required_states or self.has_child_evidence)
    if not needs_attach:
        return

    rawdisk_path = os.path.join(parent.mount_path, self.embedded_path)
    if not os.path.exists(rawdisk_path):
        raise TurbiniaException(
            'Unable to find raw disk image {0:s} in GoogleCloudDisk'.format(
                rawdisk_path))

    self.device_path = mount_local.PreprocessLosetup(rawdisk_path)
    self.state[EvidenceState.ATTACHED] = True
    self.local_path = self.device_path
def _create_mount_points(self, mount_paths, mode='rw'):
    """Creates file and device mounting arguments.

    Block devices are collected as 'path:path:r' strings regardless of the
    requested mode; every other path becomes a bind entry using the given
    mode. A path that was already recorded is skipped.

    Args:
        mount_paths(list): The paths on the host system to be mounted.
        mode(str): The mode the path will be mounted in. The acceptable
            parameters are rw for read write and ro for read only.

    Returns:
        tuple: containing:
            list: The device blocks that will be mounted.
            dict: The file paths that will be mounted.

    Raises:
        TurbiniaException: If an incorrect mode was passed.
    """
    if mode not in ['rw', 'ro']:
        raise TurbiniaException(
            'An incorrect mode was passed: {0:s}. Unable to create the correct '
            'mount points for the Docker container.'.format(mode))

    device_paths = []
    file_paths = {}
    for mpath in mount_paths:
        device_mpath = '{0:s}:{0:s}:{1:s}'.format(str(mpath), 'r')
        already_seen = mpath in file_paths or device_mpath in device_paths
        if already_seen:
            continue
        if IsBlockDevice(mpath):
            device_paths.append(device_mpath)
        else:
            file_paths[mpath] = {'bind': mpath, 'mode': mode}

    return device_paths, file_paths
def setup_stackdriver_handler(project_id, origin):
    """Set up Google Cloud Stackdriver Logging.

    Replaces _Worker.enqueue with a version that records the logger name and
    the given origin in each queued entry, then adds a CloudLoggingHandler to
    the module's logger.

    Args:
        project_id: The name of the Google Cloud project.
        origin: Where the log is originating from.(i.e. server, worker)

    Raises:
        TurbiniaException: When an error occurs enabling GCP Stackdriver
            Logging.
    """

    # Patching cloud logging to allow custom fields
    def _enqueue_with_origin(self, record, message, **kwargs):
        info = {
            "message": message,
            "python_logger": record.name,
            "origin": origin
        }
        entry = {
            "info": info,
            "severity": _helpers._normalize_severity(record.levelno),
            "timestamp": datetime.datetime.utcfromtimestamp(record.created),
        }
        entry.update(kwargs)
        self._queue.put_nowait(entry)

    _Worker.enqueue = _enqueue_with_origin

    try:
        client = cloud_logging.Client(project=project_id)
        handler = cloud_logging.handlers.CloudLoggingHandler(client)
        logger.addHandler(handler)
    except exceptions.GoogleCloudError as exception:
        msg = 'Error enabling Stackdriver Logging: {0:s}'.format(
            str(exception))
        raise TurbiniaException(msg)
def __init__(
        self, name=None, description=None, source=None, source_path=None,
        tags=None, request_id=None, copyable=False):
    """Initialization for Evidence.

    Args:
        name: Evidence name; defaults to the class name.
        description: Stored as self.description.
        source: Stored as self.source.
        source_path: Stored as self.source_path and as the initial
            self.local_path.
        tags: Stored as self.tags; an empty dict is used when falsy.
        request_id: Stored as self.request_id.
        copyable: Stored as self.copyable.

    Raises:
        TurbiniaException: If the evidence is copyable but has no
            source_path.
    """
    # Values taken from the arguments.
    self.copyable = copyable
    self.description = description
    self.source = source
    self.source_path = source_path
    self.local_path = source_path
    self.request_id = request_id
    self.tags = tags or {}

    # Fixed defaults.
    self.config = {}
    self.context_dependent = False
    self.cloud_only = False
    self.mount_path = None
    self.parent_evidence = None
    self.save_metadata = False
    self.saved_path = None
    self.saved_path_type = None
    # List of jobs that have processed this evidence
    self.processed_by = []

    self.type = self.__class__.__name__
    self.name = name or self.type

    # Every known state starts out unset.
    self.state = {state: False for state in EvidenceState}

    if self.copyable and not self.local_path:
        raise TurbiniaException(
            '{0:s} is a copyable evidence and needs a source_path'.format(
                self.type))
def __init__(
        self, task, evidence=None, input_evidence=None, base_output_dir=None,
        request_id=None):
    """Initialize the TurbiniaTaskResult object.

    Args:
        task (TurbiniaTask): The calling Task object
        evidence: Initial evidence list; an empty list is used when falsy.
        input_evidence: Stored as self.input_evidence.
        base_output_dir: Stored as self.base_output_dir.
        request_id: Stored as self.request_id.

    Raises:
        TurbiniaException: If the Output Manager is not setup.
    """
    # Identity and values copied from the arguments / calling task.
    self.id = uuid.uuid4().hex
    self.task_id = task.id
    self.task_name = task.name
    self.user = task.user
    self.request_id = request_id
    self.base_output_dir = base_output_dir
    self.evidence = evidence or []
    self.input_evidence = input_evidence

    # Runtime bookkeeping.
    self.closed = False
    self.start_time = datetime.now()
    self.run_time = None
    self.saved_paths = []
    self.successful = None
    self.status = None
    self.error = {}
    self.worker_name = platform.node()
    # TODO(aarontp): Create mechanism to grab actual python logging data.
    self._log = []

    if not task.output_manager.is_setup:
        raise TurbiniaException('Output Manager is not setup yet.')
    _, self.output_dir = task.output_manager.get_local_output_dirs()
def task_deserialize(input_dict):
    """Converts an input dictionary back into a TurbiniaTask object.

    The Task is loaded by the dictionary's 'name' value, its attributes are
    overwritten from the dictionary, and 'last_update' is parsed back into a
    datetime. The 'output_manager' key is removed from input_dict.

    Args:
        input_dict (dict): TurbiniaTask object dictionary.

    Returns:
        TurbiniaTask: Deserialized object.

    Raises:
        TurbiniaException: If no Task can be loaded for the given name.
    """
    task_type = input_dict['name']
    task = TaskLoader().get_task(task_type)
    if not task:
        raise TurbiniaException(
            'Could not load Task module {0:s}'.format(task_type))

    # Remove serialized output manager because this gets reinstantiated when
    # the empty Task is instantiated and we don't want to overwrite it.
    input_dict.pop('output_manager')
    task.__dict__.update(input_dict)

    serialized_update = input_dict['last_update']
    task.last_update = datetime.strptime(serialized_update, DATETIME_FORMAT)
    return task
def evidence_setup(self, evidence):
    """Validates and processes the evidence.

    Args:
        evidence(Evidence): The Evidence to setup.

    Raises:
        TurbiniaException: If the Evidence can't be validated or the current
            state does not meet the required state.
    """
    evidence.validate()
    evidence.preprocess(self.tmp_dir, required_states=self.REQUIRED_STATES)

    # Final check to make sure that the required evidence state has been met
    # for Evidence types that have those capabilities.
    for state in self.REQUIRED_STATES:
        if state not in evidence.POSSIBLE_STATES:
            continue
        if evidence.state.get(state):
            continue
        raise TurbiniaException(
            'Evidence {0!s} being processed by Task {1:s} requires Evidence '
            'to be in state {2:s}, but earlier pre-processors may have '
            'failed. Current state is {3:s}. See previous logs for more '
            'information.'.format(
                evidence, self.name, state.name, evidence.format_state()))
def __init__(self, jobs_blacklist=None, jobs_whitelist=None):
    """Initialization for PSQ Worker.

    Loads the config, creates the PSQ queue, deregisters the filtered and
    config-disabled jobs, checks dependencies and directories, then creates
    the PSQ worker.

    Args:
        jobs_blacklist (Optional[list[str]]): Jobs we will exclude from
            running
        jobs_whitelist (Optional[list[str]]): The only Jobs we will include
            to run

    Raises:
        TurbiniaException: If creating the PSQ queue raises a
            GoogleCloudError.
    """
    config.LoadConfig()
    publisher = pubsub.PublisherClient()
    subscriber = pubsub.SubscriberClient()
    ds_client = datastore.Client(project=config.TURBINIA_PROJECT)

    try:
        self.psq = psq.Queue(
            publisher, subscriber, config.TURBINIA_PROJECT,
            name=config.PSQ_TOPIC,
            storage=psq.DatastoreStorage(ds_client))
    except exceptions.GoogleCloudError as e:
        msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
        log.error(msg)
        raise TurbiniaException(msg)

    # Deregister jobs from blacklist/whitelist.
    config_disabled = (
        list(config.DISABLED_JOBS) if config.DISABLED_JOBS else [])
    job_manager.JobsManager.DeregisterJobs(jobs_blacklist, jobs_whitelist)
    if config_disabled:
        log.info(
            'Disabling jobs that were configured to be disabled in the '
            'config file: {0:s}'.format(', '.join(config_disabled)))
        job_manager.JobsManager.DeregisterJobs(jobs_blacklist=config_disabled)

    # Check for valid dependencies/directories.
    check_dependencies(config.DEPENDENCIES)
    for setting in ('MOUNT_DIR_PREFIX', 'OUTPUT_DIR', 'TMP_DIR'):
        check_directory(getattr(config, setting))

    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
def setup(self, evidence):
    """Perform common setup operations and runtime environment.

    Even though TurbiniaTasks are initially instantiated by the Jobs under
    the Task Manager, this setup method needs to be run from the task on the
    worker because it handles setting up the task runtime environment.

    Args:
        evidence: An Evidence object to process.

    Returns:
        A TurbiniaTaskResult object.

    Raises:
        TurbiniaException: If the evidence can not be found.
    """
    self.output_manager.setup(self)
    local_dirs = self.output_manager.get_local_output_dirs()
    self.tmp_dir, self.output_dir = local_dirs

    # A result is only created when the task does not already have one.
    if not self.result:
        self.result = TurbiniaTaskResult(
            input_evidence=evidence,
            base_output_dir=self.base_output_dir,
            request_id=self.request_id,
            job_id=self.job_id)
        self.result.setup(self)

    must_retrieve = (
        not self.run_local and evidence.copyable and
        not config.SHARED_FILESYSTEM)
    if must_retrieve:
        self.output_manager.retrieve_evidence(evidence)

    source_path = evidence.source_path
    if source_path and not os.path.exists(source_path):
        raise TurbiniaException(
            'Evidence source path {0:s} does not exist'.format(
                evidence.source_path))

    evidence.preprocess(self.tmp_dir)
    return self.result
def FilterJobNames(cls, job_names, jobs_denylist=None, jobs_allowlist=None):
    """Filters a list of job names against an allowlist or a denylist.

    Matching is case-insensitive. jobs_allowlist and jobs_denylist must not
    be specified at the same time. When neither is given, job_names is
    returned unchanged.

    Args:
        job_names (list[str]): The names of the job_names to filter.
        jobs_denylist (Optional[list[str]]): Job names to exclude.
        jobs_allowlist (Optional[list[str]]): Job names to include.

    Returns:
        list[str]: Job names

    Raises:
        TurbiniaException if both jobs_denylist and jobs_allowlist are
        specified.
    """
    denied = [name.lower() for name in (jobs_denylist or [])]
    allowed = [name.lower() for name in (jobs_allowlist or [])]

    if allowed and denied:
        raise TurbiniaException(
            'jobs_allowlist and jobs_denylist cannot be specified at the same '
            'time.')

    if denied:
        return [name for name in job_names if name.lower() not in denied]
    if allowed:
        return [name for name in job_names if name.lower() in allowed]
    return job_names
def PreprocessLosetup(source_path):
    """Runs Losetup on a target block device or image file.

    Args:
        source_path(str): the source path to run losetup on.

    Raises:
        TurbiniaException: if the losetup command failed to run.

    Returns:
        str: the path to the created loopdevice (ie: /dev/loopX)
    """
    # TODO(aarontp): Remove hard-coded sudo in commands:
    # https://github.com/google/turbinia/issues/73
    command = ['sudo', 'losetup', '--show', '--find', '-P', source_path]
    log.info('Running command {0:s}'.format(' '.join(command)))

    try:
        output = subprocess.check_output(command, universal_newlines=True)
        return output.strip()
    except subprocess.CalledProcessError as e:
        raise TurbiniaException(
            'Could not set losetup devices {0!s}'.format(e))
def send_message(self, message):
    """Send a pubsub message.

    The message is UTF-8 encoded, base64 encoded and published to
    self.topic_path.

    Args:
        message: The message to send.

    Raises:
        TurbiniaException: If the publish response has no message ids.
    """
    encoded = base64.b64encode(message.encode('utf-8')).decode('utf-8')
    request_body = {"messages": [{"data": encoded}]}  # base64 encoded string

    publish_client = self.pubsub_api_client.projects().topics()
    request_args = {'topic': self.topic_path, 'body': request_body}
    response = gcp_common.ExecuteRequest(
        publish_client, 'publish', request_args)

    # Safe to unpack since response is unpaged.
    message_ids = response[0]['messageIds']
    if not message_ids:
        raise TurbiniaException(
            'Message {0:s} was not published to topic {1:s}'.format(
                message, self.topic_path))

    log.info(
        'Published message {0!s} to topic {1!s}'.format(
            message_ids[0], self.topic_name))
def execute(
        self, cmd, result, save_files=None, log_files=None, new_evidence=None,
        close=False, shell=False, stderr_file=None, stdout_file=None,
        success_codes=None):
    """Executes a given binary and saves output.

    The command runs in a Docker container when the job has a docker image
    configured, otherwise directly on the host. Any stdout/stderr is written
    to files that are saved as log files.

    Args:
        cmd (list|string): Command arguments to run
        result (TurbiniaTaskResult): The result object to put data into.
        save_files (list): A list of files to save (files referenced by
            Evidence objects are automatically saved, so no need to include
            them).
        log_files (list): A list of files to save even if execution fails.
        new_evidence (list): These are new evidence objects created by the
            task. If the task is successful, they will be added to the result.
        close (bool): Whether to close out the result.
        shell (bool): Whether the cmd is in the form of a string or a list.
        success_codes (list(int)): Which return codes are considered
            successful.
        stderr_file (str): Path to location to save stderr.
        stdout_file (str): Path to location to save stdout.

    Returns:
        Tuple of the return code, and the TurbiniaTaskResult object

    Raises:
        TurbiniaException: If a host execution exceeds the job timeout.
    """
    # Avoid circular dependency.
    from turbinia.jobs import manager as job_manager

    save_files = save_files if save_files else []
    # Copied because this method appends to it; the caller's list must not
    # be modified.
    log_files = list(log_files) if log_files else []
    new_evidence = new_evidence if new_evidence else []
    success_codes = success_codes if success_codes else [0]
    stdout = None
    stderr = None

    # Get timeout value.
    timeout_limit = job_manager.JobsManager.GetTimeoutValue(self.job_name)

    # Execute the job via docker.
    docker_image = job_manager.JobsManager.GetDockerImage(self.job_name)
    if docker_image:
        ro_paths = [
            result.input_evidence.local_path,
            result.input_evidence.source_path,
            result.input_evidence.device_path,
            result.input_evidence.mount_path
        ]
        rw_paths = [self.output_dir, self.tmp_dir]
        container_manager = docker_manager.ContainerManager(docker_image)
        stdout, stderr, ret = container_manager.execute_container(
            cmd, shell, ro_paths=ro_paths, rw_paths=rw_paths,
            timeout_limit=timeout_limit)

    # Execute the job on the host system.
    else:
        proc = subprocess.Popen(
            cmd, shell=bool(shell), stderr=subprocess.PIPE,
            stdout=subprocess.PIPE)
        try:
            # communicate() reads both pipes while waiting. Waiting with
            # wait() instead can deadlock once the child fills a pipe buffer.
            stdout, stderr = proc.communicate(timeout=timeout_limit)
        except subprocess.TimeoutExpired:
            # The child is not killed automatically on timeout. Kill and
            # reap it, then close our pipe ends without reading so that
            # surviving grandchildren cannot block us.
            proc.kill()
            proc.wait()
            for stream in (proc.stdout, proc.stderr):
                stream.close()
            # Log error and close result.
            message = (
                'Execution of [{0!s}] failed due to job timeout of '
                '{1:d} seconds has been reached.'.format(cmd, timeout_limit))
            result.log(message)
            result.close(self, success=False, status=message)
            # Increase timeout metric and raise exception
            turbinia_worker_tasks_timeout_total.inc()
            raise TurbiniaException(message)
        ret = proc.returncode

    result.error['stdout'] = str(stdout)
    result.error['stderr'] = str(stderr)

    if stderr_file and not stderr:
        result.log(
            'Attempting to save stderr to {0:s}, but no stderr found during '
            'execution'.format(stderr_file))
    elif stderr:
        if not stderr_file:
            # mkstemp returns an open descriptor; close it so it is not
            # leaked, the file is reopened by path below.
            fd, stderr_file = tempfile.mkstemp(
                suffix='.txt', prefix='stderr-', dir=self.output_dir)
            os.close(fd)
        result.log(
            'Writing stderr to {0:s}'.format(stderr_file),
            level=logging.DEBUG)
        with open(stderr_file, 'wb') as fh:
            fh.write(stderr)
        log_files.append(stderr_file)

    if stdout_file and not stdout:
        result.log(
            'Attempting to save stdout to {0:s}, but no stdout found during '
            'execution'.format(stdout_file))
    elif stdout:
        if not stdout_file:
            fd, stdout_file = tempfile.mkstemp(
                suffix='.txt', prefix='stdout-', dir=self.output_dir)
            os.close(fd)
        result.log(
            'Writing stdout to {0:s}'.format(stdout_file),
            level=logging.DEBUG)
        with open(stdout_file, 'wb') as fh:
            fh.write(stdout)
        log_files.append(stdout_file)

    # De-duplicate while keeping a deterministic, first-seen order.
    log_files = list(dict.fromkeys(log_files))
    for file_ in log_files:
        if not os.path.exists(file_):
            result.log(
                'Log file {0:s} does not exist to save'.format(file_),
                level=logging.DEBUG)
            continue
        if os.path.getsize(file_) == 0:
            result.log(
                'Log file {0:s} is empty. Not saving'.format(file_),
                level=logging.DEBUG)
            continue
        result.log('Output log file found at {0:s}'.format(file_))
        if not self.run_local:
            self.output_manager.save_local_file(file_, result)

    if ret not in success_codes:
        message = 'Execution of [{0!s}] failed with status {1:d}'.format(
            cmd, ret)
        result.log(message)
        if close:
            result.close(self, success=False, status=message)
    else:
        result.log('Execution of [{0!s}] succeeded'.format(cmd))
        for file_ in save_files:
            if os.path.getsize(file_) == 0:
                result.log(
                    'Output file {0:s} is empty. Not saving'.format(file_),
                    level=logging.DEBUG)
                continue
            result.log('Output save file at {0:s}'.format(file_))
            if not self.run_local:
                self.output_manager.save_local_file(file_, result)

        for evidence in new_evidence:
            # If the local path is set in the Evidence, we check to make sure
            # that the path exists and is not empty before adding it.
            if evidence.source_path and not os.path.exists(
                    evidence.source_path):
                message = (
                    'Evidence {0:s} source_path {1:s} does not exist. Not '
                    'returning empty Evidence.'.format(
                        evidence.name, evidence.source_path))
                result.log(message, level=logging.WARN)
            elif (evidence.source_path and
                  os.path.exists(evidence.source_path) and
                  os.path.getsize(evidence.source_path) == 0):
                message = (
                    'Evidence {0:s} source_path {1:s} is empty. Not returning '
                    'empty new Evidence.'.format(
                        evidence.name, evidence.source_path))
                result.log(message, level=logging.WARN)
            else:
                result.add_evidence(evidence, self._evidence_config)

        if close:
            result.close(self, success=True)

    return ret, result
def get_task_data(
        self, instance, project, region, days=0, task_id=None,
        request_id=None, user=None, function_name='gettasks'):
    """Gets task data from Google Cloud Functions.

    Server-not-found errors are retried up to MAX_RETRIES times. Credential
    refresh errors are retried without limit while waiting for the user to
    re-authenticate.

    Args:
        instance (string): The Turbinia instance name (by default the same as
            the INSTANCE_ID in the config).
        project (string): The name of the project.
        region (string): The name of the region to execute in.
        days (int): The number of days we want history for.
        task_id (string): The Id of the task.
        request_id (string): The Id of the request we want tasks for.
        user (string): The user of the request we want tasks for.
        function_name (string): The GCF function we want to call

    Returns:
        List of Task dict objects.

    Raises:
        TurbiniaException: If the Cloud Function could not be reached,
            returned no results, or returned a result that cannot be
            deserialized.
    """
    cloud_function = gcp_function.GoogleCloudFunction(project)
    func_args = {'instance': instance, 'kind': 'TurbiniaTask'}

    if days:
        start_time = datetime.now() - timedelta(days=days)
        # Format this like '1990-01-01T00:00:00z' so we can cast it directly
        # to a javascript Date() object in the cloud function.
        start_string = start_time.strftime(DATETIME_FORMAT)
        func_args.update({'start_time': start_string})
    elif task_id:
        func_args.update({'task_id': task_id})
    elif request_id:
        func_args.update({'request_id': request_id})

    if user:
        func_args.update({'user': user})

    response = None
    retry_count = 0
    credential_error_count = 0
    while response is None and retry_count < MAX_RETRIES:
        try:
            response = cloud_function.ExecuteFunction(
                function_name, region, func_args)
        except auth.exceptions.RefreshError as exception:
            if credential_error_count == 0:
                log.info(
                    'GCP Credentials need to be refreshed, please refresh in '
                    'another terminal and this process will resume. Error: '
                    '{0!s}'.format(exception))
            else:
                log.debug(
                    'GCP Credentials need to be refreshed, please refresh in '
                    'another terminal and this process will resume. Attempt '
                    '{0:d}. Error: {1!s}'.format(
                        credential_error_count + 1, exception))
            # Note, we are intentionally not incrementing the retry_count
            # here because we will retry indefinitely while we wait for the
            # user to reauth.
            credential_error_count += 1
        except httplib2.ServerNotFoundError as exception:
            log.info(
                'Error connecting to server, will retry [{0:d} of {1:d} '
                'retries]: {2!s}'.format(retry_count, MAX_RETRIES, exception))
            retry_count += 1

        if response is None:
            time.sleep(RETRY_SLEEP)

    # All retries failed: report it as a TurbiniaException instead of letting
    # the membership test below fail with a TypeError on None.
    if response is None:
        msg = (
            'Cloud Function {0:s} could not be reached after {1:d} '
            'retries.'.format(function_name, MAX_RETRIES))
        log.error(msg)
        raise TurbiniaException(msg)

    if 'result' not in response:
        log.error('No results found')
        if response.get('error', '{}') != '{}':
            msg = 'Error executing Cloud Function: [{0!s}].'.format(
                response.get('error'))
            log.error(msg)
        log.debug('GCF response: {0!s}'.format(response))
        raise TurbiniaException(
            'Cloud Function {0:s} returned no results.'.format(function_name))

    try:
        results = json.loads(response['result'])
    except (TypeError, ValueError) as e:
        raise TurbiniaException(
            'Could not deserialize result [{0!s}] from GCF: [{1!s}]'.format(
                response.get('result'), e)) from e

    # Convert run_time/last_update back into datetime objects
    task_data = results[0]
    for task in task_data:
        if task.get('run_time'):
            task['run_time'] = timedelta(seconds=task['run_time'])
        if task.get('last_update'):
            task['last_update'] = datetime.strptime(
                task['last_update'], DATETIME_FORMAT)

    return task_data
from turbinia import config from turbinia import TurbiniaException from turbinia.workers import TurbiniaTask from turbinia.workers import TurbiniaTaskResult config.LoadConfig() if config.STATE_MANAGER.lower() == 'datastore': from google.cloud import datastore from google.cloud import exceptions elif config.STATE_MANAGER.lower() == 'redis': import redis else: msg = 'State Manager type "{0:s}" not implemented'.format( config.STATE_MANAGER) raise TurbiniaException(msg) DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S' MAX_DATASTORE_STRLEN = 1500 log = logging.getLogger('turbinia') def get_state_manager(): """Return state manager object based on config. Returns: Initialized StateManager object. """ config.LoadConfig() # pylint: disable=no-else-return if config.STATE_MANAGER.lower() == 'datastore':
def get_task_data(
        self, instance, project, region, days=0, task_id=None,
        request_id=None, user=None, function_name='gettasks'):
    """Gets task data from Google Cloud Functions.

    Exactly one of days, task_id or request_id is used as the query filter,
    in that order of precedence; user is added on top when given.

    Args:
        instance (string): The Turbinia instance name (by default the same as
            the INSTANCE_ID in the config).
        project (string): The name of the project.
        region (string): The name of the region to execute in.
        days (int): The number of days we want history for.
        task_id (string): The Id of the task.
        request_id (string): The Id of the request we want tasks for.
        user (string): The user of the request we want tasks for.
        function_name (string): The GCF function we want to call

    Returns:
        List of Task dict objects.

    Raises:
        TurbiniaException: If the Cloud Function returned no results or the
            result cannot be deserialized.
    """
    cloud_function = GoogleCloudFunction(project_id=project, region=region)
    func_args = {'instance': instance, 'kind': 'TurbiniaTask'}

    if days:
        since = datetime.now() - timedelta(days=days)
        # Format this like '1990-01-01T00:00:00z' so we can cast it directly
        # to a javascript Date() object in the cloud function.
        func_args['start_time'] = since.strftime('%Y-%m-%dT%H:%M:%S')
    elif task_id:
        func_args['task_id'] = task_id
    elif request_id:
        func_args['request_id'] = request_id

    if user:
        func_args['user'] = user

    response = cloud_function.ExecuteFunction(function_name, func_args)
    if 'result' not in response:
        log.error('No results found')
        error = response.get('error', '{}')
        if error != '{}':
            msg = 'Error executing Cloud Function: [{0!s}].'.format(
                response.get('error'))
            log.error(msg)
        log.debug('GCF response: {0!s}'.format(response))
        raise TurbiniaException(
            'Cloud Function {0:s} returned no results.'.format(function_name))

    try:
        results = json.loads(response['result'])
    except (TypeError, ValueError) as e:
        raise TurbiniaException(
            'Could not deserialize result from GCF: [{0!s}]'.format(e))

    return results[0]
def PreprocessMountDisk(partition_paths, partition_number):
    """Locally mounts disk in an instance.

    The selected partition is mounted read-only on a fresh temporary
    directory created under config.MOUNT_DIR_PREFIX.

    Args:
        partition_paths(list(str)): A list of paths to partition block
            devices;
        partition_number(int): the number of the partition to mount. Remember
            these are 1-indexed (first partition is 1).

    Raises:
        TurbiniaException: if the mount command failed to run.

    Returns:
        str: the path to the mounted filesystem.
    """
    config.LoadConfig()
    mount_prefix = config.MOUNT_DIR_PREFIX

    partition_count = len(partition_paths)
    if partition_number > partition_count:
        raise TurbiniaException(
            'Can not mount partition {0:d}: found only {1:d} partitions in '
            'Evidence.'.format(partition_number, partition_count))

    # Partitions are 1-indexed for the user and the system
    if partition_number < 1:
        raise TurbiniaException(
            'Can not mount partition {0:d}: partition numbering starts at 1'
            .format(partition_number))

    partition_path = partition_paths[partition_number - 1]
    if not os.path.exists(partition_path):
        raise TurbiniaException(
            'Could not mount partition {0:s}, the path does not exist'.format(
                partition_path))

    if not os.path.exists(mount_prefix):
        log.info(
            'Creating local mount parent directory {0:s}'.format(
                mount_prefix))
        try:
            os.makedirs(mount_prefix)
        except OSError as e:
            raise TurbiniaException(
                'Could not create mount directory {0:s}: {1!s}'.format(
                    mount_prefix, e))
    elif not os.path.isdir(mount_prefix):
        raise TurbiniaException(
            'Mount dir {0:s} exists, but is not a directory'.format(
                mount_prefix))

    mount_path = tempfile.mkdtemp(prefix='turbinia', dir=mount_prefix)

    options = ['-o', 'ro']
    if GetFilesystem(partition_path) in ['ext3', 'ext4']:
        # This is in case the underlying filesystem is dirty, as we want to
        # mount everything read-only.
        options += ['-o', 'noload']
    mount_cmd = ['sudo', 'mount'] + options + [partition_path, mount_path]

    log.info('Running: {0:s}'.format(' '.join(mount_cmd)))
    try:
        subprocess.check_call(mount_cmd)
    except subprocess.CalledProcessError as e:
        raise TurbiniaException('Could not mount directory {0!s}'.format(e))

    return mount_path
def PreprocessBitLocker(source_path, partition_offset=None, credentials=None):
  """Uses libbde on a target block device or image file.

  Creates a decrypted virtual device of the encrypted volume. Each supplied
  credential is tried in order and the first one for which bdemount succeeds
  wins; remaining credentials are not tried.

  Args:
    source_path(str): the source path to run bdemount on.
    partition_offset(int): offset of volume in bytes. When None, no offset
        option is passed to bdemount.
    credentials(list[{str: str}]): decryption credentials set in evidence
        setup. Each entry needs the keys 'credential_type' ('password' or
        'recovery_password') and 'credential_data'. None is treated as an
        empty list.

  Raises:
    TurbiniaException: if source_path doesn't exist, the mount prefix is not
        usable, or if the bdemount command failed to create a virtual device.

  Returns:
    str: the path to the decrypted virtual block device, or None if no
        credential could decrypt the volume.
  """
  config.LoadConfig()
  mount_prefix = config.MOUNT_DIR_PREFIX

  if not os.path.exists(source_path):
    raise TurbiniaException(
        ('Cannot create virtual device for non-existing source_path '
         '{0!s}').format(source_path))

  if os.path.exists(mount_prefix) and not os.path.isdir(mount_prefix):
    raise TurbiniaException(
        'Mount dir {0:s} exists, but is not a directory'.format(mount_prefix))
  if not os.path.exists(mount_prefix):
    log.info(
        'Creating local mount parent directory {0:s}'.format(mount_prefix))
    try:
      os.makedirs(mount_prefix)
    except OSError as e:
      raise TurbiniaException(
          'Could not create mount directory {0:s}: {1!s}'.format(
              mount_prefix, e))

  mount_path = tempfile.mkdtemp(prefix='turbinia', dir=mount_prefix)

  base_command = ['sudo', 'bdemount']
  if partition_offset is not None:
    # str(None) would hand bdemount the literal offset 'None', so the option
    # is only added when an offset was actually supplied.
    base_command.extend(['-o', str(partition_offset)])

  # credentials defaults to None, which is not iterable.
  for credential in credentials or []:
    libbde_command = list(base_command)
    credential_type = credential['credential_type']
    credential_data = credential['credential_data']
    if credential_type == 'password':
      libbde_command.extend(['-p', credential_data])
    elif credential_type == 'recovery_password':
      libbde_command.extend(['-r', credential_data])
    else:
      # Unsupported credential type, try the next
      log.warning(
          'Unsupported credential type: {0!s}'.format(credential_type))
      continue

    libbde_command.extend(['-X', 'allow_other', source_path, mount_path])

    # Not logging command since it will contain credentials
    try:
      subprocess.check_call(libbde_command)
    except subprocess.CalledProcessError:
      # Decryption failed with these credentials, try the next
      continue

    # Decrypted volume was mounted
    decrypted_device = os.path.join(mount_path, 'bde1')
    if not os.path.exists(decrypted_device):
      raise TurbiniaException(
          'Cannot attach decrypted device: {0!s}'.format(decrypted_device))
    log.info('Decrypted device attached: {0!s}'.format(decrypted_device))
    # Stop here: running bdemount again on the already mounted path with the
    # remaining credentials would serve no purpose.
    return decrypted_device

  # No credential worked, so the mount point is unused; do not leave it behind.
  try:
    os.rmdir(mount_path)
  except OSError as cleanup_error:
    log.warning(
        'Could not remove unused mount directory {0:s}: {1!s}'.format(
            mount_path, cleanup_error))
  return None
def _ProcessPartition(self, evidence_path, path_spec):
  """Builds a RawDiskPartition evidence item from a dfVFS path spec.

  Walks from the given path spec up through its parents. The volume index of
  any APFS container seen on the way is recorded, and the walk stops at the
  first TSK partition, whose index, offset and size are looked up through the
  TSK volume system.

  Args:
    evidence_path (str): Local path of the parent evidence
    path_spec (dfvfs.PathSpec): dfVFS path spec.

  Returns:
    A new RawDiskPartition evidence item and a list of strings containing
    partition information to add to the status report.

  Raises:
    TurbiniaException: if the TSK volume system could not be processed.
  """
  # The evidence keeps the path spec we were handed, not the ancestor the walk
  # below ends on.
  leaf_path_spec = path_spec
  volume_index = None
  partition_index = None
  partition_offset = None
  partition_size = None

  # File system location / identifier
  fs_location = getattr(path_spec, 'location', None)

  current = path_spec
  while current.HasParent():
    indicator = current.type_indicator
    if indicator == dfvfs_definitions.TYPE_INDICATOR_APFS_CONTAINER:
      # APFS volume index
      volume_index = getattr(current, 'volume_index', None)

    if indicator != dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
      current = current.parent
      continue

    # Partition location / identifier
    partition_location = getattr(current, 'location', None)
    if fs_location in ('\\', '/'):
      # A bare root location says nothing useful in the report, so the
      # partition location is shown instead.
      fs_location = partition_location
    # Partition index
    partition_index = getattr(current, 'part_index', None)

    volume_system = tsk_volume_system.TSKVolumeSystem()
    try:
      volume_system.Open(current)
      volume = volume_system.GetVolumeByIdentifier(
          partition_location.replace('/', ''))
      first_extent = volume.extents[0]
      partition_offset = first_extent.offset
      partition_size = first_extent.size
    except dfvfs_errors.VolumeSystemError as e:
      raise TurbiniaException('Could not process partition: {0!s}'.format(e))
    break

  status_report = [fmt.heading5('{0!s}:'.format(fs_location))]
  if partition_index:
    bullet_texts = []
    if volume_index is not None:
      bullet_texts.append('Volume index: {0!s}'.format(volume_index))
    bullet_texts.append('Partition index: {0!s}'.format(partition_index))
    bullet_texts.append('Partition offset: {0!s}'.format(partition_offset))
    bullet_texts.append('Partition size: {0!s}'.format(partition_size))
    status_report.extend(fmt.bullet(text) for text in bullet_texts)
  else:
    status_report.append(fmt.bullet('Source evidence is a volume image'))

  return (
      RawDiskPartition(
          source_path=evidence_path, path_spec=leaf_path_spec,
          partition_offset=partition_offset, partition_size=partition_size),
      status_report)
def execute_container(
    self, cmd, shell=False, ro_paths=None, rw_paths=None, **kwargs):
  """Executes a Docker container.

  A new Docker container will be created from the image id, executed, and
  then removed. The container is removed whether or not the run succeeds.

  Args:
    cmd(str|list): command to be executed.
    shell (bool): Whether the cmd is in the form of a string (True) or a
        list (False); a list is joined with spaces before use.
    ro_paths(list): paths handed to _create_mount_points with mode='ro'.
    rw_paths(list): paths handed to _create_mount_points with its default
        mode.
    **kwargs: Any additional keywords to pass to the container. The
        'entrypoint' keyword is always overridden with /bin/sh.

  Returns:
    stdout(str): the container log output, each streamed chunk decoded as
        UTF-8, stripped and concatenated.
    stderr(str): the 'Error' entry of the container wait result, or None if
        the result has no such entry.
    ret(int): the return code of the process run.

  Raises:
    TurbiniaException: If an error occurred with the Docker container.
  """
  container = None
  stdout = ''

  # Override the entrypoint to /bin/sh
  kwargs['entrypoint'] = '/bin/sh'
  if not shell:
    cmd = ' '.join(cmd)
  cmd = '-c ' + '"{0:s}"'.format(cmd)

  # Create the device and file mount paths
  device_paths = []
  file_paths = {}
  if rw_paths:
    dwpath, fwpath = self._create_mount_points(rw_paths)
    device_paths.extend(dwpath)
    file_paths.update(fwpath)
  if ro_paths:
    drpath, frpath = self._create_mount_points(ro_paths, mode='ro')
    device_paths.extend(drpath)
    file_paths.update(frpath)

  # Caller supplied keywords are applied last, so they win over the computed
  # 'devices' and 'volumes' entries.
  args = {'devices': device_paths, 'volumes': file_paths}
  args.update(kwargs)

  try:
    container = self.client.containers.create(self.image, cmd, **args)
    container.start()
    # Stream program stdout from container
    for chunk in container.logs(stream=True):
      line = codecs.decode(chunk, 'utf-8').strip()
      log.debug(line)
      stdout += line
    results = container.wait()
  except docker.errors.APIError as exception:
    message = (
        'An error has occurred with the container: {0!s}'.format(exception))
    log.error(message)
    raise TurbiniaException(message)
  finally:
    # Clean up on every exit path, not only on Docker API errors, so that an
    # unexpected failure (e.g. undecodable log output) does not leak the
    # container.
    if container is not None:
      container.remove(v=True)

  # The wait result is not guaranteed to carry an 'Error' entry.
  return stdout, results.get('Error'), results['StatusCode']
def preprocess(self, tmp_dir=None, required_states=None):
  """Runs the pre-processors of any parent evidence, then our own.

  This wrapper calls the chain of pre-processors starting with the most
  distant ancestor; once every ancestor has been processed, our own
  pre-processor runs. Pre-processors run in the context of the local task
  execution on the worker nodes, before the task itself, and can be used to
  prepare the evidence for processing (e.g. attach a cloud disk, mount a
  local disk etc).

  Tasks export the evidence states they require in
  `TurbiniaTask.REQUIRED_STATES`[1]. Evidence exports the states it can have
  after pre/post-processing in `Evidence.POSSIBLE_STATES`. Pre-processors
  should run selectively based on the requirements coming from the Task, and
  post-processors should run selectively based on the current state of the
  Evidence.

  If a Task requires a state that the Evidence class supports, but that state
  is not met once preprocessing has run, the Task will abort early. For
  compound evidence types that have parent Evidence objects (e.g. where
  `context_dependent` is True), only the child Evidence is inspected for its
  state, on the assumption that its pre/post-processors can only run
  successfully when the parent Evidence processors have succeeded.

  [1] The evidence states required by the Task are only required if the
  Evidence also supports that state in `POSSIBLE_STATES`. This keeps Tasks
  flexible enough to support multiple types of Evidence. For example,
  `PlasoTask` allows both `CompressedDirectory` and `GoogleCloudDisk` as
  Evidence input, and lists the states `ATTACHED` and `DECOMPRESSED` in
  `PlasoTask.REQUIRED_STATES`. Since `ATTACHED` is supported by
  `GoogleCloudDisk` and `DECOMPRESSED` by `CompressedDirectory`, only those
  respective pre-processors are run, and the state is confirmed after
  preprocessing completes.

  A TurbiniaException raised by this evidence's own pre-processor is logged
  as an error here and not re-raised.

  Args:
    tmp_dir(str): The path to the temporary directory that the Task will
        write to.
    required_states(list[EvidenceState]): The list of evidence state
        requirements from the Task.

  Raises:
    TurbiniaException: If the required evidence state cannot be met by the
        possible states of the Evidence or if the parent evidence object does
        not exist when it is required by the Evidence type.
  """
  self.local_path = self.source_path
  required_states = required_states or []

  if self.context_dependent:
    parent = self.parent_evidence
    if not parent:
      raise TurbiniaException(
          'Evidence of type {0:s} needs parent_evidence to be set'.format(
              self.type))
    # Ancestors go first so that this evidence can build on their result.
    parent.preprocess(tmp_dir, required_states)

  try:
    start_message = 'Starting pre-processor for evidence {0:s}'.format(
        self.name)
    log.debug(start_message)
    self._preprocess(tmp_dir, required_states)
  except TurbiniaException as exception:
    failure_message = 'Error running preprocessor for {0:s}: {1!s}'.format(
        self.name, exception)
    log.error(failure_message)

  done_message = (
      'Pre-processing evidence {0:s} is complete, and evidence is in state '
      '{1:s}'.format(self.name, self.format_state()))
  log.debug(done_message)