def _PreprocessSources(self, extraction_engine, session, storage_writer):
  """Preprocesses the sources.

  Args:
    extraction_engine (BaseEngine): extraction engine to preprocess
        the sources.
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer.
  """
  logger.debug('Starting preprocessing.')

  try:
    artifacts_registry = engine.BaseEngine.BuildArtifactsRegistry(
        self._artifact_definitions_path, self._custom_artifacts_path)
    extraction_engine.PreprocessSources(
        artifacts_registry, self._source_path_specs, session, storage_writer,
        resolver_context=self._resolver_context)

  except IOError as exception:
    logger.error(
        'Unable to preprocess with error: {0!s}'.format(exception))

  logger.debug('Preprocessing done.')
def _PreprocessSources(self, extraction_engine):
  """Preprocesses the sources.

  Args:
    extraction_engine (BaseEngine): extraction engine to preprocess
        the sources.
  """
  logger.debug('Starting preprocessing.')

  session = sessions.Session()

  try:
    artifacts_registry = engine.BaseEngine.BuildArtifactsRegistry(
        self._artifact_definitions_path, self._custom_artifacts_path)

    # Setting storage writer to None here since we do not want to store
    # preprocessing information.
    extraction_engine.PreprocessSources(
        artifacts_registry, self._source_path_specs, session, None,
        resolver_context=self._resolver_context)

  except IOError as exception:
    logger.error(
        'Unable to preprocess with error: {0!s}'.format(exception))

  logger.debug('Preprocessing done.')
def _GetExpandedParserFilterExpression(self, knowledge_base):
  """Determines the expanded parser filter expression.

  Args:
    knowledge_base (KnowledgeBase): contains information from the source
        data needed for parsing.

  Returns:
    str: expanded parser filter expression.

  Raises:
    BadConfigOption: if presets in the parser filter expression could not
        be expanded or if an invalid parser or plugin name is specified.
  """
  parser_filter_expression = self._parser_filter_expression

  if not parser_filter_expression:
    operating_system_family = knowledge_base.GetValue('operating_system')
    operating_system_product = knowledge_base.GetValue(
        'operating_system_product')
    operating_system_version = knowledge_base.GetValue(
        'operating_system_version')

    operating_system_artifact = artifacts.OperatingSystemArtifact(
        family=operating_system_family, product=operating_system_product,
        version=operating_system_version)

    preset_definitions = self._presets_manager.GetPresetsByOperatingSystem(
        operating_system_artifact)
    if preset_definitions:
      self._parser_filter_expression = ','.join([
          preset_definition.name
          for preset_definition in preset_definitions])

      logger.debug('Parser filter expression set to preset: {0:s}'.format(
          self._parser_filter_expression))

  parser_filter_helper = parser_filter.ParserFilterExpressionHelper()

  try:
    parser_filter_expression = parser_filter_helper.ExpandPresets(
        self._presets_manager, self._parser_filter_expression)
    logger.debug('Parser filter expression set to: {0:s}'.format(
        parser_filter_expression or 'N/A'))
  except RuntimeError as exception:
    raise errors.BadConfigOption((
        'Unable to expand presets in parser filter expression with '
        'error: {0!s}').format(exception))

  parser_elements, invalid_parser_elements = (
      parsers_manager.ParsersManager.CheckFilterExpression(
          parser_filter_expression))

  if invalid_parser_elements:
    invalid_parser_names_string = ','.join(invalid_parser_elements)
    raise errors.BadConfigOption(
        'Unknown parser or plugin names in element(s): "{0:s}" of '
        'parser filter expression: {1:s}'.format(
            invalid_parser_names_string, parser_filter_expression))

  return ','.join(sorted(parser_elements))
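# The preset expansion above replaces each preset name in the comma-separated
# parser filter expression with the parsers it stands for, then validates the
# result. A minimal, self-contained sketch of that idea; the preset table and
# names below are hypothetical, not plaso's actual preset definitions.


def _ExpandPresetsSketch(expression):
  """Expands preset names in a comma-separated parser filter expression."""
  # Hypothetical preset table; plaso derives this from its presets manager.
  presets = {
      'win7': ['filestat', 'winevtx', 'winreg'],
      'linux': ['filestat', 'syslog']}

  elements = []
  for element in expression.split(','):
    # Replace a preset name with its parsers; keep regular names as-is.
    elements.extend(presets.get(element, [element]))

  # Deduplicate and produce a stable, sorted expression, mirroring the
  # ','.join(sorted(parser_elements)) above.
  return ','.join(sorted(set(elements)))


# _ExpandPresetsSketch('win7,sqlite') == 'filestat,sqlite,winevtx,winreg'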
def _Preprocess(self, file_system, mount_point):
  """Preprocesses the image.

  Args:
    file_system (dfvfs.FileSystem): file system to be preprocessed.
    mount_point (dfvfs.PathSpec): mount point path specification that refers
        to the base location of the file system.
  """
  logger.debug('Starting preprocessing.')

  try:
    preprocess_manager.PreprocessPluginsManager.RunPlugins(
        self._artifacts_registry, file_system, mount_point,
        self._knowledge_base)

  except IOError as exception:
    logger.error('Unable to preprocess with error: {0!s}'.format(exception))

  logger.debug('Preprocessing done.')
def _PreprocessSources(self, extraction_engine):
  """Preprocesses the sources.

  Args:
    extraction_engine (BaseEngine): extraction engine to preprocess
        the sources.
  """
  logger.debug('Starting preprocessing.')

  try:
    artifacts_registry = engine.BaseEngine.BuildArtifactsRegistry(
        self._artifact_definitions_path, self._custom_artifacts_path)
    extraction_engine.PreprocessSources(
        artifacts_registry, self._source_path_specs,
        resolver_context=self._resolver_context)

  except IOError as exception:
    logger.error('Unable to preprocess with error: {0!s}'.format(exception))

  logger.debug('Preprocessing done.')
def _PreprocessSources(self, extraction_engine):
  """Preprocesses the sources.

  Args:
    extraction_engine (BaseEngine): extraction engine to preprocess
        the sources.
  """
  logger.debug('Starting preprocessing.')

  try:
    extraction_engine.PreprocessSources(
        self._artifacts_registry, self._source_path_specs,
        resolver_context=self._resolver_context)

  except IOError as exception:
    logger.error(
        'Unable to preprocess with error: {0!s}'.format(exception))

  logger.debug('Preprocessing done.')
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type, filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_file_object = filter_file.FilterFile(configuration.filter_file)
    filter_find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)
  else:
    logger.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback,
        worker_memory_limit=self._worker_memory_limit)

  self._status_view.PrintExtractionSummary(processing_status)
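# When a filter file is configured, the knowledge base's environment variables
# are used to expand paths before find specifications are built from them.
# A standalone sketch of that expansion step; the helper below is illustrative
# and much simpler than plaso's FilterFile and BuildFindSpecs.


def _ExpandFilterPathSketch(path, environment_variables):
  """Expands %VAR% style environment variables in a filter file path."""
  for name, value in environment_variables.items():
    path = path.replace('%{0:s}%'.format(name), value)
  return path


# _ExpandFilterPathSketch(
#     '%SystemRoot%\\System32\\config\\SAM', {'SystemRoot': '\\Windows'})
# returns '\\Windows\\System32\\config\\SAM'.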
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid, the storage
        format is not supported, or an invalid collection filter was
        specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year,
      text_prepend=self._text_prepend)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory, or a storage media image or device,
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  session.enabled_parser_names = (
      configuration.parser_filter_expression.split(','))
  session.parser_filter_expression = self._parser_filter_expression

  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration,
        status_update_callback=status_update_callback)
  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  This is a stripped down copy of tools/log2timeline.py that doesn't
  support the full set of flags. The defaults for these are hard coded in
  the constructor of this class.

  Raises:
    BadConfigOption: if the storage file path is invalid, the storage
        format is not supported, or an invalid collection filter was
        specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, status_update_callback=status_update_callback)
  else:
    logger.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def _PromptUserForEncryptedVolumeCredential(
    self, scan_context, locked_scan_node, credentials):
  """Prompts the user to provide a credential for an encrypted volume.

  Args:
    scan_context (dfvfs.SourceScannerContext): source scanner context.
    locked_scan_node (dfvfs.SourceScanNode): locked scan node.
    credentials (dfvfs.Credentials): credentials supported by the locked
        scan node.

  Returns:
    bool: True if the volume was unlocked.
  """
  # TODO: print volume description.
  if locked_scan_node.type_indicator == dfvfs_definitions.TYPE_INDICATOR_BDE:
    self._output_writer.Write('Found a BitLocker encrypted volume.\n')
  else:
    self._output_writer.Write('Found an encrypted volume.\n')

  credentials_list = list(credentials.CREDENTIALS)
  credentials_list.append('skip')

  self._output_writer.Write('Supported credentials:\n')
  self._output_writer.Write('\n')

  for index, name in enumerate(credentials_list):
    self._output_writer.Write('  {0:d}. {1:s}\n'.format(index, name))

  self._output_writer.Write('\nNote that you can abort with Ctrl^C.\n\n')

  result = False
  while not result:
    self._output_writer.Write('Select a credential to unlock the volume: ')
    # TODO: add an input reader.
    input_line = self._input_reader.Read()
    input_line = input_line.strip()

    if input_line in credentials_list:
      credential_type = input_line
    else:
      try:
        credential_type = int(input_line, 10)
        credential_type = credentials_list[credential_type]
      except (IndexError, ValueError):
        self._output_writer.Write(
            'Unsupported credential: {0:s}\n'.format(input_line))
        continue

    if credential_type == 'skip':
      break

    getpass_string = 'Enter credential data: '
    if sys.platform.startswith('win') and sys.version_info[0] < 3:
      # For Python 2 on Windows getpass (win_getpass) requires an encoded
      # byte string. For Python 3 we need it to be a Unicode string.
      getpass_string = self._EncodeString(getpass_string)

    credential_data = getpass.getpass(getpass_string)
    self._output_writer.Write('\n')

    if credential_type in self._BINARY_DATA_CREDENTIAL_TYPES:
      try:
        credential_data = credential_data.decode('hex')
      except TypeError:
        self._output_writer.Write('Unsupported credential data.\n')
        continue

    try:
      result = self._source_scanner.Unlock(
          scan_context, locked_scan_node.path_spec, credential_type,
          credential_data)
    except IOError as exception:
      logger.debug(
          'Unable to unlock volume with error: {0!s}'.format(exception))
      result = False

    if not result:
      self._output_writer.Write('Unable to unlock volume.\n')
      self._output_writer.Write('\n')

  self._output_writer.Write('\n')

  if result:
    self._AddCredentialConfiguration(
        locked_scan_node.path_spec, credential_type, credential_data)

  return result
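# Note that credential_data.decode('hex') above only works on Python 2: on
# Python 3, str has no decode method and the hex codec is bytes-to-bytes.
# A version-independent sketch of the same hex-to-bytes conversion using only
# the standard library; the helper name is illustrative.

import binascii


def _HexToBytesSketch(credential_data):
  """Converts a hex string such as '70617373' to bytes, None if invalid."""
  try:
    # binascii.unhexlify accepts a str of hex digits on Python 2 and 3;
    # invalid input raises binascii.Error (Python 3) or TypeError (Python 2).
    return binascii.unhexlify(credential_data)
  except (binascii.Error, TypeError):
    return None


# _HexToBytesSketch('70617373') == b'pass'
# _HexToBytesSketch('not-hex') is None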
def _CreateProcessingConfiguration(self, knowledge_base):
  """Creates a processing configuration.

  Args:
    knowledge_base (KnowledgeBase): contains information from the source
        data needed for parsing.

  Returns:
    ProcessingConfiguration: processing configuration.

  Raises:
    BadConfigOption: if presets in the parser filter expression could not
        be expanded or if an invalid parser or plugin name is specified.
  """
  parser_filter_expression = self._parser_filter_expression

  if not parser_filter_expression and not self._single_process_mode:
    operating_system_family = knowledge_base.GetValue('operating_system')
    operating_system_product = knowledge_base.GetValue(
        'operating_system_product')
    operating_system_version = knowledge_base.GetValue(
        'operating_system_version')

    operating_system_artifact = artifacts.OperatingSystemArtifact(
        family=operating_system_family, product=operating_system_product,
        version=operating_system_version)

    preset_definitions = self._presets_manager.GetPresetsByOperatingSystem(
        operating_system_artifact)
    if preset_definitions:
      self._parser_filter_expression = ','.join([
          preset_definition.name
          for preset_definition in preset_definitions])

      logger.debug('Parser filter expression set to preset: {0:s}'.format(
          self._parser_filter_expression))

  parser_filter_helper = parser_filter.ParserFilterExpressionHelper()

  try:
    parser_filter_expression = parser_filter_helper.ExpandPresets(
        self._presets_manager, self._parser_filter_expression)
    logger.debug('Parser filter expression set to: {0:s}'.format(
        parser_filter_expression or 'N/A'))
  except RuntimeError as exception:
    raise errors.BadConfigOption((
        'Unable to expand presets in parser filter expression with '
        'error: {0!s}').format(exception))

  parser_elements, invalid_parser_elements = (
      parsers_manager.ParsersManager.CheckFilterExpression(
          parser_filter_expression))

  if invalid_parser_elements:
    invalid_parser_names_string = ','.join(invalid_parser_elements)
    raise errors.BadConfigOption(
        'Unknown parser or plugin names in element(s): "{0:s}" of '
        'parser filter expression: {1:s}'.format(
            invalid_parser_names_string, parser_filter_expression))

  if not parser_filter_expression:
    parser_filter_expression = ','.join(sorted(parser_elements))

  self._expanded_parser_filter_expression = parser_filter_expression

  # TODO: pass preferred_encoding.
  configuration = configurations.ProcessingConfiguration()
  configuration.artifact_filters = self._artifact_filters
  configuration.credentials = self._credential_configurations
  configuration.debug_output = self._debug_mode
  configuration.extraction.hasher_file_size_limit = (
      self._hasher_file_size_limit)
  configuration.extraction.hasher_names_string = self._hasher_names_string
  configuration.extraction.process_archives = self._process_archives
  configuration.extraction.process_compressed_streams = (
      self._process_compressed_streams)
  configuration.extraction.yara_rules_string = self._yara_rules_string
  configuration.filter_file = self._filter_file
  configuration.log_filename = self._log_file
  configuration.parser_filter_expression = (
      self._expanded_parser_filter_expression)
  configuration.preferred_year = self._preferred_year
  configuration.profiling.directory = self._profiling_directory
  configuration.profiling.sample_rate = self._profiling_sample_rate
  configuration.profiling.profilers = self._profilers
  configuration.task_storage_format = self._task_storage_format
  configuration.temporary_directory = self._temporary_directory

  return configuration
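# _CreateProcessingConfiguration packs many tool options into one object
# instead of threading them through long argument lists. A minimal sketch of
# that pattern using dataclasses; the field names here are hypothetical and
# much reduced compared to plaso's actual ProcessingConfiguration.

from dataclasses import dataclass, field


@dataclass
class _ExtractionConfigurationSketch:
  """Groups extraction settings handed to worker processes."""
  hasher_names_string: str = 'sha256'
  process_archives: bool = False
  yara_rules_string: str = ''


@dataclass
class _ProcessingConfigurationSketch:
  """Top-level configuration handed to the extraction engine."""
  debug_output: bool = False
  parser_filter_expression: str = ''
  task_storage_format: str = 'sqlite'
  extraction: _ExtractionConfigurationSketch = field(
      default_factory=_ExtractionConfigurationSketch)


# configuration = _ProcessingConfigurationSketch(
#     parser_filter_expression='filestat')
# configuration.extraction.process_archives = True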
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  This is a stripped down copy of tools/log2timeline.py that doesn't
  support the full set of flags. The defaults for these are hard coded in
  the constructor of this class.

  Raises:
    BadConfigOption: if the storage format is not supported.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  filter_find_specs = engine.BaseEngine.BuildFilterFindSpecs(
      self._artifact_definitions_path, self._custom_artifacts_path,
      extraction_engine.knowledge_base, self._artifact_filters,
      self._filter_file)

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)
  else:
    logger.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  session.enabled_parser_names = (
      configuration.parser_filter_expression.split(','))
  session.parser_filter_expression = self._parser_filter_expression

  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  # TODO: decouple session and storage writer?
  session.source_configurations = (
      extraction_engine.knowledge_base.GetSourceConfigurationArtifacts())

  storage_writer.WriteSessionConfiguration(session)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    force_parser = False
    number_of_parsers = len(
        configuration.parser_filter_expression.split(','))
    if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
        not is_archive and number_of_parsers == 1):
      force_parser = True

    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)
  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  self._expanded_parser_filter_expression = (
      self._GetExpandedParserFilterExpression(
          extraction_engine.knowledge_base))

  enabled_parser_names = self._expanded_parser_filter_expression.split(',')
  number_of_enabled_parsers = len(enabled_parser_names)

  force_parser = False
  if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
      not is_archive and number_of_enabled_parsers == 1):
    force_parser = True

    self._extract_winevt_resources = False

  elif ('winevt' not in enabled_parser_names and
        'winevtx' not in enabled_parser_names):
    self._extract_winevt_resources = False

  elif (self._extract_winevt_resources and
        'pe' not in enabled_parser_names):
    logger.warning(
        'A Windows EventLog parser is enabled in combination with '
        'extraction of Windows EventLog resources, but the Portable '
        'Executable (PE) parser is disabled. Therefore Windows EventLog '
        'resources cannot be extracted.')

    self._extract_winevt_resources = False

  configuration = self._CreateExtractionProcessingConfiguration()

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  session_configuration = self._CreateExtractionSessionConfiguration(
      session, enabled_parser_names)
  storage_writer.AddAttributeContainer(session_configuration)

  source_configurations = []
  for path_spec in self._source_path_specs:
    source_configuration = artifacts.SourceConfigurationArtifact(
        path_spec=path_spec)
    source_configurations.append(source_configuration)

  # TODO: improve to detect more than one system configuration.
  # TODO: improve to add volumes to system configuration.
  system_configuration = (
      extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
  storage_writer.AddAttributeContainer(system_configuration)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, self._resolver_context,
        configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)
  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, session.identifier,
        configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
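# The recurring dispatch across these revisions: use a single-process engine
# for sources that cannot benefit from parallelism (a lone file that is not
# an archive being processed), otherwise a multi-process engine. A distilled,
# self-contained sketch; the stub classes below are illustrative, not plaso's.


class _SingleProcessEngineSketch(object):
  """Runs extraction in the foreground process."""


class _MultiProcessEngineSketch(object):
  """Fans extraction tasks out to worker processes."""

  def __init__(self, number_of_worker_processes=0):
    self.number_of_worker_processes = number_of_worker_processes


def _CreateExtractionEngineSketch(
    is_single_file, is_archive, process_archives, number_of_workers):
  """Mirrors the engine selection logic used in _ProcessSources above."""
  # A lone file gains nothing from worker processes unless it is an archive
  # whose members are being processed.
  if is_single_file and (not process_archives or not is_archive):
    return _SingleProcessEngineSketch()
  return _MultiProcessEngineSketch(
      number_of_worker_processes=number_of_workers)


# _CreateExtractionEngineSketch(
#     is_single_file=True, is_archive=False, process_archives=False,
#     number_of_workers=4) returns a _SingleProcessEngineSketch.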