Example #1
    def testAnalyzeEvents(self):
        """Tests the AnalyzeEvents function."""
        storage_file_path = self._GetTestFilePath(['psort_test.json.plaso'])

        session = sessions.Session()
        knowledge_base_object = knowledge_base.KnowledgeBase()

        formatter_mediator = formatters_mediator.FormatterMediator()
        formatter_mediator.SetPreferredLanguageIdentifier('en-US')

        output_mediator_object = output_mediator.OutputMediator(
            knowledge_base_object, formatter_mediator)

        output_module = null.NullOutputModule(output_mediator_object)

        data_location = ''
        analysis_plugin = tagging.TaggingAnalysisPlugin()
        analysis_plugins = {'tagging': analysis_plugin}
        # TODO: set tag file.

        test_engine = psort.PsortMultiProcessEngine()

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, 'storage.plaso')
            shutil.copyfile(storage_file_path, temp_file)

            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, temp_file)

            counter = test_engine.AnalyzeEvents(knowledge_base_object,
                                                storage_writer, output_module,
                                                data_location,
                                                analysis_plugins)

        # TODO: assert if tests were successful.
        _ = counter

        test_filter = filters_test_lib.TestEventFilter()

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, 'storage.plaso')
            shutil.copyfile(storage_file_path, temp_file)

            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, temp_file)

            counter = test_engine.AnalyzeEvents(knowledge_base_object,
                                                storage_writer, output_module,
                                                data_location,
                                                analysis_plugins,
                                                event_filter=test_filter)

        # TODO: assert if tests were successful.
        _ = counter
Example #2
    def testProcessSources(self):
        """Tests the PreprocessSources and ProcessSources function."""
        registry = artifacts_registry.ArtifactDefinitionsRegistry()
        reader = artifacts_reader.YamlArtifactsReader()
        path = shared_test_lib.GetTestFilePath(['artifacts'])
        registry.ReadFromDirectory(reader, path)

        test_engine = task_engine.TaskMultiProcessEngine(
            maximum_number_of_tasks=100)

        source_path = self._GetTestFilePath(['ímynd.dd'])
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location='/',
            parent=os_path_spec)

        test_engine.PreprocessSources(registry, [source_path_spec])

        session = sessions.Session()

        configuration = configurations.ProcessingConfiguration()
        configuration.parser_filter_expression = 'filestat'

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, 'storage.plaso')
            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, temp_file)

            test_engine.ProcessSources(session.identifier, [source_path_spec],
                                       storage_writer, configuration)
Example #3
  def testStorageWriter(self):
    """Test the storage writer."""
    event_objects = test_lib.CreateTestEventObjects()
    session_start = sessions.SessionStart()
    preprocessing_object = event.PreprocessObject()

    with shared_test_lib.TempDirectory() as temp_directory:
      temp_file = os.path.join(temp_directory, u'storage.plaso')
      storage_writer = zip_file.ZIPStorageFileWriter(
          temp_file, preprocessing_object)

      storage_writer.Open()
      storage_writer.WriteSessionStart(session_start)

      for event_object in event_objects:
        storage_writer.AddEvent(event_object)

      storage_writer.WriteSessionCompletion()
      storage_writer.Close()

      storage_file = zipfile.ZipFile(
          temp_file, mode='r', compression=zipfile.ZIP_DEFLATED)

      expected_filename_list = sorted([
          u'event_data.000001',
          u'event_index.000001',
          u'event_timestamps.000001',
          u'information.dump',
          u'metadata.txt',
          u'session_completion.000001',
          u'session_start.000001'])

      filename_list = sorted(storage_file.namelist())
      self.assertEqual(len(filename_list), 7)
      self.assertEqual(filename_list, expected_filename_list)
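Beyond checking the archive layout, the written file can also be read back with the matching reader; a minimal sketch, assuming the ZIPStorageFile class lives in the same zip_file module (its Open and GetEvents calls appear in Example #10 below):

    storage_file = zip_file.ZIPStorageFile()
    storage_file.Open(path=temp_file)

    for event_object in storage_file.GetEvents():
      # Each event exposes parsed attributes such as data_type.
      print(event_object.data_type)

    storage_file.Close()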
Example #4
    def testProcessSources(self):
        """Tests the PreprocessSources and ProcessSources function."""
        test_engine = task_engine.TaskMultiProcessEngine(
            maximum_number_of_tasks=100)

        source_path = self._GetTestFilePath([u'ímynd.dd'])
        os_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path)
        source_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location=u'/',
            parent=os_path_spec)

        test_engine.PreprocessSources([source_path_spec])

        session = sessions.Session()

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, u'storage.plaso')
            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, temp_file)

            test_engine.ProcessSources(session.identifier, [source_path_spec],
                                       storage_writer,
                                       parser_filter_expression=u'filestat')
Example #5
  def testInternalAnalyzeEvents(self):
    """Tests the _AnalyzeEvents function."""
    session = sessions.Session()
    knowledge_base_object = knowledge_base.KnowledgeBase()

    test_engine = psort.PsortMultiProcessEngine()

    test_plugin = TestAnalysisPlugin()

    with shared_test_lib.TempDirectory() as temp_directory:
      temp_file = os.path.join(temp_directory, u'storage.plaso')
      self._CreateTestStorageFile(temp_file)

      storage_writer = storage_zip_file.ZIPStorageFileWriter(
          session, temp_file)

      storage_writer.StartTaskStorage()

      storage_writer.Open()
      storage_writer.ReadPreprocessingInformation(knowledge_base_object)

      # TODO: implement; this currently loops infinitely.
      # test_engine._AnalyzeEvents(storage_writer, [test_plugin])
      storage_writer.Close()

    test_filter = filters_test_lib.TestEventFilter()

    with shared_test_lib.TempDirectory() as temp_directory:
      temp_file = os.path.join(temp_directory, u'storage.plaso')
      self._CreateTestStorageFile(temp_file)

      storage_writer = storage_zip_file.ZIPStorageFileWriter(
          session, temp_file)

      storage_writer.StartTaskStorage()

      storage_writer.Open()
      storage_writer.ReadPreprocessingInformation(knowledge_base_object)

      # TODO: implement; this currently loops infinitely.
      _ = test_engine
      _ = test_plugin
      _ = test_filter
      # test_engine._AnalyzeEvents(
      #    storage_writer, [test_plugin], event_filter=test_filter)
      storage_writer.Close()
Example #6
  def _ProcessAnalysisPlugins(
      self, analysis_plugins, analysis_report_incoming_queue, storage_file,
      counter, pre_obj, preferred_encoding=u'utf-8'):
    """Runs the analysis plugins.

    Args:
      analysis_plugins: the analysis plugins.
      analysis_report_incoming_queue: the analysis output queue (instance of
                                      Queue).
      storage_file: a storage file object (instance of StorageFile).
      counter: a counter object (instance of collections.Counter).
      pre_obj: The preprocessor object (instance of PreprocessObject).
      preferred_encoding: optional preferred encoding.
    """
    if not analysis_plugins:
      return

    logging.info(u'Processing data from analysis plugins.')

    pre_obj.collection_information[u'Action'] = u'Adding tags to storage.'
    pre_obj.collection_information[u'time_of_run'] = timelib.Timestamp.GetNow()

    # Wait for all analysis plugins to complete.
    for analysis_process_info in self._analysis_process_info:
      name = analysis_process_info.plugin.NAME
      if analysis_process_info.plugin.LONG_RUNNING_PLUGIN:
        logging.warning(
            u'{0:s} may take a long time to run. It will not be automatically '
            u'terminated.'.format(name))
        report_wait = None
      else:
        report_wait = self.MAX_ANALYSIS_PLUGIN_REPORT_WAIT

      logging.info(
          u'Waiting for analysis plugin: {0:s} to complete.'.format(name))

      completion_event = analysis_process_info.completion_event
      if completion_event.wait(report_wait):
        logging.info(u'Plugin {0:s} has completed.'.format(name))
      else:
        logging.warning(
            u'Analysis process {0:s} failed to compile its report in a '
            u'reasonable time. No report will be displayed or stored.'.format(
                name))

    logging.info(u'All analysis plugins are now completed.')

    storage_writer = storage_zip_file.ZIPStorageFileWriter(
        storage_file, pre_obj)
    analysis_report_consumer = PsortAnalysisReportQueueConsumer(
        analysis_report_incoming_queue, storage_writer,
        self._filter_expression, preferred_encoding=preferred_encoding)

    analysis_report_consumer.ConsumeItems()
    for item, value in iter(analysis_report_consumer.reports_counter.items()):
      counter[item] = value
Example #7
  def CreateStorageWriter(self, session, storage_file_path):
    """Creates a storage writer.

    Args:
      session (Session): session the storage changes are part of.
      storage_file_path (str): path of the storage file.

    Returns:
      StorageWriter: storage writer.
    """
    self._CheckStorageFile(storage_file_path)

    return storage_zip_file.ZIPStorageFileWriter(session, storage_file_path)
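A minimal usage sketch for the factory above; the front_end instance name and the output path are illustrative placeholders, and the writer lifecycle calls mirror Example #3:

    session = sessions.Session()
    storage_writer = front_end.CreateStorageWriter(session, 'timeline.plaso')

    storage_writer.Open()
    # ... write session records and add events, as in Example #3 ...
    storage_writer.Close()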
Example #8
    @classmethod
    def CreateStorageWriterForFile(cls, session, path):
        """Creates a storage writer based on the file.

        Args:
          session (Session): session the storage changes are part of.
          path (str): path to the storage file.

        Returns:
          StorageWriter: a storage writer or None if the storage file cannot
              be opened or the storage format is not supported.
        """
        if storage_sqlite_file.SQLiteStorageFile.CheckSupportedFormat(path):
            return storage_sqlite_file.SQLiteStorageFileWriter(session, path)

        elif storage_zip_file.ZIPStorageFile.CheckSupportedFormat(path):
            return storage_zip_file.ZIPStorageFileWriter(session, path)
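Because the method returns None when neither CheckSupportedFormat test passes, callers need to guard against an unsupported file; a hedged sketch, where storage_factory is a hypothetical name for the class defining this classmethod:

    storage_writer = storage_factory.CreateStorageWriterForFile(session, path)
    if not storage_writer:
        # Neither the SQLite nor the ZIP format check matched.
        raise IOError('Unsupported storage format: {0:s}'.format(path))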
Example #9
    def testStorageWriter(self):
        """Test the storage writer."""
        event_objects = test_lib.CreateTestEventObjects()

        # The storage writer is normally run in a separate thread.
        # For the purpose of this test it has to be run in sequence,
        # hence the call to WriteEventObjects after all the event objects
        # have been queued up.

        # TODO: add upper queue limit.
        # A timeout is used to prevent the multi processing queue from
        # blocking the current process indefinitely once it has been closed.
        test_queue = multi_process.MultiProcessingQueue(timeout=0.1)
        test_queue_producer = plaso_queue.ItemQueueProducer(test_queue)
        test_queue_producer.ProduceItems(event_objects)

        test_queue_producer.SignalAbort()

        preprocessing_object = event.PreprocessObject()

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, u'plaso.db')
            storage_writer = zip_file.ZIPStorageFileWriter(
                test_queue, temp_file, preprocessing_object)
            storage_writer.WriteEventObjects()

            storage_file = zipfile.ZipFile(temp_file,
                                           mode='r',
                                           compression=zipfile.ZIP_DEFLATED)

            expected_filename_list = [
                u'information.dump', u'plaso_index.000001',
                u'plaso_proto.000001', u'plaso_timestamps.000001',
                u'serializer.txt'
            ]

            filename_list = sorted(storage_file.namelist())
            self.assertEqual(len(filename_list), 5)
            self.assertEqual(filename_list, expected_filename_list)
Example #10
    def testProcessSources(self):
        """Tests the ProcessSources function."""
        session = sessions.Session()
        test_front_end = extraction_frontend.ExtractionFrontend()

        test_file = self._GetTestFilePath([u'ímynd.dd'])
        volume_path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file)
        path_spec = path_spec_factory.Factory.NewPathSpec(
            dfvfs_definitions.TYPE_INDICATOR_TSK,
            location=u'/',
            parent=volume_path_spec)

        source_type = dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE

        with shared_test_lib.TempDirectory() as temp_directory:
            storage_file_path = os.path.join(temp_directory, u'storage.plaso')

            storage_writer = storage_zip_file.ZIPStorageFileWriter(
                session, storage_file_path)
            test_front_end.ProcessSources(session, storage_writer, [path_spec],
                                          source_type)

            storage_file = storage_zip_file.ZIPStorageFile()
            try:
                storage_file.Open(path=storage_file_path)
            except IOError:
                self.fail(u'Unable to open storage file after processing.')

            # Make sure we can read events from the storage.
            event_objects = list(storage_file.GetEvents())
            self.assertNotEqual(len(event_objects), 0)

            event_object = event_objects[0]

            self.assertEqual(event_object.data_type, u'fs:stat')
            self.assertEqual(event_object.filename, u'/lost+found')
Example #11
    def ExtractEventsFromSources(self):
        """Processes the sources and extracts events.

        This is a stripped down copy of tools/log2timeline.py that doesn't
        support the full set of flags. The defaults for these are hard coded
        in the constructor of this class.

        Raises:
          SourceScannerError: if the source scanner could not find a supported
              file system.
          UserAbort: if the user initiated an abort.
        """
        self._CheckStorageFile(self._storage_file_path)

        scan_context = self.ScanSource()
        source_type = scan_context.source_type

        self._status_view.SetMode(self._status_view_mode)
        self._status_view.SetSourceInformation(self._source_path,
                                               source_type,
                                               filter_file=self._filter_file)

        status_update_callback = (
            self._status_view.GetExtractionStatusUpdateCallback())

        self._output_writer.Write(u'\n')
        self._status_view.PrintExtractionStatusHeader(None)
        self._output_writer.Write(u'Processing started.\n')

        session = engine.BaseEngine.CreateSession(
            command_line_arguments=self._command_line_arguments,
            filter_file=self._filter_file,
            preferred_encoding=self.preferred_encoding,
            preferred_time_zone=self._preferred_time_zone,
            preferred_year=self._preferred_year)

        storage_writer = storage_zip_file.ZIPStorageFileWriter(
            session, self._storage_file_path)

        configuration = self._CreateProcessingConfiguration()

        single_process_mode = self._single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        if single_process_mode:
            extraction_engine = single_process_engine.SingleProcessEngine()
        else:
            extraction_engine = multi_process_engine.TaskMultiProcessEngine(
                use_zeromq=self._use_zeromq)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if (self._force_preprocessing
                or source_type in self._SOURCE_TYPES_TO_PREPROCESS):
            self._PreprocessSources(extraction_engine)

        if not configuration.parser_filter_expression:
            operating_system = extraction_engine.knowledge_base.GetValue(
                u'operating_system')
            operating_system_product = extraction_engine.knowledge_base.GetValue(
                u'operating_system_product')
            operating_system_version = extraction_engine.knowledge_base.GetValue(
                u'operating_system_version')
            parser_filter_expression = (
                self._parsers_manager.GetPresetForOperatingSystem(
                    operating_system, operating_system_product,
                    operating_system_version))

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

            configuration.parser_filter_expression = parser_filter_expression
            session.enabled_parser_names = list(
                self._parsers_manager.GetParserAndPluginNames(
                    parser_filter_expression=(
                        configuration.parser_filter_expression)))
            session.parser_filter_expression = (
                configuration.parser_filter_expression)

        if session.preferred_time_zone:
            try:
                extraction_engine.knowledge_base.SetTimeZone(
                    session.preferred_time_zone)
            except ValueError:
                # pylint: disable=protected-access
                logging.warning(
                    u'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                        session.preferred_time_zone,
                        extraction_engine.knowledge_base._time_zone.zone))

        filter_find_specs = None
        if configuration.filter_file:
            environment_variables = (
                extraction_engine.knowledge_base.GetEnvironmentVariables())
            filter_find_specs = frontend_utils.BuildFindSpecsFromFile(
                configuration.filter_file,
                environment_variables=environment_variables)

        processing_status = None
        if single_process_mode:
            logging.debug(u'Starting extraction in single process mode.')

            processing_status = extraction_engine.ProcessSources(
                self._source_path_specs,
                storage_writer,
                self._resolver_context,
                configuration,
                filter_find_specs=filter_find_specs,
                status_update_callback=status_update_callback)

        else:
            logging.debug(u'Starting extraction in multi process mode.')

            processing_status = extraction_engine.ProcessSources(
                session.identifier,
                self._source_path_specs,
                storage_writer,
                configuration,
                enable_sigsegv_handler=self._enable_sigsegv_handler,
                filter_find_specs=filter_find_specs,
                number_of_worker_processes=self._number_of_extraction_workers,
                status_update_callback=status_update_callback)

        self._status_view.PrintExtractionSummary(processing_status)
Example #12
  def ExtractEventsFromSources(self):
    """Processes the sources and extracts events.

    Raises:
      BadConfigOption: if the storage file path is invalid.
      SourceScannerError: if the source scanner could not find a supported
          file system.
      UserAbort: if the user initiated an abort.
    """
    self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

    scan_context = self.ScanSource(self._source_path)
    self._source_type = scan_context.source_type

    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetSourceInformation(
        self._source_path, self._source_type, filter_file=self._filter_file)

    status_update_callback = (
        self._status_view.GetExtractionStatusUpdateCallback())

    self._output_writer.Write('\n')
    self._status_view.PrintExtractionStatusHeader(None)
    self._output_writer.Write('Processing started.\n')

    session = engine.BaseEngine.CreateSession(
        command_line_arguments=self._command_line_arguments,
        debug_mode=self._debug_mode,
        filter_file=self._filter_file,
        preferred_encoding=self.preferred_encoding,
        preferred_time_zone=self._preferred_time_zone,
        preferred_year=self._preferred_year)

    if self._storage_format == definitions.STORAGE_FORMAT_SQLITE:
      storage_writer = storage_sqlite_file.SQLiteStorageFileWriter(
          session, self._storage_file_path)

    else:
      storage_writer = storage_zip_file.ZIPStorageFileWriter(
          session, self._storage_file_path)

    single_process_mode = self._single_process_mode
    if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
      # No need to multi process a single file source.
      single_process_mode = True

    if single_process_mode:
      extraction_engine = single_process_engine.SingleProcessEngine()
    else:
      extraction_engine = multi_process_engine.TaskMultiProcessEngine(
          use_zeromq=self._use_zeromq)

    # If the source is a directory or a storage media image
    # run pre-processing.
    if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
      self._PreprocessSources(extraction_engine)

    configuration = self._CreateProcessingConfiguration()

    if not configuration.parser_filter_expression:
      operating_system = extraction_engine.knowledge_base.GetValue(
          'operating_system')
      operating_system_product = extraction_engine.knowledge_base.GetValue(
          'operating_system_product')
      operating_system_version = extraction_engine.knowledge_base.GetValue(
          'operating_system_version')
      parser_filter_expression = (
          parsers_manager.ParsersManager.GetPresetForOperatingSystem(
              operating_system, operating_system_product,
              operating_system_version))

      if parser_filter_expression:
        logging.info('Parser filter expression changed to: {0:s}'.format(
            parser_filter_expression))

      configuration.parser_filter_expression = parser_filter_expression

      names_generator = parsers_manager.ParsersManager.GetParserAndPluginNames(
          parser_filter_expression=parser_filter_expression)

      session.enabled_parser_names = list(names_generator)
      session.parser_filter_expression = parser_filter_expression

    # Note session.preferred_time_zone will default to UTC but
    # self._preferred_time_zone is None when not set.
    if self._preferred_time_zone:
      try:
        extraction_engine.knowledge_base.SetTimeZone(self._preferred_time_zone)
      except ValueError:
        # pylint: disable=protected-access
        logging.warning(
            'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
                self._preferred_time_zone,
                extraction_engine.knowledge_base._time_zone.zone))

    filter_find_specs = None
    if configuration.filter_file:
      environment_variables = (
          extraction_engine.knowledge_base.GetEnvironmentVariables())
      filter_file_object = filter_file.FilterFile(configuration.filter_file)
      filter_find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    processing_status = None
    if single_process_mode:
      logging.debug('Starting extraction in single process mode.')

      processing_status = extraction_engine.ProcessSources(
          self._source_path_specs, storage_writer, self._resolver_context,
          configuration, filter_find_specs=filter_find_specs,
          status_update_callback=status_update_callback)

    else:
      logging.debug('Starting extraction in multi process mode.')

      processing_status = extraction_engine.ProcessSources(
          session.identifier, self._source_path_specs, storage_writer,
          configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
          filter_find_specs=filter_find_specs,
          number_of_worker_processes=self._number_of_extraction_workers,
          status_update_callback=status_update_callback,
          worker_memory_limit=self._worker_memory_limit)

    self._status_view.PrintExtractionSummary(processing_status)
Example #13
    def ProcessSources(self,
                       source_path_specs,
                       source_type,
                       command_line_arguments=None,
                       enable_sigsegv_handler=False,
                       filter_file=None,
                       hasher_names_string=None,
                       number_of_extraction_workers=0,
                       preferred_encoding=u'utf-8',
                       parser_filter_expression=None,
                       single_process_mode=False,
                       status_update_callback=None,
                       timezone=pytz.UTC):
        """Processes the sources.

    Args:
      source_path_specs: list of path specifications (instances of
                         dfvfs.PathSpec) to process.
      source_type: the dfVFS source type definition.
      command_line_arguments: optional string of the command line arguments or
                              None if not set.
      enable_sigsegv_handler: optional boolean value to indicate the SIGSEGV
                              handler should be enabled.
      filter_file: optional path to a file that contains find specifications.
      hasher_names_string: optional comma separated string of names of
                           hashers to enable.
      number_of_extraction_workers: the number of extraction workers to run. If
                                    0, the number will be selected
                                    automatically.
      preferred_encoding: optional preferred encoding.
      parser_filter_expression: optional string containing the parser filter
                                expression, where None represents all parsers
                                and plugins.
      single_process_mode: optional boolean value to indicate if the front-end
                           should run in single process mode.
      status_update_callback: optional callback function for status updates.
      timezone: optional preferred timezone.

    Returns:
      The processing status (instance of ProcessingStatus) or None.

    Raises:
      SourceScannerError: if the source scanner could not find a supported
                          file system.
      UserAbort: if the user initiated an abort.
    """
        # If the source is a directory or a storage media image
        # run pre-processing.
        if source_type in [
                dfvfs_definitions.SOURCE_TYPE_DIRECTORY,
                dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
                dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE
        ]:
            self.SetEnablePreprocessing(True)
        else:
            self.SetEnablePreprocessing(False)

        self._CheckStorageFile(self._storage_file_path)

        self._single_process_mode = single_process_mode
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            self._single_process_mode = True

        if self._single_process_mode:
            self._engine = single_process.SingleProcessEngine(self._queue_size)
        else:
            self._engine = multi_process.MultiProcessEngine(
                maximum_number_of_queued_items=self._queue_size,
                use_zeromq=self._use_zeromq)

        self._engine.SetEnableDebugOutput(self._debug_mode)
        self._engine.SetEnableProfiling(
            self._enable_profiling,
            profiling_sample_rate=self._profiling_sample_rate,
            profiling_type=self._profiling_type)

        pre_obj = self._PreprocessSources(source_path_specs, source_type)

        self._operating_system = getattr(pre_obj, u'guessed_os', None)

        if not parser_filter_expression:
            guessed_os = self._operating_system
            os_version = getattr(pre_obj, u'osversion', u'')
            parser_filter_expression = self._GetParserFilterPreset(
                os_guess=guessed_os, os_version=os_version)

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

        self._parser_names = []
        for _, parser_class in parsers_manager.ParsersManager.GetParsers(
                parser_filter_expression=parser_filter_expression):
            self._parser_names.append(parser_class.NAME)

        self._hasher_names = []
        hasher_manager = hashers_manager.HashersManager
        for hasher_name in hasher_manager.GetHasherNamesFromString(
                hasher_names_string=hasher_names_string):
            self._hasher_names.append(hasher_name)

        self._PreprocessSetTimezone(pre_obj, timezone=timezone)

        if filter_file:
            filter_find_specs = engine_utils.BuildFindSpecsFromFile(
                filter_file, pre_obj=pre_obj)
        else:
            filter_find_specs = None

        # TODO: deprecate the need for this function.
        self._PreprocessSetCollectionInformation(pre_obj)

        session_start = self._CreateSessionStart(
            command_line_arguments=command_line_arguments,
            filter_file=filter_file,
            parser_filter_expression=parser_filter_expression,
            preferred_encoding=preferred_encoding)

        storage_writer = storage_zip_file.ZIPStorageFileWriter(
            self._storage_file_path, pre_obj, buffer_size=self._buffer_size)

        storage_writer.SetEnableProfiling(self._enable_profiling,
                                          profiling_type=self._profiling_type)

        storage_writer.Open()
        storage_writer.WriteSessionStart(session_start)

        processing_status = None
        try:
            if self._single_process_mode:
                logging.debug(u'Starting extraction in single process mode.')

                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    mount_path=self._mount_path,
                    parser_filter_expression=parser_filter_expression,
                    process_archive_files=self._process_archive_files,
                    resolver_context=self._resolver_context,
                    status_update_callback=status_update_callback,
                    text_prepend=self._text_prepend)

            else:
                logging.debug(u'Starting extraction in multi process mode.')

                # TODO: pass number_of_extraction_workers.
                processing_status = self._engine.ProcessSources(
                    source_path_specs,
                    storage_writer,
                    enable_sigsegv_handler=enable_sigsegv_handler,
                    filter_find_specs=filter_find_specs,
                    filter_object=self._filter_object,
                    hasher_names_string=hasher_names_string,
                    mount_path=self._mount_path,
                    number_of_extraction_workers=number_of_extraction_workers,
                    parser_filter_expression=parser_filter_expression,
                    process_archive_files=self._process_archive_files,
                    status_update_callback=status_update_callback,
                    show_memory_usage=self._show_worker_memory_information,
                    text_prepend=self._text_prepend)

        except KeyboardInterrupt:
            self._CleanUpAfterAbort()
            raise errors.UserAbort

        # TODO: check if this still works and if still needed.
        except Exception as exception:  # pylint: disable=broad-except
            if not self._single_process_mode:
                raise

            # The tool should generally not be run in single process mode
            # for other reasons than to debug. Hence the general error
            # catching.
            logging.error(
                u'An uncaught exception occurred: {0:s}.\n{1:s}'.format(
                    exception, traceback.format_exc()))
            if self._debug_mode:
                pdb.post_mortem()

        return processing_status
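For orientation, a minimal invocation sketch of the method above, under the same path-specification setup as Example #10; front_end is a hypothetical instance of the front-end class that defines ProcessSources, and all keyword arguments keep their documented defaults except the ones shown:

    source_type = dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE
    processing_status = front_end.ProcessSources(
        [source_path_spec], source_type,
        parser_filter_expression=u'filestat',
        single_process_mode=True)

    if processing_status:
        print(u'Processing completed.')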