Example #1
def ParseFile(file_entry):
  """Parse a file given a file entry and yield results."""
  if not file_entry:
    return

  # Create the necessary items.
  proc_queue = queue.SingleThreadedQueue()
  storage_queue = queue.SingleThreadedQueue()
  storage_queue_producer = queue.EventObjectQueueProducer(storage_queue)
  pre_obj = event.PreprocessObject()
  all_parsers = putils.FindAllParsers(pre_obj)

  # Create a worker.
  worker_object = worker.EventExtractionWorker(
      'my_worker', proc_queue, storage_queue_producer, pre_obj, all_parsers)

  # Parse the file.
  worker_object.ParseFile(file_entry)

  storage_queue.SignalEndOfInput()
  proc_queue.SignalEndOfInput()

  while True:
    try:
      item = storage_queue.PopItem()
    except errors.QueueEmpty:
      break

    if isinstance(item, queue.QueueEndOfInput):
      break

    yield item
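A minimal sketch of how this generator might be consumed, assuming the dfVFS helpers used in Example #22 below (path_spec_factory, path_spec_resolver, definitions); the location is an illustrative placeholder, not taken from the example:

path_spec = path_spec_factory.Factory.NewPathSpec(
    definitions.TYPE_INDICATOR_OS, location='/var/log/syslog')
file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

# ParseFile yields the event objects it pops from the storage queue.
for event_object in ParseFile(file_entry):
  print(event_object.timestamp)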
Example #2
    def testExtractionWorkerHashing(self):
        """Test that the worker sets up and runs hashing code correctly."""
        extraction_worker = worker.EventExtractionWorker()

        extraction_worker._SetHashers('md5')
        self.assertIn('hashing', extraction_worker.GetAnalyzerNames())

        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        path_spec = self._GetTestFilePathSpec(['empty_file'])
        storage_writer = fake_storage.FakeStorageWriter(session)
        self._TestProcessPathSpec(storage_writer,
                                  path_spec,
                                  extraction_worker=extraction_worker,
                                  knowledge_base_values=knowledge_base_values)

        storage_writer.Open()

        empty_file_md5 = 'd41d8cd98f00b204e9800998ecf8427e'
        for event in storage_writer.GetSortedEvents():
            md5_hash = getattr(event, 'md5_hash', None)
            self.assertEqual(md5_hash, empty_file_md5)

        storage_writer.Close()
Example #3
    def testExtractionWorkerYara(self):
        """Tests that the worker applies Yara matching code correctly."""
        extraction_worker = worker.EventExtractionWorker()

        rule_path = self._GetTestFilePath(['yara.rules'])
        with open(rule_path, 'r') as rule_file:
            rule_string = rule_file.read()

        extraction_worker._SetYaraRules(rule_string)
        self.assertIn('yara', extraction_worker.GetAnalyzerNames())

        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        path_spec = self._GetTestFilePathSpec(['test_pe.exe'])
        storage_writer = fake_storage.FakeStorageWriter(session)
        self._TestProcessPathSpec(storage_writer,
                                  path_spec,
                                  extraction_worker=extraction_worker,
                                  knowledge_base_values=knowledge_base_values)

        storage_writer.Open()

        expected_yara_match = 'PEfileBasic,PEfile'
        for event in storage_writer.GetSortedEvents():
            yara_match = getattr(event, 'yara_match', None)
            self.assertEqual(yara_match, expected_yara_match)

        storage_writer.Close()
Example #4
    def CreateExtractionWorker(self,
                               worker_number,
                               pre_obj,
                               parsers,
                               rpc_proxy=None):
        """Creates an extraction worker object.

    Args:
      worker_number: number that identifies the worker.
      pre_obj: The preprocessing object (instance of PreprocessObject).
      parsers: A list of parser objects to use for processing.
      rpc_proxy: A proxy object (instance of proxy.ProxyServer) that can be
                 used to set up RPC functionality for the worker. This is
                 optional and if not provided the worker will not listen to RPC
                 requests.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """
        return worker.EventExtractionWorker(worker_number,
                                            self._collection_queue,
                                            self._storage_queue_producer,
                                            pre_obj,
                                            parsers,
                                            rpc_proxy=rpc_proxy)
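A hedged usage sketch, assuming engine is an instance of the class defining this method and that pre_obj and parsers are built the same way as in Example #1; the variable names are illustrative:

pre_obj = event.PreprocessObject()
parsers = putils.FindAllParsers(pre_obj)

# Worker number 0; without an rpc_proxy the worker will not listen to
# RPC requests.
worker_object = engine.CreateExtractionWorker(0, pre_obj, parsers)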
Example #5
    def testExtractionWorkerHashing(self):
        """Test that the worker sets up and runs hashing code correctly."""
        extraction_worker = worker.EventExtractionWorker()

        extraction_worker._SetHashers('md5')
        self.assertIn('hashing', extraction_worker.GetAnalyzerNames())

        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        path_spec = self._GetTestFilePathSpec(['empty_file'])
        storage_writer = fake_writer.FakeStorageWriter(session)

        # Typically there are 3 filestat events, but there can be 4 on platforms
        # that support os.stat_result st_birthtime.
        expected_event_counters = {'fs:stat': [3, 4]}

        self._TestProcessPathSpec(storage_writer,
                                  path_spec,
                                  expected_event_counters,
                                  extraction_worker=extraction_worker,
                                  knowledge_base_values=knowledge_base_values)

        storage_writer.Open()

        empty_file_md5 = 'd41d8cd98f00b204e9800998ecf8427e'
        for event in storage_writer.GetSortedEvents():
            event_data = self._GetEventDataOfEvent(storage_writer, event)
            event_data_stream = self._GetEventDataStreamOfEventData(
                storage_writer, event_data)

            self.assertEqual(event_data_stream.md5_hash, empty_file_md5)

        storage_writer.Close()
Example #6
    def testCanSkipContentExtraction(self):
        """Tests the _CanSkipContentExtraction function."""
        extraction_worker = worker.EventExtractionWorker()

        file_entry = self._GetTestFileEntry(['syslog.tgz'])

        result = extraction_worker._CanSkipContentExtraction(file_entry)
        self.assertFalse(result)
Example #7
    def testIsMetadataFile(self):
        """Tests the _IsMetadataFile function."""
        extraction_worker = worker.EventExtractionWorker()

        file_entry = self._GetTestFileEntry(['syslog.tgz'])

        result = extraction_worker._IsMetadataFile(file_entry)
        self.assertFalse(result)
Example #8
    def testCanSkipDataStream(self):
        """Tests the _CanSkipDataStream function."""
        extraction_worker = worker.EventExtractionWorker()

        file_entry = self._GetTestFileEntry(['syslog.tgz'])

        result = extraction_worker._CanSkipDataStream(file_entry, None)
        self.assertFalse(result)
Example #9
    def _TestProcessPathSpec(self,
                             storage_writer,
                             path_spec,
                             expected_event_counters,
                             extraction_worker=None,
                             knowledge_base_values=None,
                             process_archives=False):
        """Tests processing a path specification.

    Args:
      storage_writer (StorageWriter): storage writer.
      path_spec (dfvfs.PathSpec): path specification.
      expected_event_counters (dict[str, int|list[int]]): expected event
          counters per event data type.
      extraction_worker (Optional[EventExtractorWorker]): worker to process the
          path specification. If None, a new worker will be created.
      knowledge_base_values (Optional[dict]): knowledge base values.
      process_archives (Optional[bool]): whether archive files should be
          processed.
    """
        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        mediator = parsers_mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            resolver_context=resolver_context)

        if not extraction_worker:
            configuration = configurations.ExtractionConfiguration()
            configuration.process_archives = process_archives

            extraction_worker = worker.EventExtractionWorker()
            extraction_worker.SetExtractionConfiguration(configuration)

        storage_writer.Open()

        try:
            storage_writer.WriteSessionStart()

            extraction_worker.ProcessPathSpec(mediator, path_spec)
            event_source = storage_writer.GetFirstWrittenEventSource()
            while event_source:
                extraction_worker.ProcessPathSpec(mediator,
                                                  event_source.path_spec)
                event_source = storage_writer.GetNextWrittenEventSource()

            storage_writer.WriteSessionCompletion()

            if expected_event_counters:
                self.CheckEventCounters(storage_writer,
                                        expected_event_counters)

        finally:
            storage_writer.Close()
Example #10
    def testGetCompressedStreamTypes(self):
        """Tests the _GetCompressedStreamTypes function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter()

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        parser_mediator = parsers_mediator.ParserMediator(
            knowledge_base_object, resolver_context=resolver_context)
        parser_mediator.SetPreferredYear(2016)
        parser_mediator.SetStorageWriter(storage_writer)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()

        session_start = session.CreateSessionStart()
        storage_writer.AddAttributeContainer(session_start)

        extraction_worker = worker.EventExtractionWorker()

        path_spec = self._GetTestFilePathSpec(['syslog.tgz'])

        type_indicators = extraction_worker._GetCompressedStreamTypes(
            parser_mediator, path_spec)
        self.assertEqual(type_indicators,
                         [dfvfs_definitions.TYPE_INDICATOR_GZIP])

        session_completion = session.CreateSessionCompletion()
        storage_writer.AddAttributeContainer(session_completion)

        storage_writer.Close()
Example #11
    def testGetArchiveTypes(self):
        """Tests the _GetArchiveTypes function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter(session)

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        mediator = parsers_mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            preferred_year=2016,
            resolver_context=resolver_context)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        extraction_worker = worker.EventExtractionWorker()

        path_spec = self._GetTestFilePathSpec(['syslog.tar'])

        type_indicators = extraction_worker._GetArchiveTypes(
            mediator, path_spec)
        self.assertEqual(type_indicators,
                         [dfvfs_definitions.TYPE_INDICATOR_TAR])

        storage_writer.WriteSessionCompletion()
        storage_writer.Close()
Example #12
    def testAnalyzeFileObject(self):
        """Tests the _AnalyzeFileObject function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter(session)

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        mediator = parsers_mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            preferred_year=2016,
            resolver_context=resolver_context)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        file_entry = self._GetTestFileEntry(['ímynd.dd'])
        mediator.SetFileEntry(file_entry)

        file_object = file_entry.GetFileObject()
        display_name = mediator.GetDisplayName()
        event_data_stream = events.EventDataStream()

        try:
            extraction_worker._AnalyzeFileObject(file_object, display_name,
                                                 event_data_stream)
        finally:
            file_object.close()

        storage_writer.WriteSessionCompletion()
        storage_writer.Close()

        self.assertIsNotNone(event_data_stream)

        event_attribute = getattr(event_data_stream, 'test_result', None)
        self.assertEqual(event_attribute, 'is_vegetable')
Example #13
    def testAnalyzeDataStream(self):
        """Tests the _AnalyzeDataStream function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter()

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        parser_mediator = parsers_mediator.ParserMediator(
            knowledge_base_object, resolver_context=resolver_context)
        parser_mediator.SetPreferredYear(2016)
        parser_mediator.SetStorageWriter(storage_writer)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()

        session_start = session.CreateSessionStart()
        storage_writer.AddAttributeContainer(session_start)

        file_entry = self._GetTestFileEntry(['syslog.tgz'])
        parser_mediator.SetFileEntry(file_entry)

        display_name = parser_mediator.GetDisplayName()
        event_data_stream = events.EventDataStream()

        extraction_worker._AnalyzeDataStream(file_entry, '', display_name,
                                             event_data_stream)

        session_completion = session.CreateSessionCompletion()
        storage_writer.AddAttributeContainer(session_completion)

        storage_writer.Close()

        self.assertIsNotNone(event_data_stream)

        event_attribute = getattr(event_data_stream, 'test_result', None)
        self.assertEqual(event_attribute, 'is_vegetable')
Example #14
  def CreateExtractionWorker(self, worker_number, rpc_proxy=None):
    """Creates an extraction worker object.

    Args:
      worker_number: A number that identifies the worker.
      rpc_proxy: A proxy object (instance of proxy.ProxyServer) that can be
                 used to set up RPC functionality for the worker. This is
                 optional and if not provided the worker will not listen to RPC
                 requests.

    Returns:
      An extraction worker (instance of worker.ExtractionWorker).
    """
    return worker.EventExtractionWorker(
        worker_number, self._collection_queue, self._event_queue_producer,
        self._parse_error_queue_producer, self.knowledge_base,
        rpc_proxy=rpc_proxy)
Example #15
    def testExtractionWorkerYara(self):
        """Tests that the worker applies Yara matching code correctly."""
        yara_rule_path = self._GetTestFilePath(['yara.rules'])
        self._SkipIfPathNotExists(yara_rule_path)

        with open(yara_rule_path, 'r') as file_object:
            rule_string = file_object.read()

        extraction_worker = worker.EventExtractionWorker()
        extraction_worker._SetYaraRules(rule_string)
        self.assertIn('yara', extraction_worker.GetAnalyzerNames())

        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        path_spec = self._GetTestFilePathSpec(['test_pe.exe'])
        storage_writer = fake_writer.FakeStorageWriter(session)

        # Typically there are 3 filestat events, but there can be 4 on platforms
        # that support os.stat_result st_birthtime.
        expected_event_counters = {
            'fs:stat': [3, 4],
            'pe:compilation:compilation_time': 1,
            'pe:delay_import:import_time': 1,
            'pe:import:import_time': 1
        }

        self._TestProcessPathSpec(storage_writer,
                                  path_spec,
                                  expected_event_counters,
                                  extraction_worker=extraction_worker,
                                  knowledge_base_values=knowledge_base_values)

        storage_writer.Open()

        expected_yara_match = 'PEfileBasic,PEfile'
        for event in storage_writer.GetSortedEvents():
            event_data = self._GetEventDataOfEvent(storage_writer, event)
            event_data_stream = self._GetEventDataStreamOfEventData(
                storage_writer, event_data)

            self.assertEqual(event_data_stream.yara_match, expected_yara_match)

        storage_writer.Close()
Example #16
    def _TestProcessPathSpec(self,
                             storage_writer,
                             path_spec,
                             extraction_worker=None,
                             knowledge_base_values=None,
                             process_archives=False):
        """Tests processing a path specification.

    Args:
      storage_writer (StorageWriter): storage writer.
      path_spec (dfvfs.PathSpec): path specification.
      extraction_worker (Optional[EventExtractorWorker]): worker to process the
          pathspec. If None, a new worker will be created.
      knowledge_base_values (Optional[dict]): knowledge base values.
      process_archives (Optional[bool]): whether archive files should be
          processed.
    """
        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in iter(knowledge_base_values.items()):
                knowledge_base_object.SetValue(identifier, value)

        mediator = parsers_mediator.ParserMediator(storage_writer,
                                                   knowledge_base_object)

        if not extraction_worker:
            resolver_context = context.Context()

            extraction_worker = worker.EventExtractionWorker(
                resolver_context, process_archives=process_archives)

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        extraction_worker.ProcessPathSpec(mediator, path_spec)
        event_source = storage_writer.GetFirstWrittenEventSource()
        while event_source:
            extraction_worker.ProcessPathSpec(mediator, event_source.path_spec)
            event_source = storage_writer.GetNextWrittenEventSource()

        storage_writer.WriteSessionCompletion()
        storage_writer.Close()
Example #17
    def testAnalyzeFileObject(self):
        """Tests the _AnalyzeFileObject function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_storage.FakeStorageWriter(session)

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in iter(knowledge_base_values.items()):
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        mediator = parsers_mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            preferred_year=2016,
            resolver_context=resolver_context)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        file_entry = self._GetTestFileEntry(['ímynd.dd'])
        mediator.SetFileEntry(file_entry)

        file_object = file_entry.GetFileObject()

        try:
            extraction_worker._AnalyzeFileObject(mediator, file_object)
        finally:
            file_object.close()

        self.assertEqual(len(mediator._extra_event_attributes), 1)

        event_attribute = mediator._extra_event_attributes.get(
            'test_result', None)
        self.assertEqual(event_attribute, 'is_vegetable')
Example #18
    def testExtractMetadataFromFileEntry(self):
        """Tests the _ExtractMetadataFromFileEntry function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter()

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        parser_mediator = parsers_mediator.ParserMediator(
            knowledge_base_object, resolver_context=resolver_context)
        parser_mediator.SetPreferredYear(2016)
        parser_mediator.SetStorageWriter(storage_writer)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()

        session_start = session.CreateSessionStart()
        storage_writer.AddAttributeContainer(session_start)

        file_entry = self._GetTestFileEntry(['syslog.tgz'])
        parser_mediator.SetFileEntry(file_entry)

        extraction_worker._ExtractMetadataFromFileEntry(
            parser_mediator, file_entry, '')

        session_completion = session.CreateSessionCompletion()
        storage_writer.AddAttributeContainer(session_completion)

        storage_writer.Close()
Example #19
    def testExtractContentFromDataStream(self):
        """Tests the _ExtractContentFromDataStream function."""
        knowledge_base_values = {'year': 2016}
        session = sessions.Session()

        storage_writer = fake_writer.FakeStorageWriter(session)

        knowledge_base_object = knowledge_base.KnowledgeBase()
        if knowledge_base_values:
            for identifier, value in knowledge_base_values.items():
                knowledge_base_object.SetValue(identifier, value)

        resolver_context = context.Context()
        mediator = parsers_mediator.ParserMediator(
            storage_writer,
            knowledge_base_object,
            preferred_year=2016,
            resolver_context=resolver_context)

        extraction_worker = worker.EventExtractionWorker()

        test_analyzer = analyzers_manager_test.TestAnalyzer()
        self.assertEqual(len(test_analyzer.GetResults()), 0)

        extraction_worker._analyzers = [test_analyzer]

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        file_entry = self._GetTestFileEntry(['syslog.tgz'])
        mediator.SetFileEntry(file_entry)

        extraction_worker._ExtractContentFromDataStream(
            mediator, file_entry, '')

        storage_writer.WriteSessionCompletion()
        storage_writer.Close()
Example #20
    def ProcessSources(self,
                       source_path_specs,
                       storage_writer,
                       resolver_context,
                       processing_configuration,
                       filter_find_specs=None,
                       status_update_callback=None):
        """Processes the sources.

    Args:
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      resolver_context (dfvfs.Context): resolver context.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
          used in path specification extraction.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Returns:
      ProcessingStatus: processing status.
    """
        parser_mediator = parsers_mediator.ParserMediator(
            storage_writer,
            self.knowledge_base,
            preferred_year=processing_configuration.preferred_year,
            resolver_context=resolver_context,
            temporary_directory=processing_configuration.temporary_directory)

        parser_mediator.SetEventExtractionConfiguration(
            processing_configuration.event_extraction)

        parser_mediator.SetInputSourceConfiguration(
            processing_configuration.input_source)

        extraction_worker = worker.EventExtractionWorker(
            parser_filter_expression=(
                processing_configuration.parser_filter_expression))

        extraction_worker.SetExtractionConfiguration(
            processing_configuration.extraction)

        self._processing_configuration = processing_configuration
        self._status_update_callback = status_update_callback

        logging.debug('Processing started.')

        self._StartProfiling(extraction_worker)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        try:
            storage_writer.WritePreprocessingInformation(self.knowledge_base)

            self._ProcessSources(source_path_specs,
                                 extraction_worker,
                                 parser_mediator,
                                 storage_writer,
                                 filter_find_specs=filter_find_specs)

        finally:
            storage_writer.WriteSessionCompletion(aborted=self._abort)

            storage_writer.Close()

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            self._StopProfiling(extraction_worker)

        if self._abort:
            logging.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logging.debug('Processing completed.')

        self._processing_configuration = None
        self._status_update_callback = None

        return self._processing_status
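A sketch of how a front end might invoke this method, assuming storage_writer, processing_configuration and source_path_specs were created elsewhere and engine is an instance of the class above; these names are illustrative and not defined in the example:

resolver_context = context.Context()

processing_status = engine.ProcessSources(
    source_path_specs, storage_writer, resolver_context,
    processing_configuration)

if processing_status.aborted:
    logging.warning('Processing was aborted.')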
Example #21
  def ProcessSources(
      self, source_configurations, storage_writer, resolver_context,
      processing_configuration, force_parser=False,
      status_update_callback=None):
    """Processes the sources.

    Args:
      source_configurations (list[SourceConfigurationArtifact]): configurations
          of the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      resolver_context (dfvfs.Context): resolver context.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      force_parser (Optional[bool]): True if a specified parser should be forced
          to be used to extract events.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Returns:
      ProcessingStatus: processing status.
    """
    parser_mediator = self._CreateParserMediator(
        self.knowledge_base, resolver_context, processing_configuration)
    parser_mediator.SetStorageWriter(storage_writer)

    self._extraction_worker = worker.EventExtractionWorker(
        force_parser=force_parser, parser_filter_expression=(
            processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        processing_configuration.extraction)

    self._parser_mediator = parser_mediator
    self._processing_configuration = processing_configuration
    self._resolver_context = resolver_context
    self._status_update_callback = status_update_callback
    self._storage_writer = storage_writer

    logger.debug('Processing started.')

    parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._analyzers_profiler:
      self._extraction_worker.SetAnalyzersProfiler(self._analyzers_profiler)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    self._StartStatusUpdateThread()

    self._parsers_counter = collections.Counter({
        parser_count.name: parser_count
        for parser_count in self._storage_writer.GetAttributeContainers(
            'parser_count')})

    try:
      self._ProcessSources(source_configurations, parser_mediator)

    finally:
      # Stop the status update thread after close of the storage writer
      # so we include the storage sync to disk in the status updates.
      self._StopStatusUpdateThread()

      if self._analyzers_profiler:
        self._extraction_worker.SetAnalyzersProfiler(None)

      if self._processing_profiler:
        self._extraction_worker.SetProcessingProfiler(None)

      if self._serializers_profiler:
        self._storage_writer.SetSerializersProfiler(None)

      if self._storage_profiler:
        self._storage_writer.SetStorageProfiler(None)

      self._StopProfiling()
      parser_mediator.StopProfiling()

    for key, value in parser_mediator.parsers_counter.items():
      parser_count = self._parsers_counter.get(key, None)
      if parser_count:
        parser_count.number_of_events += value
        self._storage_writer.UpdateAttributeContainer(parser_count)
      else:
        parser_count = counts.ParserCount(name=key, number_of_events=value)
        self._parsers_counter[key] = parser_count
        self._storage_writer.AddAttributeContainer(parser_count)

    if self._abort:
      logger.debug('Processing aborted.')
      self._processing_status.aborted = True
    else:
      logger.debug('Processing completed.')

    # Update the status view one last time.
    self._UpdateStatus()

    self._extraction_worker = None
    self._file_system_cache = []
    self._parser_mediator = None
    self._processing_configuration = None
    self._resolver_context = None
    self._status_update_callback = None
    self._storage_writer = None

    return self._processing_status
Example #22
def ProcessFile(options):
    """Process a file and produce profile results."""
    if options.proto_file and os.path.isfile(options.proto_file):
        with open(options.proto_file) as fh:
            proto_string = fh.read()

            proto = transmission_pb2.PathSpec()
            try:
                text_format.Merge(proto_string, proto)
            except text_format.ParseError as exception:
                logging.error(
                    u'Unable to parse file, error: {}'.format(exception))
                sys.exit(1)

            serializer = protobuf_serializer.ProtobufPathSpecSerializer
            path_spec = serializer.ReadSerializedObject(proto)
    else:
        path_spec = path_spec_factory.Factory.NewPathSpec(
            definitions.TYPE_INDICATOR_OS, location=options.file_to_parse)

    file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)

    if file_entry is None:
        logging.error(u'Unable to open file: {0:s}'.format(
            options.file_to_parse))
        sys.exit(1)

    pre_obj = event.PreprocessObject()
    storage_queue = queue.SingleThreadedQueue()
    storage_queue_producer = queue.EventObjectQueueProducer(storage_queue)

    # Set few options the engine expects to be there.
    # TODO: Can we rather set this directly in argparse?
    options.single_process = True
    options.debug = False
    options.text_prepend = u''
    parsers = putils.FindAllParsers(pre_obj, options)
    my_worker = worker.EventExtractionWorker('0', None, storage_queue_producer,
                                             pre_obj, parsers)

    if options.verbose:
        profiler = cProfile.Profile()
        profiler.enable()
    else:
        time_start = time.time()
    my_worker.ParseFile(file_entry)

    if options.verbose:
        profiler.disable()
    else:
        time_end = time.time()

    storage_queue_producer.SignalEndOfInput()

    event_object_consumer = PprofEventObjectQueueConsumer(storage_queue)
    event_object_consumer.ConsumeEventObjects()

    if not options.verbose:
        print frontend_utils.FormatHeader('Time Used')
        print u'{:>20f}s'.format(time_end - time_start)

    print frontend_utils.FormatHeader('Parsers Loaded')
    # Accessing protected member.
    # pylint: disable=protected-access
    plugins = []
    for parser in sorted(my_worker._parsers['all']):
        print frontend_utils.FormatOutputString('', parser.parser_name)
        parser_plugins = getattr(parser, '_plugins', [])
        plugins.extend(parser_plugins)

    print frontend_utils.FormatHeader('Plugins Loaded')
    for plugin in sorted(plugins):
        if isinstance(plugin, basestring):
            print frontend_utils.FormatOutputString('', plugin)
        else:
            plugin_string = getattr(plugin, 'NAME', u'N/A')
            print frontend_utils.FormatOutputString('', plugin_string)

    print frontend_utils.FormatHeader('Parsers Used')
    for parser in sorted(event_object_consumer.parsers):
        print frontend_utils.FormatOutputString('', parser)

    print frontend_utils.FormatHeader('Plugins Used')
    for plugin in sorted(event_object_consumer.plugins):
        print frontend_utils.FormatOutputString('', plugin)

    print frontend_utils.FormatHeader('Counter')
    for key, value in event_object_consumer.counter.most_common():
        print frontend_utils.FormatOutputString(key, value)

    if options.verbose:
        return GetStats(profiler)
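A minimal sketch of wiring ProcessFile to argparse, assuming flag names that match the attributes the function reads (proto_file, file_to_parse, verbose); the exact flag spellings and help texts are illustrative:

import argparse

arg_parser = argparse.ArgumentParser(
    description='Profile the parsing of a single file.')
arg_parser.add_argument(
    '--proto_file', default=None,
    help='Path to a file containing a serialized path specification.')
arg_parser.add_argument(
    '--verbose', action='store_true',
    help='Collect and print cProfile statistics.')
arg_parser.add_argument('file_to_parse', help='Path of the file to parse.')

options = arg_parser.parse_args()
ProcessFile(options)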
Example #23
    def _Main(self):
        """The main loop."""
        self._parser_mediator = parsers_mediator.ParserMediator(
            None,
            self._knowledge_base,
            preferred_year=self._preferred_year,
            temporary_directory=self._temporary_directory)

        if self._filter_object:
            self._parser_mediator.SetFilterObject(self._filter_object)

        if self._mount_path:
            self._parser_mediator.SetMountPath(self._mount_path)

        if self._text_prepend:
            self._parser_mediator.SetTextPrepend(self._text_prepend)

        # We need a resolver context per process to prevent multi processing
        # issues with file objects stored in images.
        resolver_context = context.Context()

        # We need to initialize the parser and hasher objects after the process
        # has forked, otherwise on Windows the "fork" will fail with
        # a PickleError for Python modules that cannot be pickled.
        self._extraction_worker = worker.EventExtractionWorker(
            resolver_context,
            parser_filter_expression=self._parser_filter_expression,
            process_archives=self._process_archives,
            process_compressed_streams=self._process_compressed_streams)

        if self._hasher_names_string:
            self._extraction_worker.SetHashers(self._hasher_names_string)

        if self._yara_rules_string:
            self._extraction_worker.SetYaraRules(self._yara_rules_string)

        self._StartProfiling()

        logging.debug(u'Worker: {0!s} (PID: {1:d}) started'.format(
            self._name, self._pid))

        self._status = definitions.PROCESSING_STATUS_RUNNING

        try:
            logging.debug(
                u'{0!s} (PID: {1:d}) started monitoring task queue.'.format(
                    self._name, self._pid))

            while not self._abort:
                try:
                    task = self._task_queue.PopItem()
                except (errors.QueueClose, errors.QueueEmpty) as exception:
                    logging.debug(
                        u'ConsumeItems exiting with exception {0:s}.'.format(
                            type(exception)))
                    break

                if isinstance(task, plaso_queue.QueueAbort):
                    logging.debug(
                        u'ConsumeItems exiting, dequeued QueueAbort object.')
                    break

                self._ProcessTask(task)

            logging.debug(
                u'{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
                    self._name, self._pid))

        # All exceptions need to be caught here to prevent the process
        # from being killed by an uncaught exception.
        except Exception as exception:  # pylint: disable=broad-except
            logging.warning(
                u'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
                    self._name, self._pid))
            logging.exception(exception)

            self._abort = True

        self._StopProfiling()
        self._extraction_worker = None
        self._parser_mediator = None
        self._storage_writer = None

        if self._abort:
            self._status = definitions.PROCESSING_STATUS_ABORTED
        else:
            self._status = definitions.PROCESSING_STATUS_COMPLETED

        logging.debug(u'Worker: {0!s} (PID: {1:d}) stopped'.format(
            self._name, self._pid))

        try:
            self._task_queue.Close(abort=self._abort)
        except errors.QueueAlreadyClosed:
            logging.error(u'Queue for {0:s} was already closed.'.format(
                self.name))
Example #24
    def ProcessSources(self,
                       session,
                       source_path_specs,
                       storage_writer,
                       resolver_context,
                       processing_configuration,
                       status_update_callback=None):
        """Processes the sources.

    Args:
      session (Session): session in which the sources are processed.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      resolver_context (dfvfs.Context): resolver context.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Returns:
      ProcessingStatus: processing status.
    """
        parser_mediator = parsers_mediator.ParserMediator(
            storage_writer,
            self.knowledge_base,
            collection_filters_helper=self.collection_filters_helper,
            preferred_year=processing_configuration.preferred_year,
            resolver_context=resolver_context,
            temporary_directory=processing_configuration.temporary_directory)

        extraction_worker = worker.EventExtractionWorker(
            parser_filter_expression=(
                processing_configuration.parser_filter_expression))

        extraction_worker.SetExtractionConfiguration(
            processing_configuration.extraction)

        self._processing_configuration = processing_configuration
        self._status_update_callback = status_update_callback

        logger.debug('Processing started.')

        parser_mediator.StartProfiling(
            self._processing_configuration.profiling, self._name,
            self._process_information)
        self._StartProfiling(self._processing_configuration.profiling)

        if self._analyzers_profiler:
            extraction_worker.SetAnalyzersProfiler(self._analyzers_profiler)

        if self._processing_profiler:
            extraction_worker.SetProcessingProfiler(self._processing_profiler)

        if self._serializers_profiler:
            storage_writer.SetSerializersProfiler(self._serializers_profiler)

        if self._storage_profiler:
            storage_writer.SetStorageProfiler(self._storage_profiler)

        storage_writer.Open()
        storage_writer.WriteSessionStart()

        # TODO: decouple session and storage writer?
        session.source_configurations = (
            self.knowledge_base.GetSourceConfigurationArtifacts())

        try:
            storage_writer.WriteSessionConfiguration()

            self._ProcessSources(source_path_specs, extraction_worker,
                                 parser_mediator, storage_writer)

        finally:
            storage_writer.WriteSessionCompletion(aborted=self._abort)

            storage_writer.Close()

            if self._analyzers_profiler:
                extraction_worker.SetAnalyzersProfiler(None)

            if self._processing_profiler:
                extraction_worker.SetProcessingProfiler(None)

            if self._serializers_profiler:
                storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                storage_writer.SetStorageProfiler(None)

            self._StopProfiling()
            parser_mediator.StopProfiling()

        if self._abort:
            logger.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logger.debug('Processing completed.')

        self._processing_configuration = None
        self._status_update_callback = None

        return self._processing_status
Example #25
  def _Main(self):
    """The main loop."""
    # We need a resolver context per process to prevent multi processing
    # issues with file objects stored in images.
    resolver_context = context.Context()

    for credential_configuration in self._processing_configuration.credentials:
      resolver.Resolver.key_chain.SetCredential(
          credential_configuration.path_spec,
          credential_configuration.credential_type,
          credential_configuration.credential_data)

    self._parser_mediator = parsers_mediator.ParserMediator(
        None, self._knowledge_base,
        artifacts_filter_helper=self._artifacts_filter_helper,
        preferred_year=self._processing_configuration.preferred_year,
        resolver_context=resolver_context,
        temporary_directory=self._processing_configuration.temporary_directory)

    self._parser_mediator.SetEventExtractionConfiguration(
        self._processing_configuration.event_extraction)

    self._parser_mediator.SetInputSourceConfiguration(
        self._processing_configuration.input_source)

    # We need to initialize the parser and hasher objects after the process
    # has forked, otherwise on Windows the "fork" will fail with
    # a PickleError for Python modules that cannot be pickled.
    self._extraction_worker = worker.EventExtractionWorker(
        parser_filter_expression=(
            self._processing_configuration.parser_filter_expression))

    self._extraction_worker.SetExtractionConfiguration(
        self._processing_configuration.extraction)

    self._parser_mediator.StartProfiling(
        self._processing_configuration.profiling, self._name,
        self._process_information)
    self._StartProfiling(self._processing_configuration.profiling)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(self._storage_profiler)

    logger.debug('Worker: {0!s} (PID: {1:d}) started.'.format(
        self._name, self._pid))

    self._status = definitions.STATUS_INDICATOR_RUNNING

    try:
      logger.debug('{0!s} (PID: {1:d}) started monitoring task queue.'.format(
          self._name, self._pid))

      while not self._abort:
        try:
          task = self._task_queue.PopItem()
        except (errors.QueueClose, errors.QueueEmpty) as exception:
          logger.debug('ConsumeItems exiting with exception {0:s}.'.format(
              type(exception)))
          break

        if isinstance(task, plaso_queue.QueueAbort):
          logger.debug('ConsumeItems exiting, dequeued QueueAbort object.')
          break

        self._ProcessTask(task)

      logger.debug('{0!s} (PID: {1:d}) stopped monitoring task queue.'.format(
          self._name, self._pid))

    # All exceptions need to be caught here to prevent the process
    # from being killed by an uncaught exception.
    except Exception as exception:  # pylint: disable=broad-except
      logger.warning(
          'Unhandled exception in process: {0!s} (PID: {1:d}).'.format(
              self._name, self._pid))
      logger.exception(exception)

      self._abort = True

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    self._parser_mediator.StopProfiling()

    self._extraction_worker = None
    self._parser_mediator = None
    self._storage_writer = None

    if self._abort:
      self._status = definitions.STATUS_INDICATOR_ABORTED
    else:
      self._status = definitions.STATUS_INDICATOR_COMPLETED

    logger.debug('Worker: {0!s} (PID: {1:d}) stopped.'.format(
        self._name, self._pid))

    try:
      self._task_queue.Close(abort=self._abort)
    except errors.QueueAlreadyClosed:
      logger.error('Queue for {0:s} was already closed.'.format(self.name))
Example #26
    def ProcessSources(self,
                       session,
                       source_path_specs,
                       storage_writer,
                       resolver_context,
                       processing_configuration,
                       force_parser=False,
                       status_update_callback=None):
        """Processes the sources.

    Args:
      session (Session): session in which the sources are processed.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      storage_writer (StorageWriter): storage writer for a session storage.
      resolver_context (dfvfs.Context): resolver context.
      processing_configuration (ProcessingConfiguration): processing
          configuration.
      force_parser (Optional[bool]): True if a specified parser should be forced
          to be used to extract events.
      status_update_callback (Optional[function]): callback function for status
          updates.

    Returns:
      ProcessingStatus: processing status.
    """
        self._resolver_context = resolver_context
        self._session = session

        parser_mediator = parsers_mediator.ParserMediator(
            session,
            storage_writer,
            self.knowledge_base,
            collection_filters_helper=self.collection_filters_helper,
            preferred_year=processing_configuration.preferred_year,
            resolver_context=resolver_context,
            temporary_directory=processing_configuration.temporary_directory)

        self._extraction_worker = worker.EventExtractionWorker(
            force_parser=force_parser,
            parser_filter_expression=(
                processing_configuration.parser_filter_expression))

        self._extraction_worker.SetExtractionConfiguration(
            processing_configuration.extraction)

        self._processing_configuration = processing_configuration
        self._status_update_callback = status_update_callback
        self._storage_writer = storage_writer

        logger.debug('Processing started.')

        parser_mediator.StartProfiling(
            self._processing_configuration.profiling, self._name,
            self._process_information)
        self._StartProfiling(self._processing_configuration.profiling)

        if self._analyzers_profiler:
            self._extraction_worker.SetAnalyzersProfiler(
                self._analyzers_profiler)

        if self._processing_profiler:
            self._extraction_worker.SetProcessingProfiler(
                self._processing_profiler)

        if self._serializers_profiler:
            self._storage_writer.SetSerializersProfiler(
                self._serializers_profiler)

        if self._storage_profiler:
            self._storage_writer.SetStorageProfiler(self._storage_profiler)

        self._StartStatusUpdateThread()

        try:
            self._ProcessSources(source_path_specs, parser_mediator)

        finally:
            # Stop the status update thread after close of the storage writer
            # so we include the storage sync to disk in the status updates.
            self._StopStatusUpdateThread()

            if self._analyzers_profiler:
                self._extraction_worker.SetAnalyzersProfiler(None)

            if self._processing_profiler:
                self._extraction_worker.SetProcessingProfiler(None)

            if self._serializers_profiler:
                self._storage_writer.SetSerializersProfiler(None)

            if self._storage_profiler:
                self._storage_writer.SetStorageProfiler(None)

            self._StopProfiling()
            parser_mediator.StopProfiling()

        if self._abort:
            logger.debug('Processing aborted.')
            self._processing_status.aborted = True
        else:
            logger.debug('Processing completed.')

        # Update the status view one last time.
        self._UpdateStatus()

        self._extraction_worker = None
        self._file_system_cache = []
        self._processing_configuration = None
        self._resolver_context = None
        self._session = None
        self._status_update_callback = None
        self._storage_writer = None

        return self._processing_status