Exemplo n.º 1
0
    def ProduceExtractionWarning(self, message, path_spec=None):
        """Produces an extraction warning and passes it to the storage writer.

        Args:
          message (str): message of the warning.
          path_spec (Optional[dfvfs.PathSpec]): path specification, where None
              will use the path specification of current file entry set in
              the mediator.

        Raises:
          RuntimeError: when storage writer is not set.
        """
        if not self._storage_writer:
            raise RuntimeError('Storage writer not set.')

        # Without an explicit path specification, fall back to the one of the
        # file entry that is currently set, if there is one.
        if not path_spec:
            current_file_entry = self._file_entry
            if current_file_entry:
                path_spec = current_file_entry.path_spec

        extraction_warning = warnings.ExtractionWarning(
            message=message,
            parser_chain=self.GetParserChain(),
            path_spec=path_spec)
        self._storage_writer.AddWarning(extraction_warning)

        # Bookkeeping: count the warning and record when it was produced.
        self._number_of_warnings += 1
        self.last_activity_timestamp = time.time()
Exemplo n.º 2
0
    def testGetAttributeNames(self):
        """Tests the GetAttributeNames function."""
        expected_attribute_names = ['message', 'parser_chain', 'path_spec']

        attribute_container = warnings.ExtractionWarning()
        # Sort the names so the comparison does not depend on the order in
        # which the container reports them.
        attribute_names = sorted(attribute_container.GetAttributeNames())

        self.assertEqual(attribute_names, expected_attribute_names)
Exemplo n.º 3
0
    def testAddWarning(self):
        """Tests the AddWarning function."""
        extraction_warning = warnings.ExtractionWarning(
            message='Test extraction warning')

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, 'plaso.sqlite')
            storage_file = sqlite_file.SQLiteStorageFile()
            storage_file.Open(path=temp_file, read_only=False)

            # Close the storage file even when AddWarning raises, so that the
            # temporary directory is not cleaned up while the file is open.
            try:
                storage_file.AddWarning(extraction_warning)
            finally:
                storage_file.Close()
Exemplo n.º 4
0
    def testAddExtractionWarning(self):
        """Tests the AddExtractionWarning function."""
        extraction_warning = warnings.ExtractionWarning(
            message='Test extraction warning')

        storage_writer = fake_writer.FakeStorageWriter(sessions.Session())
        storage_writer.Open()
        storage_writer.AddExtractionWarning(extraction_warning)
        storage_writer.Close()

        # Adding a warning after the writer was closed is expected to fail.
        with self.assertRaises(IOError):
            storage_writer.AddExtractionWarning(extraction_warning)
Exemplo n.º 5
0
    def _GetExtractionErrorsAsWarnings(self):
        """Retrieves errors from the store and converts them to warnings.

        This method is for backwards compatibility with pre-20190309 storage
        format stores which used ExtractionError attribute containers.

        Yields:
          ExtractionWarning: extraction warnings.
        """
        error_containers = self._GetAttributeContainers(
            self._CONTAINER_TYPE_EXTRACTION_ERROR)

        for error_container in error_containers:
            # Copy the attribute values of the error container into a new
            # warning container by way of a dictionary.
            attribute_values = error_container.CopyToDict()
            extraction_warning = warnings.ExtractionWarning()
            extraction_warning.CopyFromDict(attribute_values)
            yield extraction_warning
Exemplo n.º 6
0
    def testGetWarnings(self):
        """Tests the GetWarnings function."""
        extraction_warning = warnings.ExtractionWarning(
            message='Test extraction warning')

        with shared_test_lib.TempDirectory() as temp_directory:
            temp_file = os.path.join(temp_directory, 'plaso.sqlite')

            # Write a single warning. The storage file is closed in a finally
            # clause so that a failure does not leave it open while the
            # temporary directory is cleaned up.
            storage_file = sqlite_file.SQLiteStorageFile()
            storage_file.Open(path=temp_file, read_only=False)
            try:
                storage_file.AddWarning(extraction_warning)
            finally:
                storage_file.Close()

            # Reopen the file with the default open arguments and read the
            # warnings back.
            storage_file = sqlite_file.SQLiteStorageFile()
            storage_file.Open(path=temp_file)
            try:
                test_warnings = list(storage_file.GetWarnings())
            finally:
                storage_file.Close()

            self.assertEqual(len(test_warnings), 1)
Exemplo n.º 7
0
    def _ScheduleTasks(self, storage_writer):
        """Schedules tasks.

        Turns event sources taken from an event source heap into tasks, passes
        them to _ScheduleTask and merges task storage on every pass of the
        loop. The loop runs until there is no event source left and the task
        manager reports no pending tasks, or until an abort is requested.
        Afterwards an extraction warning is added for every failed task.

        Args:
          storage_writer (StorageWriter): storage writer for a session storage.
        """
        logger.debug('Task scheduler started')

        self._status = definitions.STATUS_INDICATOR_RUNNING

        # TODO: make tasks persistent.

        # TODO: protect task scheduler loop by catch all and
        # handle abort path.

        event_source_heap = _EventSourceHeap()

        self._FillEventSourceHeap(storage_writer,
                                  event_source_heap,
                                  start_with_first=True)

        event_source = event_source_heap.PopEventSource()

        # task holds the task that still has to be scheduled; it is carried
        # over to the next pass when scheduling it did not succeed.
        task = None
        while event_source or self._task_manager.HasPendingTasks():
            if self._abort:
                break

            try:
                # A retry task takes precedence over creating a new task for
                # the current event source.
                if not task:
                    task = self._task_manager.CreateRetryTask()

                if not task and event_source:
                    task = self._task_manager.CreateTask(
                        self._session_identifier)
                    task.file_entry_type = event_source.file_entry_type
                    task.path_spec = event_source.path_spec
                    # The event source is now represented by the task.
                    event_source = None

                    self._number_of_consumed_sources += 1

                    if self._guppy_memory_profiler:
                        self._guppy_memory_profiler.Sample()

                if task:
                    if self._ScheduleTask(task):
                        logger.debug(
                            'Scheduled task {0:s} for path specification {1:s}'
                            .format(task.identifier,
                                    task.path_spec.comparable))

                        self._task_manager.SampleTaskStatus(task, 'scheduled')

                        # Clear the task so the next pass can pick up new work.
                        task = None

                    else:
                        # Keep the task so scheduling is attempted again on
                        # the next pass.
                        self._task_manager.SampleTaskStatus(
                            task, 'schedule_attempted')

                self._MergeTaskStorage(storage_writer)

                if not event_source_heap.IsFull():
                    self._FillEventSourceHeap(storage_writer,
                                              event_source_heap)

                # Only take the next event source when nothing is waiting to
                # be scheduled.
                if not task and not event_source:
                    event_source = event_source_heap.PopEventSource()

            except KeyboardInterrupt:
                # Setting the abort flag makes the loop break at the start of
                # the next pass.
                self._abort = True

                self._processing_status.aborted = True
                if self._status_update_callback:
                    self._status_update_callback(self._processing_status)

        # NOTE(review): the warnings are written to self._storage_writer and
        # not to the storage_writer argument - confirm both refer to the same
        # writer.
        for task in self._task_manager.GetFailedTasks():
            warning = warnings.ExtractionWarning(
                message='Worker failed to process path specification',
                path_spec=task.path_spec)
            self._storage_writer.AddWarning(warning)
            self._processing_status.error_path_specs.append(task.path_spec)

        self._status = definitions.STATUS_INDICATOR_IDLE

        if self._abort:
            logger.debug('Task scheduler aborted')
        else:
            logger.debug('Task scheduler stopped')
Exemplo n.º 8
0
    def _ScheduleTasks(self, storage_writer, session_identifier):
        """Schedules tasks.

        Turns event sources taken from an event source heap into tasks, passes
        them to _ScheduleTask and merges a limited number of attribute
        containers from task storage on every pass of the loop. The loop runs
        until there is no event source left and the task manager reports no
        pending tasks, or until an abort is requested. Afterwards an
        extraction warning is added for every failed task.

        Args:
          storage_writer (StorageWriter): storage writer for a session storage.
          session_identifier (str): the identifier of the session the tasks are
              part of.
        """
        logger.debug('Task scheduler started')

        self._status = definitions.STATUS_INDICATOR_RUNNING

        # TODO: make tasks persistent.

        # TODO: protect task scheduler loop by catch all and
        # handle abort path.

        event_source_heap = _EventSourceHeap()

        self._FillEventSourceHeap(storage_writer,
                                  event_source_heap,
                                  start_with_first=True)

        event_source = event_source_heap.PopEventSource()

        # task holds the task that still has to be scheduled; it is carried
        # over to the next pass when scheduling it did not succeed.
        task = None
        # Starts as True so the loop is entered even when there is no initial
        # event source; refreshed from the task manager at the end of a pass.
        has_pending_tasks = True

        while event_source or has_pending_tasks:
            if self._abort:
                break

            try:
                # A retry task takes precedence over creating a new task for
                # the current event source.
                if not task:
                    task = self._task_manager.CreateRetryTask()

                if not task and event_source:
                    task = self._task_manager.CreateTask(
                        session_identifier,
                        storage_format=self._task_storage_format)
                    task.file_entry_type = event_source.file_entry_type
                    task.path_spec = event_source.path_spec
                    # The event source is now represented by the task.
                    event_source = None

                    self._number_of_consumed_sources += 1

                if task:
                    if not self._ScheduleTask(task):
                        # Keep the task so scheduling is attempted again on
                        # the next pass.
                        self._task_manager.SampleTaskStatus(
                            task, 'schedule_attempted')

                    else:
                        path_spec_string = self._GetPathSpecificationString(
                            task.path_spec)
                        # Newlines are replaced to keep the log message on a
                        # single line.
                        logger.debug(
                            'Scheduled task: {0:s} for path specification: {1:s}'
                            .format(task.identifier,
                                    path_spec_string.replace('\n', ' ')))

                        self._task_manager.SampleTaskStatus(task, 'scheduled')

                        # Clear the task so the next pass can pick up new work.
                        task = None

                # Limit the number of attribute containers from a single task-based
                # storage file that are merged per loop to keep tasks flowing.
                merge_duration = time.time()

                number_of_containers = self._MergeTaskStorage(
                    storage_writer,
                    session_identifier,
                    maximum_number_of_containers=self.
                    _maximum_number_of_containers)

                merge_duration = time.time() - merge_duration

                # Set the limit for the next pass to half of the merge rate
                # (containers per second) that was just measured.
                # NOTE(review): int() truncates, so a rate below 2 containers
                # per second yields a limit of 0 - confirm how
                # _MergeTaskStorage treats a limit of 0.
                if merge_duration > 0.0 and number_of_containers > 0:
                    containers_per_second = number_of_containers / merge_duration
                    self._maximum_number_of_containers = int(
                        0.5 * containers_per_second)

                if not event_source_heap.IsFull():
                    self._FillEventSourceHeap(storage_writer,
                                              event_source_heap)
                else:
                    logger.debug('Event source heap is full.')

                # Only take the next event source when nothing is waiting to
                # be scheduled.
                if not task and not event_source:
                    event_source = event_source_heap.PopEventSource()

                has_pending_tasks = self._task_manager.HasPendingTasks()

            except KeyboardInterrupt:
                # The traceback is only printed when debug output is enabled.
                if self._debug_output:
                    traceback.print_exc()
                # Setting the abort flag makes the loop break at the start of
                # the next pass.
                self._abort = True

                self._processing_status.aborted = True
                if self._status_update_callback:
                    self._status_update_callback(self._processing_status)

        # NOTE(review): the warnings are written to self._storage_writer and
        # not to the storage_writer argument - confirm both refer to the same
        # writer.
        for task in self._task_manager.GetFailedTasks():
            warning = warnings.ExtractionWarning(
                message='Worker failed to process path specification',
                path_spec=task.path_spec)
            self._storage_writer.AddAttributeContainer(warning)
            self._processing_status.error_path_specs.append(task.path_spec)

        self._status = definitions.STATUS_INDICATOR_IDLE

        if self._abort:
            logger.debug('Task scheduler aborted')
        else:
            logger.debug('Task scheduler stopped')