示例#1
0
    def get_expired_tasks_for_labourer(self, labourer: Labourer) -> List[Dict]:
        """
        Fetch tasks of the Labourer that were invoked earlier and expired without being closed.

        The query runs against the greenfield index. The Labourer's `start` and `expired`
        attributes are passed as the `st_between_` / `en_between_` bounds on the greenfield field,
        and only rows without a `completed_at` attribute are kept.

        :param labourer:    Labourer whose expired tasks are requested.
        :return:            Whatever `dynamo_db_client.get_by_query` returns for this query.
        """

        field = self.get_db_field_name
        greenfield = field('greenfield')

        query = {
            'keys': {
                field('labourer_id'): labourer.id,
                f"st_between_{greenfield}": labourer.get_attr('start'),
                f"en_between_{greenfield}": labourer.get_attr('expired'),
            },
            'index_name': self.config['dynamo_db_config']['index_greenfield'],
            'filter_expression': f"attribute_not_exists {field('completed_at')}",
        }

        return self.dynamo_db_client.get_by_query(**query)
示例#2
0
    def get_running_tasks_for_labourer(
            self,
            labourer: Labourer,
            count: bool = False) -> Union[List[Dict], int]:
        """
        Fetch tasks of the Labourer that were invoked earlier but are neither closed nor expired.
        Such tasks are assumed to be still running.

        The Labourer's `expired` and `invoked` attributes are passed as the `st_between_` /
        `en_between_` bounds on the greenfield field, and only rows without `completed_at` are kept.

        :param labourer:    Labourer whose running tasks are requested.
        :param count:       When truthy, `return_count=True` is added to the query, so only the
                            number of tasks is requested rather than the items. Much cheaper.
        """

        field = self.get_db_field_name
        greenfield = field('greenfield')

        query = {
            'keys': {
                field('labourer_id'): labourer.id,
                f"st_between_{greenfield}": labourer.get_attr('expired'),
                f"en_between_{greenfield}": labourer.get_attr('invoked'),
            },
            'index_name': self.config['dynamo_db_config']['index_greenfield'],
            'filter_expression': f'attribute_not_exists {field("completed_at")}',
        }

        # The flag is only added on demand; otherwise the key is absent from the query entirely.
        if count:
            query['return_count'] = True

        return self.dynamo_db_client.get_by_query(**query)
示例#3
0
    def get_desired_invocation_number_for_labourer(self,
                                                   labourer: Labourer) -> int:
        """
        Work out how many more invocations of the Labourer may be started right now.

        The ceiling is the Labourer's own `max_simultaneous_invocations` attribute, or the config
        value of the same name when the attribute is None. It is scaled by the coefficient that
        `invocation_number_coefficient` in the config maps to the Labourer's current ecology
        status, and rounded down. The number of currently running tasks is then subtracted.

        :return: Number of additional invocations, never below 0.
        """

        ecology = self.task_client.ecology_client
        status = ecology.get_labourer_status(labourer=labourer)

        # First coefficient whose configured status equals the current one.
        coefficients = self.config['invocation_number_coefficient']
        coefficient = next(value for key, value in coefficients.items() if status == key)

        # A per-Labourer limit overrides the global one only when it is explicitly set.
        ceiling = labourer.get_attr('max_simultaneous_invocations')
        if ceiling is None:
            ceiling = self.config['max_simultaneous_invocations']

        desired = int(math.floor(ceiling * coefficient))
        currently_running = ecology.count_running_tasks_for_labourer(labourer)

        logger.info(
            f"Labourer: {labourer.id} has currently running {currently_running} tasks and desired {desired} "
            f"with respect to status {status}.")

        headroom = desired - currently_running
        return max(headroom, 0)
示例#4
0
文件: task.py 项目: yanigisawa/sosw
    def get_invoked_tasks_for_labourer(self, labourer: Labourer, completed: Optional[bool] = None) -> List[Dict]:
        """
        Fetch tasks of the Labourer that were invoked during the current run of the Orchestrator.

        The query selects rows of this Labourer whose greenfield is `>=` the Labourer's `invoked`
        attribute.

        :param labourer:    Labourer whose invoked tasks are requested.
        :param completed:   Optional filter on the `completed_at` attribute:

                            * True - only tasks that have it (completed).
                            * False - only tasks that do not have it (NOT completed).
                            * None (default) - no filtering by completion.
        """

        field = self.get_db_field_name
        greenfield = field('greenfield')

        query_args = {
            'keys':        {
                field('labourer_id'): labourer.id,
                greenfield:           labourer.get_attr('invoked'),
            },
            'comparisons': {greenfield: '>='},
            'index_name':  self.config['dynamo_db_config']['index_greenfield'],
        }

        # Only the exact singletons True / False enable filtering; anything else means "don't care".
        condition = None
        if completed is True:
            condition = 'attribute_exists'
        elif completed is False:
            condition = 'attribute_not_exists'

        if condition is None:
            logger.debug(f"No filtering by completed status for {query_args}")
        else:
            query_args['filter_expression'] = f"{condition} {field('completed_at')}"

        return self.dynamo_db_client.get_by_query(**query_args)
示例#5
0
文件: task.py 项目: yanigisawa/sosw
    def get_next_for_labourer(self, labourer: Labourer, cnt: int = 1, only_ids: bool = False) -> List[Union[str, Dict]]:
        """
        Take the next task(s) for the Labourer from the queue.

        :param labourer:    Labourer to get next tasks for.
        :param cnt:         Optional number of Tasks to fetch (passed to the query as `max_items`).
        :param only_ids:    If explicitly set True, only the `task_id` of every fetched task is returned.
                            This could save some transport if you are sending big batches of tasks between Lambdas.
        :return:            The fetched tasks, or just their IDs when `only_ids` is set.
        """

        field = self.get_db_field_name
        db_config = self.config['dynamo_db_config']

        # Maximum value to identify the task as available for invocation (either new, or ready for retry).
        max_greenfield = labourer.get_attr('start')

        key_conditions = {
            field('labourer_id'): labourer.id,
            field('greenfield'):  max_greenfield,
        }

        result = self.dynamo_db_client.get_by_query(
            key_conditions,
            table_name=db_config['table_name'],
            index_name=db_config['index_greenfield'],
            strict=True,
            max_items=cnt,
            comparisons={field('greenfield'): '<'},
        )

        logger.debug(f"get_next_for_labourer() received: {result} from {db_config['table_name']} "
                     f"for labourer: {labourer.id} max greenfield: {max_greenfield}")

        if only_ids:
            return [task[field('task_id')] for task in result]
        return result
示例#6
0
 def calculate_delay_for_task_retry(self, labourer: Labourer,
                                    task: Dict) -> int:
     """
     Compute the delay before the task is retried.

     The delay is the Labourer's `max_duration` attribute multiplied by the task's `attempts` value.

     :param labourer:    Labourer the task belongs to.
     :param task:        Task row; must contain the `attempts` field.
     :return:            `max_duration * attempts`.
     """
     logger.debug(
         f"Called Scavenger.calculate_delay_for_task_retry with labourer={labourer}, task={task}"
     )
     attempts_made = task[self.get_db_field_name('attempts')]
     max_duration = labourer.get_attr('max_duration')
     return max_duration * attempts_made
示例#7
0
    def get_average_labourer_duration(self, labourer: Labourer) -> int:
        """
        Analyse latest tasks of Labourer and calculate average runtime duration.

        Two queries are made against the closed tasks table, both newest-first (`desc=True`) and both
        limited to `max_closed_to_analyse_for_duration` items: first for the status key
        `<labourer.id>_1` (closed tasks), then for `<labourer.id>_0` (failed tasks). The merged result
        is cut down to the `max_closed_to_analyse_for_duration` tasks with the highest `greenfield`
        (the most recent invocations) before averaging.

        .. warning:: This method doesn't know the exact duration of failed attempts.
                     Thus if the task is completely failed, we assume that all attempts failed at maximum duration.

        :param labourer:    Labourer whose tasks are analysed.
        :return:            Average duration in seconds, or 0 when there are no tasks to analyse.
        """

        field = self.get_db_field_name
        cfg = self.config.get

        max_tasks = cfg('max_closed_to_analyse_for_duration')
        status_field = field('labourer_id_task_status')

        # Fetch last X closed tasks (`_1`), then the failed ones (`_0`) as well.
        tasks = []
        for status in ('1', '0'):
            tasks.extend(self.dynamo_db_client.get_by_query(
                keys={status_field: f"{labourer.id}_{status}"},
                table_name=cfg('sosw_closed_tasks_table'),
                index_name=cfg('sosw_closed_tasks_labourer_status_index'),
                max_items=max_tasks,
                desc=True,
            ))

        # Keep only the most recent ones across both statuses. The sort has to be descending:
        # an ascending sort followed by a head slice would keep the OLDEST tasks instead.
        latest = sorted(tasks, key=lambda task: task.get(field('greenfield')), reverse=True)[:max_tasks]

        durations = []
        for task in latest:
            if not task.get(field('completed_at')):
                # We assume duration of failed tasks to be maximum, once per attempt.
                durations.extend([labourer.get_attr('max_duration')] * int(task[field('attempts')]))
            else:
                # Duration of completed tasks we calculate based on the value of last `greenfield` and `completed_at`
                durations.append(task[field('completed_at')]
                                 - task[field('greenfield')]
                                 + cfg('greenfield_invocation_delta'))

        if not durations:
            return 0

        return round(sum(durations) / len(durations))
示例#8
0
    def get_tasks_to_retry_for_labourer(self,
                                        labourer: Labourer,
                                        limit: int = None) -> List[Dict]:
        """
        Fetch tasks of the Labourer from the retry table that are due for another launch.

        Selects rows of this Labourer whose `desired_launch_time` is `<=` the Labourer's `start`
        attribute (passed to the query as a string).

        :param labourer:    Labourer whose retry tasks are requested.
        :param limit:       Optional maximum number of tasks, passed as `max_items`.
                            Falsy values (None, 0) mean no limit is sent.
        :return:            Whatever `dynamo_db_client.get_by_query` returns for this query.
        """
        field = self.get_db_field_name
        launch_time = field('desired_launch_time')

        query = dict(
            keys={
                field('labourer_id'): labourer.id,
                launch_time: str(labourer.get_attr('start')),
            },
            comparisons={launch_time: "<="},
            table_name=self.config['sosw_retry_tasks_table'],
            index_name=self.config['sosw_retry_tasks_greenfield_index'],
        )

        if limit:
            query['max_items'] = limit

        return self.dynamo_db_client.get_by_query(**query)
示例#9
0
 def should_retry_task(self, labourer: Labourer, task: Dict) -> bool:
     """
     Tell whether the task may be attempted again.

     :param labourer:    Labourer the task belongs to; its `max_attempts` attribute is the limit.
     :param task:        Task row; its `attempts` field is compared against the limit.
     :return:            True while `attempts` is strictly below `max_attempts`, False otherwise.
     """
     logger.debug(
         f"Called Scavenger.should_retry_task with labourer={labourer}, task={task}"
     )
     attempts_made = task.get(self.get_db_field_name('attempts'))
     if attempts_made < labourer.get_attr('max_attempts'):
         return True
     return False