Example #1
    def get_expired_tasks_for_labourer(self, labourer: Labourer) -> List[Dict]:
        """ Return a list of tasks of Labourer previously invoked, and expired without being closed. """

        _ = self.get_db_field_name

        return self.dynamo_db_client.get_by_query(
            keys={
                _('labourer_id'): labourer.id,
                f"st_between_{_('greenfield')}": labourer.get_attr('start'),
                f"en_between_{_('greenfield')}": labourer.get_attr('expired'),
            },
            index_name=self.config['dynamo_db_config']['index_greenfield'],
            filter_expression=f"attribute_not_exists {_('completed_at')}",
        )
Example #2
class Scheduler_IntegrationTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_ORCHESTRATOR_CONFIG
    LABOURER = Labourer(
        id='some_function',
        arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')

    def setUp(self):
        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()

        self.custom_config = self.TEST_CONFIG.copy()
        self.orchestrator = Orchestrator(self.custom_config)

        self.s3_client = boto3.client('s3')

    def tearDown(self):
        self.patcher.stop()

        try:
            del os.environ['AWS_LAMBDA_FUNCTION_NAME']
        except KeyError:
            pass

    def test_call(self):
        self.orchestrator({'event': 42})

        some_labourer = self.orchestrator.task_client.register_labourers()[0]
        self.orchestrator.task_client.ecology_client.count_running_tasks_for_labourer(
            some_labourer)
Example #3
    def get_running_tasks_for_labourer(
            self,
            labourer: Labourer,
            count: bool = False) -> Union[List[Dict], int]:
        """
        Return a list of tasks of Labourer previously invoked, but not yet closed or expired.
        We assume they are still running.

        If `count` is True, returns just the number of matching tasks instead of the items themselves.
        Much cheaper.
        """

        _ = self.get_db_field_name

        q = dict(
            keys={
                _('labourer_id'): labourer.id,
                f"st_between_{_('greenfield')}": labourer.get_attr('expired'),
                f"en_between_{_('greenfield')}": labourer.get_attr('invoked'),
            },
            index_name=self.config['dynamo_db_config']['index_greenfield'],
            filter_expression=f'attribute_not_exists {_("completed_at")}')

        if count:
            q['return_count'] = True

        return self.dynamo_db_client.get_by_query(**q)
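Taken together with Example #1 above and Example #7 below, these queries show that a task's state is determined purely by where its `greenfield` falls relative to the Labourer's `start`, `expired` and `invoked` timestamps. A minimal illustrative sketch of that classification (the timestamps and the `classify` helper are invented, not part of sosw):

start, expired, invoked = 1_000, 2_000, 3_000   # invented values, but ordered as sosw orders them

def classify(greenfield: int) -> str:
    # greenfield < start: available for (re-)invocation (Example #7)
    # start <= greenfield < expired: invoked, but expired without closing (Example #1)
    # expired <= greenfield < invoked: assumed to be still running (Example #3)
    if greenfield < start:
        return 'available'
    if greenfield < expired:
        return 'expired'
    if greenfield < invoked:
        return 'running'
    return 'just invoked'

assert [classify(g) for g in (500, 1_500, 2_500)] == ['available', 'expired', 'running']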
Example #4
    def setUp(self):
        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()
        self.get_config_patch.return_value = {}
        self.custom_config = deepcopy(self.TEST_CONFIG)

        with patch('boto3.client'):
            self.scavenger = Scavenger(custom_config=self.custom_config)

        # Mock clients
        self.scavenger.task_client = MagicMock()
        self.scavenger.ecology_client = MagicMock()
        self.scavenger.sns_client = MagicMock()
        self.scavenger.meta_handler = MagicMock(spec=MetaHandler)

        self.scavenger.get_db_field_name = MagicMock(side_effect=lambda x: x)
        _ = self.scavenger.get_db_field_name

        self.labourer = Labourer(id='lambda3', arn='arn3')
        self.task = {
            _('task_id'): '123',
            _('labourer_id'): 'lambda1',
            _('greenfield'): '3525624',
            _('payload'): '{"a": 1}',
            _('attempts'): 2
        }
Example #5
    def get_desired_invocation_number_for_labourer(self,
                                                   labourer: Labourer) -> int:
        """
        Decides the desired maximum number of simultaneous invocations for a specific Labourer.
        The decision is based on the ecology status of the Labourer and the configs.

        :return: Number of invocations
        """

        labourer_status = self.task_client.ecology_client.get_labourer_status(
            labourer=labourer)

        coefficient = next(
            v for k, v in self.config['invocation_number_coefficient'].items()
            if labourer_status == k)

        labourer_max = labourer.get_attr('max_simultaneous_invocations')

        max_invocations = labourer_max if labourer_max is not None else self.config[
            'max_simultaneous_invocations']

        desired = int(math.floor(max_invocations * coefficient))
        currently_running = self.task_client.ecology_client.count_running_tasks_for_labourer(
            labourer)

        logger.info(
            f"Labourer: {labourer.id} has currently running {currently_running} tasks and desired {desired} "
            f"with respect to status {labourer_status}.")
        return max(desired - currently_running, 0)
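A worked example of the arithmetic above, with invented numbers: say the coefficient configured for the Labourer's current ecology status is 0.5, the cap is 10 simultaneous invocations, and 3 tasks are already running:

import math

coefficient = 0.5          # assumed value from config['invocation_number_coefficient']
max_invocations = 10       # assumed cap (labourer's own, or config['max_simultaneous_invocations'])
currently_running = 3

desired = int(math.floor(max_invocations * coefficient))   # 5
assert max(desired - currently_running, 0) == 2            # so invoke 2 more tasks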
Example #6
File: task.py Project: yanigisawa/sosw
    def get_invoked_tasks_for_labourer(self, labourer: Labourer, completed: Optional[bool] = None) -> List[Dict]:
        """
        Return a list of tasks of the current Labourer invoked during the current run of the Orchestrator.

        If completed is provided:
        * True - filter completed ones
        * False - filter NOT completed ones
        * None (default) - do not care about `completed` status.
        """

        _ = self.get_db_field_name

        query_args = {
            'keys':        {
                _('labourer_id'): labourer.id,
                _('greenfield'):  labourer.get_attr('invoked')
            },
            'comparisons': {_('greenfield'): '>='},
            'index_name':  self.config['dynamo_db_config']['index_greenfield'],
        }

        if completed is True:
            query_args['filter_expression'] = f"attribute_exists {_('completed_at')}"
        elif completed is False:
            query_args['filter_expression'] = f"attribute_not_exists {_('completed_at')}"
        else:
            logger.debug(f"No filtering by completed status for {query_args}")

        return self.dynamo_db_client.get_by_query(**query_args)
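The tri-state `completed` argument only changes the filter expression of the query. A quick sketch of the three branches, with `get_db_field_name` mocked to identity just as the unit tests in this listing do:

_ = lambda x: x   # identity field mapper, mirroring the mocked tests below

def filter_for(completed):
    if completed is True:
        return f"attribute_exists {_('completed_at')}"
    if completed is False:
        return f"attribute_not_exists {_('completed_at')}"
    return None   # None: do not filter by completion status

assert filter_for(True) == 'attribute_exists completed_at'
assert filter_for(False) == 'attribute_not_exists completed_at'
assert filter_for(None) is None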
Example #7
File: task.py Project: yanigisawa/sosw
    def get_next_for_labourer(self, labourer: Labourer, cnt: int = 1, only_ids: bool = False) -> List[Union[str, Dict]]:
        """
        Fetch the next task(s) from the queue for the Labourer.

        :param labourer:    Labourer to get next tasks for.
        :param cnt:         Optional number of Tasks to fetch.
        :param only_ids:    If explicitly set True, then returns only the IDs of tasks.
                            This could save some transport if you are sending big batches of tasks between Lambdas.
        """

        # Maximum value to identify the task as available for invocation (either new, or ready for retry).
        max_greenfield = labourer.get_attr('start')

        result = self.dynamo_db_client.get_by_query(
                {
                    self.get_db_field_name('labourer_id'): labourer.id,
                    self.get_db_field_name('greenfield'):  max_greenfield
                },
                table_name=self.config['dynamo_db_config']['table_name'],
                index_name=self.config['dynamo_db_config']['index_greenfield'],
                strict=True,
                max_items=cnt,
                comparisons={
                    self.get_db_field_name('greenfield'): '<'
                })

        logger.debug(f"get_next_for_labourer() received: {result} from {self.config['dynamo_db_config']['table_name']} "
                     f"for labourer: {labourer.id} max greenfield: {max_greenfield}")

        return result if not only_ids else [task[self.get_db_field_name('task_id')] for task in result]
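The `only_ids` flag is purely a post-processing step over the query result. An illustrative run on an invented result set:

result = [
    {'task_id': 'task_id_some_function_0', 'labourer_id': 'some_function', 'greenfield': 1000},
    {'task_id': 'task_id_some_function_1', 'labourer_id': 'some_function', 'greenfield': 2000},
]
only_ids = [task['task_id'] for task in result]
assert only_ids == ['task_id_some_function_0', 'task_id_some_function_1']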
Example #8
 def calculate_delay_for_task_retry(self, labourer: Labourer,
                                    task: Dict) -> int:
     logger.debug(
         f"Called Scavenger.calculate_delay_for_task_retry with labourer={labourer}, task={task}"
     )
     attempts = task[self.get_db_field_name('attempts')]
     wanted_delay = labourer.get_attr('max_duration') * attempts
     return wanted_delay
Example #9
class Labourer_UnitTestCase(unittest.TestCase):
    def setUp(self):
        self.labourer = Labourer(id=42, arn='arn::aws::lambda')

    def test_init(self):

        self.assertEqual(self.labourer.id, 42)
        self.assertEqual(self.labourer.arn, 'arn::aws::lambda')

    def test_init_attrs(self):
        lab = Labourer(id='foo', arn='arn::aws::lambda', max_invocations=13)

        self.assertEqual(lab.id, 'foo')
        self.assertEqual(lab.arn, 'arn::aws::lambda')
        self.assertEqual(lab.max_invocations, 13)

    def test_init__strict_raises(self):
        with self.assertRaises(AttributeError, msg=f"Labourer supports only {Labourer.ATTRIBUTES}"):
            Labourer(foo='bar', strict=True)

    def test_set_defaults__called(self):
        with patch('sosw.labourer.Labourer.set_defaults') as sd:
            lab = Labourer(id=42)
            sd.assert_called_once()

    def test_set_defaults(self):
        self.assertEqual(self.labourer.duration, 900)

    def test_set_defaults_overrides(self):
        lab = Labourer(id=42, duration=300)
        self.assertEqual(lab.duration, 300)

    def test_get_attr(self):

        self.assertRaises(ValueError, self.labourer.get_attr, 'invalid')
        self.assertRaises(AttributeError, self.labourer.get_attr, 'start')

    def test_set_custom_attributes(self):

        self.assertIsNone(getattr(self.labourer, 'start', None))
        self.labourer.set_custom_attribute('start', time.time())

        self.assertLessEqual(self.labourer.start, time.time())
Example #10
File: task.py Project: yanigisawa/sosw
    def get_labourers(self) -> List[Labourer]:
        """
        Return configured Labourers.
        Config of the TaskManager expects 'labourers' as a dict 'name_of_lambda': {'some_setting': 'value1'}
        """

        if not self.__labourers:
            self.__labourers = [Labourer(id=name, **settings) for name, settings in self.config['labourers'].items()]

        return self.__labourers
Example #11
 def test_calculate_delay_for_task_retry(self):
     _ = self.scavenger.get_db_field_name
     labourer = Labourer(id='some_lambda', arn='some_arn', max_duration=45)
     task = {
         _('task_id'): '123',
         _('labourer_id'): 'some_lambda',
         _('payload'): '{}',
         _('attempts'): 5
     }
     result = self.scavenger.calculate_delay_for_task_retry(labourer, task)
     self.assertEqual(result, 225)
Example #12
    def get_average_labourer_duration(self, labourer: Labourer) -> int:
        """
        Analyse latest tasks of Labourer and calculate average runtime duration.

        .. warning:: This method doesn't know the exact duration of failed attempts.
                     Thus if the task ultimately failed, we assume that every attempt ran for the maximum duration.

        :return:    Average duration in seconds.
        """

        _ = self.get_db_field_name
        _cfg = self.config.get

        durations = []

        q = dict(
            keys={_('labourer_id_task_status'): f"{labourer.id}_1"},
            table_name=_cfg('sosw_closed_tasks_table'),
            index_name=_cfg('sosw_closed_tasks_labourer_status_index'),
            max_items=_cfg('max_closed_to_analyse_for_duration'),
            desc=True,
        )

        # Fetch last X closed tasks
        tasks = self.dynamo_db_client.get_by_query(**q)

        # Fetch failed tasks as well
        q['keys'][_('labourer_id_task_status')] = f"{labourer.id}_0"
        tasks.extend(self.dynamo_db_client.get_by_query(**q))

        # Now take the really last `max_closed_to_analyse_for_duration` of them, ordered by greenfield (last invocation).
        tasks = sorted(tasks, key=lambda x: x.get(_('greenfield')))[:_cfg('max_closed_to_analyse_for_duration')]

        # Get their duration
        for task in tasks:
            # We assume duration of failed tasks to be maximum.
            if not task.get(_('completed_at')):
                durations.extend([
                    labourer.get_attr('max_duration')
                    for _ in range(int(task[_('attempts')]))
                ])
            else:
                # Duration of completed tasks is calculated from the last `greenfield` and `completed_at`.
                durations.append(task[_('completed_at')] - task[_('greenfield')]
                                 + _cfg('greenfield_invocation_delta'))

        # Return the average
        try:
            return round(sum(durations) / len(durations))
        except ZeroDivisionError:
            return 0
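A toy recomputation of the two averaging rules above, assuming (as the invoked-task setup elsewhere in this listing suggests) that `greenfield = invoked_at + greenfield_invocation_delta`; all numbers are invented:

delta = 1_000_000       # hypothetical greenfield_invocation_delta
max_duration = 900      # hypothetical labourer max_duration

# A completed task invoked at t=5000 that finished at t=5120:
greenfield = 5_000 + delta
completed_at = 5_120
durations = [completed_at - greenfield + delta]   # [120] -- the real runtime

# A failed task with 2 attempts contributes max_duration once per attempt:
durations.extend([max_duration] * 2)              # [120, 900, 900]

assert round(sum(durations) / len(durations)) == 640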
Example #13
    def test_get_length_of_queue_for_labourer(self):
        labourer = Labourer(id='some_lambda', arn='some_arn')

        num_of_tasks = 3  # Ran this with 464 tasks and it worked

        for i in range(num_of_tasks):
            row = {
                'labourer_id': 'some_lambda',
                'task_id': f"task-{i}",
                'greenfield': i
            }
            self.dynamo_client.put(row)
            time.sleep(0.1)  # Sleep a little to fit the Write Capacity (10 WCU) of autotest table.

        queue_len = self.manager.get_length_of_queue_for_labourer(labourer)

        self.assertEqual(queue_len, num_of_tasks)
Example #14
    def get_tasks_to_retry_for_labourer(self,
                                        labourer: Labourer,
                                        limit: int = None) -> List[Dict]:
        _ = self.get_db_field_name

        attrs = {
            'keys': {
                _('labourer_id'): labourer.id,
                _('desired_launch_time'): str(labourer.get_attr('start'))
            },
            'comparisons': {
                _('desired_launch_time'): "<="
            },
            'table_name': self.config['sosw_retry_tasks_table'],
            'index_name': self.config['sosw_retry_tasks_greenfield_index'],
        }
        if limit:
            attrs['max_items'] = limit
        tasks = self.dynamo_db_client.get_by_query(**attrs)
        return tasks
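The `<=` comparison selects retry-table rows whose `desired_launch_time` is already due relative to the Labourer's `start` timestamp. The same predicate on invented rows (compare the integration test of this method later in this listing, which patches `time.time` to 9500 and expects 2 of the 3 tasks back):

labourer_start = 9_500   # invented, matching the patched time.time() in the integration tests
rows = [
    {'task_id': '1', 'desired_launch_time': 9_000},
    {'task_id': '2', 'desired_launch_time': 9_400},
    {'task_id': '3', 'desired_launch_time': 20_000},
]
due = [r for r in rows if r['desired_launch_time'] <= labourer_start]
assert [r['task_id'] for r in due] == ['1', '2']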
Example #15
    def test_init_attrs(self):
        lab = Labourer(id='foo', arn='arn::aws::lambda', max_invocations=13)

        self.assertEqual(lab.id, 'foo')
        self.assertEqual(lab.arn, 'arn::aws::lambda')
        self.assertEqual(lab.max_invocations, 13)
Example #16
class Scheduler_UnitTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_SCHEDULER_CONFIG
    LABOURER = Labourer(
        id='some_function',
        arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')
    FNAME = '/tmp/aglaya.txt'
    TODAY = datetime.date(2019, 1, 31)

    # Warning! This Payload is not operational as-is. You should add `isolate_SOMETHING` in several places.
    PAYLOAD = {
        'sections': {
            'section_funerals': {
                'stores': {
                    'store_flowers': None,
                    'store_caskets': None,
                },
            },
            'section_weddings': {
                'stores': {
                    'store_flowers': None,
                    'store_limos': None,
                    'store_music': {
                        'products': ['product_march', 'product_chorus', 740],
                    },
                }
            },
            'section_conversions': {
                'stores': {
                    'store_training': {
                        'products': {
                            'product_history': None,
                            'product_prayer': None,
                        }
                    },
                    'store_baptizing': None,
                    'store_circumcision': None
                }
            },
            'section_gifts': None
        }
    }

    def setUp(self):
        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()

        self.custom_config = deepcopy(self.TEST_CONFIG)

        with patch('boto3.client'):
            self.scheduler = Scheduler(self.custom_config)

        self.scheduler.s3_client = MagicMock()
        self.scheduler.sns_client = MagicMock()
        self.scheduler.task_client = MagicMock()
        self.scheduler.task_client.get_labourer.return_value = self.LABOURER

        self.scheduler.st_time = time.time()

    def tearDown(self):
        self.patcher.stop()

        try:
            del os.environ['AWS_LAMBDA_FUNCTION_NAME']
        except KeyError:
            pass

        for fname in [self.scheduler._local_queue_file, self.FNAME]:
            try:
                os.remove(fname)
            except OSError:
                pass

    def put_local_file(self, file_name=None, json=False):
        with open(file_name or self.scheduler._local_queue_file, 'w') as f:
            for x in range(10):
                if json:
                    f.write(
                        '{"key": "val", "number": "42", "boolean": true, "labourer_id": "some_function"}\n'
                    )
                else:
                    f.write(f"Hello Aglaya {x} {random.randint(0, 99)}\n")

    @staticmethod
    def line_count(file):
        return int(
            subprocess.check_output('wc -l {}'.format(file),
                                    shell=True).split()[0])

    def test_init__chunkable_attrs_not_end_with_s(self):
        config = self.custom_config
        config['job_schema']['chunkable_attrs'] = [('bad_name_ending_with_s',
                                                    {})]

        with patch('boto3.client'):
            self.assertRaises(AssertionError, Scheduler, custom_config=config)

    def test_get_next_chunkable_attr(self):
        self.assertEqual(self.scheduler.get_next_chunkable_attr('store'),
                         'product')
        self.assertEqual(self.scheduler.get_next_chunkable_attr('stores'),
                         'product')
        self.assertEqual(self.scheduler.get_next_chunkable_attr('section'),
                         'store')
        self.assertIsNone(self.scheduler.get_next_chunkable_attr('product'))
        self.assertIsNone(self.scheduler.get_next_chunkable_attr('bad_name'))

    def test__queue_bucket(self):
        self.assertEqual(self.scheduler._queue_bucket,
                         self.scheduler.config['queue_bucket'])

    def test__local_queue_file(self):
        self.assertEqual(self.scheduler._local_queue_file,
                         f"/tmp/{self.scheduler.config['queue_file']}")

    def test__remote_queue_file(self):
        self.assertEqual(
            self.scheduler._remote_queue_file,
            f"{self.scheduler.config['s3_prefix'].strip('/')}/"
            f"{self.scheduler.config['queue_file'].strip('/')}")

    def test__remote_queue_locked_file(self):
        self.assertEqual(
            self.scheduler._remote_queue_locked_file,
            f"{self.scheduler.config['s3_prefix'].strip('/')}/locked_"
            f"{self.scheduler.config['queue_file'].strip('/')}")

    ### Tests of file operations ###
    def test_pop_rows_from_file(self):
        self.put_local_file(self.FNAME)

        # Initial setup made 10 rows.
        self.assertEqual(self.line_count(self.FNAME), 10)

        # Pop a single top row
        self.scheduler.pop_rows_from_file(self.FNAME)
        self.assertEqual(self.line_count(self.FNAME), 9)

        # Pop multiple rows
        self.scheduler.pop_rows_from_file(self.FNAME, rows=5)
        self.assertEqual(self.line_count(self.FNAME), 4)

        # Catch StopIteration and return only remaining.
        r = self.scheduler.pop_rows_from_file(self.FNAME, rows=42)
        self.assertFalse(os.path.isfile(self.FNAME))
        self.assertEqual(len(r), 4)

    def test_pop_rows_from_file__reads_from_top(self):
        self.put_local_file(self.FNAME)

        r = self.scheduler.pop_rows_from_file(self.FNAME, rows=9)

        self.assertEqual(len(r), 9)
        self.assertTrue(r[0].startswith('Hello Aglaya 0'))

        with open(self.FNAME) as f:
            row = f.read()
            self.assertTrue(row.startswith('Hello Aglaya 9'))

    def test_pop_rows_from_file__missing_or_empty_file(self):
        # Missing file
        self.assertEqual(self.scheduler.pop_rows_from_file(self.FNAME), list())

        # Empty file
        Path(self.FNAME).touch()
        self.assertEqual(self.scheduler.pop_rows_from_file(self.FNAME), list())

        self.assertFalse(os.path.isfile(self.FNAME))
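        # A standalone sketch of the contract pinned down by these three tests appears after the end of this example.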

    def test_process_file(self):
        self.put_local_file(self.FNAME, json=True)
        self.scheduler.get_and_lock_queue_file = MagicMock(
            return_value=self.FNAME)
        self.scheduler.upload_and_unlock_queue_file = MagicMock()
        self.scheduler.task_client = MagicMock()

        with patch('sosw.scheduler.Scheduler._sleeptime_for_dynamo',
                   new_callable=PropertyMock) as mock_sleeptime:
            mock_sleeptime.return_value = 0.0001

            self.scheduler.process_file()

            self.assertEqual(self.scheduler.task_client.create_task.call_count,
                             10)
            self.assertEqual(mock_sleeptime.call_count, 10)

            self.scheduler.upload_and_unlock_queue_file.assert_called_once()

    ### Tests of construct_job_data ###
    def test_construct_job_data(self):

        self.scheduler.chunk_dates = MagicMock(return_value=[{'a': 'foo'}, {'b': 'bar'}])
        self.scheduler.chunk_job = MagicMock()

        r = self.scheduler.construct_job_data({'pl': 1})

        self.scheduler.chunk_dates.assert_called_once()
        self.scheduler.chunk_job.assert_called()
        self.assertEqual(self.scheduler.chunk_job.call_count, 2)

    def test_construct_job_data__preserve_skeleton_through_chunkers(self):

        r = self.scheduler.construct_job_data({'pl': 1},
                                              skeleton={'labourer_id': 'some'})
        print(r)

        for task in r:
            self.assertEqual(task['labourer_id'], 'some')

    def test_construct_job_data__empty_job(self):

        JOB = dict()
        r = self.scheduler.construct_job_data(JOB)
        self.assertEqual(r, [JOB])

    def test_construct_job_data__real_payload__for_debugging_logs(self):
        JOB = {
            'lambda_name': 'some_lambda',
            'period': 'last_2_days',
            'isolate_days': True,
            'sections': {
                '111': {
                    'all_campaigns': True
                },
                '222': {
                    'all_campaigns': True
                },
                '333': {
                    'isolate_stores': True,
                    'all_campaigns': False,
                    'stores': {
                        '333-111': None,
                        '333-222': None,
                        '333-333': {
                            'keep_me': 7
                        }
                    },
                }
            },
            'isolate_sections': 'True'
        }

        r = self.scheduler.construct_job_data(JOB)

        for t in r:
            print(t)

        self.assertEqual(len(r), 10)
        # self.assertEqual(1, 42)

    ### Tests of chunk_dates ###
    def test_chunk_dates__preserve_skeleton(self):
        TESTS = [
            {'period': 'last_1_days', 'a': 'foo'},
            {'period': 'last_10_days', 'a': 'foo'},
            {'period': 'last_10_days', 'isolate_days': True, 'a': 'foo'},
            {'period': '1_days_back', 'a': 'foo'},
            {'period': '10_days_back', 'a': 'foo'},
            {'period': '10_days_back', 'isolate_days': True, 'a': 'foo'},  # Isolation here is redundant.
        ]

        SKELETON = {'labourer_id': 'some'}

        for test in TESTS:
            if test.get('isolate_days'):
                pattern = '[a-z]+_([0-9]+)_days'
                try:
                    expected_number = int(re.match(pattern, test['period'])[1])
                except (TypeError, IndexError):
                    expected_number = 1
            else:
                expected_number = 1

            r = self.scheduler.chunk_dates(job=test, skeleton=SKELETON)
            self.assertEqual(len(r), expected_number)
            for task in r:
                self.assertEqual(task['labourer_id'], 'some')

    def test_chunk_dates__preserve_skeleton__if_no_chunking(self):
        TASK = {'a': 'foo'}
        SKELETON = {'labourer_id': 'some'}

        r = self.scheduler.chunk_dates(job=TASK, skeleton=SKELETON)

        for task in r:
            self.assertEqual(task['labourer_id'], 'some')
            self.assertEqual(task['a'], 'foo')

    def test_chunk_dates__pops_period(self):
        TASK = {'period': 'last_42_days', 'a': 'foo'}

        r = self.scheduler.chunk_dates(job=TASK)

        self.assertIn('period', TASK, "DANGER! Modified initial job!")
        for task in r:
            self.assertNotIn('period', task)
            self.assertEqual(task['a'], 'foo')

    def test_chunk_dates__last_x_days(self):

        TASK = {'period': 'last_5_days', 'isolate_days': True, 'a': 'foo'}
        today = datetime.date(2019, 1, 30)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today
            r = self.scheduler.chunk_dates(TASK)

        self.assertEqual(len(r), 5)
        for i, task in enumerate(r):
            self.assertEqual(task['a'], 'foo')
            self.assertEqual(task['date_list'],
                             [f"{self.TODAY - datetime.timedelta(days=6-i)}"])

    def test_chunk_dates__raises_invalid_period_pattern(self):
        TASK = {'period': 'putin_the_best'}
        with self.assertRaises(ValueError, msg="Putin is not supported"):
            self.scheduler.chunk_dates(job=TASK)

    def test_last_x_days(self):

        TESTS = [
            ('last_3_days', ['2019-01-27', '2019-01-28', '2019-01-29']),
            ('last_5_days', [
                '2019-01-25', '2019-01-26', '2019-01-27', '2019-01-28',
                '2019-01-29'
            ]),
        ]
        today = datetime.date(2019, 1, 30)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.last_x_days(test), expected)

    def test_x_days_back(self):

        TESTS = [
            ('1_days_back', ['2019-01-29']),
            ('7_days_back', ['2019-01-23']),
            ('30_days_back', ['2018-12-31']),
        ]
        today = datetime.date(2019, 1, 30)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.x_days_back(test), expected)

            last_week = self.scheduler.x_days_back('7_days_back')[0]
        self.assertEqual(
            today.weekday(),
            datetime.datetime.strptime(last_week, '%Y-%m-%d').weekday())

    ### Tests of chunk_job ###
    def test_chunk_job__not_chunkable_config(self):
        self.scheduler.chunkable_attrs = []
        pl = deepcopy(self.PAYLOAD)

        r = self.scheduler.chunk_job(job=pl)
        # pprint.pprint(r)
        self.assertEqual(len(r), 1)
        self.assertEqual(r[0], pl)

    def test_chunk_job__not_raises_unchunkable_subtask__but_preserves_in_payload(
            self):
        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_conversions']['stores']['store_training'][
            'isolate_products'] = True
        pl['sections']['section_conversions']['stores']['store_training'][
            'products']['product_book'] = {
                'product_versions': {
                    'product_version_audio': None,
                    'product_version_paper': None,
                }
            }

        def find_product(t):
            try:
                return set(t['product_versions'].keys()) == {
                    'product_version_audio', 'product_version_paper'
                }
            except (KeyError, AttributeError):
                return False

        # print(pl)
        r = self.scheduler.chunk_job(job=pl)
        # for t in r:
        #     print(t)

        self.assertTrue(any(find_product(task) for task in r))

    def test_chunk_job__raises__unsupported_vals__string(self):
        pl = deepcopy(self.PAYLOAD)

        pl['sections']['section_conversions']['isolate_stores'] = True
        pl['sections']['section_conversions']['stores'][
            'store_training'] = 'some_string'

        self.assertRaises(InvalidJob, self.scheduler.chunk_job, job=pl)

    def test_chunk_job__raises__unsupported_vals__list_not_as_value(self):
        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_conversions']['isolate_stores'] = True
        pl['sections']['section_conversions']['stores']['store_training'] = [
            'just_a_string'
        ]

        self.assertRaises(InvalidJob, self.scheduler.chunk_job, job=pl)

    def test_chunk_job__not_raises__notchunkable__if_no_isolation(self):
        pl = deepcopy(self.PAYLOAD)

        pl['isolate_sections'] = True
        pl['sections']['section_conversions']['stores'][
            'store_training'] = 'some_string'

        r = self.scheduler.chunk_job(job=pl)
        val = r[2]
        print(r)
        print(
            f"We chunked only the first level (sections). The currently interesting one is section #3, "
            f"where we put the custom unchunkable payload: {val}")

        self.assertEqual(val['stores']['store_training'], 'some_string')

    def test_chunk_job(self):

        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_weddings']['stores']['store_music'][
            'isolate_products'] = True
        pl['sections']['section_conversions']['stores']['store_training'][
            'isolate_products'] = True

        response = self.scheduler.chunk_job(job=pl)

        # for row in response:
        #     pprint.pprint(row)
        #     print('\n')

        NUMBER_TASKS_EXPECTED = [
            ('sections', 'section_funerals', 1),
            ('sections', 'section_weddings', 5),
            ('sections', 'section_conversions', 4),
            ('stores', 'store_training', 2),
            ('stores', 'store_baptizing', 1),
            ('sections', 'section_gifts', 1),
        ]

        self.check_number_of_tasks(NUMBER_TASKS_EXPECTED, response)

    def test_chunk_job__unchunkable_preserve_custom_attrs(self):

        pl = {
            'sections': {
                'section_funerals': {
                    'custom': 'data'
                },
                'section_weddings': None,
            }
        }

        response = self.scheduler.chunk_job(job=pl)
        # print(response)

        self.assertEqual([pl], response)

    ### Tests of other methods ###
    def test_extract_job_from_payload(self):

        TESTS = [
            ({'job': {'lambda_name': 'foo', 'payload_attr': 'val'}},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ({'lambda_name': 'foo', 'payload_attr': 'val'},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ({'lambda_name': 'arn:aws:lambda:us-west-2:000000000000:function:foo', 'payload_attr': 'val'},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ({'job': {'lambda_name': 'foo', 'payload_attr': 'val'}},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),

            # JSONs
            ('{"lambda_name": "foo", "payload_attr": "val"}',
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ('{"job": {"lambda_name": "foo", "payload_attr": "val"}}',
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ('{"job": "{\\"lambda_name\\": \\"foo\\", \\"payload_attr\\": \\"val\\"}"}',
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
        ]

        for test, expected in TESTS:
            self.assertEqual(self.scheduler.extract_job_from_payload(test),
                             expected)

    def test_extract_job_from_payload_raises(self):

        TESTS = [
            42,
            {'payload_attr': 'val'},
            "{'payload_attr': 'val'}",
            {'job': {'payload_attr': 'val'}},
            {'job': 'bad one'},
        ]

        for test in TESTS:
            self.assertRaises(Exception,
                              self.scheduler.extract_job_from_payload, test)

    def test_needs_chunking__isolate_root(self):

        pl = deepcopy(self.PAYLOAD)
        self.assertFalse(self.scheduler.needs_chunking('sections', pl))

        pl = deepcopy(self.PAYLOAD)
        pl['isolate_sections'] = True
        self.assertTrue(self.scheduler.needs_chunking('sections', pl))

    def test_needs_chunking__isolate_subdata(self):

        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_funerals']['isolate_stores'] = True

        self.assertTrue(self.scheduler.needs_chunking('sections', pl))
        self.assertTrue(
            self.scheduler.needs_chunking('stores',
                                          pl['sections']['section_funerals']))
        self.assertFalse(
            self.scheduler.needs_chunking(
                'stores', pl['sections']['section_conversions']))

    def test_needs_chunking__isolate_subdata_deep(self):

        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_conversions']['stores']['store_training'][
            'isolate_products'] = True
        # pprint.pprint(pl)

        self.assertFalse(
            self.scheduler.needs_chunking('stores',
                                          pl['sections']['section_funerals']))
        self.assertTrue(
            self.scheduler.needs_chunking(
                'stores', pl['sections']['section_conversions']))
        self.assertTrue(
            self.scheduler.needs_chunking(
                'products', pl['sections']['section_conversions']['stores']
                ['store_training']))
        self.assertTrue(self.scheduler.needs_chunking('sections', pl))

    def test_get_index_from_list(self):

        TESTS = [
            (0, 'a', ['a', 'b', 'c']),
            (0, 'name', ['names', 'b', 'c']),
            (2, 'c', ['a', 'b', 'c']),
            (1, 'b', {'a': 1, 'b': 2, 'c': 3}),
            (1, 'bob', {'a': 1, 'bobs': 2, 'c': 3}),
        ]

        for expected, attr, data in TESTS:
            self.assertEqual(expected,
                             self.scheduler.get_index_from_list(attr, data))

    def check_number_of_tasks(self, expected_map, response):
        for key, val, expected in expected_map:
            r = filter(lambda task: task.get(key) == [val], response)
            # print(f"TEST OF FILTER: {t}: {len(list(t))}")
            self.assertEqual(len(list(r)), expected)

    def test_validate_list_of_vals(self):
        TESTS = [
            ({'a': None, 'b': None}, ['a', 'b']),
            (['a', 'b', 42], ['a', 'b', 42]),
            ([], []),
        ]

        for test, expected in TESTS:
            self.assertEqual(self.scheduler.validate_list_of_vals(test),
                             expected)

    def test_get_and_lock_queue_file__s3_calls(self):

        r = self.scheduler.get_and_lock_queue_file()

        self.assertEqual(r, self.scheduler._local_queue_file)

        self.scheduler.s3_client.download_file.assert_called_once()
        self.scheduler.s3_client.copy_object.assert_called_once()
        self.scheduler.s3_client.delete_object.assert_called_once()
        self.scheduler.s3_client.upload_file.assert_not_called()

    def test_get_and_lock_queue_file__local_file_exists(self):

        with patch('os.path.isfile') as isfile_mock:
            isfile_mock.return_value = True

            r = self.scheduler.get_and_lock_queue_file()

        self.assertEqual(r, self.scheduler._local_queue_file)

        self.scheduler.s3_client.download_file.assert_not_called()
        self.scheduler.s3_client.copy_object.assert_not_called()
        self.scheduler.s3_client.delete_object.assert_not_called()

        self.scheduler.s3_client.upload_file.assert_called_once()

    def test_parse_job_to_file(self):

        SAMPLE_SIMPLE_JOB = {
            'lambda_name': self.LABOURER.id,
            'some_payload': 'foo',
        }

        self.scheduler.parse_job_to_file(SAMPLE_SIMPLE_JOB)

        self.assertEqual(line_count(self.scheduler._local_queue_file), 1)

        with open(self.scheduler._local_queue_file, 'r') as f:
            row = json.loads(f.read())
            print(row)

            self.assertEqual(row['labourer_id'], self.LABOURER.id)
            self.assertEqual(row['some_payload'], 'foo')

    def test_parse_job_to_file__multiple_rows(self):

        SAMPLE_SIMPLE_JOB = {
            'lambda_name': self.LABOURER.id,
            "isolate_sections": True,
            'sections': {
                'section_technic': None,
                'section_furniture': None,
            },
        }

        self.scheduler.parse_job_to_file(SAMPLE_SIMPLE_JOB)

        self.assertEqual(line_count(self.scheduler._local_queue_file), 2)

        with open(self.scheduler._local_queue_file, 'r') as f:
            for row in f.readlines():
                # print(row)
                parsed_row = json.loads(row)
                print(parsed_row)

                self.assertEqual(parsed_row['labourer_id'], self.LABOURER.id)
                self.assertEqual(len(parsed_row['sections']), 1)
                self.assertIn(parsed_row['sections'][0],
                              SAMPLE_SIMPLE_JOB['sections'])

    def test_call__sample(self):
        SAMPLE_SIMPLE_JOB = {
            'lambda_name': self.LABOURER.id,
            'some_payload': 'foo',
        }

        print(json.dumps(SAMPLE_SIMPLE_JOB))
        r = self.scheduler(json.dumps(SAMPLE_SIMPLE_JOB))
        print(r)

        self.scheduler.task_client.create_task.assert_called_once()

        self.scheduler.s3_client.download_file.assert_not_called()
        self.scheduler.s3_client.copy_object.assert_not_called()

        self.scheduler.s3_client.upload_file.assert_called_once()
        self.scheduler.s3_client.delete_object.assert_called_once()
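The three test_pop_rows_from_file tests earlier in this example fully pin down the contract of Scheduler.pop_rows_from_file: rows pop from the top of the file, a short file yields whatever rows remain, and the file is removed once drained (or when it was missing or empty). A minimal standalone re-implementation of that contract, for illustration only; the real method lives in sosw's Scheduler:

import os

def pop_rows_from_file(file_name: str, rows: int = 1) -> list:
    """ Pop up to `rows` lines from the top of the file; delete the file once drained. """
    if not os.path.isfile(file_name):
        return []
    with open(file_name) as f:
        lines = f.readlines()
    popped, remaining = lines[:rows], lines[rows:]
    if remaining:
        with open(file_name, 'w') as f:
            f.writelines(remaining)
    else:
        os.remove(file_name)   # drained (or empty) files are removed
    return popped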
Example #17
class Orchestrator_UnitTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_ORCHESTRATOR_CONFIG

    LABOURER = Labourer(id='some_function', arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')

    def setUp(self):
        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()

        self.custom_config = deepcopy(self.TEST_CONFIG)
        with patch('boto3.client'):
            self.orchestrator = Orchestrator(self.custom_config)

        # self.orchestrator.task_client = MagicMock()


    def tearDown(self):
        self.patcher.stop()

        try:
            del os.environ['AWS_LAMBDA_FUNCTION_NAME']
        except KeyError:
            pass


    def test_true(self):
        self.assertEqual(1, 1)


    # @unittest.skip("Depricated method")
    # def test_get_labourer_setting(self):
    #
    #     custom_config = self.TEST_CONFIG.copy()
    #     custom_config['labourers'] = {
    #         42: {'foo': 'bar'},
    #     }
    #
    #     with patch('boto3.client'):
    #         orchestrator = Orchestrator(custom_config)
    #
    #     self.assertEqual(orchestrator.get_labourer_setting(Labourer(id=42), 'foo'), 'bar')
    #     self.assertEqual(orchestrator.get_labourer_setting(Labourer(id=42), 'faz'), None)
    #
    #     self.assertEqual(orchestrator.get_labourer_setting(Labourer(id=4422), 'foo'), None)
    #     self.assertEqual(orchestrator.get_labourer_setting(Labourer(id=4422), 'faz'), None)


    def test_get_desired_invocation_number_for_labourer(self):

        # Status - expected output for max invocations = 10
        TESTS = {
            0: 0,
            1: 0,
            2: 5,
            3: 7,
            4: 10
        }

        some_labourer = self.orchestrator.task_client.register_labourers()[0]

        with patch('boto3.client'):
            orchestrator = Orchestrator(self.custom_config)

        orchestrator.task_client = MagicMock()

        print(some_labourer.get_attr('max_simultaneous_invocations'))
        for eco, expected in TESTS.items():
            orchestrator.task_client.ecology_client.get_labourer_status.return_value = eco
            orchestrator.task_client.ecology_client.count_running_tasks_for_labourer.return_value = 0
            self.assertEqual(orchestrator.get_desired_invocation_number_for_labourer(some_labourer), expected)


    def test_get_desired_invocation_number_for_labourer__default(self):

        # Status - expected output for the default max invocations (10)
        TESTS = {
            0: 0,
            1: 0,
            2: 5,
            3: 7,
            4: 10
        }

        # self.orchestrator.task_client.register_labourers.return_value = [self.LABOURER]
        # self.orchestrator.task_client.ecology_client.count_running_tasks_for_labourer.return_value = 0

        some_labourer = self.orchestrator.task_client.register_labourers()[0]

        # Once the Labourers are registered, we mock the task client.
        self.orchestrator.task_client = MagicMock()
        self.orchestrator.task_client.ecology_client.count_running_tasks_for_labourer.return_value = 0

        for eco, expected in TESTS.items():
            self.orchestrator.task_client.ecology_client.get_labourer_status.return_value = eco

            self.assertEqual(self.orchestrator.get_desired_invocation_number_for_labourer(some_labourer), expected)


    def test_invoke_for_labourer(self):
        TEST_COUNT = 3
        some_labourer = self.orchestrator.task_client.register_labourers()[0]

        self.orchestrator.get_desired_invocation_number_for_labourer = MagicMock(return_value=TEST_COUNT)

        self.orchestrator.invoke_for_labourer(some_labourer)

        self.orchestrator.get_desired_invocation_number_for_labourer.assert_called_once()


    def test_invoke_for_labourer__desired_zero(self):
        self.orchestrator.get_desired_invocation_number_for_labourer = MagicMock(return_value=0)
        self.orchestrator.task_client.invoke_task = MagicMock()

        self.orchestrator.invoke_for_labourer(self.LABOURER)

        self.orchestrator.task_client.invoke_task.assert_not_called()
Example #18
class TaskManager_IntegrationTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_TASK_CLIENT_CONFIG
    LABOURER = Labourer(
        id='some_function',
        arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')

    @classmethod
    def setUpClass(cls):
        """
        Clean the classic autotest table.
        """
        cls.TEST_CONFIG['init_clients'] = ['DynamoDb']

    def setUp(self):
        """
        We keep copies of main parameters here, because they may differ from test to test and cleanup needs them.
        This is responsibility of the test author to update these values if required from test.
        """
        self.config = self.TEST_CONFIG.copy()

        self.HASH_KEY = ('task_id', 'S')
        self.RANGE_KEY = ('labourer_id', 'S')
        self.table_name = self.config['dynamo_db_config']['table_name']
        self.completed_tasks_table = self.config['sosw_closed_tasks_table']
        self.retry_tasks_table = self.config['sosw_retry_tasks_table']

        self.clean_task_tables()

        self.dynamo_client = DynamoDbClient(
            config=self.config['dynamo_db_config'])
        self.manager = TaskManager(custom_config=self.config)
        self.manager.ecology_client = MagicMock()

        self.labourer = deepcopy(self.LABOURER)

    def tearDown(self):
        self.clean_task_tables()

    def clean_task_tables(self):
        clean_dynamo_table(self.table_name, (self.HASH_KEY[0], ))
        clean_dynamo_table(self.completed_tasks_table, ('task_id', ))
        clean_dynamo_table(self.retry_tasks_table, ('labourer_id', 'task_id'))

    def setup_tasks(self, status='available', multiple_labourers=False, count_tasks=3):
        """ Add some fake scheduled tasks for some workers. """

        _ = self.manager.get_db_field_name
        _cfg = self.manager.config.get

        table = _cfg('dynamo_db_config')['table_name'] if status not in ['closed', 'failed'] \
            else _cfg('sosw_closed_tasks_table')

        MAP = {
            'available': {
                self.RANGE_KEY[0]: lambda x: str(worker_id),
                _('greenfield'):   lambda x: round(1000 + random.randrange(0, 100000, 1000)),
                _('attempts'):     lambda x: 0,
            },
            'invoked': {
                self.RANGE_KEY[0]: lambda x: str(worker_id),
                _('greenfield'):   lambda x: round(time.time()) + _cfg('greenfield_invocation_delta'),
                _('attempts'):     lambda x: 1,
            },
            'expired': {
                self.RANGE_KEY[0]: lambda x: str(worker_id),
                _('greenfield'):   lambda x: round(time.time()) + _cfg('greenfield_invocation_delta') - random.randint(1000, 10000),
                _('attempts'):     lambda x: 1,
            },
            'running': {
                self.RANGE_KEY[0]: lambda x: str(worker_id),
                _('greenfield'):   lambda x: round(time.time()) + _cfg('greenfield_invocation_delta') - random.randint(1, 900),
                _('attempts'):     lambda x: 1,
            },
            'closed': {
                _('greenfield'):              lambda x: round(time.time()) + _cfg('greenfield_invocation_delta') - random.randint(1000, 10000),
                _('labourer_id_task_status'): lambda x: f"{self.LABOURER.id}_1",
                _('completed_at'):            lambda x: x[_('greenfield')] - _cfg('greenfield_invocation_delta') + random.randint(10, 300),
                _('closed_at'):               lambda x: x[_('completed_at')] + random.randint(1, 60),
                _('attempts'):                lambda x: 3,
            },
            'failed': {
                _('greenfield'):              lambda x: round(time.time()) + _cfg('greenfield_invocation_delta') - random.randint(1000, 10000),
                _('labourer_id_task_status'): lambda x: f"{self.LABOURER.id}_0",
                _('closed_at'):               lambda x: x[_('greenfield')] + 900 + random.randint(1, 60),
                _('attempts'):                lambda x: 3,
            },
        }

        # raise ValueError(f"Unsupported `status`: {status}. Should be one of: 'available', 'invoked'.")

        workers = [self.LABOURER.id] if not multiple_labourers else range(42, 45)
        for worker_id in workers:

            for i in range(count_tasks):
                row = {
                    self.HASH_KEY[0]: f"task_id_{worker_id}_{i}_{str(uuid.uuid4())[:8]}",  # Task ID
                }

                for field, getter in MAP[status].items():
                    row[field] = getter(row)

                print(f"Putting {row} to {table}")
                self.dynamo_client.put(row, table_name=table)
                time.sleep(0.1)  # Sleep a little to fit the Write Capacity (10 WCU) of autotest table.

    def test_get_next_for_labourer(self):
        self.setup_tasks()
        # time.sleep(5)

        result = self.manager.get_next_for_labourer(self.LABOURER,
                                                    only_ids=True)
        # print(result)

        self.assertEqual(len(result), 1, "Returned more than one task")
        self.assertIn(f'task_id_{self.LABOURER.id}_', result[0])

    def test_get_next_for_labourer__multiple(self):
        self.setup_tasks()

        result = self.manager.get_next_for_labourer(self.LABOURER,
                                                    cnt=5000,
                                                    only_ids=True)
        # print(result)

        self.assertEqual(len(result), 3,
                         "Should be just 3 tasks for this worker in setup")
        self.assertTrue(
            all(f'task_id_{self.LABOURER.id}_' in task for task in result),
            "Returned some tasks of other Workers")

    def test_get_next_for_labourer__not_take_invoked(self):
        self.setup_tasks()
        self.setup_tasks(status='invoked')

        result = self.manager.get_next_for_labourer(self.LABOURER,
                                                    cnt=50,
                                                    only_ids=True)
        # print(result)

        self.assertEqual(
            len(result), 3,
            "Should be just 3 tasks for this worker in setup. The other 3 are invoked."
        )
        self.assertTrue(
            all(f'task_id_{self.LABOURER.id}_' in task for task in result),
            "Returned some tasks of other Workers")

    def test_get_next_for_labourer__full_tasks(self):
        self.setup_tasks()

        result = self.manager.get_next_for_labourer(self.LABOURER, cnt=2)
        # print(result)

        self.assertEqual(len(result), 2, "Should be just 2 tasks as requested")

        for task in result:
            self.assertIn(f'task_id_{self.LABOURER.id}_', task['task_id'],
                          "Returned some tasks of other Workers")
            self.assertEqual(self.LABOURER.id, task['labourer_id'],
                             "Returned some tasks of other Workers")

    def register_labourers(self):
        self.manager.get_labourers = MagicMock(return_value=[self.LABOURER])
        return self.manager.register_labourers()

    def test_mark_task_invoked(self):
        greenfield = round(time.time() - random.randint(100, 1000))
        delta = self.manager.config['greenfield_invocation_delta']
        self.register_labourers()

        row = {
            self.HASH_KEY[0]: f"task_id_{self.LABOURER.id}_256",  # Task ID
            self.RANGE_KEY[0]: self.LABOURER.id,  # Worker ID
            'greenfield': greenfield
        }
        self.dynamo_client.put(row)
        # print(f"Saved initial version with greenfield some date not long ago: {row}")

        # Do the actual tested job
        self.manager.mark_task_invoked(self.LABOURER, row)
        time.sleep(1)
        result = self.dynamo_client.get_by_query(
            {self.HASH_KEY[0]: f"task_id_{self.LABOURER.id}_256"},
            strict=False)
        # print(f"The new updated value of task is: {result}")

        # Rounding to -2 digits, we check that the greenfield was updated to roughly now + delta.
        self.assertAlmostEqual(round(int(time.time()) + delta, -2),
                               round(result[0]['greenfield'], -2))

    def test_get_invoked_tasks_for_labourer(self):
        self.register_labourers()

        self.setup_tasks(status='running')
        self.setup_tasks(status='expired')
        self.setup_tasks(status='invoked')
        self.assertEqual(
            len(self.manager.get_invoked_tasks_for_labourer(self.LABOURER)), 3)

    def test_get_running_tasks_for_labourer(self):
        self.register_labourers()

        self.setup_tasks(status='available')
        self.setup_tasks(status='running')
        self.setup_tasks(status='expired')
        self.assertEqual(
            len(self.manager.get_running_tasks_for_labourer(self.LABOURER)), 3)

    def test_get_expired_tasks_for_labourer(self):
        self.register_labourers()

        self.setup_tasks(status='running')
        self.setup_tasks(status='expired')
        self.assertEqual(
            len(self.manager.get_expired_tasks_for_labourer(self.LABOURER)), 3)

    # @unittest.skip("Function currently depricated")
    # def test_close_task(self):
    #     _ = self.manager.get_db_field_name
    #     # Create task with id=123
    #     task = {_('task_id'): '123', _('labourer_id'): 'lambda1', _('greenfield'): 8888, _('attempts'): 2,
    #             _('completed_at'): 123123}
    #     self.dynamo_client.put(task)
    #
    #     # Call
    #     self.manager.close_task(task_id='123', labourer_id='lambda1')
    #
    #     # Get from db, check
    #     tasks = self.dynamo_client.get_by_query({_('task_id'): '123'})
    #     self.assertEqual(len(tasks), 1)
    #     task_result = tasks[0]
    #
    #     expected_result = task.copy()
    #
    #     for k in ['task_id', 'labourer_id', 'greenfield', 'attempts']:
    #         assert expected_result[k] == task_result[k]
    #
    #     self.assertTrue(_('closed_at') in task_result, msg=f"{_('closed_at')} not in task_result {task_result}")
    #     self.assertTrue(time.time() - 360 < task_result[_('closed_at')] < time.time())

    def test_archive_task(self):
        _ = self.manager.get_db_field_name
        # Create task with id=123
        task = {
            _('task_id'): '123',
            _('labourer_id'): 'lambda1',
            _('greenfield'): 8888,
            _('attempts'): 2
        }
        self.dynamo_client.put(task)

        # Call
        self.manager.archive_task('123')

        # Check the task isn't in the tasks db, but is in the completed_tasks table
        tasks = self.dynamo_client.get_by_query({_('task_id'): '123'})
        self.assertEqual(len(tasks), 0)

        completed_tasks = self.dynamo_client.get_by_query(
            {_('task_id'): '123'}, table_name=self.completed_tasks_table)
        self.assertEqual(len(completed_tasks), 1)
        completed_task = completed_tasks[0]

        for k in task.keys():
            self.assertEqual(task[k], completed_task[k])
        for k in completed_task.keys():
            if k != _('closed_at'):
                self.assertEqual(task[k], completed_task[k])

        self.assertTrue(
            time.time() - 360 < completed_task[_('closed_at')] < time.time())

    def test_move_task_to_retry_table(self):
        _ = self.manager.get_db_field_name
        labourer_id = 'lambda1'
        task = {
            _('task_id'): '123',
            _('labourer_id'): labourer_id,
            _('greenfield'): 8888,
            _('attempts'): 2
        }
        delay = 300

        self.dynamo_client.put(task)

        self.manager.move_task_to_retry_table(task, delay)

        result_tasks = self.dynamo_client.get_by_query({_('task_id'): '123'})
        self.assertEqual(len(result_tasks), 0)

        result_retry_tasks = self.dynamo_client.get_by_query(
            {_('labourer_id'): labourer_id}, table_name=self.retry_tasks_table)
        self.assertEqual(len(result_retry_tasks), 1)
        result = first_or_none(result_retry_tasks)

        for k in task:
            self.assertEqual(task[k], result[k])
        for k in result:
            if k != _('desired_launch_time'):
                self.assertEqual(result[k], task[k])

        self.assertTrue(
            time.time() + delay -
            60 < result[_('desired_launch_time')] < time.time() + delay + 60)

    def test_get_tasks_to_retry_for_labourer(self):
        _ = self.manager.get_db_field_name

        tasks = RETRY_TASKS.copy()
        # Add tasks to retry table
        for task in tasks:
            self.dynamo_client.put(task, self.config['sosw_retry_tasks_table'])

        # Call
        with patch('time.time') as t:
            t.return_value = 9500
            labourer = self.manager.register_labourers()[0]

        result_tasks = self.manager.get_tasks_to_retry_for_labourer(labourer,
                                                                    limit=20)

        self.assertEqual(len(result_tasks), 2)

        # Check it only gets tasks with timestamp <= now
        self.assertIn(tasks[0], result_tasks)
        self.assertIn(tasks[1], result_tasks)

    def test_retry_tasks(self):
        _ = self.manager.get_db_field_name

        with patch('time.time') as t:
            t.return_value = 9500
            labourer = self.manager.register_labourers()[0]

        self.manager.get_oldest_greenfield_for_labourer = Mock(
            return_value=8888)

        # Add tasks to tasks_table
        regular_tasks = [
            {
                _('labourer_id'): labourer.id,
                _('task_id'): '11',
                _('arn'): 'some_arn',
                _('payload'): {},
                _('greenfield'): 8888
            },
            {
                _('labourer_id'): labourer.id,
                _('task_id'): '22',
                _('arn'): 'some_arn',
                _('payload'): {},
                _('greenfield'): 9999
            },
        ]
        for task in regular_tasks:
            self.dynamo_client.put(task)

        # Add tasks to retry_table
        retry_tasks = RETRY_TASKS.copy()

        for task in retry_tasks:
            self.dynamo_client.put(
                task, table_name=self.config['sosw_retry_tasks_table'])

        retry_table_items = self.dynamo_client.get_by_scan(
            table_name=self.retry_tasks_table)
        self.assertEqual(len(retry_table_items), len(retry_tasks))

        # Use get_tasks_to_retry_for_labourer to get tasks
        tasks = self.manager.get_tasks_to_retry_for_labourer(labourer)

        # Call
        self.manager.retry_tasks(labourer, tasks)

        # Check removed 2 out of 3 tasks from retry queue. One is desired to be launched later.
        retry_table_items = self.dynamo_client.get_by_scan(
            table_name=self.retry_tasks_table)
        self.assertEqual(len(retry_table_items), 1)

        # Check tasks moved to `tasks_table` with lowest greenfields
        tasks_table_items = self.dynamo_client.get_by_scan()
        for x in tasks_table_items:
            print(x)
        self.assertEqual(len(tasks_table_items), 4)

        for reg_task in regular_tasks:
            self.assertIn(reg_task, tasks_table_items)

        for retry_task in retry_tasks:
            try:
                matching = next(x for x in tasks_table_items
                                if x[_('task_id')] == retry_task[_('task_id')])
            except StopIteration:
                print(
                    f"Task not retried {retry_task}. Probably not yet desired."
                )
                continue

            for k in retry_task.keys():
                if k not in [_('greenfield'), _('desired_launch_time')]:
                    self.assertEqual(retry_task[k], matching[k])

            for k in matching.keys():
                if k != _('greenfield'):
                    self.assertEqual(retry_task[k], matching[k])

            print(
                f"New greenfield of a retried task: {matching[_('greenfield')]}"
            )
            self.assertTrue(matching[_('greenfield')] < min(
                [x[_('greenfield')] for x in regular_tasks]))

    @patch.object(boto3, '__version__', '1.9.53')
    def test_retry_tasks__old_boto(self):
        self.test_retry_tasks()

    def test_get_oldest_greenfield_for_labourer__get_newest_greenfield_for_labourer(
            self):
        with patch('time.time') as t:
            t.return_value = 9500
            labourer = self.manager.register_labourers()[0]

        min_gf = 20000
        max_gf = 10000
        for i in range(5):  # Ran this with range(1000), it passes :)
            gf = random.randint(10000, 20000)
            if gf < min_gf:
                min_gf = gf
            if gf > max_gf:
                max_gf = gf
            row = {
                'labourer_id': f"{labourer.id}",
                'task_id': f"task-{i}",
                'greenfield': gf
            }
            self.dynamo_client.put(row)
            time.sleep(0.1)  # Sleep a little to fit the Write Capacity (10 WCU) of the autotest table.

        result = self.manager.get_oldest_greenfield_for_labourer(labourer)
        self.assertEqual(min_gf, result)

        newest = self.manager.get_newest_greenfield_for_labourer(labourer)
        self.assertEqual(max_gf, newest)

    def test_get_length_of_queue_for_labourer(self):
        labourer = Labourer(id='some_lambda', arn='some_arn')

        num_of_tasks = 3  # Ran this with 464 tasks and it worked

        for i in range(num_of_tasks):
            row = {
                'labourer_id': f"some_lambda",
                'task_id': f"task-{i}",
                'greenfield': i
            }
            self.dynamo_client.put(row)
            time.sleep(0.1)  # Sleep a little to fit the Write Capacity (10 WCU) of the autotest table.

        queue_len = self.manager.get_length_of_queue_for_labourer(labourer)

        self.assertEqual(queue_len, num_of_tasks)

    def test_get_average_labourer_duration__calculates_average__only_failing_tasks(
            self):
        self.manager.ecology_client.get_max_labourer_duration.return_value = 900
        some_labourer = self.register_labourers()[0]

        self.setup_tasks(status='failed', count_tasks=15)

        self.assertEqual(
            900, self.manager.get_average_labourer_duration(some_labourer))

    def test_get_average_labourer_duration__calculates_average(self):
        self.manager.ecology_client.get_max_labourer_duration.return_value = 900
        some_labourer = self.register_labourers()[0]

        self.setup_tasks(status='closed', count_tasks=15)
        self.setup_tasks(status='failed', count_tasks=15)

        self.assertLessEqual(
            self.manager.get_average_labourer_duration(some_labourer), 900)
        self.assertGreaterEqual(
            self.manager.get_average_labourer_duration(some_labourer), 10)
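
The integration tests above pin down the observable contract of `archive_task`: the row disappears from the main tasks table and reappears in the completed-tasks table with a fresh `closed_at` timestamp. A minimal sketch of an implementation that would satisfy just those assertions (illustrative only, not sosw's actual code; the client methods and config keys follow the tests above):

import time

def archive_task_sketch(manager, task_id: str):
    # Copy the task into the closed-tasks table with `closed_at`,
    # then remove it from the main tasks table.
    _ = manager.get_db_field_name
    task = manager.get_task_by_id(task_id)
    completed = {**task, _('closed_at'): int(time.time())}
    manager.dynamo_db_client.put(completed, table_name=manager.config['sosw_closed_tasks_table'])
    manager.dynamo_db_client.delete({_('task_id'): task_id})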
Example #19
class Scheduler_UnitTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_SCHEDULER_CONFIG
    LABOURER = Labourer(
        id='some_function',
        arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')
    FNAME = '/tmp/aglaya.txt'
    TODAY = datetime.date(2019, 1, 31)

    # Warning! This Payload is not operational as it is. Should add `isolate_SOMETHING` in several places.
    PAYLOAD = {
        'sections': {
            'section_funerals': {
                'stores': {
                    'store_flowers': None,
                    'store_caskets': None,
                },
            },
            'section_weddings': {
                'stores': {
                    'store_flowers': None,
                    'store_limos': None,
                    'store_music': {
                        'products': [
                            'product_march', 'product_chorus', 740,
                            'product,4', 'product 5'
                        ],
                    },
                }
            },
            'section_conversions': {
                'stores': {
                    'store_training': {
                        'products': {
                            'product_history': None,
                            'product_prayer': None,
                        }
                    },
                    'store_baptizing': None,
                    'store_circumcision': None
                }
            },
            'section_gifts': None
        }
    }

    def setUp(self):
        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()
        self.get_config_patch.return_value = {}
        self.custom_config = deepcopy(self.TEST_CONFIG)

        self.custom_config['siblings_config'] = {'auto_spawning': True}

        lambda_context = types.SimpleNamespace()
        lambda_context.aws_request_id = 'AWS_REQ_ID'
        lambda_context.invoked_function_arn = 'arn:aws:lambda:us-west-2:000000000000:function:some_function'
        lambda_context.get_remaining_time_in_millis = MagicMock(
            return_value=300000)  # 5 minutes
        global_vars.lambda_context = lambda_context
        self.custom_lambda_context = global_vars.lambda_context  # This is to access from tests.

        with patch('boto3.client'):
            self.scheduler = module.Scheduler(custom_config=self.custom_config)

        self.scheduler.s3_client = MagicMock()
        self.scheduler.sns_client = MagicMock()
        self.scheduler.task_client = MagicMock()
        self.scheduler.task_client.get_labourer.return_value = self.LABOURER
        self.scheduler.get_db_field_name = lambda key: key
        self.scheduler.siblings_client = MagicMock()
        self.scheduler.meta_handler = MagicMock(signature=MetaHandler)

        self.scheduler.st_time = time.time()

    def tearDown(self):
        self.patcher.stop()

        try:
            del (os.environ['AWS_LAMBDA_FUNCTION_NAME'])
        except Exception:
            pass

        for fname in [self.scheduler.local_queue_file, self.FNAME]:
            try:
                os.remove(fname)
            except Exception:
                pass

    def put_local_file(self, file_name=None, json=False):
        with open(file_name or self.scheduler.local_queue_file, 'w') as f:
            for x in range(10):
                if json:
                    f.write(
                        '{"key": "val", "number": "42", "boolean": true, "labourer_id": "some_function"}\n'
                    )
                else:
                    f.write(f"Hello Aglaya {x} {random.randint(0, 99)}\n")

    @staticmethod
    def line_count(file):
        return int(
            subprocess.check_output('wc -l {}'.format(file),
                                    shell=True).split()[0])
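
    @staticmethod
    def line_count_pure_python(file):
        # Hypothetical shell-free alternative to `line_count` above: counts
        # rows without spawning `wc` (also counts a trailing unterminated line).
        with open(file) as f:
            return sum(1 for _ in f)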

    def test_init__chunkable_attrs_not_end_with_s(self):
        config = self.custom_config
        config['job_schema']['chunkable_attrs'] = [('bad_name_ending_with_s',
                                                    {})]

        with patch('boto3.client'):
            self.assertRaises(AssertionError, Scheduler, custom_config=config)

    def test_get_next_chunkable_attr(self):
        self.assertEqual(self.scheduler.get_next_chunkable_attr('store'),
                         'product')
        self.assertEqual(self.scheduler.get_next_chunkable_attr('stores'),
                         'product')
        self.assertEqual(self.scheduler.get_next_chunkable_attr('section'),
                         'store')
        self.assertIsNone(self.scheduler.get_next_chunkable_attr('product'))
        self.assertIsNone(self.scheduler.get_next_chunkable_attr('bad_name'))

    def test__queue_bucket(self):
        self.assertEqual(self.scheduler._queue_bucket,
                         self.scheduler.config['queue_bucket'])

    def test__remote_queue_file(self):
        self.assertIn(f"{self.scheduler.config['s3_prefix'].strip('/')}",
                      self.scheduler.remote_queue_file)
        self.assertIn(self.custom_lambda_context.aws_request_id,
                      self.scheduler.remote_queue_file)

    def test__remote_queue_locked_file(self):
        self.assertIn(f"{self.scheduler.config['s3_prefix'].strip('/')}",
                      self.scheduler.remote_queue_locked_file)
        self.assertIn('locked_', self.scheduler.remote_queue_locked_file)
        self.assertIn(self.custom_lambda_context.aws_request_id,
                      self.scheduler.remote_queue_locked_file)

    ### Tests of file operations ###
    def test_pop_rows_from_file(self):
        self.put_local_file(self.FNAME)

        # Initial setup made 10 rows.
        self.assertEqual(self.line_count(self.FNAME), 10)

        # Pop a single top row
        self.scheduler.pop_rows_from_file(self.FNAME)
        self.assertEqual(self.line_count(self.FNAME), 9)

        # Pop multiple rows
        self.scheduler.pop_rows_from_file(self.FNAME, rows=5)
        self.assertEqual(self.line_count(self.FNAME), 4)

        # Catch StopIteration and return only remaining.
        r = self.scheduler.pop_rows_from_file(self.FNAME, rows=42)
        self.assertFalse(os.path.isfile(self.FNAME))
        self.assertEqual(len(r), 4)

    def test_pop_rows_from_file__reads_from_top(self):
        self.put_local_file(self.FNAME)

        r = self.scheduler.pop_rows_from_file(self.FNAME, rows=9)

        self.assertEqual(len(r), 9)
        self.assertTrue(r[0].startswith('Hello Aglaya 0'))

        with open(self.FNAME) as f:
            row = f.read()
            self.assertTrue(row.startswith('Hello Aglaya 9'))

    def test_pop_rows_from_file__missing_or_empty_file(self):
        # Missing file
        self.assertEqual(self.scheduler.pop_rows_from_file(self.FNAME), list())

        # Empty file
        Path(self.FNAME).touch()
        self.assertEqual(self.scheduler.pop_rows_from_file(self.FNAME), list())

        self.assertFalse(os.path.isfile(self.FNAME))

    def test_process_file(self):
        self.put_local_file(self.FNAME, json=True)
        self.scheduler.get_and_lock_queue_file = MagicMock(
            return_value=self.FNAME)
        self.scheduler.upload_and_unlock_queue_file = MagicMock()
        self.scheduler.task_client = MagicMock()
        self.scheduler.clean_tmp = MagicMock()

        # This is a specific test patch for logging of remaining time.
        # We actually want two rounds: first OK, second low on time. But the context method is called twice each round.
        self.custom_lambda_context.get_remaining_time_in_millis.side_effect = [
            300000, 300000, 1000, 1000
        ]

        with patch('sosw.scheduler.Scheduler._sleeptime_for_dynamo',
                   new_callable=PropertyMock) as mock_sleeptime:
            mock_sleeptime.return_value = 0.0001

            self.scheduler.process_file()

            self.assertEqual(self.scheduler.task_client.create_task.call_count,
                             10)
            self.assertEqual(mock_sleeptime.call_count, 10)

            self.scheduler.upload_and_unlock_queue_file.assert_called_once()
            self.scheduler.clean_tmp.assert_called_once()
            # number of calls depends on the 'remaining_time_in_millis()' mock
            self.assertEqual(
                self.scheduler.siblings_client.spawn_sibling.call_count, 1)

    ### Tests of construct_job_data ###
    def test_construct_job_data(self):

        self.scheduler.chunk_dates = MagicMock(return_value=[{'a': 'foo'}, {'b': 'bar'}])
        self.scheduler.chunk_job = MagicMock()

        r = self.scheduler.construct_job_data({'pl': 1})

        self.scheduler.chunk_dates.assert_called_once()
        self.scheduler.chunk_job.assert_called()
        self.assertEqual(self.scheduler.chunk_job.call_count, 2)

    def test_construct_job_data__preserve_skeleton_through_chunkers(self):

        r = self.scheduler.construct_job_data({'pl': 1},
                                              skeleton={'labourer_id': 'some'})
        print(r)

        for task in r:
            self.assertEqual(task['labourer_id'], 'some')

    def test_construct_job_data__empty_job(self):

        JOB = dict()
        r = self.scheduler.construct_job_data(JOB)
        self.assertEqual(r, [JOB])

    def test_construct_job_data__real_payload__for_debuging_logs(self):
        JOB = {
            'lambda_name': 'some_lambda',
            'period': 'last_2_days',
            'isolate_days': True,
            'sections': {
                '111': {
                    'all_campaigns': True
                },
                '222': {
                    'all_campaigns': True
                },
                '333': {
                    'isolate_stores': True,
                    'all_campaigns': False,
                    'stores': {
                        '333-111': None,
                        '333-222': None,
                        '333-333': {
                            'keep_me': 7
                        }
                    },
                }
            },
            'isolate_sections': 'True'
        }

        r = self.scheduler.construct_job_data(JOB)

        for t in r:
            print(t)

        self.assertEqual(len(r), 10)
        # self.assertEqual(1, 42)

    ### Tests of chunk_dates ###
    def test_chunk_dates(self):
        TESTS = [
            ({'period': 'today'}, 'today'),
            ({'period': 'yesterday'}, 'yesterday'),
            ({'period': 'last_3_days'}, 'last_x_days'),
            ({'period': '10_days_back'}, 'x_days_back'),
            ({'period': 'previous_2_days'}, 'previous_x_days'),
            ({'period': 'last_week'}, 'last_week'),
        ]

        for test, func_name in TESTS:
            FUNCTIONS = [
                'today', 'yesterday', 'last_x_days', 'x_days_back',
                'previous_x_days', 'last_week'
            ]
            for f in FUNCTIONS:
                setattr(self.scheduler, f, MagicMock())

            self.scheduler.chunk_dates(test)

            func = getattr(self.scheduler, func_name)
            func.assert_called_once()

            for bad_f_name in [x for x in FUNCTIONS if not x == func_name]:
                bad_f = getattr(self.scheduler, bad_f_name)
                bad_f.assert_not_called()

    def test_chunk_dates__preserve_skeleton(self):
        TESTS = [
            {
                'period': 'last_1_days',
                'a': 'foo'
            },
            {
                'period': 'last_10_days',
                'a': 'foo'
            },
            {
                'period': 'last_10_days',
                'isolate_days': True,
                'a': 'foo'
            },
            {
                'period': '1_days_back',
                'a': 'foo'
            },
            {
                'period': '10_days_back',
                'a': 'foo'
            },
            {
                'period': '10_days_back',
                'isolate_days': True,
                'a': 'foo'
            },  # Isolation here is redundant
        ]

        SKELETON = {'labourer_id': 'some'}

        for test in TESTS:
            if test.get('isolate_days'):
                pattern = '[a-z]+_([0-9]+)_days'
                try:
                    expected_number = int(re.match(pattern, test['period'])[1])
                except Exception:
                    expected_number = 1
            else:
                expected_number = 1

            r = self.scheduler.chunk_dates(job=test, skeleton=SKELETON)
            self.assertEqual(len(r), expected_number)
            for task in r:
                self.assertEqual(task['labourer_id'], 'some')

    def test_chunk_dates__preserve_skeleton__if_no_chunking(self):
        TASK = {'a': 'foo'}
        SKELETON = {'labourer_id': 'some'}

        r = self.scheduler.chunk_dates(job=TASK, skeleton=SKELETON)

        for task in r:
            self.assertEqual(task['labourer_id'], 'some')
            self.assertEqual(task['a'], 'foo')

    def test_chunk_dates__pops_period(self):
        TASK = {'period': 'last_42_days', 'a': 'foo'}

        r = self.scheduler.chunk_dates(job=TASK)

        self.assertIn('period', TASK, "DANGER! Modified initial job!")
        for task in r:
            self.assertNotIn('period', task)
            self.assertEqual(task['a'], 'foo')

    def test_chunk_dates__last_x_days(self):

        TASK = {'period': 'last_5_days', 'isolate_days': True, 'a': 'foo'}
        today = datetime.date(2019, 1, 30)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today
            r = self.scheduler.chunk_dates(TASK)

        self.assertEqual(len(r), 5)
        for i, task in enumerate(r):
            self.assertEqual(task['a'], 'foo')
            self.assertEqual(task['date_list'],
                             [f"{self.TODAY - datetime.timedelta(days=6-i)}"])

    def test_chunk_dates__raises_invalid_period_pattern(self):
        TASK = {'period': 'putin_the_best'}
        # 'putin_the_best' is not a supported period pattern.
        self.assertRaises(ValueError, self.scheduler.chunk_dates, job=TASK)

    def test_last_x_days(self):

        TESTS = [
            ('last_3_days', ['2019-01-27', '2019-01-28', '2019-01-29']),
            ('last_5_days', [
                '2019-01-25', '2019-01-26', '2019-01-27', '2019-01-28',
                '2019-01-29'
            ]),
        ]
        today = datetime.date(2019, 1, 30)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.last_x_days(test), expected)

    def test_x_days_back(self):

        TESTS = [
            ('1_days_back', ['2019-01-29']),
            ('7_days_back', ['2019-01-23']),
            ('30_days_back', ['2018-12-31']),
        ]
        today = datetime.date(2019, 1, 30)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.x_days_back(test), expected)

            last_week = self.scheduler.x_days_back('7_days_back')[0]
        self.assertEqual(
            today.weekday(),
            datetime.datetime.strptime(last_week, '%Y-%m-%d').weekday())

    def test_yesterday(self):

        TESTS = [
            ('yesterday', ['2019-04-10']),
        ]

        today = datetime.date(2019, 4, 11)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.yesterday(test), expected)

    def test_today(self):
        TESTS = [
            ('today', ['2019-04-10']),
        ]
        today = datetime.date(2019, 4, 10)

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.today(test), expected)

    def test_previous_x_days(self):
        today = datetime.date(2019, 4, 30)

        TESTS = [('previous_2_days', ['2019-04-26', '2019-04-27']),
                 ('previous_3_days',
                  ['2019-04-24', '2019-04-25', '2019-04-26'])]

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.previous_x_days(test),
                                 expected)

    def test_last_week(self):
        today = datetime.date(2019, 4, 30)

        TESTS = [('last_week', [
            '2019-04-21', '2019-04-22', '2019-04-23', '2019-04-24',
            '2019-04-25', '2019-04-26', '2019-04-27'
        ])]

        with patch('sosw.scheduler.datetime.date') as mdt:
            mdt.today.return_value = today

            for test, expected in TESTS:
                self.assertEqual(self.scheduler.last_week(test), expected)

    def test_custom_period_patterns(self):
        class ChildScheduler(module.Scheduler):
            def __init__(self, custom_config):
                super().__init__(custom_config=custom_config)

            def get_june_days(self):
                return ['2020-06-24', '2020-06-23', '2020-06-22']

        with patch('boto3.client'):
            custom_config = deepcopy(self.TEST_CONFIG)
            custom_config['custom_period_patterns'] = ['get_june_days']
            child = ChildScheduler(custom_config=custom_config)

            r = child.chunk_dates(job={'period': 'get_june_days'})

            self.assertEqual(r, [{'date_list': ['2020-06-24', '2020-06-23', '2020-06-22']}])
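
            # Takeaway: a method name listed in `custom_period_patterns` lets
            # `chunk_dates` dispatch unknown period strings to it.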

    ### Tests of chunk_job ###
    def test_chunk_job__not_chunkable_config(self):
        self.scheduler.chunkable_attrs = []
        pl = deepcopy(self.PAYLOAD)

        r = self.scheduler.chunk_job(job=pl)
        # pprint.pprint(r)
        self.assertEqual(len(r), 1)
        self.assertEqual(r[0], pl)

    def test_chunk_job__not_raises_unchunkable_subtask__but_preserves_in_payload(
            self):
        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_conversions']['stores']['store_training'][
            'isolate_products'] = True
        pl['sections']['section_conversions']['stores']['store_training'][
            'products']['product_book'] = {
                'product_versions': {
                    'product_version_audio': None,
                    'product_version_paper': None,
                }
            }

        def find_product(t):
            try:
                return set(t['product_versions'].keys()) == {
                    'product_version_audio', 'product_version_paper'
                }
            except Exception:
                return False

        # print(pl)
        r = self.scheduler.chunk_job(job=pl)
        # for t in r:
        #     print(t)

        self.assertTrue(any(find_product(task) for task in r))

    def test_chunk_job__raises__unsupported_vals__string(self):
        pl = deepcopy(self.PAYLOAD)

        pl['sections']['section_conversions']['isolate_stores'] = True
        pl['sections']['section_conversions']['stores'][
            'store_training'] = 'some_string'

        self.assertRaises(InvalidJob, self.scheduler.chunk_job, job=pl)

    def test_chunk_job__raises__unsupported_vals__list_not_as_value(self):
        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_conversions']['isolate_stores'] = True
        pl['sections']['section_conversions']['stores']['store_training'] = [
            'just_a_string'
        ]

        self.assertRaises(InvalidJob, self.scheduler.chunk_job, job=pl)

    def test_chunk_job__not_raises__notchunkable__if_no_isolation(self):
        pl = deepcopy(self.PAYLOAD)

        pl['isolate_sections'] = True
        pl['sections']['section_conversions']['stores'][
            'store_training'] = 'some_string'

        r = self.scheduler.chunk_job(job=pl)
        val = r[2]
        print(r)
        print(
            f"We chunked only the first level (sections). The currently interesting one is section #3, "
            f"where we put the custom unchunkable payload: {val}")

        self.assertEqual(val['stores']['store_training'], 'some_string')

    def test_get_isolate_attributes_from_job(self):

        GOOD = {
            'isolate_sections': True,
            'isolate_Ss': False,
            'max_stores_cool_per_batch': 42
        }
        BAD = {'sections': True, 'foo': {'baz': 17}}

        result = self.scheduler.get_isolate_attributes_from_job(data={**GOOD, **BAD})
        self.assertDictEqual(result, GOOD)

    def test_chunk_job(self):

        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_weddings']['stores']['store_music'][
            'isolate_products'] = True
        pl['sections']['section_conversions']['stores']['store_training'][
            'isolate_products'] = True

        response = self.scheduler.chunk_job(job=pl)

        # for row in response:
        #     pprint.pprint(row)
        #     print('\n')

        NUMBER_TASKS_EXPECTED = [
            ('sections', 'section_funerals', 1),
            ('sections', 'section_weddings', 7),
            ('sections', 'section_conversions', 4),
            ('stores', 'store_training', 2),
            ('stores', 'store_baptizing', 1),
            ('sections', 'section_gifts', 1),
        ]

        self.check_number_of_tasks(NUMBER_TASKS_EXPECTED, response)

    def test_chunk_job__unchunckable_preserve_custom_attrs(self):

        pl = {
            'sections': {
                'section_funerals': {
                    'custom': 'data'
                },
                'section_weddings': None,
            }
        }

        response = self.scheduler.chunk_job(job=pl)
        # print(response)

        self.assertEqual([pl], response)

    def test_chunk_job__max_items_per_batch(self):
        """
        Tests that `max_products_per_batch` will actually make chunks of products of specific size.

        Here we have a tricky case:

        `section_weddings` has 3 different `stores`. In `store_music` we have 5 `products`.
        For example, with max_products_per_batch = 2 we should have:

        - store_1
        - store_2
        - store_3, products 1 + 2
        - store_3, products 3 + 4
        - store_3, product 5

        (See the demo method after this test.)
        """
        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_weddings']['stores']['store_music'][
            'max_products_per_batch'] = 2
        # pl['sections']['section_funerals']['isolate_stores'] = True
        # pl['isolate_sections'] = True
        # pl['isolate_stores'] = True

        response = self.scheduler.chunk_job(job=pl)

        NUMBER_TASKS_EXPECTED = [
            ('sections', 'section_weddings', 5),
            ('sections', 'section_funerals', 1),
            ('sections', 'section_conversions', 1),
            ('sections', 'section_gifts', 1),
        ]

        # for row in response:
        #     pprint.pprint(row)
        #     print('\n')

        self.check_number_of_tasks(NUMBER_TASKS_EXPECTED, response)
        batches = [
            x['products'] for x in response
            if x.get('stores') == ['store_music']
        ]
        print(batches)

        self.assertEqual(
            batches,
            list(
                chunks(
                    pl['sections']['section_weddings']['stores']['store_music']
                    ['products'], 2)))
        # self.assertEqual(1, 2)
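
    # A small illustration (not part of the original suite), assuming the
    # `chunks` helper used above splits a list into consecutive fixed-size
    # pieces: 5 products with max_products_per_batch=2 give batches of 2+2+1.
    def _demo_expected_batches(self):
        products = ['product_march', 'product_chorus', 740, 'product,4', 'product 5']
        expected = [['product_march', 'product_chorus'], [740, 'product,4'], ['product 5']]
        self.assertEqual(list(chunks(products, 2)), expected)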

    def test_chunk_job__root_level_isolate(self):
        """
        Tests that `isolate_ATTRs` in the root of the Payload will be respected for chunking in all the nested
        elements of the Job recursively.

        In the test payload 'stores' is a second-level nesting attribute, but here we pass the isolation flag in the root of the job.
        """
        pl = deepcopy(self.PAYLOAD)
        pl['isolate_stores'] = True

        response = self.scheduler.chunk_job(job=pl)

        NUMBER_TASKS_EXPECTED = [
            ('sections', 'section_funerals', 2),
            ('sections', 'section_weddings', 3),
            ('sections', 'section_conversions', 3),
            ('sections', 'section_gifts', 1),
        ]
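
        # With `isolate_stores` at the root, each section is split per store:
        # section_funerals has 2 stores, section_weddings 3, section_conversions 3,
        # and section_gifts (no stores at all) stays as a single task.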

        # for row in response:
        #     pprint.pprint(row)
        #     print('\n')

        self.check_number_of_tasks(NUMBER_TASKS_EXPECTED, response)

    ### Tests of other methods ###
    def test_extract_job_from_payload(self):

        TESTS = [
            ({'job': {'lambda_name': 'foo', 'payload_attr': 'val'}},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ({'lambda_name': 'foo', 'payload_attr': 'val'},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ({'lambda_name': 'arn:aws:lambda:us-west-2:000000000000:function:foo', 'payload_attr': 'val'},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ({'job': {'lambda_name': 'foo', 'payload_attr': 'val'}},
             {'lambda_name': 'foo', 'payload_attr': 'val'}),

            # JSONs
            ('{"lambda_name": "foo", "payload_attr": "val"}',
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ('{"job": {"lambda_name": "foo", "payload_attr": "val"}}',
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
            ('{"job": "{\\"lambda_name\\": \\"foo\\", \\"payload_attr\\": \\"val\\"}"}',
             {'lambda_name': 'foo', 'payload_attr': 'val'}),
        ]

        for test, expected in TESTS:
            self.assertEqual(self.scheduler.extract_job_from_payload(test),
                             expected)

    def test_extract_job_from_payload_raises(self):

        TESTS = [
            42,
            {'payload_attr': 'val'},
            "{'payload_attr': 'val'}",
            {'job': {'payload_attr': 'val'}},
            {"job": "bad one"},
        ]

        for test in TESTS:
            self.assertRaises(Exception,
                              self.scheduler.extract_job_from_payload, test)

    def test_needs_chunking__isolate_root(self):

        pl = deepcopy(self.PAYLOAD)
        self.assertFalse(self.scheduler.needs_chunking('sections', pl))

        pl = deepcopy(self.PAYLOAD)
        pl['isolate_sections'] = True
        self.assertTrue(self.scheduler.needs_chunking('sections', pl))

    def test_needs_chunking__isolate_subdata(self):

        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_funerals']['isolate_stores'] = True

        self.assertTrue(self.scheduler.needs_chunking('sections', pl))
        self.assertTrue(
            self.scheduler.needs_chunking('stores',
                                          pl['sections']['section_funerals']))
        self.assertFalse(
            self.scheduler.needs_chunking(
                'stores', pl['sections']['section_conversions']))

    def test_needs_chunking__isolate_subdata_deep(self):

        pl = deepcopy(self.PAYLOAD)
        pl['sections']['section_conversions']['stores']['store_training'][
            'isolate_products'] = True
        # pprint.pprint(pl)

        self.assertFalse(
            self.scheduler.needs_chunking('stores',
                                          pl['sections']['section_funerals']))
        self.assertTrue(
            self.scheduler.needs_chunking(
                'stores', pl['sections']['section_conversions']))
        self.assertTrue(
            self.scheduler.needs_chunking(
                'products', pl['sections']['section_conversions']['stores']
                ['store_training']))
        self.assertTrue(self.scheduler.needs_chunking('sections', pl))

    def test_needs_chunking__max_items_per_batch(self):

        pl = deepcopy(self.PAYLOAD)

        # Verify that no chunking is required by default
        self.assertFalse(self.scheduler.needs_chunking('sections', pl))

        # Inject max_items_per_batch and recheck.
        pl['sections']['section_conversions']['stores']['store_training'][
            'max_products_per_batch'] = 3
        self.assertTrue(self.scheduler.needs_chunking('sections', pl))

    def test_get_index_from_list(self):

        TESTS = [
            (0, 'a', ['a', 'b', 'c']),
            (0, 'name', ['names', 'b', 'c']),
            (2, 'c', ['a', 'b', 'c']),
            (1, 'b', {'a': 1, 'b': 2, 'c': 3}),
            (1, 'bob', {'a': 1, 'bobs': 2, 'c': 3}),
        ]

        for expected, attr, data in TESTS:
            self.assertEqual(expected,
                             self.scheduler.get_index_from_list(attr, data))

    def check_number_of_tasks(self, expected_map, response):
        for key, val, expected in expected_map:
            r = filter(lambda task: task.get(key) == [val], response)
            # print(f"TEST OF FILTER: {t}: {len(list(t))}")
            self.assertEqual(len(list(r)), expected)

    def test_validate_list_of_vals(self):
        TESTS = [
            ({'a': None, 'b': None}, ['a', 'b']),
            (['a', 'b', 42], ['a', 'b', 42]),
            ([], []),
        ]

        for test, expected in TESTS:
            self.assertEqual(self.scheduler.validate_list_of_vals(test),
                             expected)

    def test_get_and_lock_queue_file__s3_calls(self):

        self.scheduler.get_and_lock_queue_file()
        self.scheduler.s3_client.download_file.assert_called_once()
        self.scheduler.s3_client.copy_object.assert_called_once()
        self.scheduler.s3_client.delete_object.assert_called_once()
        self.scheduler.s3_client.upload_file.assert_not_called()

    def test_get_and_lock_queue_file__local_file_exists(self):

        with patch('os.path.isfile') as isfile_mock:
            isfile_mock.return_value = True

            r = self.scheduler.get_and_lock_queue_file()

        self.assertEqual(r, self.scheduler.local_queue_file)
        self.scheduler.s3_client.download_file.assert_not_called()
        self.scheduler.s3_client.copy_object.assert_not_called()
        self.scheduler.s3_client.delete_object.assert_not_called()

        self.scheduler.s3_client.upload_file.assert_called_once()

    def test_parse_job_to_file(self):

        SAMPLE_SIMPLE_JOB = {
            'lambda_name': self.LABOURER.id,
            'some_payload': 'foo',
        }

        self.scheduler.parse_job_to_file(SAMPLE_SIMPLE_JOB)

        self.assertEqual(line_count(self.scheduler.local_queue_file), 1)

        with open(self.scheduler.local_queue_file, 'r') as f:
            row = json.loads(f.read())
            print(row)

            self.assertEqual(row['labourer_id'], self.LABOURER.id)
            self.assertEqual(row['some_payload'], 'foo')

    def test_parse_job_to_file__multiple_rows(self):

        SAMPLE_SIMPLE_JOB = {
            'lambda_name': self.LABOURER.id,
            "isolate_sections": True,
            'sections': {
                'section_technic': None,
                'section_furniture': None,
            },
        }

        self.scheduler.parse_job_to_file(SAMPLE_SIMPLE_JOB)

        self.assertEqual(line_count(self.scheduler.local_queue_file), 2)

        with open(self.scheduler.local_queue_file, 'r') as f:
            for row in f.readlines():
                # print(row)
                parsed_row = json.loads(row)
                print(parsed_row)

                self.assertEqual(parsed_row['labourer_id'], self.LABOURER.id)
                self.assertEqual(len(parsed_row['sections']), 1)
                self.assertIn(parsed_row['sections'][0],
                              SAMPLE_SIMPLE_JOB['sections'])

    def test_call__sample(self):
        SAMPLE_SIMPLE_JOB = {
            'lambda_name': self.LABOURER.id,
            'some_payload': 'foo',
        }
        print(json.dumps(SAMPLE_SIMPLE_JOB))

        self.scheduler.task_client.create_task.return_value = {
            'task_id': 123,
            'labourer_id': SAMPLE_SIMPLE_JOB['lambda_name'],
            **SAMPLE_SIMPLE_JOB
        }

        with patch('sosw.scheduler.Scheduler._sleeptime_for_dynamo',
                   new_callable=PropertyMock) as mock_sleeptime:
            mock_sleeptime.return_value = 0.0001

            r = self.scheduler(json.dumps(SAMPLE_SIMPLE_JOB))
            print(r)

        self.scheduler.task_client.create_task.assert_called_once()

        self.scheduler.s3_client.download_file.assert_not_called()
        self.scheduler.s3_client.copy_object.assert_not_called()

        self.scheduler.s3_client.upload_file.assert_called_once()
        self.scheduler.s3_client.delete_object.assert_called_once()

    def test_sleeptime_for_dynamo(self):

        self.scheduler.task_client.dynamo_db_client.get_capacity.return_value = {
            'read': 10,
            'write': 10
        }
        self.assertEqual(round(self.scheduler._sleeptime_for_dynamo, 2), 0.07)

        self.scheduler.task_client.dynamo_db_client.get_capacity.return_value = {
            'read': 10,
            'write': 25
        }
        self.assertEqual(round(self.scheduler._sleeptime_for_dynamo, 2), 0.01)

        self.scheduler.task_client.dynamo_db_client.get_capacity.return_value = {
            'read': 10,
            'write': 50
        }
        self.assertEqual(round(self.scheduler._sleeptime_for_dynamo, 2), 0)
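
        # The observed values suggest the sleep time shrinks as write capacity
        # grows (~0.07 s at 10 WCU, ~0.01 s at 25 WCU, 0 at 50 WCU); the exact
        # formula is internal to the Scheduler.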

    def test_apply_job_schema(self):
        self.scheduler.config['job_schema_variants']['sample_schema_name'] = {
            'chunkable_attrs': [
                ('a', {}),
            ]
        }

        self.scheduler.parse_job_to_file = MagicMock()
        self.scheduler.process_file = MagicMock()
        self.scheduler({
            'job': {
                'lambda_name': 'test_lambda',
                'job_schema_name': 'sample_schema_name'
            },
        })
        self.assertEqual(
            self.scheduler.config['job_schema']['chunkable_attrs'][0][0], 'a')

    def test_apply_job_schema__default(self):
        self.scheduler.parse_job_to_file = MagicMock()
        self.scheduler.process_file = MagicMock()
        self.scheduler({
            'job': {
                'lambda_name': 'test_lambda',
            },
        })

        self.assertEqual(
            self.scheduler.config['job_schema']['chunkable_attrs'][0][0], 'b')

    def test_apply_job_schema__default_preserved(self):
        """
        The first call checks a specific job schema name.
        The second call verifies that invoking the scheduler again overwrites the config
        and falls back to the default job schema.

        """

        self.scheduler.config['job_schema_variants']['sample_schema_name'] = {
            'chunkable_attrs': [
                ('a', {}),
            ]
        }

        self.scheduler.parse_job_to_file = MagicMock()
        self.scheduler.process_file = MagicMock()
        self.scheduler({
            'job': {
                'lambda_name': 'test_lambda',
                'job_schema_name': 'sample_schema_name'
            },
        })

        self.assertEqual(
            self.scheduler.config['job_schema']['chunkable_attrs'][0][0], 'a')
        self.scheduler({
            'job': {
                'lambda_name': 'test_lambda',
            },
        })

        self.assertEqual(
            self.scheduler.config['job_schema']['chunkable_attrs'][0][0], 'b')

    def test_apply_job_schema__chunkable_attrs_reinitialized(self):
        """
        Check that the job schema variant was applied and that its
        `chunkable_attrs` have been reinitialized into `self.chunkable_attrs`.

        """

        self.scheduler.config['job_schema_variants']['sample_schema_name'] = {
            'chunkable_attrs': [
                ('a', {}),
                ('b', {}),
                ('c', {}),
            ]
        }

        self.scheduler.parse_job_to_file = MagicMock()
        self.scheduler.process_file = MagicMock()
        self.scheduler({
            'job': {
                'lambda_name': 'test_lambda',
                'job_schema_name': 'sample_schema_name'
            },
        })

        expected_chunkable_attrs = ['a', 'b', 'c']

        for index, value in enumerate(expected_chunkable_attrs):
            self.assertEqual(
                self.scheduler.config['job_schema']['chunkable_attrs'][index]
                [0], value)

        self.assertEqual(self.scheduler.chunkable_attrs,
                         expected_chunkable_attrs)

        self.scheduler({
            'job': {
                'lambda_name': 'test_lambda',
            },
        })

        self.assertEqual(
            self.scheduler.config['job_schema']['chunkable_attrs'][0][0], 'b')
        self.assertEqual(self.scheduler.chunkable_attrs, ['b'])
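
The date tests in this example fully determine the period helpers' arithmetic. A minimal re-derivation of `last_x_days`, consistent with those expectations (an illustrative sketch, not the library's implementation):

import datetime

def last_x_days_sketch(period: str) -> list:
    # 'last_3_days' on 2019-01-30 must yield
    # ['2019-01-27', '2019-01-28', '2019-01-29'] to satisfy test_last_x_days.
    days = int(period.split('_')[1])
    today = datetime.date.today()
    return [str(today - datetime.timedelta(days=i)) for i in range(days, 0, -1)]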
Example #20
    def should_retry_task(self, labourer: Labourer, task: Dict) -> bool:
        logger.debug(f"Called Scavenger.should_retry_task with labourer={labourer}, task={task}")
        attempts = task.get(self.get_db_field_name('attempts'))
        return attempts < labourer.get_attr('max_attempts')
Example #21
    def setUp(self):
        self.labourer = Labourer(id=42, arn='arn::aws::lambda')
Example #22
    'attempts': 3,
    'greenfield': '123'
}, {
    'task_id': '124',
    'labourer_id': 'another_lambda',
    'attempts': 4,
    'greenfield': '321'
}, {
    'task_id': '125',
    'labourer_id': 'some_lambda',
    'attempts': 3,
    'greenfield': '123'
}]

LABOURERS = [
    Labourer(id='some_lambda', arn='some_arn', some_attr='yes'),
    Labourer(id='another_lambda', arn='another_arn'),
    Labourer(id='lambda3', arn='arn3')
]

RETRY_TASKS = [
    {
        'task_id': '1',
        'labourer_id': 'some_function',
        'greenfield': 1234,
        'attempts': 2,
        'desired_launch_time': 7777,
        'arn': 'some_arn',
        'payload': {}
    },
    {
Example #23
class task_manager_UnitTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_TASK_CLIENT_CONFIG

    LABOURER = Labourer(id='some_function', arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')


    def setUp(self):
        """
        We keep copies of main parameters here, because they may differ from test to test and cleanup needs them.
        It is the responsibility of the test author to update these values if the test requires it.
        """

        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()

        self.config = deepcopy(self.TEST_CONFIG)

        self.labourer = deepcopy(self.LABOURER)

        self.HASH_KEY = ('task_id', 'S')
        self.RANGE_KEY = ('labourer_id', 'S')
        self.table_name = self.config['dynamo_db_config']['table_name']

        with patch('boto3.client'):
            self.manager = TaskManager(custom_config=self.config)

        self.manager.dynamo_db_client = MagicMock(spec=dynamo_db.DynamoDbClient)
        self.manager.ecology_client = MagicMock()
        self.manager.ecology_client.get_labourer_status.return_value = 2
        self.manager.lambda_client = MagicMock()


    def tearDown(self):
        self.patcher.stop()


    def test_get_db_field_name(self):
        self.assertEqual(self.manager.get_db_field_name('task_id'), self.HASH_KEY[0], "Configured field name failed")
        self.assertEqual(self.manager.get_db_field_name('some_name'), 'some_name', "Default column name failed")


    def test_mark_task_invoked__calls_dynamo(self):
        self.manager.get_labourers = MagicMock(return_value=[self.labourer])
        self.manager.register_labourers()

        greenfield = round(time.time() - random.randint(0, 1000))
        delta = self.manager.config['greenfield_invocation_delta']

        task = {
            self.HASH_KEY[0]:  f"task_id_{self.labourer.id}_256",  # Task ID
            self.RANGE_KEY[0]: self.labourer.id,  # Worker ID
            'greenfield':      greenfield
        }

        # Do the actual tested job
        self.manager.mark_task_invoked(self.labourer, task)

        # Check the dynamo_client was called with correct payload to update
        self.manager.dynamo_db_client.update.assert_called_once()

        call_args, call_kwargs = self.manager.dynamo_db_client.update.call_args

        self.assertEqual(call_args[0], {self.HASH_KEY[0]: f"task_id_{self.labourer.id}_256"},
                         "The key of task is missing")
        self.assertEqual(call_kwargs['attributes_to_increment'], {'attempts': 1}, "Attempts counter not increased")

        gf = call_kwargs['attributes_to_update']['greenfield']
        self.assertEqual(round(gf, -2), round(time.time() + delta, -2), "Greenfield was not updated")


    # @unittest.skip("This behavior is deprecated")
    # def test_mark_task_invoked__greenfield_counts_attempts(self):
    #     self.manager.dynamo_db_client = MagicMock()
    #
    #     greenfield = round(time.time() - random.randint(0, 1000))
    #     delta = self.manager.config['greenfield_invocation_delta']
    #
    #     task = {
    #         self.HASH_KEY[0]:  "task_id_42_256",  # Task ID
    #         self.RANGE_KEY[0]: 42,  # Worker ID
    #         'greenfield':      greenfield,
    #         'attempts':        3
    #     }
    #
    #     # Do the actual tested job
    #     self.manager.mark_task_invoked(task)
    #
    #     # Check the dynamo_client was called with correct payload to update
    #     self.manager.dynamo_db_client.update.assert_called_once()
    #
    #     call_args, call_kwargs = self.manager.dynamo_db_client.update.call_args
    #
    #     self.assertEqual(call_args[0],
    #                      {self.HASH_KEY[0]: "task_id_42_256", self.RANGE_KEY[0]: 42}), "The key of task is missing"
    #     self.assertEqual(call_kwargs['attributes_to_increment'], {'attempts': 1}), "Attempts counter not increased"
    #
    #     gf = call_kwargs['attributes_to_update']['greenfield']
    #     self.assertEqual(round(gf, -2), round(time.time() + delta * 4, -2),
    #                      "Greenfield was increased with respect to number of attempts")

    def test_invoke_task__validates_task(self):
        self.assertRaises(AttributeError, self.manager.invoke_task, labourer=self.labourer), "Missing task and task_id"
        self.assertRaises(AttributeError, self.manager.invoke_task, labourer=self.labourer, task_id='qwe',
                          task={1: 2}), "Both task and task_id."


    def test_invoke_task__calls__mark_task_invoked(self):
        self.manager.mark_task_invoked = MagicMock()
        self.manager.is_valid_task = MagicMock(return_value=True)
        self.manager.get_task_by_id = MagicMock(return_value={})

        self.manager.invoke_task(task_id='qwe', labourer=self.labourer)
        self.manager.mark_task_invoked.assert_called_once()


    def test_invoke_task__calls__get_task_by_id(self):
        self.manager.is_valid_task = MagicMock(return_value=True)
        self.manager.mark_task_invoked = MagicMock()
        self.manager.get_task_by_id = MagicMock(return_value={})

        self.manager.invoke_task(task_id='qwe', labourer=self.labourer)
        self.manager.is_valid_task.assert_called_once()
        self.manager.get_task_by_id.assert_called_once()


    def test_invoke_task__calls__lambda_client(self):
        self.manager.is_valid_task = MagicMock(return_value=True)
        self.manager.get_labourers = MagicMock(return_value=[self.labourer])
        self.manager.register_labourers()

        task = {
            self.HASH_KEY[0]:  f"task_id_{self.labourer.id}_256",  # Task ID
            self.RANGE_KEY[0]: self.labourer.id,  # Worker ID
            'payload':         {'foo': 23}
        }

        self.manager.get_task_by_id = MagicMock(return_value=task)

        self.manager.invoke_task(task_id=f'task_id_{self.labourer}_256', labourer=self.labourer)

        self.manager.lambda_client.invoke.assert_called_once()

        call_args, call_kwargs = self.manager.lambda_client.invoke.call_args

        self.assertEqual(call_kwargs['FunctionName'], self.labourer.arn)
        # self.assertEqual(call_kwargs['Payload'], json.dumps(task['payload']))


    def test_invoke_task__not_calls__lambda_client_if_raised_conditional_exception(self):
        self.manager.register_labourers()

        task = {
            self.HASH_KEY[0]:  f"task_id_{self.labourer.id}_256",  # Task ID
            self.RANGE_KEY[0]: self.labourer.id,  # Worker ID
            'created_at':      1000,
            'payload':         {'foo': 23}
        }


        class ConditionalCheckFailedException(Exception):
            pass


        self.manager.get_task_by_id = MagicMock(return_value=task)
        self.manager.mark_task_invoked = MagicMock()
        self.manager.mark_task_invoked.side_effect = ConditionalCheckFailedException("Boom")

        self.manager.invoke_task(task_id=f'task_id_{self.labourer}_256', labourer=self.labourer)

        self.manager.lambda_client.invoke.assert_not_called()
        self.assertEqual(self.manager.stats['concurrent_task_invocations_skipped'], 1)


    def test_invoke_task__with_explicit_task__not_calls_get_task_by_id(self):
        self.manager.get_task_by_id = MagicMock()
        self.manager.is_valid_task = MagicMock(return_value=True)
        self.manager.mark_task_invoked = MagicMock()

        self.manager.invoke_task(labourer=self.LABOURER, task={1:2})
        self.manager.is_valid_task.assert_called_once()
        self.manager.get_task_by_id.assert_not_called()


    def test_register_labourers(self):
        with patch('time.time') as t:
            t.return_value = 123

            labourers = self.manager.register_labourers()

        lab = labourers[0]
        invoke_time = 123 + self.manager.config['greenfield_invocation_delta']

        self.assertEqual(lab.get_attr('start'), 123)
        self.assertEqual(lab.get_attr('invoked'), invoke_time)
        self.assertEqual(lab.get_attr('expired'), invoke_time - lab.duration - lab.cooldown)
        self.assertEqual(lab.get_attr('health'), 2)
        self.assertEqual(lab.get_attr('max_attempts'), 3)


    def test_register_labourers__calls_register_task_manager(self):

        self.manager.register_labourers()
        self.manager.ecology_client.register_task_manager.assert_called_once_with(self.manager)


    def test_get_count_of_running_tasks_for_labourer(self):

        labourer = self.manager.register_labourers()[0]
        self.manager.dynamo_db_client.get_by_query.return_value = 3

        self.assertEqual(self.manager.get_count_of_running_tasks_for_labourer(labourer=labourer), 3)
        self.manager.dynamo_db_client.get_by_query.assert_called_once()

        call_args, call_kwargs = self.manager.dynamo_db_client.get_by_query.call_args
        self.assertTrue(call_kwargs['return_count'])


    def test_get_labourers(self):
        self.config['labourers'] = {
            'some_lambda':  {'foo': 'bar', 'arn': '123'},
            'some_lambda2': {'foo': 'baz'},
        }

        with patch('boto3.client'):
            self.task_client = TaskManager(custom_config=self.config)

        result = self.task_client.get_labourers()
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0].foo, 'bar')
        self.assertEqual(result[0].arn, '123')
        self.assertEqual(result[1].foo, 'baz')


    def test_archive_task(self):
        task_id = '918273'
        task = {
            'labourer_id': 'some_lambda', 'task_id': task_id, 'payload': '{}', 'completed_at': '1551962375',
            'closed_at':   '111'
        }

        # Mock
        self.manager.dynamo_db_client = MagicMock()
        self.manager.get_task_by_id = Mock(return_value=task)

        # Call
        self.manager.archive_task(task_id)

        # Check calls
        expected_completed_task = task.copy()
        expected_completed_task['labourer_id_task_status'] = 'some_lambda_1'
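        # The composite 'labourer_id_task_status' key presumably feeds a GSI on
        # the closed-tasks table; the '_1' suffix appears to mark a completed
        # task (this one has 'completed_at' set).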
        self.manager.dynamo_db_client.put.assert_called_once_with(expected_completed_task, table_name=self.TEST_CONFIG[
            'sosw_closed_tasks_table'])
        self.manager.dynamo_db_client.delete.assert_called_once_with({'task_id': task_id})


    def test__jsonify_payload_of_task(self):
        TESTS = [
            ({'foo': 'some_lambda', 'payload': '{"bar": 42}'}, {'foo': 'some_lambda', 'payload': '{"bar": 42}'}),
            ({'foo': 'some_lambda', 'payload': {'bar': 42}}, {'foo': 'some_lambda', 'payload': '{"bar": 42}'}),
            ({'foo': {'a': 1}}, {'foo': {'a': 1}}),
        ]

        for test, expected in TESTS:
            self.assertEqual(self.manager._jsonify_payload_of_task(test), expected)


    def test_move_task_to_retry_table(self):
        task_id = '123'
        TEST = {'labourer_id': 'some_lambda', 'task_id': task_id, 'payload': '{"bar": 42}'}
        delay = 350


        with patch('time.time') as t:
            t.return_value = 1000
            self.manager.move_task_to_retry_table(TEST, delay)

        params = extract_call_params(self.manager.dynamo_db_client.put.call_args, dynamo_db.DynamoDbClient.put)
        # print(params)

        desired_time = params['row'].pop('desired_launch_time')
        self.assertEqual(desired_time, 1000 + delay, "Delay was not added to the current time.")

        self.assertDictEqual(TEST, params['row'], "Task for retry table doesn't match original.")
        self.assertEqual(params['table_name'], self.config['sosw_retry_tasks_table'], "Retry writes to invalid table.")


    def test_move_task_to_retry_table__dumps_payload(self):
        TEST = {'labourer_id': 'foo', 'task_id': 123, 'payload': {'bar': 42}}

        self.manager.move_task_to_retry_table(TEST, 1)

        params = extract_call_params(self.manager.dynamo_db_client.put.call_args, dynamo_db.DynamoDbClient.put)

        self.assertEqual(json.dumps(TEST['payload']), params['row']['payload'], "Payload was not JSONified")


    def test_get_tasks_to_retry_for_labourer(self):

        with patch('time.time') as t:
            t.return_value = 123
            labourer = self.manager.register_labourers()[0]

        TASK = {'labourer_id': 'some_lambda', 'task_id': str(uuid.uuid4()), 'greenfield': 122}

        # Requires Labourer
        self.assertRaises(TypeError, self.manager.get_tasks_to_retry_for_labourer)

        self.manager.dynamo_db_client.get_by_query.return_value = [TASK]

        r = self.manager.get_tasks_to_retry_for_labourer(labourer=labourer)

        self.manager.dynamo_db_client.get_by_query.assert_called_once()
        self.assertEqual(len(r), 1)
        self.assertEqual(r[0]['task_id'], TASK['task_id'])


    def test_get_tasks_to_retry_for_labourer__respects_greenfield(self):

        with patch('time.time') as t:
            t.return_value = 123
            labourer = self.manager.register_labourers()[0]

        self.manager.get_tasks_to_retry_for_labourer(labourer=labourer, limit=1)

        call_args, call_kwargs = self.manager.dynamo_db_client.get_by_query.call_args
        self.assertEqual(call_kwargs['keys']['desired_launch_time'], '123')
        self.assertEqual(call_kwargs['comparisons']['desired_launch_time'], '<=')


    def test_get_tasks_to_retry_for_labourer__limit(self):

        with patch('time.time') as t:
            t.return_value = 123
            labourer = self.manager.register_labourers()[0]

        TASK = {'labourer_id': 'some_lambda', 'task_id': str(uuid.uuid4()), 'greenfield': 122}
        mock_get_by_query = lambda **kwargs: [TASK for _ in range(kwargs.get('max_items', 42))]

        self.manager.dynamo_db_client.get_by_query.side_effect = mock_get_by_query

        r = self.manager.get_tasks_to_retry_for_labourer(labourer=labourer, limit=1)

        self.manager.dynamo_db_client.get_by_query.assert_called_once()
        self.assertEqual(len(r), 1)


    def test_get_oldest_greenfield_for_labourer__no_queued_tasks(self):

        self.manager.dynamo_db_client.get_by_query.return_value = []

        result = self.manager.get_oldest_greenfield_for_labourer(labourer=self.LABOURER)

        self.assertEqual(result, 0 + self.manager.config['greenfield_task_step'])


    def test_get_newest_greenfield_for_labourer__no_queued_tasks(self):

        self.manager.dynamo_db_client.get_by_query.return_value = []

        result = self.manager.get_newest_greenfield_for_labourer(labourer=self.LABOURER)

        self.assertEqual(result, 0 + self.manager.config['greenfield_task_step'])


    def test_create_task(self):

        TASK = dict(labourer=self.LABOURER, payload={'foo': 42})
        self.manager.get_newest_greenfield_for_labourer = MagicMock(return_value=5000)

        with patch('time.time') as t:
            t.return_value = 1234567
            self.manager.create_task(**TASK)

        self.manager.dynamo_db_client.put.assert_called_once()

        call_args, call_kwargs = self.manager.dynamo_db_client.put.call_args
        arg = call_args[0]
        # print('########')
        # print(arg, call_kwargs)

        self.assertEqual(str(arg['labourer_id']), str(self.LABOURER.id))
        self.assertEqual(str(arg['created_at']), str(1234567))
        self.assertEqual(str(arg['greenfield']), str(6000))
        self.assertEqual(str(arg['attempts']), str(0))
        self.assertEqual(str(arg['payload']), '{"foo": 42}')

        for field in self.manager.config['dynamo_db_config']['required_fields']:
            self.assertIn(field, arg.keys())
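
        # Worked example of the greenfield arithmetic asserted above (assuming
        # ``greenfield_task_step`` is 1000 in the test config):
        # greenfield = newest_greenfield_for_labourer + greenfield_task_step = 5000 + 1000 = 6000.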


    def test_create_task__combine_complex_payload(self):
        TASK = dict(labourer=self.LABOURER, payload={'foo': 42}, shops=[1, 3], lloyd='green ninja')
        self.manager.get_newest_greenfield_for_labourer = MagicMock(return_value=5000)

        self.manager.create_task(**TASK)

        self.manager.dynamo_db_client.put.assert_called_once()

        call_args, call_kwargs = self.manager.dynamo_db_client.put.call_args
        payload = call_args[0]['payload']
        payload = json.loads(payload)
        # print('########')
        # print(payload)

        self.assertEqual(payload['foo'], 42)
        self.assertEqual(payload['shops'], [1, 3])
        self.assertEqual(payload['lloyd'], 'green ninja')


    def test_construct_payload_for_task(self):
        TESTS = [
            (dict(payload={'foo': 42}), {'foo': 42}),  # Dictionary
            (dict(payload='{"foo": 42}'), {'foo': 42}),  # JSON
            (dict(payload={'foo': 42}, shops=[1, 3]), {'foo': 42, 'shops': [1, 3]}),  # Combine custom attrs
            (dict(bar="foo"), {'bar': "foo"}),  # Missing initial payload
            (dict(bar={"foo": 3}), {'bar': {"foo": 3}}),  # Missing initial payload
        ]

        for test, expected in TESTS:
            self.assertEqual(self.manager.construct_payload_for_task(**test), json.dumps(expected))
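
    # A sketch consistent with all five cases above (an assumption, not the sosw
    # source): start from ``payload`` (parsed if it arrives as a JSON string),
    # merge in any remaining keyword arguments, and return a JSON string.
    def construct_payload_for_task(self, **kwargs) -> str:
        payload = kwargs.pop('payload', {})
        if isinstance(payload, str):
            payload = json.loads(payload)
        payload.update(kwargs)
        return json.dumps(payload)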


    def test_get_average_labourer_duration__calls_dynamo_twice(self):
        """
        This is an important guard for the other tests of this method.
        If for some reason the Dynamo mock is not called exactly twice, the side_effects stop imitating
        real data and those tests become unpredictable.
        """

        some_labourer = self.manager.register_labourers()[0]

        self.manager.get_average_labourer_duration(some_labourer)
        self.assertEqual(self.manager.dynamo_db_client.get_by_query.call_count, 2)


    def test_get_average_labourer_duration__calculates_average(self):

        NOW = 10000
        START = NOW + self.manager.config['greenfield_invocation_delta']

        some_labourer = self.manager.register_labourers()[0]
        some_labourer.max_duration = 900

        CLOSED = [
            {
                'task_id':      '123', 'labourer_id': 'some_function', 'attempts': 1, 'greenfield': START - 1000,
                'completed_at': NOW - 500
            },  # Duration 500
            {
                'task_id':      '124', 'labourer_id': 'some_function', 'attempts': 1, 'greenfield': START - 2000,
                'completed_at': NOW - 1700
            },  # Duration 300
            {
                'task_id':      '125', 'labourer_id': 'some_function', 'attempts': 1, 'greenfield': START - 2000,
                'completed_at': NOW - 1700
            },  # Duration 300
        ]

        FAILED = [
            {'task_id': '235', 'labourer_id': 'some_function', 'attempts': 3, 'greenfield': START - 3000},
            {'task_id': '236', 'labourer_id': 'some_function', 'attempts': 4, 'greenfield': START - 3000},
            {'task_id': '237', 'labourer_id': 'some_function', 'attempts': 3, 'greenfield': START - 4000},

        ]

        self.manager.dynamo_db_client.get_by_query.side_effect = [CLOSED, FAILED]

        count_failed = sum(x['attempts'] for x in FAILED)

        expected = round((500 + 300 + 300 +  # closed
                          (some_labourer.get_attr('max_duration') * count_failed))  # failed
                         / (len(CLOSED) + count_failed))  # total number of closed + failed
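
        # With these fixtures: count_failed = 3 + 4 + 3 = 10 attempts, so
        # expected = round((1100 + 900 * 10) / (3 + 10)) = round(10100 / 13) = 777.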

        self.assertEqual(expected, self.manager.get_average_labourer_duration(some_labourer))


    def test_validate_task__good(self):
        TESTS = [
            ({'task_id': '235', 'labourer_id': 'foo', 'created_at': 5000, 'greenfield': 1000}, True),
            ({'task_id': 235, 'labourer_id': 'foo', 'created_at': 5000, 'greenfield': 1000}, True),
            ({'task_id': '235', 'labourer_id': 'foo', 'created_at': 5000, 'greenfield': 1000, 'bar': 42}, True),
        ]

        for test, expected in TESTS:
            self.assertEqual(self.manager.is_valid_task(test), expected)


    def test_validate_task__bad(self):
        _ = self.manager.get_db_field_name
        TASK = {'task_id': '235', 'labourer_id': 'foo', 'created_at': 5000, 'greenfield': 1000, 'bar': 42}

        for field in [_('task_id'), _('labourer_id'), _('created_at')]:
            test = deepcopy(TASK)
            test.pop(field)

            self.assertFalse(self.manager.is_valid_task(test))


    def test_health_metrics_received(self):
        TEST_CFG = {
            'some_function': {
                'arn':                          'arn:aws:lambda:us-west-2:0000000000:function:some_function',
                'max_simultaneous_invocations': 10,
                'health_metrics':               {
                    'SomeDBCPU': {
                        'Name':                        'CPUUtilization',
                        'Namespace':                   'AWS/RDS',
                        'Period':                      60,
                        'Statistics':                  ['Average'],
                        'Dimensions':                  [
                            {
                                'Name':  'DBInstanceIdentifier',
                                'Value': 'YOUR-DB'
                            },
                        ],

                        # This is the mapping of how the Labourer should "feel" about this metric.
                        # See EcologyManager.ECO_STATUSES.
                        # It is simply a mapping of ``ECO_STATUS: value`` thresholds applied with
                        # ``feeling_comparison_operator``. A worked example follows the config below.
                        'feelings':                    {
                            3: 50,
                            4: 25,
                        },
                        'feeling_comparison_operator': '<='
                    },
                },
            }
        }
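
        # Worked example of the ``feelings`` mapping above (illustrative numbers only):
        # with feelings {3: 50, 4: 25} and operator '<=', a metric value of 20 satisfies
        # 20 <= 25, so the Labourer feels like ECO_STATUS 4 (the healthiest matching
        # level); a value of 40 only satisfies 40 <= 50, giving status 3; anything
        # above 50 matches neither threshold and falls through to the worst status.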
Example #24
    def test_set_defaults_overrides(self):
        lab = Labourer(id=42, duration=300)
        self.assertEqual(lab.duration, 300)
Example #25
class ecology_manager_UnitTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_ECOLOGY_CLIENT_CONFIG
    LABOURER = Labourer(id='some_function', arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')


    def setUp(self):
        """
        We keep copies of the main parameters here, because they may differ from test to test, and cleanup needs them.
        It is the responsibility of the test author to update these values if a test requires it.
        """

        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()

        self.config = self.TEST_CONFIG.copy()

        self.manager = EcologyManager(custom_config=self.config)


    def tearDown(self):
        self.patcher.stop()


    def test_eco_statuses(self):
        self.assertEqual(set(self.manager.eco_statuses), set(range(5)))


    def test_count_running_tasks_for_labourer__raises_not_task_client(self):
        self.assertRaises(RuntimeError, self.manager.count_running_tasks_for_labourer, self.LABOURER)


    def test_count_running_tasks_for_labourer__use_local_cache(self):
        tm = MagicMock()
        self.manager.register_task_manager(tm)

        # Setting something to local cache.
        self.manager.running_tasks[self.LABOURER.id] = 5

        self.assertEqual(self.manager.count_running_tasks_for_labourer(self.LABOURER), 5)
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_not_called()


    def test_count_running_tasks_for_labourer__calls_task_manager(self):
        tm = MagicMock()
        tm.get_count_of_running_tasks_for_labourer.return_value = 42
        self.manager.register_task_manager(tm)

        self.assertEqual(self.manager.count_running_tasks_for_labourer(self.LABOURER), 42)
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_called_once()


    def test_register_task_manager__resets_stats(self):
        # Should be defaultdict(int)
        self.assertEqual(self.manager.running_tasks['foo'], 0)

        # Manually increase counter
        self.manager.running_tasks['foo'] += 10
        self.assertEqual(self.manager.running_tasks['foo'], 10)

        # Call register_task_manager
        self.manager.register_task_manager(MagicMock())
        self.assertEqual(self.manager.running_tasks['foo'], 0, "Did not reset cache of running_tasks")
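
    # A minimal sketch of register_task_manager consistent with this test (an
    # assumption, not the sosw source; ``defaultdict`` comes from ``collections``):
    # remember the task manager and drop the per-labourer cache of running tasks.
    def register_task_manager(self, task_manager):
        self.task_client = task_manager
        self.running_tasks = defaultdict(int)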


    def test_add_running_tasks_for_labourer(self):
        tm = MagicMock()
        tm.get_count_of_running_tasks_for_labourer.return_value = 12
        self.manager.register_task_manager(tm)

        # Not yet cached
        self.assertNotIn(self.LABOURER.id, self.manager.running_tasks.keys())

        # Add default number
        self.manager.add_running_tasks_for_labourer(labourer=self.LABOURER)

        # Should have been called first time to cache info about this Labourer.
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_called_once()

        # Make sure the cache is fetched and increased by the counter
        self.assertEqual(self.manager.running_tasks[self.LABOURER.id],
                         tm.get_count_of_running_tasks_for_labourer.return_value + 1)

        # Call again to add 5 more tasks
        self.manager.add_running_tasks_for_labourer(labourer=self.LABOURER, count=5)

        # The counter of the task manager should not have been increased.
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_called_once()

        # But the counter of tasks in cache should have.
        self.assertEqual(self.manager.running_tasks[self.LABOURER.id],
                         tm.get_count_of_running_tasks_for_labourer.return_value + 1 + 5)
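
    # A sketch matching the arithmetic above (an assumption, not the sosw source):
    # the first call seeds the cache from the TaskManager through
    # count_running_tasks_for_labourer, and every call then adds ``count`` on top.
    def add_running_tasks_for_labourer(self, labourer: Labourer, count: int = 1):
        self.running_tasks[labourer.id] = self.count_running_tasks_for_labourer(labourer) + count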
Example #26
    def test_set_defaults__called(self):
        with patch('sosw.labourer.Labourer.set_defaults') as sd:
            lab = Labourer(id=42)
            sd.assert_called_once()
Example #27
File: test_ecology.py Project: sosw/sosw
class ecology_manager_UnitTestCase(unittest.TestCase):
    TEST_CONFIG = TEST_ECOLOGY_CLIENT_CONFIG
    LABOURER = Labourer(
        id='some_function',
        arn='arn:aws:lambda:us-west-2:000000000000:function:some_function')
    SAMPLE_HEALTH_METRICS = {
        'test1': {
            'details': {
                'Name': 'CPUUtilization',
                'Namespace': 'AWS/RDS'
            }
        },
        'test2': {
            'details': {
                'Name': 'CPUUtilization2',
                'Namespace': 'AWS/RDS'
            }
        },
        'test3': {
            'details': {
                'Name': 'CPUUtilization3',
                'Namespace': 'AWS/RDS'
            }
        },
    }

    SAMPLE_GET_METRICS_STATISTICS_RESPONSE = {
        'Label':      'CPUUtilization',
        'Datapoints': [
            {
                'Timestamp': datetime.datetime(2019, 5, 13, 14, 3, tzinfo=tzlocal()),
                'Average':   31.3333333345751,
                'Unit':      'Percent'
            },
            {
                'Timestamp': datetime.datetime(2019, 5, 13, 14, 0, tzinfo=tzlocal()),
                'Average':   100.0,
                'Unit':      'Percent'
            },
            {
                'Timestamp': datetime.datetime(2019, 5, 13, 14, 4, tzinfo=tzlocal()),
                'Average':   29.4999999987582,
                'Unit':      'Percent'
            },
        ],
        'ResponseMetadata': {
            'HTTPStatusCode': 200,
        }
    }

    def setUp(self):
        """
        We keep copies of the main parameters here, because they may differ from test to test, and cleanup needs them.
        It is the responsibility of the test author to update these values if a test requires it.
        """

        self.patcher = patch("sosw.app.get_config")
        self.get_config_patch = self.patcher.start()

        self.config = self.TEST_CONFIG.copy()

        with patch('boto3.client'):
            self.manager = EcologyManager(custom_config=self.config)

    def tearDown(self):
        self.patcher.stop()

    def test_eco_statuses(self):
        self.assertEqual(set(self.manager.eco_statuses), set(range(5)))

    def test_count_running_tasks_for_labourer__raises_not_task_client(self):
        self.assertRaises(RuntimeError,
                          self.manager.count_running_tasks_for_labourer,
                          self.LABOURER)

    def test_count_running_tasks_for_labourer__use_local_cache(self):
        tm = MagicMock()
        self.manager.register_task_manager(tm)

        # Setting something to local cache.
        self.manager.running_tasks[self.LABOURER.id] = 5

        self.assertEqual(self.manager.count_running_tasks_for_labourer(self.LABOURER), 5)
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_not_called()

    def test_count_running_tasks_for_labourer__calls_task_manager(self):
        tm = MagicMock()
        tm.get_count_of_running_tasks_for_labourer.return_value = 42
        self.manager.register_task_manager(tm)

        self.assertEqual(self.manager.count_running_tasks_for_labourer(self.LABOURER), 42)
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_called_once()

    def test_register_task_manager__resets_stats(self):
        # Should be defaultdict(int)
        self.assertEqual(self.manager.running_tasks['foo'], 0)

        # Manually increase counter
        self.manager.running_tasks['foo'] += 10
        self.assertEqual(self.manager.running_tasks['foo'], 10)

        # Call register_task_manager
        self.manager.register_task_manager(MagicMock())
        self.assertEqual(self.manager.running_tasks['foo'], 0,
                         "Did not reset cache of running_tasks")

    def test_add_running_tasks_for_labourer(self):
        tm = MagicMock()
        tm.get_count_of_running_tasks_for_labourer.return_value = 12
        self.manager.register_task_manager(tm)

        # Not yet cached
        self.assertNotIn(self.LABOURER.id, self.manager.running_tasks.keys())

        # Add default number
        self.manager.add_running_tasks_for_labourer(labourer=self.LABOURER)

        # Should have been called first time to cache info about this Labourer.
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_called_once()

        # Make sure the cache is fetched and increased by the counter
        self.assertEqual(
            self.manager.running_tasks[self.LABOURER.id],
            tm.get_count_of_running_tasks_for_labourer.return_value + 1)

        # Call again to add 5 more tasks
        self.manager.add_running_tasks_for_labourer(labourer=self.LABOURER,
                                                    count=5)

        # The counter of the task manager should not have been increased.
        self.manager.task_client.get_count_of_running_tasks_for_labourer.assert_called_once()

        # But the counter of tasks in cache should have.
        self.assertEqual(
            self.manager.running_tasks[self.LABOURER.id],
            tm.get_count_of_running_tasks_for_labourer.return_value + 1 + 5)

    def test_get_max_labourer_duration(self):
        self.manager.task_client = MagicMock()
        self.manager.task_client.lambda_client.get_function_configuration.return_value = {'Timeout': 300}

        self.assertEqual(self.manager.get_max_labourer_duration(self.LABOURER), 300)
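
    # A plausible one-liner behind this test (an assumption; the FunctionName
    # parameter in particular is a guess): read the Lambda 'Timeout' from the
    # function configuration exposed by the task client's lambda_client.
    def get_max_labourer_duration(self, labourer: Labourer) -> int:
        return self.task_client.lambda_client.get_function_configuration(
                FunctionName=labourer.arn)['Timeout']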

    def test_get_health(self):
        METRIC = {
            'details': {},
            'feelings': {
                3: 50,
                4: 25,
            },
            'feeling_comparison_operator': '__le__'
        }

        TESTS = [
            (0, 4),
            (1.0, 4),
            (25, 4),
            (25.000001, 3),
            (30, 3),
            (50, 3),
            (51, 0),
        ]

        for value, expected in TESTS:
            self.assertEqual(self.manager.get_health(value, METRIC), expected, f"Failed for value: {value}")

    def test_get_health__invalid(self):
        METRIC = {
            'details': {},
            'feelings': {
                1: 40,
                3: 50,
                4: 25,
            },
            'feeling_comparison_operator': '__le__'
        }

        with self.assertRaises(ValueError, msg="Did not raise although the feelings are invalid. "
                                               "Order of values must respect the order of health statuses."):
            self.manager.get_health(60, METRIC)

    def test_get_labourer_status(self):
        self.manager.get_health = MagicMock(side_effect=[3, 2, 4])
        self.manager.register_task_manager(MagicMock())
        self.manager.fetch_metric_stats = MagicMock()
        self.health_metrics = dict()

        labourer = deepcopy(self.LABOURER)
        setattr(labourer, 'health_metrics', self.SAMPLE_HEALTH_METRICS)

        # Calling the actual tested method.
        result = self.manager.get_labourer_status(labourer)

        # The result should be the lowest of values get_health would have returned out of three calls.
        self.assertEqual(
            result, 2,
            f"Did not get the lowest health result. Received: {result}")

        # Check that get_health has been called three times (once per metric).
        self.manager.get_health.assert_called()
        self.assertEqual(self.manager.get_health.call_count, 3)

        self.manager.fetch_metric_stats.assert_called()
        self.assertEqual(self.manager.fetch_metric_stats.call_count, 3)

    def test_get_labourer_status__uses_cache(self):
        self.manager.get_health = MagicMock(return_value=0)
        self.manager.register_task_manager(MagicMock())
        self.manager.fetch_metric_stats = MagicMock()

        labourer = deepcopy(self.LABOURER)
        setattr(labourer, 'health_metrics', self.SAMPLE_HEALTH_METRICS)

        self.manager.health_metrics = {
            make_hash(labourer.health_metrics['test1']['details']): 42
        }

        # Calling the actual tested method.
        result = self.manager.get_labourer_status(labourer)

        # Assert calculator (get_health) was called 3 times.
        self.assertEqual(self.manager.get_health.call_count, 3)
        self.assertEqual(self.manager.fetch_metric_stats.call_count, 2,
                         "Fetcher was supposed to be called for only 2 metrics. One is in cache.")
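
    # A hypothetical helper illustrating the caching this test implies (both the
    # helper and its name are assumptions): get_labourer_status seems to key
    # self.health_metrics by make_hash of each metric's details and to call
    # fetch_metric_stats only on a cache miss.
    def _cached_metric_value(self, metric: dict):
        key = make_hash(metric['details'])
        if key not in self.health_metrics:
            self.health_metrics[key] = self.fetch_metric_stats(metric=metric['details'])
        return self.health_metrics[key]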

    def test_fetch_metric_stats__calls_boto(self):
        self.manager.cloudwatch_client = MagicMock()
        self.manager.cloudwatch_client.get_metric_statistics.return_value = self.SAMPLE_GET_METRICS_STATISTICS_RESPONSE
        self.manager.fetch_metric_stats(metric={'a': 1, 'b': {3: 42}})

        self.manager.cloudwatch_client.get_metric_statistics.assert_called_once()

    def test_fetch_metric_stats__calculates_time(self):
        MOCK_DATE = datetime.datetime(2019, 1, 1, 0, 42, 0)
        self.manager.cloudwatch_client = MagicMock()
        self.manager.cloudwatch_client.get_metric_statistics.return_value = self.SAMPLE_GET_METRICS_STATISTICS_RESPONSE

        with patch('datetime.datetime') as t:
            t.now.return_value = MOCK_DATE
            self.manager.fetch_metric_stats(metric={'a': 1, 'b': {3: 42}})

        args, kwargs = self.manager.cloudwatch_client.get_metric_statistics.call_args
        # print(kwargs)

        self.assertEqual(kwargs['EndTime'], MOCK_DATE)
        aggregation_slice = self.manager.config['default_metric_values']['MetricAggregationTimeSlice']
        self.assertEqual(kwargs['StartTime'],
                         MOCK_DATE - datetime.timedelta(seconds=aggregation_slice))

    def test_fetch_metric_stats__use_defaults(self):
        self.manager.cloudwatch_client = MagicMock()
        self.manager.cloudwatch_client.get_metric_statistics.return_value = self.SAMPLE_GET_METRICS_STATISTICS_RESPONSE

        self.manager.fetch_metric_stats(metric={'a': 1})

        _, kwargs = self.manager.cloudwatch_client.get_metric_statistics.call_args

        # Checking some default from hardcoded DEFAULT_CONFIG
        self.assertEqual(kwargs['Period'], self.manager.config['default_metric_values']['Period'])