def test_catching_up_time_build_tree(self):
        """Verify that build_tree() catches up in both directions:
        first when synergy_start_timeperiod is moved *delta* periods into
        the past, then when the perceived "now" is moved *delta* periods
        into the future."""
        delta = 5

        for tree in self.trees:
            assert isinstance(tree, MultiLevelTree)
            time_qualifier = tree.process_hierarchy.bottom_process.time_qualifier
            # shift the configured processing start *delta* periods into the past
            new_synergy_start_time = time_helper.increment_timeperiod(
                time_qualifier, self.actual_timeperiod, -delta)
            settings.settings[
                'synergy_start_timeperiod'] = new_synergy_start_time

            tree.build_tree()
            self._perform_assertions(tree, delta)

        for tree in self.trees:
            time_qualifier = tree.process_hierarchy.bottom_process.time_qualifier
            # pretend the wall clock advanced *delta* periods into the future
            new_actual_timeperiod = time_helper.increment_timeperiod(
                time_qualifier, self.actual_timeperiod, delta)

            new_actual_dt = time_helper.synergy_to_datetime(
                time_qualifier, new_actual_timeperiod)
            # monkey-patch the module-level actual_timeperiod so that
            # build_tree() computes "now" from the shifted datetime
            time_helper.actual_timeperiod = mock.MagicMock(
                side_effect=lambda time_qualifier: time_helper.
                datetime_to_synergy(time_qualifier, new_actual_dt))

            assert isinstance(tree, MultiLevelTree)
            tree.build_tree()
            # the tree now spans delta periods behind plus delta ahead of the original "now"
            self._perform_assertions(tree, 2 * delta)
Пример #2
0
    def test_catching_up_time_build_tree(self):
        """Verify that build_tree() catches up in both directions:
        first when synergy_start_timeperiod is moved *delta* periods into
        the past, then when the perceived "now" is moved *delta* periods
        into the future."""
        delta = 5

        for tree in self.trees:
            assert isinstance(tree, MultiLevelTree)
            time_qualifier = tree.process_hierarchy.bottom_process.time_qualifier
            # shift the configured processing start *delta* periods into the past
            new_synergy_start_time = time_helper.increment_timeperiod(time_qualifier,
                                                                      self.actual_timeperiod,
                                                                      -delta)
            settings.settings['synergy_start_timeperiod'] = new_synergy_start_time

            tree.build_tree()
            self._perform_assertions(tree, delta)

        for tree in self.trees:
            time_qualifier = tree.process_hierarchy.bottom_process.time_qualifier
            # pretend the wall clock advanced *delta* periods into the future
            new_actual_timeperiod = time_helper.increment_timeperiod(time_qualifier,
                                                                     self.actual_timeperiod,
                                                                     delta)

            new_actual_dt = time_helper.synergy_to_datetime(time_qualifier, new_actual_timeperiod)
            # monkey-patch the module-level actual_timeperiod so that
            # build_tree() computes "now" from the shifted datetime
            time_helper.actual_timeperiod = mock.MagicMock(
                side_effect=lambda time_qualifier: time_helper.datetime_to_synergy(time_qualifier, new_actual_dt))

            assert isinstance(tree, MultiLevelTree)
            tree.build_tree()
            # the tree now spans delta periods behind plus delta ahead of the original "now"
            self._perform_assertions(tree, 2 * delta)
Пример #3
0
    def test_hourly_translation(self):
        """Verify TimeperiodDict grouping of hourly timeperiods with
        time_grouping=3: each hour must translate to the upper boundary of
        its group, with the trailing incomplete group capped at hour 23."""
        test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)
        # fixture maps a [lower, upper) hour range to its expected grouped timeperiod
        # NOTE(review): the first range (0, 4) spans 4 hours while the others
        # span 3 -- presumably boundary semantics of the grouping; confirm
        fixture = OrderedDict()
        fixture[(0, 4)] = '2010120303'
        fixture[(4, 7)] = '2010120306'
        fixture[(7, 10)] = '2010120309'
        fixture[(10, 13)] = '2010120312'
        fixture[(13, 16)] = '2010120315'
        fixture[(16, 19)] = '2010120318'
        fixture[(19, 22)] = '2010120321'
        fixture[(22, 24)] = '2010120323'

        timeperiod = '2010120300'
        for boundaries, value in fixture.items():
            lower_boundary, upper_boundary = boundaries
            for i in range(lower_boundary, upper_boundary):
                actual_value = test_dict._translate_timeperiod(timeperiod)
                self.assertEqual(
                    actual_value,
                    value,
                    msg=
                    'failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                    .format(timeperiod, i, actual_value, value))
                # advance to the next hourly timeperiod for the next iteration
                timeperiod = time_helper.increment_timeperiod(
                    QUALIFIER_HOURLY, timeperiod)
Пример #4
0
    def test_validate_2(self):
        """
        test coverage:
        - request_skip

        Fix: removed the unused local ``mock_job`` (and its ``is_finished``
        flag) that was created on every loop iteration but never attached
        to anything.
        """
        next_timeperiod = time_helper.increment_timeperiod(self.the_node.time_qualifier, TEST_PRESET_TIMEPERIOD)
        # a younger sibling must exist for the node to qualify for skipping
        self.parent_node_mock.children[next_timeperiod] = mock.create_autospec(TreeNode)

        # populate the node with 10 children, all inactive and skipped
        for _index in range(10):
            child_mock = mock.create_autospec(TreeNode)
            child_mock.job_record = mock.create_autospec(Job)
            child_mock.job_record.is_active = False
            child_mock.job_record.is_skipped = True
            self.the_node.children[_index] = child_mock

        # verify if this node should be transferred to STATE_SKIPPED
        self.the_node.job_record.is_skipped = False
        self.time_table_mocked.reprocess_tree_node = mock.Mock()
        self.time_table_mocked.skip_tree_node = mock.Mock()
        self.time_table_mocked.assign_job_record = mock.Mock()
        self.the_node.validate()

        # assertions: only skip_tree_node must have been invoked
        self.assertEqual(len(self.time_table_mocked.assign_job_record.call_args_list), 0)
        self.assertEqual(len(self.time_table_mocked.reprocess_tree_node.call_args_list), 0)
        self.time_table_mocked.skip_tree_node.assert_called_once_with(self.the_node)
    def _process_state_in_progress(self, job_record):
        """ method that takes care of processing job records in STATE_IN_PROGRESS state

        :param job_record: job in STATE_IN_PROGRESS whose unit_of_work scope
            is either extended, re-created, or promoted to the FINAL RUN
        """
        time_qualifier = context.process_context[job_record.process_name].time_qualifier
        # exclusive upper boundary of the unit_of_work scope
        end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
        actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)
        is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
        uow = self.uow_dao.get_one(job_record.related_unit_of_work)

        # job is still "live": either it covers the current timeperiod
        # or its dependencies do not yet allow finalization
        if job_record.timeperiod == actual_timeperiod or is_job_finalizable is False:
            if uow.is_invalid or uow.is_requested:
                # current uow has not been processed yet. update it
                self.update_scope_of_processing(job_record.process_name, uow, job_record.timeperiod, end_timeperiod)
            else:
                # STATE_IN_PROGRESS, STATE_PROCESSED, STATE_CANCELED
                # create new uow to cover new inserts
                self._compute_and_transfer_to_progress(job_record.process_name, job_record.timeperiod,
                                                       end_timeperiod, job_record)

        elif job_record.timeperiod < actual_timeperiod and is_job_finalizable is True:
            # create new uow for FINAL RUN
            self._compute_and_transfer_to_final_run(job_record.process_name, job_record.timeperiod,
                                                    end_timeperiod, job_record)

        else:
            # a timeperiod greater than "now" indicates clock or configuration skew
            msg = 'job record %s has timeperiod from future %s vs current time %s' \
                  % (job_record.db_id, job_record.timeperiod, actual_timeperiod)
            self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)
    def _process_state_in_progress(self, job_record):
        """ method that takes care of processing job records in STATE_IN_PROGRESS state

        :param job_record: job in STATE_IN_PROGRESS; a new unit_of_work is
            published once the current one finishes, carrying the job towards
            *target_state*
        """
        def _process_state(target_state, uow):
            # closure: uses job_record, end_timeperiod and iteration from the enclosing scope
            if uow.is_active:
                # Large Job processing takes more than 1 tick of the Scheduler
                # Let the Job processing complete - do no updates to Scheduler records
                pass
            elif uow.is_finished:
                # create new uow to cover new inserts
                new_uow, is_duplicate = self.insert_and_publish_uow(job_record.process_name,
                                                                    job_record.timeperiod,
                                                                    end_timeperiod,
                                                                    0,
                                                                    iteration + 1)
                self.timetable.update_job_record(job_record, new_uow, target_state)

        time_qualifier = context.process_context[job_record.process_name].time_qualifier
        # exclusive upper boundary of the unit_of_work scope
        end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
        actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)
        is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
        uow = self.uow_dao.get_one(job_record.related_unit_of_work)
        # NOTE(review): uow.end_id is reused here as the iteration counter -- confirm
        iteration = int(uow.end_id)

        if job_record.timeperiod == actual_timeperiod or is_job_finalizable is False:
            _process_state(job.STATE_IN_PROGRESS, uow)

        elif job_record.timeperiod < actual_timeperiod and is_job_finalizable is True:
            _process_state(job.STATE_FINAL_RUN, uow)

        else:
            # a timeperiod greater than "now" indicates clock or configuration skew
            msg = 'Job record %s has timeperiod from future %s vs current time %s' \
                  % (job_record.db_id, job_record.timeperiod, actual_timeperiod)
            self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)
Пример #7
0
    def test_daily_translation(self):
        """Verify TimeperiodDict grouping of daily timeperiods with
        time_grouping=3: each day must translate to the upper boundary of
        its 3-day group, with the trailing incomplete group capped at day 31."""
        test_dict = TimeperiodDict(QUALIFIER_DAILY, 3)
        # fixture maps a [lower, upper) day range to its expected grouped timeperiod
        fixture = OrderedDict()
        fixture[(1, 4)] = '2010120300'
        fixture[(4, 7)] = '2010120600'
        fixture[(7, 10)] = '2010120900'
        fixture[(10, 13)] = '2010121200'
        fixture[(13, 16)] = '2010121500'
        fixture[(16, 19)] = '2010121800'
        fixture[(19, 22)] = '2010122100'
        fixture[(22, 25)] = '2010122400'
        fixture[(25, 28)] = '2010122700'
        fixture[(28, 31)] = '2010123000'
        fixture[(31, 32)] = '2010123100'

        timeperiod = '2010120100'
        for boundaries, value in fixture.items():
            lower_boundary, upper_boundary = boundaries
            for i in range(lower_boundary, upper_boundary):
                actual_value = test_dict._translate_timeperiod(timeperiod)
                self.assertEqual(
                    actual_value,
                    value,
                    msg=
                    'failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                    .format(timeperiod, i, actual_value, value))
                # advance to the next daily timeperiod for the next iteration
                timeperiod = time_helper.increment_timeperiod(
                    QUALIFIER_DAILY, timeperiod)
    def _process_single_document(self, document):
        """Compare a site's daily visit/pageview counters against the same
        site one week earlier; when either metric deviates by more than 20%
        in either direction, record the week-over-week deltas in the sink."""
        source_obj = self._init_source_object(document)
        # key[0] presumably holds the site identifier and key[1] the
        # timeperiod -- TODO confirm against the dao layer
        week_old_timeperiod = time_helper.increment_timeperiod(
            QUALIFIER_DAILY, source_obj.key[1], delta=-7)
        try:
            week_old_obj = self.site_dao.get_one(COLLECTION_SITE_DAILY,
                                                 source_obj.key[0],
                                                 week_old_timeperiod)

            # NOTE(review): raises ZeroDivisionError if the week-old counters
            # are 0; only LookupError is handled below -- confirm counters are
            # always positive
            visits_threshold_crossed = source_obj.number_of_visits / week_old_obj.number_of_visits < 0.8 \
                                       or source_obj.number_of_visits / week_old_obj.number_of_visits > 1.2

            pageviews_threshold_crossed = source_obj.number_of_pageviews / week_old_obj.number_of_pageviews < 0.8 \
                                          or source_obj.number_of_pageviews / week_old_obj.number_of_pageviews > 1.2

            if visits_threshold_crossed or pageviews_threshold_crossed:
                composite_key = self._init_sink_key(source_obj.key[0],
                                                    source_obj.key[1])
                target_obj = self._get_aggregated_object(composite_key)
                target_obj.number_of_visits = source_obj.number_of_visits - week_old_obj.number_of_visits
                target_obj.number_of_pageviews = source_obj.number_of_pageviews - week_old_obj.number_of_pageviews
        except LookupError:
            # no record a week ago: nothing to compare against
            self.logger.debug(
                'site statistics ({0}:{1}) was not found. skipping comparison'.
                format(source_obj.key[0], week_old_timeperiod))
    def _process_state_in_progress(self, job_record):
        """ method that takes care of processing job records in STATE_IN_PROGRESS state

        :param job_record: job in STATE_IN_PROGRESS; when the current
            unit_of_work has finished, a follow-up one is published and the
            job advances to the state computed by _compute_next_job_state
        """
        def _process_state(target_state, uow):
            # closure: uses job_record and end_timeperiod from the enclosing scope
            if uow.is_active:
                # Large Job processing takes more than 1 tick of the Scheduler
                # Let the Job processing complete - do no updates to Scheduler records
                pass
            elif uow.is_finished:
                # create new UOW to cover new inserts
                new_uow, is_duplicate = self.insert_and_publish_uow(job_record.process_name,
                                                                    job_record.timeperiod,
                                                                    end_timeperiod,
                                                                    0,
                                                                    int(uow.end_id) + 1)
                self.update_job(job_record, new_uow, target_state)

        time_qualifier = context.process_context[job_record.process_name].time_qualifier
        # exclusive upper boundary of the unit_of_work scope
        end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
        uow = self.uow_dao.get_one(job_record.related_unit_of_work)

        try:
            target_state = self._compute_next_job_state(job_record)
            _process_state(target_state, uow)
        except ValueError:
            # do no processing for the future timeperiods
            pass
Пример #10
0
        def _fire_worker(process_entry, prev_job_record):
            """Fetch the next job record for the process and hand it to the
            matching state machine; return the job record, or None when the
            job must not (or not yet) be triggered.

            Defined as a closure -- relies on self, LAG_5_MINUTES and the
            BLOCKING_* constants from the enclosing scope.
            """
            assert isinstance(process_entry, ManagedProcessEntry)
            job_record = self.timetable.get_next_job_record(process_entry.process_name)
            state_machine = self.timetable.state_machines[process_entry.state_machine_name]
            if job_record == prev_job_record:
                # avoid the loop
                return None

            if not state_machine.run_on_active_timeperiod:
                # the job may only fire LAG_5_MINUTES after its timeperiod has fully elapsed
                time_qualifier = process_entry.time_qualifier
                incremented_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
                dt_record_timestamp = time_helper.synergy_to_datetime(time_qualifier, incremented_timeperiod)
                dt_record_timestamp += timedelta(minutes=LAG_5_MINUTES)

                if datetime.utcnow() <= dt_record_timestamp:
                    self.logger.info('Job {0} for {1}@{2} will not be triggered until {3}.'
                                     .format(job_record.db_id,
                                             job_record.process_name,
                                             job_record.timeperiod,
                                             dt_record_timestamp.strftime('%Y-%m-%d %H:%M:%S')))
                    return None

            # dispatch on the process entry's blocking policy
            blocking_type = process_entry.blocking_type
            if blocking_type == BLOCKING_DEPENDENCIES:
                state_machine.manage_job_with_blocking_dependencies(job_record)
            elif blocking_type == BLOCKING_CHILDREN:
                state_machine.manage_job_with_blocking_children(job_record)
            elif blocking_type == BLOCKING_NORMAL:
                state_machine.manage_job(job_record)
            else:
                raise ValueError('Unknown managed process type {0}'.format(blocking_type))

            return job_record
Пример #11
0
    def freeruns(self):
        """Retrieve freerun records covering the configured time window and
        return them as an OrderedDict sorted by key."""
        current_period = time_helper.actual_timeperiod(QUALIFIER_DAILY)
        window_days = int(self.time_window[len(TIME_WINDOW_DAY_PREFIX) + 1:])
        window_start = time_helper.increment_timeperiod(QUALIFIER_DAILY, current_period, -window_days)

        statements = FreerunStatements(self.logger)
        records = statements.retrieve_records(window_start, self.is_unprocessed_only)
        return OrderedDict(sorted(records.items()))
Пример #12
0
 def _process_state_embryo(self, job_record):
     """ method that takes care of processing job records in STATE_EMBRYO state

     :param job_record: freshly created job; a unit_of_work covering
         [timeperiod, end_timeperiod) is published and the job is moved
         to STATE_IN_PROGRESS
     """
     time_qualifier = context.process_context[job_record.process_name].time_qualifier
     # exclusive upper boundary of the unit_of_work scope
     end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
     uow, is_duplicate = self.insert_and_publish_uow(job_record.process_name,
                                                     job_record.timeperiod,
                                                     end_timeperiod,
                                                     0,
                                                     0)
     self.timetable.update_job_record(job_record, uow, job.STATE_IN_PROGRESS)
Пример #13
0
    def test_container_methods(self):
        """Exercise TimeperiodDict container protocol (__setitem__,
        __getitem__, get, __len__, __iter__) with hourly grouping of 3:
        24 hourly writes must collapse into 8 grouped entries."""
        test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)
        timeperiod = '2010123100'
        for i in range(0, 24):
            # format {grouped_timeperiod: highest_loop_index}
            test_dict[timeperiod] = i
            timeperiod = time_helper.increment_timeperiod(
                QUALIFIER_HOURLY, timeperiod)

        # fixture maps a [lower, upper) hour range to the expected stored value,
        # i.e. the highest loop index that wrote into that group
        fixture = OrderedDict()
        fixture[(0, 4)] = 3
        fixture[(4, 7)] = 6
        fixture[(7, 10)] = 9
        fixture[(10, 13)] = 12
        fixture[(13, 16)] = 15
        fixture[(16, 19)] = 18
        fixture[(19, 22)] = 21
        fixture[(22, 24)] = 23

        timeperiod = '2010123100'
        for boundaries, value in fixture.items():
            lower_boundary, upper_boundary = boundaries
            for i in range(lower_boundary, upper_boundary):
                self.assertEqual(
                    test_dict[timeperiod],
                    value,
                    msg=
                    'failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                    .format(timeperiod, i, test_dict[timeperiod], value))
                # get method
                self.assertIsNotNone(test_dict.get(timeperiod), )

                timeperiod = time_helper.increment_timeperiod(
                    QUALIFIER_HOURLY, timeperiod)

        # test __len__ method
        self.assertEqual(len(test_dict), 8)

        # test __iter__ method
        counter = 0
        for _ in test_dict:
            counter += 1
        self.assertEqual(counter, 8)
Пример #14
0
def generate_site_composite_key(index, time_qualifier):
    """Derive a (domain_name, timeperiod) composite key from a flat index.

    Indexes are partitioned into buckets of 33 (a number exceeding both
    hours-per-day and days-per-month); every bucket maps to its own
    timeperiod, while the position within the bucket names the domain.
    """
    start_time = '20010303101010'  # YYYYMMDDHHmmSS

    bucket, position = divmod(index, 33)
    timeperiod = time_helper.cast_to_time_qualifier(time_qualifier, start_time)
    if bucket:
        timeperiod = time_helper.increment_timeperiod(time_qualifier,
                                                      timeperiod,
                                                      delta=bucket)

    return 'domain_name_{0}'.format(position), timeperiod
Пример #15
0
def generate_site_composite_key(index, time_qualifier):
    """Map a flat index onto a (domain_name, timeperiod) composite key.

    Indexes are grouped into buckets of 33 -- a number larger than both
    the hours in a day and the days in a month -- so each bucket advances
    the timeperiod by one step per bucket index.
    """
    start_time = '20010303101010'  # YYYYMMDDHHmmSS

    bucket = index // 33
    timeperiod = time_helper.cast_to_time_qualifier(time_qualifier, start_time)
    if bucket:
        timeperiod = time_helper.increment_timeperiod(time_qualifier,
                                                      timeperiod,
                                                      delta=bucket)

    return f'domain_name_{index - bucket * 33}', timeperiod
Пример #16
0
    def test_increment_time(self):
        """Increment by one period across hour/day/month/year boundaries,
        including rollover into the next day, month, and year."""
        cases = [
            (QUALIFIER_HOURLY,
             ['2011010100', '2011010112', '2011010123'],
             ['2011010101', '2011010113', '2011010200']),
            (QUALIFIER_DAILY,
             ['2011010100', '2011013100', '2010123100'],
             ['2011010200', '2011020100', '2011010100']),
            (QUALIFIER_MONTHLY,
             ['2011010000', '2011120000', '2011100000'],
             ['2011020000', '2012010000', '2011110000']),
            (QUALIFIER_YEARLY,
             ['2011000000', '2012000000', '2099000000'],
             ['2012000000', '2013000000', '2100000000']),
        ]
        for qualifier, stamps, expected in cases:
            for stamp, wanted in zip(stamps, expected):
                self.assertEqual(time_helper.increment_timeperiod(qualifier, stamp), wanted)
Пример #17
0
    def test_less_simple_build_tree(self):
        """Build every tree after moving synergy_start_timeperiod 105 bottom-level
        periods into the past, and assert the expected node count."""
        delta = 105

        for tree in self.trees:
            assert isinstance(tree, MultiLevelTree)
            qualifier = tree.process_hierarchy.bottom_process.time_qualifier
            # shift the configured processing start *delta* periods back
            start_timeperiod = time_helper.increment_timeperiod(
                qualifier, self.actual_timeperiod, -delta)
            settings.settings['synergy_start_timeperiod'] = start_timeperiod

            tree.build_tree()
            self._perform_assertions(tree, delta)
    def test_less_simple_build_tree(self):
        """Build every tree with the configured start moved 105 bottom-level
        periods into the past and verify the resulting node layout."""
        delta = 105

        for a_tree in self.trees:
            assert isinstance(a_tree, MultiLevelTree)
            bottom_qualifier = a_tree.process_hierarchy.bottom_process.time_qualifier
            shifted_start = time_helper.increment_timeperiod(bottom_qualifier,
                                                             self.actual_timeperiod,
                                                             -delta)
            # point the scheduler's start-of-time at the shifted period
            settings.settings['synergy_start_timeperiod'] = shifted_start

            a_tree.build_tree()
            self._perform_assertions(a_tree, delta)
Пример #19
0
    def compute_start_timeperiod(self, process_name, timeperiod):
        """ computes lowest *inclusive* timeperiod boundary for job to process
            for process with time_grouping == 1, it returns given timeperiod with no change
            for process with time_grouping != 1, it computes first timeperiod, not processed by the previous job run
            For instance: with time_grouping = 3, QUALIFIER_HOURLY, and timeperiod = 2016042018,
            the start_timeperiod will be = 2016042016 (computed as 2016042018 - 3 + 1)
        """
        time_grouping = context.process_context[process_name].time_grouping
        if time_grouping == 1:
            # no grouping: the job processes exactly the given timeperiod
            return timeperiod

        # step1: translate given timeperiod to the time grouped one
        process_hierarchy = self.timetable.get_tree(process_name).process_hierarchy
        timeperiod_dict = process_hierarchy[process_name].timeperiod_dict
        translated_timeperiod = timeperiod_dict._translate_timeperiod(timeperiod)

        # step 2: compute previous grouped period
        # NOTICE: simple `time_helper.increment_timeperiod(time_qualifier, timeperiod)` is insufficient
        #         as it does not address edge cases, such as the last day of the month or the last hour of the day
        # For instance: with time_grouping=3, QUALIFIER_DAILY, and 2016123100
        # the `increment_timeperiod` will yield 2016122800 instead of 2016123100
        time_qualifier = context.process_context[process_name].time_qualifier
        # scan backwards until we land on a timeperiod that is itself a group boundary;
        # if no boundary is found within time_grouping steps, the last probe is used
        for i in range(1, time_grouping + 1):
            prev_timeperiod = time_helper.increment_timeperiod(time_qualifier, translated_timeperiod, delta=-i)
            if prev_timeperiod == timeperiod_dict._translate_timeperiod(prev_timeperiod):
                # prev_timeperiod is currently at the last grouped timeperiod
                break

        # step 3: compute first exclusive timeperiod after the *prev_timeperiod*,
        # which becomes first inclusive timeperiod for this job run
        # NOTE(review): delta=-1 steps *backwards* from prev_timeperiod although the
        # comment above says "after" -- confirm this boundary probe is intentional
        over_the_edge_timeperiod = time_helper.increment_timeperiod(time_qualifier, prev_timeperiod, delta=-1)
        if prev_timeperiod != timeperiod_dict._translate_timeperiod(over_the_edge_timeperiod):
            # over_the_edge_timeperiod fell into previous day or month or year
            # *prev_timeperiod* points to the first month, first day of the month or 00 hour
            start_timeperiod = prev_timeperiod
        else:
            start_timeperiod = self.compute_end_timeperiod(process_name, prev_timeperiod)

        return start_timeperiod
Пример #20
0
    def test_container_methods(self):
        """Exercise TimeperiodDict container protocol (__setitem__,
        __getitem__, get, __len__, __iter__) with hourly grouping of 3:
        24 hourly writes must collapse into 8 grouped entries."""
        test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)
        timeperiod = '2010123100'
        for i in range(0, 24):
            # format {grouped_timeperiod: highest_loop_index}
            test_dict[timeperiod] = i
            timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)

        # fixture maps a [lower, upper) hour range to the expected stored value,
        # i.e. the highest loop index that wrote into that group
        fixture = OrderedDict()
        fixture[(0, 4)] = 3
        fixture[(4, 7)] = 6
        fixture[(7, 10)] = 9
        fixture[(10, 13)] = 12
        fixture[(13, 16)] = 15
        fixture[(16, 19)] = 18
        fixture[(19, 22)] = 21
        fixture[(22, 24)] = 23

        timeperiod = '2010123100'
        for boundaries, value in fixture.items():
            lower_boundary, upper_boundary = boundaries
            for i in range(lower_boundary, upper_boundary):
                self.assertEqual(test_dict[timeperiod], value,
                                 msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'.
                                 format(timeperiod, i, test_dict[timeperiod], value))
                # get method
                self.assertIsNotNone(test_dict.get(timeperiod), )

                timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)

        # test __len__ method
        self.assertEqual(len(test_dict), 8)

        # test __iter__ method
        counter = 0
        for _ in test_dict:
            counter += 1
        self.assertEqual(counter, 8)
Пример #21
0
    def __init__(self, request, **values):
        """Parse dashboard query arguments from the HTTP request.

        Reads *time_window* plus the include_* checkbox flags; the request is
        considered valid only when time_window is present, in which case the
        query start is computed time_window days back from today.
        """
        super(DashboardHandler, self).__init__(request, **values)

        self.time_window = self.request.args.get('time_window')
        # checkbox arguments arrive as 'on' when set
        self.is_include_running = self.request.args.get('include_running') == 'on'
        self.is_include_processed = self.request.args.get('include_processed') == 'on'
        self.is_include_noop = self.request.args.get('include_noop') == 'on'
        self.is_include_failed = self.request.args.get('include_failed') == 'on'
        self.is_include_disabled = self.request.args.get('include_disabled') == 'on'
        self.is_request_valid = bool(self.time_window)

        if self.is_request_valid:
            actual_timeperiod = time_helper.actual_timeperiod(QUALIFIER_DAILY)
            # NOTE(review): unlike freeruns(), time_window is parsed as a plain
            # int with no prefix stripping -- confirm the formats differ on purpose
            delta = int(self.time_window)
            self.query_start_timeperiod = time_helper.increment_timeperiod(QUALIFIER_DAILY, actual_timeperiod, -delta)
    def _process_state_embryo(self, job_record):
        """ method that takes care of processing job records in STATE_EMBRYO state

        :param job_record: freshly created job; a unit_of_work covering
            [timeperiod, end_timeperiod) is published, and the job advances to
            the state computed by _compute_next_job_state
        """
        time_qualifier = context.process_context[job_record.process_name].time_qualifier
        # exclusive upper boundary of the unit_of_work scope
        end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
        uow, is_duplicate = self.insert_and_publish_uow(job_record.process_name,
                                                        job_record.timeperiod,
                                                        end_timeperiod,
                                                        0,
                                                        0)

        try:
            target_state = self._compute_next_job_state(job_record)
            self.update_job(job_record, uow, target_state)
        except ValueError:
            # do no processing for the future timeperiods
            pass
    def _process_state_in_progress(self, job_record):
        """ method that takes care of processing job records in STATE_IN_PROGRESS state

        :param job_record: job in STATE_IN_PROGRESS, routed to either the
            non-finalizable or finalizable processing path
        """
        time_qualifier = context.process_context[job_record.process_name].time_qualifier
        # exclusive upper boundary of the unit_of_work scope
        end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
        actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)
        is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
        uow = self.uow_dao.get_one(job_record.related_unit_of_work)

        # job is still "live": either it covers the current timeperiod
        # or its dependencies do not yet allow finalization
        if job_record.timeperiod == actual_timeperiod or is_job_finalizable is False:
            self.__process_non_finalizable_job(job_record, uow, job_record.timeperiod, end_timeperiod)

        elif job_record.timeperiod < actual_timeperiod and is_job_finalizable is True:
            self.__process_finalizable_job(job_record, uow)

        else:
            # a timeperiod greater than "now" indicates clock or configuration skew
            msg = 'Job {0} has timeperiod {1} from the future vs current timeperiod {2}' \
                  .format(job_record.db_id, job_record.timeperiod, actual_timeperiod)
            self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)
Пример #24
0
    def validate(self):
        """method traverse tree and performs following activities:
        * requests a job record in STATE_EMBRYO if no job record is currently assigned to the node
        * requests nodes for reprocessing, if STATE_PROCESSED node relies on unfinalized nodes
        * requests node for skipping if it is daily node and all 24 of its Hourly nodes are in STATE_SKIPPED state"""

        # step 0: request Job record if current one is not set
        if self.job_record is None:
            self.request_embryo_job_record()

        # step 1: define if current node has a younger sibling
        next_timeperiod = time_helper.increment_timeperiod(self.time_qualifier, self.timeperiod)
        has_younger_sibling = next_timeperiod in self.parent.children

        # step 2: define if all children are done and if perhaps they all are in STATE_SKIPPED
        all_children_skipped = True
        all_children_finished = True
        for timeperiod in self.children:
            child = self.children[timeperiod]
            # depth-first: make sure every child is validated before this node decides
            child.validate()

            # child.validate() above requested an embryo record if one was
            # missing, so child.job_record is expected to be set here
            if child.job_record.is_active:
                all_children_finished = False
            if not child.job_record.is_skipped:
                all_children_skipped = False

        # step 3: request this node's reprocessing if it is enroute to STATE_PROCESSED
        # while some of its children are still performing processing
        if all_children_finished is False and self.job_record.is_finished:
            self.request_reprocess()

        # step 4: verify if this node should be transferred to STATE_SKIPPED
        # algorithm is following:
        # point a: node must have children
        # point b: existence of a younger sibling means that the tree contains another node of the same level
        # thus - should the tree.build_timeperiod be not None - the children level of this node is fully constructed
        # point c: if all children of this node are in STATE_SKIPPED then we will set this node state to STATE_SKIPPED
        if len(self.children) != 0 \
                and all_children_skipped \
                and self.tree.build_timeperiod is not None \
                and has_younger_sibling is True \
                and not self.job_record.is_skipped:
            self.request_skip()
Пример #25
0
    def build_tree(self, rebuild=False):
        """ method builds tree by iterating from the synergy_start_timeperiod to the current time
            and inserting corresponding nodes

        :param rebuild: when True, the iteration restarts from the configured
            synergy_start_timeperiod instead of resuming from the timeperiod
            reached by the previous build
        """

        time_qualifier = self.process_hierarchy.bottom_process.time_qualifier
        process_name = self.process_hierarchy.bottom_process.process_name
        if rebuild or self.build_timeperiod is None:
            # full build: start from the configured beginning of time
            timeperiod = settings.settings['synergy_start_timeperiod']
        else:
            # incremental build: resume where the previous build stopped
            timeperiod = self.build_timeperiod

        timeperiod = cast_to_time_qualifier(time_qualifier, timeperiod)
        actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)

        # insert one node per timeperiod up to and including "now";
        # get_node creates the node if it does not yet exist
        while actual_timeperiod >= timeperiod:
            self.get_node(process_name, timeperiod)
            timeperiod = time_helper.increment_timeperiod(time_qualifier, timeperiod)

        # remember how far the tree has been built for the next incremental run
        self.build_timeperiod = actual_timeperiod
Пример #26
0
    def _process_single_document(self, document):
        """Compare a site's daily visit/pageview counters against the same
        site one week earlier; when either metric deviates by more than 20%
        in either direction, record the week-over-week deltas in the sink.

        Fix: logger.debug now uses lazy %-style arguments instead of eagerly
        formatting the message with the ``%`` operator, so the string is only
        built when DEBUG logging is enabled.
        """
        source_obj = self._init_source_object(document)
        # key[0] presumably holds the site identifier and key[1] the
        # timeperiod -- TODO confirm against the dao layer
        week_old_timeperiod = time_helper.increment_timeperiod(QUALIFIER_DAILY, source_obj.key[1], delta=-7)
        try:
            week_old_obj = self.site_dao.get_one(COLLECTION_SITE_DAILY, source_obj.key[0], week_old_timeperiod)

            # NOTE(review): raises ZeroDivisionError if the week-old counters
            # are 0; only LookupError is handled below -- confirm counters are
            # always positive
            visits_threshold_crossed = source_obj.number_of_visits / week_old_obj.number_of_visits < 0.8 \
                                       or source_obj.number_of_visits / week_old_obj.number_of_visits > 1.2

            pageviews_threshold_crossed = source_obj.number_of_pageviews / week_old_obj.number_of_pageviews < 0.8 \
                                          or source_obj.number_of_pageviews / week_old_obj.number_of_pageviews > 1.2

            if visits_threshold_crossed or pageviews_threshold_crossed:
                composite_key = self._init_sink_key(source_obj.key[0], source_obj.key[1])
                target_obj = self._get_aggregated_object(composite_key)
                target_obj.number_of_visits = source_obj.number_of_visits - week_old_obj.number_of_visits
                target_obj.number_of_pageviews = source_obj.number_of_pageviews - week_old_obj.number_of_pageviews
        except LookupError:
            # no record a week ago: nothing to compare against
            self.logger.debug('site statistics (%s:%s) was not found. skipping comparison',
                              source_obj.key[0], week_old_timeperiod)
Пример #27
0
    def test_hourly_translation(self):
        """Verify TimeperiodDict grouping of hourly timeperiods with
        time_grouping=3: each hour must translate to the upper boundary of
        its group, with the trailing incomplete group capped at hour 23."""
        test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)
        # fixture maps a [lower, upper) hour range to its expected grouped timeperiod
        fixture = OrderedDict()
        fixture[(0, 4)] = '2010120303'
        fixture[(4, 7)] = '2010120306'
        fixture[(7, 10)] = '2010120309'
        fixture[(10, 13)] = '2010120312'
        fixture[(13, 16)] = '2010120315'
        fixture[(16, 19)] = '2010120318'
        fixture[(19, 22)] = '2010120321'
        fixture[(22, 24)] = '2010120323'

        timeperiod = '2010120300'
        for boundaries, value in fixture.items():
            lower_boundary, upper_boundary = boundaries
            for i in range(lower_boundary, upper_boundary):
                actual_value = test_dict._translate_timeperiod(timeperiod)
                self.assertEqual(actual_value, value,
                                 msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'.
                                 format(timeperiod, i, actual_value, value))
                # advance to the next hourly timeperiod for the next iteration
                timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)
Пример #28
0
def create_unit_of_work(process_name,
                        start_id,
                        end_id,
                        timeperiod='INVALID_TIMEPERIOD',
                        state=unit_of_work.STATE_REQUESTED,
                        created_at=None,
                        submitted_at=None,
                        uow_id=None):
    """ method creates and returns unit_of_work

    :param process_name: key into context.process_context identifying the process
    :param start_id: id of the first record in the range covered by the uow
    :param end_id: id of the last record in the range covered by the uow
    :param timeperiod: synergy timeperiod string; also used as start_timeperiod
    :param state: initial unit-of-work state
    :param created_at: creation timestamp; defaults to datetime.utcnow() at call time
    :param submitted_at: submission timestamp; defaults to datetime.utcnow() at call time
    :param uow_id: optional db id to assign to the created record
    :return: populated UnitOfWork instance
    """
    # FIX: the original signature used `created_at=datetime.utcnow()` as a default,
    # which is evaluated once at import time - every call would share that frozen
    # timestamp. None-sentinels restore the intended fresh-per-call behavior.
    if created_at is None:
        created_at = datetime.utcnow()
    if submitted_at is None:
        submitted_at = datetime.utcnow()

    process_obj = context.process_context[process_name]

    try:
        end_timeperiod = time_helper.increment_timeperiod(
            process_obj.time_qualifier, timeperiod)
    except Exception:
        # FIX: narrowed from a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit. An un-incrementable timeperiod (e.g. the
        # 'INVALID_TIMEPERIOD' placeholder) deliberately falls back to the input.
        end_timeperiod = timeperiod

    uow = UnitOfWork()
    uow.process_name = process_name
    uow.timeperiod = timeperiod
    uow.start_id = start_id
    uow.end_id = end_id
    uow.start_timeperiod = timeperiod
    uow.end_timeperiod = end_timeperiod
    uow.created_at = created_at
    uow.submitted_at = submitted_at
    # source/sink are optional attributes on the process context entry
    uow.source = process_obj.source if hasattr(process_obj, 'source') else None
    uow.sink = process_obj.sink if hasattr(process_obj, 'sink') else None
    uow.state = state
    uow.unit_of_work_type = unit_of_work.TYPE_MANAGED
    uow.number_of_retries = 0
    uow.arguments = process_obj.arguments

    if uow_id is not None:
        uow.db_id = uow_id

    return uow
Пример #29
0
    def test_daily_translation(self):
        """ walks one full month of daily timeperiods and verifies that
        TimeperiodDict with grouping step 3 maps every day onto the
        upper boundary of its 3-day bucket """
        grouping_dict = TimeperiodDict(QUALIFIER_DAILY, 3)

        # (lower_day, upper_day) -> expected translated timeperiod
        expected_buckets = OrderedDict([
            ((1, 4), '2010120300'),
            ((4, 7), '2010120600'),
            ((7, 10), '2010120900'),
            ((10, 13), '2010121200'),
            ((13, 16), '2010121500'),
            ((16, 19), '2010121800'),
            ((19, 22), '2010122100'),
            ((22, 25), '2010122400'),
            ((25, 28), '2010122700'),
            ((28, 31), '2010123000'),
            ((31, 32), '2010123100'),
        ])

        current_timeperiod = '2010120100'
        for (lower, upper), expected in expected_buckets.items():
            for day_index in range(lower, upper):
                translated = grouping_dict._translate_timeperiod(current_timeperiod)
                self.assertEqual(translated, expected,
                                 msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'.
                                 format(current_timeperiod, day_index, translated, expected))
                # advance the walker by one day for the next iteration
                current_timeperiod = time_helper.increment_timeperiod(QUALIFIER_DAILY, current_timeperiod)
__author__ = 'Bohdan Mushkevych'

from synergy.db.error import DuplicateKeyError
from synergy.db.model.job import Job
from synergy.system import time_helper
from synergy.system.time_qualifier import QUALIFIER_HOURLY
from tests.base_fixtures import create_unit_of_work

# fixed hourly timeperiod in the past, for tests that need a deterministic value
TEST_PRESET_TIMEPERIOD = '2013010122'
# hourly timeperiod matching the wall clock at module import time
TEST_ACTUAL_TIMEPERIOD = time_helper.actual_timeperiod(QUALIFIER_HOURLY)
# one hour before the actual timeperiod
TEST_PAST_TIMEPERIOD = time_helper.increment_timeperiod(QUALIFIER_HOURLY, TEST_ACTUAL_TIMEPERIOD, delta=-1)
# one hour after the actual timeperiod (default delta=+1 - presumably; TODO confirm in time_helper)
TEST_FUTURE_TIMEPERIOD = time_helper.increment_timeperiod(QUALIFIER_HOURLY, TEST_ACTUAL_TIMEPERIOD)


def then_raise_uw(*_unused):
    """mocks AbstractStateMachine._insert_uow and AbstractStateMachine.insert_and_publish_uow"""
    # accept and discard any positional arguments; unconditionally fail
    raise UserWarning('Simulated UserWarning Exception')


def mock_insert_uow_return_uow(process_name, timeperiod, start_timeperiod, end_timeperiod, start_id, end_id):
    """mocks AbstractStateMachine._insert_uow"""
    # the timeperiod boundaries are ignored; hand back a ready-made uow stub
    uow_stub = create_unit_of_work(process_name, start_id, end_id, timeperiod, uow_id='a_uow_id')
    return uow_stub


def mock_insert_uow_raise_dpk(process_name, timeperiod, start_timeperiod, end_timeperiod, start_id, end_id):
    """mocks AbstractStateMachine._insert_uow"""
    # simulate the DB layer rejecting the insert as a duplicate key
    simulated_error = DuplicateKeyError(process_name, timeperiod, start_id, end_id, 'Simulated Exception')
    raise simulated_error


def then_raise_dpk(job_record, start_id, end_id):
    """mocks AbstractStateMachine.insert_and_publish_uow"""
 def _process_state_embryo(self, job_record):
     """ method that takes care of processing job records in STATE_EMBRYO state"""
     # resolve the process' time granularity, derive the exclusive upper
     # boundary of the job's timeperiod, and hand off to the progress stage
     process_entry = context.process_context[job_record.process_name]
     exclusive_end = time_helper.increment_timeperiod(process_entry.time_qualifier,
                                                      job_record.timeperiod)
     self._compute_and_transfer_to_progress(job_record.process_name,
                                            job_record.timeperiod,
                                            exclusive_end,
                                            job_record)
Пример #32
0
    def test_shift_time_by_delta(self):
        """ verifies increment_timeperiod for every qualifier with positive and
        negative deltas, including month/year roll-overs """
        # (qualifier, delta, input stamps, expected shifted stamps)
        cases = [
            (QUALIFIER_HOURLY, 3,
             ['2011010100', '2011010112', '2011010123'],
             ['2011010103', '2011010115', '2011010202']),
            (QUALIFIER_HOURLY, -3,
             ['2011010100', '2011010112', '2011010123'],
             ['2010123121', '2011010109', '2011010120']),
            (QUALIFIER_DAILY, 3,
             ['2011010100', '2011013100', '2010123100'],
             ['2011010400', '2011020300', '2011010300']),
            (QUALIFIER_DAILY, -3,
             ['2011010100', '2011013100', '2010123100'],
             ['2010122900', '2011012800', '2010122800']),
            (QUALIFIER_MONTHLY, 3,
             ['2011010000', '2011090000', '2010120000'],
             ['2011040000', '2011120000', '2011030000']),
            (QUALIFIER_MONTHLY, -3,
             ['2011010000', '2011090000', '2010120000'],
             ['2010100000', '2011060000', '2010090000']),
            (QUALIFIER_MONTHLY, 25,
             ['2011010000', '2011090000', '2010120000'],
             ['2013020000', '2013100000', '2013010000']),
            (QUALIFIER_MONTHLY, -25,
             ['2011010000', '2011090000', '2010120000'],
             ['2008120000', '2009080000', '2008110000']),
            (QUALIFIER_MONTHLY, -1,
             ['2011010000', '2011120000', '2011100000'],
             ['2010120000', '2011110000', '2011090000']),
            (QUALIFIER_YEARLY, 5,
             ['2011000000', '2012000000', '2099000000'],
             ['2016000000', '2017000000', '2104000000']),
            (QUALIFIER_YEARLY, -5,
             ['2011000000', '2012000000', '2099000000'],
             ['2006000000', '2007000000', '2094000000']),
        ]

        for qualifier, delta, stamps, expected in cases:
            for idx, stamp in enumerate(stamps):
                self.assertEqual(time_helper.increment_timeperiod(qualifier, stamp, delta=delta),
                                 expected[idx])
Пример #33
0
 def compute_end_timeperiod(self, process_name, timeperiod):
     """ computes first *exclusive* timeperiod for job to process """
     # the process' registered context entry dictates the time granularity
     qualifier = context.process_context[process_name].time_qualifier
     return time_helper.increment_timeperiod(qualifier, timeperiod)