def test_catching_up_time_build_tree(self):
    """Verify tree.build_tree() catches up when the actual time moves forward.

    Phase 1: shift synergy_start_timeperiod *delta* periods into the past and
    build the tree; expect *delta* nodes.
    Phase 2: mock time_helper.actual_timeperiod to report a time *delta*
    periods in the future and rebuild; expect 2 * delta nodes in total.
    """
    delta = 5

    # phase 1: move the configured start time into the past and build
    for tree in self.trees:
        assert isinstance(tree, MultiLevelTree)
        qualifier = tree.process_hierarchy.bottom_process.time_qualifier
        start_timeperiod = time_helper.increment_timeperiod(
            qualifier, self.actual_timeperiod, -delta)
        settings.settings['synergy_start_timeperiod'] = start_timeperiod
        tree.build_tree()
        self._perform_assertions(tree, delta)

    # phase 2: pretend "now" is delta periods later and rebuild
    for tree in self.trees:
        qualifier = tree.process_hierarchy.bottom_process.time_qualifier
        future_timeperiod = time_helper.increment_timeperiod(
            qualifier, self.actual_timeperiod, delta)
        future_dt = time_helper.synergy_to_datetime(qualifier, future_timeperiod)
        time_helper.actual_timeperiod = mock.MagicMock(
            side_effect=lambda time_qualifier: time_helper.datetime_to_synergy(
                time_qualifier, future_dt))
        assert isinstance(tree, MultiLevelTree)
        tree.build_tree()
        self._perform_assertions(tree, 2 * delta)
def test_catching_up_time_build_tree(self):
    """Check that build_tree() back-fills nodes when the wall clock advances.

    First pass: start timeperiod pushed *delta* periods back -> delta nodes.
    Second pass: actual_timeperiod mocked delta periods ahead -> 2*delta nodes.
    """
    delta = 5
    for tree in self.trees:
        assert isinstance(tree, MultiLevelTree)
        tq = tree.process_hierarchy.bottom_process.time_qualifier
        shifted_start = time_helper.increment_timeperiod(tq, self.actual_timeperiod, -delta)
        settings.settings['synergy_start_timeperiod'] = shifted_start
        tree.build_tree()
        self._perform_assertions(tree, delta)

    for tree in self.trees:
        tq = tree.process_hierarchy.bottom_process.time_qualifier
        shifted_period = time_helper.increment_timeperiod(tq, self.actual_timeperiod, delta)
        shifted_dt = time_helper.synergy_to_datetime(tq, shifted_period)
        # make time_helper report the shifted moment as "now"
        time_helper.actual_timeperiod = mock.MagicMock(
            side_effect=lambda time_qualifier: time_helper.datetime_to_synergy(time_qualifier, shifted_dt))
        assert isinstance(tree, MultiLevelTree)
        tree.build_tree()
        self._perform_assertions(tree, 2 * delta)
def test_hourly_translation(self):
    """Walk a full day hour-by-hour and verify each hour is translated
    onto the upper boundary of its 3-hour group."""
    test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)
    # (lower, upper) hour range -> expected grouped timeperiod
    fixture = OrderedDict([
        ((0, 4), '2010120303'),
        ((4, 7), '2010120306'),
        ((7, 10), '2010120309'),
        ((10, 13), '2010120312'),
        ((13, 16), '2010120315'),
        ((16, 19), '2010120318'),
        ((19, 22), '2010120321'),
        ((22, 24), '2010120323'),
    ])

    timeperiod = '2010120300'
    for (lower, upper), expected in fixture.items():
        for hour in range(lower, upper):
            translated = test_dict._translate_timeperiod(timeperiod)
            self.assertEqual(
                translated, expected,
                msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                .format(timeperiod, hour, translated, expected))
            timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)
def test_validate_2(self):
    """ test coverage:
        - request_skip

    Sets up a node whose children are all inactive and skipped, gives it a
    younger sibling, and verifies that validate() transfers the node itself
    to STATE_SKIPPED (skip_tree_node called exactly once, nothing else).
    """
    # give the node a younger sibling so the skip-transfer precondition holds
    next_timeperiod = time_helper.increment_timeperiod(self.the_node.time_qualifier, TEST_PRESET_TIMEPERIOD)
    self.parent_node_mock.children[next_timeperiod] = mock.create_autospec(TreeNode)

    # populate 10 children, all of them inactive and skipped
    # (the original also built an unused `mock_job` per iteration; removed as dead code)
    for _index in range(10):
        child_mock = mock.create_autospec(TreeNode)
        child_mock.job_record = mock.create_autospec(Job)
        child_mock.job_record.is_active = False
        child_mock.job_record.is_skipped = True
        self.the_node.children[_index] = child_mock

    # verify if this node should be transferred to STATE_SKIPPED
    self.the_node.job_record.is_skipped = False
    self.time_table_mocked.reprocess_tree_node = mock.Mock()
    self.time_table_mocked.skip_tree_node = mock.Mock()
    self.time_table_mocked.assign_job_record = mock.Mock()

    self.the_node.validate()

    # assertions:
    self.assertEqual(len(self.time_table_mocked.assign_job_record.call_args_list), 0)
    self.assertEqual(len(self.time_table_mocked.reprocess_tree_node.call_args_list), 0)
    self.time_table_mocked.skip_tree_node.assert_called_once_with(self.the_node)
def _process_state_in_progress(self, job_record):
    """ method that takes care of processing job records in STATE_IN_PROGRESS state

    Branches on whether the job's timeperiod is current/past and whether the
    timetable considers the record finalizable, then either extends the scope
    of the current unit_of_work or creates a new one (regular or FINAL RUN).
    """
    time_qualifier = context.process_context[job_record.process_name].time_qualifier
    # end_timeperiod is the first *exclusive* timeperiod for the uow
    end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
    actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)
    is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
    uow = self.uow_dao.get_one(job_record.related_unit_of_work)

    if job_record.timeperiod == actual_timeperiod or is_job_finalizable is False:
        if uow.is_invalid or uow.is_requested:
            # current uow has not been processed yet. update it
            self.update_scope_of_processing(job_record.process_name, uow,
                                            job_record.timeperiod, end_timeperiod)
        else:
            # STATE_IN_PROGRESS, STATE_PROCESSED, STATE_CANCELED
            # create new uow to cover new inserts
            self._compute_and_transfer_to_progress(job_record.process_name, job_record.timeperiod,
                                                   end_timeperiod, job_record)
    elif job_record.timeperiod < actual_timeperiod and is_job_finalizable is True:
        # create new uow for FINAL RUN
        self._compute_and_transfer_to_final_run(job_record.process_name, job_record.timeperiod,
                                                end_timeperiod, job_record)
    else:
        # job_record.timeperiod > actual_timeperiod: nothing to do but log the anomaly
        msg = 'job record %s has timeperiod from future %s vs current time %s' \
              % (job_record.db_id, job_record.timeperiod, actual_timeperiod)
        self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)
def _process_state_in_progress(self, job_record):
    """ method that takes care of processing job records in STATE_IN_PROGRESS state"""

    def _process_state(target_state, uow):
        # transitions the job to *target_state* once the current uow completes
        if uow.is_active:
            # Large Job processing takes more than 1 tick of the Scheduler
            # Let the Job processing complete - do no updates to Scheduler records
            pass
        elif uow.is_finished:
            # create new uow to cover new inserts
            new_uow, is_duplicate = self.insert_and_publish_uow(job_record.process_name,
                                                                job_record.timeperiod,
                                                                end_timeperiod,
                                                                0,
                                                                iteration + 1)
            self.timetable.update_job_record(job_record, new_uow, target_state)
        # NOTE(review): a uow that is neither active nor finished (e.g. cancelled)
        # is silently ignored here -- confirm this is intended

    time_qualifier = context.process_context[job_record.process_name].time_qualifier
    # end_timeperiod is the first *exclusive* timeperiod for the uow
    end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
    actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)
    is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
    uow = self.uow_dao.get_one(job_record.related_unit_of_work)
    # uow.end_id holds the iteration counter for this job's units of work
    iteration = int(uow.end_id)

    if job_record.timeperiod == actual_timeperiod or is_job_finalizable is False:
        _process_state(job.STATE_IN_PROGRESS, uow)
    elif job_record.timeperiod < actual_timeperiod and is_job_finalizable is True:
        _process_state(job.STATE_FINAL_RUN, uow)
    else:
        # timeperiod lies in the future: log the anomaly, take no action
        msg = 'Job record %s has timeperiod from future %s vs current time %s' \
              % (job_record.db_id, job_record.timeperiod, actual_timeperiod)
        self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)
def test_daily_translation(self):
    """Walk December 2010 day-by-day and verify each day is translated
    onto the upper boundary of its 3-day group."""
    test_dict = TimeperiodDict(QUALIFIER_DAILY, 3)
    # (lower, upper) day range -> expected grouped timeperiod
    fixture = OrderedDict([
        ((1, 4), '2010120300'),
        ((4, 7), '2010120600'),
        ((7, 10), '2010120900'),
        ((10, 13), '2010121200'),
        ((13, 16), '2010121500'),
        ((16, 19), '2010121800'),
        ((19, 22), '2010122100'),
        ((22, 25), '2010122400'),
        ((25, 28), '2010122700'),
        ((28, 31), '2010123000'),
        ((31, 32), '2010123100'),
    ])

    timeperiod = '2010120100'
    for (lower, upper), expected in fixture.items():
        for day in range(lower, upper):
            translated = test_dict._translate_timeperiod(timeperiod)
            self.assertEqual(
                translated, expected,
                msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                .format(timeperiod, day, translated, expected))
            timeperiod = time_helper.increment_timeperiod(QUALIFIER_DAILY, timeperiod)
def _process_single_document(self, document):
    """Compare today's site statistics against the week-old record and store
    the delta whenever visits or pageviews drifted by more than +/-20%."""
    source_obj = self._init_source_object(document)
    week_old_timeperiod = time_helper.increment_timeperiod(
        QUALIFIER_DAILY, source_obj.key[1], delta=-7)
    try:
        week_old_obj = self.site_dao.get_one(
            COLLECTION_SITE_DAILY, source_obj.key[0], week_old_timeperiod)

        # NOTE(review): raises ZeroDivisionError if the week-old counters are 0
        # -- confirm upstream guarantees non-zero counts
        visits_ratio = source_obj.number_of_visits / week_old_obj.number_of_visits
        pageviews_ratio = source_obj.number_of_pageviews / week_old_obj.number_of_pageviews
        visits_threshold_crossed = visits_ratio < 0.8 or visits_ratio > 1.2
        pageviews_threshold_crossed = pageviews_ratio < 0.8 or pageviews_ratio > 1.2

        if visits_threshold_crossed or pageviews_threshold_crossed:
            composite_key = self._init_sink_key(source_obj.key[0], source_obj.key[1])
            target_obj = self._get_aggregated_object(composite_key)
            target_obj.number_of_visits = \
                source_obj.number_of_visits - week_old_obj.number_of_visits
            target_obj.number_of_pageviews = \
                source_obj.number_of_pageviews - week_old_obj.number_of_pageviews
    except LookupError:
        # no week-old record to compare against
        self.logger.debug(
            'site statistics ({0}:{1}) was not found. skipping comparison'.format(
                source_obj.key[0], week_old_timeperiod))
def _process_state_in_progress(self, job_record):
    """ method that takes care of processing job records in STATE_IN_PROGRESS state"""

    def _process_state(target_state, uow):
        # moves the job into *target_state* once its current uow has finished
        if uow.is_active:
            # Large Job processing takes more than 1 tick of the Scheduler
            # Let the Job processing complete - do no updates to Scheduler records
            pass
        elif uow.is_finished:
            # create new UOW to cover new inserts
            new_uow, is_duplicate = self.insert_and_publish_uow(job_record.process_name,
                                                                job_record.timeperiod,
                                                                end_timeperiod,
                                                                0,
                                                                int(uow.end_id) + 1)
            self.update_job(job_record, new_uow, target_state)
        # NOTE(review): a uow that is neither active nor finished is silently
        # ignored here -- confirm this is intended

    time_qualifier = context.process_context[job_record.process_name].time_qualifier
    # end_timeperiod is the first *exclusive* timeperiod for the uow
    end_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
    uow = self.uow_dao.get_one(job_record.related_unit_of_work)
    try:
        target_state = self._compute_next_job_state(job_record)
        _process_state(target_state, uow)
    except ValueError:
        # do no processing for the future timeperiods
        pass
def _fire_worker(process_entry, prev_job_record):
    """Trigger the state machine for the next job record of *process_entry*.

    Returns the processed job record, or None when the record equals
    *prev_job_record* (loop guard) or its timeperiod is still too fresh
    to be triggered.
    """
    assert isinstance(process_entry, ManagedProcessEntry)
    job_record = self.timetable.get_next_job_record(process_entry.process_name)
    state_machine = self.timetable.state_machines[process_entry.state_machine_name]

    if job_record == prev_job_record:
        # avoid the loop
        return None

    if not state_machine.run_on_active_timeperiod:
        # wait until the timeperiod is over (plus a 5-minute lag) before triggering
        time_qualifier = process_entry.time_qualifier
        incremented_timeperiod = time_helper.increment_timeperiod(time_qualifier, job_record.timeperiod)
        dt_record_timestamp = time_helper.synergy_to_datetime(time_qualifier, incremented_timeperiod)
        dt_record_timestamp += timedelta(minutes=LAG_5_MINUTES)

        if datetime.utcnow() <= dt_record_timestamp:
            self.logger.info('Job {0} for {1}@{2} will not be triggered until {3}.'
                             .format(job_record.db_id,
                                     job_record.process_name,
                                     job_record.timeperiod,
                                     dt_record_timestamp.strftime('%Y-%m-%d %H:%M:%S')))
            return None

    # dispatch on the blocking strategy configured for the process
    blocking_type = process_entry.blocking_type
    if blocking_type == BLOCKING_DEPENDENCIES:
        state_machine.manage_job_with_blocking_dependencies(job_record)
    elif blocking_type == BLOCKING_CHILDREN:
        state_machine.manage_job_with_blocking_children(job_record)
    elif blocking_type == BLOCKING_NORMAL:
        state_machine.manage_job(job_record)
    else:
        raise ValueError('Unknown managed process type {0}'.format(blocking_type))

    return job_record
def freeruns(self):
    """Return freerun statements within the selected time window, sorted by key."""
    processor = FreerunStatements(self.logger)
    today = time_helper.actual_timeperiod(QUALIFIER_DAILY)
    # NOTE(review): the `+ 1` skips one extra character after the prefix
    # -- confirm against the actual time_window format
    delta = int(self.time_window[len(TIME_WINDOW_DAY_PREFIX) + 1:])
    start_timeperiod = time_helper.increment_timeperiod(QUALIFIER_DAILY, today, -delta)
    selection = processor.retrieve_records(start_timeperiod, self.is_unprocessed_only)
    return OrderedDict(sorted(selection.items()))
def _process_state_embryo(self, job_record):
    """ method that takes care of processing job records in STATE_EMBRYO state"""
    qualifier = context.process_context[job_record.process_name].time_qualifier
    # first *exclusive* timeperiod for the new unit_of_work
    end_timeperiod = time_helper.increment_timeperiod(qualifier, job_record.timeperiod)
    uow, is_duplicate = self.insert_and_publish_uow(
        job_record.process_name, job_record.timeperiod, end_timeperiod, 0, 0)
    self.timetable.update_job_record(job_record, uow, job.STATE_IN_PROGRESS)
def test_container_methods(self):
    """Exercise __setitem__/__getitem__/get/__len__/__iter__ of TimeperiodDict."""
    test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)

    # populate: {grouped_timeperiod: highest_loop_index}
    timeperiod = '2010123100'
    for index in range(0, 24):
        test_dict[timeperiod] = index
        timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)

    # (lower, upper) hour range -> expected stored value
    fixture = OrderedDict([
        ((0, 4), 3), ((4, 7), 6), ((7, 10), 9), ((10, 13), 12),
        ((13, 16), 15), ((16, 19), 18), ((19, 22), 21), ((22, 24), 23),
    ])

    timeperiod = '2010123100'
    for (lower, upper), expected in fixture.items():
        for hour in range(lower, upper):
            self.assertEqual(
                test_dict[timeperiod], expected,
                msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                .format(timeperiod, hour, test_dict[timeperiod], expected))
            # get method
            self.assertIsNotNone(test_dict.get(timeperiod))
            timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)

    # test __len__ method
    self.assertEqual(len(test_dict), 8)

    # test __iter__ method
    self.assertEqual(sum(1 for _ in test_dict), 8)
def generate_site_composite_key(index, time_qualifier):
    """Return a (domain_name, timeperiod) composite key for fixture record *index*."""
    start_time = '20010303101010'  # YYYYMMDDHHmmSS
    # 33 is larger than the number of hours in a day and days in a month
    bucket, position = divmod(index, 33)
    timeperiod = time_helper.cast_to_time_qualifier(time_qualifier, start_time)
    if bucket:
        timeperiod = time_helper.increment_timeperiod(time_qualifier, timeperiod, delta=bucket)
    return 'domain_name_{0}'.format(position), timeperiod
def generate_site_composite_key(index, time_qualifier):
    """Return a (domain_name, timeperiod) composite key for fixture record *index*."""
    start_time = '20010303101010'  # YYYYMMDDHHmmSS
    # 33 is larger than the number of hours in a day and days in a month
    slot = index // 33
    base_timeperiod = time_helper.cast_to_time_qualifier(time_qualifier, start_time)
    if slot:
        base_timeperiod = time_helper.increment_timeperiod(
            time_qualifier, base_timeperiod, delta=slot)
    domain_index = index - slot * 33
    return f'domain_name_{domain_index}', base_timeperiod
def test_increment_time(self):
    """increment_timeperiod with default delta must advance exactly one
    period per qualifier, rolling over day / month / year boundaries."""
    cases = [
        (QUALIFIER_HOURLY,
         ['2011010100', '2011010112', '2011010123'],
         ['2011010101', '2011010113', '2011010200']),
        (QUALIFIER_DAILY,
         ['2011010100', '2011013100', '2010123100'],
         ['2011010200', '2011020100', '2011010100']),
        (QUALIFIER_MONTHLY,
         ['2011010000', '2011120000', '2011100000'],
         ['2011020000', '2012010000', '2011110000']),
        (QUALIFIER_YEARLY,
         ['2011000000', '2012000000', '2099000000'],
         ['2012000000', '2013000000', '2100000000']),
    ]
    for qualifier, stamps, expected in cases:
        for stamp, wanted in zip(stamps, expected):
            self.assertEqual(time_helper.increment_timeperiod(qualifier, stamp), wanted)
def test_less_simple_build_tree(self):
    """build_tree() must create one node per timeperiod across a 105-period window."""
    delta = 105
    for tree in self.trees:
        assert isinstance(tree, MultiLevelTree)
        qualifier = tree.process_hierarchy.bottom_process.time_qualifier
        # push the configured start timeperiod *delta* periods into the past
        start_timeperiod = time_helper.increment_timeperiod(
            qualifier, self.actual_timeperiod, -delta)
        settings.settings['synergy_start_timeperiod'] = start_timeperiod
        tree.build_tree()
        self._perform_assertions(tree, delta)
def test_less_simple_build_tree(self):
    """Build each tree over a 105-period window and assert node counts."""
    delta = 105
    for tree in self.trees:
        assert isinstance(tree, MultiLevelTree)
        tq = tree.process_hierarchy.bottom_process.time_qualifier
        # rewind the configured start timeperiod by *delta* periods
        shifted_start = time_helper.increment_timeperiod(tq, self.actual_timeperiod, -delta)
        settings.settings['synergy_start_timeperiod'] = shifted_start
        tree.build_tree()
        self._perform_assertions(tree, delta)
def compute_start_timeperiod(self, process_name, timeperiod):
    """ computes lowest *inclusive* timeperiod boundary for job to process
        for process with time_grouping == 1, it returns given timeperiod with no change
        for process with time_grouping != 1, it computes first timeperiod,
        not processed by the previous job run
        For instance: with time_grouping = 3, QUALIFIER_HOURLY, and timeperiod = 2016042018,
        the start_timeperiod will be = 2016042016 (computed as 2016042018 - 3 + 1)
    """
    time_grouping = context.process_context[process_name].time_grouping
    if time_grouping == 1:
        # no grouping: the job processes exactly the given timeperiod
        return timeperiod

    # step1: translate given timeperiod to the time grouped one
    process_hierarchy = self.timetable.get_tree(process_name).process_hierarchy
    timeperiod_dict = process_hierarchy[process_name].timeperiod_dict
    translated_timeperiod = timeperiod_dict._translate_timeperiod(timeperiod)

    # step 2: compute previous grouped period
    # NOTICE: simple `time_helper.increment_timeperiod(time_qualifier, timeperiod)` is insufficient
    # as it does not address edge cases, such as the last day of the month or the last hour of the day
    # For instance: with time_grouping=3, QUALIFIER_DAILY, and 2016123100
    # the `increment_timeperiod` will yield 2016122800 instead of 2016123100
    time_qualifier = context.process_context[process_name].time_qualifier
    for i in range(1, time_grouping + 1):
        prev_timeperiod = time_helper.increment_timeperiod(
            time_qualifier, translated_timeperiod, delta=-i)
        if prev_timeperiod == timeperiod_dict._translate_timeperiod(prev_timeperiod):
            # prev_timeperiod is currently at the last grouped timeperiod
            break

    # step 3: compute first exclusive timeperiod after the *prev_timeperiod*,
    # which becomes first inclusive timeperiod for this job run
    over_the_edge_timeperiod = time_helper.increment_timeperiod(
        time_qualifier, prev_timeperiod, delta=-1)
    if prev_timeperiod != timeperiod_dict._translate_timeperiod(over_the_edge_timeperiod):
        # over_the_edge_timeperiod fell into previous day or month or year;
        # *prev_timeperiod* points to the first month, first day of the month or 00 hour
        start_timeperiod = prev_timeperiod
    else:
        start_timeperiod = self.compute_end_timeperiod(process_name, prev_timeperiod)

    return start_timeperiod
def test_container_methods(self):
    """Validate dict-protocol behavior of a 3-hour-grouped TimeperiodDict."""
    test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)

    # format {grouped_timeperiod: highest_loop_index}
    current = '2010123100'
    for loop_index in range(0, 24):
        test_dict[current] = loop_index
        current = time_helper.increment_timeperiod(QUALIFIER_HOURLY, current)

    fixture = OrderedDict()
    fixture[(0, 4)] = 3
    fixture[(4, 7)] = 6
    fixture[(7, 10)] = 9
    fixture[(10, 13)] = 12
    fixture[(13, 16)] = 15
    fixture[(16, 19)] = 18
    fixture[(19, 22)] = 21
    fixture[(22, 24)] = 23

    current = '2010123100'
    for bounds, expected in fixture.items():
        for idx in range(bounds[0], bounds[1]):
            self.assertEqual(
                test_dict[current], expected,
                msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                .format(current, idx, test_dict[current], expected))
            # get method
            self.assertIsNotNone(test_dict.get(current))
            current = time_helper.increment_timeperiod(QUALIFIER_HOURLY, current)

    # test __len__ method
    self.assertEqual(len(test_dict), 8)

    # test __iter__ method
    counter = 0
    for _ in test_dict:
        counter += 1
    self.assertEqual(counter, 8)
def __init__(self, request, **values):
    """Parse dashboard query arguments and precompute the query window start."""
    super(DashboardHandler, self).__init__(request, **values)
    args = self.request.args
    self.time_window = args.get('time_window')
    # checkbox-style flags arrive as the literal string 'on' when enabled
    self.is_include_running = args.get('include_running') == 'on'
    self.is_include_processed = args.get('include_processed') == 'on'
    self.is_include_noop = args.get('include_noop') == 'on'
    self.is_include_failed = args.get('include_failed') == 'on'
    self.is_include_disabled = args.get('include_disabled') == 'on'

    self.is_request_valid = bool(self.time_window)
    if self.is_request_valid:
        today = time_helper.actual_timeperiod(QUALIFIER_DAILY)
        delta = int(self.time_window)
        self.query_start_timeperiod = time_helper.increment_timeperiod(
            QUALIFIER_DAILY, today, -delta)
def _process_state_embryo(self, job_record):
    """ method that takes care of processing job records in STATE_EMBRYO state"""
    qualifier = context.process_context[job_record.process_name].time_qualifier
    # first *exclusive* timeperiod for the new unit_of_work
    end_timeperiod = time_helper.increment_timeperiod(qualifier, job_record.timeperiod)
    uow, is_duplicate = self.insert_and_publish_uow(
        job_record.process_name, job_record.timeperiod, end_timeperiod, 0, 0)
    try:
        target_state = self._compute_next_job_state(job_record)
        self.update_job(job_record, uow, target_state)
    except ValueError:
        # do no processing for the future timeperiods
        pass
def _process_state_in_progress(self, job_record):
    """ method that takes care of processing job records in STATE_IN_PROGRESS state """
    qualifier = context.process_context[job_record.process_name].time_qualifier
    # first *exclusive* timeperiod for the job's unit_of_work
    end_timeperiod = time_helper.increment_timeperiod(qualifier, job_record.timeperiod)
    actual_timeperiod = time_helper.actual_timeperiod(qualifier)
    is_job_finalizable = self.timetable.is_job_record_finalizable(job_record)
    uow = self.uow_dao.get_one(job_record.related_unit_of_work)

    if job_record.timeperiod == actual_timeperiod or is_job_finalizable is False:
        self.__process_non_finalizable_job(job_record, uow, job_record.timeperiod, end_timeperiod)
    elif job_record.timeperiod < actual_timeperiod and is_job_finalizable is True:
        self.__process_finalizable_job(job_record, uow)
    else:
        # timeperiod lies in the future: log the anomaly, take no action
        msg = 'Job {0} has timeperiod {1} from the future vs current timeperiod {2}' \
            .format(job_record.db_id, job_record.timeperiod, actual_timeperiod)
        self._log_message(ERROR, job_record.process_name, job_record.timeperiod, msg)
def validate(self): """method traverse tree and performs following activities: * requests a job record in STATE_EMBRYO if no job record is currently assigned to the node * requests nodes for reprocessing, if STATE_PROCESSED node relies on unfinalized nodes * requests node for skipping if it is daily node and all 24 of its Hourly nodes are in STATE_SKIPPED state""" # step 0: request Job record if current one is not set if self.job_record is None: self.request_embryo_job_record() # step 1: define if current node has a younger sibling next_timeperiod = time_helper.increment_timeperiod(self.time_qualifier, self.timeperiod) has_younger_sibling = next_timeperiod in self.parent.children # step 2: define if all children are done and if perhaps they all are in STATE_SKIPPED all_children_skipped = True all_children_finished = True for timeperiod in self.children: child = self.children[timeperiod] child.validate() if child.job_record.is_active: all_children_finished = False if not child.job_record.is_skipped: all_children_skipped = False # step 3: request this node's reprocessing if it is enroute to STATE_PROCESSED # while some of its children are still performing processing if all_children_finished is False and self.job_record.is_finished: self.request_reprocess() # step 4: verify if this node should be transferred to STATE_SKIPPED # algorithm is following: # point a: node must have children # point b: existence of a younger sibling means that the tree contains another node of the same level # thus - should the tree.build_timeperiod be not None - the children level of this node is fully constructed # point c: if all children of this node are in STATE_SKIPPED then we will set this node state to STATE_SKIPPED if len(self.children) != 0 \ and all_children_skipped \ and self.tree.build_timeperiod is not None \ and has_younger_sibling is True \ and not self.job_record.is_skipped: self.request_skip()
def build_tree(self, rebuild=False):
    """ method builds tree by iterating from the synergy_start_timeperiod
        to the current time and inserting corresponding nodes

    :param rebuild: when True, start over from synergy_start_timeperiod even if
        a previous build already advanced self.build_timeperiod
    """
    time_qualifier = self.process_hierarchy.bottom_process.time_qualifier
    process_name = self.process_hierarchy.bottom_process.process_name
    if rebuild or self.build_timeperiod is None:
        # full build: start from the configured beginning of time
        timeperiod = settings.settings['synergy_start_timeperiod']
    else:
        # incremental build: resume where the previous build stopped
        timeperiod = self.build_timeperiod

    timeperiod = cast_to_time_qualifier(time_qualifier, timeperiod)
    actual_timeperiod = time_helper.actual_timeperiod(time_qualifier)
    while actual_timeperiod >= timeperiod:
        # get_node inserts the node if it is not yet present
        self.get_node(process_name, timeperiod)
        timeperiod = time_helper.increment_timeperiod(time_qualifier, timeperiod)
    self.build_timeperiod = actual_timeperiod
def _process_single_document(self, document):
    """Store the week-over-week delta for a site whose visit or pageview
    counts drifted by more than +/-20% versus the week-old record."""
    source_obj = self._init_source_object(document)
    week_old_timeperiod = time_helper.increment_timeperiod(
        QUALIFIER_DAILY, source_obj.key[1], delta=-7)
    try:
        week_old_obj = self.site_dao.get_one(
            COLLECTION_SITE_DAILY, source_obj.key[0], week_old_timeperiod)

        # NOTE(review): raises ZeroDivisionError if week-old counters are 0 -- confirm
        visits_change = source_obj.number_of_visits / week_old_obj.number_of_visits
        pageviews_change = source_obj.number_of_pageviews / week_old_obj.number_of_pageviews
        visits_threshold_crossed = visits_change < 0.8 or visits_change > 1.2
        pageviews_threshold_crossed = pageviews_change < 0.8 or pageviews_change > 1.2

        if visits_threshold_crossed or pageviews_threshold_crossed:
            composite_key = self._init_sink_key(source_obj.key[0], source_obj.key[1])
            target_obj = self._get_aggregated_object(composite_key)
            target_obj.number_of_visits = \
                source_obj.number_of_visits - week_old_obj.number_of_visits
            target_obj.number_of_pageviews = \
                source_obj.number_of_pageviews - week_old_obj.number_of_pageviews
    except LookupError:
        # no week-old record to compare against
        self.logger.debug('site statistics (%s:%s) was not found. skipping comparison'
                          % (source_obj.key[0], week_old_timeperiod))
def test_hourly_translation(self):
    """Every hour of 2010-12-03 must translate to its 3-hour group boundary."""
    test_dict = TimeperiodDict(QUALIFIER_HOURLY, 3)
    fixture = OrderedDict()
    # hour range [lower, upper) -> grouped timeperiod
    fixture[(0, 4)] = '2010120303'
    fixture[(4, 7)] = '2010120306'
    fixture[(7, 10)] = '2010120309'
    fixture[(10, 13)] = '2010120312'
    fixture[(13, 16)] = '2010120315'
    fixture[(16, 19)] = '2010120318'
    fixture[(19, 22)] = '2010120321'
    fixture[(22, 24)] = '2010120323'

    timeperiod = '2010120300'
    for bounds, expected in fixture.items():
        for idx in range(bounds[0], bounds[1]):
            actual_value = test_dict._translate_timeperiod(timeperiod)
            self.assertEqual(
                actual_value, expected,
                msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                .format(timeperiod, idx, actual_value, expected))
            timeperiod = time_helper.increment_timeperiod(QUALIFIER_HOURLY, timeperiod)
def create_unit_of_work(process_name, start_id, end_id, timeperiod='INVALID_TIMEPERIOD',
                        state=unit_of_work.STATE_REQUESTED,
                        created_at=None, submitted_at=None, uow_id=None):
    """ method creates and returns unit_of_work

    :param process_name: key into context.process_context
    :param start_id: first record id covered by the uow
    :param end_id: last record id covered by the uow
    :param timeperiod: synergy timeperiod; the default placeholder is not parseable
    :param state: initial uow state
    :param created_at: creation timestamp; defaults to *now at call time*.
        NOTE: the original signature used `created_at=datetime.utcnow()`, which is
        evaluated once at import time - every uow shared that stale timestamp.
    :param submitted_at: submission timestamp; same None-sentinel fix as created_at
    :param uow_id: optional pre-assigned db id
    """
    if created_at is None:
        created_at = datetime.utcnow()
    if submitted_at is None:
        submitted_at = datetime.utcnow()

    process_obj = context.process_context[process_name]
    try:
        end_timeperiod = time_helper.increment_timeperiod(process_obj.time_qualifier, timeperiod)
    except Exception:
        # timeperiod may be a non-parseable placeholder (e.g. 'INVALID_TIMEPERIOD');
        # narrow the original bare `except:` so KeyboardInterrupt/SystemExit propagate
        end_timeperiod = timeperiod

    uow = UnitOfWork()
    uow.process_name = process_name
    uow.timeperiod = timeperiod
    uow.start_id = start_id
    uow.end_id = end_id
    uow.start_timeperiod = timeperiod
    uow.end_timeperiod = end_timeperiod
    uow.created_at = created_at
    uow.submitted_at = submitted_at
    uow.source = process_obj.source if hasattr(process_obj, 'source') else None
    uow.sink = process_obj.sink if hasattr(process_obj, 'sink') else None
    uow.state = state
    uow.unit_of_work_type = unit_of_work.TYPE_MANAGED
    uow.number_of_retries = 0
    uow.arguments = process_obj.arguments
    if uow_id is not None:
        uow.db_id = uow_id
    return uow
def test_daily_translation(self):
    """Every day of December 2010 must translate to its 3-day group boundary."""
    test_dict = TimeperiodDict(QUALIFIER_DAILY, 3)
    fixture = OrderedDict()
    # day range [lower, upper) -> grouped timeperiod
    fixture[(1, 4)] = '2010120300'
    fixture[(4, 7)] = '2010120600'
    fixture[(7, 10)] = '2010120900'
    fixture[(10, 13)] = '2010121200'
    fixture[(13, 16)] = '2010121500'
    fixture[(16, 19)] = '2010121800'
    fixture[(19, 22)] = '2010122100'
    fixture[(22, 25)] = '2010122400'
    fixture[(25, 28)] = '2010122700'
    fixture[(28, 31)] = '2010123000'
    fixture[(31, 32)] = '2010123100'

    timeperiod = '2010120100'
    for bounds, expected in fixture.items():
        for idx in range(bounds[0], bounds[1]):
            actual_value = test_dict._translate_timeperiod(timeperiod)
            self.assertEqual(
                actual_value, expected,
                msg='failing combination: timeperiod={0} i={1} actual/expected={2}/{3}'
                .format(timeperiod, idx, actual_value, expected))
            timeperiod = time_helper.increment_timeperiod(QUALIFIER_DAILY, timeperiod)
__author__ = 'Bohdan Mushkevych' from synergy.db.error import DuplicateKeyError from synergy.db.model.job import Job from synergy.system import time_helper from synergy.system.time_qualifier import QUALIFIER_HOURLY from tests.base_fixtures import create_unit_of_work TEST_PRESET_TIMEPERIOD = '2013010122' TEST_ACTUAL_TIMEPERIOD = time_helper.actual_timeperiod(QUALIFIER_HOURLY) TEST_PAST_TIMEPERIOD = time_helper.increment_timeperiod(QUALIFIER_HOURLY, TEST_ACTUAL_TIMEPERIOD, delta=-1) TEST_FUTURE_TIMEPERIOD = time_helper.increment_timeperiod(QUALIFIER_HOURLY, TEST_ACTUAL_TIMEPERIOD) def then_raise_uw(*_): """mocks AbstractStateMachine._insert_uow and AbstractStateMachine.insert_and_publish_uow""" raise UserWarning('Simulated UserWarning Exception') def mock_insert_uow_return_uow(process_name, timeperiod, start_timeperiod, end_timeperiod, start_id, end_id): """mocks AbstractStateMachine._insert_uow""" return create_unit_of_work(process_name, start_id, end_id, timeperiod, uow_id='a_uow_id') def mock_insert_uow_raise_dpk(process_name, timeperiod, start_timeperiod, end_timeperiod, start_id, end_id): """mocks AbstractStateMachine._insert_uow""" raise DuplicateKeyError(process_name, timeperiod, start_id, end_id, 'Simulated Exception') def then_raise_dpk(job_record, start_id, end_id): """mocks AbstractStateMachine.insert_and_publish_uow"""
def _process_state_embryo(self, job_record):
    """ method that takes care of processing job records in STATE_EMBRYO state"""
    qualifier = context.process_context[job_record.process_name].time_qualifier
    # first *exclusive* timeperiod for the new unit_of_work
    end_timeperiod = time_helper.increment_timeperiod(qualifier, job_record.timeperiod)
    self._compute_and_transfer_to_progress(
        job_record.process_name, job_record.timeperiod, end_timeperiod, job_record)
def test_shift_time_by_delta(self):
    """increment_timeperiod must honour positive and negative *delta* shifts,
    rolling over day / month / year boundaries in both directions."""
    cases = [
        (QUALIFIER_HOURLY, 3,
         ['2011010100', '2011010112', '2011010123'],
         ['2011010103', '2011010115', '2011010202']),
        (QUALIFIER_HOURLY, -3,
         ['2011010100', '2011010112', '2011010123'],
         ['2010123121', '2011010109', '2011010120']),
        (QUALIFIER_DAILY, 3,
         ['2011010100', '2011013100', '2010123100'],
         ['2011010400', '2011020300', '2011010300']),
        (QUALIFIER_DAILY, -3,
         ['2011010100', '2011013100', '2010123100'],
         ['2010122900', '2011012800', '2010122800']),
        (QUALIFIER_MONTHLY, 3,
         ['2011010000', '2011090000', '2010120000'],
         ['2011040000', '2011120000', '2011030000']),
        (QUALIFIER_MONTHLY, -3,
         ['2011010000', '2011090000', '2010120000'],
         ['2010100000', '2011060000', '2010090000']),
        (QUALIFIER_MONTHLY, 25,
         ['2011010000', '2011090000', '2010120000'],
         ['2013020000', '2013100000', '2013010000']),
        (QUALIFIER_MONTHLY, -25,
         ['2011010000', '2011090000', '2010120000'],
         ['2008120000', '2009080000', '2008110000']),
        (QUALIFIER_MONTHLY, -1,
         ['2011010000', '2011120000', '2011100000'],
         ['2010120000', '2011110000', '2011090000']),
        (QUALIFIER_YEARLY, 5,
         ['2011000000', '2012000000', '2099000000'],
         ['2016000000', '2017000000', '2104000000']),
        (QUALIFIER_YEARLY, -5,
         ['2011000000', '2012000000', '2099000000'],
         ['2006000000', '2007000000', '2094000000']),
    ]
    for qualifier, delta, stamps, expected in cases:
        for stamp, wanted in zip(stamps, expected):
            self.assertEqual(
                time_helper.increment_timeperiod(qualifier, stamp, delta=delta), wanted)
def compute_end_timeperiod(self, process_name, timeperiod):
    """ computes first *exclusive* timeperiod for job to process """
    qualifier = context.process_context[process_name].time_qualifier
    return time_helper.increment_timeperiod(qualifier, timeperiod)