def process(self, data_retrieve_params, timestamps):
    """Fetch the data for the current execution window and match it to the rule.

    Args:
        data_retrieve_params: Passed unchanged to ``DataRetriever``.
        timestamps: Mapping that must contain the keys
            ``'last_execution_time'`` and ``'current_execution_time'``;
            a shallow copy is stored on ``self.timestamps``.

    Returns:
        Whatever ``self.__get_rule_matching_data__`` returns for the
        retrieved data.
    """
    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('PROCESSING BASIC CONDITION')
    print('Timestamps: %s' % (timestamps,))

    # Keep our own (shallow) copy so later writes to self.timestamps do not
    # touch the caller's mapping.
    self.timestamps = copy(timestamps)

    data_retriever = DataRetriever(data_dao=self.data_dao,
                                   data_retrieve_params=data_retrieve_params)
    data = data_retriever.get_data(start=timestamps['last_execution_time'],
                                   end=timestamps['current_execution_time'])
    return self.__get_rule_matching_data__(data=data)
def process(self, data_retrieve_params, timestamps):
    """Evaluate a statistics-based condition over the baseline window.

    The average and standard deviation of the observations retrieved between
    ``last_obs_trigger_time`` and ``current_execution_time`` are handed to
    ``conditions_builder.convert_to_statistics``; the resulting condition is
    then used to filter those same observations.

    Args:
        data_retrieve_params: Passed unchanged to ``DataRetriever``.
        timestamps: Mapping that must contain ``'last_obs_trigger_time'`` and
            ``'current_execution_time'``; a shallow copy is stored on
            ``self.timestamps``.

    Returns:
        A list with at most ``FILTERED_DATA_LIMIT`` matching observations, or
        an empty list when the baseline period has not elapsed or there are
        too few observations.
    """
    # Single-argument print(...) calls behave identically under the Python 2
    # print statement and the Python 3 print function.
    print('PROCESSING STATISTICS CONDITION')
    self.timestamps = copy(timestamps)
    self.baseline.update(self.conditions_builder.baseline)
    # time_limit is assigned before is_full_period_from_last_trigger_time()
    # is called, as in the original statement order.
    self.time_limit = self.baseline['baselineSecondsBack']

    if not self.is_full_period_from_last_trigger_time():
        print('There was not full period of '
              + str(self.baseline['baselineSecondsBack'])
              + ' seconds from last trigger time')
        return []

    data_retriever = DataRetriever(data_dao=self.data_dao,
                                   data_retrieve_params=data_retrieve_params)
    # NOTE(review): `data` is used like a PySpark RDD (count/reduce/map/
    # stdev/filter/take) - confirm the concrete type.
    data = data_retriever.get_data(start=self.timestamps['last_obs_trigger_time'],
                                   end=self.timestamps['current_execution_time'])

    count = data.count()
    minimal_instances = self.baseline['baselineMinimalInstances']
    # A minimum of 0 short-circuits here too, which also guarantees that
    # `count` is non-zero before the division below.
    if minimal_instances == 0 or count < minimal_instances:
        print("Number of observations is lower than baselineMinimalInstances.")
        return []

    # Named `total` so the builtin sum() is not shadowed.
    total = data.reduce(reduce_func)
    # NOTE(review): the result type depends on what reduce_func returns; with
    # two ints this is floor division on Python 2 - confirm that is intended.
    avg = total / count
    stdev = data.map(lambda x: float(x[1])).stdev()

    self.conditions_builder.convert_to_statistics(avg, stdev)
    condition = self.conditions_builder.get_condition()
    # NOTE(review): the result of the condition is used for its truthiness
    # here, whereas TimebasedConditionProcessor reads ['passed'] from it -
    # confirm the statistics condition returns a plain boolean.
    filtered_data = data.filter(condition).take(FILTERED_DATA_LIMIT)

    if len(filtered_data) > 0:
        # Remember when the rule last fired.
        self.timestamps['last_obs_trigger_time'] = self.timestamps['current_execution_time']
    return filtered_data
class TimebasedConditionProcessor(TimePeriodProcessor):
    """Processor for conditions that must hold over a period of time.

    ``process`` looks for the largest period in which the condition from
    ``conditions_builder`` matches and, when that period is long enough
    compared to ``time_limit``, returns the matching observations.
    """

    def __init__(self, data_dao, conditions_builder, time_limit):
        """Store the collaborators and reset the per-run state.

        Args:
            data_dao: Passed to ``DataRetriever`` on every ``process`` call.
            conditions_builder: Object providing ``get_condition()``.
            time_limit: Required period length; it is printed as seconds and
                multiplied by 1000 before being compared to the matching
                period.
        """
        super(TimebasedConditionProcessor, self).__init__()
        self.data_dao = data_dao
        self.conditions_builder = conditions_builder
        self.time_limit = time_limit
        self.timestamps = None
        self.data_retriever = None
        self.matching_period = None
        # Set by __find_matching_period; declared here so the attribute
        # always exists on an instance.
        self.matching_period_start_time = None

    def process(self, data_retrieve_params, timestamps):
        """Return observations from the matching period, if it is long enough.

        Args:
            data_retrieve_params: Passed unchanged to ``DataRetriever``.
            timestamps: Mapping that must contain ``'last_obs_trigger_time'``
                and ``'current_execution_time'``; a shallow copy is stored on
                ``self.timestamps``.

        Returns:
            A list with at most ``FILTERED_DATA_LIMIT`` observations for which
            the condition result has a truthy ``'passed'`` entry, or an empty
            list when no sufficiently long period was found.
        """
        # Single-argument print(...) calls behave identically under the
        # Python 2 print statement and the Python 3 print function.
        print('PROCESSING TIME BASED CONDITION')
        self.timestamps = copy(timestamps)
        self.data_retriever = DataRetriever(data_dao=self.data_dao,
                                            data_retrieve_params=data_retrieve_params)

        if not self.is_full_period_from_last_trigger_time():
            print('There was not full period of ' + str(self.time_limit)
                  + ' seconds from last trigger time')
            return []

        self.__find_matching_period()
        if not self.__is_condition_fulfilled():
            return []

        print('Matching time period length: ' + str(self.matching_period))
        map_fun = self.conditions_builder.get_condition()
        # Computed once: the same value bounds the query and becomes the new
        # trigger time.
        period_end = self.__get_matching_period_end_time()
        data = self.data_retriever.get_data(start=self.matching_period_start_time,
                                            end=period_end)
        filtered_data = data.filter(lambda x: map_fun(x)['passed']).take(FILTERED_DATA_LIMIT)
        self.timestamps['last_obs_trigger_time'] = period_end
        return filtered_data

    def __is_condition_fulfilled(self):
        """Return True when the matching period is at least ``time_limit * 1000``."""
        # Python 2 evaluates `number <= None` to False; the explicit check
        # keeps that outcome and avoids a TypeError on Python 3.
        if self.matching_period is None:
            return False
        return self.time_limit * 1000 <= self.matching_period

    def __get_matching_period_end_time(self):
        """Return ``matching_period_start_time + time_limit + 1``."""
        # NOTE(review): time_limit is scaled by 1000 in
        # __is_condition_fulfilled but added unscaled here - confirm the unit
        # of matching_period_start_time.
        return self.matching_period_start_time + self.time_limit + 1

    def __find_matching_period(self):
        """Find the largest matching period since the last trigger time.

        Stores its length in ``self.matching_period`` and its start in
        ``self.matching_period_start_time``.
        """
        data = self.data_retriever.get_data(start=self.timestamps['last_obs_trigger_time'],
                                            end=self.timestamps['current_execution_time'])
        matching_period_finder = MatchingPeriodFinder(data)
        self.matching_period = matching_period_finder.get_largest(
            self.conditions_builder.get_condition())
        self.matching_period_start_time = matching_period_finder.get_largest_start_time()
def process(self, data_retrieve_params, timestamps):
    """Return observations from the matching period, if it is long enough.

    Args:
        data_retrieve_params: Passed unchanged to ``DataRetriever``.
        timestamps: Mapping whose shallow copy is stored on
            ``self.timestamps``; ``'last_obs_trigger_time'`` is written to it
            when the condition is fulfilled.

    Returns:
        A list with at most ``FILTERED_DATA_LIMIT`` observations for which
        the condition result has a truthy ``'passed'`` entry, or an empty
        list when the full period has not elapsed or the condition is not
        fulfilled.
    """
    # Single-argument print(...) calls behave identically under the Python 2
    # print statement and the Python 3 print function.
    print('PROCESSING TIME BASED CONDITION')
    self.timestamps = copy(timestamps)
    self.data_retriever = DataRetriever(data_dao=self.data_dao,
                                        data_retrieve_params=data_retrieve_params)

    if not self.is_full_period_from_last_trigger_time():
        print('There was not full period of ' + str(self.time_limit)
              + ' seconds from last trigger time')
        return []

    self.__find_matching_period()
    if not self.__is_condition_fulfilled():
        return []

    print('Matching time period length: ' + str(self.matching_period))
    map_fun = self.conditions_builder.get_condition()
    # Computed once: the same value bounds the query and becomes the new
    # trigger time.
    period_end = self.__get_matching_period_end_time()
    data = self.data_retriever.get_data(start=self.matching_period_start_time,
                                        end=period_end)
    filtered_data = data.filter(lambda x: map_fun(x)['passed']).take(FILTERED_DATA_LIMIT)
    self.timestamps['last_obs_trigger_time'] = period_end
    return filtered_data