def test_synthetic_EQF_4(self):
    """Equal-frequency with 4 bins over the samples -75 and 25.

    The three cutpoints are expected at -50, -25 and 0.
    """
    discretizer = EqualFrequency(4, 0)
    # min = -75, max = 25
    samples = {1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)]}
    wanted = {1: [-50, -25, 0]}

    discretizer.discretize_property_without_abstracting({}, {}, samples, 1)

    ok, details = assert_almost_equality(wanted, discretizer.bins_cutpoints)
    self.assertTrue(ok, details)
def test_synthetic_EQW_3(self):
    """Equal-width with 3 bins over the samples -75 and 25.

    The value range is 100 wide, so the two cutpoints are expected one and
    two thirds of the way from the minimum.
    """
    discretizer = EqualWidth(3, 0)
    # min = -75, max = 25
    samples = {1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)]}
    wanted = {1: [-75 + 100/3, -75 + 200/3]}

    discretizer.discretize_property_without_abstracting({}, {}, samples, 1)

    ok, details = assert_almost_equality(wanted, discretizer.bins_cutpoints)
    self.assertTrue(ok, details)
def test_synthetic_stress_EQW_2(self):
    """Equal-width with 2 bins on a large series dominated by the value 3.

    The extremes (-75 and 25) are buried between three long runs of the
    value 3; the single cutpoint is still expected at -25, the midpoint of
    the extremes.
    """
    def filler():
        # One TimeStamp object repeated STRESS_VALUE_COUNT times.
        return [TimeStamp(3, 1, 1, 0)] * STRESS_VALUE_COUNT

    discretizer = EqualWidth(2, 0)
    # min = -75, max = 25
    series = (
        filler()
        + [TimeStamp(-75, 1, 1, 0)]
        + filler()
        + [TimeStamp(25, 1, 1, 0)]
        + filler()
    )
    samples = {1: series}
    wanted = {1: [-25]}

    discretizer.discretize_property_without_abstracting({}, {}, samples, 1)

    ok, details = assert_almost_equality(wanted, discretizer.bins_cutpoints)
    self.assertTrue(ok, details)
def test_synthetic_EQF_Stress_Many_Requests(self):
    """Run equal-frequency for every bin count in 2..9999 on samples 0 and 1.

    For each bin count the sum of the produced cutpoints is expected to be
    (bin_count - 1) / 2. All mismatch messages are collected and reported
    in a single assertion at the end.
    """
    samples = {1: [TimeStamp(0, 1, 1, 0), TimeStamp(1, 1, 1, 0)]}
    all_ok = True
    reports = []

    for bin_count in range(2, 10000):
        discretizer = EqualFrequency(bin_count, 0)
        discretizer.discretize_property_without_abstracting({}, {}, samples, 1)

        actual_sum = sum(discretizer.bins_cutpoints[1])
        wanted_sum = (bin_count - 1) / 2
        ok, report = assert_almost_equality({1: [wanted_sum]},
                                            {1: [actual_sum]})
        all_ok &= ok
        reports.append(report)

    self.assertTrue(all_ok, "".join(reports))
def test_Abstraction_No_Impact_p2t(self):
    """Discretizing must not modify the caller's property-to-timestamps map.

    The input is compared with the returned map; the assertion requires the
    comparison text to be non-empty, i.e. the input must still differ from
    the discretized output.
    """
    original = {
        1: [
            TimeStamp(0, 1, 1, 0, 0),
            TimeStamp(0, 1, 1, 1, 0),
            TimeStamp(1, 1, 1, 2, 1),
            TimeStamp(1, 1, 1, 3, 1),
        ]
    }
    expert: Discretization = Expert({1: [1.5]}, 0)

    _, _, discretized = expert.discretize_property({}, {}, original, 1)

    differences = ""
    differences += compare_time_stamps(original, discretized)
    self.assertTrue(differences != "",
                    "Original data was changed during abstraction")
def test_syntetic_cutpoint_generation(self):
    """Check equal-width cutpoints for every bin count in 2..999.

    With samples -75 and 25 the range is 100, so bin count c is expected to
    yield the cutpoints -75 + (100 / c) * i for i in 1..c-1. Mismatch
    messages are accumulated and asserted once at the end.
    """
    all_ok = True
    reports = []

    for bin_count in range(2, 1000):
        discretizer = EqualWidth(bin_count, 0)
        # min = -75, max = 25
        samples = {1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)]}
        step = 100/bin_count
        wanted = {1: [-75+step*k for k in range(1, bin_count)]}

        discretizer.discretize_property_without_abstracting({}, {}, samples, 1)

        ok, report = assert_almost_equality(wanted,
                                            discretizer.bins_cutpoints)
        all_ok &= ok
        reports.append(report)

    self.assertTrue(all_ok, "".join(reports))
def test_Abstraction_No_Impact_c2e(self):
    """Discretizing must not modify the caller's class-to-entities map.

    Four entities (two per class) each carry one timestamp for property 1.
    The input map is compared with the returned one; the assertion requires
    the comparison text to be non-empty, i.e. the input must still differ
    from the discretized output.
    """
    original = {0: set(), 1: set()}
    # (entity id, class id); the class id is also used as the raw value.
    for entity_id, class_id in ((0, 0), (1, 0), (2, 1), (3, 1)):
        entity: Entity = Entity(entity_id, class_id, -1)
        entity.properties = {
            1: [TimeStamp(class_id, 1, 1, entity_id, class_id)]
        }
        original[class_id].add(entity)

    expert: Discretization = Expert({1: [1.5]}, 0)
    _, discretized, _ = expert.discretize_property({}, original, {}, 1)

    differences = ""
    differences += compare_time_stamps_c2e(original, discretized)
    self.assertTrue(differences != "",
                    "Original data was changed during abstraction")
def test_Abstraction_Most_Bins(self):
    """Expert discretization where every value lands in its own bin.

    Values 1..1000 are paired with cutpoints 1.5, 2.5, ..., 1000.5, so the
    value i is expected to be replaced by bin index i - 1 while its time
    interval and entity id stay unchanged.
    """
    TOTAL_POINTS = 1000
    indices = range(1, TOTAL_POINTS + 1)

    p2t = {1: [TimeStamp(i, i, i, i) for i in indices]}
    expected = {1: [TimeStamp(i - 1, i, i, i) for i in indices]}
    cutpoints = [i + 0.5 for i in indices]

    expert: Discretization = Expert({1: cutpoints}, -1)
    _, _, discretized = expert.discretize_property({}, {}, p2t, 1)

    differences = ""
    differences += compare_time_stamps(expected, discretized)
    self.assertTrue(differences == "", differences)
def test_Abstraction_Ignore_Properties(self):
    """Discretize property 2 only and compare the result with the input map.

    Property 1 holds the values 1..1000; property 2 holds 1000 zero-valued
    timestamps and is discretized with 1000 identical cutpoints (all 5).
    """
    TOTAL_POINTS = 1000
    indices = range(1, TOTAL_POINTS + 1)

    p2t = {
        1: [TimeStamp(i, i, i, i) for i in indices],
        2: [TimeStamp(0, 0, 0, 0) for _ in indices],
    }
    cutpoints = [5 for _ in indices]

    # NOTE(review): `expected` is built but never compared; the assertion
    # below checks against `p2t` itself. Confirm which one is intended.
    expected = {1: p2t[1], 2: [TimeStamp(0, 0, 0, 0)]}

    expert: Discretization = Expert({2: cutpoints}, -1)
    _, _, discretized = expert.discretize_property({}, {}, p2t, 2)

    differences = ""
    differences += compare_time_stamps(p2t, discretized)
    self.assertTrue(differences == "", differences)
def test_Abstraction_MaxGap_0_Bins_2(self):
    """Two bins, max gap 0, a single entity sampled at times 1..1000.

    With the cutpoint at 500 the result is expected to collapse into two
    intervals: bin 0 over [1, 499] and bin 1 over [500, 1000].
    """
    TOTAL_POINTS = 1000
    p2t = {1: [TimeStamp(i, i, i, 0) for i in range(1, TOTAL_POINTS + 1)]}

    expert: Discretization = Expert({1: [500]}, 0)
    _, _, discretized = expert.discretize_property({}, {}, p2t, 1)

    expected = {
        1: [
            TimeStamp(0, 1, 499, 0),
            TimeStamp(1, 500, 1000, 0),
        ]
    }

    differences = ""
    differences += compare_time_stamps(expected, discretized)
    self.assertTrue(differences == "", differences)
def test_synthetic_EQF_Stress_Big_Request_4(self):
    """Equal-frequency with 4 bins over the values 0..STRESS_VALUE_COUNT-1.

    The cutpoints are expected at one, two and three quarters of the
    largest value.
    """
    BIN_COUNT = 4
    largest = STRESS_VALUE_COUNT - 1
    samples = {1: [TimeStamp(v, 1, 1, 0) for v in range(STRESS_VALUE_COUNT)]}

    discretizer = EqualFrequency(BIN_COUNT, 0)
    discretizer.discretize_property_without_abstracting({}, {}, samples, 1)

    wanted = {1: [k*largest/BIN_COUNT for k in range(1, BIN_COUNT)]}
    ok, details = assert_almost_equality(wanted, discretizer.bins_cutpoints)
    self.assertTrue(ok, details)
def get_data_from_row(line: str) -> 'DataRow':
    """Parse one comma-separated record into a DataRow.

    The record must hold at least four fields, in this order: ``eid``
    (int), ``tid`` (int), ``time`` (int) and ``value`` (float). Trailing
    whitespace is stripped before splitting; extra fields are ignored.

    :param line: the raw text of one record
    :return: a DataRow built from eid, tid and a TimeStamp whose start and
        end are both ``time``
    :raises FileFormatNotCorrect: if a field is missing or cannot be
        converted to its numeric type
    """
    fields = line.rstrip().split(',')
    try:
        eid = int(fields[0])
        tid = int(fields[1])
        time = int(fields[2])
        val = float(fields[3])
    except (IndexError, ValueError) as err:
        # Only malformed input is translated; interpreter-level exceptions
        # such as KeyboardInterrupt are no longer swallowed.
        raise FileFormatNotCorrect() from err

    # A raw sample is a zero-length interval: start == end == time.
    time_stamp = TimeStamp(val, time, time, eid)
    return DataRow(eid, tid, time_stamp)
def test_PAA_Discretization_Difference(self):
    """Equal-width cutpoints must change when PAA (window_size=2) is applied.

    The same four samples are discretized twice: with window_size=1 the
    cutpoint is expected at -25, with window_size=2 at -50 + 63 / 2.
    Finally the two results are compared and the test fails if they are
    reported as almost equal.
    """
    def make_samples():
        # Fresh objects for every run so the two runs share no timestamps.
        return {
            1: [
                TimeStamp(-75, 1, 1, 0),
                TimeStamp(-25, 2, 2, 0),
                TimeStamp(1, 3, 3, 0),
                TimeStamp(25, 4, 4, 0),
            ]
        }

    report = ""
    passed = True

    # Run 1 - window of one sample: min = -75, max = 25
    plain = EqualWidth(2, 0, window_size=1)
    plain.discretize_property({}, {}, make_samples(), 1)
    no_paa_cutpoints = plain.bins_cutpoints
    ok, detail = assert_almost_equality({1: [-25]}, no_paa_cutpoints)
    report += detail
    passed &= ok

    # Run 2 - window of two samples: min = -50 max = 13
    averaged = EqualWidth(2, 0, window_size=2)
    averaged.discretize_property({}, {}, make_samples(), 1)
    paa_cutpoints = averaged.bins_cutpoints
    ok, detail = assert_almost_equality({1: [-50 + 63 / 2]}, paa_cutpoints)
    if detail != "":
        detail = "\n" + detail
    report += detail
    passed &= ok

    # NOTE(review): bins_cutpoints is compared directly against
    # {property_id: [...]} dicts above, so wrapping it in {1: ...} here
    # nests one level deeper - confirm assert_almost_equality handles that.
    same, _ = assert_almost_equality({1: no_paa_cutpoints},
                                     {1: paa_cutpoints})
    if same:
        report += "\nExpected different cutpoints with PAA! Got %s" % no_paa_cutpoints
        passed = False

    self.assertTrue(passed, report)
def test_Abstraction_MaxGap_0_Bins_2_Different_Entities(self):
    """Two bins, max gap 0, samples alternating between two entities.

    Times 1..1000 alternate between entity 1 (odd) and entity 0 (even).
    With the cutpoint at 500, every sample is expected to keep its own
    interval and entity and only have its value replaced by bin 0
    (below 500) or bin 1 (500 and above).
    """
    TOTAL_POINTS = 1000
    cutpoints = [500]
    raw_series = []
    expected_series = []

    for i in range(1, TOTAL_POINTS + 1):
        expected_bin = 1 if i >= 500 else 0
        entity_id = 0 if i % 2 == 0 else 1
        raw_series.append(TimeStamp(i, i, i, entity_id))
        expected_series.append(TimeStamp(expected_bin, i, i, entity_id))

    p2t = {1: raw_series}
    expected = {1: expected_series}

    expert: Discretization = Expert({1: cutpoints}, 0)
    _, _, discretized = expert.discretize_property({}, {}, p2t, 1)

    differences = ""
    differences += compare_time_stamps(expected, discretized)
    self.assertTrue(differences == "", differences)
def test_PAA_Window_1(self):
    """PAA with a window of one sample must return the series unchanged.

    The series holds one timestamp for every time point 1..TOTAL_POINTS.
    The loop previously iterated over range(TOTAL_POINTS + 1, WINDOW_SIZE),
    which is empty, so the test compared two empty series and could never
    fail.
    """
    TOTAL_POINTS = 1000
    WINDOW_SIZE = 1
    cutpoints = []

    p2t = {1: [TimeStamp(i, i, i, 0) for i in range(1, TOTAL_POINTS + 1)]}

    d: Discretization = Expert({1: cutpoints}, -1, window_size=WINDOW_SIZE)
    d_p2t = d.paa_p2t(p2t)

    message = ""
    message += compare_time_stamps(p2t, d_p2t)
    self.assertTrue(message == "", message)
def get_copy_of_maps(
        old_property_to_entities: Dict[int, Set[Entity]],
        old_class_to_entities: Dict[int, Set[Entity]],
        old_property_to_timestamps: Dict[int, List[TimeStamp]]
) -> Tuple[Dict[int, Set['Entity']], Dict[int, Set['Entity']], Dict[
        int, List[TimeStamp]]]:
    """
    Builds independent copies of the entity and timestamp maps.

    Every timestamp listed in old_property_to_timestamps is copied with
    TimeStamp.deep_copy, and that same copy is reused wherever the timestamp
    is referenced again (in the timestamp lists and in entity properties).
    New Entity objects are created for every entity in
    old_class_to_entities, and the property-to-entities map is rebuilt from
    the properties of those new entities.

    :param old_property_to_entities: A dictionary mapping property ids to
        the set of entities containing the property. It is not read by this
        function.
    :param old_class_to_entities: A dictionary mapping class ids to the set
        of entities under this class
    :param old_property_to_timestamps: A dictionary mapping property ids to
        the list of timestamps belonging to that property. Every timestamp
        held by an entity must appear here, otherwise a KeyError is raised.
    :return: The new property-to-entities, class-to-entities and
        property-to-timestamps dictionaries, in that order.
    """
    # Original timestamp -> its copy, so shared references stay shared.
    copy_of: Dict[TimeStamp, TimeStamp] = {}
    for series in old_property_to_timestamps.values():
        for original_ts in series:
            copy_of[original_ts] = TimeStamp.deep_copy(original_ts)

    property_to_timestamps: Dict[int, List[TimeStamp]] = {}
    for property_id, series in old_property_to_timestamps.items():
        property_to_timestamps[property_id] = [copy_of[ts] for ts in series]

    property_to_entities: Dict[int, Set['Entity']] = {}
    class_to_entities: Dict[int, Set['Entity']] = {}
    for class_id, old_members in old_class_to_entities.items():
        new_members: Set['Entity'] = set()
        class_to_entities[class_id] = new_members

        for old_entity in old_members:
            old_properties = old_entity.properties.copy()
            new_entity = Entity(old_entity.entity_id, class_id,
                                old_entity.class_separator)
            new_properties = {
                property_id: [copy_of[ts] for ts in series]
                for property_id, series in old_properties.items()
            }

            # Open an empty entity set for each property seen for the first
            # time, then register the new entity under all its properties.
            unseen = set(new_properties.keys()).difference(
                property_to_entities.keys())
            for property_id in unseen:
                property_to_entities[property_id] = set()
            for property_id in new_properties:
                property_to_entities[property_id].add(new_entity)

            new_entity.properties = new_properties
            new_members.add(new_entity)

    return property_to_entities, class_to_entities, property_to_timestamps
def paa_timestamps(self, timestamps: List[TimeStamp]):
    """Average timestamps over consecutive windows of self.window_size.

    With a window size of 1 every timestamp is simply copied. Otherwise the
    timestamps are sorted by start point and grouped into half-open windows
    [t, t + window_size), starting at the earliest start point. Each
    non-empty window yields one TimeStamp whose value is the mean of the
    values in the window, whose interval is the window itself, and whose
    entity id and class are taken from the last timestamp in the window.
    Empty windows produce nothing.

    :param timestamps: the timestamps to aggregate; the list is not modified
    :return: a new list of TimeStamp objects
    :raises ValueError: if the window size is not positive (the window
        could never advance)
    """
    if self.window_size == 1:
        return [
            TimeStamp(ts.value, ts.start_point, ts.end_point, ts.entity_id,
                      ts.ts_class) for ts in timestamps
        ]
    if self.window_size <= 0:
        raise ValueError("window_size must be positive")
    if not timestamps:
        return []

    timestamps = sorted(timestamps, key=lambda ts: ts.start_point)
    time_point = timestamps[0].start_point
    end_point = timestamps[-1].start_point
    i = 0
    new_values = []
    # `<=` so that a final sample lying exactly on a window start (including
    # the single-sample case) still gets its own window instead of being lost.
    while time_point <= end_point:
        window_end = time_point + self.window_size
        count = 0
        s = 0
        while i < len(timestamps) and timestamps[i].start_point < window_end:
            s += timestamps[i].value
            count += 1
            i += 1
        if count != 0:
            last_in_window = timestamps[i - 1]
            new_values.append(
                TimeStamp(s / count, time_point, window_end,
                          last_in_window.entity_id, last_in_window.ts_class))
        time_point = window_end
    return new_values
return "BINARY_%s" % self.bin_count

def __init__(self, bin_count, max_gap, window_size=1):
    """Initialise the discretizer.

    :param bin_count: number of bins; stored after conversion with int()
    :param max_gap: passed unchanged to the parent initialiser
    :param window_size: passed unchanged to the parent initialiser
        (defaults to 1)
    """
    super(Binary, self).__init__(max_gap, window_size)
    self.bin_count = int(bin_count)

# if __name__ == '__main__': print(math.log(0.5, 10) * 0.5) print([1, 2, 3][:1]) p_to_ent = {} e1 = Entity(1, 1) e2 = Entity(2, 1) e3 = Entity(3, 2) e1.properties[0] = [TimeStamp(10, TimeInterval(0, 1))] e2.properties[0] = [TimeStamp(15, TimeInterval(2, 3))] e3.properties[0] = [TimeStamp(7, TimeInterval(4, 5))] binary = Binary(2) p_to_ent[0] = set() p_to_ent[0].add(e1) p_to_ent[0].add(e2) p_to_ent[0].add(e3) c_to_ent = {1: set(), 2: set()} c_to_ent[1].add(e1) c_to_ent[1].add(e2) c_to_ent[2].add(e3) print(binary.set_bin_ranges_for_property(p_to_ent, c_to_ent, {}, 0)) # x = set() # y = set()
def discretize(self, time_stamp: TimeStamp) -> bool:
    """Replace the timestamp's value with this bin's symbol if it fits.

    The value fits when it lies in the half-open range
    [self.min_val, self.max_val). On a match the timestamp is modified in
    place; otherwise it is left untouched.

    :param time_stamp: the timestamp to test and, on a match, update
    :return: True if the value was replaced, False otherwise
    """
    inside_bin = self.min_val <= time_stamp.value < self.max_val
    if not inside_bin:
        return False
    time_stamp.value = self.bin_symbol
    return True