def load_data(self, file_path):
    """Load points from a CSV file previously written by pandas ``to_csv``.

    Column 0 of every row is skipped (it is the index column added by
    ``to_csv``); the remaining columns become the coordinates of a Point.

    :param file_path: path of the CSV file to read.
    :raises Exception: if the file does not exist, or if a row does not
        have exactly ``self.dimension`` coordinates.
    :return: None; fills ``self.points_list`` and ``self.size_of_data``.
    """
    self.points_list = []
    path_file = Path(file_path)
    DataVector.logger.info('We get the points from ' + str(path_file))
    if not path_file.is_file():
        DataVector.logger.error('The file does not exist')
        raise Exception("The file does not exist " + str(path_file))
    data_file = pd.read_csv(str(path_file))
    line_counter = -1
    try:
        for line_counter, row in enumerate(data_file.values):
            # Skip index 0: it is the row index written by to_csv.
            coordinates = [Decimal(int(elem))
                           for index, elem in enumerate(row) if index != 0]
            if len(coordinates) != self.dimension:
                # Bug fix: the original message printed actual before
                # expected, with no separator between the two numbers.
                DataVector.logger.error(
                    'The dimension is not correct, expected : '
                    + str(self.dimension) + ', got : ' + str(len(coordinates)))
                raise Exception("The dimension of the point at line "
                                + str(line_counter) + " is not correct")
            self.points_list.append(Point(coordinates))
        # Set once after the loop instead of on every iteration.
        self.size_of_data = len(self.points_list)
    except Exception as e:
        DataVector.logger.error("Probleme during reading line "
                                + str(line_counter))
        raise e
def discretise_point(self, point):
    """Build the list of discretised points that enclose *point*.

    Applies a combinatory ceil/floor to every coordinate of the given
    point, according to ``self.lambda_error``.

    :param point: the Point whose coordinates have to be discretised.
    :return: a list of Points enclosing *point*; its size depends on the
        point's dimension and on the lambda_error.
    """
    # Work on a deep copy so the caller's point is never modified and
    # can be reused afterwards.
    working_copy = deepcopy(point)
    enclosing = [Point(self.minimisePoint(working_copy.coordinates))]
    self.discretise_recursive(working_copy.coordinates,
                              point.coordinates, 0, enclosing)
    return enclosing
def __print(self, cur_node, cur_range, cur_range_size, cur_middle, depth):
    """
    Allow to print all the nodes which compose the ARF, one line per
    node/leaf, indented by depth.

    :param cur_node: the Node or Leaf currently visited.
    :param cur_range: per-dimension [low, high] bounds of the current
        cell (printed next to the node).
    :param cur_range_size: side length of the current cell; halved at
        each level.
    :param cur_middle: coordinates of the centre of the current cell.
    :param depth: recursion depth, used for tab indentation.
    :return:
    """
    # Build the centres of all 2**dim sub-cells of the current cell by
    # branching each coordinate of cur_middle +/- a quarter of the cell
    # size, recursing until every partial middle has one entry per
    # dimension.
    def compute_next_middles(cur_middle, list_of_building_middle, domain_of_cur_middle):
        if len(list_of_building_middle) < (2**len(cur_middle)):
            # "Plus" branch: extend every partial middle with the next
            # coordinate shifted up by a quarter of the cell size.
            l1 = copy.deepcopy(list_of_building_middle)
            for i in range(len(list_of_building_middle)):
                l1[i].append(
                    (cur_middle[len(l1[i])] + domain_of_cur_middle / 4))
            # "Minus" branch: same coordinate shifted down.
            l2 = copy.deepcopy(list_of_building_middle)
            for i in range(len(list_of_building_middle)):
                l2[i].append(
                    (cur_middle[len(l2[i])] - domain_of_cur_middle / 4))
            return compute_next_middles(cur_middle, l1 + l2, domain_of_cur_middle)
        else:
            return list_of_building_middle

    if type(cur_node) == Node:
        indentation = "\t" * depth
        print(indentation + "N " + cur_node.__repr__() + str(cur_range))
        next_middles = compute_next_middles(cur_middle, [[]], cur_range_size)
        # Each child range spans a quarter of the cell size on either
        # side of its middle.
        next_ranges = list(
            map(
                lambda x: list(
                    map(
                        lambda y: [
                            y - cur_range_size / 4, y + cur_range_size / 4
                        ], x)), next_middles))
        next_range_size = cur_range_size / 2
        for i in range(len(next_ranges)):
            # Locate the child node covering each sub-cell middle, then
            # recurse one level deeper.
            next_node, _ = self.__move_to_the_next_node(
                Point(next_middles[i]), cur_middle, cur_node, cur_range_size)
            self.__print(next_node, next_ranges[i], next_range_size,
                         next_middles[i], depth + 1)
    elif type(cur_node) == Leaf:
        # It's a leaf: a trailing "*" marks a leaf whose value is True.
        indentation = "\t" * depth
        if cur_node.get_value() == True:
            print(indentation + "L " + cur_node.__repr__() + str(cur_range) + " *")
        else:
            print(indentation + "L " + cur_node.__repr__() + str(cur_range))
def genarate_similar(self, delta, data_set, save_file_name=None):
    """Generate points close to existing ones and store them in ``self.point_list``.

    Each generated point is built from a randomly chosen point of
    *data_set* by adding a random offset in ``[0, delta)`` to every
    coordinate, and is kept only if it lies strictly within *delta* of
    its source point.

    :param delta: (float) the maximum distance between a generated point
        and the data-set point it derives from.
    :param data_set: the data set, as a list of Point objects.
    :param save_file_name: name of the CSV file in which the generated
        data is saved for later tests; if omitted, nothing is saved.
    :raises AssertionError: if too many draws are rejected (guard
        against an endless loop).
    :return: Nothing
    """
    number_of_vector = 0
    number_of_test = 0
    array_vector = []
    while number_of_vector < self.size_of_data_set:
        index = randint(0, len(data_set) - 1)
        # Shift every coordinate of the chosen point by a random
        # offset in [0, delta).
        vct = [coord + uniform(0, 1) * delta
               for coord in data_set[index].coordinates]
        point = Point(vct)
        if point.distance(data_set[index]) < delta:
            self.point_list.append(point)
            array_vector.append(vct)
            number_of_vector += 1
        number_of_test += 1
        if number_of_test >= 1000 * len(data_set):
            # Bug fix: `assert (False)` disappears under `python -O`;
            # raise the same exception type explicitly instead. Message
            # spelling aligned with genarate_falses.
            logger.error("infinite loop to construct similar")
            raise AssertionError("infinite loop to construct similar")
    if save_file_name:
        target = os.path.join(os.getcwd(), DATA_FOLDER, save_file_name)
        try:
            os.remove(target)
        except OSError:
            # The file simply did not exist yet.
            pass
        pd.DataFrame(array_vector).to_csv(target, encoding='utf-8')
def discretise_recursive(self, point, original_points, starting_index, results):
    """
    Internal helper of discretise: shouldn't be called directly.

    For every coordinate from *starting_index* on that differs from the
    original point, emits the variant shifted up by the error step and
    recurses on the remaining coordinates.
    """
    # The shift is invariant over the loop, so compute it once.
    step = (self.lambda_error
            if self.method_type == Constants.DIS_DOUBLE
            else 2 * self.lambda_error)
    for idx in range(starting_index, len(point)):
        if point[idx] == original_points[idx]:
            continue
        # Copy before shifting so the list seen by each loop pass keeps
        # its value and can be reused without modification.
        shifted = point[:]
        shifted[idx] += step
        results.append(Point(shifted))
        self.discretise_recursive(shifted, original_points, idx + 1, results)
def genarate(self, save_file_name=None):
    """Generate random points and store them in ``self.point_list``.

    :param save_file_name: name of the CSV file in which the generated
        data is saved for later tests; if omitted, nothing is saved.
    :return: Nothing.
    """
    array_vector = np.random.randint(
        self.domain, size=(self.size_of_data_set, self.dimension))
    for row in array_vector:
        self.point_list.append(Point(list(row)))
    if not save_file_name:
        return
    destination = os.path.join(os.getcwd(), DATA_FOLDER, save_file_name)
    try:
        os.remove(destination)
    except OSError:
        # The file simply did not exist yet.
        pass
    pd.DataFrame(array_vector).to_csv(destination, encoding='utf-8')
def genarate_falses(self, delta, data_set, save_file_name=None):
    """Generate points far from the data set and store them in ``self.point_list``.

    Each generated point is kept only if it is at least *delta* away
    from every point of *data_set* (checked by ``self.distant_enough``).

    :param delta: (float) the minimum distance between a generated point
        and the points of *data_set*.
    :param data_set: the data set, as a list of Point objects.
    :param save_file_name: name of the CSV file in which the generated
        data is saved for later tests; if omitted, nothing is saved.
    :raises AssertionError: if too many draws are rejected (guard
        against an endless loop).
    :return: the generated vectors, as a list of coordinate lists.
    """
    number_of_vector = 0
    number_of_test = 0
    array_vector = []
    while number_of_vector < self.size_of_data_set:
        # Perf fix: draw one random vector per attempt instead of a
        # whole (size_of_data_set x dimension) matrix kept only for
        # its first row.
        vct = np.random.randint(self.domain, size=self.dimension).tolist()
        point = Point(vct)
        if self.distant_enough(point, delta, data_set):
            self.point_list.append(point)
            array_vector.append(vct)
            number_of_vector += 1
        number_of_test += 1
        if number_of_test >= 1000 * len(data_set):
            # Bug fix: `assert (False)` disappears under `python -O`;
            # raise the same exception type explicitly instead.
            logger.error("infinite loop to construct different")
            raise AssertionError("infinite loop to construct different")
    if save_file_name:
        target = os.path.join(os.getcwd(), DATA_FOLDER, save_file_name)
        try:
            os.remove(target)
        except OSError:
            # The file simply did not exist yet.
            pass
        pd.DataFrame(array_vector).to_csv(target, encoding='utf-8')
    return array_vector
def test_arf1_pile(self):
    """Querying the exact point that was inserted must answer True."""
    two_dim_filter = ARF(dim=2, domain=32, min_range_size=4)
    two_dim_filter.insert_one_point(Point([3, 5]))
    answer = two_dim_filter.test_one_point(Point([3, 5]))
    self.assertEqual(answer, True, "ARF test exact same value")
def test_erase(self):
    """After erase(), the filter shrinks back to its minimal bit size."""
    small_filter = ARF(dim=1, domain=32, min_range_size=4, size=9)
    small_filter.insert_one_point(Point([1]))
    small_filter.erase()
    assert small_filter.get_bit_size() == 7, "Problem with erase"
def test_arf_dim3_or(self):
    """A set holding one stored point must answer True (OR semantics)."""
    dim3_filter = ARF(dim=3, domain=32, min_range_size=4)
    dim3_filter.insert_one_point(Point([3, 5, 3]))
    answer, _ = dim3_filter.test_set_of_points(
        [[Point([3, 5, 3]), Point([8, 5, 3])]])
    self.assertEqual(answer, [True], "Problem with ARF dim = 3")
def test_arf_dim4(self):
    """In dim 4, the stored point answers True and a distant one False."""
    dim4_filter = ARF(dim=4, domain=32, min_range_size=4)
    dim4_filter.insert_one_point(Point([3, 5, 3, 5]))
    answer, _ = dim4_filter.test_set_of_points(
        [[Point([3, 5, 3, 5])], [Point([7, 12, 3, 5])]])
    self.assertEqual(answer, [True, False], "Problem with ARF dim = 4")
def test_arf_collision(self):
    """Inserting the same point twice must still answer True on query."""
    colliding_filter = ARF(dim=2, domain=32, min_range_size=4)
    for _ in range(2):
        colliding_filter.insert_one_point(Point([3, 5]))
    answer = colliding_filter.test_one_point(Point([3, 5]))
    self.assertEqual(answer, True,
                     "Problem with ARF with collision inside inputs")
def test_arf_mrs_1(self):
    """A filter with min range size 1 still matches the inserted point."""
    fine_grained_filter = ARF(dim=2, domain=32, min_range_size=1)
    fine_grained_filter.insert_one_point(Point([3, 5]))
    answer = fine_grained_filter.test_one_point(Point([3, 5]))
    self.assertEqual(answer, True,
                     "Problem with ARF with min range size = 1")
def test_arf1_almost(self):
    """A nearby point (same filter cell) must also answer True."""
    two_dim_filter = ARF(dim=2, domain=32, min_range_size=4)
    two_dim_filter.insert_one_point(Point([3, 5]))
    answer = two_dim_filter.test_one_point(Point([2, 6]))
    self.assertEqual(answer, True, "ARF don't match on almost sames values")