Example #1
    def load_data(self, file_path):
        """
        Load the data contained in file_path, file_path is a csv generated by pandas method to_csv.
        :param file_path:
        :return:
        """
        # We test if the file exist

        print(os.getcwd())
        self.points_list = []
        path_file = Path(file_path)
        DataVector.logger.info('Loading the points from ' + str(path_file))

        if not path_file.is_file():
            DataVector.logger.error('The file does not exist: ' + str(path_file))
            raise FileNotFoundError("The file does not exist: " + str(path_file))

        data_file = pd.read_csv(str(path_file))
        line_counter = -1
        try:
            for line_counter, row in enumerate(data_file.values):
                # The first column holds the index written by to_csv, so skip it.
                coordinates = [Decimal(int(elem)) for elem in row[1:]]

                # Abort if the row does not have the expected number of coordinates.
                if len(coordinates) != self.dimension:
                    DataVector.logger.error('The dimension is not correct: expected ' + str(self.dimension) + ', got ' + str(len(coordinates)))
                    raise Exception("The dimension of the point at line " + str(line_counter) + " is not correct")
                self.points_list.append(Point(coordinates))

            self.size_of_data = len(self.points_list)

        except Exception as e:
            DataVector.logger.error("Probleme during reading line " + str(line_counter))
            raise e
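A minimal round-trip sketch of the CSV layout load_data expects (hypothetical temporary file, plain lists instead of Point objects): to_csv writes the frame index as the first column, which is why the loader drops column 0 of every row.

import os
import tempfile
from decimal import Decimal
from pathlib import Path

import pandas as pd

# Write two 3-dimensional points the same way DataVector expects them.
path = Path(tempfile.gettempdir()) / "points.csv"
pd.DataFrame([[1, 2, 3], [4, 5, 6]]).to_csv(path, encoding='utf-8')

# Re-read the file and drop the leading index column, mirroring load_data.
frame = pd.read_csv(str(path))
points = [[Decimal(int(elem)) for elem in row[1:]] for row in frame.values]
print(points)
os.remove(path)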
 def discretise_point(self, point):
     """
     Apply a combinatory ceil and floor to each coordinate of the given point in parameter, according to the lambda_error.
     Args :
     :param point: coordinates which have to be maximised
     :return: a list of points that enclose the given point in parameter, in function of the dimension of the point and the lambda_error
     """
     #deepcopy the point given in parmeter to prevent instruction to modify it and be able able to use it again without modification
     point_c = deepcopy(point)
     results = []
     results.append(Point(self.minimisePoint(point_c.coordinates)))
     self.discretise_recursive(point_c.coordinates, point.coordinates, 0,
                               results)
     return results
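A simplified, self-contained sketch of the same idea using plain math.floor/math.ceil instead of the class's lambda_error-based rounding (names here are illustrative, not part of the original code): every coordinate can be rounded down or up, so a d-dimensional point is enclosed by up to 2**d corner points.

from itertools import product
from math import ceil, floor

def enclosing_corners(coordinates):
    # Keep both the floored and the ceiled value of each coordinate,
    # then take every combination of those choices.
    choices = [(floor(c), ceil(c)) for c in coordinates]
    return [list(corner) for corner in product(*choices)]

print(enclosing_corners([1.3, 2.7]))
# [[1, 2], [1, 3], [2, 2], [2, 3]]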
Example #3
    def __print(self, cur_node, cur_range, cur_range_size, cur_middle, depth):
        """
        Allow to print all the nodes which compose the ARF
        :return:
        """
        def compute_next_middles(cur_middle, list_of_building_middle,
                                 domain_of_cur_middle):
            if len(list_of_building_middle) < (2**len(cur_middle)):
                l1 = copy.deepcopy(list_of_building_middle)
                for i in range(len(list_of_building_middle)):
                    l1[i].append(
                        (cur_middle[len(l1[i])] + domain_of_cur_middle / 4))
                l2 = copy.deepcopy(list_of_building_middle)
                for i in range(len(list_of_building_middle)):
                    l2[i].append(
                        (cur_middle[len(l2[i])] - domain_of_cur_middle / 4))
                return compute_next_middles(cur_middle, l1 + l2,
                                            domain_of_cur_middle)
            else:
                return list_of_building_middle

        if type(cur_node) == Node:
            indentation = "\t" * depth
            print(indentation + "N " + cur_node.__repr__() + str(cur_range))
            next_middles = compute_next_middles(cur_middle, [[]],
                                                cur_range_size)
            next_ranges = list(
                map(
                    lambda x: list(
                        map(
                            lambda y: [
                                y - cur_range_size / 4, y + cur_range_size / 4
                            ], x)), next_middles))
            next_range_size = cur_range_size / 2
            for i in range(len(next_ranges)):
                next_node, _ = self.__move_to_the_next_node(
                    Point(next_middles[i]), cur_middle, cur_node,
                    cur_range_size)
                self.__print(next_node, next_ranges[i], next_range_size,
                             next_middles[i], depth + 1)
        elif type(cur_node) == Leaf:
            # It's a leaf
            indentation = "\t" * depth
            if cur_node.get_value():
                print(indentation + "L " + repr(cur_node) + str(cur_range) + " *")
            else:
                print(indentation + "L " + repr(cur_node) + str(cur_range))
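For reference, a standalone sketch of what compute_next_middles produces (illustrative names, plain lists instead of the original Point objects): the centers of the 2**d child cells, obtained by shifting the current center by a quarter of the cell size in each dimension.

from itertools import product

def child_centers(center, cell_size):
    # Each child center is the parent center shifted by +/- cell_size / 4
    # along every dimension, giving 2**d children.
    offsets = product(*[(-cell_size / 4, cell_size / 4) for _ in center])
    return [[c + o for c, o in zip(center, off)] for off in offsets]

print(child_centers([16, 16], 32))
# [[8.0, 8.0], [8.0, 24.0], [24.0, 8.0], [24.0, 24.0]]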
Example #4
 def genarate_similar(self, delta, data_set, save_file_name=None):
     """
     This method genrate the data and store it into the object attribute point_list. Each point generated in
         list_point is at least at <delta> from any point in <data_set>.
     :param delta: (float) the minimum distance between the point <point> to the <data_set>
     :param data_set: The data set in form of list of object Point
     :param save_file_name: name of the file in which we will store the generate data for next tests.
         if this parameter if not registered the data will be not save.
     :return: Nothing
     """
     number_of_vector = 0
     number_of_test = 0
     array_vector = []
     while number_of_vector < self.size_of_data_set:
         index = randint(0, len(data_set) - 1)
         vct = list(data_set[index].coordinates)
         for i in range(len(vct)):
             vct[i] += uniform(0, 1) * delta
         point = Point(vct)
         if point.distance(data_set[index]) < delta:
             self.point_list.append(point)
             array_vector.append(vct)
             number_of_vector += 1
         number_of_test += 1
         if number_of_test >= 1000 * len(data_set):
             logger.error("infinit loop to construct similar")
             assert (False)
     if save_file_name:
         try:
             os.remove(
                 os.path.join(os.getcwd(), DATA_FOLDER, save_file_name))
         except OSError:
             pass
         pd.DataFrame(array_vector).to_csv(os.path.join(
             os.getcwd(), DATA_FOLDER, save_file_name),
                                           encoding='utf-8')
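For reference, a standalone sketch of the jitter step (illustrative names, plain lists instead of Point objects): shifting every coordinate by up to delta can move the point further than delta in Euclidean distance, which is why genarate_similar keeps drawing candidates until the distance check passes.

from math import dist
from random import uniform

def jitter(base, delta):
    # Offset each coordinate by a uniform amount in [0, delta).
    return [c + uniform(0, 1) * delta for c in base]

base = [3.0, 5.0]
candidate = jitter(base, 0.5)
print(candidate, dist(base, candidate) < 0.5)  # the check can fail, hence the retry loop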
 def discretise_recursive(self, point, original_points, starting_index,
                          results):
     """
     shouldn't be called directly, call for discretise
     """
     for i in range(starting_index, len(point)):
         if point[i] != original_points[i]:
             # Each iteration of the loop must start from the same coordinates, so work on a copy.
             point_c = point[:]
             if self.method_type == Constants.DIS_DOUBLE:
                 point_c[i] += self.lambda_error
             else:
                 point_c[i] += 2 * self.lambda_error
             results.append(Point(point_c))
             self.discretise_recursive(point_c, original_points, i + 1,
                                       results)
Example #6
 def genarate(self, save_file_name=None):
     """
     This method genrate the data and store it into the object attribute point_list.
     :param save_file_name: name of the file in which we will store the generate data for next tests.
         if this parameter if not registered the data will be not save.
     :return: Nothing.
     """
     array_vector = np.random.randint(self.domain,
                                      size=(self.size_of_data_set,
                                            self.dimension))
     for vct in array_vector:
         self.point_list.append(Point(list(vct)))
     if save_file_name:
         try:
             os.remove(
                 os.path.join(os.getcwd(), DATA_FOLDER, save_file_name))
         except OSError:
             pass
         vector_data_frame_ = pd.DataFrame(array_vector)
         vector_data_frame_.to_csv(os.path.join(os.getcwd(), DATA_FOLDER,
                                                save_file_name),
                                   encoding='utf-8')
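A standalone sketch of the same generation step, using NumPy's newer Generator API instead of np.random.randint (function and parameter names are illustrative; the original reads self.domain, self.size_of_data_set and self.dimension from the instance): each point is a vector of integers drawn uniformly from [0, domain).

import numpy as np

def random_points(size, dim, domain, seed=None):
    # One row per point, one column per coordinate, values in [0, domain).
    rng = np.random.default_rng(seed)
    return rng.integers(0, domain, size=(size, dim)).tolist()

print(random_points(size=3, dim=2, domain=32, seed=0))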
Example #7
    def genarate_falses(self, delta, data_set, save_file_name=None):
        """
        This method genrate the data and store it into the object attribute point_list. Each point generated in
            list_point is at least at <delta> from any point in <data_set>.
        :param delta: (float) the minimum distance between the point <point> to the <data_set>
        :param data_set: The data set in form of list of object Point
        :param save_file_name: name of the file in which we will store the generate data for next tests.
            if this parameter if not registered the data will be not save.
        :return: Nothing
        """
        number_of_vector = 0
        number_of_test = 0
        array_vector = []
        while number_of_vector < self.size_of_data_set:
            # Draw one candidate vector with every coordinate uniform in [0, domain).
            vct = np.random.randint(self.domain, size=self.dimension).tolist()
            point = Point(vct)
            if self.distant_enough(point, delta, data_set):
                self.point_list.append(point)
                array_vector.append(vct)
                number_of_vector += 1
            number_of_test += 1
            if number_of_test >= 1000 * len(data_set):
                logger.error("infinite loop to construct different")
                assert (False)

        if save_file_name:
            try:
                os.remove(
                    os.path.join(os.getcwd(), DATA_FOLDER, save_file_name))
            except OSError:
                pass
            pd.DataFrame(array_vector).to_csv(os.path.join(
                os.getcwd(), DATA_FOLDER, save_file_name),
                                              encoding='utf-8')

        return array_vector
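The helper distant_enough is not shown in these examples; a plausible sketch of the check it performs, assuming Euclidean distance and plain lists instead of Point objects (names are illustrative):

from math import dist

def distant_enough(candidate, delta, data_set):
    # True only when the candidate is at least delta away from every point.
    return all(dist(candidate, point) >= delta for point in data_set)

print(distant_enough([10.0, 10.0], 3.0, [[0.0, 0.0], [4.0, 4.0]]))  # True
print(distant_enough([4.5, 4.0], 3.0, [[0.0, 0.0], [4.0, 4.0]]))    # False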
Example #8
 def test_arf1_pile(self):
     arf1 = ARF(dim=2, domain=32, min_range_size=4)
     arf1.insert_one_point(Point([3, 5]))
     res = arf1.test_one_point(Point([3, 5]))
     self.assertEqual(res, True, "ARF test exact same value")
Example #9
 def test_erase(self):
     myArf = ARF(dim=1, domain=32, min_range_size=4, size=9)
     myArf.insert_one_point(Point([1]))
     myArf.erase()
     assert myArf.get_bit_size() == 7, "Problem with erase"
Example #10
 def test_arf_dim3_or(self):
     arf1 = ARF(dim=3, domain=32, min_range_size=4)
     arf1.insert_one_point(Point([3, 5, 3]))
     res, _ = arf1.test_set_of_points([[Point([3, 5, 3]), Point([8, 5, 3])]])
     self.assertEqual(res, [True], "Problem with ARF dim = 3")
Example #11
 def test_arf_dim4(self):
     arf1 = ARF(dim=4, domain=32, min_range_size=4)
     arf1.insert_one_point(Point([3, 5, 3, 5]))
     res, _ = arf1.test_set_of_points([[Point([3, 5, 3, 5])], [Point([7, 12, 3, 5])]])
     self.assertEqual(res, [True, False], "Problem with ARF dim = 4")
Example #12
 def test_arf_collision(self):
     arf1 = ARF(dim=2, domain=32, min_range_size=4)
     arf1.insert_one_point(Point([3, 5]))
     arf1.insert_one_point(Point([3, 5]))
     res = arf1.test_one_point(Point([3, 5]))
     self.assertEqual(res, True, "Problem with ARF with collision inside inputs")
Example #13
 def test_arf_mrs_1(self):
     arf1 = ARF(dim=2, domain=32, min_range_size=1)
     arf1.insert_one_point(Point([3, 5]))
     res = arf1.test_one_point(Point([3, 5]))
     self.assertEqual(res, True, "Problem with ARF with min range size = 1")
Example #14
 def test_arf1_almost(self):
     arf1 = ARF(dim=2, domain=32, min_range_size=4)
     arf1.insert_one_point(Point([3, 5]))
     res = arf1.test_one_point(Point([2, 6]))
     self.assertEqual(res, True, "ARF don't match on almost sames values")