def test_LOF_normalize_instances():
    """Normalizing instances should rescale every coordinate into [0, 1]."""
    two_points = lof.LOF(((1, 1), (2, 2)), normalize=True)
    assert two_points.instances == [(0.0, 0.0), (1.0, 1.0)]

    three_points = lof.LOF(((1, 1), (2, 2), (3, 3)), normalize=True)
    assert three_points.instances == [(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)]
def test_get_lof_sorted_filtered_ascending_filtered_range(self):
    """Ascending sort with a [0, 2) LOF filter keeps the three lowest scores."""
    test_lof = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
    lofs = test_lof.get_lof_sorted_filtered(False, 0, 2)
    # 'a' and 'c' tie on the lowest LOF; 'b' follows, 'd' (2.0) is filtered out.
    expected = [
        ('a', 0.8749999999999999),
        ('c', 0.8749999999999999),
        ('b', 1.3333333333333333),
    ]
    assert lofs == expected
def test_get_all_lof(self):
    """get_all_lof returns one (name, LOF) pair per coordinate, in input order."""
    test_lof = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
    assert test_lof.get_all_lof() == [
        ('a', 0.8749999999999999),
        ('b', 1.3333333333333333),
        ('c', 0.8749999999999999),
        ('d', 2.0),
    ]
def test_init_with_coords_input_from_csv_file_name(self):
    """Coordinates can be loaded from a CSV file path; names are auto-generated."""
    test_lof = lof.LOF('test.csv', lof.LOF.CONST_MANHATTAN, 2)
    expected = [
        ('coord_0_x_0_y_0', 0.8749999999999999),
        ('coord_1_x_0_y_1', 1.3333333333333333),
        ('coord_2_x_1_y_1', 0.8749999999999999),
        ('coord_3_x_3_y_0', 2.0),
    ]
    assert test_lof.get_all_lof() == expected
def test_get_lof_sorted_filtered_descending(self):
    """Descending sort puts the strongest outlier ('d') first.

    Fixed: removed a leftover debug print of the result list.
    """
    test_lof = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
    lofs = test_lof.get_lof_sorted_filtered(True)
    assert len(lofs) == 4
    assert lofs[0] == ('d', 2.0)
    assert lofs[1] == ('b', 1.3333333333333333)
    # 'a' and 'c' tie at the same LOF; the implementation orders 'a' before 'c'.
    assert lofs[2] == ('a', 0.8749999999999999)
    assert lofs[3] == ('c', 0.8749999999999999)
def test_init_with_coords_input_as_array_of_tuples(self):
    """A plain list of (x, y) tuples is accepted; names are auto-generated."""
    points = [(0, 0), (0, 1), (1, 1), (3, 0)]
    test_lof = lof.LOF(points, lof.LOF.CONST_MANHATTAN, 2)
    assert test_lof.get_all_lof() == [
        ('coord_0_x_0_y_0', 0.8749999999999999),
        ('coord_1_x_0_y_1', 1.3333333333333333),
        ('coord_2_x_1_y_1', 0.8749999999999999),
        ('coord_3_x_3_y_0', 2.0),
    ]
def test_init_with_coords_input_as_x_and_y_array(self):
    """A [xs, ys] pair of parallel arrays is accepted; names are auto-generated."""
    xs = [0, 0, 1, 3]
    ys = [0, 1, 1, 0]
    test_lof = lof.LOF([xs, ys], lof.LOF.CONST_MANHATTAN, 2)
    assert test_lof.get_all_lof() == [
        ('coord_0_x_0_y_0', 0.8749999999999999),
        ('coord_1_x_0_y_1', 1.3333333333333333),
        ('coord_2_x_1_y_1', 0.8749999999999999),
        ('coord_3_x_3_y_0', 2.0),
    ]
def test_get_unique_pairs(self):
    """get_unique_pairs yields each unordered name pair exactly once.

    Fixed: removed a redundant first call to get_unique_pairs() whose result
    was discarded, the manual loop-and-append (replaced by list()), and the
    leftover debug print of each pair.
    """
    test_lof = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
    pairs = list(test_lof.get_unique_pairs())
    assert pairs == [
        ('a', 'b'),
        ('a', 'c'),
        ('a', 'd'),
        ('b', 'c'),
        ('b', 'd'),
        ('c', 'd'),
    ]
def test_init_with_coords_input_as_ordered_dict(self):
    """Named coordinates given as an OrderedDict keep their caller-supplied names."""
    coords = OrderedDict()
    coords['a'] = OrderedDict([('x', 0), ('y', 0)])
    coords['b'] = OrderedDict([('x', 0), ('y', 1)])
    coords['c'] = OrderedDict([('x', 1), ('y', 1)])
    coords['d'] = OrderedDict([('x', 3), ('y', 0)])
    test_lof = lof.LOF(coords, lof.LOF.CONST_MANHATTAN, 2)
    assert test_lof.get_all_lof() == [
        ('a', 0.8749999999999999),
        ('b', 1.3333333333333333),
        ('c', 0.8749999999999999),
        ('d', 2.0),
    ]
def test_LOF_normalize_instances():
    """Normalizing numpy inputs should map each axis onto [0, 1]."""
    pair = lof.LOF(np.array(((1, 1), (2, 2))), normalize=True)
    assert np.allclose(pair.instances, [(0.0, 0.0), (1.0, 1.0)])

    triple = lof.LOF(np.array(((1, 1), (2, 2), (3, 3))), normalize=True)
    assert np.allclose(triple.instances, [(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)])
def index():
    """Flask view: load the shuttle sample CSV, compute feature statistics and
    LOF-based 2-D positions, and render index.html with the combined payload.

    NOTE(review): the POST handling below is partially commented out; the live
    code prints the raw request body unconditionally — confirm intent.
    """
    # open data file
    data = []
    isFirstLine = 1
    with open('./static/shuttle20.csv', newline='') as readfile:
        reader = csv.reader(readfile)
        for row in reader:
            # do not read the first row!
            if isFirstLine == 1:
                # first row holds the column headers
                feature_name = row
                isFirstLine = 0
            else:
                # HACK: eval() parses each cell into a number — unsafe on
                # untrusted CSV content; consider int()/float() instead.
                tmp1 = map(eval, row)
                tmp2 = list(tmp1)
                data.append(tmp2)
                # empty data in csv will cause "unexpected EOF while parsing"
    # print(data)
    print("data loaded!")
    # calculate feature distribution
    (feature_position, feature_position_min, feature_position_max,
     feature_variance, feature_mean) = feature_explore.feature_analysis(data)
    # initialize LOF class
    k_list = []
    k_list = lof.initialize_k_list(data)
    benchmark = 1  # the gap between k and RC's size
    l = lof.LOF(data, k_list)
    # calculate data position (2-D MDS projection of the normalized instances)
    normalized_data_array = np.array(l.instances)
    (data_position, data_position_min,
     data_position_max) = mymds.get_position(normalized_data_array, 2)
    # Dead code kept as a bare string: the on-the-fly rare-category (RC)
    # computation, replaced below by precomputed constants.
    '''print("start to compute RC")
    rare_centers = lof.outliers(data, k_list)
    center = rare_centers[0]
    center_index = center["index"]
    center_kinf = center["k_inf"]
    # center point and its k-neighbours
    neighbours = lof.get_neighbours(center_kinf, data[center_index], data)
    # put togather as RC
    category = copy.deepcopy(neighbours)
    category.append(data[center_index])
    #print(category)
    category_mean = ca.category_mean_relative(category,feature_mean)'''
    # precomputed rare-category statistics (output of the disabled block above)
    category_mean = [
        -3.066666666666663, -0.8555555555555556, -0.8944444444444457,
        -0.1222222222222222, 3.811111111111117, 5.094444444444445,
        2.1611111111111114, -4.916666666666664, -7.033333333333333
    ]
    category_index = [14, 7, 19, 15, 16, 5, 2, 9, 13]
    print(category_mean)
    # payload handed to the template
    return_data = {
        "raw_data": data,
        "feature_position": feature_position,
        "feature_position_min": feature_position_min,
        "feature_position_max": feature_position_max,
        "feature_variance": feature_variance,
        "feature_name": feature_name,
        "data_position": data_position,
        "data_position_min": data_position_min,
        "data_position_max": data_position_max,
        "category_mean": category_mean,
        "category_index": category_index
    }
    ######### POST request #########
    '''if request.method == "POST":
        print("This is post")
        list_add = []
        getdata = json.loads(request.get_data())["data"]
        print(getdata)'''
    print("This is post")
    recv_data = request.get_data()
    print(recv_data)
    return render_template('index.html', data=return_data)
import matplotlib.pyplot as plt

import lof

# Demo: with k=1 the LOF scores are driven by single nearest neighbours,
# illustrating why too small a k value is a poor choice.
x = [0, 0, 1, 1, 4, 8, 9, 8, 9]
y = [0, 1, 1, 0, 0, 9, 8, 8, 9]

test_lof = lof.LOF([x, y], lof.LOF.CONST_MANHATTAN, 1)
for entry in test_lof.get_lof_sorted_filtered(True):
    print(str(entry[0]) + ": " + str(entry[1]))

plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.title('K Value Too Small')
plt.grid(True)
# plt.savefig("test.png")
plt.show()
def test_get_lof_sorted_filtered_descending_filtered(self):
    """Filtering the descending list to LOFs in [1, 2) keeps only 'b'."""
    test_lof = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
    lofs = test_lof.get_lof_sorted_filtered(True, 1, 2)
    assert lofs == [('b', 1.3333333333333333)]
def test_print_all_data(self):
    """Smoke test: print_all_data must run without raising."""
    lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2).print_all_data()
def test(self):
    """Smoke test: print_all_lof must run without raising.

    Fixed: removed a dead trailing `pass` statement.
    """
    test_lof = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
    test_lof.print_all_lof()
# empty data in csv will cause "unexpected EOF while parsing" #print(data) print("data loaded!") # calculate feature distribution (feature_position, feature_position_min, feature_position_max, feature_variance, feature_mean) = feature_explore.feature_analysis(data) #print(feature_position) #print(feature_position_max) print(feature_mean) #calculate rare category k_list = [] k_list = lof.initialize_k_list(data) benchmark = 1 # the gap between k and RC's size l = lof.LOF(data, k_list) normalized_data_array = np.array(l.instances) ( data_position, data_position_min, data_position_max, ) = mymds.get_position(normalized_data_array, 2) #print(data_position) print("start to compute RC") rare_centers = lof.outliers(data, k_list) #print(rare_centers) center = rare_centers[0]
def test_print_all_lof(self):
    """Smoke test: print_all_lof must run without raising; echo its result."""
    result = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2).print_all_lof()
    print(result)