def calculate_distance(self):
    """Return the total route length and the total overrun of depot limits.

    Every route listed in ``self.routes`` is measured as the round trip
    depot -> customers (in route order) -> depot, using
    ``euclidean_distance`` on the coordinates stored at index 0 of the
    ``self.depots`` / ``self.customers`` entries. Route lengths are memoised
    in ``Chromosome.route_memo`` under ``hash((depot_id, tuple(route)))``.

    The limit of a depot is read from index 1 of its ``self.depots`` entry;
    a limit of 0 disables the check for that depot.

    :return: tuple ``(distance, excess_duration)``
    """
    total_length = 0
    total_overrun = 0
    for depot_id, depot_routes in self.routes.items():
        depot_xy = self.depots[depot_id][0]
        limit = self.depots[depot_id][1]
        for route in depot_routes:
            memo_key = hash((depot_id, tuple(route)))
            if memo_key in Chromosome.route_memo:
                length = Chromosome.route_memo[memo_key]
            else:
                # Round trip: start and finish at the depot.
                stops = [depot_xy]
                stops.extend(self.customers[stop][0] for stop in route)
                stops.append(depot_xy)
                length = 0
                for here, there in zip(stops, stops[1:]):
                    length += euclidean_distance(here, there)
                Chromosome.route_memo[memo_key] = length
            total_length += length
            if limit != 0 and length > limit:
                total_overrun += length - limit
    return total_length, total_overrun
def get_nearest_neighbhours(self, test_instance, training_data=None, k_neighbours=None):
    """
    Rank the training items by their distance to the test instance and
    return the closest ones.

    :param test_instance: feature vector passed as second operand to
        ``util.euclidean_distance``
    :param training_data: iterable of items whose element 0 is the feature
        vector; falls back to ``self.training_data`` when omitted
    :param k_neighbours: number of entries to return (51 when omitted)
    :return: list of ``{"item": <training item>, "distance": <distance>}``
        dicts, nearest first, at most ``k_neighbours`` long
    :raises ValueError: when neither the argument nor the instance provides
        training data
    """
    limit = 51 if k_neighbours is None else k_neighbours

    samples = training_data
    if samples is None:
        samples = self.training_data
        if samples is None:
            raise ValueError("KNN Model has not been given with any training data to compute neighbours")

    ranked = [
        {"item": sample, "distance": util.euclidean_distance(sample[0], test_instance)}
        for sample in samples
    ]
    # Order the entries by the value stored under their 'distance' key.
    ranked.sort(key=lambda entry: entry["distance"])
    return ranked[:limit]
def cluster(self, vectors, assign_clusters=False,ClusterNum=None, DisType='euc',Stype='mean',trace=False):
    """Fill the pairwise distance map, then delegate to the base clusterer.

    ``self._distMap`` is emptied and, for ``DisType`` 'cos' or 'euc',
    refilled with the distance of every index pair ``(i, j)``, ``i < j``.
    Any other ``DisType`` leaves the map empty. The clustering itself is
    done by ``VectorSpaceClusterer.cluster``; when the first vector has two
    components the result is also passed to ``draw_2D_cluster``.

    Returns ``[]`` for an empty ``vectors``, otherwise the base-class result.
    """
    # The map must be refreshed before each new sample set is clustered.
    self._distMap.clear()

    count = len(vectors)
    if count == 0:
        return []

    if DisType == 'cos':
        metric = cosine_distance
    elif DisType == 'euc':
        metric = euclidean_distance
    else:
        metric = None

    if metric is not None:
        for first in range(count):
            for second in range(first + 1, count):
                self._distMap[(first, second)] = metric(vectors[first], vectors[second])

    result = VectorSpaceClusterer.cluster(self, vectors, assign_clusters, ClusterNum, Stype, trace)

    # Two-dimensional samples are additionally visualised.
    if len(vectors[0]) == 2:
        draw_2D_cluster(vectors, result)
    return result
def cluster(self, vectors, assign_clusters=False, ClusterNum=None, DisType='cos', Stype='avg', trace=False):
    """Fill the pairwise distance map, build the dendrogram and cluster.

    ``self._distMap`` is emptied and, for ``DisType`` 'cos' or 'euc',
    refilled with the distance of every index pair ``(i, j)``, ``i < j``.
    Any other ``DisType`` leaves the map empty. ``self._dendrogram`` is
    rebuilt from the vectors, and the clustering itself is done by
    ``VectorSpaceClusterer.cluster``. When the first vector has two
    components the result is also passed to ``self.draw_2D``.

    Returns ``[]`` for an empty ``vectors`` (previously this ended in an
    IndexError on ``vectors[0]``), otherwise the base-class result.
    """
    # The map must be refreshed before each new sample set is clustered.
    self._distMap.clear()

    l = len(vectors)
    if l == 0:
        # Nothing to cluster; there is no first vector to inspect below.
        return []

    if DisType == 'cos':
        metric = cosine_distance
    elif DisType == 'euc':
        metric = euclidean_distance
    else:
        metric = None

    if metric is not None:
        for i in range(l):
            for j in range(i + 1, l):
                self._distMap[(i, j)] = metric(vectors[i], vectors[j])

    self._dendrogram = Dendrogram(
        [numpy.array(vector, numpy.float64) for vector in vectors])
    result = VectorSpaceClusterer.cluster(self, vectors, assign_clusters,
                                          ClusterNum, Stype, trace)

    # Two-dimensional samples are additionally visualised.
    if 2 == len(vectors[0]):
        self.draw_2D(vectors, result)
    return result
def __init__(self, position: VECTOR, radius: float, center: VECTOR):
    """Store the geometry and derive the orbit radius and rotation angle.

    ``orbit_radius`` is ``euclidean_distance`` applied to the unpacked
    ``position`` and ``center``. ``rotation_angle`` is
    ``365 / (radius * orbit_radius)`` divided by 10.
    """
    self.position, self.radius, self.center = position, radius, center
    self.orbit_radius = euclidean_distance(*position, *center)
    angle = 365 / (self.radius * self.orbit_radius)
    # Arbitrary constant to slow things down
    self.rotation_angle = angle / 10
def get_swap_cluster(self) -> Dict:
    """Map each customer to the depots that are not much farther than its nearest one.

    A depot qualifies when ``(distance - nearest) / nearest <= 2``, where
    ``nearest`` is the smallest customer-to-depot distance. Coordinates are
    read from index 0 of the ``self.customers`` / ``self.depots`` entries.

    When the nearest depot is at distance 0 the ratio is undefined (this
    used to raise ZeroDivisionError); in that case only depots that are also
    at distance 0 qualify.

    :return: ``defaultdict(list)`` of customer id -> list of depot ids, in
        ``self.depots`` iteration order; customers with no qualifying depot
        have no entry
    """
    swap_cluster = defaultdict(list)
    for customer_id in self.customers:
        customer_coordinate = self.customers[customer_id][0]

        # Measure every depot once and remember the smallest distance.
        depot_distances = []
        nearest = float("inf")
        for depot_id in self.depots:
            distance = euclidean_distance(customer_coordinate, self.depots[depot_id][0])
            depot_distances.append((depot_id, distance))
            if distance < nearest:
                nearest = distance

        for depot_id, distance in depot_distances:
            if nearest == 0:
                qualifies = distance == 0
            else:
                qualifies = ((distance - nearest) / nearest) <= 2
            if qualifies:
                swap_cluster[customer_id].append(depot_id)
    return swap_cluster
def make_label(dim, radius):
    """Build a ``dim x dim`` array of -1 with a disc of 1 around the centre.

    The centre index is ``int(dim / 2.0)`` on both axes. A cell ``(i, j)``
    is set to 1 when ``euclidean_distance(i, j, center, center) <= radius``.

    The scanned window is clamped to the array. Previously a radius larger
    than the centre index produced negative indices, which silently wrapped
    to the opposite edge, or indices past the end, which raised IndexError.

    :param dim: side length of the square array
    :param radius: radius of the disc
    :return: the label array
    """
    label = np.full((dim, dim), -1)
    center = int(dim / 2.0)
    reach = ceil(radius)
    # Keep the scan inside the valid index range [0, dim - 1].
    first = max(center - reach, 0)
    last = min(center + reach, dim - 1)
    if first > last:
        # Empty window (e.g. dim == 0 or a negative radius): nothing to mark.
        return label
    # inclusive_range is presumably inclusive of both bounds - confirm against its definition.
    for i in inclusive_range(first, last):
        for j in inclusive_range(first, last):
            if euclidean_distance(i, j, center, center) <= radius:
                label[i, j] = 1
    return label
def get_neighbourhood(self, index: tensor, radius: float) -> List[Tuple]:
    """Collect the grid cells lying within ``radius`` of ``index``.

    Every cell ``(row, col)`` of the ``self.output_rows`` x
    ``self.output_cols`` grid is compared with ``index`` through
    ``euclidean_distance``. The cell equal to ``index`` itself is left out.

    :return: list of ``((row, col), distance)`` tuples in row-major order
    """
    neighbours = []
    for row in range(self.output_rows):
        for col in range(self.output_cols):
            cell = (row, col)
            cell_tensor = tensor(list(cell))
            gap = euclidean_distance(index, cell_tensor)
            if not gap <= radius:
                continue
            if np.array_equal(index, cell_tensor):
                # Skip the queried cell itself.
                continue
            neighbours.append((cell, gap))
    return neighbours
def get_relational_features(target, landmark):
    """Describe where ``landmark`` lies relative to ``target``.

    Both arguments are indexed with 'pos_x' and 'pos_y'.

    Returns a dict with:
      'xdiff', 'ydiff' - landmark coordinate minus target coordinate
      'dist'           - ``util.euclidean_distance`` between the two points
      'ab'             - 0 when ydiff is negative, otherwise 1
      'lr'             - 0 when xdiff is positive, otherwise 1
    """
    target_x, target_y = target['pos_x'], target['pos_y']
    landmark_x, landmark_y = landmark['pos_x'], landmark['pos_y']
    dx = landmark_x - target_x
    dy = landmark_y - target_y
    separation = util.euclidean_distance((target_x, target_y), (landmark_x, landmark_y))
    return {
        'ab': 0 if dy < 0 else 1,
        'lr': 0 if dx > 0 else 1,
        'xdiff': dx,
        'ydiff': dy,
        'dist': separation,
    }
def calculate_food_distance(self, game_state, pacman_position, food_position):
    """Return ``util.euclidean_distance`` between Pac-Man and the food.

    ``game_state`` is accepted but not used. An earlier variant measured the
    length of an A* path (``graphSearchProblem.PositionSearchProblem`` with
    ``graphSearchProblem.aStarSearch``) and is no longer active.
    """
    return util.euclidean_distance(pacman_position, food_position)
def averageCost(data, costF_idx, medoids_idx, cacheOn=False):
    '''
    Assign every data point to its cheapest medoid and return the summed
    per-cluster average cost.

    data        -- indexable collection of points
    costF_idx   -- 0: euclidean_distance, 1: manhattan_distance,
                   2: pearson_distance
    medoids_idx -- indices into data that act as medoids
    cacheOn     -- when true, distances are read from / written to the
                   module-level distances_cache under the key (medoid, point)

    Returns (avg_cost, medoids) where medoids maps each medoid index to the
    list of point indices assigned to it, and avg_cost is the sum over
    clusters of total cluster cost / cluster size. A cluster that received
    no point contributes 0 (this used to raise ZeroDivisionError).

    Raises ValueError for an unknown costF_idx (the original error message
    had no format placeholder and raised TypeError instead).
    '''
    # Init the cluster
    size = len(data)
    total_cost = {}
    medoids = {}
    for idx in medoids_idx:
        medoids[idx] = []
        total_cost[idx] = 0.0

    # Compute the distance and do the clustering
    for i in range(size):
        choice = -1
        min_cost = float('inf')
        for m in medoids:
            # Check for cache
            tmp = distances_cache.get((m, i), None) if cacheOn else None
            if tmp is None:
                if costF_idx == 0:
                    tmp = euclidean_distance(data[m], data[i])
                elif costF_idx == 1:
                    tmp = manhattan_distance(data[m], data[i])
                elif costF_idx == 2:
                    tmp = pearson_distance(data[m], data[i])
                else:
                    raise ValueError(
                        'Error: unknown cost function idx: %r' % (costF_idx,))
                if cacheOn:
                    # Save the distance for acceleration
                    distances_cache[(m, i)] = tmp
            # Keep the cheapest medoid seen so far (first one wins ties)
            if tmp < min_cost:
                choice = m
                min_cost = tmp
        medoids[choice].append(i)
        total_cost[choice] += min_cost

    # Compute the average cost, skipping clusters without members
    avg_cost = 0.0
    for idx in medoids_idx:
        if medoids[idx]:
            avg_cost += total_cost[idx] / len(medoids[idx])

    # Return the average cost and clustering
    return (avg_cost, medoids)
def get_customer_cluster(self) -> Dict:
    """Group customer ids under the depot nearest to them.

    Distances come from ``euclidean_distance`` on the coordinates stored at
    index 0 of the ``self.customers`` / ``self.depots`` entries. On ties the
    depot met first while iterating ``self.depots`` wins. A customer for
    which no depot yields a distance below infinity is filed under ``None``.

    :return: ``defaultdict(list)`` of depot id -> list of customer ids
    """
    groups = defaultdict(list)
    for customer_id in self.customers:
        here = self.customers[customer_id][0]
        nearest_depot, nearest_gap = None, float('inf')
        for depot_id in self.depots:
            gap = euclidean_distance(here, self.depots[depot_id][0])
            if gap < nearest_gap:
                nearest_depot, nearest_gap = depot_id, gap
        groups[nearest_depot].append(customer_id)
    return groups
def __get_neighbors(self, data, k):
    """Return the ``k`` training rows closest to ``data``.

    Each returned row is ``self.train_x[i] + [self.train_y[i]]``, i.e. the
    feature list with its label appended, ordered by increasing
    ``euclidean_distance(data, self.train_x[i])``.

    When ``k`` exceeds the number of training rows all rows are returned
    (this used to raise IndexError); a non-positive ``k`` yields ``[]``.
    """
    scored = []
    for i, features in enumerate(self.train_x):
        dist = euclidean_distance(data, features)
        scored.append((features + [self.train_y[i]], dist))
    scored.sort(key=operator.itemgetter(1))
    # max() keeps a negative k from turning into an "all but the last" slice.
    return [row for row, _ in scored[:max(k, 0)]]
def cluster_points(points, cluster_dist=7):
    """Greedily merge nearby points and return one centroid per group.

    Repeatedly takes the first remaining point as seed, groups it with
    every remaining point whose ``util.euclidean_distance`` to the seed is
    below ``cluster_dist``, records ``util.get_centroid`` of the group and
    removes the group from the pool.

    Fixes over the previous version:
    * the loop ran only while more than one point remained, so a last
      isolated point (or a single input point) was silently dropped;
    * the seed is now always part of its own group, so a non-positive
      ``cluster_dist`` can no longer leave the pool unchanged and loop
      forever.

    :return: list of centroids, in the order the groups were formed
    """
    remaining = np.array(points)
    centroids = []
    while len(remaining) > 0:
        seed = remaining[0]
        distances = np.array(
            [util.euclidean_distance(seed, other) for other in remaining])
        in_group = distances < cluster_dist
        in_group[0] = True  # guarantees progress on every iteration
        centroids.append(util.get_centroid(remaining[in_group]))
        remaining = remaining[~in_group]
    return centroids
def update_distance_from_car(self, car_pose):
    """Refresh ``distance_from_car`` and record whether it grew or shrank.

    The new distance is ``util.euclidean_distance`` between ``(self.x,
    self.y)`` and ``car_pose.position``. When a truthy previous distance
    exists and the two differ according to ``floats_equal``,
    ``shift_relative_to_car`` becomes ``LIGHT_GETTING_CLOSER`` or
    ``LIGHT_GETTING_FARTHER``; otherwise it is left untouched.
    """
    car_position = car_pose.position
    current = util.euclidean_distance(self.x, self.y, car_position.x, car_position.y)
    previous = self.distance_from_car
    # NOTE(review): a previous distance of exactly 0 is treated like "no
    # previous reading" by this truthiness test - confirm that is intended.
    if previous and not floats_equal(current, previous):
        if current < previous:
            self.shift_relative_to_car = LIGHT_GETTING_CLOSER
        elif current > previous:
            self.shift_relative_to_car = LIGHT_GETTING_FARTHER
    self.distance_from_car = current
def cluster(self, vectors, assign_clusters=False, ClusterNum=None, DisType='euc', Stype='mean', trace=False):
    """Rebuild the pairwise distance map and run the base-class clustering.

    ``self._distMap`` is cleared, then filled with one entry per index pair
    ``(i, j)``, ``i < j``, using ``cosine_distance`` for ``DisType == 'cos'``
    or ``euclidean_distance`` for ``DisType == 'euc'``; other values leave
    it empty. ``VectorSpaceClusterer.cluster`` produces the result, which is
    also handed to ``draw_2D_cluster`` when the first vector has length 2.

    Returns ``[]`` when ``vectors`` is empty, otherwise the base-class result.
    """
    # The map has to be refreshed whenever a different sample set is clustered.
    self._distMap.clear()

    total = len(vectors)
    if total == 0:
        return []

    distance_fn = None
    if DisType == 'cos':
        distance_fn = cosine_distance
    elif DisType == 'euc':
        distance_fn = euclidean_distance

    if distance_fn is not None:
        for a in range(total):
            for b in range(a + 1, total):
                self._distMap[(a, b)] = distance_fn(vectors[a], vectors[b])

    result = VectorSpaceClusterer.cluster(
        self, vectors, assign_clusters, ClusterNum, Stype, trace)

    # Show the visualisation for two-dimensional samples.
    if len(vectors[0]) == 2:
        draw_2D_cluster(vectors, result)
    return result
def get_features(self, data):
    """Strip bookkeeping fields from every row and add the centre distance.

    For each key of ``data`` the row ``data[key]`` loses the entries
    'episode_id', 'position', 'id', 'v_skew', 'h_skew' and 'orientation'
    (a missing entry raises KeyError) and gains 'c_diff', the
    ``util.euclidean_distance`` from ``(320, 240)`` to the row's
    ``('pos_x', 'pos_y')``. Rows are modified in place.

    Returns the same ``data`` object.
    """
    # A one-hot encoding of 'v_skew' / 'h_skew' was tried earlier and is
    # disabled; both fields are simply removed.
    dropped = ('episode_id', 'position', 'id', 'v_skew', 'h_skew', 'orientation')
    for eid in data:
        row = data[eid]
        for field in dropped:
            del row[field]
        # distance from center
        row['c_diff'] = util.euclidean_distance((320, 240), (row['pos_x'], row['pos_y']))
    return data
def cluster(self, vectors, assign_clusters=False, DisType='cos',Stype='avg',trace=False):
    """Rebuild the distance map and dendrogram, then run the base clustering.

    ``self._distMap`` is cleared and filled with one entry per index pair
    ``(i, j)``, ``i < j``, using ``cosine_distance`` for ``DisType == 'cos'``
    or ``euclidean_distance`` for ``DisType == 'euc'``; other values leave
    it empty. ``self._dendrogram`` is rebuilt from the vectors converted to
    float64 arrays.

    Returns whatever ``VectorSpaceClusterer.cluster`` returns.
    """
    # The map has to be refreshed whenever a different sample set is clustered.
    self._distMap.clear()

    total = len(vectors)
    distance_fn = None
    if DisType == 'cos':
        distance_fn = cosine_distance
    elif DisType == 'euc':
        distance_fn = euclidean_distance

    if distance_fn is not None:
        for a in range(total):
            for b in range(a + 1, total):
                self._distMap[(a, b)] = distance_fn(vectors[a], vectors[b])

    leaves = [numpy.array(vector, numpy.float64) for vector in vectors]
    self._dendrogram = Dendrogram(leaves)

    return VectorSpaceClusterer.cluster(self, vectors, assign_clusters, Stype, trace)
def cluster(self, matrix):
    """Rank samples by the distance to their 3rd-closest entry and flag noise.

    Builds the symmetric matrix ``self.distList`` of pairwise
    ``euclidean_distance`` values (the diagonal is set to infinity so a
    sample is never its own neighbour). For every sample the three smallest
    entries of its row are kept; the third one is the sample's score. The
    fifth of the samples (``len(matrix) // 5``) with the largest scores is
    reported as noise.

    The ranked list, the sorted scores and the noise indices are printed.

    Changes: ``print`` calls, integer division for the slice bound and the
    builtin ``float`` dtype make the function run on Python 3 and on NumPy
    versions that removed ``np.float``.

    :return: ``(ADist, noise)`` - ascending list of scores, and the list of
        noise sample indices
    :raises IndexError: when ``0 < len(matrix) < 3`` (no 3rd entry exists)
    """
    l = len(matrix)

    self.distList = np.zeros((l, l), float)
    for i in range(l):
        self.distList[i][i] = float('inf')  # a sample is not compared with itself
        for j in range(i + 1, l):
            self.distList[i][j] = euclidean_distance(np.array(matrix[i]), np.array(matrix[j]))
            self.distList[j][i] = self.distList[i][j]

    m = 3  # number of closest entries kept per sample
    marks = list(range(l))
    mostSimList = []
    for i in range(l):
        row = zip(marks, self.distList[i].tolist())
        mostSimList.append(sorted(row, key=lambda x: x[1])[0:m])

    ADist = sorted(entry[m - 1][1] for entry in mostSimList)

    # Largest m-th distance first: those samples are the most isolated.
    mostSimList = sorted(zip(marks, mostSimList),
                         key=lambda x: x[1][m - 1][1], reverse=True)
    noise = [entry[0] for entry in mostSimList[0:l // 5]]

    print(mostSimList)
    print(ADist)
    print(noise)
    return ADist, noise
def targetFunction(data, costF_idx, medoids_idx, cacheOn=False, distDict={},
                   simDict={}, affinities={}, costType=CostType,
                   namedPoints=True):
    '''
    Assign every data point to a medoid and evaluate the target function.

    data        -- indexable collection of points
    costF_idx   -- 0: euclidean_distance, 1: manhattan_distance,
                   2: pearson_distance, 3: direct_distance (uses distDict),
                   4: similarity_distance (uses simDict)
    medoids_idx -- indices into data that act as medoids
    cacheOn     -- when true, distances are read from / written to the
                   module-level distances_cache under the key (medoid, point)
    affinities  -- forwarded to modularity() as edgeDict
    costType    -- "total", "average" or "modularity"
    namedPoints -- for "modularity": pass clusters keyed by data values
                   instead of indices

    Returns (value, medoids) where medoids maps each medoid index to the
    list of point indices assigned to it and value is
      "total"      -- the sum of all cluster costs
      "average"    -- the sum over clusters of cluster cost / cluster size;
                      a cluster without members contributes 0 (this used to
                      raise ZeroDivisionError)
      "modularity" -- the negated modularity (negated because it is
                      maximised)
    Returns 1 when costType is not one of the three values.

    Raises ValueError for an unknown costF_idx (the original error message
    had no format placeholder and raised TypeError instead).
    '''
    if costType not in ["total", "average", "modularity"]:
        print("unknown target function - check the global variables in the code")
        return (1)

    # Init the cluster
    size = len(data)
    total_cost = {}
    medoids = {}
    for idx in medoids_idx:
        medoids[idx] = []
        total_cost[idx] = 0.0

    assignErrors = []
    # Compute the distance and do the clustering
    for i in range(size):
        choice = -1
        min_cost = float('inf')
        # medoids themselves are also included into resulting cluster lists
        for m in medoids:
            # Check for cache
            tmp = distances_cache.get((m, i), None) if cacheOn else None
            if tmp is None:
                if costF_idx == 0:
                    tmp = euclidean_distance(data[m], data[i])
                elif costF_idx == 1:
                    tmp = manhattan_distance(data[m], data[i])
                elif costF_idx == 2:
                    tmp = pearson_distance(data[m], data[i])
                elif costF_idx == 3:
                    tmp = direct_distance(data[m], data[i], distDict)
                elif costF_idx == 4:
                    tmp = similarity_distance(data[m], data[i], simDict)
                else:
                    raise ValueError(
                        'Error: unknown cost function idx: %r' % (costF_idx,))
                if cacheOn:
                    # Save the distance for acceleration
                    distances_cache[(m, i)] = tmp
            # Randomization for nodes/points isolated from all the medoids
            # in order to assign them to random clusters. Hope averaging will
            # be able to glean cases for which some medoids did appear in the
            # same connected component, and group those nodes together.
            if tmp == 0.0 and min_cost == 0.0:
                # no connection to either medoid
                rv = bernoulli.rvs(1. / len(medoids_idx), size=1)
                if rv[0] == 1.:
                    choice = m
            elif tmp < min_cost:
                choice = m
                min_cost = tmp
        # Done the clustering
        if choice == -1:
            print("ERROR: the node cannot be assigned")
            assignErrors.append(i)
        else:
            medoids[choice].append(i)
            total_cost[choice] += min_cost

    # Compute the target function
    if costType == "total":
        return (sum(total_cost.values()), medoids)

    if costType == "average":
        avg_cost = 0.0
        for idx in medoids_idx:
            if medoids[idx]:
                avg_cost += total_cost[idx] / len(medoids[idx])
        return (avg_cost, medoids)

    # costType == "modularity" (validated at the top)
    if namedPoints == True:
        # If the points are named, pass the names instead of the indices
        named_medoids = {}
        for medID in medoids_idx:
            named_medoids[data[medID]] = []
            for pointID in medoids[medID]:
                named_medoids[data[medID]].append(data[pointID])
        clusters = named_medoids
    else:
        clusters = medoids
    # "-" because we maximize modularity
    mod = -modularity(data, COST=costF_idx, distDict=distDict,
                      edgeDict=affinities, medoids=clusters)
    print("modularity computed")

    if len(assignErrors) > 0:
        print("unassigned nodes:  %s" % (assignErrors,))
    else:
        print("no unassigned nodes, all right")
    return (mod, medoids)
def is_BIH_inlier(all_BIH_ip, corner, pix_dist=5):
    """Tell whether ``corner`` lies within ``pix_dist`` of any given point.

    Distances are measured with ``util.euclidean_distance``. A generator is
    used so the scan stops at the first point that is close enough instead
    of measuring every point first.

    :return: True when at least one point of ``all_BIH_ip`` is at distance
        ``<= pix_dist`` from ``corner``; False otherwise (also for an empty
        ``all_BIH_ip``)
    """
    return any(util.euclidean_distance(ip, corner) <= pix_dist
               for ip in all_BIH_ip)
def pacman_will_die(self, next_pacman_position, next_ghost_positions):
    """Report whether any ghost ends up too close to Pac-Man.

    A ghost is too close when its ``util.euclidean_distance`` to
    ``next_pacman_position`` is at most
    ``ReflexAgent.pacman_distance_from_ghost_coefficient``. The scan stops
    at the first such ghost.

    :return: True when such a ghost exists, False otherwise
    """
    return any(
        util.euclidean_distance(next_pacman_position, ghost_position)
        <= ReflexAgent.pacman_distance_from_ghost_coefficient
        for ghost_position in next_ghost_positions
    )
def totalCost(data, costF_idx, medoids_idx, cacheOn=CacheOn, distDict={},
              simDict={}, acceleration=0):
    '''
    Compute the total cost and do the clustering based on certain cost
    function (that is, assign each data point to certain cluster given the
    medoids).

    data         -- indexable collection of points
    costF_idx    -- 0: euclidean_distance, 1: manhattan_distance,
                    2: pearson_distance, 3: direct_distance (uses distDict),
                    4: similarity_distance (uses simDict)
    medoids_idx  -- indices into data that act as medoids
    cacheOn      -- when true, distances are read from / written to the
                    module-level distances_cache under the key (medoid, point)
    acceleration -- when 2, the clustering is returned as
                    {str(position): {'med': medoid, 'nodes': [points]}}

    A point whose cheapest cost is exactly 0 is left out of every cluster
    and adds nothing to the total.

    Returns (total_cost, medoids).

    Changes: a failure inside similarity_distance is reported and then
    re-raised, and an unknown costF_idx raises ValueError; both used to
    continue with a stale distance (which could also be cached). range and
    print() replace the Python 2 xrange / print statements.
    '''
    # Init the cluster
    size = len(data)
    total_cost = 0.0
    medoids = {}
    for idx in medoids_idx:
        medoids[idx] = []

    # Compute the distance and do the clustering
    for i in range(size):
        choice = -1
        min_cost = float('inf')
        for m in medoids:
            # Check for cache
            tmp = distances_cache.get((m, i), None) if cacheOn else None
            if tmp is None:
                if costF_idx == 0:
                    tmp = euclidean_distance(data[m], data[i])
                elif costF_idx == 1:
                    tmp = manhattan_distance(data[m], data[i])
                elif costF_idx == 2:
                    tmp = pearson_distance(data[m], data[i])
                elif costF_idx == 3:
                    tmp = direct_distance(data[m], data[i], distDict)
                elif costF_idx == 4:
                    try:
                        tmp = similarity_distance(data[m], data[i], simDict)
                    except Exception:
                        # Show the offending pair, then let the error surface
                        print("%s %s" % (m, i))
                        print(data[m])
                        print(data[i])
                        raise
                else:
                    raise ValueError(
                        'Error: unknown cost function idx: %r' % (costF_idx,))
                if cacheOn:
                    # Save the distance for acceleration
                    distances_cache[(m, i)] = tmp
            # Keep the cheapest medoid seen so far (first one wins ties)
            if tmp < min_cost:
                choice = m
                min_cost = tmp
        # Done the clustering
        if min_cost == 0:
            # 0 similarity to all the medoids: the point stays unassigned
            continue
        medoids[choice].append(i)
        total_cost += min_cost

    if acceleration == 2:
        transformed_medoids = {}
        for position, m in enumerate(medoids):
            transformed_medoids[str(position)] = {'med': m, 'nodes': medoids[m]}
        return (total_cost, transformed_medoids)

    # Return the total cost and clustering
    return (total_cost, medoids)
def overlaps(self, other: "Planet") -> bool:
    """Return True when the centres are no farther apart than the radii sum.

    The centre distance is ``euclidean_distance`` applied to the unpacked
    ``position`` of both planets; touching counts as overlapping.
    """
    reach = self.radius + other.radius
    gap = euclidean_distance(*self.position, *other.position)
    return gap <= reach
def _overlaps(self, other: "Cell") -> bool:
    """Check whether this cell and ``other`` overlap.

    Two cells overlap when the ``euclidean_distance`` between their
    ``(x, y)`` centres does not exceed the sum of their radii.
    """
    gap = euclidean_distance(self.x, self.y, other.x, other.y)
    return gap <= self.radius + other.radius
def totalCost(data, costF_idx, medoids_idx, cacheOn=CacheOn, distDict={},
              simDict={}, acceleration=0):
    '''
    Compute the total cost and do the clustering based on certain cost
    function (that is, assign each data point to certain cluster given the
    medoids).

    data         -- indexable collection of points
    costF_idx    -- 0: euclidean_distance, 1: manhattan_distance,
                    2: pearson_distance, 3: direct_distance (uses distDict),
                    4: similarity_distance (uses simDict)
    medoids_idx  -- indices into data that act as medoids
    cacheOn      -- when true, distances are read from / written to the
                    module-level distances_cache under the key (medoid, point)
    acceleration -- when 2, the clustering is returned as
                    {str(position): {'med': medoid, 'nodes': [points]}}

    A point whose cheapest cost is exactly 0 is left out of every cluster
    and adds nothing to the total.

    Returns (total_cost, medoids).

    Changes: a failure inside similarity_distance is reported and then
    re-raised, and an unknown costF_idx raises ValueError; both used to
    continue with a stale distance (which could also be cached). range and
    print() replace the Python 2 xrange / print statements.
    '''
    # Init the cluster
    size = len(data)
    total_cost = 0.0
    medoids = {}
    for idx in medoids_idx:
        medoids[idx] = []

    # Compute the distance and do the clustering
    for i in range(size):
        choice = -1
        min_cost = float('inf')
        for m in medoids:
            # Check for cache
            tmp = distances_cache.get((m, i), None) if cacheOn else None
            if tmp is None:
                if costF_idx == 0:
                    tmp = euclidean_distance(data[m], data[i])
                elif costF_idx == 1:
                    tmp = manhattan_distance(data[m], data[i])
                elif costF_idx == 2:
                    tmp = pearson_distance(data[m], data[i])
                elif costF_idx == 3:
                    tmp = direct_distance(data[m], data[i], distDict)
                elif costF_idx == 4:
                    try:
                        tmp = similarity_distance(data[m], data[i], simDict)
                    except Exception:
                        # Show the offending pair, then let the error surface
                        print("%s %s" % (m, i))
                        print(data[m])
                        print(data[i])
                        raise
                else:
                    raise ValueError(
                        'Error: unknown cost function idx: %r' % (costF_idx,))
                if cacheOn:
                    # Save the distance for acceleration
                    distances_cache[(m, i)] = tmp
            # Keep the cheapest medoid seen so far (first one wins ties)
            if tmp < min_cost:
                choice = m
                min_cost = tmp
        # Done the clustering
        if min_cost == 0:
            # 0 similarity to all the medoids: the point stays unassigned
            continue
        medoids[choice].append(i)
        total_cost += min_cost

    if acceleration == 2:
        transformed_medoids = {}
        for position, m in enumerate(medoids):
            transformed_medoids[str(position)] = {'med': m, 'nodes': medoids[m]}
        return (total_cost, transformed_medoids)

    # Return the total cost and clustering
    return (total_cost, medoids)
def targetFunction(data, costF_idx, medoids_idx, cacheOn=False, distDict={},
                   simDict={}, affinities={}, costType=CostType,
                   namedPoints=True):
    '''
    Assign every data point to a medoid and evaluate the target function.

    data        -- indexable collection of points
    costF_idx   -- 0: euclidean_distance, 1: manhattan_distance,
                   2: pearson_distance, 3: direct_distance (uses distDict),
                   4: similarity_distance (uses simDict)
    medoids_idx -- indices into data that act as medoids
    cacheOn     -- when true, distances are read from / written to the
                   module-level distances_cache under the key (medoid, point)
    affinities  -- forwarded to modularity() as edgeDict
    costType    -- "total", "average" or "modularity"
    namedPoints -- for "modularity": pass clusters keyed by data values
                   instead of indices

    Returns (value, medoids) where medoids maps each medoid index to the
    list of point indices assigned to it and value is
      "total"      -- the sum of all cluster costs
      "average"    -- the sum over clusters of cluster cost / cluster size;
                      a cluster without members contributes 0 (this used to
                      raise ZeroDivisionError)
      "modularity" -- the negated modularity (negated because it is
                      maximised)
    Returns 1 when costType is not one of the three values.

    Raises ValueError for an unknown costF_idx (the original error message
    had no format placeholder and raised TypeError instead).
    '''
    if costType not in ["total", "average", "modularity"]:
        print("unknown target function - check the global variables in the code")
        return(1)

    # Init the cluster
    size = len(data)
    total_cost = {}
    medoids = {}
    for idx in medoids_idx:
        medoids[idx] = []
        total_cost[idx] = 0.0

    assignErrors = []
    # Compute the distance and do the clustering
    for i in range(size):
        choice = -1
        min_cost = float('inf')
        # medoids themselves are also included into resulting cluster lists
        for m in medoids:
            # Check for cache
            tmp = distances_cache.get((m, i), None) if cacheOn else None
            if tmp is None:
                if costF_idx == 0:
                    tmp = euclidean_distance(data[m], data[i])
                elif costF_idx == 1:
                    tmp = manhattan_distance(data[m], data[i])
                elif costF_idx == 2:
                    tmp = pearson_distance(data[m], data[i])
                elif costF_idx == 3:
                    tmp = direct_distance(data[m], data[i], distDict)
                elif costF_idx == 4:
                    tmp = similarity_distance(data[m], data[i], simDict)
                else:
                    raise ValueError(
                        'Error: unknown cost function idx: %r' % (costF_idx,))
                if cacheOn:
                    # Save the distance for acceleration
                    distances_cache[(m, i)] = tmp
            # Randomization for nodes/points isolated from all the medoids
            # in order to assign them to random clusters. Hope averaging will
            # be able to glean cases for which some medoids did appear in the
            # same connected component, and group those nodes together.
            if tmp == 0.0 and min_cost == 0.0:
                # no connection to either medoid
                rv = bernoulli.rvs(1. / len(medoids_idx), size=1)
                if rv[0] == 1.:
                    choice = m
            elif tmp < min_cost:
                choice = m
                min_cost = tmp
        # Done the clustering
        if choice == -1:
            print("ERROR: the node cannot be assigned")
            assignErrors.append(i)
        else:
            medoids[choice].append(i)
            total_cost[choice] += min_cost

    # Compute the target function
    if costType == "total":
        return(sum(total_cost.values()), medoids)

    if costType == "average":
        avg_cost = 0.0
        for idx in medoids_idx:
            if medoids[idx]:
                avg_cost += total_cost[idx] / len(medoids[idx])
        return(avg_cost, medoids)

    # costType == "modularity" (validated at the top)
    if namedPoints == True:
        # If the points are named, pass the names instead of the indices
        named_medoids = {}
        for medID in medoids_idx:
            named_medoids[data[medID]] = []
            for pointID in medoids[medID]:
                named_medoids[data[medID]].append(data[pointID])
        clusters = named_medoids
    else:
        clusters = medoids
    # "-" because we maximize modularity
    mod = -modularity(data, COST=costF_idx, distDict=distDict,
                      edgeDict=affinities, medoids=clusters)
    print("modularity computed")

    if len(assignErrors) > 0:
        print("unassigned nodes:  %s" % (assignErrors,))
    else:
        print("no unassigned nodes, all right")
    return(mod, medoids)
def cutNoise(matrix):
    '''
    Split the samples into "real" and "noise" with a density-style rule.

    Step 1: rows with too few non-zero features are discarded: fewer than 3
    when the first row has more than 100 features, fewer than 1 otherwise.
    Step 2: for the remaining rows the distance to the nearest other row is
    computed with euclidean_distance; the ascending list of these distances
    is handed to min_Angle_part, whose third result ``part`` decides how many
    of the most isolated rows are noise: ``(1 + part) * l // 10``.

    When at most one row survives step 1, every surviving row is reported
    as noise.

    Returns (real_matrix, noise_matrix, tmp, angles, real, noise):
      real_matrix / noise_matrix -- the surviving rows, split
      tmp, angles                -- first two results of min_Angle_part
                                    ([] when it was not called)
      real / noise               -- sorted original row indices of the split

    Changes: runs on Python 3 (print(), integer division for the slice
    bound, builtin float dtype), accepts an empty matrix, and tests noise
    membership against a set instead of a list.
    '''
    print("total: %s" % len(matrix))

    discard = []  # not returned; derivable from noise and real
    noise = []
    real = []
    noise_matrix = []
    rest = []

    # Minimum number of non-zero features a row needs to be kept.
    low = 3 if (len(matrix) > 0 and len(matrix[0]) > 100) else 1
    for i, vector in enumerate(matrix):
        non_zero = sum(1 for value in vector if 0 != value)
        if low > non_zero:
            discard.append(i)
        else:
            real.append(i)
            rest.append(vector)
    print("discard: %s" % len(discard))

    l = len(rest)
    distList = np.zeros((l, l), float)
    for i in range(l):
        distList[i][i] = float('inf')  # a row is not compared with itself
        for j in range(i + 1, l):
            distList[i][j] = euclidean_distance(np.array(rest[i]), np.array(rest[j]))
            distList[j][i] = distList[i][j]

    m = 1  # rank of the neighbour whose distance is used
    if l <= m:
        return [], rest, [], [], [], copy.copy(real)

    # Distance from each row to its m-th nearest row.
    mostSimList = [sorted(distList[i].tolist())[m - 1] for i in range(l)]
    ADist = sorted(zip(range(l), mostSimList), key=lambda x: x[1], reverse=True)

    # min_Angle_part expects the distances in ascending order.
    Dlist = [dist for _, dist in reversed(ADist)]
    tmp, angles, part = min_Angle_part(Dlist)
    print(part)

    for e in ADist[0:(1 + part) * l // 10]:
        noise.append(real[e[0]])

    noise_ids = set(noise)
    real_matrix = []
    # real[i] is the original index of rest[i]
    for original_idx, vector in zip(real, rest):
        if original_idx in noise_ids:
            noise_matrix.append(vector)
        else:
            real_matrix.append(vector)

    real = sorted(set(real) - noise_ids)
    noise = sorted(noise)
    return real_matrix, noise_matrix, tmp, angles, real, noise