class KDBasedKNearestNeighbor(object):
    """KDTree-based k-nearest-neighbour classifier.

    Usage order: ``fit`` stores the training data, ``calc_dist`` builds the
    KD-tree with the requested metric, then ``get_neighbors`` /
    ``predict_labels`` query it.
    """

    def __init__(self, k=1):
        # Default neighbour count; also used as the tree's leaf size.
        self.k = k

    def fit(self, X_train, y_train):
        """Store the training samples and labels and return ``self``.

        The tree itself is built later by ``calc_dist``, see
        http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html
        """
        self.X_train = X_train
        self.y_train = y_train
        return self

    def calc_dist(self, X_test, metric, k=None):
        """Build the KD-tree over the training samples and return ``self``.

        ``X_test`` and ``k`` are accepted for interface compatibility only;
        the leaf size is always ``self.k``.
        """
        self.kd_tree = KDTree(self.X_train, metric=metric, leaf_size=self.k)
        return self

    def get_neighbors(self, X_test, k=None):
        """Return the training-set indices of the ``k`` nearest neighbours.

        ``k`` defaults to ``self.k``. The result is the index array returned
        by ``KDTree.query`` (one row per test sample).
        """
        if k is None:
            k = self.k
        return self.kd_tree.query(X_test, k)[1]

    def predict_labels(self, X_test, k=None):
        """Predict a label per test sample by majority vote of its neighbours.

        Returns a float array of length ``X_test.shape[0]``. Ties are resolved
        by ``Counter.most_common``.
        """
        if k is None:
            k = self.k
        neighbor_idx = self.kd_tree.query(X_test, k)[1]
        y_pred = numpy.zeros(X_test.shape[0])
        for i, idx in enumerate(neighbor_idx):
            votes = Counter(self.y_train[idx])
            y_pred[i] = votes.most_common(1)[0][0]
        return y_pred
def patch_classify():
    """Visualise patches in PCA space.

    Loads the feature library and the matching raw patches from pickle files
    in the working directory, builds a KD-tree over the features, looks up
    the 100 nearest neighbours of three hard-coded samples and plots them on
    top of a 2-D PCA projection of the whole library, together with the raw
    patches of those neighbours.
    """
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # Both files are binary pickles, so open them in 'rb' and close them.
    with open("raw_data_full.pickle", 'rb') as raw_file:
        # Raw patches that correspond to the feature library.
        raw_lib = np.asarray(cPickle.load(raw_file), dtype='float32')
    with open('training_data_full.pickle', 'rb') as f:
        # Training data is a (patch_lib, feature_lib) pair; only the
        # features are needed below.
        _patch_lib, feature_lib = cPickle.load(f)
    feature_lib = np.asarray(feature_lib, dtype='float32')
    feature_lib = feature_lib.reshape((-1, 4 * 9 * 9))

    # Build the KD-tree. leaf_size must be a positive integer, hence the
    # floor division and the lower bound of 1.
    tree = KDTree(feature_lib, leaf_size=max(1, len(feature_lib) // 100))

    def nearest(sample_index):
        # KDTree.query expects a 2-D array, so query a single-row matrix.
        point = feature_lib[sample_index].reshape(1, -1)
        return tree.query(point, k=100)[1]

    # Search the 100 nearest points for three reference samples.
    ind1 = nearest(5678)
    nn1 = feature_lib[ind1][0]
    ind2 = nearest(10000)
    nn2 = feature_lib[ind2][0]
    ind3 = nearest(1233)
    nn3 = feature_lib[ind3][0]

    # Fit the PCA and project the whole library to 2-D.
    pca = PCA(n_components=2)
    d2_data = pca.fit_transform(feature_lib).T
    # Project the neighbours' high-dimensional coordinates to PCA space.
    r1 = pca.transform(nn1).T
    r2 = pca.transform(nn2).T
    r3 = pca.transform(nn3).T

    # Drawing area.
    ax = plt.axes([0.1, 0.1, 0.8, 0.8])
    # Scatter plot of all data.
    ax.scatter(d2_data[0], d2_data[1], c='g')
    # Scatter plots of the three neighbourhoods.
    ax.scatter(r1[0], r1[1], c='r')
    ax.scatter(r2[0], r2[1], c='b')
    ax.scatter(r3[0], r3[1], c='y')

    # raw_lib holds the original patches (patch_lib holds difference patches).
    patch_show(raw_lib[ind1][0], [0.05, 0.05, 0.4, 0.4], 'red')
    patch_show(raw_lib[ind2][0], [0.05, 0.55, 0.4, 0.4], 'blue')
    patch_show(raw_lib[ind3][0], [0.55, 0.05, 0.4, 0.4], 'yellow')
    plt.show()
def _keypoint_neighbour_sets(cords, keypoints, n_sets, pointsperset):
    """Return, for the first n_sets keypoints, the keypoint followed by its nearest cloud points."""
    if n_sets == 0:
        return np.array([])
    kdt = KDTree(cords, leaf_size=30, metric='euclidean')
    # Neighbours come back sorted by increasing distance.
    idx = kdt.query(keypoints[:n_sets], k=pointsperset, return_distance=False)
    pset = []
    for keypoint, neighbour_row in zip(keypoints[:n_sets], idx):
        pset.append(keypoint)
        pset.extend(cords[j] for j in neighbour_row)
    return np.array(pset)


def neighbour3dpoints(seqno, f1, f2, no_sets, pointsperset):
    """Build matched 3-D point sets around feature matches of two frames.

    Loads the stored point clouds of frames ``f1`` and ``f2`` of sequence
    ``seqno``, matches features between the two left images, keeps matches
    whose depth (third coordinate) lies in (0, 50) in both frames, and for up
    to ``no_sets`` of them collects the keypoint plus its ``pointsperset``
    nearest cloud points.

    Returns a tuple ``(pset1, pset2)`` of arrays, one per frame. Both are
    empty when no keypoint survives the depth filter.
    """
    coord_dir = '/home/manish/Awesomestuff/Subjects/IVP/Project_stereo/gen_data/coordinates/'
    cords1 = np.load(coord_dir + 'seq' + seqno + 'frame' + str(f1) + '.npy')
    cords2 = np.load(coord_dir + 'seq' + seqno + 'frame' + str(f2) + '.npy')

    i1 = hp.loadimage_kitti(seqno, 'l', f1, 0)
    i2 = hp.loadimage_kitti(seqno, 'l', f2, 0)
    (h, l) = i1.shape

    # The original passed the undefined names `img` and `template` here and
    # never used the second loaded image; the two loaded frames are the
    # inputs the rest of the function is consistent with.
    (pts_1, pts_2) = getfeatures(i1, i2, no_sets, 0)
    pts3d_1 = featurepoint_toworldtransform(pts_1, (h, l), cords1)
    pts3d_2 = featurepoint_toworldtransform(pts_2, (h, l), cords2)

    # Keep only matches with a positive depth below 50 in both frames.
    mask1 = np.logical_and(np.abs(pts3d_1[:, 2]) < 50, pts3d_1[:, 2] > 0)
    mask2 = np.logical_and(np.abs(pts3d_2[:, 2]) < 50, pts3d_2[:, 2] > 0)
    mask = np.logical_and(mask1, mask2)
    pts3d_1 = pts3d_1[mask]
    pts3d_2 = pts3d_2[mask]

    n_keypoints = len(pts3d_1)
    print('Total of ' + str(n_keypoints) + ' keypoints are found')
    # Fewer matches than requested sets may have survived the filter.
    n_sets = min(n_keypoints, no_sets)
    print('Total of ' + str(n_sets) + ' sets are found')

    pset1 = _keypoint_neighbour_sets(cords1, pts3d_1, n_sets, pointsperset)
    pset2 = _keypoint_neighbour_sets(cords2, pts3d_2, n_sets, pointsperset)
    return (pset1, pset2)
def _hdbscan_prims_kdtree(X, min_samples=5, alpha=1.0, metric='minkowski', p=2, leaf_size=40, gen_min_span_tree=False): if metric == 'minkowski': if p is None: raise TypeError('Minkowski metric given but no p value supplied!') if p < 0: raise ValueError('Minkowski metric with negative p value is not defined!') elif p is None: p = 2 # Unused, but needs to be integer; assume euclidean dim = X.shape[0] min_samples = min(dim - 1, min_samples) tree = KDTree(X, metric=metric, leaf_size=leaf_size) dist_metric = DistanceMetric.get_metric(metric) core_distances = tree.query(X, k=min_samples, dualtree=True, breadth_first=True)[0][:, -1] min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric, alpha) min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :] single_linkage_tree = label(min_spanning_tree) return single_linkage_tree, None
class Document:
    """A document represented by a KD-tree over its normalised word embeddings."""

    def __init__(self, embeddings=None, doc_file_name=None, word_index=None, model=None, use_lemma=False):
        """Build the KD-tree from embeddings obtained in one of three ways.

        * ``embeddings`` given directly;
        * ``word_index`` + ``model``: embeddings are looked up in the model;
        * ``doc_file_name`` + ``model``: words are read from the dependency
          parse file first (this branch also resets the ``Features`` flags).

        Raises ValueError when none of these yields embeddings.
        """
        # Identity checks: `embeddings == None` is element-wise (and
        # ambiguous in a boolean context) when embeddings is an ndarray.
        if embeddings is None and word_index is not None and model is not None:
            (_, embeddings) = Word2VecExecuter.Word2VecLoadWordsHashTable(model, word_index)
            embeddings = np.array(embeddings)
        elif doc_file_name is not None and model is not None:
            Features.USE_LEMMA = use_lemma
            Features.REMOVE_FEATURES_ONLY_APPEARING_ONE_TIME = False
            Features.REMOVE_FEATURES_APPEARING_IN_ONLY_ONE_DOCUMENT = False
            words = Features.ReadDependencyParseFile(doc_file_name, funit=Features.FeatureUnits.WORD, remove=False)
            (_, embeddings) = Word2VecExecuter.Word2VecLoadWordsHashTable(model, words)
            embeddings = np.array(embeddings)
        if embeddings is None:
            raise ValueError(
                'Document needs embeddings, or a model together with '
                'word_index or doc_file_name')
        self.kd_tree = KDTree(normalize(embeddings), leaf_size=30, metric='euclidean')

    def distance(self, other, theta=0.5):
        """Return the weighted mean nearest-neighbour distance to ``other``.

        The result is ``theta * mean(d(self <- other)) + (1 - theta) *
        mean(d(other <- self))`` where each term averages, over the points of
        one document, the distance to the nearest point of the other.
        Returns None when ``other`` is not a Document.
        """
        if not isinstance(other, Document):
            return None
        d_self_to_other = self.kd_tree.query(other.kd_tree.data, k=1, return_distance=True)[0]
        d_other_to_self = other.kd_tree.query(self.kd_tree.data, k=1, return_distance=True)[0]
        return np.mean(d_self_to_other) * theta + np.mean(d_other_to_self) * (1 - theta)
def match(x,y,mytab):
    """Routine that matches the truth catalog with the input table

    Args:
    ----
        x: `float` RA of the truth objects to match (in degrees)
        y: `float` dec of the truth objects to match (in degrees)
        mytab: `astropy.table.Table` table containing the L2 input catalog;
            its 'coord_ra' / 'coord_dec' columns are converted from radians
            to degrees before matching.

    Returns:
    -------
        ind: `int` array with one row per entry of ``mytab`` holding the index
            of the nearest truth object (as returned by ``KDTree.query`` with
            ``k=1``).
    """
    X = np.zeros((len(x), 2))
    X[:, 0] = x
    X[:, 1] = y
    tree = KDTree(X, leaf_size=40)

    Y = np.zeros((len(mytab), 2))
    Y[:, 0] = mytab['coord_ra'] * 180 / np.pi
    Y[:, 1] = mytab['coord_dec'] * 180 / np.pi
    dist, ind = tree.query(Y, k=1)

    # Single-argument print call: same output on Python 2 and Python 3.
    print('Matches with distance > 1 px,  %d' % np.count_nonzero(dist > 1))
    return ind
def compute_centroids(X, C):
    """Return, for every center in C, the index of its nearest sample in X.

    Note: centers C may not belong to X.

    The result is always 1-D with one entry per center; the previous
    ``squeeze()`` collapsed it to a 0-d array when C held a single center.
    """
    tree = KDTree(X)
    nearest = tree.query(C, k=1, return_distance=False)
    # query returns shape (n_centers, 1); take the only column.
    return nearest[:, 0]
def compute_labels(X, C):
    """Return, for every sample in X, the index of its nearest center in C.

    A KD-tree over the centers is used because an argmin over the full
    pairwise distance matrix needs too much memory for large X.

    The result is always 1-D with one entry per sample; the previous
    ``squeeze()`` collapsed it to a 0-d array when X held a single sample.
    """
    tree = KDTree(C)
    nearest = tree.query(X, k=1, return_distance=False)
    # query returns shape (n_samples, 1); take the only column.
    return nearest[:, 0]
def buildDistanceMap(self, X, Y):
    """Collect, for every sample, the distances and labels of its k neighbours.

    Returns ``(distanceMap, labels, TSOri)``:

    * ``distanceMap`` -- shape (n_samples, k), distances to the ``self.k``
      nearest other samples;
    * ``labels`` -- shape (n_samples, k), the labels of those neighbours;
    * ``TSOri`` -- one value per class (in ``np.unique(Y)`` order): the
      fraction of neighbour labels, over all samples of that class, that
      equal the class.

    The closest hit of each query is dropped on the assumption that it is the
    sample itself.
    """
    k = self.k
    classes = np.unique(Y)
    flat_Y = Y.ravel()

    # One batched query instead of a query plus two array re-allocations per
    # row.
    tree = KDTree(X)
    distances, neighbour_idx = tree.query(X, k=k + 1)
    neighbour_idx = neighbour_idx[:, 1:]
    distanceMap = distances[:, 1:].copy()
    # Appending to an empty float array reproduces the dtype promotion of the
    # original row-by-row construction.
    labels = np.append(np.empty((0, k)), flat_Y[neighbour_idx], axis=0)

    TSOri = np.array([
        np.sum(labels[flat_Y == c, :] == c) / (np.sum(Y == c) * float(k))
        for c in classes
    ])
    return distanceMap, labels, TSOri
def kdtree(data, lake_matrix, k_neighbors = 10, leaf_size = 20):
    """Find the nearest rows of ``data`` for every row of ``lake_matrix``.

    A Euclidean KD-tree is built over ``data`` and queried for the
    ``k_neighbors`` closest points of each query row.

    Returns ``(indices, distances)`` -- note the order is swapped relative to
    ``KDTree.query``.
    """
    # "Training": index the reference points.
    fitted_tree = KDTree(data, metric='euclidean', leaf_size=leaf_size)
    # "Testing": look up the neighbours of the query points.
    query_result = fitted_tree.query(lake_matrix, k=k_neighbors)
    neighbour_dist = query_result[0]
    neighbour_idx = query_result[1]
    return np.array(neighbour_idx), neighbour_dist
def match(x1, y1, x2=None, y2=None, k=5, kdt=None):
    """Find the k nearest (x2, y2) points for every (x1, y1) point.

    Either pass the reference coordinates ``x2``/``y2`` (a Euclidean KD-tree
    is built from them) or a previously returned tree via ``kdt`` to reuse it.

    Returns ``(dists, inds, kdt)`` as produced by ``kdt.query`` plus the tree
    itself.

    Raises ValueError when neither a tree nor both reference coordinate
    arrays are supplied.
    """
    X1 = np.vstack([x1, y1]).T
    if kdt is None:
        if x2 is None or y2 is None:
            raise ValueError('x2 and y2 are required when no kdt is given')
        # Only stack the reference points when a tree must be built.
        X2 = np.vstack([x2, y2]).T
        kdt = KDTree(X2, leaf_size=30, metric='euclidean')
    dists, inds = kdt.query(X1, k=k, return_distance=True)
    return dists, inds, kdt
def margin(indices, k, X, y):
    """Return a vote margin for each sample listed in ``indices``.

    For every sample the ``k`` nearest rows of ``X`` are looked up and their
    labels counted. The margin is the number of neighbours sharing the
    sample's own label minus the count of the most frequent other label; it
    is 0 when no other label occurs among the neighbours.

    Returns a list with one margin per entry of ``indices``.
    """
    from collections import Counter

    indices = list(indices)
    if not indices:
        return []

    kd_tree = KDTree(X)
    # One batched query for all requested samples.
    _, neighbour_indices = kd_tree.query(X[indices], k)

    margins = []
    for img_index, neighbour_row in zip(indices, neighbour_indices):
        current_class = y[img_index]
        votes = Counter(y[index] for index in neighbour_row)
        # Default of 0 avoids a KeyError when the own label is absent from
        # the neighbourhood (e.g. many duplicate points).
        in_class = votes.pop(current_class, 0)
        if votes:
            margins.append(in_class - max(votes.values()))
        else:
            margins.append(0)
    return margins
def _hdbscan_large_kdtree_cdist(X, min_cluster_size=5, min_samples=None, alpha=1.0,
                                metric='minkowski', p=2, gen_min_span_tree=False):
    """Cluster X with HDBSCAN using a KD-tree for the core distances.

    ``min_samples`` defaults to ``min_cluster_size`` when None and is capped
    at ``X.shape[0] - 1``. ``alpha`` and ``gen_min_span_tree`` are accepted
    but not used here.

    Returns ``(labels, probabilities, condensed_tree, single_linkage_tree,
    None)``; unclustered points keep label -1 and probability 0.
    """
    if p is None:
        p = 2
    # The signature default is None, which cannot be compared with an int
    # below.
    if min_samples is None:
        min_samples = min_cluster_size

    n_samples = X.shape[0]
    min_samples = min(n_samples - 1, min_samples)

    if metric == 'minkowski':
        tree = KDTree(X, metric=metric, p=p)
    else:
        tree = KDTree(X, metric=metric)

    # Distance to the k-th nearest neighbour of every point.
    core_distances = tree.query(X, k=min_samples)[0][:, -1]
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, metric, p)
    # Sort the edges by weight (third column).
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    single_linkage_tree = label(min_spanning_tree)
    condensed_tree = condense_tree(single_linkage_tree, min_cluster_size)
    stability_dict = compute_stability(condensed_tree)
    cluster_list = get_clusters(condensed_tree, stability_dict)

    labels = -1 * np.ones(n_samples, dtype=int)
    probabilities = np.zeros(n_samples, dtype=float)
    for index, (cluster, prob) in enumerate(cluster_list):
        labels[cluster] = index
        probabilities[cluster] = prob
    return labels, probabilities, condensed_tree, single_linkage_tree, None
def _hdbscan_prims_kdtree(X, min_samples=5, alpha=1.0, metric='minkowski', p=2, leaf_size=40, gen_min_span_tree=False): if metric == 'minkowski': if p is None: raise TypeError('Minkowski metric given but no p value supplied!') if p < 0: raise ValueError('Minkowski metric with negative p value is not defined!') elif p is None: p = 2 # Unused, but needs to be integer; assume euclidean size = X.shape[0] min_samples = min(size - 1, min_samples) tree = KDTree(X, metric=metric, leaf_size=leaf_size) #TO DO: Deal with p for minkowski appropriately dist_metric = DistanceMetric.get_metric(metric) #Get distance to kth nearest neighbour core_distances = tree.query(X, k=min_samples, dualtree=True, breadth_first=True)[0][:, -1] #Mutual reachability distance is implicite in mst_linkage_core_cdist min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric, alpha) #Sort edges of the min_spanning_tree by weight min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :] #Convert edge list into standard hierarchical clustering format single_linkage_tree = label(min_spanning_tree) return single_linkage_tree, None
def _rsl_prims_kdtree(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric='minkowski', p=2): if metric == 'minkowski': if p is None: raise TypeError('Minkowski metric given but no p value supplied!') if p < 0: raise ValueError('Minkowski metric with negative p value is not defined!') elif p is None: p = 2 # Unused, but needs to be integer; assume euclidean dim = X.shape[0] k = min(dim - 1, k) tree = KDTree(X, metric=metric) dist_metric = DistanceMetric.get_metric(metric) core_distances = tree.query(X, k=k)[0][:,-1] min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric) single_linkage_tree = label(min_spanning_tree) single_linkage_tree = SingleLinkageTree(single_linkage_tree) labels = single_linkage_tree.get_clusters(cut, gamma) return labels, single_linkage_tree
def margin_new(indices, k, X, y):
    """Return a distance-based margin for each sample listed in ``indices``.

    For every sample the ``k`` nearest rows of ``X`` are looked up and their
    distances summed per label. The margin is the summed distance to the
    sample's own label minus the smallest summed distance of any other
    label; it is 0 when no other label occurs among the neighbours.

    Returns a list with one margin per entry of ``indices``.
    """
    indices = list(indices)
    if not indices:
        return []

    margins = []
    kd_tree = KDTree(X)
    for img_index in indices:
        current_class = y[img_index]
        dists, neighbour_indices = kd_tree.query(X[img_index].reshape(1, -1), k)

        # Sum of neighbour distances per label.
        classes = {}
        for index, dist in zip(neighbour_indices[0], dists[0]):
            classes[y[index]] = classes.get(y[index], 0) + dist

        # Default of 0 avoids a KeyError when the own label is absent from
        # the neighbourhood.
        dist_to_class = classes.pop(current_class, 0)

        # NOTE(review): the collapsed source does not show whether the margin
        # was assigned inside this branch; this mirrors `margin` -- confirm.
        margin = 0
        if classes:
            margin = dist_to_class - min(classes.values())
        margins.append(margin)
    return margins
def test_kdtree_projection(datas):
    """Check that a randomly projected fingerprint finds itself in a KD-tree.

    Fingerprints of ``datas`` are projected to 128 dimensions with a Gaussian
    random projection and indexed; a randomly chosen item is then queried
    (ten times, under ``timer``) and its nearest hit must equal the item.
    """
    from sklearn.neighbors import KDTree
    from sklearn import random_projection

    Fs = fingerprints(datas)

    # The random projection
    transformer = random_projection.GaussianRandomProjection(n_components=128)
    Fs_new = transformer.fit_transform(Fs)
    # Call form prints the same tuple on Python 2 and Python 3.
    print(Fs_new.shape)
    tree = KDTree(Fs_new, leaf_size=20)

    # Select a random target
    target_i = random.choice(range(len(datas)))
    target = datas[target_i]
    Tf = np.vstack([fingerprint(target)])
    Tf_new = transformer.transform(Tf)

    # Match it
    with timer(10):
        for _ in range(10):
            dist, ind = tree.query(Tf_new, k=3)
    assert datas[ind[0][0]] == datas[target_i]
def constructLMap(self):
    """Build and publish the likelihood map from the occupancy map.

    Every cell whose value is 1.0 is treated as an obstacle. For each cell
    the distance to the nearest obstacle is looked up in a KD-tree and turned
    into ``exp(-0.5 * d**2 / sigma**2)`` with sigma taken from
    ``self.config["laser_sigma_hit"]``. The result is stored in
    ``self.lMap.grid`` and published as an occupancy-grid message.
    """
    self.obstacleArray = []
    self.allPositions = []

    # Collect the positions of all cells and, separately, of the obstacles.
    for i in range(len(self.map.grid)):
        for j in range(len(self.map.grid[0])):
            x, y = self.map.cell_position(i, j)
            is_obstacle = self.map.get_cell(x, y) == 1.0
            if is_obstacle:
                self.obstacleArray.append(np.array(self.map.cell_position(i, j)))
            self.allPositions.append(np.array(self.map.cell_position(i, j)))

    # Nearest-obstacle distance for every cell.
    obstacle_tree = KDTree(self.obstacleArray)
    query_result = obstacle_tree.query(self.allPositions, k=1)
    dists = query_result[0][:]

    self.laserStdDev = self.config["laser_sigma_hit"]
    # Gaussian normalisation factor; computed but not applied below.
    constant = 1.0 / (m.sqrt(2 * m.pi) * self.laserStdDev)
    probObsGivenLaser = np.exp(-0.5 * (dists**2) / (self.laserStdDev**2))

    self.lMap.grid = probObsGivenLaser.reshape(self.lMap.grid.shape)
    self.occupancyGridMsg = self.lMap.to_message()
    self.lMapPublisher.publish(self.occupancyGridMsg)
def match_regions(polygons, regionlocs, n_dim=2):
    """Assign every polygon to its nearest region location.

    Parameters
    ----------
    polygons: list or array_like
        the polygons information; each entry is used as an ``n_dim`` point.
    regionlocs: array_like
        the location information of the regions.
    n_dim: integer
        the number of dimensions.

    Returns
    -------
    assign_r: array_like
        integer array with, per polygon, the index of the nearest region.
        Empty when ``polygons`` is empty.

    """
    n = len(polygons)
    if n == 0:
        return np.zeros(0).astype(int)

    centroids = np.zeros((n, n_dim))
    for i, polygon in enumerate(polygons):
        centroids[i, :] = np.array(polygon)

    ret = KDTree(regionlocs)
    # One batched query; the index array has shape (n, 1).
    return ret.query(centroids)[1][:, 0].astype(int)
def uniform_points_points_sampling(limits, points, n):
    """Pick available points that are close to spatially uniform samples.

    Uniform points are drawn with ``uniform_points_sampling(limits, n)`` and
    each of them is replaced by the index of its nearest neighbour among
    ``points``.

    Parameters
    ----------
    limits: numpy.ndarray, shape (2, 2)
        the limits of the space, passed on to ``uniform_points_sampling``.
    points: numpy.ndarray
        the available points in the space.
    n: int
        the number of samples we want.

    Returns
    -------
    indices: numpy.ndarray
        integer indices into ``points``, in the layout returned by
        ``KDTree.query`` with ``k=1`` (one row per sampled point).

    """
    # Index the available points, then draw the uniform targets.
    nearest_lookup = KDTree(points)
    uniform_targets = uniform_points_sampling(limits, n)
    # query returns (distances, indices); only the indices are needed.
    _, nearest_idx = nearest_lookup.query(uniform_targets, k=1)
    return nearest_idx.astype(int)
def _oriented_normal(regionpnts, point, camerapos):
    """Return the smallest-variance direction of regionpnts, flipped to face camerapos."""
    covmat = np.cov(regionpnts.T)
    # The covariance matrix is symmetric, so eigh applies and keeps the
    # result real (eig may return complex values from round-off).
    eigvalues, eigmat = np.linalg.eigh(covmat)
    eigvec = eigmat[:, np.argmin(eigvalues)]
    if np.dot(eigvec, camerapos - point) < 0:
        eigvec = -eigvec
    return eigvec


def estimatenormals(points, npoints = 40, method = 'pca'):
    """
    estimate the normals of points

    :param points: an array of [x, y, z]
    :param npoints: number of nearest neighbours used per point
    :param method: 'pca' or 'ransac', theoretically ransac is more precise when there are more points;
                   any other value yields an empty list
    :return: a list of normal vectors, oriented towards the origin (camera position)

    author: weiwei
    date: 20170714
    """

    if method not in ('pca', 'ransac'):
        return []

    camerapos = np.array([0.0, 0.0, 0.0])
    kdt = KDTree(points)
    regionpntidlist = kdt.query(points, k=npoints, return_distance=False)

    pointsnormals = []
    if method == 'pca':
        for i, pntidlist in enumerate(regionpntidlist):
            pointsnormals.append(
                _oriented_normal(points[pntidlist], points[i], camerapos))
    else:
        # NOTE: this part is not usable due to small npoints
        ransacer = linear_model.RANSACRegressor(linear_model.LinearRegression())
        for i, pntidlist in enumerate(regionpntidlist):
            XYZ = points[pntidlist]
            # Fit z = f(x, y) and keep only the inliers of that plane.
            ransacer.fit(XYZ[:, 0:2], XYZ[:, 2])
            regionpnts = XYZ[ransacer.inlier_mask_]
            pointsnormals.append(
                _oriented_normal(regionpnts, points[i], camerapos))
    return pointsnormals
def get_hip_rank(points, sub):
    """Score each point by the check-in counts of its nearest venues.

    ``sub`` must provide 'lat', 'lng' and 'checkinsCount' columns. For every
    point the (up to) 10 nearest venues are found and
    ``checkinsCount / distance**2 * 1e-11`` is summed over them.

    Returns an array with one score per point, or an empty list when ``sub``
    has no rows. A point that coincides with a venue has distance 0 and
    therefore produces an infinite term.
    """
    sub_coords = sub[['lat', 'lng']].values
    # `.shape` is a non-empty tuple even for zero rows, so test the row
    # count explicitly.
    if sub_coords.shape[0] == 0:
        return []
    sub_scores = sub.checkinsCount.apply(int).values
    kdt = KDTree(sub_coords, metric='euclidean')
    # KDTree.query cannot return more neighbours than there are venues.
    n_neighbours = min(10, sub_coords.shape[0])
    d, i = kdt.query(np.array(points), k=n_neighbours)
    return (sub_scores[i] / d**2 * 1e-11).sum(axis=1)
def get_median_neighbors(df, n_neighbors, adj_r):
    '''
    INPUT: Pandas dataframe, the number of comparable neighbors of each
    listing whose median price is used, and the yearly price adjustment rate
    applied to the difference in sale year.
    OUTPUT: The same dataframe with a 'med_neighbor_price' column added (the
    median adjusted price of the n_neighbors closest comparables). A
    'price adjusted' scratch column is also left behind when some listing
    needed the last fallback.

    Comparables are found with a KD-Tree on latitude/longitude among the
    (up to) 100 nearest listings, restricted to the same number of beds and a
    different id. The fitted tree is pickled to '../models/kd_tree.pkl'.

    The dataframe index is used as the KD-Tree row number, so it is assumed
    to be 0..len(df)-1 -- TODO confirm with callers.
    '''
    kdvals = df[['latitude', 'longitude']].values
    kd = KDTree(kdvals, leaf_size=1000)
    with open('../models/kd_tree.pkl', 'wb') as model_file:
        cPickle.dump(kd, model_file)
    # query() cannot return more neighbours than there are listings.
    neighbor_idx = kd.query(kdvals, k=min(100, len(df)))[1]

    def adjusted_median(rows, sale_y):
        # Median of the sale prices of `rows`, adjusted to the year sale_y.
        prices = df['last sale price'].loc[rows]
        years = df['sale_y'].loc[rows]
        return (prices * (1.0 + (sale_y - years) * adj_r)).median()

    median_neighbor_prices = []
    for i in range(len(df)):
        listing_neighbors = neighbor_idx[i]
        # .ix was removed from pandas; with an integer index it was
        # label-based, which is exactly what .loc does.
        listing_id = df.loc[i, 'id']
        n_beds = df.loc[i, 'beds']
        sale_y = df.loc[i, 'sale_y']

        sub_df = df[df.index.isin(listing_neighbors)]
        sub_df = sub_df[(sub_df['beds'] == n_beds) & (sub_df['id'] != listing_id)]
        # Keep the comparables in order of increasing distance.
        comp_listings = [item for item in listing_neighbors if item in sub_df.index]

        med_price = adjusted_median(comp_listings[:n_neighbors], sale_y)
        if not med_price > 0:
            # Widen the neighbourhood by ten comparables.
            med_price = adjusted_median(comp_listings[:n_neighbors + 10], sale_y)
        if not med_price > 0:
            # Last resort: all comparables (may still be NaN). The scratch
            # column is kept for backward compatibility of the output frame.
            df['price adjusted'] = df['last sale price'] * (1.0 + (sale_y - df['sale_y']) * adj_r)
            med_price = df['price adjusted'][comp_listings].median()
        median_neighbor_prices.append(med_price)

    df['med_neighbor_price'] = median_neighbor_prices
    rmse = np.mean((df['med_neighbor_price'] - df['last sale price'])**2)**0.5
    # Single-argument print call: same output on Python 2 and Python 3.
    print('RMSE is  %s' % rmse)
    return df
def environment(x_h, y_h, z_h, x, y, z, D3):
    """Return the density contrast around the point whose x equals ``x_h``.

    The points ``(x, y, z)`` are indexed in a KD-tree, the 4 nearest
    neighbours of the matching point(s) are queried and ``r3`` is the largest
    of those distances for the first match. The result is
    ``D3**3 * (1 / r3**3 - 1 / D3**3)``.

    NOTE(review): ``y_h`` and ``z_h`` are not used; the point is selected by
    its x coordinate alone -- confirm this is intended.
    """
    positions = np.array([x, y, z]).T
    neighbour_tree = KDTree(positions, leaf_size=20000)

    matching_rows = np.where(x_h == x)[0]
    query_result = neighbour_tree.query(positions[matching_rows], k=4)
    first_match_dists = query_result[0][0]
    r3 = max(first_match_dists)

    d3_cubed = D3**3.0
    return d3_cubed * (1.0/(r3**3.0) - 1.0/d3_cubed)
def retrieve_7major_cp(locs, raw_locs, raw_cps):
    """Assign to every location the majority code of its 7 nearest raw locations.

    ``raw_cps`` (cast to int) holds one code per row of ``raw_locs``. For each
    row of ``locs`` the 7 nearest raw locations are found and the most common
    of their codes is taken; ties are resolved by ``Counter.most_common``.

    Returns a list with one code per row of ``locs``.
    """
    raw_cps = np.array(raw_cps).astype(int)
    if len(locs) == 0:
        return []

    ret = KDTree(raw_locs)
    # One batched query; each row holds the 7 neighbour indices of a location.
    neighbours = ret.query(locs, 7)[1]

    new_cps = []
    for neighs in neighbours:
        votes = Counter(raw_cps[nei] for nei in neighs)
        # dict views are not indexable on Python 3; most_common works on both.
        new_cps.append(votes.most_common(1)[0][0])
    return new_cps
def negativeLabels(features, positiveLabels):
    """Collect candidate (region, neighbour) pairs between consecutive frames.

    For every frame ``i >= 1`` a KD-tree is built over that frame's
    'RegionCenter' values and queried with the previous frame's centers for
    the 3 nearest regions. For ``j`` in ``1 .. len(features[i]) - 1`` the
    pairs ``[j, neighbour]`` are appended to that frame's list.

    Returns a list with one independent list per frame (the first stays
    empty). ``positiveLabels`` is accepted but not used.
    """
    # One fresh list per frame: `[[]] * n` would alias a single shared list,
    # so every frame's pairs would end up in all entries.
    neg_lab = [[] for _ in features]
    for i in range(1, len(features)):
        kdt = KDTree(features[i]['RegionCenter'], metric='euclidean')
        neighb = kdt.query(features[i-1]['RegionCenter'], k=3, return_distance=False)
        for j in range(1, len(features[i])):
            for m in range(0, neighb.shape[1]):
                neg_lab[i].append([j, neighb[j][m]])
    return neg_lab
def find_knn(pts0, eval_pts, k=15):
    '''
    find the points within `pts0` closest to `eval_pts`

    Both point sets are scaled by the per-dimension range of `pts0` before
    the search so that every dimension contributes comparably.

    Returns the index array from `KDTree.query` (one row of `k` indices into
    `pts0` per evaluation point).
    '''
    pts0range = (pts0.max(axis=0) - pts0.min(axis=0))
    # A dimension that is constant in pts0 has zero range; leave it unscaled
    # instead of dividing by zero.
    pts0range = np.where(pts0range == 0, 1, pts0range)
    neigh = KDTree(pts0 / pts0range)
    nni = neigh.query(eval_pts / pts0range, k=k, return_distance=False)
    return nni
def main():
    """Embed the digits dataset with t-SNE and Isomap and browse neighbours.

    Plots both 2-D embeddings (one colour per class), prints the time each
    embedding took, and finally shows the digits that are the 20 nearest
    neighbours of two hand-picked query positions in Isomap space.
    """
    digits = load_digits()
    X = digits.data
    y = digits.target
    num_classes = np.unique(y).shape[0]
    plot_digits(X)

    # TSNE
    # Barnes-Hut: O(d NlogN) where d is dim and N is the number of samples
    # Exact: O(d N^2)
    t0 = time()
    tsne = manifold.TSNE(n_components=2, init="pca", method="barnes_hut", verbose=1)
    X_tsne = tsne.fit_transform(X)
    t1 = time()
    # Parenthesised single-argument form works on Python 2 and Python 3.
    print("t-SNE: %.2f sec" % (t1 - t0))

    plt.figure(2)
    for k in range(num_classes):
        plt.plot(X_tsne[y == k, 0], X_tsne[y == k, 1], "o")
    plt.title("t-SNE embedding of digits dataset")
    plt.xlabel("X1")
    plt.ylabel("X2")
    axes = plt.gca()
    axes.set_xlim([X_tsne[:, 0].min() - 1, X_tsne[:, 0].max() + 1])
    axes.set_ylim([X_tsne[:, 1].min() - 1, X_tsne[:, 1].max() + 1])
    plt.show()

    # ISOMAP
    # 1. Nearest neighbors search: O(d log k N log N)
    # 2. Shortest path graph search: O(N^2(k+log(N))
    # 3. Partial eigenvalue decomposition: O(dN^2)
    t0 = time()
    isomap = manifold.Isomap(n_neighbors=5, n_components=2)
    X_isomap = isomap.fit_transform(X)
    t1 = time()
    print("Isomap: %.2f sec" % (t1 - t0))

    plt.figure(3)
    for k in range(num_classes):
        plt.plot(X_isomap[y == k, 0], X_isomap[y == k, 1], "o", label=str(k), linewidth=2)
    plt.title("Isomap embedding of the digits dataset")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.show()

    # Use KD-tree to find k-nearest neighbors to a query image
    kdt = KDTree(X_isomap)
    Q = np.array([[-160, -30], [-102, 14]])
    kdt_dist, kdt_idx = kdt.query(Q, k=20)
    plot_digits(X[kdt_idx.ravel(), :])
def test_distance(datas):
    """Compare fingerprint matches found through a 7-D random projection.

    Over 100 trials two random items i and j are projected and looked up in
    the KD-tree; ``match`` is evaluated between the fingerprint of i and the
    hit for i ("correct") and the hit for j ("random"). Mean and standard
    deviation of both series are printed.
    """
    from sklearn.neighbors import KDTree
    from sklearn import random_projection

    Fs = fingerprints(datas)

    # The random projection
    transformer = random_projection.GaussianRandomProjection(n_components=7)
    Fs_new = transformer.fit_transform(Fs)
    # Call form prints the same tuple on Python 2 and Python 3.
    print(Fs_new.shape)
    tree = KDTree(Fs_new, leaf_size=20)

    correct = []
    wrong = []
    for _ in range(100):
        # Select two random targets
        target_i = random.choice(range(len(datas)))
        target_j = random.choice(range(len(datas)))

        # target i
        Tf = np.vstack([fingerprint(datas[target_i])])
        Tf_new = transformer.transform(Tf)

        # target j
        Tf2 = np.vstack([fingerprint(datas[target_j])])
        Tf_new2 = transformer.transform(Tf2)

        # Match it
        # NOTE(review): the projected queries are truncated to integers while
        # the tree holds the untruncated projections -- confirm intent.
        dist, ind = tree.query(Tf_new.astype(int), k=1)
        dist2, ind2 = tree.query(Tf_new2.astype(int), k=1)
        correct.append(match(Fs[ind[0][0]], Tf[0]))
        wrong.append(match(Fs[ind2[0][0]], Tf[0]))

    print("Correct: %2.5f (%2.5f), Random: %2.5f (%2.5f)" % (
        np.mean(correct), np.std(correct), np.mean(wrong), np.std(wrong)))
def knn_cond_mutual_information(x, y, z, k, standardize = True, dualtree = False):
    """
    Computes conditional mutual information between two time series x and y
    conditioned on a third z (which can be multi-dimensional) as
        I(x; y | z) = sum( p(x,y,z) * log( p(z)*p(x,y,z) / (p(x,z)*p(y,z)) ) ),
        where p(z), p(x,z), p(y,z) and p(x,y,z) are probability distributions.
    Performs k-nearest neighbours search using k-dimensional tree.
    Uses sklearn.neighbors for KDTree class.

    standardize - whether to pass x, y and z through _center_ts first; z may
      be an ndarray or a list of series (each list entry is transformed)
    dualtree - whether to use dualtree formalism in k-d tree for the k-NN search
      could lead to better performance with large N

    According to Frenzel S. and Pompe B., Phys. Rev. Lett., 99, 2007.
    """

    from sklearn.neighbors import KDTree

    # prepare data
    if standardize:
        x = _center_ts(x)
        y = _center_ts(y)
        if isinstance(z, np.ndarray):
            z = _center_ts(z)
        elif isinstance(z, list):
            # Rebinding the loop variable does not change the list, so build
            # a new list of standardized series.
            z = [_center_ts(cond_ts) for cond_ts in z]
    z = np.atleast_2d(z)
    data = np.vstack([x, y, z]).T

    # build k-d tree using the maximum (Chebyshev) norm
    tree = KDTree(data, leaf_size = 15, metric = "chebyshev")
    # find distance to k-nearest neighbour per point
    dist, _ = tree.query(data, k = k + 1, return_distance = True, dualtree = dualtree)
    kth_dist = dist[:, -1]

    def marginal_counts(marginal):
        # number of points within kth_dist of each point in a marginal space
        marginal_tree = KDTree(marginal, leaf_size = 15, metric = "chebyshev")
        return marginal_tree.query_radius(marginal, r = kth_dist, count_only = True) - 2

    # marginal spaces xz, yz and z
    n_x_z = marginal_counts(np.delete(data, 1, axis = 1))
    n_y_z = marginal_counts(np.delete(data, 0, axis = 1))
    n_z = marginal_counts(np.delete(data, [0, 1], axis = 1))

    # count points
    sum_ = 0
    for n in range(data.shape[0]):
        sum_ += _neg_harmonic(n_x_z[n]) + _neg_harmonic(n_y_z[n]) - _neg_harmonic(n_z[n])

    sum_ /= data.shape[0]

    return sum_ - _neg_harmonic(k-1)
class KDTreeUtil:
    """Small convenience wrapper around a KDTree built from several arrays."""

    def __init__(self, data, leaf_size=40):
        """Concatenate the arrays in ``data`` along axis 0 and index them."""
        stacked = np.concatenate(data, axis=0)
        self.core = KDTree(stacked, leaf_size)

    def query(self, dot: np.ndarray, k=1, return_dist=False):
        """Forward to ``KDTree.query``; distances are included only on request."""
        result = self.core.query(dot, k=k, return_distance=return_dist)
        return result
"AGE", "YRS_CLIMBING", "HEIGHT", "APEINDEX", "WEIGHT", "BMI", "B_AVG", "S_AVG" ] knnData = normData[knnProfileVars] knnData.describe() # The reason I propose this variables is because it contains their current level, and descriptive variables that they cannot change, they simply describe their current phisique and experience. In order to create groups of climbers with similar phisiques, experience and current performance, regardless of the way they train, how often they climb, how they approach improvement, what they eat, etc. because those are the variables that the climber can actually change in order to produce a change in their performance. # # Now lets use a KDTree (the algorithm inside the KNN algorithm) to find the nearest neighbors of a random climber, lets say the 10th climber in the list. # In[22]: climberID = 10 randomClimber = knnData.loc[climberID, :] tree = KDTree(knnData) dist, ids = tree.query([randomClimber], k=int(len(knnData.index) / 3)) closestClimbers = knnData.loc[ids[0], :] comparison = pd.DataFrame() comparison["SUBJECT_CLIMBER"] = randomClimber comparison["AVERAGE_CLIMBER"] = knnData.mean() comparison["KNN_CLOSEST_AVG"] = closestClimbers.mean() print("After finding the", str(int(len(knnData.index) / 3)), "nearest neighbors we see this behavior in the data distribution") #display(comparison) # As you can see, the new group is conformed of climbers with a profile that is closer in similarity to the climber we care about. So now, learning the effect of particular actions on perforance makes more sense, since climbers with very similar bodies and experience intuitively would benefit from similar actions. So if a climber with a similar profile had certain benefit from an action, it would suggest that you probably should too. # # So lets get back to a predictive modeling using only these subsets.
class ChessBoardCornerDetector:
    """Locate chessboard corners in an image and enumerate them on a grid.

    The detector computes a corner response image, normalises it locally,
    thresholds it, takes the centre of mass of every remaining blob as a
    corner candidate and finally grows a grid of integer ``(x_index,
    y_index)`` coordinates outwards from the most central candidate.

    Attributes
    ----------
    distance_threshold : float
        Maximum allowed ratio between "distance from the predicted location
        to the nearest candidate" and "grid spacing" for a candidate to be
        accepted as the next grid point.
    calibration_points : collections.defaultdict(dict) or None
        ``calibration_points[x_index][y_index]`` is the image position of
        that grid point. Filled by :meth:`enumerate_peaks`.
    centers : list or None
        Candidate corner positions of the last processed image.
    centers_kdtree : KDTree or None
        Spatial index over ``centers``.
    points_to_examine_queue : list or None
        Grid indices whose neighbourhood still has to be explored.
    """

    def __init__(self):
        self.distance_threshold = 0.06
        self.calibration_points = None
        self.centers = None
        self.centers_kdtree = None
        self.points_to_examine_queue = None

    def _locate_calibration_points(self, img):
        """Run the full detection pipeline shared by the public entry points.

        Returns the intermediate images as well, so that the debug output of
        :meth:`detect_chess_board_corners` can be written without
        recomputing anything.
        """
        # Calculate corner responses
        response = self.calculate_corner_responses(img)
        # Localized normalization of responses
        response_relative_to_neighbourhood = self.local_normalization(
            response, 511)
        # Threshold responses
        relative_responses_thresholded = self.threshold_responses(
            response_relative_to_neighbourhood)
        # Locate centers of peaks
        centers = self.locate_centers_of_peaks(relative_responses_thresholded)
        # Select central center of mass
        selected_center = self.select_central_peak_location(centers)
        # Enumerate detected peaks
        calibration_points = self.enumerate_peaks(centers, selected_center)
        return (response, response_relative_to_neighbourhood,
                relative_responses_thresholded, selected_center,
                calibration_points)

    def _write_debug_images(self, img, response,
                            response_relative_to_neighbourhood,
                            relative_responses_thresholded, selected_center,
                            path_to_image, path_to_output_folder):
        """Write the intermediate images of one detection to sub-folders."""
        # making the output folders
        path_to_output_local_maxima_folder = path_to_output_folder / '4_local_maxima'
        path_to_output_local_maxima_folder.mkdir(parents=False, exist_ok=True)
        path_to_output_response_folder = path_to_output_folder / '1_response'
        path_to_output_response_folder.mkdir(parents=False, exist_ok=True)
        path_to_output_response_neighbourhood_folder = (
            path_to_output_folder / '2_respond_relative_to_neighbourhood')
        path_to_output_response_neighbourhood_folder.mkdir(parents=False,
                                                           exist_ok=True)
        path_to_output_response_threshold_folder = (
            path_to_output_folder / '3_relative_response_thresholded')
        path_to_output_response_threshold_folder.mkdir(parents=False,
                                                       exist_ok=True)

        path_response_1 = path_to_output_response_folder / (
            path_to_image.stem + '_response.png')
        cv2.imwrite(str(path_response_1), response)
        path_response_2 = path_to_output_response_neighbourhood_folder / (
            path_to_image.stem + '_response_relative_to_neighbourhood.png')
        # The relative response is scaled by 255 before being written.
        cv2.imwrite(str(path_response_2),
                    response_relative_to_neighbourhood * 255)
        path_response_3 = path_to_output_response_threshold_folder / (
            path_to_image.stem + '_relative_responses_thresholded.png')
        cv2.imwrite(str(path_response_3), relative_responses_thresholded)

        canvas = self.show_detected_calibration_points(
            img, self.calibration_points)
        cv2.circle(canvas, tuple(selected_center.astype(int)), 10,
                   (0, 0, 255), -1)
        path_local_max = path_to_output_local_maxima_folder / (
            path_to_image.stem + '_local_maxima.png')
        cv2.imwrite(str(path_local_max), canvas)

    def detect_chess_board_corners(self, img, debug=False, *,
                                   path_to_image=None,
                                   path_to_output_folder=None):
        """Detect and enumerate the chessboard corners of ``img``.

        Parameters
        ----------
        img : image array
            Converted to greyscale with ``cv2.COLOR_BGR2GRAY``.
        debug : bool
            When true, intermediate images are written below
            ``path_to_output_folder``.
        path_to_image, path_to_output_folder : path objects
            Only used (and then required) when ``debug`` is true. They must
            support ``.stem`` and the ``/`` operator respectively.

        Returns
        -------
        tuple
            ``(calibration_points, percentage_image_covered, stats)`` as
            produced by :meth:`enumerate_peaks`, :meth:`image_coverage` and
            :meth:`statistics`.

        Raises
        ------
        ValueError
            If ``debug`` is true but one of the two paths is missing.
        """
        if debug and (path_to_image is None or path_to_output_folder is None):
            raise ValueError(
                "debug=True requires both path_to_image and "
                "path_to_output_folder")

        (response, response_relative_to_neighbourhood,
         relative_responses_thresholded, selected_center,
         calibration_points) = self._locate_calibration_points(img)

        # write output images if debug is True
        if debug:
            self._write_debug_images(
                img, response, response_relative_to_neighbourhood,
                relative_responses_thresholded, selected_center,
                path_to_image, path_to_output_folder)

        # Detect image covered
        percentage_image_covered = self.image_coverage(calibration_points, img)
        # How straight are the points?
        stats = self.statistics(calibration_points)
        return self.calibration_points, percentage_image_covered, stats

    def make_statistics(self, img):
        """Return only the straightness statistics for ``img``.

        Not necessary to output the images when we just want the statistics
        after undistorting.
        """
        calibration_points = self._locate_calibration_points(img)[-1]
        # How straight are the points?
        stats = self.statistics(calibration_points)
        return stats

    @staticmethod
    def calculate_corner_responses(img):
        """Convolve the greyscale image with the marker tracker's kernel."""
        locator = MarkerTracker(order=2, kernel_size=45, scale_factor=40)
        greyscale_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        response = locator.apply_convolution_with_complex_kernel(
            greyscale_image)
        return response

    def local_normalization(self, response, neighbourhoodsize):
        """Normalise ``response`` against its local minimum and maximum.

        5% of the global maximum is added to the denominator (see
        :meth:`peaks_relative_to_neighbourhood`).
        """
        _, max_val, _, _ = cv2.minMaxLoc(response)
        response_relative_to_neighbourhood = self.peaks_relative_to_neighbourhood(
            response, neighbourhoodsize, 0.05 * max_val)
        return response_relative_to_neighbourhood

    @staticmethod
    def threshold_responses(response_relative_to_neighbourhood):
        """Binarise the relative response at 0.5 (values above become 255)."""
        _, relative_responses_thresholded = cv2.threshold(
            response_relative_to_neighbourhood, 0.5, 255, cv2.THRESH_BINARY)
        return relative_responses_thresholded

    def locate_centers_of_peaks(self, relative_responses_thresholded):
        """Return the centre of mass of every contour in the binary image."""
        contours, t1 = cv2.findContours(
            np.uint8(relative_responses_thresholded), cv2.RETR_TREE,
            cv2.CHAIN_APPROX_SIMPLE)
        centers = list(map(self.get_center_of_mass, contours))
        return centers

    @staticmethod
    def select_central_peak_location(centers):
        """Return the centre that lies closest to the mean of all centres."""
        mean_position_of_centers = np.mean(centers, axis=0)
        central_center = np.array(
            sorted(list(centers),
                   key=lambda c: np.sqrt(
                       (c[0] - mean_position_of_centers[0])**2 +
                       (c[1] - mean_position_of_centers[1])**2)))
        return central_center[0]

    def enumerate_peaks(self, centers, selected_center):
        """Assign grid indices to ``centers`` starting at ``selected_center``.

        Stores the centres, their KD-tree and the resulting grid on the
        instance and returns the grid.
        """
        self.centers = centers
        self.centers_kdtree = KDTree(np.array(self.centers))
        self.calibration_points = self.initialize_calibration_points(
            selected_center)
        self.points_to_examine_queue = [(0, 0), (1, 0), (0, 1)]
        # The rules append to the list while it is being iterated; the loop
        # therefore also visits every newly discovered grid point.
        for x_index, y_index in self.points_to_examine_queue:
            self.apply_all_rules_to_add_calibration_points(x_index, y_index)
        return self.calibration_points

    @staticmethod
    def show_detected_calibration_points(img, calibration_points):
        """Return a copy of ``img`` with a circle drawn on every grid point.

        The circle colour encodes the parity of the grid indices.
        """
        canvas = img.copy()
        for x_index, temp in calibration_points.items():
            for y_index, cal_point in temp.items():
                cv2.circle(canvas, tuple(cal_point.astype(int)), 20,
                           (0, 255 * (y_index % 2), 255 * (x_index % 2)), 2)
        return canvas

    def initialize_calibration_points(self, selected_center):
        """Create the grid with its first three points.

        ``(0, 0)`` is the selected centre, ``(1, 0)`` its nearest neighbour
        and ``(0, 1)`` the candidate nearest to the selected centre shifted
        by the first direction rotated a quarter turn.
        """
        closest_neighbour, _ = self.locate_nearest_neighbour(selected_center)
        direction = selected_center - closest_neighbour
        rotation_matrix = np.array([[0, 1], [-1, 0]])
        hat_vector = np.matmul(direction, rotation_matrix)
        direction_b_neighbour, _ = self.locate_nearest_neighbour(
            selected_center + hat_vector,
            minimum_distance_from_selected_center=-1)
        calibration_points = collections.defaultdict(dict)
        calibration_points[0][0] = selected_center
        calibration_points[1][0] = closest_neighbour
        calibration_points[0][1] = direction_b_neighbour
        return calibration_points

    def apply_all_rules_to_add_calibration_points(self, x_index, y_index):
        """Try every extrapolation rule around grid point (x_index, y_index)."""
        self.rule_one(x_index, y_index)
        self.rule_two(x_index, y_index)
        self.rule_three(x_index, y_index)
        self.rule_four(x_index, y_index)
        self.rule_five(x_index, y_index)

    # The rules below all follow the same pattern: skip if the target grid
    # point is already known, predict its location from known neighbours,
    # snap to the nearest candidate centre and accept it when it is close
    # enough relative to the grid spacing. A KeyError means that one of the
    # required neighbours is not known (yet); the rule then simply does not
    # apply, which is why it is deliberately ignored.

    def rule_one(self, x_index, y_index):
        """Extend along +y: predict (x, y + 1) from (x, y - 1) and (x, y)."""
        try:
            # Ensure that we don't overwrite already located
            # points.
            if y_index + 1 in self.calibration_points[x_index]:
                return
            position_one = self.calibration_points[x_index][y_index]
            position_two = self.calibration_points[x_index][y_index - 1]
            predicted_location = 2 * position_one - position_two
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index][y_index + 1] = location
                self.points_to_examine_queue.append((x_index, y_index + 1))
        except KeyError:
            pass

    def rule_two(self, x_index, y_index):
        """Extend along +x: predict (x + 1, y) from (x - 1, y) and (x, y)."""
        try:
            if y_index in self.calibration_points[x_index + 1]:
                return
            position_one = self.calibration_points[x_index - 1][y_index]
            position_two = self.calibration_points[x_index][y_index]
            predicted_location = 2 * position_two - position_one
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index + 1][y_index] = location
                self.points_to_examine_queue.append((x_index + 1, y_index))
        except KeyError:
            pass

    def rule_three(self, x_index, y_index):
        """Complete a parallelogram to predict (x, y + 1).

        Uses (x - 1, y), (x - 1, y + 1) and (x, y).
        """
        try:
            # Ensure that we don't overwrite already located
            # points.
            if y_index + 1 in self.calibration_points[x_index]:
                return
            position_one = self.calibration_points[x_index - 1][y_index]
            position_two = self.calibration_points[x_index - 1][y_index + 1]
            position_three = self.calibration_points[x_index][y_index]
            predicted_location = position_two + position_three - position_one
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_three - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index][y_index + 1] = location
                self.points_to_examine_queue.append((x_index, y_index + 1))
        except KeyError:
            pass

    def rule_four(self, x_index, y_index):
        """Extend along -y: predict (x, y - 1) from (x, y + 1) and (x, y)."""
        try:
            # Ensure that we don't overwrite already located
            # points.
            if y_index - 1 in self.calibration_points[x_index]:
                return
            position_one = self.calibration_points[x_index][y_index]
            position_two = self.calibration_points[x_index][y_index + 1]
            predicted_location = 2 * position_one - position_two
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index][y_index - 1] = location
                self.points_to_examine_queue.append((x_index, y_index - 1))
        except KeyError:
            pass

    def rule_five(self, x_index, y_index):
        """Extend along -x: predict (x - 1, y) from (x + 1, y) and (x, y)."""
        try:
            if y_index in self.calibration_points[x_index - 1]:
                return
            position_one = self.calibration_points[x_index + 1][y_index]
            position_two = self.calibration_points[x_index][y_index]
            predicted_location = 2 * position_two - position_one
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index - 1][y_index] = location
                self.points_to_examine_queue.append((x_index - 1, y_index))
        except KeyError:
            pass

    def locate_nearest_neighbour(self,
                                 selected_center,
                                 minimum_distance_from_selected_center=0):
        """Return ``(center, distance)`` of the candidate nearest to a point.

        The two nearest candidates are queried. When the nearest one is not
        farther away than ``minimum_distance_from_selected_center`` the
        second nearest is returned instead; with the default of 0 this skips
        an exact match, with -1 the nearest candidate is always returned.
        """
        reshaped_query_array = np.array(selected_center).reshape(1, -1)
        (distances, indices) = self.centers_kdtree.query(reshaped_query_array,
                                                         2)
        if distances[0][0] <= minimum_distance_from_selected_center:
            return self.centers[indices[0][1]], distances[0][1]
        else:
            return self.centers[indices[0][0]], distances[0][0]

    @staticmethod
    def distance_to_ref(ref_point):
        """Return a function computing the Euclidean distance to ``ref_point``."""
        return lambda c: ((c[0] - ref_point[0])**2 +
                          (c[1] - ref_point[1])**2)**0.5

    @staticmethod
    def get_center_of_mass(contour):
        """Return the centroid of ``contour`` as ``np.array([cx, cy])``.

        Falls back to the first contour point when the area moment ``m00``
        is not positive.
        """
        m = cv2.moments(contour)
        if m["m00"] > 0:
            cx = m["m10"] / m["m00"]
            cy = m["m01"] / m["m00"]
            result = np.array([cx, cy])
        else:
            result = np.array([contour[0][0][0], contour[0][0][1]])
        return result

    def peaks_relative_to_neighbourhood(self, response, neighbourhoodsize,
                                        value_to_add):
        """Rescale ``response`` between its local minimum and maximum.

        ``value_to_add`` is added to the denominator.
        """
        local_min_image = self.minimum_image_value_in_neighbourhood(
            response, neighbourhoodsize)
        local_max_image = self.maximum_image_value_in_neighbourhood(
            response, neighbourhoodsize)
        response_relative_to_neighbourhood = (response - local_min_image) / (
            value_to_add + local_max_image - local_min_image)
        return response_relative_to_neighbourhood

    @staticmethod
    def minimum_image_value_in_neighbourhood(response, neighbourhood_size):
        """
        A fast method for determining the local minimum value in
        a neighbourhood for an entire image.

        The image is repeatedly eroded with a 3x3 kernel and halved in size,
        ``int(log2(neighbourhood_size))`` times, and then resized back to
        the original shape.
        """
        kernel_1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        orig_size = response.shape
        for x in range(int(math.log(neighbourhood_size, 2))):
            eroded_response = cv2.morphologyEx(response, cv2.MORPH_ERODE,
                                               kernel_1)
            response = cv2.resize(eroded_response, None, fx=0.5, fy=0.5)
        local_min_image_temp = cv2.resize(response,
                                          (orig_size[1], orig_size[0]))
        return local_min_image_temp

    @staticmethod
    def maximum_image_value_in_neighbourhood(response, neighbourhood_size):
        """
        A fast method for determining the local maximum value in
        a neighbourhood for an entire image.

        Same scheme as the minimum variant, using dilation instead of
        erosion.
        """
        kernel_1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        orig_size = response.shape
        for x in range(int(math.log(neighbourhood_size, 2))):
            dilated_response = cv2.morphologyEx(response, cv2.MORPH_DILATE,
                                                kernel_1)
            response = cv2.resize(dilated_response, None, fx=0.5, fy=0.5)
        local_max_image = cv2.resize(response, (orig_size[1], orig_size[0]))
        return local_max_image

    @staticmethod
    def image_coverage(calibration_points, img):
        """Count how many cells of a 10x10 image grid contain a grid point.

        Parameters
        ----------
        calibration_points : dict of dict
            ``calibration_points[x_index][y_index]`` is an ``(x, y)`` pair.
        img : array
            Only ``img.shape[0]`` (height) and ``img.shape[1]`` (width) are
            used.

        Returns
        -------
        int
            Number of occupied cells (0..100).
        """
        h = img.shape[0]
        w = img.shape[1]
        score = np.zeros((10, 10))
        for calibration_point_dict in calibration_points.values():
            for x, y in calibration_point_dict.values():
                # A point lying exactly on the right/bottom border would land
                # in bin 10; clamp it into the last bin. (The former
                # ``x_bin is 10`` identity test never matched the float
                # produced by divmod.)
                x_bin = min(int(x // (w / 10)), 9)
                y_bin = min(int(y // (h / 10)), 9)
                score[x_bin][y_bin] += 1
        return np.count_nonzero(score)

    @staticmethod
    def shortest_distance(x1, y1, a, b, c):
        """Distance from point (x1, y1) to the line ``a*x + b*y + c = 0``."""
        d = abs((a * x1 + b * y1 + c)) / (math.sqrt(a * a + b * b))
        return d

    def statistics(self, points):
        """Measure how well the grid points lie on straight lines.

        Returns a list with two ``[count, mean_deviation]`` entries. The
        first entry is the number of tested points and the average pixel
        deviation from fitted straight lines for the horizontal points, the
        second entry is the same for the vertical points. Lines with two or
        fewer points are not tested; an entry without tested points is
        ``[0, 0]``.
        """
        return_list = []
        # Check if the outer key defines the rows or the columns, this is not
        # always the same.
        horizontal = 1 if points[0][0][0] - points[0][1][0] < points[0][0][
            1] - points[0][1][1] else 0
        # Flip the dictionary so we can do this statistic for horizontal and
        # vertical points.
        flipped = collections.defaultdict(dict)
        for key, val in points.items():
            for subkey, subval in val.items():
                flipped[subkey][key] = subval
        # Make sure that we always have the same order, horizontal first in
        # this case.
        horiz_first = (points, flipped) if horizontal else (flipped, points)
        for index, points_list in enumerate(horiz_first):
            count, som = 0, 0
            for k in points_list.values():
                single_col_x, single_col_y = [], []
                if len(k) > 2:
                    for l in k.values():
                        # for the vertical points, X and Y values are switched
                        # because polyfit does not work (well) for (almost)
                        # vertical points
                        if index == 0:
                            single_col_x.append(l[0])
                            single_col_y.append(l[1])
                        else:
                            single_col_x.append(l[1])
                            single_col_y.append(l[0])
                    # Fit a line through the horizontal or vertical points;
                    # z[0] is the intercept and z[1] the slope.
                    z = np.polynomial.polynomial.polyfit(
                        single_col_x, single_col_y, 1)
                    # Calculate the distance for each point to the line
                    for x, y in zip(single_col_x, single_col_y):
                        d = self.shortest_distance(x, y, z[1], -1, z[0])
                        count += 1
                        som += d
            if count != 0:
                return_list.append([count, som / count])
            else:
                return_list.append([count, 0])
        return return_list
def execute(context):
    """Build the per-municipality income decile table.

    Reads the income deciles from the FILOSOFI spreadsheet below the
    configured ``data_path``, keeps the municipalities returned by the
    ``data.spatial.municipalities`` stage and fills the gaps in two steps:

    1. Municipalities that have a median but no full distribution
       (``q2`` is NaN) receive the deciles of the complete municipality with
       the most similar median (``is_imputed = True``).
    2. Municipalities that are absent from the spreadsheet receive a copy of
       the row of the spatially nearest municipality with data, measured
       between geometry centroids (``is_imputed = True``,
       ``is_missing = True``).

    Parameters
    ----------
    context
        Pipeline context providing ``config("data_path")`` and
        ``stage("data.spatial.municipalities")``.

    Returns
    -------
    pandas.DataFrame
        Columns ``commune_id``, ``q1`` .. ``q9``, ``is_imputed``,
        ``is_missing`` and ``reference_median``, one row per requested
        municipality.
    """
    # Load income distribution
    df = pd.read_excel(
        "%s/filosofi_2015/FILO_DISP_COM.xls" % context.config("data_path"),
        sheet_name="ENSEMBLE",
        skiprows=5)[["CODGEO"] + [
            "D%d15" % q if q != 5 else "Q215" for q in range(1, 10)
        ]]
    df.columns = [
        "commune_id", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9"
    ]
    df["reference_median"] = df["q5"].values

    # Restrict the income data to the requested municipalities
    df_municipalities = context.stage("data.spatial.municipalities")
    requested_communes = set(df_municipalities["commune_id"].unique())
    df = df[df["commune_id"].isin(requested_communes)]

    # Find communes without data
    df["commune_id"] = df["commune_id"].astype("category")
    missing_communes = set(df_municipalities["commune_id"].unique()) - set(
        df["commune_id"].unique())
    print("Found %d/%d municipalities that are missing" %
          (len(missing_communes), len(requested_communes)))

    # Find communes without full distribution
    df["is_imputed"] = df["q2"].isna()
    df["is_missing"] = False
    print("Found %d/%d municipalities which do not have full distribution" %
          (sum(df["is_imputed"]), len(requested_communes)))

    # First, find suitable distribution for incomplete cases by finding the
    # one with the most similar median
    incomplete_medians = df[df["is_imputed"]]["q5"].values
    df_complete = df[~df["is_imputed"]]
    complete_medians = df_complete["q5"].values
    # Pairwise |complete - incomplete| matrix; the arg-min over the complete
    # axis yields, per incomplete commune, the row of the best donor.
    indices = np.argmin(np.abs(complete_medians[:, np.newaxis] -
                               incomplete_medians[np.newaxis, :]),
                        axis=0)
    for k in range(1, 10):
        df.loc[df["is_imputed"],
               "q%d" % k] = df_complete.iloc[indices]["q%d" % k].values

    # Second, add missing municipalities by nearest neighbor
    # ... build tree of existing communes
    df_existing = df_municipalities[df_municipalities["commune_id"].astype(
        str).isin(df["commune_id"])]  # pandas Bug
    coordinates = np.vstack([
        df_existing["geometry"].centroid.x, df_existing["geometry"].centroid.y
    ]).T
    kd_tree = KDTree(coordinates)

    # ... query tree for missing communes
    df_missing = df_municipalities[df_municipalities["commune_id"].astype(
        str).isin(missing_communes)]  # pandas Bug

    # Nothing to reconstruct when every requested commune has data: both the
    # tree query on an empty array and pd.concat([]) would raise.
    if len(df_missing) > 0:
        coordinates = np.vstack([
            df_missing["geometry"].centroid.x,
            df_missing["geometry"].centroid.y
        ]).T
        indices = kd_tree.query(coordinates)[1].flatten()

        # ... build data frame of imputed communes
        df_reconstructed = pd.concat([
            df[df["commune_id"] == df_existing.iloc[index]["commune_id"]]
            for index in indices
        ])
        df_reconstructed["commune_id"] = df_missing["commune_id"].values
        df_reconstructed["is_imputed"] = True
        df_reconstructed["is_missing"] = True

        # ... merge the data frames
        df = pd.concat([df, df_reconstructed])

    assert len(df) == len(df["commune_id"].unique())
    assert len(requested_communes - set(df["commune_id"].unique())) == 0

    return df[[
        "commune_id", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
        "is_imputed", "is_missing", "reference_median"
    ]]
def slice_file(resolution,
               f=None,
               scale_model=None,
               width_px=None,
               height_px=None,
               width_printer=None,
               height_printer=None):
    """Slice an STL model along z and write one filled PNG per slice.

    The model is translated so that its lower extents sit at the origin,
    scaled by ``scale_model`` and cut at evenly spaced z values. For every
    slice the segment end points are chained into closed outlines with a
    nearest-neighbour search, each outline with more than two vertices is
    drawn as a white filled polygon, and the image is saved to
    ``outputs/png_filled/<slice index>.png``.

    Parameters
    ----------
    resolution
        Layer height; converted with ``mmToinch`` before use.
    f
        Source handed to ``STLModel``.
    scale_model
        Factor applied to every vertex after translation.
    width_px, height_px
        Output image size in pixels.
    width_printer, height_printer
        Printer dimensions, combined with the pixel sizes by
        ``calculateMultiplier``.
    """
    print("Status: Loading File.")

    width_multiplier = calculateMultiplier(
        width_px, width_printer)  # conversion from mm to pixels
    height_multiplier = calculateMultiplier(
        height_px, height_printer)  # conversion from mm to pixels

    model = STLModel(f)
    stats = model.stats()  # Note these are in inches not mm
    sub_vertex = Vector3(stats['extents']['x']['lower'],
                         stats['extents']['y']['lower'],
                         stats['extents']['z']['lower'])
    center_image = [int(width_px / 2), int(height_px / 2)]  # pixels

    # Reset the extents so that update_extents() below rebuilds them from the
    # transformed triangles.
    model.xmin = model.xmax = None
    model.ymin = model.ymax = None
    model.zmin = model.zmax = None

    print("Status: Scaling Triangles.")
    for triangle in model.triangles:
        triangle.vertices[0] -= sub_vertex
        triangle.vertices[1] -= sub_vertex
        triangle.vertices[2] -= sub_vertex
        # The lines above have no effect on the normal.
        triangle.vertices[0] = (triangle.vertices[0] * scale_model
                                )  # in inches
        triangle.vertices[1] = (triangle.vertices[1] * scale_model
                                )  # in inches
        triangle.vertices[2] = (triangle.vertices[2] * scale_model
                                )  # in inches
        # Recalculate the triangle normal from THIS triangle's edges (the
        # edges of model.triangles[0] were used for every triangle before).
        u = triangle.vertices[1] - triangle.vertices[0]
        v = triangle.vertices[2] - triangle.vertices[0]
        triangle.n = Normal((u.y * v.z) - (u.z * v.y),
                            (u.z * v.x) - (u.x * v.z),
                            (u.x * v.y) - (u.y * v.x))
        model.update_extents(triangle)

    print("Status: Calculating Slices")
    stats = model.stats()  # This is after scaling the object
    sub_vertex = Vector3(stats['extents']['x']['lower'],
                         stats['extents']['y']['lower'],
                         stats['extents']['z']['lower'])
    sup_vertex = Vector3(stats['extents']['x']['upper'],
                         stats['extents']['y']['upper'],
                         stats['extents']['z']['upper'])
    obj_center_xyz = [(sup_vertex.x + sub_vertex.x) / 2,
                      (sup_vertex.y + sub_vertex.y) / 2,
                      (sup_vertex.z + sub_vertex.z) / 2]  # in inches

    slices = np.linspace(
        0.001, stats['extents']['z']['upper'] - 0.001,
        int(stats['extents']['z']['upper'] / (mmToinch(resolution))) + 1)

    tic = time.time()
    for slice_index in range(len(slices)):
        dwg = Drawing('outputs/svg/' + str(slice_index) + '.svg',
                      profile='full')
        pairs = model.slice_at_z(slices[slice_index])

        # Now process vertices: flatten the segment pairs into an (N, 2)
        # array in which rows 2j and 2j + 1 are the two ends of one segment.
        a = np.asarray(pairs)
        b = a.flatten()
        vert_array = b.reshape(int(b.shape[0] / 2), 2)
        tree = KDTree(vert_array, leaf_size=3)

        current_index = 1
        vertices = []
        vertice_sets = []
        visited_vertices = [current_index]
        vertices.append(tuple(vert_array[current_index]))

        for i in range(int(vert_array.shape[0] / 2)):
            to_query = np.reshape(vert_array[current_index], (1, 2))
            dist, ind = tree.query(to_query, k=2)
            for neighbour_id in list(ind[0]):  # there should only ever be two
                if neighbour_id != current_index:
                    # if we have found a loop,
                    if neighbour_id in visited_vertices:
                        vertices.append(tuple(vert_array[neighbour_id]))
                        vertice_sets.append(vertices)
                        vertices = []
                        # Restart from an unvisited vertex (the last one
                        # found, since the scan does not stop early).
                        for next_vert in range(vert_array.shape[0]):
                            if next_vert not in visited_vertices:
                                current_index = next_vert
                    # Now that we have found the match, find the
                    # corresponding vertex, remember that they are in pairs
                    # of two
                    elif neighbour_id % 2 == 1:
                        current_index = neighbour_id - 1
                        break
                    else:
                        current_index = neighbour_id + 1
                        break
            visited_vertices.append(neighbour_id)
            vertices.append(tuple(vert_array[current_index]))
            visited_vertices.append(current_index)

        # Draw the percentage done
        sys.stdout.write("\r%d%%" % int(slice_index / len(slices) * 100))
        sys.stdout.flush()

        # Save the last one to the vertice set
        vertice_sets.append(vertices)

        # NOTE(review): PIL expects (width, height); the original TODO
        # already flags that these may be backwards.
        img = Image.new('RGB', (height_px, width_px))
        draw = ImageDraw.Draw(img)
        for i in range(len(vertice_sets)):
            if len(vertice_sets[i]) > 2:
                polygon = convertToPixels(vertice_sets[i], width_multiplier,
                                          height_multiplier, obj_center_xyz,
                                          center_image)
                draw.polygon(polygon, fill=(255, 255, 255))
        img.save('outputs/png_filled/' + str(slice_index) + '.png', 'PNG')

    print("Status: Finished Outputting Slices")
    print('Time: ', time.time() - tic)
def generate_label_views(kzip_path,
                         ssd_version,
                         gt_type,
                         n_voting=40,
                         nb_views=2,
                         ws=(256, 128),
                         comp_window=8e3,
                         out_path=None,
                         verbose=False,
                         initial_run=False):
    """Render raw, label and index views for a ground-truth skeleton.

    Labels are read from the skeleton node comments, transferred to the
    nearest mesh vertices with a KD-tree, optionally smoothed, and rendered
    from locations that lie close to a labeled skeleton node.

    Parameters
    ----------
    kzip_path : str
        Path to the skeleton k.zip; the object id is parsed from it.
    ssd_version : str
    gt_type : str
        Either ``"axgt"`` or ``"spgt"``.
    n_voting : int
        Number of collected nodes during BFS for majority vote (label
        smoothing). Smoothing is skipped when not positive.
    nb_views : int
    ws : Tuple[int]
    comp_window : float
    out_path : str
        If given, export mesh colored according to GT labels.
    verbose : bool
        Print additional information.
    initial_run : bool
        If True, will copy the SSV from the default SSD to the SSD with
        ``version=ssd_version`` before loading it. It was previously read
        without being a parameter; it is appended last so that existing
        positional calls keep their meaning.

    Returns
    -------
    Tuple[np.array]
        raw, label and index views

    Raises
    ------
    ValueError
        If the attribute dict of the target object does not exist.
    """
    _render_mesh_coords = load_rendering_func('_render_mesh_coords')
    assert gt_type in ["axgt", "spgt"
                       ], "Currently only spine and axon GT is supported"
    n_labels = 5 if gt_type == "axgt" else 4
    palette = generate_palette(n_labels)
    sso_id = int(re.findall(r"/(\d+).", kzip_path)[0])
    sso = SuperSegmentationObject(sso_id, version=ssd_version)
    if initial_run:  # use default SSD version
        orig_sso = SuperSegmentationObject(sso_id)
        orig_sso.copy2dir(dest_dir=sso.ssv_dir, safe=False)
    if not sso.attr_dict_exists:
        msg = 'Attribute dict of original SSV was not copied successfully ' \
              'to target SSD.'
        raise ValueError(msg)
    sso.load_attr_dict()

    # Load mesh
    indices, vertices, normals = sso.mesh
    vertices = vertices.reshape((-1, 3))

    # load skeleton
    skel = load_skeleton(kzip_path)
    if len(skel) == 1:
        skel = list(skel.values())[0]
    else:
        skel = skel["skeleton"]
    skel_nodes = list(skel.getNodes())

    node_coords = np.array(
        [n.getCoordinate() * sso.scaling for n in skel_nodes])
    # ``np.int`` was only an alias of the builtin and no longer exists in
    # recent NumPy releases.
    node_labels = np.array(
        [str2intconverter(n.getComment(), gt_type) for n in skel_nodes],
        dtype=int)
    # Nodes labeled -1 are dropped.
    node_coords = node_coords[(node_labels != -1)]
    node_labels = node_labels[(node_labels != -1)]

    # create KD tree from skeleton node coordinates
    tree = KDTree(node_coords)
    # transfer labels from skeleton to mesh
    dist, ind = tree.query(vertices, k=1)
    vertex_labels = node_labels[ind]  # retrieving labels of vertices
    if n_voting > 0:
        vertex_labels = bfs_smoothing(vertices,
                                      vertex_labels,
                                      n_voting=n_voting)
    color_array = palette[vertex_labels].astype(np.float32) / 255.

    if out_path is not None:
        if gt_type == 'spgt':
            colors = [[0.6, 0.6, 0.6, 1], [0.9, 0.2, 0.2, 1],
                      [0.1, 0.1, 0.1, 1], [0.05, 0.6, 0.6, 1],
                      [0.9, 0.9, 0.9, 1]]
        else:
            # dendrite, axon, soma, bouton, terminal, background
            colors = [[0.6, 0.6, 0.6, 1], [0.9, 0.2, 0.2, 1],
                      [0.1, 0.1, 0.1, 1], [0.05, 0.6, 0.6, 1],
                      [0.6, 0.05, 0.05, 1], [0.9, 0.9, 0.9, 1]]
        colors = (np.array(colors) * 255).astype(np.uint8)
        # TODO: check why only first element, maybe colors introduces an
        # additional axis
        color_array_mesh = colors[vertex_labels][:, 0]
        write_mesh2kzip("{}/sso_{}_gtlabels.k.zip".format(out_path, sso.id),
                        sso.mesh[0],
                        sso.mesh[1],
                        sso.mesh[2],
                        color_array_mesh,
                        ply_fname="gtlabels.ply")

    # Initializing mesh object with ground truth coloring
    mo = MeshObject("neuron", indices, vertices, color=color_array)

    # use downsampled locations for view locations, only if they are close to
    # a labeled skeleton node
    locs = generate_rendering_locs(
        vertices, comp_window / 6)  # 6 rendering locations per comp. window
    dist, ind = tree.query(locs)
    locs = locs[dist[:, 0] < 2000]  # [::3][:5]  # TODO add as parameter

    # # # To get view locations
    # dest_folder = os.path.expanduser("~") + \
    #               "/spiness_skels/{}/view_imgs_{}/".format(sso_id, n_voting)
    # if not os.path.isdir(dest_folder):
    #     os.makedirs(dest_folder)
    # loc_text = ''
    # for i, c in enumerate(locs):
    #     loc_text += str(i) + "\t" + str((c / np.array([10, 10, 20])).astype(int)) + '\n'  # rescaling to the voxel grid
    # with open("{}/viewcoords.txt".format(dest_folder), "w") as f:
    #     f.write(loc_text)
    # # # DEBUG PART END

    label_views, rot_mat = _render_mesh_coords(locs,
                                               mo,
                                               depth_map=False,
                                               return_rot_matrices=True,
                                               ws=ws,
                                               smooth_shade=False,
                                               nb_views=nb_views,
                                               comp_window=comp_window,
                                               verbose=verbose)
    # TODO: the 3 neglects the alpha channel, i.e. remapping labels bigger
    # than 256**3 becomes invalid
    label_views = remap_rgb_labelviews(label_views[..., :3], palette)[:, None]
    index_views = render_sso_coords_index_views(sso,
                                                locs,
                                                rot_mat=rot_mat,
                                                verbose=verbose,
                                                nb_views=nb_views,
                                                ws=ws,
                                                comp_window=comp_window)
    raw_views = render_sso_coords(sso,
                                  locs,
                                  nb_views=nb_views,
                                  ws=ws,
                                  comp_window=comp_window,
                                  verbose=verbose,
                                  rot_mat=rot_mat)
    return raw_views, label_views, index_views
def kdtree_nn(points):
    """Return the second-nearest distance of every point to the point set.

    A KD-tree is built over ``points`` and queried with the same points for
    ``k=2`` neighbours; column 1 of the distance matrix is returned.
    """
    neighbour_tree = KDTree(points, leaf_size=2)
    distances, _ = neighbour_tree.query(points[:], k=2)
    second_nearest = distances[:, 1]
    return second_nearest
class ABOD(BaseDetector):
    """ABOD class for Angle-base Outlier Detection.
    For an observation, the variance of its weighted cosine scores to all
    neighbors could be viewed as the outlying score.
    See :cite:`kriegel2008angle` for details.

    Two version of ABOD are supported:

    - Fast ABOD: use k nearest neighbors to approximate.
    - Original ABOD: consider all training points with high time complexity at
      O(n^3).

    Parameters
    ----------
    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set, i.e.
        the proportion of outliers in the data set. Used when fitting to
        define the threshold on the decision function.

    n_neighbors : int, optional (default=5)
        Number of neighbors to use by default for k neighbors queries.

    method: str, optional (default='fast')
        Valid values for method are:

        - 'fast': fast ABOD. Only consider n_neighbors of training points
        - 'default': original ABOD with all training points, which could be
          slow

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is
        fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self, contamination=0.1, n_neighbors=5, method='fast'):
        super(ABOD, self).__init__(contamination=contamination)
        self.method = method
        self.n_neighbors = n_neighbors

    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).

        Returns
        -------
        self : object
            The fitted detector.

        Raises
        ------
        ValueError
            If ``method`` is neither 'fast' nor 'default'.
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.X_train_ = X
        self.n_train_ = X.shape[0]
        self.decision_scores_ = np.zeros([self.n_train_, 1])

        if self.method == 'fast':
            self._fit_fast()
        elif self.method == 'default':
            self._fit_default()
        else:
            # A single formatted message; passing two arguments to
            # ValueError renders as a tuple.
            raise ValueError(
                "{} is not a valid method".format(self.method))

        # flip the scores
        self.decision_scores_ = self.decision_scores_.ravel() * -1
        self._process_decision_scores()
        return self

    def _fit_default(self):
        """Default ABOD method. Use all training points with high complexity
        O(n^3). For internal use only.
        """
        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]

            # get the index pairs of the neighbors, remove itself from index
            X_ind = list(range(0, self.n_train_))
            X_ind.remove(i)

            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    def _fit_fast(self):
        """Fast ABOD method. Only use n_neighbors for angle calculation.
        Internal use only
        """
        # make sure the n_neighbors is in the range
        check_parameter(self.n_neighbors, 1, self.n_train_)

        # The tree is kept for scoring new samples in
        # _decision_function_fast.
        self.tree_ = KDTree(self.X_train_)

        # The training neighbours come from kneighbors() called without a
        # query array.
        neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
        neigh.fit(self.X_train_)
        ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                                   return_distance=False)

        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]
            X_ind = ind_arr[i, :]
            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    # noinspection PyPep8Naming
    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['X_train_', 'n_train_', 'decision_scores_',
                               'threshold_', 'labels_'])
        X = check_array(X)

        if self.method == 'fast':  # fast ABOD
            # outliers have higher outlier scores
            return self._decision_function_fast(X) * -1
        else:  # default ABOD
            return self._decision_function_default(X) * -1

    def _decision_function_default(self, X):
        """Internal method for predicting outlier scores using default ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The anomaly score of the input samples.
        """
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            # get the index pairs of the neighbors
            X_ind = list(range(0, self.n_train_))
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()

    def _decision_function_fast(self, X):
        """Internal method for predicting outlier scores using Fast ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['tree_'])
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        # get the indexes of the X's k nearest training points
        _, ind_arr = self.tree_.query(X, k=self.n_neighbors)

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            X_ind = ind_arr[i, :]
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()
metavar='O', help='Labelled ground truth cloud.')
parser.add_argument('-k', type=int, default=10, metavar='k',
                    help='Number of neighbors.')
args = parser.parse_args()

# Load the cloud and split its points by whether they carry a class label;
# label 0 is treated as "not labeled".
cloud = read_ply(args.input_file)
points = np.vstack((cloud['x'], cloud['y'], cloud['z'])).T
labels = cloud['class'].astype(np.int32)

labeled = np.nonzero(labels)[0]
not_labeled = np.nonzero(labels == 0)[0]

# Index the labeled points and look up the k nearest labeled neighbours of
# every unlabeled point.
tree = KDTree(points[labeled])
dist, neighbors = tree.query(points[not_labeled], k=args.k)

# Each unlabeled point takes the most frequent label among its neighbours.
neighborslabels = labels[labeled][neighbors]
ind = stats.mode(neighborslabels, axis=1)[0]
labels[not_labeled] = ind.reshape(-1)

# Labelled cloud
write_ply('{}.ply'.format(args.output_prefix), (points, labels),
          ('x', 'y', 'z', 'scalar_class'))

# Label list for IoU computation and benchmark submission
np.savetxt('{}.txt'.format(args.output_prefix), labels, fmt='%d')
seed = (0, 0, 0) # print("visitednodes",visited_nodes) # print("all_nodes", all_nodes) print("\n") while (goalcheck_circle(goal_x, goal_y, goal_z, seed[0], seed[1], seed[2]) == False): seed = generate_seed() # print("generated_seed", seed) if ((seed not in visited_nodes) and not obstacle_check(seed[0], seed[1], seed[2])): all_nodes.insert(0, seed) X = np.array(all_nodes) tree = KDTree(X, leaf_size=2) dist, ind = tree.query(X[:1], k=2) p = ind[0][1] parent = all_nodes[p] par = seed s = parent a = 0 # print(s) while (cost2go(par, s) >= 0.1): a = line_obstacle_check(s, par) # print(a) if obstacle_check(a[0], a[1], a[2]): # print("inside") # print("stop point", a) break s = a
# Feature matrix used to query the similarity index.
similarity_input_train = pd.concat([attributes1, dummied_features], axis=1)

# store the train and test files
train.to_csv('search/train')
test.to_csv('search/test')
similarity_input.to_csv('search/similarity_input')
similarity_input_train.to_csv('search/similarity_input_train')
user_20_sample.to_csv('search/user_20_sample')

# Use KD Tree from sklearn
kdt = KDTree(np.asarray(similarity_input), leaf_size=30, metric='euclidean')

# Query in five chunks and stack the per-chunk results row-wise, in order.
split1, split2, split3, split4, split5 = np.array_split(
    similarity_input_train, 5)
distance, indices = kdt.query(split1, k=100, return_distance=True)
for remaining_split in (split2, split3, split4, split5):
    distance_t, indices_t = kdt.query(remaining_split,
                                      k=100,
                                      return_distance=True)
    distance = np.vstack((distance, distance_t))
    indices = np.vstack((indices, indices_t))
class RFAttack(AttackModel):
    """Region-based attack on a random forest classifier.

    With ``method='all'`` every combination of per-tree leaf regions is
    enumerated and the feasible ones are kept; with ``method='rev'`` one
    region per training sample is built from the per-tree constraints.
    """

    def __init__(self,
                 trnX: np.ndarray,
                 trny: np.ndarray,
                 clf: RandomForestClassifier,
                 ord,
                 method: str,
                 n_searches: int = -1,
                 random_state=None):
        """Attack on Random forest classifier

        Arguments:
            trnX {ndarray, shape=(n_samples, n_features)} -- Training data
            trny {ndarray, shape=(n_samples)} -- Training label
            clf {RandomForestClassifier} -- The Random Forest classifier
            ord {int} -- Order of the norm for perturbation distance, see
                numpy.linalg.norm for more information
            method {str} -- 'all' means optimal attack (RBA-Exact), 'rev'
                means RBA-Approx

        Keyword Arguments:
            n_searches {int} -- number of regions to search, only used when
                method=='rev' (default: {-1})
            random_state {[type]} -- random seed (default: {None})

        Raises:
            ValueError -- if ``method`` is neither 'all' nor 'rev'
        """
        super().__init__(ord=ord)
        paths, constraints = [], []
        self.clf = clf
        self.method = method
        self.n_searches = n_searches
        self.trnX = trnX
        self.trny = trny
        self.random_state = random_state

        # The KD tree is only needed when the search is limited to the
        # n_searches nearest training samples.
        if self.n_searches != -1:
            self.kd_tree = KDTree(self.trnX)
        else:
            self.kd_tree = None

        if self.method == 'all':
            for tree_clf in clf.estimators_:
                path, constraint = get_tree_constraints(tree_clf)
                paths.append(path)
                constraints.append(constraint)

            n_classes = clf.n_classes_
            n_estimators = len(clf.estimators_)
            self.regions = []
            self.region_preds = []
            vacuan_regions = 0

            # res assigns one predicted class to every tree of the forest.
            for res in product(range(n_classes), repeat=n_estimators):
                perm_consts = [list() for _ in range(n_estimators)]

                # Per tree, collect the leaf constraints whose majority class
                # equals the class chosen for that tree in res.
                for i in range(n_estimators):
                    value = clf.estimators_[i].tree_.value
                    path = paths[i]
                    constraint = constraints[i]
                    for p in range(len(path)):
                        if np.argmax(value[path[p][-1]]) == res[i]:
                            perm_consts[i].append(constraint[p])

                # Intersect one leaf per tree; keep the intersection only if
                # the LP solver reports it as feasible.
                for pro in product(*perm_consts):
                    r = union_constraints(
                        np.vstack([j[0] for j in pro]),
                        np.concatenate([j[1] for j in pro]),
                    )
                    G, h = constraint_list_to_matrix(r)
                    status, _ = solve_lp(np.zeros((len(G[0]))), G,
                                         h.reshape(-1, 1), len(G[0]))
                    if status == 'optimal':
                        # Forest prediction inside the region = majority vote.
                        self.region_preds.append(np.argmax(np.bincount(res)))
                        self.regions.append(r)
                    else:
                        vacuan_regions += 1

            print(f"number of regions: {len(self.regions)}")
            print(f"number of vacuan regions: {vacuan_regions}")

        elif self.method == 'rev':
            # Element-wise minimum of the per-tree constraints of every
            # training sample.
            r = tree_instance_constraint(clf.estimators_[0], trnX)
            for tree_clf in clf.estimators_[1:]:
                t = tree_instance_constraint(tree_clf, trnX)
                r = np.min(np.concatenate(
                    (r[np.newaxis, :], t[np.newaxis, :])), axis=0)
            self.regions = r

            # Sanity check: each training sample lies inside its own region.
            for i in range(len(trnX)):
                G, h = constraint_list_to_matrix(self.regions[i])
                assert np.all(np.dot(G, trnX[i]) <= (h + 1e-8))

        else:
            raise ValueError(f"Not supported method: {self.method}")

    def perturb(self, X, y, eps=0.1):
        """Compute adversarial perturbations for ``X``.

        Arguments:
            X {ndarray} -- samples to attack (cast to float32)
            y {ndarray} -- labels of ``X``

        Keyword Arguments:
            eps {float} -- budget forwarded to ``_pert_with_eps_constraint``
                (default: {0.1})

        Returns:
            The result of ``self._pert_with_eps_constraint(pert_X, eps)``;
            the unconstrained perturbations are stored in ``self.perts``.

        Raises:
            ValueError -- for an unsupported ``ord`` or ``method``, or when
                the 'rev' search returns a zero perturbation for a sample
                that is currently classified correctly
        """
        X = X.astype(np.float32)

        if self.ord == 2:
            get_sol_fn = rev_get_sol_l2
        elif self.ord == np.inf:
            get_sol_fn = rev_get_sol_linf
        else:
            raise ValueError(f"ord {self.ord} not supported")

        pred_y = self.clf.predict(X)
        pred_trn_y = self.clf.predict(self.trnX)

        if self.method == 'all':

            def _helper(target_x, target_y, pred_yi):
                # Already misclassified samples need no perturbation.
                if pred_yi != target_y:
                    return np.zeros_like(target_x)
                temp_regions = [self.regions[i] for i in range(len(self.regions)) \
                                if self.region_preds[i] != target_y]
                return get_sol_fn(target_x, target_y, pred_trn_y,
                                  temp_regions, self.clf)

            pert_xs = Parallel(n_jobs=4, verbose=5)(
                delayed(_helper)(X[i], y[i], pred_y[i])
                for i in range(len(X)))
            pert_X = np.array(pert_xs)
            assert np.all(self.clf.predict(X + pert_X) != y)

        elif self.method == 'rev':
            pert_X = np.zeros_like(X)
            for sample_id in tqdm(range(len(X)), ascii=True, desc="Perturb"):
                # Already misclassified samples keep a zero perturbation.
                if pred_y[sample_id] != y[sample_id]:
                    continue
                target_x, target_y = X[sample_id], y[sample_id]

                if self.n_searches != -1:
                    # Rank all training samples by distance, keep those
                    # predicted as another class, truncate to n_searches.
                    ind = self.kd_tree.query(target_x.reshape((1, -1)),
                                             k=len(self.trnX),
                                             return_distance=False)[0]
                    ind = list(
                        filter(lambda x: pred_trn_y[x] != target_y,
                               ind))[:self.n_searches]
                else:
                    ind = list(
                        filter(lambda x: pred_trn_y[x] != target_y,
                               np.arange(len(self.trnX))))

                temp_regions = [self.regions[i] for i in ind]
                pert_x = get_sol_fn(target_x, y[sample_id], pred_trn_y,
                                    temp_regions, self.clf, self.trnX[ind])

                if np.linalg.norm(pert_x) != 0:
                    assert self.clf.predict([X[sample_id] + pert_x
                                             ])[0] != y[sample_id]
                    pert_X[sample_id, :] = pert_x
                else:
                    raise ValueError("shouldn't happen")

        else:
            raise ValueError(f"Not supported method {self.method}")

        self.perts = pert_X
        return self._pert_with_eps_constraint(pert_X, eps)
class SUNNY:
    """k-nearest-neighbour algorithm selector.

    The ``determine_best`` strategy is one of ``'subportfolio'``,
    ``'max-solved'`` or ``'min-par10'``; it decides how a single algorithm
    is picked from the performances of the nearest training instances.
    """

    def __init__(self, determine_best='min-par10'):
        self._name = 'sunny'
        self._imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        self._scaler = StandardScaler()
        self._determine_best = determine_best
        self._k = 16

    def get_name(self):
        """Return the name of this approach."""
        return self._name

    def fit(self, scenario: "ASlibScenario", fold: int, num_instances: int):
        """Resample, preprocess and index the training instances.

        ``fold`` is used as the random state of the resampling;
        ``num_instances <= 0`` keeps the full number of instances.
        """
        self._num_algorithms = len(scenario.algorithms)
        self._algorithm_cutoff_time = scenario.algorithm_cutoff_time

        # resample `amount_of_training_instances` instances and preprocess them accordingly
        features, performances = self._resample_instances(
            scenario.feature_data,
            scenario.performance_data,
            num_instances,
            random_state=fold)
        features, performances = self._preprocess_scenario(
            scenario, features, performances)

        # build nearest neighbors index based on euclidean distance
        self._model = KDTree(features, leaf_size=30, metric='euclidean')
        self._performances = np.copy(performances)

    def predict(self, features, instance_id: int):
        """Rank the algorithms for one instance.

        Args:
            features: one-dimensional feature vector of the instance.
            instance_id: not used by this method.

        Returns:
            Array of length ``num_algorithms`` holding 0 for the selected
            algorithm and 1 for every other one.

        Raises:
            NotImplementedError: for 'subportfolio' / 'max-solved' when the
                stored performances contain NaN.
            ValueError: if the configured strategy is unknown.
        """
        assert (features.ndim == 1), '`features` must be one dimensional'
        features = np.expand_dims(features, axis=0)
        features = self._imputer.transform(features)
        features = self._scaler.transform(features)

        # The tree rejects k larger than the number of indexed instances.
        num_neighbours = min(self._k, len(self._performances))
        # Row 0 is the only query row; indexing (instead of squeezing) keeps
        # the result one-dimensional even for a single neighbour.
        neighbour_idx = self._model.query(features,
                                          k=num_neighbours,
                                          return_distance=False)[0]

        if self._determine_best == 'subportfolio':
            if np.isnan(self._performances).any():
                raise NotImplementedError()

            sub_portfolio = self._build_subportfolio(neighbour_idx)
            schedule = self._build_schedule(neighbour_idx, sub_portfolio)
            selection = schedule[0]

        elif self._determine_best == 'max-solved':
            if np.isnan(self._performances).any():
                raise NotImplementedError()

            # select the algorithm which solved the most instances (use min PAR10 as tie-breaker)
            sub_performances = self._performances[neighbour_idx, :]
            num_solved = np.sum(
                sub_performances < self._algorithm_cutoff_time, axis=0)
            max_solved = np.max(num_solved)
            indices, = np.where(num_solved >= max_solved)
            sub_performances = sub_performances[:, indices]
            runtime = np.sum(sub_performances, axis=0)
            selection = indices[np.argmin(runtime)]

        elif self._determine_best == 'min-par10':
            # select the algorithm with the lowest mean PAR10 score (use max solved as tie-breaker)
            sub_performances = self._performances[neighbour_idx, :]
            runtime = np.nanmean(sub_performances, axis=0)

            if np.isnan(runtime).all():
                # No neighbour has any performance value: pick at random, but
                # still hand back a ranking like every other path does.
                selection = np.random.choice(self._num_algorithms)
            else:
                min_runtime = np.nanmin(runtime)
                runtime = np.nan_to_num(runtime, nan=np.inf)
                indices, = np.where(runtime <= min_runtime)
                sub_performances = sub_performances[:, indices]
                # Count solved neighbours per tied algorithm (axis=0); a
                # global sum would make the tie-breaker a no-op.
                num_solved = np.sum(
                    np.nan_to_num(sub_performances, nan=np.inf) <
                    self._algorithm_cutoff_time,
                    axis=0)
                selection = indices[np.argmax(num_solved)]

        else:
            raise ValueError('`{}` is no valid selection strategy'.format(
                self._determine_best))

        # create ranking st. the selected algorithm has rank 0, any other algorithm has rank 1
        ranking = np.ones(self._num_algorithms)
        ranking[selection] = 0
        return ranking

    def _build_subportfolio(self, neighbour_idx):
        """Return the algorithm subset solving the most neighbours.

        Ties are broken in favour of the lower average runtime. Subsets of
        size 1 up to ``num_algorithms - 1`` are enumerated, so the result is
        ``None`` when there is only one algorithm.
        """
        sub_performances = self._performances[neighbour_idx, :]

        # naive, inefficient computation
        algorithms = range(self._num_algorithms)
        # -np.inf instead of np.NINF: the alias was removed in NumPy 2.0.
        num_solved, avg_time = -np.inf, -np.inf
        sub_portfolio = None
        for subset in chain.from_iterable(
                combinations(algorithms, n)
                for n in range(1, len(algorithms))):
            subset_performances = sub_performances[:, list(subset)]

            # compute number of solved instances and average solving time
            tmp_solved = np.count_nonzero(
                np.min(subset_performances, axis=1) <
                self._algorithm_cutoff_time)

            # TODO: not entirely sure whether this is the correct way to compute the average runtime as mentioned in the paper
            tmp_avg_time = np.sum(
                subset_performances) / subset_performances.size

            if tmp_solved > num_solved or (tmp_solved == num_solved
                                           and tmp_avg_time < avg_time):
                num_solved, avg_time = tmp_solved, tmp_avg_time
                sub_portfolio = subset

        return sub_portfolio

    def _build_schedule(self, neighbour_idx, sub_portfolio):
        """Order ``sub_portfolio`` by solved neighbours, then total runtime.

        The algorithm solving the most neighbours comes first; among equals
        the one with the smaller summed runtime comes first.
        """
        sub_performances = self._performances[neighbour_idx, :]
        alg_performances = {
            alg: (np.count_nonzero(
                sub_performances[:, alg] < self._algorithm_cutoff_time),
                  (-1) * np.sum(sub_performances[:, alg]))
            for alg in sub_portfolio
        }
        # Runtime is negated so that one descending sort handles both keys.
        schedule = sorted(
            [(solved, avg_time, alg)
             for (alg, (solved, avg_time)) in alg_performances.items()],
            reverse=True)

        return [alg for (_, _, alg) in schedule]

    def _resample_instances(self, feature_data, performance_data,
                            num_instances, random_state):
        """Resample features and performances jointly.

        ``num_instances`` is capped at the number of available rows; a
        non-positive value selects all rows.
        """
        num_instances = min(num_instances, np.size(
            performance_data, axis=0)) if num_instances > 0 else np.size(
                performance_data, axis=0)
        return resample(feature_data,
                        performance_data,
                        n_samples=num_instances,
                        random_state=random_state)

    def _preprocess_scenario(self, scenario, features, performances):
        """Fit the imputer and scaler on ``features`` and transform them."""
        features = self._imputer.fit_transform(features)
        features = self._scaler.fit_transform(features)

        return features, performances
def kd_tree(p_ind, p_cloud, k=5000, leaf_size=400):
    """Return the indices of the points nearest to ``p_cloud[p_ind]``.

    Args:
        p_ind: row index of the query point inside ``p_cloud``.
        p_cloud: 2-D array of points used both to build the tree and to
            look up the query point.
        k: maximum number of neighbours to return (default 5000).
        leaf_size: leaf size passed to ``KDTree`` (default 400).

    Returns:
        1-D array with the indices of the ``min(k, len(p_cloud))`` nearest
        points, as returned by ``KDTree.query`` for the single query row.
    """
    tree = KDTree(p_cloud, leaf_size=leaf_size)
    # KDTree.query raises when k exceeds the number of indexed points, so
    # small clouds get every point back instead of an error.
    n_neighbours = min(k, len(p_cloud))
    ind = tree.query(p_cloud[p_ind].reshape(1, -1),
                     k=n_neighbours,
                     return_distance=False)
    return ind[0]
def _near_neighbours(self, points):
    """Return, per point, the distance to its second-closest tree entry.

    The tree is built from ``points`` and queried with the same points and
    ``k=2``; the second distance column is returned (the first column is
    presumably the point itself at distance 0).
    """
    neighbour_tree = KDTree(points, leaf_size=2)
    distances, _ = neighbour_tree.query(points[:], k=2)
    second_closest = distances[:, 1]
    return second_closest
class RrtStar:
    """RRT*-style planner over axis-aligned rectangular obstacles."""

    def __init__(self,
                 start,
                 goal,
                 obstacle_list,
                 map_limits,
                 path_max=5,
                 path_elements=10,
                 max_iter=1000,
                 goal_sample_rate=0.1):
        """
        Args:
            start (list/tuple): Start coordinates (x, y)
            goal (list/tuple): Goal Coordinates (x, y)
            obstacle_list (list): List of obstacles [[x, y, dx, dy], ...]
            map_limits (list/tuple): Search space boundaries [x_min, m_max, y_min, y_max]
            path_max (number): Maximum path length
            path_elements (int): Resolution of path to check for collisions
            max_iter (int): Maximum number of iterations to search
            goal_sample_rate (number): Rate at which to randomly sample goal position as next node (0 -> 1).
                This creates a bias towards exploring in the goal direction.

        Raises:
            ValueError: if the start or the goal position is in collision.
        """
        self.obstacle_list = obstacle_list

        # Create kd-tree of obstacles using (x, y) coordinates. A tree cannot
        # be built from zero points, so an obstacle-free map has no tree.
        if len(obstacle_list) > 0:
            self.obstacle_tree = KDTree([x[:2] for x in obstacle_list])
        else:
            self.obstacle_tree = None

        if self.point_collision_free(start):
            self.start = PathNode(start, cost=0)
        else:
            raise ValueError("Start position in collision")

        if self.point_collision_free(goal):
            self.goal = PathNode(goal)
        else:
            raise ValueError("Goal position in collision")

        self.map_limits = map_limits
        self.path_max = path_max
        self.path_elements = path_elements
        self.max_iter = max_iter
        self.node_list = []
        self.goal_sample_rate = goal_sample_rate
        # False until a node close to the goal has been found, then that node.
        self.goal_node = False

    def point_collision_free(self, point):
        """
        Check a point against the obstacle whose centre is nearest to it.

        Args:
            point (list/tuple): Coordinates (x, y)

        Returns:
            bool: False if the point lies inside that obstacle, else True.
        """
        if self.obstacle_tree is None:
            return True  # no obstacles at all

        i_obstacles = self.obstacle_tree.query([point],
                                               k=1,
                                               return_distance=False)[0]
        for i_obstacle in i_obstacles:
            ox, oy, odx, ody = self.obstacle_list[i_obstacle]
            dx = abs(ox - point[0])
            dy = abs(oy - point[1])
            if dx <= odx and dy <= ody:
                return False  # Collision
        return True  # safe

    def path_collision_free(self, path):
        """
        Check every path point against the obstacles nearest to the path.

        Args:
            path (list/tuple): Coordinates ((x, y), ...)

        Returns:
            bool: False if any path point lies inside one of the obstacles
            that is nearest to some path point, else True.
        """
        if self.obstacle_tree is None:
            return True  # no obstacles at all

        # Collect the nearest obstacle of every path point ...
        obstacle_list = []
        for coordinates in path:
            i_obstacles = self.obstacle_tree.query([coordinates],
                                                   k=1,
                                                   return_distance=False)[0]
            for i_obstacle in i_obstacles:
                obstacle_list.append(self.obstacle_list[i_obstacle])

        # ... and test each collected obstacle against the whole path.
        for (ox, oy, odx, ody) in obstacle_list:
            for x in path:
                if abs(ox - x[0]) <= odx and abs(oy - x[1]) <= ody:
                    return False  # Collision
        return True  # safe

    def get_random_node(self):
        """Sample a uniform node inside the map, or the goal position.

        The goal is only sampled (with probability ``goal_sample_rate``)
        while no goal node has been found yet.
        """
        if np.random.random() > self.goal_sample_rate or self.goal_node:
            node = PathNode([
                np.random.uniform(self.map_limits[0], self.map_limits[1]),
                np.random.uniform(self.map_limits[2], self.map_limits[3])
            ])
        else:  # goal point sampling
            node = PathNode(self.goal.coordinates)
        return node

    def create_valid_path(self,
                          node,
                          cost_fn=fn_straight_line,
                          limit_path=True):
        """Walk from ``node.parent`` towards ``node`` in fixed increments.

        Args:
            node: node with a parent; its coordinates are the target.
            cost_fn: called as ``cost_fn(from_xy, to_xy, distance=step)``;
                the first element of its result is used as the next point.
            limit_path (bool): cap the walked length at ``path_max``.

        Returns:
            A new node at the last collision-free point, or None when the
            node coincides with its parent or no step could be taken.

        Raises:
            ValueError: if ``node`` has no parent.
        """
        if node.parent is None:
            raise ValueError("Node requires a parent")

        # Move along subpath until distance limit is reached or obstacle is hit
        step_count = 1
        collision_free = True
        start = node.parent.coordinates
        distance_to_parent = node.cost_to_node(node.parent, cost_fn=cost_fn)
        if distance_to_parent == 0:
            return None

        updated_node = PathNode(start, parent=node.parent)
        updated_node.path = [start]
        if limit_path:
            path_length = min(self.path_max, distance_to_parent)
        else:
            path_length = distance_to_parent
        path_increment = path_length / self.path_elements

        while (step_count * path_increment <= path_length and collision_free):
            xy, _ = cost_fn(updated_node.path[-1],
                            node.coordinates,
                            distance=path_increment)
            # Check for collision
            if self.point_collision_free(xy):
                updated_node.path.append(xy)
                updated_node.coordinates = xy
            else:
                collision_free = False
                updated_node.collision_avoided = True
            step_count += 1

        # If no path found return None
        if len(updated_node.path) <= 1:
            return None

        updated_node.update_cost(cost_fn=cost_fn)
        # Check if original node has been adjusted due to obstacles or maximum length
        if not updated_node.path_same_as_parent(
            (step_count - 1) * path_increment):
            updated_node.truncated = True
        return updated_node

    def get_parent_node(self, node, node_tree=None, method=2):
        """Pick the parent for ``node`` among the existing nodes.

        Args:
            node: node that needs a parent.
            node_tree: KDTree over ``self.node_list`` coordinates; built on
                demand when omitted.
            method (int): 1 = cheapest node within ``0.5 * path_max`` of the
                closest node; 2 = among the (up to) 5 nearest nodes, the one
                minimising its own cost plus its cost to ``node``.

        Raises:
            ValueError: if ``method`` is neither 1 nor 2.
        """
        if node_tree is None:
            node_tree = KDTree([n.coordinates for n in self.node_list])

        if method == 1:
            # Method 1: Find closest node then look for cheapest neighbour
            i_closest = node_tree.query([node.coordinates],
                                        k=1,
                                        return_distance=False)[0][0]
            # Look in radius around this for cheaper node
            i_nearby = node_tree.query_radius(
                [self.node_list[i_closest].coordinates],
                r=self.path_max * 0.5,
                return_distance=False)[0]
            costs = [self.node_list[i_node].cost for i_node in i_nearby]
        elif method == 2:
            # Method 2: Find cheapest node from the nearest few
            n_neighbours = min(5, len(self.node_list))
            i_nearby = node_tree.query([node.coordinates],
                                       k=n_neighbours,
                                       return_distance=False)[0]
            costs = []
            for i_node in i_nearby:
                node_nearby = self.node_list[i_node]
                costs.append(node_nearby.cost +
                             node_nearby.cost_to_node(node))
        else:
            raise ValueError(
                "Unsupported parent selection method: {}".format(method))

        i_cheapest = i_nearby[np.argmin(costs)]
        return self.node_list[i_cheapest]

    def plan(self, animation=False):
        """Grow the tree for ``max_iter`` iterations.

        The node list is reset to the start node first. ``self.goal_node``
        is set to the latest accepted node that is close to the goal.
        """
        print("Starting planning ...")

        # Reinitialise node list
        self.node_list = [self.start]
        self.goal_found = False
        print_iters = self.max_iter / 10

        for i in range(self.max_iter):
            if i % print_iters == 0:
                print("Iteration {}".format(i))

            # Select random node
            random_node = self.get_random_node()

            # Create KD Tree of nodes for multiple queries
            node_tree = KDTree([n.coordinates for n in self.node_list])

            # Pick parent node based upon proximity and cost
            random_node.parent = self.get_parent_node(random_node,
                                                      node_tree=node_tree)

            # Update node to reflect path constraints
            valid_node = self.create_valid_path(random_node)

            # Assuming valid update graph
            if valid_node:
                if valid_node.close_to_node(self.goal):
                    self.goal_node = valid_node
                self.node_list.append(valid_node)
                valid_node.parent.children.add(valid_node)
                # node_tree predates the append above, so its indices still
                # address the older entries of node_list.
                self.rewire(valid_node, node_tree)

            if animation:
                self.draw_map(animation=True)

        print("Reached max iterations")
        if self.goal_node:
            print("Goal found")
        else:
            print("Goal not found")

    def rewire(self, new_node, node_tree, path_fn=fn_straight_line):
        """Re-parent nearby nodes onto ``new_node`` when that is cheaper.

        Only nodes within ``path_max`` of ``new_node`` that can be reached
        by a complete, untruncated path are considered.
        """
        # Find nodes within radius equal to max path length
        i_nearby = node_tree.query_radius([new_node.coordinates],
                                          r=self.path_max,
                                          return_distance=False)[0]
        for i_node in i_nearby:
            # Look at each of these nodes in turn
            node = self.node_list[i_node]

            # Create temporary node with position at each nearby node
            # As a test, make the new node its parent
            temp_node = PathNode(node.coordinates, parent=new_node)
            temp_node.update_cost()
            checked_node = self.create_valid_path(temp_node)

            if checked_node and not checked_node.truncated:
                # This means path is complete and unobstructed
                # If this is better then update graph
                if checked_node.cost < self.node_list[i_node].cost:
                    # Update children
                    node.parent.children.remove(node)
                    new_node.children.add(checked_node)
                    # Replace node with new one
                    self.node_list[i_node] = checked_node
                    self.propogate_cost_to_leaves(checked_node)

    def propogate_cost_to_leaves(self, parent_node, check_all=True):
        """ Update cost of downstream (leaves of tree) element after rewiring """
        if check_all:
            # Check every node (old method)
            for node in self.node_list:
                if node.parent == parent_node:
                    node.update_cost()
                    self.propogate_cost_to_leaves(node)
        else:
            # Check only recorded child nodes (intended to speed up)
            for child_node in parent_node.children:
                child_node.update_cost()
                self.propogate_cost_to_leaves(child_node)

    def get_path(self, optimise=True, return_type='points'):
        """Return the path from the tree root to the goal node.

        Args:
            optimise (bool): drop intermediate nodes wherever two path nodes
                can be joined directly without avoiding a collision.
            return_type (str): 'points' returns coordinates; any other value
                returns the node objects.

        Returns:
            The path as a list, or None when no goal node exists.
        """
        if not self.goal_node:
            print("No path was found last time")
            return None

        # Walk the parent links back from the goal node.
        path = []
        node = self.goal_node
        while node.parent is not None:
            path.insert(0, node)
            node = node.parent
        path.insert(0, node)  # root of the tree (node without parent)

        if optimise:
            # Remove unnecessary nodes
            for i_a, node_a in enumerate(path):
                # Try the farthest later node first.
                for j, node_b in reversed(list(enumerate(path[i_a + 1:]))):
                    i_b = j + i_a + 1
                    temp_node = PathNode(node_b.coordinates, parent=node_a)
                    valid_node = self.create_valid_path(temp_node,
                                                        limit_path=False)
                    if valid_node and not valid_node.collision_avoided:
                        del path[i_a + 1:i_b]
                        break

        if return_type == 'points':
            path = [node.coordinates for node in path]
        return path

    @staticmethod
    def plot_rectangle(x, y, dx, dy, color="b"):
        """Draw a rectangle centred on (x, y) with half-sizes (dx, dy)."""
        rect = patches.Rectangle((x - dx, y - dy),
                                 2 * dx,
                                 2 * dy,
                                 facecolor=color)
        # Add the patch to the Axes
        ax = plt.gca()
        ax.add_patch(rect)

    def draw_map(self, animation=False):
        """Plot obstacles, tree, found paths, start and goal.

        With ``animation=True`` the figure is refreshed briefly instead of
        blocking in ``plt.show()``.
        """
        plt.clf()
        # for stopping simulation with the esc key.
        plt.gcf().canvas.mpl_connect(
            'key_release_event',
            lambda event: [exit(0) if event.key == 'escape' else None])

        for (ox, oy, odx, ody) in self.obstacle_list:
            self.plot_rectangle(ox, oy, odx, ody)

        for node in self.node_list:
            plt.scatter([node.coordinates[0]], [node.coordinates[1]],
                        marker="o",
                        c="m",
                        s=4)
            if node.parent:
                plt.plot([x[0] for x in node.path],
                         [x[1] for x in node.path],
                         "-m",
                         linewidth=1,
                         alpha=0.3)

        if self.goal_node:
            # Dashed: path as found; solid: path after shortcutting.
            full_path = self.get_path(optimise=False, return_type='points')
            plt.plot([x[0] for x in full_path], [x[1] for x in full_path],
                     "--g",
                     linewidth=2,
                     alpha=1)
            short_path = self.get_path(optimise=True, return_type='points')
            plt.plot([x[0] for x in short_path], [x[1] for x in short_path],
                     "-g",
                     linewidth=3,
                     alpha=1)

        plt.plot(self.start.coordinates[0],
                 self.start.coordinates[1],
                 "xr",
                 linewidth=3)
        plt.plot(self.goal.coordinates[0],
                 self.goal.coordinates[1],
                 "xg",
                 linewidth=3)
        plt.axis(self.map_limits)
        plt.grid(True)
        if animation:
            plt.pause(0.05)
        else:
            plt.show()
def buildGraph(self, graph, numberSamples, knearest, tau, addState, eexy,
               ee1Flag):
    """Sample configurations, add them to ``graph`` and connect neighbours.

    Accepted samples are added as vertices (keyed by ``str(rob)``) and their
    flattened positions are stacked row-wise into ``output``. A KD tree over
    ``output`` then supplies, for every row, the ``knearest`` closest rows,
    and an edge is added when the checks below pass.

    With ``addState == 1`` the init and goal states are appended as the last
    two vertices/rows.
    """
    tester = test_robot(self)
    points = self.grapple_points
    numberSamples_tmp = numberSamples
    D = [tau, 0.1]
    count = 0
    # loop_count / addCount are incremented but never read in this method.
    loop_count = 0
    diffAng = self.checkhv_ee(eexy)
    while count < numberSamples:
        samples = self.sampling_eexy(eexy, numberSamples, ee1Flag, diffAng)
        loop_count += 1
        addCount = 0
        for sp in range(numberSamples_tmp):
            addCount += 1
            rob = self.assign_config(samples, sp)
            # NOTE(review): a truthy self_collision_test result is treated
            # as "keep this sample" -- confirm the helper's convention.
            if tester.self_collision_test(rob):
                graph.addVertex(str(rob))
                # points = rob.str2list()[:-1]
                # points = rob.points
                points = rob.get_position()
                # Flatten the (r, c) position array into one row.
                myarray = np.asarray(points)
                r, c = myarray.shape
                myarray = myarray.reshape(1, c * r)
                if count == 0:
                    output = myarray
                    count += 1
                else:
                    output = np.vstack([output, myarray])
                    count += 1
                if count > numberSamples:
                    break
            else:
                # NOTE(review): pairing of this else with the collision test
                # above is the most plausible reading -- confirm.
                # print("add 20 obstacle samples")
                self.obstacle_sampling_near_only(graph, rob, D, 1, tau)
    if addState == 1:
        # Append the init state ...
        init = self.get_init_state()
        graph.addVertex(str(init))
        # myarray = np.asarray(init.str2list()[:-1])
        myarray = np.asarray(init.get_position())
        r, c = myarray.shape
        myarray = myarray.reshape(1, c * r)
        output = np.vstack([output, myarray])
        # ... and the goal state as the last two rows.
        goal = self.get_goal_state()
        graph.addVertex(str(goal))
        # myarray = np.asarray(goal.str2list()[:-1])
        myarray = np.asarray(goal.get_position())
        r, c = myarray.shape
        myarray = myarray.reshape(1, c * r)
        output = np.vstack([output, myarray])
    r, c = output.shape
    tree = KDTree(output, leaf_size=2)
    # print(output)
    for sp in range(r):
        # The last two rows are only linked to their single nearest row
        # (k=2 including the row itself, presumably at index 0).
        if sp >= r - 2:
            knearest = 2
        dist, ind = tree.query(output[sp:sp + 1], k=knearest)
        curNode = graph.getVerticeByInt(sp)
        m = self.str2robotConfig(curNode)
        # Neighbour 0 is skipped.
        for kn in range(1, knearest):
            knNode = graph.getVerticeByInt(ind[0][kn])
            q = self.str2robotConfig(knNode)
            if (sp < r - 2):
                # Regular rows additionally need to pass the distance test.
                if (tester.test_config_distance(m, q, self, tau)):
                    # Same trailing character and same first two
                    # space-separated fields are required for an edge.
                    if curNode[-1] == knNode[-1]:
                        # NOTE(review): both branches below run the same
                        # check -- confirm whether they were meant to differ.
                        if int(curNode[-1]) == 1:
                            if curNode.split(' ')[0] == knNode.split(
                                    ' ')[0] and curNode.split(' ')[
                                        1] == knNode.split(' ')[1]:
                                graph.addEdge(curNode, knNode)
                        else:
                            if curNode.split(' ')[0] == knNode.split(
                                    ' ')[0] and curNode.split(' ')[
                                        1] == knNode.split(' ')[1]:
                                graph.addEdge(curNode, knNode)
            else:
                # Last two rows: same checks, without the distance test.
                if curNode[-1] == knNode[-1]:
                    if int(curNode[-1]) == 1:
                        if curNode.split(' ')[0] == knNode.split(
                                ' ')[0] and curNode.split(
                                    ' ')[1] == knNode.split(' ')[1]:
                            graph.addEdge(curNode, knNode)
                    else:
                        if curNode.split(' ')[0] == knNode.split(
                                ' ')[0] and curNode.split(
                                    ' ')[1] == knNode.split(' ')[1]:
                            graph.addEdge(curNode, knNode)
class ShapePCADataset(data.Dataset):
    """Dataset of landmark shapes together with their PCA parameters.

    Shapes are read from the annotation list, mapped into crop coordinates,
    Procrustes-aligned to the mean shape and projected with PCA. When a
    ``trainset_sim`` dataset is given, a KD tree over its shapes allows
    nearest-shape look-ups through ``get_similars``.
    """

    def __init__(self,
                 arg,
                 dataset,
                 split,
                 pca_components=20,
                 trainset_sim=None):
        self.arg = arg
        self.dataset = dataset
        self.split = split
        self.pca_components = pca_components
        self.list = get_annotations_list(self.arg.dataset_route,
                                         dataset,
                                         split,
                                         arg.crop_size,
                                         ispdb=arg.PDB)  #[:1024]
        # Filled in by init_aligned_shapes below.
        self.shapes = None
        self.pose_params = None
        self.aligned_shapes = None
        self.aligned_pose_params = None
        self.init_aligned_shapes(arg.crop_size)
        # self.trainset_sim only exists when a similarity set was passed;
        # get_similars guards on self.tree instead.
        self.tree = None
        if trainset_sim is not None:
            self.trainset_sim = trainset_sim
            self.tree = KDTree(np.float32(self.trainset_sim.shapes))

    def init_aligned_shapes(self, crop_size):
        """Compute shapes, aligned shapes and the PCA parameters of both.

        Sets ``self.shapes``, ``self.pose_params``, ``self.aligned_shapes``
        and ``self.aligned_pose_params``; each has one row per annotation.
        """
        # One column per annotation; rows interleave x (even) and y (odd).
        shapes = np.zeros((2 * kp_num[self.dataset], len(self.list)))
        for line_index, line in enumerate(self.list):
            coord_x = np.array(
                list(map(float, line[:2 * kp_num[self.dataset]:2])))
            coord_y = np.array(
                list(map(float, line[1:2 * kp_num[self.dataset]:2])))
            # Three corners built from fields -7..-4 of the annotation line
            # (presumably the face box -- TODO confirm field order) are
            # mapped onto the corners of the crop_size square.
            position_before = np.float32([[int(line[-7]),
                                           int(line[-6])],
                                          [int(line[-7]),
                                           int(line[-4])],
                                          [int(line[-5]),
                                           int(line[-4])]])
            position_after = np.float32([[0, 0], [0, crop_size - 1],
                                         [crop_size - 1, crop_size - 1]])
            crop_matrix = cv2.getAffineTransform(position_before,
                                                 position_after)
            # Apply the 2x3 affine matrix to every landmark.
            coord_x_after_crop = crop_matrix[0][0] * coord_x + crop_matrix[0][
                1] * coord_y + crop_matrix[0][2]
            coord_y_after_crop = crop_matrix[1][0] * coord_x + crop_matrix[1][
                1] * coord_y + crop_matrix[1][2]
            shapes[0:2 * kp_num[self.dataset]:2,
                   line_index] = list(coord_x_after_crop)
            shapes[1:2 * kp_num[self.dataset]:2,
                   line_index] = list(coord_y_after_crop)

        # NOTE(review): this binds a second name to the SAME array, so the
        # in-place column writes in the loop below also overwrite `shapes`
        # (and therefore self.shapes / pose_params) with the aligned
        # coordinates. Confirm that this is intended; use shapes.copy()
        # otherwise.
        aligned_shapes = shapes
        mean_shape = np.mean(aligned_shapes, 1)
        mean_shape_xy = coords_seq_to_xy(self.dataset, mean_shape)
        for i in range(len(aligned_shapes[0])):
            aligned_shape_xy = coords_seq_to_xy(self.dataset,
                                                aligned_shapes[:, i])
            # Align shape i to the mean shape (reflection disabled).
            tmp_error, tmp_shape, tmp_trans = procrustes(mean_shape_xy,
                                                         aligned_shape_xy,
                                                         reflection=False)
            aligned_shapes[:, i] = tmp_shape.reshape((1, -1), order='F')

        # Subtract the mean of the aligned shapes; this creates a new array,
        # so `shapes` is not affected by the centring.
        mean_shape = np.mean(aligned_shapes, 1)
        mean_shape = mean_shape.repeat(len(aligned_shapes[0])).reshape(
            -1, len(aligned_shapes[0]))
        aligned_shapes = aligned_shapes - mean_shape

        # Switch to one row per annotation before fitting PCA.
        shapes = np.moveaxis(shapes, -1, 0)

        pca = PCA(n_components=self.pca_components, svd_solver='full')
        pose_params = pca.fit_transform(shapes)

        aligned_shapes = np.moveaxis(aligned_shapes, -1, 0)
        pca_aligned = PCA(n_components=self.pca_components, svd_solver='full')
        aligned_pose_params = pca_aligned.fit_transform(aligned_shapes)

        self.shapes = shapes
        self.pose_params = pose_params
        self.aligned_shapes = aligned_shapes
        self.aligned_pose_params = aligned_pose_params

    def __len__(self):
        """Return the number of annotation lines."""
        return len(self.list)

    def __getitem__(self, item):
        """Return the float32 sample tuple for index ``item``.

        The tuple is (shape coordinates, ground-truth heatmap, PCA
        parameters, aligned coordinates, PCA parameters of the aligned
        shape).
        """
        gt_coords_xy = np.float32(self.shapes[item])
        gt_heatmap = get_gt_heatmap(
            self.dataset,
            gt_coords_xy.reshape([2 * kp_num[self.arg.dataset]]),
            self.arg.crop_size, self.arg.sigma)
        pose_param = np.float32(self.pose_params[item])

        aligned_coords_xy = np.float32(self.aligned_shapes[item])
        aligned_pose_params = np.float32(self.aligned_pose_params[item])
        return gt_coords_xy, gt_heatmap, pose_param, aligned_coords_xy, aligned_pose_params

    def get_similars(self, shapes):
        """Look up the samples of ``trainset_sim`` closest to ``shapes``.

        Returns a tuple of tensors, one per field of the similarity
        dataset's samples, or None when no similarity set was configured.
        """
        if self.tree is not None:
            # The tree's default neighbour count is used for the query.
            _, indexes = self.tree.query(coords_xy_to_seq(
                self.dataset, shapes))
            # Transpose the list of sample tuples into per-field sequences.
            return tuple(
                map(torch.tensor,
                    zip(*[self.trainset_sim[i] for i in indexes])))
        return None
class KdSampler:
    '''
    Represent the KD-Tree Sampling

    Random 3-D samples are drawn inside a circle spanning start and goal;
    every sample is tested against the obstacles whose centres are nearest
    to it and is stored either in the kept or in the removed list.
    '''

    def __init__(self, data, num_samples, start=(), goal=(), safety_dist=1):
        '''
        Args:
            data: 2-D array with one obstacle per row:
                (north, east, alt, d_north, d_east, d_alt).
            num_samples: number of random samples to draw.
            start, goal: tuples holding the start / goal coordinates.
            safety_dist: margin added around every obstacle footprint.

        Raises:
            TypeError: if start or goal is not a tuple.
        '''
        if not isinstance(start, tuple) or not isinstance(goal, tuple):
            raise TypeError("Start and goal coordinate has to be a tuple")

        self._num_samples = num_samples
        self._min_z = 5
        self._max_z = 15
        self._start_node = start
        self._goal_node = goal
        # Bounding area of all obstacles: the lower bounds subtract the
        # half-sizes, the upper bounds add them.
        self._area = Area(
            np.min(data[:, 0] - data[:, 3]), np.max(data[:, 0] + data[:, 3]),
            np.min(data[:, 1] - data[:, 4]), np.max(data[:, 1] + data[:, 4]))
        self._kept_samples = []
        self._removed_samples = []

        # Take the center of the obstacle to the KDTree
        # KDtree input and the query have to be the same dimensions
        self._obstKD_Tree = KDTree(data[:, 0:3])

        # Footprint polygon (inflated by safety_dist) and top height of every
        # obstacle; the collision test below reuses exactly these polygons.
        self._polygons = [
            self._inflated_footprint(data[i, :], safety_dist)
            for i in range(data.shape[0])
        ]

        dist = np.linalg.norm(np.array(goal[:2]) - np.array(start[:2]))
        center = (np.array(goal[:2]) + np.array(start[:2])) / 2
        rad_deviation = 5  # to deal with a dead end or possible redirections
        radius = (dist / 2) + rad_deviation
        print("Air distance: ", dist)

        # Generate samples in the circle with diameter from start and goal points
        xvals, yvals = gen_circular_random(center, radius, num_samples,
                                           self._area)
        zvals = np.random.uniform(self._min_z, self._max_z,
                                  num_samples).astype(int)
        rand_3dsamples = list(zip(xvals, yvals, zvals))
        rand_3dsamples.append(tuple(start))
        rand_3dsamples.append(tuple(goal))

        # The tree rejects k larger than the number of obstacles.
        n_candidates = min(3, data.shape[0])

        # check the nearest obstacle centers
        for point3d in rand_3dsamples:
            # get the nearest obstacle centers
            data_indices = self._obstKD_Tree.query([point3d],
                                                   k=n_candidates,
                                                   return_distance=False)[0]
            collision = False
            for i in data_indices:
                p, height = self._polygons[i]
                # Inside the footprint and not above the obstacle's top.
                if p.contains(Point(point3d)) and (height >= point3d[2]):
                    self._removed_samples.append(point3d)
                    # Compare tuple with tuple (a list never equals a tuple).
                    if tuple(point3d) in (tuple(start), tuple(goal)):
                        print("WARNING: the start or goal node in {0}is removed!!".format(point3d))
                    collision = True
                    break
            if not collision:
                self._kept_samples.append(point3d)

    @staticmethod
    def _inflated_footprint(obstacle_row, safety_dist):
        '''Return (footprint polygon, top height) for one obstacle row.'''
        north, east, alt, d_north, d_east, d_alt = obstacle_row
        # obstacle[north min, north max, east min, east max]
        obstacle = Obstacle(north - d_north - safety_dist,
                            north + d_north + safety_dist,
                            east - d_east - safety_dist,
                            east + d_east + safety_dist)
        corners = [
            (obstacle.north_min, obstacle.east_min),
            (obstacle.north_min, obstacle.east_max),
            (obstacle.north_max, obstacle.east_max),
            (obstacle.north_max, obstacle.east_min),
        ]
        height = alt + d_alt
        return Polygon(corners), height
def fit(self, data):
    """Cluster ``data`` (an m x dim array) and store the labels.

    Builds a symmetric neighbour-affinity matrix, forms the (optionally
    normalized) graph Laplacian, takes the eigenvectors of its ``k_``
    smallest eigenvalues and runs k-means on them. The result is stored in
    ``self.labels_``; the intermediate matrices are kept on the instance
    for debugging.
    """
    n_points = data.shape[0]
    neighbour_tree = KDTree(data)
    affinity = np.zeros((n_points, n_points))

    for row, sample in enumerate(data):
        # The two query flavours return (indices, distances) in opposite
        # order.
        if self.use_radius_nn_:
            neighbour_ids, neighbour_dists = neighbour_tree.query_radius(
                [sample], self.nnradius_, return_distance=True)
        else:
            neighbour_dists, neighbour_ids = neighbour_tree.query(
                [sample], self.nnk_ + 1, return_distance=True)
        neighbour_ids = neighbour_ids[0]
        neighbour_dists = neighbour_dists[0]

        for col, gap in zip(neighbour_ids, neighbour_dists):
            # A sample is among its own neighbours; only the others count.
            if col != row:
                if self.use_gauss_dist_:
                    weight = self.gauss_(gap)
                else:
                    weight = 1 / gap
                affinity[row][col] = weight
                affinity[col][row] = weight

    degree = np.diag(affinity.sum(axis=1))
    # unnormalized Laplacian
    laplacian = degree - affinity

    # for debugging
    self.W = affinity
    self.D = degree

    if self.normalized_:
        # Two equivalent formulas, cross-checked against each other.
        first_form = np.matmul(LA.inv(degree), laplacian)
        second_form = np.identity(n_points) - np.matmul(
            LA.inv(degree), affinity)
        laplacian = second_form
        assert (np.allclose(first_form, second_form))

    # for debugging
    self.L = laplacian

    eigvals, eigvecs = LA.eig(laplacian)
    # eig does not order its eigenvalues, so sort them explicitly.
    ascending = np.argsort(eigvals)
    # eigenvectors of the self.k_ smallest eigenvalues
    embedding = eigvecs[:, ascending[:self.k_]]

    # for debugging
    self.eigvals = eigvals
    self.eigvecs = eigvecs
    self.V = embedding

    # run kmeans
    self.labels_ = KMeans(n_clusters=self.k_).fit_predict(embedding)
# save sub_cloud and KDTree file sub_xyz, sub_colors, sub_labels = DP.grid_sub_sampling( sub_points, sub_colors, sub_labels, grid_size) sub_colors = sub_colors / 255.0 sub_labels = np.squeeze(sub_labels) sub_ply_file = join(sub_pc_folder, file_name + '.ply') write_ply(sub_ply_file, [sub_xyz, sub_colors, sub_labels], ['x', 'y', 'z', 'red', 'green', 'blue', 'class']) search_tree = KDTree(sub_xyz, leaf_size=50) kd_tree_file = join(sub_pc_folder, file_name + '_KDTree.pkl') with open(kd_tree_file, 'wb') as f: pickle.dump(search_tree, f) proj_idx = np.squeeze( search_tree.query(sub_points, return_distance=False)) proj_idx = proj_idx.astype(np.int32) proj_save = join(sub_pc_folder, file_name + '_proj.pkl') with open(proj_save, 'wb') as f: pickle.dump([proj_idx, labels], f) else: full_ply_path = join(original_pc_folder, file_name + '.ply') write_ply(full_ply_path, (pc[:, :3].astype(np.float32), pc[:, 4:7].astype(np.uint8)), ['x', 'y', 'z', 'red', 'green', 'blue']) # save sub_cloud and KDTree file sub_xyz, sub_colors = DP.grid_sub_sampling(pc[:, :3].astype( np.float32), pc[:, 4:7].astype(np.uint8),
tree = trees[i] if tree == None: continue for j in range(len(segment_pts_list)): pts = segment_pts_list[j] if segment_type_list[j] != 0 or i >= j or pts is 'None': continue neighborFlag = False neighborCount = 0 for k in range(pts.shape[0]): sample_prob = 0.2 if random.random() > sample_prob: continue if k > pts.shape[0] * 0.5 * sample_prob and neighborCount == 0: break dist, ind = tree.query(pts[k:k + 1, :3], k=1) if dist[0, 0] < 0.3: neighborCount += 1 if neighborCount >= 5: neighborFlag = True break if neighborFlag: adj_matrix[segment_id_list[i], segment_id_list[j]] = 1 adj_matrix[segment_id_list[j], segment_id_list[i]] = 1 count_adj += 1 np.savetxt(os.path.join(files_dir, 'segment_adjacent_matrix.txt'), adj_matrix) ########################################################################## ## compute affinity matrix affinity_matrix_dir = os.path.join(files_dir, 'segment_affinity_matrix.txt')
def nearest_neighbor(df1, df2):
    """Return, for each row of ``df2``, the index label of its nearest row in ``df1``.

    A KD-tree is built over the values of ``df1`` and queried with the values
    of ``df2`` (``k=1``, default KDTree metric).

    Args:
        df1: DataFrame whose rows are the candidate neighbours.
        df2: DataFrame whose rows are the query points; it must have the same
            number of columns as ``df1``.

    Returns:
        Array of shape ``(len(df2), 1)`` holding labels taken from
        ``df1.index`` (the trailing axis of length 1 comes from ``k=1``).
    """
    from sklearn.neighbors import KDTree

    # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields the
    # same ndarray and exists in every pandas release.
    kdt = KDTree(df1.values)
    indexes = kdt.query(df2.values, k=1, return_distance=False)
    return df1.index.values[indexes]
# Query step: describe one image by a histogram of its k-means cluster
# assignments and look the histogram up in a prebuilt KD-tree.
# NOTE(review): q_path, kmeans, n_bins, SIFTtree, mydataSIFT and accuracy are
# defined outside this excerpt.
print(q_path)
import ImageSearch_Algo_SIFT

# get the feature (presumably SIFT keypoints and descriptors -- confirm
# against ImageSearch_Algo_SIFT.FEATURE)
q_kp, q_des = ImageSearch_Algo_SIFT.FEATURE(q_path)
predict_kmeans = kmeans.predict(q_des)
# calculate the histogram of the predicted cluster ids
hist1, bin_edges1 = np.histogram(predict_kmeans, bins=n_bins)
# the histogram is the feature vector
q_feature_vector = hist1

# ------- Using KD TREE
# reshape to a single-row 2-D array before querying
# (original author's note: "something wrong in this implementation")
F = q_feature_vector.reshape(1, -1)
dist, result = SIFTtree.query(F, k=50)
print(result)
# file names of the 50 returned rows, and their distances to the query
flist = list(mydataSIFT.iloc[result[0].tolist()]['file'])
slist = list(dist[0])
matches = tuple(zip(slist, flist))  # tuple of (distance, file) pairs built from the 2 lists
a, q, pos, cnt = accuracy.accuracy_matches(q_path, matches, 20)
print('Accuracy =', a, '%', '| Quality:', q)
print('Count', cnt, ' | position', pos)

# Alternative kept by the original author (disabled): nearest-neighbour lookup
# # using nearest neighbor
# dist, result = neighbor.kneighbors([q_feature_vector])
# print (result)
# flist = list (mydataSIFT.iloc[ result[0].tolist()]['file'])
# slist = list (dist[0])
class Loop:
    """Outlier-probability scorer driven by KD-tree nearest-neighbour queries.

    The method names (``pdist``, ``plof``, ``nplof`` and the final ``erf``)
    suggest the Local Outlier Probabilities formulation -- presumably that is
    the intended algorithm; confirm against the reference before relying on
    exact constants.

    Attributes set by ``__init__``:
        tree: KDTree over the raw training data.
        is_binned: True when ``bins`` was given; queries then go to
            ``bin_tree``, a KDTree over the discretised training data.
        knn_dist_arr, knn_idx_arr: k-NN distances / indices of the training
            data queried against itself.
        _pdist: per-training-point ``pdist`` values.
        _plof, nplof: per-training-point ``plof`` values and their aggregate.
        loop_values: final scores of the training points, each in ``[0, 1)``.
    """

    def __init__(self,
                 data,
                 k=4,
                 leaf_size=40,
                 lambdav=.5,
                 lap_vec=None,
                 bins=None):
        """Fit the scorer on ``data``.

        Args:
            data: 2-D array-like of training points.
            k: number of neighbours requested from the KD-tree.
            leaf_size: ``leaf_size`` forwarded to ``KDTree``.
            lambdav: multiplier applied in ``pdist`` and ``plof``.
            lap_vec: optional per-point offset added in ``pdist``; must have
                one entry per training point.
            bins: optional ``n_bins`` for ``KBinsDiscretizer``; when given,
                neighbour search runs on the discretised data.
        """
        self.tree = KDTree(data, leaf_size=leaf_size)
        self.data = data
        # Always define the flag: knn() reads it, and previously it only
        # existed when ``bins`` was passed, so the default path crashed.
        self.is_binned = bins is not None
        if self.is_binned:
            self.discretizer = preprocessing.KBinsDiscretizer(
                n_bins=bins, encode='ordinal').fit(data)
            # Map every point to its bin and back, so the tree indexes the
            # discretised coordinates.
            self.data_binned = self.discretizer.inverse_transform(
                self.discretizer.transform(data))
            self.bin_tree = KDTree(self.data_binned, leaf_size=leaf_size)
        self.k = k
        self.lambdav = lambdav
        self.knn_dist_arr, self.knn_idx_arr = self.knn(data)
        self._pdist = self.pdist(data, self.knn_dist_arr, lap_vec)
        self._plof, self.nplof = self.plof(data, self.knn_idx_arr, self._pdist)
        self.loop_values = self.loop(self._plof)

    def knn(self, o):
        """Return ``(distances, indices)`` of the ``k`` nearest training points of ``o``."""
        tree = self.bin_tree if self.is_binned else self.tree
        return tree.query(o, k=self.k)

    def pdist(self, o, knn_dist, lap_vec=None):
        """Return ``lambdav * (||knn_dist_row||_2 / k + lap_vec)`` for each point.

        Args:
            o: the points being scored (only its length is used).
            knn_dist: array of neighbour distances, one row per point.
            lap_vec: optional per-point offset; defaults to zeros.
        """
        if lap_vec is None:
            lap_vec = np.zeros(len(o))
        else:
            assert len(lap_vec) == len(
                o), "lap_vec length is unequal data length"
        stddist = LA.norm(knn_dist, axis=1) / self.k + lap_vec
        return stddist * self.lambdav

    def plof(self, o, knn_idx_arr, pdist):
        """Return ``(plof, nplof)`` for the points ``o``.

        ``plof`` is ``pdist`` divided by the mean *training* ``pdist`` of each
        point's neighbours, minus one; ``nplof`` is
        ``sqrt(lambdav * mean(plof ** 2))``.
        """
        # One row of neighbour indices per point -> one mean per row.
        expected_pdist = np.mean(self._pdist[np.asarray(knn_idx_arr)], axis=1)
        plof = pdist / expected_pdist - 1
        nplof = np.sqrt(self.lambdav * np.mean(plof**2))
        return plof, nplof

    def loop(self, plof):
        """Map ``plof`` to scores ``max(0, erf(plof / (sqrt(2) * self.nplof)))``."""
        erf = special.erf(plof / (np.sqrt(2) * self.nplof))
        return np.maximum(np.zeros(len(plof)), erf)

    def query_loop(self, o):
        """Score new points ``o`` against the fitted training data.

        The training-set ``nplof`` is used for the final normalisation; the
        aggregate computed for ``o`` itself is discarded.
        """
        knn_dist, knn_idx = self.knn(o)
        pdist = self.pdist(o, knn_dist)
        plof, _ = self.plof(o, knn_idx, pdist)
        return self.loop(plof)

    @staticmethod
    def _normalize(train, test):
        """Centre both sets on the training mean, then scale by the training max-abs."""
        max_abs_scaler = preprocessing.MaxAbsScaler()
        standard_scaler = preprocessing.StandardScaler(with_std=False)
        train = max_abs_scaler.fit_transform(
            standard_scaler.fit_transform(train))
        test = max_abs_scaler.transform(standard_scaler.transform(test))
        return train, test

    @staticmethod
    def benchmark(train,
                  test,
                  test_labels,
                  ks,
                  lambdav=3,
                  normalize=True,
                  lap_vec=None,
                  bins=None):
        """Return the ROC-AUC on ``test`` for every neighbour count in ``ks``."""
        if normalize:
            train, test = Loop._normalize(train, test)
        aurocs = np.zeros(len(ks))
        for i, k in enumerate(ks):
            loop_wdbc = Loop(train,
                             lambdav=lambdav,
                             k=k,
                             lap_vec=lap_vec,
                             bins=bins)
            aurocs[i] = sklearn.metrics.roc_auc_score(
                test_labels, loop_wdbc.query_loop(test))
        return aurocs

    @staticmethod
    def bin_benchmark(train,
                      test,
                      test_labels,
                      lambdav=3,
                      normalize=True,
                      lap_vec=None,
                      bins_size=range(2, 10),
                      k=20):
        """Return the ROC-AUC on ``test`` for every bin count in ``bins_size``.

        Each entry ``b`` is applied to all features (``bins = [b] * n_features``).
        """
        if normalize:
            train, test = Loop._normalize(train, test)
        aurocs = np.zeros(len(bins_size))
        data_dim = train.shape[1]
        for i, b in enumerate(bins_size):
            bins = [b] * data_dim
            loop_wdbc = Loop(train,
                             lambdav=lambdav,
                             k=k,
                             lap_vec=lap_vec,
                             bins=bins)
            aurocs[i] = sklearn.metrics.roc_auc_score(
                test_labels, loop_wdbc.query_loop(test))
        return aurocs

    @staticmethod
    def r_unif(train_data, k):
        """Return ``((gamma(1 + d/2) * k * 2**d) / (n * pi**(d/2))) ** (1/d)``.

        ``d`` is the number of columns and ``n`` the number of rows of
        ``train_data``.
        """
        no_features = train_data.shape[1]
        # consider to scale down to -.5,.5 so that this would be 1
        omega_volume = 2**no_features
        data_cardinality = len(train_data)
        return ((special.gamma(1 + no_features / 2) * k * omega_volume) /
                (data_cardinality * np.pi**(no_features / 2)))**(1 /
                                                                 no_features)
def score_path(path): return score_chunk(0, path) # 4. Precompute close cities pairs using KDTree. # In[ ]: kdt = KDTree(XY) # In[ ]: pairs = set() for city_id in tqdm(cities.index): dists, neibs = kdt.query([XY[city_id]], 31) for neib_id in neibs[0][1:]: if city_id and neib_id: # skip pairs that include starting city pairs.add(tuple(sorted((city_id, neib_id)))) neibs = kdt.query_radius([XY[city_id]], 31, count_only=False, return_distance=False) for neib_id in neibs[0]: if city_id and neib_id and city_id != neib_id: pairs.add(tuple(sorted((city_id, neib_id)))) print(f'{len(pairs)} cities pairs are selected.') # sort pairs by distance pairs = np.array(list(pairs)) distances = np.sum((XY[pairs.T[0]] - XY[pairs.T[1]])**2, axis=1) order = distances.argsort() pairs = pairs[order]
def _world_distance(a, b):
    """Euclidean distance between the (northing, easting) positions of two elements."""
    return np.sqrt((a['northing'] - b['northing']) ** 2 + (a['easting'] - b['easting']) ** 2)


def _log_false_positive(query_details, database_elements, distances, indices, true_set):
    """Append one line to log_fp.txt describing a top-1 false positive and the first true positive."""
    fp_ndx = indices[0][0]
    fp = database_elements[fp_ndx]  # Database element: {'query': path, 'northing': , 'easting': }
    fp_emb_dist = distances[0, 0]  # Distance in embedding space
    fp_world_dist = _world_distance(query_details, fp)

    # Find the first true positive among the returned neighbours
    tp = None
    for k in range(len(indices[0])):
        if indices[0][k] in true_set:
            tp = database_elements[indices[0][k]]
            tp_emb_dist = distances[0][k]
            tp_world_dist = _world_distance(query_details, tp)
            break

    with open("log_fp.txt", "a") as f:
        s = "{}, {}, {:0.2f}, {:0.2f}".format(query_details['query'], fp['query'], fp_emb_dist, fp_world_dist)
        if tp is None:
            s += ', 0, 0, 0\n'
        else:
            s += ', {}, {:0.2f}, {:0.2f}\n'.format(tp['query'], tp_emb_dist, tp_world_dist)
        f.write(s)


def _log_top_matches(query_details, database_elements, distances, indices, true_set):
    """Append one line to log_search_results.txt with the (up to) 5 best matches of a query."""
    s = "{}, ".format(query_details['query'])
    for k in range(min(len(indices[0]), 5)):
        e_ndx = indices[0][k]
        is_match = e_ndx in true_set
        e = database_elements[e_ndx]  # Database element: {'query': path, 'northing': , 'easting': }
        e_emb_dist = distances[0][k]
        s += ', {}, {:0.2f}, {}, '.format(e['query'], e_emb_dist, 1 if is_match else 0)
    s += '\n'
    out_file_name = "log_search_results.txt"
    with open(out_file_name, "a") as f:
        f.write(s)


def get_recall(m, n, database_vectors, query_vectors, query_sets, database_sets, log=False):
    """Compute retrieval recall of query set ``n`` against database set ``m``.

    Based on the original PointNetVLAD evaluation code.

    Args:
        m: index of the database set.
        n: index of the query set.
        database_vectors: per-set database embeddings; ``database_vectors[m]`` is used.
        query_vectors: per-set query embeddings; ``query_vectors[n]`` is used.
        query_sets: ``query_sets[n][i]`` describes query ``i``; its entry ``[m]``
            lists the true-neighbour indices in database ``m``.
        database_sets: ``database_sets[m][j]`` describes database element ``j``
            (only read when ``log`` is true).
        log: when true, randomly sampled 'oxford' queries are appended to
            ``log_fp.txt`` / ``log_search_results.txt`` in the working directory.

    Returns:
        ``(recall, top1_similarity_score, one_percent_recall)`` where ``recall``
        is a length-25 array of cumulative recall@k percentages,
        ``top1_similarity_score`` lists the dot products of correctly retrieved
        top-1 pairs, and ``one_percent_recall`` is the percentage of queries
        with a true neighbour within the top 1% of the database.

    Raises:
        ZeroDivisionError: if no query has any true neighbour in database ``m``.
    """
    database_output = database_vectors[m]
    queries_output = query_vectors[n]

    # When embeddings are normalized, using Euclidean distance gives the same
    # nearest neighbour search results as using cosine distance
    database_nbrs = KDTree(database_output)

    num_neighbors = 25
    # KDTree.query rejects k larger than the number of indexed points, so small
    # databases are queried for everything they have; ``recall`` keeps its
    # fixed length and the unreachable ranks simply stay at zero.
    k_query = min(num_neighbors, len(database_output))
    recall = [0] * num_neighbors

    top1_similarity_score = []
    one_percent_retrieved = 0
    threshold = max(int(round(len(database_output)/100.0)), 1)

    num_evaluated = 0
    for i in range(len(queries_output)):
        # i is query element ndx
        query_details = query_sets[n][i]  # {'query': path, 'northing': , 'easting': }
        true_neighbors = query_details[m]
        if len(true_neighbors) == 0:
            continue
        num_evaluated += 1
        # Built once per query: every rank below is tested for membership.
        true_set = set(true_neighbors)
        distances, indices = database_nbrs.query(np.array([queries_output[i]]), k=k_query)

        if log:
            is_oxford = query_details['query'][:6] == 'oxford'
            # Log 10% of false positives (returned as the first element) for Oxford dataset.
            # random.random() stays last so it is only drawn for real false positives.
            if is_oxford and indices[0][0] not in true_set and random.random() < 0.1:
                _log_false_positive(query_details, database_sets[m], distances, indices, true_set)

            # For randomly selected 1% of queries save details of 5 best matches for later visualization
            if is_oxford and len(indices[0]) >= 5 and random.random() < 0.01:
                _log_top_matches(query_details, database_sets[m], distances, indices, true_set)

        # Credit only the rank of the first true neighbour; cumsum below turns
        # these per-rank counts into recall@k.
        for j in range(len(indices[0])):
            if indices[0][j] in true_set:
                if j == 0:
                    similarity = np.dot(queries_output[i], database_output[indices[0][j]])
                    top1_similarity_score.append(similarity)
                recall[j] += 1
                break

        if true_set.intersection(indices[0][0:threshold]):
            one_percent_retrieved += 1

    one_percent_recall = (one_percent_retrieved/float(num_evaluated))*100
    recall = (np.cumsum(recall)/float(num_evaluated))*100
    return recall, top1_similarity_score, one_percent_recall
class Mesh:
    """Textured triangle mesh read from a Wavefront OBJ file.

    ``__init__`` only records settings and the pixel size of every texture
    image found in ``texmappath``; call ``load()`` to parse the OBJ file and
    build the faces and the barycenter KD-trees.
    """

    def __init__(self,
                 filename,
                 texmappath,
                 exclude=None,
                 epsilon=0.0,
                 verbose=False):
        """Record settings and look up the dimensions of the texture maps.

        Args:
            filename: path of the mesh OBJ file.
            texmappath: directory that holds the texture-map images.
            exclude: optional collection of image file names to ignore.
            epsilon: radius within which two vertices are treated as the same
                when reconciling triangle soup.
            verbose: print progress information.

        Raises:
            IOError: if a texture image with a known extension cannot be read.
        """
        exclude = [] if exclude is None else exclude

        self.faces = {}  # Hold Face objects, keyed by face index
        self.v = 0  # Number of LISTED vertices in the whole mesh
        # (This number may be inflated by duplicates)
        self.vt = 0  # Number of LISTED texture vertices in the whole mesh
        # (This number may be inflated by duplicates)
        self.barycenters2d = None  # To become a query-able KDTree
        self.barycenters3d = None  # To become a query-able KDTree
        self.filename = filename  # Name of the mesh obj
        self.imgformats = ['png', 'jpg', 'jpeg']  # Acceptable texture map file formats
        self.texmappath = texmappath  # Path from script to the texture maps
        self.texmaporigin = 'ul'  # Which corner is the texture map origin:
        # {'ul', 'll', 'ur', 'lr'} respectively for
        # upper-left, lower-left, upper-right, lower-right
        self.epsilon = epsilon  # Acceptable discrepancy between points
        # to be considered the same
        self.reconcile = True  # Whether to reconcile triangle soup
        # with epsilon-distances
        self.verbose = verbose  # Loading and reconciling can take a while;
        # show signs of life
        self.filesizes = {}  # image file name -> (width, height)

        for imgfile in os.listdir(texmappath):  # For every texmap...
            if imgfile.split('.')[-1].lower() not in self.imgformats:
                continue  # not a known image format
            if imgfile in exclude:
                if self.verbose:  # Show that we're omitting a file by request
                    print(' Excluding ' + imgfile)
                continue
            texmap = cv2.imread(os.path.join(texmappath, imgfile),
                                cv2.IMREAD_COLOR)
            if texmap is None:
                # cv2.imread signals failure by returning None; fail with a
                # message that names the file rather than an opaque TypeError.
                raise IOError('Could not read texture map ' +
                              os.path.join(texmappath, imgfile))
            # Save to lookup table of width and height
            self.filesizes[imgfile] = (len(texmap[0]), len(texmap))
            if self.verbose:  # Show the texture map dimensions
                print(' ' + imgfile + ': ' + str(len(texmap[0])) + ' x ' +
                      str(len(texmap)))

        self.vertexLookup = {}  # Look up a vertex index to find a list of
        # all faces to which it contributes.
        self.sames = {}  # Look up a vertex index to find a list of vertices
        # we consider "equal to it within epsilon."

    def _texel(self, uv, texmapW, texmapH):
        """Scale a texture coordinate to pixels, flipping axes per ``self.texmaporigin``."""
        x = uv[0] * texmapW
        y = uv[1] * texmapH
        # 'ul' and 'll' keep the horizontal axis; every other value flips it
        # (any unrecognised origin is treated as upper-right).
        if self.texmaporigin not in ('ul', 'll'):
            x = texmapW - x
        # The two lower origins flip the vertical axis.
        if self.texmaporigin in ('ll', 'lr'):
            y = texmapH - y
        return (x, y)

    def load(self):
        """Parse the OBJ file and populate faces, lookups and KD-trees.

        Three passes are made over the file: 3D vertices ('v'), texture
        vertices ('vt'), then faces ('f') together with material switches
        ('usemtl'). Only the first three ``v/vt`` pairs of a face line are
        used. The current material name is used as the key into
        ``self.filesizes``, so a face that appears before any 'usemtl' line,
        or under an unknown material, raises KeyError.
        """
        v = {}  # Vertices, keyed by 1-based OBJ index
        vctr = 1  # Vertex index counter
        vt = {}  # Texture coordinates, keyed by 1-based OBJ index
        vtctr = 1  # Texture coordinates' index counter
        fctr = 0  # Face index counter is free to start with zero
        # because it is never referred to by other data types
        # in the OBJ format
        currentMaterial = None  # Track which material is currently applied

        if self.verbose:
            print('\n Loading mesh from ' + self.filename)

        with open(self.filename, 'r') as fh:  # Read entire file
            lines = [line.strip().split() for line in fh]

        ######################################### 3D VERTICES
        if self.verbose:
            print(' Reading vertices...')
        for arr in lines:
            if len(arr) > 0 and arr[0] == 'v':
                v[vctr] = (float(arr[1]), float(arr[2]), float(arr[3]))
                self.vertexLookup[vctr] = []  # running list of every face that uses this vertex
                vctr += 1
        if self.verbose:
            print(' ' + str(vctr - 1) + ' vertices')

        allV = [v[x] for x in range(1, vctr)]  # Complete list by vertex index
        redundancyTree = KDTree(allV)  # Turn it into a tree

        # Find all vertices within epsilon of each other: we call them
        # "The Same," but only so we can use them to find neighbors in
        # triangle soup. Skipped when self.reconcile is False.
        if self.reconcile:
            if self.verbose:
                print(' Reconciling triangle soup with epsilon ' +
                      str(self.epsilon) + '...')
            samectr = 0
            for vnum in range(1, vctr):  # Perform test for every vertex.
                ind = redundancyTree.query_radius(np.array([v[vnum]]),
                                                  self.epsilon)
                # Tree rows are 0-based, OBJ indices 1-based; drop the vertex itself.
                ind = [x + 1 for x in ind[0] if x + 1 != vnum]
                if len(ind) > 0:
                    self.sames[vnum] = ind
                    samectr += 1
                    if self.verbose:
                        sys.stdout.write(
                            ' %d epsilon-equivalent vertices found\r' %
                            samectr)
                        sys.stdout.flush()
            if self.verbose:
                print('')

        ######################################### 2D (TEXMAP) VERTICES
        if self.verbose:
            print(' Reading texture map vertices...')
        for arr in lines:
            if len(arr) > 0 and arr[0] == 'vt':
                vt[vtctr] = (float(arr[1]), float(arr[2]))
                vtctr += 1

        ######################################### FACES
        if self.verbose:
            print(' Reading faces...')
        for arr in lines:
            if len(arr) == 0:  # Make sure line actually had content
                continue
            if arr[0] == 'f':
                # Split each of the three v/vt pairs into its two indices
                corners = [token.split('/') for token in arr[1:4]]
                a1, b1, c1 = [int(corner[0]) for corner in corners]  # v indices
                a2, b2, c2 = [int(corner[1]) for corner in corners]  # vt indices

                # Actual dimensions of this texmap, to get pixel locations
                texmapW = self.filesizes[currentMaterial][0]
                texmapH = self.filesizes[currentMaterial][1]

                face = Face()  # New face...
                self.faces[fctr] = face
                # made of these three 3D vertices...
                face.set3DTriangle(v[a1], v[b1], v[c1])
                # which have these three OBJ indices...
                face.set3DTriangleIndices(a1, b1, c1)
                # skinned with this 2D triangle...
                face.set2DTriangle(self._texel(vt[a2], texmapW, texmapH),
                                   self._texel(vt[b2], texmapW, texmapH),
                                   self._texel(vt[c2], texmapW, texmapH))
                # ...which has these three OBJ indices...
                face.set2DTriangleIndices(a2, b2, c2)
                # ...and which comes from this texture map
                face.texmap = currentMaterial

                # Keep a running list of faces touching each vertex
                self.vertexLookup[a1].append(fctr)
                self.vertexLookup[b1].append(fctr)
                self.vertexLookup[c1].append(fctr)
                fctr += 1
            elif arr[0] == 'usemtl':  # Change the currently applied material
                currentMaterial = arr[1]

        if self.verbose:
            print(' ' + str(fctr) + ' faces')

        tree2d = [list(self.faces[x].barycenter2D) for x in range(0, fctr)]
        tree3d = [list(self.faces[x].barycenter3D) for x in range(0, fctr)]
        self.barycenters2d = KDTree(tree2d)
        self.barycenters3d = KDTree(tree3d)

        self.v = vctr - 1  # Save for reference
        self.vt = vtctr - 1
        return

    def query2d(self, pt, a, b):
        """Return face indices ranked ``a`` .. ``b - 1`` by 2D-barycenter distance to ``pt``.

        The ``b`` nearest barycenters are requested, then the slice
        ``[a:b + 1]`` is taken from that list.
        """
        dist, ind = self.barycenters2d.query(np.array([list(pt)]), k=b)
        return list(ind[0])[a:b + 1]

    def computeFaceNeighbors(self):
        """Store on every face the list of faces that share a vertex with it.

        A face's neighbours are all other faces using one of its vertices,
        followed by all other faces using a vertex recorded in ``self.sames``
        as epsilon-equivalent to one of its vertices. Duplicates are removed
        while keeping first-seen order.
        """
        for i in range(0, len(self.faces)):  # For each face in the mesh
            face = self.faces[i]
            n = []  # faces sharing an exact vertex index
            s = []  # faces sharing an epsilon-equivalent vertex
            for v in face.t3Dindices:
                n += [x for x in self.vertexLookup[v] if x != i]
            for v in face.t3Dindices:
                for same in self.sames.get(v, ()):
                    s += [x for x in self.vertexLookup[same] if x != i]
            # dict.fromkeys removes duplicates and preserves order
            face.neighbors = list(dict.fromkeys(n + s))
        return
def _save_kdtree_and_projection(sub_xyz, query_points, labels, filename):
    """Pickle a KD-tree over ``sub_xyz`` and the nearest-sub-point index of every query point."""
    search_tree = KDTree(sub_xyz, leaf_size=50)
    kd_tree_file = os.path.join(sub_pointcloud_folder,
                                filename + '_KDTree.pkl')
    with open(kd_tree_file, 'wb') as f:
        pickle.dump(search_tree, f)

    # For every query point, the index of its nearest sub-sampled point
    proj_idx = np.squeeze(
        search_tree.query(query_points, return_distance=False))
    proj_idx = proj_idx.astype(np.int32)
    proj_save = os.path.join(sub_pointcloud_folder, filename + '_proj.pkl')
    with open(proj_save, 'wb') as f:
        pickle.dump([proj_idx, labels], f)


def convert_txt2ply(save_path=None, sub_grid_size=0.06):
    """Convert the Semantic3D ``*.txt`` clouds to ply files plus KD-tree pickles.

    For every ``*.txt`` file in ``semantic3d_data_path`` that has no
    ``<name>_KDTree.pkl`` yet, this writes:

    * a ply of the cloud into ``original_pointcloud_folder`` (grid-sampled at
      0.01 when a ``.labels`` file exists, the raw cloud otherwise);
    * a ply sub-sampled at ``sub_grid_size`` into ``sub_pointcloud_folder``;
    * ``<name>_KDTree.pkl``, a KD-tree over the sub-sampled points;
    * ``<name>_proj.pkl``, ``[proj_idx, labels]`` where ``proj_idx`` maps
      points onto their nearest sub-sampled point.

    Args:
        save_path: unused; kept for interface compatibility.
        sub_grid_size (float, optional): grid size of the sub-sampled cloud.
            Defaults to 0.06.
    """
    make_dir(sub_grid_size)

    for pointcloud_path in glob.glob(
            os.path.join(semantic3d_data_path, '*.txt')):
        print(pointcloud_path)
        # basename/splitext instead of split('/')[-1][:-4]: glob returns
        # OS-native separators, so splitting on '/' breaks on Windows.
        filename = os.path.splitext(os.path.basename(pointcloud_path))[0]

        # Already converted on an earlier run
        if os.path.exists(
                os.path.join(sub_pointcloud_folder,
                             filename + '_KDTree.pkl')):
            continue

        pointcloud = DataProcessing.load_pc_semantic3d(pointcloud_path)
        xyz = pointcloud[:, :3].astype(np.float32)
        # Column 3 is skipped; columns 4..6 are written as red/green/blue.
        rgb = pointcloud[:, 4:7].astype(np.uint8)

        label_path = os.path.splitext(pointcloud_path)[0] + '.labels'
        print(label_path)
        full_ply_path = os.path.join(original_pointcloud_folder,
                                     filename + '.ply')
        sub_ply_file = os.path.join(sub_pointcloud_folder, filename + '.ply')

        if os.path.exists(label_path):
            labels = DataProcessing.load_label_semantic3d(label_path)

            # Labelled clouds are first thinned with a 0.01 grid.
            sub_points, sub_colors, sub_labels = DataProcessing.grid_sub_sampling(
                xyz, rgb, labels, 0.01)
            sub_labels = np.squeeze(sub_labels)
            ply.write_ply(full_ply_path, (sub_points, sub_colors, sub_labels),
                          ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])

            sub_xyz, sub_colors, sub_labels = DataProcessing.grid_sub_sampling(
                sub_points, sub_colors, sub_labels, sub_grid_size)
            sub_colors = sub_colors / 255.0
            sub_labels = np.squeeze(sub_labels)
            ply.write_ply(sub_ply_file, [sub_xyz, sub_colors, sub_labels],
                          ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])

            # NOTE(review): proj_idx is computed for the 0.01-grid points but
            # is stored next to the full-resolution ``labels``; the two may
            # differ in length. Kept as in the original -- confirm with the
            # consumers of *_proj.pkl before changing.
            _save_kdtree_and_projection(sub_xyz, sub_points, labels, filename)
        else:
            ply.write_ply(full_ply_path, (xyz, rgb),
                          ['x', 'y', 'z', 'red', 'green', 'blue'])

            sub_xyz, sub_colors = DataProcessing.grid_sub_sampling(
                xyz, rgb, grid_size=sub_grid_size)
            sub_colors = sub_colors / 255.0
            ply.write_ply(sub_ply_file, [sub_xyz, sub_colors],
                          ['x', 'y', 'z', 'red', 'green', 'blue'])

            # No ground truth: store all-zero placeholder labels.
            labels = np.zeros(pointcloud.shape[0], dtype=np.uint8)
            _save_kdtree_and_projection(sub_xyz, xyz, labels, filename)
class LRU_KNN_PS(object):
    """Fixed-capacity store of latent states with a transition graph and LRU eviction.

    Each slot holds a latent state (``states``), an optional observation
    (``obs``), per-action value/reward/done tables and the graph edges
    recorded through ``add_edge`` (``next_id`` = successors with visit
    counts, ``prev_id`` = list of ``(predecessor, action)`` pairs). Nearest
    neighbour lookups use a KD-tree that is rebuilt only when
    ``build_tree()`` is called.
    """

    def __init__(self,
                 capacity,
                 obs_shape,
                 z_dim,
                 env_name,
                 action,
                 num_actions=6,
                 knn=4,
                 debug=True,
                 gamma=0.99,
                 alpha=0.1,
                 beta=0.01):
        """Allocate all per-slot tables for ``capacity`` states of dimension ``z_dim``."""
        self.obs = np.empty((capacity, ) + obs_shape, dtype=np.uint8)
        self.action = action
        self.alpha = alpha
        self.beta = beta
        self.z_dim = z_dim
        self.env_name = env_name
        self.capacity = capacity
        self.num_actions = num_actions
        self.rmax = 100000  # initial value of every internal_value entry
        self.states = np.empty((capacity, z_dim), dtype=np.float32)
        self.external_value = np.full((capacity, num_actions), np.nan)
        self.state_value_v = np.full((capacity, ), np.nan)
        self.state_value_u = np.full((capacity, ), np.nan)
        self.reward = np.zeros((capacity, num_actions))
        # The ``np.bool`` alias was removed in NumPy 1.24; the builtin ``bool``
        # gives the same dtype on every NumPy version.
        self.done = np.zeros((capacity, num_actions), dtype=bool)
        self.newly_added = np.ones((capacity, num_actions), dtype=bool)
        self.internal_value = self.rmax * np.ones((capacity, num_actions))
        # prev_id[s]: list of (predecessor, action) pairs leading into s
        self.prev_id = [[] for _ in range(capacity)]
        # next_id[s][a]: dict successor -> number of observed transitions
        self.next_id = [[{} for __ in range(num_actions)]
                        for _ in range(capacity)]
        self.pseudo_count = [[{} for __ in range(num_actions)]
                             for _ in range(capacity)]
        self.pseudo_reward = np.zeros((capacity, num_actions))
        self.pseudo_prev = [{} for _ in range(capacity)]
        self.debug = debug
        self.count = np.zeros((capacity, num_actions))
        self.lru = np.zeros(capacity)  # last-use timestamp per slot
        self.curr_capacity = 0  # number of occupied slots
        self.tm = 0.0  # logical clock behind the LRU timestamps
        self.threshold = 1e-7  # distance below which a query counts as an exact hit
        self.knn = knn
        self.gamma = gamma
        self.b = 0.01  # divisor applied to distances in act_value's softmax weights
        self.tree = None
        self.logger = logging.getLogger("ecbp")

    def log(self, *args, logtype='debug', sep=' '):
        """Join ``args`` with ``sep`` and emit them through the logger method named ``logtype``."""
        getattr(self.logger, logtype)(sep.join(str(a) for a in args))

    def build_tree(self):
        """Rebuild the KD-tree over the occupied slots; return False when the buffer is empty."""
        if self.curr_capacity == 0:
            return False
        self.tree = KDTree(self.states[:self.curr_capacity], leaf_size=10)
        return True

    def peek(self, key):
        """Look up ``key`` in the KD-tree.

        Returns:
            ``(index, dist, ind)`` where ``index`` is the nearest slot when its
            distance is below ``self.threshold`` and -1 otherwise; ``dist`` and
            ``ind`` are the raw query results, or empty lists when there is no
            tree or no data.
        """
        if self.curr_capacity == 0 or self.tree is None:
            return -1, [], []
        key = np.array(key, copy=True)
        if len(key.shape) == 1:
            key = key[np.newaxis, ...]
        dist, ind = self.tree.query(key, k=min(self.knn, self.curr_capacity))
        ind_n = ind[0][0]
        if dist[0][0] < self.threshold:
            return ind_n, dist, ind
        return -1, dist, ind

    def act_value(self, key, knn):
        """Estimate per-action values for a single ``key`` from its neighbours.

        Returns:
            ``(external_values, internal_values, exact_refer)``: two lists of
            per-action arrays and a boolean array telling whether an exact
            stored state supplied the values.
        """
        knn = min(self.curr_capacity, knn)
        internal_values = []
        external_values = []
        exact_refer = []
        if knn < 1 or self.tree is None:
            # Nothing to look up: rmax / zero defaults for every key.
            for i in range(len(key)):
                internal_values.append(self.rmax * np.ones(self.num_actions))
                external_values.append(np.zeros(self.num_actions))
                exact_refer.append(False)
            return external_values, internal_values, np.array(exact_refer)

        key = np.array(key, copy=True)
        if len(key.shape) == 1:
            key = key[np.newaxis, ...]
        assert key.shape[0] == 1

        dist, ind = self.tree.query(key, k=min(knn + 1, self.curr_capacity))

        external_value = np.zeros(self.num_actions)
        external_nan_mask = np.full((self.num_actions, ), np.nan)
        internal_value = self.rmax * np.ones(self.num_actions)
        # NOTE(review): newly_added is (capacity, num_actions), so this mask
        # gets a trailing action axis that ``ind`` / ``dist`` do not have and
        # the indexing below looks shape-incompatible. The intended per-node
        # rule is not visible here, so the logic is left untouched -- confirm.
        old_mask = np.array([[1 - self.newly_added[i] for i in query]
                             for query in ind]).astype(bool)
        ind, dist = ind[old_mask].reshape(1, -1), dist[old_mask].reshape(1, -1)
        for i in range(len(dist)):
            # Softmax over negative distances, shifted by the max for stability
            coeff = -dist[i] / self.b
            coeff = coeff - np.max(coeff)
            coeff = np.exp(coeff)
            coeff = coeff / np.sum(coeff)
            if dist[i][0] < self.threshold and not np.isnan(
                    self.external_value[ind[i][0]]).all():
                # Exact hit with at least one known value: copy it verbatim.
                self.log("peek in act ", ind[i][0])
                exact_refer.append(True)
                external_value = copy.deepcopy(self.external_value[ind[i][0]])
                internal_value = copy.deepcopy(self.internal_value[ind[i][0]])
                self.lru[ind[i][0]] = self.tm
                self.tm += 0.01
            else:
                exact_refer.append(False)
                # Distance-weighted sum over the neighbours, NaN counted as 0
                for j, index in enumerate(ind[i]):
                    tmp_external_value = copy.deepcopy(
                        self.external_value[index, :])
                    tmp_external_value[np.isnan(tmp_external_value)] = 0
                    external_nan_mask[(
                        1 - np.isnan(tmp_external_value)).astype(bool)] = 0
                    external_value += tmp_external_value * coeff[j]
                    self.lru[index] = self.tm
                    self.tm += 0.01
                external_value += external_nan_mask
            external_values.append(external_value)
            internal_values.append(internal_value)

        return external_values, internal_values, np.array(exact_refer)

    def add_edge(self, src, des, action, reward, done):
        """Record one ``src --action--> des`` transition.

        Returns:
            Total number of transitions recorded so far for ``(src, action)``.
        """
        if (src, action) not in self.prev_id[des]:
            self.prev_id[des].append((src, action))
            self.newly_added[src, action] = True
        successors = self.next_id[src][action]
        successors[des] = successors.get(des, 0) + 1
        visits = sum(successors.values())
        # Drop the internal value once the pair has been tried more than 5 times.
        if self.internal_value[src, action] > 0 and visits > 5:
            self.internal_value[src, action] = 0
        # note that we assume that reward function is deterministic
        self.reward[src, action] = reward
        self.done[src, action] = done
        return visits

    def add_node(self, key, obs=None):
        """Store ``key`` in a free slot, or overwrite the least recently used one.

        Returns:
            ``(index, evicted)``: the slot written and whether an old entry was
            replaced. The KD-tree is NOT rebuilt here.
        """
        if self.curr_capacity >= self.capacity:
            # find the LRU entry
            old_index = int(np.argmin(self.lru))
            for action in range(self.num_actions):
                for successor in self.next_id[old_index][action].keys():
                    # Drop every edge that enters ``successor`` from the evicted
                    # slot. Rebuilding the list avoids mutating it while
                    # iterating; the earlier code called remove() on the outer
                    # ``self.prev_id`` list, which raised ValueError.
                    self.prev_id[successor] = [
                        (s, a) for s, a in self.prev_id[successor]
                        if s != old_index
                    ]
                self.next_id[old_index][action] = dict()
            self.states[old_index] = key
            self.external_value[old_index] = np.full((self.num_actions, ),
                                                     np.nan)
            self.internal_value[old_index] = self.rmax * np.ones(
                self.num_actions)
            self.state_value_u[old_index] = np.nan
            self.state_value_v[old_index] = np.nan
            self.lru[old_index] = self.tm
            self.count[old_index] = 2
            if obs is not None:
                self.obs[old_index] = obs
            self.prev_id[old_index] = []
            self.tm += 0.01
            return old_index, True

        new_index = self.curr_capacity
        self.states[new_index] = key
        self.lru[new_index] = self.tm
        self.count[new_index] = 2
        if obs is not None:
            self.obs[new_index] = obs
        self.curr_capacity += 1
        self.tm += 0.01
        return new_index, False

    @staticmethod
    def distance(a, b):
        """Return the Euclidean distance between ``a`` and ``b``."""
        return np.sqrt(np.sum(np.square(a - b)))

    def update_q_value(self, state, action, state_tp1, delta_u):
        """Add ``gamma * P(state_tp1 | state, action) * delta_u`` to the external value.

        The transition probability is the empirical visit frequency. An unset
        (NaN) value is first initialised with the stored reward.

        Raises:
            KeyError: if ``state_tp1`` was never recorded as a successor.
        """
        weight = self.next_id[state][action]
        trans_p = weight[state_tp1] / sum(weight.values())
        assert 0 <= trans_p <= 1
        if np.isnan(self.external_value[state, action]):
            self.external_value[state, action] = self.reward[state, action]
        self.external_value[state, action] += self.gamma * trans_p * delta_u

    def sample(self, sample_size):
        """Draw distinct stored states that have at least one outgoing edge.

        ``sample_size`` is clipped to the buffer size and rounded down to an
        even number; ``None`` is returned when fewer than 2 remain. The loop
        keeps drawing until enough eligible states are found, so it does not
        terminate if fewer than ``sample_size`` states have successors.

        Returns:
            ``(indexes, positives, negatives, z_target, z_pos, z_neg, values,
            actions)``.
        """
        sample_size = min(self.curr_capacity, sample_size)
        if sample_size % 2 == 1:
            sample_size -= 1
        if sample_size < 2:
            return None
        indexes = []
        positives = []
        values = []
        actions = []
        while len(indexes) < sample_size:
            # [0] extracts the scalar explicitly; int() on a size-1 array is
            # deprecated in recent NumPy.
            ind = int(np.random.randint(0, self.curr_capacity, 1)[0])
            if ind in indexes:
                continue
            # Flat list of (action, successor) pairs leaving ``ind``
            next_id = [(a, ind_tp1) for a in range(self.num_actions)
                       for ind_tp1 in self.next_id[ind][a].keys()]
            if len(next_id) == 0:
                continue
            # NOTE(review): successor and action come from two independent
            # draws, so they may belong to different edges -- confirm intent.
            positive = next_id[np.random.randint(0, len(next_id))][1]
            action = next_id[np.random.randint(0, len(next_id))][0]
            indexes.append(ind)
            positives.append(positive)
            actions.append(action)
            values.append(np.nanmax(self.external_value[ind, :]))

        negatives = [
            int((pos + sample_size // 2) % sample_size) for pos in positives
        ]
        z_target = [self.states[ind] for ind in indexes]
        z_pos = [self.states[pos] for pos in positives]
        z_neg = [self.states[neg] for neg in negatives]
        return indexes, positives, negatives, z_target, z_pos, z_neg, values, actions

    def update(self, indexes, z_new):
        """Overwrite the stored latent states at ``indexes`` with the rows of ``z_new``."""
        self.log("update in buffer", self.curr_capacity)
        assert len(indexes) == len(z_new), "{}{}".format(
            len(indexes), len(z_new))
        assert z_new.shape[1] == self.z_dim
        for i, ind in enumerate(indexes):
            self.states[ind] = z_new[i]