def _build_graph(self, features, cluster_features, labels, density, knns):
    """Assemble a DGL graph from kNN results and per-node arrays.

    The adjacency is produced by ``fast_knns2spmat(knns, self.k)``,
    row-normalized with ``row_normalize`` and decomposed with
    ``sparse_mx_to_indices_values``.

    Node data written: ``features``, ``cluster_features``, ``labels``,
    ``density``, ``norm``.
    Edge data written: ``affine``, ``global_eid``, ``raw_affine``,
    ``labels_conn``, ``mask_conn``.

    Args:
        features: per-node values stored as a float tensor under ``features``.
        cluster_features: per-node values stored as a float tensor under
            ``cluster_features``.
        labels: per-node values stored as a long tensor under ``labels``.
        density: per-node values stored as a float tensor under ``density``.
        knns: kNN structure handed to ``fast_knns2spmat``.

    Returns:
        The populated DGL graph.
    """
    sparse_adj = fast_knns2spmat(knns, self.k)
    sparse_adj, row_sums = row_normalize(sparse_adj)
    edge_index, edge_weight, _shape = sparse_mx_to_indices_values(sparse_adj)

    # dgl.graph takes (source ids, destination ids): the second index row is
    # used as the source endpoints and the first as the destinations.
    src_ids = edge_index[1]
    dst_ids = edge_index[0]
    graph = dgl.graph((src_ids, dst_ids))

    graph.ndata['features'] = torch.FloatTensor(features)
    graph.ndata['cluster_features'] = torch.FloatTensor(cluster_features)
    graph.ndata['labels'] = torch.LongTensor(labels)
    graph.ndata['density'] = torch.FloatTensor(density)

    graph.edata['affine'] = torch.FloatTensor(edge_weight)
    # A Bipartite from DGL sampler will not store global eid, so the edge ids
    # are saved explicitly here.
    graph.edata['global_eid'] = graph.edges(form='eid')

    graph.ndata['norm'] = torch.FloatTensor(row_sums)

    def _raw_affine(edges):
        # Edge weight divided by the destination node's stored 'norm'.
        return {'raw_affine': edges.data['affine'] / edges.dst['norm']}

    def _labels_conn(edges):
        # 1 where both endpoints carry the same label, 0 otherwise.
        same_label = edges.src['labels'] == edges.dst['labels']
        return {'labels_conn': same_label.long()}

    def _mask_conn(edges):
        # True where the source density is strictly greater than the
        # destination density.
        denser_src = edges.src['density'] > edges.dst['density']
        return {'mask_conn': denser_src.bool()}

    graph.apply_edges(_raw_affine)
    graph.apply_edges(_labels_conn)
    graph.apply_edges(_mask_conn)
    return graph
def __init__(self, cfg):
    """Load features, optional labels and the kNN graph, then build the adjacency.

    Args:
        cfg: configuration object. Required entries are read with item
            access (``cfg['feat_path']``, ``cfg['k']``,
            ``cfg['feature_dim']``), optional entries with ``cfg.get``
            (``label_path``, ``knn_graph_path``, ``is_norm_feat``,
            ``save_decomposed_adj``, ``th_sim``, ``max_conn``,
            ``conf_metric``), and ``knn_method``, ``knn`` and
            ``eval_interim`` with attribute access.

    Attributes set:
        k, feature_dim, is_norm_feat, save_decomposed_adj, th_sim, max_conn,
        conf_metric, inst_num, ignore_label, features, size, dists, nbrs;
        ``adj`` or (``adj_indices``, ``adj_values``, ``adj_shape``) depending
        on ``save_decomposed_adj``; ``lb2idxs``, ``idx2lb``, ``gt_labels`` and
        ``labels`` only when a label path is given; ``peaks`` only when
        labels exist and ``cfg.eval_interim`` is truthy.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # -1 is a placeholder; the real count is taken from the feature
            # matrix once it has been read.
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # os.path.isfile(None) raises TypeError, so a missing path must be
        # ruled out first; otherwise the fallback below is never reached.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
def __init__(self, cfg):
    """Load features, optional labels and the kNN graph, then build the adjacency.

    Args:
        cfg: configuration object. Required entries are read with item
            access (``cfg['feat_path']``, ``cfg['k']``,
            ``cfg['feature_dim']``), optional entries with ``cfg.get``
            (``label_path``, ``knn_graph_path``, ``is_norm_feat``,
            ``save_decomposed_adj``, ``th_sim``, ``max_conn``,
            ``conf_metric``, ``num_process`` defaulting to 16), and
            ``prefix``, ``name``, ``knn_method``, ``knn`` and
            ``eval_interim`` with attribute access.

    Attributes set:
        k, feature_dim, is_norm_feat, save_decomposed_adj, th_sim, max_conn,
        conf_metric, num_process, inst_num, ignore_label, features, size,
        dists, nbrs; ``adj`` or (``adj_indices``, ``adj_values``,
        ``adj_shape``) depending on ``save_decomposed_adj``; ``lb2idxs``,
        ``idx2lb``, ``gt_labels`` and ``labels`` only when a label path is
        given; ``peaks`` only when labels exist and ``cfg.eval_interim`` is
        truthy.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')
    self.num_process = cfg.get('num_process', 16)

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # -1 is a placeholder; the real count is taken from the feature
            # matrix once it has been read.
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # os.path.isfile(None) raises TypeError, so a missing path must be
        # ruled out first; otherwise the fallback below is never reached.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            # original author's note on the layout: num_imgs*2*k
            knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
            # The k-nearest-neighbour search is done with faiss. The
            # author's faiss_gpu implementation may have problems here, but
            # large-scale faiss on CPU is still slow.
            # faiss does offer memory and speed optimizations (PQ, IVF,
            # etc.); see the faiss documentation.
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn, self.num_process)

        # Build the adjacency matrix from the k-NN search results.
        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs); original author's note on the
        # layout: num_imgs*k
        self.dists, self.nbrs = knns2ordered_nbrs(knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
def __init__(self, cfg):
    """Load features, optional labels, class centers and the kNN graph.

    Args:
        cfg: configuration object. Required entries are read with item
            access (``cfg['feat_path']``, ``cfg['k']``,
            ``cfg['feature_dim']``), optional entries with ``cfg.get``
            (``label_path``, ``knn_graph_path``, ``is_norm_feat``,
            ``save_decomposed_adj``, ``th_sim``, ``conf_metric``), and
            ``prefix``, ``name``, ``knn_method`` and ``knn`` with attribute
            access.

    Attributes set:
        k, feature_dim, is_norm_feat, save_decomposed_adj, th_sim,
        conf_metric, inst_num, ignore_label, features, size, center_fea,
        knns, dists, nbrs; ``adj`` or (``adj_indices``, ``adj_values``,
        ``adj_shape``) depending on ``save_decomposed_adj``; ``lb2idxs``,
        ``idx2lb``, ``cls_num`` and ``gt_labels`` only when a label path is
        given. ``center_fea`` is ``None`` when no label path is given.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

    self.th_sim = cfg.get('th_sim', 0.)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.cls_num = len(self.lb2idxs)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # -1 is a placeholder; the real count is taken from the feature
            # matrix once it has been read.
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('Compute center feature'):
        if self.ignore_label:
            # cls_num and lb2idxs only exist when labels were loaded, so
            # there is nothing to average without them.
            self.center_fea = None
        else:
            self.center_fea = np.zeros(
                (self.cls_num, self.features.shape[1]))
            # Row i is the mean of the feature rows selected by lb2idxs[i];
            # this indexes lb2idxs with 0..cls_num-1.
            for i in range(self.cls_num):
                self.center_fea[i] = np.mean(
                    self.features[self.lb2idxs[i]], 0)
            self.center_fea = l2norm(self.center_fea)

    with Timer('read knn graph'):
        # os.path.isfile(None) raises TypeError, so a missing path must be
        # ruled out first; otherwise the fallback below is never reached.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            print("load knns from the knn_path")
            self.knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
            self.knns = build_knns(knn_prefix, self.features,
                                   cfg.knn_method, cfg.knn)

        adj = fast_knns2spmat(self.knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(self.knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))