def _read(self, feat_path, label_path, proposal_folders):
    """Load labels/features and collect proposal (node, edge) file pairs,
    keeping in ``self.lst`` only proposals that pass the IoP filter while
    ``self.tot_lst`` records every discovered pair."""
    with Timer('read meta and feature'):
        if label_path is None:
            # unlabeled run: sizes are resolved later from the feature file
            self.lb2idxs, self.idx2lb = None, None
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True
        else:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.labels = intdict2ndarray(self.idx2lb)
            self.inst_num = len(self.idx2lb)
            self.ignore_label = False

        if self.featureless:
            # constant dummy feature (all ones, one dim) when features are disabled
            assert self.inst_num > 0
            self.feature_dim = 1
            self.features = np.ones(self.inst_num).reshape(-1, 1)
        else:
            raw_feats = read_probs(feat_path, self.inst_num, self.feature_dim)
            self.features = l2norm(raw_feats)
            if self.inst_num == -1:
                self.inst_num = raw_feats.shape[0]

    with Timer('read proposal list'):
        self.lst = []
        self.tot_lst = []
        # proposal_folders may be a callable producing the folder list lazily
        if callable(proposal_folders):
            proposal_folders = proposal_folders()
        for folder in proposal_folders:
            print('read proposals from folder: ', folder)
            node_files = sorted(glob.glob(osp.join(folder, self.fn_node_pattern)))
            edge_files = sorted(glob.glob(osp.join(folder, self.fn_edge_pattern)))
            assert len(node_files) == len(edge_files), \
                "node files({}) vs edge files({})".format(
                    len(node_files), len(edge_files))
            assert len(node_files) > 0, 'files under {} is 0'.format(folder)
            for fn_node, fn_edge in zip(node_files, edge_files):
                # sanity check: a pair must share the stem before the last '_'
                assert fn_node[:fn_node.rfind('_')] == fn_edge[:fn_edge.rfind('_')], \
                    "{} vs {}".format(fn_node, fn_edge)
                if self._check_iop(fn_node):
                    self.lst.append([fn_node, fn_edge])
                self.tot_lst.append([fn_node, fn_edge])

        self.size = len(self.lst)
        self.tot_size = len(self.tot_lst)
        assert self.size <= self.tot_size
        if self.size < self.tot_size:
            print('select {} / {} = {:.2f} proposals '
                  'with iop between ({:.2f}, {:.2f})'.format(
                      self.size, self.tot_size, 1. * self.size / self.tot_size,
                      self.th_iop_min, self.th_iop_max))
def test_lgcn(model, cfg, logger):
    """Run LGCN edge inference (or reuse cached results), cluster the
    predicted edges into labels, optionally save outputs, and evaluate
    against ground truth when the dataset carries labels.

    Args:
        model: trained model passed through to ``test``.
        cfg: config object (work_dir, force, max_sz, step, pool,
            save_output, metrics, model/test_data sub-configs).
        logger: logger used for warnings and passed to ``test``.
    """
    # propagate model kwargs into the test-data config before building
    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.test_data)

    ofn_pred = os.path.join(cfg.work_dir, 'pred_edges_scores.npz')
    if os.path.isfile(ofn_pred) and not cfg.force:
        # reuse cached predictions
        data = np.load(ofn_pred)
        edges = data['edges']
        scores = data['scores']
        inst_num = data['inst_num']
        if inst_num != len(dataset):
            # BUG FIX: Logger.warn is deprecated since Python 3.3 — use warning()
            logger.warning(
                'instance number in {} is different from dataset: {} vs {}'.
                format(ofn_pred, inst_num, len(dataset)))
    else:
        edges, scores, inst_num = test(model, dataset, cfg, logger)

    # produce predicted labels
    clusters = graph_clustering_dynamic_th(edges,
                                           scores,
                                           max_sz=cfg.max_sz,
                                           step=cfg.step,
                                           pool=cfg.pool)
    pred_idx2lb = clusters2labels(clusters)
    pred_labels = intdict2ndarray(pred_idx2lb)

    if cfg.save_output:
        print('save predicted edges and scores to {}'.format(ofn_pred))
        np.savez_compressed(ofn_pred,
                            edges=edges,
                            scores=scores,
                            inst_num=inst_num)
        ofn_meta = os.path.join(cfg.work_dir, 'pred_labels.txt')
        write_meta(ofn_meta, pred_idx2lb, inst_num=inst_num)

    # evaluation (only when ground-truth labels exist)
    if not dataset.ignore_label:
        print('==> evaluation')
        gt_labels = dataset.labels
        for metric in cfg.metrics:
            evaluate(gt_labels, pred_labels, metric)
        # re-evaluate after dropping singleton clusters
        single_cluster_idxs = get_cluster_idxs(clusters, size=1)
        print('==> evaluation (removing {} single clusters)'.format(
            len(single_cluster_idxs)))
        remain_idxs = np.setdiff1d(np.arange(len(dataset)),
                                   np.array(single_cluster_idxs))
        remain_idxs = np.array(remain_idxs)
        for metric in cfg.metrics:
            evaluate(gt_labels[remain_idxs], pred_labels[remain_idxs], metric)
def _read(self, feat_path, label_path, proposal_folders):
    """Load labels/features and collect every proposal (node, edge) file pair
    from the given folders into ``self.lst``."""
    fn_node_pattern = '*_node.npz'
    fn_edge_pattern = '*_edge.npz'

    with Timer('read meta and feature'):
        if label_path is None:
            # unlabeled run: instance count resolved from features below
            self.lb2idxs, self.idx2lb = None, None
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True
        else:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.labels = intdict2ndarray(self.idx2lb)
            self.inst_num = len(self.idx2lb)
            self.ignore_label = False

        if self.featureless:
            # constant dummy feature (all ones, one dim) when features are disabled
            assert self.inst_num > 0
            self.feature_dim = 1
            self.features = np.ones(self.inst_num).reshape(-1, 1)
        else:
            raw_feats = read_probs(feat_path, self.inst_num, self.feature_dim)
            self.features = l2norm(raw_feats)
            if self.inst_num == -1:
                self.inst_num = raw_feats.shape[0]

    with Timer('read proposal list'):
        self.lst = []
        # proposal_folders may be a callable producing the folder list lazily
        if callable(proposal_folders):
            proposal_folders = proposal_folders()
        for folder in proposal_folders:
            print('read proposals from folder: ', folder)
            node_files = sorted(glob.glob(os.path.join(folder, fn_node_pattern)))
            edge_files = sorted(glob.glob(os.path.join(folder, fn_edge_pattern)))
            assert len(node_files) == len(edge_files), \
                "node files({}) vs edge files({})".format(
                    len(node_files), len(edge_files))
            assert len(node_files) > 0, 'files under {} is 0'.format(folder)
            for fn_node, fn_edge in zip(node_files, edge_files):
                # a pair must share the stem before the last '_'
                assert fn_node[:fn_node.rfind('_')] == fn_edge[:fn_edge.rfind('_')], \
                    "{} vs {}".format(fn_node, fn_edge)
                self.lst.append([fn_node, fn_edge])
        self.size = len(self.lst)
def __init__(self, cfg):
    """Build the dataset: read labels (optional) and features, then load the
    precomputed knn graph whose ordered neighbors feed k-at-hop expansion."""
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg['knn_graph_path']

    self.k_at_hop = cfg['k_at_hop']
    self.depth = len(self.k_at_hop)
    self.active_connection = cfg['active_connection']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.is_sort_knns = cfg.get('is_sort_knns', True)
    self.is_test = cfg.get('is_test', False)

    with Timer('read meta and feature'):
        if label_path is None:
            # unlabeled run: size resolved from the feature file below
            self.labels = None
            self.inst_num = -1
            self.ignore_label = True
        else:
            _, lb_map = read_meta(label_path)
            self.inst_num = len(lb_map)
            self.labels = intdict2ndarray(lb_map)
            self.ignore_label = False

        feats = read_probs(feat_path, self.inst_num, self.feature_dim)
        self.features = l2norm(feats) if self.is_norm_feat else feats
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = self.inst_num

    with Timer('read knn graph'):
        knns = np.load(knn_graph_path)['data']
        _, self.knn_graph = knns2ordered_nbrs(knns, sort=self.is_sort_knns)

    assert np.mean(self.k_at_hop) >= self.active_connection

    print('feature shape: {}, norm_feat: {}, sort_knns: {} '
          'k_at_hop: {}, active_connection: {}'.format(
              self.features.shape, self.is_norm_feat, self.is_sort_knns,
              self.k_at_hop, self.active_connection))
def __init__(self, cfg):
    """Build a confidence-driven sub-graph dataset.

    Reads features (and optionally labels), loads or builds a knn graph,
    obtains per-node confidences (estimated file or unsupervised density),
    derives an ignore-set of low-priority nodes, and then extracts one
    sub-graph per remaining instance via ``self.get_subgraph``.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    # fraction of nodes to exclude from sub-graph extraction
    self.ignore_ratio = cfg.get('ignore_ratio', 0.8)
    self.ignore_small_confs = cfg.get('ignore_small_confs', True)
    self.use_candidate_set = cfg.get('use_candidate_set', True)
    self.nproc = cfg.get('nproc', 1)
    self.max_qsize = cfg.get('max_qsize', int(1e5))

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled run: instance count is resolved from features below
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = self.inst_num
        assert self.size == self.features.shape[0]

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    with Timer('read knn graph'):
        if knn_graph_path is not None:
            knns = np.load(knn_graph_path)['data']
        else:
            # no precomputed graph: build knns next to the feature file
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)
        assert self.inst_num == len(knns), "{} vs {}".format(
            self.inst_num, len(knns))

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix
        adj = build_symmetric_adj(adj, self_loop=True)
        self.adj = row_normalize(adj)

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns, sort=True)

        if cfg.pred_confs != '':
            print('read estimated confidence from {}'.format(cfg.pred_confs))
            self.confs = np.load(cfg.pred_confs)['pred_confs']
        else:
            print('use unsupervised density as confidence')
            # NOTE(review): self.radius is not assigned anywhere in this
            # __init__ — presumably set elsewhere (cfg or subclass); confirm.
            assert self.radius
            from vegcn.confidence import density
            self.confs = density(self.dists, radius=self.radius)

        assert 0 <= self.ignore_ratio <= 1
        if self.ignore_ratio == 1:
            self.ignore_set = set(np.arange(len(self.confs)))
        else:
            # ignore the `num` nodes with smallest (or largest, when
            # ignore_small_confs is False — sign flip) confidence
            num = int(len(self.confs) * self.ignore_ratio)
            confs = self.confs
            if not self.ignore_small_confs:
                confs = -confs
            self.ignore_set = set(np.argpartition(confs, num)[:num])

    print(
        'ignore_ratio: {}, ignore_small_confs: {}, use_candidate_set: {}'.
        format(self.ignore_ratio, self.ignore_small_confs,
               self.use_candidate_set))
    print('#ignore_set: {} / {} = {:.3f}'.format(
        len(self.ignore_set), self.inst_num,
        1. * len(self.ignore_set) / self.inst_num))

    with Timer('Prepare sub-graphs'):
        # construct subgraphs with larger confidence
        self.peaks = {i: [] for i in range(self.inst_num)}
        self.dist2peak = {i: [] for i in range(self.inst_num)}

        if self.nproc > 1:
            # multi-process
            import multiprocessing as mp
            pool = mp.Pool(self.nproc)
            results = []
            # process instances in chunks of at most max_qsize
            num = int(self.inst_num / self.max_qsize) + 1
            for i in tqdm(range(num)):
                beg = int(i * self.max_qsize)
                end = min(beg + self.max_qsize, self.inst_num)
                lst = [j for j in range(beg, end)]
                # NOTE(review): pool.map blocks until the chunk finishes, so
                # the inner tqdm only iterates the completed list — verify if
                # per-item progress was intended.
                results.extend(
                    list(
                        tqdm(pool.map(self.get_subgraph, lst),
                             total=len(lst))))
            pool.close()
            pool.join()
        else:
            results = [
                self.get_subgraph(i) for i in tqdm(range(self.inst_num))
            ]

        self.adj_lst = []
        self.feat_lst = []
        self.lb_lst = []
        self.subset_gt_labels = []
        self.subset_idxs = []
        self.subset_nbrs = []
        self.subset_dists = []
        for result in results:
            # get_subgraph returns: None (skip), a 3-tuple (peak info only),
            # or a 6-tuple (full sub-graph)
            if result is None:
                continue
            elif len(result) == 3:
                i, nbr, dist = result
                self.peaks[i].extend(nbr)
                self.dist2peak[i].extend(dist)
                continue
            i, nbr, dist, feat, adj, lb = result
            self.subset_idxs.append(i)
            self.subset_nbrs.append(nbr)
            self.subset_dists.append(dist)
            self.feat_lst.append(feat)
            self.adj_lst.append(adj)
            if not self.ignore_label:
                self.subset_gt_labels.append(self.idx2lb[i])
                self.lb_lst.append(lb)
        self.subset_gt_labels = np.array(self.subset_gt_labels)

        self.size = len(self.feat_lst)
        assert self.size == len(self.adj_lst)
        if not self.ignore_label:
            assert self.size == len(self.lb_lst)
def __init__(self, cfg):
    """Graph dataset constructor: read features (and optional labels),
    load or build a knn graph, prepare the row-normalized symmetric
    adjacency, and — when labels exist — compute per-node confidence
    values used as regression targets.

    Args:
        cfg: config mapping with required keys 'feat_path', 'k',
            'feature_dim' and optional keys 'label_path',
            'knn_graph_path', 'is_norm_feat', 'save_decomposed_adj',
            'th_sim', 'max_conn', 'conf_metric', plus attributes
            knn_method/knn/eval_interim used when building knns.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)
    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled run: instance count is resolved from features below
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # BUG FIX: knn_graph_path defaults to None and os.path.isfile(None)
        # raises TypeError on Python 3 — guard the None case explicitly.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            prefix = osp.dirname(feat_path)
            name = rm_suffix(osp.basename(feat_path))
            # find root folder of `features`
            prefix = osp.dirname(prefix)
            knn_prefix = osp.join(prefix, 'knns', name)
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn)

        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix (with self-loops), then row-normalize
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            # keep sparse components instead of the matrix object
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
def __init__(self, cfg):
    """Graph dataset constructor (multi-process knn variant): read
    features (and optional labels), load or build a knn graph with
    `num_process` workers, prepare the row-normalized symmetric
    adjacency, and — when labels exist — compute per-node confidence
    values used as regression targets.

    Args:
        cfg: config mapping with required keys 'feat_path', 'k',
            'feature_dim' and optional keys 'label_path',
            'knn_graph_path', 'is_norm_feat', 'save_decomposed_adj',
            'th_sim', 'max_conn', 'conf_metric', 'num_process', plus
            attributes prefix/name/knn_method/knn/eval_interim used
            when building knns.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)
    self.th_sim = cfg.get('th_sim', 0.)
    self.max_conn = cfg.get('max_conn', 1)
    self.conf_metric = cfg.get('conf_metric')
    self.num_process = cfg.get('num_process', 16)

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled run: instance count is resolved from features below
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    with Timer('read knn graph'):
        # BUG FIX: knn_graph_path defaults to None and os.path.isfile(None)
        # raises TypeError on Python 3 — guard the None case explicitly.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            knns = np.load(knn_graph_path)['data']  # num_imgs*2*k
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
            # k-NN search via faiss; the faiss_gpu implementation used here
            # may be problematic, and faiss on CPU is still slow at large
            # scale. faiss does offer memory/speed optimizations (PQ, IVF,
            # etc.) — see the faiss documentation.
            knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                              cfg.knn, self.num_process)

        # build the adjacency matrix from the k-NN search results
        adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix (with self-loops), then row-normalize
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            # keep sparse components instead of the matrix object
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(knns)  # num_imgs*k

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))

    if not self.ignore_label:
        with Timer('Prepare ground-truth label'):
            self.labels = confidence(feats=self.features,
                                     dists=self.dists,
                                     nbrs=self.nbrs,
                                     metric=self.conf_metric,
                                     idx2lb=self.idx2lb,
                                     lb2idxs=self.lb2idxs)
            if cfg.eval_interim:
                _, self.peaks = confidence_to_peaks(
                    self.dists, self.nbrs, self.labels, self.max_conn)
def __init__(self, cfg):
    """Graph dataset constructor (class-center variant): read features
    (and optional labels), compute L2-normalized per-class center
    features when labels exist, then load or build a knn graph and the
    row-normalized symmetric adjacency.

    Args:
        cfg: config mapping with required keys 'feat_path', 'k',
            'feature_dim' and optional keys 'label_path',
            'knn_graph_path', 'is_norm_feat', 'save_decomposed_adj',
            'th_sim', 'conf_metric', plus attributes
            prefix/name/knn_method/knn used when building knns.
    """
    feat_path = cfg['feat_path']
    label_path = cfg.get('label_path', None)
    knn_graph_path = cfg.get('knn_graph_path', None)

    self.k = cfg['k']
    self.feature_dim = cfg['feature_dim']
    self.is_norm_feat = cfg.get('is_norm_feat', True)
    self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)
    self.th_sim = cfg.get('th_sim', 0.)
    self.conf_metric = cfg.get('conf_metric')

    with Timer('read meta and feature'):
        if label_path is not None:
            self.lb2idxs, self.idx2lb = read_meta(label_path)
            self.inst_num = len(self.idx2lb)
            self.cls_num = len(self.lb2idxs)
            self.gt_labels = intdict2ndarray(self.idx2lb)
            self.ignore_label = False
        else:
            # unlabeled run: instance count is resolved from features below
            self.inst_num = -1
            self.ignore_label = True
        self.features = read_probs(feat_path, self.inst_num,
                                   self.feature_dim)
        if self.is_norm_feat:
            self.features = l2norm(self.features)
        if self.inst_num == -1:
            self.inst_num = self.features.shape[0]
        self.size = 1  # take the entire graph as input

    # BUG FIX: center features require labels (cls_num / lb2idxs are only
    # set when label_path is given); the original computed them
    # unconditionally and crashed with AttributeError on unlabeled runs.
    if not self.ignore_label:
        with Timer('Compute center feature'):
            self.center_fea = np.zeros(
                (self.cls_num, self.features.shape[1]))
            for i in range(self.cls_num):
                # mean feature over all instances of class i
                self.center_fea[i] = np.mean(self.features[self.lb2idxs[i]],
                                             0)
            self.center_fea = l2norm(self.center_fea)

    with Timer('read knn graph'):
        # BUG FIX: knn_graph_path defaults to None and os.path.isfile(None)
        # raises TypeError on Python 3 — guard the None case explicitly.
        if knn_graph_path is not None and os.path.isfile(knn_graph_path):
            print("load knns from the knn_path")
            self.knns = np.load(knn_graph_path)['data']
        else:
            if knn_graph_path is not None:
                print('knn_graph_path does not exist: {}'.format(
                    knn_graph_path))
            knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
            self.knns = build_knns(knn_prefix, self.features,
                                   cfg.knn_method, cfg.knn)

        adj = fast_knns2spmat(self.knns, self.k, self.th_sim, use_sim=True)

        # build symmetric adjacency matrix (with self-loops), then row-normalize
        adj = build_symmetric_adj(adj, self_loop=True)
        adj = row_normalize(adj)
        if self.save_decomposed_adj:
            # keep sparse components instead of the matrix object
            adj = sparse_mx_to_indices_values(adj)
            self.adj_indices, self.adj_values, self.adj_shape = adj
        else:
            self.adj = adj

        # convert knns to (dists, nbrs)
        self.dists, self.nbrs = knns2ordered_nbrs(self.knns)

    print('feature shape: {}, k: {}, norm_feat: {}'.format(
        self.features.shape, self.k, self.is_norm_feat))