def _read(self, feat_path, label_path, proposal_folders):
        """Load meta/labels and features, then enumerate proposal files.

        Args:
            feat_path: path to the binary feature file read by ``read_probs``.
            label_path: path to a meta/label file, or None for unlabeled data.
            proposal_folders: iterable of folders (or a callable returning
                one) containing ``*_node`` / ``*_edge`` proposal files.
        """
        with Timer('read meta and feature'):
            if label_path is not None:
                self.lb2idxs, self.idx2lb = read_meta(label_path)
                self.labels = intdict2ndarray(self.idx2lb)
                self.inst_num = len(self.idx2lb)
                self.ignore_label = False
            else:
                # No labels: mark instance count as unknown (-1) so
                # read_probs can infer it from the feature file.
                self.lb2idxs, self.idx2lb = None, None
                self.labels = None
                self.inst_num = -1
                self.ignore_label = True
            if not self.featureless:
                features = read_probs(feat_path, self.inst_num,
                                      self.feature_dim)
                # L2-normalize so inner products act as cosine similarity.
                self.features = l2norm(features)
                if self.inst_num == -1:
                    self.inst_num = features.shape[0]
            else:
                # Featureless mode: constant 1-d dummy features.
                assert self.inst_num > 0
                self.feature_dim = 1
                self.features = np.ones(self.inst_num).reshape(-1, 1)

        with Timer('read proposal list'):
            self.lst = []      # proposals kept after the IoP filter below
            self.tot_lst = []  # every proposal pair found on disk
            if callable(proposal_folders):
                # Allow lazy construction of the folder list.
                proposal_folders = proposal_folders()
            for proposal_folder in proposal_folders:
                print('read proposals from folder: ', proposal_folder)
                fn_nodes = sorted(
                    glob.glob(osp.join(proposal_folder, self.fn_node_pattern)))
                fn_edges = sorted(
                    glob.glob(osp.join(proposal_folder, self.fn_edge_pattern)))
                assert len(fn_nodes) == len(
                    fn_edges), "node files({}) vs edge files({})".format(
                        len(fn_nodes), len(fn_edges))
                assert len(fn_nodes) > 0, 'files under {} is 0'.format(
                    proposal_folder)
                for fn_node, fn_edge in zip(fn_nodes, fn_edges):
                    # sanity check: node/edge files must share the prefix
                    # before the final underscore
                    assert fn_node[:fn_node.rfind(
                        '_')] == fn_edge[:fn_edge.rfind('_'
                                                        )], "{} vs {}".format(
                                                            fn_node, fn_edge)
                    # keep only proposals whose IoP passes the threshold test
                    if self._check_iop(fn_node):
                        self.lst.append([fn_node, fn_edge])
                    self.tot_lst.append([fn_node, fn_edge])

            self.size = len(self.lst)
            self.tot_size = len(self.tot_lst)
            assert self.size <= self.tot_size

            if self.size < self.tot_size:
                print('select {} / {} = {:.2f} proposals '
                      'with iop between ({:.2f}, {:.2f})'.format(
                          self.size, self.tot_size,
                          1. * self.size / self.tot_size, self.th_iop_min,
                          self.th_iop_max))
# Example 2
def test_lgcn(model, cfg, logger):
    """Run LGCN inference, cluster the predicted edges, and evaluate.

    Args:
        model: trained LGCN model passed through to ``test``.
        cfg: config object providing test_data, work_dir, force, max_sz,
            step, pool, save_output and metrics.
        logger: logger used for the instance-count mismatch warning.
    """
    # Propagate model kwargs into the test-data config before building it.
    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.test_data)

    ofn_pred = os.path.join(cfg.work_dir, 'pred_edges_scores.npz')
    if os.path.isfile(ofn_pred) and not cfg.force:
        # Reuse cached predictions unless forced to recompute.
        data = np.load(ofn_pred)
        edges = data['edges']
        scores = data['scores']
        inst_num = data['inst_num']
        if inst_num != len(dataset):
            # Logger.warn is deprecated; Logger.warning is the supported API.
            logger.warning(
                'instance number in {} is different from dataset: {} vs {}'.
                format(ofn_pred, inst_num, len(dataset)))
    else:
        edges, scores, inst_num = test(model, dataset, cfg, logger)

    # produce predicted labels
    clusters = graph_clustering_dynamic_th(edges,
                                           scores,
                                           max_sz=cfg.max_sz,
                                           step=cfg.step,
                                           pool=cfg.pool)
    pred_idx2lb = clusters2labels(clusters)
    pred_labels = intdict2ndarray(pred_idx2lb)

    if cfg.save_output:
        print('save predicted edges and scores to {}'.format(ofn_pred))
        np.savez_compressed(ofn_pred,
                            edges=edges,
                            scores=scores,
                            inst_num=inst_num)
        ofn_meta = os.path.join(cfg.work_dir, 'pred_labels.txt')
        write_meta(ofn_meta, pred_idx2lb, inst_num=inst_num)

    # evaluation
    if not dataset.ignore_label:
        print('==> evaluation')
        gt_labels = dataset.labels
        for metric in cfg.metrics:
            evaluate(gt_labels, pred_labels, metric)

        # Re-evaluate after removing singleton clusters, which can skew
        # the metrics.
        single_cluster_idxs = get_cluster_idxs(clusters, size=1)
        print('==> evaluation (removing {} single clusters)'.format(
            len(single_cluster_idxs)))
        # np.setdiff1d already returns an ndarray; the original re-wrapped
        # it in np.array redundantly.
        remain_idxs = np.setdiff1d(np.arange(len(dataset)),
                                   np.array(single_cluster_idxs))
        for metric in cfg.metrics:
            evaluate(gt_labels[remain_idxs], pred_labels[remain_idxs], metric)
# Example 3
    def _read(self, feat_path, label_path, proposal_folders):
        """Load meta/labels and features, then collect proposal file pairs."""
        fn_node_pattern = '*_node.npz'
        fn_edge_pattern = '*_edge.npz'

        with Timer('read meta and feature'):
            if label_path is None:
                # Unlabeled data: -1 tells read_probs to infer the count.
                self.lb2idxs, self.idx2lb = None, None
                self.labels = None
                self.inst_num = -1
                self.ignore_label = True
            else:
                self.lb2idxs, self.idx2lb = read_meta(label_path)
                self.labels = intdict2ndarray(self.idx2lb)
                self.inst_num = len(self.idx2lb)
                self.ignore_label = False
            if self.featureless:
                # Constant 1-d dummy features in featureless mode.
                assert self.inst_num > 0
                self.feature_dim = 1
                self.features = np.ones(self.inst_num).reshape(-1, 1)
            else:
                raw_feats = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)
                # Normalize so dot products act as cosine similarity.
                self.features = l2norm(raw_feats)
                if self.inst_num == -1:
                    self.inst_num = raw_feats.shape[0]

        with Timer('read proposal list'):
            self.lst = []
            if callable(proposal_folders):
                # Folder list may be produced lazily.
                proposal_folders = proposal_folders()
            for folder in proposal_folders:
                print('read proposals from folder: ', folder)
                node_files = sorted(
                    glob.glob(os.path.join(folder, fn_node_pattern)))
                edge_files = sorted(
                    glob.glob(os.path.join(folder, fn_edge_pattern)))
                assert len(node_files) == len(
                    edge_files), "node files({}) vs edge files({})".format(
                        len(node_files), len(edge_files))
                assert len(node_files) > 0, 'files under {} is 0'.format(
                    folder)
                for fn_node, fn_edge in zip(node_files, edge_files):
                    # Paired files must share the prefix before the last '_'.
                    node_stem = fn_node[:fn_node.rfind('_')]
                    edge_stem = fn_edge[:fn_edge.rfind('_')]
                    assert node_stem == edge_stem, "{} vs {}".format(
                        fn_node, fn_edge)
                    self.lst.append([fn_node, fn_edge])
            self.size = len(self.lst)
    def __init__(self, cfg):
        """Build the k-at-hop dataset from a config mapping."""
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg['knn_graph_path']

        self.k_at_hop = cfg['k_at_hop']
        self.depth = len(self.k_at_hop)
        self.active_connection = cfg['active_connection']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.is_sort_knns = cfg.get('is_sort_knns', True)
        self.is_test = cfg.get('is_test', False)

        with Timer('read meta and feature'):
            if label_path is None:
                # Unlabeled: -1 lets read_probs infer the instance count.
                self.labels = None
                self.inst_num = -1
                self.ignore_label = True
            else:
                _, idx2lb = read_meta(label_path)
                self.inst_num = len(idx2lb)
                self.labels = intdict2ndarray(idx2lb)
                self.ignore_label = False
            feats = read_probs(feat_path, self.inst_num, self.feature_dim)
            self.features = l2norm(feats) if self.is_norm_feat else feats
            if self.inst_num == -1:
                self.inst_num = self.features.shape[0]
            self.size = self.inst_num

        with Timer('read knn graph'):
            knns = np.load(knn_graph_path)['data']
            _, self.knn_graph = knns2ordered_nbrs(knns, sort=self.is_sort_knns)
        # The average hop width must supply at least `active_connection`
        # neighbors.
        assert np.mean(self.k_at_hop) >= self.active_connection

        print('feature shape: {}, norm_feat: {}, sort_knns: {} '
              'k_at_hop: {}, active_connection: {}'.format(
                  self.features.shape, self.is_norm_feat, self.is_sort_knns,
                  self.k_at_hop, self.active_connection))
# Example 5
    def __init__(self, cfg):
        """Build a confidence-guided sub-graph dataset from a config dict.

        Reads features (and optionally labels), builds or loads a kNN
        graph, obtains per-instance confidences, then extracts one
        candidate sub-graph per instance via ``self.get_subgraph``.
        """
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg.get('knn_graph_path', None)

        self.k = cfg['k']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True)

        self.th_sim = cfg.get('th_sim', 0.)
        self.max_conn = cfg.get('max_conn', 1)

        # fraction of instances excluded (by confidence rank) from
        # sub-graph extraction
        self.ignore_ratio = cfg.get('ignore_ratio', 0.8)
        self.ignore_small_confs = cfg.get('ignore_small_confs', True)
        self.use_candidate_set = cfg.get('use_candidate_set', True)

        # multi-process fan-out settings for get_subgraph
        self.nproc = cfg.get('nproc', 1)
        self.max_qsize = cfg.get('max_qsize', int(1e5))

        with Timer('read meta and feature'):
            if label_path is not None:
                self.lb2idxs, self.idx2lb = read_meta(label_path)
                self.inst_num = len(self.idx2lb)
                self.gt_labels = intdict2ndarray(self.idx2lb)
                self.ignore_label = False
            else:
                # -1 lets read_probs infer the instance count from the file
                self.inst_num = -1
                self.ignore_label = True
            self.features = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)
            if self.is_norm_feat:
                self.features = l2norm(self.features)
            if self.inst_num == -1:
                self.inst_num = self.features.shape[0]
            self.size = self.inst_num
            assert self.size == self.features.shape[0]

        print('feature shape: {}, k: {}, norm_feat: {}'.format(
            self.features.shape, self.k, self.is_norm_feat))

        with Timer('read knn graph'):
            if knn_graph_path is not None:
                knns = np.load(knn_graph_path)['data']
            else:
                # no precomputed graph: cache one under <root>/knns/<name>
                prefix = osp.dirname(feat_path)
                name = rm_suffix(osp.basename(feat_path))
                # find root folder of `features`
                prefix = osp.dirname(prefix)
                knn_prefix = osp.join(prefix, 'knns', name)
                knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                                  cfg.knn)
            assert self.inst_num == len(knns), "{} vs {}".format(
                self.inst_num, len(knns))

            adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

            # build symmetric adjacency matrix
            adj = build_symmetric_adj(adj, self_loop=True)
            self.adj = row_normalize(adj)

            # convert knns to (dists, nbrs)
            self.dists, self.nbrs = knns2ordered_nbrs(knns, sort=True)

            if cfg.pred_confs != '':
                print('read estimated confidence from {}'.format(
                    cfg.pred_confs))
                self.confs = np.load(cfg.pred_confs)['pred_confs']
            else:
                print('use unsupervised density as confidence')
                # NOTE(review): self.radius is not assigned anywhere in this
                # __init__ — presumably set elsewhere or expected from cfg;
                # confirm before relying on this branch.
                assert self.radius
                from vegcn.confidence import density
                self.confs = density(self.dists, radius=self.radius)

            assert 0 <= self.ignore_ratio <= 1
            if self.ignore_ratio == 1:
                # degenerate case: ignore every instance
                self.ignore_set = set(np.arange(len(self.confs)))
            else:
                # ignore the `num` smallest confidences; negation flips this
                # to the largest when ignore_small_confs is False
                num = int(len(self.confs) * self.ignore_ratio)
                confs = self.confs
                if not self.ignore_small_confs:
                    confs = -confs
                self.ignore_set = set(np.argpartition(confs, num)[:num])

        print(
            'ignore_ratio: {}, ignore_small_confs: {}, use_candidate_set: {}'.
            format(self.ignore_ratio, self.ignore_small_confs,
                   self.use_candidate_set))
        print('#ignore_set: {} / {} = {:.3f}'.format(
            len(self.ignore_set), self.inst_num,
            1. * len(self.ignore_set) / self.inst_num))

        with Timer('Prepare sub-graphs'):
            # construct subgraphs with larger confidence
            self.peaks = {i: [] for i in range(self.inst_num)}
            self.dist2peak = {i: [] for i in range(self.inst_num)}

            if self.nproc > 1:
                # multi-process
                import multiprocessing as mp
                pool = mp.Pool(self.nproc)
                results = []
                num = int(self.inst_num / self.max_qsize) + 1
                # NOTE(review): pool.map blocks until the whole chunk is
                # done, so the inner tqdm wraps an already-finished list.
                for i in tqdm(range(num)):
                    beg = int(i * self.max_qsize)
                    end = min(beg + self.max_qsize, self.inst_num)
                    lst = [j for j in range(beg, end)]
                    results.extend(
                        list(
                            tqdm(pool.map(self.get_subgraph, lst),
                                 total=len(lst))))
                pool.close()
                pool.join()
            else:
                results = [
                    self.get_subgraph(i) for i in tqdm(range(self.inst_num))
                ]

            self.adj_lst = []
            self.feat_lst = []
            self.lb_lst = []
            self.subset_gt_labels = []
            self.subset_idxs = []
            self.subset_nbrs = []
            self.subset_dists = []
            for result in results:
                # result is None (skipped), a 3-tuple (idx, nbrs, dists)
                # recording peaks only, or a 6-tuple with the full payload
                if result is None:
                    continue
                elif len(result) == 3:
                    i, nbr, dist = result
                    self.peaks[i].extend(nbr)
                    self.dist2peak[i].extend(dist)
                    continue
                i, nbr, dist, feat, adj, lb = result
                self.subset_idxs.append(i)
                self.subset_nbrs.append(nbr)
                self.subset_dists.append(dist)
                self.feat_lst.append(feat)
                self.adj_lst.append(adj)
                if not self.ignore_label:
                    self.subset_gt_labels.append(self.idx2lb[i])
                    self.lb_lst.append(lb)
            self.subset_gt_labels = np.array(self.subset_gt_labels)

            self.size = len(self.feat_lst)
            assert self.size == len(self.adj_lst)
            if not self.ignore_label:
                assert self.size == len(self.lb_lst)
# Example 6
    def __init__(self, cfg):
        """Build a GCN-V style dataset: features, normalized kNN adjacency,
        and (when labels are available) ground-truth confidence labels.

        Args:
            cfg: config mapping with 'feat_path', 'k', 'feature_dim' and
                optional 'label_path', 'knn_graph_path', 'th_sim', etc.
        """
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg.get('knn_graph_path', None)

        self.k = cfg['k']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

        self.th_sim = cfg.get('th_sim', 0.)
        self.max_conn = cfg.get('max_conn', 1)
        self.conf_metric = cfg.get('conf_metric')

        with Timer('read meta and feature'):
            if label_path is not None:
                self.lb2idxs, self.idx2lb = read_meta(label_path)
                self.inst_num = len(self.idx2lb)
                self.gt_labels = intdict2ndarray(self.idx2lb)
                self.ignore_label = False
            else:
                # -1 lets read_probs infer the instance count from the file
                self.inst_num = -1
                self.ignore_label = True
            self.features = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)
            if self.is_norm_feat:
                self.features = l2norm(self.features)
            if self.inst_num == -1:
                self.inst_num = self.features.shape[0]
            self.size = 1  # take the entire graph as input

        with Timer('read knn graph'):
            # Guard against knn_graph_path being None — the config allows it
            # (cfg.get default) and os.path.isfile(None) raises TypeError.
            if knn_graph_path is not None and os.path.isfile(knn_graph_path):
                knns = np.load(knn_graph_path)['data']
            else:
                if knn_graph_path is not None:
                    print('knn_graph_path does not exist: {}'.format(
                        knn_graph_path))
                # Derive the knn cache location from the feature path layout:
                # <root>/features/<name> -> <root>/knns/<name>
                prefix = osp.dirname(feat_path)
                name = rm_suffix(osp.basename(feat_path))
                # find root folder of `features`
                prefix = osp.dirname(prefix)
                knn_prefix = osp.join(prefix, 'knns', name)
                knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                                  cfg.knn)

            adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

            # build symmetric adjacency matrix with self-loops, then
            # row-normalize for GCN propagation
            adj = build_symmetric_adj(adj, self_loop=True)
            adj = row_normalize(adj)
            if self.save_decomposed_adj:
                adj = sparse_mx_to_indices_values(adj)
                self.adj_indices, self.adj_values, self.adj_shape = adj
            else:
                self.adj = adj

            # convert knns to (dists, nbrs)
            self.dists, self.nbrs = knns2ordered_nbrs(knns)

        print('feature shape: {}, k: {}, norm_feat: {}'.format(
            self.features.shape, self.k, self.is_norm_feat))

        if not self.ignore_label:
            with Timer('Prepare ground-truth label'):
                self.labels = confidence(feats=self.features,
                                         dists=self.dists,
                                         nbrs=self.nbrs,
                                         metric=self.conf_metric,
                                         idx2lb=self.idx2lb,
                                         lb2idxs=self.lb2idxs)
                if cfg.eval_interim:
                    _, self.peaks = confidence_to_peaks(
                        self.dists, self.nbrs, self.labels, self.max_conn)
# Example 7
    def __init__(self, cfg):
        """GCN-V dataset variant that builds the kNN graph with multiple
        processes when no cached graph exists.

        Args:
            cfg: config mapping with 'feat_path', 'k', 'feature_dim' and
                optional 'label_path', 'knn_graph_path', 'num_process', etc.
        """
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg.get('knn_graph_path', None)

        self.k = cfg['k']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

        self.th_sim = cfg.get('th_sim', 0.)
        self.max_conn = cfg.get('max_conn', 1)
        self.conf_metric = cfg.get('conf_metric')
        self.num_process = cfg.get('num_process', 16)

        with Timer('read meta and feature'):
            if label_path is not None:
                self.lb2idxs, self.idx2lb = read_meta(label_path)
                self.inst_num = len(self.idx2lb)
                self.gt_labels = intdict2ndarray(self.idx2lb)
                self.ignore_label = False
            else:
                # -1 lets read_probs infer the instance count from the file
                self.inst_num = -1
                self.ignore_label = True
            self.features = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)
            if self.is_norm_feat:
                self.features = l2norm(self.features)
            if self.inst_num == -1:
                self.inst_num = self.features.shape[0]
            self.size = 1  # take the entire graph as input

        with Timer('read knn graph'):
            # Guard against knn_graph_path being None — the config allows it
            # (cfg.get default) and os.path.isfile(None) raises TypeError.
            if knn_graph_path is not None and os.path.isfile(knn_graph_path):
                knns = np.load(knn_graph_path)['data']    # num_imgs*2*k
            else:
                if knn_graph_path is not None:
                    print('knn_graph_path does not exist: {}'.format(
                        knn_graph_path))
                knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
                # kNN search via faiss; the faiss-gpu path may be unreliable
                # and large-scale faiss on CPU is slow, hence multi-process.
                # faiss also offers memory/speed optimizations (PQ, IVF, ...).
                knns = build_knns(knn_prefix, self.features, cfg.knn_method,
                                  cfg.knn, self.num_process)
            # build the adjacency matrix from the kNN search results
            adj = fast_knns2spmat(knns, self.k, self.th_sim, use_sim=True)

            # build symmetric adjacency matrix with self-loops, then
            # row-normalize for GCN propagation
            adj = build_symmetric_adj(adj, self_loop=True)
            adj = row_normalize(adj)
            if self.save_decomposed_adj:
                adj = sparse_mx_to_indices_values(adj)
                self.adj_indices, self.adj_values, self.adj_shape = adj
            else:
                self.adj = adj

            # convert knns to (dists, nbrs)
            self.dists, self.nbrs = knns2ordered_nbrs(knns)  # num_imgs*k

        print('feature shape: {}, k: {}, norm_feat: {}'.format(
            self.features.shape, self.k, self.is_norm_feat))

        if not self.ignore_label:
            with Timer('Prepare ground-truth label'):
                self.labels = confidence(feats=self.features,
                                         dists=self.dists,
                                         nbrs=self.nbrs,
                                         metric=self.conf_metric,
                                         idx2lb=self.idx2lb,
                                         lb2idxs=self.lb2idxs)
                if cfg.eval_interim:
                    _, self.peaks = confidence_to_peaks(
                        self.dists, self.nbrs, self.labels, self.max_conn)
# Example 8
    def __init__(self, cfg):
        """Dataset variant that additionally precomputes L2-normalized
        class-center features from the ground-truth labels.

        Args:
            cfg: config mapping with 'feat_path', 'k', 'feature_dim' and
                optional 'label_path', 'knn_graph_path', etc.
        """
        feat_path = cfg['feat_path']
        label_path = cfg.get('label_path', None)
        knn_graph_path = cfg.get('knn_graph_path', None)

        self.k = cfg['k']
        self.feature_dim = cfg['feature_dim']
        self.is_norm_feat = cfg.get('is_norm_feat', True)
        self.save_decomposed_adj = cfg.get('save_decomposed_adj', False)

        self.th_sim = cfg.get('th_sim', 0.)
        self.conf_metric = cfg.get('conf_metric')

        with Timer('read meta and feature'):
            if label_path is not None:
                self.lb2idxs, self.idx2lb = read_meta(label_path)
                self.inst_num = len(self.idx2lb)
                self.cls_num = len(self.lb2idxs)
                self.gt_labels = intdict2ndarray(self.idx2lb)
                self.ignore_label = False
            else:
                # -1 lets read_probs infer the instance count from the file
                self.inst_num = -1
                self.ignore_label = True
            self.features = read_probs(feat_path, self.inst_num,
                                       self.feature_dim)

            if self.is_norm_feat:
                self.features = l2norm(self.features)
            if self.inst_num == -1:
                self.inst_num = self.features.shape[0]
            self.size = 1  # take the entire graph as input

        with Timer('Compute center feature'):
            # Class centers require labels: cls_num / lb2idxs are only set
            # when label_path was given (the unguarded original raised
            # AttributeError on unlabeled data).
            if not self.ignore_label:
                self.center_fea = np.zeros(
                    (self.cls_num, self.features.shape[1]))
                for i in range(self.cls_num):
                    self.center_fea[i] = np.mean(
                        self.features[self.lb2idxs[i]], 0)
                self.center_fea = l2norm(self.center_fea)

        with Timer('read knn graph'):
            # Guard against knn_graph_path being None — the config allows it
            # (cfg.get default) and os.path.isfile(None) raises TypeError.
            if knn_graph_path is not None and os.path.isfile(knn_graph_path):
                print("load knns from the knn_path")
                self.knns = np.load(knn_graph_path)['data']
            else:
                if knn_graph_path is not None:
                    print('knn_graph_path does not exist: {}'.format(
                        knn_graph_path))
                knn_prefix = os.path.join(cfg.prefix, 'knns', cfg.name)
                self.knns = build_knns(knn_prefix, self.features,
                                       cfg.knn_method, cfg.knn)

            adj = fast_knns2spmat(self.knns, self.k, self.th_sim, use_sim=True)

            # build symmetric adjacency matrix with self-loops, then
            # row-normalize for GCN propagation
            adj = build_symmetric_adj(adj, self_loop=True)
            adj = row_normalize(adj)
            if self.save_decomposed_adj:
                adj = sparse_mx_to_indices_values(adj)
                self.adj_indices, self.adj_values, self.adj_shape = adj
            else:
                self.adj = adj

            # convert knns to (dists, nbrs)
            self.dists, self.nbrs = knns2ordered_nbrs(self.knns)

        print('feature shape: {}, k: {}, norm_feat: {}'.format(
            self.features.shape, self.k, self.is_norm_feat))