示例#1
0
    def __init__(self, adj, features, labels, idx_train, idx_test, args):
        """Compute ICA predictions for the graph, or load them from cache.

        Probabilities and hard predictions are cached under ``preds/`` in
        ``.npy`` files keyed by ``args.train_size``, ``args.dataset`` and
        ``args.seed``. If the probability file exists, both arrays are
        loaded; otherwise an ICA model is fitted on ``idx_train``, evaluated
        on ``idx_test`` and the two arrays are written to the cache.

        Args:
            adj: sparse adjacency matrix (``tocsr``, ``dot`` and ``shape``
                are used).
            features: node feature matrix. NOTE: when predictions are
                recomputed and ``args.dataset != 'cora'`` the non-zero
                entries are overwritten with 1 *in place*.
            labels: 2-D label matrix; ``labels.argmax(1)`` is taken as the
                class id (presumably one-hot -- confirm with the caller).
            idx_train: labeled data
            idx_test: unlabeled data
            args: namespace providing ``dataset``, ``train_size`` and
                ``seed``.
        """

        self.args = args
        self.adj = adj.tocsr()
        if args.dataset != 'reddit':
            # Two-hop connectivity with self loops removed. Skipped for
            # reddit (presumably because the product is too large -- confirm).
            self.adj_two_hop = adj.dot(adj)
            self.adj_two_hop.setdiag(0)
            self.adj_two_hop.eliminate_zeros()

        self.pseudo_labels = np.zeros((adj.shape[0], labels.shape[1]))

        # Build the cache file names once; they are used for both the
        # existence check and the save/load calls.
        cache_tag = f'{args.train_size}_{args.dataset}_{args.seed}'
        probs_path = f'preds/ICA_probs_{cache_tag}.npy'
        preds_path = f'preds/ICA_preds_{cache_tag}.npy'

        # Only the probability file is checked; the prediction file is
        # assumed to have been written alongside it.
        load_data = os.path.exists(probs_path)
        print('if loading: ', load_data)
        if not load_data:
            # Make sure the cache directory exists *before* the expensive
            # fit, so the final np.save cannot fail on a missing directory.
            os.makedirs(os.path.dirname(probs_path), exist_ok=True)

            st = time.time()
            if args.dataset != 'cora':
                # Binarize the features (mutates the caller's matrix).
                features[features != 0] = 1
            classifier = 'sklearn.linear_model.LogisticRegression'
            aggregate = 'count'  # aggregation operator: 'count' or 'prop'

            graph, domain_labels = build_graph(adj, features, labels)
            y_true = [graph.node_list[t].label for t in idx_test]
            local_clf = LocalClassifier(classifier)
            agg = pick_aggregator(aggregate, domain_labels)
            relational_clf = RelationalClassifier(classifier, agg)
            ica = ICA(local_clf,
                      relational_clf,
                      bootstrap=True,
                      max_iteration=10)

            ica.fit(graph, idx_train)
            conditional_node_to_label_map = create_map(graph, idx_train)

            # Every node outside the training set.
            eval_idx = np.setdiff1d(np.arange(adj.shape[0]), idx_train)
            ica_predict, probs = ica.predict(graph, eval_idx, idx_test,
                                             conditional_node_to_label_map)
            ica_accuracy = accuracy_score(y_true, ica_predict)
            print('Acc: ' + str(ica_accuracy))
            print('optimization consumes %s s' % (time.time() - st))

            # Predicted labels are strings; the first character is dropped
            # and the remainder parsed as the integer class id.
            dict_pred = {x: int(y[1:]) for x, y in zip(idx_test, ica_predict)}
            # Training nodes keep their ground-truth class. The argmax is
            # computed once instead of once per training node.
            label_ids = labels.argmax(1)
            dict_pred.update({x: label_ids[x] for x in idx_train})

            # NOTE(review): rows are the training labels followed by the ICA
            # probabilities; this lines up with node ids only if idx_train
            # precedes idx_test in node order -- confirm.
            self.probs = np.vstack((labels[idx_train], probs))
            # Hard predictions ordered by node id.
            self.concated = np.array(
                [dict_pred[node] for node in sorted(dict_pred)])

            np.save(probs_path, self.probs)
            np.save(preds_path, self.concated)

        else:
            print('loading probs/preds...')
            self.probs = np.load(probs_path)
            self.concated = np.load(preds_path)

            print('Acc: ',
                  (self.concated == labels.argmax(1))[idx_test].sum() /
                  len(idx_test))
示例#2
0
import ipdb
# Normalize the feature matrix with the project helper before the graph is
# built from it.
features = normalize_feature(features)

# build_graph returns the graph object together with its domain labels.
graph, domain_labels = build_graph(adj, features, labels)

# train / test splits: evaluate on the validation split when requested,
# otherwise on the test split.
train = idx_train
test = idx_val if args.validation else idx_test
# Every node that is not part of the training set.
eval_idx = np.setdiff1d(np.arange(adj.shape[0]), idx_train)

# run training
# Accuracy accumulator (presumably appended to once per trial in the loop
# that follows -- confirm).
ica_accuracies = []
for run in range(args.num_trials):

    t_begin = time.time()