Пример #1
0
    def run(self):
        """Iteratively add edges and return the best adjacency found.

        Each of the at most ``self.tao`` rounds embeds the working graph with
        ``self.test``, generates candidates with ``self.cangen.cans``, scores
        them with ``self.edgeEval.eval`` and applies them with
        ``self.edgeUpdate.update``. A round whose performance is not below
        the best seen so far is accepted and its graph becomes the new
        working graph; otherwise the working graph is kept and the early-stop
        counter advances. The loop stops after ``self.early_stop``
        consecutive rejected rounds.

        The initial adjacency is written to ``<taskname>_initadj.npz`` and
        the best adjacency to ``<taskname>_finaladj.npz``.

        Returns:
            tuple: ``(best_adj, best_performance)``.
        """
        A_ = self.adj
        best_adj = copy.deepcopy(A_)
        Preprocess.savedata('{}_initadj.npz'.format(self.taskname), self.adj,
                            self.features, self.labels)

        _, init_performance = self.test(self.adj)
        best_performance = init_performance
        self.output('initial performace: {}, initial edges: {}'.format(
            init_performance, A_.nnz),
                    f=True)

        early_it = 0
        for i in range(self.tao):
            embd_, _ = self.test(A_)
            cans = self.cangen.cans(A_, embd_)
            edges_p = self.edgeEval.eval(cans, A_)
            new_adj, new_perf, addededgenum = self.edgeUpdate.update(
                edges_p, A_)

            self.output(
                'time: {}, it: {}, performance: {}, init:{}, best:{}, added {} edges'
                .format(time.asctime(time.localtime(time.time())), i, new_perf,
                        init_performance, best_performance, addededgenum),
                f=True)

            if new_perf < best_performance:
                # Rejected round: keep the current working graph.
                early_it += 1
                if early_it >= self.early_stop:
                    self.output(
                        '\nearly stop at it: {}, performance: {}, init: {}\n'.
                        format(i, best_performance, init_performance),
                        f=True)
                    break
            else:
                # Accepted round (ties included): continue from the new graph.
                best_performance = new_perf
                best_adj = new_adj
                early_it = 0
                A_ = new_adj

        # Save on every exit path, not only on early stop, so that a run that
        # uses all ``tao`` iterations still persists its result.
        Preprocess.savedata('{}_finaladj.npz'.format(self.taskname), best_adj,
                            self.features, self.labels)

        init_un_perf = self.final_result(self.adj)
        unlabeled_perf = self.final_result(best_adj)
        nnz_init = self.adj.nnz
        nnz_final = best_adj.nnz
        self.output(
            'init performace, test set {}, val set: {}, init {} edges'.format(
                init_un_perf, init_performance, nnz_init),
            f=True)
        self.output(
            'final performace, test set {}, val set: {}, final {} edges, added {} edges'
            .format(unlabeled_perf, best_performance, nnz_final,
                    (nnz_final - nnz_init) / 2),
            f=True)
        return best_adj, best_performance
Пример #2
0
def test():
    """Run ``SGCtest`` once for every name in the module-level ``datasets``.

    For each name the default graph and its default split are loaded from
    ``data/``, an ``ialmodel_gcn`` is built on them, and both the adjacency
    and the model are handed to ``SGCtest``.
    """
    for name in datasets:
        npz_path = 'data/{}-default.npz'.format(name)
        split_path = 'data/{}-default-split.pkl'.format(name)

        graph, feats, targets = Preprocess.loaddata(npz_path, llc=False)
        splits = Preprocess.load_split(split_path)

        model = ialmodel_gcn(graph, feats, targets, splits, sgc=usesgc)
        SGCtest(graph, model)
Пример #3
0
    def __init__(self, Adj, features, labels, layersize=16, split_t=None, seed=-1, dropout=0.5, sGCN=False):
        """Store the graph data, resolve the data split and build the GCN.

        Args:
            Adj: Adjacency matrix; ``Adj.shape[0]`` is used as the node count.
            features: Node features, passed to the GCN constructor.
            labels: Integer label array; ``labels.max() + 1`` is used as the
                number of classes and to build the one-hot ``_Z_obs``.
            layersize: Size of the first entry of ``self.sizes``.
            split_t: Optional ``(train, val, unlabeled)`` 3-tuple. When it is
                ``None`` the split comes from ``Preprocess.splitdata``.
            seed: Forwarded to the GCN constructor.
            dropout: Forwarded to the GCN constructor via ``params_dict``.
            sGCN: Selects ``GCN_s`` when truthy, ``GCN_n`` otherwise.
        """
        # print('GCN model init')
        self.Adj = Adj
        self.features = features
        self.labels = labels

        _N = Adj.shape[0]
        _K = labels.max()+1
        self._Z_obs = np.eye(_K)[labels]
        self.sizes = [layersize, _K]
        self.seed = seed
        self.dropout = dropout

        self.GCN = GCN_s if sGCN else GCN_n

        # Identity test: ``== None`` would dispatch to the argument's own
        # ``__eq__``, which is not what is meant here.
        if split_t is None:
            self.split_train, self.split_val, self.split_unlabeled = Preprocess.splitdata(_N, self.labels)
        else:
            assert isinstance(split_t, tuple) and len(split_t) == 3
            self.split_train, self.split_val, self.split_unlabeled = split_t

        adj = utils.preprocess_graph(self.Adj)
        self.model = self.GCN(self.sizes, adj, self.features, "gcn_orig", gpu_id=None, seed=self.seed, params_dict={'dropout': self.dropout})
Пример #4
0
indir = '/root/hux/npz/'
# indir = dirprefix
outdir = '/root/hux/data/'

dataset = 'cora'


def add_self_loops_to_isolated(adj, num_nodes):
    """Give every row of ``adj`` without stored entries a self-loop.

    Args:
        adj: Sparse adjacency matrix; it is converted with ``tolil()`` so
            single entries can be assigned cheaply.
        num_nodes: Number of leading rows to inspect.

    Returns:
        The LIL matrix with ``adj[j, j] = 1`` set for each empty row ``j``.
    """
    adj = adj.tolil()
    for j in range(num_nodes):
        # ``nonzero()`` returns a (rows, cols) tuple, which is always truthy,
        # so ``not adj[j].nonzero()`` never fired; test the entry count.
        if adj[j].nnz == 0:
            adj[j, j] = 1
    return adj


if __name__ == "__main__":

    dirs = os.listdir(indir)
    for p in percents:
        # Per-percent counter that makes the output names unique.
        count = 0
        for d in dirs:
            print(d)
            if p not in d:
                continue

            kind = 'init' if 'init' in d else 'final'
            ff = outdir + dataset + '{}-{}+{}'.format(kind, p, count)
            print(ff)
            count += 1
            adj, feature, label = Preprocess.loaddata(indir + d)
            adj = add_self_loops_to_isolated(adj, feature.shape[0])
            savedata(adj.tocsr(), feature, label, ff)
Пример #5
0
# Directories iterated by the main block below; each one is handed to
# file_loader_generator. The machine-specific absolute paths must be adapted
# before running elsewhere.
pkldir = [
    '/Users/davidhu/Desktop/pkl/cora-25/',
    '/Users/davidhu/Desktop/pkl/cora-50/', 
    '/Users/davidhu/Desktop/pkl/cora-75/',
    '/Users/davidhu/Desktop/pkl/cora-100/',
    
]
# NOTE(review): four values, presumably one per entry of pkldir; their
# meaning and unit are not visible in this part of the file — confirm.
ramainsize = [0.7554,0.9375,1.04657,1.102]
# NOTE(review): same length as ramainsize; the "_ci" suffix suggests a
# companion series, but its meaning is not visible here — confirm.
ramainsize_ci = [0.88,0.8921,0.6841,0.985]

if __name__ == "__main__":
    # adj, features, labels = Preprocess.loaddata('data/{}.npz'.format(dataset), llc=False)
    # _A_prev = _A_obs
    evaltimes = 20
    adj, features, labels = Preprocess.loaddata('data/{}-default.npz'.format(dataset))
    train, val, test = split_t = Preprocess.load_split('data/{}-default-split.pkl'.format(dataset))
    ial = ialmodel_gcn(adj, features, labels, split_t)
    for ii,ds in enumerate(pkldir):
        if ii == 0 or ii == 1:
            continue
        print('dir name {}'.format(ds))
        pkl_loader = file_loader_generator(ds)
        try:
            while True:
                adj_p = next(pkl_loader)
                mask = upper_triangular_mask(adj_p.shape)
                actual = delll = 0
                results = []
                for i in range(evaltimes):
                    # adj_sam = top(a, mask, 2714+500*9)
Пример #6
0
    def run(self):
        """Iteratively add edges and return the best adjacency by validation score.

        The starting graph is ``self.adj``, or the result of
        ``self.deleteedges()`` when ``self.disturbadj_before`` was supplied.
        Each of the at most ``self.tao`` rounds generates candidates, scores
        them, applies an update and evaluates the new graph with
        ``self.gemodel.multitest``. The working graph always advances to the
        new graph; ``best_adj`` only changes when the validation score
        strictly improves.

        Early stopping is recorded, not acted on immediately: once
        ``self.early_stop`` non-improving rounds have accumulated the loop
        keeps running until at least ``self.minedges`` edges have been added
        relative to ``self.adj``.

        The initial adjacency is written to ``<taskname>_initadj.npz`` and
        the best adjacency to ``<taskname>_finaladj.npz``.

        Returns:
            tuple: ``(best_adj, best_performance)``.
        """
        init_test_perf, init_val_perf, _ = self.gemodel.multitest(self.adj)
        init_performance = best_performance = init_val_perf
        self.output('init performace, test set {}, val set: {}'.format(
            init_test_perf, init_val_perf),
                    f=True)

        # Identity test: ``!= None`` would dispatch to the matrix's overloaded
        # comparison operator instead of checking for "not provided".
        if self.disturbadj_before is not None:
            A_, _ = self.deleteedges()
        else:
            A_ = self.adj
        best_adj = copy.deepcopy(A_)
        Preprocess.savedata('{}_initadj.npz'.format(self.taskname), self.adj,
                            self.features, self.labels)

        # Explicit flag instead of a "best score == 0" sentinel, which could
        # not tell "not stopped yet" apart from "stopped at a score of 0".
        early_stopped = False
        early_it = 0
        for i in range(self.tao):
            if self.cg == 'ran':
                # The candidate generator receives 0 in place of an embedding.
                embd_ = 0
            else:
                embd_, _ = self.gemodel.singleTrain(A_)
            t1 = time.time()
            cans = self.cangen.cans(A_, embd_, self.edgesunadded)
            t2 = time.time()
            edges_p = self.edgeEval.eval(cans, A_)
            t3 = time.time()
            new_adj, _, addededges = self.edgeUpdate.update(edges_p, A_)
            t4 = time.time()
            self.edgesunadded.difference_update(addededges)
            print('self.edges unadded len: {}'.format(len(self.edgesunadded)))

            test_perf, val_perf, train_perf = self.gemodel.multitest(new_adj)
            t5 = time.time()
            print('time consuming: {} {} {} {}'.format(t2 - t1, t3 - t2,
                                                       t4 - t3, t5 - t4))

            # nnz is halved to report edges rather than stored entries.
            added_total = (new_adj.nnz - self.adj.nnz) / 2
            self.output(
                'time: {}, test res: {}, val res: {}, train res: {}'.format(
                    time.asctime(time.localtime(time.time())), test_perf,
                    val_perf, train_perf),
                f=True)
            self.output(
                'time: {}, it: {}, performance: {}, init:{}, best:{}, added {} edges'
                .format(time.asctime(time.localtime(time.time())), i, val_perf,
                        init_performance, best_performance, added_total),
                f=True)

            if val_perf <= best_performance:
                early_it += 1
                if not early_stopped and early_it >= self.early_stop:
                    early_stopped = True
                    self.output(
                        '\nearly stop at it: {}, performance: {}, init: {}\n'.
                        format(i, best_performance, init_performance),
                        f=True)
            else:
                best_performance = val_perf
                best_adj = new_adj
                if not early_stopped:
                    early_it = 0

            if early_stopped and added_total >= self.minedges:
                break

            A_ = new_adj

        unlabeled_perf, _, _ = self.gemodel.multitest(best_adj)
        nnz_init = self.adj.nnz
        nnz_final = best_adj.nnz
        self.output(
            'init performace, test set {}, val set: {}, init {} edges'.format(
                init_test_perf, init_val_perf, nnz_init),
            f=True)
        self.output(
            'final performace, test set {}, val set: {}, final {} edges, added {} edges'
            .format(unlabeled_perf, best_performance, nnz_final,
                    (nnz_final - nnz_init) / 2),
            f=True)

        Preprocess.savedata('{}_finaladj.npz'.format(self.taskname), best_adj,
                            self.features, self.labels)

        return best_adj, best_performance
Пример #7
0
    def __init__(self,
                 adj,
                 features,
                 labels,
                 tao,
                 minedges,
                 randomedgenum=1000,
                 gemodel='GCN',
                 cangen='knn',
                 edgeEval='max',
                 edgeUpdate='easy',
                 early_stop=20,
                 seed=-1,
                 dropout=0.5,
                 deleted_edges=None,
                 completeadj=None,
                 disturbadj_before=None,
                 params=None,
                 dataset=('cora', 1),
                 testindex=1,
                 split_share=(0.1, 0.1),
                 expectEdgeNum=-1,
                 spaceF=10,
                 simtype='node',
                 split_seed=-1,
                 poolnum=2):
        '''
        Configure an iterative edge-addition run and build its components.

        Opens ``<taskname>.txt`` for writing (kept as ``self.outfile``),
        splits the nodes, builds the embedding model, candidate generator,
        edge evaluator and edge updater, and fills ``self.edgesunadded``
        with the node pairs that are not yet edges. An unrecognised
        ``gemodel``/``cangen``/``edgeEval``/``edgeUpdate`` value reports the
        problem and calls ``exit(0)``.

        args:
            adj: init adj matrix, N*N
            features: N*D
            labels: node labels, forwarded to the split and the components
            tao: iter times
            minedges: stored as ``self.minedges``
            randomedgenum: forwarded to ``canGen_ran`` (``cangen='ran'``)
            gemodel: 'GCN' to build an ``ialmodel_gcn``, or an ``ialmodel``
                instance to use as is
            cangen: 'knn' or 'ran'
            edgeEval: 'max' or 'SGC'; 'SGC' moves the second half of the
                validation split into ``self.sgc_val``
            edgeUpdate: 'easy' or 'topK'
            early_stop: stored as ``self.early_stop``
            seed, dropout: forwarded to the evaluator and the updater
            deleted_edges: stored as ``self.deleted_edges``
            completeadj: optional adjacency, evaluated once and logged
            disturbadj_before: optional adjacency, evaluated once and logged
            params: (edgenumPit2add, seedEdgeNum, knn, subsetnum,
                evalPerEdge); defaults to (20, 20, 20, 20, 5)
            dataset, testindex: only used to build the task/output name
            split_share: forwarded to ``Preprocess.splitdata`` as ``share``
            expectEdgeNum, spaceF: forwarded to ``edgesUpdate_easy``
            simtype: forwarded to ``canGen_knn``
            split_seed: seed of the split; -1 selects 123
            poolnum: forwarded to the generator, evaluator and updater
        '''
        self.adj = adj
        self.adjlil = self.adj.copy().tolil()
        self.features = features
        self.tao = tao
        self.labels = labels
        self.early_stop = early_stop
        self.seed = seed
        self.deleted_edges = deleted_edges
        self.dropout = dropout
        self.split_share = split_share
        self.randomedgenum = randomedgenum
        self.cg = cangen
        self.minedges = minedges
        self.disturbadj_before = disturbadj_before

        # ``is None`` throughout: ``==``/``!=`` would dispatch to the
        # argument's own comparison operators (overloaded for matrices).
        self.params = (20, 20, 20, 20, 5) if params is None else params

        self.edgenumPit2add, self.seedEdgeNum, self.knn, self.subsetnum, self.evalPerEdge = self.params
        self.poolnum = poolnum

        print('iterAddlinks: params:{} start'.format(self.params))
        timenow = time.asctime(time.localtime(time.time()))

        self.taskname = 'ial_res_{}_{}_{}_{}_{}'.format(
            dataset, edgeEval, self.params, testindex, timenow)

        # Stays open for the lifetime of the object.
        self.outfile = open('{}.txt'.format(self.taskname), 'w')

        split_ss = 123 if split_seed == -1 else split_seed
        _N = self.adj.shape[0]
        self.split_train, self.split_val, self.split_unlabeled = Preprocess.splitdata(
            _N, self.labels, seed=split_ss, share=self.split_share)
        if edgeEval == 'SGC':
            # Second half of the validation nodes goes to the SGC evaluator.
            half = int(len(self.split_val) / 2)
            self.sgc_val = self.split_val[half:]
            self.split_val = self.split_val[:half]

        self.split_t = (self.split_train, self.split_val, self.split_unlabeled)

        if gemodel == 'GCN':
            self.gemodel = ialmodel_gcn(self.adj, self.features, self.labels,
                                        self.split_t)
        elif isinstance(gemodel, ialmodel):
            self.gemodel = gemodel
        else:
            print('wrong gemodel, expected type ialmodel, actually type {}'.
                  format(type(gemodel)))
            exit(0)

        if completeadj is not None:
            testp, valp, trainp = self.gemodel.multitest(completeadj)
            self.output(
                'complete adj performance test: {}, val: {}, train: {}'.format(
                    testp, valp, trainp),
                f=True)

        if disturbadj_before is not None:
            testp, valp, trainp = self.gemodel.multitest(disturbadj_before)
            self.output(
                'disturbed before adj performance test: {}, val: {}, train: {}'
                .format(testp, valp, trainp),
                f=True)

        if cangen == 'knn':
            self.cangen = canGen_knn(self.seedEdgeNum,
                                     self.poolnum,
                                     self.knn,
                                     simtype=simtype)
        elif cangen == 'ran':
            self.cangen = canGen_ran(self.randomedgenum, _N)
        else:
            self.output('cangen params err')
            exit(0)

        if edgeEval == 'max':
            self.edgeEval = edgeEval_max(self.adj,
                                         self.features,
                                         self.labels,
                                         self.split_t,
                                         self.poolnum,
                                         self.knn,
                                         self.evalPerEdge,
                                         seed=self.seed,
                                         dropout=self.dropout)
        elif edgeEval == 'SGC':
            self.edgeEval = edgeEval_SGC(self.adj,
                                         self.features,
                                         self.labels,
                                         self.split_t,
                                         self.sgc_val,
                                         poolnum=self.poolnum)
        else:
            self.output('edgeeval params err')
            exit(0)

        if edgeUpdate == 'easy':
            self.edgeUpdate = edgesUpdate_easy(self.adj,
                                               self.features,
                                               self.labels,
                                               self.split_t,
                                               self.edgenumPit2add,
                                               self.poolnum,
                                               self.subsetnum,
                                               self.seed,
                                               self.dropout,
                                               expectEdgeNum=expectEdgeNum,
                                               spaceF=spaceF)
        elif edgeUpdate == 'topK':
            self.edgeUpdate = edgesUpdate_k(self.edgenumPit2add)
        else:
            self.output('edgeUpdation params err')
            exit(0)

        # Pairs (i, j) with i < j that may still be added as edges.
        self.edgesunadded = set()
        if _N > 5000:
            # Large graph: keep only every kf-th pair per row, starting at a
            # random offset, instead of enumerating all pairs.
            kf = int((_N * _N) / 10000000)
            for i in range(_N):
                s = random.randint(1, kf)
                self.edgesunadded.update(
                    (i, j) for j in range(i + s, _N, kf))
        else:
            self.edgesunadded.update(
                (i, j) for i in range(_N) for j in range(i + 1, _N))

        rows, cols = self.adj.nonzero()
        print('prev unadded edges size: {}'.format(len(self.edgesunadded)))
        # Drop the pairs that are already present in the adjacency.
        self.edgesunadded.difference_update(zip(rows, cols))
        print('after unadded edges size: {}'.format(len(self.edgesunadded)))
Пример #8
0
    def deleteedges(self):
        """Rebuild a graph from scratch using only edges of ``self.adj``.

        Starts from an empty adjacency of the same shape and, for at most
        ``self.tao`` rounds, scores the not-yet-added original edges with
        ``self.edgeEval.eval`` and lets ``self.edgeUpdate.update`` (called
        with ``p=0.1``) pick which to add. The graph with the best validation
        score from ``self.gemodel.multitest`` is kept.

        Early stopping is recorded after ``self.early_stop`` non-improving
        rounds; the loop then ends as soon as the graph holds at least
        ``self.minedges`` edges. The result is written to
        ``<taskname>_delete_edges_finaladj.npz``.

        Returns:
            tuple: ``(best_adj, best_performance)``.
        """
        print('delete edges begin')
        A_del = sp.csr_matrix(([], ([], [])), shape=self.adj.shape)

        # One (row, col) pair per stored entry on or above the diagonal.
        rows, cols = self.adj.nonzero()
        edges2add = {(r, c) for r, c in zip(rows, cols) if r <= c}

        init_test_perf, init_val_perf, _ = self.gemodel.multitest(A_del)
        init_performance = best_performance = init_val_perf
        # Start from the empty graph so best_adj is defined even when no
        # round beats the initial score (previously an UnboundLocalError).
        best_adj = A_del.copy()
        self.output('init performace, test set {}, val set: {}'.format(
            init_test_perf, init_val_perf),
                    f=True)

        # Explicit flag instead of a "best score == 0" sentinel, which could
        # not tell "not stopped yet" apart from "stopped at a score of 0".
        early_stopped = False
        early_it = 0
        for i in range(self.tao):
            if self.cg != 'ran':
                # NOTE(review): the returned embedding is unused here; the
                # call is kept in case singleTrain has side effects — confirm.
                self.gemodel.singleTrain(A_del)
            cans = list(edges2add)
            edges_p = self.edgeEval.eval(cans, A_del)
            new_adj, _, addededges = self.edgeUpdate.update(edges_p,
                                                            A_del,
                                                            p=0.1)
            edges2add.difference_update(addededges)
            print('self.edges unadded len: {}'.format(len(edges2add)))

            test_perf, val_perf, train_perf = self.gemodel.multitest(new_adj)
            # nnz is halved to report edges rather than stored entries.
            edge_count = (new_adj.nnz) / 2
            self.output(
                'time: {}, test res: {}, val res: {}, train res: {}'.format(
                    time.asctime(time.localtime(time.time())), test_perf,
                    val_perf, train_perf),
                f=True)
            self.output(
                'time: {}, it: {}, performance: {}, init:{}, best:{}, added {} edges'
                .format(time.asctime(time.localtime(time.time())), i, val_perf,
                        init_performance, best_performance, edge_count),
                f=True)

            if val_perf <= best_performance:
                early_it += 1
                if not early_stopped and early_it >= self.early_stop:
                    early_stopped = True
                    self.output(
                        '\nearly stop at it: {}, performance: {}, init: {}\n'.
                        format(i, best_performance, init_performance),
                        f=True)
            else:
                best_performance = val_perf
                best_adj = new_adj
                if not early_stopped:
                    early_it = 0

            if early_stopped and edge_count >= self.minedges:
                break

            A_del = new_adj

        unlabeled_perf, _, _ = self.gemodel.multitest(best_adj)
        nnz_final = best_adj.nnz
        self.output(
            'init performace, test set {}, val set: {}, init {} edges'.format(
                init_test_perf, init_val_perf, 0),
            f=True)
        self.output(
            'final performace, test set {}, val set: {}, final {} edges, added {} edges'
            .format(unlabeled_perf, best_performance, nnz_final,
                    (nnz_final) / 2),
            f=True)

        Preprocess.savedata(
            '{}_delete_edges_finaladj.npz'.format(self.taskname), best_adj,
            self.features, self.labels)

        return best_adj, best_performance
Пример #9
0
    def __init__(self,
                 adj,
                 features,
                 labels,
                 tao,
                 n,
                 s,
                 gemodel='GCN',
                 edge_Rec='MLE',
                 trainsize=0.5,
                 early_stop=10,
                 seed=-1,
                 dropout=0.5,
                 deleted_edges=None,
                 initadj=None,
                 params=None,
                 dataset=('cora', 1),
                 testindex=1):
        '''
        Store the run configuration, split the nodes and select the
        edge-reconstruction method.

        Opens ``ial_res_<dataset>_<edge_Rec>_<params>_<testindex>.txt`` for
        writing (kept as ``self.outfile``). An unrecognised ``gemodel`` or
        ``edge_Rec`` value prints an error and calls ``exit(-1)``.

        args:
            adj: init adj matrix, N*N
            features: N*D
            labels: node labels
            tao: iter times
            n: candidate patch size
            s: one patch size
            gemodel: None builds a ``model_i``; 'GCN' leaves
                ``self.gemodel`` as None
            edge_Rec: 'rand', 'rand_test', 'MLE' or 'KNN'
            trainsize: stored as ``self.trainsize``
            early_stop: stored as ``self.early_stop``
            seed: stored; forwarded to the 'rand_test' method
            dropout: stored as ``self.dropout``
            deleted_edges: stored; forwarded to the 'rand_test' method
            initadj: optional adjacency, evaluated once with ``self.test``
                and logged
            params: hyper-parameter tuple, forwarded to the 'KNN' method as
                ``hyperp``; defaults to (20, 20, 20, 20, 5)
            dataset, testindex: only used to build the output file name
        '''
        self.adj = adj
        self.features = features
        self.tao = tao
        self.n = n
        self.s = s
        self.labels = labels
        self.trainsize = trainsize
        self.early_stop = early_stop
        self.seed = seed
        self.deleted_edges = deleted_edges
        self.dropout = dropout
        # ``is None`` throughout: ``==``/``!=`` would dispatch to the
        # argument's own comparison operators (overloaded for matrices).
        self.params = (20, 20, 20, 20, 5) if params is None else params

        print('iterAddlinks: params:{} start'.format(self.params))

        # Stays open for the lifetime of the object.
        self.outfile = open(
            'ial_res_{}_{}_{}_{}.txt'.format(dataset, edge_Rec, self.params,
                                             testindex), 'w')

        _N = self.adj.shape[0]
        self.split_train, self.split_val, self.split_unlabeled = Preprocess.splitdata(
            _N, self.labels)

        if initadj is not None:
            _, p = self.test(initadj)
            self.output('complete adj performance: {}'.format(p), f=True)

        if gemodel is None:
            self.gemodel = model_i()
        elif gemodel == 'GCN':
            self.gemodel = None
        else:
            print('ERR: wrong graph embedding class')
            exit(-1)

        if edge_Rec == 'rand':
            self.edgeRecMethod = addEdges_random()
        elif edge_Rec == 'rand_test':
            self.edgeRecMethod = addEdges_random_test(
                self.features, self.labels, self.split_train, self.split_val,
                self.split_unlabeled, self.deleted_edges, self.seed)
        elif edge_Rec == 'MLE':
            self.edgeRecMethod = addEdges_MLE(self.features, self.labels,
                                              self.split_train, self.split_val,
                                              self.split_unlabeled)
        elif edge_Rec == 'KNN':
            self.edgeRecMethod = addEdges_KNN(self.features,
                                              self.labels,
                                              self.split_train,
                                              self.split_val,
                                              self.split_unlabeled,
                                              hyperp=self.params)
        else:
            print('ERR: wrong edge reconstruction class')
            exit(-1)
Пример #10
0
# Dataset name; the main block below loads 'data/<dataset>.npz'.
dataset = 'cora'
# NOTE(review): paired with the dataset name in testhp below; the
# commented-out delete_edges call suggests a fraction of edges — confirm.
percent = 0.5
# NOTE(review): presumably split shares for a data split; not used in the
# visible part of this script — confirm.
share = (0.052, 0.3693)

# NOTE(review): not used in the visible part of this script — confirm.
ps = [0.1, 0.25, 0.5, 0.75, 1]

# Candidate values for five hyper-parameters, one list each.
edgenumPit2adds = [10, 20, 50, 100]
cannumpits = [20, 50, 100, 200, 500]
knns = [20, 50, 100]
subsetevalnum = [20, 50, 100, 200, 300]
etimeperedge = [5, 10, 20, 30]

# The five lists above, collected in one list.
# NOTE(review): the order looks like it mirrors the 5-element ``hp`` list in
# testhp — confirm.
hps = [edgenumPit2adds, cannumpits, knns, subsetevalnum, etimeperedge]

if __name__ == "__main__":
    _A_obs, feas, labels = Preprocess.loaddata('data/{}.npz'.format(dataset),
                                               llc=False)
    _A_prev = _A_obs

    # adj, remained, deleted = spa.delete_edges(_A_obs, k=percent)
    # print('preprocess, delete some edges, remaind edges num(bi): {}'.format(adj.nnz))

    # t = IALGE(adj, feas, labels, 100, 10, 10, edge_Rec='rand')
    # t = IALGE(adj, feas, labels, 100, 10, 10, seed=1, dropout=0)
    # t = IALGE(adj, feas, labels, 100, 10, 10, seed=1, dropout=0, edge_Rec='rand_test', deleted_edges=deleted, gemodel=None)
    # t = IALGE(adj, feas, labels, 100, 10, 10, seed=1, dropout=0, edge_Rec='MLE', deleted_edges=deleted, gemodel=None)
    # t = IALGE(adj, feas, labels, 100, 10, 10)


    def testhp(index=1, testtimes=3):
        hp = [20, 20, 50, 50, 10]
        ds = (dataset, percent)
Пример #11
0
    def __init__(self,
                 adj,
                 features,
                 labels,
                 tao,
                 n,
                 s,
                 gemodel='GCN',
                 cangen='knn',
                 edgeEval='max',
                 edgeUpdate='easy',
                 early_stop=20,
                 seed=-1,
                 dropout=0.5,
                 deleted_edges=None,
                 initadj=None,
                 params=None,
                 dataset=('cora', 1),
                 testindex=1,
                 split_share=(0.1, 0.1)):
        '''
        Store the run configuration, split the nodes and build the candidate
        generator, edge evaluator and edge updater.

        Opens ``<taskname>.txt`` for writing (kept as ``self.outfile``). An
        unrecognised ``cangen``/``edgeEval``/``edgeUpdate`` value is reported
        through ``self.output`` and followed by ``exit(0)``.

        args:
            adj: init adj matrix, N*N
            features: N*D
            labels: node labels
            tao: iter times
            n: candidate patch size
            s: one patch size
            gemodel: accepted for compatibility; not used by this constructor
            cangen: only 'knn' is supported
            edgeEval: only 'max' is supported
            edgeUpdate: only 'easy' is supported
            early_stop: stored as ``self.early_stop``
            seed, dropout: forwarded to the evaluator and the updater
            deleted_edges: stored as ``self.deleted_edges``
            initadj: optional adjacency, evaluated once with ``self.test``
                and logged
            params: (edgenumPit2add, seedEdgeNum, knn, subsetnum,
                evalPerEdge); defaults to (20, 20, 20, 20, 5)
            dataset, testindex: only used to build the task/output name
            split_share: forwarded to ``Preprocess.splitdata`` as ``share``
        '''
        self.adj = adj
        self.features = features
        self.tao = tao
        self.n = n
        self.s = s
        self.labels = labels
        self.early_stop = early_stop
        self.seed = seed
        self.deleted_edges = deleted_edges
        self.dropout = dropout
        self.split_share = split_share

        # ``is None`` throughout: ``==``/``!=`` would dispatch to the
        # argument's own comparison operators (overloaded for matrices).
        self.params = (20, 20, 20, 20, 5) if params is None else params

        self.edgenumPit2add, self.seedEdgeNum, self.knn, self.subsetnum, self.evalPerEdge = self.params
        self.poolnum = 20

        print('iterAddlinks: params:{} start'.format(self.params))
        timenow = time.asctime(time.localtime(time.time()))

        self.taskname = 'ial_res_{}_{}_{}_{}_{}'.format(
            dataset, edgeEval, self.params, testindex, timenow)

        # Stays open for the lifetime of the object.
        self.outfile = open('{}.txt'.format(self.taskname), 'w')

        _N = self.adj.shape[0]
        self.split_train, self.split_val, self.split_unlabeled = Preprocess.splitdata(
            _N, self.labels, share=self.split_share)
        self.split_t = (self.split_train, self.split_val, self.split_unlabeled)

        if initadj is not None:
            _, p = self.test(initadj)
            self.output('complete adj performance: {}'.format(p), f=True)

        if cangen == 'knn':
            self.cangen = canGen_knn(self.seedEdgeNum, self.poolnum, self.knn)
        else:
            self.output('cangen params err')
            exit(0)

        if edgeEval == 'max':
            self.edgeEval = edgeEval_max(self.adj,
                                         self.features,
                                         self.labels,
                                         self.split_t,
                                         self.poolnum,
                                         self.knn,
                                         self.evalPerEdge,
                                         seed=self.seed,
                                         dropout=self.dropout)
        else:
            self.output('edgeeval params err')
            exit(0)

        if edgeUpdate == 'easy':
            self.edgeUpdate = edgesUpdate_easy(self.adj, self.features,
                                               self.labels, self.split_t,
                                               self.edgenumPit2add,
                                               self.poolnum, self.subsetnum,
                                               self.seed, self.dropout)
        else:
            self.output('edgeUpdation params err')
            exit(0)
Пример #12
0
    import copy
    from disturbEdges import distEdge_ran

    warnings.filterwarnings("ignore")
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    dataset = 'cora'
    # dataset = 'pubmed'
    # dataset = 'polblogs'
    # dataset = 'citeseer'
    percent = 0.85
    stype = 'node'
    share = (0.052, 0.3693) if dataset == 'cora' else (0.0362, 0.3006)
    if dataset == 'pubmed':
        share = (0.003, 0.05)
    _A_obs, feas, labels = Preprocess.loaddata('data/{}.npz'.format(dataset),
                                               llc=False)

    _N = _A_obs.shape[0]
    split_train, split_val, split_unlabeled = split_t = Preprocess.splitdata(
        _N, labels, seed=123, share=share)

    distnum = 1000
    dst = distEdge_ran(distnum)
    deletesizes = [1, 0.8, 0.5, 0.2]
    res = []
    labelk = []
    fo = '{}/{}'
    for i in range(len(deletesizes)):
        adj, remained, deleted = spa.delete_edges(_A_obs, k=deletesizes[i])

        adj_d = dst.disturb(adj)
Пример #13
0
import time

warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

dataset = 'cora'
percent = 0.5

def run_time(func, *args, **kw):
    """Call ``func(*args, **kw)``, print how long it took, and return its result.

    Args:
        func: Callable to time.
        *args: Positional arguments forwarded to ``func``.
        **kw: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returned (previously the result was discarded).
    """
    # perf_counter is the clock intended for measuring intervals; time.time()
    # follows the system clock and can jump.
    start_time = time.perf_counter()
    result = func(*args, **kw)
    end_time = time.perf_counter()
    print('run time: {}s'.format(end_time-start_time))
    return result

if __name__ == "__main__":
    _A_obs, feas, labels = Preprocess.loaddata(
        'data/{}.npz'.format(dataset), llc=False)
    _A_prev = _A_obs
    adj, remained, deleted = spa.delete_edges(_A_obs, k=percent)

    _N = _A_prev.shape[0]
    # split_train, split_val, split_unlabeled = Preprocess.splitdata(_N, labels) #seed share as default
    # split_t = (split_train, split_val, split_unlabeled)
    # gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0)
    # run_time(gcn.train)
    # print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    # gcn = gemodel_GCN(adj, feas, labels, split_t=split_t, seed=1, dropout=0)
    # gcn.train()
    # print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    split_train, split_val, split_unlabeled = Preprocess.splitdata(_N, labels, seed=12, share=(0.052, 0.3693)) #seed share as default