示例#1
0
 def edgeEva(self, edge, adj, modelnum=20, edgenum=20):
     """Count the trials in which adding ``edge`` does not lower the score.

     Each of the ``modelnum`` trials works on a deep copy of ``adj``:
     ``edgenum`` random symmetric entries are set to 1, a GCN is trained
     and scored, then ``edge`` is set as well and a second GCN is trained
     and scored on the same copy.

     Args:
         edge: indexable pair ``(edge[0], edge[1])`` of node indices.
         adj: adjacency matrix; only ``shape[0]`` and item assignment are
             used here.
         modelnum: number of trials.
         edgenum: number of random entries added per trial.

     Returns:
         ``(wins, (edge[0], edge[1]))`` where ``wins`` is the number of
         trials whose second score was >= the first.
     """

     def trained_score(matrix):
         # Build, train and score one model on the given adjacency.
         model = gemodel_GCN(matrix,
                             self.features,
                             self.labels,
                             split_t=(self.split_train, self.split_val,
                                      self.split_unlabeled))
         model.train()
         return model.performance()

     wins = 0
     node_count = adj.shape[0]
     for _ in range(modelnum):
         trial_adj = copy.deepcopy(adj)
         for _ in range(edgenum):
             # Row index is drawn before the column index.
             src = random.randint(0, node_count - 1)
             dst = random.randint(0, node_count - 1)
             trial_adj[src, dst] = 1
             trial_adj[dst, src] = 1
         baseline = trained_score(trial_adj)

         # Add the edge under evaluation on top of the random noise.
         trial_adj[edge[0], edge[1]] = 1
         trial_adj[edge[1], edge[0]] = 1
         with_edge = trained_score(trial_adj)

         if with_edge >= baseline:
             wins += 1

     print('edge: {} evaluated {} better score'.format(edge, wins))
     return (wins, (edge[0], edge[1]))
示例#2
0
    def edgeEvaNew(self, candidates, adj):
        """Collect GCN scores for randomly sampled groups of candidate edges.

        The number of models trained is derived from ``len(candidates)``,
        ``self.edgeevaltimes``, ``self.poolnum`` and ``self.evalEdgeNum``.
        For each model, ``self.evalEdgeNum`` edges are drawn (with
        replacement) from ``candidates`` and set symmetrically on a deep
        copy of ``adj``; the trained model's ``acu()`` value is then
        appended to the score list of every edge drawn for that model.

        Args:
            candidates: sequence of ``(a, b)`` index pairs.
            adj: adjacency matrix supporting ``[a, b] = 1`` assignment.

        Returns:
            ``defaultdict(list)`` mapping each drawn edge to its scores.
            Evaluation is best-effort: if an ordinary exception occurs, it
            is printed and whatever was collected so far is returned.
        """
        res = collections.defaultdict(list)

        try:
            lencan = len(candidates)
            if lencan == 0:
                # Nothing to sample from; randint(0, -1) would raise.
                return res

            modelnum = int(2 * lencan * self.edgeevaltimes / self.poolnum /
                           self.evalEdgeNum) + 1
            for _ in range(modelnum):
                addededge = []
                tempadj = copy.deepcopy(adj)
                for _ in range(self.evalEdgeNum):
                    a, b = candidates[random.randint(0, lencan - 1)]
                    tempadj[a, b] = 1
                    tempadj[b, a] = 1
                    addededge.append((a, b))

                g = gemodel_GCN(tempadj,
                                self.features,
                                self.labels,
                                split_t=self.split_t)
                g.train()
                p1 = g.acu()

                # An edge drawn more than once receives the score once per
                # draw.
                for e in addededge:
                    res[e].append(p1)
        except Exception as err:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still propagate and the process can be stopped.
            print('raised exception edgeEvaNew: {}'.format(err))

        return res
示例#3
0
    def edgeEvaNew(self, candidators, adj, edgenum=20):
        """Collect GCN scores for randomly sampled groups of candidate edges.

        The number of models trained is derived from ``self.candinum``,
        ``self.knn``, ``self.edgeevaltimes``, ``self.poolnum`` and
        ``edgenum``. For each model, ``edgenum`` edges are drawn (with
        replacement) from ``candidators`` and set symmetrically on a deep
        copy of ``adj``; the trained model's ``performance()`` value is then
        appended to the score list of every edge drawn for that model.

        Args:
            candidators: sequence of ``(a, b)`` index pairs.
            adj: adjacency matrix supporting ``[a, b] = 1`` assignment.
            edgenum: number of edges added per trained model.

        Returns:
            ``defaultdict(list)`` mapping each drawn edge to its scores.
            Evaluation is best-effort: if an ordinary exception occurs, it
            is printed and whatever was collected so far is returned.
        """
        res = collections.defaultdict(list)

        try:
            lencan = len(candidators)
            if lencan == 0:
                # Nothing to sample from; randint(0, -1) would raise.
                return res

            modelnum = int(2 * self.candinum * self.knn * self.edgeevaltimes /
                           self.poolnum / edgenum)
            for _ in range(modelnum):
                addededge = []
                tempadj = copy.deepcopy(adj)
                for _ in range(edgenum):
                    a, b = candidators[random.randint(0, lencan - 1)]
                    tempadj[a, b] = 1
                    tempadj[b, a] = 1
                    addededge.append((a, b))

                g = gemodel_GCN(tempadj,
                                self.features,
                                self.labels,
                                split_t=(self.split_train, self.split_val,
                                         self.split_unlabeled))
                g.train()
                p1 = g.performance()

                # An edge drawn more than once receives the score once per
                # draw.
                for e in addededge:
                    res[e].append(p1)
        except Exception as err:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still propagate and the process can be stopped.
            print('raised exception edgeEvaNew: {}'.format(err))

        return res
示例#4
0
 def par(self, es, prevadj):
     """Train a GCN on ``prevadj`` plus the edges in ``es``; return its score.

     Args:
         es: iterable of ``(i, j)`` index pairs to set symmetrically.
         prevadj: adjacency matrix; it is deep-copied, not modified.

     Returns:
         The value of the trained model's ``performance()``.
     """
     augmented = copy.deepcopy(prevadj)
     for src, dst in es:
         augmented[src, dst] = 1
         augmented[dst, src] = 1

     splits = (self.split_train, self.split_val, self.split_unlabeled)
     model = gemodel_GCN(augmented,
                         self.features,
                         self.labels,
                         split_t=splits,
                         seed=self.seed,
                         dropout=0)
     model.train()
     return model.performance()
示例#5
0
    def subseteval2(self, topset, prevadj):
        """Evaluate a random subset of edges drawn from ``topset``.

        ``self.edgeaddnum`` draws (with replacement) are made from
        ``topset``; each drawn edge is set symmetrically on a deep copy of
        ``prevadj``, and a GCN is trained on the result.

        Returns:
            ``(edges, score)`` where ``edges`` is the set of distinct edges
            drawn and ``score`` is the trained model's ``acu()`` value.
        """
        trial_adj = copy.deepcopy(prevadj)
        chosen = set()
        for _ in range(self.edgeaddnum):
            pick = topset[random.randint(0, len(topset) - 1)]
            chosen.add(pick)
            src, dst = pick
            trial_adj[src, dst] = 1
            trial_adj[dst, src] = 1

        model = gemodel_GCN(trial_adj,
                            self.features,
                            self.labels,
                            split_t=self.split_t,
                            seed=self.seed,
                            dropout=self.dropout)
        model.train()
        return (chosen, model.acu())
示例#6
0
    def getembs(self, adj):
        """Train ``self.model_num`` GCN models and return their embeddings.

        Only ``self.default_model == 'GCN'`` is supported. For any other
        value an error line is printed and the process exits with status -1.

        Args:
            adj: adjacency matrix, passed through ``utils.preprocess_graph``
                once before the models are built.

        Returns:
            List with one ``getembeddings()`` result per trained model.

        Raises:
            SystemExit: if ``self.default_model`` is not ``'GCN'``.
        """
        # Local import: sys.exit is used instead of the site-provided exit(),
        # which is intended for the interactive shell and may be unavailable.
        import sys

        print('generate several embds')
        if self.default_model != 'GCN':
            print('ERR: wrong default GE model for add edges')
            sys.exit(-1)

        # Preprocessing does not depend on the loop, so it is done once.
        a = utils.preprocess_graph(adj)
        embeddings = []
        for _ in range(self.model_num):
            g = gemodel_GCN(a,
                            self.features,
                            self.labels,
                            split_t=(self.split_train, self.split_val,
                                     self.split_unlabeled))
            g.train()
            embeddings.append(g.getembeddings())

        return embeddings
示例#7
0
 def subseteval(self, adj, candidators):
     """Evaluate a random subset of edges drawn from ``candidators``.

     ``self.enum2add`` draws (with replacement) are made from
     ``candidators``; each drawn edge is set symmetrically on a deep copy
     of ``adj``, and a GCN is trained on the result with ``seed=1`` and
     ``dropout=0``.

     Returns:
         ``(edges, score)`` where ``edges`` is the set of distinct edges
         drawn and ``score`` is the trained model's ``performance()``.
     """
     trial_adj = copy.deepcopy(adj)
     chosen = set()
     for _ in range(self.enum2add):
         pick = candidators[random.randint(0, len(candidators) - 1)]
         chosen.add(pick)
         src, dst = pick
         trial_adj[src, dst] = 1
         trial_adj[dst, src] = 1

     splits = (self.split_train, self.split_val, self.split_unlabeled)
     model = gemodel_GCN(trial_adj,
                         self.features,
                         self.labels,
                         split_t=splits,
                         seed=1,
                         dropout=0)
     model.train()
     score = model.performance()
     return (chosen, score)
示例#8
0
    def edgeReconstruction(self, prevAdj, embds, edgenum=20):
        """Compare re-adding deleted edges against adding random edges.

        Submits 1000 trials that each add 30 edges sampled (with
        replacement) from ``self.deleted_edges`` and 1000 trials that each
        add 30 random node pairs to ``prevAdj``. Every trial is scored by
        ``self.par`` in a process pool. The scores are compared with a GCN
        trained on ``prevAdj`` alone and two summary lines are printed
        (random-edge trials first, deleted-edge trials second).

        Args:
            prevAdj: adjacency matrix used as the starting point.
            embds: unused; kept for interface compatibility.
            edgenum: unused; kept for interface compatibility (the number of
                edges per trial is fixed at 30 inside the method).

        Raises:
            SystemExit: always, with status -1, after the summaries are
                printed. This method does not return.
        """
        # Local import: sys.exit is used instead of the site-provided exit().
        import sys

        trials = 1000
        enum = 30

        def tally(scores, baseline):
            # Count scores above/below/equal to baseline and track extremes.
            # The 0 / 100 starting bounds are kept from the original code.
            better = worse = eq = 0
            best = 0
            worst = 100
            for score in scores:
                best = max(best, score)
                worst = min(worst, score)
                if score > baseline:
                    better += 1
                elif score < baseline:
                    worse += 1
                else:
                    eq += 1
            return better, worse, eq, best, worst

        res = []
        res2 = []
        p = Pool()
        try:
            # Trials built from previously deleted edges; elements 1 and 2
            # of each deleted-edge record are used as the node pair.
            for _ in range(trials):
                es = []
                for _ in range(enum):
                    a = self.deleted_edges[random.randint(
                        0,
                        len(self.deleted_edges) - 1)]
                    es.append((a[1], a[2]))
                res.append(p.apply_async(self.par, args=(es, prevAdj)))

            # Trials built from random node pairs.
            for _ in range(trials):
                es = []
                for _ in range(enum):
                    a = random.randint(0, prevAdj.shape[0] - 1)
                    b = random.randint(0, prevAdj.shape[0] - 1)
                    es.append((a, b))
                res2.append(p.apply_async(self.par, args=(es, prevAdj)))

            p.close()
            p.join()
        finally:
            # Harmless after a clean join; on an error it stops the workers
            # instead of leaving them running.
            p.terminate()

        g = gemodel_GCN(prevAdj,
                        self.features,
                        self.labels,
                        split_t=(self.split_train, self.split_val,
                                 self.split_unlabeled),
                        seed=self.seed,
                        dropout=0)
        g.train()
        initperformance = g.performance()

        better, worse, eq, best, worst = tally([x.get() for x in res],
                                               initperformance)
        better2, worse2, eq2, best2, worst2 = tally([x.get() for x in res2],
                                                    initperformance)

        # The fifth field is the worst score, so it is labelled "worst"
        # rather than repeating the "worse" count label.
        print(
            'better2: {}, worse2: {}, eq2: {}, best2: {}, worst2: {}, init: {}'
            .format(better2, worse2, eq2, best2, worst2, initperformance))
        print('better: {}, worse: {}, eq: {}, best: {}, worst: {}, init: {}'.
              format(better, worse, eq, best, worst, initperformance))
        sys.exit(-1)
示例#9
0
    # Fragment of a larger function (its header is not visible here).
    # Chained assignment: split_t keeps the whole return value of splitdata,
    # which is also unpacked into its three parts.
    split_train, split_val, split_unlabeled = split_t = Preprocess.splitdata(
        _N, labels, seed=123, share=share)

    distnum = 1000
    dst = distEdge_ran(distnum)
    # One experiment is run per value; each is passed as k to delete_edges.
    deletesizes = [1, 0.8, 0.5, 0.2]
    res = []  # one (bins, dens) pair per experiment
    labelk = []  # one label per experiment, parallel to res
    fo = '{}/{}'
    for i in range(len(deletesizes)):
        adj, remained, deleted = spa.delete_edges(_A_obs, k=deletesizes[i])

        adj_d = dst.disturb(adj)
        # NOTE(review): the message says "add {} ran edges" but the value
        # printed is len(deleted), not distnum -- confirm which is intended.
        print('disturb adj, add {} ran edges, prev {}, after {}'.format(
            len(deleted), adj.nnz, adj_d.nnz))
        initadjset = edge2list.sett(adj)
        disadjset = edge2list.sett(adj_d)
        # Train on the disturbed adjacency and print the model's acu() value.
        g = gemodel_GCN(adj_d, feas, labels, split_t=split_t, sGCN=True)
        g.train()
        print(g.acu())

        # Evaluate the model's W1 in the model's own session.
        W = g.model.W1.eval(session=g.model.session)

        fl = FastLoss(adj_d, feas, labels, W, split_val, deleted_e=deleted)
        bins, dens = fl.test(initset=initadjset, disset=disadjset)
        res.append((bins, dens))
        # Label format is "<distnum>/<adj.nnz / 2>"; presumably nnz / 2 is
        # the undirected edge count -- TODO confirm.
        labelk.append(fo.format(distnum, adj.nnz / 2))
    Hist.subplots(res, labelk)
    # fl.test_deleted()
    # fl.test_remained(remained)
示例#10
0
    # Fragment of a larger function (its header is not visible here).
    # Trains three GCNs with the same split and prints performance() and
    # acu() for each: on _A_prev, on _A_prev with sGCN=True, and on adj.
    adj, remained, deleted = spa.delete_edges(_A_obs, k=percent)

    # NOTE(review): edges are deleted from _A_obs but the node count is read
    # from _A_prev -- presumably both have the same shape; confirm.
    _N = _A_prev.shape[0]
    # split_train, split_val, split_unlabeled = Preprocess.splitdata(_N, labels) #seed share as default
    # split_t = (split_train, split_val, split_unlabeled)
    # gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0)
    # run_time(gcn.train)
    # print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    # gcn = gemodel_GCN(adj, feas, labels, split_t=split_t, seed=1, dropout=0)
    # gcn.train()
    # print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    split_train, split_val, split_unlabeled = Preprocess.splitdata(_N, labels, seed=12, share=(0.052, 0.3693)) # explicit seed and share are passed here
    split_t = (split_train, split_val, split_unlabeled)
    # Model 1: _A_prev; train() is invoked through run_time.
    gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0)
    run_time(gcn.train)
    print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    # Model 2: same inputs as model 1, with sGCN=True.
    gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0, sGCN=True)
    run_time(gcn.train)
    print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    # Model 3: adjacency returned by delete_edges; trained directly, without
    # run_time.
    gcn = gemodel_GCN(adj, feas, labels, split_t=split_t, seed=1, dropout=0)
    gcn.train()
    print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

    
    # print('preprocess, delete some edges, remaind edges num(bi): {}'.format(adj.nnz))

    # savefile = 'data/coradele.npz'