Example #1
def calculate_eam_maps(num_atom_types, _Gs, _types):
    batchsize = len(_Gs)
    r = [[[] for _ in range(num_atom_types)] for _ in range(num_atom_types)]
    b_indices = [[[] for _ in range(num_atom_types)]
                 for _ in range(num_atom_types)]
    Ns = [0] * num_atom_types
    indices = [[] for _ in range(num_atom_types)]

    for i, (G_vec, t_vec) in enumerate(zip(_Gs, _types)):
        for Gi, ti in zip(G_vec, t_vec):
            indices[ti].append([i, Ns[ti]])
            for tj in range(num_atom_types):
                for j in range(len(Gi[tj])):
                    b_indices[ti][tj].append([Ns[ti], len(r[ti][tj]) + j])
                r[ti][tj].extend(Gi[tj])
            Ns[ti] += 1

    # Cast into numpy arrays; this also fixes the dimensionality of empty
    # lists
    maps = []
    b_maps = [[[] for _ in range(num_atom_types)]
              for _ in range(num_atom_types)]
    for i in range(num_atom_types):
        indices[i] = _np.array(indices[i], dtype=_np.int64).reshape((-1, 2))
        maps.append(
            _tf.SparseTensorValue(indices[i], [1.0] * Ns[i],
                                  [batchsize, Ns[i]]))
        for j in range(num_atom_types):
            b_indices[i][j] = _np.array(b_indices[i][j],
                                        dtype=_np.int64).reshape((-1, 2))
            b_maps[i][j] = _tf.SparseTensorValue(b_indices[i][j],
                                                 [1.0] * len(r[i][j]),
                                                 [Ns[i], len(r[i][j])])
            r[i][j] = _np.array(r[i][j]).reshape((-1, 1))
    return r, b_maps, maps
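The per-type `maps` built here are sparse scatter matrices: entry (i, n) is 1.0 when the n-th atom of that type belongs to structure i, so multiplying a map by a column of per-atom values sums them per structure. As a minimal NumPy sketch of what a `tf.SparseTensorValue(indices, values, dense_shape)` triple encodes (the `densify` helper is hypothetical, written only to make the triple concrete):

import numpy as np

def densify(indices, values, dense_shape):
    # Rebuild the dense matrix described by a SparseTensorValue-style triple.
    dense = np.zeros(dense_shape)
    for (row, col), val in zip(indices, values):
        dense[row, col] = val
    return dense

# Batch of 2 structures: structure 0 owns atom column 0,
# structure 1 owns atom columns 1 and 2.
indices = np.array([[0, 0], [1, 1], [1, 2]], dtype=np.int64)
print(densify(indices, [1.0] * 3, [2, 3]))
# [[1. 0. 0.]
#  [0. 1. 1.]]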
Example #2
File: cox.py Project: SerTelnov/DLF
    def train(self):
        step = 0
        epoch = 0
        loss_list = []
        batch_loss = []

        while True:
            x_batch, b_batch, z_batch, y_batch = self.util_train.get_batch_data_origin_sorted(step)
            feed_dict = {self.X: tfc.SparseTensorValue(x_batch, [1] * len(x_batch), [self.batch_size, dimension]),
                         self.b: b_batch, self.z: z_batch, self.y: y_batch}

            self.sess.run(self.train_step, feed_dict)
            batch_loss.append(self.sess.run(self.loss, feed_dict))
            step += 1

            if step * self.batch_size - epoch * int(0.02 * self.train_data_amt) >= int(0.02 * self.train_data_amt):
                loss = np.mean(batch_loss[step - int(int(0.02 * self.train_data_amt) / self.batch_size) - 1:])
                loss_list.append(loss)
                print("train loss of epoch-{0} is {1}".format(epoch, loss))
                epoch += 1

            # stop condition: each pseudo-epoch above covers 2% of the data, so
            # train at least 5 and at most 20 full passes, breaking early once
            # the loss has risen twice in a row
            if epoch * 0.02 * self.train_data_amt <= 5 * self.train_data_amt:
                continue
            if loss_list[-1] - loss_list[-2] > 0 and loss_list[-2] - loss_list[-3] > 0:
                break
            if epoch * 0.02 * self.train_data_amt >= 20 * self.train_data_amt:
                break

        # draw SGD training process
        x = [i for i in range(len(loss_list))]
        plt.plot(x, loss_list)
        plt.savefig(self.output_dir + 'train.png')
        plt.gcf().clear()
Example #3
def getProblem(num_vars, base_lits_pc=3, var_lits_pc=2, verbosity=0):
    entries, nlits, nclauses = randSAT.get_problem(num_vars,
                                                   base_lits_pc,
                                                   var_lits_pc,
                                                   verbosity=verbosity)
    return tf.SparseTensorValue(entries,
                                np.ones(entries.shape[0], dtype=np.int64),
                                [nlits, nclauses])
Example #4
def batch_problems(problems):
    """Combines multiple problems into 1 big batch
        Since cnf is sparse, no wasted memory"""
    cnfs, sols = zip(*problems)
    nvars = int(sum([cnf.dense_shape[0] / 2 for cnf in cnfs]))
    nclauses = sum([cnf.dense_shape[1] for cnf in cnfs])

    if sols[0] is not None:
        sols = np.zeros((1), dtype=np.float32)
    else:
        sols = None

    vars_sofar = 0
    clauses_sofar = 0
    inds0 = np.zeros([0], dtype=np.int64)
    inds1 = np.zeros([0], dtype=np.int64)

    for cnf, sol in problems:

        cnvars = int(cnf.dense_shape[0]) // 2  # number of vars in this problem
        cnclauses = cnf.dense_shape[1]        # number of clauses in this problem
        ind = np.array(cnf.indices, copy=False)               # index list from cnf sparse representation

        lit_nums = ind[:,0]

        # making signed indices:
        lit_nums[lit_nums >= cnvars] -= int(2 * cnvars + vars_sofar)
        lit_nums[lit_nums >= 0] += int(1 + vars_sofar)


        inds0 = np.concatenate((inds0, lit_nums), axis=0) # accumulated (signed) literal numbers
        inds1 = np.concatenate((inds1, ind[:,1] + int(clauses_sofar)), axis=0) # accumulated clause numbers
        
        vars_sofar += cnvars
        clauses_sofar += cnclauses

        if sols is not None:
            if sol is None:
                raise Exception("Some problems have solutions given, but others in same batch don't!")
            sols = np.concatenate((sol[-cnvars:], sols, sol[1:cnvars + 1]))


    # Making indices positive:
    inds0[inds0 > 0] -= 1
    inds0[inds0 < 0] += int(2 * nvars)

    inds = np.stack((inds0,inds1), axis=1)

    inds = inds[np.lexsort(inds[:,::-1].T),:]
    return tf.SparseTensorValue(indices=inds,
                                values=np.ones(inds.shape[0], dtype=np.float32),
                                dense_shape=[nvars * 2, nclauses]), sols
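The essential move in `batch_problems` is shifting each problem's sparse indices by running offsets before concatenation, which lays the individual CNF matrices out block-diagonally (the signed-index gymnastics above additionally mirror negated literals into the bottom rows). A stripped-down sketch of the offset-and-concatenate pattern, with hypothetical toy inputs:

import numpy as np

def batch_block_diagonal(problems):
    # problems: list of (indices, (rows, cols)) pairs, indices an (n, 2) array.
    # Returns the combined indices and shape of the block-diagonal stack.
    row_off = col_off = 0
    out = []
    for indices, (rows, cols) in problems:
        out.append(indices + np.array([row_off, col_off], dtype=np.int64))
        row_off += rows
        col_off += cols
    return np.concatenate(out, axis=0), (row_off, col_off)

inds, shape = batch_block_diagonal([
    (np.array([[0, 0], [1, 1]], dtype=np.int64), (2, 2)),
    (np.array([[0, 0]], dtype=np.int64), (1, 1)),
])
print(inds.tolist(), shape)  # [[0, 0], [1, 1], [2, 2]] (3, 3)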
Example #5
def gen_sparse_tensor(fs):
    global g_dr
    kk, vv = [], []
    for i in range(len(fs)):
        ff = fs[i]
        assert isinstance(ff, set)
        ff = list(ff)
        for k in range(len(ff)):
            kk.append(np.array([i, k], dtype=np.int32))
            vv.append(ff[k])
    return tf.SparseTensorValue(
        kk, vv, [len(fs), g_dr.unique_feature_num()])
Example #6
    def train_phase2(self):
        self.ks_const = self.ks.eval(session=self.sess)  #np array
        self.theta_const = self.theta.eval(session=self.sess)  #np array

        step = 0
        epoch = 0
        loss_list = []
        batch_loss = []

        print("begin training phase 2")
        while True:
            x_batch, b_batch, z_batch, y_batch, ks_batch = self.util_train.get_batch_data_origin_with_ks(
                step, self.ks_const)
            feed_dict = {}
            feed_dict[self.X] = tf.SparseTensorValue(
                x_batch, [1] * len(x_batch), [self.batch_size, dimension])
            feed_dict[self.b] = b_batch
            feed_dict[self.z] = z_batch
            feed_dict[self.y] = y_batch
            feed_dict[self.label_phase2] = self.theta_const * ks_batch

            self.sess.run(self.train_step2, feed_dict)
            batch_loss.append(self.sess.run(self.loss_phase2, feed_dict))
            step += 1

            if step * self.batch_size - epoch * int(
                    0.02 * self.train_data_amt) >= int(
                        0.02 * self.train_data_amt):
                loss = np.mean(batch_loss[
                    step -
                    int(int(0.02 * self.train_data_amt) / self.batch_size) -
                    1:])
                loss_list.append(loss)
                print("train loss of phase2 epoch-{0} is {1}".format(
                    epoch, loss))
                epoch += 1

            # stop condition: each pseudo-epoch above covers 2% of the data, so
            # train at least 5 and at most 20 full passes, breaking early once
            # the loss has risen twice in a row
            if epoch * 0.02 * self.train_data_amt <= 5 * self.train_data_amt:
                continue
            if (loss_list[-1] - loss_list[-2] > 0
                    and loss_list[-2] - loss_list[-3] > 0):
                break
            if epoch * 0.02 * self.train_data_amt >= 20 * self.train_data_amt:
                break

        # draw SGD training process
        x = [i for i in range(len(loss_list))]
        plt.plot(x, loss_list)
        plt.savefig(self.output_dir + 'train_phase2.png')
        plt.gcf().clear()
Example #7
    def test(self):
        print('Test begin')
        self.pred_mp = tf.exp(tf.sparse_tensor_dense_matmul(self.X, self.w))
        self.MSE = tf.reduce_mean(tf.square(self.z - self.pred_mp))

        x, b, z, y = self.util_test.get_all_data_origin()
        feed_dict = {}

        feed_dict[self.X] = tf.SparseTensorValue(
            x, [1] * len(x), [self.test_data_amt, dimension])
        feed_dict[self.z] = z
        feed_dict[self.y] = y
        feed_dict[self.b] = b

        # calculate MSE
        mse = self.sess.run(self.MSE, feed_dict)
        print("MSE: {}".format(mse))

        ks = self.pred_mp / self.theta
        ps = tf.pow(self.z, (ks - 1.)) * tf.exp(-self.z / self.theta) / tf.pow(
            self.theta, ks) / tf.exp(tf.lgamma(ks))
        cs = tf.igamma(ks, self.b / self.theta) / tf.exp(tf.lgamma(ks))
        # calculate AUC and LogLoss
        win_rate = self.sess.run(cs, feed_dict)
        auc = roc_auc_score(y, win_rate)
        print("AUC: {}".format(auc))
        logloss = log_loss(y, win_rate)
        print("Log Loss: {}".format(logloss))

        # calculate ANLP
        logp = -tf.log(tf.clip_by_value(ps, 1e-8, 1.0))
        logp_arr = self.sess.run(logp, feed_dict)
        logp_arr[np.isnan(logp_arr)] = 1e-20  #for overflow values, minor
        logp_arr[logp_arr == 0] = 1e-20

        anlp = np.mean(logp_arr)
        print("ANLP: {}".format(anlp))

        # save result and params
        with open(self.output_dir + 'result.txt', 'w') as f:
            f.writelines([
                "MSE: {0}   AUC: {1}    Log Loss: {2}   ANLP: {3}\n".format(
                    mse, auc, logloss, anlp)
            ])

        np.save(self.output_dir + 'w', self.sess.run(self.w))
        np.save(self.output_dir + 'k', self.sess.run(ks, feed_dict))
        np.save(self.output_dir + 'theta', self.sess.run(self.theta))
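Two identities help when reading the gamma algebra above: `ps` is the Gamma(k, scale=theta) density z**(k-1) * exp(-z/theta) / (theta**k * Gamma(k)), and `tf.igamma(a, x)` returns the regularized lower incomplete gamma P(a, x) = gamma(a, x) / Gamma(a), which is exactly the CDF of a Gamma(a, scale=theta) variable evaluated at x * theta. A quick SciPy cross-check of the latter:

from scipy.special import gammainc  # same regularized P(a, x) as tf.igamma
from scipy.stats import gamma

k, theta, b = 2.0, 1.5, 3.0
print(gammainc(k, b / theta))          # P(k, b/theta), ~0.594
print(gamma.cdf(b, a=k, scale=theta))  # identical by definition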
Example #8
File: DWPP.py Project: SerTelnov/DLF
    def train(self):
        step = 0
        epoch = 0
        batch_loss = []
        loss_list = []

        while True:
            x_batch_field, b_batch, z_batch, y_batch, all_prices = self.util_train.get_batch_data_sorted_dwpp(
                step)
            feed_dict = {}
            for j in range(len(self.X)):
                feed_dict[self.X[j]] = tfc.SparseTensorValue(
                    x_batch_field[j], [1] * len(x_batch_field[j]),
                    [self.batch_size, self.field_sizes[j]])
            feed_dict[self.b] = b_batch
            feed_dict[self.z] = z_batch
            feed_dict[self.y] = y_batch
            feed_dict[self.all_prices] = all_prices

            self.sess.run(self.train_step, feed_dict)
            batch_loss.append(self.sess.run(self.loss, feed_dict))
            step += 1

            if step * self.batch_size - epoch * int(
                    0.1 * self.train_data_amt) >= int(
                        0.1 * self.train_data_amt):
                loss = np.mean(batch_loss[
                    step -
                    int(int(0.1 * self.train_data_amt) / self.batch_size) -
                    1:])
                loss_list.append(loss)
                print("train loss of epoch-{0} is {1}".format(epoch, loss))
                epoch += 1

            # stop condition: each pseudo-epoch above covers 10% of the data, so
            # train at least 3 and at most 5 full passes, breaking early once
            # the loss has risen twice in a row
            if epoch * 0.1 * self.train_data_amt <= 3 * self.train_data_amt:
                continue
            if loss_list[-1] - loss_list[-2] > 0 and loss_list[-2] - loss_list[
                    -3] > 0:
                break
            if epoch * 0.1 * self.train_data_amt >= 5 * self.train_data_amt:
                break

        # draw SGD training process
        x = [i for i in range(len(loss_list))]
        plt.plot(x, loss_list)
        plt.savefig(self.output_dir + 'train.png')
        plt.gcf().clear()
Example #9
    def getNextBatch(self):
        if self.waitingBatch is not None:
            wb = self.waitingBatch
            self.waitingBatch = None
            return wb
        if self.model:
            entries, nlits, nclauses, model = randSAT.getNextBatchAndModel(
                self.capsule)
            model = model.astype(np.float32)
            model = np.concatenate((model, -model[::-1] + 1))
            if using_tf:
                return tf.SparseTensorValue(
                    entries, np.ones(entries.shape[0], dtype=np.int64),
                    [nlits, nclauses]), model
            else:
                return (entries, [nlits, nclauses], model)
        else:
            entries, nlits, nclauses = randSAT.getNextBatch(self.capsule)
            if using_tf:
                return tf.SparseTensorValue(
                    entries, np.ones(entries.shape[0], dtype=np.int64),
                    [nlits, nclauses])
            else:
                return (entries, [nlits, nclauses])
Example #10
    def output_s(self):
        batch_num = int(self.test_data_amt / self.batch_size)
        output = np.ones([self.batch_size, OUT_SIZE2])
        for i in range(batch_num):
            x_batch_field, b_batch, z_batch, y_batch = self.util_test.get_batch_data(i)
            feed_dict = {}
            for j in range(len(self.X)):
                feed_dict[self.X[j]] = tf.SparseTensorValue(
                    x_batch_field[j], [1] * len(x_batch_field[j]),
                    [self.batch_size, self.field_sizes[j]])
            feed_dict[self.b] = b_batch
            feed_dict[self.z] = z_batch
            feed_dict[self.y] = y_batch
            output = np.vstack([output, self.sess.run(self.w, feed_dict)])
        print(output.shape)
        np.savetxt(self.output_dir + 's.txt', 1 - output[self.batch_size:],
                   delimiter='\t', fmt='%.4f')
Example #11
    def train_phase1(self, train_round=50):
        # get all batches data
        x, b, z, y = self.util_train.get_all_data_origin()
        feed_dict = {}
        feed_dict[self.X] = tf.SparseTensorValue(x, [1] * len(x),
                                                 [b.shape[0], dimension])
        feed_dict[self.b] = b
        feed_dict[self.z] = z
        feed_dict[self.y] = y

        print("begin training phase 1")
        for i in range(train_round):
            self.sess.run(self.train_step1, feed_dict)
            loss = self.sess.run(self.loss_phase1, feed_dict)
            print("train loss of phase-1, iteration-{0} is {1}".format(
                i, loss))
Example #12
File: deephit.py Project: SerTelnov/DLF
    def test(self):
        batch_num = int(self.test_data_amt / self.batch_size)
        anlp_batch = []
        auc_batch = []
        logloss_batch = []
        for i in range(batch_num):
            x_batch_field, b_batch, z_batch, y_batch = self.util_test.get_batch_data_sorted(
                i)
            feed_dict = {}
            for j in range(len(self.X)):
                feed_dict[self.X[j]] = tfc.SparseTensorValue(
                    x_batch_field[j], [1] * len(x_batch_field[j]),
                    [self.batch_size, self.field_sizes[j]])
            feed_dict[self.b] = b_batch
            feed_dict[self.z] = z_batch
            feed_dict[self.y] = y_batch

            pz = self.sess.run(self.pz, feed_dict)
            wb = self.sess.run(self.wb, feed_dict)

            pz[pz == 0] = 1e-20
            anlp = np.average(-np.log(pz))
            try:
                auc = roc_auc_score(y_batch, wb)
            except Exception:
                print("Metric ERROE")
                continue

            logloss = log_loss(y_batch, wb)

            anlp_batch.append(anlp)
            auc_batch.append(auc)
            logloss_batch.append(logloss)

        ANLP = np.mean(anlp_batch)
        AUC = np.mean(auc_batch)
        LOGLOSS = np.mean(logloss_batch)

        print("AUC: {}".format(AUC))
        print("Log-Loss: {}".format(LOGLOSS))
        print("ANLP: {}".format(ANLP))

        with open(self.output_dir + 'result.txt', 'w') as f:
            f.writelines(
                ["AUC:{}\tANLP:{}\tLog-Loss:{}".format(AUC, ANLP, LOGLOSS)])
Example #13
File: DWPP.py Project: SerTelnov/DLF
    def test(self):
        batch_num = int(self.test_data_amt / self.batch_size)
        pzs = []
        wbs = []
        ys = []
        for i in range(batch_num):
            x_batch_field, b_batch, z_batch, y_batch, all_prices = self.util_test.get_batch_data_sorted_dwpp(
                i)
            feed_dict = {}
            for j in range(len(self.X)):
                feed_dict[self.X[j]] = tfc.SparseTensorValue(
                    x_batch_field[j], [1] * len(x_batch_field[j]),
                    [self.batch_size, self.field_sizes[j]])
            feed_dict[self.b] = b_batch
            feed_dict[self.z] = z_batch
            feed_dict[self.y] = y_batch
            feed_dict[self.all_prices] = all_prices
            ys += y_batch.reshape(-1, ).tolist()
            pz = self.sess.run(self.pz, feed_dict)
            wb = self.sess.run(self.wb, feed_dict)

            pz[pz == 0] = 1e-20
            pzs += pz.reshape(-1, ).tolist()
            wbs += wb.reshape(-1, ).tolist()

        ANLP = np.average(-np.log(pzs))
        AUC = roc_auc_score(ys, wbs)
        LOGLOSS = log_loss(ys, wbs)

        print("AUC: {}".format(AUC))
        print("Log-Loss: {}".format(LOGLOSS))
        print("ANLP: {}".format(ANLP))

        with open(self.output_dir + 'result.txt', 'w') as f:
            f.writelines(
                ["AUC:{}\tANLP:{}\tLog-Loss:{}".format(AUC, ANLP, LOGLOSS)])
Example #14
def calculate_bp_maps(num_atom_types, _Gs, _types):
    batchsize = len(_Gs)
    Ns = [0] * num_atom_types
    indices = [[] for _ in range(num_atom_types)]
    atoms = [[] for _ in range(num_atom_types)]

    for i, (G_vec, t_vec) in enumerate(zip(_Gs, _types)):
        for Gi, ti in zip(G_vec, t_vec):
            indices[ti].append([i, Ns[ti]])
            atoms[ti].append(Gi)
            Ns[ti] += 1

    # Cast into numpy arrays; this also fixes the dimensionality of empty
    # lists
    maps = []
    for a in range(num_atom_types):
        indices[a] = _np.array(indices[a], dtype=_np.int64).reshape((-1, 2))
        maps.append(
            _tf.SparseTensorValue(indices[a], [1.0] * Ns[a],
                                  [batchsize, Ns[a]]))
        atoms[a] = _np.array(atoms[a])
    return atoms, maps
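A hedged usage sketch for `calculate_bp_maps` (assuming the module aliases are `import numpy as _np` and a TF 1.x `import tensorflow as _tf`; the descriptor values are made up): two structures, where the first holds one type-0 and one type-1 atom and the second a single type-0 atom:

Gs = [[_np.array([0.1, 0.2]), _np.array([0.3, 0.4])],
      [_np.array([0.5, 0.6])]]
types = [[0, 1], [0]]

atoms, maps = calculate_bp_maps(2, Gs, types)
# atoms[0] stacks both type-0 descriptors into a (2, 2) array;
# atoms[1] holds the single type-1 descriptor.
# maps[0] is a 2x2 scatter matrix (structure 0 <- atom 0, structure 1 <- atom 1);
# maps[1] is 2x1 (only structure 0 has a type-1 atom), so a sparse matmul
# against per-atom energies yields per-structure sums.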
Example #15
File: cox.py Project: SerTelnov/DLF
    def test(self):
        batch_num = int(self.test_data_amt / self.batch_size)
        anlp_batch = []
        auc_batch = []
        logloss_batch = []

        for batch_idx in range(batch_num):
            x, b, z, y = self.util_test.get_batch_data_origin(batch_idx)
            feed_dict = {}

            feed_dict[self.X] = tfc.SparseTensorValue(x, [1] * len(x), [self.batch_size, dimension])
            feed_dict[self.z] = z
            feed_dict[self.y] = y
            feed_dict[self.b] = b

            base = self.sess.run(self.base, feed_dict)
            candidate = self.sess.run(self.candidate, feed_dict)
            multiple_times = self.sess.run(self.multiple_times, feed_dict)

            # get survival rate of b and b+1
            H0_b = np.zeros([self.batch_size, 1])
            H0_z = np.zeros([self.batch_size, 1])
            H0_z1 = np.zeros([self.batch_size, 1])
            for i in range(self.batch_size):
                bid = b[i][0]
                mp = z[i][0]
                H0_b[i][0] = np.sum(candidate[base <= bid])
                H0_z[i][0] = np.sum(candidate[base <= mp])
                H0_z1[i][0] = np.sum(candidate[base <= mp + 1])
            S0_b = np.exp(-H0_b)
            S0_z = np.exp(-H0_z)
            S0_z1 = np.exp(-H0_z1)

            S_b = np.power(S0_b, multiple_times)
            S_z = np.power(S0_z, multiple_times)
            S_z1 = np.power(S0_z1, multiple_times)

            p = S_z - S_z1
            p[p <= 0] = 1e-20
            anlp = np.average(-np.log(p))

            W_b = 1 - S_b
            try:
                auc = roc_auc_score(y, W_b)
                logloss = log_loss(y, W_b)
            except Exception:
                print("Metric ERROE")
                continue

            anlp_batch.append(anlp)
            auc_batch.append(auc)
            logloss_batch.append(logloss)

        ANLP = np.mean(anlp_batch)
        AUC = np.mean(auc_batch)
        LOGLOSS = np.mean(logloss_batch)

        print("AUC: {}".format(AUC))
        print("Log-Loss: {}".format(LOGLOSS))
        print("ANLP: {}".format(ANLP))

        with open(self.output_dir + 'result.txt', 'w') as f:
            f.writelines(["AUC:{}\tANLP:{}\tLog-Loss:{}".format(AUC, ANLP, LOGLOSS)])
Example #16
    def feed_dict(self, mode='train'):
        """ DONE """
        if mode in ['val', 'test']:
            self.node_subgraph = np.arange(self.class_arr.shape[0])
            adj = sp.csr_matrix(([], [], np.zeros(2)),
                                shape=(1, self.node_subgraph.shape[0]))
            adj_0 = self.adj_full_norm_0
            adj_1 = self.adj_full_norm_1
            adj_2 = self.adj_full_norm_2
            adj_3 = self.adj_full_norm_3
            adj_4 = self.adj_full_norm_4
            adj_5 = self.adj_full_norm_5
            adj_6 = self.adj_full_norm_6
            adj_7 = self.adj_full_norm_7
            _dropout = 0.
        else:
            assert mode == 'train'
            tt0 = time.time()
            if len(self.subgraphs_remaining_nodes) == 0:
                self.par_graph_sample('train')
                print()
            tt5 = time.time()
            self.node_subgraph = self.subgraphs_remaining_nodes.pop()
            self.size_subgraph = len(self.node_subgraph)
            adj = sp.csr_matrix((self.subgraphs_remaining_data.pop(),self.subgraphs_remaining_indices.pop(),\
                        self.subgraphs_remaining_indptr.pop()),shape=(self.node_subgraph.size,self.node_subgraph.size))
            adj_edge_index = self.subgraphs_remaining_edge_index.pop()
            #print("{} nodes, {} edges, {} degree".format(self.node_subgraph.size,adj.size,adj.size/self.node_subgraph.size))
            tt1 = time.time()
            assert len(self.node_subgraph) == adj.shape[0]
            norm_aggr(adj.data,
                      adj_edge_index,
                      self.norm_aggr_train,
                      num_proc=args_global.num_cpu_core)

            tt2 = time.time()
            adj = adj_norm(adj, deg=self.deg_train[self.node_subgraph])

            # 1 x |V_sub| empty placeholders for the unused adjacency partitions
            adj_0 = adj_1 = adj_2 = adj_3 = adj_4 = adj_5 = adj_6 = adj_7 = \
                sp.csr_matrix(([], [], np.zeros(2)),
                              shape=(1, self.node_subgraph.shape[0]))

            _dropout = self.dropout
            self.sampling_time += tt5 - tt0
            self.batch_num += 1
        feed_dict = dict()
        feed_dict.update(
            {self.placeholders['node_subgraph']: self.node_subgraph})
        feed_dict.update(
            {self.placeholders['labels']: self.class_arr[self.node_subgraph]})
        feed_dict.update({self.placeholders['dropout']: _dropout})
        if mode in ['val', 'test']:
            feed_dict.update(
                {self.placeholders['norm_loss']: self.norm_loss_test})
        else:
            feed_dict.update(
                {self.placeholders['norm_loss']: self.norm_loss_train})

        _num_edges = len(adj.nonzero()[1])
        _num_vertices = len(self.node_subgraph)
        _indices_ph = np.column_stack(adj.nonzero())
        _shape_ph = adj.shape
        feed_dict.update({
            self.placeholders['adj_subgraph']:
                tf.SparseTensorValue(_indices_ph, adj.data, _shape_ph)
        })
        for k, adj_k in enumerate(
                [adj_0, adj_1, adj_2, adj_3, adj_4, adj_5, adj_6, adj_7]):
            feed_dict.update({
                self.placeholders['adj_subgraph_%d' % k]:
                    tf.SparseTensorValue(np.column_stack(adj_k.nonzero()),
                                         adj_k.data, adj_k.shape)
            })
        feed_dict.update(
            {self.placeholders['dim0_adj_sub']: self.dim0_adj_sub})
        tt3 = time.time()
        if mode in ['val', 'test']:
            feed_dict[self.placeholders['is_train']] = False
        else:
            feed_dict[self.placeholders['is_train']] = True
        return feed_dict, self.class_arr[self.node_subgraph]
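The conversion idiom used above, `np.column_stack(adj.nonzero())` paired with `adj.data`, assumes the CSR matrix is in canonical form (no explicit zeros, sorted indices) so the two arrays stay aligned. A slightly more defensive helper (hypothetical name, assuming a TF 1.x `tf.SparseTensorValue`) goes through COO, whose row/col/data arrays are aligned by construction:

import numpy as np
import scipy.sparse as sp
import tensorflow as tf  # TF 1.x, where tf.SparseTensorValue exists

def csr_to_sparse_tensor_value(m):
    coo = m.tocoo()  # COO keeps row, col and data aligned
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    return tf.SparseTensorValue(indices, coo.data, coo.shape)

adj = sp.csr_matrix(np.array([[0., 2.], [1., 0.]]))
print(csr_to_sparse_tensor_value(adj))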
Example #17
def parse(s, sort=True): # s -- string
    """DIMACS parsing code"""
    nvars = 0
    nclauses = 0
    inds = [] #np.zeros([0,2],dtype=np.int)
    sol = None

    lines = s.split('\n')

    pComment = re.compile(r'c.*')
    pStats = re.compile(r'p\s*cnf\s*(\d*)\s*(\d*)')
    pSat = re.compile(r's\s*(\w*)')
    pVal = re.compile(r'v\s*(.*)')
    
    c = 0
    while len(lines) > 0:
        line = lines.pop(0)

        # Only deal with lines that aren't comments
        if pComment.match(line):
            continue
            
        m = pStats.match(line)
        if m:
            nvars = int(m[1])
            nclauses = int(m[2])
            continue

        if sol is None:
            m = pSat.match(line)
            if m and m[1] == 'SATISFIABLE':
                sol = np.zeros([nvars * 2 + 1], dtype=np.float32)
                continue

        m = pVal.match(line)
        if m:
            nums = m[1].split(' ')
            for lit_str in nums:
                if lit_str != '' and int(lit_str) != 0:
                    sol[int(lit_str)] = 1
            continue

        nums = line.rstrip('\n').split(' ')
        nonempty = False
        for lit_str in nums:
            if lit_str != '':
                try:
                    i = int(lit_str)
                except ValueError:
                    continue
                if i == 0:
                    continue
                if i < 0:
                    i += 2 * nvars
                else:
                    i -= 1
                inds.append([i,c])
                nonempty = True

        if nonempty:
            c = c + 1

    vals = np.ones([len(inds)], dtype=np.float32)
    cnf = tf.SparseTensorValue(indices=np.array(inds, dtype=np.int64),
                               values=vals,
                               dense_shape=[nvars * 2, nclauses])
    if sort:
        return batch_problems([(cnf,sol)])
    else:
        return cnf, sol
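A hedged usage illustration for `parse` (the instance is made up; `sort=False` skips the `batch_problems` re-sort):

dimacs = ("c toy instance\n"
          "p cnf 2 2\n"
          "1 -2 0\n"
          "-1 2 0\n"
          "s SATISFIABLE\n"
          "v 1 2 0\n")
cnf, sol = parse(dimacs, sort=False)
print(cnf.dense_shape)       # [4, 2]: 2*nvars literal rows, nclauses columns
print(cnf.indices.tolist())  # positive literal v -> row v-1, negative v -> row 2*nvars+v
print(sol)                   # sol[ell] == 1 marks literal ell true (negatives wrap around)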
Example #18
  def test_multivalent_sequence_features(self, combiner: Text):
    """Tests multivalent sequence embedding features.

    Args:
      combiner: The combiner used to reduce multivalent features.  A multivalent
        sequence can have many IDs per sequence index.  The input for
        multivalent sequence features is a 3D SparseTensor (instead of a 2D
        SparseTensor for univalent sequence features).  The last dimension
        represents the index that will be reduced (using the combiner).
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 1
    embedding_weights = np.float32([
        [-5.],  # embedding ID = 0
        [10.],  # embedding ID = 1
        [20.],  # embedding ID = 2
        [30.],  # embedding ID = 3
        [40.],  # embedding ID = 4
        [50.],  # embedding ID = 5
    ])

    # For multivalent sequence features, IDs are a 3D sparse tensor.
    # The outer dimension is batch, the middle dimension is sequence, and the
    # last dimension is the index.
    sparse_ids = tf.SparseTensorValue(
        indices=[
            [0, 0, 0],
            [0, 0, 1],
            [1, 0, 0],
            [1, 1, 0],
            [3, 0, 0],
            [3, 2, 0],
            [3, 2, 1],
            [3, 3, 0],
        ],
        values=[
            1,  # Example 0, sequence_index 0,  id_index 0.
            0,  # Example 0, sequence_index 0,  id_index 1.
            2,  # Example 1, sequence_index 0,  id_index 0.
            3,  # Example 1, sequence_index 1,  id_index 0.
            4,  # Example 3, sequence_index 0,  id_index 0.
            5,  # Example 3, sequence_index 2,  id_index 0.
            2,  # Example 3, sequence_index 2,  id_index 1.
            5,  # Example 3, sequence_index 3,  id_index 0.
        ],
        dense_shape=[batch_size, max_sequence_length + 1, 2],
    )

    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
        combiner=combiner,
    )

    self.assertAllEqual(
        [
            [  # Example 0
                [5 if combiner == 'sum' else 2.5],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 1
                [20],  # Sequence Index = 0.
                [30],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 2
                [0.],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [0.],  # Sequence Index = 2.
            ],
            [  # Example 3
                [40],  # Sequence Index = 0.
                [0.],  # Sequence Index = 1.
                [70 if combiner == 'sum' else 35],  # Sequence Index = 2.
            ],
        ],
        activations,
    )

    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            0,  # Example 2
            3,  # Example 3
        ],
        sequence_lengths,
    )
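(For reference: with the 'sum' combiner, sequence index 0 of example 0 reduces IDs 1 and 0, i.e. 10 + (-5) = 5, and sequence index 2 of example 3 reduces IDs 5 and 2, i.e. 50 + 20 = 70; 'mean' halves these to 2.5 and 35. Example 2 has no IDs at all, hence sequence length 0.)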
Example #19
  def test_non_contiguous_sequence_with_length_gt_max_sequence_length(self):
    """Tests non contiguous sequence which has length > max_sequence_length.

    A "non-contiguous sequence" is a sequence which has missing values followed
    by actual values.

    Additionally, this test has a sequence with length > max_sequence_length. In
    this case, we expect the sequence to be truncated from the right.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 1
    embedding_weights = np.float32([
        [-5.],  # embedding ID = 0
        [10.],  # embedding ID = 1
        [20.],  # embedding ID = 2
        [30.],  # embedding ID = 3
        [40.],  # embedding ID = 4
        [50.],  # embedding ID = 5
    ])

    # The sparse_ids are indexes into the embedding_weights for each
    # (example, sequence_index).  Sequence indexes larger than
    # max_sequence_length will be truncated.
    sparse_ids = tf.SparseTensorValue(
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [2, 2], [2, 3]],
        values=[
            1,  # Example 0, sequence_index 0
            2,  # Example 1, sequence_index 0
            3,  # Example 1, sequence_index 1
            4,  # Example 2, sequence_index 0
            5,  # Example 2, sequence_index 2
            6,  # Example 2, sequence_index 3
        ],
        dense_shape=[batch_size, max_sequence_length + 1],
    )

    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
    )

    self.assertAllEqual(
        [
            [  # Example 0
                [10],  # Sequence Index = 0
                [0.],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
            [  # Example 1
                [20],  # Sequence Index = 0
                [30],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
            [  # Example 2 (Truncated)
                [40],  # Sequence Index = 0
                [0.],  # Sequence Index = 1 (Missing value mid-sequence)
                [50],  # Sequence Index = 2
            ],
            [  # Example 3
                [0.],  # Sequence Index = 0
                [0.],  # Sequence Index = 1
                [0.],  # Sequence Index = 2
            ],
        ],
        activations)

    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            3,  # Example 2
            0,  # Example 3
        ],
        sequence_lengths,
    )
Example #20
  def test_non_contiguous_sequence(self):
    """Tests embedding lookups for non-contiguous sparse IDs.

    A "non-contiguous sequence" is a sequence which has missing values followed
    by actual values.
    """
    batch_size = 4
    max_sequence_length = 3
    dimension = 2
    embedding_weights = np.float32([
        [-5., -5.],  # embedding ID = 0
        [10., 11.],  # embedding ID = 1
        [20., 21.],  # embedding ID = 2
        [30., 31.],  # embedding ID = 3
        [40., 41.],  # embedding ID = 4
        [50., 51.],  # embedding ID = 5
    ])

    # The sparse_ids are indexes into the embedding_weights for each
    # (example, sequence_index).
    sparse_ids = tf.SparseTensorValue(
        indices=[[0, 0], [1, 0], [1, 1], [2, 0], [2, 2]],
        values=[
            1,  # Example 0, sequence_index 0
            2,  # Example 1, sequence_index 0
            3,  # Example 1, sequence_index 1
            4,  # Example 2, sequence_index 0
            5,  # Example 2, sequence_index 2
        ],
        dense_shape=[batch_size, max_sequence_length],
    )

    activations, sequence_lengths = self.get_activations_and_sequence_lengths(
        embedding_weights,
        sparse_ids,
        batch_size,
        max_sequence_length,
        dimension,
    )

    self.assertAllEqual(
        [
            [  # Example 0
                [10, 11],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
            [  # Example 1
                [20, 21],  # Sequence Index = 0
                [30, 31],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
            [  # Example 2
                [40, 41],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1 (Missing value mid-sequence)
                [50, 51],  # Sequence Index = 2
            ],
            [  # Example 3
                [0., 0.],  # Sequence Index = 0
                [0., 0.],  # Sequence Index = 1
                [0., 0.],  # Sequence Index = 2
            ],
        ],
        activations)
    self.assertAllEqual(
        [
            1,  # Example 0
            2,  # Example 1
            3,  # Example 2
            0,  # Example 3
        ],
        sequence_lengths,
    )