Example #1
def test_acc(path, class_nums, growth_rate, depth):
    inputs = tf.placeholder(tf.float32, [None, 32, 32, 3])
    labels = tf.placeholder(tf.int64, [None])
    train_phase = tf.placeholder(tf.bool)
    logits = DenseNet(inputs,
                      nums_out=class_nums,
                      growth_rate=growth_rate,
                      train_phase=train_phase,
                      depth=depth)
    pred = softmax(logits)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(pred, axis=1), labels), tf.float32))
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, "./save_para/densenet.ckpt")
    data, labels_ = read_cifar_data(path)
    acc = 0
    for i in range(data.shape[0] // 100):
        acc += sess.run(accuracy,
                        feed_dict={
                            inputs: data[i * 100:i * 100 + 100],
                            labels: labels_[i * 100:i * 100 + 100],
                            train_phase: False
                        })
    return acc / (data.shape[0] // 100)
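
A possible invocation of the routine above, using the checkpoint written by the training loop in Example #2 below; the path and hyper-parameters are illustrative assumptions, not part of the original example:

if __name__ == "__main__":
    # Path and hyper-parameters are assumptions (DenseNet-40, growth rate 12, CIFAR-10).
    acc = test_acc(path="./cifar-10-batches-py/test_batch",
                   class_nums=10, growth_rate=12, depth=40)
    print("Test accuracy: %.4f" % acc)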
Example #2
def train(batch_size, class_nums, growth_rate, weight_decay, depth, cifar10_path, train_epoch, lr):
    inputs = tf.placeholder(tf.float32, [None, 32, 32, 3])
    labels = tf.placeholder(tf.int64, [None])
    train_phase = tf.placeholder(tf.bool)
    learning_rate = tf.placeholder(tf.float32)
    logits = DenseNet(inputs, nums_out=class_nums, growth_rate=growth_rate, train_phase=train_phase, depth=depth)
    pred = softmax(logits)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels), tf.float32))
    one_hot_label = to_OneHot(labels, class_nums)
    cross_entropy_loss = tf.reduce_mean(-tf.log(tf.reduce_sum(pred * one_hot_label, axis=1) + 1e-10))
    regular = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    Opt = tf.train.MomentumOptimizer(learning_rate, momentum=0.9, use_nesterov=True).minimize(cross_entropy_loss + weight_decay * regular)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    path = cifar10_path + "data_batch_"
    valid_path = cifar10_path + "data_batch_5"
    loss_list = []
    train_acc_list = []
    test_acc_list = []
    saver = tf.train.Saver()
    # saver.restore(sess, "./save_para//.\\densenet.ckpt")
    # saver.restore(sess, "./save_para/densenet.ckpt")
    for epoch in range(train_epoch):
        if epoch == train_epoch // 2 or epoch == train_epoch * 3 // 4:
            lr /= 10
        for i in range(1, 6):
            if i != 5:
                data, labels_ = read_cifar_data(path + str(i))
                data, labels_ = shuffle(data, labels_)
            else:
                data, labels_ = read_cifar_data(path + str(i))
                data, labels_ = shuffle(data[:5000], labels_[:5000])
            for j in range(data.shape[0] // batch_size - 1):
                batch_data = data[j * batch_size:j * batch_size + batch_size, :, :, :]
                batch_labels = labels_[j * batch_size:j * batch_size + batch_size]
                [_, loss, acc] = sess.run([Opt, cross_entropy_loss, accuracy], feed_dict={inputs: batch_data, labels: batch_labels, train_phase: True, learning_rate: lr})
                loss_list.append(loss)
                train_acc_list.append(acc)
                if j % 100 == 0:
                    print("Epoch: %d, iter: %d, loss: %f, train_acc: %f"%(epoch, j, loss, acc))
                    np.savetxt("loss.txt", loss_list)
                    np.savetxt("train_acc.txt", train_acc_list)
                    np.savetxt("test_acc.txt", test_acc_list)
            if ((epoch + 1) % 5) == 0:
                vali_acc = validation_acc(inputs, labels, train_phase, accuracy, sess, valid_path)
                test_acc_list.append(vali_acc)
                print("Validation Accuracy: %f"%(vali_acc))
                saver.save(sess, "./save_para/densenet.ckpt")



# if __name__ == "__main__":
#     # cifar10_path and lr are also required by the signature above; the values here are illustrative
#     train(batch_size=64, class_nums=10, growth_rate=12, weight_decay=1e-4, depth=40,
#           cifar10_path="./cifar-10-batches-py/", train_epoch=5, lr=0.1)
Example #3
def discrete_decoder(opts, noise, reuse=False, is_training=True):
    with tf.variable_scope('generator/disc_gen', reuse=reuse):
        outputs, logits = [], []
        for i in range(opts["nmixtures"]):
            input = tf.expand_dims(noise[:, i], axis=0)
            _, logit = decoder(opts, input, 'mlp', opts['g_disc_nlayers'],
                               opts['g_disc_nfilters'], [opts['nclasses']],
                               'mix%d' % i, opts['batch_norm'], reuse,
                               is_training)
            outputs.append(ops.softmax(logit, axis=-1))
            logits.append(logit)
    outputs = tf.concat(outputs, axis=0)
    logits = tf.concat(logits, axis=0)
    return outputs, logits
Example #4
    def weak_mcts(self, planes, player_turn, legals, p):
        planes = np.copy(planes)
        new_p = np.full(p.shape, 0.)
        num_boards = (input_planes - 1) // 2  # floor division so the result can be used as a plane index

        # Reverse player feature plane
        p_plane = input_planes - 1
        planes[:, :, :, p_plane] = np.full(planes[:, :, :, p_plane].shape,
                                           (player_turn + 1) % 2)

        # Swap board planes
        tmp = np.copy(planes[:, :, :, 0])
        planes[:, :, :, 0] = planes[:, :, :, 1]
        planes[:, :, :, 1] = tmp

        # Simulates the legal move and get value
        for move in legals:
            #planes[0][move[0]][move[1]][0] = player_turn+1
            last_plane = input_planes - 1 - num_boards
            planes[0][move[0]][move[1]][last_plane] = 1
            t_v = self.feed_forward_value(planes)
            planes[0][move[0]][move[1]][last_plane] = 0
            s_move = move[0] * self.board_size + move[1]
            t_v = t_v[0][0][0]
            new_p[0][s_move] = (t_v * (-1.) + 2.)  #+ p[0][s_move]

        # Simulates pass move and get value
        t_v = self.feed_forward_value(planes)
        t_v = t_v[0][0][0]
        new_p[0][self.input_size] = (t_v * (-1.) + 2.)  #+ p[0][self.input_size]

        # Dirichlet noise
        new_p[0] = ops.dirichlet_noise(new_p[0], dirichlet_alpha,
                                       dirichlet_epsilon)

        # Activate
        new_p = ops.softmax(new_p)

        return new_p
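
The routine above depends on module-level constants (input_planes, dirichlet_alpha, dirichlet_epsilon) and on helpers from the project's ops module. Below is a minimal NumPy sketch of the final noise-then-softmax step; dirichlet_noise_sketch is an assumed stand-in for ops.dirichlet_noise (assumed to blend the prior with a Dirichlet sample, AlphaZero-style), and the constants are illustrative rather than the project's values:

import numpy as np

def dirichlet_noise_sketch(p, alpha, epsilon):
    # Assumed behaviour: blend the prior p with a Dirichlet sample.
    noise = np.random.dirichlet([alpha] * p.shape[0])
    return (1.0 - epsilon) * p + epsilon * noise

def softmax_sketch(x):
    # Numerically stable softmax over the last axis.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

# t_v in [-1, 1] is mapped through -t_v + 2, so the pre-noise priors lie in [1, 3].
new_p = np.random.uniform(1.0, 3.0, size=(1, 82))  # 9x9 board: 81 moves + pass (assumed size)
new_p[0] = dirichlet_noise_sketch(new_p[0], alpha=0.03, epsilon=0.25)
new_p = softmax_sketch(new_p)
print(new_p.sum())  # ~1.0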
Example #5
File: rnn.py  Project: comadan/nn
    def cost_and_grad(self, data, labels, back=True, prev_h0=None):
        hps = self.hps
        T = data.shape[1]
        bsize = data.shape[2]

        # FIXME gnumpy reallocates if try and use same parameters?
        #us = self.us[:, 0:T, 0:bsize]
        #dus = self.dus[:, 0:T, 0:bsize]
        #hs = self.hs[:, 0:T, 0:bsize]
        #dhs = self.dhs[:, 0:T, 0:bsize]
        #probs = self.probs[:, 0:T, 0:bsize]
        #dprobs = self.dprobs[:, 0:T, 0:bsize]
        #costs = self.costs[0:T, 0:bsize]

        us = list()
        dus = list()
        hs = list()
        dhs = list()
        h0 = list()
        for k in xrange(hps.hidden_layers):
            us.append(list())
            dus.append(list())
            hs.append(list())
            dhs.append(list())
            h0.append(empty((hps.hidden_size, bsize)))
            for t in xrange(T):
                us[k].append(zeros((hps.hidden_size, bsize)))
                dus[k].append(zeros((hps.hidden_size, bsize)))
                hs[k].append(zeros((hps.hidden_size, bsize)))
                dhs[k].append(zeros((hps.hidden_size, bsize)))
        probs = list()
        for t in xrange(T):
            probs.append(zeros((hps.output_size, bsize)))
        costs = np.zeros((T, bsize))
        if prev_h0 is not None:
            h0 = prev_h0
        else:
            for k in xrange(hps.hidden_layers):
                h0[k] = tile(self.params['h0'][:, k].reshape(-1, 1), bsize)
        bih = self.params['bih']
        Wih = self.params['Wih']
        Whh = self.params['Whh']
        bhh = self.params['bhh']
        Who = self.params['Who']
        bho = self.params['bho']

        # Forward prop

        for t in xrange(T):
            for k in xrange(hps.hidden_layers):
                if t == 0:
                    hprev = h0[k]
                else:
                    hprev = hs[k][t-1]

                if k == 0:
                    us[k][t] = mult(Wih, data[:, t, :]) + bih
                else:
                    us[k][t] = mult(self.params['Wh%d' % k], hs[k-1][t])

                if k == hps.recurrent_layer - 1:
                    us[k][t] += mult(Whh, hprev) + bhh
                    # Clip maximum activation
                    mask = us[k][t] < hps.max_act
                    us[k][t] = us[k][t] * mask + hps.max_act * (1 - mask)
                elif k != 0:
                    us[k][t] += self.params['bh%d' % k]

                hs[k][t] = self.nl(us[k][t])

            probs[t] = softmax(mult(Who, hs[-1][t]) + bho)

        self.last_h = list()
        for k in xrange(hps.hidden_layers):
            self.last_h.append(hs[k][-1])

        if labels is None:
            return None, probs

        probs_neg_log = list()
        dprobs = list()
        for t in xrange(T):
            probs_neg_log.append(as_np(-1 * log(probs[t])))
            dprobs.append(as_np(probs[t].copy()))
        for k in xrange(bsize):
            for t in xrange(len(labels[k])):
                costs[t, k] = probs_neg_log[t][labels[k][t], k]
                dprobs[t][labels[k][t], k] -= 1
        for t in xrange(T):
            dprobs[t] = array(dprobs[t])

        # NOTE Summing costs over time
        # NOTE FIXME Dividing by T to get a better sense of whether the
        # objective is decreasing; remove for grad checking
        cost = costs.sum() / bsize / float(T)
        if not back:
            return cost, probs

        # Backprop

        for k in self.grads:
            self.grads[k][:] = 0

        for t in reversed(xrange(T)):
            self.grads['bho'] += dprobs[t][:, :].sum(axis=-1).reshape((-1, 1)) / bsize
            self.grads['Who'] += mult(dprobs[t], hs[-1][t].T) / bsize

            for k in reversed(xrange(hps.hidden_layers)):
                if k == hps.hidden_layers - 1:
                    dhs[k][t] += mult(Who.T, dprobs[t])
                else:
                    dhs[k][t] += mult(self.params['Wh%d' % (k+1)].T, dhs[k+1][t])
                dus[k][t] += get_nl_grad(self.hps.nl, us[k][t]) * dhs[k][t]

                if k > 0:
                    self.grads['Wh%d' % k] += mult(dus[k][t], hs[k-1][t].T) / bsize
                    self.grads['bh%d' % k] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

                if k == hps.recurrent_layer - 1:
                    if t == 0:
                        hprev = h0[k]
                        self.grads['h0'][:, k] = mult(Whh.T, dus[k][t]).sum(axis=-1) / bsize
                    else:
                        hprev = hs[k][t-1]
                        dhs[k][t-1] = mult(Whh.T, dus[k][t])
                    self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
                    self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

            self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
            self.grads['bih'] += dus[0][t].sum(axis=-1).reshape((-1, 1)) / bsize

        return cost, self.grads
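
The dprobs[t][labels[k][t], k] -= 1 step above uses the standard softmax cross-entropy identity: the gradient of the negative log-likelihood with respect to the logits is the predicted probabilities minus the one-hot target. A self-contained NumPy sketch of that identity with a finite-difference check, independent of this class:

import numpy as np

def softmax_np(z):
    # Numerically stable softmax for a 1-D array of logits.
    e = np.exp(z - z.max())
    return e / e.sum()

np.random.seed(0)
z = np.random.randn(5)   # logits
label = 2                # target class

probs = softmax_np(z)
loss = -np.log(probs[label])

# Analytic gradient: probs - one_hot(label), i.e. "subtract 1 at the label index".
grad = probs.copy()
grad[label] -= 1.0

# Finite-difference check of the first coordinate.
eps = 1e-6
z_pert = z.copy()
z_pert[0] += eps
num_grad0 = (-np.log(softmax_np(z_pert)[label]) - loss) / eps
print(abs(grad[0] - num_grad0) < 1e-4)  # True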
Example #6
File: rnn.py  Project: runngezhang/nn-1
    def cost_and_grad(self, data, labels, back=True, prev_h0=None):
        hps = self.hps
        T = data.shape[1]
        bsize = data.shape[2]

        # FIXME gnumpy reallocates if try and use same parameters?
        #us = self.us[:, 0:T, 0:bsize]
        #dus = self.dus[:, 0:T, 0:bsize]
        #hs = self.hs[:, 0:T, 0:bsize]
        #dhs = self.dhs[:, 0:T, 0:bsize]
        #probs = self.probs[:, 0:T, 0:bsize]
        #dprobs = self.dprobs[:, 0:T, 0:bsize]
        #costs = self.costs[0:T, 0:bsize]

        us = list()
        dus = list()
        hs = list()
        dhs = list()
        h0 = list()
        for k in xrange(hps.hidden_layers):
            us.append(list())
            dus.append(list())
            hs.append(list())
            dhs.append(list())
            h0.append(empty((hps.hidden_size, bsize)))
            for t in xrange(T):
                us[k].append(zeros((hps.hidden_size, bsize)))
                dus[k].append(zeros((hps.hidden_size, bsize)))
                hs[k].append(zeros((hps.hidden_size, bsize)))
                dhs[k].append(zeros((hps.hidden_size, bsize)))
        probs = list()
        for t in xrange(T):
            probs.append(zeros((hps.output_size, bsize)))
        costs = np.zeros((T, bsize))
        if prev_h0 is not None:
            h0 = prev_h0
        else:
            for k in xrange(hps.hidden_layers):
                h0[k] = tile(self.params['h0'][:, k].reshape(-1, 1), bsize)
        bih = self.params['bih']
        Wih = self.params['Wih']
        Whh = self.params['Whh']
        bhh = self.params['bhh']
        Who = self.params['Who']
        bho = self.params['bho']

        # Forward prop

        for t in xrange(T):
            for k in xrange(hps.hidden_layers):
                if t == 0:
                    hprev = h0[k]
                else:
                    hprev = hs[k][t - 1]

                if k == 0:
                    us[k][t] = mult(Wih, data[:, t, :]) + bih
                else:
                    us[k][t] = mult(self.params['Wh%d' % k], hs[k - 1][t])

                if k == hps.recurrent_layer - 1:
                    us[k][t] += mult(Whh, hprev) + bhh
                    # Clip maximum activation
                    mask = us[k][t] < hps.max_act
                    us[k][t] = us[k][t] * mask + hps.max_act * (1 - mask)
                elif k != 0:
                    us[k][t] += self.params['bh%d' % k]

                hs[k][t] = self.nl(us[k][t])

            probs[t] = softmax(mult(Who, hs[-1][t]) + bho)

        self.last_h = list()
        for k in xrange(hps.hidden_layers):
            self.last_h.append(hs[k][-1])

        if labels is None:
            return None, probs

        probs_neg_log = list()
        dprobs = list()
        for t in xrange(T):
            probs_neg_log.append(as_np(-1 * log(probs[t])))
            dprobs.append(as_np(probs[t].copy()))
        for k in xrange(bsize):
            for t in xrange(len(labels[k])):
                costs[t, k] = probs_neg_log[t][labels[k][t], k]
                dprobs[t][labels[k][t], k] -= 1
        for t in xrange(T):
            dprobs[t] = array(dprobs[t])

        # NOTE Summing costs over time
        # NOTE FIXME Dividing by T to get a better sense of whether the
        # objective is decreasing; remove for grad checking
        cost = costs.sum() / bsize / float(T)
        if not back:
            return cost, probs

        # Backprop

        for k in self.grads:
            self.grads[k][:] = 0

        for t in reversed(xrange(T)):
            self.grads['bho'] += dprobs[t][:, :].sum(axis=-1).reshape(
                (-1, 1)) / bsize
            self.grads['Who'] += mult(dprobs[t], hs[-1][t].T) / bsize

            for k in reversed(xrange(hps.hidden_layers)):
                if k == hps.hidden_layers - 1:
                    dhs[k][t] += mult(Who.T, dprobs[t])
                else:
                    dhs[k][t] += mult(self.params['Wh%d' % (k + 1)].T,
                                      dhs[k + 1][t])
                dus[k][t] += get_nl_grad(self.hps.nl, us[k][t]) * dhs[k][t]

                if k > 0:
                    self.grads['Wh%d' %
                               k] += mult(dus[k][t], hs[k - 1][t].T) / bsize
                    self.grads['bh%d' % k] += dus[k][t].sum(axis=-1).reshape(
                        (-1, 1)) / bsize

                if k == hps.recurrent_layer - 1:
                    if t == 0:
                        hprev = h0[k]
                        self.grads['h0'][:, k] = mult(
                            Whh.T, dus[k][t]).sum(axis=-1) / bsize
                    else:
                        hprev = hs[k][t - 1]
                        dhs[k][t - 1] = mult(Whh.T, dus[k][t])
                    self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
                    self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape(
                        (-1, 1)) / bsize

            self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
            self.grads['bih'] += dus[0][t].sum(axis=-1).reshape(
                (-1, 1)) / bsize

        return cost, self.grads
Example #7
    def __init__(self, opts):
        logging.error('Building the Tensorflow Graph')

        # --- Create session
        self.sess = tf.Session()
        self.opts = opts

        # --- Some of the parameters for future use
        assert opts['dataset'] in datashapes, 'Unknown dataset.'
        self.data_shape = datashapes[opts['dataset']]

        # --- Placeholders
        self.add_model_placeholders()
        self.add_training_placeholders()
        sample_size = tf.shape(self.u_points,out_type=tf.int64)[0]
        range = tf.range(sample_size)
        zero = tf.zeros([tf.cast(sample_size,dtype=tf.int32)],dtype=tf.int64)
        # --- Initialize prior parameters
        self.pz_mean, self.pz_sigma = init_gaussian_prior(opts)
        self.pi0 = init_cat_prior(opts)
        # --- Encoding inputs
        probs_logit = label_encoder(self.opts, self.u_points, False,
                                                        self.is_training)
        self.probs = ops.softmax(probs_logit,axis=-1)
        logit_pi, self.u_enc_mean, self.u_enc_logSigma = self.encoder(
                                                        self.u_points,
                                                        False)
        log_Zpi = ops.log_sum_exp(logit_pi,axis=-1,keepdims=True)
        logit = logit_pi - log_Zpi \
                + tf.expand_dims(probs_logit,axis=-1)
        u_logit = ops.log_sum_exp(logit,axis=1,keepdims=False)
        #self.u_pi = ops.softmax(u_logit,axis=-1)
        u_pi = tf.multiply(ops.softmax(logit_pi,axis=-1),tf.expand_dims(self.probs,axis=-1))
        self.u_pi = tf.reduce_sum(u_pi,axis=1,keepdims=False)

        logit_pi, self.l_enc_mean, self.l_enc_logSigma = self.encoder(
                                                        self.l_points,
                                                        True)
        idx_label = tf.stack([range,self.l_labels], axis=-1)
        logit = tf.gather_nd(logit_pi,idx_label)
        self.l_pi = ops.softmax(logit,axis=-1)
        # --- Sampling from encoded MoG prior
        self.u_mixtures_encoded = sample_mixtures(opts, self.u_enc_mean,
                                                        tf.exp(self.u_enc_logSigma),
                                                        sample_size,'tensorflow')
        self.l_mixtures_encoded = sample_mixtures(opts, self.l_enc_mean,
                                                        tf.exp(self.l_enc_logSigma),
                                                        sample_size,'tensorflow')
        # --- Decoding encoded points (i.e. reconstruct)
        self.u_reconstructed, self.u_reconstructed_logits = self.decoder(
                                                        self.u_mixtures_encoded,
                                                        False)
        self.l_reconstructed, self.l_reconstructed_logits = self.decoder(
                                                        self.l_mixtures_encoded,
                                                        True)
        self.labels_reconstructed, self.labels_reconstructed_logits = discrete_decoder(
                                                        opts,
                                                        self.label_noise,
                                                        False,
                                                        self.is_training)
        # --- Reconstructing inputs (only for visualization)
        idx = tf.reshape(tf.multinomial(tf.nn.log_softmax(u_logit),1),[-1])
        mix_idx = tf.stack([range,idx],axis=-1)
        self.encoded_point = tf.gather_nd(self.u_mixtures_encoded,mix_idx)
        self.reconstructed_point = tf.gather_nd(self.u_reconstructed,mix_idx)
        self.reconstructed_logit = tf.gather_nd(self.u_reconstructed_logits,mix_idx)

        # --- Sampling from model (only for generation)
        self.decoded, self.decoded_logits = self.decoder(self.sample_noise,
                                                        True)
        # --- Objectives, losses, penalties, pretraining
        # Compute reconstruction cost
        self.l_loss_reconstruct = reconstruction_loss(opts, self.l_pi,
                                                        self.l_points,
                                                        self.l_reconstructed,
                                                        self.l_labels,
                                                        tf.argmax(self.labels_reconstructed,axis=-1))
        self.u_loss_reconstruct = reconstruction_loss(opts, self.u_pi,
                                                        self.u_points,
                                                        self.u_reconstructed)
        # Compute matching penalty cost
        self.kl_g, self.kl_d, self.l_cont_penalty, self.l_disc_penalty = matching_penalty(opts,
                                                        self.pi0, self.l_pi,
                                                        self.l_enc_mean, self.l_enc_logSigma,
                                                        self.pz_mean, self.pz_sigma,
                                                        self.l_sample_mix_noise, self.l_mixtures_encoded)
        self.kl_g, self.kl_d, self.u_cont_penalty, self.u_disc_penalty = matching_penalty(opts,
                                                        self.pi0, self.u_pi,
                                                        self.u_enc_mean, self.u_enc_logSigma,
                                                        self.pz_mean, self.pz_sigma,
                                                        self.u_sample_mix_noise, self.u_mixtures_encoded)
        # Compute Labeled obj
        self.l_loss = self.l_loss_reconstruct\
                         + self.l_lmbd * self.l_cont_penalty\
                         + self.l_beta * self.l_disc_penalty
        # Compute Unlabeled obj
        self.u_loss = self.u_loss_reconstruct\
                         + self.u_lmbd * self.u_cont_penalty\
                         + self.u_beta * self.u_disc_penalty
        # Compute wae obj
        self.objective = self.alpha*self.alpha_decay * self.l_loss + self.u_loss

        # Pre Training
        self.pretrain_loss()

        # --- Optimizers, savers, etc
        self.add_optimizers()
        self.add_savers()
        self.init = tf.global_variables_initializer()
Example #8
    def cost_and_grad(self, data, labels, back=True):
        hps = self.hps
        grads = self.grads

        # May not be full batch size if at end of dataset
        bsize = data.shape[-1]

        p = ParamStruct(**self.params)

        # Forward prop

        acts = list()
        acts.append(self.nl(mult(p.Wih, data) + p.bih))

        for k in xrange(hps.hidden_layers - 1):
            W = self.params['W%d' % (k + 1)]
            b = self.params['b%d' % (k + 1)]
            acts.append(self.nl(mult(W, acts[-1]) + b))

        y = mult(p.Who, acts[-1]) + p.bho
        probs = softmax(y)

        if labels is None:
            return None, probs

        # NOTE For more precision if necessary convert to nparray early
        cost_array = np.empty(bsize, dtype=np.float64)
        # Speed things up by doing assignments off gpu
        neg_log_prob = -1 * np.log(as_np(probs))
        for k in xrange(bsize):
            cost_array[k] = neg_log_prob[labels[k], k]
        cost = cost_array.sum() / bsize

        if not back:
            return cost, probs

        # Backprop

        for k in self.grads:
            self.grads[k][:] = 0

        # Do assignments off GPU to speed things up
        dLdy = as_np(probs)
        # NOTE This changes probs
        for k in xrange(bsize):
            dLdy[labels[k], k] -= 1
        dLdy = array(dLdy)

        grads['bho'] = dLdy.sum(axis=1).reshape((-1, 1))
        grads['Who'] = mult(dLdy, acts[-1].T)
        Ws = [p.Wih] + [
            self.params['W%d' % (k + 1)] for k in xrange(hps.hidden_layers - 1)
        ] + [p.Who]
        deltas = [dLdy]

        for k in reversed(xrange(hps.hidden_layers - 1)):
            delta = get_nl_grad(self.hps.nl, acts[k + 1]) * mult(
                Ws[k + 2].T, deltas[-1])
            deltas.append(delta)
            grads['b%d' % (k + 1)] = delta.sum(axis=1).reshape((-1, 1))
            grads['W%d' % (k + 1)] = mult(delta, acts[k].T)

        delta = get_nl_grad(self.hps.nl, acts[0]) * mult(Ws[1].T, deltas[-1])
        grads['bih'] = delta.sum(axis=1).reshape((-1, 1))
        grads['Wih'] = mult(delta, data.T)

        # Normalize
        for k in self.grads:
            self.grads[k] /= bsize

        return cost, self.grads
Example #9
File: dnn.py  Project: comadan/nn
    def cost_and_grad(self, data, labels, back=True):
        hps = self.hps
        grads = self.grads

        # May not be full batch size if at end of dataset
        bsize = data.shape[-1]

        p = ParamStruct(**self.params)

        # Forward prop

        acts = list()
        acts.append(self.nl(mult(p.Wih, data) + p.bih))

        for k in xrange(hps.hidden_layers - 1):
            W = self.params['W%d' % (k+1)]
            b = self.params['b%d' % (k+1)]
            acts.append(self.nl(mult(W, acts[-1]) + b))

        y = mult(p.Who, acts[-1]) + p.bho
        probs = softmax(y)

        if labels is None:
            return None, probs

        # NOTE For more precision if necessary convert to nparray early
        cost_array = np.empty(bsize, dtype=np.float64)
        # Speed things up by doing assignments off gpu
        neg_log_prob = -1 * np.log(as_np(probs))
        for k in xrange(bsize):
            cost_array[k] = neg_log_prob[labels[k], k]
        cost = cost_array.sum() / bsize

        if not back:
            return cost, probs

        # Backprop

        for k in self.grads:
            self.grads[k][:] = 0

        # Do assignments off GPU to speed things up
        dLdy = as_np(probs)
        # NOTE This changes probs
        for k in xrange(bsize):
            dLdy[labels[k], k] -= 1
        dLdy = array(dLdy)

        grads['bho'] = dLdy.sum(axis=1).reshape((-1, 1))
        grads['Who'] = mult(dLdy, acts[-1].T)
        Ws = [p.Wih] + [self.params['W%d' % (k+1)] for k in xrange(hps.hidden_layers - 1)] + [p.Who]
        deltas = [dLdy]

        for k in reversed(xrange(hps.hidden_layers - 1)):
            delta = get_nl_grad(self.hps.nl, acts[k+1]) * mult(Ws[k + 2].T, deltas[-1])
            deltas.append(delta)
            grads['b%d' % (k+1)] = delta.sum(axis=1).reshape((-1, 1))
            grads['W%d' % (k+1)] = mult(delta, acts[k].T)

        delta = get_nl_grad(self.hps.nl, acts[0]) * mult(Ws[1].T, deltas[-1])
        grads['bih'] = delta.sum(axis=1).reshape((-1, 1))
        grads['Wih'] = mult(delta, data.T)

        # Normalize
        for k in self.grads:
            self.grads[k] /= bsize

        return cost, self.grads
Example #10
    def forward(self, img):
        device = img.get_device()
        bn = img.shape[0]
        img_preprocessed = self.preprocessor(img)

        # Shape Representation with HG
        if self.module in [1, 3]:
            img_shape = self.hg_shape(img_preprocessed)
            img_shape = self.dropout(img_shape)
            img_shape = self.out(img_shape)
            feature_map = self.to_parts(img_shape)

        # Shape Representation with ViT
        if self.module in [2, 4]:
            img_shape = self.conv1(img_preprocessed)
            feature_map = self.vit_shape(img_shape)

        # Get Normalized Maps
        map_normalized = softmax(feature_map)

        # Get Stack for Appearance Hourglass
        map_transformed = self.map_transform(map_normalized)
        stack = map_transformed + img_preprocessed

        # Use old method:
        if self.module in [1, 3]:
            mu, L_inv = get_mu_and_prec(map_normalized, device, L_inv_scal=0.8)

        # Use new method
        if self.module in [2, 4]:
            mu = get_mu(map_normalized, device)
            L_inv = self.L_inv(feature_map)
            L_inv = self.sigmoid(self.bn(L_inv)).reshape(bn, self.k, 2)
            rot, scal = 2 * 3.141 * L_inv[:, :, 0].reshape(
                -1), 20 * L_inv[:, :, 1].reshape(-1)
            scal_matrix = torch.cat([
                torch.tensor([[scal[i], 0.], [0., 0.]],
                             device=device).unsqueeze(0)
                for i in range(scal.shape[0])
            ], 0).reshape(bn, self.k, 2, 2)
            rot_mat = torch.cat([
                rotation_mat(rot[i].reshape(-1)).unsqueeze(0)
                for i in range(rot.shape[0])
            ], 0).reshape(bn, self.k, 2, 2)
            L_inv = torch.tensor([[30., 0.], [0., 30.]], device=device).unsqueeze(0).unsqueeze(0).repeat(bn, self.k, 1, 1) - \
                   scal_matrix
            L_inv = rot_mat @ L_inv @ rot_mat.transpose(2, 3)

        # Make Heatmap
        heat_map = get_heat_map(mu, L_inv, device, self.background)
        norm = torch.sum(heat_map, 1, keepdim=True) + 1
        heat_map_norm = heat_map / norm

        # Get Appearance Representation with HG
        if self.module in [1, 3]:
            img_app = self.hg_appearance(stack)
            raw_features = self.to_features(img_app)

        # Get Appearance Representation with ViT
        if self.module in [2, 4]:
            raw_features = self.vit_appearance(stack)

        # Get Localized Part Appearances
        part_appearances = torch.einsum('bfij, bkij -> bkf', raw_features,
                                        heat_map_norm)

        return mu, L_inv, map_normalized, heat_map, heat_map_norm, part_appearances
Example #11
def seperate_hourglass(inputs, train, n_landmark, n_features, nFeat_1, nFeat_2):
    _, h, w, c = inputs.get_shape().as_list()
    nLow = 4  # preprocessing reduces the resolution by a factor of 2, the hourglass by a factor of 16 (2⁴), e.g. 128 -> 4
    n_Low_feat = 1
    dropout_rate = 0.2

    # Storage Table
    hg = [None] * 2
    ll = [None] * 2
    ll_ = [None] * 2
    drop = [None] * 2
    out = [None] * 2
    out_ = [None] * 2
    sum_ = [None] * 2

    nFeat_1 = nFeat_1
    nFeat_2 = nFeat_2

    train = train

    with tf.variable_scope('model'):
        with tf.variable_scope('preprocessing'):
            if h == 256:
                pad1 = tf.pad(inputs, [[0, 0], [2, 2], [2, 2], [0, 0]], name='pad_1')
                conv1 = _conv_bn_relu(pad1, filters=64, train=train, kernel_size=6, strides=2, name='conv_256_to_128')
                r1 = _residual(conv1, num_out=128, train=train, name='r1')
                pool1 = tf.contrib.layers.max_pool2d(r1, [2, 2], [2, 2], padding='VALID')
                r2 = _residual(pool1, num_out=int(nFeat_1 / 2), train=train, name='r2')
                r3 = _residual(r2, num_out=nFeat_1, train=train, name='r3')

            elif h == 128:
                pad1 = tf.pad(inputs, [[0, 0], [2, 2], [2, 2], [0, 0]], name='pad_1')
                conv1 = _conv_bn_relu(pad1, filters=64, train=train, kernel_size=6, strides=2, name='conv_64_to_32')
                r3 = _residual(conv1, num_out=nFeat_1, train=train, name='r3')

            elif h == 64:
                pad1 = tf.pad(inputs, [[0, 0], [3, 2], [3, 2], [0, 0]], name='pad_1')
                conv1 = _conv_bn_relu(pad1, filters=64, train=train, kernel_size=6, strides=1, name='conv_64_to_32')
                r3 = _residual(conv1, num_out=nFeat_1, train=train, name='r3')

            else:
                raise ValueError

        with tf.variable_scope('stage_0'):
            hg[0] = _hourglass(r3, nLow, nFeat_1, train=train, name='hourglass')
            drop[0] = tf.layers.dropout(hg[0], rate=dropout_rate, training=train, name='dropout')
            ll[0] = _conv_bn_relu(drop[0], nFeat_1, train=train, kernel_size=1, strides=1, pad='VALID', name='conv')
            ll_[0] = _conv(ll[0], nFeat_1, 1, 1, 'VALID', 'll')
            out[0] = _conv(ll[0], n_landmark, 1, 1, 'VALID', 'out')
            out_[0] = _conv(softmax(out[0]), nFeat_1, 1, 1, 'VALID', 'out_')
            sum_[0] = tf.add_n([out_[0], r3], name='merge')

        with tf.variable_scope('stage_1'):
            hg[1] = _hourglass(sum_[0], n_Low_feat, nFeat_2, train=train, name='hourglass')
            drop[1] = tf.layers.dropout(hg[1], rate=dropout_rate,
                                                      training=train, name='dropout')
            ll[1] = _conv_bn_relu(drop[1], nFeat_2, train=train, kernel_size=1, strides=1,
                                           pad='VALID', name='conv')

            out[1] = _conv(ll[1], n_features, 1, 1, 'VALID', 'out')

        features = out[1]
        return softmax(out[0]), features
Example #12
def neural_net(x):
    h = x
    for w in (w1, w2):
        h = dot(h, w)
    return h, softmax(h)
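
Example #12 relies on module-level weights w1 and w2 and on dot/softmax helpers that are not shown. A self-contained NumPy sketch with assumed shapes, purely for illustration:

import numpy as np

def softmax(z):
    # Row-wise, numerically stable softmax.
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

dot = np.dot
np.random.seed(0)
w1 = np.random.randn(4, 8)   # assumed input dim 4, hidden dim 8
w2 = np.random.randn(8, 3)   # assumed output dim 3

def neural_net(x):
    # Two stacked linear layers (no nonlinearity), as in the example above.
    h = x
    for w in (w1, w2):
        h = dot(h, w)
    return h, softmax(h)

logits, probs = neural_net(np.random.randn(2, 4))
print(probs.sum(axis=-1))  # each row sums to 1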