Example #1
def main(args):
    # Load the data
    (x_train, y_train), (x_test, y_test) = load_cifar(args.cifar_root)

    # Randomly sample training examples
    train_num = x_train.shape[0]

    def next_batch(batch_size):
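        # np.random.choice samples with replacement by default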
        idx = np.random.choice(train_num, batch_size)
        return x_train[idx], y_train[idx]

    # Build the network
    vgg = VGG(image_size=32, name='vgg11')
    opt = RmsProp(vgg.weights, lr=args.lr, decay=1e-3)

    # Load weights from a checkpoint
    if args.checkpoint:
        weights = load_weights(args.checkpoint)
        vgg.load_weights(weights)
        print("load weights done")

    # Evaluation only
    if args.eval_only:
        indices = np.random.choice(len(x_test), args.eval_num, replace=False)
        print('{} start evaluate'.format(
            time.asctime(time.localtime(time.time()))))
        acc = get_accuracy(vgg, x_test[indices], ys=y_test[indices])
        print('{} acc on test dataset is :{:.3f}'.format(
            time.asctime(time.localtime(time.time())), acc))
        return

    # Training loop
    num_steps = args.steps
    for step in range(num_steps):
        x, y_true = next_batch(args.batch_size)
        # Forward pass
        y_predict = vgg.forward(x.astype(np.float64))  # np.float was removed in NumPy >= 1.24
        # print('y_pred: min{},max{},mean:{}'.format(np.min(y_predict, axis=-1),
        #                                            np.max(y_predict, axis=-1),
        #                                            np.mean(y_predict, axis=-1)))
        # print('y_pred: {}'.format(y_predict))
        acc = np.mean(
            np.argmax(y_predict, axis=1) == np.argmax(y_true, axis=1))
        # Compute the loss
        loss, gradient = cross_entropy_loss(y_predict, y_true)

        # Backward pass
        vgg.backward(gradient)
        # Apply the gradient update
        opt.iterate(vgg)

        # Log progress
        print('{} step:{},loss:{:.4f},acc:{:.4f}'.format(
            time.asctime(time.localtime(time.time())), step, loss, acc))

        # Save weights periodically
        if step % 100 == 0:
            save_weights(
                os.path.join(args.save_dir, 'weights-{:03d}.pkl'.format(step)),
                vgg.weights)
Example #2
    def backward(self, train_data, y_true):
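        # Backpropagate in reverse order: output loss -> fc3 -> relu -> fc2 ->
        # relu -> fc1, then through the flatten, pooling and conv layers.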
        loss, self.gradients["A3"] = losses.cross_entropy_loss(self.nodes["A3"], y_true)
        self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \
            layer.fc_backward(self.gradients["A3"], self.Parameters["W3"], self.nodes["Z2"])

        self.gradients["A2"] = activations.relu_backward(self.gradients["Z2"].T, self.nodes["A2"])
        self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A2"], self.Parameters["W2"], self.nodes["Z1"])

        self.gradients["A1"] = activations.relu_backward(self.gradients["Z1"].T, self.nodes["A1"])
        self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A1"], self.Parameters["W1"], self.nodes["X2"])

        self.gradients["Z1"] = self.gradients["Z1"].reshape((128, 16, 5, 5))

        self.gradients["Maxpool2"] = layer.max_pooling_backward(self.gradients["Z1"], self.nodes["Conv2"], (2, 2))
        self.gradients["K2"], self.gradients["Kb2"], self.gradients["KZ2"] = \
            layer.conv_backward(self.gradients["Maxpool2"], self.Parameters["K2"], self.nodes["Maxpool1"])

        self.gradients["Maxpool1"] = \
            layer.max_pooling_backward(self.gradients["KZ2"], self.nodes["Conv1"], (2, 2))
        self.gradients["K1"], self.gradients["Kb1"], self.gradients["KZ1"] = \
            layer.conv_backward(self.gradients["Maxpool1"], self.Parameters["K1"], train_data)

        return loss
Example #3
 def testCrossEntropyLossAllWrongWithWeight(self):
     with self.test_session():
         logits = tf.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                               [0.0, 0.0, 10.0]])
         labels = tf.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
         loss = losses.cross_entropy_loss(logits, labels, weight=0.5)
         self.assertEqual(loss.op.name, 'CrossEntropyLoss/value')
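         # Every example is confidently wrong by a logit margin of 10, so the
         # per-example cross-entropy is ~10.0; the 0.5 weight scales it to 5.0.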
         self.assertAlmostEqual(loss.eval(), 5.0, 3)
Example #4
 def backward(self, train_data, y_true):
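     # Backpropagate from the output through the three fully connected layers,
     # applying the ReLU derivative between them.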
     loss, self.gradients["y"] = cross_entropy_loss(self.nurons["y"], y_true)
     self.gradients["W3"], self.gradients["b3"], self.gradients["z3_relu"] = fc_backward(self.gradients["y"],
                                                                                         self.weights["W3"],
                                                                                         self.nurons["z3_relu"])
     self.gradients["z3"] = relu_backward(self.gradients["z3_relu"], self.nurons["z3"])
     self.gradients["W2"], self.gradients["b2"], self.gradients["z2_relu"] = fc_backward(self.gradients["z3"],
                                                                                         self.weights["W2"],
                                                                                         self.nurons["z2_relu"])
     self.gradients["z2"] = relu_backward(self.gradients["z2_relu"], self.nurons["z2"])
     self.gradients["W1"], self.gradients["b1"], _ = fc_backward(self.gradients["z2"],
                                                                 self.weights["W1"],
                                                                 train_data)
     return loss
Example #5
    def backward(self, train_data, y_true):
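        # Backward pass for the three fully connected layers:
        # output -> fc3 -> relu -> fc2 -> relu -> fc1 -> input.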
        loss, self.gradients["A3"] = losses.cross_entropy_loss(
            self.nodes["A3"], y_true)
        self.gradients["W3"], self.gradients["B3"], self.gradients["Z2"] = \
            layer.fc_backward(self.gradients["A3"], self.Parameters["W3"], self.nodes["Z2"])

        self.gradients["A2"] = activations.relu_backward(
            self.gradients["Z2"].T, self.nodes["A2"])
        self.gradients["W2"], self.gradients["B2"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A2"], self.Parameters["W2"], self.nodes["Z1"])

        self.gradients["A1"] = activations.relu_backward(
            self.gradients["Z1"].T, self.nodes["A1"])
        self.gradients["W1"], self.gradients["B1"], self.gradients["Z1"] = \
            layer.fc_backward(self.gradients["A1"], self.Parameters["W1"], train_data)

        return loss
Example #6
 def backward_pass(self, layer_activations, targets):
     '''
     Return the deltas for each layer of the network; deltas are as defined
     in Michael Nielsen's book.
     '''
     # Backward propagation : calculate the errors and gradients
     deltas = [None] * (len(self.layer_sizes))
     # we assume that loss is always cross entropy and last layer is a softmax layer
     deltas[-1] = losses.cross_entropy_loss(layer_activations[-1],
                                            targets,
                                            deriv=True)
     # iterate backwards from the second-to-last layer; deltas[0] (the input layer) stays None
     for layer in range(len(deltas) - 2, 0, -1):
         deltas[layer] = np.dot(deltas[layer + 1],
                                self.weight_matrices[layer].T) * activation(
                                    layer_activations[layer],
                                    type=self.non_lins[layer],
                                    deriv=True)
     return deltas
Example #7
    def make_train_op(self):
        masks = self.tf_placeholders['dir_masks']
        logits, pred_regs, mask_logits = self.make_rpn_op(training=True)
        labels = self.tf_placeholders['labels']
        gt_regs = self.tf_placeholders['bbox_regs']
        weights = self.tf_placeholders['weights']
        learning_rate = self.tf_placeholders['learning_rate']
        batch_size = self.cfg.batch_size

        clf_losses, reg_losses = [], []
        for i in self.block_layers:
            # classification loss (object vs background)
            logits_i = tf.reshape(logits[i], [batch_size, -1, 2])
            labels_i = tf.maximum(labels[i], 0)
            cross_entropy_loss = losses.cross_entropy_loss(logits=logits_i,
                                                           labels=labels_i,
                                                           weights=weights[i])

            # regression loss
            reg_loss = losses.reg_loss(gt_regs[i], pred_regs[i], labels[i])
            reg_loss = self.cfg.reg_weight * reg_loss
            clf_losses.append(cross_entropy_loss)
            reg_losses.append(reg_loss)

        # segmentation loss: semantic and direction mask terms
        sem_seg_loss, dir_seg_loss = losses.mask_loss(mask_logits, masks)
        mask_loss = self.cfg.sem_seg_weight * tf.reduce_mean(sem_seg_loss) \
            + self.cfg.dir_seg_weight * tf.reduce_mean(dir_seg_loss)

        solver = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)

        # regular_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # loss = cross_entropy_loss + reg_loss + tf.add_n(regular_losses)
        clf_loss = tf.add_n(clf_losses)
        reg_loss = tf.add_n(reg_losses)
        loss = clf_loss + reg_loss + mask_loss

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_op = solver.minimize(loss, global_step=self.global_step)
            self.loss_op = [clf_loss, reg_loss, mask_loss]
Example #8
 def score(self, x, target, metric='accuracy'):
     y = self.predict(x)
     if metric == 'accuracy':
         return accuracy(y, target)
     elif metric == 'loss':
         return losses.cross_entropy_loss(y, target)
     else:
         raise ValueError('unknown metric: {}'.format(metric))
Example #9
def pretrain(opts):
    # dataset and iterator
    dataset = Dataset(opts.rawdata_path)
    dataset_train = dataset.get_dataset(opts)
    iterator = tf.data.Iterator.from_structure(dataset_train.output_types,
                                               dataset_train.output_shapes)
    v_a, v_b, label = iterator.get_next()

    # network
    outputs, training = get_network(tf.concat((v_a, v_b), axis=0), opts)

    # save and load
    saver = tf.train.Saver(var_list=tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope=opts.network))

    # loss
    loss, accuracy = cross_entropy_loss(outputs, label, opts)

    # summary
    writer_train = tf.summary.FileWriter(
        os.path.join(opts.output_path, opts.time, 'logs'),
        tf.get_default_graph())
    summary_op = tf.summary.merge_all()

    # optimizer
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(opts.lr,
                                    global_step,
                                    dataset_train.length * 67,
                                    0.1,
                                    staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        loss, global_step=global_step, colocate_gradients_with_ops=True)
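    # run pending UPDATE_OPS (e.g. batch-norm moving averages) together with each train step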
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = tf.group(update_ops + [train_op])

    # main loop
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        print('training loop start')
        start_train = time.time()  # time.clock() was removed in Python 3.8
        for epoch in range(1, opts.epochs + 1):
            print('epoch: %d' % epoch)
            start_ep = time.time()
            # train
            print('training')
            sess.run(iterator.make_initializer(dataset_train))
            while True:
                try:
                    summary_train, _ = sess.run([summary_op, train_op],
                                                feed_dict={training: True})
                    writer_train.add_summary(
                        summary_train, tf.train.global_step(sess, global_step))
                except tf.errors.OutOfRangeError:
                    break
            print('step: %d' % tf.train.global_step(sess, global_step))
            # save model
            if epoch % opts.save_freq == 0 or epoch == opts.epochs:
                print('save model')
                saver.save(
                    sess,
                    os.path.join(opts.output_path, opts.time,
                                 'pretrainmodel.ckpt'))
            print("epoch end, elapsed time: %ds, total time: %ds" %
                  (time.clock() - start_train, time.clock() - start_ep))
        print('training loop end')
        writer_train.close()