Example #1
import os

import tensorflow as tf
import tensorpack as tp
from tensorpack.callbacks import (Callbacks, ClassificationError,
                                  InferenceRunner, ModelSaver, ScalarStats,
                                  StatPrinter)
from tensorpack.utils import logger


def get_config():
    basename = os.path.basename(__file__)
    logger.set_logger_dir(
        os.path.join('train_log', basename[:basename.rfind('.')]))

    # prepare dataset
    dataset_train = tp.BatchData(tp.dataset.Mnist('train'), 128)
    dataset_test = tp.BatchData(tp.dataset.Mnist('test'), 256, remainder=True)
    step_per_epoch = dataset_train.size()

    # prepare session
    sess_config = tp.get_default_sess_config()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    lr = tf.train.exponential_decay(
        learning_rate=1e-3,
        global_step=tp.get_global_step_var(),
        decay_steps=dataset_train.size() * 10,
        decay_rate=0.3, staircase=True, name='learning_rate')
    tf.summary.scalar('learning_rate', lr)

    return tp.TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_test,
                [ScalarStats('cost'), ClassificationError() ])
        ]),
        session_config=sess_config,
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=100,
    )
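
A minimal launch sketch for this config, assuming the old tensorpack API of this era in which TrainConfig takes dataset= and training starts by handing the config to a trainer; QueueInputTrainer is my assumption based on contemporaneous tensorpack examples, not stated in the source:

if __name__ == '__main__':
    config = get_config()
    tp.QueueInputTrainer(config).train()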
Example #2
from typing import Any

import tensorflow as tf
import tensorpack as tp

def optimizer(self) -> Any:
    # Build an Adam optimizer whose learning rate decays exponentially,
    # with every schedule parameter read from the model's hparams dict.
    lr = tf.train.exponential_decay(
        learning_rate=self.hparams["base_learning_rate"],
        global_step=tp.get_global_step_var(),
        decay_steps=self.hparams["decay_steps"],
        decay_rate=self.hparams["decay_rate"],
        staircase=True,
        name="learning_rate",
    )
    tf.summary.scalar("lr", lr)
    return tf.train.AdamOptimizer(lr)
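
For reference, a minimal hparams dict covering the keys this method reads; the concrete values here are illustrative assumptions, not taken from the source:

hparams = {
    "base_learning_rate": 1e-3,  # initial Adam learning rate
    "decay_steps": 10000,        # interval between staircase drops
    "decay_rate": 0.3,           # multiplier applied at each drop
}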
Example #3
import tensorflow as tf
from tensorpack.tfutils import get_global_step_var
from tensorpack.tfutils.summary import summary_moving_average
from tensorpack.tfutils.tower import TowerContext

def _setup(self):
    super(GANTrainer, self)._setup()
    # Build the GAN graph once, inside a (default) tower context.
    with TowerContext(''):
        actual_inputs = self._get_input_tensors()
        self.model.build_graph(actual_inputs)
    # Separate minimization ops for the generator and the discriminator,
    # each restricted to its own variable list.
    self.g_min = self.config.optimizer.minimize(
        self.model.g_loss, var_list=self.model.g_vars, name='g_op')
    self.d_min = self.config.optimizer.minimize(
        self.model.d_loss, var_list=self.model.d_vars, name='d_op')
    self.gs_incr = tf.assign_add(
        get_global_step_var(), 1, name='global_step_incr')
    self.summary_op = summary_moving_average()
    # Fold the summary update and the global-step increment into the
    # discriminator step, so one run of d_min also advances the counter.
    self.d_min = tf.group(self.d_min, self.summary_op, self.gs_incr)
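
One training step would then alternate the two ops; a minimal run_step sketch, assuming the trainer exposes its session as self.sess (the generator-first ordering is my assumption, not stated in the source):

def run_step(self):
    self.sess.run(self.g_min)  # generator update
    self.sess.run(self.d_min)  # discriminator update + summaries + step counter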
Example #4
    def build_graph(self, image, label):
        image = image / 128.0
        assert tf.test.is_gpu_available()

        with tf.variable_scope(self._name):
            x = ScaleNormConv2D(image, 16, 3, 1, name="conv_input")
            # shape = [batchsize, 32, 32, 16]
            x = CifarResNet.build_group(x,
                                        self._n,
                                        16,
                                        stride=1,
                                        mult_decay=self._mult_decay,
                                        name="g1")
            # shape = [batchsize, 16, 16, 32]
            x = CifarResNet.build_group(x,
                                        self._n,
                                        32,
                                        stride=2,
                                        mult_decay=self._mult_decay,
                                        name="g2")
            # shape = [batchsize, 8, 8, 64]
            x = CifarResNet.build_group(x,
                                        self._n,
                                        64,
                                        stride=2,
                                        mult_decay=self._mult_decay,
                                        name="g3")
            # normalise the final output by the accumulated multiplier
            #x = BatchNorm("bn_last", x, epsilon=EPSILON, center=False, scale=True)
            x = ActBias(x, name="act_top")
            #
            x = GlobalAvgPooling("gap", x)
            logits = FullyConnected("linear", x, self._n_classes)
            prob = tf.nn.softmax(logits, name="prob")

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name="cross_entropy_loss")

        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                        tf.float32,
                        name="wrong_vector")
        add_moving_summary(tf.reduce_mean(wrong, name="train_error"))

        # weight-decay coefficient: starts at 2e-4 and is multiplied by 0.2
        # every 480k steps (staircase schedule on the global step)
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                          480000, 0.2, staircase=True)
        wd_cost = tf.multiply(wd_w,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='wd_cost')
        add_moving_summary(cost, wd_cost)

        return tf.add_n([cost, wd_cost], name="cost")
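
For reference, with staircase=True the weight-decay coefficient above follows this closed form (my reading of tf.train.exponential_decay semantics):

# wd_w(step) = 0.0002 * 0.2 ** floor(step / 480000)
# i.e. wd_w stays at 2e-4 until step 480k, then drops to 4e-5, and so on.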
Example #5
    def build_graph(self, image, label):
        scale_image = 1. / 128.0
        image = image * scale_image
        image_moment2 = CIFAR_TRAIN_PIXEL_MOMENT2 * scale_image * scale_image
        assert tf.test.is_gpu_available()

        with tf.variable_scope(self._name):
            x = NormConv2DScale(image,
                                16,
                                3,
                                1,
                                center=self._center,
                                input_moment2=image_moment2,
                                name="conv_input")
            add_activation_summary(x, types=["mean", "rms", "histogram"])
            # shape = [batchsize, 32, 32, 16]
            x = CifarResNet.build_group(x,
                                        self._n,
                                        16,
                                        stride=1,
                                        center=self._center,
                                        theta_init=self._theta_init,
                                        theta_lr_mult=self._theta_lr_mult,
                                        name="g1")
            add_activation_summary(x, types=["mean", "rms", "histogram"])
            # shape = [batchsize, 16, 16, 32]
            x = CifarResNet.build_group(x,
                                        self._n,
                                        32,
                                        stride=2,
                                        center=self._center,
                                        theta_init=self._theta_init,
                                        theta_lr_mult=self._theta_lr_mult,
                                        name="g2")
            add_activation_summary(x, types=["mean", "rms", "histogram"])
            # shape = [batchsize, 8, 8, 64]
            x = CifarResNet.build_group(x,
                                        self._n,
                                        64,
                                        stride=2,
                                        center=self._center,
                                        theta_init=self._theta_init,
                                        theta_lr_mult=self._theta_lr_mult,
                                        name="g3")
            add_activation_summary(x, types=["mean", "rms", "histogram"])
            x = ActBias(x, name="act_top")
            #
            x = GlobalAvgPooling("gap", x)
            logits = FullyConnected("linear", x, self._n_classes)
            prob = tf.nn.softmax(logits, name="prob")

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name="cross_entropy_loss")

        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                        tf.float32,
                        name="wrong_vector")
        add_moving_summary(tf.reduce_mean(wrong, name="train_error"))

        # same staircase weight-decay schedule as in Example #4
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                          480000, 0.2, staircase=True)
        wd_cost = tf.multiply(wd_w,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='wd_cost')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/theta', ['histogram']))
        add_param_summary(('.*/ma_mu', ['histogram']))
        return tf.add_n([cost, wd_cost], name="cost")
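
If add_param_summary accepts several (regex, [types]) pairs in one call, as it does in the tensorpack versions I know, the two calls above can be merged:

add_param_summary(('.*/theta', ['histogram']),
                  ('.*/ma_mu', ['histogram']))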