Exemplo n.º 1
0
    def __init__(self,
                 session,
                 parallel_models,
                 optimize_op,
                 train_set=None,
                 eval_set=None,
                 variables=None,
                 lr=0.001,
                 device_type='gpu',
                 save_dir="test"):
        """Initialise the learner and build an Adam step on summed gradients.

        The gradient lists exposed by the parallel models (their ``grads``
        attribute) are added element-wise on device 0 of ``device_type``.
        The sum is stored as ``self.aggregated_grads`` and passed to
        ``adam_updates``; the op it returns replaces ``self.optimize_op``.

        Args:
            session: Forwarded to the parent initialiser.
            parallel_models: Forwarded to the parent initialiser. Each model
                is read here through its ``grads`` attribute.
            optimize_op: Forwarded to the parent initialiser, then superseded
                by the op built at the end of this method.
            train_set: Forwarded to the parent initialiser.
            eval_set: Forwarded to the parent initialiser.
            variables: Forwarded to the parent initialiser and also passed to
                ``adam_updates`` as the parameters to update.
                NOTE(review): the default ``None`` is handed to
                ``adam_updates`` as-is - confirm that it accepts ``None``.
            lr: Learning rate passed to ``adam_updates``; kept as ``self.lr``.
            device_type: Device kind used to build the ``'/<type>:0'`` device
                string under which the gradients are summed.
            save_dir: Stored unchanged as ``self.save_dir``.
        """
        super().__init__(session, parallel_models, optimize_op, train_set,
                         eval_set, variables)
        self.lr = lr
        self.save_dir = save_dir

        # self.nr_model / self.parallel_models are not assigned in this
        # method; presumably the parent initialiser sets them.
        per_model_grads = [
            self.parallel_models[i].grads for i in range(self.nr_model)
        ]

        # Accumulate into a copy: summing directly into per_model_grads[0]
        # would overwrite the first model's own ``grads`` list with the
        # running total, because both names refer to the same list object.
        aggregated = list(per_model_grads[0])
        with tf.device('/' + device_type + ':0'):
            for model_grads in per_model_grads[1:]:
                for j in range(len(aggregated)):
                    aggregated[j] = aggregated[j] + model_grads[j]
        self.aggregated_grads = aggregated

        self.optimize_op = adam_updates(variables,
                                        self.aggregated_grads,
                                        lr=self.lr)
Exemplo n.º 2
0
    def construct_models(self,
                         model_cls,
                         model_opt,
                         learning_rate,
                         trainable_params=None,
                         eval_keys=('total loss',)):
        """Build one model per GPU, sum their gradients and set up training.

        Sets ``self.models``, ``self.params``, ``self.monitor``,
        ``self.train_step`` and ``self.saver``.

        Args:
            model_cls: Model class. It is instantiated as
                ``model_cls(counters={})`` once per GPU, and its
                ``build_graph`` is wrapped in a ``tf.make_template`` named
                ``'model'`` that is called on every instance.
            model_opt: Keyword arguments forwarded to every ``build_graph``
                call.
            learning_rate: Passed to ``adam_updates`` as ``lr``.
            trainable_params: When ``None``, every variable returned by
                ``tf.trainable_variables()`` is optimised; otherwise the
                value is passed to ``get_trainable_variables`` to select
                the parameters.
            eval_keys: Names of the metrics to monitor. Recognised names
                are ``'total loss'``, ``'nll loss'``, ``'reg loss'``,
                ``'bits per dim'`` and ``'mi'``; any other name is ignored.
                The default is an immutable tuple rather than a shared
                mutable list.
        """
        # One model instance per GPU, all built through the same template.
        self.models = [model_cls(counters={}) for _ in range(self.nr_gpu)]
        template = tf.make_template('model', model_cls.build_graph)
        for i, model in enumerate(self.models):
            with tf.device('/gpu:%d' % i):
                template(model, **model_opt)

        if trainable_params is None:
            self.params = tf.trainable_variables()
        else:
            self.params = get_trainable_variables(trainable_params)

        # Gradients of each model's loss, computed on that model's GPU.
        grads = []
        for i, model in enumerate(self.models):
            with tf.device('/gpu:%d' % i):
                grads.append(
                    tf.gradients(model.loss,
                                 self.params,
                                 colocate_gradients_with_ops=True))
        # Sum the remaining gradient lists into the first one on GPU 0.
        with tf.device('/gpu:0'):
            for other_grads in grads[1:]:
                for j in range(len(grads[0])):
                    grads[0][j] += other_grads[j]

        # Metric name -> model attribute holding the per-model value. The
        # attribute is only read when its metric was requested, so models
        # need not define the ones that are not monitored.
        metric_attrs = (
            ('total loss', 'loss'),
            ('nll loss', 'loss_nll'),
            ('reg loss', 'loss_reg'),
            ('bits per dim', 'bits_per_dim'),
            ('mi', 'mi'),
        )
        mdict = {}
        for key, attr in metric_attrs:
            if key in eval_keys:
                # Sum over the models divided by the number of GPUs.
                mdict[key] = tf.add_n(
                    [getattr(model, attr)
                     for model in self.models]) / self.nr_gpu

        self.monitor = Monitor(dict=mdict,
                               config_str="",
                               log_file_path=self.save_dir + "/logfile")
        self.train_step = adam_updates(self.params, grads[0], lr=learning_rate)
        self.saver = tf.train.Saver()
Exemplo n.º 3
0
# Optimise every trainable variable in the graph. A narrower alternative is
# get_trainable_variables(["conv_encoder", "conv_decoder", "conv_pixel_cnn"]).
all_params = tf.trainable_variables()

# Gradients of each model's loss with respect to all_params, one list per
# GPU, each computed under that GPU's device scope.
grads = []
for i in range(args.nr_gpu):
    with tf.device('/gpu:{:d}'.format(i)):
        grads.append(
            tf.gradients(
                models[i].loss,
                all_params,
                colocate_gradients_with_ops=True,
            )
        )

# Sum the gradient lists into grads[0] and build the Adam update, both
# under the GPU 0 device scope.
with tf.device('/gpu:{:d}'.format(0)):
    for i in range(1, args.nr_gpu):
        for j in range(len(grads[0])):
            grads[0][j] += grads[i][j]

    train_step = adam_updates(all_params, grads[0], lr=args.learning_rate)

# The variable list is queried again here, after the update op was built.
learner = Learner(
    session=None,
    parallel_models=models,
    optimize_op=train_step,
    train_set=train_set,
    eval_set=val_set,
    variables=tf.trainable_variables(),
)

# def make_feed_dict(data, is_training=True, dropout_p=0.5):
#     data = np.rint(data)
#     ds = np.split(data, args.nr_gpu)
#     feed_dict = {is_trainings[i]: is_training for i in range(args.nr_gpu)}
#     feed_dict.update({dropout_ps[i]: dropout_p for i in range(args.nr_gpu)})
#     feed_dict.update({ xs[i]:ds[i] for i in range(args.nr_gpu) })
#     return feed_dict