Example #1
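(All four snippets assume TensorFlow 1.x imported as `tf`, `from itertools import chain`, and a project-level `flatten` helper that flattens nested lists of layer dimensions; none of these are shown on this page.)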
 def _res_core_vars(self, left, target, right, rnn_part_size, res_size,
                    var_scope):
     with tf.variable_scope(var_scope):
         matrices, biases = list(), list()
         # Total input / output dimensionality: flattened sizes of the
         # neighbouring parts plus the RNN part.
         in_ndims = sum(flatten(left)) + sum(flatten(target)) + sum(
             flatten(right)) + rnn_part_size
         out_ndims = sum(flatten(target)) + rnn_part_size
         # Glorot-style scaling: stddev ~ init_parameter / sqrt(fan_in + fan_out).
         out_stddev = self._optimizer_init_parameter / (res_size +
                                                        rnn_part_size)**.5
         # Output matrix init: zero columns for the target part, small
         # truncated-normal columns for the RNN part.
         out_init = tf.concat([
             tf.zeros([res_size, sum(flatten(target))]),
             tf.truncated_normal([res_size, rnn_part_size],
                                 stddev=out_stddev)
         ], -1)
         in_stddev = self._optimizer_init_parameter / (in_ndims +
                                                       res_size)**.5
         with tf.variable_scope('in_core'):
             matrices.append(
                 tf.get_variable(
                     'matrix',
                     shape=[in_ndims, res_size],
                     initializer=tf.truncated_normal_initializer(
                         stddev=in_stddev),
                     # initializer=tf.zeros_initializer(),
                     # trainable=False
                 ))
             biases.append(
                 tf.get_variable(
                     'bias',
                     shape=[res_size],
                     initializer=tf.zeros_initializer(),
                     # trainable=False
                 ))
         with tf.variable_scope('out_core'):
             matrices.append(
                 tf.get_variable(
                     'matrix',
                     # shape=[res_size, out_ndims],
                     # initializer=tf.truncated_normal_initializer(stddev=in_stddev)
                     initializer=out_init,
                     # initializer=tf.zeros_initializer()
                     # trainable=False
                 ))
             biases.append(
                 tf.get_variable(
                     'bias',
                     shape=[out_ndims],
                     # initializer=tf.zeros_initializer(),
                     initializer=tf.constant_initializer(
                         1e-15),  # tiny nonzero init so neurons do not start out dead
                     # initializer=tf.truncated_normal_initializer(stddev=in_stddev)
                     # trainable=False
                 ))
         for m in matrices:
             tf.add_to_collection(tf.GraphKeys.WEIGHTS, m)
     return matrices, biases
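Both cores above scale their truncated-normal initializers Glorot-style: the standard deviation is the optimizer init parameter divided by the square root of fan-in plus fan-out. A minimal sketch of that rule in isolation (TensorFlow 1.x; the variable name and sizes below are illustrative, not from the source):

 import tensorflow as tf

 def glorot_like_stddev(init_parameter, fan_in, fan_out):
     # stddev shrinks with the total fan of the layer
     return init_parameter / (fan_in + fan_out) ** .5

 in_ndims, res_size = 128, 64  # illustrative sizes
 matrix = tf.get_variable(
     'sketch_matrix',
     shape=[in_ndims, res_size],
     initializer=tf.truncated_normal_initializer(
         stddev=glorot_like_stddev(.1, in_ndims, res_size)))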
Example #2
 def _create_optimizer_trainable_vars(self):
     # One trainable coefficient and one bias per flattened pupil
     # dimension, per unrolling step if the pupil defines 'num_unrollings'.
     total_ndim = sum(flatten(self._pupil_dims))
     if 'num_unrollings' in self._pupil_net_size:
         total_ndim *= self._pupil_net_size['num_unrollings']
     with tf.variable_scope('optimizer_trainable_variables'):
         coefs = tf.Variable(tf.ones([total_ndim]),
                             name='coefs',
                             trainable=True)
         bias = tf.Variable(tf.zeros([total_ndim]),
                            name='bias',
                            trainable=True)
         opt_vars = [coefs, bias]  # avoid shadowing the built-in `vars`
         tf.add_to_collection(tf.GraphKeys.WEIGHTS, coefs)
     return opt_vars
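The two variables have one entry per flattened pupil dimension, which suggests an elementwise affine map over a flattened gradient vector. How they are consumed is not shown in this example, so the application sketched below is an assumption:

 import tensorflow as tf

 total_ndim = 8  # illustrative size
 coefs = tf.Variable(tf.ones([total_ndim]), name='coefs')
 bias = tf.Variable(tf.zeros([total_ndim]), name='bias')
 flat_grad = tf.placeholder(tf.float32, [total_ndim], name='flat_grad')
 # assumed update rule: elementwise scaling plus shift of the gradient
 transformed = coefs * flat_grad + bias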
Example #3
 def _core_vars(self, target, var_scope):
     with tf.variable_scope(var_scope):
         ndims = sum(flatten(target))
         # For a square matrix, fan_in + fan_out = 2 * ndims.
         stddev = self._optimizer_init_parameter / (2 * ndims)**.5
         matrix = tf.get_variable(
             'matrix',
             shape=[ndims, ndims],
             initializer=tf.truncated_normal_initializer(stddev=stddev),
         )
         bias = tf.get_variable(
             'bias',
             shape=[ndims],
             initializer=tf.zeros_initializer(),
             # trainable=False
         )
         tf.add_to_collection(tf.GraphKeys.WEIGHTS, matrix)
     return matrix, bias
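Here fan-in and fan-out are both `ndims`, so the denominator is `sqrt(2 * ndims)`. A sketch of how such a square core could be applied as a dense layer (the matmul application is an assumption; only the shapes and initializers come from the example):

 import tensorflow as tf

 ndims, init_parameter = 16, .1  # illustrative values
 matrix = tf.get_variable(
     'core_matrix', shape=[ndims, ndims],
     initializer=tf.truncated_normal_initializer(
         stddev=init_parameter / (2 * ndims) ** .5))
 bias = tf.get_variable('core_bias', shape=[ndims],
                        initializer=tf.zeros_initializer())
 x = tf.placeholder(tf.float32, [None, ndims])
 y = tf.matmul(x, matrix) + bias  # assumed dense application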
Example #4
    def __init__(
            self,
            pupil,
            num_exercises=10,
            num_optimizer_unrollings=10,
            perm_period=None,
            num_gpus=1,
            regularization_rate=1e-7,
            inp_gradient_clipping='norm_loss',
            clip_norm=1e+5,
            optimizer_init_parameter=.1,
            regime='train',
            optimizer_for_opt_type='adam',
            additional_metrics=None,
            flags=None,
            normalizing=None,
            permute=False,
            get_theta=False,
            get_omega_and_beta=False,
            matrix_mod='phi_and_psi',
            no_end=False,
    ):
        if additional_metrics is None:
            additional_metrics = list()
        if flags is None:
            flags = list()

        self._pupil = pupil
        self._pupil_net_size = self._pupil.get_net_size()
        self._pupil_dims = self._pupil.get_layer_dims()
        self._emb_layer_is_present = 'embedding_size' in self._pupil_net_size
        self._num_exercises = num_exercises
        self._num_optimizer_unrollings = num_optimizer_unrollings
        self._perm_period = perm_period
        self._num_gpus = num_gpus
        if self._num_gpus == 1:
            self._base_device = '/gpu:0'
        else:
            # With several GPUs, keep shared variables on the CPU.
            self._base_device = '/cpu:0'
        self._regularization_rate = regularization_rate
        self._inp_gradient_clipping = inp_gradient_clipping
        self._clip_norm = clip_norm
        self._optimizer_init_parameter = optimizer_init_parameter
        self._regime = regime

        self._optimizer_for_opt_type = optimizer_for_opt_type

        self._additional_metrics = additional_metrics

        self._flags = flags
        self._get_theta = get_theta
        self._get_omega_and_beta = get_omega_and_beta
        self._matrix_mod = matrix_mod
        self._no_end = no_end

        self._normalizing = normalizing

        self._permute = permute
        # Assumes the pupil defines 'num_unrollings' (unlike
        # _create_optimizer_trainable_vars, which guards for its absence).
        self._total_ndim = sum(flatten(self._pupil_dims)) * self._pupil_net_size['num_unrollings']
        self._hooks = dict(
            pupil_grad_eval_inputs=None,
            pupil_grad_eval_labels=None,
            optimizer_grad_inputs=None,
            optimizer_grad_labels=None,
            pupil_savers=None,
            optimizer_train_op=None,
            learning_rate_for_optimizer_training=None,
            train_with_meta_optimizer_op=None,
            reset_optimizer_train_state=None,
            reset_optimizer_inference_state=None,
            reset_permutation_matrices=None,
            reset_pupil_grad_eval_pupil_storage=None,
            reset_optimizer_grad_pupil_storage=None,
            reset_optimizer_inference_pupil_storage=None,
            meta_optimizer_saver=None,
            loss=None,
            start_loss=None,
            end_loss=None,
            optimizer_dropout_keep_prob=None,
            pupil_trainable_initializers=None,
            train_optimizer_summary=None
        )
        for add_metric in self._additional_metrics:
            self._hooks['start_' + add_metric] = None
            self._hooks['end_' + add_metric] = None
            self._hooks[add_metric] = None

        self._debug_tensors = list()

        self._optimizer_dropout_keep_prob = tf.placeholder(tf.float32, name='optimizer_dropout_keep_prob')
        self._hooks['optimizer_dropout_keep_prob'] = self._optimizer_dropout_keep_prob
        if regime == 'train':
            # Distribute exercises across GPUs; the last GPU also takes
            # the remainder when the split is uneven.
            ex_per_gpu = self._num_exercises // self._num_gpus
            remaining = self._num_exercises - self._num_gpus * ex_per_gpu
            self._exercise_gpu_map = [n // ex_per_gpu for n in range((self._num_gpus - 1) * ex_per_gpu)] + \
                                     [self._num_gpus - 1] * (ex_per_gpu + remaining)
            self._num_ex_on_gpus = [ex_per_gpu] * (self._num_gpus - 1) + [ex_per_gpu + remaining]
            self._gpu_borders = self._gpu_idx_borders(self._exercise_gpu_map)

            tmp = self._make_inputs_and_labels_placeholders(
                self._pupil, self._num_optimizer_unrollings, self._num_exercises,
                self._exercise_gpu_map)
            self._pupil_grad_eval_inputs, self._pupil_grad_eval_labels,\
                self._optimizer_grad_inputs, self._optimizer_grad_labels = tmp
            self._pupil_trainable_variables, self._pupil_grad_eval_pupil_storage, self._optimizer_grad_pupil_storage, \
                self._pupil_savers, self._pupil_trainable_initializers = self._create_pupil_variables_and_savers(
                    self._pupil, self._num_exercises, self._exercise_gpu_map)
            self._hooks['pupil_savers'] = self._pupil_savers
            self._hooks['pupil_trainable_initializers'] = self._pupil_trainable_initializers
            self._hooks['reset_pupil_grad_eval_pupil_storage'] = tf.group(
                *chain(*[self._pupil.reset_storage(stor) for stor in self._pupil_grad_eval_pupil_storage])
            )
            self._hooks['reset_optimizer_grad_pupil_storage'] = tf.group(
                *chain(*[self._pupil.reset_storage(stor) for stor in self._optimizer_grad_pupil_storage])
            )
            self._hooks['reset_optimizer_inference_pupil_storage'] = tf.group(*self._pupil.reset_self_train_storage())
            self._add_standard_train_hooks()

            self._additional_loss = 0
        else:
            self._exercise_gpu_map = None
            self._pupil_grad_eval_inputs, self._pupil_grad_eval_labels, \
                self._optimizer_grad_inputs, self._optimizer_grad_labels = None, None, None, None

        with tf.device(self._base_device):
            self._opt_trainable = self._create_optimizer_trainable_vars()

        if self._regime == 'train':
            self._learning_rate_for_optimizer_training = tf.placeholder(
                tf.float32, name='learning_rate_for_optimizer_training')
            self._hooks['learning_rate_for_optimizer_training'] = self._learning_rate_for_optimizer_training
            if self._optimizer_for_opt_type == 'adam':
                self._optimizer_for_optimizer_training = tf.train.AdamOptimizer(
                    learning_rate=self._learning_rate_for_optimizer_training)
            elif self._optimizer_for_opt_type == 'sgd':
                self._optimizer_for_optimizer_training = tf.train.GradientDescentOptimizer(
                    learning_rate=self._learning_rate_for_optimizer_training)
            else:
                # otherwise the attribute would be silently left undefined
                raise ValueError("unknown optimizer type '%s'" % self._optimizer_for_opt_type)
            self._train_graph()
            self._inference_graph()
            self._empty_op = tf.constant(0)
            self._hooks['reset_optimizer_train_state'] = self._empty_op
            self._hooks['reset_optimizer_inference_state'] = self._empty_op
            self._hooks['reset_permutation_matrices'] = self._empty_op
        elif self._regime == 'inference':
            self._inference_graph()
            # `_empty_op` is only created in the 'train' branch, so it must
            # be defined here too before being used as a no-op hook.
            self._empty_op = tf.constant(0)
            self._hooks['reset_optimizer_inference_state'] = self._empty_op
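The train branch splits exercises across GPUs so that every GPU but the last gets `ex_per_gpu` exercises and the last one also absorbs the remainder. A pure-Python restatement of that mapping, with illustrative numbers:

 def exercise_gpu_map(num_exercises, num_gpus):
     ex_per_gpu = num_exercises // num_gpus
     remaining = num_exercises - num_gpus * ex_per_gpu
     # all GPUs except the last get ex_per_gpu exercises each
     mapping = [n // ex_per_gpu for n in range((num_gpus - 1) * ex_per_gpu)]
     # the last GPU takes its share plus the remainder
     mapping += [num_gpus - 1] * (ex_per_gpu + remaining)
     return mapping

 print(exercise_gpu_map(10, 3))  # [0, 0, 0, 1, 1, 1, 2, 2, 2, 2]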