def add_layer(inputs, in_size, out_size, n_layer, activation_function=None):
    """Add one fully connected layer and return its output tensor.

    The layer computes ``matmul(inputs, Weights) + biases`` and, when an
    activation function is given, applies it to that result. Histogram
    summaries are recorded for the weights, the biases and the outputs under
    the tag prefix ``'layer<n_layer>'``.

    Args:
        inputs: tensor multiplied by the ``[in_size, out_size]`` weight matrix.
        in_size: first dimension of the weight matrix.
        out_size: second dimension of the weight matrix and width of the
            ``[1, out_size]`` bias row.
        n_layer: value interpolated into the summary tag prefix.
        activation_function: optional callable applied to the affine result;
            when None the affine result is returned unchanged.

    Returns:
        The output tensor of the layer.
    """
    layer_name = 'layer%s' % n_layer
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            Weights = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
            # tf.summary.histogram requires tensorflow >= 0.12.
            tf.summary.histogram(layer_name + '/weights', Weights)
        with tf.name_scope('biases'):
            # Biases start at 0.1 rather than exactly zero.
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
            tf.summary.histogram(layer_name + '/biases', biases)
        with tf.name_scope('Wx_plus_b'):
            Wx_plus_b = tf.add(tf.matmul(inputs, Weights), biases)
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        # Use the same summary API as for weights/biases; the bare
        # `histogram_summary` name used previously is not defined here.
        tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
def add_gradients_summaries(grads_and_vars):
    """Create histogram summaries for every gradient that is available.

    Args:
        grads_and_vars: iterable of (gradient, variable) tuples.

    Returns:
        A list with two summaries per pair whose gradient is not None: first
        the histogram of the gradient values, then the histogram of their
        global norm. Pairs without a gradient are only logged.
    """
    collected = []
    for gradient, variable in grads_and_vars:
        if gradient is None:
            logging.info('Var %s has no gradient', variable.op.name)
            continue
        # IndexedSlices gradients expose their data through `.values`.
        values = (gradient.values
                  if isinstance(gradient, ops.IndexedSlices) else gradient)
        collected.append(
            logging_ops.histogram_summary(variable.op.name + ':gradient',
                                          values))
        collected.append(
            logging_ops.histogram_summary(variable.op.name + ':gradient_norm',
                                          clip_ops.global_norm([values])))
    return collected
def logistic_regression(X, y, class_weight=None, init_mean=None,
                        init_stddev=1.0):
    """Creates logistic regression TensorFlow subgraph.

    Args:
        X: tensor or placeholder for input features, shape should be
            [batch_size, n_features].
        y: tensor or placeholder for target, shape should be
            [batch_size, n_classes].
        class_weight: tensor, [n_classes], holding the weight of each class.
            If None, the graph is searched for a tensor named
            `class_weight:0`; if that does not exist either, None is passed
            on to the softmax classifier.
        init_mean: the mean value to use for initialization.
        init_stddev: the standard deviation to use for initialization.

    Returns:
        Predictions and loss tensors.

    Side effects:
        The variables `weights` and `bias` are created in the
        `logistic_regression` variable scope. If init_mean is not None they
        use a random normal initializer with the given init_mean and
        init_stddev (both may be set to 0.0 if a zero initialization is
        desirable for convex use cases). If init_mean is None no initializer
        is passed, so the default of `vs.get_variable` applies.
    """
    with vs.variable_scope('logistic_regression'):
        logging_ops.histogram_summary('logistic_regression.X', X)
        logging_ops.histogram_summary('logistic_regression.y', y)
        # Set up the requested initialization.
        if init_mean is None:
            weights = vs.get_variable(
                'weights', [X.get_shape()[1], y.get_shape()[-1]])
            bias = vs.get_variable('bias', [y.get_shape()[-1]])
        else:
            weights = vs.get_variable(
                'weights', [X.get_shape()[1], y.get_shape()[-1]],
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev))
            bias = vs.get_variable(
                'bias', [y.get_shape()[-1]],
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev))
        logging_ops.histogram_summary('logistic_regression.weights', weights)
        logging_ops.histogram_summary('logistic_regression.bias', bias)
        # If no class weight provided, try to retrieve one from pre-defined
        # tensor name in the graph. Compare against None explicitly: a tensor
        # (or multi-element array) cannot be used in a boolean context.
        if class_weight is None:
            try:
                class_weight = ops.get_default_graph().get_tensor_by_name(
                    'class_weight:0')
            except KeyError:
                # No such tensor in the graph; proceed without class weights.
                pass
        return losses_ops.softmax_classifier(X, y, weights, bias,
                                             class_weight=class_weight)
def linear_regression(x, y, init_mean=None, init_stddev=1.0):
    """Build the linear regression subgraph.

    Args:
        x: tensor or placeholder for input features.
        y: tensor or placeholder for target.
        init_mean: mean of the random normal initializer; None means no
            initializer is passed to `vs.get_variable`.
        init_stddev: standard deviation of the random normal initializer.

    Returns:
        Whatever `losses_ops.mean_squared_error_regressor` returns
        (documented upstream as predictions and loss tensors).

    Side effects:
        Creates the variables `weights` and `bias` inside the
        `linear_regression` variable scope and adds histogram summaries for
        x, y, weights and bias.
    """
    with vs.variable_scope('linear_regression'):
        scope_name = vs.get_variable_scope().name
        logging_ops.histogram_summary('%s.x' % scope_name, x)
        logging_ops.histogram_summary('%s.y' % scope_name, y)
        dtype = x.dtype.base_dtype
        target_shape = y.get_shape()
        # A rank-1 target gets a single output column.
        output_shape = 1 if len(target_shape) == 1 else target_shape[1]

        def _make_variable(var_name, var_shape):
            # Only pass an initializer when one was explicitly requested.
            if init_mean is None:
                return vs.get_variable(var_name, var_shape, dtype=dtype)
            return vs.get_variable(
                var_name, var_shape,
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev, dtype=dtype),
                dtype=dtype)

        weights = _make_variable('weights', [x.get_shape()[1], output_shape])
        bias = _make_variable('bias', [output_shape])
        logging_ops.histogram_summary('%s.weights' % scope_name, weights)
        logging_ops.histogram_summary('%s.bias' % scope_name, bias)
        return losses_ops.mean_squared_error_regressor(x, y, weights, bias)
def linear_regression(x, y, init_mean=None, init_stddev=1.0):
    """Assemble a linear regression model on top of `x` and `y`.

    Args:
        x: tensor or placeholder for input features.
        y: tensor or placeholder for target.
        init_mean: mean for the random normal initializer. When None, the
            variables are created without an explicit initializer.
        init_stddev: standard deviation for the random normal initializer.

    Returns:
        The result of `losses_ops.mean_squared_error_regressor`
        (documented upstream as predictions and loss tensors).

    Side effects:
        Variables `weights` and `bias` are created in the
        `linear_regression` variable scope; histogram summaries are added for
        the inputs, targets, weights and bias.
    """
    with vs.variable_scope('linear_regression'):
        scope_name = vs.get_variable_scope().name
        logging_ops.histogram_summary('%s.x' % scope_name, x)
        logging_ops.histogram_summary('%s.y' % scope_name, y)
        dtype = x.dtype.base_dtype
        y_dims = y.get_shape()
        if len(y_dims) == 1:
            # Rank-1 targets map to a single output column.
            output_shape = 1
        else:
            output_shape = y_dims[1]

        def _variable_kwargs():
            # Fresh keyword arguments (and a fresh initializer) per variable.
            if init_mean is None:
                return {'dtype': dtype}
            return {
                'initializer': init_ops.random_normal_initializer(
                    init_mean, init_stddev, dtype=dtype),
                'dtype': dtype,
            }

        weights = vs.get_variable(
            'weights', [x.get_shape()[1], output_shape], **_variable_kwargs())
        bias = vs.get_variable('bias', [output_shape], **_variable_kwargs())
        logging_ops.histogram_summary('%s.weights' % scope_name, weights)
        logging_ops.histogram_summary('%s.bias' % scope_name, bias)
        return losses_ops.mean_squared_error_regressor(x, y, weights, bias)
def add_gradients_summaries(grads_and_vars):
    """Attach histogram summaries to the given gradients.

    Args:
        grads_and_vars: iterable of (gradient, variable) tuples.

    Returns:
        The list of created summaries. Each pair with a gradient contributes
        a histogram of the gradient values followed by a histogram of their
        global norm; pairs whose gradient is None are logged and skipped.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is None:
            logging.info('Var %s has no gradient', var.op.name)
            continue
        if isinstance(grad, ops.IndexedSlices):
            # IndexedSlices keep the actual numbers in `.values`.
            grad_values = grad.values
        else:
            grad_values = grad
        value_summary = logging_ops.histogram_summary(
            var.op.name + ':gradient', grad_values)
        summaries.append(value_summary)
        norm_summary = logging_ops.histogram_summary(
            var.op.name + ':gradient_norm',
            clip_ops.global_norm([grad_values]))
        summaries.append(norm_summary)
    return summaries
def _add_hidden_layer_summary(value, tag):
    """Record summaries for a hidden layer tensor.

    Adds a scalar summary with the fraction of zero entries of `value` and a
    histogram summary of `value`, both tagged with the prefix `tag`.

    Args:
        value: tensor to summarize.
        tag: prefix used for both summary tags.
    """
    sparsity_tag = "%s/fraction_of_zero_values" % tag
    zero_fraction = nn.zero_fraction(value)
    logging_ops.scalar_summary(sparsity_tag, zero_fraction)

    activation_tag = "%s/activation" % tag
    logging_ops.histogram_summary(activation_tag, value)
def _setup_training(self): """Sets up graph, model and trainer.""" # Create config if not given. if self._config is None: self._config = RunConfig(verbose=self.verbose) # Create new graph. self._graph = ops.Graph() self._graph.add_to_collection("IS_TRAINING", True) with self._graph.as_default(): random_seed.set_random_seed(self._config.tf_random_seed) self._global_step = variables.Variable( 0, name="global_step", trainable=False) # Setting up inputs and outputs. self._inp, self._out = self._data_feeder.input_builder() # If class weights are provided, add them to the graph. # Different loss functions can use this tensor by name. if self.class_weight: self._class_weight_node = constant_op.constant( self.class_weight, name='class_weight') # Add histograms for X and y if they are floats. if self._data_feeder.input_dtype in (np.float32, np.float64): logging_ops.histogram_summary("X", self._inp) if self._data_feeder.output_dtype in (np.float32, np.float64): logging_ops.histogram_summary("y", self._out) # Create model's graph. self._model_predictions, self._model_loss = self.model_fn( self._inp, self._out) # Create trainer and augment graph with gradients and optimizer. # Additionally creates initialization ops. learning_rate = self.learning_rate optimizer = self.optimizer if callable(learning_rate): learning_rate = learning_rate(self._global_step) if callable(optimizer): optimizer = optimizer(learning_rate) self._train = optimizers.optimize_loss(self._model_loss, self._global_step, learning_rate=learning_rate, optimizer=optimizer, clip_gradients=self.clip_gradients) # Update ops during training, e.g. batch_norm_ops self._train = control_flow_ops.group(self._train, *ops.get_collection('update_ops')) # Merge all summaries into single tensor. self._summaries = logging_ops.merge_all_summaries() # Get all initializers for all trainable variables. self._initializers = variables.initialize_all_variables() # Create model's saver capturing all the nodes created up until now. 
self._saver = train.Saver( max_to_keep=self._config.keep_checkpoint_max, keep_checkpoint_every_n_hours=self._config.keep_checkpoint_every_n_hours) # Enable monitor to create validation data dict with appropriate tf placeholders self._monitor.create_val_feed_dict(self._inp, self._out) # Create session to run model with. self._session = session.Session(self._config.tf_master, config=self._config.tf_config) # Run parameter initializers. self._session.run(self._initializers)
def add_histogram_summary(self, v, name):
    """Add a histogram summary of `v` tagged ``"<self.name>:<name>"``.

    Args:
        v: value to summarize.
        name: suffix appended to this object's name to form the summary tag.
    """
    summary_tag = "{}:{}".format(self.name, name)
    logging_ops.histogram_summary(summary_tag, v)
def optimize_loss(loss, global_step, learning_rate, optimizer,
                  gradient_noise_scale=None, gradient_multipliers=None,
                  clip_gradients=None, moving_average_decay=None,
                  learning_rate_decay_fn=None, update_ops=None,
                  variables=None, name=None, summaries=None):
    """Given loss and parameters for optimizer, returns a training op.

    Args:
        loss: Tensor, 0 dimensional.
        global_step: Tensor, step counter for each update.
        learning_rate: float or Tensor, magnitude of update per each training
            step.
        optimizer: string, class or optimizer instance, used as trainer.
            A string should be the name of an optimizer, like 'SGD', 'Adam',
            'Adagrad'; the full list is in the OPTIMIZER_CLS_NAMES constant.
            A class should be a sub-class of tf.Optimizer that implements
            `compute_gradients` and `apply_gradients`. An instance should be
            an instantiation of a tf.Optimizer sub-class and have
            `compute_gradients` and `apply_gradients` functions.
        gradient_noise_scale: float or None, adds 0-mean normal noise scaled
            by this value.
        gradient_multipliers: dict of variables or variable names to floats.
            If present, gradients for specified variables will be multiplied
            by given constant.
        clip_gradients: float or `None`, clips gradients by this value.
        moving_average_decay: Deprecated. float or None, takes into account
            previous loss to make learning smoother due to outliers.
        learning_rate_decay_fn: function, takes `learning_rate` and
            `global_step` `Tensor`s, returns `Tensor`. Can be used to
            implement any learning rate decay functions.
            For example: tf.train.exponential_decay.
        update_ops: list of update `Operation`s to execute at each step. If
            `None`, uses elements of UPDATE_OPS collection. The order of
            execution between `update_ops` and `loss` is non-deterministic.
        variables: list of variables to optimize or `None` to use all
            trainable variables.
        name: The name for this operation is used to scope operations and
            summaries.
        summaries: List of internal quantities to visualize on tensorboard.
            Defaults to ["loss", "learning_rate"] when not set. The complete
            list is in OPTIMIZER_SUMMARIES.

    Returns:
        Training op.

    Raises:
        ValueError: if `learning_rate` is neither a 0d Tensor nor a float, or
            if `optimizer` is of the wrong type or an unknown name.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Update ops take UPDATE_OPS collection if not provided.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Make sure update ops are ran before computing loss. The
        # dependencies are handed over as a list because `update_ops` may be
        # a set (see the default above).
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Moving average of the loss with decay.
        # TODO(b/30439864): moving_average_decay should be removed.
        if moving_average_decay is not None:
            logging.warn("'moving_average_decay' is deprecated. Please use "
                         "tensorboard's builtin averaging instead.")
            # Generate moving averages of the loss.
            loss_averages = train.ExponentialMovingAverage(
                moving_average_decay, name="avg")
            loss_averages_op = loss_averages.apply([loss])
            logging_ops.scalar_summary("loss/mean",
                                       loss_averages.average(loss))
            loss = control_flow_ops.with_dependencies([loss_averages_op],
                                                      loss)

        # Learning rate variable, with possible decay.
        if (isinstance(learning_rate, ops.Tensor) and
                learning_rate.get_shape().ndims == 0):
            lr = learning_rate
        elif isinstance(learning_rate, float):
            lr = vs.get_variable(
                "learning_rate", [], trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))
        else:
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" % (
                                 str(learning_rate),
                                 str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate"]
        if learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s."
                    % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type) and
              issubclass(optimizer, optimizer_.Optimizer)):
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        else:
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(loss, variables)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(
                gradients, gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)

        # Optionally clip gradients by global norm.
        if clip_gradients is not None:
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)

        # Add scalar summary for loss.
        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Add histograms for gradients and gradient norms. These are taken
        # after noise, multipliers and clipping have been applied.
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                if "gradients" in summaries:
                    logging_ops.histogram_summary(
                        variable.name + "/gradients", grad_values)
                if "gradient_norm" in summaries:
                    logging_ops.histogram_summary(
                        variable.name + "/gradient_norm",
                        clip_ops.global_norm([grad_values]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates],
                                                          loss)

        return train_tensor
def optimize_loss(loss, global_step, learning_rate, optimizer,
                  gradient_noise_scale=None, gradient_multipliers=None,
                  clip_gradients=None, learning_rate_decay_fn=None,
                  update_ops=None, variables=None, name=None, summaries=None,
                  colocate_gradients_with_ops=False):
    """Build a training op for `loss` with the requested optimizer.

    The optimizer may be given as:
      - a string naming an entry of OPTIMIZER_CLS_NAMES, e.g.
        `optimize_loss(..., optimizer='Adam')`;
      - a function that takes the learning rate `Tensor` and returns an
        `Optimizer` instance, e.g. `optimize_loss(..., optimizer=lambda lr:
        tf.train.MomentumOptimizer(lr, momentum=0.5))`. If `learning_rate`
        is `None` the function is called without arguments;
      - a subclass of `Optimizer` whose only required argument is the
        learning rate, e.g. `optimizer=tf.train.AdagradOptimizer`;
      - an instance of a subclass of `Optimizer`, e.g.
        `optimizer=tf.train.AdagradOptimizer(0.5)`.

    Args:
        loss: Tensor, 0 dimensional.
        global_step: Tensor, step counter for each update.
        learning_rate: float, 0d Tensor or None; magnitude of update per
            training step.
        optimizer: string, class, instance or function as described above.
        gradient_noise_scale: float or None, adds 0-mean normal noise scaled
            by this value.
        gradient_multipliers: dict of variables or variable names to floats;
            gradients of the listed variables are multiplied by the constant.
        clip_gradients: float or `None`, clips gradients by this value.
        learning_rate_decay_fn: function taking `learning_rate` and
            `global_step` `Tensor`s and returning a `Tensor`, e.g.
            `tf.train.exponential_decay`.
        update_ops: list of update `Operation`s to execute at each step. If
            `None`, the UPDATE_OPS collection is used. The order of execution
            between `update_ops` and `loss` is non-deterministic.
        variables: list of variables to optimize or `None` to use all
            trainable variables.
        name: used to scope operations and summaries.
        summaries: list of internal quantities to visualize on tensorboard;
            defaults to ["loss", "learning_rate"]. The complete list is in
            OPTIMIZER_SUMMARIES.
        colocate_gradients_with_ops: If True, try colocating gradients with
            the corresponding op.

    Returns:
        Training op.

    Raises:
        ValueError: if the learning rate or optimizer has an unsupported
            type, or a learning rate is required but missing.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Default to the graph's UPDATE_OPS collection.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Update ops have to run before the loss is computed.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Resolve the learning rate into a tensor/variable (or leave it None).
        lr = None
        if learning_rate is not None:
            is_scalar_tensor = (isinstance(learning_rate, ops.Tensor) and
                                learning_rate.get_shape().ndims == 0)
            if is_scalar_tensor:
                lr = learning_rate
            elif isinstance(learning_rate, float):
                lr = vs.get_variable(
                    "learning_rate", [], trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate))
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" %
                    (str(learning_rate), str(type(learning_rate))))

        if summaries is None:
            summaries = ["loss", "learning_rate"]

        if learning_rate is not None and learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Turn the `optimizer` argument into an Optimizer instance.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer)
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type) and
              issubclass(optimizer, optimizer_.Optimizer)):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer)
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            # Factory function: receives the learning rate only if one exists.
            opt = optimizer(lr) if learning_rate is not None else optimizer()
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt))
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer))

        # Optimize every trainable variable unless told otherwise.
        if variables is None:
            variables = vars_.trainable_variables()

        grads_and_vars = opt.compute_gradients(
            loss, variables,
            colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Optional post-processing: noise, per-variable scaling, clipping.
        if gradient_noise_scale is not None:
            grads_and_vars = _add_scaled_noise_to_gradients(
                grads_and_vars, gradient_noise_scale)
        if gradient_multipliers is not None:
            grads_and_vars = _multiply_gradients(grads_and_vars,
                                                 gradient_multipliers)
        if clip_gradients is not None:
            grads_and_vars = _clip_gradients_by_norm(grads_and_vars,
                                                     clip_gradients)

        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Histograms of the (post-processed) gradients and their norms.
        for gradient, variable in grads_and_vars:
            grad_values = (gradient.values
                           if isinstance(gradient, ops.IndexedSlices)
                           else gradient)
            if grad_values is None:
                continue
            if "gradients" in summaries:
                logging_ops.histogram_summary(variable.name + "/gradients",
                                              grad_values)
            if "gradient_norm" in summaries:
                logging_ops.histogram_summary(
                    variable.name + "/gradient_norm",
                    clip_ops.global_norm([grad_values]))

        grad_updates = opt.apply_gradients(grads_and_vars,
                                           global_step=global_step,
                                           name="train")

        # Evaluating the returned tensor forces the gradient update to run.
        return control_flow_ops.with_dependencies([grad_updates], loss)
def optimize_loss(
    loss,
    global_step,
    learning_rate,
    optimizer,
    gradient_noise_scale=None,
    gradient_multipliers=None,
    clip_gradients=None,
    learning_rate_decay_fn=None,
    update_ops=None,
    variables=None,
    name=None,
    summaries=None,
):
    """Return a training op that minimizes `loss` with `optimizer`.

    Accepted forms of `optimizer`:
      - string: name of an entry in OPTIMIZER_CLS_NAMES, such as 'SGD' or
        'Adam'. E.g. `optimize_loss(..., optimizer='Adam')`.
      - function: takes the learning rate `Tensor` and must return an
        `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr:
        tf.train.MomentumOptimizer(lr, momentum=0.5))`. When `learning_rate`
        is `None`, the function is called with no arguments.
      - class: subclass of `Optimizer` taking the learning rate as its only
        required argument. E.g. `optimizer=tf.train.AdagradOptimizer`.
      - object: instance of a subclass of `Optimizer`. E.g.
        `optimizer=tf.train.AdagradOptimizer(0.5)`.

    Args:
        loss: Tensor, 0 dimensional.
        global_step: Tensor, step counter for each update.
        learning_rate: float, 0d Tensor or None; magnitude of update per
            training step.
        optimizer: see the accepted forms above.
        gradient_noise_scale: float or None, adds 0-mean normal noise scaled
            by this value.
        gradient_multipliers: dict of variables or variable names to floats.
            Gradients of the specified variables are multiplied by the given
            constant.
        clip_gradients: float or `None`, clips gradients by this value.
        learning_rate_decay_fn: function of (`learning_rate`, `global_step`)
            tensors returning a `Tensor`; for example
            tf.train.exponential_decay.
        update_ops: list of update `Operation`s to execute at each step.
            `None` selects the UPDATE_OPS collection. The order of execution
            between `update_ops` and `loss` is non-deterministic.
        variables: list of variables to optimize, or `None` for all
            trainable variables.
        name: scopes the created operations and summaries.
        summaries: list of quantities to visualize on tensorboard; when not
            set, ["loss", "learning_rate"] is used. The complete list is in
            OPTIMIZER_SUMMARIES.

    Returns:
        Training op.

    Raises:
        ValueError: if the optimizer or learning rate is of the wrong type,
            or a required learning rate is None.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        if update_ops is None:
            # Nothing supplied: take whatever the graph collected.
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        if update_ops:
            # The loss must not be computed before the update ops ran.
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Learning rate variable, with possible decay.
        lr = None
        if learning_rate is not None:
            scalar_tensor_given = (
                isinstance(learning_rate, ops.Tensor)
                and learning_rate.get_shape().ndims == 0
            )
            if scalar_tensor_given:
                lr = learning_rate
            elif isinstance(learning_rate, float):
                lr = vs.get_variable(
                    "learning_rate",
                    [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate),
                )
            else:
                rate_repr = str(learning_rate)
                rate_type = str(type(learning_rate))
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" % (rate_repr, rate_type)
                )

        if summaries is None:
            summaries = ["loss", "learning_rate"]

        if learning_rate is not None and learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer
                )
            if optimizer not in OPTIMIZER_CLS_NAMES:
                known_names = ", ".join(OPTIMIZER_CLS_NAMES)
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s."
                    % (known_names, optimizer)
                )
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif isinstance(optimizer, type) and issubclass(
            optimizer, optimizer_.Optimizer
        ):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer
                )
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            if learning_rate is None:
                opt = optimizer()
            else:
                opt = optimizer(lr)
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt)
                )
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer)
            )

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        gradients = opt.compute_gradients(loss, variables)

        # Optional transformations, applied in this fixed order.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(
                gradients, gradient_noise_scale
            )
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)
        if clip_gradients is not None:
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)

        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        want_gradients = "gradients" in summaries
        want_norms = "gradient_norm" in summaries
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient
            if grad_values is None:
                continue
            if want_gradients:
                logging_ops.histogram_summary(
                    variable.name + "/gradients", grad_values
                )
            if want_norms:
                logging_ops.histogram_summary(
                    variable.name + "/gradient_norm",
                    clip_ops.global_norm([grad_values]),
                )

        # Create gradient updates.
        grad_updates = opt.apply_gradients(
            gradients, global_step=global_step, name="train"
        )

        # Tie the returned tensor to the update so running it trains a step.
        train_tensor = control_flow_ops.with_dependencies(
            [grad_updates], loss
        )
        return train_tensor
def optimize_loss(loss, global_step, learning_rate, optimizer,
                  clip_gradients=None, moving_average_decay=0.9,
                  learning_rate_decay_fn=None, variables=None):
    """Build a training op for `loss` with the requested optimizer.

    Args:
        loss: Tensor, 0 dimensional.
        global_step: Tensor, step counter for each update.
        learning_rate: float or 0d Tensor, magnitude of update per training
            step.
        optimizer: string, class or optimizer instance, used as trainer.
            A string must be a key of OPTIMIZER_CLS_NAMES (like 'SGD',
            'Adam', 'Adagrad'). A class must be a sub-class of tf.Optimizer
            implementing `compute_gradients` and `apply_gradients`. An
            instance must be an instantiation of such a sub-class.
        clip_gradients: float or None, clips gradients by this value
            (global norm clipping).
        moving_average_decay: float or None, takes into account previous
            loss to make learning smoother due to outliers.
        learning_rate_decay_fn: function taking learning_rate and
            global_step Tensors and returning a Tensor, e.g.
            tf.train.exponential_decay.
        variables: list of variables to optimize, or None to use all
            trainable variables.

    Returns:
        Training op.

    Raises:
        ValueError: if the learning rate is neither a 0d Tensor nor a float,
            or if the optimizer is of the wrong type or an unknown name.
    """
    # Smooth the loss with an exponential moving average when requested.
    if moving_average_decay is not None:
        loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                       name="avg")
        loss_averages_op = loss_averages.apply([loss])
        logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
        loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

    # Resolve the learning rate into a tensor or a non-trainable variable.
    is_scalar_tensor = (isinstance(learning_rate, ops.Tensor) and
                        len(learning_rate.get_shape()) == 0)
    if is_scalar_tensor:
        lr = learning_rate
    elif isinstance(learning_rate, float):
        lr = vs.get_variable(
            "learning_rate", [], trainable=False,
            initializer=init_ops.constant_initializer(learning_rate))
    else:
        raise ValueError(
            "Learning rate should be 0d Tensor or float. Got %s"
            % str(learning_rate))
    if learning_rate_decay_fn is not None:
        lr = learning_rate_decay_fn(lr, global_step)

    # Turn the `optimizer` argument into an Optimizer instance.
    if isinstance(optimizer, six.string_types):
        if optimizer not in OPTIMIZER_CLS_NAMES:
            raise ValueError(
                "Optimizer name should be one of [%s], you provided %s."
                % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
        opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif (isinstance(optimizer, type) and
          issubclass(optimizer, optimizer_.Optimizer)):
        opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
        opt = optimizer
    else:
        raise ValueError("Unrecognized optimizer: should be string, "
                         "subclass of Optimizer or instance of "
                         "subclass of Optimizer. Got %s." % str(optimizer))

    # Optimize every trainable variable unless told otherwise.
    if variables is None:
        variables = vars_.trainable_variables()

    # Compute gradients and clip them by global norm if requested.
    gradients = opt.compute_gradients(loss, variables)
    if clip_gradients is not None:
        raw_gradients, clip_targets = zip(*gradients)
        clipped_gradients, _ = clip_ops.clip_by_global_norm(raw_gradients,
                                                            clip_gradients)
        gradients = list(zip(clipped_gradients, clip_targets))

    logging_ops.scalar_summary("loss", loss)

    # Histograms for each variable, its gradient and the gradient norm.
    for gradient, variable in gradients:
        grad_values = (gradient.values
                       if isinstance(gradient, ops.IndexedSlices)
                       else gradient)
        if grad_values is None:
            continue
        logging_ops.histogram_summary(variable.name, variable)
        logging_ops.histogram_summary(variable.name + "/gradients",
                                      grad_values)
        logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                      clip_ops.global_norm([grad_values]))

    grad_updates = opt.apply_gradients(gradients, global_step=global_step,
                                       name="train")
    # Guard against a loss that became inf or nan.
    final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")
    # Evaluating the returned tensor forces the gradient update to run.
    return control_flow_ops.with_dependencies([grad_updates], final_loss)
def logistic_regression(X, y, class_weight=None, init_mean=None,
                        init_stddev=1.0):
    """Creates logistic regression TensorFlow subgraph.

    Args:
      X: tensor or placeholder for input features,
         shape should be [batch_size, n_features].
      y: tensor or placeholder for target,
         shape should be [batch_size, n_classes].
      class_weight: tensor, [n_classes], where for each class
                    it has weight of the class. If `None`, the default graph
                    is searched for a tensor named `class_weight:0`; if that
                    tensor does not exist either, `None` is forwarded to
                    `softmax_classifier`.
      init_mean: the mean value to use for initialization.
      init_stddev: the standard deviation to use for initialization.

    Returns:
      Whatever `softmax_classifier` returns (documented upstream as the
      predictions and loss tensors).

    Side effects:
      Creates the variables `weights` and `bias` inside the
      `logistic_regression` variable scope and adds histogram summaries for
      `X`, `y`, `weights` and `bias`.
      If init_mean is not None, both variables are initialized with a random
      normal initializer using the given init_mean and init_stddev. (These
      may be set to 0.0 each if a zero initialization is desirable for convex
      use cases.)
      If init_mean is None, no initializer is passed to `get_variable`, so
      the variable scope's default initializer applies (the original
      documentation names uniform_unit_scaling_initializer).
    """
    with vs.variable_scope('logistic_regression'):
        logging_ops.histogram_summary('logistic_regression.X', X)
        logging_ops.histogram_summary('logistic_regression.y', y)

        weights_shape = [X.get_shape()[1], y.get_shape()[-1]]
        bias_shape = [y.get_shape()[-1]]

        # Set up the requested initialization.
        if init_mean is None:
            weights = vs.get_variable('weights', weights_shape)
            bias = vs.get_variable('bias', bias_shape)
        else:
            weights = vs.get_variable(
                'weights', weights_shape,
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev))
            bias = vs.get_variable(
                'bias', bias_shape,
                initializer=init_ops.random_normal_initializer(
                    init_mean, init_stddev))

        logging_ops.histogram_summary('logistic_regression.weights', weights)
        logging_ops.histogram_summary('logistic_regression.bias', bias)

        # If no class weight provided, try to retrieve one from pre-defined
        # tensor name in the graph. The check is an identity test on purpose:
        # evaluating the truth value of a tensor (or a multi-element array)
        # is ambiguous and raises instead of answering "was it provided?".
        if class_weight is None:
            try:
                class_weight = ops.get_default_graph().get_tensor_by_name(
                    'class_weight:0')
            except KeyError:
                # No such tensor in the graph: leave class_weight as None.
                pass

        return softmax_classifier(X, y, weights, bias,
                                  class_weight=class_weight)
def testHistogramSummary(self):
    """Checks that `histogram_summary` produces a `HistogramSummary` op."""
    with self.cached_session():
        value = constant_op.constant(3)
        summary = logging_ops.histogram_summary('tag', value)
        op_type = summary.op.type
        self.assertEqual(op_type, u'HistogramSummary')
def _setup_training(self): """Sets up graph, model and trainer.""" # Create config if not given. if self._config is None: self._config = RunConfig(verbose=self.verbose) # Create new graph. self._graph = ops.Graph() self._graph.add_to_collection("IS_TRAINING", True) with self._graph.as_default(): random_seed.set_random_seed(self._config.tf_random_seed) self._global_step = variables.Variable(0, name="global_step", trainable=False) # Setting up inputs and outputs. self._inp, self._out = self._data_feeder.input_builder() # If class weights are provided, add them to the graph. # Different loss functions can use this tensor by name. if self.class_weight: self._class_weight_node = constant_op.constant( self.class_weight, name='class_weight') # Add histograms for X and y if they are floats. if self._data_feeder.input_dtype in (np.float32, np.float64): logging_ops.histogram_summary("X", self._inp) if self._data_feeder.output_dtype in (np.float32, np.float64): logging_ops.histogram_summary("y", self._out) # Create model's graph. self._model_predictions, self._model_loss = self.model_fn( self._inp, self._out) # Set up a single operator to merge all the summaries self._summaries = logging_ops.merge_all_summaries() # Create trainer and augment graph with gradients and optimizer. # Additionally creates initialization ops. learning_rate = self.learning_rate optimizer = self.optimizer if callable(learning_rate): learning_rate = learning_rate(self._global_step) if callable(optimizer): optimizer = optimizer(learning_rate) self._train = optimizers.optimize_loss( self._model_loss, self._global_step, learning_rate=learning_rate, optimizer=optimizer, clip_gradients=self.clip_gradients) # Update ops during training, e.g. batch_norm_ops self._train = control_flow_ops.group( self._train, *ops.get_collection('update_ops')) # Get all initializers for all trainable variables. 
self._initializers = variables.initialize_all_variables() # Create model's saver capturing all the nodes created up until now. self._saver = train.Saver( max_to_keep=self._config.keep_checkpoint_max, keep_checkpoint_every_n_hours=self._config. keep_checkpoint_every_n_hours) # Enable monitor to create validation data dict with appropriate tf placeholders self._monitor.create_val_feed_dict(self._inp, self._out) # Create session to run model with. self._session = session.Session(self._config.tf_master, config=self._config.tf_config) # Run parameter initializers. self._session.run(self._initializers)
def optimize_loss(loss, global_step, learning_rate, optimizer,
                  clip_gradients=None, moving_average_decay=0.9,
                  learning_rate_decay_fn=None, variables=None):
    """Given loss and parameters for optimizer, returns a training op.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter for each update.
      learning_rate: float or Tensor, magnitude of update per each training
                     step. It is used as the constant initial value of a
                     non-trainable `learning_rate` variable.
      optimizer: string or function, used as optimizer for training.
                 A string is looked up in OPTIMIZER_CLS_NAMES; a callable is
                 invoked as `optimizer(learning_rate=lr)`.
      clip_gradients: float or None, clips gradients by this global norm.
      moving_average_decay: float or None, takes into account previous loss
                            to make learning smoother due to outliers.
      learning_rate_decay_fn: function, takes learning_rate and global_step
                              Tensors, returns Tensor.
                              Can be used to implement any learning rate decay
                              functions.
                              For example: tf.train.exponential_decay.
      variables: list of variables to optimize or None to use all trainable
                 variables.

    Returns:
      Training op: the (numerics-checked) loss tensor with a control
      dependency on the gradient update.

    Raises:
      ValueError: if optimizer is wrong type.
      KeyError: presumably, if `optimizer` is a string that is missing from
                OPTIMIZER_CLS_NAMES (it is looked up by subscript).
    """
    # Moving average of the loss with decay.
    if moving_average_decay is not None:
        # Generate moving averages of the loss.
        loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                       name="avg")
        loss_averages_op = loss_averages.apply([loss])
        logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
        loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

    # Convert optimizer into the optimizer class.
    if isinstance(optimizer, str):
        opt_cls = OPTIMIZER_CLS_NAMES[optimizer]
    elif callable(optimizer):
        opt_cls = optimizer
    else:
        raise ValueError(
            "Unrecognized optimizer: should be string or function.")

    # Learning rate variable, with possible decay.
    lr = vs.get_variable(
        "learning_rate", [], trainable=False,
        initializer=init_ops.constant_initializer(learning_rate))
    if learning_rate_decay_fn is not None:
        lr = learning_rate_decay_fn(lr, global_step)

    # Create optimizer.
    opt = opt_cls(learning_rate=lr)

    # All trainable variables, if specific variables are not specified.
    if variables is None:
        variables = vars_.trainable_variables()

    # Compute gradients and clip them if provided.
    gradients = opt.compute_gradients(loss, variables)
    if clip_gradients is not None:
        # compute_gradients yields (gradient, variable) pairs, while
        # clip_by_global_norm expects only the gradients: split the pairs,
        # clip, then pair the clipped gradients with their variables again.
        grads, grad_vars = zip(*gradients)
        clipped_gradients, _ = clip_ops.clip_by_global_norm(list(grads),
                                                            clip_gradients)
        # Materialize the pairs: they are iterated once for the summaries
        # below and once more by apply_gradients, which a bare zip iterator
        # would not survive on Python 3.
        gradients = list(zip(clipped_gradients, grad_vars))

    # Add scalar summary for loss.
    logging_ops.scalar_summary("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
        if isinstance(gradient, ops.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        # Variables the loss does not depend on have no gradient; there is
        # nothing to summarize for them.
        if grad_values is None:
            continue
        logging_ops.histogram_summary(variable.name, variable)
        logging_ops.histogram_summary(variable.name + "/gradients",
                                      grad_values)
        logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                      clip_ops.global_norm([grad_values]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(gradients, global_step=global_step,
                                       name="train")
    # Make sure total_loss is valid.
    final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates],
                                                      final_loss)
    return train_tensor
def __init__(self, params, infer=False):
    """Builds the graph of a stacked-LSTM regression model and its train op.

    Args:
      params: dict read with the keys 'nlayer', 'n_hidden', 'n_output',
        'seq_length', 'n_input' and 'batch_size'.
      infer: not used anywhere in this constructor.

    Attributes set on `self`: the placeholders `is_training`,
    `output_keep_prob`, `input_zero` and `target_data`; `input_data` (first
    a placeholder, then rebound to the noise-selected tensor); `cell`,
    `initial_state`, `final_state`, `final_output`, `cost`, `lr` and
    `train_op`.
    """
    self.is_training = tf.placeholder(tf.bool)
    # Only referenced by the commented-out DropoutWrapper line below.
    self.output_keep_prob = tf.placeholder(tf.float32)
    num_layers = params['nlayer']
    rnn_size = params['n_hidden']
    # Global-norm threshold handed to tf.clip_by_global_norm below.
    grad_clip = 10
    cell_fn = tf.nn.rnn_cell.BasicLSTMCell
    cell = cell_fn(rnn_size)  # RNN size
    # The same cell object is repeated num_layers times in the stack.
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
    # cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob = self.output_keep_prob)
    self.cell = cell
    NOUT = params['n_output']  # end_of_stroke + prob + 2*(mu + sig) + corr
    self.input_data = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], params['n_input']])
    # Multiplied element-wise into the noise below, i.e. acts as a noise mask.
    self.input_zero = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], params['n_input']])
    self.target_data = tf.placeholder(
        tf.float32,
        [params["batch_size"] * params["seq_length"], params["n_output"]])
    self.initial_state = cell.zero_state(batch_size=params['batch_size'],
                                         dtype=tf.float32)
    # Normal noise of fixed shape [batch_size, seq_length, n_input].
    ran_noise = tf.random_normal(shape=[
        params["batch_size"], params['seq_length'], params['n_input']
    ], mean=0, stddev=0.00008)
    ran_noise = tf.mul(ran_noise, self.input_zero)
    tmp_input = tf.nn.relu(self.input_data + ran_noise)
    # The noisy input is selected when is_training is true, the raw
    # placeholder otherwise; self.input_data is rebound to the selected tensor.
    self.input_data = tf.select(self.is_training, tmp_input, self.input_data)
    outputs = []
    state = self.initial_state
    # Unroll the cell over seq_length steps, reusing variables after step 0.
    with tf.variable_scope("rnnlm"):
        for time_step in range(params['seq_length']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(self.input_data[:, time_step, :],
                                        state)
            outputs.append(cell_output)
    # Concatenate the per-step outputs along axis 1 and flatten them to rows
    # of width n_hidden.
    rnn_output = tf.reshape(tf.concat(1, outputs), [-1, params['n_hidden']])
    with tf.variable_scope('rnnlm'):
        output_w1 = tf.get_variable("output_w1", [rnn_size, rnn_size])
        output_b1 = tf.get_variable("output_b1", [rnn_size])
        output_w2 = tf.get_variable("output_w2", [rnn_size, NOUT])
        # NOTE(review): the variable is created as "output_b3" although it is
        # bound to output_b2 -- possibly a typo; renaming it would change the
        # variable name in the graph, so confirm before touching it.
        output_b2 = tf.get_variable("output_b3", [NOUT])
    # Two affine layers on top of the RNN output; no activation is applied
    # between them.
    hidden_1 = tf.add(tf.matmul(rnn_output, output_w1), output_b1)
    self.final_output = tf.add(tf.matmul(hidden_1, output_w2), output_b2)
    tmp = self.final_output - self.target_data
    loss = tf.nn.l2_loss(tmp)
    self.cost = tf.reduce_mean(loss)
    self.final_state = state
    tf.scalar_summary('losses/total_loss', loss)
    # Learning rate variable; created as 0.0 and never assigned in this
    # constructor.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      grad_clip)
    # One histogram of the clipped gradient and one of its norm per entry,
    # tagged with the name of the op producing the gradient.
    # NOTE(review): grad.op is dereferenced without a None check -- presumably
    # every trainable variable receives a gradient here; confirm.
    for grad in grads:
        # if isinstance(grad, ops.grads):
        #     grad_values = grad.values
        # else:
        #     grad_values = grad
        grad_values = grad
        logging_ops.histogram_summary(grad.op.name + ':gradient', grad_values)
        logging_ops.histogram_summary(grad.op.name + ':gradient_norm',
                                      clip_ops.global_norm([grad_values]))
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def create_train_op(
        total_loss,
        optimizer,
        global_step=None,
        update_ops=None,
        variables_to_train=None,
        clip_gradient_norm=0,
        summarize_gradients=False,
        gate_gradients=tf_optimizer.Optimizer.GATE_OP,
        aggregation_method=None,
        colocate_gradients_with_ops=False):
    """Creates an `Operation` that evaluates the gradients and returns the loss.

    Args:
      total_loss: A `Tensor` representing the total loss.
      optimizer: A tf.Optimizer to use for computing the gradients.
      global_step: A `Tensor` representing the global step variable. If left
        as `None`, then `variables.get_or_create_global_step()` is used.
      update_ops: an optional list of updates to execute before the loss is
        evaluated. Only the ops passed here are used (duplicates are
        dropped); this function does not consult any graph collection.
      variables_to_train: an optional list of variables to train. If None, it
        will default to all tf.trainable_variables(). Must be non-empty, and
        every entry must be one of the trainable variables.
      clip_gradient_norm: If greater than 0 then the gradients would be
        clipped by it.
      summarize_gradients: Whether or not add summaries for each gradient.
      gate_gradients: How to gate the computation of gradients. See
        tf.Optimizer.
      aggregation_method: Specifies the method used to combine gradient
        terms. Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: Whether or not to try colocating the
        gradients with the ops that generated them.

    Returns:
      A `Tensor` that when evaluated, computes the gradients and returns the
      total loss value.

    Raises:
      AssertionError: if `variables_to_train` is empty or contains a variable
        that is not trainable.
    """
    if global_step is None:
        global_step = variables.get_or_create_global_step()

    update_ops = set(update_ops or [])

    # Make sure update_ops are computed before total_loss.
    if update_ops:
        with control_flow_ops.control_dependencies(update_ops):
            barrier = control_flow_ops.no_op(name='update_barrier')
        total_loss = control_flow_ops.with_dependencies([barrier], total_loss)

    if variables_to_train is None:
        # Default to tf.trainable_variables()
        variables_to_train = tf_variables.trainable_variables()
    else:
        # Make sure that variables_to_train are in tf.trainable_variables().
        # The collection is fetched once rather than once per variable.
        trainable = tf_variables.trainable_variables()
        for v in variables_to_train:
            assert v in trainable

    assert variables_to_train

    # Create the gradients. Note that compute_gradients adds the gradient
    # computation to the current graph.
    grads = optimizer.compute_gradients(
        total_loss, variables_to_train,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops)

    # Clip gradients.
    if clip_gradient_norm > 0:
        grads = clip_gradient_norms(grads, clip_gradient_norm)

    # Summarize gradients.
    if summarize_gradients:
        for grad, var in grads:
            if grad is None:
                logging.info('Var %s has no gradient', var.op.name)
                continue
            # Sparse gradients carry their data in `.values`.
            if isinstance(grad, ops.IndexedSlices):
                grad_values = grad.values
            else:
                grad_values = grad
            logging_ops.histogram_summary(var.op.name + ':gradient',
                                          grad_values)
            logging_ops.histogram_summary(var.op.name + ':gradient_norm',
                                          clip_ops.global_norm([grad_values]))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(grads, global_step=global_step)

    # Make sure total_loss is valid.
    total_loss = array_ops.check_numerics(total_loss,
                                          'LossTensor is inf or nan')

    # Ensure the train_tensor computes grad_updates.
    return control_flow_ops.with_dependencies([grad_updates], total_loss)
def _add_hidden_layer_summary(self, value, tag):
    """Adds a zero-fraction scalar summary and an activation histogram.

    Args:
      value: tensor whose values are summarized.
      tag: prefix used to build both summary tags.
    """
    # TODO(zakaria): Move this code to tf.learn and add test.
    sparsity_tag = "%s:fraction_of_zero_values" % tag
    sparsity = nn.zero_fraction(value)
    logging_ops.scalar_summary(sparsity_tag, sparsity)

    activation_tag = "%s:activation" % tag
    logging_ops.histogram_summary(activation_tag, value)
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  moving_average_decay=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None):
    """Given loss and parameters for optimizer, returns a training op.

    Args:
      loss: Tensor, 0 dimensional.
      global_step: Tensor, step counter for each update.
      learning_rate: float or Tensor, magnitude of update per each training
                     step.
      optimizer: string, class or optimizer instance, used as trainer.
                 A string should be the name of an optimizer, like 'SGD',
                   'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES
                   constant.
                 A class should be a sub-class of tf.Optimizer that implements
                   `compute_gradients` and `apply_gradients` functions.
                 An optimizer instance should be an instance of a
                   tf.Optimizer sub-class and have `compute_gradients` and
                   `apply_gradients` functions.
      gradient_noise_scale: float or None, adds 0-mean normal noise scaled by
                            this value.
      gradient_multipliers: dict of variables or variable names to floats.
                            If present, gradients for specified
                            variables will be multiplied by given constant.
      clip_gradients: float or `None`, clips gradients by this value.
      moving_average_decay: Deprecated. float or None, takes into account
                            previous loss to make learning smoother due to
                            outliers.
      learning_rate_decay_fn: function, takes `learning_rate` and
                              `global_step` `Tensor`s, returns `Tensor`.
                              Can be used to implement any learning rate decay
                              functions.
                              For example: tf.train.exponential_decay.
      update_ops: list of update `Operation`s to execute at each step. If
                  `None`, uses elements of UPDATE_OPS collection. The order of
                  execution between `update_ops` and `loss` is
                  non-deterministic.
      variables: list of variables to optimize or
                 `None` to use all trainable variables.
      name: The name for this operation is used to scope operations and
            summaries.
      summaries: List of internal quantities to visualize on tensorboard. If
                 not set only the loss and the learning rate will be reported.
                 The complete list is in OPTIMIZER_SUMMARIES.

    Returns:
      Training op.

    Raises:
      ValueError: if optimizer is wrong type.
    """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Without explicit update ops, fall back to the UPDATE_OPS collection.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Attach the update ops to the loss so they run along with it.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Deprecated smoothing of the loss with an exponential moving average.
        # TODO(b/30439864): moving_average_decay should be removed.
        if moving_average_decay is not None:
            logging.warn("'moving_average_decay' is deprecated. Please use "
                         "tensorboard's builtin averaging instead.")
            ema = train.ExponentialMovingAverage(moving_average_decay,
                                                 name="avg")
            ema_update = ema.apply([loss])
            logging_ops.scalar_summary("loss/mean", ema.average(loss))
            loss = control_flow_ops.with_dependencies([ema_update], loss)

        # Resolve the learning rate: a 0-d tensor is used as given, a float is
        # wrapped in a non-trainable variable, anything else is rejected.
        is_scalar_tensor = (isinstance(learning_rate, ops.Tensor) and
                            learning_rate.get_shape().ndims == 0)
        if not is_scalar_tensor and not isinstance(learning_rate, float):
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" %
                             (str(learning_rate), str(type(learning_rate))))
        if is_scalar_tensor:
            step_size = learning_rate
        else:
            step_size = vs.get_variable(
                "learning_rate", [], trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))

        if summaries is None:
            summaries = ["loss", "learning_rate"]
        if learning_rate_decay_fn is not None:
            step_size = learning_rate_decay_fn(step_size, global_step)
        if "learning_rate" in summaries:
            logging_ops.scalar_summary("learning_rate", step_size)

        # Resolve the optimizer: names and classes are instantiated with the
        # learning rate, ready-made instances are used untouched.
        optimizer_cls = None
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s."
                    % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            optimizer_cls = OPTIMIZER_CLS_NAMES[optimizer]
        elif isinstance(optimizer, type) and issubclass(
                optimizer, optimizer_.Optimizer):
            optimizer_cls = optimizer
        elif not isinstance(optimizer, optimizer_.Optimizer):
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))
        if optimizer_cls is None:
            trainer = optimizer
        else:
            trainer = optimizer_cls(learning_rate=step_size)

        # Default to every trainable variable.
        if variables is None:
            variables = vars_.trainable_variables()

        # Gradients, followed by the optional noise / scaling / clipping
        # stages in that order.
        grads_and_vars = trainer.compute_gradients(loss, variables)
        if gradient_noise_scale is not None:
            grads_and_vars = _add_scaled_noise_to_gradients(
                grads_and_vars, gradient_noise_scale)
        if gradient_multipliers is not None:
            grads_and_vars = _multiply_gradients(grads_and_vars,
                                                 gradient_multipliers)
        if clip_gradients is not None:
            grads_and_vars = _clip_gradients_by_norm(grads_and_vars,
                                                     clip_gradients)

        # Scalar summary for the loss.
        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Histograms for gradients and gradient norms, skipping variables
        # that have no gradient.
        for gradient, variable in grads_and_vars:
            values = (gradient.values
                      if isinstance(gradient, ops.IndexedSlices)
                      else gradient)
            if values is None:
                continue
            if "gradients" in summaries:
                logging_ops.histogram_summary(variable.name + "/gradients",
                                              values)
            if "gradient_norm" in summaries:
                logging_ops.histogram_summary(
                    variable.name + "/gradient_norm",
                    clip_ops.global_norm([values]))

        # Apply the gradients and make the returned loss depend on the update.
        grad_updates = trainer.apply_gradients(grads_and_vars,
                                               global_step=global_step,
                                               name="train")
        return control_flow_ops.with_dependencies([grad_updates], loss)
def __init__(self, params, infer=False):
    """Builds the graph of a stacked-LSTM regression model with dropout.

    Args:
      params: dict read with the keys 'nlayer', 'n_hidden', 'n_output',
        'seq_length', 'n_input', 'batch_size' and 'noise_std'.
      infer: not used anywhere in this constructor.

    Attributes set on `self`: the placeholders `is_training`,
    `output_keep_prob`, `input_zero`, `repeat_data` and `target_data`;
    `input_data` (a placeholder, rebound to the noise-selected tensor when
    noise is enabled); `cell`, `initial_state`, `final_state`,
    `final_output`, `cost`, `lr`, `total_parameters` and `train_op`.
    """
    self.is_training = tf.placeholder(tf.bool)
    # Keep probability fed to the DropoutWrapper of every layer.
    self.output_keep_prob = tf.placeholder(tf.float32)
    num_layers = params['nlayer']
    rnn_size = params['n_hidden']
    # Global-norm threshold handed to tf.clip_by_global_norm below.
    grad_clip = 10
    cell_lst = []
    # One separate LSTM cell per layer, each wrapped with output dropout.
    for i in range(num_layers):
        cell = tf.nn.rnn_cell.LSTMCell(
            rnn_size,
            initializer=tf.contrib.layers.xavier_initializer(uniform=False),
            forget_bias=1.0)
        # if i==0:
        #     cell_drop = tf.nn.rnn_cell.DropoutWrapper(cell,input_keep_prob= self.output_keep_prob)
        #     cell=cell_drop
        cell_drop = tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.output_keep_prob)
        cell = cell_drop
        cell_lst.append(cell)
    cell = tf.nn.rnn_cell.MultiRNNCell(cell_lst)
    # cell_drop = tf.nn.rnn_cell.DropoutWrapper(cell,output_keep_prob= self.output_keep_prob)
    # cell=cell_drop
    self.cell = cell
    NOUT = params['n_output']  # end_of_stroke + prob + 2*(mu + sig) + corr
    self.input_data = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], params['n_input']])
    # Only referenced by the commented-out noise-masking line below.
    self.input_zero = tf.placeholder(
        dtype=tf.float32,
        shape=[None, params['seq_length'], params['n_input']])
    # Integer value per (sample, time step); rows whose value is 0 are left
    # out of the loss further down.
    self.repeat_data = tf.placeholder(dtype=tf.int32,
                                      shape=[None, params['seq_length']])
    self.target_data = tf.placeholder(
        tf.float32, [None, params["seq_length"], params["n_output"]])
    self.initial_state = cell.zero_state(batch_size=params['batch_size'],
                                         dtype=tf.float32)
    # Noise is built into the graph only if noise_std is bigger than 0, and
    # is selected only while is_training is true.
    if (params["noise_std"] > 0.0):
        ran_noise = tf.random_normal(shape=[
            params["batch_size"], params['seq_length'], params['n_input']
        ], mean=0, stddev=params['noise_std'])
        # ran_noise=tf.mul(ran_noise,self.input_zero)
        tmp_input = tf.nn.relu(self.input_data + ran_noise)
        self.input_data = tf.select(self.is_training, tmp_input,
                                    self.input_data)
    outputs = []
    state = self.initial_state
    # Unroll the cell over seq_length steps, reusing variables after step 0.
    with tf.variable_scope("rnnlm"):
        for time_step in range(params['seq_length']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(self.input_data[:, time_step, :],
                                        state)
            outputs.append(cell_output)
    # Stack the per-step outputs, swap the first two axes and flatten to rows
    # of width n_hidden.
    rnn_output = tf.reshape(tf.transpose(tf.pack(outputs), [1, 0, 2]),
                            [-1, params['n_hidden']])
    with tf.variable_scope('rnnlm'):
        output_w1 = tf.get_variable(
            "output_w1", [rnn_size, NOUT],
            initializer=tf.contrib.layers.xavier_initializer())
        output_b1 = tf.get_variable("output_b1", [NOUT])
    # Single affine output layer.
    self.final_output = tf.add(tf.matmul(rnn_output, output_w1), output_b1)
    # Flatten repeat_data and find the positions of its non-zero entries.
    flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
    where_flt = tf.not_equal(flt, 0)
    indices = tf.where(where_flt)
    tmp = self.final_output - tf.reshape(self.target_data,
                                         [-1, params["n_output"]])
    # Keep only the residual rows at those positions.
    tmp = tf.gather(tmp, tf.squeeze(indices, [1]))
    loss = tf.nn.l2_loss(tmp)
    self.cost = tf.reduce_mean(loss)
    self.final_state = state
    tf.scalar_summary('losses/total_loss', loss)
    # Learning rate variable; created as 0.0 and never assigned in this
    # constructor.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Count the trainable parameters: product of the dimensions of every
    # trainable variable, summed over all of them.
    total_parameters = 0
    for variable in tvars:
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parametes = 1
        for dim in shape:
            variable_parametes *= dim.value
        total_parameters += variable_parametes
    self.total_parameters = total_parameters
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      grad_clip)
    # One histogram of the clipped gradient and one of its norm per entry,
    # tagged with the name of the op producing the gradient.
    # NOTE(review): grad.op is dereferenced without a None check -- presumably
    # every trainable variable receives a gradient here; confirm.
    for grad in grads:
        # if isinstance(grad, ops.grads):
        #     grad_values = grad.values
        # else:
        #     grad_values = grad
        grad_values = grad
        logging_ops.histogram_summary(grad.op.name + ':gradient', grad_values)
        logging_ops.histogram_summary(grad.op.name + ':gradient_norm',
                                      clip_ops.global_norm([grad_values]))
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))