def loss(self, y_true, y_pred):
    """Categorical cross-entropy loss.

    Steps, in order:
      1. if ``self.crop_indices`` is set, gather those entries from both
         tensors with ``utils.batch_gather``;
      2. if ``self.use_float16`` is set, cast both tensors to float16;
      3. renormalise ``y_pred`` along the last axis and clip it to
         ``[K.epsilon(), 1]``;
      4. compute ``-y_true * log(y_pred)`` and apply the optional
         ``self.weights`` / ``self.vox_weights`` multipliers;
      5. sum over the last axis (in float32) and average what is left.

    Args:
        y_true: target tensor (presumably one-hot or probabilistic labels
            along the last axis -- confirm against the caller).
        y_pred: predicted probabilities, combined elementwise with y_true.

    Returns:
        The mean loss tensor, routed through a finiteness check so that
        evaluating it raises if the loss is NaN or Inf.
    """
    if self.crop_indices is not None:
        y_true = utils.batch_gather(y_true, self.crop_indices)
        y_pred = utils.batch_gather(y_pred, self.crop_indices)

    if self.use_float16:
        y_true = K.cast(y_true, 'float16')
        y_pred = K.cast(y_pred, 'float16')

    # Scale and clip probabilities. The rescaling should not be necessary
    # for a softmax output; the clip keeps log() away from zero.
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1)

    # Log-likelihood of the prediction, then the cross-entropy terms.
    log_post = K.log(y_pred)
    xent = - y_true * log_post

    # Optional weighting.
    if self.weights is not None:
        xent *= self.weights
    if self.vox_weights is not None:
        xent *= self.vox_weights

    # Total loss: sum over the last axis, mean over everything else.
    mloss = K.mean(K.sum(K.cast(xent, 'float32'), -1))

    # verify_tensor_all_finite only takes effect through the tensor it
    # returns; the original discarded that tensor, so the check never ran
    # as part of the returned loss.
    return tf.verify_tensor_all_finite(mloss, 'Loss not finite')
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M
        x n where each row represents the mean of a n-dimensional
        Gaussian.
    scale_one : tf.Tensor
        A tensor of same shape as ``loc_one``, representing the
        standard deviation.
    loc_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the mean of
        another Gaussian.
    scale_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        standard deviation of another Gaussian.

    Returns
    -------
    tf.Tensor
        For 0-D or 1-D tensor inputs, outputs the 0-D tensor
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        For 2-D tensor inputs, outputs the 1-D tensor
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) ||
        N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)
    loc_one = tf.cast(loc_one, tf.float32)
    scale_one = tf.cast(scale_one, tf.float32)

    # The standard-normal shortcut is only decidable for plain Python
    # scalars. Comparing an array (or an equality-overloading tensor) with
    # `==` yields an elementwise result whose truth value is ambiguous, so
    # anything that is not a Python number takes the general formula, which
    # is mathematically identical.
    second_is_standard_normal = (
        isinstance(loc_two, (int, float)) and
        isinstance(scale_two, (int, float)) and
        loc_two == 0.0 and scale_two == 1.0)

    if second_is_standard_normal:
        # With default arguments, we can avoid some intermediate computation.
        out = tf.square(scale_one) + tf.square(loc_one) - \
            1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        loc_two = tf.cast(loc_two, tf.float32)
        scale_two = tf.cast(scale_two, tf.float32)
        out = tf.square(scale_one / scale_two) + \
            tf.square((loc_two - loc_one) / scale_two) - \
            1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    if len(out.get_shape()) <= 1:
        # scalar or vector: a single KL value
        return 0.5 * tf.reduce_sum(out)
    # matrix: one KL value per row
    return 0.5 * tf.reduce_sum(out, 1)
def create_generative(parameters):
    """Build the generative network graph and return its pieces in a dict.

    Resets the default TensorFlow graph, creates the input/target/state/
    learning-rate placeholders, the stacked LSTM cell and the input/output
    projection variables, then wires everything through ``RNN_generative``
    inside the ``"RNN"`` variable scope.

    Args:
        parameters: dict read for the keys ``n_input``, ``n_output``,
            ``lstm_layers``, ``n_peers``, ``input_layer``, ``lstm_clip`` and
            ``n_mixtures`` (``RNN_generative`` may read more).

    Returns:
        dict with keys ``input_weights``, ``input_bias``, ``output_weights``,
        ``output_bias``, ``rnn_cell``, ``lr``, ``x``, ``y``, ``keep_prob``,
        ``istate``, ``pred`` and ``last_state``.
    """
    print('Creating the neural network model.')
    tf.reset_default_graph()

    # tf Graph input; both tensors are checked for NaN/Inf and echoed with
    # tf.Print whenever they are evaluated.
    x = tf.placeholder(tf.float32, shape=(1, parameters['n_input']), name='input')
    x = tf.verify_tensor_all_finite(x, "X not finite!")
    y = tf.placeholder(tf.float32, shape=(1, parameters['n_output']), name='expected_output')
    y = tf.verify_tensor_all_finite(y, "Y not finite!")
    x = tf.Print(x, [x], "X: ")
    y = tf.Print(y, [y], "Y: ")

    layer_sizes = parameters['lstm_layers']
    lstm_state_size = np.sum(layer_sizes) * 2
    # Note: Batch size is the first dimension in istate.
    istate = tf.placeholder(tf.float32, shape=(None, lstm_state_size), name='internal_state')
    lr = tf.placeholder(tf.float32, name='learning_rate')

    # The target to track itself and its peers, each with x, y
    # (and velocity x and y).
    input_size = (parameters['n_peers'] + 1) * 2

    # Without an input layer the first LSTM layer is sized from the raw input.
    input_layer = parameters['input_layer']
    rnn_input_size = parameters['n_input'] if input_layer is None else input_layer

    cells = []
    for i, units in enumerate(layer_sizes):
        cell_input_size = layer_sizes[i - 1] if i > 0 else rnn_input_size
        cells.append(rnn_cell.LSTMCell(units, cell_input_size,
                                       num_proj=units,
                                       cell_clip=parameters['lstm_clip'],
                                       use_peepholes=True))
    # TODO: GRUCell support here.

    # NOTE(review): the input projection below is sized from
    # parameters['input_layer'] even when that is None -- confirm whether the
    # "no input layer" configuration is meant to reach this point.
    input_weights = tf.Variable(
        tf.random_normal([input_size, parameters['input_layer']]),
        name='input_weights')
    input_bias = tf.Variable(tf.random_normal([parameters['input_layer']]),
                             name='input_bias')
    # 6 outputs per mixture = 2 sigma, 2 mean, weight, rho
    n_outputs = parameters['n_mixtures'] * 6
    output_weights = tf.Variable(tf.random_normal([layer_sizes[-1], n_outputs]),
                                 name='output_weights')
    # We need to put at least the standard deviation output biases to about 5
    # to prevent zeros and infinities (e.g. mean = 5.0, stddev = 3.0).
    output_bias = tf.Variable(tf.random_normal([n_outputs]), name='output_bias')

    model = {
        'input_weights': input_weights,
        'input_bias': input_bias,
        'output_weights': output_weights,
        'output_bias': output_bias,
        'rnn_cell': rnn_cell.MultiRNNCell(cells),
        'lr': lr,
        'x': x,
        'y': y,
        'keep_prob': tf.placeholder(tf.float32),
        'istate': istate
    }

    # The next variables need to be remapped, because we don't have RNN
    # context anymore:
    # RNN/MultiRNNCell/Cell0/LSTMCell/ -> MultiRNNCell/Cell0/LSTMCell/
    # B, W_F_diag, W_O_diag, W_I_diag, W_0
    with tf.variable_scope("RNN"):
        pred = RNN_generative(parameters, x, model, istate)

    model['pred'] = pred[0]
    model['last_state'] = pred[1]
    return model
def hessian(y, xs):
    r"""Calculate Hessian of y with respect to each x in xs.

    Parameters
    ----------
    y : tf.Tensor
        Tensor to calculate Hessian of.
    xs : list of tf.Variable
        List of TensorFlow variables to calculate with respect to.
        The variables can have different shapes.

    Returns
    -------
    tf.Tensor
        A 2-D tensor where each row is
        .. math:: \partial_{xs} ( [ \partial_{xs} y ]_j ).

    Raises
    ------
    InvalidArgumentError
        If the inputs have Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(y, msg='')]
    dependencies.extend([tf.verify_tensor_all_finite(x, msg='') for x in xs])

    with tf.control_dependencies(dependencies):
        # Calculate flattened vector grad_{xs} y.
        grads = tf.gradients(y, xs)
        grads = [tf.reshape(grad, [-1]) for grad in grads]
        grads = tf.concat(0, grads)

        # Length of the gradient vector. Prefer the statically known size;
        # only evaluate the graph (which needs a default session) when the
        # static shape does not provide it. The original tested
        # `isinstance(Dimension, int)`, which never holds, so it always
        # evaluated.
        d = grads.get_shape().as_list()[0]
        if d is None:
            d = grads.eval().shape[0]

        # Loop over each element in the vector.
        mat = []
        for j in range(d):
            # Calculate grad_{xs} ( [ grad_{xs} y ]_j ).
            gradjgrads = tf.gradients(grads[j], xs)
            # Flatten into vector.
            row_parts = []
            for x, hij in zip(xs, gradjgrads):
                # return 0 if gradient doesn't exist; TensorFlow returns None
                if hij is None:
                    hij = tf.zeros(x.get_shape(), dtype=tf.float32)
                row_parts.append(tf.reshape(hij, [-1]))
            mat.append(tf.concat(0, row_parts))

        # Form matrix where each row is grad_{xs} ( [ grad_{xs} y ]_j ).
        return tf.pack(mat)
def _validate(self):
    """Return a single grouped op that validates the distribution parameters.

    The group asserts that ``self._scale`` and ``self._high - self._low`` are
    positive, and that the bounds, location and scale contain no NaN/Inf.
    """
    positivity_checks = [
        tf.assert_positive(self._scale),
        tf.assert_positive(self._high - self._low),
    ]
    finite_targets = (
        (self._high, "Upper bound not finite"),
        (self._low, "Lower bound not finite"),
        (self._loc, "Loc not finite"),
        (self._scale, "Scale not finite"),
    )
    finiteness_checks = [tf.verify_tensor_all_finite(tensor, message)
                         for tensor, message in finite_targets]
    return tf.group(*(positivity_checks + finiteness_checks),
                    name="ValidationOps")
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the mean of a n-dimensional Gaussian
    scale_one : tf.Tensor
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the standard deviation of a n-dimensional Gaussian
    loc_two : tf.Tensor, optional
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the mean of a n-dimensional Gaussian
    scale_two : tf.Tensor, optional
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the standard deviation of a n-dimensional Gaussian

    Returns
    -------
    tf.Tensor
        for scalar or vector inputs, outputs the scalar
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        for matrix inputs, outputs the vector
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) ||
        N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    # Decide on the standard-normal shortcut while the arguments are still
    # the caller's values: once wrapped by with_dependencies they are
    # tensors, and comparing a tensor to a float with `==` does not test its
    # value. Only plain Python numbers qualify; everything else uses the
    # general formula, which gives the same result.
    second_is_standard_normal = (
        isinstance(loc_two, (int, float)) and
        isinstance(scale_two, (int, float)) and
        loc_two == 0.0 and scale_two == 1.0)

    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)

    if second_is_standard_normal:
        out = tf.square(scale_one) + tf.square(loc_one) - \
            1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        out = tf.square(scale_one / scale_two) + \
            tf.square((loc_two - loc_one) / scale_two) - \
            1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    # Reduce according to rank so both code paths honour the documented
    # contract: one value for scalar/vector inputs, one value per row for
    # matrices. (Previously one branch summed everything and the other
    # always reduced axis 1, which fails for vectors.)
    if len(out.get_shape()) <= 1:
        return 0.5 * tf.reduce_sum(out)
    return 0.5 * tf.reduce_sum(out, 1)
def cumprod(xs):
    """Cumulative product of a tensor along its outer dimension.

    Workaround for https://github.com/tensorflow/tensorflow/issues/813:
    the tensor is unpacked along its first axis, the running product is
    accumulated slice by slice, and the partial products are packed back.

    Parameters
    ----------
    xs : tf.Tensor
        A 1-D or higher tensor.

    Returns
    -------
    tf.Tensor
        A float32 tensor with `cumprod` applied along its outer dimension.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    finite_check = [tf.verify_tensor_all_finite(xs, msg='')]
    xs = control_flow_ops.with_dependencies(finite_check, xs)
    xs = tf.cast(xs, dtype=tf.float32)

    slices = tf.unpack(xs)
    running = tf.ones_like(slices[0])
    partial_products = []
    for piece in slices:
        running = running * piece
        partial_products.append(running)

    return tf.pack(partial_products)
def mean_dice(self, y_true, y_pred):
    """Weighted mean dice across all patches and labels.

    Args:
        y_true: ground-truth tensor passed to ``self.dice``.
        y_pred: prediction tensor passed to ``self.dice``.

    Returns:
        The mean of the (optionally weighted) dice scores, routed through a
        finiteness check so that evaluating it raises on NaN or Inf.
    """
    # Dice per entry; per the original author's note this is expected to be
    # [batch_size, nb_labels] -- confirm against self.dice.
    dice_metric = self.dice(y_true, y_pred)

    # Weigh the entries in the dice matrix.
    if self.weights is not None:
        dice_metric *= self.weights
    if self.vox_weights is not None:
        dice_metric *= self.vox_weights

    # This is the mean dice itself (a metric), not "one minus dice".
    mean_dice_metric = K.mean(dice_metric)

    # The check only takes effect through the tensor it returns; the
    # original discarded that tensor.
    return tf.verify_tensor_all_finite(mean_dice_metric, 'metric not finite')
def testVerifyTensorAllFiniteSucceeds(self):
    """A tensor of finite random values passes the check unchanged."""
    shape = [5, 4]
    values = np.random.random_sample(shape).astype(np.float32)
    with self.test_session():
        tensor = tf.constant(values, shape=shape, dtype=tf.float32)
        checked = tf.verify_tensor_all_finite(tensor,
                                              "Input is not a number.")
        self.assertAllClose(values, checked.eval())
def __init__(self, rnn_states, type_embedder, name='DelexicalizedDynamicPredicateEmbedder'):
    """Construct DelexicalizedDynamicPredicateEmbedder.

    Each predicate embedding is the concatenation of (a) the mean of the
    RNN states selected by the fed row/column indices and (b) a mean
    embedding computed from ``type_embedder.embeds``.

    Args:
        rnn_states (SequenceBatch): of shape (num_contexts, seq_length, rnn_state_dim)
        type_embedder (TokenEmbedder)
        name (str)
    """
    self._type_embedder = type_embedder

    with tf.name_scope(name):
        # Column indices into rnn_states (index time):
        # (num_predicates, max_predicate_mentions)
        self._col_indices = FeedSequenceBatch()
        col_values = self._col_indices.values

        # Row indices into rnn_states (index utterance): (num_predicates,)
        self._row_indices = tf.placeholder(dtype=tf.int32, shape=[None])
        rows_broadcast = expand_dims_for_broadcast(self._row_indices, col_values)

        # (num_predicates, max_predicate_mentions, rnn_state_dim)
        gathered_states = gather_2d(rnn_states.values, rows_broadcast, col_values)
        selected = SequenceBatch(gathered_states, self._col_indices.mask)

        # (num_predicates, rnn_state_dim)
        rnn_embeds = tf.verify_tensor_all_finite(
            reduce_mean(selected, allow_empty=True),
            "RNN-state-based embeddings")

        self._type_seq_embedder = MeanSequenceEmbedder(type_embedder.embeds,
                                                       name='TypeEmbedder')
        self._embeds = tf.concat(1, [rnn_embeds, self._type_seq_embedder.embeds])
def log_sum_exp(input_tensor, reduction_indices=None, keep_dims=False):
    """Compute the ``log_sum_exp`` of elements in a tensor, taking
    the sum across axes given by ``reduction_indices``.

    The maximum along the reduced axes is subtracted before exponentiating
    and added back afterwards, which keeps ``exp`` from overflowing.

    Parameters
    ----------
    input_tensor : tf.Tensor
        The tensor to reduce. Should have numeric type.
    reduction_indices : int or list of int, optional
        The dimensions to reduce. If `None` (the default), reduces all
        dimensions.
    keep_dims : bool, optional
        If true, retains reduced dimensions with length 1.

    Returns
    -------
    tf.Tensor
        The reduced tensor.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(input_tensor, msg='')]
    input_tensor = control_flow_ops.with_dependencies(dependencies, input_tensor)
    input_tensor = tf.cast(input_tensor, dtype=tf.float32)

    # Keep the reduced axes on the maximum so it broadcasts against the input.
    x_max = tf.reduce_max(input_tensor, reduction_indices, keep_dims=True)
    log_shifted_sum = tf.log(tf.reduce_sum(
        tf.exp(input_tensor - x_max), reduction_indices, keep_dims))

    if keep_dims:
        offset = x_max
    else:
        # Drop exactly the reduced (now length-1) axes. An unrestricted
        # tf.squeeze would also remove unrelated length-1 axes and then
        # broadcast against the sum into the wrong shape.
        offset = tf.reduce_max(x_max, reduction_indices)

    return offset + log_shifted_sum
def log_sum_exp(x):
    """Compute the ``log_sum_exp`` of the elements in x.

    The global maximum is subtracted before exponentiating and added back
    after the log.

    Parameters
    ----------
    x : tf.Tensor
        vector or matrix with second dimension 1
        shape=TensorShape([Dimension(N)])
        shape=TensorShape([Dimension(N), Dimension(1)])

    Returns
    -------
    tf.Tensor
        The maximum and the sum are both taken over every element of x, so
        the result is a single value for either input shape.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    finite_check = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(finite_check, x)

    peak = tf.reduce_max(x)
    shifted = tf.sub(x, peak)
    total = tf.reduce_sum(tf.exp(shifted))
    return tf.add(peak, tf.log(total))
def loss(self, y_true, y_pred):
    """Dice loss: the mean of ``1 - dice``, optionally weighted.

    Assumes y_pred is a probability (in [0, 1], rows summing to 1).

    Args:
        y_true: ground-truth tensor passed to ``self.dice``.
        y_pred: prediction tensor passed to ``self.dice``.

    Returns:
        The mean dice loss, routed through a finiteness check so that
        evaluating it raises on NaN or Inf.
    """
    # Dice per entry; per the original author's note this is expected to be
    # [batch_size, nb_labels] -- confirm against self.dice.
    dice_metric = self.dice(y_true, y_pred)

    dice_loss = 1 - dice_metric

    # Weigh the entries in the dice matrix.
    if self.weights is not None:
        dice_loss *= self.weights

    mean_dice_loss = K.mean(dice_loss)

    # The check only takes effect through the tensor it returns; the
    # original discarded that tensor.
    return tf.verify_tensor_all_finite(mean_dice_loss, 'Loss not finite')
def init_target(self):
    """Define the optimisation target and register it as a scalar summary.

    ``self.target`` is ``reduced_loss + reg * regularization``;
    ``self.checked_target`` is the same value guarded by a NaN/Inf check.
    """
    with tf.name_scope('target'):
        penalty = self.reg * self.regularization
        self.target = self.reduced_loss + penalty
        self.checked_target = tf.verify_tensor_all_finite(
            self.target,
            msg='NaN or Inf in target value',
            name='target')
        tf.summary.scalar('target', self.checked_target)
def multivariate_rbf(x, y=0.0, sigma=1.0, l=1.0):
    r"""Squared-exponential kernel

    .. math:: k(x, y) = \sigma^2 \exp{ -1/(2l^2) \sum_i (x_i - y_i)^2 }

    Parameters
    ----------
    x : tf.Tensor
        A n-D tensor.
    y : tf.Tensor, optional
        A tensor of same shape as ``x``.
    sigma : tf.Tensor, optional
        A 0-D tensor, representing the standard deviation of radial
        basis function.
    l : tf.Tensor, optional
        A 0-D tensor, representing the lengthscale of radial basis
        function.

    Returns
    -------
    tf.Tensor
        The kernel value. The squared differences are summed over every
        element of ``x - y``.

    Raises
    ------
    InvalidArgumentError
        If the mean variables have Inf or NaN values, or if the scale
        and length variables are not positive.
    """
    checks = [tf.verify_tensor_all_finite(x, msg=''),
              tf.verify_tensor_all_finite(y, msg=''),
              tf.assert_positive(sigma),
              tf.assert_positive(l)]

    # Attach the validation ops to every argument, then work in float32.
    guarded = [control_flow_ops.with_dependencies(checks, arg)
               for arg in (x, y, sigma, l)]
    x, y, sigma, l = [tf.cast(arg, dtype=tf.float32) for arg in guarded]

    variance = tf.pow(sigma, 2.0)
    neg_inv_two_l_sq = -1.0 / (2.0 * tf.pow(l, 2.0))
    squared_distance = tf.reduce_sum(tf.pow(x - y, 2.0))
    return variance * tf.exp(neg_inv_two_l_sq * squared_distance)
def l2_normalize(x, dim, name=None):
    """l2 normalizes x and caps the gradient of the Square Root.

    Both the input and the normalized result are checked for NaN/Inf.

    Args:
      x: The tensor to normalize.
      dim: The dimension to normalize along.
      name: Optional name for this op.
    Returns:
      x normalized along dim.
    """
    with tf.op_scope([x], name, 'l2_normalize') as scope:
        x = tf.convert_to_tensor(x, name='x')
        x = tf.verify_tensor_all_finite(x, 'Error at input %s' % scope)
        sum_of_squares = tf.reduce_sum(tf.square(x), [dim], keep_dims=True)
        x_norm = capped_sqrt(sum_of_squares)
        normalized = tf.div(x, x_norm, name=scope)
        return tf.verify_tensor_all_finite(normalized, 'Error at %s' % scope)
def l1_normalize(x, dim, name=None):
    """l1 normalizes x.

    Both the input and the normalized result are checked for NaN/Inf, so a
    slice whose absolute values sum to zero fails the output check.

    Args:
      x: The tensor to normalize.
      dim: The dimension to normalize along.
      name: Optional name for this op.
    Returns:
      x normalized along dim.
    """
    with tf.op_scope([x], name, 'l1_normalize') as scope:
        x = tf.convert_to_tensor(x, name='x')
        x = tf.verify_tensor_all_finite(x, 'Error at input %s' % scope)
        abs_total = tf.reduce_sum(tf.abs(x), [dim], keep_dims=True)
        normalized = tf.div(x, abs_total, name=scope)
        return tf.verify_tensor_all_finite(normalized, 'Error at %s' % scope)
def embed(sequence_batch, embeds):
    """Look up embeddings for a sequence batch and zero out padded positions.

    Args:
        sequence_batch: object exposing ``values`` (indices into ``embeds``)
            and ``mask``.
        embeds: embedding tensor indexed along its first axis.

    Returns:
        SequenceBatch holding the masked embeddings and the original mask.
    """
    mask = sequence_batch.mask
    looked_up = tf.verify_tensor_all_finite(
        tf.gather(embeds, sequence_batch.values), 'embedded_values')

    # Set all pad embeddings to zero.
    mask_for_broadcast = expand_dims_for_broadcast(mask, looked_up)
    looked_up *= mask_for_broadcast

    return SequenceBatch(looked_up, mask)
def to_simplex(x):
    """Transform real vector of length ``(K-1)`` to a simplex of dimension ``K``
    using a backward stick breaking construction.

    Parameters
    ----------
    x : tf.Tensor
        A 1-D or 2-D tensor.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input but with last dimension of
        size ``K``.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.

    Notes
    -----
    x as a 3-D or higher tensor is not guaranteed to be supported.
    """
    finite_check = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(finite_check, x)
    x = tf.cast(x, dtype=tf.float32)

    if isinstance(x, (tf.Tensor, tf.Variable)):
        shape = get_dims(x)
    else:
        shape = x.shape

    is_vector = len(shape) == 1
    K_minus_one = shape[0] if is_vector else shape[1]

    # Shared by both layouts: offset, then the stick-breaking fractions.
    eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one),
                         dtype=tf.float32))
    z = tf.sigmoid(eq + x)

    if is_vector:
        pil = tf.concat(0, [z, tf.constant([1.0])])
        piu = tf.concat(0, [tf.constant([1.0]), 1.0 - z])
        S = cumprod(piu)
    else:
        n_rows = shape[0]
        pil = tf.concat(1, [z, tf.ones([n_rows, 1])])
        piu = tf.concat(1, [tf.ones([n_rows, 1]), 1.0 - z])
        # cumulative product within each row
        S = tf.pack([cumprod(piu_x) for piu_x in tf.unpack(piu)])

    return S * pil
def testVerifyTensorAllFiniteFails(self):
    """NaN and Inf entries each make the check raise with the given message."""
    shape = [5, 4]
    values = np.random.random_sample(shape).astype(np.float32)
    my_msg = "Input is not a number."

    # First row is overwritten with NaN, then with Inf.
    for bad_value in (np.nan, np.inf):
        values[0] = bad_value
        with self.test_session():
            with self.assertRaisesOpError(my_msg):
                tensor = tf.constant(values, shape=shape, dtype=tf.float32)
                checked = tf.verify_tensor_all_finite(tensor, my_msg)
                checked.eval()
def init_learnable_params(self):
    """Create the trainable weights ``self.w`` and the bias ``self.b``.

    One weight matrix of shape ``[n_features, r]`` is created per order,
    uniformly initialised in ``[-init_std, init_std]`` and wrapped in a
    NaN/Inf check; ``r`` is 1 for the first order and ``self.rank``
    otherwise. The bias is also logged as a scalar summary.
    """
    self.w = [None] * self.order
    for idx in range(self.order):
        order = idx + 1
        width = 1 if order == 1 else self.rank
        initial = tf.random_uniform([self.n_features, width],
                                    -self.init_std, self.init_std)
        variable = tf.Variable(initial, trainable=True,
                               name='embedding_' + str(order))
        self.w[idx] = tf.verify_tensor_all_finite(
            variable, msg='NaN or Inf in w[{}].'.format(idx))

    self.b = tf.Variable(self.init_std, trainable=True, name='bias')
    tf.summary.scalar('bias', self.b)
def multivariate_rbf(x, y=0.0, sigma=1.0, l=1.0):
    r"""Squared-exponential kernel

    .. math:: k(x, y) = \sigma^2 \exp{ -1/(2l^2) \sum_i (x_i - y_i)^2 }

    Parameters
    ----------
    x : tf.Tensor
        scalar, vector, matrix, or n-Tensor
    y : Optional[tf.Tensor], default 0.0
        scalar, vector, matrix, or n-Tensor
    sigma : Optional[double], default 1.0
        standard deviation of radial basis function
    l : Optional[double], default 1.0
        lengthscale of radial basis function

    Returns
    -------
    tf.Tensor
        The kernel value. The squared differences are summed over every
        element of ``x - y``.

    Raises
    ------
    InvalidArgumentError
        If the mean variables have Inf or NaN values, or if the scale
        and length variables are not positive.
    """
    checks = [tf.verify_tensor_all_finite(x, msg=''),
              tf.verify_tensor_all_finite(y, msg=''),
              tf.assert_positive(sigma),
              tf.assert_positive(l)]

    # Attach the validation ops to every argument.
    x, y, sigma, l = [control_flow_ops.with_dependencies(checks, arg)
                      for arg in (x, y, sigma, l)]

    variance = tf.pow(sigma, 2.0)
    neg_inv_two_l_sq = -1.0 / (2.0 * tf.pow(l, 2.0))
    squared_distance = tf.reduce_sum(tf.pow(x - y, 2.0))
    return variance * tf.exp(neg_inv_two_l_sq * squared_distance)
def dot(x, y):
    """Compute dot product between a 2-D tensor and a 1-D tensor.

    If x is a ``[M x N]`` matrix, then y is a ``M``-vector.

    If x is a ``M``-vector, then y is a ``[M x N]`` matrix.

    Parameters
    ----------
    x : tf.Tensor
        A 1-D or 2-D tensor (see above).
    y : tf.Tensor
        A 1-D or 2-D tensor (see above).

    Returns
    -------
    tf.Tensor
        The ``tf.matmul`` result, which is 2-D: the vector operand is
        expanded to a row (when x is the vector) or a column (when y is the
        vector) before multiplying.
        NOTE(review): earlier documentation described a 1-D tensor of
        length ``N``; the result is not flattened here.

    Raises
    ------
    InvalidArgumentError
        If the inputs have Inf or NaN values.
    """
    finite_checks = [tf.verify_tensor_all_finite(x, msg=''),
                     tf.verify_tensor_all_finite(y, msg='')]
    x = control_flow_ops.with_dependencies(finite_checks, x)
    y = control_flow_ops.with_dependencies(finite_checks, y)
    x = tf.cast(x, dtype=tf.float32)
    y = tf.cast(y, dtype=tf.float32)

    x_is_vector = len(x.get_shape()) == 1
    if x_is_vector:
        # row vector times matrix
        return tf.matmul(tf.expand_dims(x, 0), y)
    # matrix times column vector
    return tf.matmul(x, tf.expand_dims(y, 1))
def dot(x, y):
    """Compute dot product between a 2-D tensor and a 1-D tensor.

    If x is a `[M x N]` matrix, then y is a `M`-vector.

    If x is a `M`-vector, then y is a `[M x N]` matrix.

    Args:
      x: tf.Tensor.
        A 1-D or 2-D tensor (see above).
      y: tf.Tensor.
        A 1-D or 2-D tensor (see above).

    Returns:
      tf.Tensor.
      A 1-D tensor: the matrix product flattened with a reshape to `[-1]`.

    Raises:
      InvalidArgumentError.
      If the inputs have Inf or NaN values.
    """
    x = tf.convert_to_tensor(x)
    y = tf.convert_to_tensor(y)
    finite_checks = [tf.verify_tensor_all_finite(x, msg=''),
                     tf.verify_tensor_all_finite(y, msg='')]
    x = control_flow_ops.with_dependencies(finite_checks, x)
    y = control_flow_ops.with_dependencies(finite_checks, y)

    if len(x.shape) == 1:
        # row vector times matrix
        product = tf.matmul(tf.expand_dims(x, 0), y)
    else:
        # matrix times column vector
        product = tf.matmul(x, tf.expand_dims(y, 1))
    return tf.reshape(product, [-1])
def to_simplex(x):
    """Transform real vector of length `(K-1)` to a simplex of dimension `K`
    using a backward stick breaking construction.

    Args:
      x: tf.Tensor.
        A 1-D or 2-D tensor.

    Returns:
      tf.Tensor.
      A tensor of same shape as input but with last dimension of
      size `K`.

    Raises:
      InvalidArgumentError.
      If the input has Inf or NaN values.

    #### Notes

    x as a 3-D or higher tensor is not guaranteed to be supported.
    """
    x = tf.cast(x, dtype=tf.float32)
    finite_check = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(finite_check, x)

    if isinstance(x, (tf.Tensor, tf.Variable)):
        shape = x.get_shape().as_list()
    else:
        shape = x.shape

    is_vector = len(shape) == 1
    K_minus_one = shape[0] if is_vector else shape[1]

    # Shared by both layouts: offset, then the stick-breaking fractions.
    eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one),
                         dtype=tf.float32))
    z = tf.sigmoid(eq + x)

    if is_vector:
        pil = tf.concat([z, tf.constant([1.0])], 0)
        piu = tf.concat([tf.constant([1.0]), 1.0 - z], 0)
        S = tf.cumprod(piu)
    else:
        n_rows = shape[0]
        pil = tf.concat([z, tf.ones([n_rows, 1])], 1)
        piu = tf.concat([tf.ones([n_rows, 1]), 1.0 - z], 1)
        S = tf.cumprod(piu, axis=1)

    return S * pil
def l1_normalize(x, dim, epsilon=1e-12, name=None):
    """l1 normalizes x.

    The input is checked for NaN/Inf; the norm is floored at `epsilon`
    before dividing.

    Args:
      x: The tensor to normalize.
      dim: The dimension to normalize along.
      epsilon: Lower bound on the norm, used to avoid exploding gradients as the
        norm approaches 0.
      name: Optional name for this op.
    Returns:
      x normalized along dim.
    """
    with tf.name_scope(name, "l1_normalize", [x]) as scope:
        x = tf.convert_to_tensor(x, name="x")
        x = tf.verify_tensor_all_finite(x, "Error at input %s" % scope)
        abs_total = tf.reduce_sum(tf.abs(x), [dim], keep_dims=True)
        floored_norm = tf.maximum(abs_total, epsilon)
        return tf.div(x, floored_norm, name=scope)
def RNN(parameters, input, model, initial_state):
    """Wire the recurrent network and return its mixture output and state.

    The model is:
      1. input
      2. linear layer (only when the model has 'input_weights')
      3 - n. LSTM layers
      n+1. linear layer
      n+2. mixture output

    A NaN/Inf check is inserted after every stage.

    Args:
        parameters: dict read for 'n_input', 'n_steps', 'n_mixtures' and
            'batch_size'.
        input: input tensor, shaped (batch_size, n_steps, n_input) per the
            original author's note.
        model: dict holding 'rnn_cell', 'output_weights', 'output_bias' and
            optionally 'input_weights' / 'input_bias'.
        initial_state: initial state tensor for the RNN cell.

    Returns:
        Tuple ``(output, states)``: the result of ``softmax_mixtures`` on
        the last step's projected output, and the states from ``rnn.rnn``.
    """
    input = tf.verify_tensor_all_finite(input, "Input not finite!")
    # input shape: (batch_size, n_steps, n_input)
    input = tf.transpose(input, [1, 0, 2])  # permute n_steps and batch_size
    input = tf.verify_tensor_all_finite(input, "Input not finite2!")

    # Reshape to prepare input to the linear layer:
    # (n_steps*batch_size, n_input)
    input = tf.reshape(input, [-1, parameters['n_input']])
    input = tf.verify_tensor_all_finite(input, "Input not finite3!")

    # 1. layer, linear activation for each batch and step.
    # `in` replaces dict.has_key, which no longer exists in Python 3.
    if 'input_weights' in model:
        input = tf.matmul(input, model['input_weights']) + model['input_bias']

    # Split data because rnn cell needs a list of inputs for the RNN inner
    # loop, that is, a n_steps length list of tensors shaped:
    # (batch_size, n_inputs)
    # This is not well documented, but check for yourself here:
    # https://goo.gl/NzA5pX
    input = tf.split(0, parameters['n_steps'], input)  # n_steps * (batch_size, :)

    initial_state = tf.verify_tensor_all_finite(initial_state, "Initial state not finite!")
    # Note: States is shaped: batch_size x cell.state_size
    outputs, states = rnn.rnn(model['rnn_cell'], input, initial_state=initial_state)

    # Only the last output is interesting for error back propagation and
    # prediction. Note that all batches are handled together here.
    lastOutput = tf.verify_tensor_all_finite(outputs[-1], "LSTM Outputs not finite!")

    raw_output = tf.matmul(lastOutput, model['output_weights']) + model['output_bias']
    raw_output = tf.verify_tensor_all_finite(raw_output, "Raw output not finite!")

    n_mixtures = parameters['n_mixtures']
    batch_size = parameters['batch_size']

    # And now, instead of just outputting the expected value, we output
    # mixture distributions. The number of mixtures is intuitively the number
    # of possible actions the target can take. The output is divided into
    # groups of n_mixtures mixture parameters for the 2 absolute position
    # coordinates.
    output = softmax_mixtures(raw_output, n_mixtures, batch_size)
    output = tf.verify_tensor_all_finite(output, "Final output not finite!")

    return (output, states)
def softplus(x):
    r"""Elementwise Softplus function

    .. math:: \log(1 + \exp(x))

    If input `x < -30`, returns `0.0` exactly.

    If input `x > 30`, returns `x` exactly.

    TensorFlow can't currently autodiff through ``tf.nn.softplus()``.

    Parameters
    ----------
    x : tf.Tensor
        A n-D tensor.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    finite_check = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(finite_check, x)
    x = tf.cast(x, dtype=tf.float32)

    raw = tf.log(1.0 + tf.exp(x))

    # Very negative inputs: replace with exactly zero.
    below_cutoff = tf.less(x, -30.0)
    clamped_low = tf.select(below_cutoff, tf.zeros_like(x), raw)

    # Very positive inputs: replace with x itself.
    above_cutoff = tf.greater(x, 30.0)
    return tf.select(above_cutoff, x, clamped_low)
def mixture_loss(pred, y, n_mixtures, batch_size):
    """Negative log of the mixture density at ``y``, summed over all entries.

    The prediction is split into mixture parameters with ``splitMix``, the
    per-mixture density comes from ``tf_bivariate_normal`` and is weighted
    by ``out_pi``. A NaN/Inf check follows every stage, and the sum and max
    of the weighted density are printed when evaluated.

    Relies on the module-level names ``epsilon`` and ``e``, which are added
    before the reduction and before the log respectively.
    """
    pred = tf.verify_tensor_all_finite(pred, "Pred not finite!")
    out_pi, out_sigma, out_mu, out_rho = splitMix(pred, n_mixtures, batch_size)

    density, _ = tf_bivariate_normal(y, out_mu, out_sigma, out_rho,
                                     n_mixtures, batch_size)
    density = tf.verify_tensor_all_finite(density, "Result not finite1!")

    weighted = tf.verify_tensor_all_finite(tf.mul(density, out_pi),
                                           "Result not finite2!")

    mixture = tf.reduce_sum(weighted + epsilon, 1, keep_dims=True)
    mixture = tf.Print(mixture, [tf.reduce_sum(mixture)],
                       "Sum of weighted density. If zero, sigma is too small: ")
    mixture = tf.Print(mixture, [tf.reduce_max(mixture)],
                       "Max of weighted density. If zero, sigma is too small: ")
    mixture = tf.verify_tensor_all_finite(mixture, "Result not finite3!")

    neg_log = tf.verify_tensor_all_finite(-tf.log(mixture + e),
                                          "Result not finite4!")
    total = tf.reduce_sum(neg_log)
    return tf.verify_tensor_all_finite(total, "Result not finite5!")
def softplus(x):
    r"""Elementwise Softplus function

    .. math:: \log(1 + \exp(x))

    If input `x < -30`, returns `0.0` exactly.

    If input `x > 30`, returns `x` exactly.

    TensorFlow can't currently autodiff through ``tf.nn.softplus()``.

    Parameters
    ----------
    x : tf.Tensor
        scalar, vector, matrix, or n-Tensor

    Returns
    -------
    tf.Tensor
        size corresponding to size of input

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    finite_check = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(finite_check, x)

    raw = tf.log(1.0 + tf.exp(x))

    # Very negative inputs: replace with exactly zero.
    below_cutoff = tf.less(x, -30.0)
    clamped_low = tf.select(below_cutoff, tf.zeros_like(x), raw)

    # Very positive inputs: replace with x itself.
    above_cutoff = tf.greater(x, 30.0)
    return tf.select(above_cutoff, x, clamped_low)
def buildModel(self, inputShape):
    """Build the VGG-style classification graph (13 conv + 3 FC layers).

    Conv and FC6/FC7 parameters are initialised from the weights loaded
    from ``self.vggFile``; FC8 is freshly initialised. The method stores
    every variable, activation, loss, optimizer and metric as an attribute
    on ``self`` (``W_conv1_1`` ... ``W_fc8``, ``h_conv1_1`` ... ``h_fc7``,
    ``est``, ``loss``, ``regLoss``, ``optimizerAll``, ``optimizerPre``,
    ``accuracy``, ...) and registers scalar/histogram summaries.

    Parameters
    ----------
    inputShape : sequence
        ``inputShape[0..2]`` size the input placeholder after the batch
        dimension (presumably height, width, channels -- confirm against
        the caller; FC6 flattens pool5 to ``7 * 7 * 512``).

    Raises
    ------
    AssertionError
        If ``self.vggFile`` is not set.
    """
    if not self.vggFile:
        print("Must load from weights")
        # Raised explicitly so the check also holds under ``python -O``.
        raise AssertionError("Must load from weights")
    npWeights = loadWeights(self.vggFile)

    conv_layers = []

    def conv_block(block, num_layers, bottom, **conv_kwargs):
        """Create one VGG block: all weights first, then conv+ReLU layers,
        then the 2x2 max-pool. Returns the pooled output."""
        layers = ["conv%d_%d" % (block, idx)
                  for idx in range(1, num_layers + 1)]
        for layer in layers:
            setattr(self, "W_" + layer, weight_variable_fromnp(
                npWeights[layer + "_w"], "w_" + layer))
            setattr(self, "B_" + layer, weight_variable_fromnp(
                npWeights[layer + "_b"], "b_" + layer))
        for layer in layers:
            # Each conv op is labelled with its own layer name (the original
            # reused the previous layer's label for the last conv of a block).
            bottom = tf.nn.relu(
                conv2d(bottom, getattr(self, "W_" + layer), layer,
                       **conv_kwargs) + getattr(self, "B_" + layer))
            setattr(self, "h_" + layer, bottom)
        pooled = maxpool_2x2(bottom, "pool%d" % block)
        setattr(self, "h_pool%d" % block, pooled)
        conv_layers.extend(layers)
        return pooled

    #Running on GPU
    with tf.device(self.device):
        with tf.name_scope("inputOps"):
            #Get convolution variables as placeholders
            self.inputImage = node_variable([
                self.batchSize, inputShape[0], inputShape[1], inputShape[2]
            ], "inputImage")
            self.gt = node_variable([self.batchSize, self.numClasses], "gt")

        with tf.name_scope("Conv1Ops"):
            # Only the first block passes the stride explicitly.
            top = conv_block(1, 2, self.inputImage, stride=[1, 1, 1, 1])
        with tf.name_scope("Conv2Ops"):
            top = conv_block(2, 2, top)
        with tf.name_scope("Conv3Ops"):
            top = conv_block(3, 3, top)
        with tf.name_scope("Conv4Ops"):
            top = conv_block(4, 3, top)
        with tf.name_scope("Conv5Ops"):
            conv_block(5, 3, top)

    # NOTE(review): device nesting was reconstructed from the statement
    # order; FC6 is assumed to live in the 'cpu:0' block -- confirm.
    with tf.device('cpu:0'):
        self.keep_prob = tf.placeholder(tf.float32)

        with tf.name_scope("FC6"):
            self.W_fc6 = weight_variable_fromnp(npWeights["fc6_w"], "w_fc6")
            self.B_fc6 = weight_variable_fromnp(npWeights["fc6_b"], "b_fc6")
            h_pool5_flat = tf.reshape(self.h_pool5,
                                      [self.batchSize, 7 * 7 * 512])
            self.h_fc6 = tf.nn.relu(
                tf.matmul(h_pool5_flat, self.W_fc6, name="fc6") + self.B_fc6,
                "fc6_relu")
            self.drop_h_fc6 = tf.nn.dropout(self.h_fc6, self.keep_prob)

    with tf.device(self.device):
        with tf.name_scope("FC7"):
            self.W_fc7 = weight_variable_fromnp(npWeights["fc7_w"], "w_fc7")
            self.B_fc7 = weight_variable_fromnp(npWeights["fc7_b"], "b_fc7")
            self.h_fc7 = tf.nn.relu(
                tf.matmul(self.drop_h_fc6, self.W_fc7, name="fc7") +
                self.B_fc7, "fc7_relu")
            self.drop_h_fc7 = tf.nn.dropout(self.h_fc7, self.keep_prob)

        with tf.name_scope("FC8"):
            # Output width follows self.numClasses so it always matches the
            # ground-truth placeholder (was hard-coded to 20).
            self.W_fc8 = weight_variable_xavier([4096, self.numClasses],
                                                "w_fc8")
            self.B_fc8 = bias_variable([self.numClasses], "b_fc8")
            self.est = tf.nn.softmax(
                tf.matmul(self.drop_h_fc7, self.W_fc8, name="fc8") +
                self.B_fc8, "fc8_relu")

        with tf.name_scope("Loss"):
            #Define loss
            self.loss = tf.reduce_mean(
                -tf.reduce_sum(self.gt * tf.log(self.est + self.epsilon),
                               reduction_indices=[1]))
            # NOTE(review): regLoss is built but both optimizers below
            # minimize self.loss -- confirm this is intended.
            self.regLoss = self.loss + self.regStrength * tf.add_n(
                [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
            self.nan_check_loss = tf.verify_tensor_all_finite(
                self.loss, msg="check_nan")

        with tf.name_scope("Opt"):
            #Define optimizer
            # optimizerAll updates every trainable variable.
            self.optimizerAll = tf.train.AdamOptimizer(
                self.learningRate,
                beta1=self.beta1,
                beta2=self.beta2,
                epsilon=self.epsilon).minimize(self.loss)
            # optimizerPre only updates the fully connected layers.
            self.optimizerPre = tf.train.AdamOptimizer(
                self.learningRate,
                beta1=self.beta1,
                beta2=self.beta2,
                epsilon=self.epsilon).minimize(self.loss, var_list=[
                    self.W_fc6,
                    self.B_fc6,
                    self.W_fc7,
                    self.B_fc7,
                    self.W_fc8,
                    self.B_fc8,
                ])

        with tf.name_scope("Metric"):
            self.correct = tf.equal(tf.argmax(self.gt, 1),
                                    tf.argmax(self.est, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(self.correct, tf.float32))

    #Cannot be on GPU
    (self.eval_vals, self.eval_idx) = tf.nn.top_k(self.est, k=5)

    #Summaries
    tf.scalar_summary('loss', self.loss, name="lossSum")
    tf.scalar_summary('accuracy', self.accuracy, name="accSum")
    tf.histogram_summary('input', self.inputImage, name="image_vis")
    tf.histogram_summary('gt', self.gt, name="gt_vis")

    #Conv layer histograms
    for layer in conv_layers:
        tf.histogram_summary(layer, getattr(self, "h_" + layer),
                             name=layer + "_vis")
    tf.histogram_summary('fc6', self.h_fc6, name="fc6_vis")
    tf.histogram_summary('fc7', self.h_fc7, name="fc7_vis")
    tf.histogram_summary('est', self.est, name="est_vis")

    #Weight and bias hists
    # fc8 now logs its own parameters (the original logged W_fc7/B_fc7
    # under the fc8 tags).
    for layer in conv_layers + ["fc6", "fc7", "fc8"]:
        tf.histogram_summary('w_' + layer, getattr(self, "W_" + layer),
                             name="w_" + layer + "_vis")
        tf.histogram_summary('b_' + layer, getattr(self, "B_" + layer),
                             name="b_" + layer + "_vis")
def buildModel(self, inputShape):
    """Build the sparse-coding graph (dictionary ``V1_W``, activations
    ``V1_A`` thresholded from potentials ``V1_U``).

    Depending on ``self.fc`` the dictionary is either a dense matrix
    (reconstruction via ``tf.matmul``) or a convolutional kernel
    (reconstruction via ``tf.nn.conv2d_transpose``). The method stores the
    placeholders, variables, reconstruction, losses, update ops, statistics
    and summaries as attributes on ``self``.

    Parameters
    ----------
    inputShape : sequence
        ``inputShape[0..2]`` follow the batch dimension in
        ``self.imageShape`` (used as height, width, channels by the
        stride and patch computations below).

    Raises
    ------
    AssertionError
        In the convolutional case, if the image height/width is not
        divisible by ``VStrideY``/``VStrideX`` or a stride is below 1.
    """
    self.imageShape = (self.batchSize, inputShape[0], inputShape[1],
                       inputShape[2])
    if self.fc:
        self.WShape = (self.imageShape[1] * self.imageShape[2] *
                       self.imageShape[3], self.numV)
        self.VShape = (self.batchSize, self.numV)
    else:
        assert (self.imageShape[1] % self.VStrideY == 0)
        assert (self.imageShape[2] % self.VStrideX == 0)
        V_Y = int(self.imageShape[1] / self.VStrideY)
        V_X = int(self.imageShape[2] / self.VStrideX)
        self.WShape = (self.patchSizeY, self.patchSizeX,
                       self.imageShape[3], self.numV)
        self.VShape = (self.batchSize, V_Y, V_X, self.numV)

    #Running on GPU
    with tf.device(self.device):
        with tf.name_scope("inputOps"):
            #Get convolution variables as placeholders
            self.inputImage = node_variable(self.imageShape, "inputImage")
            defaultMask = tf.zeros(self.imageShape)
            self.inputMask = tf.placeholder_with_default(
                defaultMask, self.imageShape)

            #Normalize image
            if self.normalize:
                # Count of unmasked entries per image and channel.
                n = tf.reduce_sum(1 - self.inputMask, axis=[1, 2],
                                  keepdims=True)
                #Avoid divide by 0
                n = tf.where(tf.equal(n, 0), tf.ones(n.shape), n)
                self.data_mean = tf.reduce_sum(
                    self.inputImage, axis=[1, 2], keepdims=True) / n
                self.data_std = tf.sqrt(
                    tf.reduce_sum(
                        tf.square(self.inputImage - self.data_mean),
                        axis=[1, 2], keepdims=True) / n)
                #Avoid divide by 0
                self.data_std = tf.where(tf.equal(self.data_std, 0),
                                         tf.ones(self.data_std.shape),
                                         self.data_std)
                self.scaled_inputImage = (self.inputImage -
                                          self.data_mean) / self.data_std
            else:
                # Without normalization the raw input is scaled directly
                # (the original left scaled_inputImage undefined here).
                self.scaled_inputImage = self.inputImage

            #Scale inputImage
            if not self.fc:
                self.patch_norm = np.sqrt(
                    self.patchSizeX * self.patchSizeY * self.imageShape[3])
                self.scaled_inputImage = (self.scaled_inputImage /
                                          self.patch_norm)
            self.scaled_inputImage = self.scaled_inputImage * self.inputMult

        with tf.name_scope("Dictionary"):
            self.V1_W = weight_variable(self.WShape, "V1_W", 1e-3)

        with tf.name_scope("weightNorm"):
            # L2 norm of every dictionary element (last axis indexes them).
            norm_axes = [0] if self.fc else [0, 1, 2]
            self.normVals = tf.sqrt(
                tf.reduce_sum(tf.square(self.V1_W), axis=norm_axes,
                              keepdims=True))
            self.normVals = tf.verify_tensor_all_finite(
                self.normVals, 'V1W error', name=None)
            self.normalize_W = self.V1_W.assign(
                self.V1_W / (self.normVals + 1e-8))

        with tf.name_scope("LCA_ADAM"):
            self.V1_init = tf.random_uniform(self.VShape, 0,
                                             1.25 * self.thresh,
                                             dtype=tf.float32)
            self.V1_U = uniform_weight_variable(self.VShape, "V1_U", 0.0,
                                                1.25 * self.thresh)
            self.V1_A = weight_variable(self.VShape, "V1_A", 1e-3)

        with tf.name_scope("Recon"):
            if self.fc:
                flat_recon = tf.matmul(self.V1_A, self.V1_W,
                                       transpose_b=True, a_is_sparse=False)
                #Reshape recon into image shape
                self.recon = tf.reshape(flat_recon, self.imageShape)
            else:
                assert (self.VStrideY >= 1)
                assert (self.VStrideX >= 1)
                self.recon = tf.nn.conv2d_transpose(
                    self.V1_A, self.V1_W, self.imageShape,
                    [1, self.VStrideY, self.VStrideX, 1],
                    padding='SAME', name="recon")

            #Unnormalize
            # Undo the input scaling steps in reverse order. The original
            # non-normalized branch referenced an undefined name `recon`;
            # nothing more needs to be undone in that case.
            self.unscaled_recon = self.recon / self.inputMult
            if not self.fc:
                self.unscaled_recon = self.unscaled_recon * self.patch_norm
            if self.normalize:
                self.unscaled_recon = (self.unscaled_recon *
                                       self.data_std) + self.data_mean

        with tf.name_scope("Error"):
            self.error = self.scaled_inputImage - self.recon

        with tf.name_scope("Loss"):
            # NOTE(review): in the fc case the error is still image-shaped
            # but is only summed over axis 1 -- confirm this is intended.
            sum_axes = [1] if self.fc else [1, 2, 3]
            self.reconError = tf.reduce_mean(
                tf.reduce_sum(tf.square(self.error), axis=sum_axes))
            self.l1Sparsity = tf.reduce_mean(
                tf.reduce_sum(tf.abs(self.V1_A), axis=sum_axes))
            #Define loss
            self.loss = self.reconError / 2 + self.thresh * self.l1Sparsity

        with tf.name_scope("Opt"):
            #Calculate A from U
            self.optimizerA0 = self.V1_A.assign(
                tf.nn.relu(self.V1_U - self.thresh))
            self.v1Reset = self.V1_U.assign(self.V1_init)

            self.optimizerA1 = tf.train.AdamOptimizer(self.learningRateA)

            #Find gradient wrt A
            self.lossGrad = self.optimizerA1.compute_gradients(
                self.reconError, [self.V1_A])
            # The gradient w.r.t. A (plus U - A) is applied to U, not to A.
            self.dU = [(self.lossGrad[0][0] - self.V1_A + self.V1_U,
                        self.V1_U)]

            #TODO add momentum or ADAM here
            self.optimizerA = self.optimizerA1.apply_gradients(self.dU)

            self.optimizerW = tf.train.AdamOptimizer(
                self.learningRateW, epsilon=1e-6).minimize(
                    self.loss, var_list=[self.V1_W])

        with tf.name_scope("stats"):
            self.nnz = tf.reduce_mean(
                tf.cast(tf.not_equal(self.V1_A, 0), tf.float32))

            self.imageStd = tf.sqrt(
                tf.reduce_mean(
                    tf.square(self.scaled_inputImage -
                              tf.reduce_mean(self.scaled_inputImage))))
            self.errorStd = tf.sqrt(
                tf.reduce_mean(
                    tf.square(self.error - tf.reduce_mean(self.error)))
            ) / self.imageStd
            self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))

            if self.fc:
                flat_weightImages = tf.transpose(self.V1_W,
                                                 [1, 0])  #[numV, img]
                self.weightImages = tf.reshape(flat_weightImages, [
                    self.numV, self.imageShape[1], self.imageShape[2],
                    self.imageShape[3]
                ])
            else:
                self.weightImages = tf.squeeze(
                    tf.transpose(self.V1_W, [3, 0, 1, 2]))

            #For log of activities
            # NOTE(review): scope/device nesting of this op was
            # reconstructed; it only affects the op name and placement.
            self.log_V1_A = tf.log(tf.abs(self.V1_A) + 1e-13)

    #Summaries
    self.s_loss = tf.summary.scalar('loss', self.loss)
    self.s_recon = tf.summary.scalar('recon error', self.reconError)
    self.s_errorStd = tf.summary.scalar('errorStd', self.errorStd)
    self.s_l1 = tf.summary.scalar('l1_sparsity', self.l1Sparsity)
    self.s_l1_mean = tf.summary.scalar('l1_mean', self.l1_mean)
    self.s_s_nnz = tf.summary.scalar('nnz', self.nnz)

    # h_input is assigned twice, as in the original: it ends up holding
    # the 'scale_input' histogram.
    self.h_input = tf.summary.histogram('input', self.inputImage)
    self.h_input = tf.summary.histogram('scale_input',
                                        self.scaled_inputImage)
    self.h_recon = tf.summary.histogram('recon', self.recon)
    self.h_v1_w = tf.summary.histogram('V1_W', self.V1_W)

    self.h_v1_u = tf.summary.histogram('V1_U', self.V1_U)
    self.h_v1_a = tf.summary.histogram('V1_A', self.V1_A)
    self.h_log_v1_a = tf.summary.histogram('Log_V1_A', self.log_V1_A)
def test1(self):
    """Build the test operator libraries if missing, then run the
    ``add2float``, ``addFloatDoubleFloat`` and ``sumAndSq`` dynamic-library
    ops on every available device and compare against NumPy references.

    The references for '/gpu:0' add ``ones`` on top of the CPU reference
    (once per output), matching what the assertions below expect from the
    GPU variants of the test kernels.
    """
    # build the operator libs if needed
    cpulib = os.path.join(cache_directory, "libaddcpu.so")
    gpulib = os.path.join(cache_directory, "libaddgpu.so")
    this_directory = os.path.split(os.path.abspath(__file__))[0]
    if not os.path.exists(cpulib):
        cpp_path = os.path.join(this_directory, 'addcpu.cpp')
        subprocess.call([
            cxx, '-fPIC', '-Wall', '-std=c++11', '-Ofast', '-Wextra', '-g',
            '-pedantic', '-I' + this_directory + '/..', '-o', cpulib,
            '-shared', cpp_path
        ])

    if cuda_enabled:
        if not os.path.exists(gpulib):
            nvcc_path = os.path.join(cuda_directory, 'bin/nvcc')
            cuda_path = os.path.join(this_directory, 'addgpu.cu')
            cuda_o_path = os.path.join(cache_directory, 'addgpu.o')
            subprocess.call([
                nvcc_path, '-O3', '--use_fast_math',
                '--relocatable-device-code=true', '--compile', '-Xcompiler',
                '-fPIC', '-std=c++11', '-I' + this_directory + '/..',
                cuda_path, '-o', cuda_o_path
            ])
            subprocess.call(
                [nvcc_path, '-shared', '-o', gpulib, cuda_o_path])
            # clean up .o files
            subprocess.call(['rm', cuda_o_path])

        devices = ['/cpu:0', '/gpu:0']
    else:
        devices = ['/cpu:0']

    for dev_string in devices:
        logger.debug('*** device: {dev}'.format(dev=dev_string))
        # Compare by value: `is` on a string literal only worked through
        # CPython's string interning.
        on_gpu = dev_string == '/gpu:0'
        test_config = tf.ConfigProto(allow_soft_placement=False)
        # Don't perform optimizations for tests so we don't inadvertently run
        # gpu ops on cpu
        test_config.graph_options.optimizer_options.opt_level = -1
        with tf.Session(config=test_config):
            logger.debug('*** add2float')
            with tf.device(dev_string):
                in0 = np.random.rand(3, 50).astype(np.float32)
                in1 = np.random.rand(3, 50).astype(np.float32)
                ones = np.ones((3, 50), dtype=np.float32)
                output = _DynamicLibOp.module().dynamic_lib(
                    inputs=[in0, in1],
                    out_shapes=[[3, 50]],
                    out_types=['float'],
                    cpu_lib_path=cpulib,
                    cpu_func_name="add2float",
                    gpu_lib_path=gpulib,
                    gpu_func_name="add2float",
                    serialized_grad_dag='',
                    grad_dag_arg_index=[],
                    cuda_threads_per_block=_default_cuda_threads_per_block)

                ref = np.add(in0, in1)
                if on_gpu:
                    ref = np.add(ref, ones)
                assert np.allclose(output[0].eval(), ref)

                in2 = np.random.rand(3, 50).astype(np.float64)
                logger.debug('*** addFloatDoubleFloat')
                output = _DynamicLibOp.module().dynamic_lib(
                    inputs=[in0, in2, in1],
                    out_shapes=[[3, 50]],
                    out_types=['float'],
                    cpu_lib_path=cpulib,
                    cpu_func_name="addFloatDoubleFloat",
                    gpu_lib_path=gpulib,
                    gpu_func_name="addFloatDoubleFloat",
                    serialized_grad_dag='',
                    grad_dag_arg_index=[],
                    cuda_threads_per_block=_default_cuda_threads_per_block)
                ref = (in0 + in2 + in1).astype(np.float32)
                if on_gpu:
                    ref = ref + ones
                assert np.allclose(output[0].eval(), ref)

                logger.debug('*** sumAndSq')
                output = _DynamicLibOp.module().dynamic_lib(
                    inputs=[in0, in2],
                    out_shapes=[[3, 50], [3, 50]],
                    out_types=['float', 'float'],
                    cpu_lib_path=cpulib,
                    cpu_func_name="sumAndSq",
                    gpu_lib_path=gpulib,
                    gpu_func_name="sumAndSq",
                    serialized_grad_dag='',
                    grad_dag_arg_index=[],
                    cuda_threads_per_block=_default_cuda_threads_per_block)

                out0 = (in0 + in2).astype(np.float32)
                if on_gpu:
                    out0 = out0 + ones
                # The squared reference is built from the (possibly offset)
                # first output, then offset again on the GPU.
                out1 = np.multiply(out0, out0)
                if on_gpu:
                    out1 = out1 + ones
                assert np.allclose(output[0].eval(), out0)
                assert np.allclose(output[1].eval(), out1)

                # make sure we can also use a standard TF gpu operator in the same session
                logger.debug('*** TF numerics op')
                x_shape = [5, 4]
                x = np.random.random_sample(x_shape).astype(np.float32)
                t = tf.constant(x, shape=x_shape, dtype=tf.float32)
                t_verified = tf.verify_tensor_all_finite(
                    t, "Input is not a number.")
                assert np.allclose(x, t_verified.eval())
def check_legal_inputs(self, tensor, name):
    """Return ``tensor`` wrapped in a finiteness check.

    The returned op is created by ``tf.verify_tensor_all_finite`` and is
    named ``name``; ``name`` is also appended to the error message.
    """
    error_message = "ERR: Tensor not finite - " + name
    checked = tf.verify_tensor_all_finite(tensor, error_message, name=name)
    return checked
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M
        x n where each row represents the mean of a n-dimensional
        Gaussian.
    scale_one : tf.Tensor
        A tensor of same shape as ``loc_one``, representing the
        standard deviation.
    loc_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        mean of another Gaussian.
    scale_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        standard deviation of another Gaussian.

    Returns
    -------
    tf.Tensor
        For 0-D or 1-D tensor inputs, outputs the 0-D tensor
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        For 2-D tensor inputs, outputs the 1-D tensor
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || ``
        ``N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    # Decide on the shortcut BEFORE converting to tensors: once converted,
    # comparing a tf.Tensor with a Python float no longer tells us whether
    # the caller left the defaults in place.
    standard_normal_target = (
        isinstance(loc_two, (int, float)) and loc_two == 0.0 and
        isinstance(scale_two, (int, float)) and scale_two == 1.0)

    loc_one = tf.convert_to_tensor(loc_one)
    scale_one = tf.convert_to_tensor(scale_one)
    loc_two = tf.convert_to_tensor(loc_two)
    scale_two = tf.convert_to_tensor(scale_two)
    dependencies = [
        tf.verify_tensor_all_finite(loc_one, msg=''),
        tf.verify_tensor_all_finite(loc_two, msg=''),
        tf.assert_positive(scale_one),
        tf.assert_positive(scale_two)
    ]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)

    if standard_normal_target:
        # With default arguments, we can avoid some intermediate computation.
        out = tf.square(scale_one) + tf.square(loc_one) - \
            1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        out = tf.square(scale_one / scale_two) + \
            tf.square((loc_two - loc_one) / scale_two) - \
            1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    if len(out.get_shape()) <= 1:  # scalar or vector
        return 0.5 * tf.reduce_sum(out)
    else:  # matrix
        return 0.5 * tf.reduce_sum(out, 1)
def __init__(self, env, task, visualise, policy, learning_rate):
    """
    An implementation of the A3C algorithm that is reasonably well-tuned for the VNC environments.
    Below, we will have a modest amount of complexity due to the way TensorFlow handles data parallelism.
    But overall, we'll define the model, specify its inputs, and describe how the policy gradients step
    should be computed.

    Arguments as used below: ``env`` supplies the observation/action spaces,
    ``task`` is the worker index used in the device string, ``visualise`` is
    forwarded to ``RunnerThread``, ``policy`` is called to build the global
    and the local network, and ``learning_rate`` is passed to the Adam
    optimizer and logged as a summary.
    """
    self.env = env
    self.task = task
    worker_device = "/job:worker/task:{}/cpu:0".format(task)
    with tf.device(tf.train.replica_device_setter(1, worker_device=worker_device)):
        with tf.variable_scope("global"):
            self.network = policy(env.observation_space, env.action_space)
            self.global_step = tf.get_variable("global_step", [], tf.int32,
                                               initializer=tf.constant_initializer(0, dtype=tf.int32),
                                               trainable=False)

    with tf.device(worker_device):
        with tf.variable_scope("local"):
            self.local_network = pi = policy(env.observation_space, env.action_space)
            pi.global_step = self.global_step

        self.ac = tf.placeholder(tf.float32, [None, env.action_space.dim()], name="ac")
        self.adv = tf.placeholder(tf.float32, [None], name="adv")
        self.r = tf.placeholder(tf.float32, [None], name="r")

        # the "policy gradients" loss:  its derivative is precisely the policy gradient
        # notice that self.ac is a placeholder that is provided externally.
        # adv will contain the advantages, as calculated in process_rollout
        pi_loss = - tf.reduce_sum(pi.log_prob(self.ac) * self.adv)

        # loss of value function
        vf_loss = tf.reduce_sum(tf.square(pi.vf - self.r))
        entropy = tf.reduce_sum(pi.dist.entropy())

        # loss gets minimized! pi_loss goes down, vf_loss goes down, and entropy goes up.
        self.loss = pi_loss + 0.25 * vf_loss - entropy * 1e-4
        self.loss = tf.verify_tensor_all_finite(self.loss, 'loss')

        # 20 represents the number of "local steps":  the number of timesteps
        # we run the policy before we update the parameters.
        # The larger local steps is, the lower is the variance in our policy gradients estimate
        # on the one hand;  but on the other hand, we get less frequent parameter updates, which
        # slows down learning.  In this code, we found that making local steps be much
        # smaller than 20 makes the algorithm more difficult to tune and to get to work.
        self.runner = RunnerThread(env, pi, 20, visualise)

        grads = tf.gradients(self.loss, pi.var_list)

        bs = tf.to_float(tf.shape(pi.x)[0])

        # Pick the summary API once so both code paths log exactly the same
        # tags (the legacy path previously called a non-existent
        # tf.scalar_scalar for the learning rate).
        if USE_TF12_API:
            scalar_summary = tf.summary.scalar
            image_summary = tf.summary.image
            merge_summaries = tf.summary.merge_all
        else:
            scalar_summary = tf.scalar_summary
            image_summary = tf.image_summary
            merge_summaries = tf.merge_all_summaries

        scalar_summary("model/learning_rate", learning_rate)
        scalar_summary("model/total_loss", self.loss / bs)
        scalar_summary("model/policy_loss", pi_loss / bs)
        scalar_summary("model/value_loss", vf_loss / bs)
        scalar_summary("model/entropy", entropy / bs)
        if len(list(env.observation_space.shape)) > 1:
            image_summary("model/state", pi.x)
        scalar_summary("model/grad_global_norm", tf.global_norm(grads))
        scalar_summary("model/var_global_norm", tf.global_norm(pi.var_list))
        self.summary_op = merge_summaries()

        # Clipping happens after the summaries, so the logged gradient norm
        # is the unclipped one.
        grads, _ = tf.clip_by_global_norm(grads, 40.0)

        # copy weights from the parameter server to the local model
        self.sync = tf.group(*[v1.assign(v2) for v1, v2 in zip(pi.var_list, self.network.var_list)])

        # Local gradients are applied to the global network's variables.
        grads_and_vars = list(zip(grads, self.network.var_list))
        inc_step = self.global_step.assign_add(tf.shape(pi.x)[0])

        opt = tf.train.AdamOptimizer(learning_rate)
        self.train_op = tf.group(opt.apply_gradients(grads_and_vars), inc_step)
        self.summary_writer = None
        self.local_steps = 0
def tf_mean_l2(w, coefs, n_users):
    """Return the ``coefs``-weighted sum of squared L2 norms along axis 1
    of ``w``.

    The squared norms are checked for NaN/Inf before being weighted.
    ``n_users`` is accepted but not used by this function.
    """
    row_sq_norms = tf.verify_tensor_all_finite(
        tf.reduce_sum(tf.pow(w, 2), axis=1),
        msg='NaN or Inf in norm',
        name='checked_elwise_l2')
    weighted = tf.multiply(row_sq_norms, coefs)
    return tf.reduce_sum(weighted)
def init_target(self):
    """Define the optimisation target and its finiteness-checked twin.

    Sets ``self.target`` to ``reduced_loss + reg * regularization``,
    ``self.checked_target`` to the same value guarded against NaN/Inf,
    and registers a scalar summary named 'target' for the checked value.
    """
    penalty = self.reg * self.regularization
    self.target = self.reduced_loss + penalty
    self.checked_target = tf.verify_tensor_all_finite(
        self.target,
        msg='NaN or Inf in target value',
        name='target')
    tf.scalar_summary('target', self.checked_target)
def model(self, seq_length, img_ph, pnt_ph, aud_ph, partitions_ph, train_ph, prompts_ph, variable_scope,
          variable_scope2, var_img, var_pnt, var_aud, var_lstm, incep_reuse=True):
    """Build the multi-input conv -> LSTM -> FC graph and return its output.

    Each input enabled in ``self.graphbuild`` is passed through its own
    convolutional front end (inception for ``img_ph``, a 3-layer conv stack
    for ``pnt_ph`` and ``aud_ph``); the results are concatenated along
    axis 2, fed to an LSTM, partitioned with ``partitions_ph`` (partition 1
    is kept) and passed through two matmul layers from ``var_lstm``.

    ``train_ph``, ``prompts_ph`` and ``var_img`` are accepted but not used
    by the graph built here.

    Returns
    -------
    tf.Tensor
        Output of the final ``W_fc``/``b_fc`` layer.
    """

    def process_vars(seq, data_type):
        # cast inputs to the correct data type
        seq_inp = tf.cast(seq, tf.float32)
        return tf.reshape(seq_inp, (self.__batch_size, -1, data_type["cmp_h"], data_type["cmp_w"],
                                    data_type["num_c"]))

    def convolve_data_inception(input_data):
        """Run the frames through inception_resnet_v2 and return logits."""
        data = tf.reshape(input_data, [-1, 299, 299, 3])
        logits, end_points = inception_resnet_v2(
            data,
            num_classes=output_sizes[-1] * output_sizes[-1] * layer_elements[-2],
            is_training=False, reuse=incep_reuse)
        return logits

    def convolve_data_3layer(input_data, variables, n, dtype, layer_specs):
        """Apply pad -> VALID conv -> ReLU for each ((pad_h, pad_w),
        (stride_h, stride_w)) entry of ``layer_specs``, with a summary and
        a finiteness check after every layer."""
        input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                name=n + "_inp_reshape")
        for layer, (pad, stride) in enumerate(layer_specs, start=1):
            input_data = tf.pad(input_data, [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]], "CONSTANT")
            conv = tf.nn.conv2d(input_data, variables["W%d" % layer],
                                strides=[1, stride[0], stride[1], 1],
                                padding="VALID") + variables["b%d" % layer]
            input_data = tf.nn.relu(conv)
            self.variable_summaries(input_data, dtype["name"] + "_conv%d" % layer)
            # The message names the failing layer for every input type (the
            # point-cloud branch previously left it out).
            tag = "conv%d_%s" % (layer, n)
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - " + tag,
                name=tag
            )
        return input_data

    # Point-cloud layers use one scalar pad/stride for both spatial axes.
    pnt_specs = [
        ((padding_size[0], padding_size[0]), (stride_sizes[0], stride_sizes[0])),
        ((padding_size[1], padding_size[1]), (stride_sizes[1], stride_sizes[1])),
        ((padding_size[2], padding_size[2]), (stride_sizes[-1], stride_sizes[-1])),
    ]
    # Audio layers index pad/stride per axis.
    aud_specs = [
        (aud_padding_size[0], aud_stride_sizes[0]),
        (aud_padding_size[1], aud_stride_sizes[1]),
        (aud_padding_size[2], aud_stride_sizes[2]),
    ]

    # pass different data types through conv networks
    inp_data = [0] * TOTAL_PARAMS
    conv_inp = [0] * TOTAL_PARAMS

    # NOTE(review): the nesting of the device/scope blocks below was
    # reconstructed from statement order -- confirm against the original.
    # with tf.device('/gpu:0'):
    with tf.device('/gpu:1'):
        if self.graphbuild[0]:
            inp_data[0] = process_vars(img_ph, img_dtype)
            conv_inp[0] = convolve_data_inception(inp_data[0])

    with variable_scope as scope:
        # with tf.device('/gpu:1'):
        if self.graphbuild[1]:
            inp_data[1] = process_vars(pnt_ph, pnt_dtype)
            conv_inp[1] = convolve_data_3layer(inp_data[1], var_pnt, "pnt", pnt_dtype, pnt_specs)
        if self.graphbuild[2]:
            inp_data[2] = process_vars(aud_ph, aud_dtype)
            conv_inp[2] = convolve_data_3layer(inp_data[2], var_aud, "aud", aud_dtype, aud_specs)

    # combine different inputs together
    combined_data = None
    for i in range(TOTAL_PARAMS):
        if not self.graphbuild[i]:
            continue
        if i < 2:
            conv_inp[i] = tf.reshape(conv_inp[i], [self.__batch_size, -1,
                                                   output_sizes[-1] * output_sizes[-1] * layer_elements[-2]],
                                     name="combine_reshape")
        else:
            # NOTE(review): uses aud_output_sizes[-1][0] twice (not [0] and
            # [1]) -- confirm the audio feature map is square.
            conv_inp[i] = tf.reshape(conv_inp[i], [self.__batch_size, -1,
                                                   aud_output_sizes[-1][0] * aud_output_sizes[-1][0] *
                                                   aud_layer_elements[-2]],
                                     name="combine_reshape_aud")
        # Identity test: `== None` on a tensor is not a reliable None check.
        if combined_data is None:
            combined_data = conv_inp[i]
        else:
            combined_data = tf.concat([combined_data, conv_inp[i]], 2)

    W_lstm = var_lstm["W_lstm"]
    b_lstm = var_lstm["b_lstm"]
    W_fc = var_lstm["W_fc"]
    b_fc = var_lstm["b_fc"]

    combined_data = tf.verify_tensor_all_finite(
        combined_data,
        "ERR: Tensor not finite - combined_data",
        name="combined_data"
    )

    with variable_scope2 as scope:
        lstm_cell = tf.contrib.rnn.LSTMCell(layer_elements[-2],
                                            use_peepholes=False,
                                            cell_clip=None,
                                            initializer=None,
                                            num_proj=None,
                                            proj_clip=None,
                                            forget_bias=1.0,
                                            state_is_tuple=True,
                                            activation=None,
                                            reuse=None
                                            )

        outputs, states = tf.nn.dynamic_rnn(
            cell=lstm_cell,
            inputs=combined_data,
            dtype=tf.float32,
            sequence_length=seq_length,
            time_major=False
        )

        # NaNs in the LSTM output are replaced by zeros before the check.
        outputs = tf.where(tf.is_nan(outputs), tf.zeros_like(outputs), outputs)
        outputs = tf.verify_tensor_all_finite(
            outputs,
            "ERR: Tensor not finite - outputs",
            name="outputs"
        )

        # Keep only the entries that partitions_ph assigns to partition 1.
        num_partitions = 2
        res_out = tf.dynamic_partition(outputs, partitions_ph, num_partitions)[1]

        rnn_x = tf.matmul(res_out, W_lstm) + b_lstm
        self.variable_summaries(rnn_x, "lstm")
        rnn_x = tf.verify_tensor_all_finite(
            rnn_x,
            "ERR: Tensor not finite - fc1",
            name="fc1"
        )

        x_tensor = rnn_x
        rnn_x = tf.matmul(x_tensor, W_fc) + b_fc
        self.variable_summaries(rnn_x, "fc")
        rnn_x = tf.verify_tensor_all_finite(
            rnn_x,
            "ERR: Tensor not finite - fc2",
            name="fc2"
        )

        return rnn_x
def tf_mean_l2(w):
    """Return the mean of the axis-1 sums of squares of ``w``.

    The tensor is squared element-wise and summed along axis 1; the
    resulting values are asserted to be finite and then averaged over all
    remaining entries.

    Args:
        w: Numeric tensor with an axis 1 to reduce over.

    Returns:
        Tensor holding the mean of the (finite-checked) axis-1 sums of
        squares.
    """
    squared = tf.pow(w, 2)
    sq_norms = tf.reduce_sum(squared, axis=1)
    # The finiteness assertion is attached to the tensor that feeds the
    # mean, so it runs whenever the result is evaluated.
    finite_sq_norms = tf.verify_tensor_all_finite(
        sq_norms,
        msg='NaN or Inf in norm',
        name='checked_elwise_l2',
    )
    return tf.reduce_mean(finite_sq_norms)
def run():
    """Parse CLI options, build the model graph, train it and report accuracy.

    Steps performed:

    1. Parse command-line arguments (defaults come from module-level
       constants such as ``MODEL_DIRECTORY`` and ``BATCH_SIZE``).
    2. Load the data set via ``load_data.load()`` and split the test set into
       10 slices of 1000 examples so that evaluation runs in small batches.
    3. Build a training and a test network with ``RF.SDE_model``, a summed
       cross-entropy loss with L2 regularisation, and a momentum optimizer
       driven by the ``learning_rate`` placeholder.
    4. Train for ``--epoch`` epochs. The fed learning rate is
       ``--learning_rate`` scaled by 1.0 / 0.1 / 0.01 / 0.001 for epochs
       ``< 20`` / ``< 40`` / ``< 60`` / later.
    5. After every epoch, optionally save a checkpoint (``SAVE_ENABLE``) and
       print the mean accuracy over the 10 test slices.
    6. After training, assert that every trainable variable is finite and
       print per-variable and total parameter counts.

    Side effects: rebinds the module-level ``task_name``, overwrites
    ``RF.VARIANCE``, prints to stdout and may write checkpoints under
    ``model/``.

    ``--gpu``, ``--model_directory`` and ``--dataset_directory`` are parsed
    but not otherwise used by this function.
    """
    global task_name

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--model_directory', type=str,
                        default=MODEL_DIRECTORY)
    parser.add_argument('-dd', '--dataset_directory', type=str,
                        default=DATASET_DIRECTORY)
    parser.add_argument('-bs', '--batch_size', type=int, default=BATCH_SIZE)
    parser.add_argument('-lr', '--learning_rate', type=float,
                        default=LEARNING_RATE)
    parser.add_argument('-g', '--gpu', type=int, default=GPU)
    parser.add_argument('-t', '--task_name', type=str, default=task_name)
    parser.add_argument('-n', '--hyper_net', type=str, default=HYPER_NET)
    parser.add_argument('-v', '--variance', type=float, default=RF.VARIANCE)
    parser.add_argument('-de', '--depth', type=int, default=52)
    parser.add_argument('-ep', '--epoch', type=int, default=100)
    args = parser.parse_args()

    epoch = args.epoch
    task_name = args.task_name
    RF.VARIANCE = args.variance
    depth = args.depth
    batch_size = args.batch_size

    X_train, Y_train, X_test, Y_test = load_data.load()
    # Evaluate on 10 slices of 1000 test examples instead of one big batch.
    X_test_m = [X_test[i * 1000:(i + 1) * 1000] for i in range(10)]
    Y_test_m = [Y_test[i * 1000:(i + 1) * 1000] for i in range(10)]
    num_data = X_train.shape[0]

    X = tf.placeholder("float", [None, 32, 32, 3])
    Y = tf.placeholder("float", [None, 10])
    time_list = tf.placeholder("float", [None])
    W_list = tf.placeholder("float", [None])
    learning_rate = tf.placeholder("float", [])
    hypernet = args.hyper_net
    task_name_tr = tf.placeholder("string")

    net = RF.SDE_model(X, depth, time_list, W_list, task_name, hypernet,
                       test=False)
    test_net = RF.SDE_model(X, depth, time_list, W_list, task_name, hypernet,
                            test=True)

    beta = 1e-3
    # Clip the network output before the log to avoid log(0).
    cross_entropy = -tf.reduce_sum(
        Y * tf.log(tf.clip_by_value(net, 1e-10, 1.0)))
    suml2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    loss = cross_entropy + beta * suml2

    correct_prediction = tf.equal(tf.argmax(test_net, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    saver = tf.train.Saver()

    # The optimizer reads the `learning_rate` placeholder so that the
    # step-decayed rate fed with every batch actually takes effect.
    # NOTE(review): the Saver above is built before the optimizer, so the
    # optimizer's own variables are presumably not checkpointed - confirm
    # whether that is intended.
    train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss)

    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())
        print(tf.trainable_variables())

        for j in range(epoch):
            sff_idx = np.random.permutation(num_data)

            # Step decay of the learning-rate multiplier.
            if j < 20:
                late_ad = 1.0
            elif j < 40:
                late_ad = 0.1
            elif j < 60:
                late_ad = 0.01
            else:
                late_ad = 0.001

            for idx in range(0, num_data, batch_size):
                # Slicing clamps at the end, so the last batch may be short.
                batch_idx = sff_idx[idx:idx + batch_size]
                batch_x = X_train[batch_idx]
                batch_y = Y_train[batch_idx]
                t, W = RF.tW_def(depth, task_name)
                feed_dict_train = {
                    X: batch_x,
                    Y: batch_y,
                    learning_rate: args.learning_rate * late_ad,
                    time_list: t,
                    W_list: W,
                    task_name_tr: task_name
                }
                sess.run([train_op], feed_dict=feed_dict_train)

            # NOTE(review): `start_time` is not assigned in this function; it
            # is expected to exist at module level.
            elapsed = time.time() - start_time
            print("epoch %d end : %.3f seconds elapsed " % (j, elapsed))

            # Evaluation runs after every epoch (the original guard started
            # with `True or`, i.e. it was always taken).
            t_test, W_test = RF.tW_def(depth, "test")
            if task_name == "ResNet" or task_name == "Stochastic_Depth":
                task_name_test = "ResNet_test"
            else:
                task_name_test = "test"

            if SAVE_ENABLE:
                print("saving checkpoint...")
                saver.save(
                    sess,
                    "model/model.ckpt" + str(task_name) + "step" + str(j) +
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
                print("saved!")

            acc = 0
            for i in range(10):
                feed_dict_test = {
                    X: X_test_m[i],
                    Y: Y_test_m[i],
                    time_list: t_test,
                    W_list: W_test,
                    task_name_tr: task_name_test
                }
                acc += sess.run(accuracy, feed_dict=feed_dict_test)
            acc = acc / 10.0
            print("accuracy after epoch %d : %.3f " % (j, acc), flush=True)

        # Count and list the parameters, checking each variable for NaN/Inf.
        # NOTE(review): the original indentation was lost; this section is
        # placed after the epoch loop - confirm it was not meant to run once
        # per epoch.
        total_parameters = 0
        parameter_descriptions = []
        for variable in tf.trainable_variables():
            sess.run(
                tf.verify_tensor_all_finite(
                    variable, "NaN in : %s \n" % variable.name))
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
            if len(shape) == 1:
                parameter_descriptions.append(
                    "%s %d, " % (variable.name, variable_parameters))
            else:
                parameter_descriptions.append(
                    "%s %s=%d, " % (variable.name, str(shape),
                                    variable_parameters))
        print("".join(parameter_descriptions))
        print("Total %d variables, %s params" %
              (len(tf.trainable_variables()),
               "{:,}".format(total_parameters)))
    finally:
        # Release the session even when training or evaluation raises.
        sess.close()
def loss_layer(self, idx, predicts, labels):
    """Build the detection loss and register its TensorBoard summaries.

    The layout (per-cell class scores, per-box confidences, per-box
    coordinates, separate scales for the four terms) appears to follow the
    YOLO v1 loss.

    Args:
        idx: Not used by this method; kept for interface compatibility.
        predicts: Network output. Along axis 1 it is split at
            ``self.boundary1`` and ``self.boundary2`` into class scores,
            box confidences and box coordinates, which are reshaped to
            ``[batch, cell, cell, num_class]``,
            ``[batch, cell, cell, boxes_per_cell]`` and
            ``[batch, cell, cell, boxes_per_cell, 4]``.
        labels: Ground truth of rank 4. Channel 0 is the per-cell response
            flag, channels 1:5 the box, channels 5: the class targets.

    Returns:
        The sum of the class, object, no-object and coordinate loss terms,
        each multiplied by its ``self.*_scale`` factor.

    Raises:
        tf.errors.InvalidArgumentError: When the returned tensor is
            evaluated and any loss term, any ``boxes_delta`` component or
            the IoU tensor contains NaN or Inf.
    """
    grid_shape = [self.batch_size, self.cell_size, self.cell_size]

    predict_classes = tf.reshape(
        predicts[:, :self.boundary1], grid_shape + [self.num_class])
    predict_scales = tf.reshape(
        predicts[:, self.boundary1:self.boundary2],
        grid_shape + [self.boxes_per_cell])
    predict_boxes = tf.reshape(
        predicts[:, self.boundary2:],
        grid_shape + [self.boxes_per_cell, 4])

    response = tf.reshape(labels[:, :, :, 0], grid_shape + [1])
    boxes = tf.reshape(labels[:, :, :, 1:5], grid_shape + [1, 4])
    # Repeat the ground-truth box for every predictor in the cell and
    # normalise it by the image size.
    boxes = tf.tile(boxes, [1, 1, 1, self.boxes_per_cell, 1]) / self.image_size
    classes = labels[:, :, :, 5:]

    offset = tf.constant(self.offset, dtype=tf.float32)
    offset = tf.reshape(
        offset, [1, self.cell_size, self.cell_size, self.boxes_per_cell])
    offset = tf.tile(offset, [self.batch_size, 1, 1, 1])
    offset_t = tf.transpose(offset, (0, 2, 1, 3))

    # Predictions -> image-relative boxes: add the cell offset to the centre
    # and square the last two components.
    predict_boxes_tran = tf.stack([
        (predict_boxes[:, :, :, :, 0] + offset) / self.cell_size,
        (predict_boxes[:, :, :, :, 1] + offset_t) / self.cell_size,
        tf.square(predict_boxes[:, :, :, :, 2]),
        tf.square(predict_boxes[:, :, :, :, 3])
    ])
    predict_boxes_tran = tf.transpose(predict_boxes_tran, [1, 2, 3, 4, 0])

    iou_predict_truth = self.calculate_iou(predict_boxes_tran, boxes)

    # object_mask: 1 for the box with the highest IoU in a cell that contains
    # an object; [batch, cell, cell, boxes_per_cell].
    object_mask = tf.reduce_max(iou_predict_truth, 3, keep_dims=True)
    object_mask = tf.cast(
        (iou_predict_truth >= object_mask), tf.float32) * response

    # noobject_mask: complement of object_mask.
    noobject_mask = tf.ones_like(object_mask, dtype=tf.float32) - object_mask

    # Ground truth -> the same encoding the network predicts (cell-relative
    # centre, square root of the last two components).
    boxes_tran = tf.stack([
        boxes[:, :, :, :, 0] * self.cell_size - offset,
        boxes[:, :, :, :, 1] * self.cell_size - offset_t,
        tf.sqrt(boxes[:, :, :, :, 2]),
        tf.sqrt(boxes[:, :, :, :, 3])
    ])
    boxes_tran = tf.transpose(boxes_tran, [1, 2, 3, 4, 0])

    # class_loss
    class_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(response * (predict_classes - classes)),
                      reduction_indices=[1, 2, 3]),
        name='class_loss') * self.class_scale

    # object_loss
    object_loss = tf.reduce_mean(
        tf.reduce_sum(
            tf.square(object_mask * (predict_scales - iou_predict_truth)),
            reduction_indices=[1, 2, 3]),
        name='object_loss') * self.object_scale

    # noobject_loss
    noobject_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(noobject_mask * predict_scales),
                      reduction_indices=[1, 2, 3]),
        name='noobject_loss') * self.noobject_scale

    # coord_loss
    coord_mask = tf.expand_dims(object_mask, 4)
    boxes_delta = coord_mask * (predict_boxes - boxes_tran)
    coord_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(boxes_delta),
                      reduction_indices=[1, 2, 3, 4]),
        name='coord_loss') * self.coord_scale

    # Checks for NaN and Inf. In graph mode a check only executes if the
    # tensor it returns is part of what gets evaluated, so the results are
    # collected here and attached to the returned loss below instead of
    # being discarded.
    finite_checks = [
        tf.verify_tensor_all_finite(class_loss, "class_loss"),
        tf.verify_tensor_all_finite(object_loss, "object_loss"),
        tf.verify_tensor_all_finite(noobject_loss, "noobject_loss"),
        tf.verify_tensor_all_finite(coord_loss, "coord_loss"),
        tf.verify_tensor_all_finite(boxes_delta[:, :, :, :, 0],
                                    "boxes_delta_x"),
        tf.verify_tensor_all_finite(boxes_delta[:, :, :, :, 1],
                                    "boxes_delta_y"),
        tf.verify_tensor_all_finite(boxes_delta[:, :, :, :, 2],
                                    "boxes_delta_w"),
        tf.verify_tensor_all_finite(boxes_delta[:, :, :, :, 3],
                                    "boxes_delta_h"),
        tf.verify_tensor_all_finite(iou_predict_truth, "iou"),
    ]

    # Summaries for TensorBoard.
    tf.summary.scalar(self.phase + '/class_loss', class_loss)
    tf.summary.scalar(self.phase + '/object_loss', object_loss)
    tf.summary.scalar(self.phase + '/noobject_loss', noobject_loss)
    tf.summary.scalar(self.phase + '/coord_loss', coord_loss)
    tf.summary.histogram(self.phase + '/boxes_delta_x',
                         boxes_delta[:, :, :, :, 0])
    tf.summary.histogram(self.phase + '/boxes_delta_y',
                         boxes_delta[:, :, :, :, 1])
    tf.summary.histogram(self.phase + '/boxes_delta_w',
                         boxes_delta[:, :, :, :, 2])
    tf.summary.histogram(self.phase + '/boxes_delta_h',
                         boxes_delta[:, :, :, :, 3])
    tf.summary.histogram(self.phase + '/iou', iou_predict_truth)

    # The additions are created inside the dependency scope, so evaluating
    # the total loss first runs every finiteness check.
    with tf.control_dependencies(finite_checks):
        total_loss = class_loss + object_loss + noobject_loss + coord_loss
    return total_loss
def check_legal_inputs(tensor, name):
    """Return ``tensor`` guarded by a finiteness assertion labelled ``name``.

    Args:
        tensor: Tensor to check for NaN/Inf values.
        name: String used both as the op name and as the suffix of the
            error message.

    Returns:
        The result of ``tf.verify_tensor_all_finite`` for ``tensor``.
    """
    error_message = "ERR: Tensor not finite - " + name
    checked_tensor = tf.verify_tensor_all_finite(
        tensor, error_message, name=name)
    return checked_tensor