def train_step(self, paragraph_tokens, ref_question, global_step: tf.Variable):
    losses = []
    preds = [[] for _ in range(ref_question.shape[0])]
    context = paragraph_tokens
    past = None
    types = tf.constant(0, dtype=tf.int32, shape=paragraph_tokens.shape)
    batch_dim = paragraph_tokens.shape[0]
    for i in tf.range(ref_question.shape[1]):
        ref_tokens = ref_question[:, i]
        # only step the model while at least one example still has a real (non-padding) token
        if tf.reduce_any(tf.not_equal(ref_tokens, self.embedder.padding_token)):
            predictions, past, token_loss = self.token_pred_and_loss(
                context, past, ref_tokens, types)
            # teacher forcing: the reference token at step i becomes the context for the next step
            context = tf.expand_dims(ref_question[:, i], axis=1)
            types = tf.constant(1, dtype=tf.int32, shape=(batch_dim, 1))
            # collect per-example predictions, skipping padded positions
            for ind, ref in enumerate(ref_tokens):
                if ref != self.embedder.padding_token:
                    preds[ind].append(predictions[ind])
            losses.append(token_loss)
    if self.print_predictions:
        for i, pred in enumerate(preds):
            ref = self.embedder.tokenizer.decode(ref_question[i])
            pred = self.embedder.tokenizer.decode(tf.stack(pred))
            paragraph = self.embedder.tokenizer.decode(paragraph_tokens[i])
            tf.print(paragraph, "\n", ref, "\n", pred, "\n")
    global_step.assign(global_step + 1)
    with self.train_summary_writer.as_default():
        total_loss = tf.reduce_mean(losses)
        tf.summary.scalar('loss', total_loss, step=global_step)
    return total_loss
def _st(model: tf.keras.Model, gen_img: tf.Variable, content_path: str,
        style_path: str, content_layers: List[str], style_layers: List[str],
        lpi: Callable, opt: tf.train.AdamOptimizer, content_weight=1e3,
        style_weight=1e-2, num_iterations=100) -> None:
    """
    Style transfer from a style image onto a source image with a given pre-trained network.

    :param model: the model to use for the style transfer
    :param gen_img: the generated image, modified IN PLACE
    :param content_path: path to the source image onto which the style is painted
    :param style_path: path to the image whose style is used
    :param content_layers: list of content layers to use
    :param style_layers: list of style layers to use
    :param lpi: function used to load and preprocess an image
    :param opt: the Adam optimizer to use
    :param content_weight: weight of the content loss
    :param style_weight: weight of the style loss
    :param num_iterations: number of painting iterations
    :return: None; gen_img holds the final painted image
    """
    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = compute_feature_representations(
        model, lpi, content_path, style_path, len(style_layers))
    gram_style_features = [
        gram_matrix(style_feature) for style_feature in style_features
    ]
    loss_weights = (style_weight, content_weight)
    cfg = {
        'model': model,
        'loss_weights': loss_weights,
        'gen_img': gen_img,
        'gram_style_features': gram_style_features,
        'content_features': content_features,
        'num_style_layers': len(style_layers),
        'num_content_layers': len(content_layers)
    }
    # VGG per-channel means: clip the optimized image back into the valid pixel range
    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means
    for i in range(num_iterations):
        grads, all_loss = compute_grads(cfg)
        loss, style_score, content_score = all_loss
        opt.apply_gradients([(grads, gen_img)])
        clipped = tf.clip_by_value(gen_img, min_vals, max_vals)
        gen_img.assign(clipped)
        _logger.info(
            f"Iteration n°{i} | loss : {loss} | style_score : {style_score} | content_score : {content_score}"
        )
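# The gram_matrix helper used above is not shown in this snippet; a common
# definition (as in the classic Gatys-style transfer examples) looks roughly
# like the following sketch, assuming a feature map of shape [1, H, W, C] and
# that `tensorflow` is imported as `tf` as in the surrounding code.
def gram_matrix(input_tensor):
    channels = int(input_tensor.shape[-1])
    a = tf.reshape(input_tensor, [-1, channels])   # flatten the spatial dimensions
    n = tf.shape(a)[0]
    gram = tf.matmul(a, a, transpose_a=True)       # [C, C] channel correlations
    return gram / tf.cast(n, tf.float32)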
def unit_pruning(w: tf.Variable, k: float) -> tf.Variable:
    """Performs pruning on a weight matrix w in the following way:

    - The Euclidean norm of each column is computed.
    - The indices of the smallest k% of columns, based on their Euclidean norms, are selected.
    - All elements in the columns with the matching indices are set to 0.

    Args:
        w: The weight matrix.
        k: The percentage of columns that should be pruned from the matrix.

    Returns:
        The unit pruned weight matrix.
    """
    k = tf.cast(
        tf.round(tf.cast(tf.shape(w)[1], tf.float32) * tf.constant(k)),
        dtype=tf.int32
    )
    norm = tf.norm(w, axis=0)
    row_indices = tf.tile(tf.range(tf.shape(w)[0]), [k])
    _, col_indices = tf.nn.top_k(tf.negative(norm), k, sorted=True, name=None)
    col_indices = tf.reshape(
        tf.tile(tf.reshape(col_indices, [-1, 1]), [1, tf.shape(w)[0]]), [-1]
    )
    indices = tf.stack([row_indices, col_indices], axis=1)

    return w.assign(
        tf.scatter_nd_update(w, indices, tf.zeros(tf.shape(w)[0] * k, tf.float32))
    )
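# Minimal usage sketch for unit_pruning (assumes TF 1.x graph mode, since
# tf.scatter_nd_update is used, and `import tensorflow as tf` / `import numpy
# as np` as in the surrounding code); the matrix shape and the 30% pruning
# fraction are illustrative only.
w = tf.Variable(np.random.randn(8, 10).astype(np.float32))
prune_op = unit_pruning(w, k=0.3)  # zero the 30% of columns with the smallest L2 norm

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(prune_op)
    print(np.linalg.norm(sess.run(w), axis=0))  # three of the ten column norms are now 0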
def weight_pruning(w: tf.Variable, k: float) -> tf.Variable:
    """Performs pruning on a weight matrix w in the following way:

    - The absolute values of all elements in the weight matrix are computed.
    - The indices of the smallest k% of elements, based on their absolute values, are selected.
    - All elements with the matching indices are set to 0.

    Args:
        w: The weight matrix.
        k: The percentage of values (weights) that should be pruned from the matrix.

    Returns:
        The weight pruned weight matrix.
    """
    k = tf.cast(
        tf.round(tf.size(w, out_type=tf.float32) * tf.constant(k)), dtype=tf.int32
    )
    w_reshaped = tf.reshape(w, [-1])
    _, indices = tf.nn.top_k(tf.negative(tf.abs(w_reshaped)), k, sorted=True, name=None)
    mask = tf.scatter_nd_update(
        tf.Variable(
            tf.ones_like(w_reshaped, dtype=tf.float32), name="mask", trainable=False
        ),
        tf.reshape(indices, [-1, 1]),
        tf.zeros([k], tf.float32),
    )

    return w.assign(tf.reshape(w_reshaped * mask, tf.shape(w)))
def update_variable(variable: tf.Variable, expression: tf.Tensor, inputs: dict,
                    name=None):
    """
    Built to replicate theano.function(inputs, [], updates=updates).

    Updates the value of `variable` with `expression` by substituting the values
    passed in `inputs` into the expression.

    :param variable: tf.Variable, WILL BE MODIFIED BY THE FUNCTION
    :param expression: tf.Tensor, expression / graph with `variable` and tensors as inputs
    :param inputs: feed dict mapping input tensors to values (ndarrays or
        array-like objects) that will be fed into `expression` to update `variable`
    :return: None
    """
    with tf.name_scope(name, 'update_variable', [variable, expression]):
        with tf.Session() as sess:
            upd_variable_val = sess.run(expression, feed_dict=inputs)
            # assign() only builds the op in graph mode; it must be run to take effect
            sess.run(variable.assign(upd_variable_val))
def train_step_with_variation_loss(image: tf.Variable,
                                   extractor: StyleContentModel,
                                   opt: tf.optimizers.Adam,
                                   style_targets: tf.Tensor,
                                   content_targets: tf.Tensor,
                                   num_style_layers: int,
                                   num_content_layers: int,
                                   style_weight: float,
                                   content_weight: float,
                                   total_variation_weight: float) -> None:
    """
    Applies one training step that also includes the total variation loss.

    Args:
        image (tf.Variable): the rendered image
        extractor (StyleContentModel): the intermediate layer extractor
        opt (tf.optimizers.Adam): the optimizer
        style_targets (tf.Tensor): the style intermediate outputs
        content_targets (tf.Tensor): the content intermediate outputs
        num_style_layers (int): number of style layers
        num_content_layers (int): number of content layers
        style_weight (float): the style weight
        content_weight (float): the content weight
        total_variation_weight (float): the total variation weight
    """
    with tf.GradientTape() as tape:
        # forward pass of the rendered image
        outputs: Dict[str, tf.Tensor] = extractor(image)
        # style/content loss
        loss: tf.Tensor = style_content_loss(outputs, style_targets,
                                             content_targets, num_style_layers,
                                             num_content_layers, style_weight,
                                             content_weight)
        # add the total variation loss
        loss += total_variation_weight * tf.image.total_variation(image)

    # compute the gradient of the loss with respect to the image
    grad = tape.gradient(loss, image)
    # apply one optimizer step to the image
    opt.apply_gradients([(grad, image)])
    # clip the updated image back to [0, 1]
    image.assign(clip_0_1(image))
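# clip_0_1 is referenced above but not defined in this snippet; in the
# TensorFlow style-transfer tutorial it is defined along these lines:
def clip_0_1(image):
    """Keeps pixel values of the rendered image within [0, 1]."""
    return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)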
def assign(self, var: tf.Variable, graph: T, graph_var: tf.Variable) -> None:
    """
    Assigns the value of <graph_var>, a Variable of <graph>, to <var>, a
    Variable of this PBTAbleGraph.
    """
    with tf.device(self.device):
        value = graph.run(graph_var)
        self.run(var.assign(value))
def var_to_var(var_from: tf.Variable, var_to: tf.Variable, epsilon: float):
    """Expands a variable to another variable.

    Assume the shape of `var_from` is (a, b, ..., y, z); the shape of `var_to`
    can be (a, ..., z * 2), (a * 2, ..., z * 2), or (a * 2, ..., z).

    If the shape of `var_to` is (a, ..., 2 * z):
        For any x, tf.matmul(x, var_to) ~= expand_vector(tf.matmul(x, var_from)) / 2
        Note that noise will be added to the left-hand side if epsilon != 0.

    If the shape of `var_to` is (2 * a, ..., z):
        For any x, tf.matmul(expand_vector(x), var_to) == tf.matmul(x, var_from)

    If the shape of `var_to` is (2 * a, ..., 2 * z):
        For any x,
        tf.matmul(expand_vector(x), var_to) ==
            expand_vector(tf.matmul(expand_vector(x), var_from))

    Args:
        var_from: input variable to expand.
        var_to: output variable.
        epsilon: the noise ratio that will be added when splitting `var_from`.
    """
    shape_from = var_from.shape
    shape_to = var_to.shape
    if shape_from == shape_to:
        var_to.assign(var_from)
    elif len(shape_from) == 1 and len(shape_to) == 1:
        var_to.assign(expand_vector(var_from.numpy()))
    elif shape_from[0] * 2 == shape_to[0] and shape_from[-1] == shape_to[-1]:
        var_to.assign(expand_1_axis(var_from.numpy(), epsilon=epsilon, axis=0))
    elif shape_from[0] == shape_to[0] and shape_from[-1] * 2 == shape_to[-1]:
        var_to.assign(expand_1_axis(var_from.numpy(), epsilon=epsilon, axis=-1))
    elif shape_from[0] * 2 == shape_to[0] and shape_from[-1] * 2 == shape_to[-1]:
        var_to.assign(expand_2_axes(var_from.numpy(), epsilon=epsilon))
    else:
        raise ValueError("Shape not supported, {}, {}".format(shape_from, shape_to))
def update_codebook(self,
                    codebook: tf.Variable,
                    counts: tf.Variable,
                    means: tf.Variable,
                    decay: float = 0.99,
                    epsilon: float = 1e-5):
    r"""Update the codebook using an exponential moving average (Appendix A.1).

    Args:
        codebook: A `float`-like `Tensor`, the codebook for code embedding,
            shape `[n_codes, code_size]`.
        counts: A `float`-like `Tensor`, stores the occurrence counts of each
            code in the codebook, shape `[n_codes]`.
        means: A `float`-like `Tensor`, stores the moving average of each code
            in the codebook, shape `[n_codes, code_size]`.

    Returns:
        updated_codebook: the updated codebook, shape `[n_codes, code_size]`
        updated_counts: the moving-average updated counts, shape `[n_codes]`
        updated_means: the moving-average updated means, shape `[n_codes, code_size]`
    """
    input_ndim = len(self.codes.shape) - 2
    axes = range(input_ndim + 1)  # the batch axes
    # Use an exponential moving average to update the codebook.
    updated_ema_count = moving_averages.assign_moving_average(
        variable=counts,
        value=tf.reduce_sum(self.assignments, axis=axes),
        decay=decay,
        zero_debias=False)
    updated_ema_means = moving_averages.assign_moving_average(
        variable=means,
        value=tf.reduce_sum(tf.expand_dims(self.codes, axis=-2) *
                            tf.expand_dims(self.assignments, axis=-1),
                            axis=axes),
        decay=decay,
        zero_debias=False)
    # Add a small value to avoid dividing by zero.
    perturbed_ema_count = updated_ema_count + epsilon
    codebook.assign(updated_ema_means / perturbed_ema_count[..., tf.newaxis])
    return codebook, updated_ema_count, updated_ema_means
def update_learning_rate(session: tf.Session,
                         learning_rate_variable: tf.Variable,
                         new_learning_rate: float):
    """
    Runs a tf.Session and updates the current learning rate stored in
    learning_rate_variable.

    :param session: the active tf.Session
    :param learning_rate_variable: the tf.Variable holding the current learning rate
    :param new_learning_rate: the new learning rate, e.g. 0.001
    :return: None
    """
    assign_op = learning_rate_variable.assign(new_learning_rate)
    session.run(assign_op)
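# Hypothetical usage in a TF 1.x training loop; the variable name and the
# concrete rates below are illustrative, not part of the original code.
lr = tf.Variable(0.01, trainable=False, name='learning_rate')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    update_learning_rate(sess, lr, new_learning_rate=0.001)
    print(sess.run(lr))  # 0.001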
def _make_variable_pruning_op(variable: tf.Variable, threshold, name=None):
    [mask] = tf.get_collection(MASK_COLLECTION, variable.op.name)
    with tf.name_scope(name, default_name='variable_prune_op'):
        to_prune = tf.less_equal(tf.abs(variable), threshold, name='prune_mask')
        remaining = 1 - tf.cast(to_prune, dtype=tf.float32)
        new_mask = tf.multiply(mask, remaining, name='new_mask')
        new_variable = tf.multiply(variable, remaining, name='new_variable')
        assign_mask = mask.assign(new_mask)
        assign_variable = variable.assign(new_variable)
        prune_op = tf.group(assign_mask, assign_variable, name='prune')
        return prune_op
def _ema_assign_fn(self, variable: tf.Variable, value: tf.Tensor):
    """Updates the exponential moving average for a single variable."""
    return variable.assign(self._decay * variable + (1.0 - self._decay) * value)
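# Standalone illustration of the same exponential-moving-average update in
# eager mode (TF 2.x); `decay`, `shadow`, and `value` are illustrative names,
# not attributes of the class above.
decay = 0.99
shadow = tf.Variable([0.0, 0.0])     # the moving average being maintained
value = tf.constant([1.0, 2.0])      # freshly computed statistics
shadow.assign(decay * shadow + (1.0 - decay) * value)
print(shadow.numpy())                # [0.01 0.02]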
def init_first_layer_weights(var: tf.Variable, rgb_weights: np.ndarray,
                             sess: tf.Session, hs_weight_init: str) -> None:
    '''Initializes the weights for filters in the first conv layer.

    'resnet/scale1/weights:0' for ResNet
    'vggf/conv1/conv1_weights:0' for VGGF

    If we are using RGB-only, then just initializes var to rgb_weights. Otherwise,
    uses hs_weight_init to determine how to initialize the weights for non-RGB bands.

    Args
    - var: tf.Variable, the filters in the 1st convolution layer, shape [F, F, C, 64]
        - F is the filter size (7 for ResNet, 11 for VGGF)
        - C is either 3 (RGB), 7 (lxv3), or 9 (Landsat7)
    - rgb_weights: ndarray of np.float32, shape [F, F, 3, 64]
    - sess: tf.Session
    - hs_weight_init: str, one of ['random', 'same', 'samescaled']
    '''
    var_shape = np.asarray(var.get_shape().as_list())
    rgb_weights_shape = np.asarray(rgb_weights.shape)

    # only weights in the 1st conv layer need to be adjusted for dealing with hyperspectral images
    # check that the filter shape and num_filters match up, and that RGB weights have 3 channels
    if 'scale1/weights:0' in var.name:  # ResNet
        F = 7
    elif 'conv1/conv1_weights:0' in var.name:  # VGGF
        F = 11
    else:
        raise ValueError('var is not the weights for the first conv layer')

    assert np.all(var_shape[[0, 1]] == [F, F])
    assert np.all(var_shape[[0, 1, 3]] == rgb_weights_shape[[0, 1, 3]])
    assert rgb_weights.shape[2] == 3
    assert rgb_weights.dtype == np.float32

    # if we are using the RGB-only model, then just initialize to saved weights
    if var_shape[2] == 3:
        print('Using rgb only model')
        sess.run(var.assign(rgb_weights))
        return

    # Set up the initializer function
    print('Initializing var different from saved rgb weights:', var.name,
          ' With shape:', var_shape)
    print('Using ' + hs_weight_init + ' initialization for hyperspectral weights.')
    num_hs_channels = var_shape[2] - rgb_weights.shape[2]
    hs_weights_shape = [F, F, num_hs_channels, 64]

    if hs_weight_init == 'random':
        # initialize the weights in the hyperspectral bands to gaussian with same overall mean and
        # stddev as the RGB channels
        rgb_mean = np.mean(rgb_weights)
        rgb_std = np.std(rgb_weights)
        hs_weights = tf.truncated_normal(hs_weights_shape, mean=rgb_mean,
                                         stddev=rgb_std, dtype=tf.float32)
    elif hs_weight_init == 'same':
        # initialize the weight for each position in each filter to the average of the 3 RGB weights
        # at the same position in the same filter
        rgb_mean = rgb_weights.mean(axis=2, keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
    elif hs_weight_init == 'samescaled':
        # similar to hs_weight_init == 'same', but we normalize the weights
        rgb_mean = rgb_weights.mean(axis=2, keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
        rgb_weights *= 3 / (3 + num_hs_channels)
        hs_weights *= 3 / (3 + num_hs_channels)
    else:
        raise ValueError(f'Unknown hs_weight_init type: {hs_weight_init}')

    final_weight = tf.concat([rgb_weights, hs_weights], axis=2)
    print('Shape of 1st layer weights:', final_weight.shape)  # should be (F, F, C, 64)
    sess.run(var.assign(final_weight))
def eval_model(is_training: tf.Variable, sess: tf.Session, best_iou: float,
               val_loss: tf.Tensor, val_acc: tf.Tensor,
               val_iou_update: tf.Operation, val_iou: tf.Tensor,
               val_iou_reset: tf.Operation,
               val_writer: tf.summary.FileWriter, epoch: int,
               saver: tf.train.Saver) -> float:
    """
    Evaluates the model with one pass over the validation set.

    :param is_training: tf variable which indicates whether the model is training
    :param sess: tf session
    :param best_iou: best validation iou so far
    :param val_loss: val loss tensor
    :param val_acc: val accuracy tensor
    :param val_iou_update: val iou update operation
    :param val_iou: val iou tensor
    :param val_iou_reset: val iou reset operation
    :param val_writer: val summary writer
    :param epoch: index of the current epoch
    :param saver: tf model saver
    :return: new best iou
    """
    acc_sum, loss_sum = 0, 0

    # toggle training off
    assign_op = is_training.assign(False)
    sess.run(assign_op)

    val_batches = N_VAL_SAMPLES // BATCH_SIZE
    print(f"starting evaluation {val_batches} batches")
    for j in range(val_batches):
        loss_val, acc_val, _, val_iou_val = sess.run(
            [val_loss, val_acc, val_iou_update, val_iou])
        print(
            f"\tevaluation epoch: {epoch:03d}\tbatch {j:03d} eval:"
            f"\tloss: {loss_val:.4f}\taccuracy: {acc_val:.4f}\taccumulated iou {val_iou_val:.4f}"
        )
        acc_sum += acc_val
        loss_sum += loss_val

    # validation summary
    loss = loss_sum / val_batches
    acc = acc_sum / val_batches
    iou = val_iou_val
    summary = get_tf_summary(loss, acc, iou)
    val_writer.add_summary(summary, epoch)
    print(f"evaluation:\tmean loss: {loss:.4f}\tmean acc: {acc:.4f}\tmean iou {iou:.4f}\n")

    # save the model if it is better
    if iou > best_iou:
        best_iou = iou
        save_path = saver.save(
            sess,
            os.path.join(LOG_DIR + "_train", f"best_model_epoch_{epoch:03d}.ckpt"))
        print(f"Model saved in file: {save_path}\n")

    # reset the iou accumulator
    sess.run(val_iou_reset)

    # toggle training on
    assign_op = is_training.assign(True)
    sess.run(assign_op)

    return best_iou
def _resource_apply_dense(self, grad: tf.Tensor, var: tf.Variable,
                          apply_state: Optional[dict] = None) -> tf.Tensor:
    updated_var = self._get_multi_batch_update(grad, var, apply_state)
    return var.assign(updated_var)
def assign1(values, variables: tf.Variable):
    variables.assign(values, use_locking=False, read_value=False)
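# Example call in eager mode (TF 2.x); with read_value=False the assign does
# not return the updated value, which skips an unnecessary read. The variable
# and values below are illustrative only.
v = tf.Variable([1.0, 2.0])
assign1([3.0, 4.0], v)
print(v.numpy())  # [3. 4.]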