    def train_step(self, paragraph_tokens, ref_question,
                   global_step: tf.Variable):
        """Run one training step over a batch and log the mean loss.

        Walks over ``ref_question`` one position at a time (axis 1). For every
        position where at least one row holds a non-padding token it calls
        ``self.token_pred_and_loss`` and then feeds the reference tokens of
        that position back in as the next ``context``.

        NOTE(review): the body of the innermost ``if`` below is not present in
        this view, so nothing visible here fills ``losses`` or ``preds`` --
        confirm against the full file.

        Args:
            paragraph_tokens: token ids used as the initial context; axis 0 is
                read as the batch dimension.
            ref_question: reference token ids, indexed as ``[row, position]``.
            global_step: step counter; incremented by one before the loss
                summary is written.

        Returns:
            ``tf.reduce_mean(losses)``, which is also written as the ``'loss'``
            scalar summary.
        """

        losses = []
        # One (initially empty) prediction list per row of ref_question.
        preds = [[] for _ in range(ref_question.shape[0])]
        context = paragraph_tokens
        past = None
        # presumably token-type ids: 0 marks paragraph tokens -- TODO confirm
        types = tf.constant(0, dtype=tf.int32, shape=paragraph_tokens.shape)
        batch_dim = paragraph_tokens.shape[0]

        for i in tf.range(ref_question.shape[1]):
            ref_tokens = ref_question[:, i]
            # Skip positions where every row holds the padding token.
            if tf.reduce_any(
                    tf.not_equal(ref_tokens, self.embedder.padding_token)):
                predictions, past, token_loss = self.token_pred_and_loss(
                    context, past, ref_tokens, types)
                # The next call sees only the reference token of this position,
                # as a (batch, 1) tensor, together with the returned `past`.
                context = tf.expand_dims(ref_question[:, i], axis=1)
                types = tf.constant(1, dtype=tf.int32, shape=(batch_dim, 1))
                for ind, ref in enumerate(ref_tokens):
                    if ref != self.embedder.padding_token:

        # Optional debug output: paragraph, reference and prediction per row.
        if self.print_predictions:
            for i, pred in enumerate(preds):
                ref = self.embedder.tokenizer.decode(ref_question[i])
                pred = self.embedder.tokenizer.decode(tf.stack(pred))
                paragraph = self.embedder.tokenizer.decode(paragraph_tokens[i])
                tf.print(paragraph, "\n", ref, "\n", pred, "\n")
        global_step.assign(global_step + 1)

        with self.train_summary_writer.as_default():
            total_loss = tf.reduce_mean(losses)
            tf.summary.scalar('loss', total_loss, step=global_step)
        return total_loss
# Runs `num_iterations` optimisation steps on `gen_img`: each step takes the
# gradients and losses returned by `compute_grads(cfg)` and applies the
# gradients to `gen_img` with `opt`.
# NOTE(review): this definition looks truncated in this view -- the docstring
# text has no quote delimiters, the `gram_style_features` list and the `cfg`
# dict are never closed, `style_weight` / `content_weight` are used but are not
# parameters, `clipped` is never written back to `gen_img`, and the f-string at
# the end has no enclosing call. The text also promises a return value while
# the annotation says `None`. Confirm against the full file.
def _st(model: tf.keras.Model,
        gen_img: tf.Variable,
        content_path: str,
        style_path: str,
        content_layers: List[str],
        style_layers: List[str],
        lpi: Callable,
        opt: tf.train.AdamOptimizer,
        num_iterations=100) -> None:
    Style transfer from a style image to a source image with a given pre-trained network
    :param model: The model to use for the style transfer
    :param gen_img: The generated image to modify INPLACE
    :param content_path: The path to the source image to paint the style
    :param style_path: The path to the image to use the style
    :param content_layers: The list of content layers to use
    :param style_layers: The list of style layers to use
    :param lpi: The function to use to load and process image
    :param opt: The Adam optimizer to use
    :param content_weight: The weight for the content loss
    :param style_weight: The weight for the style loss
    :param num_iterations: The number of iteration to paint
    :return: The best image associated with his best loss
    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = compute_feature_representations(
        model, lpi, content_path, style_path, len(style_layers))
    gram_style_features = [
        gram_matrix(style_feature) for style_feature in style_features
    loss_weights = (style_weight, content_weight)
    cfg = {
        'model': model,
        'loss_weights': loss_weights,
        'gen_img': gen_img,
        'gram_style_features': gram_style_features,
        'content_features': content_features,
        'num_style_layers': len(style_layers),
        'num_content_layers': len(content_layers)
    # Clip bounds are chosen so that `gen_img + norm_means` stays in [0, 255].
    # presumably `norm_means` are the per-channel means subtracted during
    # preprocessing -- TODO confirm
    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means
    for i in range(num_iterations):
        grads, all_loss = compute_grads(cfg)
        loss, style_score, content_score = all_loss
        opt.apply_gradients([(grads, gen_img)])
        clipped = tf.clip_by_value(gen_img, min_vals, max_vals)
            f"Iteration n°{i} | loss : {loss} | style_score : {style_score} | content_score : {content_score}"
def unit_pruning(w: tf.Variable, k: float) -> tf.Variable:
    """Zero out the columns of ``w`` that have the smallest Euclidean norm.

    Performs pruning on a weight matrix ``w`` in the following way:

    - The euclidean norm of each column is computed.
    - The indices of the smallest ``k`` share of columns, ranked by their
      euclidean norms, are retrieved.
    - All elements in the columns that have the matching indices are set to 0.

    Args:
        w: The weight matrix, a rank-2 variable; it is updated in place.
        k: The share of columns that should be pruned from the matrix. The
            value is multiplied directly with the column count and rounded, so
            pass a fraction such as ``0.25`` rather than ``25``.

    Returns:
        The result of assigning the unit (column) pruned values back to ``w``.
    """
    # Convert the requested share into an integer number of columns.
    k = tf.cast(
        tf.round(tf.cast(tf.shape(w)[1], tf.float32) * tf.constant(k)), dtype=tf.int32
    )
    norm = tf.norm(w, axis=0)
    # Row part of the (row, col) index pairs: 0..rows-1 repeated once per
    # pruned column.
    row_indices = tf.tile(tf.range(tf.shape(w)[0]), [k])
    # top_k on the negated norms yields the k columns with the smallest norm.
    _, col_indices = tf.nn.top_k(tf.negative(norm), k, sorted=True, name=None)
    # Column part: every selected column index repeated once per row, so it
    # lines up element-wise with `row_indices`.
    col_indices = tf.reshape(
        tf.tile(tf.reshape(col_indices, [-1, 1]), [1, tf.shape(w)[0]]), [-1]
    )
    indices = tf.stack([row_indices, col_indices], axis=1)

    return w.assign(
        tf.scatter_nd_update(w, indices, tf.zeros(tf.shape(w)[0] * k, tf.float32))
    )
def weight_pruning(w: tf.Variable, k: float) -> tf.Variable:
    """Zero out the elements of ``w`` that have the smallest absolute value.

    Performs pruning on a weight matrix ``w`` in the following way:

    - The absolute value of all elements in the weight matrix are computed.
    - The indices of the smallest ``k`` share of elements, ranked by their
      absolute values, are retrieved.
    - All elements with the matching indices are set to 0.

    Args:
        w: The weight matrix; it is updated in place.
        k: The share of values that should be pruned from the matrix. The
            value is multiplied directly with the element count and rounded,
            so pass a fraction such as ``0.25`` rather than ``25``.

    Returns:
        The result of assigning the weight pruned values back to ``w``.
    """
    # Convert the requested share into an integer number of elements.
    k = tf.cast(
        tf.round(tf.size(w, out_type=tf.float32) * tf.constant(k)), dtype=tf.int32
    )
    w_reshaped = tf.reshape(w, [-1])
    # top_k on the negated magnitudes yields the k smallest-magnitude entries.
    _, indices = tf.nn.top_k(tf.negative(tf.abs(w_reshaped)), k, sorted=True, name=None)
    # scatter_nd_update needs a mutable target, hence the non-trainable
    # all-ones variable that receives zeros at the pruned positions.
    mask = tf.scatter_nd_update(
        tf.Variable(
            tf.ones_like(w_reshaped, dtype=tf.float32), name="mask", trainable=False
        ),
        tf.reshape(indices, [-1, 1]),
        tf.zeros([k], tf.float32),
    )

    return w.assign(tf.reshape(w_reshaped * mask, tf.shape(w)))
# Evaluates `expression` in a freshly opened `tf.Session`, passing `inputs` as
# the `feed_dict`.
# NOTE(review): the docstring text below has no quote delimiters, and the
# evaluated value `upd_variable_val` is never written to `variable` in the
# lines visible here -- the block looks truncated; confirm against the full
# file.
# NOTE(review): `inputs` is annotated as a list but is handed to `feed_dict`;
# confirm what callers actually pass.
def update_variable(variable: tf.Variable, expression: tf.Tensor, inputs: list, name=None):
    Built to replicate theano.function(inps=inputs, [], updates=updates)

    Updates the value of variable with the expression by substituting the value of passed in tensor in expression
    :param inputs: iterable of [ndarrays or array like objects]
        value which will be fed into expression to update variable
    :param variable: tf.Variable
    :param expression: tf.Tensor 
        expression / graph with variable and tensor as inputs 
    :return: None 
    with tf.name_scope(name, 'update_variable', [variable, expression]):
        with tf.Session() as sess:
            upd_variable_val = sess.run(expression, feed_dict=inputs)
def train_step_with_variation_loss(image: tf.Variable,
                                   extractor: StyleContentModel,
                                   opt: tf.optimizers.Adam,
                                   style_targets: tf.Tensor,
                                   content_targets: tf.Tensor,
                                   num_style_layers: int,
                                   num_content_layers: int,
                                   style_weight: float,
                                   content_weight: float,
                                   total_variation_weight: float) -> None:
    """Apply one training step that includes a total variation penalty.

    The loss is ``style_content_loss(...)`` plus
    ``total_variation_weight * tf.image.total_variation(image)``. Its gradient
    with respect to ``image`` is applied with ``opt`` and the image is then
    clipped back into ``[0, 1]``.

    Args:
      image (tf.Variable): the rendered image; updated in place
      extractor (StyleContentModel): the intermediate layer extractor
      opt (tf.optimizers.Adam): the optimizer
      style_targets (tf.Tensor): the style intermediate outputs
      content_targets (tf.Tensor): the content intermediate outputs
      num_style_layers (int): number of style layers
      num_content_layers (int): number of content layers
      style_weight (float): the style weight
      content_weight (float): the content weight
      total_variation_weight (float): the total variation weight
    """
    with tf.GradientTape() as tape:
        # forward pass rendered image
        outputs: Dict[str, tf.Tensor] = extractor(image)

        # calculate style content loss
        loss: tf.Tensor = style_content_loss(outputs, style_targets,
                                             content_targets, num_style_layers,
                                             num_content_layers, style_weight,
                                             content_weight)

        # add total variation loss
        loss += total_variation_weight*tf.image.total_variation(image)

    # calculate gradient descent
    grad = tape.gradient(loss, image)

    # apply gradient descent
    opt.apply_gradients([(grad, image)])

    # update image and clip to [0,1]
    image.assign(
        tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0))
 # Reads the current value of `graph_var` through `graph.run(graph_var)` while
 # inside `tf.device(self.device)`.
 # NOTE(review): the docstring text below has no quote delimiters, and the
 # fetched `value` is never written to `var` in the lines visible here -- the
 # block looks truncated; confirm against the full file.
 def assign(self, var: tf.Variable, graph: T,
            graph_var: tf.Variable) -> None:
     Assigns the value of <graph_var>, a Variable of <graph>, to <var>, a
     Variable of this PBTAbleGraph.
     with tf.device(self.device):
         value = graph.run(graph_var)
# Writes an expanded copy of `var_from` into `var_to`, choosing the expansion
# helper from how the first and last dimensions of the two shapes relate
# (equal, or doubled in `var_to`).
# NOTE(review): this definition looks truncated in this view -- the docstring
# is never closed, the equal-shape branch and the 1-D branch have no body, the
# second `expand_1_axis(` call and the last `elif` condition are cut off, and
# the final `raise` has no `else:` in front of it. Confirm against the full
# file.
def var_to_var(var_from: tf.Variable, var_to: tf.Variable, epsilon: float):
    """Expands a variable to another variable.

  Assume the shape of `var_from` is (a, b, ..., y, z), the shape of `var_to`
  can be (a, ..., z * 2), (a * 2, ..., z * 2), (a * 2, ..., z)

  If the shape of `var_to` is (a, ..., 2 * z):
    For any x, tf.matmul(x, var_to) ~= expand_vector(tf.matmul(x, var_from)) / 2
    Not that there will be noise added to the left hand side, if epsilon != 0.
  If the shape of `var_to` is (2 * a, ..., z):
    For any x, tf.matmul(expand_vector(x), var_to) == tf.matmul(x, var_from)
  If the shape of `var_to` is (2 * a, ..., 2 * z):
    For any x, tf.matmul(expand_vector(x), var_to) ==
        expand_vector(tf.matmul(expand_vector(x), var_from))

    var_from: input variable to expand.
    var_to: output variable.
    epsilon: the noise ratio that will be added, when splitting `var_from`.
    shape_from = var_from.shape
    shape_to = var_to.shape

    if shape_from == shape_to:

    elif len(shape_from) == 1 and len(shape_to) == 1:

    elif shape_from[0] * 2 == shape_to[0] and shape_from[-1] == shape_to[-1]:
        var_to.assign(expand_1_axis(var_from.numpy(), epsilon=epsilon, axis=0))

    elif shape_from[0] == shape_to[0] and shape_from[-1] * 2 == shape_to[-1]:
        var_to.assign(expand_1_axis(var_from.numpy(), epsilon=epsilon,

    elif shape_from[0] * 2 == shape_to[0] and shape_from[-1] * 2 == shape_to[
        var_to.assign(expand_2_axes(var_from.numpy(), epsilon=epsilon))

        raise ValueError("Shape not supported, {}, {}".format(
            shape_from, shape_to))
  # Moving-average update of a codebook: `codebook` is assigned
  # `updated_ema_means / (updated_ema_count + epsilon)`, where both statistics
  # come from `moving_averages.assign_moving_average` over sums of
  # `self.assignments` and `self.codes`.
  # NOTE(review): this definition looks truncated in this view -- the docstring
  # is never closed and both `assign_moving_average(` calls are cut off (no
  # target variable or decay argument is visible, and the second `reduce_sum`
  # has no axis or closing parenthesis). `counts`, `means` and `decay` are not
  # referenced by any visible code line. Confirm against the full file.
  def update_codebook(self,
                      codebook: tf.Variable,
                      counts: tf.Variable,
                      means: tf.Variable,
                      decay: float = 0.99,
                      epsilon: float = 1e-5):
    r""" Update the codebook using exponential moving average. (Appendix A.1)

      codebook: A `float`-like `Tensor`,
        the codebook for code embedding, shape `[n_codes, code_size]`.
      counts: A `float`-like `Tensor`,
        stores the occurrences counts for each code in the codebook
        the codebook for code embedding, shape `[n_codes]`.
      means: A `float`-like `Tensor`,
        stores the moving average of each code in the codebook,
        shape `[n_codes, code_size]`.

      updated_codebook: the updated codebook, shape  `[n_codes, code_size]`
      updated_counts: the moving average updated counts, shape  `[code_size]`
      updated_means: the moving average updated means, shape  `[n_codes, code_size]`
    input_ndim = len(self.codes.shape) - 2
    axes = range(input_ndim + 1)  # the batch axes
    # Use an exponential moving average to update the codebook.
    updated_ema_count = moving_averages.assign_moving_average(
        value=tf.reduce_sum(self.assignments, axis=axes),
    updated_ema_means = moving_averages.assign_moving_average(
        value=tf.reduce_sum(tf.expand_dims(self.codes, axis=-2) *
                            tf.expand_dims(self.assignments, axis=-1),
    # Add small value to avoid dividing by zero.
    perturbed_ema_count = updated_ema_count + epsilon
    codebook.assign(updated_ema_means / perturbed_ema_count[..., tf.newaxis])
    return codebook, updated_ema_count, updated_ema_means
def update_learning_rate(session: tf.Session,
                         learning_rate_variable: tf.Variable,
                         new_learning_rate: float):
    """
    Runs an assign op in the given tf.Session to update the learning rate
    stored in learning_rate_variable.

    :param session: the tf.Session in which the assign op is executed
    :param learning_rate_variable: the variable holding the learning rate
    :param new_learning_rate: e.g. 0.001
    :return: None
    """
    assign_op = learning_rate_variable.assign(new_learning_rate)
    # Building the op alone changes nothing in graph mode; it has to be
    # executed in the session for the variable to take the new value.
    session.run(assign_op)
def _make_variable_pruning_op(variable: tf.Variable, threshold, name=None):
    """Build an op that zeroes the small entries of ``variable`` and its mask.

    The mask is looked up in ``MASK_COLLECTION`` under the variable's op name;
    exactly one entry must be found there, otherwise the unpacking raises.
    Entries of ``variable`` whose absolute value is ``<= threshold`` are
    multiplied by 0 in both the variable and the mask, all others by 1.

    Args:
        variable: the variable to prune.
        threshold: magnitude at or below which an entry is pruned.
        name: optional name scope; defaults to ``'variable_prune_op'``.

    Returns:
        A grouped op named ``'prune'`` that assigns the new mask and the new
        variable value.
    """
    (mask,) = tf.get_collection(MASK_COLLECTION, variable.op.name)

    with tf.name_scope(name, default_name='variable_prune_op'):
        below_threshold = tf.less_equal(
            tf.abs(variable), threshold, name='prune_mask')
        # 1.0 where the entry survives, 0.0 where it is pruned.
        keep = 1 - tf.cast(below_threshold, dtype=tf.float32)
        pruned_mask = tf.multiply(mask, keep, name='new_mask')
        pruned_variable = tf.multiply(variable, keep, name='new_variable')

        return tf.group(
            mask.assign(pruned_mask),
            variable.assign(pruned_variable),
            name='prune',
        )
 def _ema_assign_fn(self, variable: tf.Variable, value: tf.Tensor):
   """Blend `value` into `variable` as an exponential moving average.

   The new value is `decay * variable + (1 - decay) * value`, with `decay`
   read from `self._decay`; the result of `variable.assign` is returned.
   """
   blended = self._decay * variable + (1.0 - self._decay) * value
   return variable.assign(blended)
# Builds first-conv-layer weights for `var` from pretrained RGB filters: the
# RGB weights are concatenated along axis 2 with weights for the remaining
# channels, which are generated according to `hs_weight_init`.
# NOTE(review): this definition looks truncated in this view -- the docstring
# is never closed, both `raise ValueError` lines have no `else:` in front of
# them, the `tf.truncated_normal(` call is cut off, the RGB-only branch only
# prints, and neither `final_weight` nor `sess` is used to write into `var` in
# the visible lines. Confirm against the full file.
# NOTE(review): the 'samescaled' branch scales `rgb_weights` with `*=`, which
# modifies the caller's array in place.
def init_first_layer_weights(var: tf.Variable, rgb_weights: np.ndarray,
                             sess: tf.Session, hs_weight_init: str) -> None:
    '''Initializes the weights for filters in the first conv layer.

    'resnet/scale1/weights:0' for ResNet
    'vggf/conv1/conv1_weights:0' for VGGF

    If we are using RGB-only, then just initializes var to rgb_weights. Otherwise, uses
    hs_weight_init to determine how to initialize the weights for non-RGB bands.

    - var: tf.Variable, the filters in the 1st convolution layer, shape [F, F, C, 64]
        - F is the filter size (7 for ResNet, 11 for VGGF)
        - C is either 3 (RGB), 7 (lxv3), or 9 (Landsat7)
    - rgb_weights: ndarray of np.float32, shape [F, F, 3, 64]
    - sess: tf.Session
    - hs_weight_init: str, one of ['random', 'same', 'samescaled']
    var_shape = np.asarray(var.get_shape().as_list())
    rgb_weights_shape = np.asarray(rgb_weights.shape)

    # only weights in the 1st conv layer need to be adjusted for dealing with hyperspectral images
    # check that the filter shape and num_filters match up, and that RGB weights have 3 channels
    if 'scale1/weights:0' in var.name:  # ResNet
        F = 7
    elif 'conv1/conv1_weights:0' in var.name:  # VGGF
        F = 11
        raise ValueError('var is not the weights for the first conv layer')

    assert np.all(var_shape[[0, 1]] == [F, F])
    assert np.all(var_shape[[0, 1, 3]] == rgb_weights_shape[[0, 1, 3]])
    assert rgb_weights.shape[2] == 3
    assert rgb_weights.dtype == np.float32

    # if we are using the RGB-only model, then just initialize to saved weights
    if var_shape[2] == 3:
        print('Using rgb only model')

    # Set up the initializer function
    print('Initializing var different from saved rgb weights:', var.name,
          ' With shape:', var_shape)
    print('Using ' + hs_weight_init +
          ' initialization for hyperspectral weights.')
    num_hs_channels = var_shape[2] - rgb_weights.shape[2]
    hs_weights_shape = [F, F, num_hs_channels, 64]

    if hs_weight_init == 'random':
        # initialize the weights in the hyperspectral bands to gaussian with same overall mean and
        # stddev as the RGB channels
        rgb_mean = np.mean(rgb_weights)
        rgb_std = np.std(rgb_weights)
        hs_weights = tf.truncated_normal(hs_weights_shape,
    elif hs_weight_init == 'same':
        # initialize the weight for each position in each filter to the average of the 3 RGB weights
        # at the same position in the same filter
        rgb_mean = rgb_weights.mean(axis=2,
                                    keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
    elif hs_weight_init == 'samescaled':
        # similar to hs_weight_init == 'same', but we normalize the weights
        rgb_mean = rgb_weights.mean(axis=2,
                                    keepdims=True)  # shape [F, F, 1, 64]
        hs_weights = np.tile(rgb_mean, (1, 1, num_hs_channels, 1))
        rgb_weights *= 3 / (3 + num_hs_channels)
        hs_weights *= 3 / (3 + num_hs_channels)
        raise ValueError(f'Unknown hs_weight_init type: {hs_weight_init}')

    final_weight = tf.concat([rgb_weights, hs_weights], axis=2)
    print('Shape of 1st layer weights:',
          final_weight.shape)  # should be (F, F, C, 64)

# Runs `N_VAL_SAMPLES // BATCH_SIZE` validation batches through `sess`,
# averages loss and accuracy over them, writes a summary for `epoch`, saves the
# model when the IoU beats `best_iou`, and returns the (possibly updated) best
# IoU.
# NOTE(review): this definition looks truncated in this view -- the docstring
# text has no quote delimiters, the f-strings have no enclosing call, the
# `saver.save(` call is cut off, no statement follows the "reset accumulator"
# comment (`val_iou_reset` is never used), and neither `assign_op` is passed to
# `sess.run` in the visible lines. Confirm against the full file.
def eval_model(is_training: tf.Variable, sess: tf.Session, best_iou: float,
               val_loss: tf.Tensor, val_acc: tf.Tensor,
               val_iou_update: tf.Operation, val_iou: tf.Tensor,
               val_iou_reset: tf.Operation, val_writer: tf.summary.FileWriter,
               epoch: int, saver: tf.train.Saver) -> float:
    evaluates model with one pass over validation set

    :param is_training: tf var which indicates if model is training
    :param sess: tf sess
    :param best_iou: best validation iou until now
    :param val_loss: val loss tensor
    :param val_acc: val accuracy tensor
    :param val_iou_update: val iou update operation
    :param val_iou: val iou tensor
    :param val_iou_reset: val iou reset operation
    :param val_writer: val summary writer
    :param epoch: index of current epoch
    :param saver: tf model saver
    :return: new best iou
    acc_sum, loss_sum = 0, 0

    # toggle training off
    assign_op = is_training.assign(False)

    # Integer division: samples beyond the last full batch are not evaluated.
    val_batches = N_VAL_SAMPLES // BATCH_SIZE
    print(f"starting evaluation {val_batches} batches")

    for j in range(val_batches):
        # The IoU update op is run together with the fetches in the same call.
        loss_val, acc_val, _, val_iou_val = sess.run(
            [val_loss, val_acc, val_iou_update, val_iou])
            f"\tevaluation epoch: {epoch:03d}\tbatch {j:03d} eval:"
            f"\tloss: {loss_val:.4f}\taccuracy: {acc_val:.4f}\taccumulated iou {val_iou_val:.4f}"
        acc_sum += acc_val
        loss_sum += loss_val

    # validation summary
    loss = loss_sum / val_batches
    acc = acc_sum / val_batches
    # IoU is taken from the value fetched with the last batch rather than
    # averaged; presumably the metric accumulates across batches -- TODO confirm
    iou = val_iou_val
    summary = get_tf_summary(loss, acc, iou)
    val_writer.add_summary(summary, epoch)
        f"evaluation:\tmean loss: {loss:.4f}\tmean acc: {acc:.4f}\tmean iou {iou:.4f}\n"

    # save model if it is better
    if iou > best_iou:
        best_iou = iou
        save_path = saver.save(
            os.path.join(LOG_DIR + "_train",
        print(f"Model saved in file: {save_path}\n")

    # reset accumulator

    # toggle training on
    assign_op = is_training.assign(True)

    return best_iou
 def _resource_apply_dense(self,
                           grad: tf.Tensor,
                           var: tf.Variable,
                           apply_state: Optional[dict] = None) -> tf.Tensor:
     """Assign to `var` the value computed by `_get_multi_batch_update`.

     `grad`, `var` and `apply_state` are forwarded unchanged to
     `self._get_multi_batch_update`; the result of `var.assign` is returned.
     """
     return var.assign(self._get_multi_batch_update(grad, var, apply_state))
def assign1(values, variables: tf.Variable):
    """Write `values` into `variables` without locking or reading back.

    Calls `variables.assign` with `use_locking=False` and `read_value=False`;
    whatever that call returns is discarded and the function returns `None`.
    """
    assign_options = {"use_locking": False, "read_value": False}
    variables.assign(values, **assign_options)