import numpy as np
import tensorflow as tf


def resource_apply_scheduled_momentum(
    var: tf.Variable,
    accum: tf.Variable,
    lr: float,
    grad: tf.Tensor,
    current_momentum: float,
    next_momentum: float,
    use_locking: bool,
    use_nesterov: bool,
):
    """Apply one SGD step with a time-varying (scheduled) momentum coefficient.

    The accumulator is updated as ``v <- mu_t * v - lr * g``. With Nesterov
    momentum the variable update uses both the current and the next momentum
    coefficient: ``theta <- theta - mu_t * v_old + (1 + mu_{t+1}) * v_new``.
    """
    if use_nesterov:
        # Keep a snapshot of the accumulator before it is overwritten.
        accum_value = tf.identity(accum)
        accum_update = accum.assign(
            current_momentum * accum - lr * grad, use_locking=use_locking
        )
        var_update = var.assign_add(
            -current_momentum * accum_value + (next_momentum + 1) * accum_update,
            use_locking=use_locking,
        )
    else:
        # Plain (heavy-ball) momentum: step along the updated accumulator.
        accum_update = accum.assign(
            current_momentum * accum - lr * grad, use_locking=use_locking
        )
        var_update = var.assign_add(accum_update, use_locking=use_locking)
    return tf.group(var_update, accum_update)
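# --- Usage sketch (illustrative, not from the source) -----------------------
# A minimal example of driving the update above from a hand-written training
# loop. The momentum schedule `mu` and the toy quadratic loss are hypothetical
# placeholders; substitute whatever schedule the surrounding optimizer uses.

def _scheduled_momentum_demo():
    var = tf.Variable([1.0, 2.0, 3.0])
    accum = tf.Variable(tf.zeros_like(var))  # momentum buffer starts at zero

    def mu(t: int) -> float:
        # hypothetical warm-up of the momentum coefficient
        return min(0.9, 0.5 + 0.05 * t)

    for t in range(10):
        with tf.GradientTape() as tape:
            loss = tf.reduce_sum(var ** 2)  # toy objective
        grad = tape.gradient(loss, var)
        resource_apply_scheduled_momentum(
            var=var,
            accum=accum,
            lr=0.1,
            grad=grad,
            current_momentum=mu(t),
            next_momentum=mu(t + 1),
            use_locking=False,
            use_nesterov=True,
        )
    return var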
def __call__(self, image: tf.Variable, clip: bool = True) -> tf.Tensor:
    """Apply one step of optimization to the given image.

    Arguments
    ---------
    image: tf.Variable
        Image in internal (TensorFlow) representation. This image
        will be updated in place by performing one optimization step.
    clip: bool
        If True, clip the updated image values back to the valid
        range [0, 1] after the optimizer step.

    Returns
    -------
    loss: tf.Tensor (float)
        The combined style/content loss for the image before the update.
    """
    with tf.GradientTape() as tape:
        outputs = self._extractor(image)
        loss = self.style_content_loss(outputs)
    grad = tape.gradient(loss, image)

    # apply one step of gradient descent
    self._optimizer.apply_gradients([(grad, image)])

    # make sure image values stay in the valid range
    if clip:
        image.assign(self.clip_0_1(image))

    # return the loss
    return loss
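# --- Usage sketch (illustrative, not from the source) -----------------------
# The class defining __call__ above is not shown in this excerpt, so
# `stylizer` below is a stand-in name for an instance of it. The image being
# optimized must be a tf.Variable so that apply_gradients and assign can
# update it in place.

def _style_transfer_demo(stylizer, content_image: tf.Tensor):
    image = tf.Variable(content_image)  # float values assumed in [0, 1]
    for step in range(100):
        loss = stylizer(image)  # one optimizer step; values clipped to [0, 1]
        if step % 10 == 0:
            print(f"step {step:3d}  loss={float(loss):.4f}")
    return image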
def copy_weight(source: tf.Tensor, destination: tf.Variable):
    """
    Copies values from `source` to `destination`, making adjustments
    for each dimension, where necessary.

    :param source: Source tensor.
    :param destination: Destination variable.
    """
    if source.shape == destination.shape:
        destination.assign(source)
        return

    dst = np.zeros(destination.shape)
    if len(source.shape) == 4:
        # Copying a conv kernel, which could be either from
        #   Conv2D:   with shape (K, K, C_in, C_out), or
        #   DWConv2D: with shape (K, K, C, 1)
        sk_h, sk_w, sc_in, sc_out = source.shape
        dk_h, dk_w, dc_in, dc_out = dst.shape
        assert sk_h == sk_w and sk_h % 2 == 1
        assert dk_h == dk_w and dk_h % 2 == 1

        # If kernel size changes: copy `source` into the middle of `dst`
        # or vice versa. For example:
        #
        #   5 x 5          3 x 3      3 x 3       5 x 5
        #  A B C D E                             0 0 0 0 0
        #  F G H I J      G H I      A B C       0 A B C 0
        #  K L M N O  =>  L M N  OR  D E F  =>   0 D E F 0
        #  P Q R S T      Q R S      G H I       0 G H I 0
        #  U V W X Y                             0 0 0 0 0
        #
        # If channels change: copy the first channels of `source` into `dst`
        # or vice versa.

        # Compute offsets in the kernel dimension for the source (sko)
        # and destination (dko) tensors
        if sk_h < dk_h:
            dko, sko = (dk_h - sk_h) // 2, 0
        else:
            sko, dko = (sk_h - dk_h) // 2, 0

        # Compute how many channels (both in and out) will be transferred
        c_in = min(sc_in, dc_in)
        c_out = min(sc_out, dc_out)

        dst[dko:dk_h - dko, dko:dk_w - dko, :c_in, :c_out] = \
            source[sko:sk_h - sko, sko:sk_w - sko, :c_in, :c_out]
    elif len(source.shape) == 2:
        # Copying a fully-connected (Dense) layer kernel; shape: (U_in, U_out)
        su_in, su_out = source.shape
        du_in, du_out = dst.shape
        u_in = min(su_in, du_in)
        u_out = min(su_out, du_out)
        dst[:u_in, :u_out] = source[:u_in, :u_out]
    else:
        # Copying a bias tensor; shape: (C,)
        assert len(source.shape) == 1
        c = min(source.shape[0], dst.shape[0])
        dst[:c] = source[:c]
    destination.assign(dst)
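# --- Usage sketch (illustrative, not from the source) -----------------------
# Transferring weights between two Conv2D layers that differ in both kernel
# size and number of output filters: the 3x3 kernel lands in the middle of
# the 5x5 kernel, and the extra output channels stay zero. The layers and
# shapes below are made up for the example.

def _copy_weight_demo():
    small = tf.keras.layers.Conv2D(8, kernel_size=3)
    large = tf.keras.layers.Conv2D(16, kernel_size=5)
    small.build((None, 32, 32, 3))
    large.build((None, 32, 32, 3))

    copy_weight(small.kernel, large.kernel)  # (3, 3, 3, 8) -> (5, 5, 3, 16)
    copy_weight(small.bias, large.bias)      # (8,) -> (16,)
    return large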