import tensorflow as tf

# Note: l2norm(t) (the Euclidean norm of a tensor) is assumed to be defined
# elsewhere in this module; see also util.l2norm used below.


def grad_clip_joint(loss_joint, max_grad_norm, scope_list_joint):
    '''
    Jointly clip the gradients of several losses by a single global norm.

    :param loss_joint: [loss1, loss2, loss3, ...]
    :param max_grad_norm: global norm to clip to, or None to skip clipping
    :param scope_list_joint: [scope_list1 (w.r.t. loss1), scope_list2, ...]
    :return: (list of per-loss (grad, var) lists, list of per-loss global norms)
    '''
    grads_joint = []
    params_list_joint = []
    seg_points = [0]
    for i, loss in enumerate(loss_joint):
        params_list = []
        for scope in scope_list_joint[i]:
            scope_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
            print(len(scope_vars))
            params_list += scope_vars
        grads_joint += tf.gradients(loss, params_list)
        params_list_joint += params_list
        # Record cumulative offsets so the joint lists can be sliced back
        # into per-loss segments below. (Appending len(params_list) alone
        # would produce wrong slice boundaries for more than one loss.)
        seg_points.append(seg_points[-1] + len(params_list))
    if max_grad_norm is not None:
        grads_joint, grad_norm = tf.clip_by_global_norm(
            grads_joint, max_grad_norm)
    grads_joint_return = []
    global_norm_return = []
    for i in range(len(seg_points) - 1):
        grads = grads_joint[seg_points[i]:seg_points[i + 1]]
        params_list = params_list_joint[seg_points[i]:seg_points[i + 1]]
        global_norm = tf.sqrt(sum([l2norm(t) ** 2 for t in grads]))
        grads = list(zip(grads, params_list))
        grads_joint_return.append(grads)
        global_norm_return.append(global_norm)
    return grads_joint_return, global_norm_return
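
# A minimal, self-contained usage sketch for grad_clip_joint. The scope
# names, losses, and optimizer below are illustrative assumptions, not part
# of this module; they only show how the returned per-loss (grad, var)
# lists plug into apply_gradients.
def _grad_clip_joint_usage_sketch():
    with tf.variable_scope('policy'):
        w_pi = tf.get_variable('w', shape=[3], initializer=tf.ones_initializer())
    with tf.variable_scope('value'):
        w_v = tf.get_variable('w', shape=[3], initializer=tf.ones_initializer())
    loss_pi = tf.reduce_sum(tf.square(w_pi))
    loss_v = tf.reduce_sum(tf.square(w_v))
    grads_joint, norms_joint = grad_clip_joint(
        [loss_pi, loss_v], max_grad_norm=0.5,
        scope_list_joint=[['policy'], ['value']])
    optimizer = tf.train.GradientDescentOptimizer(0.1)
    # One apply_gradients per loss, each with its own clipped (grad, var) pairs.
    train_ops = [optimizer.apply_gradients(g) for g in grads_joint]
    return train_ops, norms_joint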

def orthogonalize_(i, basis, v):
    v_norm = util.l2norm(v)
    v_new, v_new_norm = orthogonalize_once(i, basis, v)
    # If the norm drops below 1/sqrt(2) of the original, run a second
    # round of MGS on the already-projected vector v_new (re-running on
    # the original v would just reproduce the same result). See proof in:
    #   B. N. Parlett, ``The Symmetric Eigenvalue Problem'',
    #   Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
    return tf.cond(v_new_norm < 0.7071 * v_norm,
                   lambda: orthogonalize_once(i, basis, v_new),
                   lambda: (v_new, v_new_norm))
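
# `orthogonalize_once` is defined elsewhere; as a reference point, a single
# modified Gram-Schmidt pass could look like the sketch below. It assumes
# `basis` is a plain Python list of unit-norm tensors and `i` a Python int
# (the actual implementation may use a TensorArray and tensor indices).
def _orthogonalize_once_sketch(i, basis, v):
    # Subtract the projection of v onto each of the first i basis vectors.
    for j in range(i):
        b = basis[j]
        v -= tf.reduce_sum(v * b) * b
    return v, util.l2norm(v)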

def testL2Norm(self):
    with self.test_session():
        x_np = np.array([[2], [-3.], [5.]])
        x_norm_np = np.linalg.norm(x_np)
        x_normalized_np = x_np / x_norm_np
        x = constant_op.constant(x_np)
        l2norm = util.l2norm(x)
        l2norm_squared = util.l2norm_squared(x)
        x_normalized, x_norm = util.l2normalize(x)
        self.assertAllClose(l2norm.eval(), x_norm_np)
        self.assertAllClose(l2norm_squared.eval(), np.square(x_norm_np))
        self.assertAllClose(x_norm.eval(), x_norm_np)
        self.assertAllClose(x_normalized.eval(), x_normalized_np)
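
# For reference, minimal sketches of the util helpers the test exercises.
# The names mirror the calls above; the bodies are assumptions based on the
# behavior the assertions check, not the module's actual implementation.
def _l2norm_squared_sketch(v):
    return tf.reduce_sum(tf.square(v))

def _l2norm_sketch(v):
    return tf.sqrt(_l2norm_squared_sketch(v))

def _l2normalize_sketch(v):
    norm = _l2norm_sketch(v)
    return v / norm, norm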

def grad_clip(loss, max_grad_norm, scope_list):
    '''
    :param loss: scalar loss tensor
    :param max_grad_norm: global norm to clip to, or None to skip clipping
    :param scope_list: a list of variable scope names
    :return: (list of (grad, var) pairs, global norm of the clipped grads)
    '''
    params_list = []
    for scope in scope_list:
        scope_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
        print(len(scope_vars))
        params_list += scope_vars
    grads = tf.gradients(loss, params_list)
    # Optionally replace None gradients with zeros, e.g.:
    # for i, grad in enumerate(grads):
    #     if grad is None:
    #         grads[i] = tf.zeros(shape=params_list[i].get_shape(),
    #                             dtype=params_list[i].dtype)
    global_norm = 0.
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        global_norm = tf.sqrt(sum([l2norm(t) ** 2 for t in grads]))
    grads = list(zip(grads, params_list))
    return grads, global_norm
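
# A minimal, self-contained usage sketch for grad_clip. The scope name,
# loss, and optimizer are illustrative assumptions; they only show how the
# returned (grad, var) pairs feed into apply_gradients.
def _grad_clip_usage_sketch():
    with tf.variable_scope('model'):
        w = tf.get_variable('w', shape=[3], initializer=tf.ones_initializer())
    loss = tf.reduce_sum(tf.square(w))
    grads, global_norm = grad_clip(loss, max_grad_norm=0.5, scope_list=['model'])
    train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(grads)
    return train_op, global_norm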