def create_mnist_batch_jacobian(batch_size, data_format, training):
  images = random_ops.random_uniform([batch_size, 28, 28])
  model = Mnist(data_format)
  logits = model(images, training=training)

  pfor_jacobian = gradients.batch_jacobian(logits, images, use_pfor=True)
  while_jacobian = gradients.batch_jacobian(logits, images, use_pfor=False)
  return pfor_jacobian, while_jacobian
def create_lstm_batch_hessian(batch_size, state_size, steps):
  inp, output = lstm_model_fn(batch_size, state_size, steps)
  pfor_jacobian = gradients.batch_jacobian(output, inp, use_pfor=True)
  pfor_jacobian = array_ops.reshape(pfor_jacobian, [batch_size, -1])
  pfor_hessian = gradients.batch_jacobian(pfor_jacobian, inp, use_pfor=True)
  # TODO(agarwal): using two nested while_loop doesn't seem to work here.
  # Hence we use pfor_jacobian for computing while_hessian.
  while_jacobian = pfor_jacobian
  while_hessian = gradients.batch_jacobian(while_jacobian, inp, use_pfor=False)
  return pfor_hessian, while_hessian
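# The helper above builds a per-example Hessian by applying batch_jacobian
# twice. A minimal self-contained sketch of the same nesting pattern, assuming
# TF 1.x graph mode; the toy function f below is an illustrative assumption,
# not taken from the snippets:
import tensorflow as tf
from tensorflow.python.ops.parallel_for.gradients import batch_jacobian

x = tf.random_uniform([4, 3])
f = tf.reduce_sum(x * x, axis=1, keepdims=True)   # per-example scalar, [4, 1]
grad = batch_jacobian(f, x, use_pfor=True)        # [4, 1, 3]
grad = tf.reshape(grad, [4, 3])
hess = batch_jacobian(grad, x, use_pfor=True)     # [4, 3, 3]; each slice is 2*I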
def test_batch_jacobian_fixed_shape(self):
  x = random_ops.random_uniform([2, 3, 5])
  y = x * x
  batch_jacobian_pfor = gradients.batch_jacobian(y, x, use_pfor=True)
  batch_jacobian_while = gradients.batch_jacobian(y, x, use_pfor=False)
  two_x = 2 * x
  answer = array_ops.stack(
      [array_ops.diag(two_x[0]), array_ops.diag(two_x[1])])
  self.run_and_assert_equal(answer, batch_jacobian_pfor)
  self.run_and_assert_equal(answer, batch_jacobian_while)
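# The test above checks batch_jacobian against a hand-built answer. As a
# hedged standalone illustration (not part of the test class), for y and x of
# shapes [b, m] and [b, n] the result of batch_jacobian(y, x) has shape
# [b, m, n]; the constants below are illustrative and assume TF 1.x graph mode
# with the public parallel_for API.
import tensorflow as tf
from tensorflow.python.ops.parallel_for.gradients import batch_jacobian

x = tf.constant([[1., 2., 3.], [4., 5., 6.]])   # shape [2, 3]
y = x * x                                       # elementwise, per example
jac = batch_jacobian(y, x)                      # shape [2, 3, 3]; each slice is diag(2*x[i])
with tf.Session() as sess:
    print(sess.run(tf.shape(jac)))              # -> [2 3 3]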
def test_batch_jacobian_unknown_shape(self):
  with self.test_session() as sess:
    x = array_ops.placeholder(dtypes.float32)
    y = x * x
    batch_jacobian_pfor = gradients.batch_jacobian(y, x, use_pfor=True)
    batch_jacobian_while = gradients.batch_jacobian(y, x, use_pfor=False)
    two_x = 2 * x
    answer = array_ops.stack(
        [array_ops.diag(two_x[0]), array_ops.diag(two_x[1])])
    ans, pfor_value, while_value = sess.run(
        [answer, batch_jacobian_pfor, batch_jacobian_while],
        feed_dict={x: [[1, 2], [3, 4]]})
    self.assertAllClose(ans, pfor_value)
    self.assertAllClose(ans, while_value)
def test_batch_jacobian_bad_unknown_shapes(self):
  with self.test_session() as sess:
    x = array_ops.placeholder(dtypes.float32)
    y = array_ops.concat([x, x], axis=0)
    jacobian = gradients.batch_jacobian(y, x)
    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 "assertion failed"):
      sess.run(jacobian, feed_dict={x: [[1, 2], [3, 4]]})
def create_dynamic_lstm_batch_jacobian(batch_size, state_size, max_steps):
  inp, (_, final_state) = dynamic_lstm_model_fn(batch_size, state_size,
                                                max_steps)
  pfor_jacobian = gradients.batch_jacobian(final_state.c, inp, use_pfor=True)
  # Note that use_pfor=False does not work above given the current limitations
  # on implementation of while_loop. So we statically unroll the looping in the
  # jacobian computation.
  while_gradients = [
      gradient_ops.gradients(array_ops.gather(final_state.c, i, axis=1),
                             inp)[0] for i in range(state_size)
  ]
  return pfor_jacobian, while_gradients
def xyz2ic_log_det_jac(x, Z_indices, eps=1e-10):
    from deep_boltzmann.models.MM import dist_tf, angle_tf, torsion_tf
    batchsize = tf.shape(x)[0]
    atom_indices = np.arange(int(3 * (np.max(Z_indices) + 1))).reshape((-1, 3))
    log_det_jac = tf.zeros((batchsize,))

    global_transform = (Z_indices.min() < 0)
    if global_transform:
        start_rest = 3  # remaining atoms start in row 3

        # 1. bond (input: z axis)
        reference_atom = tf.gather(x, atom_indices[Z_indices[1, 0]], axis=1)
        other_atom = tf.gather(x, atom_indices[Z_indices[1, 1]], axis=1)
        x_ = reference_atom[:, 0]
        y_ = reference_atom[:, 1]
        z_ = reference_atom[:, 2]
        arg = tf.expand_dims(z_, axis=1)
        reference_atom = tf.stack([x_, y_, arg[:, 0]], axis=-1)
        reference_atom = tf.expand_dims(reference_atom, axis=1)
        other_atom = tf.expand_dims(other_atom, axis=1)
        bond = dist_tf(reference_atom, other_atom)
        out = bond
        jac = batch_jacobian(out, arg) + eps * tf.eye(3, batch_shape=(1,))
        log_det_jac += tf.linalg.slogdet(jac)[-1]

        # 2. bond/angle (input: x/z axes)
        reference_atom = tf.gather(x, atom_indices[Z_indices[2, 0]], axis=1)
        other_atom_1 = tf.gather(x, atom_indices[Z_indices[2, 1]], axis=1)
        other_atom_2 = tf.gather(x, atom_indices[Z_indices[2, 2]], axis=1)
        x_ = reference_atom[:, 0]
        y_ = reference_atom[:, 1]
        z_ = reference_atom[:, 2]
        arg = tf.stack([x_, z_], axis=-1)
        reference_atom = tf.stack([arg[:, 0], y_, arg[:, 1]], axis=-1)
        reference_atom = tf.expand_dims(reference_atom, axis=1)
        other_atom_1 = tf.expand_dims(other_atom_1, axis=1)
        other_atom_2 = tf.expand_dims(other_atom_2, axis=1)
        bond = dist_tf(reference_atom, other_atom_1)
        angle = angle_tf(reference_atom, other_atom_1, other_atom_2)
        out = tf.stack([bond, angle], axis=-1)
        jac = batch_jacobian(out, arg) + eps * tf.eye(3, batch_shape=(1,))
        log_det_jac_ = tf.linalg.slogdet(jac)[-1]
        log_det_jac_ = tf.reshape(log_det_jac_, [batchsize, -1])
        log_det_jac_ = tf.reduce_sum(log_det_jac_, axis=-1)
        log_det_jac += log_det_jac_
    else:
        start_rest = 0  # remaining atoms start now

    # 3. everything together
    reference_atoms = tf.gather(x, atom_indices[Z_indices[start_rest:, 0]], axis=1)
    other_atoms_1 = tf.gather(x, atom_indices[Z_indices[start_rest:, 1]], axis=1)
    other_atoms_2 = tf.gather(x, atom_indices[Z_indices[start_rest:, 2]], axis=1)
    other_atoms_3 = tf.gather(x, atom_indices[Z_indices[start_rest:, 3]], axis=1)

    arg = tf.reshape(reference_atoms, [-1, 3])
    reference_atoms = tf.reshape(arg, [batchsize, -1, 3])

    bond = dist_tf(reference_atoms, other_atoms_1)
    angle = angle_tf(reference_atoms, other_atoms_1, other_atoms_2)
    torsion = torsion_tf(reference_atoms, other_atoms_1, other_atoms_2,
                         other_atoms_3)

    out = tf.stack([bond, angle, torsion], axis=-1)
    out = tf.reshape(out, [-1, 3])
    jac = batch_jacobian(out, arg, use_pfor=False)  # + eps * tf.eye(3, batch_shape=(1,))

    log_det_jac_ = tf.linalg.slogdet(jac)[-1]
    log_det_jac_ = tf.reshape(log_det_jac_, [batchsize, -1])
    log_det_jac_ = tf.reduce_sum(log_det_jac_, axis=-1)
    log_det_jac += log_det_jac_

    return log_det_jac
def test_batch_jacobian_parallel_iterations(self):
  x = constant_op.constant([[1., 2], [3, 4]])
  w = constant_op.constant([[1., 2, 3, 4], [5, 6, 7, 8]])
  y = math_ops.matmul(x, w)
  self.assertAllClose(gradients.batch_jacobian(y, x, parallel_iterations=2),
                      gradients.batch_jacobian(y, x, parallel_iterations=3))
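# For the matmul test above, each per-example Jacobian of y = x @ w with
# respect to x is w^T, so the batch Jacobian is w^T tiled over the batch.
# A hedged standalone check (illustrative constants, TF 1.x graph mode with the
# public parallel_for API):
import tensorflow as tf
from tensorflow.python.ops.parallel_for.gradients import batch_jacobian

x = tf.constant([[1., 2.], [3., 4.]])                  # [2, 2]
w = tf.constant([[1., 2., 3., 4.], [5., 6., 7., 8.]])  # [2, 4]
jac = batch_jacobian(tf.matmul(x, w), x)               # [2, 4, 2]
expected = tf.tile(tf.transpose(w)[None], [2, 1, 1])   # [2, 4, 2]
with tf.Session() as sess:
    print(sess.run(tf.reduce_max(tf.abs(jac - expected))))  # ~0.0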
def create_lstm_batch_jacobian(batch_size, state_size, steps):
  inp, output = lstm_model_fn(batch_size, state_size, steps)
  pfor_jacobian = gradients.batch_jacobian(output, inp, use_pfor=True)
  while_jacobian = gradients.batch_jacobian(output, inp, use_pfor=False)
  return pfor_jacobian, while_jacobian
def create_fc_batch_jacobian(batch_size, activation_size, num_layers):
  inp, output = fully_connected_model_fn(batch_size, activation_size,
                                         num_layers)
  pfor_jacobian = gradients.batch_jacobian(output, inp, use_pfor=True)
  while_jacobian = gradients.batch_jacobian(output, inp, use_pfor=False)
  return pfor_jacobian, while_jacobian
def contractive_regularizer(mean, cov, x, name=None):
    # `scope`, `scale`, and `use_pfor` are expected to be bound in the
    # enclosing scope.
    with tf.name_scope(scope, 'contractive_regularizer', [mean, cov, x]) as name:
        scale_p = tf.convert_to_tensor(scale,
                                       dtype=mean.dtype.base_dtype,
                                       name='scale_penalty')

        # this has shape (?, latent_size, 28, 28, 1)
        jac_mean_rk5 = batch_jacobian(mean, x, use_pfor=use_pfor)
        jac_shape = jac_mean_rk5.shape
        lastdim = np.prod(jac_shape[2:])
        # obtain shape (?, latent_size, 784)
        jac_mean = tf.reshape(jac_mean_rk5, [-1, jac_shape[1], lastdim])

        jac_cov_rk5 = batch_jacobian(cov, x, use_pfor=use_pfor)
        # jac_mean_rk5 and jac_cov_rk5 have the same shape:
        # jac_shape = (?, latent_size, 28, 28, 1)
        jac_cov = tf.reshape(jac_cov_rk5, [-1, jac_shape[1], lastdim])

        # obtain 1/sigma_k^2
        fisher_mean_vector = 1 / cov
        # D_mean = tf.linalg.trace(tf.matmul(tf.transpose(jac_mean, perm=[0, 2, 1]),
        #     tf.math.multiply(tf.expand_dims(fisher_mean_vector, axis=-1), jac_mean)))
        D_mean = tf.reduce_sum(
            tf.multiply(jac_mean,
                        tf.multiply(tf.expand_dims(fisher_mean_vector, axis=-1),
                                    jac_mean)),
            axis=[1, 2])

        # obtain 1/(2*sigma_k^4)
        fisher_cov_vector = (1 / 2) * (1 / (cov**2))
        # D_cov = tf.linalg.trace(tf.matmul(tf.transpose(jac_cov, perm=[0, 2, 1]),
        #     tf.math.multiply(tf.expand_dims(fisher_cov_vector, axis=-1), jac_cov)))
        D_cov = tf.reduce_sum(
            tf.multiply(jac_cov,
                        tf.multiply(tf.expand_dims(fisher_cov_vector, axis=-1),
                                    jac_cov)),
            axis=[1, 2])

        D_total = tf.reduce_mean(D_mean + D_cov)
        reg_D_node = tf.multiply(scale_p, D_total, name=name)

        return reg_D_node
def contractive_regularizer(mean, cov, x, name=None):
    # `scope`, `scale_mean`, `scale_covariance`, and `use_pfor` are expected to
    # be bound in the enclosing scope.
    with tf.name_scope(scope, 'contractive_regularizer', [mean, cov, x]) as name:
        scale_m = tf.convert_to_tensor(scale_mean,
                                       dtype=mean.dtype.base_dtype,
                                       name='scale_mean')
        scale_c = tf.convert_to_tensor(scale_covariance,
                                       dtype=cov.dtype.base_dtype,
                                       name='scale_covariance')

        # Alternative: accumulate the Jacobian norms in chunks with tf.while_loop
        # instead of a single batch_jacobian call:
        # if while_loop:
        #     norm_jac_mean = tf.constant(0.0)
        #     norm_jac_cov = tf.constant(0.0)
        #     i = tf.constant(0)
        #     n_vars = tf.shape(mean)[1]
        #
        #     def cond(i, n_vars, norm_jac_mean, norm_jac_cov):
        #         return tf.less(i, n_vars)
        #
        #     def body(i, n_vars, norm_jac_mean, norm_jac_cov):
        #         norm_jac_mean += tf.pow(tf.norm(batch_jacobian(
        #             mean[:, i:i + batch_size], x, use_pfor=use_pfor), ord=norm), norm)
        #         norm_jac_cov += tf.pow(tf.norm(batch_jacobian(
        #             cov[:, i:i + batch_size], x, use_pfor=use_pfor), ord=norm), norm)
        #         return [tf.add(i, batch_size), n_vars, norm_jac_mean, norm_jac_cov]
        #
        #     _, _, norm_jac_mean, norm_jac_cov = tf.while_loop(
        #         cond, body, [i, n_vars, norm_jac_mean, norm_jac_cov],
        #         swap_memory=swap_memory, parallel_iterations=parallel_iterations)
        #     norm_jac_mean = tf.pow(norm_jac_mean, 1 / norm, name="norm_jac_mean")
        #     norm_jac_cov = tf.pow(norm_jac_cov, 1 / norm, name="norm_jac_cov")
        # else:
        #     norm_jac_mean = tf.math.pow(tf.norm(batch_jacobian(
        #         mean, x, use_pfor=use_pfor), ord=norm, name="norm_jac_mean"), 2)
        #     norm_jac_cov = tf.math.pow(tf.norm(batch_jacobian(
        #         cov, x, use_pfor=use_pfor), ord=norm, name="norm_jac_cov"), 2)

        # this has shape [?, latent_size, 28, 28, 1]
        jac_mean = batch_jacobian(mean, x, use_pfor=use_pfor)
        # this has shape [?, latent_size*28*28*1]
        jac_mean_rk2 = tf.reshape(jac_mean, [tf.shape(jac_mean)[0], -1])
        # norm_jac_mean = tf.reduce_mean(tf.pow(tf.norm(jac_mean_rk2, axis=1, name="norm_jac_mean"), 2))
        norm_jac_mean = tf.reduce_mean(tf.reduce_sum(jac_mean_rk2**2, axis=1),
                                       name="norm_jac_mean")

        # this has shape [?, latent_size, 28, 28, 1]
        jac_cov = batch_jacobian(cov, x, use_pfor=use_pfor)
        # this has shape [?, latent_size*28*28*1]
        jac_cov_rk2 = tf.reshape(jac_cov, [tf.shape(jac_cov)[0], -1])
        # norm_jac_cov = tf.reduce_mean(tf.pow(tf.norm(jac_cov_rk2, axis=1, name="norm_jac_cov"), 2))
        norm_jac_cov = tf.reduce_mean(tf.reduce_sum(jac_cov_rk2**2, axis=1),
                                      name="norm_jac_cov")

        reg_mean_node = tf.multiply(scale_m, norm_jac_mean, name="reg_mean")
        reg_cov_node = tf.multiply(scale_c, norm_jac_cov, name="reg_cov")

        return tf.add(reg_mean_node, reg_cov_node, name=name)
def __init__(self, g_net, h_net, dx_net, dy_net, x_sampler, y_sampler, data,
             pool, batch_size, nb_classes, alpha, beta, df, is_train):
    self.data = data
    self.g_net = g_net
    self.h_net = h_net
    self.dx_net = dx_net
    self.dy_net = dy_net
    self.x_sampler = x_sampler
    self.y_sampler = y_sampler
    self.batch_size = batch_size
    self.nb_classes = nb_classes
    self.alpha = alpha
    self.beta = beta
    self.df = df
    self.pool = pool
    self.x_dim = self.dx_net.input_dim
    self.y_dim = self.dy_net.input_dim
    tf.reset_default_graph()

    self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='x')
    self.x_onehot = tf.placeholder(tf.float32, [None, self.nb_classes],
                                   name='x_onehot')
    self.x_combine = tf.concat([self.x, self.x_onehot], axis=1)
    self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')

    self.y_ = self.g_net(self.x_combine, reuse=False)
    self.J = batch_jacobian(self.y_, self.x)
    self.x_ = self.h_net(self.y, reuse=False)
    self.x__ = self.h_net(self.y_)
    self.x_combine_ = tf.concat([self.x_, self.x_onehot], axis=1)
    self.y__ = self.g_net(self.x_combine_)

    self.dy_ = self.dy_net(tf.concat([self.y_, self.x_onehot], axis=1),
                           reuse=False)
    self.dx_ = self.dx_net(self.x_, reuse=False)

    self.l1_loss_x = tf.reduce_mean(tf.abs(self.x - self.x__))
    self.l1_loss_y = tf.reduce_mean(tf.abs(self.y - self.y__))
    self.l2_loss_x = tf.reduce_mean((self.x - self.x__)**2)
    self.l2_loss_y = tf.reduce_mean((self.y - self.y__)**2)

    # (1-D(x))^2
    self.g_loss_adv = tf.reduce_mean(
        (0.9 * tf.ones_like(self.dy_) - self.dy_)**2)
    self.h_loss_adv = tf.reduce_mean(
        (0.9 * tf.ones_like(self.dx_) - self.dx_)**2)
    self.g_loss = self.g_loss_adv + self.alpha * self.l2_loss_x + self.beta * self.l2_loss_y
    self.h_loss = self.h_loss_adv + self.alpha * self.l2_loss_x + self.beta * self.l2_loss_y
    self.g_h_loss = self.g_loss_adv + self.h_loss_adv \
        + self.alpha * self.l2_loss_x + self.beta * self.l2_loss_y

    self.fake_x = tf.placeholder(tf.float32, [None, self.x_dim], name='fake_x')
    self.fake_x_onehot = tf.placeholder(tf.float32, [None, self.nb_classes],
                                        name='fake_x_onehot')
    self.fake_x_combine = tf.concat([self.fake_x, self.fake_x_onehot], axis=1)
    self.fake_y = tf.placeholder(tf.float32, [None, self.y_dim], name='fake_y')

    self.dx = self.dx_net(self.x)
    self.dy = self.dy_net(tf.concat([self.y, self.x_onehot], axis=1))
    self.d_fake_x = self.dx_net(self.fake_x)
    self.d_fake_y = self.dy_net(
        tf.concat([self.fake_y, self.x_onehot], axis=1))

    # (1-D(x))^2
    self.dx_loss = (tf.reduce_mean((0.9 * tf.ones_like(self.dx) - self.dx)**2)
                    + tf.reduce_mean((0.1 * tf.ones_like(self.d_fake_x) - self.d_fake_x)**2)) / 2.0
    self.dy_loss = (tf.reduce_mean((0.9 * tf.ones_like(self.dy) - self.dy)**2)
                    + tf.reduce_mean((0.1 * tf.ones_like(self.d_fake_y) - self.d_fake_y)**2)) / 2.0
    self.d_loss = self.dx_loss + self.dy_loss

    # weight clipping
    self.clip_dx = [
        var.assign(tf.clip_by_value(var, -0.01, 0.01))
        for var in self.dx_net.vars
    ]
    self.clip_dy = [
        var.assign(tf.clip_by_value(var, -0.01, 0.01))
        for var in self.dy_net.vars
    ]

    self.lr = tf.placeholder(tf.float32, None, name='learning_rate')
    self.g_h_optim = tf.train.AdamOptimizer(
        learning_rate=self.lr, beta1=0.5, beta2=0.9).minimize(
            self.g_h_loss, var_list=self.g_net.vars + self.h_net.vars)
    self.d_optim = tf.train.AdamOptimizer(
        learning_rate=self.lr, beta1=0.5, beta2=0.9).minimize(
            self.d_loss, var_list=self.dx_net.vars + self.dy_net.vars)

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    self.timestamp = now.strftime('%Y%m%d_%H%M%S')

    self.g_loss_adv_summary = tf.summary.scalar('g_loss_adv', self.g_loss_adv)
    self.h_loss_adv_summary = tf.summary.scalar('h_loss_adv', self.h_loss_adv)
    self.l2_loss_x_summary = tf.summary.scalar('l2_loss_x', self.l2_loss_x)
    self.l2_loss_y_summary = tf.summary.scalar('l2_loss_y', self.l2_loss_y)
    self.dx_loss_summary = tf.summary.scalar('dx_loss', self.dx_loss)
    self.dy_loss_summary = tf.summary.scalar('dy_loss', self.dy_loss)
    self.g_merged_summary = tf.summary.merge([
        self.g_loss_adv_summary, self.h_loss_adv_summary,
        self.l2_loss_x_summary, self.l2_loss_y_summary
    ])
    self.d_merged_summary = tf.summary.merge(
        [self.dx_loss_summary, self.dy_loss_summary])

    # graph path for tensorboard visualization
    self.graph_dir = 'graph/density_est_{}_{}_x_dim={}_y_dim={}_alpha={}_beta={}'.format(
        self.timestamp, self.data, self.x_dim, self.y_dim, self.alpha, self.beta)
    if not os.path.exists(self.graph_dir) and is_train:
        os.makedirs(self.graph_dir)

    # save path for saving predicted data
    self.save_dir = 'data/density_est_{}_{}_x_dim={}_y_dim={}_alpha={}_beta={}'.format(
        self.timestamp, self.data, self.x_dim, self.y_dim, self.alpha, self.beta)
    if not os.path.exists(self.save_dir) and is_train:
        os.makedirs(self.save_dir)

    self.saver = tf.train.Saver(max_to_keep=5000)

    run_config = tf.ConfigProto()
    run_config.gpu_options.per_process_gpu_memory_fraction = 1.0
    run_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=run_config)
def compute_K_norm_from_input_perturbation(self, loss, scope="DP_K_NORM"):
    with tf.variable_scope(scope):
        ex = self.noised_data
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "CNN")
        xs = [tf.convert_to_tensor(x) for x in var_list]
        # Each element in px_grads is the px_grad for a param matrix, having
        # the shape of [batch_size, shape of param matrix].
        px_grads = per_example_gradients.PerExampleGradients(loss, xs)
        # calculate sigma, sigma has the shape of [batch_size]

        # layer-wised variant, kept for reference:
        '''
        K_norms = []
        num = 0
        for px_grad, v in zip(px_grads, var_list):
            #px_grad = utils.BatchClipByL2norm(px_grad, FLAGS.DP_GRAD_CLIPPING_L2NORM / FLAGS.BATCH_SIZE)
            px_grad_vec = tf.reshape(px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]
            # method 1
            px_pp_grad = batch_jacobian(
                px_grad_vec, ex, use_pfor=False,
                parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
            px_pp_grad = tf.identity(px_pp_grad)
            px_pp_grad = tf.identity(tf.reshape(px_pp_grad, [
                px_pp_grad.get_shape().as_list()[0],
                px_pp_grad.get_shape().as_list()[1], -1]))  # [b, vec_param, ex_size]

            s_, u, _ = tf.linalg.svd(
                tf.matmul(tf.identity(px_pp_grad), tf.identity(px_pp_grad),
                          transpose_b=True), full_matrices=True)
            s = tf.identity(tf.linalg.diag(s_))
            u = tf.identity(u)
            K = tf.identity(tf.matmul(u, tf.identity(tf.sqrt(s))))
            # px_pp_grads.append(px_pp_grad)
            us.append(u)
            ss.append(s)
            Ks.append(K)

            px_pp_K_norm = tf.norm(tf.identity(K), ord="fro", axis=[1, 2],
                                   name="fro_{}".format(num))
            px_pp_I_norm = tf.norm(tf.eye(px_pp_grad.get_shape().as_list()[1]),
                                   ord="fro", axis=[0, 1], name="fro_{}".format(num))
            # normalize
            K_norm = tf.identity(px_pp_K_norm / px_pp_I_norm)
            K_norms.append(tf.identity(tf.reduce_min(K_norm)))
            num += 1
        '''

        # all in
        px_grad_vec_list = [
            tf.reshape(px_grad, [tf.shape(px_grad)[0], -1])
            for px_grad in px_grads
        ]  # [batch_size, vec_param * L]
        px_grad_vec = tf.concat(px_grad_vec_list, axis=1)  # [batch_size, vec_param]
        px_pp_grad = batch_jacobian(
            px_grad_vec,
            ex,
            use_pfor=False,
            parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
            px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
        px_pp_jac = tf.identity(
            tf.reshape(px_pp_grad, [
                px_pp_grad.get_shape().as_list()[0],
                px_pp_grad.get_shape().as_list()[1], -1
            ]))  # [b, vec_param, ex_size]

        s_, u, _ = tf.linalg.svd(
            tf.matmul(px_pp_jac, tf.transpose(px_pp_jac, [0, 2, 1])),
            full_matrices=False)
        s = tf.linalg.diag(s_)
        K = tf.matmul(u, tf.sqrt(s))
        px_pp_K_norm = tf.linalg.norm(K, ord="fro", axis=(1, 2))
        px_pp_I_norm = tf.linalg.norm(
            tf.eye(px_pp_jac.get_shape().as_list()[1]), ord="fro", axis=(0, 1))
        # normalize
        K_norm = tf.reduce_min(px_pp_K_norm / px_pp_I_norm)

        return K_norm
def test_batch_jacobian_bad_shapes(self):
  x = random_ops.random_uniform([2, 2])
  y = random_ops.random_uniform([3, 2])
  with self.assertRaisesRegexp(ValueError, "Need first dimension of output"):
    gradients.batch_jacobian(y, x, use_pfor=True)
def compute_K_inv_norm_from_input_perturbation_v2(self, loss,
                                                  is_layerwised=False,
                                                  scope="DP_K_INV_NORM"):
    with tf.variable_scope(scope):
        ex = self.noised_data
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "CNN")
        xs = [tf.convert_to_tensor(x) for x in var_list]
        # Each element in px_grads is the px_grad for a param matrix, having
        # the shape of [batch_size, shape of param matrix].
        px_grads = per_example_gradients.PerExampleGradients(loss, xs)
        # calculate sigma, sigma has the shape of [batch_size]

        if is_layerwised:
            # layer-wised
            K_norms = []
            for px_grad, v in zip(px_grads, var_list):
                px_grad_vec = tf.reshape(
                    px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]
                px_pp_grad = batch_jacobian(
                    px_grad_vec,
                    ex,
                    use_pfor=False,
                    parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                    px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
                px_pp_jac = tf.reshape(px_pp_grad, [
                    px_pp_grad.get_shape().as_list()[0],
                    px_pp_grad.get_shape().as_list()[1], -1
                ])  # [b, vec_param, ex_size]

                M = tf.matmul(px_pp_jac,
                              tf.transpose(px_pp_jac, [0, 2, 1]))  # [b, vec_param, vec_param]
                s = tf.linalg.svd(M, full_matrices=True, compute_uv=False)
                s_inv_sqrt_norm = tf.linalg.norm(tf.sqrt(1.0 / s), ord=2,
                                                 axis=1)  # [b, vec_param]
                u_norm = tf.sqrt(
                    tf.cast(px_pp_jac.get_shape().as_list()[1],
                            dtype=tf.float32))
                K_inv_norm = s_inv_sqrt_norm * u_norm
                K_norms.append(1.0 / K_inv_norm)

            K_norms_layerwise = tf.stack(K_norms, axis=1)  # [b, #layer]
            return K_norms_layerwise
        else:
            # all in
            px_grad_vec_list = [
                tf.reshape(px_grad, [tf.shape(px_grad)[0], -1])
                for px_grad in px_grads
            ]  # [batch_size, vec_param * L]
            px_grad_vec = tf.concat(px_grad_vec_list, axis=1)  # [batch_size, vec_param]
            px_pp_grad = batch_jacobian(
                px_grad_vec,
                ex,
                use_pfor=False,
                parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
            px_pp_jac = tf.reshape(px_pp_grad, [
                px_pp_grad.get_shape().as_list()[0],
                px_pp_grad.get_shape().as_list()[1], -1
            ])  # [b, vec_param, ex_size]

            M = tf.matmul(px_pp_jac,
                          tf.transpose(px_pp_jac, [0, 2, 1]))  # [b, vec_param, vec_param]
            s = tf.linalg.svd(M, full_matrices=True, compute_uv=False)
            s_inv_sqrt_norm = tf.linalg.norm(tf.sqrt(1.0 / s), ord=2,
                                             axis=1)  # [b, vec_param]
            u_norm = tf.sqrt(
                tf.cast(px_pp_jac.get_shape().as_list()[1], dtype=tf.float32))
            K_inv_norm = s_inv_sqrt_norm * u_norm
            K_norm = 1.0 / K_inv_norm

            return K_norm
def compute_K_norm_from_input_perturbation_v2(self, loss, is_layerwised=False,
                                              scope="DP_K_NORM"):
    with tf.variable_scope(scope):
        ex = self.noised_data
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "CNN")
        xs = [tf.convert_to_tensor(x) for x in var_list]
        # Each element in px_grads is the px_grad for a param matrix, having
        # the shape of [batch_size, shape of param matrix].
        px_grads = per_example_gradients.PerExampleGradients(loss, xs)
        # calculate sigma, sigma has the shape of [batch_size]

        if is_layerwised:
            # layer-wised
            K_norms = []
            for px_grad, v in zip(px_grads, var_list):
                px_grad_vec = tf.reshape(
                    px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]
                px_pp_grad = batch_jacobian(
                    px_grad_vec,
                    ex,
                    use_pfor=False,
                    parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                    px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
                px_pp_jac = tf.reshape(px_pp_grad, [
                    px_pp_grad.get_shape().as_list()[0],
                    px_pp_grad.get_shape().as_list()[1], -1
                ])  # [b, vec_param, ex_size]

                px_pp_jac_norm = tf.linalg.norm(px_pp_jac, ord="fro",
                                                axis=(1, 2))
                px_pp_I_norm = tf.sqrt(
                    tf.cast(px_pp_jac.get_shape().as_list()[1],
                            dtype=tf.float32))
                # px_pp_I_norm = tf.linalg.norm(tf.eye(px_pp_jac.get_shape().as_list()[1]), ord="fro", axis=(0, 1))
                # normalize
                K_norm = px_pp_jac_norm / tf.sqrt(px_pp_I_norm)  # [b]
                K_norms.append(K_norm)

            K_norms_layerwise = tf.stack(K_norms, axis=1)  # [b, #layer]
            return K_norms_layerwise
        else:
            # all in
            px_grad_vec_list = [
                tf.reshape(px_grad, [tf.shape(px_grad)[0], -1])
                for px_grad in px_grads
            ]  # [batch_size, vec_param * L]
            px_grad_vec = tf.concat(px_grad_vec_list, axis=1)  # [batch_size, vec_param]
            px_pp_grad = batch_jacobian(
                px_grad_vec,
                ex,
                use_pfor=False,
                parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
            px_pp_jac = tf.reshape(px_pp_grad, [
                px_pp_grad.get_shape().as_list()[0],
                px_pp_grad.get_shape().as_list()[1], -1
            ])  # [b, vec_param, ex_size]

            px_pp_jac_norm = tf.linalg.norm(px_pp_jac, ord="fro",
                                            axis=(1, 2))  # [b]
            px_pp_I_norm = tf.sqrt(
                tf.cast(px_pp_jac.get_shape().as_list()[1], dtype=tf.float32))
            # px_pp_I_norm = tf.linalg.norm(tf.eye(px_pp_jac.get_shape().as_list()[1]), ord="fro", axis=(0, 1))
            # normalize
            K_norm = tf.reduce_min(px_pp_jac_norm / tf.sqrt(px_pp_I_norm))  # scalar

            return K_norm
def log_det_jacobian(outputs, inputs):
    from tensorflow.python.ops.parallel_for.gradients import batch_jacobian
    J = batch_jacobian(outputs, inputs, use_pfor=False)
    s = tf.svd(J, compute_uv=False)
    s = tf.abs(s) + 1e-6  # regularize
    return tf.reduce_sum(tf.log(s), axis=1, keepdims=True)
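# A hedged usage check for log_det_jacobian above: for a linear map y = x @ W,
# each per-example log|det J| should equal log|det W| (up to the 1e-6
# regularizer). W and the random inputs below are illustrative assumptions;
# TF 1.x graph mode is assumed.
import numpy as np
import tensorflow as tf

W = tf.constant([[2., 0.], [0., 3.]])
x = tf.placeholder(tf.float32, [None, 2])
y = tf.matmul(x, W)
ldj = log_det_jacobian(y, x)            # shape [batch, 1], each entry ~ log(6)
with tf.Session() as sess:
    print(sess.run(ldj, feed_dict={x: np.random.randn(4, 2)}))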
def compute_M_from_input_perturbation(self, loss, l2norm_bound,
                                      is_layerwised=False, scope="DP_S_MIN"):
    with tf.variable_scope(scope):
        ex = self.noised_pre
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     self.opt_scope_1.name)
        xs = [tf.convert_to_tensor(x) for x in var_list]
        # Each element in px_grads is the px_grad for a param matrix, having
        # the shape of [batch_size, shape of param matrix].
        px_grads = per_example_gradients.PerExampleGradients(loss, xs)
        # calculate sigma, sigma has the shape of [batch_size]

        if is_layerwised:
            # layer-wised
            Ms = []
            sens = []
            for px_grad, v in zip(px_grads, var_list):
                px_grad_vec = tf.reshape(
                    px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]
                # Clipping
                # px_grad_vec = utils.BatchClipByL2norm(px_grad_vec, FLAGS.DP_GRAD_CLIPPING_L2NORM)
                px_pp_grad = batch_jacobian(
                    px_grad_vec,
                    ex,
                    use_pfor=False,
                    parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                    px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
                px_pp_jac = tf.reshape(px_pp_grad, [
                    px_pp_grad.get_shape().as_list()[0],
                    px_pp_grad.get_shape().as_list()[1], -1
                ])  # [b, vec_param, ex_size]

                M = tf.reduce_mean(
                    tf.matmul(px_pp_jac, tf.transpose(px_pp_jac, [0, 2, 1])),
                    axis=0) / tf.cast(tf.shape(px_grad)[0],
                                      dtype=tf.float32)  # [vec_param, vec_param]
                # M = tf.matmul(px_pp_jac, tf.transpose(px_pp_jac, [0, 2, 1]))  # [b, vec_param, vec_param]
                Ms.append(M)
                sens.append(px_grad_vec)

            # S_mins = tf.stack(S_mins, axis=1)
            return Ms, sens  # , kk_square, mark_off, r_k, c_k, core, mask_on
        else:
            # all in
            px_grad_vec_list = [
                tf.reshape(px_grad, [tf.shape(px_grad)[0], -1])
                for px_grad in px_grads
            ]  # [batch_size, vec_param * L]
            px_grad_vec = tf.concat(px_grad_vec_list, axis=1)  # [batch_size, vec_param]
            # Clipping
            # px_grad_vec = utils.BatchClipByL2norm(px_grad_vec, FLAGS.DP_GRAD_CLIPPING_L2NORM)
            px_pp_grad = batch_jacobian(
                px_grad_vec,
                ex,
                use_pfor=False,
                parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
                px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
            # px_pp_grad2 = batch_jacobian(px_grad_vec, self.data, use_pfor=False,
            #     parallel_iterations=px_grad_vec.get_shape().as_list()[0] * px_grad_vec.get_shape().as_list()[1])
            px_pp_jac = tf.reshape(px_pp_grad, [
                px_pp_grad.get_shape().as_list()[0],
                px_pp_grad.get_shape().as_list()[1], -1
            ])  # [b, vec_param, ex_size]
            # px_pp_jac2 = tf.reshape(px_pp_grad2, [px_pp_grad2.get_shape().as_list()[0],
            #     px_pp_grad2.get_shape().as_list()[1], -1])  # [b, vec_param, ex_size]

            M = tf.reduce_mean(
                tf.matmul(px_pp_jac, tf.transpose(px_pp_jac, [0, 2, 1])),
                axis=0) / tf.cast(tf.shape(px_grad_vec)[0],
                                  dtype=tf.float32)  # [vec_param, vec_param]
            # M = tf.matmul(px_pp_jac, tf.transpose(px_pp_jac, [0, 2, 1]))  # [b, vec_param, vec_param]
            # b_left = tf.linalg.lstsq(px_pp_jac, tf.eye(px_pp_grad.get_shape().as_list()[1],
            #     batch_shape=[px_pp_grad.get_shape().as_list()[0]]))
            return M, px_grad_vec
def compute_sanitized_gradients_from_input_perturbation(
        self, loss, ex, input_sigma, var_list, add_noise=True):
    """Compute the sanitized gradients.

    Args:
      loss: the loss tensor.
      ex: the (noised) input examples.
      input_sigma: input_sigma.
      var_list: the optional variables.
      add_noise: if true, then add noise. Always clip.

    Returns:
      a pair of (list of sanitized gradients) and privacy spending
      accumulation operations.

    Raises:
      TypeError: if var_list contains non-variable.
    """
    self._assert_valid_dtypes([loss])

    xs = [tf.convert_to_tensor(x) for x in var_list]
    # Each element in px_grads is the px_grad for a param matrix, having the
    # shape of [batch_size, shape of param matrix].
    px_grads = per_example_gradients.PerExampleGradients(loss, xs)

    # calculate sigma, sigma has the shape of [batch_size]
    unmasked_sigmas = []
    sigmas = []
    sanitized_grads = []
    num = 0
    for px_grad, v in zip(px_grads, var_list):
        num += 1
        if num > FLAGS.ACCOUNT_NUM:
            break
        # px_grad = utils.BatchClipByL2norm(px_grad, FLAGS.DP_GRAD_CLIPPING_L2NORM / FLAGS.BATCH_SIZE)
        px_grad_vec = tf.reshape(
            px_grad, [tf.shape(px_grad)[0], -1])  # [batch_size, vec_param]

        # method 1
        px_pp_grad = batch_jacobian(
            px_grad_vec,
            ex,
            use_pfor=False,
            parallel_iterations=px_grad_vec.get_shape().as_list()[0] *
            px_grad_vec.get_shape().as_list()[1])  # [b, vec_param, ex_shape]
        px_pp_grad = tf.reshape(px_pp_grad, [
            px_pp_grad.get_shape().as_list()[0],
            px_pp_grad.get_shape().as_list()[1], -1
        ])  # [b, vec_param, ex_size]
        px_scale = tf.reduce_sum(tf.square(px_pp_grad), 2)  # [batch_size, vec_param]

        '''
        # method 2: map over per-example gradients instead of batch_jacobian.
        px_grad_vec = tf.split(px_grad_vec, px_grad_vec.get_shape().as_list()[0], axis=0)

        def px_fn(arg):
            px_grad = arg[0]
            px_ex = arg[1]
            px_grad = tf.squeeze(px_grad, axis=0)
            px_pp_grad = jacobian(px_grad, px_ex, use_pfor=False,
                                  parallel_iterations=px_grad.get_shape().as_list()[0])  # [vec_param, ex_shape]
            px_pp_grad = tf.reshape(px_pp_grad, [px_pp_grad.get_shape().as_list()[0], -1])  # [vec_param, ex_size]
            px_scale = tf.reduce_sum(tf.square(px_pp_grad), 1)  # [vec_param]
            return px_scale

        # px_scale = control_flow_ops.pfor(px_fn, len(ex), parallel_iterations=len(ex))
        px_scale = map_fn(px_fn, [px_grad_vec, ex])
        '''

        # heterogeneous: each param has a different scale
        scale = tf.reduce_mean(px_scale, 1)  # [batch_size]
        # minimum
        # scale = tf.reduce_min(px_scale, 1)  # [batch_size]
        sigma = tf.sqrt(scale) * input_sigma  # [batch_size]
        unmasked_sigmas.append(sigma)

        mask = tf.cast(
            tf.greater_equal(sigma,
                             tf.constant(FLAGS.INPUT_DP_SIGMA_THRESHOLD)),
            tf.float32)
        sigma = sigma * mask
        sigmas.append(sigma)

        tensor_name = utils.GetTensorOpName(v)
        sanitized_grad = self._sanitizer.sanitize(
            px_grad,
            self._eps_delta,
            sigma=sigma,
            tensor_name=tensor_name,
            add_noise=add_noise,
            num_examples=tf.slice(tf.shape(px_grad), [0], [1]),
            no_clipping=False)
        sanitized_grads.append(sanitized_grad)

    while num <= len(var_list):
        sigmas.append(tf.zeros([ex.get_shape().as_list()[0]]))
        num += 1

    return sanitized_grads, sigmas, unmasked_sigmas
# to get from the high-dimensional output of the final hidden layer
# to a scalar output, we use this function, which basically uses
# a linear transformation of the form "w^T h + b"
# --> h is the vector of outputs from the last hidden layer
# --> w is a weight vector of the same dimension as h
# --> b is a scalar
Vpredraw0 = tf.layers.dense(output[depth], units=1, name='output')
Vpredraw = tf.exp(Vpredraw0)
Vpred = tf.reshape(Vpredraw, shape=[n_instances, n_steps])

# In[11]:

# automatically differentiate the potential and generate gradV : R^d --> R^d
from tensorflow.python.ops.parallel_for.gradients import jacobian, batch_jacobian
gradVpredraw = batch_jacobian(Vpredraw, q)
gradVpred = tf.reshape(gradVpredraw, shape=[n_instances, n_steps, d])

# In[12]:

# compute loss and set up optimizer
pdot = tf_diff_axis_0(pts) / dt
loss = tf.reduce_mean(
    tf.square(pdot + tf.transpose(gradVpred[:, :-1], perm=[1, 0, 2])))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
training_op = optimizer.minimize(loss)

# In[13]:

# typical TF initialization
init = tf.global_variables_initializer()