def rnn_model_fn(features, labels, mode):  # pylint: disable=unused-argument
    """Estimator model_fn for a character-level RNN language model.

    Each position predicts the next byte of the sequence. When FLAGS.dpsgd is
    set, training uses the per-example loss vector so DP-SGD can clip and
    noise per-microbatch gradients.

    Args:
        features: dict with key 'x' holding the token-id batch.
        labels: unused (targets are derived by shifting the input).
        mode: a tf.estimator.ModeKeys value.

    Returns:
        tf.estimator.EstimatorSpec for TRAIN or EVAL mode.
    """
    # [batch, SEQ_LEN] token ids: inputs are positions [0, SEQ_LEN-1),
    # targets are positions [1, SEQ_LEN).
    tokens = tf.reshape(features['x'], [-1, SEQ_LEN])
    one_hot_inputs = tf.one_hot(tokens[:, :-1], 256)

    rnn_out = tf.keras.layers.LSTM(256, return_sequences=True).apply(one_hot_inputs)
    logits = tf.keras.layers.Dense(256).apply(rnn_out)

    # Per-example loss vector (required for DP-SGD microbatching); the scalar
    # mean is what tf.estimator reports.
    targets = tf.cast(tf.one_hot(tokens[:, 1:], 256), dtype=tf.float32)
    vector_loss = tf.nn.softmax_cross_entropy_with_logits(labels=targets,
                                                          logits=logits)
    scalar_loss = tf.reduce_mean(vector_loss)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if FLAGS.dpsgd:
            # Ledger tracks the privacy budget spent by each sampled batch.
            ledger = privacy_ledger.PrivacyLedger(
                population_size=NB_TRAIN,
                selection_probability=(FLAGS.batch_size / NB_TRAIN),
                max_samples=1e6,
                max_queries=1e6)
            optimizer = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate,
                unroll_microbatches=True)
            opt_loss = vector_loss
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
            opt_loss = scalar_loss
        train_op = optimizer.minimize(loss=opt_loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        # NOTE(review): PREDICT mode falls through and returns None — confirm
        # callers never request prediction from this model_fn.
        metrics = {
            'accuracy':
                tf.metrics.accuracy(
                    labels=tf.cast(tokens[:, 1:], dtype=tf.int32),
                    predictions=tf.argmax(input=logits, axis=2)),
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          eval_metric_ops=metrics)
def get_model(features, labels, mode, params):
    """Estimator model_fn for a small classifier with optional DP-SGD.

    Builds either a 2-hidden-layer network ('nn') or softmax regression, and
    — when privacy == 'grad_pert' — trains it with a DP-Adam optimizer whose
    noise multiplier sigma is calibrated per the selected accountant (`dp`).

    Args:
        features: dict with key 'x' holding the flattened input batch.
        labels: integer class labels.
        mode: a tf.estimator.ModeKeys value.
        params: 14-tuple (n, n_in, n_hidden, n_out, non_linearity, model,
            privacy, dp, epsilon, delta, batch_size, learning_rate, l2_ratio,
            epochs).

    Returns:
        tf.estimator.EstimatorSpec appropriate for `mode`.
    """
    n, n_in, n_hidden, n_out, non_linearity, model, privacy, dp, epsilon, delta, batch_size, learning_rate, l2_ratio, epochs = params
    if model == 'nn':
        #print('Using neural network...')
        input_layer = tf.reshape(features['x'], [-1, n_in])
        y = tf.keras.layers.Dense(n_hidden,
                                  activation=non_linearity,
                                  kernel_regularizer=tf.keras.regularizers.l2(
                                      l2_ratio)).apply(input_layer)
        y = tf.keras.layers.Dense(
            n_hidden,
            activation=non_linearity,
            kernel_regularizer=tf.keras.regularizers.l2(l2_ratio)).apply(y)
        # NOTE(review): despite the name, `logits` are softmax probabilities
        # (Dense uses activation=tf.nn.softmax) — consistent with the
        # default from_logits=False of sparse_categorical_crossentropy below.
        logits = tf.keras.layers.Dense(
            n_out,
            activation=tf.nn.softmax,
            kernel_regularizer=tf.keras.regularizers.l2(l2_ratio)).apply(y)
    else:
        #print('Using softmax regression...')
        input_layer = tf.reshape(features['x'], [-1, n_in])
        logits = tf.keras.layers.Dense(
            n_out,
            activation=tf.nn.softmax,
            kernel_regularizer=tf.keras.regularizers.l2(l2_ratio)).apply(
                input_layer)
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        #"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        # Already probabilities — no extra softmax needed.
        "probabilities": logits
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Per-example loss vector (supports DP-SGD microbatches); scalar mean is
    # what the Estimator reports.
    vector_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits)
    scalar_loss = tf.reduce_mean(vector_loss)
    if mode == tf.estimator.ModeKeys.TRAIN:
        if privacy == 'grad_pert':
            C = 1  # Clipping Threshold
            # Calibrate the Gaussian noise multiplier for the chosen
            # composition/accounting method; formulas are (epsilon, delta)
            # calibrations per mechanism. Falls through as 0. if `dp` is
            # unrecognized (no noise added).
            sigma = 0.
            if dp == 'adv_cmp':
                sigma = np.sqrt(epochs * np.log(2.5 * epochs / delta)) * (
                    np.sqrt(np.log(2 / delta) + 2 * epsilon) +
                    np.sqrt(np.log(2 / delta))) / epsilon  # Adv Comp
            elif dp == 'zcdp':
                sigma = np.sqrt(epochs / 2) * (
                    np.sqrt(np.log(1 / delta) + epsilon) +
                    np.sqrt(np.log(1 / delta))) / epsilon  # zCDP
            elif dp == 'rdp':
                # Module-level lookup table keyed by epsilon — presumably
                # precomputed with an RDP accountant; verify against caller.
                sigma = noise_multiplier[epsilon]
            elif dp == 'dp':
                sigma = epochs * np.sqrt(
                    2 * np.log(1.25 * epochs / delta)) / epsilon  # DP
            print(sigma)
            # num_microbatches=batch_size assumes microbatches of size 1
            # (per-example clipping) — TODO confirm batch size matches.
            optimizer = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=C,
                noise_multiplier=sigma,
                num_microbatches=batch_size,
                learning_rate=learning_rate,
                ledger=None)
            opt_loss = vector_loss
        else:
            optimizer = AdamOptimizer(learning_rate=learning_rate)
            opt_loss = scalar_loss
        global_step = tf.train.get_global_step()
        train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'accuracy':
                tf.metrics.accuracy(labels=labels,
                                    predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          eval_metric_ops=eval_metric_ops)
def cnn_model_fn(features, labels):
    """Model function for a CNN.

    Builds logits via one of the model heads (trival/deep), computes accuracy
    and a per-example loss vector, and constructs a train op using either a
    DP optimizer (FLAGS.dpsgd) or the plain TF optimizer for FLAGS.method.

    Args:
        features: image batch; reshaped per FLAGS.dataset.
        labels: integer class labels.

    Returns:
        Tuple (train_op, scalar_loss, accuracy) — note this is NOT an
        EstimatorSpec; the caller drives the session itself.
    """
    # Define CNN architecture using tf.keras.layers.
    if FLAGS.dataset == "mnist":
        input_layer = tf.reshape(features, [-1, 28, 28, 1])
    elif FLAGS.dataset == "cifar10":
        input_layer = features  # already shaped [-1, 32, 32, 3]
        # input_layer = tf.reshape(features, [-1, 32, 32, 3])
    elif FLAGS.dataset == "svhn":
        input_layer = tf.reshape(features, [-1, 32, 32, 3])
    if FLAGS.model == "trival":
        logits = trival(input_layer=input_layer)
    elif FLAGS.model == "deep":
        logits = deep(input_layer=input_layer)
        # input_layer = tf.reshape(features, [-1, 32, 32, 3])
    elif FLAGS.model == "letnet":
        # NOTE(review): this branch calls trival(), not a letnet/lenet
        # builder — looks like a copy-paste slip; confirm intended model.
        logits = trival(input_layer=input_layer)
    # Calculate accuracy.
    correct_pred = tf.equal(tf.argmax(logits, 1), labels)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Calculate loss as a vector (to support microbatches in DP-SGD).
    vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                 logits=logits)
    # Define mean of loss across minibatch (for reporting through tf.Estimator).
    scalar_loss = tf.reduce_mean(vector_loss)
    if FLAGS.dpsgd:
        # Hard-coded population of 60000 — matches MNIST train size; TODO
        # confirm it is still correct for cifar10/svhn.
        ledger = privacy_ledger.PrivacyLedger(
            population_size=60000,
            selection_probability=(FLAGS.batch_size / 60000))
        # Use DP version of GradientDescentOptimizer. Other optimizers are
        # available in dp_optimizer. Most optimizers inheriting from
        # tf.train.Optimizer should be wrappable in differentially private
        # counterparts by calling dp_optimizer.optimizer_from_args().
        if FLAGS.method == 'sgd':
            optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'adam':
            optimizer = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate,
                unroll_microbatches=True)
        elif FLAGS.method == 'adagrad':
            optimizer = dp_optimizer.DPAdagradGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'momentum':
            optimizer = dp_optimizer.DPMomentumGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate,
                momentum=FLAGS.momentum,
                use_nesterov=FLAGS.use_nesterov)
        else:
            raise ValueError(
                'method must be sgd or adam or adagrad or momentum')
        # DP optimizers need the per-example loss vector.
        opt_loss = vector_loss
    else:
        if FLAGS.method == 'sgd':
            optimizer = GradientDescentOptimizer(
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'adam':
            optimizer = AdamOptimizer(learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'adagrad':
            optimizer = AdagradOptimizer(learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'momentum':
            optimizer = MomentumOptimizer(learning_rate=FLAGS.learning_rate,
                                          momentum=FLAGS.momentum,
                                          use_nesterov=FLAGS.use_nesterov)
        else:
            raise ValueError(
                'method must be sgd or adam or adagrad or momentum')
        opt_loss = scalar_loss
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return train_op, scalar_loss, accuracy
def __init__(self,
             sequence_length,
             num_classes,
             vocab_size,
             emb_dim,
             dis_emb_dim,
             noise_multiplier,
             l2_norm_clip,
             population_size,
             delta,
             num_microbatches,
             filter_sizes,
             num_filters,
             batch_size,
             hidden_dim,
             start_token,
             goal_out_size,
             goal_size,
             step_size,
             D_model,
             LSTMlayer_num=1,
             l2_reg_lambda=0.0,
             learning_rate=0.001):
    """Builds the full hierarchical (Manager/Worker) generator graph.

    LeakGAN-style generator: a Manager RNN emits goal vectors from leaked
    discriminator features and a Worker RNN emits token logits conditioned
    on those goals. Constructs, in order: placeholders, Worker/Manager
    variables, the free-running generation while_loop, the supervised
    pretraining while_loop, and the pretrain/adversarial update ops
    (Worker pretraining uses a differentially private Adam optimizer).

    NOTE: Python 2 code — integer division (sequence_length / step_size)
    and `print x` statements are relied upon; sequence_length is assumed
    to be a multiple of step_size.
    """
    self.sequence_length = sequence_length
    self.num_classes = num_classes
    self.vocab_size = vocab_size
    self.emb_dim = emb_dim
    self.dis_emb_dim = dis_emb_dim
    # DP-SGD hyperparameters for Worker pretraining.
    self.noise_multiplier = noise_multiplier
    self.l2_norm_clip = l2_norm_clip
    self.population_size = population_size
    self.delta = delta
    self.num_microbatches = num_microbatches
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.batch_size = batch_size
    self.hidden_dim = hidden_dim
    self.start_token = tf.constant([start_token] * self.batch_size,
                                   dtype=tf.int32)
    self.LSTMlayer_num = LSTMlayer_num
    self.l2_reg_lambda = l2_reg_lambda
    self.learning_rate = learning_rate
    self.num_filters_total = sum(self.num_filters)
    self.grad_clip = 5.0  # global-norm clip for all non-DP updates
    self.goal_out_size = goal_out_size
    self.goal_size = goal_size
    self.step_size = step_size
    self.D_model = D_model
    # Feature extractor is shared with (leaked from) the discriminator.
    self.FeatureExtractor_unit = self.D_model.FeatureExtractor_unit
    self.scope = self.D_model.feature_scope
    self.worker_params = []
    self.manager_params = []
    self.epis = 0.65
    self.tem = 0.8  # softmax temperature used during training-mode sampling
    with tf.variable_scope('place_holder'):
        self.x = tf.placeholder(
            tf.int32,
            shape=[self.batch_size, self.sequence_length
                   ])  # sequence of tokens generated by generator
        self.reward = tf.placeholder(
            tf.float32,
            shape=[self.batch_size, self.sequence_length / self.step_size
                   ])  # one reward per goal step (integer division)
        self.given_num = tf.placeholder(tf.int32)
        self.drop_out = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.train = tf.placeholder(tf.int32, None, name="train")
    with tf.variable_scope('Worker'):
        self.g_embeddings = tf.Variable(
            tf.random_normal([self.vocab_size, self.emb_dim], stddev=0.1))
        self.worker_params.append(self.g_embeddings)
        self.g_worker_recurrent_unit = self.create_Worker_recurrent_unit(
            self.worker_params)  # maps h_tm1 to h_t for generator
        self.g_worker_output_unit = self.create_Worker_output_unit(
            self.worker_params)  # maps h_t to o_t (output token logits)
        self.W_workerOut_change = tf.Variable(
            tf.random_normal([self.vocab_size, self.goal_size], stddev=0.1))
        self.g_change = tf.Variable(
            tf.random_normal([self.goal_out_size, self.goal_size],
                             stddev=0.1))
        self.worker_params.extend([self.W_workerOut_change, self.g_change])
        self.h0_worker = tf.zeros([self.batch_size, self.hidden_dim])
        # Stacked (h, c) initial LSTM state.
        self.h0_worker = tf.stack([self.h0_worker, self.h0_worker])
    with tf.variable_scope('Manager'):
        self.g_manager_recurrent_unit = self.create_Manager_recurrent_unit(
            self.manager_params)  # maps h_tm1 to h_t for generator
        self.g_manager_output_unit = self.create_Manager_output_unit(
            self.manager_params)  # maps h_t to o_t (output token logits)
        self.h0_manager = tf.zeros([self.batch_size, self.hidden_dim])
        self.h0_manager = tf.stack([self.h0_manager, self.h0_manager])
        self.goal_init = tf.get_variable(
            "goal_init",
            initializer=tf.truncated_normal(
                [self.batch_size, self.goal_out_size], stddev=0.1))
        self.manager_params.extend([self.goal_init])
    # -1 padding is used to mark "not yet generated" positions.
    self.padding_array = tf.constant(
        -1, shape=[self.batch_size, self.sequence_length], dtype=tf.int32)
    with tf.name_scope("roll_out"):
        self.gen_for_reward = self.rollout(self.x, self.given_num)
    # processed for batch
    with tf.device("/cpu:0"):
        self.processed_x = tf.transpose(
            tf.nn.embedding_lookup(self.g_embeddings, self.x),
            perm=[1, 0, 2])  # seq_length x batch_size x emb_dim
    # TensorArrays threaded through the generation while_loop.
    gen_o = tensor_array_ops.TensorArray(dtype=tf.float32,
                                         size=self.sequence_length,
                                         dynamic_size=False,
                                         infer_shape=True)
    gen_x = tensor_array_ops.TensorArray(dtype=tf.int32,
                                         size=1,
                                         dynamic_size=True,
                                         infer_shape=True,
                                         clear_after_read=False)
    goal = tensor_array_ops.TensorArray(dtype=tf.float32,
                                        size=self.sequence_length,
                                        dynamic_size=False,
                                        infer_shape=True,
                                        clear_after_read=False)
    feature_array = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length + 1,
        dynamic_size=False,
        infer_shape=True,
        clear_after_read=False)
    real_goal_array = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length / self.step_size,
        dynamic_size=False,
        infer_shape=True,
        clear_after_read=False)
    gen_real_goal_array = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length,
        dynamic_size=False,
        infer_shape=True,
        clear_after_read=False)
    gen_o_worker_array = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length / self.step_size,
        dynamic_size=False,
        infer_shape=True,
        clear_after_read=False)

    def _g_recurrence(i, x_t, h_tm1, h_tm1_manager, gen_o, gen_x, goal,
                      last_goal, real_goal, step_size, gen_real_goal_array,
                      gen_o_worker_array):
        """One free-running generation step: sample token i from the Worker
        conditioned on the Manager's current goal."""
        ## padding sentence by -1
        cur_sen = tf.cond(
            i > 0, lambda: tf.split(
                tf.concat([
                    tf.transpose(gen_x.stack(), perm=[1, 0]), self.
                    padding_array
                ], 1), [self.sequence_length, i], 1)[0],
            lambda: self.padding_array)
        # Leaked features from the discriminator's extractor on the partial
        # sentence.
        with tf.variable_scope(self.scope):
            feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
        h_t_Worker = self.g_worker_recurrent_unit(
            x_t, h_tm1)  # hidden_memory_tuple
        o_t_Worker = self.g_worker_output_unit(
            h_t_Worker)  # batch x vocab , logits not prob
        o_t_Worker = tf.reshape(
            o_t_Worker, [self.batch_size, self.vocab_size, self.goal_size])
        h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
        sub_goal = self.g_manager_output_unit(h_t_manager)
        sub_goal = tf.nn.l2_normalize(sub_goal, 1)
        goal = goal.write(i, sub_goal)
        # Goals accumulate within a step_size window.
        real_sub_goal = tf.add(last_goal, sub_goal)
        w_g = tf.matmul(real_goal, self.g_change)  #batch x goal_size
        w_g = tf.nn.l2_normalize(w_g, 1)
        gen_real_goal_array = gen_real_goal_array.write(i, real_goal)
        w_g = tf.expand_dims(w_g, 2)  #batch x goal_size x 1
        gen_o_worker_array = gen_o_worker_array.write(i, o_t_Worker)
        # Project Worker outputs onto the goal direction to get token logits.
        x_logits = tf.matmul(o_t_Worker, w_g)
        x_logits = tf.squeeze(x_logits)
        # Temperature: self.tem when train>0 (and i>1), else 1.5.
        log_prob = tf.log(
            tf.nn.softmax(
                tf.cond(
                    i > 1, lambda: tf.cond(self.train > 0, lambda: self.
                                           tem, lambda: 1.5), lambda: 1.5) *
                x_logits))
        next_token = tf.cast(
            tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]),
            tf.int32)
        x_tp1 = tf.nn.embedding_lookup(self.g_embeddings,
                                       next_token)  # batch x emb_dim
        with tf.control_dependencies([cur_sen]):
            gen_x = gen_x.write(i, next_token)  # indices, batch_size
        gen_o = gen_o.write(
            i,
            tf.reduce_sum(
                tf.multiply(
                    tf.one_hot(next_token, self.vocab_size, 1.0, 0.0),
                    tf.nn.softmax(x_logits)), 1))  # [batch_size] , prob
        # last_goal resets to zero at each step_size boundary; real_goal is
        # refreshed with the accumulated goal at that boundary.
        return i+1,x_tp1,h_t_Worker,h_t_manager,gen_o,gen_x,goal,\
            tf.cond(((i+1)%step_size)>0,lambda:real_sub_goal,lambda :tf.constant(0.0,shape=[self.batch_size,self.goal_out_size]))\
            ,tf.cond(((i+1)%step_size)>0,lambda :real_goal,lambda :real_sub_goal),step_size,gen_real_goal_array,gen_o_worker_array

    _, _, _, _, self.gen_o, self.gen_x, _, _, _, _, self.gen_real_goal_array, self.gen_o_worker_array = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11: i <
        self.sequence_length,
        body=_g_recurrence,
        loop_vars=(tf.constant(0, dtype=tf.int32),
                   tf.nn.embedding_lookup(self.g_embeddings,
                                          self.start_token), self.h0_worker,
                   self.h0_manager, gen_o, gen_x, goal,
                   tf.zeros([self.batch_size, self.goal_out_size]),
                   self.goal_init, step_size, gen_real_goal_array,
                   gen_o_worker_array),
        parallel_iterations=1)
    self.gen_x = self.gen_x.stack()  # seq_length x batch_size
    self.gen_x = tf.transpose(self.gen_x,
                              perm=[1, 0])  # batch_size x seq_length
    self.gen_real_goal_array = self.gen_real_goal_array.stack(
    )  # seq_length x batch_size x goal
    self.gen_real_goal_array = tf.transpose(
        self.gen_real_goal_array,
        perm=[1, 0, 2])  # batch_size x seq_length x goal
    self.gen_o_worker_array = self.gen_o_worker_array.stack(
    )  # seq_length x batch_size* vocab*goal
    self.gen_o_worker_array = tf.transpose(
        self.gen_o_worker_array,
        perm=[1, 0, 2, 3])  # batch_size x seq_length * vocab*goal
    # TensorArrays threaded through the pretraining while_loop.
    sub_feature = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length / self.step_size,
        dynamic_size=False,
        infer_shape=True,
        clear_after_read=False)
    all_sub_features = tensor_array_ops.TensorArray(
        dtype=tf.float32,
        size=self.sequence_length,
        dynamic_size=False,
        infer_shape=True,
        clear_after_read=False)
    all_sub_goals = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                 size=self.sequence_length,
                                                 dynamic_size=False,
                                                 infer_shape=True,
                                                 clear_after_read=False)
    # supervised pretraining for generator
    g_predictions = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                 size=self.sequence_length,
                                                 dynamic_size=False,
                                                 infer_shape=True)
    ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32,
                                            size=self.sequence_length)
    ta_emb_x = ta_emb_x.unstack(self.processed_x)

    def preTrain(i, x_t, g_predictions, h_tm1, input_x, h_tm1_manager,
                 last_goal, real_goal, feature_array, real_goal_array,
                 sub_feature, all_sub_features, all_sub_goals):
        """One teacher-forced pretraining step over the real sequence
        self.x; records per-step predictions, features, and goals."""
        ## padding sentence by -1
        cur_sen = tf.split(
            tf.concat([
                tf.split(input_x, [i, self.sequence_length - i], 1)[0],
                self.padding_array
            ], 1), [self.sequence_length, i], 1)[0]  #padding sentence
        with tf.variable_scope(self.scope):
            feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
        feature_array = feature_array.write(i, feature)
        # Seed goal history with the learned initial goal at i == 0.
        real_goal_array = tf.cond(
            i > 0, lambda: real_goal_array,
            lambda: real_goal_array.write(0, self.goal_init))
        h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
        sub_goal = self.g_manager_output_unit(h_t_manager)
        sub_goal = tf.nn.l2_normalize(sub_goal, 1)
        h_t_Worker = tf.cond(
            i > 0, lambda: self.g_worker_recurrent_unit(x_t, h_tm1),
            lambda: h_tm1)  # hidden_memory_tuple
        o_t_Worker = self.g_worker_output_unit(
            h_t_Worker)  # batch x vocab , logits not prob
        o_t_Worker = tf.reshape(
            o_t_Worker, [self.batch_size, self.vocab_size, self.goal_size])
        real_sub_goal = tf.cond(i > 0,
                                lambda: tf.add(last_goal, sub_goal),
                                lambda: real_goal)
        all_sub_goals = tf.cond(
            i > 0, lambda: all_sub_goals.write(i - 1, real_goal),
            lambda: all_sub_goals)
        w_g = tf.matmul(real_goal, self.g_change)  # batch x goal_size
        w_g = tf.nn.l2_normalize(w_g, 1)
        w_g = tf.expand_dims(w_g, 2)  # batch x goal_size x 1
        x_logits = tf.matmul(o_t_Worker, w_g)
        x_logits = tf.squeeze(x_logits)
        g_predictions = tf.cond(
            i > 0,
            lambda: g_predictions.write(i - 1, tf.nn.softmax(x_logits)),
            lambda: g_predictions)
        # At each step_size boundary record the feature delta over the last
        # window (integer division index).
        sub_feature = tf.cond(
            ((((i) % step_size) > 0)), lambda: sub_feature,
            lambda: (tf.cond(
                i > 0, lambda: sub_feature.write(
                    i / step_size - 1,
                    tf.subtract(feature, feature_array.read(i - step_size))
                ), lambda: sub_feature)))
        all_sub_features = tf.cond(i > 0,lambda: tf.cond((i % step_size) > 0, lambda :all_sub_features.write(i-1,tf.subtract(feature,feature_array.read(i-i%step_size))),\
            lambda :all_sub_features.write(i-1,tf.subtract(feature,feature_array.read(i-step_size)))), lambda : all_sub_features)
        real_goal_array = tf.cond(
            ((i) % step_size) > 0, lambda: real_goal_array,
            lambda: tf.cond(
                (i) / step_size < self.sequence_length / step_size,
                lambda: tf.cond(
                    i > 0, lambda: real_goal_array.write(
                        (i) / step_size, real_sub_goal),
                    lambda: real_goal_array), lambda: real_goal_array))
        # Teacher forcing: next input is the true embedded token.
        x_tp1 = tf.cond(i > 0, lambda: ta_emb_x.read(i - 1), lambda: x_t)
        return i+1, x_tp1, g_predictions, h_t_Worker, input_x, h_t_manager,\
            tf.cond(((i)%step_size)>0,lambda:real_sub_goal,lambda :tf.constant(0.0,shape=[self.batch_size,self.goal_out_size])) ,\
            tf.cond(((i) % step_size) > 0, lambda: real_goal, lambda: real_sub_goal),\
            feature_array,real_goal_array,sub_feature,all_sub_features,all_sub_goals

    _, _, self.g_predictions, _, _, _, _, _, self.feature_array, self.real_goal_array, self.sub_feature, self.all_sub_features, self.all_sub_goals = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12: i <
        self.sequence_length + 1,
        body=preTrain,
        loop_vars=(tf.constant(0, dtype=tf.int32),
                   tf.nn.embedding_lookup(self.g_embeddings,
                                          self.start_token), g_predictions,
                   self.h0_worker, self.x, self.h0_manager,
                   tf.zeros([self.batch_size, self.goal_out_size]),
                   self.goal_init, feature_array, real_goal_array,
                   sub_feature, all_sub_features, all_sub_goals),
        parallel_iterations=1)
    self.sub_feature = self.sub_feature.stack(
    )  # seq_length x batch_size x num_filter
    self.sub_feature = tf.transpose(self.sub_feature, perm=[1, 0, 2])
    self.real_goal_array = self.real_goal_array.stack()
    self.real_goal_array = tf.transpose(self.real_goal_array, perm=[1, 0, 2])
    # Python 2 print statements (debug shape dumps).
    print self.real_goal_array.shape
    print self.sub_feature.shape
    # Manager pretraining: maximize cosine similarity between feature deltas
    # and the goals that produced them.
    self.pretrain_goal_loss = -tf.reduce_sum(1 - tf.losses.cosine_distance(
        tf.nn.l2_normalize(self.sub_feature, 2),
        tf.nn.l2_normalize(self.real_goal_array, 2), 2)) / (
            self.sequence_length * self.batch_size / self.step_size)
    with tf.name_scope("Manager_PreTrain_update"):
        pretrain_manager_opt = tf.train.AdamOptimizer(self.learning_rate)
        self.pretrain_manager_grad, _ = tf.clip_by_global_norm(
            tf.gradients(self.pretrain_goal_loss, self.manager_params),
            self.grad_clip)
        self.pretrain_manager_updates = pretrain_manager_opt.apply_gradients(
            zip(self.pretrain_manager_grad, self.manager_params))
    # self.real_goal_array = self.real_goal_array.stack()
    self.g_predictions = tf.transpose(
        self.g_predictions.stack(),
        perm=[1, 0, 2])  # batch_size x seq_length x vocab_size
    # Negative entropy of the prediction distribution (diagnostic).
    self.cross_entropy = tf.reduce_sum(self.g_predictions * tf.log(
        tf.clip_by_value(self.g_predictions, 1e-20, 1.0))) / (
            self.batch_size * self.sequence_length * self.vocab_size)
    # Worker pretraining: MLE over the real tokens.
    self.pretrain_worker_loss = -tf.reduce_sum(
        tf.one_hot(tf.to_int32(tf.reshape(
            self.x, [-1])), self.vocab_size, 1.0, 0.0) * tf.log(
                tf.clip_by_value(
                    tf.reshape(self.g_predictions, [-1, self.vocab_size]),
                    1e-20, 1.0))) / (self.sequence_length * self.batch_size)
    with tf.name_scope("Worker_PreTrain_update"):
        # training updates — the Worker is pretrained with DP-Adam so the
        # generator's MLE phase is differentially private.
        self.worker_pre_ledger = privacy_ledger.PrivacyLedger(
            population_size=self.population_size,
            selection_probability=(self.batch_size / self.population_size))
        pretrain_worker_opt = dp_optimizer.DPAdamGaussianOptimizer(
            l2_norm_clip=self.l2_norm_clip,
            noise_multiplier=self.noise_multiplier,
            num_microbatches=self.num_microbatches,
            ledger=self.worker_pre_ledger,
            learning_rate=self.learning_rate)
        self.pretrain_worker_grad, _ = tf.clip_by_global_norm(
            tf.gradients(self.pretrain_worker_loss, self.worker_params),
            self.grad_clip)
        self.pretrain_worker_updates = pretrain_worker_opt.apply_gradients(
            zip(self.pretrain_worker_grad, self.worker_params))
    # Adversarial Manager loss: reward-weighted goal/feature alignment.
    self.goal_loss = -tf.reduce_sum(
        tf.multiply(
            self.reward, 1 - tf.losses.cosine_distance(
                tf.nn.l2_normalize(self.sub_feature, 2),
                tf.nn.l2_normalize(self.real_goal_array, 2), 2))) / (
                    self.sequence_length * self.batch_size / self.step_size)
    with tf.name_scope("Manager_update"):
        manager_opt = tf.train.AdamOptimizer(self.learning_rate)
        self.manager_grad, _ = tf.clip_by_global_norm(
            tf.gradients(self.goal_loss, self.manager_params),
            self.grad_clip)
        self.manager_updates = manager_opt.apply_gradients(
            zip(self.manager_grad, self.manager_params))
    self.all_sub_features = self.all_sub_features.stack()
    self.all_sub_features = tf.transpose(self.all_sub_features,
                                         perm=[1, 0, 2])
    self.all_sub_goals = self.all_sub_goals.stack()
    self.all_sub_goals = tf.transpose(self.all_sub_goals, perm=[1, 0, 2])
    # self.all_sub_features = tf.nn.l2_normalize(self.all_sub_features, 2)
    # Intrinsic Worker reward: alignment of per-step feature deltas with the
    # goals in force at each step.
    self.Worker_Reward = 1 - tf.losses.cosine_distance(
        tf.nn.l2_normalize(self.all_sub_features, 2),
        tf.nn.l2_normalize(self.all_sub_goals, 2), 2)
    # print self.Worker_Reward.shape
    # Adversarial Worker loss: reward-weighted MLE (policy gradient form).
    self.worker_loss = -tf.reduce_sum(
        tf.multiply(
            self.Worker_Reward,
            tf.one_hot(tf.to_int32(tf.reshape(
                self.x, [-1])), self.vocab_size, 1.0, 0.0) * tf.log(
                    tf.clip_by_value(
                        tf.reshape(self.g_predictions,
                                   [-1, self.vocab_size]), 1e-20,
                        1.0)))) / (self.sequence_length * self.batch_size)
    with tf.name_scope("Worker_update"):
        # training updates
        worker_opt = tf.train.AdamOptimizer(self.learning_rate)
        self.worker_grad, _ = tf.clip_by_global_norm(
            tf.gradients(self.worker_loss, self.worker_params),
            self.grad_clip)
        self.worker_updates = worker_opt.apply_gradients(
            zip(self.worker_grad, self.worker_params))
# Partition trainable variables by name prefix: the discriminator and
# generator are updated by separate optimizers below.
T_vars = tf.trainable_variables()
D_vars = [var for var in T_vars if var.name.startswith('discriminator')]
G_vars = [var for var in T_vars if var.name.startswith('generator')]

# In[11]:

# Privacy ledger for the DP generator optimizer. population_size=55000
# matches the MNIST training-set size used here — TODO confirm against the
# dataset actually loaded elsewhere in this script.
ledger = privacy_ledger.PrivacyLedger(population_size=55000,
                                      selection_probability=(batch_size /
                                                             55000),
                                      max_samples=1e6,
                                      max_queries=1e6)
# Only the generator is trained with DP-Adam; the discriminator uses a
# plain Adam optimizer below.
G_optimizer = dp_optimizer.DPAdamGaussianOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=lr,
    beta1=0.5,
    ledger=ledger)

# In[12]:

# optimizer for each network; gated on UPDATE_OPS so batch-norm statistics
# (if any) update before each training step.
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    D_optim = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(D_loss,
                                                             var_list=D_vars)
    # DP optimizers minimize the per-example loss vector (vector_G_loss).
    G_optim = G_optimizer.minimize(loss=vector_G_loss, var_list=G_vars)

# In[13]:

# open session and initialize all variables
# Build a small fully-connected binary classifier trained with a DP-Adam
# optimizer, restore saved weights, then open a TCP server socket.
model = Sequential()
n_cols = train_X.shape[1]
x = 0.1  # NOTE(review): unused in this chunk — possibly a leftover.
e = 'relu'  # shared activation for all hidden layers
model.add(Dense(units=n_cols, activation=e, input_shape=(n_cols, )))
model.add(Dense(75, activation=e))
model.add(Dense(50, activation=e))
model.add(Dense(75, activation=e))
model.add(Dense(units=2, activation='softmax'))
# NOTE(review): no learning_rate passed to the DP optimizer here — it falls
# back to the optimizer's default; confirm that is intended.
model.compile(loss='categorical_hinge',
              optimizer=dp_optimizer.DPAdamGaussianOptimizer(
                  l2_norm_clip=FLAGS.l2_norm_clip,
                  noise_multiplier=FLAGS.noise_multiplier,
                  num_microbatches=FLAGS.microbatches,
              ),
              metrics=['accuracy'])
csv_logger1 = keras.callbacks.CSVLogger('log1.csv',
                                        append=True,
                                        separator=',')
# Restore previously trained weights (file must exist on disk).
model.load_weights('model1.h5')
# Open a blocking TCP server on port 9999 and wait for a single client.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
print('socket created')
host = socket.gethostname()
port = 9999
s.bind((host, port))
s.listen(1)
print('waiting for connection')
sc, address = s.accept()
def __init__(self,
             sequence_length,
             num_classes,
             vocab_size,
             dis_emb_dim,
             d_rate,
             noise_multiplier,
             l2_norm_clip,
             population_size,
             delta,
             num_microbatches,
             filter_sizes,
             num_filters,
             batch_size,
             hidden_dim,
             start_token,
             goal_out_size,
             step_size,
             l2_reg_lambda=0.0):
    """Builds the discriminator graph with a DP-Adam training op.

    Constructs the input placeholders, the shared feature extractor (also
    leaked to the generator via `self.feature_scope`), the classification
    head, the softmax cross-entropy loss with L2 regularization, and a
    differentially private Adam update over the discriminator/extractor
    parameters (per-example clipping + Gaussian noise, tracked by a
    PrivacyLedger).

    Args:
        sequence_length: length of input token sequences.
        num_classes: number of output classes (real/fake).
        vocab_size: vocabulary size of the token inputs.
        dis_emb_dim: discriminator embedding dimension.
        d_rate: learning rate for the DP discriminator optimizer.
        noise_multiplier: DP-SGD Gaussian noise multiplier.
        l2_norm_clip: DP-SGD per-microbatch gradient clipping norm.
        population_size: training-set size for the privacy ledger.
        delta: target DP delta (stored for external accounting).
        num_microbatches: DP-SGD microbatch count.
        filter_sizes, num_filters: CNN feature-extractor configuration.
        batch_size: minibatch size.
        hidden_dim: hidden dimension (stored for related components).
        start_token: id of the sequence start token.
        goal_out_size, step_size: hierarchy parameters (stored only).
        l2_reg_lambda: weight of the L2 regularization term.
    """
    self.sequence_length = sequence_length
    self.num_classes = num_classes
    self.vocab_size = vocab_size
    self.dis_emb_dim = dis_emb_dim
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.batch_size = batch_size
    self.hidden_dim = hidden_dim
    self.start_token = tf.constant([start_token] * self.batch_size,
                                   dtype=tf.int32)
    self.l2_reg_lambda = l2_reg_lambda
    self.num_filters_total = sum(self.num_filters)
    self.temperature = 1.0
    self.grad_clip = 5.0  #Does not apply to d_optimizer
    self.goal_out_size = goal_out_size
    self.step_size = step_size
    self.D_input_y = tf.placeholder(tf.float32, [None, num_classes],
                                    name="input_y")
    self.D_input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                    name="input_x")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")
    # DP-SGD hyperparameters.
    self.d_rate = d_rate
    self.l2_norm_clip = l2_norm_clip
    self.noise_multiplier = noise_multiplier
    self.num_microbatches = num_microbatches
    self.population_size = population_size
    self.delta = delta
    with tf.name_scope('D_update'):
        self.D_l2_loss = tf.constant(0.0)
        self.FeatureExtractor_unit = self.FeatureExtractor()
        # Train for Discriminator
        with tf.variable_scope("feature") as self.feature_scope:
            D_feature = self.FeatureExtractor_unit(
                self.D_input_x,
                self.dropout_keep_prob)  #,self.dropout_keep_prob)
            self.feature_scope.reuse_variables()
        # tf.get_variable_scope().reuse_variables()
        D_scores, D_predictions, self.ypred_for_auc = self.classification(
            D_feature)
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=D_scores, labels=self.D_input_y)
        self.D_loss = tf.reduce_mean(
            losses) + self.l2_reg_lambda * self.D_l2_loss
        # BUG FIX: the original condition was
        #   'Discriminator' or 'FeatureExtractor' in param.name
        # which short-circuits on the truthy literal 'Discriminator' and is
        # therefore always True, silently selecting EVERY trainable variable.
        # Each substring must be tested against param.name explicitly.
        self.D_params = [
            param for param in tf.trainable_variables()
            if 'Discriminator' in param.name or 'FeatureExtractor' in param.name
        ]
        # Ledger records each sampled batch for privacy accounting.
        self.ledger = privacy_ledger.PrivacyLedger(
            population_size=self.population_size,
            selection_probability=(self.batch_size / self.population_size))
        d_optimizer = dp_optimizer.DPAdamGaussianOptimizer(
            l2_norm_clip=self.l2_norm_clip,
            noise_multiplier=self.noise_multiplier,
            num_microbatches=self.num_microbatches,
            ledger=self.ledger,
            learning_rate=self.d_rate)
        D_grads_and_vars = d_optimizer.compute_gradients(
            self.D_loss, self.D_params, aggregation_method=2)
        self.D_train_op = d_optimizer.apply_gradients(D_grads_and_vars)