import os

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import array_ops
# Assumptions: hessian_vector_product is provided by a companion helper
# module (as in influence-function codebases), and K is the Keras backend
# bound to this TF session.
from keras import backend as K


def __init__(self, **kwargs):
    np.random.seed(0)
    tf.set_random_seed(0)

    self.batch_size = kwargs.pop('batch_size')
    self.data_sets = kwargs.pop('data_sets')
    self.train_dir = kwargs.pop('train_dir', 'output')
    log_dir = kwargs.pop('log_dir', 'log')
    self.model_name = kwargs.pop('model_name')
    self.num_classes = kwargs.pop('num_classes')
    self.initial_learning_rate = kwargs.pop('initial_learning_rate')
    self.decay_epochs = kwargs.pop('decay_epochs')

    # Optional kwargs with defaults.
    self.keep_probs = kwargs.pop('keep_probs', None)
    self.mini_batch = kwargs.pop('mini_batch', True)
    self.damping = kwargs.pop('damping', 0.0)

    if not os.path.exists(self.train_dir):
        os.makedirs(self.train_dir)

    # Initialize session
    config = tf.ConfigProto(log_device_placement=True)
    self.sess = tf.Session(config=config)
    K.set_session(self.sess)

    # Setup input
    self.input_placeholder, self.labels_placeholder = self.placeholder_inputs()
    self.num_train_examples = self.data_sets.train.labels.shape[0]
    self.num_test_examples = self.data_sets.test.labels.shape[0]

    # Setup inference and training
    if self.keep_probs is not None:
        self.keep_probs_placeholder = tf.placeholder(tf.float32, shape=(2,))
        self.logits = self.inference(self.input_placeholder,
                                     self.keep_probs_placeholder)
    elif hasattr(self, 'inference_needs_labels'):
        self.logits = self.inference(self.input_placeholder,
                                     self.labels_placeholder)
    else:
        self.logits = self.inference(self.input_placeholder)
    # get_shape() gives the static shape; tf.shape() would only print an op.
    print('self.logits shape:', self.logits.get_shape())

    self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg = self.loss(
        self.logits, self.labels_placeholder)

    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self.learning_rate = tf.Variable(self.initial_learning_rate,
                                     name='learning_rate', trainable=False)
    self.learning_rate_placeholder = tf.placeholder(tf.float32)
    self.update_learning_rate_op = tf.assign(self.learning_rate,
                                             self.learning_rate_placeholder)

    self.train_op = self.get_train_op(self.total_loss, self.global_step,
                                      self.learning_rate)
    self.train_sgd_op = self.get_train_sgd_op(self.total_loss,
                                              self.global_step,
                                              self.learning_rate)
    self.accuracy_op = self.get_accuracy_op(self.logits,
                                            self.labels_placeholder)
    self.preds = self.predictions(self.logits)

    # Setup misc
    self.saver = tf.train.Saver()

    # Setup gradients and Hessians
    self.params = self.get_all_params()
    self.grad_total_loss_op = tf.gradients(self.total_loss, self.params)
    self.grad_loss_no_reg_op = tf.gradients(self.loss_no_reg, self.params)
    self.v_placeholder = [tf.placeholder(tf.float32, shape=a.get_shape())
                          for a in self.params]
    self.u_placeholder = [tf.placeholder(tf.float32, shape=a.get_shape())
                          for a in self.params]

    self.hessian_vector = hessian_vector_product(self.total_loss,
                                                 self.params,
                                                 self.v_placeholder)
    self.grad_loss_wrt_input_op = tf.gradients(self.total_loss,
                                               self.input_placeholder)

    # Because tf.gradients auto-accumulates, we probably don't need
    # the add_n (or even the reduce_sum).
    self.influence_op = tf.add_n([
        tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
        for a, b in zip(self.grad_total_loss_op, self.v_placeholder)])
    self.grad_influence_wrt_input_op = tf.gradients(self.influence_op,
                                                    self.input_placeholder)

    self.checkpoint_file = os.path.join(self.train_dir,
                                        '%s-checkpoint' % self.model_name)
    self.all_train_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.train)
    self.all_test_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.test)

    init = tf.global_variables_initializer()
    self.sess.run(init)

    self.vec_to_list = self.get_vec_to_list_fn()
    self.adversarial_loss, self.indiv_adversarial_loss = self.adversarial_loss(
        self.logits, self.labels_placeholder)
    if self.adversarial_loss is not None:
        self.grad_adversarial_loss_op = tf.gradients(self.adversarial_loss,
                                                     self.params)

    # self._v = [tf.placeholder(tf.float32, shape=a.get_shape())
    #            for a in self.params]
    self.hvp_op = self.hessian_vector_product_op(self.total_loss,
                                                 self.params,
                                                 self.v_placeholder)
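
# --- Usage sketch (not part of the original listing) ---
# A minimal example of how the HVP and influence ops built above are meant
# to be evaluated: feed a list of arrays shaped like the parameters into
# v_placeholder alongside a data feed dict. `model` is a hypothetical
# instance of the class whose __init__ is shown above.
def run_hvp_and_influence(model, v_list):
    feed_dict = dict(model.all_train_feed_dict)
    for placeholder, v in zip(model.v_placeholder, v_list):
        feed_dict[placeholder] = v
    # H v: one Hessian-vector product of the total training loss.
    hvp = model.sess.run(model.hessian_vector, feed_dict=feed_dict)
    # v^T grad L: the inner product computed by influence_op.
    influence = model.sess.run(model.influence_op, feed_dict=feed_dict)
    return hvp, influence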
# Define loss and optimizer. Here x is the input placeholder, y the model
# logits, and y_ the one-hot label placeholder (defined earlier in the script).
total_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
grads = tf.gradients(total_loss, x)

params = tf.trainable_variables()
grad_total_loss_op = tf.gradients(total_loss, params)
grad_loss_no_reg_op = grad_total_loss_op
v_placeholder = [tf.placeholder(tf.float32, shape=a.get_shape())
                 for a in params]
u_placeholder = [tf.placeholder(tf.float32, shape=a.get_shape())
                 for a in params]

hessian_vector = hessian_vector_product(total_loss, params, v_placeholder)
grad_loss_wrt_input_op = tf.gradients(total_loss, x)

# Because tf.gradients auto-accumulates, we probably don't need
# the add_n (or even the reduce_sum).
influence_op = tf.add_n([
    tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
    for a, b in zip(grad_total_loss_op, v_placeholder)])
grad_influence_wrt_input_op = tf.gradients(influence_op, x)

train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(total_loss)

########### Import Trained Model
saver = tf.train.Saver()
sess = tf.Session()
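
# --- Usage sketch (not part of the original listing) ---
# Restoring trained weights and evaluating the influence gradient w.r.t.
# the input. 'model.ckpt', x_test_batch, and y_test_batch are hypothetical;
# the zero vectors stand in for a precomputed inverse-HVP (s_test) vector.
saver.restore(sess, 'model.ckpt')
feed = {ph: np.zeros(ph.get_shape().as_list(), dtype=np.float32)
        for ph in v_placeholder}
feed[x] = x_test_batch    # hypothetical input batch
feed[y_] = y_test_batch   # hypothetical one-hot labels
input_grad = sess.run(grad_influence_wrt_input_op, feed_dict=feed)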
def __init__(self, keras_model, batch_size, data_sets, model_name,
             temperature=1.0, **kwargs):
    self.keras_model = keras_model
    self.batch_size = batch_size
    self.data_sets = data_sets
    self.train_dir = kwargs.pop('train_dir', 'output')
    log_dir = kwargs.pop('log_dir', 'log')
    self.model_name = model_name
    self.temperature = temperature
    self.mini_batch = True
    self.damping = 0.0

    # Initialize session (reuse the one Keras already owns)
    self.sess = K.get_session()

    # Setup input
    self.input_placeholder, self.labels_placeholder = self.placeholder_inputs()
    self.num_train_examples = self.data_sets.train.labels.shape[0]
    self.num_test_examples = self.data_sets.test.labels.shape[0]

    self.logits = self.inference()
    self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg, self.obj = \
        self.loss(self.logits, self.labels_placeholder)
    self.preds = self.predictions(self.logits)

    # Setup gradients and Hessians; parameters and gradients are reshaped
    # to rank-1 tensors so they pair up with the flat v_placeholder vectors.
    self.params = self.get_all_params()
    self.reshaped_params = [
        tf.reshape(x, (np.prod(x.get_shape().as_list()),))
        for x in self.params]
    self.grad_total_loss_op = [
        tf.reshape(x, (np.prod(x.get_shape().as_list()),))
        for x in tf.gradients(self.total_loss, self.params)]
    self.grad_loss_no_reg_op = [
        tf.reshape(x, (np.prod(x.get_shape().as_list()),))
        for x in tf.gradients(self.loss_no_reg, self.params)]
    self.grad_obj_op = [
        tf.reshape(x, (np.prod(x.get_shape().as_list()),))
        for x in tf.gradients(self.obj, self.params)]
    self.v_placeholder = [
        tf.placeholder(tf.float32,
                       shape=(np.prod(a.get_shape().as_list()),))
        for a in self.params]

    self.hessian_vector = hessian_vector_product(self.total_loss,
                                                 self.reshaped_params,
                                                 self.v_placeholder)
    self.grad_loss_wrt_input_op = tf.gradients(self.total_loss,
                                               self.input_placeholder)

    # Because tf.gradients auto-accumulates, we probably don't need
    # the add_n (or even the reduce_sum).
    self.influence_op = tf.add_n([
        tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
        for a, b in zip(self.grad_total_loss_op, self.v_placeholder)])
    self.grad_influence_wrt_input_op = tf.gradients(self.influence_op,
                                                    self.input_placeholder)

    self.all_train_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.train)
    self.all_test_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.test)

    # init = tf.global_variables_initializer()
    # self.sess.run(init)

    self.vec_to_list = self.get_vec_to_list_fn()
    self.adversarial_loss, self.indiv_adversarial_loss = self.adversarial_loss(
        self.logits, self.labels_placeholder)
    if self.adversarial_loss is not None:
        self.grad_adversarial_loss_op = tf.gradients(self.adversarial_loss,
                                                     self.params)
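
# --- Usage sketch (not part of the original listing) ---
# The flat-gradient convention above pairs naturally with a vector <-> list
# converter. This is a minimal stand-alone version of what get_vec_to_list_fn
# presumably returns (assumption: it splits one flat vector into
# per-parameter chunks matching v_placeholder's shapes).
def make_vec_to_list(params):
    sizes = [int(np.prod(p.get_shape().as_list())) for p in params]
    def vec_to_list(v):
        out, offset = [], 0
        for size in sizes:
            out.append(v[offset:offset + size])
            offset += size
        return out
    return vec_to_list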
def build_graph(self):
    # Setup architecture
    self.input_dim = self.config['arch']['input_dim']
    self.fit_intercept = self.config['arch']['fit_intercept']
    self.num_classes = self.config['arch']['num_classes']
    if self.num_classes > 2:
        self.multi_class = "multinomial"
        self.pseudo_num_classes = self.num_classes
    else:
        self.multi_class = "ovr"
        self.pseudo_num_classes = 1

    # Setup input
    self.input_placeholder = tf.placeholder(
        tf.float32, shape=(None, self.input_dim), name='input_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, shape=(None,), name='labels_placeholder')
    self.sample_weights_placeholder = tf.placeholder(
        tf.float32, shape=(None,), name='sample_weights_placeholder')
    self.l2_reg = tf.Variable(0, dtype=tf.float32, trainable=False,
                              name='l2_reg')
    self.l2_reg_assigner = get_assigners([self.l2_reg])[0]

    # Setup inference and losses
    self.logits, self.params = self.infer(self.input_placeholder,
                                          self.labels_placeholder)
    self.params_assigners = get_assigners(self.params)
    self.params_flat = flatten(self.params)
    self.params_dim = self.params_flat.shape[0]
    self.one_hot_labels = tf.one_hot(self.labels_placeholder,
                                     depth=self.num_classes)
    self.total_loss_reg, self.avg_loss_reg, self.total_loss_no_reg, \
        self.indiv_loss = self.loss(self.logits, self.one_hot_labels,
                                    self.sample_weights_placeholder)
    self.loss_reg_term = tf.add_n(tf.get_collection('regularization'),
                                  name="loss_reg_term")
    self.predictions = self.predict(self.logits)
    self.accuracy = get_accuracy(self.logits, self.labels_placeholder)

    # Setup margins, but only for binary logistic regression
    if self.num_classes == 2:
        y = tf.cast(self.labels_placeholder, tf.float32) * 2 - 1
        self.margins = tf.multiply(y, self.logits[:, 1])
        margin_input = self.input_placeholder
        if self.fit_intercept:
            margin_input = tf.pad(margin_input, [[0, 0], [0, 1]],
                                  mode="CONSTANT", constant_values=1.0)
        self.indiv_grad_margin = tf.multiply(margin_input,
                                             tf.expand_dims(y, 1))
        self.total_grad_margin = tf.einsum('ai,a->i',
                                           self.indiv_grad_margin,
                                           self.sample_weights_placeholder)

    # Calculate gradients explicitly
    self.gradients(self.input_placeholder, self.logits,
                   self.one_hot_labels, self.sample_weights_placeholder)

    # Calculate gradients
    # self.total_grad_loss_reg = tf.gradients(self.total_loss_reg, self.params)
    # self.total_grad_loss_no_reg = tf.gradients(self.total_loss_no_reg, self.params)
    # self.total_grad_loss_reg_flat = flatten(self.total_grad_loss_reg)
    # self.total_grad_loss_no_reg_flat = flatten(self.total_grad_loss_no_reg)

    # Calculate Hessians explicitly
    self.hessian(self.input_placeholder, self.logits,
                 self.sample_weights_placeholder)
    # This only works for a single parameter. To fix, concatenate
    # all parameters into a flat tensor, then split them up again to obtain
    # phantom parameters and use those in the model.
    # Calculate Hessians
    # if not self.fit_intercept:
    #     self.hessian_reg = tf.hessians(self.total_loss_reg, self.params)[0]
    self.matrix_placeholder = tf.placeholder(
        tf.float32,
        shape=(self.params_flat.shape[0], self.params_flat.shape[0]),
        name='matrix_placeholder')
    self.vectors_placeholder = tf.placeholder(
        tf.float32, shape=(None, self.params_flat.shape[0]),
        name='vectors_placeholder')
    # Two dense solvers for inverse-Hessian-vector products: Cholesky
    # (requires a positive-definite matrix) and LU (general). lu_solve
    # comes from tensorflow_probability (imported as tfp).
    self.inverse_vp_cho = tf.cholesky_solve(
        tf.cholesky(self.matrix_placeholder),
        tf.transpose(self.vectors_placeholder))
    self.inverse_vp_lu = tfp.math.lu_solve(
        *tf.linalg.lu(self.matrix_placeholder),
        rhs=tf.transpose(self.vectors_placeholder))
    self.vectors_placeholder_split = split_like(self.params,
                                                self.vectors_placeholder)
    self.hessian_vp_reg = flatten(
        hessian_vector_product(self.total_loss_reg, self.params,
                               self.vectors_placeholder_split))
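
# --- Usage sketch (not part of the original listing) ---
# How the dense solvers above are meant to be driven: feed an explicitly
# computed (and damped) Hessian into matrix_placeholder and the gradients
# to invert against into vectors_placeholder. `model`, `hessian_value`,
# and `grad_values` are hypothetical stand-ins.
damping = 1e-3  # assumed damping term for a possibly ill-conditioned Hessian
damped_hessian = hessian_value + damping * np.eye(hessian_value.shape[0])
# Result has one column per row of grad_values: column j solves H x = g_j.
inverse_hvp = model.sess.run(
    model.inverse_vp_cho,
    feed_dict={model.matrix_placeholder: damped_hessian,
               model.vectors_placeholder: grad_values})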