import os

import numpy as np
import tensorflow as tf
from keras import backend as K
from tensorflow.python.ops import array_ops

# hessian_vector_product is assumed to be provided by the accompanying
# hessians module of this codebase.
from hessians import hessian_vector_product


def __init__(self, **kwargs):
        np.random.seed(0)
        tf.set_random_seed(0)

        self.batch_size = kwargs.pop('batch_size')
        self.data_sets = kwargs.pop('data_sets')
        self.train_dir = kwargs.pop('train_dir', 'output')
        log_dir = kwargs.pop('log_dir', 'log')
        self.model_name = kwargs.pop('model_name')
        self.num_classes = kwargs.pop('num_classes')
        self.initial_learning_rate = kwargs.pop('initial_learning_rate')
        self.decay_epochs = kwargs.pop('decay_epochs')

        self.keep_probs = kwargs.pop('keep_probs', None)
        self.mini_batch = kwargs.pop('mini_batch', True)
        self.damping = kwargs.pop('damping', 0.0)

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)

        # Initialize session
        config = tf.ConfigProto(log_device_placement=True)
        self.sess = tf.Session(config=config)
        K.set_session(self.sess)

        # Setup input
        self.input_placeholder, self.labels_placeholder = self.placeholder_inputs()
        self.num_train_examples = self.data_sets.train.labels.shape[0]
        self.num_test_examples = self.data_sets.test.labels.shape[0]

        # Setup inference and training
        if self.keep_probs is not None:
            self.keep_probs_placeholder = tf.placeholder(tf.float32, shape=(2,))
            self.logits = self.inference(self.input_placeholder,
                                         self.keep_probs_placeholder)
        elif hasattr(self, 'inference_needs_labels'):
            self.logits = self.inference(self.input_placeholder,
                                         self.labels_placeholder)
        else:
            self.logits = self.inference(self.input_placeholder)
        print('self.logits shape:', self.logits.get_shape())

        self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg = self.loss(
            self.logits, self.labels_placeholder)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.learning_rate = tf.Variable(self.initial_learning_rate,
                                         name='learning_rate',
                                         trainable=False)
        self.learning_rate_placeholder = tf.placeholder(tf.float32)
        self.update_learning_rate_op = tf.assign(
            self.learning_rate, self.learning_rate_placeholder)

        self.train_op = self.get_train_op(self.total_loss, self.global_step,
                                          self.learning_rate)
        self.train_sgd_op = self.get_train_sgd_op(self.total_loss,
                                                  self.global_step,
                                                  self.learning_rate)
        self.accuracy_op = self.get_accuracy_op(self.logits,
                                                self.labels_placeholder)
        self.preds = self.predictions(self.logits)

        # Setup misc
        self.saver = tf.train.Saver()

        # Setup gradients and Hessians
        self.params = self.get_all_params()
        self.grad_total_loss_op = tf.gradients(self.total_loss, self.params)
        self.grad_loss_no_reg_op = tf.gradients(self.loss_no_reg, self.params)
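        # Placeholders shaped like the model parameters; concrete vectors are fed in
        # whenever the Hessian-vector product or the influence op below is evaluated.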
        self.v_placeholder = [
            tf.placeholder(tf.float32, shape=a.get_shape())
            for a in self.params
        ]
        self.u_placeholder = [
            tf.placeholder(tf.float32, shape=a.get_shape())
            for a in self.params
        ]

        self.hessian_vector = hessian_vector_product(self.total_loss,
                                                     self.params,
                                                     self.v_placeholder)

        self.grad_loss_wrt_input_op = tf.gradients(self.total_loss,
                                                   self.input_placeholder)

        # Because tf.gradients auto accumulates, we probably don't need the add_n (or even reduce_sum)
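        # influence_op is the scalar dot product between the total-loss gradient and the
        # fed-in vector v; stop_gradient keeps v constant, so grad_influence_wrt_input_op
        # below differentiates only the gradient term with respect to the input.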
        self.influence_op = tf.add_n([
            tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
            for a, b in zip(self.grad_total_loss_op, self.v_placeholder)
        ])

        self.grad_influence_wrt_input_op = tf.gradients(
            self.influence_op, self.input_placeholder)

        self.checkpoint_file = os.path.join(self.train_dir,
                                            "%s-checkpoint" % self.model_name)

        self.all_train_feed_dict = self.fill_feed_dict_with_all_ex(
            self.data_sets.train)
        self.all_test_feed_dict = self.fill_feed_dict_with_all_ex(
            self.data_sets.test)

        init = tf.global_variables_initializer()
        self.sess.run(init)

        self.vec_to_list = self.get_vec_to_list_fn()
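        # Note: the next assignment shadows the adversarial_loss *method* with the tensor
        # it returns, so it is only safe to call it this one time.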
        self.adversarial_loss, self.indiv_adversarial_loss = self.adversarial_loss(
            self.logits, self.labels_placeholder)
        if self.adversarial_loss is not None:
            self.grad_adversarial_loss_op = tf.gradients(
                self.adversarial_loss, self.params)

        # self._v = [tf.placeholder(tf.float32, shape=a.get_shape()) for a in self.params]
        self.hvp_op = self.hessian_vector_product_op(self.total_loss,
                                                     self.params,
                                                     self.v_placeholder)
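
# Minimal usage sketch (assumed, not part of the original class): given an instance
# `model` of the class above and a list of numpy arrays `v` shaped like model.params,
# this is roughly how the Hessian-vector product op would be evaluated.
def run_hvp(model, v):
    feed_dict = dict(model.all_train_feed_dict)
    for placeholder, value in zip(model.v_placeholder, v):
        feed_dict[placeholder] = value
    # Returns the Hessian-vector product H*v as one array per parameter tensor.
    return model.sess.run(model.hessian_vector, feed_dict=feed_dict)
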
Example #2
# Define loss and optimizer
total_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

grads = tf.gradients(total_loss, x)

params = tf.trainable_variables()


grad_total_loss_op = tf.gradients(total_loss, params)
grad_loss_no_reg_op = grad_total_loss_op

v_placeholder = [tf.placeholder(tf.float32, shape=a.get_shape()) for a in params]
u_placeholder = [tf.placeholder(tf.float32, shape=a.get_shape()) for a in params]

hessian_vector = hessian_vector_product(total_loss, params, v_placeholder)

grad_loss_wrt_input_op = tf.gradients(total_loss, x)        

# Because tf.gradients auto accumulates, we probably don't need the add_n (or even reduce_sum)        
influence_op = tf.add_n([
    tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
    for a, b in zip(grad_total_loss_op, v_placeholder)
])

grad_influence_wrt_input_op = tf.gradients(influence_op, x)

train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(total_loss)

########### Import Trained Model
saver = tf.train.Saver()
sess = tf.Session()
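
# The restore step this section presumably leads into; the checkpoint path below is a
# placeholder, not taken from the original snippet.
# saver.restore(sess, 'path/to/model-checkpoint')
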
    def __init__(self,
                 keras_model,
                 batch_size,
                 data_sets,
                 model_name,
                 temperature=1.0,
                 **kwargs):

        self.keras_model = keras_model

        self.batch_size = batch_size
        self.data_sets = data_sets
        self.train_dir = kwargs.pop('train_dir', 'output')
        log_dir = kwargs.pop('log_dir', 'log')
        self.model_name = model_name
        self.temperature = temperature

        self.mini_batch = True
        self.damping = 0.0

        # Initialize session
        self.sess = K.get_session()

        # Setup input
        self.input_placeholder, self.labels_placeholder =\
            self.placeholder_inputs()
        self.num_train_examples = self.data_sets.train.labels.shape[0]
        self.num_test_examples = self.data_sets.test.labels.shape[0]

        self.logits = self.inference()

        self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg, self.obj =\
            self.loss(self.logits, self.labels_placeholder)

        self.preds = self.predictions(self.logits)

        # Setup gradients and Hessians
        self.params = self.get_all_params()
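        # Flatten each parameter tensor (and its gradients) to a 1-D vector so that
        # v_placeholder and the Hessian-vector products below all share one flat layout.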
        self.reshaped_params = [
            tf.reshape(x, (np.prod(x.get_shape().as_list()), ))
            for x in self.params
        ]
        self.grad_total_loss_op = [
            tf.reshape(x, (np.prod(x.get_shape().as_list()), ))
            for x in tf.gradients(self.total_loss, self.params)
        ]
        self.grad_loss_no_reg_op = [
            tf.reshape(x, (np.prod(x.get_shape().as_list()), ))
            for x in tf.gradients(self.loss_no_reg, self.params)
        ]
        self.grad_obj_op = [
            tf.reshape(x, (np.prod(x.get_shape().as_list()), ))
            for x in tf.gradients(self.obj, self.params)
        ]
        self.v_placeholder = [
            tf.placeholder(tf.float32,
                           shape=(np.prod(a.get_shape().as_list()), ))
            for a in self.params
        ]

        self.hessian_vector = hessian_vector_product(self.total_loss,
                                                     self.reshaped_params,
                                                     self.v_placeholder)

        self.grad_loss_wrt_input_op = tf.gradients(self.total_loss,
                                                   self.input_placeholder)

        # Because tf.gradients auto accumulates, we probably
        #don't need the add_n (or even reduce_sum)
        self.influence_op = tf.add_n([
            tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
            for a, b in zip(self.grad_total_loss_op, self.v_placeholder)
        ])

        self.grad_influence_wrt_input_op =\
            tf.gradients(self.influence_op, self.input_placeholder)

        self.all_train_feed_dict =\
            self.fill_feed_dict_with_all_ex(self.data_sets.train)
        self.all_test_feed_dict =\
            self.fill_feed_dict_with_all_ex(self.data_sets.test)

        #init = tf.global_variables_initializer()
        #self.sess.run(init)

        self.vec_to_list = self.get_vec_to_list_fn()
        self.adversarial_loss, self.indiv_adversarial_loss =\
            self.adversarial_loss(self.logits, self.labels_placeholder)
        if self.adversarial_loss is not None:
            self.grad_adversarial_loss_op = tf.gradients(
                self.adversarial_loss, self.params)
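
# Minimal usage sketch (assumed, not part of the original example): once an inverse-HVP
# s_test = H^{-1} grad_test has been computed as a list of flat numpy vectors matching
# v_placeholder, influence_op evaluates the dot product between the training-loss
# gradient and s_test for whatever examples the feed dict contains.
def run_influence(model, feed_dict, s_test):
    fd = dict(feed_dict)
    for placeholder, value in zip(model.v_placeholder, s_test):
        fd[placeholder] = value
    return model.sess.run(model.influence_op, feed_dict=fd)
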
    def build_graph(self):
        # Setup architecture
        self.input_dim = self.config['arch']['input_dim']
        self.fit_intercept = self.config['arch']['fit_intercept']
        self.num_classes = self.config['arch']['num_classes']

        if self.num_classes > 2:
            self.multi_class = "multinomial"
            self.pseudo_num_classes = self.num_classes
        else:
            self.multi_class = "ovr"
            self.pseudo_num_classes = 1

        # Setup input
        self.input_placeholder = tf.placeholder(
            tf.float32,
            shape=(None, self.input_dim),
            name='input_placeholder')
        self.labels_placeholder = tf.placeholder(
            tf.int32,
            shape=(None,),
            name='labels_placeholder')
        self.sample_weights_placeholder = tf.placeholder(
            tf.float32,
            shape=(None,),
            name='sample_weights_placeholder')
        self.l2_reg = tf.Variable(0,
                                  dtype=tf.float32,
                                  trainable=False,
                                  name='l2_reg')
        self.l2_reg_assigner = get_assigners([self.l2_reg])[0]

        # Setup inference and losses
        self.logits, self.params = self.infer(self.input_placeholder, self.labels_placeholder)
        self.params_assigners = get_assigners(self.params)
        self.params_flat = flatten(self.params)
        self.params_dim = self.params_flat.shape[0]
        self.one_hot_labels = tf.one_hot(self.labels_placeholder, depth=self.num_classes)
        self.total_loss_reg, self.avg_loss_reg, self.total_loss_no_reg, self.indiv_loss = self.loss(
            self.logits,
            self.one_hot_labels,
            self.sample_weights_placeholder)
        self.loss_reg_term = tf.add_n(tf.get_collection('regularization'), name="loss_reg_term")
        self.predictions = self.predict(self.logits)
        self.accuracy = get_accuracy(self.logits, self.labels_placeholder)

        # Setup margins, but only for binary logistic regression
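        # With labels mapped to y in {-1, +1}, the margin of example i is y_i times its
        # class-1 logit; appending a constant-1 column folds the intercept into the weights,
        # so each row of indiv_grad_margin below is y_i * [x_i, 1].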
        if self.num_classes == 2:
            y = tf.cast(self.labels_placeholder, tf.float32) * 2 - 1
            self.margins = tf.multiply(y, self.logits[:, 1])
            margin_input = self.input_placeholder
            if self.fit_intercept:
                margin_input = tf.pad(margin_input, [[0, 0], [0, 1]],
                                      mode="CONSTANT", constant_values=1.0)
            self.indiv_grad_margin = tf.multiply(margin_input, tf.expand_dims(y, 1))
            self.total_grad_margin = tf.einsum(
                'ai,a->i', self.indiv_grad_margin, self.sample_weights_placeholder)

        # Calculate gradients explicitly
        self.gradients(self.input_placeholder,
                       self.logits,
                       self.one_hot_labels,
                       self.sample_weights_placeholder)

        # Calculate gradients
        # self.total_grad_loss_reg = tf.gradients(self.total_loss_reg, self.params)
        # self.total_grad_loss_no_reg = tf.gradients(self.total_loss_no_reg, self.params)
        # self.total_grad_loss_reg_flat = flatten(self.total_grad_loss_reg)
        # self.total_grad_loss_no_reg_flat = flatten(self.total_grad_loss_no_reg)

        # Calculate the Hessian explicitly
        self.hessian(self.input_placeholder,
                     self.logits,
                     self.sample_weights_placeholder)

        # This only works for a single parameter. To fix, concatenate
        # all parameters into a flat tensor, then split them up again to obtain
        # phantom parameters and use those in the model.
        # Calculate Hessians
        # if not self.fit_intercept:
            # self.hessian_reg = tf.hessians(self.total_loss_reg, self.params)[0]

        self.matrix_placeholder = tf.placeholder(
            tf.float32,
            shape=(self.params_flat.shape[0], self.params_flat.shape[0]),
            name='matrix_placeholder')
        self.vectors_placeholder = tf.placeholder(
            tf.float32,
            shape=(None, self.params_flat.shape[0]),
            name='vectors_placeholder')
        self.inverse_vp_cho = tf.cholesky_solve(tf.cholesky(self.matrix_placeholder),
                                                tf.transpose(self.vectors_placeholder))
        self.inverse_vp_lu = tfp.math.lu_solve(*tf.linalg.lu(self.matrix_placeholder),
                                               rhs=tf.transpose(self.vectors_placeholder))
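        # The Cholesky solve assumes the (regularized) Hessian fed into matrix_placeholder
        # is symmetric positive definite; the LU solve only requires it to be nonsingular.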

        self.vectors_placeholder_split = split_like(self.params, self.vectors_placeholder)
        self.hessian_vp_reg = flatten(hessian_vector_product(self.total_loss_reg,
                                                             self.params,
                                                             self.vectors_placeholder_split))
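
        # Minimal usage sketch (assumed, not part of the original method): with a dense
        # Hessian H of shape (params_dim, params_dim) and a batch of vectors V of shape
        # (n, params_dim) as numpy arrays, either solver returns H^{-1} V^T, e.g.
        #   inv_vp = sess.run(self.inverse_vp_cho,
        #                     feed_dict={self.matrix_placeholder: H,
        #                                self.vectors_placeholder: V})
        # where `sess` is whatever session the surrounding class uses.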