Example #1
def test_get_batch_num_gradients_cityblock(shape, batch_size):
    u = np.random.rand(batch_size, *shape)
    v = np.random.rand(1, *shape)

    grad_true = np.sign(u - v).reshape(batch_size, 1, *shape)  # expand dims to incorporate 1-d scalar response
    grad_approx = num_grad_batch(cityblock_batch, u, args=tuple([v]))

    assert grad_approx.shape == grad_true.shape
    assert np.allclose(grad_true, grad_approx)
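
This test checks a batched numerical gradient of the L1 (cityblock) distance against its analytic gradient, sign(u - v). As a rough illustration of what such a routine computes, here is a minimal central-difference sketch; the helper name central_diff_grad_batch, the default eps and the shape handling are assumptions for illustration, not the num_grad_batch implementation exercised by the test.

import numpy as np

def central_diff_grad_batch(func, X, eps=1e-4, args=()):
    # Illustrative central-difference Jacobian of a batched function.
    # func maps (N, *in_shape) -> (N, *out_shape); the result has shape
    # (N, *out_shape, *in_shape), matching the shapes asserted above.
    def f(Z):
        out = np.asarray(func(Z, *args))
        # promote a scalar-per-instance response to shape (N, 1)
        return out.reshape(Z.shape[0], 1) if out.ndim == 1 else out

    N, in_shape = X.shape[0], X.shape[1:]
    out_shape = f(X).shape[1:]
    grads = np.zeros((N,) + out_shape + in_shape)
    for idx in np.ndindex(*in_shape):
        shift = np.zeros_like(X)
        shift[(slice(None),) + idx] = eps
        diff = (f(X + shift) - f(X - shift)) / (2 * eps)
        grads[(slice(None),) + (slice(None),) * len(out_shape) + idx] = diff
    return grads

For 1-D feature vectors, central_diff_grad_batch(lambda Z, w: np.abs(Z - w).sum(axis=1), u, args=(v,)) reproduces sign(u - v) up to finite-difference error, which is exactly what the test asserts for num_grad_batch.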
Example #2
def test_get_batch_num_gradients_logistic_iris(logistic_iris, batch_size):
    X, y, lr = logistic_iris
    predict_fn = lr.predict_proba
    x = X[0:batch_size]
    probas = predict_fn(x)

    # true gradient of the logistic regression wrt x
    grad_true = np.zeros((batch_size, 3, 4))
    for i, p in enumerate(probas):
        p = p.reshape(1, 3)
        grad = (p.T * (np.eye(3, 3) - p) @ lr.coef_)
        grad_true[i, :, :] = grad
    assert grad_true.shape == (batch_size, 3, 4)

    grad_approx = num_grad_batch(predict_fn, x)

    assert grad_approx.shape == grad_true.shape
    assert np.allclose(grad_true, grad_approx)
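
The broadcasting expression on the grad line builds the softmax Jacobian: for p = softmax(W x), the derivative of p with respect to x is (diag(p) - p pᵀ) W, and p.T * (np.eye(3) - p) is diag(p) - p pᵀ written with broadcasting. A small self-contained check of that identity; the probability vector p and the matrix W below are made-up stand-ins for the predict_proba output and lr.coef_.

import numpy as np

p = np.array([[0.2, 0.3, 0.5]])   # example class probabilities, shape (1, 3)
W = np.random.rand(3, 4)          # stands in for lr.coef_, shape (n_classes, n_features)

jac_broadcast = p.T * (np.eye(3) - p) @ W                   # form used in the test
jac_explicit = (np.diag(p.ravel()) - np.outer(p, p)) @ W    # diag(p) - p p^T, spelled out
assert np.allclose(jac_broadcast, jac_explicit)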
Example #3
    def _minimize_loss(self, X: np.ndarray, X_init: np.ndarray,
                       Y: np.ndarray) -> None:

        # keep track of the number of CFs found for each lambda in outer loop
        cf_found = np.zeros((self.batch_size, self.max_lam_steps))

        # set the lower and upper bound for lambda to scale the distance loss term
        lam_lb = np.zeros(self.batch_size)
        lam_ub = np.ones(self.batch_size) * 1e10

        # make a one-hot vector of targets
        Y_ohe = np.zeros(Y.shape)
        np.put(Y_ohe, np.argmax(Y, axis=1), 1)

        # on first run estimate lambda bounds
        n_orders = 10
        n_steps = self.max_iter // n_orders
        lams = np.array([self.lam_init / 10**i
                         for i in range(n_orders)])  # exponential decay
        cf_count = np.zeros_like(lams)
        logger.debug('Initial lambda sweep: %s', lams)

        X_current = X_init
        # TODO this whole initial loop should be optional?
        for ix, l_step in enumerate(lams):
            lam = np.ones(self.batch_size) * l_step
            self.sess.run(self.tf_init)
            self.sess.run(
                self.setup, {
                    self.assign_orig: X,
                    self.assign_cf: X_current,
                    self.assign_target: Y_ohe
                })

            for i in range(n_steps):

                # numerical gradients
                grads_num = np.zeros(self.data_shape)
                if not self.model:
                    pred = self.predict_class_fn(X_current)
                    prediction_grad = num_grad_batch(self.predict_class_fn,
                                                     X_current,
                                                     eps=self.eps)

                    # squared difference prediction loss
                    loss_pred = (pred -
                                 self.target_proba.eval(session=self.sess))**2
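                    # chain rule: d(pred - target)^2 / dx = 2 * (pred - target) * dpred/dx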
                    grads_num = 2 * (pred - self.target_proba.eval(
                        session=self.sess)) * prediction_grad

                    grads_num = grads_num.reshape(
                        self.data_shape)  # TODO? correct?

                # add values to tensorboard (1st item in batch only) every n steps
                if self.debug and not i % 50:
                    if not self.model:
                        self._write_tb(lam,
                                       lam_lb,
                                       lam_ub,
                                       cf_found,
                                       X_current,
                                       loss_pred=loss_pred,
                                       pred=pred)
                    else:
                        self._write_tb(lam, lam_lb, lam_ub, cf_found,
                                       X_current)

                # compute graph gradients
                grads_vars_graph = self.sess.run(self.compute_grads,
                                                 feed_dict={self.lam: lam})
                grads_graph = [g for g, _ in grads_vars_graph][0]

                # apply gradients
                gradients = grads_graph + grads_num
                self.sess.run(self.apply_grads,
                              feed_dict={
                                  self.grad_ph: gradients,
                                  self.lam: lam
                              })

                # does the counterfactual condition hold?
                X_current = self.sess.run(self.cf)
                cond = self._prob_condition(X_current).squeeze()
                if cond:
                    cf_count[ix] += 1

        # find the lower bound
        logger.debug('cf_count: %s', cf_count)
        try:
            lb_ix = np.where(cf_count > 0)[0][
                1]  # take the second order of magnitude with some CFs as lower-bound
            # TODO robust?
        except IndexError:
            logger.error(
                'No appropriate lambda range found, try decreasing lam_init')
            return
        lam_lb = np.ones(self.batch_size) * lams[lb_ix]

        # find the upper bound
        try:
            ub_ix = np.where(cf_count == 0)[0][-1]  # TODO is 0 robust?
        except IndexError:
            ub_ix = 0
            logger.debug(
                'Could not find upper bound for lambda where no solutions found, setting upper bound to '
                'lam_init=%s', lams[ub_ix])
        lam_ub = np.ones(self.batch_size) * lams[ub_ix]

        # start the search in the middle
        lam = (lam_lb + lam_ub) / 2

        logger.debug('Found upper and lower bounds: %s, %s', lam_lb[0],
                     lam_ub[0])

        # on subsequent runs bisect lambda within the bounds found initially
        X_current = X_init
        for l_step in range(self.max_lam_steps):
            self.sess.run(self.tf_init)

            # assign variables for the current iteration
            self.sess.run(
                self.setup, {
                    self.assign_orig: X,
                    self.assign_cf: X_current,
                    self.assign_target: Y_ohe
                })

            found, not_found = 0, 0
            # number of gradient descent steps in each inner loop
            for i in range(self.max_iter):

                # numerical gradients
                grads_num = np.zeros(self.data_shape)
                if not self.model:
                    pred = self.predict_class_fn(X_current)
                    prediction_grad = num_grad_batch(self.predict_class_fn,
                                                     X_current,
                                                     eps=self.eps)

                    # squared difference prediction loss
                    loss_pred = (pred -
                                 self.target_proba.eval(session=self.sess))**2
                    grads_num = 2 * (pred - self.target_proba.eval(
                        session=self.sess)) * prediction_grad

                    grads_num = grads_num.reshape(self.data_shape)

                # add values to tensorboard (1st item in batch only) every n steps
                if self.debug and not i % 50:
                    if not self.model:
                        self._write_tb(lam,
                                       lam_lb,
                                       lam_ub,
                                       cf_found,
                                       X_current,
                                       found=found,
                                       not_found=not_found,
                                       loss_pred=loss_pred,
                                       pred=pred)
                    else:
                        self._write_tb(lam,
                                       lam_lb,
                                       lam_ub,
                                       cf_found,
                                       X_current,
                                       found=found,
                                       not_found=not_found)

                # compute graph gradients
                grads_vars_graph = self.sess.run(self.compute_grads,
                                                 feed_dict={self.lam: lam})
                grads_graph = [g for g, _ in grads_vars_graph][0]

                # apply gradients
                gradients = grads_graph + grads_num
                self.sess.run(self.apply_grads,
                              feed_dict={
                                  self.grad_ph: gradients,
                                  self.lam: lam
                              })

                # does the counterfactual condition hold?
                X_current = self.sess.run(self.cf)
                cond = self._prob_condition(X_current)
                if cond:
                    self._update_exp(i, l_step, lam, cf_found, X_current)
                    found += 1
                    not_found = 0
                else:
                    found = 0
                    not_found += 1

                # early stopping criterion - if no solutions or enough solutions found, change lambda
                if found >= self.early_stop or not_found >= self.early_stop:
                    break

            # adjust the lambda constant via bisection at the end of the outer loop
            self._bisect_lambda(cf_found, l_step, lam, lam_lb, lam_ub)

        self.return_dict['success'] = True
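
The helper _bisect_lambda is not shown above. The shown code brackets lambda between a value at which counterfactuals are still found (the lower bound) and a larger value at which none are found (the upper bound), and starts the search at the midpoint; the outer loop then narrows that bracket after each round of gradient descent. A rough standalone sketch of such an update, with a hypothetical bisect_lambda helper and a boolean found_cf mask standing in for the per-instance bookkeeping; this is not the actual _bisect_lambda implementation.

import numpy as np

def bisect_lambda(found_cf, lam, lam_lb, lam_ub):
    # Hypothetical bisection update, not the _bisect_lambda called above.
    # found_cf[i] is True if a counterfactual was found for instance i at
    # lam[i] in the last outer step.  Success lets lambda grow (more weight
    # on the distance term), so the lower bound moves up to lam; failure
    # shrinks lambda (more weight on the prediction term), so the upper
    # bound moves down to lam.  Bounds are updated in place.
    lam_lb[found_cf] = np.maximum(lam_lb[found_cf], lam[found_cf])
    lam_ub[~found_cf] = np.minimum(lam_ub[~found_cf], lam[~found_cf])
    return (lam_lb + lam_ub) / 2.0   # next lambda: midpoint of the bracket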