def test_get_batch_num_gradients_cityblock(shape, batch_size):
    u = np.random.rand(batch_size, *shape)
    v = np.random.rand(1, *shape)

    grad_true = np.sign(u - v).reshape(batch_size, 1, *shape)  # expand dims to incorporate 1-d scalar response
    grad_approx = num_grad_batch(cityblock_batch, u, args=(v,))

    assert grad_approx.shape == grad_true.shape
    assert np.allclose(grad_true, grad_approx)
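# Hedged sketch (not the library implementation): the tests in this file assume
# num_grad_batch returns batched central-difference gradients of shape
# (batch, f_dim, *x_shape). A minimal reference version under those assumptions
# could look like this:
def _num_grad_batch_sketch(func, X, args=(), eps=1e-6):
    """Numerical gradient of func w.r.t. each sample in the batch X."""
    batch = X.shape[0]
    X_flat = X.reshape(batch, -1)
    f0 = np.asarray(func(X, *args)).reshape(batch, -1)
    grads = np.zeros((batch, f0.shape[1], X_flat.shape[1]))
    for j in range(X_flat.shape[1]):
        # perturb one flattened feature at a time across the whole batch
        X_hi, X_lo = X_flat.copy(), X_flat.copy()
        X_hi[:, j] += eps
        X_lo[:, j] -= eps
        f_hi = np.asarray(func(X_hi.reshape(X.shape), *args)).reshape(batch, -1)
        f_lo = np.asarray(func(X_lo.reshape(X.shape), *args)).reshape(batch, -1)
        grads[:, :, j] = (f_hi - f_lo) / (2 * eps)  # central difference
    return grads.reshape(batch, f0.shape[1], *X.shape[1:])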
def test_get_batch_num_gradients_logistic_iris(logistic_iris, batch_size):
    X, y, lr = logistic_iris
    predict_fn = lr.predict_proba
    x = X[0:batch_size]
    probas = predict_fn(x)

    # true gradient of the logistic regression wrt x
    grad_true = np.zeros((batch_size, 3, 4))
    for i, p in enumerate(probas):
        p = p.reshape(1, 3)
        grad = (p.T * (np.eye(3, 3) - p)) @ lr.coef_
        grad_true[i, :, :] = grad
    assert grad_true.shape == (batch_size, 3, 4)

    grad_approx = num_grad_batch(predict_fn, x)

    assert grad_approx.shape == grad_true.shape
    assert np.allclose(grad_true, grad_approx)
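# For reference, the closed-form Jacobian checked above follows from the
# softmax derivative: with p = softmax(W x + b) and W = lr.coef_
# (n_classes x n_features), dp_k/dx_i = sum_j p_k (delta_kj - p_j) W_ji,
# i.e. J = (diag(p) - p p^T) @ W, which is exactly what
# (p.T * (np.eye(3, 3) - p)) @ lr.coef_ computes per sample. A vectorized
# equivalent of the loop (a sketch, not part of the test) would be:
#     grad_true = (probas[:, :, None] * (np.eye(3) - probas[:, None, :])) @ lr.coef_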
def _minimize_loss(self, X: np.ndarray, X_init: np.ndarray, Y: np.ndarray) -> None:
    # keep track of the number of CFs found for each lambda in the outer loop
    cf_found = np.zeros((self.batch_size, self.max_lam_steps))

    # set the lower and upper bound for lambda to scale the distance loss term
    lam_lb = np.zeros(self.batch_size)
    lam_ub = np.ones(self.batch_size) * 1e10

    # make a one-hot vector of targets; row-wise indexing is needed here,
    # np.put with the flat indices from argmax would only be correct for the first row
    Y_ohe = np.zeros(Y.shape)
    Y_ohe[np.arange(Y.shape[0]), np.argmax(Y, axis=1)] = 1

    # on the first run, estimate the lambda bounds with an exponential sweep
    n_orders = 10
    n_steps = self.max_iter // n_orders
    lams = np.array([self.lam_init / 10 ** i for i in range(n_orders)])  # exponential decay
    cf_count = np.zeros_like(lams)
    logger.debug('Initial lambda sweep: %s', lams)

    X_current = X_init
    # TODO: this whole initial sweep should arguably be optional
    for ix, l_step in enumerate(lams):
        lam = np.ones(self.batch_size) * l_step
        self.sess.run(self.tf_init)
        self.sess.run(self.setup, {self.assign_orig: X,
                                   self.assign_cf: X_current,
                                   self.assign_target: Y_ohe})

        for i in range(n_steps):
            # numerical gradients of the prediction loss for black-box models
            grads_num = np.zeros(self.data_shape)
            if not self.model:
                pred = self.predict_class_fn(X_current)
                prediction_grad = num_grad_batch(self.predict_class_fn, X_current, eps=self.eps)

                # squared difference prediction loss and its gradient by the chain rule
                loss_pred = (pred - self.target_proba.eval(session=self.sess)) ** 2
                grads_num = 2 * (pred - self.target_proba.eval(session=self.sess)) * prediction_grad
                grads_num = grads_num.reshape(self.data_shape)  # TODO: is this reshape correct?

            # add values to tensorboard (1st item in batch only) every 50 steps
            if self.debug and not i % 50:
                if not self.model:
                    self._write_tb(lam, lam_lb, lam_ub, cf_found, X_current,
                                   loss_pred=loss_pred, pred=pred)
                else:
                    self._write_tb(lam, lam_lb, lam_ub, cf_found, X_current)

            # compute graph gradients
            grads_vars_graph = self.sess.run(self.compute_grads, feed_dict={self.lam: lam})
            grads_graph = [g for g, _ in grads_vars_graph][0]

            # apply the combined graph and numerical gradients
            gradients = grads_graph + grads_num
            self.sess.run(self.apply_grads, feed_dict={self.grad_ph: gradients, self.lam: lam})

            # does the counterfactual condition hold?
            X_current = self.sess.run(self.cf)
            cond = self._prob_condition(X_current).squeeze()
            if cond:
                cf_count[ix] += 1

    # find the lower bound
    logger.debug('cf_count: %s', cf_count)
    try:
        # take the second order of magnitude with some CFs as the lower bound
        lb_ix = np.where(cf_count > 0)[0][1]  # TODO: robust?
    except IndexError:
        logger.error('No appropriate lambda range found, try decreasing lam_init')
        return
    lam_lb = np.ones(self.batch_size) * lams[lb_ix]

    # find the upper bound
    try:
        ub_ix = np.where(cf_count == 0)[0][-1]  # TODO: is 0 robust?
    except IndexError:
        ub_ix = 0
        logger.debug('Could not find an upper bound for lambda where no solutions were found, '
                     'setting the upper bound to lam_init=%s', lams[ub_ix])
    lam_ub = np.ones(self.batch_size) * lams[ub_ix]

    # start the search in the middle of the bounds
    lam = (lam_lb + lam_ub) / 2
    logger.debug('Found upper and lower bounds: %s, %s', lam_lb[0], lam_ub[0])

    # on subsequent runs, bisect lambda within the bounds found initially
    X_current = X_init
    for l_step in range(self.max_lam_steps):
        self.sess.run(self.tf_init)

        # assign variables for the current iteration
        self.sess.run(self.setup, {self.assign_orig: X,
                                   self.assign_cf: X_current,
                                   self.assign_target: Y_ohe})

        # count consecutive steps where the counterfactual condition does/does not hold
        found, not_found = 0, 0

        # number of gradient descent steps in each inner loop
        for i in range(self.max_iter):
            # numerical gradients of the prediction loss for black-box models
            grads_num = np.zeros(self.data_shape)
            if not self.model:
                pred = self.predict_class_fn(X_current)
                prediction_grad = num_grad_batch(self.predict_class_fn, X_current, eps=self.eps)

                # squared difference prediction loss and its gradient by the chain rule
                loss_pred = (pred - self.target_proba.eval(session=self.sess)) ** 2
                grads_num = 2 * (pred - self.target_proba.eval(session=self.sess)) * prediction_grad
                grads_num = grads_num.reshape(self.data_shape)

            # add values to tensorboard (1st item in batch only) every 50 steps
            if self.debug and not i % 50:
                if not self.model:
                    self._write_tb(lam, lam_lb, lam_ub, cf_found, X_current,
                                   found=found, not_found=not_found,
                                   loss_pred=loss_pred, pred=pred)
                else:
                    self._write_tb(lam, lam_lb, lam_ub, cf_found, X_current,
                                   found=found, not_found=not_found)

            # compute graph gradients
            grads_vars_graph = self.sess.run(self.compute_grads, feed_dict={self.lam: lam})
            grads_graph = [g for g, _ in grads_vars_graph][0]

            # apply the combined graph and numerical gradients
            gradients = grads_graph + grads_num
            self.sess.run(self.apply_grads, feed_dict={self.grad_ph: gradients, self.lam: lam})

            # does the counterfactual condition hold?
            X_current = self.sess.run(self.cf)
            cond = self._prob_condition(X_current)
            if cond:
                self._update_exp(i, l_step, lam, cf_found, X_current)
                found += 1
                not_found = 0
            else:
                found = 0
                not_found += 1

            # early stopping criterion - if no solutions or enough solutions found, change lambda
            if found >= self.early_stop or not_found >= self.early_stop:
                break

        # adjust the lambda constant via bisection at the end of the outer loop
        self._bisect_lambda(cf_found, l_step, lam, lam_lb, lam_ub)

    self.return_dict['success'] = True
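# Hedged sketch: _bisect_lambda is called above but not shown in this section.
# Given how lam, lam_lb and lam_ub are used, a standard per-sample bisection
# update could look like the following (an assumption, not the library's code);
# all three arrays are updated in place, matching the call site, which ignores
# any return value:
def _bisect_lambda_sketch(self, cf_found, l_step, lam, lam_lb, lam_ub):
    for ix in range(self.batch_size):
        if cf_found[ix, l_step]:
            # a CF was found for this lambda: the distance term can be weighted
            # more heavily, so raise the lower bound and bisect
            lam_lb[ix] = max(lam[ix], lam_lb[ix])
            if lam_ub[ix] < 1e9:
                lam[ix] = (lam_lb[ix] + lam_ub[ix]) / 2
            else:
                lam[ix] *= 10  # no finite upper bound yet, grow geometrically
        else:
            # no CF found: relax the distance penalty by lowering the upper
            # bound and bisecting
            lam_ub[ix] = min(lam[ix], lam_ub[ix])
            if lam_lb[ix] > 0:
                lam[ix] = (lam_lb[ix] + lam_ub[ix]) / 2
            else:
                lam[ix] /= 10  # no positive lower bound yet, shrink geometrically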