def cavs(self, min_acc=0., ow=True):
  """Calculates CAVs for all discovered concepts.

  This method calculates and saves CAVs for all the discovered concepts
  versus all random concepts in all the bottleneck layers.

  Args:
    min_acc: Delete a discovered concept if the average classification
      accuracy of its CAV is less than min_acc.
    ow: If True, overwrites an already calculated CAV.

  Returns:
    A dictionary of classification accuracies of the linear boundaries
    orthogonal to the CAV vectors.
  """
  acc = {bn: {} for bn in self.bottlenecks}
  concepts_to_delete = []
  for bn in self.bottlenecks:
    for concept in self.dic[bn]['concepts']:
      concept_imgs = self.dic[bn][concept]['images']
      concept_acts = tcav_helpers.get_acts_from_images(
          concept_imgs, self.model, bn)
      acc[bn][concept] = self._concept_cavs(bn, concept, concept_acts, ow=ow)
      # Mark low-accuracy concepts for deletion after all CAVs are computed.
      if np.mean(acc[bn][concept]) < min_acc:
        concepts_to_delete.append((bn, concept))
    target_class_acts = tcav_helpers.get_acts_from_images(
        self.discovery_images, self.model, bn)
    acc[bn][self.target_class] = self._concept_cavs(
        bn, self.target_class, target_class_acts, ow=ow)
    rnd_acts = self._random_concept_activations(bn, self.random_concept)
    acc[bn][self.random_concept] = self._concept_cavs(
        bn, self.random_concept, rnd_acts, ow=ow)
  for bn, concept in concepts_to_delete:
    self.delete_concept(bn, concept)
  return acc
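
# Usage sketch (hypothetical, not part of the original module): assuming `cd`
# is an already-constructed ConceptDiscovery-like instance whose discovery
# step has populated `cd.dic`, CAV accuracies could be computed and inspected
# like this:
#
#   accuracies = cd.cavs(min_acc=0.5, ow=False)
#   for bn in accuracies:
#     for concept, accs in accuracies[bn].items():
#       print(bn, concept, np.mean(accs))
#
# Note that concepts whose mean CAV accuracy falls below min_acc are deleted
# as a side effect, though their (low) scores remain in the returned dict.
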
def find_profile(self, bn, images, mean=True):
  """Transforms images from pixel space to concept space.

  Args:
    bn: Bottleneck layer.
    images: Data points to be transformed.
    mean: If True, the profile of each concept is the average inner product
      over all of that concept's CAV vectors rather than the stacked-up
      version.

  Returns:
    The concept profile of the input images in the bn layer.
  """
  profile = np.zeros(
      (len(images), len(self.dic[bn]['concepts']), self.num_random_exp))
  class_acts = tcav_helpers.get_acts_from_images(
      images, self.model, bn).reshape([len(images), -1])
  randoms = ['random500_{}'.format(i) for i in range(self.num_random_exp)]
  for i, concept in enumerate(self.dic[bn]['concepts']):
    profile[:, i, :] = self._concept_profile(bn, class_acts, concept, randoms)
  if mean:
    profile = np.mean(profile, -1)
  return profile
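
# Usage sketch (hypothetical names): with mean=True the returned profile has
# shape (n_images, n_concepts); with mean=False it is
# (n_images, n_concepts, num_random_exp). 'mixed8' below is only an
# illustrative bottleneck name:
#
#   profile = cd.find_profile('mixed8', test_images, mean=True)
#   assert profile.shape == (len(test_images),
#                            len(cd.dic['mixed8']['concepts']))
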
def _random_concept_activations(self, bottleneck, random_concept):
  """Wrapper for computing or loading activations of random concepts.

  Takes care of making, caching (if desired) and loading activations.

  Args:
    bottleneck: The bottleneck layer name.
    random_concept: Name of the random concept, e.g. "random500_0".

  Returns:
    A numpy array of activations of the random concept's images in the
    given bottleneck layer.
  """
  rnd_acts_path = os.path.join(
      self.activation_dir, 'acts_{}_{}'.format(random_concept, bottleneck))
  # Compute and cache the activations only if they are not already on disk.
  if not tf.gfile.Exists(rnd_acts_path):
    rnd_imgs = self.load_concept_imgs(random_concept, self.max_imgs)
    acts = tcav_helpers.get_acts_from_images(rnd_imgs, self.model, bottleneck)
    with tf.gfile.Open(rnd_acts_path, 'wb') as f:
      np.save(f, acts, allow_pickle=False)
    del acts
    del rnd_imgs
  with tf.gfile.Open(rnd_acts_path, 'rb') as f:
    return np.load(f).squeeze()
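
# The helper above uses a simple compute-or-load caching pattern: activations
# are computed once, persisted with np.save, and reused on later calls. A
# minimal standalone sketch of the same idea with plain file I/O instead of
# tf.gfile (the function and argument names below are illustrative, not part
# of the original module):
#
#   def cached_activations(path, compute_fn):
#     """Loads activations from `path`, computing and caching them if absent."""
#     if not os.path.exists(path):
#       with open(path, 'wb') as f:
#         np.save(f, compute_fn(), allow_pickle=False)
#     with open(path, 'rb') as f:
#       return np.load(f).squeeze()
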
def _return_gradients(self, images):
  """Calculates the gradient tensors for the given images.

  Args:
    images: Images for which we want to calculate gradients.

  Returns:
    A dictionary of the images' gradients in all bottleneck layers.
  """
  gradients = {}
  class_id = self.model.label_to_id(self.target_class.replace('_', ' '))
  for bn in self.bottlenecks:
    acts = tcav_helpers.get_acts_from_images(images, self.model, bn)
    # Each image's gradient is flattened into a single row vector.
    bn_grads = np.zeros((acts.shape[0], np.prod(acts.shape[1:])))
    for i in range(len(acts)):
      bn_grads[i] = self.model.get_gradient(
          acts[i:i + 1], [class_id], bn).reshape(-1)
    gradients[bn] = bn_grads
  return gradients
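
# Usage sketch (hypothetical, not part of the original module): for each
# bottleneck `bn`, `gradients[bn]` has shape (n_images, prod(act_shape)), so a
# dot product with a flattened CAV vector gives the per-image directional
# derivative used by TCAV (the sign convention depends on whether the model
# returns gradients of the logit or of the loss):
#
#   grads = cd._return_gradients(cd.discovery_images)
#   directional_derivs = np.dot(grads[bn], flattened_cav)  # flattened_cav is hypothetical
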