Example #1
File: tcav.py  Project: munema/tcav
    def get_direction_dir_sign_true_cav(mymodel, act, cav, concept, class_id,
                                        example, acts, bottleneck,
                                        activation_generator, cav_dir, i):
        # The gradient points in the direction that DECREASES the probability
        # of the target class.
        grad = np.reshape(
            mymodel.get_gradient(act, [class_id], cav.bottleneck, example), -1)
        # Get the true direction vector: mean concept activation minus the
        # activation of this example.
        mean_concept = np.mean(acts[concept][bottleneck], 0)
        act_example = activation_generator.get_activations_for_examples(
            np.expand_dims(example, 0), bottleneck)
        true_cav = np.reshape(mean_concept - act_example, -1)
        # Cache the true CAV for this example if it has not been saved yet.
        if not os.path.exists(cav_dir + '/' + 'cav-true:' + 'example-' +
                              str(i) + ':' + concept + ':' + bottleneck):
            pickle_dump(
                true_cav, cav_dir + '/' + 'cav-true:' + 'example-' + str(i) +
                ':' + concept + ':' + bottleneck)
        # The concept influences the class positively when the directional
        # derivative along the true CAV is positive, i.e. dot(grad, cav) < 0.
        dot_prod = np.dot(grad, true_cav)
        return dot_prod < 0
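The boolean above feeds the TCAV score, which is the fraction of target-class examples for which the directional derivative along the CAV is positive, i.e. dot(grad, cav) < 0 under the sign convention in the snippet. A minimal aggregation sketch (the grads array is a hypothetical stack of per-example gradients, not part of the project code):

import numpy as np

def tcav_score_from_signs(grads, cav_vector):
    # grads: (n_examples, n_features) gradients of the class probability
    # w.r.t. the bottleneck activations; they point in the direction that
    # DECREASES the class probability, as noted in the snippet above.
    # cav_vector: (n_features,) concept activation vector.
    dots = grads @ cav_vector
    return float(np.mean(dots < 0))

# score = tcav_score_from_signs(np.stack(per_example_grads), true_cav)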
Example #2
    def process_and_load_activations(self, bottleneck_names, concepts):
        acts = {}
        if self.acts_dir and not tf.io.gfile.exists(self.acts_dir):
            tf.io.gfile.makedirs(self.acts_dir)

        for concept in concepts:
            if concept not in acts:
                acts[concept] = {}
            for bottleneck_name in bottleneck_names:
                acts_path = os.path.join(
                    self.acts_dir, 'acts_{}_{}'.format(
                        concept, bottleneck_name)) if self.acts_dir else None
                if acts_path and os.path.exists(acts_path):
                    # if acts_path and tf.io.gfile.exists(acts_path):
                    # with tf.io.gfile.GFile(acts_path, 'rb') as f:
                    #   acts[concept][bottleneck_name] = np.load(
                    #       f, allow_pickle=True).squeeze()
                    try:
                        acts[concept][bottleneck_name] = pickle_load(
                            acts_path).squeeze()
                        tf.logging.debug('Loaded ' + acts_path)
                    except Exception:
                        tf.logging.info(
                            'Failed to load activations. Now calculating...')
                        acts[concept][
                            bottleneck_name] = self.get_activations_for_concept(
                                concept, bottleneck_name)

                else:
                    acts[concept][
                        bottleneck_name] = self.get_activations_for_concept(
                            concept, bottleneck_name)
                    if acts_path:
                        tf.logging.info(
                            '{} does not exist, making one...'.format(
                                acts_path))
                        # tf.io.gfile.mkdir(os.path.dirname(acts_path))
                        # with tf.io.gfile.GFile(acts_path, 'w') as f:
                        #   np.save(f, acts[concept][bottleneck_name], allow_pickle=False)
                        pickle_dump(acts[concept][bottleneck_name], acts_path)
        return acts
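A hedged usage sketch for the loader above; act_gen stands for an activation generator instance constructed elsewhere in the project, and the concept and bottleneck names are placeholders:

# Hypothetical usage; `act_gen`, the concept names and the bottleneck name
# are placeholders, not values taken from the project.
acts = act_gen.process_and_load_activations(
    bottleneck_names=['conv1'],
    concepts=['striped', 'random500_0', 'mnist-6'])

# The result is a nested dict: acts[concept][bottleneck] -> activation array.
print(acts['mnist-6']['conv1'].shape)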
Example #3
path = root_dir + 'log/2layers-colored-mnist-number_10/random100'
target = 'mnist-6'
#bottleneck = 'conv1'

# activation_path = path + '/activations/'
# cav_path = path + '/cavs/'
tcav_path = path + '/tcavs/'
keyword = 'grad_nomalize'
results_path = os.listdir(tcav_path)
all_results = []
non_dup_results = []
for result in results_path:
    # Skip results computed with gradient normalization.
    if keyword in result:
        continue
    # File names look like '<bottleneck>:<target>:<alpha>:random500_i_random500_j...'.
    match = re.match(r'.+:.+:.+:random500_(\d+)_random500_(\d+).*', result)
    positive_num = int(match.group(1))
    negative_num = int(match.group(2))
    if result.split(':')[1] == target:
        all_results.append(pickle_load(tcav_path + result))
        # Keep only one ordering of each random pair to avoid duplicates.
        if positive_num < negative_num:
            non_dup_results.append(pickle_load(tcav_path + result))

print(len(all_results))
print(len(non_dup_results))

pickle_dump(all_results, path + '/' + target + '_results_all')
pickle_dump(non_dup_results, path + '/' + target + '_results_non_dup')
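Each pickled result is the dictionary built in _run_single_set (Example #4 below), so the deduplicated list can be summarized, for example, by averaging the i_up (TCAV) scores per bottleneck:

from collections import defaultdict
import numpy as np

# Group the loaded result dicts (see Example #4) by bottleneck and average
# their TCAV scores.
scores = defaultdict(list)
for r in non_dup_results:
    scores[r['bottleneck']].append(r['i_up'])
for bottleneck_name, vals in scores.items():
    print(bottleneck_name, np.mean(vals), np.std(vals))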
Example #4
File: tcav.py  Project: munema/tcav
    def _run_single_set(self, param, overwrite=False, run_parallel=False):
        """Run TCAV with provided for one set of (target, concepts).

    Args:
      param: parameters to run
      overwrite: if True, overwrite any saved CAV files.
      run_parallel: if True, run in parallel.

    Returns:
      a dictionary of results (pandas frame)
    """

        bottleneck = param.bottleneck
        concepts = param.concepts
        target_class = param.target_class
        activation_generator = param.activation_generator
        alpha = param.alpha
        mymodel = param.model
        cav_dir = param.cav_dir
        # first check if target class is in model.

        tf.logging.info('running %s %s' % (target_class, concepts))
        keyword = ''
        is_keyword = False
        if self.logit_grad:
            keyword += ':logit_grad'
            is_keyword = True
        if self.grad_nomalize:
            keyword += ':grad_nomalize'
            is_keyword = True
        if is_keyword:
            keyword += ':'
        if self.make_random and os.path.exists(
                self.tcav_dir +
                '{}:{}:{}:{}_{}{}'.format(bottleneck, target_class, alpha,
                                          concepts[0], concepts[1], keyword)):
            return None

        # Get acts
        acts = activation_generator.process_and_load_activations(
            [bottleneck], concepts + [target_class])
        # Get CAVs
        cav_hparams = CAV.default_hparams()
        cav_hparams.alpha = alpha
        cav_instance = get_or_train_cav(concepts,
                                        bottleneck,
                                        acts,
                                        cav_dir=cav_dir,
                                        cav_hparams=cav_hparams,
                                        overwrite=overwrite)

        if not self.true_cav:
            # clean up
            for c in concepts:
                del acts[c]

        # Hypo testing
        a_cav_key = CAV.cav_key(concepts, bottleneck, cav_hparams.model_type,
                                cav_hparams.alpha)
        target_class_for_compute_tcav_score = target_class

        cav_concept = concepts[0]
        #tmp = activation_generator.get_examples_for_concept(target_class)
        i_up = self.compute_tcav_score(
            mymodel,
            target_class_for_compute_tcav_score,
            cav_concept,
            cav_instance,
            acts[target_class][cav_instance.bottleneck],
            activation_generator.get_examples_for_concept(target_class),
            acts,
            cav_instance.bottleneck,
            activation_generator,
            cav_dir,
            self.true_cav,
            self.logit_grad,
            run_parallel=run_parallel)
        val_directional_dirs = self.get_directional_dir_plus(
            mymodel, target_class_for_compute_tcav_score, cav_concept,
            cav_instance, acts[target_class][cav_instance.bottleneck],
            activation_generator.get_examples_for_concept(target_class),
            self.cav_dir, self.project_name, bottleneck, concepts[1], acts,
            activation_generator, self.true_cav, self.logit_grad,
            self.grad_nomalize, self.make_random)
        result = {
            'cav_key': a_cav_key,
            'cav_concept': cav_concept,
            'negative_concept': concepts[1],
            'target_class': target_class,
            'cav_accuracies': cav_instance.accuracies,
            'i_up': i_up,
            'val_directional_dirs': val_directional_dirs,
            'alpha': alpha,
            'bottleneck': bottleneck
        }
        del acts

        if self.make_random and not os.path.exists(
                self.tcav_dir +
                '{}:{}:{}:{}_{}{}'.format(bottleneck, target_class, alpha,
                                          concepts[0], concepts[1], keyword)):
            pickle_dump(
                result, self.tcav_dir +
                '{}:{}:{}:{}_{}{}'.format(bottleneck, target_class, alpha,
                                          concepts[0], concepts[1], keyword))
        return result
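When make_random is set, each result dictionary is pickled under a '{bottleneck}:{target}:{alpha}:{positive}_{negative}{keyword}' file name, which is what the script in Example #3 parses. A hedged sketch of reading one back (the tcav_dir path and all concrete values below are placeholders):

# Hypothetical inspection of one cached result; the tcav_dir path, bottleneck,
# target, alpha and concept names are placeholders.
fname = '{}:{}:{}:{}_{}{}'.format('conv1', 'mnist-6', 0.1,
                                  'random500_0', 'random500_1', '')
result = pickle_load(tcav_dir + fname)
print(result['cav_concept'], result['negative_concept'],
      result['i_up'], result['cav_accuracies'])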
Example #5
File: tcav.py  Project: munema/tcav
    def run(self,
            num_workers=10,
            run_parallel=False,
            overwrite=False,
            return_proto=False):
        """Run TCAV for all parameters (concept and random), write results to html.

    Args:
      num_workers: number of workers to parallelize
      run_parallel: if True, run in parallel.
      overwrite: if True, overwrite any saved CAV files.
      return_proto: if True, returns results as a tcav.Results object; else,
        return as a list of dicts.

    Returns:
      results: an object (either a Results proto object or a list of
        dictionaries) containing metrics for TCAV results.
    """
        # for random exp,  a machine with cpu = 30, ram = 300G, disk = 10G and
        # pool worker 50 seems to work.
        tf.logging.info('running %s params' % len(self.params))
        tf.logging.info('training with alpha={}'.format(self.alphas))
        results = []
        if self.true_cav:
            concept_lst = self.concepts
            bottleneck_lst = self.bottlenecks
            concept_dct = {}
            for c in self.concepts:
                concept_dct[c] = {}
                for b in self.bottlenecks:
                    concept_dct[c][b] = 0

        now = time.time()
        if run_parallel:
            pool = multiprocessing.Pool(num_workers)
            for i, res in enumerate(
                    pool.imap(
                        lambda p: self._run_single_set(
                            p, overwrite=overwrite, run_parallel=run_parallel),
                        self.params), 1):
                tf.logging.info('Finished running param %s of %s' %
                                (i, len(self.params)))
                results.append(res)
        else:
            keyword = ''
            if self.logit_grad:
                keyword += ':logit_grad'
            if self.grad_nomalize:
                keyword += ':grad_nomalize'
            for i, param in enumerate(self.params):
                tf.logging.info('Running param %s of %s' %
                                (i, len(self.params)))
                # Skip random-vs-random runs.
                if 'random' in param.concepts[0] and not self.make_random:
                    continue
                # Compute only the random-vs-random runs that are not cached yet.
                elif self.make_random and (
                        'random' not in param.concepts[0] or
                        os.path.exists(self.tcav_dir + '{}:{}:{}:{}_{}{}'.format(
                            param.bottleneck, param.target_class, param.alpha,
                            param.concepts[0], param.concepts[1], keyword))):
                    continue
                # Compute with the true CAV (one run per concept/bottleneck pair).
                elif self.true_cav:
                    if (param.concepts[0] not in concept_lst and
                            param.bottleneck not in bottleneck_lst):
                        continue
                    elif concept_dct[param.concepts[0]][param.bottleneck] == 1:
                        continue

                    concept_dct[param.concepts[0]][param.bottleneck] = 1
                results.append(
                    self._run_single_set(param,
                                         overwrite=overwrite,
                                         run_parallel=run_parallel))
        tf.logging.info('Done running %s params. Took %s seconds...' %
                        (len(self.params), time.time() - now))

        keyword = ''
        is_keyword = False
        if self.logit_grad:
            keyword += ':logit_grad'
            is_keyword = True
        if self.grad_nomalize:
            keyword += ':grad_nomalize'
            is_keyword = True

        if return_proto:
            return utils.results_to_proto(results)
        elif not self.make_random and not self.true_cav:
            pickle_dump(results, self.tcav_dir + self.project_name + keyword)
        elif not self.make_random and self.true_cav:
            pickle_dump(
                results,
                self.tcav_dir + 'trueCAV-' + self.project_name + keyword)
        return results
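A hedged driver sketch; my_tcav stands for an already-constructed TCAV instance from this project (its constructor arguments are not shown in these snippets), and only the run() signature above is assumed:

# Hypothetical driver; `my_tcav` is an already-constructed TCAV instance.
results = my_tcav.run(num_workers=1,
                      run_parallel=False,  # run sequentially
                      overwrite=False,
                      return_proto=False)
print('got {} results'.format(len(results)))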
Example #6
File: tcav.py  Project: munema/tcav
    def get_directional_dir_plus(mymodel, target_class, concept, cav,
                                 class_acts, examples, cav_dir, project_name,
                                 bottleneck, negative_concept, acts,
                                 activation_generator, true_cav, logit_grad,
                                 grad_nomalize, make_random):
        """Compute per-example directional derivatives along the CAV."""
        class_id = mymodel.label_to_id(target_class)
        directional_dir_vals = []
        cav_vector_vals = []
        if logit_grad:
            if os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                              target_class):
                grad_vals = pickle_load(cav_dir + '/logitgrad:' + bottleneck +
                                        ':' + target_class)
            else:
                grad_vals = []
        else:
            if os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                              target_class):
                grad_vals = pickle_load(cav_dir + '/grad:' + bottleneck + ':' +
                                        target_class)
            else:
                grad_vals = []
        if os.path.exists(cav_dir + '/predict:' + target_class):
            class_pred = pickle_load(cav_dir + '/predict:' + target_class)
        else:
            class_pred = []

        for i in range(len(class_acts)):
            act = np.expand_dims(class_acts[i], 0)
            if len(act.shape) == 3:
                act = np.expand_dims(act, 3)
            example = examples[i]
            if logit_grad:
                if not os.path.exists(cav_dir + '/logitgrad:' + bottleneck +
                                      ':' + target_class):
                    grad = np.reshape(
                        mymodel.get_gradient(act, [class_id], cav.bottleneck,
                                             example), -1)
                else:
                    grad = grad_vals[i]
            else:
                if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                                      target_class):
                    grad = np.reshape(
                        mymodel.get_gradient(act, [class_id], cav.bottleneck,
                                             example), -1)
                else:
                    grad = grad_vals[i]
            if not os.path.exists(cav_dir + '/predict:' + target_class):
                pred = mymodel.get_predictions(np.expand_dims(example,
                                                              0))[:, class_id]
            else:
                pred = class_pred[i]

            if true_cav:
                # Get the true direction vector: mean concept activation
                # minus this example's activation.
                mean_concept = np.mean(acts[concept][bottleneck], 0)
                act_example = activation_generator.get_activations_for_examples(
                    np.expand_dims(example, 0), bottleneck)
                cav_vector = np.reshape(mean_concept - act_example, -1)
                directional_dir = np.dot(grad, cav_vector)
            else:
                cav_vector = cav.get_direction(concept)
                if not grad_nomalize:
                    if logit_grad:
                        directional_dir = np.dot(grad, cav_vector)
                    else:
                        directional_dir = np.dot(-pred * grad, cav_vector)
                else:
                    if logit_grad:
                        directional_dir = cos_sim(grad, cav_vector)
                    else:
                        directional_dir = cos_sim(-grad, cav_vector)
            directional_dir_vals.append(directional_dir)
            cav_vector_vals.append(cav_vector)
            # Cache the freshly computed gradient only when its corresponding
            # file does not exist yet (probability gradients and logit
            # gradients are stored in separate files).
            grad_path = (cav_dir + '/logitgrad:' + bottleneck + ':' +
                         target_class) if logit_grad else (
                             cav_dir + '/grad:' + bottleneck + ':' +
                             target_class)
            if not os.path.exists(grad_path):
                grad_vals.append(grad)
            if not os.path.exists(cav_dir + '/predict:' + target_class):
                class_pred.append(pred)
            #logit_grad = np.reshape(mymodel.get_logit_gradient(act,class_id,cav.bottleneck).squeeze(),-1)

        if logit_grad:
            if not os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                                  target_class):
                pickle_dump(
                    grad_vals,
                    cav_dir + '/logitgrad:' + bottleneck + ':' + target_class)
        else:
            if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                                  target_class):
                pickle_dump(
                    grad_vals,
                    cav_dir + '/grad:' + bottleneck + ':' + target_class)
        if not os.path.exists(cav_dir + '/predict:' + target_class):
            class_pred = mymodel.get_predictions(examples)[:, class_id]
            pickle_dump(class_pred, cav_dir + '/predict:' + target_class)

        return directional_dir_vals
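cos_sim is a project helper that does not appear in these snippets; a minimal NumPy equivalent of what it presumably computes (an assumption, not the project's exact implementation):

import numpy as np

def cos_sim(a, b):
    # Cosine similarity between two flattened vectors (assumed behaviour of
    # the project's helper, not its exact implementation).
    a = np.reshape(a, -1)
    b = np.reshape(b, -1)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))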