示例#1
0
    def get_test_samples(
            self,
            num_images=10) -> typing.Iterable[typing.Mapping[str, Tensor]]:
        """
        Gets a fixed, evenly strided selection of samples from the test dataset.

        The selection is deterministic (not random): the items at indices
        0, 5, 10, ... of ``self.dataset_test`` are taken, so repeated calls
        return the same samples.

        Args:
            num_images: number of samples to return.

        Returns:
            A list of ``num_images`` samples, each passed through
            ``dict_to_device`` together with ``self.flags.device``.

        Raises:
            Whatever ``self.dataset_test`` raises for an out-of-range index;
            the largest index requested is ``5 * (num_images - 1)``.
        """
        samples = []
        for idx in range(0, num_images * 5, 5):
            # Items unpack into two elements; only the first one is used here.
            sample, _ = self.dataset_test[idx]
            samples.append(dict_to_device(sample, self.flags.device))

        return samples
示例#2
0
def calc_coherence_random_gen(exp, mm_vae, iteration: int, rand_coherences: Mapping[str, typing.List], batch_d: dict) -> \
        Mapping[str, typing.List]:
    """
    Generates unconditional samples, evaluates their coherence and appends
    one value per label to ``rand_coherences``.

    Args:
        exp: experiment object; ``exp.flags`` and ``exp.labels`` are read.
        mm_vae: the model; when ``exp.flags.distributed`` is truthy the
            generation is done through ``mm_vae.module`` instead.
        iteration: index of the current batch, used to decide whether the
            generated samples are still written to disk.
        rand_coherences: per-label lists that are extended in place.
        batch_d: current data batch, forwarded to ``save_generated_samples``.

    Returns:
        The same ``rand_coherences`` mapping that was passed in.
    """
    flags = exp.flags

    # pick the object that actually owns generate()
    generator = mm_vae.module if flags.distributed else mm_vae
    generated = dict_to_device(generator.generate(), flags.device)

    # classify the generated examples and record one value per label
    scores = calculate_coherence(exp, generated)
    for label in exp.labels:
        rand_coherences[label].append(scores[label])

    # only the first num_samples_fid samples are saved, and only on request
    below_fid_limit = (flags.batch_size * iteration) < flags.num_samples_fid
    if below_fid_limit and flags.save_figure:
        save_generated_samples(exp, generated, iteration, batch_d)

    return rand_coherences
示例#3
0
    def get_test_samples(
            self,
            num_images=10) -> typing.Iterable[typing.Mapping[str, Tensor]]:
        """
        Draws ``num_images`` test samples at random for the cond. generation.

        Every sample is picked with an independent ``random.randint`` draw
        over the whole test dataset, so the same item may appear twice.

        Returns:
            A list of samples, each passed through ``utils.dict_to_device``
            together with ``self.flags.device``.
        """
        dataset = self.dataset_test
        highest_index = len(dataset) - 1

        def draw_one():
            # second element of the dataset item is not needed here
            data, _unused = dataset[random.randint(0, highest_index)]
            return utils.dict_to_device(data, self.flags.device)

        return [draw_one() for _ in range(num_images)]
示例#4
0
    def get_test_samples(self,
                         num_images=10) -> Iterable[Mapping[str, Tensor]]:
        """
        Gets one test sample per label ``0 .. num_images - 1`` for the cond.
        generation.

        Indices are drawn at random with a fixed seed until an item whose
        target equals the wanted label is found, so the returned samples are
        the same on every call.

        Note:
            The search for a label only stops when a matching target is
            drawn; if a label does not occur in ``self.dataset_test`` the
            loop never terminates.

        Returns:
            A list with ``num_images`` samples (entry ``i`` has target ``i``),
            each passed through ``dict_to_device`` with ``self.flags.device``.
        """
        # A private generator yields the same draws as seeding the module-level
        # RNG with 42, but does not reset the global random state that the
        # rest of the program shares.
        rng = random.Random(42)
        n_test = len(self.dataset_test)
        samples = []

        for label in range(num_images):
            while True:
                # loop until sample with the wanted label is found
                ix = rng.randint(0, n_test - 1)
                sample, target = self.dataset_test[ix]
                if target == label:
                    samples.append(dict_to_device(sample, self.flags.device))
                    break
        return samples
示例#5
0
def estimate_likelihoods(exp):
    """
    Averages the per-batch log-likelihood values over the test dataset.

    For every batch of ``exp.dataset_test`` the model's ``inference`` is run
    once and ``calc_log_likelihood_batch`` is evaluated for each non-empty
    subset key; the scalar results are collected and averaged.

    Args:
        exp: experiment object; ``exp.mm_vae``, ``exp.modalities``,
            ``exp.subsets``, ``exp.dataset_test`` and ``exp.flags`` are read.

    Returns:
        ``{subset_key: {key: mean}}`` where the inner keys are the entries of
        ``exp.modalities`` plus ``'joint'``. A key for which no value was
        collected averages to ``nan``.
    """
    model = exp.mm_vae
    mods = exp.modalities
    bs_normal = exp.flags.batch_size
    d_loader = DataLoader(exp.dataset_test,
                          batch_size=exp.flags.batch_size,
                          shuffle=True,
                          num_workers=exp.flags.dataloader_workers,
                          drop_last=True)

    # Work on a filtered copy: the '' (empty subset) entry is skipped without
    # deleting it from exp.subsets, which is shared with other code.
    subsets = {
        s_key: subset
        for s_key, subset in exp.subsets.items() if s_key != ''
    }

    lhoods = {}
    for s_key in subsets:
        lhoods[s_key] = {m_key: [] for m_key in mods}
        lhoods[s_key]['joint'] = []

    try:
        for batch in d_loader:
            # batch[0] holds the data; the remaining batch entries are unused
            batch_d = dict_to_device(batch[0], exp.flags.device)

            _, joint_latent = model.inference(batch_d)
            for s_key, subset in subsets.items():
                ll_batch = calc_log_likelihood_batch(exp,
                                                     joint_latent,
                                                     s_key,
                                                     subset,
                                                     batch_d,
                                                     num_imp_samples=6)
                for m_key, ll_value in ll_batch.items():
                    lhoods[s_key][m_key].append(ll_value.item())
    finally:
        # Put the batch size back even if evaluation fails half-way
        # (presumably a callee may change it - not visible from here).
        exp.flags.batch_size = bs_normal

    return {
        s_key: {
            m_key: np.mean(np.array(values))
            for m_key, values in lh_subset.items()
        }
        for s_key, lh_subset in lhoods.items()
    }
示例#6
0
def classify_generated_samples(args, d_loader, exp, mm_vae, mods, subsets):
    """
    Generates samples batch by batch and classifies them.

    For every batch of ``d_loader`` the random generation is evaluated via
    ``calc_coherence_random_gen``; afterwards the conditionally generated
    samples of each subset are classified and the classifier outputs are
    concatenated along dimension 0.

    Args:
        args: flags object; only ``args.distributed`` is read, to decide
            whether the model is used through ``mm_vae.module``.
        d_loader: iterable yielding ``(batch_d, batch_l)`` pairs.
        exp: experiment object (``exp.labels`` and ``exp.flags`` are read).
        mm_vae: the model.
        mods: modality keys of the inner level of the result dict.
        subsets: subset keys of the outer level of the result dict.

    Returns:
        Tuple ``(labels, rand_coherences, cond_gen_classified)``: the labels
        of all batches concatenated, the per-label lists filled by
        ``calc_coherence_random_gen`` and the nested
        ``{subset: {mod: Tensor}}`` dict of accumulated classifier outputs.
    """
    flags = exp.flags
    rand_coherences = {label: [] for label in exp.labels}

    # everything below is accumulated over the batches
    all_labels = torch.Tensor()
    cond_gen_classified = init_twolevel_nested_dict(subsets, mods, init_val=torch.Tensor())

    progress = tqdm(enumerate(d_loader), total=len(d_loader))
    for iteration, (batch_d, batch_l) in progress:
        all_labels = torch.cat((all_labels, batch_l), 0)
        batch_d = dict_to_device(batch_d, flags.device)

        # random generation
        rand_coherences = calc_coherence_random_gen(exp, mm_vae, iteration, rand_coherences, batch_d)

        # conditional generation: infer the latents, generate, then classify
        model = mm_vae.module if args.distributed else mm_vae
        _, joint_latent = model.inference(batch_d)
        cond_generated = model.cond_generation(joint_latent)

        for subset, cond_val in cond_generated.items():
            clf_cg = classify_cond_gen_samples(exp, batch_l, cond_val)
            per_mod = cond_gen_classified[subset]
            for mod in mods:
                per_mod[mod] = torch.cat((per_mod[mod], clf_cg[mod]), 0)

            # only the first num_samples_fid samples are saved, and only on request
            if (flags.batch_size * iteration) < flags.num_samples_fid and flags.save_figure:
                save_generated_samples_singlegroup(exp, iteration, subset, cond_val)

    return all_labels, rand_coherences, cond_gen_classified
示例#7
0
def test_clf_lr_all_subsets(clf_lr: Mapping[str, Mapping[str,
                                                         LogisticRegression]],
                            exp, which_lr: str):
    """
    Test the classifiers that were trained on latent representations.

    The test dataset is run through the model batch by batch, the latent
    representations selected by ``which_lr`` are classified with ``clf_lr``
    and the predictions of every subset are scored with ``exp.metrics``.

    Args:
        clf_lr: trained classifiers, forwarded unchanged to
            ``classify_latent_representations``.
        exp: experiment object (``exp.flags``, ``exp.mm_vae``,
            ``exp.dataset_test``, ``exp.labels`` and ``exp.metrics`` are used).
        which_lr: either q0.mu or zk.

    Returns:
        ``{subset: values}`` with the values extracted from the evaluated
        metrics of that subset.
    """
    args = exp.flags
    mm_vae = exp.mm_vae
    mm_vae.eval()

    d_loader = DataLoader(exp.dataset_test,
                          batch_size=exp.flags.batch_size,
                          shuffle=False,
                          num_workers=exp.flags.dataloader_workers,
                          drop_last=True)

    # a falsy steps_per_training_epoch (None or 0) means: use every batch
    training_steps = exp.flags.steps_per_training_epoch or len(d_loader)
    log.info(
        f'Creating {training_steps} batches of latent representations for classifier testing '
        f'with a batch_size of {exp.flags.batch_size}.')

    clf_predictions = {}
    batch_labels = torch.Tensor()

    for iteration, (batch_d, batch_l) in enumerate(d_loader):
        # iteration is zero-based, so ">=" stops after exactly training_steps
        # batches, matching the number announced in the log message above.
        if iteration >= training_steps:
            break
        batch_labels = torch.cat((batch_labels, batch_l), 0)

        batch_d = dict_to_device(batch_d, exp.flags.device)

        _, joint_latent = mm_vae.module.inference(
            batch_d) if args.distributed else mm_vae.inference(batch_d)

        lr_data = joint_latent.get_lreval_data()
        data_test = lr_data[which_lr]

        # indexed as clf_predictions_batch[label][subset] below
        clf_predictions_batch = classify_latent_representations(
            exp, clf_lr, data_test)

        for subset in data_test:
            # one column per label, in the order of exp.labels
            clf_predictions_batch_subset = torch.cat(
                tuple(
                    torch.tensor(clf_predictions_batch[label]
                                 [subset]).unsqueeze(1)
                    for label in exp.labels), 1)

            if subset in clf_predictions:
                clf_predictions[subset] = torch.cat(
                    [clf_predictions[subset], clf_predictions_batch_subset], 0)
            else:
                clf_predictions[subset] = clf_predictions_batch_subset

    batch_labels = atleast_2d(batch_labels, -1)
    results = {}
    for subset in clf_predictions:
        # calculate metrics
        metrics = exp.metrics(clf_predictions[subset],
                              batch_labels,
                              str_labels=exp.labels)
        metrics_dict = metrics.evaluate()
        results[subset] = metrics.extract_values(metrics_dict)

    return results
示例#8
0
def _regenerate_prd_scores(experiment_dir, method_uid, epoch: int):
    """
    Loads a stored experiment, writes generated samples into a scratch
    directory and returns the result of ``calc_prd_score`` for it.

    The scratch directory is removed again in every case, including when
    generation or scoring raises.
    """
    import shutil

    tmpdirname = Path('/mnt/data/hendrik/mmnf_data/tempdir')
    # No exist_ok: a directory that is already there is not ours to delete.
    tmpdirname.mkdir()
    try:
        exp = load_experiment(experiment_dir,
                              _id=method_uid,
                              epoch=epoch,
                              add_args={'dir_gen_eval_fid': tmpdirname})
        mm_vae = exp.mm_vae
        # NOTE(review): the random samples are generated once and the same
        # ones are saved for every batch - confirm this is intended.
        rand_gen = mm_vae.generate()
        d_loader = DataLoader(exp.dataset_test,
                              batch_size=exp.flags.batch_size,
                              shuffle=True,
                              num_workers=exp.flags.dataloader_workers,
                              drop_last=True)
        for iteration, (batch_d, _batch_l) in tqdm(enumerate(d_loader),
                                                   total=len(d_loader)):
            batch_d = dict_to_device(batch_d, exp.flags.device)
            save_generated_samples(exp, rand_gen, iteration, batch_d)
            _, joint_latent = mm_vae.inference(batch_d)
            cg = mm_vae.cond_generation(joint_latent)
            for subset, cond_val in cg.items():
                save_generated_samples_singlegroup(exp, iteration, subset,
                                                   cond_val)
        return calc_prd_score(exp)
    finally:
        # rmtree instead of Path.rmdir: rmdir fails on a non-empty directory,
        # and a leftover directory makes the next mkdir() above fail.
        shutil.rmtree(tmpdirname)


def df_maker(epoch: int):
    """
    Yields one summary row per method with mean and standard deviation of
    the PRD scores of its '3_mods' polymnist experiments.

    The scores are read from the stored results of ``epoch``. If they are
    missing (``None``) for an experiment, they are recomputed and written
    back to that experiment's epoch results file.

    Args:
        epoch: epoch whose ``<epoch>.json`` results file is used.

    Yields:
        A dict with the keys 'Method', 'Missing Mod', 'Reconstruction',
        'Random' and the matching '..__STDEV' entries, rounded to 3 decimals.
    """
    config = json2dict(Path('conf.json'))
    methods = config['methods']
    data_dir = Path(__file__).parent.parent / 'data/thesis'
    experiment_uids_path = data_dir / 'experiment_uids.json'
    exp_uids = json2dict(experiment_uids_path)['polymnist']

    for method in methods:
        method_uids = exp_uids[method]['3_mods']

        d = {
            'missing_mod_scores': [],
            'reconstr_mod_scores': [],
            'random_prd_scores': []
        }
        for method_uid in method_uids:
            epoch_results_dir = data_dir / 'experiments' / 'polymnist' / method / method_uid / 'epoch_results'
            epoch_results_path = epoch_results_dir / f'{epoch}.json'

            prd_dict = json2dict(
                epoch_results_path)['test_results']['prd_scores']

            if prd_dict is None:
                # NOTE(review): this path omits the 'polymnist' component that
                # epoch_results_dir contains - confirm against load_experiment.
                experiment_dir = data_dir / 'experiments' / method / method_uid
                prd_dict = _regenerate_prd_scores(experiment_dir, method_uid,
                                                  epoch)

                # Re-read the file so that only the prd_scores entry of its
                # current content is replaced.
                ep_res_dict = json2dict(epoch_results_path)
                ep_res_dict['test_results']['prd_scores'] = prd_dict
                dict2json(out_path=epoch_results_path, d=ep_res_dict)

            d['random_prd_scores'].append(
                np.mean([
                    score for k, score in prd_dict.items()
                    if k.startswith('random')
                ]))

            # the remaining helpers only get the non-random entries
            prd_dict = {
                k: v
                for k, v in prd_dict.items() if not k.startswith('random')
            }

            d['missing_mod_scores'].append(
                np.mean(list(get_missing_mod_scores_prd(prd_dict))))
            d['reconstr_mod_scores'].append(
                np.mean(list(get_reconstr_mod_scores_prd(prd_dict))))

        yield {
            'Method': method,
            'Missing Mod': np.round(np.mean(d['missing_mod_scores']), 3),
            'Reconstruction': np.round(np.mean(d['reconstr_mod_scores']), 3),
            'Random': np.round(np.mean(d['random_prd_scores']), 3),
            'Missing Mod__STDEV': np.round(np.std(d['missing_mod_scores']), 3),
            'Reconstruction__STDEV': np.round(np.std(d['reconstr_mod_scores']),
                                              3),
            'Random__STDEV': np.round(np.std(d['random_prd_scores']), 3),
        }