Example #1
def run_ivae_exp(args, config):
    """run iVAE simulations"""
    data_dim = config.data_dim
    n_segments = config.n_segments
    n_layers = config.n_layers
    n_obs_per_seg = config.n_obs_per_seg
    data_seed = config.data_seed

    max_iter = config.ivae.max_iter
    lr = config.ivae.lr
    cuda = config.ivae.cuda

    results = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}

    nSims = args.nSims
    dataset = args.dataset
    test = args.test
    for l in n_layers:
        for n in n_obs_per_seg:
            x, y, s = generate_synthetic_data(data_dim,
                                              n_segments,
                                              n,
                                              l,
                                              seed=data_seed,
                                              simulationMethod=dataset,
                                              one_hot_labels=True,
                                              varyMean=True)
            for seed in range(nSims):
                print('Running exp with L={} and n={}; seed={}'.format(
                    l, n, seed))
                # data was generated above; run iVAE for this seed
                ckpt_file = os.path.join(
                    args.checkpoints,
                    'ivae_{}_l{}_n{}_s{}.pt'.format(dataset, l, n, seed))
                res_iVAE = IVAE_wrapper(X=x,
                                        U=y,
                                        n_layers=l + 1,
                                        hidden_dim=data_dim * 2,
                                        cuda=cuda,
                                        max_iter=max_iter,
                                        lr=lr,
                                        ckpt_file=ckpt_file,
                                        seed=seed,
                                        test=test)

                # store and report the MCC between recovered and true sources
                mcc = mean_corr_coef(res_iVAE[0].detach().numpy(), s)
                results[l][n].append(mcc)
                print(mcc)
    # prepare output
    Results = {
        'data_dim': data_dim,
        'data_segments': n_segments,
        'CorrelationCoef': results
    }

    return Results
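A rough sketch of how run_ivae_exp might be invoked. The attribute names mirror exactly what the function reads from args and config above; the SimpleNamespace packaging and every concrete value are illustrative assumptions, not the repository's actual entry point or configuration.

from types import SimpleNamespace

# Illustrative values; only the attribute names are taken from the function above.
config = SimpleNamespace(
    data_dim=5,
    n_segments=40,
    n_layers=[2, 3],
    n_obs_per_seg=[1000, 2000],
    data_seed=1,
    ivae=SimpleNamespace(max_iter=20000, lr=1e-3, cuda=False),
)
args = SimpleNamespace(nSims=5,
                       dataset='TCL',      # forwarded as simulationMethod
                       test=False,
                       checkpoints='run/checkpoints')

out = run_ivae_exp(args, config)
print(out['CorrelationCoef'])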
Example #2
def compute_mcc(args, config):
    with open(
            os.path.join(args.checkpoints, 'seed{}'.format(args.seed),
                         'test_representations.p'), 'rb') as f:
        rep1 = pickle.load(f)['rep']
    with open(
            os.path.join(args.checkpoints, 'seed{}'.format(args.second_seed),
                         'test_representations.p'), 'rb') as f:
        rep2 = pickle.load(f)['rep']

    # cutoff = 50 if args.dataset == 'CIFAR100' else 5
    # ii = np.where(res_cond[0]['lab'] < cutoff)[0]  # in sample points to learn from
    # iinot = np.where(res_cond[0]['lab'] >= cutoff)[0]  # out of sample points
    cutoff = 5000  # half the test dataset
    ii = np.arange(cutoff)
    iinot = np.arange(cutoff, 2 * cutoff)

    # strong MCC: compare the two representations directly, in sample and on a
    # held-out half of the test points
    mcc_strong_out = mean_corr_coef_out_of_sample(x=rep1[ii],
                                                  y=rep2[ii],
                                                  x_test=rep1[iinot],
                                                  y_test=rep2[iinot])
    mcc_strong_in = mean_corr_coef(x=rep1[ii], y=rep2[ii])

    with open(
            os.path.join(
                args.output,
                'mcc_strong_{}_{}.p'.format(args.seed, args.second_seed)),
            'wb') as f:
        pickle.dump({'in': mcc_strong_in, 'out': mcc_strong_out}, f)

    # weak MCC: fit a CCA on the in-sample half, then compare the canonical
    # projections both out of sample and in sample
    cca_dim = 20
    cca = CCA(n_components=cca_dim)
    cca.fit(rep1[ii], rep2[ii])
    res_out = cca.transform(rep1[iinot], rep2[iinot])
    mcc_weak_out = mean_corr_coef(res_out[0], res_out[1])
    res_in = cca.transform(rep1[ii], rep2[ii])
    mcc_weak_in = mean_corr_coef(res_in[0], res_in[1])

    with open(
            os.path.join(
                args.output,
                'mcc_weak_{}_{}.p'.format(args.seed, args.second_seed)),
            'wb') as f:
        pickle.dump({'in': mcc_weak_in, 'out': mcc_weak_out}, f)
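compute_mcc assumes two earlier training runs (different seeds) have already dumped their test-set representations to disk. A minimal sketch of the expected inputs follows; the directory names and the config=None call are assumptions made for illustration (the config argument is not used in the body above).

from types import SimpleNamespace

# Expected on disk, one file per seed, each a pickled dict whose 'rep' entry
# holds at least 2 * cutoff = 10000 test representations:
#   <checkpoints>/seed<seed>/test_representations.p
args = SimpleNamespace(seed=0,
                       second_seed=1,
                       checkpoints='run/checkpoints',  # illustrative path
                       output='run/results')           # illustrative path

compute_mcc(args, config=None)  # writes mcc_strong_0_1.p and mcc_weak_0_1.p to args.output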
Example #3
def run_icebeem_exp(args, config):
    """run ICE-BeeM simulations"""
    data_dim = config.data_dim
    n_segments = config.n_segments
    n_layers = config.n_layers
    n_obs_per_seg = config.n_obs_per_seg
    data_seed = config.data_seed

    lr_flow = config.icebeem.lr_flow
    lr_ebm = config.icebeem.lr_ebm
    n_layers_flow = config.icebeem.n_layers_flow
    ebm_hidden_size = config.icebeem.ebm_hidden_size

    results = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}

    nSims = args.nSims
    dataset = args.dataset
    test = args.test

    for l in n_layers:
        for n in n_obs_per_seg:
            x, y, s = generate_synthetic_data(data_dim,
                                              n_segments,
                                              n,
                                              l,
                                              seed=data_seed,
                                              simulationMethod=dataset,
                                              one_hot_labels=True)
            for seed in range(nSims):
                print('Running exp with L={} and n={}; seed={}'.format(
                    l, n, seed))
                # data was generated above; the EBM depth tracks the mixing depth
                n_layers_ebm = l + 1
                ckpt_file = os.path.join(
                    args.checkpoints,
                    'icebeem_{}_l{}_n{}_s{}.pt'.format(dataset, l, n, seed))
                recov_sources = ICEBEEM_wrapper(
                    X=x,
                    Y=y,
                    ebm_hidden_size=ebm_hidden_size,
                    n_layers_ebm=n_layers_ebm,
                    n_layers_flow=n_layers_flow,
                    lr_flow=lr_flow,
                    lr_ebm=lr_ebm,
                    seed=seed,
                    ckpt_file=ckpt_file,
                    test=test)

                # store and report the best MCC over the recovered source estimates
                mcc = np.max([mean_corr_coef(z, s) for z in recov_sources])
                results[l][n].append(mcc)
                print(mcc)

    # prepare output
    Results = {
        'data_dim': data_dim,
        'data_segments': n_segments,
        'CorrelationCoef': results
    }

    return Results
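Invocation is analogous to the iVAE sketch after Example #1; only the algorithm-specific sub-config changes. The field names follow what run_icebeem_exp reads, and the values are again illustrative assumptions.

from types import SimpleNamespace

# Reusing the illustrative args/config from the iVAE sketch, with an icebeem sub-config.
config.icebeem = SimpleNamespace(lr_flow=1e-5,
                                 lr_ebm=3e-4,
                                 n_layers_flow=10,
                                 ebm_hidden_size=32)
out = run_icebeem_exp(args, config)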
Example #4
def run_tcl_exp(args, config):
    """run TCL simulations"""
    stepDict = {
        1: [int(5e3), int(5e3)],
        2: [int(1e4), int(1e4)],
        3: [int(1e4), int(1e4)],
        4: [int(1e4), int(1e4)],
        5: [int(1e4), int(1e4)]
    }

    data_dim = config.data_dim
    n_segments = config.n_segments
    n_layers = config.n_layers
    n_obs_per_seg = config.n_obs_per_seg
    data_seed = config.data_seed

    results = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}
    results_no_ica = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}

    num_comp = data_dim

    nSims = args.nSims
    dataset = args.dataset
    test = args.test

    for l in n_layers:
        for n in n_obs_per_seg:
            # generate data
            x, y, s = generate_synthetic_data(data_dim,
                                              n_segments,
                                              n,
                                              l,
                                              seed=data_seed,
                                              simulationMethod=dataset,
                                              one_hot_labels=False)
            for seed in range(nSims):
                print('Running exp with L={} and n={}; seed={}'.format(
                    l, n, seed))
                # checkpointing in TensorFlow is more involved than in PyTorch,
                # so use a separate folder per (l, n, seed) tuple
                ckpt_folder = os.path.join(args.checkpoints, args.dataset,
                                           str(l), str(n), str(seed))
                # run TCL
                res_TCL = TCL_wrapper(sensor=x.T,
                                      label=y,
                                      random_seed=seed,
                                      list_hidden_nodes=[num_comp * 2] *
                                      (l - 1) + [num_comp],
                                      max_steps=stepDict[l][0] * 2,
                                      max_steps_init=stepDict[l][1],
                                      ckpt_dir=ckpt_folder,
                                      test=test)
                # store results; TCL recovers the sources only up to a point-wise
                # nonlinearity, so MCC is computed against the squared sources
                mcc_no_ica = mean_corr_coef(res_TCL[0].T, s**2)
                mcc_ica = mean_corr_coef(res_TCL[1].T, s**2)
                print('TCL mcc (no ICA): {}\t mcc: {}'.format(
                    mcc_no_ica, mcc_ica))
                results[l][n].append(mcc_ica)
                results_no_ica[l][n].append(mcc_no_ica)

    # prepare output
    Results = {
        'data_dim': data_dim,
        'data_segments': n_segments,
        'CorrelationCoef': results,
        'CorrelationCoef_no_ica': results_no_ica,
    }

    return Results
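Both the TCL network shape and the training-step budget are driven by the mixing depth l. A small self-contained check of the hidden-layer-width expression used in the TCL_wrapper call above, with assumed values chosen purely for illustration:

# Width schedule used in the call above: (l - 1) hidden layers of width
# 2 * num_comp, followed by a final layer of num_comp units.
l, num_comp = 3, 5          # assumed values, purely for illustration
list_hidden_nodes = [num_comp * 2] * (l - 1) + [num_comp]
print(list_hidden_nodes)    # [10, 10, 5]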