Example #1
def main():
    assert(len(sys.argv) == 2)
    config_file = io.abspath2(sys.argv[1])

    config = load_config(config_file)
    ext = config['csv_ext']

    samplers, examples, file_lookup = \
        io.find_traces(config['input_path'], config['exact_name'], ext)
    print 'found %d samplers and %d examples' % (len(samplers), len(examples))
    print '%d files in lookup table' % \
        sum(len(file_lookup[k]) for k in file_lookup)

    # examples = examples[:3]  # TODO remove

    # This could get big
    print samplers
    print examples

    metrics = MOMENT_METRICS.keys() + OTHER_METRICS.keys()
    R = build_metrics_array(samplers, examples, metrics, file_lookup, config)
    perf_df, sync_perf = R

    # Save TS; make sure it has enough info to compute ESS and efficiency
    io.save_pd(perf_df, config['output_path'], 'perf', ext)

    # Save diagnostics; make sure they have enough info to compute ESS and efficiency
    io.save_pd(sync_perf, config['output_path'], 'perf_sync', ext, index=False)

    # Could also include option to dump everything in netCDF if we want
    print 'done'
Example #2
def load_config(config_file):
    config = ConfigParser.RawConfigParser()
    assert(os.path.isabs(config_file))
    config.read(config_file)

    D = {}
    D['input_path'] = io.abspath2(config.get('phase3', 'output_path'))
    D['output_path'] = io.abspath2(config.get('phase4', 'output_path'))

    D['n_grid'] = config.getint('phase3', 'n_grid')
    D['n_chains'] = config.getint('phase3', 'n_chains')

    D['csv_ext'] = config.get('common', 'csv_ext')
    D['meta_ext'] = config.get('common', 'meta_ext')
    D['exact_name'] = config.get('common', 'exact_name')

    return D
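
For context, here is a minimal sketch of the config.ini sections and keys this loader reads; the paths and values below are placeholders for illustration, not taken from the source:

[common]
csv_ext = .csv
meta_ext = .meta
exact_name = exact

[phase3]
output_path = /abs/path/to/phase3_output
n_grid = 100
n_chains = 10

[phase4]
output_path = /abs/path/to/phase4_output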
Example #3
def main():
    assert (len(sys.argv) == 4)
    config_file = io.abspath2(sys.argv[1])
    param_name = sys.argv[2]
    sampler = sys.argv[3]
    assert (io.is_safe_name(param_name))

    config = io.load_config(config_file)

    run_experiment(config, param_name, sampler)
    print 'done'
Example #4
def main():
    num_args = len(sys.argv) - 1
    if num_args < 1:
        config_path = '../config.ini'
    elif num_args > 1:
        raise Exception('too many arguments: %d. %d expected' % (num_args, 1))
    else:
        config_path = sys.argv[1]
    config_file = io.abspath2(config_path)

    np.random.seed(3463)

    config = io.load_config(config_file)

    model_list = io.get_model_list(config['input_path'], config['pkl_ext'])
    # Shuffle so that if we don't finish we still get a random subset
    np.random.shuffle(model_list)
    # model_list = model_list[:5]  # TODO remove, test only
    assert (all(io.is_safe_name(ss) for ss in model_list))
    print 'using models:'
    print model_list

    # Sort for reproducibility
    sampler_list = sorted(BUILD_STEP_PM.keys() + BUILD_STEP_MC.keys())
    print 'using samplers:'
    print sampler_list

    # Run n_chains in the outer loop so that if the process gets killed we
    # have fewer chains but an even distribution over models and samplers.
    scheduled_jobs = set(queued_or_running_jobs())
    for model_name in model_list:
        # Get the exact samples
        run_experiment(config, model_name, config['exact_name'])

        # Get the sampler samples
        for i in xrange(config['n_chains']):
            # TODO could put ADVI init here to keep it fixed across samplers
            for sampler in sampler_list:
                t = time()
                job_name = "slurm-%s-%s-%d" % (model_name, sampler, i)
                cmd_line_args = (config_file, model_name, sampler)
                if job_name in scheduled_jobs:
                    print '%s already in scheduled jobs, but running anyway' % job_name
                options = "-c 1 --job-name=%s -t 45:00 --mem=32gb --output %s.out" % (
                    job_name, job_name)
                end = "slurm_job_main.sh %s %s %s" % cmd_line_args
                command = "sbatch %s %s" % (options, end)
                print 'Executing:', command
                os.system(command)
                print 'wall time %fs' % (time() - t)
    print 'done'
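
For a hypothetical model and sampler (the names and config path below are placeholders), the command assembled above and handed to os.system would look roughly like:

sbatch -c 1 --job-name=slurm-my_model-my_sampler-0 -t 45:00 --mem=32gb --output slurm-my_model-my_sampler-0.out slurm_job_main.sh /abs/path/config.ini my_model my_sampler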
Example #5
def main():
    assert (len(sys.argv) == 2)
    config_file = io.abspath2(sys.argv[1])

    config = ConfigParser.RawConfigParser()
    config.read(config_file)
    input_original = io.abspath2(config.get('phase1', 'output_path'))
    input_exact = io.abspath2(config.get('phase3', 'output_path'))
    exact_name = config.get('common', 'exact_name')
    csv_ext = config.get('common', 'csv_ext')
    sep = '_'

    _, examples, file_lookup = io.find_traces(input_exact, exact_name, csv_ext)
    for example in examples:
        original_chain, _ = example.rsplit(sep, 1)
        X_original = io.load_np(input_original, original_chain, csv_ext)
        assert (X_original.ndim == 2)
        D = X_original.shape[1]

        fname_exact, = file_lookup[(example, exact_name)]
        X_exact = io.load_np(input_exact, fname_exact, '')
        assert (X_exact.ndim == 2 and X_exact.shape[1] == D)

        print example
        for ii in xrange(D):
            # Welch's t-test (unequal variances) comparing means per dimension
            stat, pval = ss.ttest_ind(X_original[:, ii],
                                      X_exact[:, ii],
                                      equal_var=False)
            print 'dim %d t: %f p = %f' % (ii, stat, pval)
            # Brown-Forsythe test (median-centered Levene) comparing variances
            stat, pval = ss.levene(X_original[:, ii],
                                   X_exact[:, ii],
                                   center='median')
            print 'dim %d BF: %f p = %f' % (ii, stat, pval)
            # Two-sample Kolmogorov-Smirnov test comparing full distributions
            stat, pval = ss.ks_2samp(X_original[:, ii], X_exact[:, ii])
            print 'dim %d KS: %f p = %f' % (ii, stat, pval)
        print '-' * 20
    print 'done'
Example #6
def main():
    '''This program can be run in parallel across different MC_chain files,
    each handled independently. This is a top-level routine, so I am not
    worried about needing a verbosity setting.'''
    assert (len(sys.argv) == 3)  # Print usage error instead to be user friendly
    config_file = io.abspath2(sys.argv[1])
    mc_chain_name = sys.argv[2]
    assert (io.is_safe_name(mc_chain_name))

    print 'config %s' % config_file
    config = io.load_config(config_file)

    run_experiment(config, mc_chain_name)
    print 'done'
Example #7
def main():
    assert (len(sys.argv) == 2)  # Print usage error instead to be user friendly
    config_file = io.abspath2(sys.argv[1])

    config = ConfigParser.RawConfigParser()
    config.read(config_file)
    input_path = io.abspath2(config.get('phase1', 'output_path'))
    data_ext = config.get('common', 'csv_ext')

    print 'searching for input data in'
    print input_path
    chain_files = sorted(f for f in os.listdir(input_path)
                         if f.endswith(data_ext))
    np.random.shuffle(chain_files)
    print 'found %d files' % len(chain_files)

    for chain in chain_files:
        print '-' * 20
        print chain
        X = io.load_np(input_path, chain, '')
        assert (X.ndim == 2)
        moments_report_w_burn(X)
    print 'done'
Example #8
def main():
    num_args = len(sys.argv) - 1
    if num_args < 1:
        config_path = '../config.ini'
    elif num_args > 1:
        raise Exception('too many arguments: %d. %d expected' % (num_args, 1))
    else:
        config_path = sys.argv[1]
    config_file = io.abspath2(config_path)

    config = io.load_config(config_file)

    model_list = io.get_model_list(config['input_path'], config['pkl_ext'])
    # model_list = model_list[:5]  # TODO remove, test only
    assert (all(io.is_safe_name(ss) for ss in model_list))
    print 'using models:'
    print model_list

    # Sort for reproducibility
    sampler_list = sorted(BUILD_STEP_PM.keys() + BUILD_STEP_MC.keys())
    print 'using samplers:'
    print sampler_list

    # Get the exact samples
    for model_name in model_list:
        run_experiment(config, model_name, config['exact_name'])

    # Run n_chains in the outer loop so that if the process gets killed we
    # have fewer chains but an even distribution over models and samplers.
    for model_name in model_list:
        for _ in xrange(config['n_chains']):
            # TODO could put ADVI init here to keep it fixed across samplers
            for sampler in sampler_list:
                t = time()
                try:
                    run_experiment(config, model_name, sampler)
                except Exception as err:
                    print '%s/%s failed' % (model_name, sampler)
                    print str(err)
                print 'wall time %fs' % (time() - t)
    print 'done'
Example #9
def main():
    num_args = len(sys.argv) - 1
    if num_args < 1:
        config_path = '../config.ini'
    elif num_args > 1:
        raise Exception('too many arguments: %d. %d expected' % (num_args, 1))
    else:
        config_path = sys.argv[1]
    config_file = io.abspath2(config_path)

    print 'config %s' % config_file
    config = io.load_config(config_file)
    print config['input_path']

    chains = io.get_chains(config['input_path'], config['csv_ext'],
                           config['size_limit_bytes'])
    print 'input chains:'
    print chains

    print 'Running njobs=%d in parallel' % config['njobs']
    try_run_experiment_with_config = partial(try_run_experiment, config)
    # Each worker runs try_run_experiment(config, chain) for one chain
    Parallel(n_jobs=config['njobs'])(
        map(delayed(try_run_experiment_with_config), chains))
    print 'done'
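
As with the other drivers, this script takes an optional config path and otherwise falls back to ../config.ini; a hypothetical invocation (the script name is a placeholder):

python run_phase.py /abs/path/config.ini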