Пример #1
0
def test_trial_results():
    """Check that ``Config.trial_results()`` reflects a short worker run.

    Inside a throwaway directory the test invokes the osprey executable
    twice (``skeleton -t random_example -f config.yaml`` and then
    ``worker config.yaml -n 5``), verifies that ``osprey-trials.db`` was
    created, and asserts that the table returned by ``trial_results()`` has
    five rows and one column for every column of the ``Trial`` table.
    """
    assert OSPREY_BIN is not None
    start_dir = os.path.abspath(os.curdir)
    scratch_dir = tempfile.mkdtemp()

    try:
        os.chdir(scratch_dir)

        # Both commands run relative to the scratch directory.
        commands = (
            [OSPREY_BIN, 'skeleton', '-t', 'random_example',
             '-f', 'config.yaml'],
            [OSPREY_BIN, 'worker', 'config.yaml', '-n', '5'],
        )
        for command in commands:
            subprocess.check_call(command)
        assert os.path.exists('osprey-trials.db')

        results = Config('config.yaml').trial_results()
        assert results.shape[0] == 5

        # Every column of the Trial table must be present in the results.
        assert all(column in results.columns
                   for column in Trial.__table__.columns.keys())

    finally:
        # Restore the working directory before deleting the scratch tree.
        os.chdir(start_dir)
        shutil.rmtree(scratch_dir)
Пример #2
0
def data_from_config(config_file):
    """Load the trial history described by an osprey config file.

    One record ``[parameters, test_scores, status, elapsed]`` is collected
    for every ``Trial`` row and handed to ``get_data`` together with the
    search space.

    Returns
    -------
    tuple
        Whatever ``get_data(history, searchspace)`` returns, with the search
        space appended as the last element.
    """
    cfg = Config(config_file)
    db_session = cfg.trials()
    searchspace = cfg.search_space()

    history = []
    for trial in db_session.query(Trial).all():
        record = [trial.parameters, trial.test_scores,
                  trial.status, trial.elapsed]
        history.append(record)

    extracted = get_data(history, searchspace)
    return extracted + (searchspace, )
Пример #3
0
def test_1():
    """Check that the ``msmbuilder`` skeleton template yields a usable config.

    Inside a throwaway directory the test invokes the osprey executable with
    ``skeleton -t msmbuilder -f config.yaml``, then verifies that the file
    exists, that it parses as YAML, and that ``Config`` can be constructed
    from it.
    """
    assert OSPREY_BIN is not None
    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()

    try:
        os.chdir(dirname)
        subprocess.check_call([OSPREY_BIN, 'skeleton', '-t', 'msmbuilder',
                              '-f', 'config.yaml'])
        assert os.path.exists('config.yaml')
        with open('config.yaml', 'rb') as f:
            # safe_load instead of a bare yaml.load(f): the latter requires an
            # explicit Loader on PyYAML >= 6 (TypeError otherwise) and can
            # construct arbitrary objects on older releases.
            yaml.safe_load(f)
        Config('config.yaml')

    finally:
        # Restore the working directory before deleting the scratch tree.
        os.chdir(cwd)
        shutil.rmtree(dirname)
Пример #4
0
# # Load the data

# In[4]:

# Directory that contains one sub-directory per search method.
root_dir = 'fs-peptide'
# Load Configuration Files
# Maps a method label to the path of that method's osprey config file.
databases = {
    'bayesian': root_dir + '/gp-m52-ei-tica-indv/config-all_tor.yaml',
    'random': root_dir + '/rand-tica-indv/config_random-all_tor.yaml',
    'sobol': root_dir + '/sobol-tica-indv/config-all_tor.yaml',
    'tpe': root_dir + '/tpe-s20-g25-tica-indv/config-all_tor.yaml'
}

# Collect the trial results of every method, tagging each table with the
# label of the method it came from.
all_dfs = []
for k, v in databases.items():
    config = Config(v)
    df = config.trial_results()
    df['method'] = k
    all_dfs.append(df)

# Stack the per-method tables into a single frame (original indices are kept).
df_all = pd.concat(all_dfs)

# In[7]:

# Notebook preview of the first rows; the value is discarded when this file
# is run as a plain script.
df_all.head()

# # Drop unnecessary columns and rename

# In[67]:

df = df_all.loc[:, [
Пример #5
0
    results = {
        'id': id_num,
        'cse_train_scores': train_scores,
        'cse_train_gaps': train_gaps,
        'cse_train_n_timescales': train_n_timescales,
        'cse_test_scores': test_scores
    }

    return results


# Script entry point: load the trial results, choose a subset of trials and
# evaluate each of them with `run_trial` in a process pool.
if __name__ == "__main__":

    # Fix NumPy's global random seed for this (parent) process.
    np.random.seed(42)

    # `config_path` is defined outside the visible part of the file.
    config = Config(config_path)
    trials = config.trial_results()
    trials = trials.sort_values(by='mean_test_score', ascending=False)
    # Select the top ten percent
    # NOTE(review): after the descending sort above, `iloc[160:]` skips the
    # 160 highest-scoring rows and keeps the remainder, which does not
    # obviously match "top ten percent" -- confirm the intended slice.
    trials = trials.iloc[160:, :]
    # `iterrows()` yields (index, row) pairs, so `get_parameters` receives a
    # tuple for every trial.
    trial_configs = [get_parameters(irow) for irow in trials.iterrows()]

    # Raises KeyError if SLURM_JOB_CPUS_PER_NODE is not set in the environment.
    n_cpu = int(os.environ['SLURM_JOB_CPUS_PER_NODE'])
    print('Number of cpus detected {}'.format(n_cpu))

    # NOTE(review): the pool is not closed or joined in the visible code.
    pool = Pool(n_cpu)
    # Results arrive in completion order, not in the order of `trial_configs`.
    results = pool.imap_unordered(run_trial, trial_configs)

    # Drain the iterator; this blocks until every trial has finished.
    results = list(results)

    all_ids = [x['id'] for x in results]
# Imports
from osprey.config import Config
import sys

# Exactly one command-line argument is expected: the path to the config file.
if len(sys.argv) != 2:
    print('Usage: count_project_trails [config file]')
    # Stop here; otherwise `sys.argv[1]` below raises IndexError when the
    # argument is missing, or extra arguments are silently ignored.
    sys.exit(1)

# Load Configuration File
my_config = sys.argv[1]

config = Config(my_config)

# Retrieve Trial Results
df = config.trial_results()
# Print how many trials are recorded under each project name.
print(df['project_name'].value_counts())
Пример #7
0
# Command-line handling: the script name plus exactly three arguments
# (config file, total sample size, number of splits) are required.
if len(argv) != 4:
    print('usage: sample_db.py config.yaml sample_size n_samples')
    exit(1)

inp_file = argv[1]
num = int(argv[2]) # TOTAL size of samples to use e.g. 100
# NOTE(review): `iter` shadows the builtin of the same name for the rest of
# the module; code further down relies on this name.
iter = int(argv[3])  # Number of splits e.g. 5
# With the example values this gives 5 splits of 20 samples each.

# The total sample size must split evenly across the requested splits.
if num % iter != 0:
    print('sample_size not strictly divisible by n_samples')
    exit(1)

# Get original database and history
config1 = Config(inp_file)
df1 = config1.trial_results()
# Every Trial row, queried through the object returned by `config1.trials()`.
hist1 = config1.trials().query(Trial).all()


# Main loop
for name, group in df1.groupby('project_name'):
    # Sample the group
    sample = group.sample(num, random_state=42)
    cv = KFold(n_splits=iter, random_state=42)
    all_keep = sample['id'].values
    for i, (_, test_idx) in enumerate(cv.split(all_keep)):

        keep = all_keep[test_idx]

        db2 = make_session('sqlite:///osprey-trials-{0}-{1}.db'.format(int(num/iter), i), project_name=name)