Example #1
def btb_test(tuner=None,
             selector=None,
             dataruns=None,
             datasets=None,
             processes=1,
             graph=False):
    """
    Run a test datarun using the chosen tuner and selector, and compare it to
    the baseline performance
    """
    sql_conf, run_conf, _ = load_config(sql_path=SQL_CONFIG,
                                        run_path=RUN_CONFIG)

    if tuner is not None:
        run_conf.tuner = tuner
    if selector is not None:
        run_conf.selector = selector

    db = Database(**vars(sql_conf))
    datarun_ids = dataruns or []
    datasets = datasets or DATASETS_MAX_FIRST

    # if necessary, generate datasets and dataruns
    if not datarun_ids:
        for ds in datasets:
            run_conf.train_path = DATA_URL + ds
            run_conf.dataset_id = None
            print('Creating datarun for', run_conf.train_path)
            datarun_ids.append(enter_datarun(sql_conf, run_conf))

    # work on the dataruns until they're done
    print('Working on %d dataruns' % len(datarun_ids))
    work_parallel(db=db, datarun_ids=datarun_ids, n_procs=processes)
    print('Finished!')

    results = {}

    # compute and maybe graph the results
    for rid in datarun_ids:
        res = report_auc_vs_baseline(db, rid, graph=graph)
        results[rid] = {'test': res[0], 'baseline': res[1]}

    return results
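
A minimal usage sketch for btb_test; everything here is illustrative rather than confirmed API: the tuner code 'gp' and selector code 'ucb1' are assumed values, and the actual codes accepted by this codebase may differ.

results = btb_test(tuner='gp', selector='ucb1', processes=4)
for rid, scores in results.items():
    # each entry holds the test and baseline results computed above
    print('datarun', rid, '-> test:', scores['test'], 'baseline:', scores['baseline'])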
Example #2
parser.add_argument('--processes', help='number of processes to run concurrently',
                    type=int, default=1)
parser.add_argument('--method', help='code for method to test')
parser.add_argument('--method-path',
                    help='path to JSON config for method to test')

args = parser.parse_args()
sql_config, run_config, aws_config = load_config(sql_path=SQL_CONFIG,
                                                 run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    run_config.methods = [args.method]
    dataset = enter_dataset(db, run_config, aws_config)
    datarun_ids.extend(
        enter_datarun(sql_config,
                      run_config,
                      aws_config,
                      run_per_partition=True))

print('computing on dataruns', datarun_ids)
work_parallel(db=db,
              datarun_ids=datarun_ids,
              aws_config=aws_config,
              n_procs=args.processes)

print('workers finished.')

for rid in datarun_ids:
    print_hp_summary(db, rid)
Example #3
parser.add_argument('--processes', help='number of processes to run concurrently',
                    type=int, default=1)
parser.add_argument('--method', help='code for method to test')
parser.add_argument('--method-path', help='path to JSON config for method to test')

args = parser.parse_args()
sql_config, run_config, aws_config = load_config(sql_path=SQL_CONFIG,
                                                 run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    if args.method:
        run_config.methods = [args.method]
    else:
        run_config.methods = METHODS
    dataset = enter_dataset(db, run_config, aws_config)
    datarun_ids.extend(enter_datarun(sql_config, run_config, aws_config,
                                     run_per_partition=True))

print('computing on dataruns', datarun_ids)
work_parallel(db=db, datarun_ids=datarun_ids, aws_config=aws_config,
              n_procs=args.processes)

print('workers finished.')

for rid in datarun_ids:
    print_hp_summary(db, rid)
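
The extend() calls in the two examples above suggest that enter_datarun returns a list of datarun IDs (one per hyperpartition) when run_per_partition=True, while the append() calls in the other examples imply a single ID otherwise. A sketch of that assumption, inferred from usage rather than from documented API:

# Assumed behavior, based on the extend()/append() pattern above:
ids = enter_datarun(sql_config, run_config, aws_config, run_per_partition=True)
print('created %d dataruns, one per hyperpartition' % len(ids))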
Example #4
parser.add_argument('--processes',
                    help='number of processes to run concurrently',
                    type=int,
                    default=4)
args = parser.parse_args()

sql_conf, run_conf, _ = load_config(sql_path=SQL_CONFIG,
                                    run_path=RUN_CONFIG,
                                    args=args)
db = Database(**vars(sql_conf))
datarun_ids = {}

datasets = os.listdir(BASELINE_PATH)
datasets = datasets[:5]
print('using datasets', ', '.join(datasets))

# generate datasets and dataruns
for ds in datasets:
    run_conf.train_path = DATA_URL + ds
    run_conf.dataset_id = None
    datarun_ids[ds] = enter_datarun(sql_conf, run_conf)

# work on the dataruns until they're done
work_parallel(db=db, datarun_ids=list(datarun_ids.values()), n_procs=args.processes)

# graph the results
for ds in datasets:
    with open(join(BASELINE_PATH, ds)) as f:
        baseline = [float(line.strip()) for line in f]
    test = get_best_so_far(db, datarun_ids[ds])
    graph_series(100, ds, baseline=baseline, test=test)
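
The float(line.strip()) parse above implies the baseline files are plain text with one metric value per line. A sketch that writes such a file so the graphing loop can be exercised without real results; make_baseline, the dataset name, and the score values are all hypothetical, and BASELINE_PATH is the constant used in the example above.

import os
from os.path import join

def make_baseline(path, ds, scores):
    # one float per line, matching the float(line.strip()) parser above
    os.makedirs(path, exist_ok=True)
    with open(join(path, ds), 'w') as f:
        f.write('\n'.join('%f' % s for s in scores) + '\n')

make_baseline(BASELINE_PATH, 'example_dataset.csv', [0.50, 0.55, 0.61])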
Example #5
parser = argparse.ArgumentParser(description='''
The script will create a datarun for each dataset, then run a worker until the
jobs are finished.
''')
parser.add_argument('--processes',
                    help='number of processes to run concurrently',
                    type=int,
                    default=4)

args = parser.parse_args()
sql_config, run_config, aws_config = load_config(sql_path=SQL_CONFIG,
                                                 run_path=RUN_CONFIG,
                                                 aws_path=AWS_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    dataset = enter_dataset(db, run_config, aws_config)
    datarun_ids.append(enter_datarun(sql_config, run_config, aws_config))

work_parallel(db=db,
              datarun_ids=datarun_ids,
              aws_config=aws_config,
              n_procs=args.processes)

print('workers finished.')

for rid in datarun_ids:
    print_summary(db, rid)
Example #6
parser = argparse.ArgumentParser(description='''
Run a single end-to-end test with 10 sample datasets.
The script will create a datarun for each dataset, then run a worker until the
jobs are finished.
''')
parser.add_argument('--processes',
                    help='number of processes to run concurrently',
                    type=int,
                    default=4)

args = parser.parse_args()
sql_config, run_config, _ = load_config(sql_path=SQL_CONFIG,
                                        run_path=RUN_CONFIG)

db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    dataset = enter_dataset(db=db, run_config=run_config)
    datarun_ids.append(
        enter_datarun(sql_config=sql_config, run_config=run_config))

work_parallel(db=db, datarun_ids=datarun_ids, n_procs=args.processes)

print('workers finished.')

for rid in datarun_ids:
    print_summary(db, rid)