def btb_test(tuner=None, selector=None, dataruns=None, datasets=None,
             processes=1, graph=False):
    """
    Run a test datarun using the chosen tuner and selector, and compare it to
    the baseline performance.
    """
    sql_conf, run_conf, _ = load_config(sql_path=SQL_CONFIG, run_path=RUN_CONFIG)

    if tuner is not None:
        run_conf.tuner = tuner
    if selector is not None:
        run_conf.selector = selector

    db = Database(**vars(sql_conf))
    datarun_ids = dataruns or []
    datasets = datasets or DATASETS_MAX_FIRST

    # if necessary, generate datasets and dataruns
    if not datarun_ids:
        for ds in datasets:
            run_conf.train_path = DATA_URL + ds
            run_conf.dataset_id = None
            print('Creating datarun for', run_conf.train_path)
            datarun_ids.append(enter_datarun(sql_conf, run_conf))

    # work on the dataruns til they're done
    print('Working on %d dataruns' % len(datarun_ids))
    work_parallel(db=db, datarun_ids=datarun_ids, n_procs=processes)
    print('Finished!')

    results = {}

    # compute and maybe graph the results
    for rid in datarun_ids:
        res = report_auc_vs_baseline(db, rid, graph=graph)
        results[rid] = {'test': res[0], 'baseline': res[1]}

    return results
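# A minimal sketch of how btb_test might be called. The tuner code 'gp_ei',
# selector code 'bestk', and process count below are illustrative assumptions,
# not values taken from this script:
#
#   results = btb_test(tuner='gp_ei', selector='bestk', processes=2, graph=False)
#   for rid, scores in results.items():
#       print('datarun %s: test=%.3f, baseline=%.3f'
#             % (rid, scores['test'], scores['baseline']))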
parser.add_argument('--method-path', help='path to JSON config for method to test')
args = parser.parse_args()

sql_config, run_config, aws_config = load_config(sql_path=SQL_CONFIG,
                                                 run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    run_config.methods = [args.method]
    dataset = enter_dataset(db, run_config, aws_config)
    datarun_ids.extend(enter_datarun(sql_config, run_config, aws_config,
                                     run_per_partition=True))

print('computing on dataruns', datarun_ids)
work_parallel(db=db, datarun_ids=datarun_ids, aws_config=aws_config,
              n_procs=args.processes)
print('workers finished.')

for rid in datarun_ids:
    print_hp_summary(db, rid)
parser.add_argument('--processes', help='number of processes to run concurrently',
                    type=int, default=1)
parser.add_argument('--method', help='code for method to test')
parser.add_argument('--method-path', help='path to JSON config for method to test')
args = parser.parse_args()

sql_config, run_config, aws_config = load_config(sql_path=SQL_CONFIG,
                                                 run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    # test only the chosen method if one was given; otherwise test all of them
    if args.method:
        run_config.methods = [args.method]
    else:
        run_config.methods = METHODS
    dataset = enter_dataset(db, run_config, aws_config)
    datarun_ids.extend(enter_datarun(sql_config, run_config, aws_config,
                                     run_per_partition=True))

print('computing on dataruns', datarun_ids)
work_parallel(db=db, datarun_ids=datarun_ids, aws_config=aws_config,
              n_procs=args.processes)
print('workers finished.')

for rid in datarun_ids:
    print_hp_summary(db, rid)
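# Example invocation (sketch): the script filename method_test.py and the method
# code 'dt' are assumptions; the flag names come from the argparse setup above.
#
#   python method_test.py --method dt --processes 4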
                    help='number of processes to run concurrently',
                    type=int, default=4)
args = parser.parse_args()

sql_conf, run_conf, _ = load_config(sql_path=SQL_CONFIG, run_path=RUN_CONFIG,
                                    args=args)
db = Database(**vars(sql_conf))

datarun_ids = {}
# only use the first few baseline datasets
datasets = os.listdir(BASELINE_PATH)
datasets = datasets[:5]
print('using datasets', ', '.join(datasets))

# generate datasets and dataruns
for ds in datasets:
    run_conf.train_path = DATA_URL + ds
    run_conf.dataset_id = None
    datarun_ids[ds] = enter_datarun(sql_conf, run_conf)

# work on the dataruns til they're done
work_parallel(db=db, datarun_ids=list(datarun_ids.values()),
              n_procs=args.processes)

# graph the results
for ds in datasets:
    with open(join(BASELINE_PATH, ds)) as f:
        baseline = [float(l.strip()) for l in f]
    test = get_best_so_far(db, datarun_ids[ds])
    graph_series(100, ds, baseline=baseline, test=test)
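# Note on inputs (assumption inferred from the parsing above): each file under
# BASELINE_PATH is expected to share its name with a dataset and to contain one
# best-so-far metric value per line, e.g.:
#
#   0.612
#   0.648
#   0.671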
The script will create a datarun for each dataset, then run a worker until the
jobs are finished.
''')
parser.add_argument('--processes', help='number of processes to run concurrently',
                    type=int, default=4)
args = parser.parse_args()

sql_config, run_config, aws_config = load_config(sql_path=SQL_CONFIG,
                                                 run_path=RUN_CONFIG,
                                                 aws_path=AWS_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    dataset = enter_dataset(db, run_config, aws_config)
    datarun_ids.append(enter_datarun(sql_config, run_config, aws_config))

work_parallel(db=db, datarun_ids=datarun_ids, aws_config=aws_config,
              n_procs=args.processes)
print('workers finished.')

for rid in datarun_ids:
    print_summary(db, rid)
parser = argparse.ArgumentParser(description='''
Run a single end-to-end test with 10 sample datasets.
The script will create a datarun for each dataset, then run a worker until the
jobs are finished.
''')
parser.add_argument('--processes', help='number of processes to run concurrently',
                    type=int, default=4)
args = parser.parse_args()

sql_config, run_config, _ = load_config(sql_path=SQL_CONFIG, run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = join(DATA_DIR, ds)
    dataset = enter_dataset(db=db, run_config=run_config)
    datarun_ids.append(
        enter_datarun(sql_config=sql_config, run_config=run_config))

work_parallel(db=db, datarun_ids=datarun_ids, n_procs=args.processes)
print('workers finished.')

for rid in datarun_ids:
    print_summary(db, rid)
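# Example invocation (sketch): the filename end_to_end_test.py is an assumption;
# the --processes flag is defined above.
#
#   python end_to_end_test.py --processes 4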