Example #1
def experiment2(FLAGS,
                expname='global_exp',
                test_time=False,
                dataname='eicu',
                debug=None):
    '''
    Global model only experiment
    '''
    pct = FLAGS.train_data_subset_path.split('pct_')[1].split('_')[0]
    pct_num = os.path.basename(
        FLAGS.train_data_subset_path)[:-4]  # remove '.pkl'
    expname = "pct{}_{}_{}".format(pct, pct_num, expname)

    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

    tasks = [[('--model_type', 'GLOBAL'),
              ('--train_data_subset_path', FLAGS.train_data_subset_path),
              ('--result_dir', FLAGS.eicu_cohort),
              ('--eicu_cohort', FLAGS.eicu_cohort), ('--dataname', dataname),
              ('--result_suffix', '_' + expname)] + setting
             for setting in settings]
    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=gpus, n_concurrent_process=FLAGS.nc)
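
These experiment functions all build `tasks` as lists that mix ('--flag', value) tuples with bare string flags and hand them to a shared `run` helper that is not shown here. The sketch below is a minimal, assumed implementation of that helper: it flattens each task into argv form and launches the script as a subprocess, cycling over the given GPUs. The helper's name and signature come from the calls above; its throttling behavior and the CUDA_VISIBLE_DEVICES convention are assumptions.

import itertools
import os
import subprocess


def run(script, tasks, gpus=(0,), n_concurrent_process=1):
    '''Sketch: launch `script` once per task, throttled to
    n_concurrent_process simultaneous subprocesses.'''
    gpu_cycle = itertools.cycle(gpus)
    procs = []
    for task in tasks:
        argv = ['python', script]
        for item in task:
            if isinstance(item, tuple):  # ('--flag', value) pair
                argv += [item[0], str(item[1])]
            else:                        # bare flag such as '--test_time'
                argv.append(item)
        env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(next(gpu_cycle)))
        procs.append(subprocess.Popen(argv, env=env))
        if len(procs) >= n_concurrent_process:
            procs.pop(0).wait()  # crude throttling: wait for the oldest
    for p in procs:
        p.wait()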
Example #2
def experiment16(FLAGS,
                 expname='global_plus_mtl_feature_exp',
                 test_time=False,
                 debug=None):
    '''
    global model with MTL-derived cluster membership included as a feature
    '''
    cluster_name = expname.split('feature')[0] + "exp"
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

    tasks = [[('--model_type', 'GLOBAL'), ('--result_dir', FLAGS.result_dir),
              ('--cohorts', 'custom'), '--include_cohort_as_feature',
              ('--cohort_filepath', str(i) + '_' + cluster_name + '.npy'),
              ('--result_suffix', '_' + expname)] + setting
             for i, setting in enumerate(settings)]

    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
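
The debug-index slicing above is duplicated in every experiment function. A small helper like this hypothetical sketch (not in the original module) would centralize it; it accepts either a single index or a list of indices and returns the matching subset of settings.

def select_settings(settings, debug=None):
    '''Sketch: subset `settings` by one index or a list of indices.'''
    if debug is None:
        return settings
    if isinstance(debug, list):
        return [settings[idx] for idx in debug]
    return settings[debug:debug + 1]

Example #3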
def experiment4(FLAGS,
                expname='mtl_val_curve',
                test_time=False,
                debug=None,
                dataname='eicu'):
    '''
    mtl val curve
    '''
    pct = FLAGS.train_data_subset_path.split('pct_')[1].split('_')[0]
    pct_num = os.path.basename(
        FLAGS.train_data_subset_path)[:-4]  # remove '.pkl'
    expname = "pct{}_{}_{}".format(pct, pct_num, expname)
    if FLAGS.pct_val < 1:
        expname = "{}_val{}".format(expname, int(FLAGS.pct_val * 100))

    if FLAGS.global_model_fn is None: return
    cluster_settings, model_settings = create_cluster_model_settings(FLAGS)

    if debug is not None:
            if isinstance(debug, list):
            cluster_settings = [cluster_settings[idx] for idx in debug]
            model_settings = [model_settings[idx] for idx in debug]
        else:
            idx = debug
            cluster_settings = cluster_settings[idx:idx + 1]
            model_settings = model_settings[idx:idx + 1]

    cluster_settings = [
        [('--model_type', 'VAL_CURVE'),
         ('--train_data_subset_path', FLAGS.train_data_subset_path),
         ('--pct_val', FLAGS.pct_val), "--cluster_add_result_suffix",
         ('--result_dir', FLAGS.eicu_cohort),
         ('--eicu_cohort', FLAGS.eicu_cohort), ('--dataname', dataname),
         ('--global_model_fn', FLAGS.global_model_fn),
         ('--result_suffix', '_' + expname)] + setting
        for setting in cluster_settings
    ]
    model_settings = [
        [('--model_type', 'MULTITASK'),
         ('--train_data_subset_path', FLAGS.train_data_subset_path),
         ('--result_dir', FLAGS.eicu_cohort),
         ('--eicu_cohort', FLAGS.eicu_cohort), ('--dataname', dataname),
         ('--result_suffix', '_' + expname),
         ('--global_model_fn', FLAGS.global_model_fn),
         ('--cohort_filepath', str(i) + '_' + expname + '.npy')] + setting
        for i, setting in enumerate(model_settings)
    ]

    # Note the temporal dependence between the runs: cluster_settings must
    # run first, followed by model_settings, and model_settings must use
    # the model produced by the cluster run.
    run('cluster_moe.py',
        cluster_settings,
        gpus=gpus,
        n_concurrent_process=FLAGS.nc)
    if test_time:
        model_settings = [['--test_time', '--bootstrap'] + setting
                          for setting in model_settings]
    run('moe.py', model_settings, gpus=gpus, n_concurrent_process=FLAGS.nc)
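
The experiment functions all read the same attributes off FLAGS. The argparse sketch below reconstructs a plausible FLAGS namespace from the attributes used in these examples; the flag names are inferred from usage and every default value here is a made-up placeholder.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--train_data_subset_path', default='data/pct_10_seed0.pkl',
                    help="basename is expected to look like 'pct_<pct>_<...>.pkl'")
parser.add_argument('--eicu_cohort', default='mort')
parser.add_argument('--global_model_fn', default=None)
parser.add_argument('--result_dir', default='results')
parser.add_argument('--result_dir_prefix', default='')
parser.add_argument('--pct_val', type=float, default=1.0)
parser.add_argument('--nc', type=int, default=2,
                    help='number of concurrent processes')
FLAGS = parser.parse_args()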
Example #4
def experiment8(FLAGS,
                expname='snapshot_oi',
                test_time=False,
                debug=None,
                dataname='eicu'):
    '''
    snapshot outcome independent (AE)
    '''
    pct = FLAGS.train_data_subset_path.split('pct_')[1].split('_')[0]
    pct_num = os.path.basename(
        FLAGS.train_data_subset_path)[:-4]  # remove '.pkl'
    cluster_expname = "pct{}_{}_{}".format(pct, pct_num,
                                           "mtl_oi")  # reuse the mtl_oi run's clusters
    expname = "pct{}_{}_{}".format(pct, pct_num, expname)

    if FLAGS.global_model_fn is None: return
    cluster_settings, model_settings = create_cluster_model_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            cluster_settings = [cluster_settings[idx] for idx in debug]
            model_settings = [model_settings[idx] for idx in debug]
        else:
            idx = debug
            cluster_settings = cluster_settings[idx:idx + 1]
            model_settings = model_settings[idx:idx + 1]

    # cluster_settings = [[('--model_type', 'AE'),
    #                      ('--train_data_subset_path', FLAGS.train_data_subset_path),
    #                      "--cluster_add_result_suffix",
    #                      ('--result_dir', FLAGS.eicu_cohort),
    #                      ('--eicu_cohort', FLAGS.eicu_cohort),
    #                      ('--dataname', dataname),
    #                      ('--global_model_fn', FLAGS.global_model_fn),
    #                      ('--result_suffix', '_' + cluster_expname)] +
    #                     setting for setting in cluster_settings]
    model_settings = [
        [('--model_type', 'SNAPSHOT'),
         ('--train_data_subset_path', FLAGS.train_data_subset_path),
         ('--result_dir', FLAGS.eicu_cohort),
         ('--eicu_cohort', FLAGS.eicu_cohort), ('--dataname', dataname),
         ('--result_suffix', '_' + expname),
         ('--global_model_fn', FLAGS.global_model_fn),
         ('--cohort_filepath', str(i) + '_' + cluster_expname + '.npy')] +
        setting for i, setting in enumerate(model_settings)
    ]

    # Note the temporal dependence between the runs: cluster_settings must
    # run first, followed by model_settings, and model_settings must use
    # the model produced by the cluster run. There is no need to rerun the
    # cluster settings here because experiment5 should already have run them.
    # run('cluster_moe.py', cluster_settings, gpus=gpus, n_concurrent_process=FLAGS.nc)
    if test_time:
        model_settings = [['--test_time', '--bootstrap'] + setting
                          for setting in model_settings]
    run('moe.py', model_settings, gpus=gpus, n_concurrent_process=FLAGS.nc)
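
experiment8 does not launch the clustering stage itself: it reuses the cluster assignments that experiment5 (the mtl_oi run) saved as '<i>_<cluster_expname>.npy'. A pre-flight check such as this hypothetical sketch can fail fast when those files are missing; the directory the .npy files live in is an assumption.

import os


def assert_cluster_files_exist(result_dir, cluster_expname, n_settings):
    # Fail early if experiment5 has not yet produced the cluster assignments.
    for i in range(n_settings):
        path = os.path.join(result_dir, '{}_{}.npy'.format(i, cluster_expname))
        if not os.path.exists(path):
            raise FileNotFoundError(
                '{} is missing; run experiment5 (mtl_oi) first'.format(path))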
Example #5
def experiment_debug_joint(FLAGS,
                           expname='debug',
                           test_time=False,
                           viz_time=False,
                           debug=None):
    '''
    debug settings: see the README for manually tuned performance
    '''
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

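    # The settings generated above are discarded and replaced with a single
    # hand-tuned configuration; this appears intentional for debugging.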
    settings = [[('--lr', 0.001), ('--wd', 1e-4)]]

    tasks = [
        [
            ('--model_type', 'MULTITASK'),
            ('--result_dir', 'debug'),
            '--include_cohort_as_feature',
            # '--test_time',
            # '--bootstrap',
            ('--epochs', 100),
            ('--global_model_fn', FLAGS.global_model_fn),
            ('--result_suffix', '_' + expname),
            ('--cohorts', 'careunit')
        ] + setting for setting in settings
    ]

    # tasks = [[('--model_type', 'MULTITASK'), # auroc 0.852
    #           ('--result_dir', FLAGS.result_dir),
    #           ('--epochs', 100),
    #           ('--global_model_fn', FLAGS.global_model_fn),
    #           ('--result_suffix', '_' + expname),
    #           ('--cohort_filepath', 'sample_y_quartile.npy'),
    #           ('--cohorts', 'custom')] +
    #          setting for setting in settings]

    # tasks = [[('--model_type', 'GLOBAL'), # test auc: 0.872, val auc: 0.880
    #           ('--epochs', 100),
    #           ('--result_dir', FLAGS.result_dir),
    #           ('--global_model_fn', FLAGS.global_model_fn),
    #           ('--result_suffix', '_' + expname),
    #           '--include_cohort_as_feature',
    #           ('--cohorts', 'saps')] +
    #          setting for setting in settings]

    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    if viz_time:
        tasks = [['--viz_time'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
Example #6
def experiment7(FLAGS,
                expname='snapshot_val_curve',
                test_time=False,
                debug=None,
                dataname='eicu'):
    '''
    snapshot val curve
    '''
    if FLAGS.global_model_fn is None: return
    cluster_settings, model_settings = create_cluster_model_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            cluster_settings = [cluster_settings[idx] for idx in debug]
            model_settings = [model_settings[idx] for idx in debug]
        else:
            idx = debug
            cluster_settings = cluster_settings[idx:idx + 1]
            model_settings = model_settings[idx:idx + 1]

    cluster_settings = [[('--model_type', 'VAL_CURVE'),
                         ('--result_dir', FLAGS.eicu_cohort),
                         ('--eicu_cohort', FLAGS.eicu_cohort),
                         ('--dataname', dataname),
                         ('--global_model_fn', FLAGS.global_model_fn),
                         ('--result_suffix', '_' + expname)] + setting
                        for setting in cluster_settings]
    model_settings = [
        [('--model_type', 'SNAPSHOT'), ('--result_dir', FLAGS.eicu_cohort),
         ('--eicu_cohort', FLAGS.eicu_cohort), ('--dataname', dataname),
         ('--result_suffix', '_' + expname),
         ('--global_model_fn', FLAGS.global_model_fn),
         ('--cohort_filepath', str(i) + '_' + expname + '.npy')] + setting
        for i, setting in enumerate(model_settings)
    ]

    # Note the temporal dependence between the runs: cluster_settings must
    # run first, followed by model_settings, and model_settings must use
    # the model produced by the cluster run.
    run('cluster_moe.py',
        cluster_settings,
        gpus=[5, 6],
        n_concurrent_process=FLAGS.nc)
    if test_time:
        model_settings = [['--test_time', '--bootstrap'] + setting
                          for setting in model_settings]
    run('moe.py', model_settings, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
Example #7
def experiment9(FLAGS, expname='MTL_saps_exp', test_time=False, debug=None):
    '''
    saps quartile based MTL
    '''
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

    tasks = [[('--model_type', 'MULTITASK'),
              ('--result_dir', FLAGS.result_dir),
              ('--result_suffix', '_' + expname),
              ('--cohorts', 'saps')] + setting for setting in settings]
    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
Example #8
def experiment_debug_joint(FLAGS,
                           expname='debug',
                           test_time=False,
                           viz_time=False,
                           debug=None):
    '''
    debug settings: see the README for manually tuned performance
    '''
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

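    # The settings generated above are discarded and replaced with a single
    # hand-tuned configuration; this appears intentional for debugging.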
    settings = [[('--lr', 0.001), ('--wd', 1e-4)]]

    tasks = [
        [
            ('--model_type', 'MULTITASK'),
            ('--result_dir', FLAGS.result_dir_prefix + 'debug'),
            '--include_cohort_as_feature',
            # '--test_time',
            # '--bootstrap',
            ('--epochs', 100),
            ('--global_model_fn', FLAGS.global_model_fn),
            ('--result_suffix', '_' + expname),
            ('--cohorts', 'careunit')
        ] + setting for setting in settings
    ]

    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    if viz_time:
        tasks = [['--viz_time'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=gpus, n_concurrent_process=FLAGS.nc)
Example #9
def experiment15(FLAGS,
                 expname='saps_feature_exp',
                 test_time=False,
                 debug=None):
    '''
    global model with SAPS quartile included as an input feature
    '''
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

    tasks = [[('--model_type', 'GLOBAL'), ('--result_dir', FLAGS.result_dir),
              ('--cohorts', 'saps'), '--include_cohort_as_feature',
              ('--result_suffix', '_' + expname)] + setting
             for setting in settings]
    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
Example #10
def experiment12(FLAGS,
                 expname='separate_careunit_exp',
                 test_time=False,
                 debug=None):
    '''
    careunit separate models
    '''
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

    tasks = [[('--model_type', 'SEPARATE'), ('--result_dir', FLAGS.result_dir),
              ('--cohorts', 'careunit'),
              ('--result_suffix', '_' + expname)] + setting
             for setting in settings]
    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
Example #11
def experiment2(FLAGS,
                expname='global_exp',
                test_time=False,
                dataname='mimic',
                debug=None):
    '''
    Global model only experiment
    '''
    settings = create_joint_settings(FLAGS)

    if debug is not None:
        if isinstance(debug, list):
            settings = [settings[idx] for idx in debug]
        else:
            idx = debug
            settings = settings[idx:idx + 1]

    tasks = [[('--model_type', 'GLOBAL'), ('--result_dir', FLAGS.result_dir),
              ('--dataname', dataname),
              ('--result_suffix', '_' + expname)] + setting
             for setting in settings]
    if test_time:
        tasks = [['--test_time', '--bootstrap'] + setting for setting in tasks]
    run('moe.py', tasks, gpus=[5, 6], n_concurrent_process=FLAGS.nc)
Example #12
# Imports assumed for this snippet; `transform_experiment`, `polish`,
# `N_TRIALS_TO_POLISH`, and the sacred experiment `ex` are defined in the
# surrounding module.
import os
import pickle
from pathlib import Path

import numpy as np
import torch
import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler


def run(result_dir, nmaxepochs, nthreads, cuda):
    experiment = transform_experiment()
    # We'll use multiple processes, so cap MKL/torch threading per process
    os.environ['MKL_NUM_THREADS'] = str(nthreads)
    torch.set_num_threads(nthreads)
    try:
        # Reuse a running Ray cluster if its address has been written to disk
        with open('../config/redis_address', 'r') as f:
            address = f.read().strip()
            ray.init(redis_address=address)
    except Exception:
        # Otherwise fall back to a fresh local Ray instance
        ray.init()
    ahb = AsyncHyperBandScheduler(reward_attr='negative_loss',
                                  max_t=nmaxepochs)
    # The original called a bare `run(...)` here, which would shadow this
    # function; presumably Ray Tune's run is intended. `early_stop_all_trials`
    # is kept from the original and may require the repo's patched Tune.
    trials = tune.run(experiment,
                      scheduler=ahb,
                      raise_on_failed_trial=False,
                      queue_trials=True,
                      early_stop_all_trials=True)
    trials = [trial for trial in trials if trial.last_result is not None]
    losses = [
        -trial.last_result.get('negative_loss', float('-inf'))
        for trial in trials
    ]
    print(np.array(losses))

    # Polish solutions with L-BFGS
    polish_fn = ray.remote(num_gpus=0.25 if cuda else 0)(polish)
    sorted_trials = sorted(trials,
                           key=lambda trial: -trial.last_result.get(
                               'negative_loss', float('-inf')))
    polished_losses = ray.get([
        polish_fn.remote(trial) for trial in sorted_trials[:N_TRIALS_TO_POLISH]
    ])
    for i in range(min(N_TRIALS_TO_POLISH, len(trials))):
        sorted_trials[i].last_result[
            'polished_negative_loss'] = -polished_losses[i]
    print(np.sort(losses)[:N_TRIALS_TO_POLISH])
    print(np.sort(polished_losses))

    checkpoint_path = Path(result_dir) / experiment.name
    checkpoint_path.mkdir(parents=True, exist_ok=True)
    checkpoint_path /= 'trial.pkl'
    with checkpoint_path.open('wb') as f:
        pickle.dump(trials, f)

    ex.add_artifact(str(checkpoint_path))
    return min(losses + polished_losses)
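
A hypothetical invocation of the search above (all argument values are placeholders):

best_loss = run(result_dir='./results', nmaxepochs=200, nthreads=1, cuda=True)
print('best loss found:', best_loss)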
Example #13
    # This excerpt begins mid-function: `use_tune` below is a hypothetical
    # guard standing in for the original (elided) condition, and `main` is
    # the training entry point defined in the surrounding module; `ray` and
    # `tune` (from ray import tune) are assumed to be imported.
    if use_tune:
        # import hyperopt as hp
        # from ray.tune.suggest.hyperopt import HyperOptSearch

        ray.shutdown()
        ray.init(local_mode=True)

        config = {
            # "lr": tune.grid_search([0.1, 0.5]),
            "lr": tune.grid_search([0.005]),
            # "arch": tune.grid_search(["resnext50_32x4d", "resnet50"]),
            "arch": tune.grid_search(["resnext50_32x4d"]),
            # "step": tune.grid_search([5]),
            "lr_decay": tune.grid_search(["linear"]),
            "loss": tune.grid_search(["focalloss"]),
            # "remark": tune.grid_search(["resume_linear"])
        }
        # hyperopt = HyperOptSearch(metric="valid_loss", mode="min", space=config)

        tune.run(
            main,
            config=config,
            name="run_report",
            local_dir="./ray_results",
            # search_alg=hyperopt,
            # num_samples=2,
            # stop={"training_iteration": 5},
            resources_per_trial={"gpu": 1})
    else:
        main({})
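
Each tune.grid_search list contributes one axis to the trial grid, so the singleton lists above yield exactly one trial; widening any list multiplies the trial count. A hypothetical two-axis variant:

config_wide = {
    "lr": tune.grid_search([0.005, 0.05]),                      # 2 values
    "arch": tune.grid_search(["resnext50_32x4d", "resnet50"]),  # x2 -> 4 trials
    "lr_decay": tune.grid_search(["linear"]),
    "loss": tune.grid_search(["focalloss"]),
}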