Example #1
def run_automl(input_dir, output_dir, data_name, time_budget, running_on_codalab):
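    """Run the AutoML stacking manager on one dataset.

    Prints the run configuration, builds default experiment parameters, then
    drives managers.FixedLearnersStackingManager until the time budget is
    spent; any exception is caught and its traceback printed so the
    surrounding harness keeps running. The commented-out calls are
    alternative strategies kept for reference. Assumes the project modules
    (managers, constants, experiment) and traceback are imported at module
    level (imports are not shown in this snippet).
    """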
    print('input_dir = "%s"' % input_dir)
    print('output_dir = "%s"' % output_dir)
    print('data_name = "%s"' % data_name)
    print('time_budget = %s' % time_budget)
    try:
        # automl.data_doubling_rf(input_dir, output_dir, data_name, time_budget, 20)
        # automl.cv_growing_rf(input_dir, output_dir, data_name, time_budget)
        # automl.cv_growing_rf_gbm(input_dir, output_dir, data_name, time_budget)

        # automl.competition_example(input_dir, output_dir, data_name, time_budget)
        # automl.competition_example_only_rf(input_dir, output_dir, data_name, time_budget)
        # automl.freeze_thaw_cv_rf(input_dir, output_dir, data_name, time_budget)
        # automl.freeze_thaw_cv_rf_gbm(input_dir, output_dir, data_name, time_budget, compute_quantum=10)
        # automl.automl_phase_0(input_dir, output_dir, data_name, time_budget)

        # mgr = managers.FixedLearnersFreezeThawManager(input_dir=input_dir, output_dir=output_dir,
        #                                               basename=data_name, time_budget=time_budget,
        #                                               compute_quantum=None, plot=not running_on_codalab, min_mem=4,
        #                                               n_folds=5)

        exp = dict()
        exp = experiment.exp_param_defaults(exp)

        mgr = managers.FixedLearnersStackingManager(input_dir=input_dir, output_dir=output_dir,
                                                    basename=data_name, time_budget=time_budget,
                                                    compute_quantum=None, plot=not running_on_codalab,
                                                    n_folds=5,
                                                    overhead_memory=constants.OVERHEAD,
                                                    cgroup_soft_limit=constants.CGROUP_SOFT_LIMIT,
                                                    cgroup_hard_limit=constants.CGROUP_HARD_LIMIT,
                                                    exp=exp)
        mgr.communicate()
    except Exception:
        traceback.print_exc()
Example #2
def timing_triple_cloud():
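    """Estimate MH iteration cost on PiCloud, then launch a live run.

    Loads the irm_synth_20 dataset, runs experiment.network_cv_timing_run
    remotely to measure time_per_mh_iter, sizes intermediate_iter so the
    sampling phase fits in roughly 90% of max_sample_time, and finally runs
    experiment.network_cv_single_run on the cloud. Assumes the PiCloud
    `cloud` module, scipy, and the project modules (experiment, models) are
    imported at module level, and that picloud_venture_credentials.py
    defines cloud_environment (imports are not shown in this snippet).
    """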
    execfile('picloud_venture_credentials.py')
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    exp_params['max_burn_time'] = 30
    exp_params['max_sample_time'] = 30
    exp_params['n_samples'] = 25
    print(experiment.exp_params_to_str(exp_params))
    
    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))
    data = {'observations' : observed, 'missing' : missing}
    
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    
    # Timing run
    print('Timing')
    job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment)
    time_per_mh_iter = cloud.result(job_id)['time_per_mh_iter']
    
    # Live run
    print('Live')
    exp_params['intermediate_iter'] = max(1, int(round(0.9 * exp_params['max_sample_time'] / (exp_params['n_samples'] * time_per_mh_iter))))
    job_id = cloud.call(experiment.network_cv_single_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment)
    cloud.join(job_id)
    print(cloud.result(job_id))
Example #3
def timing_run_local():
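    """Run a single local timing pass of the network CV experiment.

    Uses default experiment parameters, loads the irm_synth_20 dataset, and
    prints the result of experiment.network_cv_timing_run. Assumes scipy and
    the project modules (experiment, models) are imported at module level
    (imports are not shown in this snippet).
    """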
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    print(experiment.exp_params_to_str(exp_params))
    
    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))
    data = {'observations' : observed, 'missing' : missing}
    
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    print(experiment.network_cv_timing_run(data, model, exp_params, model_params))
Example #4
def fold(unused=None):
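    """Run one cross-validation fold of the network experiment.

    Builds experiment parameters with short time limits, then prints the
    result of experiment.network_cv_fold on the irm_synth_20 dataset.
    picloud_venture_credentials.py is executed for its side effects
    (cloud credentials); the project modules (experiment, models) are
    assumed to be imported at module level (imports are not shown in this
    snippet).
    """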
    execfile('picloud_venture_credentials.py')
    data_file = '../data/irm_synth/irm_synth_20.mat'
    data_dir = '../data/irm_synth/'
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 20
    exp_params['max_burn_time'] = 10
    exp_params['max_sample_time'] = 20
    exp_params['n_samples'] = 25
    exp_params['n_restarts'] = 3
    print(experiment.exp_params_to_str(exp_params))
    
    print(experiment.network_cv_fold(data_file, data_dir, model, exp_params, model_params))
Example #5
def timing_run_cloud():
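    """Run a single timing pass of the network CV experiment on PiCloud.

    Same setup as the local timing run, but dispatches
    experiment.network_cv_timing_run through cloud.call and prints the
    remote result. Assumes the PiCloud `cloud` module, scipy, and the
    project modules (experiment, models) are imported at module level, and
    that picloud_venture_credentials.py defines cloud_environment (imports
    are not shown in this snippet).
    """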
    execfile('picloud_venture_credentials.py')
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    print(experiment.exp_params_to_str(exp_params))
    
    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))
    data = {'observations' : observed, 'missing' : missing}
    
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment)
    cloud.join(job_id)
    print(cloud.result(job_id))
Example #6
def run_automl(input_dir, output_dir, data_name, time_budget,
               running_on_codalab):
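    """Run the AutoML stacking manager on one dataset.

    Variant of Example #1 with the manager constructor call wrapped
    differently; the behaviour is identical. Assumes the project modules
    (managers, constants, experiment) and traceback are imported at module
    level (imports are not shown in this snippet).
    """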
    print('input_dir = "%s"' % input_dir)
    print('output_dir = "%s"' % output_dir)
    print('data_name = "%s"' % data_name)
    print('time_budget = %s' % time_budget)
    try:
        # automl.data_doubling_rf(input_dir, output_dir, data_name, time_budget, 20)
        # automl.cv_growing_rf(input_dir, output_dir, data_name, time_budget)
        # automl.cv_growing_rf_gbm(input_dir, output_dir, data_name, time_budget)

        # automl.competition_example(input_dir, output_dir, data_name, time_budget)
        # automl.competition_example_only_rf(input_dir, output_dir, data_name, time_budget)
        # automl.freeze_thaw_cv_rf(input_dir, output_dir, data_name, time_budget)
        # automl.freeze_thaw_cv_rf_gbm(input_dir, output_dir, data_name, time_budget, compute_quantum=10)
        # automl.automl_phase_0(input_dir, output_dir, data_name, time_budget)

        # mgr = managers.FixedLearnersFreezeThawManager(input_dir=input_dir, output_dir=output_dir,
        #                                               basename=data_name, time_budget=time_budget,
        #                                               compute_quantum=None, plot=not running_on_codalab, min_mem=4,
        #                                               n_folds=5)

        exp = dict()
        exp = experiment.exp_param_defaults(exp)

        mgr = managers.FixedLearnersStackingManager(
            input_dir=input_dir,
            output_dir=output_dir,
            basename=data_name,
            time_budget=time_budget,
            compute_quantum=None,
            plot=not running_on_codalab,
            n_folds=5,
            overhead_memory=constants.OVERHEAD,
            cgroup_soft_limit=constants.CGROUP_SOFT_LIMIT,
            cgroup_hard_limit=constants.CGROUP_HARD_LIMIT,
            exp=exp)
        mgr.communicate()
    except Exception:
        traceback.print_exc()
Example #7
def timing_triple_local():
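    """Local counterpart of timing_triple_cloud: time, then sample.

    Measures time_per_mh_iter with experiment.network_cv_timing_run, sizes
    intermediate_iter so the sampling phase fits in roughly 90% of
    max_sample_time, and prints the result of
    experiment.network_cv_single_run. Assumes scipy and the project modules
    (experiment, models) are imported at module level (imports are not
    shown in this snippet).
    """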
    exp_params = experiment.exp_param_defaults({})
    exp_params['intermediate_iter'] = 1
    exp_params['max_initial_run_time'] = 30
    exp_params['max_burn_time'] = 30
    exp_params['max_sample_time'] = 30
    exp_params['n_samples'] = 25
    print(experiment.exp_params_to_str(exp_params))
    
    data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))
    data = {'observations' : observed, 'missing' : missing}
    
    model = models.product_IRM
    model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True}
    
    # Timing run
    time_per_mh_iter = experiment.network_cv_timing_run(data, model, exp_params, model_params)['time_per_mh_iter']
    
    # Live run
    exp_params['intermediate_iter'] = max(1, int(round(0.9 * exp_params['max_sample_time'] / (exp_params['n_samples'] * time_per_mh_iter))))
    print(experiment.network_cv_single_run(data, model, exp_params, model_params))