def run_models(models, orders, GP_likelihoods=['gaussian', 'warped'], WD_kernel_degrees=[3],
               adaboost_learning_rates=[0.1], adaboost_num_estimators=[100], adaboost_max_depths=[3],
               learn_options_set=None, test=False, CV=True, setup_function=setup, set_target_fn=set_target,
               pam_audit=True, length_audit=True, return_data=False):
    '''
    Set CV=False to train a final model on all of the data rather than cross-validate;
    that path still runs through the cross-validation code (pd.cross_validate), just with CV disabled.
    '''
    results = {}
    assert learn_options_set is not None, "need to specify learn_options_set"
    all_learn_options = {}

    # short model names so they are easier to display on graphs
    feat_models_short = {
        'L1': "L1",
        'L2': "L2",
        'elasticnet': "EN",
        'linreg': "LR",
        'RandomForest': "RF",
        'AdaBoost': "AB",
        'AdaBoostClassifier': "ABClass",
        'doench': 'doench',
        "logregL1": "logregL1",
        "sgrna_from_doench": "sgrna_from_doench",
        'SVC': 'SVC',
        'xu_et_al': 'xu_et_al',
    }

    if not CV:
        print("Received option CV=False, so I'm training using all of the data")
        assert len(list(learn_options_set.keys())) == 1, "when CV is False, only 1 set of learn options is allowed"
        assert len(models) == 1, "when CV is False, only 1 model is allowed"

    for learn_options_str in list(learn_options_set.keys()):
        # these options get augmented in setup
        partial_learn_opt = learn_options_set[learn_options_str]

        for model in models:
            # models requiring explicit featurization (encoded features)
            if model in list(feat_models_short.keys()):
                for order in orders:
                    print("running %s, order %d for %s" % (model, order, learn_options_str))
                    # TODO: precompute features for all orders, as this is repeated for each model
                    Y, feature_sets, target_genes, learn_options, num_proc = setup_function(
                        test=test, order=order, learn_options=partial_learn_opt,
                        pam_audit=pam_audit, length_audit=length_audit)

                    if model == 'L1':
                        learn_options_model = L1_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'L2':
                        learn_options_model = L2_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'elasticnet':
                        learn_options_model = elasticnet_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'linreg':
                        learn_options_model = linreg_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == "logregL1":
                        learn_options_model = logregL1_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'RandomForest':
                        learn_options_model = RF_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'SVC':
                        learn_options_model = SVC_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'doench':
                        learn_options_model = doench_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'sgrna_from_doench':
                        learn_options_model = sgrna_from_doench_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'xu_et_al':
                        learn_options_model = xu_et_al_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model in ('AdaBoost', 'AdaBoostClassifier'):
                        # evaluate every hyperparameter combination, mirroring the GP grid below:
                        # each combination gets its own model_string and its own cross-validation run
                        for learning_rate in adaboost_learning_rates:
                            for num_estimators in adaboost_num_estimators:
                                for max_depth in adaboost_max_depths:
                                    learn_options_model = adaboost_setup(
                                        copy.deepcopy(learn_options), learning_rate=learning_rate,
                                        num_estimators=num_estimators, max_depth=max_depth,
                                        set_target_fn=set_target_fn, model=model)
                                    model_string = feat_models_short[model] + '_or%d_md%d_lr%.2f_n%d_%s' % (
                                        learn_options_set[learn_options_str]["order"], max_depth,
                                        learning_rate, num_estimators, learn_options_str)
                                    results[model_string] = pd.cross_validate(
                                        Y, feature_sets, learn_options=learn_options_model, TEST=test, CV=CV)
                                    all_learn_options[model_string] = learn_options_model

                    # AdaBoost variants are already cross-validated above, once per hyperparameter combination
                    if model not in ('AdaBoost', 'AdaBoostClassifier'):
                        model_string = feat_models_short[model] + '_ord%d_%s' % (
                            learn_options_set[learn_options_str]["order"], learn_options_str)
                        results[model_string] = pd.cross_validate(
                            Y, feature_sets, learn_options=learn_options_model, TEST=test, CV=CV)
                        all_learn_options[model_string] = learn_options_model

            # models that do not require explicit featurization
            else:
                assert setup_function == setup, "not yet modified to handle this"
                print("running %s for %s" % (model, learn_options_str))
                Y, feature_sets, target_genes, learn_options, num_proc = setup(
                    test=test, order=1, learn_options=partial_learn_opt,
                    pam_audit=pam_audit, length_audit=length_audit)

                if model == 'mean':
                    learn_options_model = mean_setup(copy.deepcopy(learn_options))
                elif model == 'random':
                    learn_options_model = random_setup(copy.deepcopy(learn_options))
                elif model == 'DNN':
                    learn_options_model = DNN_setup(copy.deepcopy(learn_options))
                elif model == 'GP':
                    for likelihood in GP_likelihoods:
                        for degree in WD_kernel_degrees:
                            learn_options_model = GP_setup(
                                copy.deepcopy(learn_options), likelihood=likelihood, degree=degree)
                            model_string = '%s_%s_degree%d_%s' % (model, likelihood, degree, learn_options_str)
                            results[model_string] = pd.cross_validate(
                                Y, feature_sets, learn_options=learn_options_model, TEST=test, CV=CV)
                            all_learn_options[model_string] = learn_options_model
                else:
                    raise NotImplementedError("model %s not supported" % model)

                # "GP" already calls pd.cross_validate() with its own model_string above, so skip it here
                if model != "GP":
                    model_string = model + '_%s' % learn_options_str
                    results[model_string] = pd.cross_validate(
                        Y, feature_sets, learn_options=learn_options_model, TEST=test, CV=CV)
                    all_learn_options[model_string] = learn_options_model

    return results, all_learn_options
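

# Minimal usage sketch (illustrative only): the learn-options keys and values below
# are placeholder assumptions for demonstration, not options defined in this module.
def _example_run_models_usage():
    example_learn_options = {
        "baseline": {"V": 3, "order": 2},  # hypothetical partial learn options, augmented in setup
    }

    # Cross-validated comparison of two featurized models at orders 1 and 2,
    # with a small AdaBoost learning-rate grid.
    cv_results, cv_learn_options = run_models(
        models=["L1", "AdaBoost"],
        orders=[1, 2],
        adaboost_learning_rates=[0.1, 0.05],
        learn_options_set=example_learn_options,
    )

    # CV=False trains a single final model on all of the data; exactly one model
    # and one set of learn options are allowed in that mode (see the asserts above).
    final_results, final_learn_options = run_models(
        models=["AdaBoost"],
        orders=[2],
        learn_options_set={"final": {"V": 3, "order": 2}},
        CV=False,
    )
    return cv_results, final_results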