def save_final_model_V3(filename=None, include_position=True):
    '''
    Train an AdaBoost model with the V3 learn options and pickle it.

    The function calls run_models with a single learn-options set named
    'final' (CV and AdaBoost CV both disabled), takes the model out of the
    returned results, and writes the tuple (model, learn_options) to
    `filename` with the highest available pickle protocol.

    NOTE(review): an earlier version of this docstring said that
    run_models(produce_final_model=True) is what saves the model. This
    function does not pass that flag; it pickles the model itself.

    Parameters
    ----------
    filename : str
        Path of the pickle file to write. Required, despite the None default.
    include_position : bool
        Stored (by truthiness) as learn_options["include_gene_position"].
        It is the only option that depends on the arguments.

    Returns
    -------
    The model object extracted from the run_models results.

    Raises
    ------
    AssertionError
        If `filename` is None.
    '''
    test = False
    assert filename is not None, "need to provide filename to save final model"

    # One dict instead of two near-identical literals: only
    # "include_gene_position" differs between the two configurations.
    learn_options = {
        "V": 3,
        'train_genes': load_data.get_V3_genes(),
        'test_genes': load_data.get_V3_genes(),
        "testing_non_binary_target_name": 'ranks',
        'include_pi_nuc_feat': True,
        "gc_features": True,
        "nuc_features": True,
        "include_gene_position": bool(include_position),
        "include_NGGX_interaction": True,
        "include_Tm": True,
        "include_strand": False,
        "include_gene_feature": False,
        "include_gene_guide_feature": 0,
        "extra pairs": False,
        "weighted": None,
        "training_metric": 'spearmanr',
        "NDGC_k": 10,
        "cv": "gene",
        "include_gene_effect": False,
        "include_drug": False,
        "include_sgRNAscore": False,
        # main "ls", alternatives: "lad", "huber", "quantile", see scikit docs for details
        'adaboost_loss': 'ls',
        # this parameter is only used by the huber and quantile loss functions.
        'adaboost_alpha': 0.5,
        'normalize_features': False,
    }

    learn_options_set = {'final': learn_options}
    results, _all_learn_options = run_models(
        ["AdaBoost"],
        orders=[2],
        adaboost_learning_rates=[0.1],
        adaboost_max_depths=[3],
        adaboost_num_estimators=[100],
        adaboost_CV=False,
        learn_options_set=learn_options_set,
        test=test,
        CV=False,
    )

    # list(...) keeps the indexing valid on Python 3, where dict.values()
    # returns a non-indexable view; on Python 2 the result is unchanged.
    # presumably `results` has exactly one entry because learn_options_set
    # has one key and a single model/order is requested -- TODO confirm.
    first_result = list(results.values())[0]
    model = first_result[3][0]

    with open(filename, 'wb') as f:
        pickle.dump((model, learn_options), f, -1)

    return model
def save_final_model_V3(filename=None, include_position=True):
    '''
    Train an AdaBoost model with the V3 learn options and pickle it.

    The function calls run_models with a single learn-options set named
    'final' (CV and AdaBoost CV both disabled), takes the model out of the
    returned results, and writes the tuple (model, learn_options) to
    `filename` with the highest available pickle protocol.

    NOTE(review): an earlier version of this docstring said that
    run_models(produce_final_model=True) is what saves the model. This
    function does not pass that flag; it pickles the model itself.

    NOTE(review): a function with this same name and signature is defined
    earlier in this file; this later definition is the one that takes
    effect at import time. Consider removing one of them.

    Parameters
    ----------
    filename : str
        Path of the pickle file to write. Required, despite the None default.
    include_position : bool
        Stored (by truthiness) as learn_options["include_gene_position"].
        It is the only option that depends on the arguments.

    Returns
    -------
    The model object extracted from the run_models results.

    Raises
    ------
    AssertionError
        If `filename` is None.
    '''
    test = False
    assert filename is not None, "need to provide filename to save final model"

    # One dict instead of two near-identical literals: only
    # "include_gene_position" differs between the two configurations.
    learn_options = {
        "V": 3,
        'train_genes': load_data.get_V3_genes(),
        'test_genes': load_data.get_V3_genes(),
        "testing_non_binary_target_name": 'ranks',
        'include_pi_nuc_feat': True,
        "gc_features": True,
        "nuc_features": True,
        "include_gene_position": bool(include_position),
        "include_NGGX_interaction": True,
        "include_Tm": True,
        "include_strand": False,
        "include_gene_feature": False,
        "include_gene_guide_feature": 0,
        "extra pairs": False,
        "weighted": None,
        "training_metric": 'spearmanr',
        "NDGC_k": 10,
        "cv": "gene",
        "include_gene_effect": False,
        "include_drug": False,
        "include_sgRNAscore": False,
        # main "ls", alternatives: "lad", "huber", "quantile", see scikit docs for details
        'adaboost_loss': 'ls',
        # this parameter is only used by the huber and quantile loss functions.
        'adaboost_alpha': 0.5,
        'normalize_features': False,
    }

    learn_options_set = {'final': learn_options}
    results, _all_learn_options = run_models(
        ["AdaBoost"],
        orders=[2],
        adaboost_learning_rates=[0.1],
        adaboost_max_depths=[3],
        adaboost_num_estimators=[100],
        adaboost_CV=False,
        learn_options_set=learn_options_set,
        test=test,
        CV=False,
    )

    # list(...) keeps the indexing valid on Python 3, where dict.values()
    # returns a non-indexable view; on Python 2 the result is unchanged.
    # presumably `results` has exactly one entry because learn_options_set
    # has one key and a single model/order is requested -- TODO confirm.
    first_result = list(results.values())[0]
    model = first_result[3][0]

    with open(filename, 'wb') as f:
        pickle.dump((model, learn_options), f, -1)

    return model
orders=[1], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test, produce_final_model=True) #results, all_learn_options = run_models([ "AdaBoost"], orders=[2], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test) # all_metrics, gene_names = util.get_all_metrics(results, test_metrics=['AUC', 'spearmanr'], learn_options_set=learn_options_set) # util.plot_all_metrics(all_metrics, gene_names, all_learn_options, save=True) # plt.close('all') else: learn_options = { "V": 3, "train_genes": load_data.get_V3_genes(), "test_genes": load_data.get_V3_genes(), "testing_non_binary_target_name": 'ranks', 'include_pi_nuc_feat': True, "gc_features": True, "nuc_features": True, "include_gene_position": True, "include_NGGX_interaction": True, "include_Tm": True, "include_strand": False, "include_gene_feature": False, "include_gene_guide_feature": 0, "extra pairs": False, "weighted": None, "training_metric": 'spearmanr', "NDGC_k": 10,
thiskey = 'try_models' learn_options_set = {thiskey: learn_options_2} print "working on %s" % thiskey #learn_options_set = {'drug_gene': learn_options, 'drug': learn_options_2} #results, all_learn_options = run_models(['linreg', 'L1', 'L2'], orders=[1], test=True, target_name='score') # all_learn_options is similar to (and contains all of_ learn_options_set, but has more # entries populated, by the model specification, etc. results, all_learn_options = run_models(["AdaBoost"], orders=[1], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test, produce_final_model=True) #results, all_learn_options = run_models([ "AdaBoost"], orders=[2], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test) # all_metrics, gene_names = util.get_all_metrics(results, test_metrics=['AUC', 'spearmanr'], learn_options_set=learn_options_set) # util.plot_all_metrics(all_metrics, gene_names, all_learn_options, save=True) # plt.close('all') else : learn_options = {"V": 3, "train_genes": load_data.get_V3_genes(), "test_genes": load_data.get_V3_genes(), "testing_non_binary_target_name": 'ranks', 'include_pi_nuc_feat': True, "gc_features": True, "nuc_features": True, "include_gene_position": True, "include_NGGX_interaction": True, "include_Tm": True, "include_strand": False, "include_gene_feature": False, "include_gene_guide_feature": 0, "extra pairs": False, "weighted": None, "training_metric": 'spearmanr', "NDGC_k": 10,