示例#1
0
def save_final_model_V3(filename=None, include_position=True):
    '''
    Train the final V3 AdaBoost model and pickle it to `filename`.

    Builds a single `learn_options` dict, runs `run_models` on it with
    cross-validation and test mode disabled, and writes the tuple
    `(model, learn_options)` to `filename` using the highest pickle protocol.

    Parameters:
        filename: path of the pickle file to write; required.
        include_position: when truthy, "include_gene_position" is set to
            True in the learn options, otherwise to False. This is the only
            option that differs between the two variants.

    Returns:
        The trained model taken from the `run_models` results.

    Raises:
        AssertionError: if `filename` is None.

    NOTE(review): the previous docstring said
    "run_models(produce_final_model=True) is what saves the model", but this
    function does not pass `produce_final_model`; it pickles the model itself.
    '''
    # Raised explicitly (instead of a bare `assert`) so the guard still runs
    # under `python -O`; the exception type and message are unchanged.
    if filename is None:
        raise AssertionError("need to provide filename to save final model")

    learn_options = {
        "V": 3,
        'train_genes': load_data.get_V3_genes(),
        'test_genes': load_data.get_V3_genes(),
        "testing_non_binary_target_name": 'ranks',
        'include_pi_nuc_feat': True,
        "gc_features": True,
        "nuc_features": True,
        # The only setting driven by the caller.
        "include_gene_position": bool(include_position),
        "include_NGGX_interaction": True,
        "include_Tm": True,
        "include_strand": False,
        "include_gene_feature": False,
        "include_gene_guide_feature": 0,
        "extra pairs": False,
        "weighted": None,
        "training_metric": 'spearmanr',
        "NDGC_k": 10,
        "cv": "gene",
        "include_gene_effect": False,
        "include_drug": False,
        "include_sgRNAscore": False,
        # main "ls", alternatives: "lad", "huber", "quantile", see scikit docs for details
        'adaboost_loss': 'ls',
        # this parameter is only used by the huber and quantile loss functions.
        'adaboost_alpha': 0.5,
        'normalize_features': False,
    }

    results, _ = run_models(
        ["AdaBoost"],
        orders=[2],
        adaboost_learning_rates=[0.1],
        adaboost_max_depths=[3],
        adaboost_num_estimators=[100],
        adaboost_CV=False,
        learn_options_set={'final': learn_options},
        test=False,
        CV=False)

    # Only one learn-options entry was submitted, so the first result is used.
    # list(...) keeps the indexing valid when .values() returns a view
    # (Python 3) rather than a list (Python 2).
    # NOTE(review): [3][0] presumably selects the fitted model inside the
    # result entry -- confirm against run_models' return layout.
    model = list(results.values())[0][3][0]

    with open(filename, 'wb') as f:
        pickle.dump((model, learn_options), f, pickle.HIGHEST_PROTOCOL)

    return model
def save_final_model_V3(filename=None, include_position=True):
    '''
    Fit the final V3 model with AdaBoost and save it as a pickle.

    The learn options are assembled once; `include_position` only controls
    the "include_gene_position" entry. `run_models` is then invoked for
    "AdaBoost" with order 2, with `CV`, `adaboost_CV` and `test` all False,
    and the pair `(model, learn_options)` is pickled to `filename`.

    Parameters:
        filename: destination path for the pickle; must not be None.
        include_position: truthy to enable "include_gene_position" in the
            learn options, falsy to disable it.

    Returns:
        The model extracted from the first entry of the `run_models` results.

    Raises:
        AssertionError: when no `filename` is given.

    NOTE(review): an earlier docstring claimed
    "run_models(produce_final_model=True) is what saves the model"; that
    keyword is not used here, the pickle is written by this function.
    '''
    if filename is None:
        # Explicit raise keeps the check alive under `python -O`, where
        # `assert` statements are removed; type and message are unchanged.
        raise AssertionError("need to provide filename to save final model")

    use_position = True if include_position else False

    learn_options = {
        "V": 3,
        'train_genes': load_data.get_V3_genes(),
        'test_genes': load_data.get_V3_genes(),
        "testing_non_binary_target_name": 'ranks',
        'include_pi_nuc_feat': True,
        "gc_features": True,
        "nuc_features": True,
        "include_gene_position": use_position,
        "include_NGGX_interaction": True,
        "include_Tm": True,
        "include_strand": False,
        "include_gene_feature": False,
        "include_gene_guide_feature": 0,
        "extra pairs": False,
        "weighted": None,
        "training_metric": 'spearmanr',
        "NDGC_k": 10,
        "cv": "gene",
        "include_gene_effect": False,
        "include_drug": False,
        "include_sgRNAscore": False,
        'adaboost_loss': 'ls',  # main "ls", alternatives: "lad", "huber", "quantile", see scikit docs for details
        'adaboost_alpha': 0.5,  # this parameter is only used by the huber and quantile loss functions.
        'normalize_features': False,
    }

    learn_options_set = {'final': learn_options}
    run_output = run_models(["AdaBoost"], orders=[2], adaboost_learning_rates=[0.1],
                            adaboost_max_depths=[3], adaboost_num_estimators=[100],
                            adaboost_CV=False, learn_options_set=learn_options_set,
                            test=False, CV=False)
    # run_models returns (results, all_learn_options); only results is needed.
    results = run_output[0]

    # Materialise the values before indexing: on Python 3 dict.values() is a
    # view and cannot be subscripted, while on Python 2 this is a cheap copy.
    # NOTE(review): the [3][0] path is taken as-is from the original code;
    # verify it against the structure run_models produces.
    first_result = list(results.values())[0]
    model = first_result[3][0]

    with open(filename, 'wb') as f:
        # Protocol -1 selects the highest protocol available.
        pickle.dump((model, learn_options), f, -1)

    return model
示例#3
0
             orders=[1],
             adaboost_learning_rates=[0.1],
             adaboost_max_depths=[3],
             adaboost_num_estimators=[100],
             adaboost_CV=False,
             learn_options_set=learn_options_set,
             test=test,
             produce_final_model=True)
         #results, all_learn_options = run_models([ "AdaBoost"], orders=[2], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test)
         # all_metrics, gene_names = util.get_all_metrics(results, test_metrics=['AUC', 'spearmanr'], learn_options_set=learn_options_set)
         # util.plot_all_metrics(all_metrics, gene_names, all_learn_options, save=True)
         # plt.close('all')
 else:
     learn_options = {
         "V": 3,
         "train_genes": load_data.get_V3_genes(),
         "test_genes": load_data.get_V3_genes(),
         "testing_non_binary_target_name": 'ranks',
         'include_pi_nuc_feat': True,
         "gc_features": True,
         "nuc_features": True,
         "include_gene_position": True,
         "include_NGGX_interaction": True,
         "include_Tm": True,
         "include_strand": False,
         "include_gene_feature": False,
         "include_gene_guide_feature": 0,
         "extra pairs": False,
         "weighted": None,
         "training_metric": 'spearmanr',
         "NDGC_k": 10,
            thiskey = 'try_models'
            learn_options_set = {thiskey: learn_options_2}
            print "working on %s" % thiskey
             #learn_options_set = {'drug_gene': learn_options, 'drug': learn_options_2}

             #results, all_learn_options = run_models(['linreg', 'L1', 'L2'], orders=[1], test=True, target_name='score')
             # all_learn_options is similar to (and contains all of_ learn_options_set, but has more
             # entries populated, by the model specification, etc.
            results, all_learn_options = run_models(["AdaBoost"], orders=[1], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test, produce_final_model=True)
             #results, all_learn_options = run_models([ "AdaBoost"], orders=[2], adaboost_learning_rates=[0.1], adaboost_max_depths=[3], adaboost_num_estimators=[100], adaboost_CV=False, learn_options_set=learn_options_set, test=test)
             # all_metrics, gene_names = util.get_all_metrics(results, test_metrics=['AUC', 'spearmanr'], learn_options_set=learn_options_set)
             # util.plot_all_metrics(all_metrics, gene_names, all_learn_options, save=True)
             # plt.close('all')
    else :
        learn_options = {"V": 3,
                "train_genes":  load_data.get_V3_genes(),
                "test_genes": load_data.get_V3_genes(),
                "testing_non_binary_target_name": 'ranks',
                'include_pi_nuc_feat': True,
                "gc_features": True,
                "nuc_features": True,            
                "include_gene_position": True,                    
                "include_NGGX_interaction": True,
                "include_Tm": True,
                "include_strand": False,
                "include_gene_feature": False,
                "include_gene_guide_feature": 0,                          
                "extra pairs": False,
                "weighted": None,            
                "training_metric": 'spearmanr',
                "NDGC_k": 10,