Example #1
from sklearn import grid_search  # pre-0.18 home of ParameterGrid; sklearn.model_selection in current releases


def get_params(train, labels, model=0):
    """Grid-search one of two predefined parameter grids, selected by the `model` index.
    """
    param_grid = [
        {
            'eta': [0.009],
            'num_trees': [1500],
            'max_depth': [13],
            'child': [0.40],
            'sub': [0.95],
            'col': [0.65],
            'gamma': [0.77]
        },
        {
            'eta': [0.009],
            'num_trees': [1000],
            'max_depth': [6],
            'child': [0.57],
            'sub': [0.75],
            'col': [0.60],
            'gamma': [0.93]
        },
    ][model]

    best_p = {}
    best_score = 1  # assumes get_score returns an error in [0, 1]; lower is better
    for p in grid_search.ParameterGrid(param_grid):
        cv_score, cv_scores, cv_preds = get_score(train, labels, p)
        if cv_score < best_score:
            best_score = cv_score
            best_p = p

    print(best_p)
    return best_p
Example #2
def _generate_model_grid():
    mg = []
    idx = 0
    for key in models:  # Python 3: dict.iterkeys() was removed
        for p in grid_search.ParameterGrid(models[key]['parameters']):
            mg.append([idx, [], [], key, p, models[key]['dataset_type']])
            idx += 1
    return mg
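The snippets on this page use the pre-0.18 sklearn.grid_search module, which was removed in scikit-learn 0.20. A minimal sketch of the equivalent iteration with the current sklearn.model_selection.ParameterGrid (the grid values here are illustrative):

from sklearn.model_selection import ParameterGrid

param_grid = {'eta': [0.009], 'max_depth': [6, 13]}
for p in ParameterGrid(param_grid):
    # each p is a plain dict, e.g. {'eta': 0.009, 'max_depth': 6}
    print(p)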
Example #3
def generate_copies(self, param_dict):
    # the argument is renamed from `dict`, which shadows the builtin
    copies = []
    param_grid = list(grid_search.ParameterGrid(param_dict))
    for params in param_grid:
        c2 = deepcopy(self)  # requires: from copy import deepcopy
        c2.set(**params)
        copies.append(c2)
    return copies
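A hedged usage sketch: generate_copies assumes the host object exposes a set(**kwargs) mutator and fans one configured object out per grid point. The Config class below is hypothetical.

from copy import deepcopy
from sklearn.model_selection import ParameterGrid

class Config:
    """Hypothetical host class with the set(**kwargs) mutator generate_copies expects."""
    def set(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

    def generate_copies(self, param_dict):
        copies = []
        for params in ParameterGrid(param_dict):
            c2 = deepcopy(self)  # each copy gets one parameter combination
            c2.set(**params)
            copies.append(c2)
        return copies

copies = Config().generate_copies({'C': [0.1, 1.0], 'tol': [1e-3]})
print(len(copies))  # 2 parameter combinations -> 2 copies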
Example #4
    def generate_file_names(self, pc):
        self.files = OrderedDict()
        base_file_name = 'RelReg-cvx-constraints-%s=%s'
        use_test = other_method_configs['use_test_error_for_model_selection']

        self.files['SKL-RidgeReg.pkl'] = 'SKL Ridge Regression'
        self.files['SLL-NW.pkl'] = 'LLGC'
        self.files['NW.pkl'] = 'NW'
        #self.files['SKL-DumReg.pkl'] = 'Predict Mean'
        sizes = []
        suffixes = OrderedDict()
        #suffixes['mixedCV'] = [None,'']
        if not use_test:
            suffixes['nCV'] = [None, '10']

        #suffixes['numFeats'] = [str(num_feat)]

        ordered_keys = [
            'nCV',
        ]

        methods = []
        #methods.append(('numRandPairs','RelReg, %s pairs', 'Our Method: %s relative'))
        self.title = 'Test'

        all_params = list(grid_search.ParameterGrid(suffixes))
        for file_suffix, legend_name, legend_name_paper in methods:
            for size in sizes:
                for params in all_params:
                    file_name = base_file_name
                    file_name = file_name % (file_suffix, str(size))
                    legend = legend_name
                    if viz_for_paper:
                        legend = legend_name_paper
                    legend %= str(size)
                    for key in ordered_keys:
                        if key not in params:  # dict.has_key() was removed in Python 3
                            continue
                        value = params[key]
                        if value is None:
                            continue
                        if value == '':
                            file_name += '-' + key
                            if not viz_for_paper:
                                legend += ', ' + key
                        else:
                            file_name += '-' + key + '=' + str(value)
                            if not viz_for_paper:
                                legend += ', ' + str(value) + ' ' + key
                    if use_test:
                        file_name += '-TEST'
                        legend = 'TEST: ' + legend
                    file_name += '.pkl'
                    self.files[file_name] = legend
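In generate_file_names, ParameterGrid is used as a combinatorial expander over filename suffixes rather than over model hyperparameters: None means "omit the key" and '' means "append the bare key". A minimal sketch of that idea (names illustrative):

from collections import OrderedDict
from sklearn.model_selection import ParameterGrid

suffixes = OrderedDict()
suffixes['nCV'] = [None, '10']  # None -> key omitted, '10' -> '-nCV=10' appended

for params in ParameterGrid(suffixes):
    parts = ['base']
    for key, value in params.items():
        if value is None:
            continue
        parts.append(key if value == '' else '%s=%s' % (key, value))
    print('-'.join(parts))  # prints 'base' and 'base-nCV=10'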
Example #5
def _model_param_grid(params_config):
    for name, config in params_config.items():
        try:
            Model = yamlconf.import_module(config['class'])
        except Exception:
            logger.warn("Could not load model {0}".format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(Model, "train"):
            logger.warn("Model {0} does not have a train() method.".format(
                config['class']))
            continue

        param_grid = grid_search.ParameterGrid(config['params'])

        yield name, Model, param_grid
Example #6
def _estimator_param_grid(params_config):
    for name, config in params_config.items():
        try:
            EstimatorClass = yamlconf.import_module(config['class'])
            estimator = EstimatorClass()
        except Exception:
            logger.warn("Could not load estimator {0}".format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(estimator, "fit"):
            logger.warn("Estimator {0} does not have a fit() method.".format(
                config['class']))
            continue

        param_grid = grid_search.ParameterGrid(config['params'])

        yield name, estimator, param_grid
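Both generators yield (name, object, param_grid) triples so a caller can sweep configurations lazily. A hedged sketch of a consumer, with a made-up params_config and LogisticRegression standing in for yamlconf.import_module:

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid

params_config = {  # illustrative stand-in for the YAML-driven config
    'logreg': {'class': 'sklearn.linear_model.LogisticRegression',
               'params': {'C': [0.1, 1.0]}},
}

for name, config in params_config.items():
    for params in ParameterGrid(config['params']):
        estimator = LogisticRegression(**params)
        print(name, params, hasattr(estimator, 'fit'))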
Example #7
          'get' : [1],
          'adv' : [1]}

reg = {'gender_female' : frange(0.0011, 0.0048, 0.0005),
       'age_15_24' : frange(0.0041, 0.0121, 0.0006),
       'income_high' : frange(0.0001, 0.004, 0.0004)}

pop = {'gender_female' : 1,
       'age_15_24' : 3,
       'income_high' : 1}

for label in classifiers:

    dump_string('Logreg for ' + label, 1)

    for P in grid.ParameterGrid(params):

        X, Y, Xt, Yt, fmap = gen_data(classifier=label, regex=P['r'],
                                      b1=P['b1'], b2=P['b2'],
                                      url_path=P['url_path'],
                                      get=P['get'],
                                      add_adv=P['adv'],
                                      lower_pop_bound=pop[label],
                                      n_files=47)
       
        # grid search for C
        best, bestC = 0, 0
        for CC in reg[label]:
            model = lm.LogisticRegression(C=CC)
            model.fit(X, Y)
Example #8
def fit(self, X, y=None):
    if self.profile:
        return self.fit_ipp(X, y, grid_search.ParameterGrid(self.param_grid))
    else:
        return self._fit(X, y, grid_search.ParameterGrid(self.param_grid))
Example #9
    def execute(self, name, x, y, training_frame, validation_frame, test_frame,
                subset_coef):
        # build the grid only when a grid was supplied; ParameterGrid(None) raises
        if self.params_grid is None or len(self.params_grid) == 0:
            params = ["default"]
        else:
            params = grid.ParameterGrid(self.params_grid)
        results = []
        dt = datetime.datetime

        # R stuff
        ri.initr()
        h2or = importr("h2o")
        h2o_ensemble = importr("h2oEnsemble")
        base = importr("base")
        stats = importr("stats")
        cvauc = importr("cvAUC")

        h2or.h2o_init(ip=config.hostname, port=config.port, startH2O=False)

        # Add some base learners
        with open("{}/R/wrappers.r".format(os.path.dirname(__file__)),
                  "r") as f:
            ro.r("\n".join(f.readlines()))

        keep_frames = re.compile("|".join([
            training_frame.frame_id, validation_frame.frame_id,
            test_frame.frame_id
        ]) + "|.*\\.hex|py_.*")

        for p in params:
            row = [
                config.cluster, config.nthreads, name, subset_coef, self.name,
                str(p)
            ]

            # Initialize the model
            init_time = dt.now()
            # get frame names
            # load it in R
            train = h2or.h2o_getFrame(training_frame.frame_id)
            valid = h2or.h2o_getFrame(validation_frame.frame_id)
            test = h2or.h2o_getFrame(test_frame.frame_id)
            init_time = dt.now() - init_time

            # Train the model
            train_time = dt.now()
            if p == "default":
                model = h2o_ensemble.h2o_ensemble(x=toR(x),
                                                  y=y,
                                                  training_frame=train,
                                                  validation_frame=valid)
            else:
                p = {k: toR(v) for k, v in p.items()}
                model = h2o_ensemble.h2o_ensemble(x=toR(x),
                                                  y=y,
                                                  training_frame=train,
                                                  validation_frame=valid,
                                                  **p)
            train_time = dt.now() - train_time

            # Model metrics
            metrics_time = dt.now()
            RpredTrain = stats.predict(model, train)
            RpredValid = stats.predict(model, valid)
            RpredTest = stats.predict(model, test)
            predTrain = h2o.get_frame(
                h2or.h2o_getId(RpredTrain.rx2("pred"))[0])
            predValid = h2o.get_frame(
                h2or.h2o_getId(RpredValid.rx2("pred"))[0])
            predTest = h2o.get_frame(h2or.h2o_getId(RpredTest.rx2("pred"))[0])
            metrics_time = dt.now() - metrics_time

            row.append(init_time.total_seconds())
            row.append(train_time.total_seconds())
            row.append(metrics_time.total_seconds())
            row.append((init_time + train_time + metrics_time).total_seconds())

            datasets = [(RpredTrain, predTrain, train, training_frame),
                        (RpredValid, predValid, valid, validation_frame),
                        (RpredTest, predTest, test, test_frame)]

            append = row.append
            for pred_r_ptr, pred_py_ptr, data_r_ptr, data_py_ptr in datasets:
                acc = None
                err = None
                mse = ((pred_py_ptr - data_py_ptr[y])**2).mean()[0]
                if training_frame[y].isfactor()[0]:
                    acc = (pred_py_ptr == data_py_ptr[y]).mean()[0]
                    err = 1.0 - acc

                auc = cvauc.AUC(
                    base.attr(pred_r_ptr.rx2("pred"), "data")[2],
                    base.attr(data_r_ptr, "data").rx2(y))[0]

                # TODO: Add more metrics
                append(acc)
                append(err)
                append(None)  # F1()
                append(None)  # fnr()
                append(None)  # fpr()
                append(None)  # tnr()
                append(None)  # tpr()
                append(None)  # precision()
                append(None)  # recall()
                append(None)  # sensitivity()
                append(None)  # specificity()
                append(None)  # aic()
                append((auc))  # auc()
                append(None)  # logloss()
                append(None)  # mean_residual_deviance()
                append(mse)  # mse()
                append(None)  # null_degrees_of_freedom()
                append(None)  # null_deviance()
                append(None)  # r2()
                append(None)  # residual_degrees_of_freedom()
                append(None)  # residual_deviance()

                h2o.remove(pred_py_ptr)

            # Python 3: use a list comprehension (map() returns an iterator);
            # numbers that are NaN, or the literal string "NaN", become None
            row = [
                None if (isinstance(v, numbers.Number) and np.isnan(v))
                or v in ("NaN", u"NaN") else v
                for v in row
            ]
            persist(row)
            results.append(row)
            for [frame] in h2o.ls().as_matrix():
                if not keep_frames.match(frame):
                    h2o.remove(frame)

        df = pd.DataFrame(results, columns=config.Names)
        return df
Example #10
    def execute(self, name, x, y, training_frame, validation_frame, test_frame,
                subset_coef):
        # build the grid only when a grid was supplied; ParameterGrid(None) raises
        if self.params_grid is None or len(self.params_grid) == 0:
            params = ["default"]
        else:
            params = grid.ParameterGrid(self.params_grid)
        results = []
        dt = datetime.datetime

        keep_frames = re.compile("|".join([
            training_frame.frame_id, validation_frame.frame_id,
            test_frame.frame_id
        ]) + "|.*\\.hex|py_.*")
        for p in params:
            row = [
                config.cluster, config.nthreads, name, subset_coef, self.name,
                str(p)
            ]
            # Initialize the model
            init_time = dt.now()
            if p == "default":
                model = self.base_model()
            else:
                model = self.base_model(**p)

            init_time = dt.now() - init_time

            # Train the model
            train_time = dt.now()
            model.train(x,
                        y,
                        training_frame=training_frame,
                        validation_frame=validation_frame)
            train_time = dt.now() - train_time

            # Model metrics
            metrics_time = dt.now()
            metrics = model.model_performance(test_data=test_frame)
            err_tr = get_classification_error(model, training_frame, "train")
            err_va = get_classification_error(model, validation_frame, "valid")
            err_te = get_classification_error(metrics, test_frame, "test")
            metrics_time = dt.now() - metrics_time

            # results
            row.append(init_time.total_seconds())
            row.append(train_time.total_seconds())
            row.append(metrics_time.total_seconds())
            row.append((init_time + train_time + metrics_time).total_seconds())

            # on training data
            appendVal(row, lambda: 1 - err_tr)
            appendVal(row, lambda: err_tr)
            appendVal(row, lambda: model.F1())
            appendVal(row, lambda: model.fnr())
            appendVal(row, lambda: model.fpr())
            appendVal(row, lambda: model.tnr())
            appendVal(row, lambda: model.tpr())
            appendVal(row, lambda: model.precision())
            appendVal(row, lambda: model.recall())
            appendVal(row, lambda: model.sensitivity())
            appendVal(row, lambda: model.specificity())
            appendVal(row, lambda: model.aic())
            appendVal(row, lambda: model.auc())
            appendVal(row, lambda: model.logloss())
            appendVal(row, lambda: model.mean_residual_deviance())
            appendVal(row, lambda: model.mse())
            appendVal(row, lambda: model.null_degrees_of_freedom())
            appendVal(row, lambda: model.null_deviance())
            appendVal(row, lambda: model.r2())
            appendVal(row, lambda: model.residual_degrees_of_freedom())
            appendVal(row, lambda: model.residual_deviance())

            # on validation data
            appendVal(row, lambda: 1 - err_va)
            appendVal(row, lambda: err_va)
            appendVal(row, lambda: model.F1(valid=True))
            appendVal(row, lambda: model.fnr(valid=True))
            appendVal(row, lambda: model.fpr(valid=True))
            appendVal(row, lambda: model.tnr(valid=True))
            appendVal(row, lambda: model.tpr(valid=True))
            appendVal(row, lambda: model.precision(valid=True))
            appendVal(row, lambda: model.recall(valid=True))
            appendVal(row, lambda: model.sensitivity(valid=True))
            appendVal(row, lambda: model.specificity(valid=True))
            appendVal(row, lambda: model.aic(valid=True))
            appendVal(row, lambda: model.auc(valid=True))
            appendVal(row, lambda: model.logloss(valid=True))
            appendVal(row, lambda: model.mean_residual_deviance(valid=True))
            appendVal(row, lambda: model.mse(valid=True))
            appendVal(row, lambda: model.null_degrees_of_freedom(valid=True))
            appendVal(row, lambda: model.null_deviance(valid=True))
            appendVal(row, lambda: model.r2(valid=True))
            appendVal(row,
                      lambda: model.residual_degrees_of_freedom(valid=True))
            appendVal(row, lambda: model.residual_deviance(valid=True))

            # on test data
            appendVal(row, lambda: 1 - err_te)
            appendVal(row, lambda: err_te)
            appendVal(row, lambda: metrics.F1())
            appendVal(row, lambda: metrics.fnr())
            appendVal(row, lambda: metrics.fpr())
            appendVal(row, lambda: metrics.tnr())
            appendVal(row, lambda: metrics.tpr())
            appendVal(row, lambda: metrics.precision())
            appendVal(row, lambda: metrics.recall())
            appendVal(row, lambda: metrics.sensitivity())
            appendVal(row, lambda: metrics.specificity())
            appendVal(row, lambda: metrics.aic())
            appendVal(row, lambda: metrics.auc())
            appendVal(row, lambda: metrics.logloss())
            appendVal(row, lambda: metrics.mean_residual_deviance())
            appendVal(row, lambda: metrics.mse())
            appendVal(row, lambda: metrics.null_degrees_of_freedom())
            appendVal(row, lambda: metrics.null_deviance())
            appendVal(row, lambda: metrics.r2())
            appendVal(row, lambda: metrics.residual_degrees_of_freedom())
            appendVal(row, lambda: metrics.residual_deviance())

            # Python 3: use a list comprehension (map() returns an iterator);
            # numbers that are NaN, or the literal string "NaN", become None
            row = [
                None if (isinstance(v, numbers.Number) and np.isnan(v))
                or v in ("NaN", u"NaN") else v
                for v in row
            ]

            persist(row)
            results.append(row)
            for [frame] in h2o.ls().as_matrix():
                if not keep_frames.match(frame):
                    h2o.remove(frame)
        df = pd.DataFrame(results, columns=config.Names)
        return df
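The NaN-scrubbing expression at the end of both execute methods is easier to audit as a named helper; a sketch of the same normalization (the helper name is mine):

import numbers

import numpy as np

def scrub(value):
    """Map NaN numbers and the literal string 'NaN' to None; pass everything else through."""
    if isinstance(value, numbers.Number) and np.isnan(value):
        return None
    if value in ("NaN", u"NaN"):
        return None
    return value

print([scrub(v) for v in [1.0, float('nan'), "NaN", "ok"]])  # [1.0, None, None, 'ok']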
Example #11
params = {
    #'selector__k': (30,50,100,250),
    'learner__C': 10.**arange(-5, 6, 1),
    'learner__gamma': 10.**arange(-6, 6, 1),
}
"""
params={
    #'selector__k': (30,50,100,250),
    'learner__C': 10.**arange(-5,6,2),
    'learner__gamma': 10.**arange(-6,6,2),
}
"""


#from Data import *
def myscore(model, a, b):
    # binary F1 of the model's predictions on a, scored against labels b
    p = model.predict(a)
    return precision_recall_fscore_support(b, p, average='binary')[2]


params_scores = []
for param in grid_search.ParameterGrid(params):
    pipe.set_params(**param)
    scores = cross_validation.cross_val_score(pipe,
                                              X,
                                              Y,
                                              cv=cross_validation.ShuffleSplit(
                                                  len(Y), 10, 0.1, 0.5),
                                              scoring=myscore)
    params_scores.append([param, mean(scores), std(scores)])
    print(params_scores[-1])
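A hedged modern equivalent of the loop above: sklearn.model_selection replaced both grid_search and cross_validation in scikit-learn 0.18, and ShuffleSplit now takes n_splits/test_size/train_size as keywords instead of the old positional (n, n_iter, test_size, train_size) signature. The pipeline and data below are illustrative:

from numpy import mean, std
from sklearn.datasets import make_classification
from sklearn.model_selection import ParameterGrid, ShuffleSplit, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

X, Y = make_classification(n_samples=200, random_state=0)
pipe = Pipeline([('learner', SVC())])
params = {'learner__C': [0.1, 1.0]}

cv = ShuffleSplit(n_splits=10, test_size=0.1, train_size=0.5, random_state=0)
for param in ParameterGrid(params):
    pipe.set_params(**param)
    scores = cross_val_score(pipe, X, Y, cv=cv, scoring='f1')  # ~ myscore above
    print(param, mean(scores), std(scores))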
Example #12
    def generate_file_names(self, pc):
        self.files = OrderedDict()
        base_file_name = 'RelReg-cvx-constraints-%s=%s'
        use_test = use_test_error_for_model_selection
        num_feats_str = ''
        if pc.num_features > 0:
            num_feats_str = '-numFeats=%d' % pc.num_features
        if journal_plot_type == PLOT_VARIANCE:
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-logNoise=0.2-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative, 20% Noise Scale'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-logNoise=0.1-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative, 10% Noise Scale'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
        if journal_plot_type == PLOT_BIAS:
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.2-VAL.pkl'] = 'Ridge, 20% Overestimate'
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg-huber%s-nCV=10-biasThresh=10-biasScale=0.2-VAL.pkl'] = 'Huber, 20% Overestimate'
            #self.files['RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.15-VAL.pkl'] = 'Ridge, 15% Overestimate'
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.1-VAL.pkl'] = 'Ridge, 10% Overestimate'
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg-huber%s-nCV=10-biasThresh=10-biasScale=0.1-VAL.pkl'] = 'Huber, 10% Overestimate'
            #self.files['RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.05-VAL.pkl'] = 'Ridge, 5% Overestimate'
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-VAL.pkl'] = 'Ridge'
        if journal_plot_type == PLOT_DIVERSITY:
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=10-VAL.pkl'] = '50 Relative, |S| = 10'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=20-VAL.pkl'] = '50 Relative, |S| = 20'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=40-VAL.pkl'] = '50 Relative, |S| = 40'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=80-VAL.pkl'] = '50 Relative, |S| = 80'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
        if journal_plot_type == PLOT_CHAIN:
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-numChains=1-VAL.pkl'] = '50 Relative, 1 Root'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-numChains=5-VAL.pkl'] = '50 Relative, 5 Roots'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-numChains=10-VAL.pkl'] = '50 Relative, 10 Roots'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
        if journal_plot_type == PLOT_COMBINE_GUIDANCE:
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-VAL.pkl'] = 'Ridge'
            #self.files['RelReg-cvx-constraints-numRandPairs=25-scipy-numSimilar=25-scipy-noRidgeOnFail-eps=1e-10-solver=SCS-L-BFGS-B-nCV=10-VAL.pkl'] = '25 similar, 25 pairs'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=25-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '25 Relative'
            self.files[
                'RelReg-cvx-constraints-numSimilar=25-scipy-noRidgeOnFail-eps=1e-10-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '25 Similar'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=25-scipy-numSimilar=25-scipy-jointCV-noRidgeOnFail-eps=1e-10-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '25 Similar, 25 Relative'
        if journal_plot_type == PLOT_FLIPPED:
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noise=0.25-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative 25% Flipped'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noise=0.1-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative 10% Flipped'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noise=0.05-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative 5% Flipped'
            self.files[
                'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'

        files = self.files
        self.files = OrderedDict()
        for key, value in files.items():  # Python 3: dict.iteritems() was removed
            self.files[key % num_feats_str] = value

        self.files['LapRidge-VAL.pkl'] = 'Laplacian Ridge Regression'
        #self.files['SKL-DumReg.pkl'] = 'Predict Mean'
        sizes = []
        #sizes.append(10)
        sizes.append(20)
        sizes.append(50)
        #sizes.append(100)
        #sizes.append(150)
        #sizes.append(250)
        suffixes = OrderedDict()
        #suffixes['pairBound'] = [(0,.1),(0,.25),(0,.5),(0,.75),None]
        #suffixes['pairBound'] = [(.5,1), (.25,1), None]
        #suffixes['mixedCV'] = [None,'']
        #suffixes['logNoise'] = [None, .1, .5, 1, 2]
        #suffixes['logNoise'] = [.5]
        #suffixes['logNoise'] = [None,25,50,100]
        suffixes['baseline'] = [None, '']
        #suffixes['noGrad'] = ['']
        if pc.num_features > 0:
            if other_method_configs['use_perfect_feature_selection']:
                suffixes['numFeatsPerfect'] = [str(pc.num_features)]
            else:
                suffixes['numFeats'] = [str(pc.num_features)]
        suffixes['scipy'] = [None, '']
        suffixes['noRidgeOnFail'] = [None, '']
        suffixes['tuneScale'] = [None, '']
        #suffixes['smallScale'] = [None, '']
        #suffixes['minMax'] = [None, '']
        #suffixes['zScore'] = [None, '']
        suffixes['solver'] = ['SCS']
        suffixes['L-BFGS-B'] = [None, '']
        #suffixes['logNoise'] = [None, '0.01']
        if not use_test:
            suffixes['nCV'] = ['10']
        suffixes['VAL'] = ['']

        #suffixes['numFeats'] = [str(num_feat)]

        ordered_keys = [
            'fastDCCP', 'initRidge', 'init_ideal', 'initRidgeTrain',
            'logistic', 'pairBound', 'mixedCV', 'logNoise', 'scipy',
            'noGrad', 'baseline', 'logFix', 'noRidgeOnFail',
            'tuneScale', 'smallScale', 'eps', 'solver', 'minMax', 'zScore',
            'numFeats', 'numFeatsPerfect', 'L-BFGS-B', 'nCV', 'VAL'
        ]  # 'logNoise' listed once; a duplicate here would append its suffix twice

        methods = []
        if journal_plot_type == PLOT_NOT_JOURNAL:
            if self.plot_type == VisualizationConfigs.PLOT_PAIRWISE:
                methods.append(('numRandPairs', 'RelReg, %s pairs',
                                'Our Method: %s relative'))
                methods.append(('numRandPairsHinge', 'RelReg, %s pairs hinge',
                                'Zhu 2007: %s relative'))
                self.title = 'Relative'
                #if pc.data_set == bc.DATA_SYNTHETIC_LINEAR_REGRESSION:
                #    self.ylims = [0,12]
            elif self.plot_type == VisualizationConfigs.PLOT_BOUND:
                methods.append(('numRandLogBounds', '%s log bounds',
                                'Our Method: %s bound'))
                methods.append(('numRandQuartiles', 'RelReg, %s quartiles',
                                'Baseline: %s'))
                suffixes['eps'] = [None, '1e-08', '1e-10', '1e-16']
                self.title = 'Bound'
            elif self.plot_type == VisualizationConfigs.PLOT_NEIGHBOR:
                #methods.append(('numRandNeighborConvex', 'RelReg, %s rand neighbors convex', 'Our Method: %s neighbors'))
                methods.append(('numRandPairs', 'RelReg, %s pairs',
                                'Our Method: %s relative'))
                #methods.append(('numRandNeighborConvexHinge', 'RelReg, %s rand neighbors convex hinge', 'Our Method: %s neighbor, hinge'))
                methods.append(('numRandNeighborExp',
                                'RelReg, %s rand neighbors convex exp',
                                'Our Method: %s neighbor'))
                sizes = [20, 50]
                self.title = 'Neighbor'
            elif self.plot_type == VisualizationConfigs.PLOT_SIMILAR:
                suffixes['eps'] = [None, '1e-08', '1e-10', '1e-16']
                methods.append(('numSimilar', 'RelReg, %s similar',
                                'Our Method: %s similar'))
                #methods.append(('numSimilarHinge','RelReg, %s pairs hinge', 'Our Method: %s similar, hinge'))
                self.title = 'Similar'
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg-nCV=10-VAL.pkl'] = 'Ridge'

        all_params = list(grid_search.ParameterGrid(suffixes))
        for file_suffix, legend_name, legend_name_paper in methods:
            for size in sizes:
                for params in all_params:
                    file_name = base_file_name
                    file_name = file_name % (file_suffix, str(size))
                    legend = legend_name
                    if viz_for_paper:
                        legend = legend_name_paper
                    legend = legend % str(size)
                    for key in ordered_keys:
                        if key not in params:
                            continue
                        value = params[key]
                        if value is None:
                            continue
                        if value == '':
                            file_name += '-' + key
                            if not viz_for_paper:
                                legend += ', ' + key
                        else:
                            file_name += '-' + key + '=' + str(value)
                            if not viz_for_paper:
                                legend += ', ' + str(value) + ' ' + key
                    if use_test:
                        file_name += '-TEST'
                        legend = 'TEST: ' + legend
                    file_name += '.pkl'
                    self.files[file_name] = legend
Example #13
    def generate_file_names(self, pc):
        self.files = OrderedDict()
        base_file_name = 'RelReg-cvx-constraints-%s=%s'
        use_test = other_method_configs['use_test_error_for_model_selection']

        self.files['TargetTransfer+SKL-RidgeClass.pkl'] = 'Target Only'
        self.files['FuseTransfer+SKL-RidgeClass.pkl'] = 'Source and Target'
        #self.files['FuseTransfer+SKL-RidgeClass-tws=0.5.pkl'] = 'Source and Target: Weighted 50%'
        #self.files['FuseTransfer+SKL-RidgeClass-tws=1.pkl'] = 'Source and Target: Weighted 100%'
        '''
        self.files['HypTransfer-target-TEST.pkl'] = 'TEST: Hypothesis Transfer - target'
        self.files['HypTransfer-optimal-TEST.pkl'] = 'TEST: Hypothesis Transfer - optimal'
        self.files['HypTransfer-optimal-noC-TEST.pkl'] = 'TEST: Hypothesis Transfer - optimal, no C'
        self.files['HypTransfer-noC-TEST.pkl'] = 'TEST: Hypothesis Transfer - no C'
        '''
        self.files['HypTransfer-target.pkl'] = 'Hypothesis Transfer - target'
        self.files['HypTransfer-optimal.pkl'] = 'Hypothesis Transfer - optimal'
        self.files[
            'HypTransfer-optimal-noC.pkl'] = 'Hypothesis Transfer - optimal, no C'
        self.files['HypTransfer-noC.pkl'] = 'Hypothesis Transfer - no C'
        self.files[
            'HypTransfer-first-noC.pkl'] = 'Hypothesis Transfer - just first'

        sizes = []
        suffixes = OrderedDict()
        #suffixes['mixedCV'] = [None,'']
        if not use_test:
            suffixes['nCV'] = [None, '10']

        #suffixes['numFeats'] = [str(num_feat)]

        ordered_keys = [
            'nCV',
        ]

        methods = []
        #methods.append(('numRandPairs','RelReg, %s pairs', 'Our Method: %s relative'))
        self.title = 'Test'

        all_params = list(grid_search.ParameterGrid(suffixes))
        for file_suffix, legend_name, legend_name_paper in methods:
            for size in sizes:
                for params in all_params:
                    file_name = base_file_name
                    file_name = file_name % (file_suffix, str(size))
                    legend = legend_name
                    if viz_for_paper:
                        legend = legend_name_paper
                    legend %= str(size)
                    for key in ordered_keys:
                        if key not in params:
                            continue
                        value = params[key]
                        if value is None:
                            continue
                        if value == '':
                            file_name += '-' + key
                            if not viz_for_paper:
                                legend += ', ' + key
                        else:
                            file_name += '-' + key + '=' + str(value)
                            if not viz_for_paper:
                                legend += ', ' + str(value) + ' ' + key
                    if use_test:
                        file_name += '-TEST'
                        legend = 'TEST: ' + legend
                    file_name += '.pkl'
                    self.files[file_name] = legend
Example #14
def make_param_grid(self):
    return list(grid_search.ParameterGrid(self.cv_params))
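ParameterGrid also supports len() and indexing, so the list(...) materialization above is optional unless the caller needs a real list; a brief sketch:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({'a': [1, 2], 'b': ['x']})
print(len(grid))  # 2 combinations
print(grid[0])    # one combination as a dict, e.g. {'a': 1, 'b': 'x'}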