def get_params(train, labels, model=0):
    """Grid-search one of two fixed parameter grids and return the best one.

    Args:
        train: training data, passed straight through to ``get_score``.
        labels: target labels, passed straight through to ``get_score``.
        model: 0 or 1 — selects which of the two hard-coded grids to search.

    Returns:
        dict: the parameter combination with the lowest CV score (printed
        before returning, matching the original behavior).
    """
    param_grid = [
        {
            'eta': [0.009], 'num_trees': [1500], 'max_depth': [13],
            'child': [0.40], 'sub': [0.95], 'col': [0.65], 'gamma': [0.77]
        },
        {
            'eta': [0.009], 'num_trees': [1000], 'max_depth': [6],
            'child': [0.57], 'sub': [0.75], 'col': [0.60], 'gamma': [0.93]
        },
    ][model]
    best_p = {}
    # BUG FIX: the original initialized best_score to 1, which silently
    # discarded every candidate whose CV score was >= 1 (returning {}).
    # Start from +inf so the lowest-scoring candidate always wins.
    best_score = float('inf')
    for p in grid_search.ParameterGrid(param_grid):
        # Only the aggregate score drives selection; the per-fold scores and
        # predictions returned by get_score are unused here.
        cv_score, _cv_scores, _cv_preds = get_score(train, labels, p)
        if cv_score < best_score:
            best_score = cv_score
            best_p = p
    print(best_p)
    return best_p
def _generate_model_grid():
    """Flatten every configured model's parameter grid into one work list.

    Returns:
        list: one ``[index, [], [], model_key, params, dataset_type]`` entry
        per (model, parameter-combination) pair. The two empty lists appear
        to be result placeholders filled in later by the caller — TODO
        confirm against the consumer of this grid.
    """
    mg = []
    idx = 0
    # Plain dict iteration replaces Python-2-only ``models.iterkeys()``;
    # it visits the same keys in the same order on both Python 2 and 3.
    for key in models:
        for p in grid_search.ParameterGrid(models[key]['parameters']):
            mg.append([idx, [], [], key, p, models[key]['dataset_type']])
            idx += 1
    return mg
def generate_copies(self, dict):
    """Produce one configured deep copy of ``self`` per grid combination.

    Args:
        dict: mapping of parameter name -> list of candidate values, in the
            format accepted by ``grid_search.ParameterGrid``. (Note: the
            parameter name shadows the ``dict`` builtin; it is kept because
            renaming it would break keyword-argument callers.)

    Returns:
        list: deep copies of ``self``, each with one parameter combination
        applied via ``self.set(**params)``.
    """
    copies = []
    # Iterate the grid directly — materializing it into a list first
    # (as the original did) only cost an extra pass and extra memory.
    for params in grid_search.ParameterGrid(dict):
        c2 = deepcopy(self)
        c2.set(**params)
        copies.append(c2)
    return copies
def generate_file_names(self, pc):
    """Populate ``self.files`` (results-file name -> legend label) for plotting.

    As written, only the three fixed baseline entries are active: both
    ``methods`` and ``sizes`` are empty, so the generation loop at the
    bottom never executes. The loop is kept for parity with the sibling
    ``generate_file_names`` implementations in this project.
    """
    self.files = OrderedDict()
    base_file_name = 'RelReg-cvx-constraints-%s=%s'
    use_test = other_method_configs['use_test_error_for_model_selection']
    self.files['SKL-RidgeReg.pkl'] = 'SKL Ridge Regression'
    self.files['SLL-NW.pkl'] = 'LLGC'
    self.files['NW.pkl'] = 'NW'
    #self.files['SKL-DumReg.pkl'] = 'Predict Mean'
    sizes = []
    suffixes = OrderedDict()
    #suffixes['mixedCV'] = [None,'']
    if not use_test:
        suffixes['nCV'] = [None, '10']
    #suffixes['numFeats'] = [str(num_feat)]
    ordered_keys = [
        'nCV',
    ]
    methods = []
    #methods.append(('numRandPairs','RelReg, %s pairs', 'Our Method: %s relative'))
    self.title = 'Test'
    all_params = list(grid_search.ParameterGrid(suffixes))
    for file_suffix, legend_name, legend_name_paper in methods:
        for size in sizes:
            for params in all_params:
                file_name = base_file_name % (file_suffix, str(size))
                legend = legend_name
                if viz_for_paper:
                    legend = legend_name_paper
                legend %= str(size)
                for key in ordered_keys:
                    # PY3 FIX: ``params.has_key(key)`` was removed in
                    # Python 3; ``key in params`` is equivalent on both.
                    if key not in params:
                        continue
                    value = params[key]
                    if value is None:
                        continue
                    if value == '':
                        # Flag-style suffix: present in the name, no value.
                        file_name += '-' + key
                        if not viz_for_paper:
                            legend += ', ' + key
                    else:
                        file_name += '-' + key + '=' + str(value)
                        if not viz_for_paper:
                            legend += ', ' + str(value) + ' ' + key
                if use_test:
                    file_name += '-TEST'
                    legend = 'TEST: ' + legend
                file_name += '.pkl'
                self.files[file_name] = legend
def _model_param_grid(params_config):
    """Yield ``(name, Model, param_grid)`` for each loadable model config.

    Configs whose class cannot be imported, or whose class has no ``train()``
    method, are logged and skipped so one bad entry does not abort the run.

    Args:
        params_config: mapping of model name -> config dict with keys
            ``'class'`` (import path) and ``'params'`` (grid spec).

    Yields:
        tuple: ``(name, Model, grid_search.ParameterGrid)``.
    """
    for name, config in params_config.items():
        try:
            Model = yamlconf.import_module(config['class'])
        except Exception:
            # ``logger.warn`` is a deprecated alias; ``warning`` is canonical.
            logger.warning("Could not load model {0}".format(config['class']))
            logger.warning("Exception:\n" + traceback.format_exc())
            continue
        if not hasattr(Model, "train"):
            logger.warning("Model {0} does not have a train() method.".format(
                config['class']))
            continue
        param_grid = grid_search.ParameterGrid(config['params'])
        yield name, Model, param_grid
def _estimator_param_grid(params_config):
    """Yield ``(name, estimator, param_grid)`` for each loadable estimator.

    Mirrors ``_model_param_grid`` but instantiates the class and checks for
    a scikit-learn style ``fit()`` method instead of ``train()``. Bad
    entries are logged and skipped rather than aborting the run.

    Args:
        params_config: mapping of estimator name -> config dict with keys
            ``'class'`` (import path) and ``'params'`` (grid spec).

    Yields:
        tuple: ``(name, estimator_instance, grid_search.ParameterGrid)``.
    """
    for name, config in params_config.items():
        try:
            EstimatorClass = yamlconf.import_module(config['class'])
            estimator = EstimatorClass()
        except Exception:
            # ``logger.warn`` is a deprecated alias; ``warning`` is canonical.
            logger.warning("Could not load estimator {0}".format(config['class']))
            logger.warning("Exception:\n" + traceback.format_exc())
            continue
        if not hasattr(estimator, "fit"):
            logger.warning("Estimator {0} does not have a fit() method.".format(
                config['class']))
            continue
        param_grid = grid_search.ParameterGrid(config['params'])
        yield name, estimator, param_grid
# NOTE(review): truncated script fragment — the opening of the ``params``
# dict (keys 'r', 'b1', 'b2', 'url_path', ...) lies before this chunk, and
# the inner C-search loop continues past its end. Code kept token-identical;
# only comments/whitespace added.
        'get' : [1],
        'adv' : [1]}

# Per-label candidate regularization strengths C for logistic regression.
reg = {'gender_female' : frange(0.0011, 0.0048, 0.0005),
       'age_15_24' : frange(0.0041, 0.0121, 0.0006),
       'income_high' : frange(0.0001, 0.004, 0.0004)}
# Per-label lower population bound fed to gen_data — presumably a minimum
# sample-count threshold; TODO confirm against gen_data's signature.
pop = {'gender_female' : 1, 'age_15_24' : 3, 'income_high' : 1}

for label in classifiers:
    dump_string('Logreg for ' + label, 1)
    # Outer search over data-generation parameters.
    for P in grid.ParameterGrid(params):
        X, Y, Xt, Yt, fmap = gen_data(classifier=label, regex=P['r'],
                                      b1 = P['b1'], b2 = P['b2'],
                                      url_path = P['url_path'], get = P['get'],
                                      add_adv = P['adv'],
                                      lower_pop_bound = pop[label],
                                      n_files = 47)
        #grid search for C
        best, bestC = 0, 0
        for CC in reg[label]:
            model = lm.LogisticRegression(C=CC)
            model.fit(X, Y)
def fit(self, X, y=None):
    """Fit across every combination of ``self.param_grid``.

    Dispatches to the IPython-parallel implementation when ``self.profile``
    is set, otherwise to the sequential ``_fit`` path; both receive the same
    expanded parameter grid.
    """
    candidate_grid = grid_search.ParameterGrid(self.param_grid)
    runner = self.fit_ipp if self.profile else self._fit
    return runner(X, y, candidate_grid)
def execute(self, name, x, y, training_frame, validation_frame, test_frame,
            subset_coef):
    """Run an h2oEnsemble grid search through rpy2 and collect metric rows.

    For each parameter combination (or a single "default" run when the grid
    is empty) this trains an R-side ``h2o.ensemble`` model, scores it on the
    train/validation/test frames, and appends one flat row of timings and
    metrics. Unsupported metrics are recorded as ``None`` placeholders so
    every row matches ``config.Names``.

    Args:
        name: dataset name recorded in each result row.
        x: predictor column names (converted for R via ``toR``).
        y: response column name.
        training_frame, validation_frame, test_frame: h2o frames already
            loaded in the cluster.
        subset_coef: data-subset coefficient recorded in each result row.

    Returns:
        pandas.DataFrame: one row per parameter combination, columns from
        ``config.Names``.
    """
    params = grid.ParameterGrid(self.params_grid)
    if self.params_grid is None or len(self.params_grid) == 0:
        params = ["default"]
    results = []
    dt = datetime.datetime
    # R stuff: bring up the embedded R session and the packages we call into.
    ri.initr()
    h2or = importr("h2o")
    h2o_ensemble = importr("h2oEnsemble")
    base = importr("base")
    stats = importr("stats")
    cvauc = importr("cvAUC")
    h2or.h2o_init(ip=config.hostname, port=config.port, startH2O=False)
    # Add some base learners (R wrapper definitions sourced into the session).
    with open("{}/R/wrappers.r".format(os.path.dirname(__file__)), "r") as f:
        ro.r("\n".join(f.readlines()))
    # Frames matching this pattern survive the cleanup sweep at the end.
    keep_frames = re.compile("|".join([
        training_frame.frame_id, validation_frame.frame_id,
        test_frame.frame_id
    ]) + "|.*\\.hex|py_.*")
    for p in params:
        row = [
            config.cluster, config.nthreads, name, subset_coef, self.name,
            str(p)
        ]
        # Initialize the model
        init_time = dt.now()
        # get frame names / load them in R
        train = h2or.h2o_getFrame(training_frame.frame_id)
        valid = h2or.h2o_getFrame(validation_frame.frame_id)
        test = h2or.h2o_getFrame(test_frame.frame_id)
        init_time = dt.now() - init_time
        # Train the model
        train_time = dt.now()
        if p == "default":
            model = h2o_ensemble.h2o_ensemble(x=toR(x), y=y,
                                              training_frame=train,
                                              validation_frame=valid)
        else:
            # Convert each parameter value to its R representation.
            p = {k: toR(v) for k, v in p.items()}
            model = h2o_ensemble.h2o_ensemble(x=toR(x), y=y,
                                              training_frame=train,
                                              validation_frame=valid, **p)
        train_time = dt.now() - train_time
        # Model metrics
        metrics_time = dt.now()
        RpredTrain = stats.predict(model, train)
        RpredValid = stats.predict(model, valid)
        RpredTest = stats.predict(model, test)
        # Pull the R-side prediction frames back into Python handles.
        predTrain = h2o.get_frame(
            h2or.h2o_getId(RpredTrain.rx2("pred"))[0])
        predValid = h2o.get_frame(
            h2or.h2o_getId(RpredValid.rx2("pred"))[0])
        predTest = h2o.get_frame(h2or.h2o_getId(RpredTest.rx2("pred"))[0])
        metrics_time = dt.now() - metrics_time
        row.append(init_time.total_seconds())
        row.append(train_time.total_seconds())
        row.append(metrics_time.total_seconds())
        row.append((init_time + train_time + metrics_time).total_seconds())
        datasets = [(RpredTrain, predTrain, train, training_frame),
                    (RpredValid, predValid, valid, validation_frame),
                    (RpredTest, predTest, test, test_frame)]
        append = row.append
        for pred_r_ptr, pred_py_ptr, data_r_ptr, data_py_ptr in datasets:
            acc = None
            err = None
            mse = ((pred_py_ptr - data_py_ptr[y])**2).mean()[0]
            # Accuracy/error only make sense for classification responses.
            if training_frame[y].isfactor()[0]:
                acc = (pred_py_ptr == data_py_ptr[y]).mean()[0]
                err = 1.0 - acc
            auc = cvauc.AUC(
                base.attr(pred_r_ptr.rx2("pred"), "data")[2],
                base.attr(data_r_ptr, "data").rx2(y))[0]
            # TODO: Add more metrics
            append(acc)
            append(err)
            append(None)  # F1()
            append(None)  # fnr()
            append(None)  # fpr()
            append(None)  # tnr()
            append(None)  # tpr()
            append(None)  # precision()
            append(None)  # recall()
            append(None)  # sensitivity()
            append(None)  # specificity()
            append(None)  # aic()
            append((auc))  # auc()
            append(None)  # logloss()
            append(None)  # mean_residual_deviance()
            append(mse)  # mse()
            append(None)  # null_degrees_of_freedom()
            append(None)  # null_deviance()
            append(None)  # r2()
            append(None)  # residual_degrees_of_freedom()
            append(None)  # residual_deviance()
            h2o.remove(pred_py_ptr)
        # Normalize NaN-ish values to None. PY3 FIX: wrap map() in list() —
        # on Python 3 a bare map object would be persisted/appended instead
        # of row data. NOTE(review): the ``x is None`` clause is dead (None
        # is not a numbers.Number); left as-is to preserve behavior.
        row = list(map(
            lambda x: None
            if isinstance(x, numbers.Number) and
            (x is None or np.isnan(x)) or x == u"NaN" or x == "NaN" else x,
            row))
        persist(row)
        results.append(row)
    # Drop every h2o frame this run created that we don't need to keep.
    for [frame] in h2o.ls().as_matrix():
        if not keep_frames.match(frame):
            h2o.remove(frame)
    df = pd.DataFrame(results, columns=config.Names)
    return df
def execute(self, name, x, y, training_frame, validation_frame, test_frame,
            subset_coef):
    """Run a native-h2o grid search over ``self.base_model`` and collect rows.

    For each parameter combination (or one "default" run when the grid is
    empty) this instantiates ``self.base_model``, trains it, and appends one
    flat row of timings plus train/validation/test metrics. ``appendVal``
    wraps each metric call so models lacking a metric contribute ``None``
    instead of raising.

    Args:
        name: dataset name recorded in each result row.
        x: predictor column names.
        y: response column name.
        training_frame, validation_frame, test_frame: h2o frames.
        subset_coef: data-subset coefficient recorded in each result row.

    Returns:
        pandas.DataFrame: one row per parameter combination, columns from
        ``config.Names``.
    """
    params = grid.ParameterGrid(self.params_grid)
    # FIX: compare to None with ``is`` (PEP 8), not ``==``.
    if self.params_grid is None or len(self.params_grid) == 0:
        params = ["default"]
    results = []
    dt = datetime.datetime
    # Frames matching this pattern survive the cleanup sweep at the end.
    keep_frames = re.compile("|".join([
        training_frame.frame_id, validation_frame.frame_id,
        test_frame.frame_id
    ]) + "|.*\\.hex|py_.*")
    for p in params:
        row = [
            config.cluster, config.nthreads, name, subset_coef, self.name,
            str(p)
        ]
        # Initialize the model
        init_time = dt.now()
        if p == "default":
            model = self.base_model()
        else:
            model = self.base_model(**p)
        init_time = dt.now() - init_time
        # Train the model
        train_time = dt.now()
        model.train(x, y, training_frame=training_frame,
                    validation_frame=validation_frame)
        train_time = dt.now() - train_time
        # Model metrics
        metrics_time = dt.now()
        metrics = model.model_performance(test_data=test_frame)
        err_tr = get_classification_error(model, training_frame, "train")
        err_va = get_classification_error(model, validation_frame, "valid")
        err_te = get_classification_error(metrics, test_frame, "test")
        metrics_time = dt.now() - metrics_time
        # results: timings first, then the metric columns.
        row.append(init_time.total_seconds())
        row.append(train_time.total_seconds())
        row.append(metrics_time.total_seconds())
        row.append((init_time + train_time + metrics_time).total_seconds())
        # on training data
        appendVal(row, lambda: 1 - err_tr)
        appendVal(row, lambda: err_tr)
        appendVal(row, lambda: model.F1())
        appendVal(row, lambda: model.fnr())
        appendVal(row, lambda: model.fpr())
        appendVal(row, lambda: model.tnr())
        appendVal(row, lambda: model.tpr())
        appendVal(row, lambda: model.precision())
        appendVal(row, lambda: model.recall())
        appendVal(row, lambda: model.sensitivity())
        appendVal(row, lambda: model.specificity())
        appendVal(row, lambda: model.aic())
        appendVal(row, lambda: model.auc())
        appendVal(row, lambda: model.logloss())
        appendVal(row, lambda: model.mean_residual_deviance())
        appendVal(row, lambda: model.mse())
        appendVal(row, lambda: model.null_degrees_of_freedom())
        appendVal(row, lambda: model.null_deviance())
        appendVal(row, lambda: model.r2())
        appendVal(row, lambda: model.residual_degrees_of_freedom())
        appendVal(row, lambda: model.residual_deviance())
        # on validation data
        appendVal(row, lambda: 1 - err_va)
        appendVal(row, lambda: err_va)
        appendVal(row, lambda: model.F1(valid=True))
        appendVal(row, lambda: model.fnr(valid=True))
        appendVal(row, lambda: model.fpr(valid=True))
        appendVal(row, lambda: model.tnr(valid=True))
        appendVal(row, lambda: model.tpr(valid=True))
        appendVal(row, lambda: model.precision(valid=True))
        appendVal(row, lambda: model.recall(valid=True))
        appendVal(row, lambda: model.sensitivity(valid=True))
        appendVal(row, lambda: model.specificity(valid=True))
        appendVal(row, lambda: model.aic(valid=True))
        appendVal(row, lambda: model.auc(valid=True))
        appendVal(row, lambda: model.logloss(valid=True))
        appendVal(row, lambda: model.mean_residual_deviance(valid=True))
        appendVal(row, lambda: model.mse(valid=True))
        appendVal(row, lambda: model.null_degrees_of_freedom(valid=True))
        appendVal(row, lambda: model.null_deviance(valid=True))
        appendVal(row, lambda: model.r2(valid=True))
        appendVal(row, lambda: model.residual_degrees_of_freedom(valid=True))
        appendVal(row, lambda: model.residual_deviance(valid=True))
        # on test data
        appendVal(row, lambda: 1 - err_te)
        appendVal(row, lambda: err_te)
        appendVal(row, lambda: metrics.F1())
        appendVal(row, lambda: metrics.fnr())
        appendVal(row, lambda: metrics.fpr())
        appendVal(row, lambda: metrics.tnr())
        appendVal(row, lambda: metrics.tpr())
        appendVal(row, lambda: metrics.precision())
        appendVal(row, lambda: metrics.recall())
        appendVal(row, lambda: metrics.sensitivity())
        appendVal(row, lambda: metrics.specificity())
        appendVal(row, lambda: metrics.aic())
        appendVal(row, lambda: metrics.auc())
        appendVal(row, lambda: metrics.logloss())
        appendVal(row, lambda: metrics.mean_residual_deviance())
        appendVal(row, lambda: metrics.mse())
        appendVal(row, lambda: metrics.null_degrees_of_freedom())
        appendVal(row, lambda: metrics.null_deviance())
        appendVal(row, lambda: metrics.r2())
        appendVal(row, lambda: metrics.residual_degrees_of_freedom())
        appendVal(row, lambda: metrics.residual_deviance())
        # Normalize NaN-ish values to None. PY3 FIX: wrap map() in list() —
        # on Python 3 a bare map object would be persisted/appended instead
        # of row data. NOTE(review): the ``x is None`` clause is dead (None
        # is not a numbers.Number); left as-is to preserve behavior.
        row = list(map(
            lambda x: None
            if isinstance(x, numbers.Number) and
            (x is None or np.isnan(x)) or x == u"NaN" or x == "NaN" else x,
            row))
        persist(row)
        results.append(row)
    # Drop every h2o frame this run created that we don't need to keep.
    for [frame] in h2o.ls().as_matrix():
        if not keep_frames.match(frame):
            h2o.remove(frame)
    df = pd.DataFrame(results, columns=config.Names)
    return df
#'selector__k': (30,50,100,250), 'learner__C': 10.**arange(-5, 6, 1), 'learner__gamma': 10.**arange(-6, 6, 1), } """ params={ #'selector__k': (30,50,100,250), 'learner__C': 10.**arange(-5,6,2), 'learner__gamma': 10.**arange(-6,6,2), } """ #from Data import * def myscore(model, a, b): p = model.predict(a) return precision_recall_fscore_support(b, p, average='binary')[2] params_scores = [] for param in grid_search.ParameterGrid(params): pipe.set_params(**param) scores = cross_validation.cross_val_score(pipe, X, Y, cv=cross_validation.ShuffleSplit( len(Y), 10, 0.1, 0.5), scoring=myscore) params_scores.append([param, mean(scores), std(scores)]) print(params_scores[-1])
def generate_file_names(self, pc):
    """Populate ``self.files`` (results-file name -> legend label) for the
    journal plots.

    The active set of fixed entries depends on the module-level
    ``journal_plot_type``; each fixed file name carries a ``%s`` slot filled
    with the ``-numFeats=N`` suffix (or '') in the substitution pass below.
    When ``journal_plot_type == PLOT_NOT_JOURNAL``, additional names are
    generated from (method, size, suffix-combination) triples.
    """
    self.files = OrderedDict()
    base_file_name = 'RelReg-cvx-constraints-%s=%s'
    use_test = use_test_error_for_model_selection
    num_feats_str = ''
    if pc.num_features > 0:
        num_feats_str = '-numFeats=%d' % pc.num_features
    if journal_plot_type == PLOT_VARIANCE:
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-logNoise=0.2-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative, 20% Noise Scale'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-logNoise=0.1-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative, 10% Noise Scale'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
    if journal_plot_type == PLOT_BIAS:
        self.files[
            'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.2-VAL.pkl'] = 'Ridge, 20% Overestimate'
        self.files[
            'RelReg-cvx-constraints-noPairwiseReg-huber%s-nCV=10-biasThresh=10-biasScale=0.2-VAL.pkl'] = 'Huber, 20% Overestimate'
        #self.files['RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.15-VAL.pkl'] = 'Ridge, 15% Overestimate'
        self.files[
            'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.1-VAL.pkl'] = 'Ridge, 10% Overestimate'
        self.files[
            'RelReg-cvx-constraints-noPairwiseReg-huber%s-nCV=10-biasThresh=10-biasScale=0.1-VAL.pkl'] = 'Huber, 10% Overestimate'
        #self.files['RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-biasThresh=10-biasScale=0.05-VAL.pkl'] = 'Ridge, 5% Overestimate'
        self.files[
            'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-VAL.pkl'] = 'Ridge'
    if journal_plot_type == PLOT_DIVERSITY:
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=10-VAL.pkl'] = '50 Relative, |S| = 10'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=20-VAL.pkl'] = '50 Relative, |S| = 20'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=40-VAL.pkl'] = '50 Relative, |S| = 40'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-setSize=80-VAL.pkl'] = '50 Relative, |S| = 80'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
    if journal_plot_type == PLOT_CHAIN:
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-numChains=1-VAL.pkl'] = '50 Relative, 1 Root'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-numChains=5-VAL.pkl'] = '50 Relative, 5 Roots'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-numChains=10-VAL.pkl'] = '50 Relative, 10 Roots'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
    if journal_plot_type == PLOT_COMBINE_GUIDANCE:
        self.files[
            'RelReg-cvx-constraints-noPairwiseReg%s-nCV=10-VAL.pkl'] = 'Ridge'
        #self.files['RelReg-cvx-constraints-numRandPairs=25-scipy-numSimilar=25-scipy-noRidgeOnFail-eps=1e-10-solver=SCS-L-BFGS-B-nCV=10-VAL.pkl'] = '25 similar, 25 pairs'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=25-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '25 Relative'
        self.files[
            'RelReg-cvx-constraints-numSimilar=25-scipy-noRidgeOnFail-eps=1e-10-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '25 Similar'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=25-scipy-numSimilar=25-scipy-jointCV-noRidgeOnFail-eps=1e-10-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '25 Similar, 25 Relative'
    if journal_plot_type == PLOT_FLIPPED:
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noise=0.25-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative 25% Flipped'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noise=0.1-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative 10% Flipped'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noise=0.05-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative 5% Flipped'
        self.files[
            'RelReg-cvx-constraints-numRandPairs=50-scipy-noRidgeOnFail-solver=SCS%s-L-BFGS-B-nCV=10-VAL.pkl'] = '50 Relative'
    # Substitute the numFeats suffix into every fixed name collected above.
    # PY3 FIX: dict.iteritems() was removed in Python 3; items() works in both.
    files = self.files
    self.files = OrderedDict()
    for key, value in files.items():
        self.files[key % num_feats_str] = value
    self.files['LapRidge-VAL.pkl'] = 'Laplacian Ridge Regression'
    #self.files['SKL-DumReg.pkl'] = 'Predict Mean'
    sizes = []
    #sizes.append(10)
    sizes.append(20)
    sizes.append(50)
    #sizes.append(100)
    #sizes.append(150)
    #sizes.append(250)
    suffixes = OrderedDict()
    #suffixes['pairBound'] = [(0,.1),(0,.25),(0,.5),(0,.75),None]
    #suffixes['pairBound'] = [(.5,1), (.25,1), None]
    #suffixes['mixedCV'] = [None,'']
    #suffixes['logNoise'] = [None, .1, .5, 1, 2]
    #suffixes['logNoise'] = [.5]
    #suffixes['logNoise'] = [None,25,50,100]
    suffixes['baseline'] = [None, '']
    #suffixes['noGrad'] = ['']
    if pc.num_features > 0:
        if other_method_configs['use_perfect_feature_selection']:
            suffixes['numFeatsPerfect'] = [str(pc.num_features)]
        else:
            suffixes['numFeats'] = [str(pc.num_features)]
    suffixes['scipy'] = [None, '']
    suffixes['noRidgeOnFail'] = [None, '']
    suffixes['tuneScale'] = [None, '']
    #suffixes['smallScale'] = [None, '']
    #suffixes['minMax'] = [None, '']
    #suffixes['zScore'] = [None, '']
    suffixes['solver'] = ['SCS']
    suffixes['L-BFGS-B'] = [None, '']
    #suffixes['logNoise'] = [None, '0.01']
    if not use_test:
        suffixes['nCV'] = ['10']
    suffixes['VAL'] = ['']
    #suffixes['numFeats'] = [str(num_feat)]
    # Canonical suffix order used when assembling file names below.
    # NOTE(review): 'logNoise' appears twice — harmless since each key is
    # consumed at most once per params dict, but presumably unintentional.
    ordered_keys = [
        'fastDCCP', 'initRidge', 'init_ideal', 'initRidgeTrain', 'logistic',
        'pairBound', 'mixedCV', 'logNoise', 'scipy', 'logNoise', 'noGrad',
        'baseline', 'logFix', 'noRidgeOnFail', 'tuneScale', 'smallScale',
        'eps', 'solver', 'minMax', 'zScore', 'numFeats', 'numFeatsPerfect',
        'L-BFGS-B', 'nCV', 'VAL'
    ]
    methods = []
    if journal_plot_type == PLOT_NOT_JOURNAL:
        if self.plot_type == VisualizationConfigs.PLOT_PAIRWISE:
            methods.append(('numRandPairs', 'RelReg, %s pairs',
                            'Our Method: %s relative'))
            methods.append(('numRandPairsHinge', 'RelReg, %s pairs hinge',
                            'Zhu 2007: %s relative'))
            self.title = 'Relative'
            #if pc.data_set == bc.DATA_SYNTHETIC_LINEAR_REGRESSION:
            #    self.ylims = [0,12]
        elif self.plot_type == VisualizationConfigs.PLOT_BOUND:
            methods.append(('numRandLogBounds', '%s log bounds',
                            'Our Method: %s bound'))
            methods.append(('numRandQuartiles', 'RelReg, %s quartiles',
                            'Baseline: %s'))
            suffixes['eps'] = [None, '1e-08', '1e-10', '1e-16']
            self.title = 'Bound'
        elif self.plot_type == VisualizationConfigs.PLOT_NEIGHBOR:
            #methods.append(('numRandNeighborConvex', 'RelReg, %s rand neighbors convex', 'Our Method: %s neighbors'))
            methods.append(('numRandPairs', 'RelReg, %s pairs',
                            'Our Method: %s relative'))
            #methods.append(('numRandNeighborConvexHinge', 'RelReg, %s rand neighbors convex hinge', 'Our Method: %s neighbor, hinge'))
            methods.append(('numRandNeighborExp',
                            'RelReg, %s rand neighbors convex exp',
                            'Our Method: %s neighbor'))
            sizes = [20, 50]
            self.title = 'Neighbor'
        elif self.plot_type == VisualizationConfigs.PLOT_SIMILAR:
            suffixes['eps'] = [None, '1e-08', '1e-10', '1e-16']
            methods.append(('numSimilar', 'RelReg, %s similar',
                            'Our Method: %s similar'))
            #methods.append(('numSimilarHinge','RelReg, %s pairs hinge', 'Our Method: %s similar, hinge'))
            self.title = 'Similar'
            self.files[
                'RelReg-cvx-constraints-noPairwiseReg-nCV=10-VAL.pkl'] = 'Ridge'
    all_params = list(grid_search.ParameterGrid(suffixes))
    for file_suffix, legend_name, legend_name_paper in methods:
        for size in sizes:
            for params in all_params:
                file_name = base_file_name % (file_suffix, str(size))
                legend = legend_name
                if viz_for_paper:
                    legend = legend_name_paper
                legend = legend % str(size)
                for key in ordered_keys:
                    # PY3 FIX: ``params.has_key(key)`` was removed in
                    # Python 3; ``key in params`` is equivalent on both.
                    if key not in params:
                        continue
                    value = params[key]
                    if value is None:
                        continue
                    if value == '':
                        # Flag-style suffix: present in the name, no value.
                        file_name += '-' + key
                        if not viz_for_paper:
                            legend += ', ' + key
                    else:
                        file_name += '-' + key + '=' + str(value)
                        if not viz_for_paper:
                            legend += ', ' + str(value) + ' ' + key
                if use_test:
                    file_name += '-TEST'
                    legend = 'TEST: ' + legend
                file_name += '.pkl'
                self.files[file_name] = legend
def generate_file_names(self, pc):
    """Populate ``self.files`` (results-file name -> legend label) for the
    transfer-learning comparison plot.

    Only the fixed baseline and hypothesis-transfer entries are active:
    ``methods`` and ``sizes`` are empty, so the generation loop at the
    bottom never executes. The loop is kept for parity with the sibling
    ``generate_file_names`` implementations in this project.
    """
    self.files = OrderedDict()
    base_file_name = 'RelReg-cvx-constraints-%s=%s'
    use_test = other_method_configs['use_test_error_for_model_selection']
    self.files['TargetTransfer+SKL-RidgeClass.pkl'] = 'Target Only'
    self.files['FuseTransfer+SKL-RidgeClass.pkl'] = 'Source and Target'
    #self.files['FuseTransfer+SKL-RidgeClass-tws=0.5.pkl'] = 'Source and Target: Weighted 50%'
    #self.files['FuseTransfer+SKL-RidgeClass-tws=1.pkl'] = 'Source and Target: Weighted 100%'
    '''
    self.files['HypTransfer-target-TEST.pkl'] = 'TEST: Hypothesis Transfer - target'
    self.files['HypTransfer-optimal-TEST.pkl'] = 'TEST: Hypothesis Transfer - optimal'
    self.files['HypTransfer-optimal-noC-TEST.pkl'] = 'TEST: Hypothesis Transfer - optimal, no C'
    self.files['HypTransfer-noC-TEST.pkl'] = 'TEST: Hypothesis Transfer - no C'
    '''
    self.files['HypTransfer-target.pkl'] = 'Hypothesis Transfer - target'
    self.files['HypTransfer-optimal.pkl'] = 'Hypothesis Transfer - optimal'
    self.files[
        'HypTransfer-optimal-noC.pkl'] = 'Hypothesis Transfer - optimal, no C'
    self.files['HypTransfer-noC.pkl'] = 'Hypothesis Transfer - no C'
    self.files[
        'HypTransfer-first-noC.pkl'] = 'Hypothesis Transfer - just first'
    sizes = []
    suffixes = OrderedDict()
    #suffixes['mixedCV'] = [None,'']
    if not use_test:
        suffixes['nCV'] = [None, '10']
    #suffixes['numFeats'] = [str(num_feat)]
    ordered_keys = [
        'nCV',
    ]
    methods = []
    #methods.append(('numRandPairs','RelReg, %s pairs', 'Our Method: %s relative'))
    self.title = 'Test'
    all_params = list(grid_search.ParameterGrid(suffixes))
    for file_suffix, legend_name, legend_name_paper in methods:
        for size in sizes:
            for params in all_params:
                file_name = base_file_name % (file_suffix, str(size))
                legend = legend_name
                if viz_for_paper:
                    legend = legend_name_paper
                legend %= str(size)
                for key in ordered_keys:
                    # PY3 FIX: ``params.has_key(key)`` was removed in
                    # Python 3; ``key in params`` is equivalent on both.
                    if key not in params:
                        continue
                    value = params[key]
                    if value is None:
                        continue
                    if value == '':
                        # Flag-style suffix: present in the name, no value.
                        file_name += '-' + key
                        if not viz_for_paper:
                            legend += ', ' + key
                    else:
                        file_name += '-' + key + '=' + str(value)
                        if not viz_for_paper:
                            legend += ', ' + str(value) + ' ' + key
                if use_test:
                    file_name += '-TEST'
                    legend = 'TEST: ' + legend
                file_name += '.pkl'
                self.files[file_name] = legend
def make_param_grid(self):
    """Expand ``self.cv_params`` into a concrete list of parameter dicts."""
    expanded = grid_search.ParameterGrid(self.cv_params)
    return list(expanded)