def svm_ga(X, y, rfe=True, paramgrid=None):
    """Tune an rbf SVC with a genetic-algorithm search over a ReliefF-filtered pipeline.

    Parameters
    ----------
    X, y : training features and labels.
    rfe : when True, wrap ReliefF in recursive feature elimination.
    paramgrid : optional search space; a default rbf-SVC grid is used when None.
    """
    # Feature-selection stage: RFE-wrapped ReliefF or plain ReliefF.
    if rfe:
        selector = RFE(ReliefF(), n_features_to_select=5, step=0.5)
    else:
        selector = ReliefF(n_features_to_select=5, n_neighbors=3)

    if paramgrid is None:
        search_space = {
            "svc__kernel": ["rbf"],
            'svc__C': [10e-2, 10e-1, 10, 10e1, 10e2, 10e3, 10e4],
            'svc__gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1, 1.1]
        }
    else:
        search_space = paramgrid

    # Scale -> select -> classify.
    pipe = make_pipeline(preprocessing.StandardScaler(), selector, SVC())

    from evolutionary_search import EvolutionaryAlgorithmSearchCV
    search = EvolutionaryAlgorithmSearchCV(
        estimator=pipe,
        params=search_space,
        scoring="accuracy",
        cv=10,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.1,
        gene_crossover_prob=0.8,
        tournament_size=10,
        generations_number=25)
    search.fit(X, y)
    print(search.best_params_)
    print(search.best_score_)
def readme():
    """Run the sklearn-deap README example: GA search over an rbf SVC on digits.

    Returns the fitted EvolutionaryAlgorithmSearchCV instance.
    """
    digits = sklearn.datasets.load_digits()
    features = digits["data"]
    labels = digits["target"]
    search_space = {
        "kernel": ["rbf"],
        "C": np.logspace(-9, 9, num=25, base=10),
        "gamma": np.logspace(-9, 9, num=25, base=10)
    }
    random.seed(1)  # deterministic evolution
    search = EvolutionaryAlgorithmSearchCV(
        estimator=SVC(),
        params=search_space,
        scoring="accuracy",
        cv=StratifiedKFold(n_splits=4),
        verbose=1,
        population_size=10,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=5)
    search.fit(features, labels)
    return search
def get_GeneticGridSearchCV(model, params, X, y):
    """Run an evolutionary hyper-parameter search over ``model`` and print a report."""
    from evolutionary_search import EvolutionaryAlgorithmSearchCV

    print("performing genetic grid search ...")
    searcher = EvolutionaryAlgorithmSearchCV(
        estimator=model,
        params=params,
        scoring="r2",
        # cv=StratifiedKFold(n_splits=2),
        verbose=True,
        population_size=50,
        gene_mutation_prob=0.10,
        tournament_size=3,
        generations_number=10,
        # pmap = pool.map,
    )
    searcher.fit(X, y.ravel())  # flatten targets to 1-D before fitting

    # our classical metric for performance
    print("Best Accuracy: {}".format(searcher.best_score_))
    # the best parameters that caused the best accuracy
    print("Best Parameters: {}".format(searcher.best_params_))
    # the average time it took a model to fit to the data (in seconds)
    print("Average Time to Fit (s): {}".format(round(searcher.cv_results_['mean_fit_time'].mean(), 3)))
    # the average time it took a model to predict out of sample data (in seconds)
    # this metric gives us insight into how this model will perform in real-time analysis
    print("Average Time to Score (s): {}".format(round(searcher.cv_results_['mean_score_time'].mean(), 3)))
    print(pd.DataFrame(searcher.cv_results_).sort_values("mean_test_score", ascending=False).head())
def evo_search(xtrain, xtest, ytrain, ytest):
    """Evolutionary hyper-parameter search for an MLP classifier, then print stats."""
    # Candidate two-layer architectures: [10,10], [110,110], ..., [410,410].
    layers = [[width, width] for width in range(10, 500, 100)]
    print(layers)
    parameters = {
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        # 'solver': ['lbfgs', 'sgd', 'adam'],
        # 'learning_rate': ['constant', 'invscaling', 'adaptive'],
        'batch_size': [5, 10, 20, 50, 100],
        'learning_rate_init': [0.0001, 0.001, 0.01, 0.1],
        # 'hidden_layer_sizes': generate_networks(),
        'hidden_layer_sizes': layers
    }
    print(parameters)
    print('Starting evolutionary search')
    searcher = EvolutionaryAlgorithmSearchCV(
        estimator=MLPClassifier(random_state=42, max_iter=20000),
        params=parameters,
        scoring=make_scorer(f1_score),
        # cv=StratifiedKFold(n_splits=4),
        verbose=10,
        population_size=20,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=10,
        n_jobs=1)
    searcher.fit(xtrain, ytrain.values.ravel())
    print_classifier_stats(searcher.best_estimator_, xtrain, xtest, ytrain, ytest)
    print('Evo search done...')
def main():
    """Tune an SVC on the training CSV with an evolutionary parameter search."""
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')
    # Column 0 is the label; feature columns start at index 2.
    X_train, y_train = df_train.iloc[:, 2:].values, df_train.iloc[:, 0].values
    #X_test, y_test = df_test.iloc[:, 2:].values, df_test.iloc[:, 0].values

    # log-uniform: understand as search over p = exp(x) by varying x
    opt = EvolutionaryAlgorithmSearchCV(
        estimator=SVC(),
        # ref: https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/libsvm_svc.py
        params={
            "kernel": ["rbf"],
            # NOTE(review): np.logspace takes *exponents*; start=1e-6, stop=1e+6
            # means 10**1e6 at the top (overflow to inf). Likely -6..6 was
            # intended — confirm before changing the searched range.
            "C": np.logspace(1e-6, 1e+6, num=20, base=10),
            "gamma": np.logspace(3.0517578125e-05, 8, num=20, base=10),
            'decision_function_shape': ['ovo', 'ovr'],
            'degree': list(range(2, 5)),
            # Bug fix: 'coef0' appeared twice in the original literal; the first
            # grid (np.logspace(-1, 1, ...)) was silently discarded by the
            # duplicate key, so only the entry below was ever searched.
            'coef0': np.logspace(1e-5, 1e-1, num=20, base=10),
        },
        cv=StratifiedKFold(n_splits=10, shuffle=True),
        scoring="accuracy",
        verbose=True,
        population_size=50,
        gene_mutation_prob=0.10,
        tournament_size=3,
        generations_number=10,
    )
    opt.fit(X_train, y_train)
def LR2(X_train_little, y_train_little, X_train_pca, X_test_pca, y_train, y_test, tune_only=False):
    """GA-tune an L2 logistic regression, persist the best parameters, and
    (unless ``tune_only``) refit on the PCA features and emit a report."""
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_validate

    C_range = np.linspace(1, 50, 50)
    tol_range = np.linspace(0.001, 0.01, 50)
    param_dist = dict(tol=tol_range, C=C_range)
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=SEED)
    rnds = EvolutionaryAlgorithmSearchCV(
        estimator=LogisticRegression(penalty='l2'),
        params=param_dist,
        scoring="f1",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    rnds.fit(X_train_little, y_train_little)

    # Persist the tuning outcome. Fix: context manager guarantees the file is
    # closed even if a write raises (original used bare open()/close()).
    with open("LR2_best_parameters.txt", "w") as file:
        file.write("{}\n".format(rnds.best_score_))
        file.write('C: {}\n'.format(rnds.best_estimator_.C))
        file.write('tol: {}\n'.format(rnds.best_estimator_.tol))

    if not tune_only:
        # apply best parameters
        l2r = LogisticRegression(C=rnds.best_estimator_.C,
                                 tol=rnds.best_estimator_.tol,
                                 random_state=SEED)
        l2r.fit(X_train_pca, y_train)
        sc_tr = cross_validate(l2r, X_train_pca, y_train, scoring=SCORING, cv=5, return_train_score=False)
        sc_ts = cross_validate(l2r, X_test_pca, y_test, scoring=SCORING, cv=5, return_train_score=False)
        pred = l2r.predict(X_test_pca)
        pred_train = l2r.predict(X_train_pca)
        output_report("LR2", y_train, pred_train, y_test, pred, sc_tr, sc_ts)
def SVM(X_train_little, y_train_little, X_train_pca, X_test_pca, y_train, y_test, tune_only=False):
    """GA-tune an SVC (C, gamma), persist the best parameters, and (unless
    ``tune_only``) refit on the PCA features and emit a report."""
    from sklearn.svm import SVC
    from sklearn.model_selection import cross_validate

    C_range = np.linspace(1, 10, 101)
    gamma_range = np.linspace(3000, 4000, 100)
    param_dist = dict(gamma=gamma_range, C=C_range)
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=SEED)
    rnds = EvolutionaryAlgorithmSearchCV(
        estimator=SVC(max_iter=200),
        params=param_dist,
        scoring="f1",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    rnds.fit(X_train_little, y_train_little)

    # Persist the tuning outcome. Fix: context manager guarantees the file is
    # closed even if a write raises (original used bare open()/close()).
    with open("SVM_best_parameters.txt", "w") as file:
        file.write("{}\n".format(rnds.best_score_))
        file.write('C: {}\n'.format(rnds.best_estimator_.C))
        file.write('gamma: {}\n'.format(rnds.best_estimator_.gamma))

    if not tune_only:
        # apply best parameters
        svc = SVC(max_iter=200,
                  C=rnds.best_estimator_.C,
                  gamma=rnds.best_estimator_.gamma,
                  random_state=SEED)
        svc.fit(X_train_pca, y_train)
        sc_tr = cross_validate(svc, X_train_pca, y_train, scoring=SCORING, cv=5, return_train_score=False)
        sc_ts = cross_validate(svc, X_test_pca, y_test, scoring=SCORING, cv=5, return_train_score=False)
        pred = svc.predict(X_test_pca)
        pred_train = svc.predict(X_train_pca)
        output_report("SVM", y_train, pred_train, y_test, pred, sc_tr, sc_ts)
def sk_params_search_best(
    clf,
    X,
    y,
    param_grid=None,
    method="gridsearch",
    param_search=None,
):
    """Search ``clf`` hyper-parameters by grid search or a genetic algorithm.

    Genetic: population_size=5, gene_mutation_prob=0.10, gene_crossover_prob=0.5,
    tournament_size=3, generations_number=3

    :param clf: estimator to tune
    :param X: features
    :param y: targets
    :param param_grid: parameter grid; defaults to {"alpha": np.linspace(0, 1, 5)}
    :param method: "gridsearch" or "genetic"
    :param param_search: settings dict with keys "scorename", "cv",
        "population_size", "generations_number"
    :return: (best_score, best_params); None for an unknown method
    """
    # Bug fix: the original used mutable dict literals as parameter defaults,
    # which are shared across calls and can be mutated by callers. Build the
    # defaults per call instead (behavior when arguments are omitted is identical).
    if param_grid is None:
        param_grid = {"alpha": np.linspace(0, 1, 5)}
    if param_search is None:
        param_search = {
            "scorename": "r2",
            "cv": 5,
            "population_size": 5,
            "generations_number": 3,
        }

    p = param_search
    myscore = sk_score_get(p["scorename"])

    if method == "gridsearch":
        from sklearn.model_selection import GridSearchCV
        grid = GridSearchCV(clf, param_grid, cv=p["cv"], scoring=myscore)
        grid.fit(X, y)
        return grid.best_score_, grid.best_params_

    if method == "genetic":
        from evolutionary_search import EvolutionaryAlgorithmSearchCV
        from sklearn.model_selection import StratifiedKFold

        # NOTE(review): StratifiedKFold(y) passes labels where n_splits is
        # expected in modern scikit-learn — confirm the intended version.
        # paramgrid = {"alpha": np.linspace(0,1, 20) , "l1_ratio": np.linspace(0,1, 20) }
        cv = EvolutionaryAlgorithmSearchCV(
            estimator=clf,
            params=param_grid,
            scoring=myscore,
            cv=StratifiedKFold(y),
            verbose=True,
            population_size=p["population_size"],
            gene_mutation_prob=0.10,
            gene_crossover_prob=0.5,
            tournament_size=3,
            generations_number=p["generations_number"],
        )
        cv.fit(X, y)
        return cv.best_score_, cv.best_params_
def ev_tree(self):
    """GA-tune a random forest on the in-sample data and store the best estimator.

    Sets self.frst_called and self.opt_frst as side effects.
    """
    # Bug fix: copy the shared defaults instead of aliasing them — the original
    # mutated self.default_evparams in place, so a later ev_svm() call would
    # start from this method's leftover estimator/params entries.
    ev_params = dict(self.default_evparams)
    ev_params['estimator'] = RandomForestClassifier()
    ev_params['params'] = self.frst_space
    cv = EvolutionaryAlgorithmSearchCV(**ev_params)
    cv.fit(self.X_insample, self.y_insample)
    clf = cv.best_estimator_
    self.frst_called = True
    self.opt_frst = clf
def ev_svm(self):
    """GA-tune a probabilistic SVC on the in-sample data and store the best estimator.

    Sets self.svm_called and self.opt_svm as side effects.
    """
    # Bug fix: copy the shared defaults instead of aliasing them — the original
    # mutated self.default_evparams in place, so sibling tuning methods would
    # inherit this method's estimator/params entries.
    ev_params = dict(self.default_evparams)
    ev_params['estimator'] = SVC(probability=True)
    ev_params['params'] = self.svm_space
    cv = EvolutionaryAlgorithmSearchCV(**ev_params)
    cv.fit(self.X_insample, self.y_insample)
    clf = cv.best_estimator_
    self.svm_called = True
    self.opt_svm = clf
def NB(X_train_little, y_train_little, X_train_pca, X_test_pca, y_train, y_test, tune_only=False):
    """GA-tune a Bernoulli naive Bayes (alpha), persist the best parameter, and
    (unless ``tune_only``) refit on the PCA features and emit a report."""
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.model_selection import cross_validate

    # NOTE(review): the grid includes alpha=0, which BernoulliNB warns about —
    # confirm whether the lower bound should be > 0.
    alpha_range = np.linspace(0, 500, 500)
    param_dist = dict(alpha=alpha_range)
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2)
    rnds = EvolutionaryAlgorithmSearchCV(
        estimator=BernoulliNB(),
        params=param_dist,
        scoring="f1",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    rnds.fit(X_train_little, y_train_little)

    # Persist the tuning outcome. Fix: context manager guarantees the file is
    # closed even if a write raises (original used bare open()/close()).
    with open("NB_best_parameters.txt", "w") as file:
        file.write("{}\n".format(rnds.best_score_))
        file.write('alpha: {}\n'.format(rnds.best_estimator_.alpha))

    if not tune_only:
        # apply best parameters
        gnb = BernoulliNB(alpha=rnds.best_estimator_.alpha)
        gnb.fit(X_train_pca, y_train)
        sc_tr = cross_validate(gnb, X_train_pca, y_train, scoring=SCORING, cv=5, return_train_score=False)
        sc_ts = cross_validate(gnb, X_test_pca, y_test, scoring=SCORING, cv=5, return_train_score=False)
        pred = gnb.predict(X_test_pca)
        pred_train = gnb.predict(X_train_pca)
        output_report("NB", y_train, pred_train, y_test, pred, sc_tr, sc_ts)
def geneticGridTest(self):
    """Run an evolutionary grid search over self.classifier with self.gridParams."""
    print("performing Genetic grid search...")
    gridSearch = EvolutionaryAlgorithmSearchCV(
        estimator=self.classifier,
        params=self.gridParams,
        cv=self.kfold,
        scoring='accuracy',
        verbose=True,
        # Bug fix: iid was passed the *string* 'False', which is truthy and so
        # behaved as iid=True; pass the boolean that was clearly intended.
        iid=False,
        n_jobs=4,
        population_size=20,
        gene_mutation_prob=0.30,
        tournament_size=2,
        generations_number=5)
    gridSearch.fit(self.X, self.y)
def GA_tune_lgbm(cls, x, y):
    """Evolutionary search over ``cls.lgbm_paramgrid`` for an LGBM classifier.

    Uses a time-series split so validation folds respect temporal order.
    Returns the best parameter dict found.
    """
    searcher = EvolutionaryAlgorithmSearchCV(
        estimator=LGBMClassifier(),
        params=cls.lgbm_paramgrid,
        scoring="accuracy",
        cv=TimeSeriesSplit(n_splits=4),
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.2,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=20,
    )
    searcher.fit(x, y)
    return searcher.best_params_
def cv_optimize(clf, parameters, X, y, n_jobs=1, n_folds=5, score_func=None, evo=None, population_size=5): if score_func: if evo: gs = EvolutionaryAlgorithmSearchCV(pipeline, grid=parameters, scoring=score_func, n_jobs=n_jobs, population_size=population_size) else: gs = GridSearchCV(clf, param_grid=parameters, cv=n_folds, n_jobs=n_jobs, scoring=score_func) else: if evo: gs = EvolutionaryAlgorithmSearchCV(pipeline, grid=parameters, scoring=None, verbose=True, n_jobs=4, population_size=population_size) else: gs = GridSearchCV(clf, param_grid=parameters, n_jobs=n_jobs, cv=n_folds) gs.fit(X, y) print "BEST", gs.best_params_, gs.best_score_, gs.grid_scores_ best = gs.best_estimator_ return best
def RF_DT(X_train_little, y_train_little, X_train_pca, X_test_pca, y_train, y_test, tune_only=False):
    """GA-tune a random forest (leaf size, depth), persist the best parameters,
    and (unless ``tune_only``) refit on the PCA features and emit a report.

    Returns (pred, pred_train); both are None when tune_only=True.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import cross_validate

    min_samples_leaf_range = np.round(np.linspace(1, 10, 10)).astype(int)
    max_depth_range = np.round(np.linspace(1, 30, 30)).astype(int)
    param_dist = dict(min_samples_leaf=min_samples_leaf_range,
                      max_depth=max_depth_range)
    num_features = len(X_train_little[0])
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=SEED)
    rnds = EvolutionaryAlgorithmSearchCV(
        estimator=RandomForestClassifier(n_estimators=int((1 + num_features / 2))),
        params=param_dist,
        scoring="f1",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    rnds.fit(X_train_little, y_train_little)

    # Persist the tuning outcome. Fix: context manager guarantees the file is
    # closed even if a write raises (original used bare open()/close()).
    with open("RF_DT_best_parameters.txt", "w") as file:
        file.write("{}\n".format(rnds.best_score_))
        file.write('min_samples_leaf: {}\n'.format(rnds.best_estimator_.min_samples_leaf))
        file.write('max_depth: {}\n'.format(rnds.best_estimator_.max_depth))

    # Bug fix: pred/pred_train were only bound inside the `if not tune_only`
    # branch, so the unconditional return raised NameError when tune_only=True.
    pred = pred_train = None
    if not tune_only:
        # apply best parameters RF
        rfc = RandomForestClassifier(n_estimators=int((1 + num_features / 2)),
                                     min_samples_leaf=rnds.best_estimator_.min_samples_leaf,
                                     max_depth=rnds.best_estimator_.max_depth,
                                     random_state=SEED)
        rfc.fit(X_train_pca, y_train)
        sc_tr = cross_validate(rfc, X_train_pca, y_train, scoring=SCORING, cv=5, return_train_score=False)
        sc_ts = cross_validate(rfc, X_test_pca, y_test, scoring=SCORING, cv=5, return_train_score=False)
        pred = rfc.predict(X_test_pca)
        pred_train = rfc.predict(X_train_pca)
        output_report("RF", y_train, pred_train, y_test, pred, sc_tr, sc_ts)
    return pred, pred_train
def main():
    """Demo: evolutionary rbf-SVC hyper-parameter search over module-level X, y."""
    search_space = {
        "kernel": ["rbf"],
        "C": np.logspace(-9, 9, num=25, base=10),
        "gamma": np.logspace(-9, 9, num=25, base=10)
    }
    random.seed(1)  # reproducible evolution
    from evolutionary_search import EvolutionaryAlgorithmSearchCV
    search = EvolutionaryAlgorithmSearchCV(
        estimator=SVC(),
        params=search_space,
        scoring="accuracy",
        cv=StratifiedKFold(n_splits=4),
        verbose=1,
        population_size=5,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=5,
        n_jobs=4)
    # X and y are expected at module scope.
    search.fit(X, y)
def tune(model, X, y, cv):
    """GA-tune the integer C parameter of ``model`` using f1_weighted scoring.

    Returns the fitted search object.
    """
    c_values = np.round(np.linspace(1, 10, 10)).astype(int)
    search_space = dict(C=c_values, )
    #num_features = len(X[0])
    searcher = EvolutionaryAlgorithmSearchCV(
        estimator=model,
        params=search_space,
        scoring="f1_weighted",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    searcher.fit(X, y)
    return searcher
def tune(model, X, y, cv):
    """GA-tune tree hyper-parameters (min_samples_leaf, max_depth) of ``model``
    using f1_weighted scoring; returns the fitted search object."""
    leaf_values = np.round(np.linspace(1, 10, 10)).astype(int)
    depth_values = np.round(np.linspace(1, 30, 30)).astype(int)
    search_space = dict(min_samples_leaf=leaf_values, max_depth=depth_values)
    searcher = EvolutionaryAlgorithmSearchCV(
        estimator=model,
        params=search_space,
        scoring="f1_weighted",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    searcher.fit(X, y)
    return searcher
# NOTE(review): this chunk begins inside a feature-extraction function whose
# `def` line (presumably extract_features(data, limit)) lies above this view;
# the loop below is that function's body.
    for index, row in data.iterrows():#iterate over csv file
        if index==limit:
            break
        # Load the image named by this row's id.
        img=cv2.imread(root_path + '/images/' + str(row['image_id']) + '.jpg')
        histogram=np.zeros((3, 256))
        for i in range(3):#calc hist for each channel
            histogram[i] = cv2.calcHist([img],[i],None,[256],[0,255]).ravel()
        X[index]=histogram.ravel()#to 1d array
    return X


# Build features/labels for up to 1500 training rows.
X = extract_features(train_data, 1500)
y = train_data['image_label'].values[:X.shape[0]].ravel()
grid = {
    'knn__n_neighbors': [1, 10, 20, 30, 40, 60, 75, 100, 120, 160, 200],
    'knn__metric': ['euclidean', 'manhattan', 'chebyshev'],
    'knn__weights': ['uniform', 'distance'],
    'preprocess__norm': ['l1', 'l2', 'max']
}
pipeline = Pipeline(steps=[
    ('preprocess', preprocessing.Normalizer()),
    ('knn', neighbors.KNeighborsClassifier())
])
# GA search over the normalizer+KNN pipeline, scored by ROC AUC.
model = EvolutionaryAlgorithmSearchCV(pipeline, grid, scoring='roc_auc',
                                      verbose=True, n_jobs=4, population_size=10)
model.fit(X, y)
# NOTE(review): extract_features is called here with one argument but appears
# to take (data, limit) — confirm `limit` has a default upstream.
preds = model.predict_proba(extract_features(test_data))[:, 1]
test_data = test_data.drop('image_url', 1)
test_data['image_label'] = preds
test_data.to_csv(root_path + '/res.csv', index=False)
# use a full grid over all parameters # run grid search grid_search = GridSearchCV(clf, param_grid=param_grid) start = time() grid_search.fit(X, y) print("GridSearchCV took %.2f seconds for %d candidate parameter settings." % (time() - start, len(grid_search.cv_results_['params']))) #report(grid_search.cv_results_) print(grid_search.best_score_) # run evolutionary_ evolution_search = EvolutionaryAlgorithmSearchCV( estimator=clf, params=param_grid, #scoring="accuracy", verbose=1, population_size=50, gene_mutation_prob=0.10, gene_crossover_prob=0.5, tournament_size=3, generations_number=4, ) start = time() evolution_search.fit(X, y) print("evolution_searchCV took %.2f seconds for %d candidates" " parameter settings." % ((time() - start), n_iter_search)) print(evolution_search.cv_results_)
# NOTE(review): this chunk opens inside a triple-quoted (disabled) GA-RF
# section whose opening ''' lies above this view; everything down to the
# closing ''' below is inert string content, preserved verbatim.
gene_crossover_prob=0.25,
                                   tournament_size=2,
                                   generations_number=3,
                                   n_jobs=2)
#print(model.wv.most_similar('sensitive'))
cv.fit(tfidf_transformer.fit_transform(X), y['section'].tolist())
#cv.fit(MeanEmbeddingVectorizer(w2v).transform(X), y['section'].tolist())
'''
#--------------------------------- GA-SVC  ---------------------------------
# Active path: GA search over C for a LinearSVC on tf-idf features.
paramgrid = {"C": np.logspace(-9, 9, num=25, base=10)}
cv = EvolutionaryAlgorithmSearchCV(estimator=LinearSVC(),
                                   params=paramgrid,
                                   scoring="accuracy",
                                   cv=StratifiedKFold(n_splits=4),
                                   verbose=1,
                                   population_size=50,
                                   gene_mutation_prob=0.10,
                                   gene_crossover_prob=0.5,
                                   tournament_size=3,
                                   generations_number=5,
                                   n_jobs=4)
cv.fit(tfidf_transformer.fit_transform(X), y['section'].tolist())
X = hyper_data.values[:, 15:]
# NOTE(review): `[:2]` selects the first two ROWS of values as y — almost
# certainly a label column slice such as `[:, 2]` was intended; confirm.
y = hyper_data.values[:2]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=100, test_size=0.25)
# NOTE(review): kernel/C/gamma are SVC parameters, but the estimator below is
# a RandomForestClassifier, which accepts none of them — verify whether SVC()
# was intended as the estimator.
paramgrid = {
    'kernel': ['rbf'],
    'C': np.logspace(-9, 9, num=25, base=10),
    'gamma': np.logspace(-9, 9, num=25, base=10)
}
random.seed(1)  # deterministic evolution
cv = EvolutionaryAlgorithmSearchCV(estimator=RandomForestClassifier(),
                                   params=paramgrid,
                                   scoring='accuracy',
                                   cv=StratifiedKFold(n_splits=4),
                                   verbose=1,
                                   population_size=50,
                                   gene_mutation_prob=0.10,
                                   gene_crossover_prob=0.5,
                                   tournament_size=3,
                                   generations_number=5,
                                   n_jobs=4)
cv.fit(X_train, y_train)
# NOTE(review): Python 2 chunk opening mid-dict — the paramgrid literal begins
# above this view.
    "degree":[3]
}
random.seed(1)  # deterministic evolution
cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
                                   params=paramgrid,
                                   scoring="accuracy",
                                   # old sklearn API: labels + n_folds
                                   cv=StratifiedKFold(trainlabel, n_folds=10),
                                   verbose=True,
                                   population_size=50,
                                   gene_mutation_prob=0.10,
                                   tournament_size=10,
                                   generations_number=5,
                                   n_jobs = 5)
cv.fit(train, trainlabel)
print cv.best_score_, cv.best_params_
##############################################
#ff = mysvc.training_manCV()
#ff.train_gene(train, trainlabel, 'poly', Cmin=-10, Cmax=10, numC=21, rmin=-10, rmax=10, numr=21, degree = 3)
#------------------------------------------------------------------------------
#-------------------------------------------------------------
# NOTE(review): df and df_this are not defined in this chunk — presumably
# result frames built above; confirm they are in scope.
print df, df_this
#------------------------------------------------------------------------------
#df.to_csv('/home/peng/git/Machine_learning_for_reliability_analysis/Test_1/Results/poly_cm_10CV_d4_n10_p10_21.csv', header = True)
#df_this.to_csv('/home/peng/git/Machine_learning_for_reliability_analysis/Test_1/Results/Try_this_score.csv', header = True)
# Python 2 chunk: c_range is built above this view.
print c_range
param_dist = dict(C=c_range)
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.25, random_state=2018)
rnds = EvolutionaryAlgorithmSearchCV(
    estimator=LogisticRegression(random_state=0),
    params=param_dist,
    scoring="f1",
    cv=cv,
    verbose=1,
    population_size=50,
    gene_mutation_prob=0.10,
    gene_crossover_prob=0.5,
    tournament_size=3,
    generations_number=6,
    n_jobs=4)
rnds.fit(X, y)
best_C = rnds.best_estimator_.C

# apply best parameters
# NOTE(review): RandomizedLogisticRegression performs stability selection; its
# scores_ attribute ranks features — confirm the sklearn version still ships it.
lr = RandomizedLogisticRegression(C=best_C, random_state=0, sample_fraction=0.75,
                                  n_resampling=200, selection_threshold=0.25)
lr.fit(X, y)
importances = lr.scores_
indices = np.argsort(importances)[::-1]  # descending feature importance

# Print the feature ranking
print("Feature ranking:")
def NN(X_train_little, y_train_little, X_train_pca, X_test_pca, y_train, y_test, tune_only=False):
    """GA-tune an MLP classifier, persist the best parameters, and (unless
    ``tune_only``) refit on the PCA features and emit a report."""
    from sklearn.neural_network import MLPClassifier
    from sklearn.model_selection import cross_validate

    num_features = len(X_train_little[0])

    # prepare parameter grid
    alpha_range = np.linspace(0.005, 0.015, 50)
    learning_rate_range = np.linspace(0.01, 0.07, 50)
    epsilon_range = np.logspace(-9, -6, 50)
    beta_1_range = np.linspace(0.3, 0.7, 50)
    beta_2_range = np.linspace(0.3, 0.7, 50)
    a = int((num_features + 1) / 2)
    b = int((num_features + 1) / 2 + 10)
    med_layer_range = np.arange(a, b)
    # NOTE(review): hidden_layer_sizes is a single 3-tuple here, so the search
    # treats its elements as candidate values — confirm that is intended.
    param_dist = dict(alpha=alpha_range,
                      hidden_layer_sizes=(num_features, med_layer_range, 1),
                      learning_rate_init=learning_rate_range,
                      epsilon=epsilon_range,
                      beta_1=beta_1_range,
                      beta_2=beta_2_range)
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=SEED)
    rnds = EvolutionaryAlgorithmSearchCV(
        estimator=MLPClassifier(early_stopping=True),
        params=param_dist,
        scoring="f1",
        cv=cv,
        verbose=1,
        population_size=50,
        gene_mutation_prob=0.10,
        gene_crossover_prob=0.5,
        tournament_size=3,
        generations_number=6,
        n_jobs=4)
    rnds.fit(X_train_little, y_train_little)

    # Persist the tuning outcome. Fix: context manager guarantees the file is
    # closed even if a write raises (original used bare open()/close()).
    with open("MLP_best_parameters.txt", "w") as file:
        file.write("{}\n".format(rnds.best_score_))
        file.write('alpha: {}\n'.format(rnds.best_estimator_.alpha))
        file.write('hidden_layer_sizes: {}\n'.format(
            rnds.best_estimator_.hidden_layer_sizes))
        file.write('learning_rate_init: {}\n'.format(
            rnds.best_estimator_.learning_rate_init))
        file.write('epsilon: {}\n'.format(rnds.best_estimator_.epsilon))
        file.write('beta_1: {}\n'.format(rnds.best_estimator_.beta_1))
        file.write('beta_2: {}\n'.format(rnds.best_estimator_.beta_2))

    if not tune_only:
        # apply best parameters
        mlp = MLPClassifier(
            hidden_layer_sizes=rnds.best_estimator_.hidden_layer_sizes,
            early_stopping=True,
            alpha=rnds.best_estimator_.alpha,
            learning_rate_init=rnds.best_estimator_.learning_rate_init,
            epsilon=rnds.best_estimator_.epsilon,
            beta_1=rnds.best_estimator_.beta_1,
            beta_2=rnds.best_estimator_.beta_2,
            random_state=SEED)
        mlp.fit(X_train_pca, y_train)
        sc_tr = cross_validate(mlp, X_train_pca, y_train, scoring=SCORING, cv=5, return_train_score=False)
        sc_ts = cross_validate(mlp, X_test_pca, y_test, scoring=SCORING, cv=5, return_train_score=False)
        pred = mlp.predict(X_test_pca)
        pred_train = mlp.predict(X_train_pca)
        output_report("MLP", y_train, pred_train, y_test, pred, sc_tr, sc_ts)
C_range = np.linspace(1, 10, 100)
gamma_range = np.linspace(3000, 4000, 100)
param_dist = dict(gamma=gamma_range, C=C_range)
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=42)
rnds = EvolutionaryAlgorithmSearchCV(estimator=SVC(max_iter=200),
                                     params=param_dist,
                                     scoring="f1",
                                     cv=cv,
                                     verbose=1,
                                     population_size=50,
                                     gene_mutation_prob=0.10,
                                     gene_crossover_prob=0.5,
                                     tournament_size=3,
                                     generations_number=6,
                                     n_jobs=4)
rnds.fit(X_train_little, y_train_little)

# summarize the results of the random parameter search
print(rnds.best_score_)
print('\nC: ')
print(rnds.best_estimator_.C)
print('\ngamma: ')
print(rnds.best_estimator_.gamma)

# apply best parameters
# NOTE(review): the refit uses max_iter=400 while tuning used 200 — confirm
# the doubled iteration budget is intentional.
svc = SVC(max_iter=400, C=rnds.best_estimator_.C, gamma=rnds.best_estimator_.gamma)
svc.fit(X_train_pca, y_train)
pred = svc.predict(X_test_pca)
pred_train = svc.predict(X_train_pca)

# NOTE(review): chunk ends mid-statement; the `if learner:` body continues
# beyond this view.
if learner:  # NNs
# Python 2 chunk: GA-tune an SVC on every (selection method, subset size) pair.
sizes=['10','50','100','150','200','250']
methods=['MRMR','JMI','JMIM']
targets=np.array(joblib.load('DatasetA_ValidationClasses.joblib.pkl'))
for method in methods:
    for size in sizes:
        random.seed(1)  # same GA seed for every method/size combination
        X=X_original
        # First mask: indices chosen by the selection method overall.
        indices= joblib.load(method+' PICKLES/selected_indices_'+method+'.joblib.pkl')
        X=np.array(X)[:,indices]
        # Second mask: the top-`size` subset for this method.
        indices= joblib.load(method+' PICKLES/'+size+'-'+method+'.joblib.pkl')
        X=np.array(X)[:,indices]
        f=open('genetic/'+method+'-'+size+'.txt','w')
        print size
        print method
        print "svm.SVC"
        f.write("svm.SVC\n")
        # Old sklearn API: StratifiedKFold(labels, n_folds=...).
        # NOTE(review): paramgrid is not defined in this chunk — presumably set
        # above; confirm it is in scope.
        cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
                                           params=paramgrid,
                                           scoring="accuracy",
                                           cv=StratifiedKFold(targets, n_folds=10),
                                           verbose=1,
                                           population_size=50,
                                           gene_mutation_prob=0.10,
                                           gene_crossover_prob=0.5,
                                           tournament_size=3,
                                           generations_number=5,
                                           n_jobs=-1)
        cv.fit(X, targets)
        f.write('\n=======================\n')
class Wrap:
    """use GridSearchCV, RandomizedSearchCV and Evolutionary Search with this class.

    Methods
    -------
    grid_method(self)
        uses the GridSearchCV object and .fit()
        :returns grid_history = grid.fit()
    random_method(self)
        uses the RandomizedSearchCV object and .fit()
        :returns rand_history = rand.fit()

    Attributes
    ----------
    data_obj : (DataManager) passed in object from DataManager class
    network_obj: (NeuralArch) architecture of the neural network, so we can use it in KerasRegressor
    keras_regressor: (KerasRegressor) a KerasRegressor object with build_fn=network_obj.build_nn"""

    def __init__(self, data_obj, patience=10):
        # todo explain why do we have these different object in the class
        self.data_obj = data_obj
        network_obj = Net(self.data_obj)
        self.keras_regressor = KerasRegressor(build_fn=network_obj.build_nn)
        self.callback = MyCallbacks(patience=patience)

    def grid_method(self):
        """grid_method(self) uses the GridSearchCV object and .fit()
        :returns grid_history = grid.fit()"""
        params = dict(epochs=[200], batch_size=[4, 8])
        # Single pseudo-fold covering all data: disables real cross-validation.
        cv = [(slice(None), slice(None))
              ]  # why have i written this over and over??
        es = self.callback.es
        mc = self.callback.mc
        tb = self.callback.tb
        csv_log = self.callback.csv_log
        my_callbacks = [es, mc, csv_log]
        self.grid = GridSearchCV(estimator=self.keras_regressor,
                                 param_grid=params,
                                 cv=cv)
        grid_history = self.grid.fit(
            X=self.data_obj.x_train,
            y=self.data_obj.y_train,
            validation_data=(self.data_obj.x_validation,
                             self.data_obj.y_validation),
            verbose=0,
            callbacks=my_callbacks)
        return grid_history

    def random_method(self):
        """grid_method(self) uses the GridSearchCV object and .fit()
        returns grid_history = grid.fit()"""
        params = dict(epochs=[100], batch_size=[2, 4, 8, 12, 16, 20, 24, 32, 36])
        cv = [(slice(None), slice(None))]
        es = self.callback.es
        mc = self.callback.mc
        tb = self.callback.tb
        # NOTE(review): tb is fetched but not included below — confirm
        # TensorBoard was meant to be dropped from this search.
        my_callbacks = [es, mc]
        self.rand = RandomizedSearchCV(estimator=self.keras_regressor,
                                       param_distributions=params,
                                       n_iter=8)
        rand_history = self.rand.fit(
            X=self.data_obj.x_train,
            y=self.data_obj.y_train,
            validation_data=(self.data_obj.x_validation,
                             self.data_obj.y_validation),
            verbose=0,
            callbacks=my_callbacks)
        return rand_history

    def evolution_method(self):
        # this does not work, but we need to continue
        params = dict(epochs=[200], batch_size=[4, 8])
        # cv = [(slice(None), slice(None))]
        es = self.callback.es
        mc = self.callback.mc
        tb = self.callback.tb
        my_callbacks = [es, mc]
        # fit_params are forwarded to every underlying Keras fit call.
        fit_params = {
            "epochs": 300,
            "validation_data": (self.data_obj.x_validation,
                                self.data_obj.y_validation),
            "callbacks": my_callbacks
        }
        self.evo = EvolutionaryAlgorithmSearchCV(
            estimator=self.keras_regressor,
            params=params,
            verbose=0,
            population_size=10,
            fit_params=fit_params)
        evo_hist = self.evo.fit(X=self.data_obj.x_train, y=self.data_obj.y_train)
        return evo_hist
# Evolutionary search over a pipeline (pipe_xg/parameters defined above this view).
clf2 = EvolutionaryAlgorithmSearchCV(
    estimator=pipe_xg,          # How will objective be evaluated
    params=parameters,          # Parameters range
    scoring="accuracy",         # Criteria
    cv=2,                       # No of folds
    verbose=True,
    population_size=50,
    gene_mutation_prob=0.10,
    tournament_size=3,
    generations_number=10
)
start = time.time()
clf2.fit(X_train, y_train)  # 1hr 2 minute
end = time.time()
(end-start)/60  # elapsed minutes — value discarded outside an interactive session
clf2.best_params_  # likewise a no-op outside a REPL/notebook
# Our cvresults table (note, includes all individuals
# with their mean, max, min, and std test score).
out = pd.DataFrame(
    clf2.cv_results_
)
# NOTE(review): chunk ends mid-call — the closing paren of sort_values(...)
# lies beyond this view.
out = out.sort_values(
    "mean_test_score",
    ascending=False
def main():
    """For each pair of WIPO section labels, run a GA-tuned LinearSVC over
    tf-idf features of that pair's training CSV."""
    rand_st = 42
    classes = ["A", "B", "C", "D", "E", "F", "G", "H"]
    from itertools import combinations
    subsets = []
    # Build every 2-section combination and ensure its output directory exists.
    for subset in combinations(classes, 2):
        subsets.append(subset)
        try:
            os.makedirs(PREPROCESS_PATH + str(subset[0] + subset[1]))
        except:
            pass
    for sub in subsets:
        PATH = "/home/bruno/base-wipo/preprocess-artigo/" + str(sub[0]) + str(
            sub[1]) + "/"
        print(" --------------------------" + str(sub[0]) + str(sub[1]) +
              "--------------------------------- ")
        treinamento = "treinamento.csv"
        # Labels: the "section" column of this pair's training CSV.
        y = pd.read_csv(os.path.join(os.path.dirname(__file__),
                                     PATH + treinamento),
                        header=0, delimiter=";", usecols=["section"], quoting=3)
        '''
        X = pd.read_csv(os.path.join(os.path.dirname(__file__),PATH+treinamento), header=0,delimiter=";",usecols=["data"], quoting=3)
        X = X["data"].tolist()
        '''
        # Streaming iterator over the document texts.
        X = TideneIterCSVGA(PATH + treinamento)
        tfidf_transformer = TfidfVectorizer()
        n = len(y)
        random.seed(1)
        from evolutionary_search import EvolutionaryAlgorithmSearchCV
        # Disabled GA-RF variant kept verbatim inside the string literal below.
        '''
        #--------------------------------- GA-RF  ---------------------------------
        from evolutionary_search import EvolutionaryAlgorithmSearchCV
        clf_RF_gs = RandomForestClassifier(random_state=rand_st, n_jobs=-1)
        clf_RF_pg = [{
            'max_depth': np.logspace(0.3,4,num = 10 ,base=10,dtype='int'), #[1, 5, 13, 34, 87, 226, 584, 1505, 3880, 10000]
            'n_estimators' : np.logspace(0.1,3,num = 10 ,base=10,dtype='int'), #[1, 2, 5, 11, 24, 51, 107, 226, 476, 1000]
            'min_samples_split' : np.logspace(0.4, 1, num=5, base=10, dtype='int'), #[2, 3, 5, 7, 10]
            'min_samples_leaf' : np.logspace(0.1,1,num = 4 ,base=9,dtype='int'), #[1, 2, 4, 9]
            'max_features' : ['auto', None]
        }]
        model_name = "100features_40minwords_10context"
        model = gensim.models.Word2Vec.load(model_name)
        w2v = {w: vec for w, vec in zip(model.wv.index2word, model.wv.syn0)}
        cv = EvolutionaryAlgorithmSearchCV(estimator=clf_RF_gs,
                                           params=clf_RF_pg,
                                           scoring="accuracy",
                                           cv=StratifiedKFold(n_splits=4),
                                           verbose=1,
                                           population_size=10,
                                           gene_mutation_prob=0.05,
                                           gene_crossover_prob=0.25,
                                           tournament_size=2,
                                           generations_number=3,
                                           n_jobs=2)
        #print(model.wv.most_similar('sensitive'))
        cv.fit(tfidf_transformer.fit_transform(X), y['section'].tolist())
        #cv.fit(MeanEmbeddingVectorizer(w2v).transform(X), y['section'].tolist())
        '''
        #--------------------------------- GA-SVC  ---------------------------------
        # Active path: GA search over C for a LinearSVC on tf-idf features.
        paramgrid = {"C": np.logspace(-9, 9, num=25, base=10)}
        cv = EvolutionaryAlgorithmSearchCV(estimator=LinearSVC(),
                                           params=paramgrid,
                                           scoring="accuracy",
                                           cv=StratifiedKFold(n_splits=4),
                                           verbose=1,
                                           population_size=50,
                                           gene_mutation_prob=0.10,
                                           gene_crossover_prob=0.5,
                                           tournament_size=3,
                                           generations_number=5,
                                           n_jobs=4)
        out = cv.fit(tfidf_transformer.fit_transform(X), y['section'].tolist())
# NOTE(review): `params` and `odIfEstimator` are defined above this chunk.
cv = EvolutionaryAlgorithmSearchCV (
    estimator = odIfEstimator(),
    params = params,
    gene_type = [2, 2, 2],
    verbose = 1,
    population_size = 80,
    gene_mutation_prob = .1,
    gene_crossover_prob = .5,
    tournament_size = 3,
    generations_number = 8,
    # this is already validation set, no need for cross validation
    cv = ShuffleSplit(test_size=0.99, n_splits=1),
    n_jobs = 40)
cv.fit(data, labels)

# Search space for the SDO detector below.
params = {
    'k': list(range(100,1000)),
    'x': list(range(3,30)),
    'qv': [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
}
print ('\nSDO\n----------------')


class odSDOEstimator:
    # Minimal sklearn-style parameter container wrapping the SDO detector.
    # NOTE(review): the class body may continue beyond this chunk (no fit/score
    # methods are visible here).
    def __init__(self, **kwargs):
        self.set_params (**kwargs)

    def get_params(self, deep = True):
        return self.params

    def set_params(self, **kwargs):
        self.params = kwargs
# Hold out 20% of each city's data (SJ / IQ) with a fixed seed.
train_SJ_train, train_SJ_test = train_test_split(train_SJ, test_size=0.2, random_state=24)
total_CASESJ_train, total_CASESJ_test = train_test_split(total_CASESJ, test_size=0.2, random_state=24)
train_IQ_train, train_IQ_test = train_test_split(train_IQ, test_size=0.2, random_state=24)
total_CASEIQ_train, total_CASEIQ_test = train_test_split(total_CASEIQ, test_size=0.2, random_state=24)
# NOTE(review): the models are fitted on the FULL sets but evaluated on
# held-out slices of that same data, so the MAE below is optimistic — confirm
# whether fitting on the *_train splits was intended.
rtreeForSJ.fit(train_SJ, total_CASESJ)
rtreeForIQ.fit(train_IQ, total_CASEIQ)
predictionsSJ = rtreeForSJ.predict(train_SJ_test)
predictionsIQ = rtreeForIQ.predict(train_IQ_test)
sjscore = mean_absolute_error(total_CASESJ_test, predictionsSJ)
iqscore = mean_absolute_error(total_CASEIQ_test, predictionsIQ)
print(sjscore)
print(iqscore)
# print(len(predictionsSJ)+len(predictionsIQ))
# print(len(predictionsIQ))
# finalArr = []
# for k in predictionsSJ:
#     finalArr.append(k)
# for t in predictionsIQ:
#     finalArr.append(t)
# NOTE(review): chunk opens mid-call — these are the trailing keyword arguments
# of a classifier constructed above (alpha/hidden_layer_sizes suggest an MLP).
alpha=1e-5, hidden_layer_sizes=(2), random_state=1)

from evolutionary_search import EvolutionaryAlgorithmSearchCV
cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
                                   params=paramgrid,
                                   scoring="accuracy",
                                   cv=StratifiedKFold(n_splits=4),
                                   verbose=1,
                                   population_size=50,
                                   gene_mutation_prob=0.10,
                                   gene_crossover_prob=0.5,
                                   tournament_size=3,
                                   generations_number=5,
                                   n_jobs=1)
cv.fit(X, y)

from evolutionary_search import maximize


def func(x, y, m=1., z=False):
    # Gaussian bump centred at the origin, scaled by m, plus 1 when z is True.
    return m * (np.exp(-(x**2 + y**2)) + float(z))


param_grid = {'x': [-1., 0., 1.], 'y': [-1., 0., 1.], 'z': [True, False]}
args = {'m': 1.}
# maximize returns the best point, its score, all evaluated scores, and the
# DEAP history/logbook.
best_params, best_score, score_results, hist, logbook = maximize(func, param_grid, args, verbose=False)
print(best_params)
print(best_score)
print(score_results)
#TODO: test this program with Neural Network Model