Example #1
# Assumed imports for this snippet; get_sms_dataset is a project-specific
# helper (not shown here), and gsc is scikit-learn's GridSearchCV.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV as gsc


def auto_test_for_other_model(model, param_grid, name_scoring):
    # Load the SMS dataset (stop words removed) and hold out 20% for testing.
    data_df = get_sms_dataset(noStopwords=True, overwrite=True)
    X, y = data_df['message'], data_df['target']
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        random_state=0,
                                                        test_size=0.2)
    # Turn the training messages into token-count vectors.
    cv = CountVectorizer()
    X_train = cv.fit_transform(X_train)
    # Grid-search the supplied model over param_grid with the requested metric.
    grid = gsc(model, param_grid, scoring=name_scoring)
    grid.fit(X_train, y_train)
    print(grid.best_params_, grid.best_score_)
    return grid.best_params_, grid.best_score_
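A minimal usage sketch for the helper above, assuming accuracy as the metric and scikit-learn's ComplementNB as the alternative model (both choices are illustrative and not taken from the original):

from sklearn.naive_bayes import ComplementNB

best_params, best_score = auto_test_for_other_model(
    ComplementNB(),
    [{'alpha': [0.1, 0.5, 1.0]}],
    'accuracy')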
Example #2
# Assumed imports for this snippet; gsc is scikit-learn's GridSearchCV.
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV as gsc


def auto_test(x_data, y_data, begin, end, test_num, name_scoring):
    # Search test_num evenly spaced alpha values in [begin, end],
    # with and without learned class priors.
    param_grid = [
        {
            'alpha': np.linspace(begin, end, test_num),
            'fit_prior': [True, False]
        },
    ]
    nb = MultinomialNB()
    # GridSearchCV over the naive Bayes hyper-parameters.
    grid = gsc(nb, param_grid, scoring=name_scoring)
    grid.fit(x_data, y_data)
    print(grid.best_params_, grid.best_score_)
    return float(grid.best_params_['alpha'])
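A hedged usage sketch: X_train and y_train are assumed to be already vectorized features and labels (for instance, the count vectors produced in Example #1), and the returned alpha is plugged back into a final model:

best_alpha = auto_test(X_train, y_train, 0.1, 2.0, 20, 'f1')
final_nb = MultinomialNB(alpha=best_alpha).fit(X_train, y_train)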
Example #3
# Assumed setup for this snippet: bp is taken to be the bloscpack package,
# gsc is scikit-learn's GridSearchCV, and the '--input' argument is
# reconstructed from the args.input usage below.
import argparse
import bloscpack as bp
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV as gsc
from sklearn.metrics import classification_report

parser = argparse.ArgumentParser()
parser.add_argument('--input',
                    type=str,
                    required=True,
                    help="directory containing the packed .blp arrays")
parser.add_argument('--output',
                    type=str,
                    required=True,
                    help="directory to save the output files")
parser.add_argument('--lang', type=str, required=True, help="fr or zh")
args = parser.parse_args()

# Load the pre-packed feature matrices and label vectors.
X_train = bp.unpack_ndarray_from_file(args.input + '/train_X.blp')
Y_train = bp.unpack_ndarray_from_file(args.input + '/train_Y.blp')

X_test = bp.unpack_ndarray_from_file(args.input + '/test_X.blp')
Y_test = bp.unpack_ndarray_from_file(args.input + '/test_Y.blp')

# Train the model on the training set.
model = LinearSVC(class_weight="balanced", random_state=42)
# Define the parameter range.
params = {"C": [0.0001, 0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1]}
tense_clf = gsc(model, params, n_jobs=5, refit=True)

# Fit the model for the grid search.
tense_clf.fit(X_train, Y_train)

# Print the best parameter after tuning.
print(tense_clf.best_params_)
# Print how the model looks after hyper-parameter tuning.
print(tense_clf.best_estimator_)

y_pred = tense_clf.predict(X_test)
print(classification_report(Y_test, y_pred))
# plot_confusion_matrix(model, X_test, Y_test, display_labels=['Past', 'Fut', 'Pres'],
#                       cmap=plt.cm.Blues, normalize='true')
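If the commented-out confusion-matrix plot is wanted, a sketch using scikit-learn's current ConfusionMatrixDisplay API (plot_confusion_matrix has been removed from recent scikit-learn releases; the three labels are carried over from the commented line above and are an assumption about the label set):

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

ConfusionMatrixDisplay.from_estimator(tense_clf.best_estimator_,
                                      X_test,
                                      Y_test,
                                      display_labels=['Past', 'Fut', 'Pres'],
                                      cmap=plt.cm.Blues,
                                      normalize='true')
plt.show()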
Example #4
# Print prediction results for the untuned model.
y_predTrain1 = model.predict(X_train)
print(accuracy_score(y_train, y_predTrain1))
y_pred1 = model.predict(X_test)
print(accuracy_score(y_test, y_pred1))
print(classification_report(y_test, y_pred1))
e1 = time.perf_counter()
print("################## training without tuning: ", e1 - s1)

s2 = time.perf_counter()
# Define the parameter range.
params = {"C": [0.0001, 0.001, 0.02, 0.03, 0.04, 0.05, 0.06, 0.1,
                1, 10, 100, 1000, 10000, 100000]}
# params = {"C": expon(scale=100)}

# model = LinearSVC(random_state=42)
tense_clf = gsc(model, params, refit=True, cv=3, scoring='f1_macro')
# Fit the model for the grid search.
tense_clf.fit(X_train, y_train)

# Print the best parameter after tuning.
print(tense_clf.best_params_)
# Print how the model looks after hyper-parameter tuning.
print(tense_clf.best_estimator_)

y_predTrain = tense_clf.predict(X_train)
print(accuracy_score(y_train, y_predTrain))
y_predTest = tense_clf.predict(X_test)
print(accuracy_score(y_test, y_predTest))
print(classification_report(y_test, y_predTest))
e2 = time.perf_counter()
print("################## training with tuning: ", e2 - s2)
y_test = y_test.tolist()
#print(y_test[:100])
s1 = time.perf_counter()

# present vs. non-present model
# train the model on train set
model1 = LinearSVC(C=0.01, class_weight="balanced")
model2 = LinearSVC(C=0.01, class_weight="balanced", random_state=42)

params = {
    "C": [
        0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.014, 0.015, 0.016, 0.017,
        0.02
    ]
}
pres_notPres_clf = gsc(model1, params, n_jobs=5, refit=True)
# Fit the model for the grid search.
pres_notPres_clf.fit(X1_train, y1_train)
# Print the best parameter after tuning.
print(pres_notPres_clf.best_params_)
# Print how the model looks after hyper-parameter tuning.
print(pres_notPres_clf.best_estimator_)

y1_predTrain = pres_notPres_clf.predict(X1_train)
print(classification_report(y1_train, y1_predTrain, digits=3))

y1predTest = pres_notPres_clf.predict(X_test)
#print(y1predTest.shape)
#print(y1predTest[:30])

no_pres_idxs = np.where(y1predTest == 3)[0]
print("Mean:",lrmses.mean())
print("Standard Deviation:", lrmses.std())
# rfr, rmse, and cvs are assumed aliases for RandomForestRegressor,
# mean_squared_error, and cross_val_score respectively.
fr = rfr()
fr.fit(housing_final, housing_labels)
housing_predictions = fr.predict(housing_final)
fmse = rmse(housing_labels, housing_predictions)  # MSE on the training set
frmse = np.sqrt(fmse)
print(frmse)
# 10-fold cross-validation; scores are negated MSE, so negate before the sqrt.
fscores = cvs(fr, housing_final, housing_labels,
              scoring="neg_mean_squared_error", cv=10)
frmses = np.sqrt(-fscores)
print("RandomForestRegressor")
print("Scores:", frmses)
print("Mean:", frmses.mean())
print("Standard Deviation:", frmses.std())
pgrid = [
    {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},
]
gs = gsc(fr, pgrid, cv=5, scoring="neg_mean_squared_error",
         return_train_score=True)
gs.fit(housing_final, housing_labels)
print(gs.best_params_)
print(gs.best_estimator_)
cres = gs.cv_results_
# Report the cross-validated RMSE for each parameter combination.
for ms, ps in zip(cres["mean_test_score"], cres["params"]):
    print(np.sqrt(-ms), ps)

# Inspect feature importances from the best model; fp is assumed to be the
# fitted ColumnTransformer and nattr the list of numeric attribute names.
fi = gs.best_estimator_.feature_importances_
print(fi)
ea = ["roomsperhousehold", "popperhouse", "bedperroom "]
ce = fp.named_transformers_["cat"]
c1hotatt = list(ce.categories_[0])
attr = nattr + ea + c1hotatt
print(sorted(zip(fi, attr), reverse=True))
final_model = gs.best_estimator_
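As a small follow-up sketch, the best cross-validated RMSE found by the grid search can be read straight off gs, since the scores are stored as negated mean squared errors:

best_cv_rmse = np.sqrt(-gs.best_score_)
print("Best CV RMSE:", best_cv_rmse)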