def select_classify():
    """Return the pool of candidate classifiers used for model comparison."""
    bayes = naive()
    decision_tree = tree(criterion="entropy")
    neighbors = knn(n_neighbors=8, weights='uniform', metric="manhattan")
    network = mlp(hidden_layer_sizes=(128, ), alpha=0.01, activation='tanh',
                  solver='sgd', max_iter=300, learning_rate='constant',
                  learning_rate_init=0.001)
    return [bayes, decision_tree, neighbors, network]
def predictNaiveBayes():
    """Train (or load a cached) MultinomialNB sentiment pipeline.

    Grid-searches the vectorizer n-gram range and NB hyper-parameters with
    5-fold CV, prints evaluation metrics on the held-out test split, caches
    the fitted GridSearchCV to disk, and returns it.

    Relies on module-level globals: ``vectorizer``, ``docs_train``,
    ``docs_test``, ``y_train``, ``y_test``, ``dataset``.

    Returns:
        The fitted (or loaded) GridSearchCV wrapping the pipeline.
    """
    if not os.path.isfile("sentiment/naive_model.plk"):
        start = time.time()
        # MultinomialNB pipeline: vectorize, then classify.
        clf = Pipeline([
            ('vect', vectorizer),
            ('clf', naive(alpha=1.0, fit_prior=True))
        ], verbose=True)
        parameters = {
            'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)],
            'clf__fit_prior': (True, False),
            'clf__alpha': (1.0, 0.1, 0.5, 2.0, .25, 0.75, 0.002),
        }
        # NOTE(review): the `iid` parameter was removed in scikit-learn 0.24;
        # confirm the pinned sklearn version still accepts it.
        gs_clf = GridSearchCV(clf, parameters, cv=5, iid=False, n_jobs=-1)
        gs_clf.fit(docs_train, y_train)
        print(gs_clf.best_params_)
        y_predicted = gs_clf.predict(docs_test)
        # BUG FIX: elapsed time is now - start (was start - now, which always
        # printed a negative duration).
        print("End.......... total=%.2f s" % (time.time() - start))
        # Print the classification report
        print(metrics.classification_report(y_test, y_predicted,
                                            target_names=dataset.target_names))
        cm = metrics.confusion_matrix(y_test, y_predicted)
        print(cm)
        plt.matshow(cm, cmap=plt.cm.jet)
        #plt.show()
        # BUG FIX: dump to the same path the cache check/load above uses;
        # the old "naive_model.plk" path meant the cache never hit and the
        # model was retrained on every call.
        joblib.dump(gs_clf, "sentiment/naive_model.plk")
        return gs_clf
    else:
        return joblib.load("sentiment/naive_model.plk")
rkf = RepeatedKFold(n_splits=10, n_repeats=10, random_state=1967) #Loop para percorrer os folds for train, test in rkf.split(data, target): X_train = data[train] X_test = data[test] y_train = target[train] y_test = target[test] for i in range(10): if (i < 3): clf.append(knn(n_neighbors=2)) elif (i >= 3 and i < 8): clf.append(naive()) else: clf.append( mlp(solver='sgd', momentum=0.8, hidden_layer_sizes=(150), learning_rate='constant', learning_rate_init=0.1, max_iter=500, random_state=870)) meta = naive() sclf = StackingClassifier(classifiers=[ clf[0], clf[0], clf[1], clf[2], clf[3], clf[4], clf[5], clf[6], clf[7], clf[8], clf[9]
# Features are every column but the last; the last column is the label.
data = sismica.iloc[:, :-1].values
target = sismica.iloc[:, -1].values

# Stratified 10-fold split keeps the class ratio in every fold.
kfold = StratifiedKFold(n_splits=10)

# Fit and evaluate a fresh Naive Bayes model on each fold.
for train_idx, test_idx in kfold.split(data, target):
    X_train, y_train = data[train_idx], target[train_idx]
    X_test, y_test = data[test_idx], target[test_idx]

    model = naive()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(y_pred)

    # Per-fold confusion matrix and accuracy.
    cm = confusion_matrix(y_test, y_pred)
    acc = str(accuracy_score(y_test, y_pred))
    print(cm)
    print(acc)
# Fill NaN categorical values with the sentinel "Unknown".
df['Embarked'] = df['Embarked'].fillna("Unknown")
df['Cabin'] = df['Cabin'].fillna("Unknown")

x = df.loc[:, sel_cols].values
y = df.iloc[:, 1].values

# Integer-encode the categorical columns with sklearn's LabelEncoder.
# (Fixed comment: column 2 is Sex — male/female — not Age.)
# NOTE(review): the same encoder instance is refit per column; that works
# with fit_transform, but its final state only reflects the last column, so
# it cannot be reused later to inverse-transform the earlier columns.
x[:, 2] = label_encoder_x.fit_transform(x[:, 2])
x[:, -1] = label_encoder_x.fit_transform(x[:, -1])
x[:, -2] = label_encoder_x.fit_transform(x[:, -2])
x[:, 6] = label_encoder_x.fit_transform(x[:, 6])

# 80/20 train/test split, seeded for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=0)

# NOTE(review): the variable is named `gbm` but the model is naive();
# confirm which estimator was intended.
gbm = naive().fit(x_train, y_train)
predictions = gbm.predict(x_test)

# Print accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# BUG FIX: the confusion matrix was computed but its result was discarded —
# print it so the evaluation is actually reported.
print(confusion_matrix(y_test, predictions))

df2 = pd.read_csv("/Users/apple/Desktop/Vision/Python_Workspace/dataScience/MachineHack/Titanic/data/titanic/train.csv")
# Fill NaN ages with the mean age.
df2['Age'] = df2['Age'].fillna((df2['Age'].mean()))
# Fill NaN categorical values with "Unknown".
df2['Embarked'] = df2['Embarked'].fillna("Unknown")
df2['Cabin'] = df2['Cabin'].fillna("Unknown")
# Per-fold training data (train/test index arrays come from the enclosing
# cross-validation split).
X_train = data[train]
X_test = data[test]
y_train = target[train]
y_test = target[test]

# Build 20 identically-configured MLP base learners for this fold.
# hidden_layer_sizes=(150,) — explicit one-element tuple; the original (150)
# was a bare int, which sklearn treats identically (one layer of 150 units).
for i in range(20):
    clf.append(
        MLPClassifier(solver='sgd', momentum=0.8, hidden_layer_sizes=(150,),
                      learning_rate='constant', learning_rate_init=0.1,
                      max_iter=500, random_state=870))

# Naive Bayes meta-learner stacked on top of the 20 MLPs.
meta = naive()
# BUG FIX: the original listed clf[0] twice (21 entries with a duplicate).
# `clf` is appended to without being reset, so it also grows every fold;
# slicing the last 20 uses each of this fold's fresh models exactly once.
sclf = StackingClassifier(classifiers=clf[-20:], meta_classifier=meta)
sclf.fit(X_train, y_train)
y_pred = sclf.predict(X_test)
print(y_pred)
print(y_test)