Example #1
def select_classify():
    # Return the list of candidate classifiers: Naive Bayes, a decision tree,
    # k-NN, and a multilayer perceptron. The names naive/tree/knn/mlp are
    # presumably scikit-learn estimators aliased at import time.
    return [
        naive(),
        tree(criterion="entropy"),
        knn(n_neighbors=8, weights='uniform', metric="manhattan"),
        mlp(hidden_layer_sizes=(128, ),
            alpha=0.01,
            activation='tanh',
            solver='sgd',
            max_iter=300,
            learning_rate='constant',
            learning_rate_init=0.001)
    ]
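
The snippet above relies on short aliases (naive, tree, knn, mlp) whose imports are not shown. A minimal sketch of what they presumably map to in scikit-learn; the concrete Naive Bayes class (GaussianNB here) is an assumption:

# Hypothetical imports matching the aliases used above; the exact Naive Bayes
# variant (GaussianNB vs. MultinomialNB) is an assumption.
from sklearn.naive_bayes import GaussianNB as naive
from sklearn.tree import DecisionTreeClassifier as tree
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.neural_network import MLPClassifier as mlp
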
Example #2
def predictNaiveBayes():

    # Train (and cache) a MultinomialNB sentiment pipeline, or load the cached model.
    if not os.path.isfile("sentiment/naive_model.plk"):

        start = time.time()

        #MultinomialNB Pipeline
        clf = Pipeline([
            ('vect', vectorizer),
            ('clf', naive(alpha=1.0, fit_prior=True))
        ], verbose=True)

        parameters = {
            'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)],
            'clf__fit_prior': (True, False),
            'clf__alpha': (1.0, 0.1, 0.5, 2.0, 0.25, 0.75, 0.002),
        }

        # Note: the iid argument was deprecated in scikit-learn 0.22 and removed
        # in 0.24; drop it when running on newer versions.
        gs_clf = GridSearchCV(clf, parameters, cv=5, iid=False, n_jobs=-1)
        gs_clf.fit(docs_train, y_train)

        print(gs_clf.best_params_)

        y_predicted = gs_clf.predict(docs_test)

        print("End.......... total=%.2f s" % (start - time.time()))

        # Print the classification report
        print(metrics.classification_report(y_test, y_predicted,
                                            target_names=dataset.target_names))

        cm = metrics.confusion_matrix(y_test, y_predicted)
        print(cm)

        plt.matshow(cm, cmap=plt.cm.jet)
        #plt.show()


        # Persist the fitted grid search to the same path checked at the top of the function.
        joblib.dump(gs_clf, "sentiment/naive_model.plk")

        return gs_clf
    else:
        return joblib.load("sentiment/naive_model.plk")
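
A hedged usage sketch of the function above: it returns the fitted (or cached) GridSearchCV pipeline, which can classify raw text directly because the vectorizer is part of the pipeline. The sample review and the dataset.target_names lookup are assumptions.

# Hypothetical usage; the sample text and label-name lookup are assumptions.
model = predictNaiveBayes()
sample = ["The movie was surprisingly good, I would watch it again."]
pred = model.predict(sample)
print(dataset.target_names[pred[0]])
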
Example #3
rkf = RepeatedKFold(n_splits=10, n_repeats=10, random_state=1967)

# Loop over the folds (only the split from the last fold is kept after the loop)

for train, test in rkf.split(data, target):
    X_train = data[train]
    X_test = data[test]
    y_train = target[train]
    y_test = target[test]

clf = []  # base classifiers: 3 k-NN, 5 Naive Bayes, 2 MLP
for i in range(10):
    if i < 3:
        clf.append(knn(n_neighbors=2))
    elif 3 <= i < 8:
        clf.append(naive())
    else:
        clf.append(
            mlp(solver='sgd',
                momentum=0.8,
                hidden_layer_sizes=(150,),
                learning_rate='constant',
                learning_rate_init=0.1,
                max_iter=500,
                random_state=870))

meta = naive()

sclf = StackingClassifier(classifiers=[
    clf[0], clf[0], clf[1], clf[2], clf[3], clf[4], clf[5], clf[6], clf[7],
    clf[8], clf[9]
],
                          meta_classifier=meta)
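
The classifiers=/meta_classifier= keywords above match mlxtend's StackingClassifier rather than scikit-learn's. A short sketch of the imports this example presumably relies on; treat them as assumptions:

# Presumed imports for this example (not shown in the snippet).
from sklearn.model_selection import RepeatedKFold
from mlxtend.classifier import StackingClassifier
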
Example #4
data = sismica.iloc[:, :-1].values
target = sismica.iloc[:, -1].values

# Generating the splits

kfold = StratifiedKFold(n_splits=10)

# Loop over the folds (only the last fold's split is kept after the loop)

for train, test in kfold.split(data, target):
    X_train = data[train]
    X_test = data[test]
    y_train = target[train]
    y_test = target[test]

model = naive()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(y_pred)

cm = confusion_matrix(y_test, y_pred)

acc = str(accuracy_score(y_test, y_pred))

print(cm)

print(acc)
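
The loop above only keeps the split from StratifiedKFold's last fold, so the model is trained and scored on a single fold. A minimal sketch of scoring every fold instead, assuming the same data, target and naive alias defined above:

# Hypothetical alternative: evaluate the Naive Bayes model on all 10 folds.
from sklearn.model_selection import cross_val_score

scores = cross_val_score(naive(), data, target,
                         cv=StratifiedKFold(n_splits=10),
                         scoring='accuracy')
print("mean accuracy: %.4f (+/- %.4f)" % (scores.mean(), scores.std()))
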
Example #5
# Fill NaN categorical values with "Unknown"
df['Embarked'] = df['Embarked'].fillna("Unknown")
df['Cabin'] = df['Cabin'].fillna("Unknown")

x = df.loc[:, sel_cols].values
y = df.iloc[:, 1].values

# Encode Sex (and the other categorical columns below) from strings such as
# ['Male', 'Female'] to integers with sklearn's LabelEncoder
x[:, 2] = label_encoder_x.fit_transform(x[:, 2])
x[:, -1] = label_encoder_x.fit_transform(x[:, -1])
x[:, -2] = label_encoder_x.fit_transform(x[:, -2])
x[:, 6] = label_encoder_x.fit_transform(x[:, 6])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Despite the name, gbm here is the Naive Bayes classifier, not a gradient boosting model.
gbm = naive().fit(x_train, y_train)
predictions = gbm.predict(x_test)

# Print accuracy
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print(confusion_matrix(y_test, predictions))

df2 = pd.read_csv("/Users/apple/Desktop/Vision/Python_Workspace/dataScience/MachineHack/Titanic/data/titanic/train.csv")

# Fill Nan values with average age
df2['Age'] = df2['Age'].fillna((df2['Age'].mean()))

# Fill NaN categorical values with "Unknown"
df2['Embarked'] = df2['Embarked'].fillna("Unknown")
df2['Cabin'] = df2['Cabin'].fillna("Unknown")
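
The snippet stops after reloading and re-cleaning the data into df2. A hedged continuation, under the assumption that sel_cols and label_encoder_x are the objects defined earlier in the script: apply the same column selection and encoding to df2, then score the fitted model on it.

# Hypothetical continuation; sel_cols, label_encoder_x and the encoded column
# positions (2, -1, -2, 6) are taken from the earlier part of the script.
x2 = df2.loc[:, sel_cols].values
y2 = df2.iloc[:, 1].values
for col in (2, -1, -2, 6):
    x2[:, col] = label_encoder_x.fit_transform(x2[:, col])
print("Accuracy on df2: %.2f%%" % (accuracy_score(y2, gbm.predict(x2)) * 100.0))
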
Example #6
    # (this snippet starts inside the cross-validation fold loop of the original script)
    X_train = data[train]
    X_test = data[test]
    y_train = target[train]
    y_test = target[test]

clf = []  # 20 MLP base classifiers
for i in range(20):
    clf.append(
        MLPClassifier(solver='sgd',
                      momentum=0.8,
                      hidden_layer_sizes=(150,),
                      learning_rate='constant',
                      learning_rate_init=0.1,
                      max_iter=500,
                      random_state=870))

meta = naive()

sclf = StackingClassifier(classifiers=[
    clf[0], clf[0], clf[1], clf[2], clf[3], clf[4], clf[5], clf[6], clf[7],
    clf[8], clf[9], clf[10], clf[11], clf[12], clf[13], clf[14], clf[15],
    clf[16], clf[17], clf[18], clf[19]
],
                          meta_classifier=meta)

sclf.fit(X_train, y_train)

y_pred = sclf.predict(X_test)

print(y_pred)
print(y_test)
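
A short, hedged evaluation of the stacked model's predictions on the held-out fold, assuming sklearn.metrics is available in the original script:

# Hypothetical evaluation of the predictions printed above.
from sklearn.metrics import accuracy_score, confusion_matrix

print("Accuracy: %.4f" % accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))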