示例#1
0
def predictResult(x_train, y_train, y_test, x_test):
    """Fit the global random forest, print test metrics and score the CSV rows.

    The module-level ``randomForest`` estimator is fitted on the training
    data, written to ``randomForest.model`` with ``dump`` and read back with
    ``load``; the reloaded copy is what gets evaluated. The rows stored in
    ``/tmp/predict_result.csv`` (every column except ``columnResultName``)
    are passed through ``Normalizer().fit_transform`` and then predicted.

    Args:
        x_train: Training features handed to ``randomForest.fit``.
        y_train: Training labels handed to ``randomForest.fit``.
        y_test: True labels the printed metrics are computed against.
        x_test: Features the reloaded model is evaluated on.

    Returns:
        The reloaded model's predictions for the rows of the CSV file.
    """
    # Read the rows to be scored and keep every column except the target.
    pending = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_columns = pending.columns[(pending.columns != columnResultName)]
    pending_features = Normalizer().fit_transform(pending[feature_columns])

    randomForest.fit(x_train, y_train)

    # Round-trip through disk so the evaluated model is the persisted one.
    dump(randomForest, 'randomForest.model')
    restored = load('randomForest.model')

    test_predictions = restored.predict(x_test)
    print("predicao:", test_predictions)

    # NOTE(review): the headings say "LR" although a random forest is being
    # scored; they are reproduced verbatim so the output stays unchanged.
    report = (
        ("Matriz de Confusao LR:", cfm),
        ("F1 score LR:", f1s),
        ("Precision score LR:", ps),
        ("Recall score LR:", rs),
        ("Classification Report", cr),
    )
    for heading, metric in report:
        print(heading)
        print(metric(y_test, test_predictions))

    single_prediction = restored.predict(pending_features)
    print("predico unica", single_prediction)
    return single_prediction
 def logistic_model_using_pca_plus_prediction(self):
     """Fit ``LR`` on 100 PCA components of ``self.df1`` and print metrics.

     The ``"y"`` column of ``self.df1`` is the target; every other column is
     standardised, projected with ``PCA(n_components=100)`` and split 80/20
     (stratified on the target, ``random_state=29``). The classifier is
     fitted on the training part, the threshold maximising ``tpr - fpr`` on
     the training ROC curve is stored in ``self.optimal``, and the test
     scores are turned into classes through ``self.class_value``.

     Sets ``self.clf``, ``self.data4``, ``self.model``, ``self.fpr``,
     ``self.tpr``, ``self.threshold``, ``self.optimal`` and ``self.out1``.
     Prints ``rs`` and ``ase`` of the predicted classes and ``auc_score`` of
     the raw test scores (the aliases are defined outside this method).
     """
     self.clf = LR(random_state=29, tol=1e-12)
     self.data4 = self.df1.drop(["y"], axis=1)
     target = self.df1["y"]

     # NOTE(review): the scaler and the PCA are fitted on all rows before the
     # split, so the held-out rows influence the transformation — confirm
     # this is intended.
     self.data4 = StandardScaler().fit_transform(self.data4)
     # 100 was chosen as the optimal number of principal components needed.
     components = PCA(n_components=100).fit_transform(self.data4)

     split = tts(components,
                 target,
                 test_size=0.2,
                 stratify=target,
                 random_state=29)
     train_x, test_x, train_y, test_y = split

     self.model = self.clf.fit(train_x, train_y)
     test_proba = self.model.predict_proba(test_x)
     train_proba = self.model.predict_proba(train_x)
     test_scores = test_proba[:, 1]

     # Choose the cut-off where tpr - fpr peaks on the training ROC curve.
     train_roc = roc_curve(train_y, train_proba[:, 1])
     self.fpr, self.tpr, self.threshold = train_roc
     best = np.argmax(self.tpr - self.fpr)
     self.optimal = self.threshold[best]

     self.out1 = pd.DataFrame({"y_true": test_y, "y_pred": test_scores})
     # class_value is defined elsewhere in the class; presumably it compares
     # a score with self.optimal — confirm.
     predicted = self.out1["y_pred"].apply(self.class_value)
     self.out1["predicted_class"] = predicted

     for metric in (rs, ase):
         print(metric(self.out1["y_true"], self.out1["predicted_class"]))
     print(auc_score(test_y, test_scores))
示例#3
0
def predictResult(betterN, x_train, y_train, y_test, x_test):
    """Optionally refit the global ``knn`` model, then score the CSV rows.

    Args:
        betterN: Number of neighbours to train with. When it is not greater
            than zero the training/evaluation step is skipped and ``knn`` is
            used as it is (the original comment says this means the model
            was loaded beforehand).
        x_train: Training features handed to ``knn.fit``.
        y_train: Training labels handed to ``knn.fit``.
        y_test: True labels the printed metrics are computed against.
        x_test: Features ``knn`` is evaluated on after fitting.

    Returns:
        The predictions of ``knn`` for the rows of
        ``/tmp/predict_result.csv`` (all columns except ``columnResultName``).
    """
    # Read the rows to be scored and put their feature columns in an array.
    pending = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_columns = pending.columns[(pending.columns != columnResultName)]
    pending_features = np.array(pending[feature_columns])

    if betterN > 0:
        knn.n_neighbors = betterN
        knn.fit(x_train, y_train)

        test_predictions = knn.predict(x_test)
        print("predicao: a", test_predictions)

        # NOTE(review): the headings say "NB" although a KNN model is being
        # scored; they are reproduced verbatim so the output stays unchanged.
        report = (
            ("Matriz de Confusao NB:", cfm),
            ("F1 score NB:", f1s),
            ("Precision score NB:", ps),
            ("Recall score NB:", rs),
            ("Classification Report", cr),
        )
        for heading, metric in report:
            print(heading)
            print(metric(y_test, test_predictions))

    single_prediction = knn.predict(pending_features)
    print("predico unica", int(single_prediction[0]))
    print("predicao unica score")
    print(single_prediction)
    return single_prediction
示例#4
0
def predictResult(x_train, y_train, y_test, x_test):
    """Cross-validate, fit and evaluate the global ``logisticR`` model.

    Prints cross-validation scores, fits ``logisticR`` on the training data,
    writes it to ``logistic.model`` with ``dump`` and reads it back with
    ``load``. The reloaded model is evaluated on ``x_test``: metrics are
    printed, a confusion-matrix heat map and a ROC curve are shown, and the
    rows of ``/tmp/predict_result.csv`` (every column except
    ``columnResultName``, passed through ``Normalizer().fit_transform``) are
    predicted.

    Args:
        x_train: Training features.
        y_train: Training labels.
        y_test: True labels the metrics and plots are computed against.
        x_test: Features the reloaded model is evaluated on.

    Returns:
        The reloaded model's predictions for the rows of the CSV file.
    """
    # Read the rows to be scored and keep every column except the target.
    pending = pd.read_csv("/tmp/predict_result.csv", header=0)
    feature_columns = pending.columns[(pending.columns != columnResultName)]
    pending_features = Normalizer().fit_transform(pending[feature_columns])

    cv_scores = cross_val_score(logisticR, x_train, y_train, n_jobs=30)
    print("scores cross val")
    print(cv_scores)

    # Persist the fitted model and evaluate the copy read back from disk.
    logisticR.fit(x_train, y_train)
    dump(logisticR, 'logistic.model')
    restored = load('logistic.model')

    test_predictions = restored.predict(x_test)
    print("predicao:", test_predictions)
    report = (
        ("Matriz de Confusao LR:", cfm),
        ("F1 score LR:", f1s),
        ("Precision score LR:", ps),
        ("Recall score LR:", rs),
        ("Classification Report", cr),
        ("Accuracy score", asc),
    )
    for heading, metric in report:
        print(heading)
        print(metric(y_test, test_predictions))

    # Confusion matrix drawn as an annotated heat map.
    class_names = [0, 1]
    figure, axis = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    confusion = pd.DataFrame(cfm(y_test, test_predictions))
    sns.heatmap(confusion, annot=True, cmap="YlGnBu", fmt='g')
    axis.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()

    # ROC curve built from column 1 of the predicted probabilities.
    class_one_scores = restored.predict_proba(x_test)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test, class_one_scores)
    auc = metrics.roc_auc_score(y_test, class_one_scores)
    legend_label = "data 1, auc=" + str(auc)
    plt.plot(fpr, tpr, label=legend_label)
    plt.legend(loc=4)
    plt.show()

    single_prediction = restored.predict(pending_features)
    print("predico unica", single_prediction)
    return single_prediction
 def keras_nn_model(self):
     """Train a small dense Keras network and print its test metrics.

     The network has Dense layers of 256 (``input_dim=428``), 128, 64 and 4
     ReLU units followed by a single sigmoid unit. It is trained for one
     epoch (batch size 256, class weights ``{0: 1, 1: 8}``) with Adam and
     binary cross-entropy on the standardised ``self.x_train``.

     Sets ``self.model1``, ``self.X_train``, ``self.X_test``, ``self.fpr``,
     ``self.tpr``, ``self.threshold``, ``self.optimal`` and ``self.out``.
     Prints ``auc_score`` of the raw test predictions, then ``rs`` and
     ``ase`` of the predicted classes (the aliases are defined outside this
     method).
     """
     # Seed NumPy and TensorFlow before any weights are initialised.
     from numpy.random import seed
     seed(1)
     from tensorflow import set_random_seed
     set_random_seed(2)
     self.model1 = Sequential()
     self.model1.add(Dense(256, input_dim=428, activation='relu'))
     self.model1.add(Dense(128, activation='relu'))
     self.model1.add(Dense(64, activation='relu'))
     self.model1.add(Dense(4, activation='relu'))
     self.model1.add(Dense(1, activation='sigmoid'))
     scaler = StandardScaler()
     self.X_train = scaler.fit_transform(self.x_train)
     # Apply the statistics learned on the training data to the test data.
     # Re-fitting on the test split (as the previous code did) scales it with
     # its own mean/variance, so it no longer matches what the network saw.
     self.X_test = scaler.transform(self.x_test)
     adam = keras.optimizers.Adam(lr=0.001,
                                  beta_1=0.9,
                                  beta_2=0.999,
                                  epsilon=None,
                                  decay=0.0,
                                  amsgrad=False)
     self.model1.compile(optimizer=adam,
                         loss=keras.losses.binary_crossentropy)
     self.model1.fit(self.X_train,
                     self.y_train,
                     epochs=1,
                     batch_size=256,
                     class_weight={
                         0: 1,
                         1: 8
                     })
     pred = self.model1.predict(self.X_test)
     pred1 = self.model1.predict(self.X_train)
     print(auc_score(self.y_test, pred))
     # Choose the cut-off where tpr - fpr peaks on the training ROC curve.
     self.fpr, self.tpr, self.threshold = roc_curve(self.y_train,
                                                    pred1.ravel())
     optimal_idx = np.argmax(self.tpr - self.fpr)
     self.optimal = self.threshold[optimal_idx]
     # Collect truth and scores so the class-based metrics can be computed.
     self.out = pd.DataFrame({
         "y_true": self.y_test,
         "y_pred": pred.ravel()
     })
     # class_value is defined elsewhere in the class; presumably it compares
     # a score with self.optimal — confirm.
     self.out["predicted_class"] = self.out["y_pred"].apply(
         self.class_value)
     print(rs(self.out["y_true"], self.out["predicted_class"]))
     print(ase(self.out["y_true"], self.out["predicted_class"]))
 def prediction_on_test(self):
     """Score ``self.model`` on the test split and print its metrics.

     Stores the predicted probabilities (``self.probs`` for the test split,
     ``self.prob1`` for the training split), column 1 of the test
     probabilities (``self.preds``), the training ROC curve (``self.fpr``,
     ``self.tpr``, ``self.threshold``), the threshold maximising
     ``tpr - fpr`` (``self.optimal``) and a results frame (``self.out1``).
     Prints ``rs`` and ``ase`` of the predicted classes and ``auc_score`` of
     the raw test scores (the aliases are defined outside this method).
     """
     self.probs = self.model.predict_proba(self.x_test)
     self.preds = self.probs[:, 1]
     self.prob1 = self.model.predict_proba(self.x_train)

     # Choose the cut-off where tpr - fpr peaks on the training ROC curve.
     train_roc = roc_curve(self.y_train, self.prob1[:, 1])
     self.fpr, self.tpr, self.threshold = train_roc
     best = np.argmax(self.tpr - self.fpr)
     self.optimal = self.threshold[best]

     self.out1 = pd.DataFrame({"y_true": self.y_test, "y_pred": self.preds})
     # class_value is defined elsewhere in the class; presumably it compares
     # a score with self.optimal — confirm.
     predicted = self.out1["y_pred"].apply(self.class_value)
     self.out1["predicted_class"] = predicted

     for metric in (rs, ase):
         print(metric(self.out1["y_true"], self.out1["predicted_class"]))
     print(auc_score(self.y_test, self.preds))
示例#7
0
# Model-6 Lasso Regression
# Grid-search the Lasso regularisation strength, report training metrics and
# list the features ordered by the magnitude of their coefficients.
print("Model-6 Lasso Regression")
tuned_params = {'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]}
model = GridSearchCV(Lasso(), tuned_params, scoring='neg_mean_absolute_error', cv=20, n_jobs=-1)
model.fit(X_train, y_train)
print("model.best_estimator",model.best_estimator_)
# Remember the winning alpha now: `model` is rebound to a plain Lasso below.
best_alpha = model.best_params_['alpha']
# Predict Train results
y_train_pred = model.predict(X_train)
# Predict Test results
y_pred = model.predict(X_test)

print("Train Results for Lasso Regression:")
print("*******************************")
print("Root mean squared error: ", sqrt(mse(y_train.values, y_train_pred)))
print("R-squared: ", rs(y_train.values, y_train_pred))
print("Mean Absolute Error: ", mae(y_train.values, y_train_pred))

# Feature Importance
print("Feature Importance")
# Rebuild the model with the alpha selected by the grid search instead of a
# hard-coded value, so the ranking always reflects the tuned model.
model = Lasso(alpha=best_alpha)
model.fit(X_train, y_train)
# Sort feature positions by descending absolute coefficient.
indices = np.argsort(-abs(model.coef_))
print("The features in order of importance are:")
print(50*'-')
for feature in X.columns[indices]:
    print(feature)
print("#################################################################")

示例#8
0
# Training target: the CO2EMISSIONS column selected with double brackets
# (a single-column 2-D array, assuming `train` is a pandas DataFrame).
trainINSTAGRAM = np.array(train[['CO2EMISSIONS']])

# Fit the regressor `rgr` on the training features (`trainTIKTOK`, defined
# above this excerpt) and the target.
rgr.fit(trainTIKTOK, trainINSTAGRAM)

# Show the fitted coefficient(s) and intercept.
print(rgr.coef_)
print(rgr.intercept_)


def future(data1, intercept, slope):
    """Evaluate the straight line ``data1 * slope + intercept``.

    Args:
        data1: Input value (or array of values) to evaluate the line at.
        intercept: Constant term of the line.
        slope: Multiplier applied to ``data1``.

    Returns:
        ``data1 * slope + intercept``.
    """
    scaled = data1 * slope
    return scaled + intercept


# Predict the emissions for a single engine size with the fitted line.
enginesizevar1 = 3.3  # liters
ee = future(enginesizevar1, rgr.intercept_[0], rgr.coef_[0][0])
print(ee)

# Check the accuracy of the fitted model on the test data.
from sklearn.metrics import r2_score as rs

testTIKTOK = np.array(testdata[['ENGINESIZE']])
testINSTAGRAM = np.array(testdata[['CO2EMISSIONS']])

ra = rgr.predict(testTIKTOK)

# Mean absolute difference between predictions and true values.
absolute = np.mean(np.absolute(ra - testINSTAGRAM))

# r2_score takes (y_true, y_pred) and is not symmetric: the true values must
# come first, otherwise the score is normalised by the variance of the
# predictions instead of the variance of the observations.
r2score = rs(testINSTAGRAM, ra)
print(absolute)
print(r2score)
示例#9
0
# Split each series at index b: the tra* names hold the first b entries
# (used for fitting below) and the t* names hold the remaining entries
# (used for scoring below).
trad = death[:b]
td = death[b:]
tranc = ncase[:b]
tnc = ncase[b:]
trand = ndeath[:b]
tnd = ndeath[b:]
trawc = wcase[:b]
twc = wcase[b:]
trawd = wdeath[:b]
twd = wdeath[b:]
trawnc = wncase[:b]
twnc = wncase[b:]
trawnd = wndeath[:b]
twnd = wndeath[b:]
# For every series: fit a degree-3 polynomial on (trax, tra*) with polyfit,
# wrap the coefficients in poly1d, and score the polynomial's values at tx
# against the held-out part with rs.
# NOTE(review): `n` is presumably numpy and `rs` presumably an R^2 score;
# trax, tx, trac and tc are defined above this excerpt — confirm.
modelc = n.poly1d(n.polyfit(trax,trac,3))
conc = rs(tc,modelc(tx))
modeld = n.poly1d(n.polyfit(trax,trad,3))
cond = rs(td,modeld(tx))
modelnc = n.poly1d(n.polyfit(trax,tranc,3))
connc = rs(tnc,modelnc(tx))
modelnd = n.poly1d(n.polyfit(trax,trand,3))
connd = rs(tnd,modelnd(tx))
modelwc = n.poly1d(n.polyfit(trax,trawc,3))
conwc = rs(twc,modelwc(tx))
modelwd = n.poly1d(n.polyfit(trax,trawd,3))
conwd = rs(twd,modelwd(tx))
modelwnc = n.poly1d(n.polyfit(trax,trawnc,3))
conwnc = rs(twnc,modelwnc(tx))
modelwnd = n.poly1d(n.polyfit(trax,trawnd,3))
conwnd = rs(twnd,modelwnd(tx))
# Degree-3 polynomial fitted on the complete x / case series (no split).
mc = n.poly1d(n.polyfit(x,case,3))
示例#10
0
    # Predict the test split with each of the three models.
    y_pred_rfc = rfc.predict(X_test)
    y_pred_lr = lr.predict(X_test)
    y_pred_knn = knn.predict(X_test)

    # Append this iteration's metrics to the per-model result lists.
    # NOTE(review): if acc/ps/rs are scikit-learn's accuracy_score,
    # precision_score and recall_score, their signature is (y_true, y_pred).
    # With the predictions passed first, precision and recall end up swapped
    # (accuracy is unaffected) — confirm the aliases and swap the arguments
    # if so.
    accs_rfc.append(acc(y_pred_rfc, y_test))
    accs_lr.append(acc(y_pred_lr, y_test))
    accs_knn.append(acc(y_pred_knn, y_test))

    ps_rfc.append(ps(y_pred_rfc, y_test))
    ps_lr.append(ps(y_pred_lr, y_test))
    ps_knn.append(ps(y_pred_knn, y_test))

    rs_rfc.append(rs(y_pred_rfc, y_test))
    rs_lr.append(rs(y_pred_lr, y_test))
    rs_knn.append(rs(y_pred_knn, y_test))

    # Progress indicator: `i` comes from the enclosing loop, which is not
    # visible in this excerpt.
    print(i)

#==============================
# examine performances of all models
"""
Note - can see that across all metrics, logistic regression performs best
"""

# Accuracy: draw the shaded density curve (no histogram) of the values
# collected in accs_rfc on a 12x6 figure with a grid.
# NOTE(review): distplot is deprecated in recent seaborn releases — confirm
# the installed version still provides it.
plt.figure(figsize=(12, 6))
plt.grid()
sns.distplot(accs_rfc, hist=False, kde_kws={"shade": True}, label='RFC')
# Extract the feature columns (X) and the target column (Y) from the data set.
X = data[['gender','age','fever','dry cough','difficulty in breathing','tiredness','soar_throat','nasal_congestion','diff_symptoms']]
Y = data['result']


# Split the data set into training (80%) and testing (20%) parts.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=0)
# Train the model on the training part only. Fitting on the full X, Y (as the
# previous code did) lets the forest see the test rows, which inflates every
# metric computed on X_test below.
rf = RandomForestClassifier(n_estimators=50, random_state=1)
rf.fit(X_train, Y_train)

# Predict the held-out rows once and reuse the result below.
pred = np.array(rf.predict(X_test))

recall = rs(Y_test,pred)
precision = ps(Y_test,pred)
f1 = fs(Y_test,pred)
ma = rf.score(X_test,Y_test)

# Print all scores.
print('*** Evaluation metrics for test dataset ***\n')
print('Recall Score: ',recall)
print('Precision Score: ',precision)
print('F1 Score: ',f1)
print('Accuracy: ',ma)
# Show the true labels next to the predictions for the first test rows.
a = pd.DataFrame(Y_test)
a['pred'] = pred
print('\n\tTable 3\n')
print(a.head())
示例#12
0
    # Fit the knn3 model on the training split and report its training score.
    # (Python 2 print statements; the enclosing definition starts above this
    # excerpt.)
    knn3.fit(x_train, y_train)

    print "Accuracy Training KNN:", knn3.score(x_train, y_train)

    # Evaluate on the test split.
    predictions = knn3.predict(x_test)
    accuracy = metrics.accuracy_score(y_test, predictions)

    print "Accuracy Test KNN:", accuracy
    print "Matriz de Confusao KNN:"
    print cfm(y_test, predictions)
    print "F1 score KNN:"
    print f1s(y_test, predictions)
    print "Precision score KNN:"
    print ps(y_test, predictions)
    print "Recall score KNN:"
    print rs(y_test, predictions)

# SVM with a linear kernel, fitted on the training split and evaluated on
# the test split.
# NOTE(review): the assignment below rebinds the name `svm` from the object
# providing `svm.SVC` to the classifier instance, so `svm.SVC` cannot be
# called again after this line — consider a different variable name.

svm = svm.SVC(kernel='linear', C=1.0)
svm.fit(x_train, y_train)

predictionsSvm = svm.predict(x_test)

accuracySvm = metrics.accuracy_score(predictionsSvm, y_test)

print "SVM LINEAR Accuracy Test:", accuracySvm

print "Matriz de Confusao SVM LINEAR:"
print cfm(y_test, predictionsSvm)
print "F1 score SVM LINEAR:"