Exemplo n.º 1
0
            ax.text(i - 0.2, j + 0.2,
                    "{:3.0f}".format(norm_conf_mat[j, i] * 100.))
    return conf_mat


# --- Support vector classifier ---------------------------------------------
sl = SKSupervisedLearning(SVC, X, Y_train, Xt, Y_test)
sl.fit_standard_scaler()
# probability=True: presumably required because the reported metric is a log
# loss, which is computed from predicted probabilities -- confirm against
# SKSupervisedLearning.fit_and_validate.
sl.train_params = dict(C=100, gamma=0.01, probability=True)
# The result is unpacked as a (train, test) pair; only the test value is shown.
ll_trn, ll_tst = sl.fit_and_validate()

print("SVC log loss: ", ll_tst)

conf_svm = plot_confusion(sl)

# --- Neural net --------------------------------------------------------------
# Reuses the scaled feature matrices held by the SVC wrapper above.
trndata, tstdata = createDataSets(
    sl.X_train_scaled, Y_train, sl.X_test_scaled, Y_test)
fnn = train(
    trndata, tstdata,
    epochs=1000, test_error=0.025, momentum=0.15, weight_decay=0.0001)

# --- Calibrated random forest ------------------------------------------------
sl_ccrf = SKSupervisedLearning(CalibratedClassifierCV, X, Y_train, Xt, Y_test)
sl_ccrf.train_params = dict(
    base_estimator=RandomForestClassifier(n_estimators=7500, max_depth=200),
    cv=10,
)
sl_ccrf.fit_standard_scaler()
ll_ccrf_trn, ll_ccrf_tst = sl_ccrf.fit_and_validate()

print("Calibrated log loss: ", ll_ccrf_tst)
conf_ccrf = plot_confusion(sl_ccrf)
    for i in x:
        for j in x:
            ax.text(i - 0.2, j + 0.2, "{:3.0f}".format(norm_conf_mat[j, i] * 100.))
    return conf_mat

# --- Support vector classifier ---------------------------------------------
sl = SKSupervisedLearning(SVC, X, Y_train, Xt, Y_test)
sl.fit_standard_scaler()
# probability=True: presumably required because the reported metric is a log
# loss, which is computed from predicted probabilities -- confirm against
# SKSupervisedLearning.fit_and_validate.
sl.train_params = {'C': 100, 'gamma': 0.01, 'probability': True}
# The result is unpacked as a (train, test) pair; only the test value is shown.
ll_trn, ll_tst = sl.fit_and_validate()

# print() with a single pre-formatted string emits the same text on Python 2
# and Python 3 (the old `print "...", x` statement is a SyntaxError on 3).
# The double space reproduces the separator the print statement inserted.
print("SVC log loss:  {}".format(ll_tst))

conf_svm = plot_confusion(sl)

# --- Neural net --------------------------------------------------------------
# Reuses the scaled feature matrices held by the SVC wrapper above.
trndata, tstdata = createDataSets(sl.X_train_scaled, Y_train,
                                  sl.X_test_scaled, Y_test)
fnn = train(trndata,
            tstdata,
            epochs=1000,
            test_error=0.025,
            momentum=0.15,
            weight_decay=0.0001)

# --- Calibrated random forest ------------------------------------------------
sl_ccrf = SKSupervisedLearning(CalibratedClassifierCV, X, Y_train, Xt, Y_test)
sl_ccrf.train_params = {
    'base_estimator': RandomForestClassifier(n_estimators=7500, max_depth=200),
    'cv': 10,
}
sl_ccrf.fit_standard_scaler()
ll_ccrf_trn, ll_ccrf_tst = sl_ccrf.fit_and_validate()

print("Calibrated log loss:  {}".format(ll_ccrf_tst))
conf_ccrf = plot_confusion(sl_ccrf)

#predicted = cross_val_predict(SVC(**sl.train_params), sl.X_train_scaled, n_jobs = -1, y = Y_train, cv=10)

#fig,ax = plt.subplots()
#ax.scatter(Y_train, predicted)
Exemplo n.º 3
0
from sklearn.lda import LDA
from sklearn.preprocessing import normalize

# Two feature sets sharing the same label file: instruction frequencies (tf)
# and 1dlbp features (tf1). Only tf1 is used in the statements below.
tf = TrainFiles('/kaggle/malware/scratchpad/text/train/instr_freq',
                '/kaggle/malware/scratchpad/text/test/instr_freq',
                "/kaggle/malware/trainLabels.csv")
tf1 = TrainFiles('/kaggle/malware/scratchpad/train/1dlbp',
                 '/kaggle/malware/scratchpad/test/1dlbp',
                 "/kaggle/malware/trainLabels.csv")

X_train, Y_train, X_test, Y_test = tf1.prepare_inputs()

# PCA is fitted on the train and test rows stacked together (np.r_).
# NOTE(review): this lets test features influence the projection -- confirm
# that is intended.
n_components = 300
pca = PCA(n_components=n_components)
pca.fit(np.r_[X_train, X_test])

# Disabled alternative: pick the component count from the cumulative
# explained variance (>= 0.99) and refit.
#n_components = np.where(np.cumsum(pca.explained_variance_ratio_) >= 0.99)[0][0]
#print n_components

#pca = PCA(n_components = n_components)
#pca.fit(np.r_[X_train, X_test])
# NOTE(review): the projected matrices are not consumed by the statements
# below, which work on the raw X_train / X_test.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)


# LDA-based learner (the previous "Naive Bayes" heading did not match the
# estimator actually passed in).
sl = SKSupervisedLearning(LDA, X_train, Y_train, X_test, Y_test)
#sl.fit_standard_scaler()

# Neural net on normalized features; runs before the LDA fit, and its return
# value is discarded here.
trndata, tstdata = createDataSets(normalize(X_train), Y_train,
                                  normalize(X_test), Y_test)
train(trndata, tstdata, epochs=1000, weight_decay=0.0001, momentum=0.15)

ll = sl.fit_and_validate()

# print() with a single pre-formatted string emits the same text on Python 2
# and Python 3 (the old `print "...", x` statement is a SyntaxError on 3).
# The double space reproduces the separator the print statement inserted.
print("Log loss:  {}".format(ll))
Exemplo n.º 4
0
                '/kaggle/malware/scratchpad/text/test/instr_freq',
                "/kaggle/malware/trainLabels.csv")
# 1dlbp feature set together with the training labels.
tf1 = TrainFiles('/kaggle/malware/scratchpad/train/1dlbp',
                 '/kaggle/malware/scratchpad/test/1dlbp',
                 "/kaggle/malware/trainLabels.csv")

X_train, Y_train, X_test, Y_test = tf1.prepare_inputs()

# PCA is fitted on the train and test rows stacked together (np.r_).
# NOTE(review): this lets test features influence the projection -- confirm
# that is intended.
n_components = 300
pca = PCA(n_components=n_components)
pca.fit(np.r_[X_train, X_test])

# Disabled alternative: pick the component count from the cumulative
# explained variance (>= 0.99) and refit.
#n_components = np.where(np.cumsum(pca.explained_variance_ratio_) >= 0.99)[0][0]
#print n_components

#pca = PCA(n_components = n_components)
#pca.fit(np.r_[X_train, X_test])
# NOTE(review): the projected matrices are not consumed by the statements
# below, which work on the raw X_train / X_test.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# LDA-based learner (the previous "Naive Bayes" heading did not match the
# estimator actually passed in).
sl = SKSupervisedLearning(LDA, X_train, Y_train, X_test, Y_test)
#sl.fit_standard_scaler()

# Neural net on normalized features; runs before the LDA fit, and its return
# value is discarded here.
trndata, tstdata = createDataSets(normalize(X_train), Y_train,
                                  normalize(X_test), Y_test)
train(trndata, tstdata, epochs=1000, weight_decay=0.0001, momentum=0.15)

ll = sl.fit_and_validate()

# print() with a single pre-formatted string emits the same text on Python 2
# and Python 3 (the old `print "...", x` statement is a SyntaxError on 3).
# The double space reproduces the separator the print statement inserted.
print("Log loss:  {}".format(ll))