# --- RBF-kernel SVM on standard-scaled features ---------------------------
# SKSupervisedLearning wraps (estimator, X_train, y_train, X_test, y_test);
# fit_and_validate() returns (train log loss, test log loss).
sl = SKSupervisedLearning(SVC, X, Y_train, Xt, Y_test)
sl.fit_standard_scaler()
# probability=True is required so the SVM emits class probabilities for log loss.
sl.train_params = {'C': 100, 'gamma': 0.01, 'probability': True}
ll_trn, ll_tst = sl.fit_and_validate()
print("SVC log loss: ", ll_tst)
conf_svm = plot_confusion(sl)

# --- Neural net on the same scaled features --------------------------------
trndata, tstdata = createDataSets(sl.X_train_scaled, Y_train,
                                  sl.X_test_scaled, Y_test)
fnn = train(trndata, tstdata, epochs=1000, test_error=0.025,
            momentum=0.15, weight_decay=0.0001)

# --- Random forest with probability calibration ----------------------------
# CalibratedClassifierCV wraps the forest and calibrates its predicted
# probabilities with 10-fold CV, which usually improves log loss.
sl_ccrf = SKSupervisedLearning(CalibratedClassifierCV, X, Y_train, Xt, Y_test)
sl_ccrf.train_params = {
    # Direct keyword arguments instead of the redundant **{...} dict splat.
    'base_estimator': RandomForestClassifier(n_estimators=7500, max_depth=200),
    'cv': 10,
}
sl_ccrf.fit_standard_scaler()
ll_ccrf_trn, ll_ccrf_tst = sl_ccrf.fit_and_validate()
print("Calibrated log loss: ", ll_ccrf_tst)
conf_ccrf = plot_confusion(sl_ccrf)

# Exploratory code kept for reference (not executed):
#predicted = cross_val_predict(SVC(**sl.train_params), sl.X_train_scaled, n_jobs = -1, y = Y_train, cv=10)
#fig,ax = plt.subplots()
"""Classify lines of a test file with a trained neural network.

Usage: script.py TRAIN_FILE TEST_FILE
  TRAIN_FILE is passed to train_nn.train, which returns (network, d) where
  d maps a feature token to its index in the input vector.
  Each TEST_FILE line is turned into a bag-of-words count vector, run through
  forward_nn, and labeled "+1"/"-1" by the sign of the final output unit.
"""
from train_nn import train, forward_nn
import sys

import numpy as np

# Train the network; `with` closes the handle (original leaked it).
with open(sys.argv[1]) as train_file:
    network, d = train(train_file)

with open(sys.argv[2]) as test_file:
    for line in test_file:
        # Bag-of-words feature vector: count only tokens seen in training.
        phi0 = np.zeros(len(d))
        for item in line.strip("\n").split(" "):
            if item in d:
                phi0[d[item]] += 1
        phi = forward_nn(network, phi0)
        # Sign of the last output unit decides the predicted label.
        y = "-1" if phi[-1] < 0 else "+1"
        print(y + "\t" + line.strip("\n"))
for j in x: ax.text(i - 0.2, j + 0.2, "{:3.0f}".format(norm_conf_mat[j, i] * 100.)) return conf_mat sl = SKSupervisedLearning(SVC, X, Y_train, Xt, Y_test) sl.fit_standard_scaler() sl.train_params = {'C': 100, 'gamma': 0.01, 'probability' : True} ll_trn, ll_tst = sl.fit_and_validate() print "SVC log loss: ", ll_tst conf_svm = plot_confusion(sl) #Neural net trndata, tstdata = createDataSets(sl.X_train_scaled, Y_train, sl.X_test_scaled, Y_test) fnn = train(trndata, tstdata, epochs = 1000, test_error = 0.025, momentum = 0.15, weight_decay = 0.0001) sl_ccrf = SKSupervisedLearning(CalibratedClassifierCV, X, Y_train, Xt, Y_test) sl_ccrf.train_params = \ {'base_estimator': RandomForestClassifier(**{'n_estimators' : 7500, 'max_depth' : 200}), 'cv': 10} sl_ccrf.fit_standard_scaler() ll_ccrf_trn, ll_ccrf_tst = sl_ccrf.fit_and_validate() print "Calibrated log loss: ", ll_ccrf_tst conf_ccrf = plot_confusion(sl_ccrf) #predicted = cross_val_predict(SVC(**sl.train_params), sl.X_train_scaled, n_jobs = -1, y = Y_train, cv=10) #fig,ax = plt.subplots() #ax.scatter(Y_train, predicted) #ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
# NOTE(review): sklearn.lda was deprecated and removed in modern scikit-learn;
# the replacement is sklearn.discriminant_analysis.LinearDiscriminantAnalysis.
# Left as-is to match the environment this script was written for — confirm.
from sklearn.lda import LDA
from sklearn.preprocessing import normalize

# Instruction-frequency features (unused below, kept for interactive use).
tf = TrainFiles('/kaggle/malware/scratchpad/text/train/instr_freq',
                '/kaggle/malware/scratchpad/text/test/instr_freq',
                "/kaggle/malware/trainLabels.csv")
# 1D local binary pattern features — the set actually used below.
tf1 = TrainFiles('/kaggle/malware/scratchpad/train/1dlbp',
                 '/kaggle/malware/scratchpad/test/1dlbp',
                 "/kaggle/malware/trainLabels.csv")
X_train, Y_train, X_test, Y_test = tf1.prepare_inputs()

# PCA fit on train+test jointly; component count fixed at 300.
n_components = 300
pca = PCA(n_components=n_components)
pca.fit(np.r_[X_train, X_test])
# Earlier experiment: pick n_components by 99% explained variance.
#n_components = np.where(np.cumsum(pca.explained_variance_ratio_) >= 0.99)[0][0]
#print n_components
#pca = PCA(n_components = n_components)
#pca.fit(np.r_[X_train, X_test])
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# LDA on the raw (non-PCA) features.
sl = SKSupervisedLearning(LDA, X_train, Y_train, X_test, Y_test)
#sl.fit_standard_scaler()

# Neural net on L2-normalized features.
trndata, tstdata = createDataSets(normalize(X_train), Y_train,
                                  normalize(X_test), Y_test)
train(trndata, tstdata, epochs=1000, weight_decay=0.0001, momentum=0.15)

ll = sl.fit_and_validate()
# print() call (original used a Python 2 print statement, a syntax error
# under Python 3 and inconsistent with the rest of the file).
print("Log loss: ", ll)
'/kaggle/malware/scratchpad/text/test/instr_freq', "/kaggle/malware/trainLabels.csv") tf1 = TrainFiles('/kaggle/malware/scratchpad/train/1dlbp', '/kaggle/malware/scratchpad/test/1dlbp', "/kaggle/malware/trainLabels.csv") X_train, Y_train, X_test, Y_test = tf1.prepare_inputs() n_components = 300 pca = PCA(n_components=n_components) pca.fit(np.r_[X_train, X_test]) #n_components = np.where(np.cumsum(pca.explained_variance_ratio_) >= 0.99)[0][0] #print n_components #pca = PCA(n_components = n_components) #pca.fit(np.r_[X_train, X_test]) X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) # Naive Bayes sl = SKSupervisedLearning(LDA, X_train, Y_train, X_test, Y_test) #sl.fit_standard_scaler() trndata, tstdata = createDataSets(normalize(X_train), Y_train, normalize(X_test), Y_test) train(trndata, tstdata, epochs=1000, weight_decay=0.0001, momentum=0.15) ll = sl.fit_and_validate() print "Log loss: ", ll