Example #1
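These examples come from different projects but lean on the same short aliases. A plausible shared import preamble, reconstructed from how the aliases are used (an assumption, not part of the original source; `pre`, the preprocessing helper called in Examples #1 and #4, is project-specific and not shown):

import os
import pickle

import joblib
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV as gs
from sklearn.neural_network import MLPRegressor as mlp
from sklearn.tree import DecisionTreeRegressor as dt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier as kc  # legacy Keras wrapper
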
def MLP(data_directory, model_dir, features):
    X_train, X_test, y_train, y_test, predict_X, features = pre(
        data_directory, features)
    os.chdir(model_dir)
    model = mlp(random_state=1, max_iter=10000)
    grid = gs(estimator=model,
              param_grid={
                  'hidden_layer_sizes': [(500, 500)],
                  'activation': ['logistic', 'tanh', 'relu'],
                  'alpha': np.exp(2.303 * np.arange(-8, 0)),  # ~= 10**-8 ... 10**-1 (ln 10 ~ 2.303)
                  'learning_rate': ['constant']
              },
              cv=5,
              n_jobs=6)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    test_score = grid.best_estimator_.score(X_test, y_test)
    print(test_score)

    # Persist the best estimator; the filename encodes feature count and test score.
    joblib.dump(grid.best_estimator_,
                'mlp_%d_%.4f.m' % (len(features), test_score))

    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = grid.best_estimator_.predict(X_test)
    print(df)
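A later session can reload the dumped estimator with joblib. A minimal sketch (the filename and X_new are hypothetical stand-ins):

model = joblib.load('mlp_10_0.8500.m')  # hypothetical filename written by MLP() above
y_pred = model.predict(X_new)           # X_new must be preprocessed exactly like X_train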
Example #2
def train_model():
    print("Loading Data...")
    pd.set_option('display.max_columns', 20)
    f = open("NSL_KDD-master\KDDTrain+.csv")
    cnfile = open("NSL_KDD-master\Field Names.csv")
    column_names = pd.read_csv(cnfile, header=None)
    column_names_list = column_names[0].tolist()
    col_list = list(range(0, 42))
    column_names_list.append("labels")
    data = pd.read_csv(f,
                       header=None,
                       names=column_names_list,
                       usecols=col_list)
    # Drop rare service values and the two rarest label classes.
    rare_services = ["harvest", "urh_i", "red_i", "ftp_u", "tftp_u",
                     "aol", "http_8001", "http_2784", "pm_dump"]
    data = data[~data.service.isin(rare_services)]
    data = data[~data.labels.isin(["spy", "warezclient"])]

    labels = data.labels
    print(labels)
    data.drop(['labels'], axis=1, inplace=True)
    df = pd.get_dummies(data)

    print(df.head(20))
    print(df.shape)

    c_range = [50, 75, 100]
    gamma_range = [.0001, .0005, .00001]
    tuned_parameters = dict(kernel=['rbf'],
                            gamma=gamma_range,
                            C=c_range,
                            shrinking=[False])
    grid = gs(svm.SVC(),
              tuned_parameters,
              cv=3,
              scoring='accuracy',
              n_jobs=2,
              verbose=10)
    grid.fit(df, labels)
    print(grid.best_params_)
    print(grid.best_score_)

    # Refit a final model with the chosen hyper-parameters and a hard iteration cap.
    clf = svm.SVC(gamma=.0001,
                  verbose=1,
                  shrinking=False,
                  C=100,
                  kernel='rbf',
                  max_iter=100000)
    clf.fit(df, labels)
    save_file = 'SVM_trained_model.sav'
    with open(save_file, 'wb') as fh:
        pickle.dump(clf, fh)
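Restoring the pickled SVM is symmetric; note that any test data must be one-hot encoded to the same columns as the training frame. A sketch under that assumption (df_test is hypothetical):

with open('SVM_trained_model.sav', 'rb') as fh:
    clf = pickle.load(fh)
preds = clf.predict(df_test)  # df_test: get_dummies output reindexed to the training columns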
Example #3
def svc_param_selection(X, y, nfolds):
    Cs = [1000]
    gammas = [0.001, 0.01, 0.1, 1]
    degrees = [0]  # 'degree' only affects the 'poly' kernel, so this entry is inert for 'sigmoid'
    param_grid = {'C': Cs, 'gamma': gammas, 'degree': degrees}
    grid_search = gs(svm.SVC(kernel='sigmoid'),
                     param_grid,
                     cv=nfolds,
                     verbose=2)
    grid_search.fit(X, y)
    return grid_search.best_params_
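A typical call, assuming X and y are already numeric (the printed result is a hypothetical example):

best = svc_param_selection(X, y, nfolds=5)
print(best)  # e.g. {'C': 1000, 'degree': 0, 'gamma': 0.01}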
Example #4
def DecisionTree(data_directory, model_dir, features):
    X_train, X_test, y_train, y_test, predict_X, features = pre(
        data_directory, features)
    os.chdir(model_dir)
    model = dt(random_state=1)
    grid = gs(estimator=model,
              param_grid={
                  'criterion': ['mse', 'friedman_mse', 'mae'],
                  'splitter': ['best', 'random'],
                  'max_features': ['auto', 'sqrt', 'log2']
              },
              cv=5)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    test_score = grid.best_estimator_.score(X_test, y_test)
    print(test_score)
    joblib.dump(grid.best_estimator_,
                'dtr_%d_%.4f.m' % (len(features), test_score))

    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = grid.best_estimator_.predict(X_test)
    print(df)
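Note that scikit-learn 1.0 renamed the regression criteria ('mse' -> 'squared_error', 'mae' -> 'absolute_error') and later versions dropped 'auto' for max_features; an equivalent grid under the newer API, if that version is installed:

param_grid = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    'splitter': ['best', 'random'],
    'max_features': [None, 'sqrt', 'log2']
}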
Example #5
# The remaining search lists (batch_Size, epochs, optimizers, learn_rate,
# momentum, initialization, activation, weights) and the wrapped `classifier`
# are defined earlier in the original script and are omitted from this excerpt.
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4,
                0.5, 0.6, 0.7, 0.8, 0.9]  # candidate dropout rates
no_of_neurons = [1, 5, 10, 15, 20,
                 25, 30]  # candidate hidden-layer widths

param_grid = dict(batch_size=batch_Size,
                  epochs=epochs,
                  optimizer=optimizers,
                  learn_rate=learn_rate,
                  momentum=momentum,
                  init=initialization,
                  activation=activation,
                  weight_constraint=weights,
                  dropout_rate=dropout_rate,
                  neurons=no_of_neurons)
grid = gs(estimator=classifier, param_grid=param_grid, n_jobs=-1)

gSearch = grid.fit(input_variables, output)
best_params = gSearch.best_params_
best_accuracy = gSearch.best_score_

# summarize results
print("Best score: %f using params %s" %
      (gSearch.best_score_, gSearch.best_params_))
means = gSearch.cv_results_['mean_test_score']
stds = gSearch.cv_results_['std_test_score']
params = gSearch.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# evaluate using 10-fold cross validation
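The excerpt breaks off after this comment. A minimal sketch of the announced 10-fold evaluation, reusing the fitted search object (cross_val_score is standard scikit-learn; none of this is in the original):

from sklearn.model_selection import cross_val_score

scores = cross_val_score(gSearch.best_estimator_, input_variables, output, cv=10)
print("10-fold CV accuracy: %.4f (+/- %.4f)" % (scores.mean(), scores.std()))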
Example #6
def feature_detect():
    # Reconstructed opening (truncated in the source): a normalized histogram
    # of the loaded sample data; max_bins is defined elsewhere in the script.
    sample_hist, _ = np.histogram(sample_data, bins=max_bins,
                                  range=(0, max_bins), density=True)
    return sample_hist


sample_data, sample_label = load_picture()
sample_hist = feature_detect()

param_dist = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': np.arange(1, 2001, 100),
    'gamma': np.arange(0.1, 1.1, 0.1)
}
clf = SVC()
grid_search = gs(clf, param_dist, cv=3, scoring="precision")
grid_result = grid_search.fit(sample_hist, sample_label)
best_estimator = grid_result.best_estimator_
print(best_estimator)

# The grid search above replaces the manual loop below.
# result=[]
# maxid=0
# max=0
# id=0
# kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']
# for kernel in kernel_list:
#     for c in range(1, 2001, 100):
#         gamma = 0.1
#         while gamma <= 1:
#             clf = SVC(kernel=kernel, C=c, gamma=gamma)
#             ... (the rest of the manual loop is truncated in the source)

def build_classifier(optimizer):
    # 'optimizer' is injected by GridSearchCV via the KerasClassifier wrapper.
    classifier = Sequential()
    classifier.add(
        Dense(units=6,
              kernel_initializer='uniform',
              activation='relu',
              input_dim=11))
    classifier.add(
        Dense(units=6, kernel_initializer='uniform', activation='relu'))
    classifier.add(
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer=optimizer,
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier


classifier = kc(build_fn=build_classifier)
params = {
    'batch_size': [25, 32],
    'epochs': [100, 500],
    'optimizer': ['adam', 'rmsprop']
}
gSearch = gs(estimator=classifier,
             param_grid=params,
             scoring='accuracy',
             cv=10)
gSearch = gSearch.fit(xTrain, yTrain)  # xTrain/yTrain come from preprocessing not shown here
best_params = gSearch.best_params_
best_accuracy = gSearch.best_score_
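The excerpt ends without reporting the result; a one-line follow-up in the style of Example #5 (not part of the original):

print("Best accuracy: %.4f using %s" % (best_accuracy, best_params))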