# Example #1
# 0
def dfs_trainNtest(dfs_y_train, dfs_y_test):
    """Train and evaluate a binary DFS classifier on the module-level split.

    Relies on module globals: ``DFS``, ``vocab_size``, ``X_train``, ``X_test``.
    Writes predictions to ``dfs_results.txt`` and prints test accuracy.
    """
    # lambda1 is the feature-selection sparsity penalty of the DFS model.
    clf = DFS(in_dim=vocab_size, num_classes=2, lambda1=0.04)
    # Debug output: inspect the labels and features going into fit().
    print(dfs_y_train)
    print(X_train)
    clf.fit(X_train,
            dfs_y_train,
            epochs=10,
            batch_size=100,
            validation_data=[X_test, dfs_y_test])
    print(clf.accuracy(X_test, dfs_y_test))
    print(clf.write_predictions("dfs_results.txt", X_test, dfs_y_test))
# Replace missing values before scaling.
data = data.fillna(0)

# Min-max scale every column into the [0, 1] range.
# Hoist data.min() — it is used in both numerator and denominator.
col_min = data.min()
X = (data - col_min) / (data.max() - col_min)
# A constant column makes the denominator zero above and reintroduces
# NaNs; zero those entries out.
X = X.fillna(0)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
num_classes = len(y[0])   # y is one-hot encoded; width = number of classes
input_dim = len(X.columns)

# Train one DFS network per feature-selection strength and keep them all.
lambda1s = [1, 10]
models = []
for lam in lambda1s:
    print("Training on lambda = " + str(lam))
    model = DFS(input_dim,
                num_classes,
                hidden_layers=[1024, 256],
                lambda1=lam,
                alpha1=0.001,
                learning_rate=0.01)
    model.fit(X_train,
              y_train,
              batch_size=100,
              epochs=5,
              validation_data=[X_test, y_test])
    models.append(model)
def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) as a Keras metric.

    Computed as ``1 - SS_res / SS_tot`` with the Keras backend ``K``;
    ``K.epsilon()`` guards the division when ``y_true`` is constant.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    total_ss = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - residual_ss / (total_ss + K.epsilon())


# Ratchet down on features: sweep the feature-selection penalty from
# no regularization up to relatively harsh regularization, writing the
# learned feature weights and test predictions for each setting.
lambdas = [0, 0.0001, 0.001, 0.01, 0.1]
for lda in lambdas:
    reg_model = DFS(in_dim=len(X_train.columns),
                    num_classes=1,                       # single regression output
                    lambda1=lda,
                    alpha1=0.0001,
                    hidden_layers=[128, 32],
                    hidden_layer_activation='relu',
                    output_layer_activation='linear',
                    loss_function='mean_squared_error',
                    learning_rate=0.005,
                    addl_metrics=[r2_keras])
    reg_model.fit(x=X_train,
                  y=y_train,
                  batch_size=10,
                  epochs=100,
                  validation_data=[X_test, y_test])
    # One weights file and one predictions file per lambda value.
    tag = str(lda)
    reg_model.write_features('../results/housing_weights_' + tag + '.csv',
                             X.columns)
    reg_model.write_predictions(
        '../results/housing_predictions_' + tag + '.csv', X_test, y_test)
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 26 16:57:45 2018

@author: Erik
"""

from DFS import DFS
from keras.utils import to_categorical
import numpy as np

# Toy XOR-style problem on un-normalized inputs: the second feature is on
# a 0/10 scale to exercise the per-feature selection weights.
X = np.array([[0, 0], [0, 10], [1, 0], [1, 10]])
y = np.array([0, 1, 1, 0])
y = to_categorical(y)  # one-hot encode the two classes

model = DFS(2,                      # in_dim: two input features
            2,                      # num_classes
            hidden_layers=[200],
            learning_rate=0.05,
            lambda1=0.001,
            lambda2=0.5,
            alpha1=0.01,
            alpha2=0.5)
model.fit(x=X, y=y, batch_size=1, epochs=5000)
print(model.predict(X))
model.show_bar_chart()
# Location of the DECRES GM12878 dataset (200 bp windows).
data_dir = 'C:/Working/UIC/Fall2018/CS502/CS502_final_project/DECRES/data/'

# Load the feature matrix; rows are regions, columns are features.
filename = data_dir + "GM12878_200bp_Data.txt"
X = np.loadtxt(filename, delimiter='\t', dtype='float32')
#X = (X - np.min(X, axis =0))/(np.max(X, axis = 0) - np.min(X, axis = 0))

# Load the matching class labels as strings.
filename = data_dir + "GM12878_200bp_Classes.txt"
y_str = np.loadtxt(filename, delimiter='\t', dtype=object)

#do one hot encoding=====================
# NOTE(review): LabelEncoder (sklearn.preprocessing) and train_test_split
# (sklearn.model_selection) are used here but not imported in this file as
# shown — confirm the imports exist upstream.
#transform to int
le = LabelEncoder()
y_int = le.fit_transform(y_str)
#and transform to encoded
y_enc = to_categorical(y_int)
#done one hot encoding===================

# 80/10/10 train/validation/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5)

# Derive layer sizes from the loaded data instead of hard-coding them
# (originally in_dim=102, num_classes=7), so a dataset with a different
# number of features or classes does not silently mismatch the network.
model = DFS(in_dim=X_train.shape[1],
            num_classes=y_enc.shape[1],
            lambda1=0.0001)
model.fit(x=X_train,
          y=y_train,
          epochs=100,
          batch_size=100,
          validation_data=[X_val, y_val])
print(model.accuracy(X_test, y_test))
model.show_bar_chart()