Example #1
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score, roc_auc_score
from xgboost import XGBClassifier


def train_model(model_type: str,
                train: np.lib.npyio.NpzFile,
                test: dict,
                crime: str,
                data_type: str) -> pd.DataFrame:
    """Train an XGBoost or CrimeModel classifier and score it on each test month.

    `test` maps a month key to an object with 'x' and 'y' arrays;
    `CrimeModel` is a custom Keras model defined elsewhere in the project.
    """
    x, y = train['x'], train['y']

    # weight the positive class by the negative/positive sample ratio
    # (cf. XGBoost's scale_pos_weight guidance for imbalanced data)
    weight_ratio = float(len(y[y == 0])) / float(len(y[y == 1]))

    results = pd.DataFrame({'crime': [], 'model': [], 'data_type': [],
                            'month': [], 'f1': [], 'auc': []})

    if model_type == 'xgboost':
        model = XGBClassifier(n_estimators=200, objective='binary:logistic',
                              n_jobs=-1,
                              scale_pos_weight=weight_ratio,
                              eval_metric='logloss')
        model.fit(x, y)

        for month in test:
            x_test, y_test = test[month]['x'], test[month]['y']
            proba = model.predict_proba(x_test)[:, 1]
            preds = model.predict(x_test)

            auc = round(roc_auc_score(y_test, proba), 4)
            f1 = round(f1_score(y_test, preds), 4)

            # DataFrame.append() was removed in pandas 2.0; use pd.concat()
            row = pd.DataFrame([{'crime': crime, 'model': model_type,
                                 'data_type': data_type, 'month': month,
                                 'f1': f1, 'auc': auc}])
            results = pd.concat([results, row], ignore_index=True)

    else:
        model = CrimeModel(32)
        model.compile(optimizer='adam', loss='binary_crossentropy',
                      metrics=[tf.keras.metrics.Precision(),
                               tf.keras.metrics.Recall(),
                               tf.keras.metrics.AUC()])
        model.fit(x, y, epochs=5, batch_size=64, verbose=0,
                  class_weight={0: 1, 1: weight_ratio})

        for month in test:
            x_test, y_test = test[month]['x'], test[month]['y']
            # predict() returns an (n, 1) array; flatten it for the metrics
            proba = model.predict(x_test, batch_size=64).ravel()
            preds = [1 if prob > 0.5 else 0 for prob in proba]

            auc = round(roc_auc_score(y_test, proba), 4)
            f1 = round(f1_score(y_test, preds), 4)

            row = pd.DataFrame([{'crime': crime, 'model': model_type,
                                 'data_type': data_type, 'month': month,
                                 'f1': f1, 'auc': auc}])
            results = pd.concat([results, row], ignore_index=True)

    return results
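
# Example usage -- a minimal sketch. The .npz file names, month keys, and
# crime label below are hypothetical stand-ins, not from the original code.
train_data = np.load('train.npz')                       # arrays 'x' and 'y'
test_data = {m: np.load(f'test_{m}.npz') for m in ('jan', 'feb', 'mar')}
results = train_model('xgboost', train_data, test_data,
                      crime='burglary', data_type='grid')
print(results)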
Example #2
import os
import pickle

import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


class Classifier:
    def __init__(self, model_type, **kwargs):
        self.model_type = model_type

        if model_type == 'LR':  # Logistic Regression
            self.model = LogisticRegression(**kwargs)
        elif model_type == 'DT':  # Decision Tree
            self.model = DecisionTreeClassifier(**kwargs)
        elif model_type == 'RF':  # Random Forest
            self.model = RandomForestClassifier(**kwargs)
        elif model_type == 'XGB':  # XGBoost
            # default objective
            kwargs['objective'] = "binary:logistic"
            self.model = XGBClassifier(**kwargs)
        elif model_type == 'DNN':  # Deep Neural Network
            # required params -- raise, rather than silently building an
            # unraised NameError object as the original did
            for param in ('nb_features', 'nb_class', 'nb_layers', 'optimizer'):
                if param not in kwargs:
                    raise NameError(f"name '{param}' is not defined")
            nb_features = kwargs['nb_features']
            nb_class = kwargs['nb_class']
            nb_layers = kwargs['nb_layers']  # list of hidden-layer widths
            loss = 'categorical_crossentropy' if nb_class > 2 else 'binary_crossentropy'
            act_func = 'softmax' if nb_class > 2 else 'sigmoid'

            # modeling
            input_ = tf.keras.layers.Input(shape=(nb_features, ))
            x = input_
            for units in nb_layers:
                x = tf.keras.layers.Dense(units, activation='relu')(x)
            output = tf.keras.layers.Dense(nb_class, activation=act_func)(x)
            self.model = tf.keras.models.Model(input_, output)

            # compile
            self.model.compile(optimizer=kwargs['optimizer'],
                               loss=loss,
                               metrics=['acc'])
        else:
            raise ValueError(f"unknown model_type: {model_type}")

    def train(self, X, y, savedir=None, **kwargs):
        # set evaluation dataset when the model is XGB
        if self.model_type == 'XGB':
            kwargs['eval_set'] = [(X, y)]

        # model training
        self.model.fit(X, y, **kwargs)

        # save model
        if savedir is not None:
            # make sure the target directory exists
            os.makedirs(os.path.dirname(savedir) or '.', exist_ok=True)
            # Keras models use save(); the others are pickled
            if self.model_type == 'DNN':
                self.model.save(savedir)
            else:
                pickle.dump(self.model, open(savedir, "wb"))
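
# Example usage -- a minimal sketch; the data (X, y), layer widths, and save
# path below are hypothetical stand-ins, not from the original code.
clf = Classifier('DNN', nb_features=X.shape[1], nb_class=2,
                 nb_layers=[64, 32], optimizer='adam')
clf.train(X, y, savedir='../saved_models/dnn.h5', epochs=10, batch_size=32)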
Example #3
## Adding the third hidden layer
#classifier.add(Dense(units = Nh, kernel_initializer = 'uniform', activation = 'relu'))
##classifier.add(LeakyReLU(alpha=0.1))
#classifier.add(Dropout(0.01))

# Adding the output layer
# (no dropout after this layer: dropping the single output unit would
# randomly zero the prediction during training)
classifier.add(
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Define the optimizer ('lr' was renamed to 'learning_rate' in recent Keras
# releases; decay=0.0 is the default and is omitted)
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)
# Compile the model
classifier.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy"])

# Set a learning rate annealer; 'accuracy' matches the compiled metric name
learning_rate_reduction = ReduceLROnPlateau(monitor='accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)
# Fitting the ANN to the Training set
history = classifier.fit(X1,
                         y,
                         batch_size=25,
                         epochs=10000,
                         callbacks=[learning_rate_reduction])
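
# With epochs=10000 and only an LR annealer, training always runs the full
# count. A minimal sketch adding early stopping (the patience value and the
# tf.keras import path are assumptions, not from the original code):
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='accuracy', patience=10,
                           restore_best_weights=True, verbose=1)
history = classifier.fit(X1, y, batch_size=25, epochs=10000,
                         callbacks=[learning_rate_reduction, early_stop])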
Example #4
# Neural Network Architecture

import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Create initial set of linear layers
model = Sequential()
# Now add our layers, noting the number of neurons in each one.
# Input dimension only needs to be given for the first layer; it is the
# number of features/columns.
model.add(Dense(input_dim=17, units=8, activation='relu', name='hidden_1'))
model.add(Dense(units=16, activation='relu', name='hidden_2'))
# Make sure the output layer has two neurons, one per attrition class
model.add(Dense(units=2, activation='sigmoid'))

# Compile the Network
# More information on optimizer types:
# https://keras.io/optimizers/
# ('lr' was renamed to 'learning_rate' in recent Keras releases)
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy',
              metrics=['accuracy'])
# loss='binary_crossentropy' makes the model optimize the log loss for
# binary classification; metrics=['accuracy'] prints accuracy during training.

# Review NN configuration
model.summary()

History = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=10, verbose=1)

# predict_classes() was removed from Keras; take the argmax of the
# predicted probabilities instead
y_pred = np.argmax(model.predict(X_test), axis=1)

# Log Loss over time
plt.plot(History.history['loss'])
Example #5
X_train_validate_nn = X_train_validate

model = K.Sequential()
model.add(
    K.layers.Dense(128,
                   input_dim=X_train_nn.shape[1],
                   activation='sigmoid'))
model.add(K.layers.Dense(64, activation='sigmoid'))
model.add(K.layers.Dense(32, activation='sigmoid'))
model.add(K.layers.Dense(1))  # linear output for regression
model.summary()

# loss_weights, sample_weight_mode, weighted_metrics, and target_tensors are
# legacy arguments; left at their defaults they can simply be omitted
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error'])

model.fit(x=X_train_nn,
          y=y_train,
          epochs=15,
          verbose=1,
          shuffle=True)
Example #6
# Initialising the ANN
classifier = Sequential()

# Adding the input layer and the first hidden layer
# (the Keras 1 arguments 'output_dim'/'init'/'nb_epoch' are now
# 'units'/'kernel_initializer'/'epochs')
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=8))

# Adding the second hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Adding the third hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size=3, epochs=100)

# Part 3 - Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm_ann = confusion_matrix(y_test, y_pred)
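
# From the confusion matrix, overall accuracy is the trace over the total
# count -- a quick sanity check on the ANN (this addition is not part of the
# original snippet):
accuracy_ann = cm_ann.trace() / cm_ann.sum()
print(f"ANN accuracy: {accuracy_ann:.4f}")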
Example #7
import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.metrics import accuracy_score

num_feats = len(x_train[0])
model = Sequential()

early_stop = EarlyStopping(monitor='loss',
                           patience=0,
                           verbose=1,
                           min_delta=0.005,
                           mode='auto')

model.add(Dense(num_feats, activation='relu', input_dim=num_feats))
model.add(Dropout(0.50))
model.add(Dense(500, activation='relu', kernel_initializer='uniform'))
model.add(Dropout(0.50))
model.add(Dense(2, kernel_initializer='uniform', activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              metrics=['accuracy'],
              optimizer='adam')

model.fit(x_train,
          y_train,
          epochs=25,
          verbose=1,
          callbacks=[early_stop])

# predict_classes() was removed from Keras; use argmax over predict()
y_pred = np.argmax(model.predict(x_test), axis=1)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
Example #8
			y_train = to_categorical(y_train, num_classes)
			y_test = to_categorical(y_test, num_classes)

			patience = 16
			early_stop = EarlyStopping(monitor='loss', patience=patience, verbose=1, min_delta=0.005, mode='auto')
			# 'period' is deprecated in ModelCheckpoint; saving every epoch is the default
			model_save = ModelCheckpoint("best_model.hdf5", monitor='loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto')
			reduce_LR = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=patience // 2, verbose=0, min_delta=0.005, mode='auto', cooldown=0, min_lr=0)

			model = Sequential()
			model.add(Dense(num_feats, activation='relu', input_dim=num_feats))
			model.add(Dropout(0.5))
			model.add(Dense(int((num_feats + num_classes) / 2), activation='relu', kernel_initializer='uniform'))
			model.add(Dropout(0.5))
			model.add(Dense(num_classes, kernel_initializer='uniform', activation='softmax'))

			# one-hot labels with a softmax output call for categorical, not binary, cross-entropy
			model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

			# include the checkpoint callback so best_model.hdf5 is actually written
			model.fit(x_train, y_train, epochs=100, verbose=1, callbacks=[early_stop, reduce_LR, model_save])
		else:
			raise Exception('Unrecognized Model. Use XGB, SVM or ANN')

		if model_type == 'ANN':
			results = ann_1d(model, x_test, y_test, 0)
			#OBOResults = ann_1d(model, x_test, y_test, 1)
		else:
			results = xgb_tester(model, x_test, y_test, 0)
			#OBOResults = xgb_tester(model, x_test, y_test, 1)

		#window_scores.append(OBOResults[0])
		mcc_scores.append(results[1])
Example #9
classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(activation="relu", input_dim=1000, units=300))
classifier.add(Dropout(rate=0.05))

# Adding the second hidden layer
classifier.add(Dense(activation="sigmoid", units=300))
classifier.add(Dropout(rate=0.05))

# Adding the output layer
classifier.add(Dense(activation="softmax", units=9))

# Compiling the ANN
classifier.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(x_train, y_train, batch_size=300, epochs=500)

# Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(x_test)

# Evaluating the model
# turn probabilities into class predictions
import numpy as np
y_pred = np.argmax(y_pred, axis=1)
Example #10
#score=0.52721
#%%
import numpy as np
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.metrics import f1_score
#%%
model = Sequential()
# note: the Dense layers below have no activation argument, so they apply
# purely linear maps between the dropout layers
model.add(Dense(200, input_shape=(x_train.shape[1], )))
model.add(Dense(200))
model.add(Dropout(0.2))
model.add(Dense(150))
model.add(Dense(50))
model.add(Dense(6, activation='softmax'))
#%%

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

#%%

model.fit(x_train, y_train, epochs=1000)
#%%
# predict_classes() was removed from Keras; use argmax over predict()
prediction = np.argmax(model.predict(x_test), axis=1)
#%%
y_test = [i.argmax() for i in y_test]
#%%
score = f1_score(y_test, prediction, average='weighted')
print(score)
Example #11
def train_non_swat_model(segment, tab, authUser, authPw, urlPrefix):
    try:
        df = segment['segment_tbl'].to_frame()
        df = df[df['_PartInd_'] == 1]
        df = df.select_dtypes(exclude=['object'])
        imputed_cols = [col for col in df.columns if "IMP" in col]
        df = df[imputed_cols]
        target = "IMP_" + tab.children[0].children[3].children[0].children[0].value
        imputed_cols.remove(target)
        X_train = df[imputed_cols]
        y_train = df[target]
        model = None
    except Exception:
        print("could not load segment data")
        segment['modelObj'] = None
        return None

    if segment['model'] == 'XGBoost':
        model = XGBClassifier(**segment['train_params'])
        model.fit(X_train, y_train)
    elif segment['model'] == 'TensorFlow':
        # map the UI activation names ('RECTIFIER', 'TANH', 'EXP') to Keras ones
        if segment['train_params']['tf_acts'] == 'RECTIFIER':
            act = 'relu'
        elif segment['train_params']['tf_acts'] == 'TANH':
            act = 'tanh'
        else:
            act = 'exponential'
        hid_layers = list()
        hid_num = 4
        if segment['train_params']['tf_hidden'] != '':
            hid_num = int(segment['train_params']['tf_hidden'])
        for hidden in range(hid_num):
            hid_layers.append(tf.keras.layers.Dense(units=32, activation=act))

        model = tf.keras.models.Sequential(
            [tf.keras.layers.Dense(units=128, activation=act,
                                   input_shape=(X_train.shape[-1],))] +
            hid_layers +
            [tf.keras.layers.Dense(1, activation='sigmoid')])

        # use binary_crossentropy loss function for binary target
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['acc'])

        # oversample the minority class before training
        # (imblearn's fit_sample and pandas' as_matrix were renamed to
        # fit_resample and to_numpy)
        #X_train = (X_train - X_train.mean()) / (X_train.max() - X_train.min())
        sm = SMOTE(random_state=10)
        X_train, y_train = sm.fit_resample(X_train.to_numpy(), y_train)

        model.fit(X_train, y_train, epochs=9, verbose=0)
    elif segment['model'] == 'AutoML':
        var_list = list()
        for col in list(segment['session'].CASTable(tab.children[0].children[1].value, caslib='Public').columns):
            var_list.append({"name": col})

        projectName = 'MLPA_test_' + str(random.randint(1, 100)) + random.choice(string.ascii_letters)
        segment['session'].table.partition(table={'name': segment['segment_tbl'].name,
                                                  "vars": var_list, "where": "_PartInd_ = 1"},
                                           casout={'name': projectName + "_Train",
                                                   'promote': True, 'caslib': 'CASUSER({})'.format(authUser)})
        # -----------------------------------------------
        # Get authentication token
        # (the credential string and token request were masked ('******') in
        # the source; oauthToken and tokenUri are assumed to be set here)
        # -----------------------------------------------
        authCred = 'password&username='******'&password='******
        headers = {
            'Authorization': oauthToken,
            'Accept': "application/vnd.sas.analytics.ml.pipeline.automation.project+json",
            'Content-Type': "application/json"
        }
        payload = {
            'dataTableUri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~CASUSER({})/tables/'.format(authUser) + projectName + "_Train",
            'type': 'predictive',
            'name': projectName,
            'description': 'Project generated for test',
            'settings': {
                'autoRun': True,
                'modelingMode': 'Standard',
                'maxModelingTime': segment['train_params']['ntime']
            },
            'analyticsProjectAttributes': {
                'targetVariable': tab.children[0].children[3].children[0].children[0].value
            }
        }

        payload_data = json.dumps(payload, indent=4)

        # Create new MLPA project and run pipeline
        mlpaProject = executeRestCallWithPayload(urlPrefix, "POST", tokenUri, payload_data, headers)

        # -----------------------------------------------
        # Save the links needed to poll the MLPA project state
        # -----------------------------------------------
        projectStateLink = list(filter(lambda x: x["rel"] == "state", mlpaProject["links"]))[0]
        headers = {
            'Authorization': oauthToken,
            'Accept': projectStateLink["type"]
        }
        segment['mlpaProject'] = mlpaProject
        segment["projectStateLink"] = projectStateLink
        segment["oauthToken"] = oauthToken
        segment['projectName'] = projectName

    segment['modelObj'] = model
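
# The source's comments promise polling every 5 seconds until the MLPA
# project completes, but the loop itself was not captured. A minimal sketch,
# assuming the stored projectStateLink exposes an 'href' relative to
# urlPrefix, that the state endpoint returns a plain-text state string, and
# that the `requests` library is available:
import time
import requests

def wait_for_mlpa_project(segment, urlPrefix, timeout=3600):
    headers = {
        'Authorization': segment["oauthToken"],
        'Accept': segment["projectStateLink"]["type"]
    }
    deadline = time.time() + timeout
    while time.time() < deadline:
        state = requests.get(urlPrefix + segment["projectStateLink"]["href"],
                             headers=headers).text
        if state == 'completed':
            return state
        time.sleep(5)  # poll every 5 seconds, as the original comment suggests
    raise TimeoutError('MLPA project did not complete in time')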
Example #12
print('1- Load previous model')
print('2- Train model')
option = input('Choose option: ')

if option == '2':

    # Create the model
    model = Sequential()

    # Add layers to the model
    model.add(Dense(64, input_dim=7, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model with an RMSprop optimizer and a learning rate of 0.0001
    # ('lr' was renamed to 'learning_rate' in recent Keras releases)
    model.compile(loss='binary_crossentropy', optimizer=RMSprop(learning_rate=0.0001))
    checkpoint = ModelCheckpoint('keras_models/weights.hdf5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True,
                                 mode='auto')

    # Train the model
    history = model.fit(x_train,
                        y_train,
                        validation_data=(x_test, y_test),
                        batch_size=4,
                        epochs=100,
                        callbacks=[checkpoint])
    model.load_weights('keras_models/weights.hdf5')
    save_model(model)
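# The menu offers option '1' (load a previous model) but that branch was not
# captured. A minimal sketch, assuming the model was saved to a hypothetical
# 'keras_models/model.h5' path that Keras' load_model() can read:
elif option == '1':
    from keras.models import load_model
    model = load_model('keras_models/model.h5')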