def test(self, test):
    x, y = test
    if self.num_class > 2:
        y = label_binarize(y, classes=self.classes)
    if self.num_class == 2:
        # The binary model emits a single continuous score per sample;
        # expand it into two-column pseudo-probabilities.
        probs = np.array([[1 - row, row] for row in self.model.predict(x)])
    else:
        probs = self.model.predict(x)
    preds = np.argmax(probs, axis=-1)
    stats = get_stat_dict(y, probs)
    return preds, stats
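# A minimal, self-contained sketch of the [1 - p, p] conversion used above:
# one regression score per sample is expanded into a two-column
# pseudo-probability matrix, so the binary case flows through the same
# argmax/statistics path as the multiclass one. The scores here are
# illustrative values, not repo data.
def _demo_two_column_probs():
    scores = np.array([0.1, 0.8, 0.4])
    probs = np.array([[1 - s, s] for s in scores])
    preds = np.argmax(probs, axis=-1)
    print(probs)  # [[0.9 0.1] [0.2 0.8] [0.6 0.4]]
    print(preds)  # [0 1 0]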
def test(self, test):
    test_x, test_y = test
    probs = self.model.predict(test_x)
    preds = np.argmax(probs, axis=1)
    # Labels arrive one-hot encoded; convert back to class indices.
    stats = get_stat_dict(np.argmax(test_y, axis=1), probs, preds)
    return stats, preds
def test(self, test):
    x, y = test
    # Keep only the features selected during training.
    x_filt = x[:, self.feature_list]
    probs = self.model.predict_proba(x_filt)
    preds = np.argmax(probs, axis=-1)
    stat = get_stat_dict(y, probs)
    return preds, stat
def test(self, test):
    test_x, test_y = test
    # Reshape (n_samples, n_features) to (n_samples, 1, n_features, 1)
    # so the convolutional model sees a one-row, single-channel "image".
    test_x = np.expand_dims(test_x, -1)
    test_x = np.expand_dims(test_x, 1)
    preds = self.model.predict(test_x)
    stats = get_stat_dict(test_y, preds)
    return preds, stats
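# Shape sketch for the two expand_dims calls above; the sizes are
# illustrative, not taken from the repo.
def _demo_cnn_input_shape():
    x = np.zeros((8, 100))     # 8 samples, 100 features
    x = np.expand_dims(x, -1)  # (8, 100, 1): add a channel axis
    x = np.expand_dims(x, 1)   # (8, 1, 100, 1): add a height axis
    print(x.shape)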
def test(self, test):
    x, y = test
    if self.num_class > 2:
        y = label_binarize(y, classes=self.classes)
    probs = self.model.predict_proba(x)
    preds = np.argmax(probs, axis=-1)
    stat = get_stat_dict(y, probs)
    return preds, stat
def train(train, test, config, metric, seed=42):
    n_iter = int(config.get('LASSO', 'NumberIterations'))
    num_cv = int(config.get('LASSO', 'GridCV'))
    train_x, train_y = train
    test_x, test_y = test
    cl = np.unique(train_y)
    num_class = len(cl)

    if num_class > 2:
        # One-vs-rest LASSO needs binarized labels.
        train_y = label_binarize(train_y, classes=cl)
        test_y = label_binarize(test_y, classes=cl)
        clf = OneVsRestClassifier(
            LassoCV(alphas=np.logspace(-4, -0.5, 50), cv=num_cv,
                    n_jobs=-1, max_iter=n_iter))
    else:
        clf = LassoCV(alphas=np.logspace(-4, -0.5, 50), cv=num_cv,
                      n_jobs=-1, max_iter=n_iter)
    clf.fit(train_x, train_y)

    if num_class == 2:
        # LassoCV regresses on the 0/1 labels; treat the output as the
        # positive-class score and its complement as the negative.
        test_probs = np.array([[1 - row, row] for row in clf.predict(test_x)])
        test_pred = np.argmax(test_probs, axis=-1)
        test_stat_dict = get_stat_dict(test_y, test_probs)
        fpr, tpr, thresh = roc_curve(test_y, test_probs[:, 1])
        weights = clf.coef_
    else:
        test_probs = clf.predict(test_x)
        test_pred = np.argmax(test_probs, axis=-1)
        test_stat_dict = get_stat_dict(test_y, test_probs)
        fpr, tpr, thresh = None, None, None
        weights = None
    return clf, test_stat_dict, tpr, fpr, thresh, weights, test_probs
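# Usage sketch for the LASSO train() above, assuming it lives in its own
# module (as the per-model config sections suggest) and that get_stat_dict
# is available. The config values and synthetic data are illustrative only.
def _example_lasso_train():
    from configparser import ConfigParser
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    config = ConfigParser()
    config['LASSO'] = {'NumberIterations': '10000', 'GridCV': '5'}
    x, y = make_classification(n_samples=200, n_features=50, random_state=42)
    x_tr, x_te, y_tr, y_te = train_test_split(x, y, stratify=y,
                                              random_state=42)
    clf, stats, tpr, fpr, thresh, weights, probs = train(
        (x_tr, y_tr), (x_te, y_te), config, metric='AUC')
    print(stats)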
def train(train, test, config, metric, seed=42, regularization=True):
    n_iter = int(config.get('Logistic Regression', 'NumberIterations'))
    num_cv = int(config.get('Logistic Regression', 'GridCV'))
    train_x, train_y = train
    test_x, test_y = test
    cl = np.unique(train_y)
    num_class = len(cl)

    if regularization:
        # Grid search over the L1 penalty strength.
        grid = {"C": np.logspace(-3, 3, 50), "penalty": ["l1"]}
        clf = GridSearchCV(LogisticRegression(solver="saga", max_iter=n_iter),
                           grid,
                           cv=StratifiedKFold(num_cv).split(train_x, train_y))
        clf.fit(train_x, train_y)
        clf = clf.best_estimator_
    else:
        clf = LogisticRegression(solver="saga", max_iter=n_iter)
        clf.fit(train_x, train_y)

    test_probs = clf.predict_proba(test_x)
    test_preds = np.argmax(test_probs, axis=1)
    test_stat_dict = get_stat_dict(test_y, test_probs, test_preds)
    weights = np.array(clf.coef_)

    if num_class == 2:
        fpr, tpr, thresh = roc_curve(test_y, test_probs[:, 1])
    else:
        fpr, tpr, thresh = 0, 0, 0
    return clf, test_stat_dict, tpr, fpr, thresh, weights, test_probs
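# Minimal standalone sketch of the L1 grid search used above: the saga
# solver with an l1 penalty over a log-spaced C grid. Self-contained;
# sizes and grid resolution are illustrative.
def _demo_l1_grid_search():
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV, StratifiedKFold

    x, y = make_classification(n_samples=120, n_features=20, random_state=0)
    grid = {"C": np.logspace(-3, 3, 10), "penalty": ["l1"]}
    search = GridSearchCV(LogisticRegression(solver="saga", max_iter=5000),
                          grid, cv=StratifiedKFold(3).split(x, y))
    search.fit(x, y)
    print(search.best_params_)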
def tune_mlpnn(train, test, config, train_weights=[]):
    train_x, train_y = train
    test_x, test_y = test
    num_class = train_y.shape[1]
    input_len = train_x.shape[1]

    def auc_metric(y_true, y_pred):
        return tf.numpy_function(roc_auc_score, (y_true, y_pred), tf.double)

    patience = int(config.get('MLPNN', 'Patience'))
    batch_size = int(config.get('MLPNN', 'BatchSize'))
    learning_rate = float(config.get('MLPNN', 'LearningRate'))

    # Two-stage grid search: first tune dropout rate and L2 strength with a
    # fixed architecture, then tune depth and width with the best regularizers.
    dropout = [0.1, 0.3, 0.5]
    l2_grid = [0.01, 0.001, 0.0001]
    num_layer = [1, 2]
    num_nodes = [32, 64, 128]

    best_l2 = 0.0001
    best_drop = 0.5
    best_layer = 2
    best_nodes = 128
    best_stat = 0

    def build_and_eval(layers, nodes, l2, drop):
        reg = tf.keras.regularizers.l2(l2)
        model = tf.keras.Sequential()
        for i in range(layers):
            model.add(tf.keras.layers.Dense(nodes, activation='relu',
                                            kernel_regularizer=reg,
                                            bias_regularizer=reg,
                                            name="fc_" + str(i)))
            model.add(tf.keras.layers.Dropout(drop))
        model.add(tf.keras.layers.Dense(num_class, activation='softmax',
                                        kernel_regularizer=reg,
                                        bias_regularizer=reg,
                                        name="output"))
        es_cb = tf.keras.callbacks.EarlyStopping('val_loss',
                                                 patience=patience,
                                                 restore_best_weights=True)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                      loss='categorical_crossentropy')
        # Early-stopped fit on a 10% validation split, then a short refit
        # on the full training set.
        model.fit(train_x, train_y, batch_size=batch_size, verbose=0,
                  epochs=1000, callbacks=[es_cb], validation_split=0.1)
        model.fit(train_x, train_y, batch_size=batch_size, verbose=0,
                  epochs=10)
        probs = model.predict(test_x)
        preds = np.argmax(probs, axis=1)
        stat = get_stat_dict(np.argmax(test_y, axis=1), probs, preds)
        tf.keras.backend.clear_session()
        return stat["AUC"]

    for d in dropout:
        for l in l2_grid:
            auc = build_and_eval(best_layer, best_nodes, l, d)
            if auc > best_stat:
                best_stat = auc
                best_drop = d
                best_l2 = l

    for l in num_layer:
        for n in num_nodes:
            auc = build_and_eval(l, n, best_l2, best_drop)
            if auc > best_stat:
                best_stat = auc
                best_layer = l
                best_nodes = n

    return best_layer, best_nodes, best_l2, best_drop
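# Usage sketch for tune_mlpnn() above: labels must be one-hot encoded
# (num_class is read from train_y.shape[1]) and the config must provide the
# MLPNN keys read above. Synthetic data; small sizes keep this a quick
# smoke test rather than a real tuning run.
def _example_tune_mlpnn():
    from configparser import ConfigParser

    config = ConfigParser()
    config['MLPNN'] = {'Patience': '5', 'BatchSize': '16',
                       'LearningRate': '0.001'}
    rng = np.random.default_rng(42)
    x = rng.normal(size=(100, 30)).astype('float32')
    y = tf.keras.utils.to_categorical(rng.integers(0, 2, size=100), 2)
    x_tr, y_tr, x_te, y_te = x[:80], y[:80], x[80:], y[80:]
    layers, nodes, l2, drop = tune_mlpnn((x_tr, y_tr), (x_te, y_te), config)
    print(layers, nodes, l2, drop)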
def train(train, test, config, metric, seed=42, feature_select=True):
    number_trees = int(config.get('RF', 'NumberTrees'))
    num_models = int(config.get('RF', 'ValidationModels'))
    x, y = train
    test_x, test_y = test

    if metric == "AUC":
        scoring = "roc_auc"
    else:
        scoring = "accuracy"

    # Fit an initial forest to rank features by importance.
    clf = RandomForestClassifier(n_estimators=number_trees, n_jobs=-1)
    clf.fit(x, y)
    feature_importance = clf.feature_importances_
    feature_ranking = np.flip(np.argsort(feature_importance))
    num_features = x.shape[1]
    best_num_features = num_features

    if feature_select:
        # Cross-validated search over the fraction of top-ranked features.
        percent_features = [1.0, 0.75, 0.5, 0.25]
        skf = StratifiedKFold(n_splits=num_models, shuffle=True)
        best_score = -1
        for percent in percent_features:
            features_using = int(round(num_features * percent))
            feature_list = feature_ranking[0:features_using]
            run_probs = []
            run_y = []
            for train_index, valid_index in skf.split(x, y):
                train_x, valid_x = x[train_index], x[valid_index]
                train_y, valid_y = y[train_index], y[valid_index]
                filtered_train_x = train_x[:, feature_list]
                filtered_valid_x = valid_x[:, feature_list]
                clf = RandomForestClassifier(
                    n_estimators=number_trees,
                    n_jobs=-1).fit(filtered_train_x, train_y)
                # Collect labels alongside probabilities so they stay
                # aligned across the shuffled folds.
                run_probs += list(clf.predict_proba(filtered_valid_x))
                run_y += list(valid_y)
            run_score = get_stat(run_y, run_probs, metric)
            if run_score > best_score:
                best_score = run_score
                best_num_features = features_using

    # Refit on the selected features and evaluate on the held-out test set.
    feature_list = feature_ranking[0:best_num_features]
    x_filt = x[:, feature_list]
    test_x_filt = test_x[:, feature_list]
    clf = RandomForestClassifier(n_estimators=number_trees,
                                 n_jobs=-1).fit(x_filt, y)
    test_probs = clf.predict_proba(test_x_filt)
    test_pred = np.argmax(test_probs, axis=-1)
    test_stat_dict = get_stat_dict(test_y, test_probs, test_pred)

    if len(np.unique(y)) == 2:
        fpr, tpr, thresh = roc_curve(test_y, test_probs[:, 1])
    else:
        fpr, tpr, thresh = 0, 0, 0
    return clf, test_stat_dict, tpr, fpr, thresh, feature_importance, test_probs
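# Usage sketch for the random-forest train() above (assumed to live in its
# own module, with get_stat/get_stat_dict available). Config values and
# synthetic data are illustrative only.
def _example_rf_train():
    from configparser import ConfigParser
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    config = ConfigParser()
    config['RF'] = {'NumberTrees': '200', 'ValidationModels': '5'}
    x, y = make_classification(n_samples=200, n_features=40, random_state=1)
    x_tr, x_te, y_tr, y_te = train_test_split(x, y, stratify=y,
                                              random_state=1)
    clf, stats, tpr, fpr, thresh, importances, probs = train(
        (x_tr, y_tr), (x_te, y_te), config, metric='AUC')
    print(stats)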
def test(self, test):
    test_x, test_y = test
    preds = self.model.predict(test_x)
    stats = get_stat_dict(test_y, preds)
    return preds, stats
def train(train, test, config, metric, seed=42, max_iter=100000,
          gaussian=False):
    num_cv = int(config.get('SVM', 'GridCV'))
    train_x, train_y = train
    test_x, test_y = test
    cl = np.unique(train_y)
    num_class = len(cl)
    scoring = "roc_auc"

    if num_class > 2:
        # One-vs-rest SVM over binarized labels.
        train_y_binarize = label_binarize(train_y, classes=cl)
        test_y_binarize = label_binarize(test_y, classes=cl)
        if gaussian:
            grid = [{'estimator__kernel': ['rbf'],
                     'estimator__gamma': [1e-3, 1e-4],
                     'estimator__C': [1, 10, 100, 1000]},
                    {'estimator__kernel': ['linear'],
                     'estimator__C': [1, 10, 100, 1000]}]
        else:
            grid = [{'estimator__C': [1, 10, 100, 1000],
                     'estimator__kernel': ['linear']}]
        clf = GridSearchCV(OneVsRestClassifier(SVC(probability=True,
                                                   max_iter=max_iter)),
                           grid,
                           cv=StratifiedKFold(num_cv).split(train_x, train_y),
                           scoring=scoring, n_jobs=-1)
        clf.fit(train_x, train_y_binarize)
    else:
        if gaussian:
            grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
        else:
            grid = [{'C': [1, 10, 100, 1000], 'kernel': ['linear']}]
        clf = GridSearchCV(SVC(probability=True, max_iter=max_iter), grid,
                           cv=StratifiedKFold(num_cv).split(train_x, train_y),
                           scoring=scoring, n_jobs=-1)
        clf.fit(train_x, train_y)

    test_probs = clf.predict_proba(test_x)
    test_preds = np.argmax(test_probs, axis=1)
    test_stat_dict = get_stat_dict(test_y, test_probs, test_preds)

    if gaussian:
        # An RBF kernel has no per-feature weight vector to report.
        weights = None
        fpr, tpr, thresh = None, None, None
    elif num_class == 2:
        weights = np.array(clf.best_estimator_.coef_).reshape(-1)
        fpr, tpr, thresh = roc_curve(test_y, test_probs[:, 1])
    else:
        weights = np.array(clf.best_estimator_.coef_)
        fpr, tpr, thresh = None, None, None
    return clf.best_estimator_, test_stat_dict, tpr, fpr, thresh, weights, test_probs
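# Usage sketch for the SVM train() above (assumed to live in its own module,
# with get_stat_dict available). gaussian=True adds the RBF grid; with the
# default linear kernel the returned weights are the per-feature
# coefficients. Config values and synthetic data are illustrative only.
def _example_svm_train():
    from configparser import ConfigParser
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    config = ConfigParser()
    config['SVM'] = {'GridCV': '5'}
    x, y = make_classification(n_samples=150, n_features=20, random_state=7)
    x_tr, x_te, y_tr, y_te = train_test_split(x, y, stratify=y,
                                              random_state=7)
    est, stats, tpr, fpr, thresh, weights, probs = train(
        (x_tr, y_tr), (x_te, y_te), config, metric='AUC', gaussian=False)
    print(stats, weights.shape)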