def nn_SCM():
    """Train and evaluate a single multiclass MLP on the loaded feature data.

    Loads the features with ``load_features_data(correlation_threshold=0.05)``,
    converts the target with ``prepare_multiclass_target``, holds out 20 % of
    the rows as a test set and fits an ``MLPClassifier`` on the normalized
    training part.  Prints the training time, the cross-validation scores
    (with their mean) and the final test-set metrics.  Returns nothing.
    """
    features, raw_target = load_features_data(correlation_threshold=0.05)
    target = prepare_multiclass_target(raw_target)
    split = train_test_split(features, target, test_size=0.2,
                             random_state=42, shuffle=True)
    train_features, test_features, train_target, test_target = split

    # Train and test parts are normalized by two separate normalize_X calls.
    train_features = normalize_X(train_features)
    test_features = normalize_X(test_features)

    started_at = time.time()
    network = MLPClassifier(
        hidden_layer_sizes=(100, 100, 50),
        activation='relu',
        solver='adam',
        verbose=False,
        max_iter=1000,
        alpha=0.0001,
        batch_size=100,
        warm_start=True,
    )
    network = network.fit(train_features, train_target)
    print(f"trained in {time.time() - started_at} sec")

    # Predict before cross-validation, which receives the same model object.
    predicted = network.predict(test_features)

    banner = '-' * 15

    # Cross-validation is run on the full, un-normalized X and y.
    cv_scores = get_scores_for_cross_val(network, features, target)
    print(banner, 'CROSS VALIDATION SCORES', banner)
    for metric in cv_scores:
        values = cv_scores[metric]
        print(metric, ": ", values, " Average:", np.mean(values))

    final_scores = get_final_metrics(test_target, predicted)
    print(banner, 'FINAL SCORES SINGLE CLASSIFIER MODEL NEURAL NETWORK', banner)
    for metric in final_scores:
        print(metric, ":\n", final_scores[metric])
def nn_EWCM_simple():
    """Two-stage ("simple") ensemble evaluation with neural networks.

    Stage 1 calls ``make_binary_classification`` with ``['WALK']`` to split
    the held-out test trips into those predicted as WALK and the rest.
    Stage 2 calls ``make_multiclass_classification`` on the remaining trips.
    The predictions of both stages are combined in one frame, sorted by
    index, and scored against the original test labels with
    ``get_final_metrics``.  Results are printed; nothing is returned.
    """
    X, y_orig = load_features_data(correlation_threshold=0.05)
    X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
        X, y_orig, test_size=0.2, random_state=42, shuffle=True)

    start = time.time()
    print('-'*15, 'SEPARATIONG WALK AND NON-WALK', '-'*15)
    # Only the non-walk training part and the two test feature parts are used.
    (_, X_train_false, _, y_train_false,
     X_test_true, X_test_false, _, _) = make_binary_classification(
        X_train_orig, X_test_orig, y_train_orig, y_test_orig, ['WALK'])

    # Result test set: start with all trips which were classified as WALK.
    # Work on a copy so the label column is not written into a slice of the
    # frame returned by the helper.
    res = X_test_true.copy()
    res['mode'] = 'WALK'

    print('-'*15, 'SEPARATING ALL OTHER CLASSES', '-'*15)
    X_test_false = X_test_false.sort_index()
    y_pred = make_multiclass_classification(X_train_false, X_test_false, y_train_false)
    print(f"trained in {time.time() - start} sec")

    # Attach the stage-2 predictions only after the classifier has seen the
    # features, so 'mode' never becomes an input column.
    to_append = X_test_false
    to_append['mode'] = y_pred

    # Put the predictions from both stages together.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    res = pd.concat([res, to_append])
    res = res.sort_index()
    y_test_orig = y_test_orig.sort_index()
    y_pred = res['mode']

    scores = get_final_metrics(y_test_orig, y_pred)
    print('-' * 15, 'FINAL SCORES ENSEMBLE CLASSIFIER MODEL NEURAL NETWORK (SIMPLE)', '-' * 15)
    for score in scores:
        print(score, ":\n", scores[score])
def ensemble_classifier_resuts(training_data: np.array, labels: np.array) -> tuple:
    """Evaluate a two-stage decision-tree ensemble (WALK first, then the rest).

    Stage 1 fits a ``DecisionTreeClassifier(criterion='entropy')`` on a binary
    WALK / non-WALK target and scores it on a 20 % hold-out.  Every sample of
    ``training_data`` that this tree predicts as 1 is then removed, and the
    same classifier object is re-fitted on the multiclass target of the
    remaining samples and scored on a fresh 20 % hold-out.

    Args:
        training_data: feature table; it is used through the pandas API
            (``.loc``), so in practice a ``pd.DataFrame``.
        labels: targets aligned row-by-row with ``training_data``.

    Returns:
        ``(binary_classifying_results, non_walk_predictions)`` - the
        ``get_final_metrics`` results of stage 1 and stage 2.
    """
    x = training_data
    y = pd.DataFrame.copy(labels)
    y = prepare_binary_target(y, ['WALK'])
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

    dtc = DecisionTreeClassifier(criterion='entropy')
    start = time.time()
    dtc.fit(x_train, y_train)
    print(f"trained in {time.time() - start} sec")
    y_pred = dtc.predict(x_test)
    binary_classifying_results = get_final_metrics(y_test, y_pred)

    # One batch prediction over the whole data set replaces the former
    # row-by-row predict loop; label 1 means "walk".
    walk_mask = np.ravel(dtc.predict(x)) == 1

    # Remove 'walk'-predicted samples from the data set and the label array.
    # A positional boolean mask is used instead of `index[[positions]]`, whose
    # nested list is a 2-D indexer that current numpy/pandas reject.
    non_walk_data_to_learn = x.loc[~walk_mask]
    non_walk_labels = pd.DataFrame.copy(labels).loc[~walk_mask]

    # Train on the non-walk classified data.
    # labels := {'TRAM', 'TRAIN', 'METRO', 'CAR', 'BUS' 'BICYCLE'}
    non_walk_labels = prepare_multiclass_target(non_walk_labels)
    x_train, x_test, y_train, y_test = train_test_split(
        non_walk_data_to_learn, non_walk_labels, test_size=0.2, random_state=1)

    start = time.time()
    dtc.fit(x_train, y_train)
    print(f"trained in {time.time() - start} sec")
    y_pred = dtc.predict(x_test)
    non_walk_predictions = get_final_metrics(y_test, y_pred)

    return binary_classifying_results, non_walk_predictions
def single_classifier(X: pd.DataFrame, y: np.ndarray, kernel_name="linear"):
    """Fit one SVC model on a multiclass target and print its evaluation.

    Args:
        X: feature table.
        y: target values; converted with ``prepare_multiclass_target``.
        kernel_name: key used to look the model up in ``models_dict``.

    Prints the split shapes, the fit + predict time, the confusion matrix,
    the ``get_final_metrics`` result and the cross-validation scores.
    Returns ``None``.
    """
    print("*** SINGLE CLASSIFIER ***")
    print(f"\tkernel: {kernel_name}")

    # String target values become numbers.
    y = prepare_multiclass_target(y)

    # The model object is shared through models_dict, not created here.
    classifier = models_dict[kernel_name]

    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, shuffle=True)
    named_parts = (
        ("x_train", x_train),
        ("y_train", y_train),
        ("x_test", x_test),
        ("y_test", y_test),
    )
    for part_name, part in named_parts:
        print(f"shape {part_name}: {part.shape}")
    print()

    print("*" * 15)
    print(f"MODEL: svc-{kernel_name}")

    # Time training and prediction together.
    started_at = time.time()
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)
    print(f"trained and predicted in {time.time() - started_at} sec")
    print()

    print("confusion matrix: ")
    # NOTE(review): the label set is taken from the predictions only, so a
    # class present in y_test but never predicted gets no row/column -
    # confirm this is intended.
    predicted_labels = np.unique(y_pred)
    print(confusion_matrix(y_test, y_pred, labels=predicted_labels))
    print()

    print("scoring in all metrics: ")
    print(get_final_metrics(y_test, y_pred))
    print()

    print("scores in cross validation:")
    cv_scores = get_scores_for_cross_val(classifier, X, y)
    for metric in cv_scores:
        print(metric, ": ", cv_scores[metric])
    print()
def make_binary_classification(X_train_orig, X_test_orig, y_train_orig, y_test_orig, mode):
    """Train a binary MLP for ``mode`` and split train/test data by the result.

    The target is binarized with ``prepare_binary_target(..., mode)``, the
    features are normalized with ``normalize_X``, and an ``MLPClassifier`` is
    cross-validated on the training part, fitted, and scored on the test part
    (all scores are printed).

    The test data is split by the model's *prediction* (label 1 -> "true"),
    while the training data is split by its *actual* label (``'mode'`` column
    of ``y_train_orig`` is one of ``mode``).

    Args:
        X_train_orig, X_test_orig: un-normalized feature frames.
        y_train_orig, y_test_orig: label frames sharing the index of the
            corresponding feature frame; ``y_train_orig`` must have a
            ``'mode'`` column.
        mode: list of mode names forming the positive class.

    Returns:
        ``(X_train_true, X_train_false, y_train_true, y_train_false,
        X_test_true, X_test_false, y_test_true, y_test_false)`` - all taken
        from the un-normalized ``*_orig`` inputs.
    """
    y_train = prepare_binary_target(y_train_orig, mode)
    y_test = prepare_binary_target(y_test_orig, mode)

    # Normalize X (train and test are normalized by separate calls).
    X_train = normalize_X(X_train_orig)
    X_test = normalize_X(X_test_orig)

    model = MLPClassifier(hidden_layer_sizes=(80, 60), activation='relu', solver='adam',
                          verbose=False, max_iter=1000, alpha=0.0001, batch_size=100,
                          warm_start=True)

    # Cross-validation scores.
    scores = get_scores_for_cross_val(model, X_train, y_train)
    for score in scores:
        print(score, ": ", scores[score], " Average:", np.mean(scores[score]))

    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    scores = get_final_metrics(y_test, y_pred)
    print('-' * 15, 'SCORES BINARY CLASSIFIER WALK/NON-WALK', '-' * 15)
    for score in scores:
        print(score, ":\n", scores[score])

    # Index labels of test trips classified as "true" (prediction == 1).
    # y_pred already holds the prediction for every test row, so it is reused
    # instead of calling model.predict once per row.
    predicted_true = np.ravel(y_pred) == 1
    true_index = list(X_test.index[predicted_true])

    # Separate "true" and "false" test trips.
    X_test_false = X_test_orig.drop(true_index)
    y_test_false = y_test_orig.drop(true_index)
    X_test_true = X_test_orig.loc[true_index]
    y_test_true = y_test_orig.loc[true_index]

    # Training rows are separated by their real label, in X_train index order.
    train_modes = y_train_orig.loc[X_train.index, 'mode']
    is_target_mode = np.asarray(train_modes.isin(list(mode)))
    true_index = list(X_train.index[is_target_mode])

    # Separate "true" and "false" training trips.
    X_train_false = X_train_orig.drop(true_index)
    y_train_false = y_train_orig.drop(true_index)
    X_train_true = X_train_orig.loc[true_index]
    y_train_true = y_train_orig.loc[true_index]

    return X_train_true, X_train_false, y_train_true, y_train_false, X_test_true, \
        X_test_false, y_test_true, y_test_false
def single_classifier_results(training_data: np.array, labels: np.array) -> dict:
    """Fit one gini decision tree on the multiclass target and score it.

    Args:
        training_data: feature table passed to ``train_test_split``.
        labels: targets; a copy is converted with
            ``prepare_multiclass_target`` so the caller's object is untouched.

    Returns:
        The ``get_final_metrics`` result for the 20 % hold-out split.
    """
    target = prepare_multiclass_target(pd.DataFrame.copy(labels))
    x_train, x_test, y_train, y_test = train_test_split(
        training_data, target, test_size=0.2, random_state=1)

    tree = DecisionTreeClassifier(criterion='gini')

    # Only the fit itself is timed.
    fit_started = time.time()
    tree.fit(x_train, y_train)
    print(f"trained in {time.time() - fit_started} sec")

    # Metrics are computed on the held-out test part.
    return get_final_metrics(y_test, tree.predict(x_test))
def ensemble_classifier(X: pd.DataFrame, y: np.ndarray, kernel_name="linear"):
    """Run a cascade of one-vs-rest SVC classifiers over ``mode_list``.

    For every mode in ``mode_list`` the model from ``models_dict[kernel_name]``
    is fitted on the full training split with a binary target for that mode,
    evaluated on the *remaining* test data (confusion matrix and
    ``get_final_metrics`` are printed), and every test sample predicted as the
    mode is removed from the test data before the next mode is processed.
    The training data is never reduced.

    Args:
        X: feature table.
        y: targets aligned with ``X``; after the split it is used through the
            pandas API (``.drop``).
        kernel_name: key used to look the model up in ``models_dict``.

    Returns ``None``.
    """
    print(f"*** ENSEMBLE CLASSIFIER ***")
    print(f"\tkernel: {kernel_name}")

    # Split data.
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, shuffle=True)
    print(f"\tshape x_train: {x_train.shape}")
    print(f"\tshape y_train: {y_train.shape}")

    start = time.time()

    # Consecutive binary classification.
    for mode in mode_list:
        print()
        print(f"*** {mode} ***")

        # Prepare target vector (the matching mode is set to 1, all others to 0).
        y_test_binary = prepare_binary_target(y_test, [mode])
        y_train_binary = prepare_binary_target(y_train, [mode])

        # Retrieve model object from models_dict (same object for every mode).
        svc_model = models_dict[kernel_name]

        print(f"\tshape x_test: {x_test.shape}")
        print(f"\tshape y_test: {y_test_binary.shape}")
        print()

        # Train model.
        svc_model.fit(x_train, y_train_binary)

        # Compute confusion matrix.
        y_pred = svc_model.predict(x_test)
        c_matrix = confusion_matrix(y_test_binary, y_pred)
        print("confusion matrix: ")
        print(c_matrix)
        print()

        # Print metrics.
        print("scoring in all metrics: ")
        scoring = get_final_metrics(y_test_binary, y_pred)
        for key, value in scoring.items():
            print(f"\t{key}: {value}")
        print()

        # Remove objects classified as the target mode from TEST data only.
        # y_pred already contains the prediction for every remaining test
        # row, so it is reused instead of predicting each row again.
        true_index_list = list(x_test.index[np.ravel(y_pred) == 1])
        y_test = y_test.drop(true_index_list)
        x_test = x_test.drop(true_index_list)

    print(f"*****" * 10)
    print(f"trained and predicted in {time.time() - start} sec")
    print()
    return
def nn_EWCM_umbrella():
    """Hierarchical ("umbrella") ensemble evaluation with neural networks.

    Cascade, every step using ``make_binary_classification`` trained on the
    full training split:

    1. WALK vs. non-WALK on the test split.
    2. rail (TRAIN, METRO, TRAM) vs. road (BICYCLE, CAR, BUS) on the non-WALK
       test trips.
    3. One binary classifier per rail mode on the rail-predicted trips; trips
       predicted as the mode receive that label and leave the pool.
    4. Rail trips left unlabeled are added to the road-predicted trips, and
       one binary classifier per road mode is run the same way.
    5. Everything still unlabeled is labeled 'WALK'.

    The combined labels are sorted by index and scored against the original
    test labels with ``get_final_metrics``.  Results are printed; nothing is
    returned.
    """
    X, y_orig = load_features_data(correlation_threshold=0.05)
    X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
        X, y_orig, test_size=0.2, random_state=42, shuffle=True)

    start = time.time()
    print('-'*15, 'SEPARATIONG WALK AND NON-WALK', '-'*15)
    (_, _, _, _,
     X_test_true, X_test_false, _, y_test_false) = make_binary_classification(
        X_train_orig, X_test_orig, y_train_orig, y_test_orig, ['WALK'])

    # Result test set: start with all trips which were classified as WALK.
    # Copies are labeled so 'mode' is never written into a slice of a frame
    # returned by the helper.
    res = X_test_true.copy()
    res['mode'] = 'WALK'

    # Separate rail and road.
    print('-'*15, 'SEPARATIONG RAIL AND ROAD', '-'*15)
    rail = ['TRAIN', 'METRO', 'TRAM']
    road = ['BICYCLE', 'CAR', 'BUS']
    (_, _, _, _,
     X_test_rail, X_test_road, y_test_rail, y_test_road) = make_binary_classification(
        X_train_orig, X_test_false, y_train_orig, y_test_false, rail)

    # Rail cascade: the pool starts with the rail-predicted test trips.
    X_test_false, y_test_false = X_test_rail, y_test_rail
    for mode in rail:
        print('-' * 15, 'SEPARATIONG ', mode.upper() +' AND NON-', mode.upper(), '-' * 15)
        (_, _, _, _,
         X_test_true, X_test_false, _, y_test_false) = make_binary_classification(
            X_train_orig, X_test_false, y_train_orig, y_test_false, [mode])
        # Add classified trips to the result test set.
        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        to_append = X_test_true.copy()
        to_append['mode'] = mode
        res = pd.concat([res, to_append])

    # Rail trips that no rail classifier claimed.
    to_append_X = X_test_false
    to_append_y = y_test_false

    # Road cascade: road-predicted trips plus the unclaimed rail trips.
    X_test_false = pd.concat([X_test_road, to_append_X])
    y_test_false = pd.concat([y_test_road, to_append_y])
    for mode in road:
        print('-' * 15, 'SEPARATIONG ', mode.upper() +' AND NON-', mode.upper(), '-' * 15)
        (_, _, _, _,
         X_test_true, X_test_false, _, y_test_false) = make_binary_classification(
            X_train_orig, X_test_false, y_train_orig, y_test_false, [mode])
        # Add classified trips to the result test set.
        to_append = X_test_true.copy()
        to_append['mode'] = mode
        res = pd.concat([res, to_append])

    # Set all trips which were not classified to walk.
    to_append = X_test_false.copy()
    to_append['mode'] = 'WALK'
    res = pd.concat([res, to_append])

    y_pred = res['mode']
    y_pred = y_pred.sort_index()
    y_test_orig = y_test_orig.sort_index()
    print(f"trained in {time.time() - start} sec")

    scores = get_final_metrics(y_test_orig, y_pred)
    print('-'*15, 'FINAL SCORES ENSEMBLE CLASSIFIER NEURAL NETWORK (UMBRELLA)', '-'*15)
    for score in scores:
        print(score, ":\n", scores[score])