def execute_creation(source_folder, files_amount):
    """
    Create test route files: one directory per route, each holding two
    instances of every attack type (Mixed, Velocity, Constant, Height).

    :param source_folder: source path
    :param files_amount: number of routes to create
    :return: None
    """
    # First pass: create all route directories up front.
    for i in range(files_amount):
        rout_name = "rout_" + str(i)
        create_directories(f'{source_folder}/{rout_name}')
    # Second pass: populate each route with test sets.
    for i in range(files_amount):
        rout_name = "rout_" + str(i)
        num_of_way_points = random.randint(10, 20)
        for j in range(2):
            # One test set per attack type; each call draws its own random
            # spoofed way point (deduplicated from four copy-pasted calls).
            for attack in ("Mixed", "Velocity", "Constant", "Height"):
                create_test_set(num_of_way_points=num_of_way_points,
                                velocity_state=get_random_state(),
                                height_state=get_random_state(),
                                directory_path=f'{source_folder}/{rout_name}',
                                file_name="{0}_Attack_{1}".format(attack, j),
                                spoofed_way_point=random.randint(
                                    6, num_of_way_points - 2),
                                attack=attack)
def create_attack_files(source_directory, files_amount, route_name):
    """
    Create the folder layout for attack files under a single route.

    :param source_directory: source path
    :param files_amount: files amount; acts only as a guard — when it is
        <= 0 no directories are created, matching the original loop that
        ran zero iterations in that case
    :param route_name: route folder name
    :return: None
    """
    # The original looped over range(files_amount) but re-created the very
    # same directories on every iteration (route_name does not depend on
    # the loop index).  One pass is sufficient; the files_amount <= 0
    # "create nothing" behavior is preserved.
    if files_amount <= 0:
        return
    create_directories(f'{source_directory}/{route_name}')
    for attack in [
            'Constant Attack', 'Height Attack', 'Velocity Attack',
            'Mixed Attack'
    ]:
        create_directories(f'{source_directory}/{route_name}/{attack}')
def plot_tuning_results(mses_train, ml_name, input_path, scaler, factor):
    """
    Plot tuning results of the window-size parameter as a styled box plot
    and save the figure under <input_path>/<ml_name>/<scaler>/.

    :param mses_train: numpy array of train MSEs
    :param ml_name: machine learning model name
    :param input_path: tuning directory
    :param scaler: scaler string
    :param factor: stable number to factor the matrix
    :return: plot saved to disk
    """
    edge_color = "blue"
    fill_color = "lightblue"
    line_color = "black"

    scaled_scores = np.transpose(mses_train) * factor
    box_plot = plt.boxplot(scaled_scores, patch_artist=True)
    for part in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
        plt.setp(box_plot[part], color=line_color)
    plt.setp(box_plot["boxes"], facecolor=fill_color)
    plt.setp(box_plot["fliers"], markeredgecolor=edge_color)

    plt.title(
        "Anomaly prediction over the simulator data set - {0} Model".format(
            ml_name))
    plt.ylabel(
        "Testing log(MSE)s of the records = Actual MSE * {0}".format(factor))
    plt.xlabel("Setting of n_prev")

    # Pad the y-axis 30% beyond the observed range.
    y_low = 0.7 * np.amin(mses_train * factor)
    y_high = 1.3 * np.amax(mses_train * factor)
    plt.gcf().set_size_inches(12, 9)
    plt.gca().set_ylim([y_low, y_high])

    ml_directory_route = os.path.join(input_path, ml_name)
    create_directories(ml_directory_route)
    plot_directory_route = os.path.join(ml_directory_route, scaler)
    create_directories(plot_directory_route)

    current_time = get_current_time()
    file_name = str(ml_name) + '_' + str(scaler) + '_' + str(
        current_time) + '.png'
    plt_path = os.path.join(str(plot_directory_route), file_name)
    plt.savefig(f"{plt_path}")
    plt.clf()
def create_train_set(source_folder, files_amount, velocity_state,
                     height_state):
    """
    Create train route files: one directory and one train file per route.

    :param source_folder: source path
    :param files_amount: files amount
    :param velocity_state: 'Up' , 'Down' or 'Stable'
    :param height_state: 'Up' , 'Down' or 'Stable'
    :return: None
    """
    for index in range(files_amount):
        route_name = f"route_{index}"
        route_path = f'{source_folder}/{route_name}'
        create_directories(route_path)
        # Each route gets a random number of way points in [10, 20].
        create_train_file(num_of_way_points=random.randint(10, 20),
                          velocity_state=velocity_state,
                          height_state=height_state,
                          directory_path=route_path,
                          file_name=route_name)
def move_train_files_to_target_path(source_directory, target_directory,
                                    train_folder_name):
    """
    Move train sensor files from the source path into per-route folders
    under the target path, renaming each one to "without_anom.csv".

    :param source_directory: source path
    :param target_directory: target path
    :param train_folder_name: the name of the route
    :return: None
    """
    for index, route in enumerate(get_subdirectories(source_directory)):
        renamed_route = f"{train_folder_name}Route_{index}"
        current_directory = os.path.join(source_directory, route)
        sensors_file = get_sensors_file(current_directory)
        destination = f'{target_directory}/{renamed_route}'
        create_directories(destination)
        move_file_to_target_path(current_directory, destination,
                                 sensors_file, "without_anom.csv")
def move_test_files_to_target_path(source_directory, target_directory,
                                   test_folder_name, attack_name):
    """
    Move test sensor files into <target>/<test_folder_name>/<attack_name>,
    renaming each one to "sensors_<index>.csv".

    :param source_directory: source path
    :param target_directory: target path
    :param test_folder_name: the name of the route
    :param attack_name: the name of the attack
    :return: None
    """
    for index, route in enumerate(get_subdirectories(source_directory)):
        current_directory = os.path.join(source_directory, route)
        sensors_file = get_sensors_file(current_directory)
        middle_target_directory = os.path.join(target_directory,
                                               test_folder_name)
        create_directories(middle_target_directory)
        full_path = os.path.join(middle_target_directory, attack_name)
        create_directories(full_path)
        move_file_to_target_path(current_directory, full_path, sensors_file,
                                 "sensors_{0}.csv".format(index))
def execute_creation(source_directory, target_directory):
    """
    Create test route files from existing train routes.

    For every route of every flight under source_directory, a route folder
    is created under target_directory and one test set is generated per
    attack type (Mixed, Velocity, Height, Constant).

    :param source_directory: directory that contains the train routes
    :param target_directory: target path that the test files will be save there
    :return: None
    """
    flight_files = get_subdirectories(source_directory)
    for flight in flight_files:
        current_flight_path = os.path.join(source_directory, flight)
        routes_files = get_subdirectories(current_flight_path)
        for index, route in enumerate(routes_files):
            full_route_name = str(flight) + "_Route_" + str(index)
            route_dir = os.path.join(current_flight_path, route)
            # Create the route folder in the target directory.
            create_directories(f'{target_directory}/{full_route_name}')
            train_file_df = pd.read_csv(f'{route_dir}/{route}.csv')
            # The last 4 way points are excluded from the count.
            num_of_way_points = len(train_file_df["NameOfWpt"]) - 4
            # Deduplicated from four copy-pasted calls; the dead
            # `for j in range(1)` wrapper (single iteration, j unused)
            # was dropped.  Each call draws its own random spoofed
            # way point near the end of the route.
            for attack in ("Mixed", "Velocity", "Height", "Constant"):
                create_test_set(
                    num_of_way_points=num_of_way_points,
                    directory_path=f'{target_directory}/{full_route_name}',
                    file_name="{0}_Attack".format(attack),
                    spoofed_way_point=random.randint(num_of_way_points - 3,
                                                     num_of_way_points - 1),
                    attack=attack,
                    source_df=train_file_df)
def run_model(training_data_path, test_data_path, results_path,
              similarity_score, save_model, new_model_running,
              algorithm_path, threshold, features_list, target_features_list,
              train_scaler_path, target_scaler_path, event):
    """
    Run SVR model process: train (or load) a model per flight route,
    predict over the test set per similarity function, and dump
    tpr/fpr/acc/delay score CSVs plus a summary report under
    <results_path>/svr/<timestamp>/<similarity>/<flight_route>/.

    :param training_data_path: train data set directory path
    :param test_data_path: test data set directory path
    :param results_path: results directory path
    :param similarity_score: chosen similarity functions
    :param save_model: indicator whether the user want to save the model or not
    :param new_model_running: indicator whether we are in new model creation flow or not
    :param algorithm_path: path of existing algorithm
    :param threshold: saved threshold for load model flow
    :param features_list: saved chosen features for load model flow
    :param target_features_list: all the features in the test data set for the target
    :param train_scaler_path: path of existing input train scaler directory
    :param target_scaler_path: path of existing input target scaler directory
    :param event: running state flag
    :return: reported results for SVR execution
    """
    # Choose between new model creation flow and load existing model flow
    if new_model_running:
        kernel, gamma, epsilon, threshold, window_size = get_svr_new_model_parameters(
        )
    else:
        # Load-model flow: restore the pickled model and both scalers.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # confirm these paths are trusted.
        svr_model = pickle.load(open(algorithm_path, 'rb'))
        X_train_scaler = pickle.load(open(train_scaler_path, 'rb'))
        Y_train_scaler = pickle.load(open(target_scaler_path, 'rb'))
        window_size = svr_model.n_prev
        # No training data in this flow; execute_predict receives None.
        X_train = None
        Y_train = None

    FLIGHT_ROUTES = get_subdirectories(test_data_path)

    current_time = get_current_time()
    # Results layout: <results_path>/svr/<timestamp>/<similarity>/...
    current_time_path = os.path.join(
        *[str(results_path), 'svr', str(current_time)])
    create_directories(f"{current_time_path}")

    # Create sub directories for each similarity function
    for similarity in similarity_score:
        similarity_path = os.path.join(
            *[str(current_time_path), str(similarity)])
        create_directories(f"{similarity_path}")

    # Train the model for each flight route
    for flight_route in FLIGHT_ROUTES:
        # Execute training for new model flow
        if new_model_running:
            svr_model, X_train_scaler, Y_train_scaler, X_train, Y_train = execute_train(
                flight_route,
                training_data_path=training_data_path,
                kernel=kernel,
                gamma=gamma,
                epsilon=epsilon,
                features_list=features_list,
                window_size=window_size,
                target_features_list=target_features_list,
                event=event)

        # Get results for each similarity function
        for similarity in similarity_score:
            current_results_path = os.path.join(
                *[str(current_time_path), str(similarity), str(flight_route)])
            create_directories(f"{current_results_path}")
            tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attacks_duration = execute_predict(
                flight_route,
                test_data_path=test_data_path,
                similarity_score=similarity,
                threshold=threshold,
                svr_model=svr_model,
                X_train_scaler=X_train_scaler,
                results_path=current_results_path,
                add_plots=True,
                run_new_model=new_model_running,
                X_train=X_train,
                features_list=features_list,
                target_features_list=target_features_list,
                save_model=save_model,
                Y_train_scaler=Y_train_scaler,
                Y_train=Y_train,
                window_size=window_size,
                event=event)

            # Persist one CSV per metric for this route/similarity pair.
            df = pd.DataFrame(tpr_scores)
            tpr_path = os.path.join(
                *[str(current_results_path), str(flight_route) + '_tpr.csv'])
            df.to_csv(f"{tpr_path}", index=False)

            df = pd.DataFrame(fpr_scores)
            fpr_path = os.path.join(
                *[str(current_results_path), str(flight_route) + '_fpr.csv'])
            df.to_csv(f"{fpr_path}", index=False)

            df = pd.DataFrame(acc_scores)
            acc_path = os.path.join(
                *[str(current_results_path), str(flight_route) + '_acc.csv'])
            df.to_csv(f"{acc_path}", index=False)

            df = pd.DataFrame(delay_scores)
            delay_path = os.path.join(
                *[str(current_results_path), str(flight_route) + '_delay.csv'])
            df.to_csv(f"{delay_path}", index=False)

    algorithm_name = "SVR"

    # Report results for training data to csv files
    # NOTE(review): routes_duration/attacks_duration come from the last
    # loop iteration only — confirm this is the intended reporting input.
    for similarity in similarity_score:
        report_similarity_path = os.path.join(
            *[str(results_path), 'svr', str(current_time), str(similarity)])
        report_results(f"{report_similarity_path}", test_data_path,
                       FLIGHT_ROUTES, algorithm_name, similarity,
                       routes_duration, attacks_duration)
def predict_train_set(svr_model, X_train, save_model, add_plots, threshold,
                      features_list, target_features_list, results_path,
                      flight_route, similarity_score, X_train_scaler, Y_train,
                      Y_train_scaler):
    """
    Execute prediction on the train data set and derive the anomaly
    threshold; optionally plot training performance and persist the
    model, its metadata and both scalers.

    :param svr_model: SVR model
    :param X_train: train input data frame
    :param save_model: indicator whether the user want to save the model or not
    :param add_plots: indicator whether to add plots or not
    :param threshold: threshold from the train
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param results_path: the path of results directory
    :param flight_route: current flight route we are working on
    :param similarity_score: similarity function
    :param X_train_scaler: train input normalization scalar
    :param Y_train: train target data frame
    :param Y_train_scaler: train target normalization scalar
    :return: threshold
    """
    X_pred = svr_model.predict(X_train)
    scores_train = []

    # Align the targets with the predictions via the model's own
    # preprocessing (presumably windowing by n_prev — TODO confirm),
    # so both sequences have the same length.
    Y_train_preprocessed = svr_model._preprocess(Y_train, Y_train)[1]
    assert len(Y_train_preprocessed) == len(X_pred)

    # Score each training sample against its prediction.
    for i, pred in enumerate(X_pred):
        scores_train.append(
            anomaly_score(Y_train_preprocessed[i], pred, similarity_score))

    # choose threshold for which <MODEL_THRESHOLD_FROM_TRAINING_PERCENT> % of training were lower
    threshold = get_threshold(scores_train, threshold)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    if add_plots:
        train_figures_results_path = os.path.join(figures_results_path,
                                                  "Train")
        create_directories(train_figures_results_path)
        # One actual-vs-predicted plot per target feature.
        for i, target_feature in enumerate(target_features_list):
            title = "Training performance of SVR for " + target_feature + " in " + flight_route
            plot_prediction_performance(
                Y_train=Y_train_preprocessed[:, i],
                X_pred=X_pred[:, i],
                results_path=train_figures_results_path,
                title=title)

    # Save created model if the indicator is true
    if save_model:
        # Metadata needed to reload the model later.
        data = {}
        data['features'] = features_list
        data['target_features'] = target_features_list
        data['threshold'] = threshold
        data['params'] = get_svr_parameters_dictionary()

        model_results_path = os.path.join(results_path, "model_data")
        create_directories(model_results_path)
        model_data_path = os.path.join(
            *[str(model_results_path), 'model_data.json'])
        with open(f"{model_data_path}", 'w') as outfile:
            json.dump(data, outfile)

        # Pickle the model itself and both scalers alongside the metadata.
        save_model_file_path = os.path.join(model_results_path,
                                            flight_route + "_model.pkl")
        with open(save_model_file_path, 'wb') as file:
            pickle.dump(svr_model, file)

        save_input_scaler_file_path = os.path.join(
            model_results_path, flight_route + "_train_scaler.pkl")
        with open(save_input_scaler_file_path, 'wb') as file:
            pickle.dump(X_train_scaler, file)

        save_target_scaler_file_path = os.path.join(
            model_results_path, flight_route + "_target_scaler.pkl")
        with open(save_target_scaler_file_path, 'wb') as file:
            pickle.dump(Y_train_scaler, file)

    return threshold
def execute_predict(flight_route,
                    test_data_path=None,
                    similarity_score=None,
                    threshold=None,
                    svr_model=None,
                    X_train_scaler=None,
                    results_path=None,
                    add_plots=True,
                    run_new_model=False,
                    X_train=None,
                    features_list=None,
                    target_features_list=None,
                    save_model=False,
                    Y_train_scaler=None,
                    Y_train=None,
                    window_size=None,
                    event=None):
    """
    Execute predictions function for a specific flight route: derive the
    threshold (new-model flow only), then score every test CSV of every
    attack folder under the route and collect the detection metrics.

    :param flight_route: current flight route we should train on
    :param test_data_path: the path of test data directory
    :param similarity_score: similarity function
    :param threshold: threshold from the train
    :param svr_model: SVR model
    :param X_train_scaler: normalization train input scalar
    :param results_path: the path of results directory
    :param add_plots: indicator whether to add plots or not
    :param run_new_model: indicator whether current flow is new model creation or not
    :param X_train: train input data frame
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param save_model: indicator whether the user want to save the model or not
    :param Y_train_scaler: normalization train target scalar
    :param Y_train: train target data frame
    :param window_size: window size for each instance in training
    :param event: running state flag
    :return: tpr scores, fpr scores, acc scores, delay scores, routes duration, attack duration
    """
    # Per-attack metric accumulators (attack name -> list of scores).
    tpr_scores = defaultdict(list)
    fpr_scores = defaultdict(list)
    acc_scores = defaultdict(list)
    delay_scores = defaultdict(list)
    routes_duration = defaultdict(list)
    attack_duration = defaultdict(list)

    # Set a threshold in new model creation flow
    if run_new_model:
        # Blocks until the caller signals the run may proceed (pause/resume).
        event.wait()
        threshold = predict_train_set(svr_model, X_train, save_model,
                                      add_plots, threshold, features_list,
                                      target_features_list, results_path,
                                      flight_route, similarity_score,
                                      X_train_scaler, Y_train, Y_train_scaler)

    flight_dir = os.path.join(test_data_path, flight_route)
    ATTACKS = get_subdirectories(flight_dir)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)
    attacks_figures_results_path = os.path.join(figures_results_path,
                                                "Attacks")
    create_directories(attacks_figures_results_path)

    # Iterate over all attacks in order to find anomalies
    for attack in ATTACKS:
        event.wait()
        # Strip any "_<suffix>" from the folder name for the figures dir.
        attack_name = attack
        if "_" in attack_name:
            attack_name = attack_name.split("_")[0]
        current_attack_figures_results_path = os.path.join(
            attacks_figures_results_path, attack_name)
        create_directories(current_attack_figures_results_path)

        attacks_path = os.path.join(
            *[str(test_data_path), str(flight_route), str(attack)])
        for flight_csv in os.listdir(f"{attacks_path}"):
            flight_attack_path = os.path.join(
                *[str(attacks_path), str(flight_csv)])
            df_test_source = pd.read_csv(f"{flight_attack_path}")

            # Ground-truth attack labels, preprocessed the same way as the
            # targets so they align with the predictions.
            Y_test_labels = df_test_source[[ATTACK_COLUMN]].values
            Y_test_labels_preprocessed = svr_model._preprocess(
                Y_test_labels, Y_test_labels)[1]

            attack_time = len(Y_test_labels)

            input_df_test = df_test_source[features_list]
            target_df_test = df_test_source[target_features_list]

            # Step 1 : Clean test data set
            input_clean_df_test = clean_data(input_df_test)
            target_clean_df_test = clean_data(target_df_test)

            # Step 2: Normalize the data using the scalers fit on train data
            X_test = X_train_scaler.transform(input_clean_df_test)

            # Y_test = normalize_data(data=target_clean_df_test,
            #                         scaler="power_transform")[0]
            Y_test = Y_train_scaler.transform(target_clean_df_test)
            Y_test_preprocessed = svr_model._preprocess(Y_test, Y_test)[1]

            X_pred = svr_model.predict(X_test)
            assert len(Y_test_preprocessed) == len(X_pred)

            # Anomaly score per test sample.
            scores_test = []
            for i, pred in enumerate(X_pred):
                scores_test.append(
                    anomaly_score(Y_test_preprocessed[i], pred,
                                  similarity_score))

            # Add reconstruction error scatter if plots indicator is true
            event.wait()
            if add_plots:
                plot_reconstruction_error_scatter(
                    scores=scores_test,
                    labels=Y_test_labels_preprocessed,
                    threshold=threshold,
                    plot_dir=current_attack_figures_results_path,
                    title=
                    f'Outlier Score Testing for {flight_csv} in {flight_route}({attack})'
                )
                for i, target_feature in enumerate(target_features_list):
                    title = "Test performance of SVR for " + target_feature + " feature in " + flight_csv
                    plot_prediction_performance(
                        Y_train=Y_test_preprocessed[:, i],
                        X_pred=X_pred[:, i],
                        results_path=current_attack_figures_results_path,
                        title=title)

            # Binary anomaly decision: score at/above threshold -> anomaly.
            predictions = [1 if x >= threshold else 0 for x in scores_test]

            # Add roc curve if plots indicator is true
            if add_plots:
                pass
                # plot_roc(y_true=Y_test,y_pred=predictions, plot_dir=results_path,title=f'ROC Curve - {flight_csv} in {flight_route}({attack})')

            attack_start, attack_end = get_attack_boundaries(
                df_test_source[ATTACK_COLUMN])

            # method_scores is indexed (tpr, fpr, acc, delay, duration) —
            # presumably, based on the appends below; confirm in helper.
            method_scores = get_method_scores(predictions,
                                              attack_start,
                                              attack_end,
                                              add_window_size=True,
                                              window_size=window_size)

            tpr_scores[attack].append(method_scores[0])
            fpr_scores[attack].append(method_scores[1])
            acc_scores[attack].append(method_scores[2])
            delay_scores[attack].append(method_scores[3])
            routes_duration[attack].append(attack_time)
            attack_duration[attack].append(method_scores[4])

    return tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attack_duration
def predict_train_set(lstm, X_train, save_model, add_plots, threshold,
                      features_list, target_features_list, results_path,
                      flight_route, similarity_score, X_train_scaler, Y_train,
                      Y_train_scaler):
    """
    Execute prediction on the train data set and derive the anomaly
    threshold for the LSTM model; optionally plot mean training
    performance and persist the model (.h5), its metadata and both
    scalers.

    :param lstm: LSTM model
    :param X_train: train input data frame
    :param save_model: indicator whether the user want to save the model or not
    :param add_plots: indicator whether to add plots or not
    :param threshold: threshold from the train
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param results_path: the path of results directory
    :param flight_route: current flight route we are working on
    :param similarity_score: similarity function
    :param X_train_scaler: train input normalization scalar
    :param Y_train: train target data frame
    :param Y_train_scaler: train target normalization scalar
    :return: threshold
    """
    X_pred = lstm.predict(X_train, verbose=0)
    scores_train = []

    # Multi-step score per window (Y_train is windowed — each element is
    # compared against the matching prediction window).
    for i, pred in enumerate(X_pred):
        scores_train.append(
            anomaly_score_multi(Y_train[i], pred, similarity_score))

    # choose threshold for which <LSTM_THRESHOLD_FROM_TRAINING_PERCENT> % of training were lower
    threshold = get_threshold(scores_train, threshold)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    if add_plots:
        train_figures_results_path = os.path.join(figures_results_path,
                                                  "Train")
        create_directories(train_figures_results_path)
        # Collapse each window to its mean so actual vs. predicted can be
        # plotted per feature as a single series.
        mean_x_actual = multi_mean(Y_train)
        mean_x_pred = multi_mean(X_pred)

        assert mean_x_actual.shape == mean_x_pred.shape

        for i, target_feature in enumerate(target_features_list):
            title = "Training performance of LSTM for " + target_feature + " in " + flight_route
            plot_prediction_performance(
                Y_train=mean_x_actual[:, i],
                X_pred=mean_x_pred[:, i],
                results_path=train_figures_results_path,
                title=title,
                y_label="Sensor's Mean Value")

    # Save created model if the indicator is true
    if save_model:
        # Metadata needed to reload the model later.
        data = {}
        data['features'] = features_list
        data['target_features'] = target_features_list
        data['threshold'] = threshold
        data['params'] = get_lstm_parameters_dictionary()

        model_results_path = os.path.join(results_path, "model_data")
        create_directories(model_results_path)
        model_data_path = os.path.join(
            *[str(model_results_path), 'model_data.json'])
        with open(f"{model_data_path}", 'w') as outfile:
            json.dump(data, outfile)

        # Keras-native save for the network; pickle for the scalers.
        lstm_model_path = os.path.join(
            *[str(model_results_path), str(flight_route) + '.h5'])
        lstm.save(f"{lstm_model_path}")

        save_input_scaler_file_path = os.path.join(
            model_results_path, flight_route + "_train_scaler.pkl")
        with open(save_input_scaler_file_path, 'wb') as file:
            pickle.dump(X_train_scaler, file)

        save_target_scaler_file_path = os.path.join(
            model_results_path, flight_route + "_target_scaler.pkl")
        with open(save_target_scaler_file_path, 'wb') as file:
            pickle.dump(Y_train_scaler, file)

    return threshold