Example #1
    def set_features_columns_options():

        # Get the columns of the test data set so the user can perform feature selection
        test_data_path = InputSettings.get_test_data_path()
        flight_route = get_subdirectories(test_data_path)[0]
        flight_dir = os.path.join(test_data_path, flight_route)
        attack = get_subdirectories(flight_dir)[0]
        attack_dir = os.path.join(flight_dir, attack)
        flight_csv = os.listdir(attack_dir)[0]
        df_test = pd.read_csv(os.path.join(attack_dir, flight_csv))
        test_columns = list(df_test.columns)

        # Clean meta-data - remove columns listed in a YAML config file, such as index, flight_id, etc.
        for column in COLUMNS_TO_REMOVE:
            if column in test_columns:
                test_columns.remove(column)

        InputSettings.FEATURES_COLUMNS_OPTIONS = test_columns
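
Every example in this collection calls get_subdirectories, which is not shown here. As a minimal sketch, assuming the helper simply lists the immediate sub-directory names of a path (the project's actual implementation may differ):

import os


def get_subdirectories(directory_path):
    # Return the names of the immediate sub-directories of directory_path
    return [entry for entry in os.listdir(directory_path)
            if os.path.isdir(os.path.join(directory_path, entry))]
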
Example #2
def rename_files(directory_path):
    """
    Rename the files in the input directory - add "sensors_" as a prefix to each file name
    :param directory_path: the path of the input directory
    :return:
    """

    FLIGHT_ROUTES = get_subdirectories(directory_path)

    for flight_route in FLIGHT_ROUTES:
        flight_dir = os.path.join(directory_path, flight_route)
        attacks = get_subdirectories(flight_dir)
        for attack in attacks:
            attack_dir = os.path.join(flight_dir, attack)
            for flight_csv in os.listdir(attack_dir):
                origin_file_dir = os.path.join(attack_dir, flight_csv)
                new_file_dir = os.path.join(attack_dir,
                                            "sensors_" + flight_csv)
                os.rename(origin_file_dir, new_file_dir)
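
A brief usage sketch for rename_files, using a hypothetical directory layout implied by the loops above (the paths are illustrative only):

# Hypothetical layout before the call:
#   data/test_routes/RouteA/Velocity/flight_1.csv
# After the call, the same file is renamed to:
#   data/test_routes/RouteA/Velocity/sensors_flight_1.csv
rename_files('data/test_routes')
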
Example #3
def move_test_files_to_target_path(source_directory, target_directory):
    """
    Move test files from the source path to the target path
    :param source_directory: source path
    :param target_directory: target path
    :return:
    """

    flight_files = get_subdirectories(source_directory)

    for index, route in enumerate(flight_files):
        current_directory = os.path.join(source_directory, route)
        sensors_file = get_sensors_file(current_directory)
        move_file_to_target_path(current_directory, target_directory,
                                 sensors_file, "sensors_{0}.csv".format(index))
Example #4

    def init_models():
        """
        Initializes the execution of the chosen algorithms - suitable for both flows
        :return:
        """

        similarity_score, test_data_path, results_path, new_model_running = \
            ModelsExecution.get_parameters()

        # Init evaluation metrics data which will be presented in the results table
        InputSettings.init_results_metrics_data()

        # Set test data - flight routes
        flight_routes = get_subdirectories(test_data_path)
        InputSettings.set_flight_routes(flight_routes)

        return similarity_score, test_data_path, results_path, new_model_running
Example #5
def move_train_files_to_target_path(source_directory, target_directory,
                                    train_folder_name):
    """
    Move train files from the source path to the target path
    :param source_directory: source path
    :param target_directory: target path
    :param train_folder_name: the name of the route
    :return:
    """

    flight_files = get_subdirectories(source_directory)

    for index, route in enumerate(flight_files):
        new_route_name = train_folder_name + "Route_" + str(index)
        current_directory = os.path.join(source_directory, route)
        sensors_file = get_sensors_file(current_directory)
        create_directories(f'{target_directory}/{new_route_name}')
        move_file_to_target_path(current_directory,
                                 f'{target_directory}/{new_route_name}',
                                 sensors_file, "without_anom.csv")
def move_test_files_to_target_path(source_directory, target_directory,
                                   test_folder_name, attack_name):
    """
    Move test files from the source path to the target path
    :param source_directory: source path
    :param target_directory: target path
    :param test_folder_name: the name of the route
    :param attack_name: the name of the attack
    :return:
    """

    flight_files = get_subdirectories(source_directory)

    for index, route in enumerate(flight_files):
        current_directory = os.path.join(source_directory, route)
        sensors_file = get_sensors_file(current_directory)
        middle_target_directory = os.path.join(target_directory,
                                               test_folder_name)
        create_directories(middle_target_directory)
        full_path = os.path.join(middle_target_directory, attack_name)
        create_directories(full_path)
        move_file_to_target_path(current_directory, full_path, sensors_file,
                                 "sensors_{0}.csv".format(index))
def execute_creation(source_directory, target_directory):
    """
    Create the test route files
    :param source_directory: directory that contains the train routes
    :param target_directory: target path where the test files will be saved
    :return:
    """

    flight_files = get_subdirectories(source_directory)

    for flight in flight_files:

        current_flight_path = os.path.join(source_directory, flight)

        routes_files = get_subdirectories(current_flight_path)

        for index, route in enumerate(routes_files):

            full_route_name = str(flight) + "_Route_" + str(index)
            route_dir = os.path.join(current_flight_path, route)

            # Create the route folder in the target directory
            create_directories(f'{target_directory}/{full_route_name}')

            train_file_df = pd.read_csv(f'{route_dir}/{route}.csv')

            num_of_way_points = len(train_file_df["NameOfWpt"]) - 4

            # Generate one test set per attack type, each with a randomly chosen spoofed way point
            for attack in ["Mixed", "Velocity", "Height", "Constant"]:
                create_test_set(
                    num_of_way_points=num_of_way_points,
                    directory_path=f'{target_directory}/{full_route_name}',
                    file_name=f"{attack}_Attack",
                    spoofed_way_point=random.randint(num_of_way_points - 3,
                                                     num_of_way_points - 1),
                    attack=attack,
                    source_df=train_file_df)
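
A short usage sketch for execute_creation, with hypothetical paths: every <flight>/<route>/<route>.csv under the source directory yields a <flight>_Route_<index> folder in the target directory containing one test file per attack type:

# Hypothetical paths - adjust to the real data layout
execute_creation(source_directory='data/train_routes',
                 target_directory='data/generated_test_routes')
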
def run_model(training_data_path, test_data_path, results_path,
              similarity_score, save_model, new_model_running, algorithm_path,
              threshold, features_list, target_features_list,
              train_scaler_path, target_scaler_path, event):
    """
    Run SVR model process
    :param training_data_path: train data set directory path
    :param test_data_path: test data set directory path
    :param results_path: results directory path
    :param similarity_score: chosen similarity functions
    :param save_model: indicator whether the user wants to save the model or not
    :param new_model_running: indicator whether we are in the new model creation flow or not
    :param algorithm_path: path of the existing algorithm
    :param threshold: saved threshold for the load model flow
    :param features_list: saved chosen features for the load model flow
    :param target_features_list: all the features in the test data set for the target
    :param train_scaler_path: path of the existing input train scaler directory
    :param target_scaler_path: path of the existing input target scaler directory
    :param event: running state flag
    :return: reported results for the SVR execution
    """

    # Choose between new model creation flow and load existing model flow
    if new_model_running:
        kernel, gamma, epsilon, threshold, window_size = \
            get_svr_new_model_parameters()
    else:
        with open(algorithm_path, 'rb') as algorithm_file:
            svr_model = pickle.load(algorithm_file)
        with open(train_scaler_path, 'rb') as train_scaler_file:
            X_train_scaler = pickle.load(train_scaler_file)
        with open(target_scaler_path, 'rb') as target_scaler_file:
            Y_train_scaler = pickle.load(target_scaler_file)
        window_size = svr_model.n_prev
        X_train = None
        Y_train = None

    FLIGHT_ROUTES = get_subdirectories(test_data_path)

    current_time = get_current_time()

    current_time_path = os.path.join(str(results_path), 'svr', str(current_time))
    create_directories(current_time_path)

    # Create sub-directories for each similarity function
    for similarity in similarity_score:
        similarity_path = os.path.join(str(current_time_path), str(similarity))
        create_directories(similarity_path)

    # Train the model for each flight route
    for flight_route in FLIGHT_ROUTES:

        # Execute training for new model flow
        if new_model_running:
            svr_model, X_train_scaler, Y_train_scaler, X_train, Y_train = execute_train(
                flight_route,
                training_data_path=training_data_path,
                kernel=kernel,
                gamma=gamma,
                epsilon=epsilon,
                features_list=features_list,
                window_size=window_size,
                target_features_list=target_features_list,
                event=event)

        # Get results for each similarity function
        for similarity in similarity_score:
            current_results_path = os.path.join(str(current_time_path),
                                                str(similarity),
                                                str(flight_route))
            create_directories(current_results_path)
            tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attacks_duration = execute_predict(
                flight_route,
                test_data_path=test_data_path,
                similarity_score=similarity,
                threshold=threshold,
                svr_model=svr_model,
                X_train_scaler=X_train_scaler,
                results_path=current_results_path,
                add_plots=True,
                run_new_model=new_model_running,
                X_train=X_train,
                features_list=features_list,
                target_features_list=target_features_list,
                save_model=save_model,
                Y_train_scaler=Y_train_scaler,
                Y_train=Y_train,
                window_size=window_size,
                event=event)

            df = pd.DataFrame(tpr_scores)
            tpr_path = os.path.join(str(current_results_path),
                                    str(flight_route) + '_tpr.csv')
            df.to_csv(tpr_path, index=False)

            df = pd.DataFrame(fpr_scores)
            fpr_path = os.path.join(str(current_results_path),
                                    str(flight_route) + '_fpr.csv')
            df.to_csv(fpr_path, index=False)

            df = pd.DataFrame(acc_scores)
            acc_path = os.path.join(str(current_results_path),
                                    str(flight_route) + '_acc.csv')
            df.to_csv(acc_path, index=False)

            df = pd.DataFrame(delay_scores)
            delay_path = os.path.join(str(current_results_path),
                                      str(flight_route) + '_delay.csv')
            df.to_csv(delay_path, index=False)

    algorithm_name = "SVR"

    # Report results to csv files for each similarity function
    for similarity in similarity_score:
        report_similarity_path = os.path.join(str(results_path), 'svr',
                                              str(current_time), str(similarity))
        report_results(report_similarity_path, test_data_path, FLIGHT_ROUTES,
                       algorithm_name, similarity, routes_duration,
                       attacks_duration)
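
For orientation, this is the results layout that run_model writes, inferred directly from the os.path.join calls above (the timestamp folder name depends on get_current_time and is shown only schematically):

# <results_path>/
#     svr/
#         <current_time>/
#             <similarity>/
#                 <flight_route>/
#                     <flight_route>_tpr.csv
#                     <flight_route>_fpr.csv
#                     <flight_route>_acc.csv
#                     <flight_route>_delay.csv
#                     Figures/Attacks/<attack>/...   (plots written by execute_predict)
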
def execute_predict(flight_route,
                    test_data_path=None,
                    similarity_score=None,
                    threshold=None,
                    svr_model=None,
                    X_train_scaler=None,
                    results_path=None,
                    add_plots=True,
                    run_new_model=False,
                    X_train=None,
                    features_list=None,
                    target_features_list=None,
                    save_model=False,
                    Y_train_scaler=None,
                    Y_train=None,
                    window_size=None,
                    event=None):
    """
    Execute predictions function for a specific flight route
    :param flight_route: current flight route to run predictions on
    :param test_data_path: the path of test data directory
    :param similarity_score: similarity function
    :param threshold: threshold from the train
    :param svr_model: SVR model
    :param X_train_scaler: normalization train input scaler
    :param results_path: the path of results directory
    :param add_plots: indicator whether to add plots or not
    :param run_new_model: indicator whether current flow is new model creation or not
    :param X_train: train input data frame
    :param features_list: the list of features which the user chose for the input
    :param target_features_list: the list of features which the user chose for the target
    :param save_model: indicator whether the user wants to save the model or not
    :param Y_train_scaler: normalization train target scaler
    :param Y_train: train target data frame
    :param window_size: window size for each instance in training
    :param event: running state flag
    :return: tpr scores, fpr scores, acc scores, delay scores, routes duration, attack duration
    """

    tpr_scores = defaultdict(list)
    fpr_scores = defaultdict(list)
    acc_scores = defaultdict(list)
    delay_scores = defaultdict(list)
    routes_duration = defaultdict(list)
    attack_duration = defaultdict(list)

    # Set a threshold in new model creation flow
    if run_new_model:
        event.wait()
        threshold = predict_train_set(svr_model, X_train, save_model,
                                      add_plots, threshold, features_list,
                                      target_features_list, results_path,
                                      flight_route, similarity_score,
                                      X_train_scaler, Y_train, Y_train_scaler)

    flight_dir = os.path.join(test_data_path, flight_route)
    ATTACKS = get_subdirectories(flight_dir)

    figures_results_path = os.path.join(results_path, "Figures")
    create_directories(figures_results_path)

    attacks_figures_results_path = os.path.join(figures_results_path,
                                                "Attacks")
    create_directories(attacks_figures_results_path)

    # Iterate over all attacks in order to find anomalies
    for attack in ATTACKS:
        event.wait()
        attack_name = attack

        if "_" in attack_name:
            attack_name = attack_name.split("_")[0]

        current_attack_figures_results_path = os.path.join(
            attacks_figures_results_path, attack_name)
        create_directories(current_attack_figures_results_path)

        attacks_path = os.path.join(str(test_data_path), str(flight_route),
                                    str(attack))
        for flight_csv in os.listdir(attacks_path):

            flight_attack_path = os.path.join(str(attacks_path), str(flight_csv))
            df_test_source = pd.read_csv(flight_attack_path)

            Y_test_labels = df_test_source[[ATTACK_COLUMN]].values
            Y_test_labels_preprocessed = svr_model._preprocess(
                Y_test_labels, Y_test_labels)[1]

            attack_time = len(Y_test_labels)

            input_df_test = df_test_source[features_list]
            target_df_test = df_test_source[target_features_list]

            # Step 1 : Clean test data set
            input_clean_df_test = clean_data(input_df_test)
            target_clean_df_test = clean_data(target_df_test)

            # Step 2: Normalize the data
            X_test = X_train_scaler.transform(input_clean_df_test)

            # Y_test = normalize_data(data=target_clean_df_test,
            #                         scaler="power_transform")[0]

            Y_test = Y_train_scaler.transform(target_clean_df_test)

            Y_test_preprocessed = svr_model._preprocess(Y_test, Y_test)[1]

            X_pred = svr_model.predict(X_test)
            assert len(Y_test_preprocessed) == len(X_pred)

            scores_test = []
            for i, pred in enumerate(X_pred):
                scores_test.append(
                    anomaly_score(Y_test_preprocessed[i], pred,
                                  similarity_score))

            # Add reconstruction error scatter if plots indicator is true
            event.wait()
            if add_plots:
                plot_reconstruction_error_scatter(
                    scores=scores_test,
                    labels=Y_test_labels_preprocessed,
                    threshold=threshold,
                    plot_dir=current_attack_figures_results_path,
                    title=f'Outlier Score Testing for {flight_csv} in {flight_route}({attack})')

                for i, target_feature in enumerate(target_features_list):
                    title = "Test performance of SVR for " + target_feature + " feature in " + flight_csv
                    plot_prediction_performance(
                        Y_train=Y_test_preprocessed[:, i],
                        X_pred=X_pred[:, i],
                        results_path=current_attack_figures_results_path,
                        title=title)

            predictions = [1 if x >= threshold else 0 for x in scores_test]

            # ROC curve plotting is currently disabled
            # if add_plots:
            #     plot_roc(y_true=Y_test, y_pred=predictions, plot_dir=results_path,
            #              title=f'ROC Curve - {flight_csv} in {flight_route}({attack})')

            attack_start, attack_end = get_attack_boundaries(
                df_test_source[ATTACK_COLUMN])

            method_scores = get_method_scores(predictions,
                                              attack_start,
                                              attack_end,
                                              add_window_size=True,
                                              window_size=window_size)

            tpr_scores[attack].append(method_scores[0])
            fpr_scores[attack].append(method_scores[1])
            acc_scores[attack].append(method_scores[2])
            delay_scores[attack].append(method_scores[3])
            routes_duration[attack].append(attack_time)
            attack_duration[attack].append(method_scores[4])

    return tpr_scores, fpr_scores, acc_scores, delay_scores, routes_duration, attack_duration
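
execute_predict reduces every prediction to a scalar outlier score through anomaly_score, which is not shown in this collection. One plausible sketch, assuming the similarity function name selects a simple distance such as cosine or Euclidean (the project's actual scoring may differ):

import numpy as np


def anomaly_score(y_true, y_pred, similarity_function):
    # Assumption: the score is a distance between the observed and predicted vectors,
    # so larger values indicate a stronger anomaly
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if similarity_function == "cosine":
        denominator = np.linalg.norm(y_true) * np.linalg.norm(y_pred)
        if denominator == 0:
            return 0.0
        return 1.0 - float(np.dot(y_true, y_pred) / denominator)
    # Default: Euclidean distance between the two vectors
    return float(np.linalg.norm(y_true - y_pred))
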