Example #1
    def generate_matrix_meta_data(self, X, sub_dir):
        """

            Generates files/graphics in the proper directory for the matrix.

        Args:
            X: list of lists, numpy array, or numpy matrix
                Matrix-like object to generate meta data on.

            sub_dir: string
                Specify the sub directory to append to the pre-defined folder path.
        """

        # Convert to numpy array if possible
        X = np.array(X)

        create_dir_structure(self.folder_path,
                             correct_directory_path(sub_dir + "/Meta Data"))

        output_folder_path = correct_directory_path(self.folder_path)

        # Create files relating to dataframe's shape
        shape_df = pd.DataFrame.from_dict({
            'Rows': [X.shape[0]],
            'Columns': [X.shape[1]]
        })
        df_to_image(shape_df,
                    f"{output_folder_path}/{sub_dir}",
                    "Meta Data",
                    "Matrix Shape Table",
                    show_index=False)
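The meta-data step above reduces to a one-row shape table built from the matrix dimensions. A minimal standalone sketch with plain numpy/pandas (the eflow helpers create_dir_structure and df_to_image are left out):

import numpy as np
import pandas as pd

X = np.array([[1, 2, 3],
              [4, 5, 6]])          # hypothetical matrix

shape_df = pd.DataFrame.from_dict({
    'Rows': [X.shape[0]],
    'Columns': [X.shape[1]]
})
print(shape_df)                    # one-row table: Rows=2, Columns=3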
Example #2
def move_folder_to_eflow_garbage(directory_path, create_sub_dir=None):
    """

        Renames the given folder and moves it into a folder labeled 'Garbage' for
        the user/system to later handle.

    Args:
        directory_path:
            Path to given folder to move to 'Garbage'

        create_sub_dir:
            If the 'Garbage' folder needs further organization, you can specify
            a sub folder for the given folder to be embedded in.
    """
    directory_path = correct_directory_path(directory_path)
    check_if_directory_exists(directory_path)

    if not create_sub_dir:
        create_sub_dir = ""
    else:
        create_sub_dir = correct_directory_path(create_sub_dir)

    garbage_folder_path = create_dir_structure(
        os.getcwd(),
        f"{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/_Extras/Garbage/{create_sub_dir}"
    )

    path_to_folder, folder_name = directory_path[:-1].rsplit('/', 1)

    _, folder_name = get_unique_directory_path(garbage_folder_path,
                                               folder_name).rsplit('/', 1)

    os.rename(directory_path, f'{path_to_folder}/{folder_name}')

    shutil.move(f'{path_to_folder}/{folder_name}', garbage_folder_path)
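The function boils down to a rename-then-move pattern. A minimal standard-library sketch under hypothetical paths (eflow's unique-naming helper is replaced by a fixed "_1" suffix, so repeated runs may collide):

import os
import shutil

# Hypothetical throw-away directories for the demo
src = "/tmp/eflow_demo/old_results"
garbage = "/tmp/eflow_demo/eflow/_Extras/Garbage"
os.makedirs(src, exist_ok=True)
os.makedirs(garbage, exist_ok=True)

# Same two-step pattern as above: rename in place, then move into the garbage folder
parent, name = src.rstrip('/').rsplit('/', 1)
unique_name = name + "_1"
os.rename(src, f"{parent}/{unique_name}")
shutil.move(f"{parent}/{unique_name}", garbage)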
Example #3
    def __init__(self,
                 dataset_name,
                 overwrite_full_path=None):
        """
        Args:
            dataset_name: string
                Sub directory to create on top of the directory
                'PARENT_OUTPUT_FOLDER_NAME'.

            overwrite_full_path: string
                The passed directory path must already exist. Will completely
                ignore the project name and attempt to point to this already
                created directory.
        """

        # Setup project structure
        if not overwrite_full_path:
            parent_structure = "/" + SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME \
                               + "/" + dataset_name + "/"

            create_dir_structure(os.getcwd(),
                                 parent_structure)
            tmp_path = correct_directory_path(
                os.getcwd() + parent_structure)

        # Trusting the user that this path must already exist
        else:
            overwrite_full_path = correct_directory_path(overwrite_full_path)

            # Path doesn't contain eflow's main output
            if f"/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/" not in overwrite_full_path:
                raise UnsatisfiedRequirments(f"Directory path must have {SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME} "
                                             f"as a directory name or this program will not work correctly.")

            # Unknown path found
            if not os.path.exists(overwrite_full_path):
                raise SystemError("The path must already be defined in full on "
                                  "your system to use a different directory "
                                  "structure than orginally intended.")

            tmp_path = overwrite_full_path

        from eflow._hidden.general_objects import enum
        self.__PROJECT = enum(PATH_TO_OUTPUT_FOLDER=tmp_path,
                              RELATIVE_PATH_TO_OUTPUT_FOLDER=tmp_path.split(f"/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/")[1])
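The overwrite branch enforces two preconditions on the user-supplied path. A standalone sketch of the same guards, using a hypothetical stand-in value for SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME:

import os

def validate_overwrite_path(path, parent="eflow Data"):
    # 'parent' is a hypothetical stand-in for SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME
    if f"/{parent}/" not in path:
        raise ValueError(f"Directory path must contain a '{parent}' directory.")
    if not os.path.exists(path):
        raise SystemError("The path must already exist on your system.")
    return path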
Example #4
    def __create_json_pipeline_file(self):
        """
        Returns:
            Nothing; creates a dict based on the contents of the variable
            'self.__pipeline_segment_deque' and converts it to a json file.
            This file will later be used to instruct the object to execute
            specific code.
        """

        # -------------
        json_dict = dict()
        segment_order = 1

        json_dict["Pipeline Name"] = self.__pipeline_name
        json_dict["Pipeline Segment Order"] = dict()
        for segment_name, segment_path_id, pipeline_segment_obj in self.__pipeline_segment_deque:
            json_dict["Pipeline Segment Order"][segment_order] = dict()
            json_dict["Pipeline Segment Order"][segment_order][
                "Pipeline Segment Path"] = segment_path_id
            json_dict["Pipeline Segment Order"][segment_order][
                "Pipeline Segment Type"] = pipeline_segment_obj.__class__.__name__
            json_dict["Pipeline Segment Order"][segment_order][
                "Pipeline Segment Name"] = segment_name
            json_dict["Pipeline Segment Order"][segment_order][
                "Pipeline Segment ID"] = segment_path_id.split("/")[-1].split(
                    ".")[0]

            segment_order += 1

        json_dict["Pipeline Segment Count"] = segment_order - 1

        # Create a folder for all non-root json files.
        if self.__pipeline_modify_id:
            create_dir_structure(self.folder_path, "/Modified Pipelines")
            dict_to_json_file(json_dict,
                              self.folder_path + "/Modified Pipelines",
                              self.__json_file_name)
        # Root json files only
        else:
            dict_to_json_file(json_dict, self.folder_path,
                              self.__json_file_name)
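For reference, the json file written by this method has roughly the following layout; the path, name, and id values below are hypothetical:

import json

# Hypothetical single-segment pipeline, mirroring the dict layout built above
json_dict = {
    "Pipeline Name": "Cleaning Pipeline",
    "Pipeline Segment Order": {
        1: {
            "Pipeline Segment Path": "_Extras/Pipeline Structure/Data Pipeline Segments/DataTransformer/abc123.json",
            "Pipeline Segment Type": "DataTransformer",
            "Pipeline Segment Name": "Remove null rows",
            "Pipeline Segment ID": "abc123",
        }
    },
    "Pipeline Segment Count": 1,
}
print(json.dumps(json_dict, indent=4))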
Example #5
    def __save_update_best_model_clusters(self):

        create_dir_structure(self.folder_path,
                             "_Extras")

        pickle_object_to_file(self.__models_suggested_clusters,
                              self.folder_path + "_Extras",
                              "All suggested clusters")
        write_object_text_to_file(self.__models_suggested_clusters,
                                  self.folder_path + "_Extras",
                                  "All suggested clusters")

        all_clusters = []
        for model_name, best_clusters in self.__models_suggested_clusters.items():
            write_object_text_to_file(best_clusters,
                                      self.folder_path + "_Extras",
                                      f"{model_name} suggested clusters")

            all_clusters += best_clusters


        write_object_text_to_file(round(sum(all_clusters) / len(all_clusters)),
                                  self.folder_path + "_Extras",
                                  "Average of suggested clusters")
Example #6
def create_plt_png(directory_path,
                   sub_dir,
                   filename,
                   sharpness=1.7):

    """

        Saves the plt based image in the correct directory.

    Args:
        directory_path:
            Already existing directory path.

        sub_dir:
            Directory structure to create on top of the already generated path of
            'directory_path'.

        filename:
            Filename to save into the full path of 'directory_path' + 'sub_dir'.

        sharpness:
            Changes the image's sharpness to look better.
    """
    directory_path = correct_directory_path(directory_path)

    # Ensure directory structure is init correctly
    abs_path = create_dir_structure(directory_path,
                                    sub_dir)

    # Ensure file ext is on the file.
    if filename[-4:] != ".png":
        filename += ".png"

    # plt.show()

    plt.savefig(abs_path + "/" + filename, bbox_inches='tight')

    if sharpness:
        full_path = directory_path + sub_dir + "/" + filename
        adjust_sharpness(full_path,
                         full_path,
                         sharpness)
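A self-contained equivalent using only matplotlib and the standard library; the output directory is hypothetical, and eflow's directory helpers and adjust_sharpness are omitted:

import os
import matplotlib
matplotlib.use("Agg")                     # headless backend for the sketch
import matplotlib.pyplot as plt

out_dir = "/tmp/eflow_demo/plots"         # hypothetical directory
os.makedirs(out_dir, exist_ok=True)

plt.plot([1, 2, 3], [1, 4, 9])

filename = "squares"
if filename[-4:] != ".png":               # same extension guard as above
    filename += ".png"

plt.savefig(os.path.join(out_dir, filename), bbox_inches='tight')
plt.close("all")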
Example #7
def remove_unconnected_pipeline_segments():
    """

        Removes all pipeline segments that aren't connected to a pipeline structure.
    """

    pipeline_struct_dir = os.getcwd(
    ) + f"/{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/_Extras/Pipeline Structure/"

    if not os.path.exists(pipeline_struct_dir):
        print(
            "Project structure for pipelines has yet to be initalized. Can't clean/remove any files related to pipeline..."
        )
    else:
        segment_dict = dict()
        pipeline_segments_dict = dict()
        # Get all segment files by their types.
        if os.path.exists(pipeline_struct_dir + "/Data Pipeline Segments/"):
            all_segment_dirs = get_all_directories_from_path(
                pipeline_struct_dir + "/Data Pipeline Segments")

            for segment_type in all_segment_dirs:
                segment_dict[segment_type] = get_all_files_from_path(
                    pipeline_struct_dir +
                    f"/Data Pipeline Segments/{segment_type}")

        # Get all segments related to each pipeline.
        if os.path.exists(pipeline_struct_dir + "/Data Pipeline/"):
            all_pipeline_dirs = get_all_directories_from_path(
                pipeline_struct_dir + "/Data Pipeline/")

            for pipeline_name in all_pipeline_dirs:
                json_file = json_file_to_dict(
                    f"{pipeline_struct_dir}/Data Pipeline/{pipeline_name}/root_pipeline.json"
                )

                for i in range(1, json_file["Pipeline Segment Count"] + 1):
                    segment_id = json_file["Pipeline Segment Order"][str(
                        i)]['Pipeline Segment ID']
                    segment_type = json_file["Pipeline Segment Order"][str(
                        i)]['Pipeline Segment Type']

                    if segment_type in segment_dict.keys() and segment_id + ".json" in \
                            segment_dict[segment_type]:
                        segment_dict[segment_type].remove(segment_id + ".json")

        # Create path to eflow's garbage
        garbage_folder_path = create_dir_structure(
            os.getcwd(),
            f"{SYS_CONSTANTS.PARENT_OUTPUT_FOLDER_NAME}/_Extras/Garbage/Data Pipeline Segments/DataTransformer/"
        )

        # Rename files and move them to the appropriate garbage folder.
        for segment_type, segment_ids in segment_dict.items():
            files_in_garbage = get_all_files_from_path(garbage_folder_path)

            for _id in segment_ids:
                file_to_remove = _id
                i = 1
                while file_to_remove in files_in_garbage:
                    file_to_remove = _id.split(".")[0] + f"_{i}.json"
                    i += 1

                os.rename(
                    pipeline_struct_dir +
                    f"Data Pipeline Segments/{segment_type}/{_id}",
                    pipeline_struct_dir +
                    f"Data Pipeline Segments/{segment_type}/{file_to_remove}")
                shutil.move(
                    pipeline_struct_dir +
                    f"Data Pipeline Segments/{segment_type}/{file_to_remove}",
                    garbage_folder_path + file_to_remove)
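The renaming loop before each move simply searches for a file name not already present in the garbage folder. A standalone sketch:

existing = {"abc123.json", "abc123_1.json"}    # hypothetical files already in Garbage

file_to_remove = "abc123.json"
i = 1
while file_to_remove in existing:
    file_to_remove = "abc123" + f"_{i}.json"
    i += 1

print(file_to_remove)                          # -> abc123_2.json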
Example #8
    def check_create_snapshot(self, df, df_features, directory_path, sub_dir):
        """

            Compares the passed pandas dataframe object to a pre-defined json
            file.

        Args:
            df: pd.Dataframe
                Pandas dataframe object.

            df_features: DataFrameTypes from eflow
                DataFrameTypes object.

            directory_path: string
                Output path of the dataset.

            sub_dir: string
                Sub directory to create on top of 'directory_path'.

        Raises:
            Raises a mismatch error if the json file doesn't match up with the
            passed dataframe snapshot, causing the program to stop at runtime.
        """
        if not isinstance(df, pd.DataFrame):
            raise TypeError(
                f"'df' must be a pandas datafram object not a {type(df)}")

        if not isinstance(df_features, DataFrameTypes):
            raise TypeError(
                f"'df_features' must be a DataFrameTypes object not a {type(df_features)}"
            )

        output_folder_path = create_dir_structure(directory_path, sub_dir)

        json_file = output_folder_path + "Dataframe Snapshot.json"

        # Meta Data has already been generated; compare data
        if os.path.isfile(json_file):
            with open(json_file) as file:

                data = json.load(file)
                mismatch_error = None

                # Not a real loop; only used to allow early breaks out of the checks
                while True:
                    if self.__compare_shape:
                        list_shape = list(df.shape)
                        if list(data["shape"]) != list_shape:
                            mismatch_error = f'the saved shape {data["shape"]} of the' \
                                             f' dataframe snapshot did not match up with' \
                                             f' the passed dataframe shape {list_shape}.'
                            break

                    # Ensure feature names match up
                    if self.__compare_feature_names:

                        snapshot_features = set(data["feature_names"])
                        passed_features = set(df_features.all_features())

                        feature_difference = snapshot_features.symmetric_difference(
                            passed_features)

                        if feature_difference:
                            mismatch_error = "the following feature name conflicts feature:\n"

                            missing_features = []
                            for feature in feature_difference:
                                if feature in snapshot_features:
                                    missing_features.append(feature)

                            extra_features = []
                            for feature in feature_difference:
                                if feature in passed_features:
                                    extra_features.append(feature)

                            if extra_features:
                                mismatch_error += f"--- Passed dataframe has additional feature(s) than snapshot:\n {extra_features}.\n"

                            if missing_features:
                                mismatch_error += f"--- Passed dataframe is missing the following snapshot feature(s):\n {missing_features}.\n"

                            if extra_features or missing_features:
                                break

                    # Ensure pseudo-random values are generated the same way again
                    if self.__compare_random_values:
                        compared_data = self.__create_random_values_dict(
                            df, df_features)

                        random_values_matched_flag = True
                        for k, v in data["random_values"].items():
                            if k in compared_data:
                                if data["random_values"][k] != compared_data[k]:
                                    random_values_matched_flag = False
                                    break
                        if not random_values_matched_flag:
                            mismatch_error = f"the 'random' values did not match at feature name '{k}' in the dataframe " \
                                             + "(these 'random' values are based on the shape and name of the column)"
                            break

                    # Break main loop
                    break

                # Error found; raise it
                if mismatch_error is not None:
                    raise SnapshotMismatchError(
                        f"DataFrameSnapshot has raised an error because {mismatch_error}."
                        +
                        "\nThis error invoked because the directory structure saved a json file "
                        "containing attributes of the dataframe or a 'snapshot'."
                        "\nThe given error can be resolved by performing any of the following:"
                        "\n\t* Pass in the same dataframe as expected."
                        "\n\t* Disable the snapshot check by changing 'dataframe_snapshot' to False."
                        "\n\t* Disable save file option by changing the parameter 'save_file' to False."
                        "\n\t* Or deleting the json object file in the dataset directory under _Extras"
                    )

        # JSON file doesn't exist; create file
        else:
            self.__create_dataframe_snapshot_json_file(df, output_folder_path)
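The shape and feature-name checks reduce to a list comparison and a set difference. A minimal sketch against a hypothetical saved snapshot:

import pandas as pd

snapshot = {"shape": [3, 2], "feature_names": ["age", "fare"]}   # hypothetical snapshot
df = pd.DataFrame({"age": [22, 38, 26], "fare": [7.25, 71.28, 7.92]})

assert list(df.shape) == list(snapshot["shape"])
feature_difference = set(snapshot["feature_names"]).symmetric_difference(set(df.columns))
assert not feature_difference, f"Feature name conflict(s): {feature_difference}"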
Example #9
    def __init__(self,
                 df,
                 feature_names=[],
                 dataset_sub_dir="",
                 dataset_name="Default Dataset Name",
                 overwrite_full_path=None,
                 notebook_mode=False,
                 pca_perc=1.00):
        """
        Args:
            df: pd.Dataframe
                Pandas dataframe object (or a matrix-like object if
                'feature_names' is passed).

            feature_names: list of strings
                Names of the features; required if 'df' is not a pd.Dataframe.

            dataset_sub_dir: string
                Sub directory to write data.

            dataset_name: string
                Main project directory.

            overwrite_full_path: string
                Overwrite full directory path to a given output folder.

            notebook_mode: bool
                Display and show in notebook if set to true.

            pca_perc: float
                Cumulative explained variance cutoff used when applying PCA;
                pass a falsy value to skip scaling/PCA entirely.
        """

        if isinstance(df, pd.DataFrame):
            self.__feature_names = copy.deepcopy(list(df.columns))
        else:
            if not feature_names:
                raise UnsatisfiedRequirments("If passing in a matrix like object. "
                                             "You must init feature names!")
            else:
                self.__feature_names = copy.deepcopy(feature_names)


        AutoModeler.__init__(self,
                             f'{dataset_name}/{dataset_sub_dir}',
                             overwrite_full_path)

        # Define model
        self.__cluster_models_paths = dict()

        self.__notebook_mode = copy.deepcopy(notebook_mode)

        self.__models_suggested_clusters = dict()

        self.__pca = None

        self.__first_scaler = None
        self.__second_scaler = None
        self.__cutoff_index = None
        self.__ordered_dp_indexes = None
        self.__pca_perc = pca_perc

        # --- Apply pca ---
        if pca_perc:

            # Create scaler object
            scaler = StandardScaler()
            scaled = scaler.fit_transform(df)

            self.__first_scaler = copy.deepcopy(scaler)

            print("\nInspecting scaled results!")
            self.__inspect_feature_matrix(matrix=scaled,
                                          feature_names=self.__feature_names,
                                          sub_dir="PCA",
                                          filename="Applied scaler results")

            pca, scaled = self.__visualize_pca_variance(scaled)

            self.__pca = pca

            # Generate "dummy" feature names
            pca_feature_names = ["PCA_Feature_" +
                                 str(i) for i in range(1,
                                                       len(self.__feature_names) + 1)]

            print("\nInspecting applied scaler and pca results!")
            self.__inspect_feature_matrix(matrix=scaled,
                                          feature_names=pca_feature_names,
                                          sub_dir="PCA",
                                          filename="Applied scaler and PCA results")

            if pca_perc < 1.0:
                # Find cut off point on cumulative sum
                cutoff_index = np.where(
                    pca.explained_variance_ratio_.cumsum() > pca_perc)[0][0]
            else:
                cutoff_index = scaled.shape[1] - 1

            print(
                "After applying PCA with a cumulative variance cutoff of {0},"
                " using features 1 to {1}.".format(
                    pca_perc, cutoff_index + 1))

            print("Old shape {0}".format(scaled.shape))

            scaled = scaled[:, :cutoff_index + 1]
            pca_feature_names = pca_feature_names[0: cutoff_index + 1]

            print("New shape {0}".format(scaled.shape))

            scaled = scaler.fit_transform(scaled)

            print("\nInspecting data after final scaler applied!")
            self.__inspect_feature_matrix(matrix=scaled,
                                          feature_names=pca_feature_names,
                                          sub_dir="PCA",
                                          filename="Applied final sclaer to process.")

            self.__second_scaler = copy.deepcopy(scaler)

            self.__scaled = scaled
            self.__cutoff_index = cutoff_index

        # Assumed PCA has already been applied; pass as matrix
        else:
            self.__scaled = df.values

        # Save objects to directory structure
        if self.__pca:
            pipeline_path = create_dir_structure(self.folder_path,
                                                 "Data Cluster Pipeline")

            # Pickle data pipeline objects
            pickle_object_to_file(self.__pca,
                                  pipeline_path,
                                  "PCA")

            pickle_object_to_file(self.__first_scaler,
                                  pipeline_path,
                                  "First Scaler")

            pickle_object_to_file(self.__second_scaler,
                                  pipeline_path,
                                  "Second Scaler")

            pickle_object_to_file(self.__pca_perc,
                                  pipeline_path,
                                  "PCA Percentage")

            # Save Dimensions and Cutoff Index
            write_object_text_to_file(self.__cutoff_index,
                                      pipeline_path,
                                      "Cutoff Index")

            write_object_text_to_file(self.__cutoff_index + 1,
                                      pipeline_path,
                                      "Dimensions")
Example #10
def generate_entropy_table(df,
                           df_features,
                           output_folder_path,
                           sub_dir,
                           file_name="Entropy Table"):
    """

        Calculate the entropy of each non-continuous numerical feature in a pandas
        dataframe object, store the results in a pandas dataframe object, and save
        it to the proper directory structure.

    Args:
        df: pd.Dataframe
            Pandas DataFrame object

        df_features: DataFrameTypes from eflow
            DataFrameTypes object

        output_folder_path: str
            Pre-defined path to an already existing directory to output file(s).

        sub_dir: str
            Path to be possibly generated.

        file_name: str
            Name of the given file to save

    Returns:
        Nothing
    """
    entropy_dict = dict()
    for feature_name in df.columns:
        if feature_name in df_features.all_features() and \
                feature_name not in df_features.null_only_features() and \
                feature_name not in df_features.continuous_numerical_features():
            entropy_dict[feature_name] = calculate_entropy(
                df[feature_name].dropna())

    entropy_table = pd.DataFrame.from_dict(entropy_dict,
                                           orient='index').rename(columns={0: "Entropy"})

    entropy_table.index.name = "Features"

    entropy_table.sort_values(by=["Entropy"],
                              ascending=True,
                              inplace=True)

    create_dir_structure(output_folder_path,
                         sub_dir)


    pickle_object_to_file(entropy_table,
                          output_folder_path + sub_dir,
                          file_name)

    df_to_image(entropy_table,
                output_folder_path,
                sub_dir,
                "Entropy Table",
                show_index=True,
                format_float_pos=5)
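calculate_entropy is an eflow helper not shown here; a roughly equivalent table can be built with scipy's Shannon entropy over each feature's value distribution (a sketch with hypothetical data; the eflow helper may use a different base or missing-value handling):

import pandas as pd
from scipy.stats import entropy

df = pd.DataFrame({"sex": ["m", "f", "f", "m"],
                   "embarked": ["S", "C", "S", "S"]})    # hypothetical data

entropy_dict = {
    feature_name: entropy(df[feature_name].value_counts(normalize=True), base=2)
    for feature_name in df.columns
}

entropy_table = pd.DataFrame.from_dict(entropy_dict,
                                       orient='index').rename(columns={0: "Entropy"})
entropy_table.index.name = "Features"
entropy_table.sort_values(by=["Entropy"], ascending=True, inplace=True)
print(entropy_table)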
Example #11
def generate_meta_data(df,
                       output_folder_path,
                       sub_dir):
    """

        Creates files representing the shape and feature types of the dataframe.

    Args:
        df: pd.Dataframe
            Pandas DataFrame object

        output_folder_path: str
            Pre-defined path to an already existing directory to output file(s).

        sub_dir: str
            Path to be possibly generated.

    Returns:
        Creates meta data on the passed dataframe.
    """
    create_dir_structure(output_folder_path,
                         correct_directory_path(sub_dir + "/Meta Data"))

    output_folder_path = correct_directory_path(output_folder_path)

    # Create files relating to dataframe's shape
    shape_df = pd.DataFrame.from_dict({'Rows': [df.shape[0]],
                                       'Columns': [df.shape[1]]})

    if shape_df.shape[0]:
        df_to_image(shape_df,
                    f"{output_folder_path}/{sub_dir}",
                    "Meta Data",
                    "Dataframe Shape Table",
                    show_index=False)

    write_object_text_to_file(shape_df.to_dict('records'),
                              f"{output_folder_path}/{sub_dir}/Meta Data",
                              "Dataframe Shape Text")

    # Create files relating to dataframe's types
    dtypes_df = data_types_table(df)
    if dtypes_df.shape[0]:
        df_to_image(dtypes_df,
                    f"{output_folder_path}/{sub_dir}",
                    "Meta Data",
                    "Dataframe Types Table",
                    show_index=True)

    plt.close("all")


    # Missing value table
    mis_val_table = missing_values_table(df)
    if mis_val_table.shape[0]:
        df_to_image(mis_val_table,
                    f"{output_folder_path}/{sub_dir}",
                    "Meta Data",
                    "Missing Data Table",
                    show_index=True)

    plt.close("all")
Example #12
    def __find_best_elbow_models(self,
                                 model_name,
                                 k_models,
                                 inertias,
                                 display_visuals=True):

        ks = range(1, len(inertias[0]) + 1)

        plt.figure(figsize=(13, 6))
        plt.title(f"All possible {model_name} Elbow's", fontsize=15)
        plt.xlabel('Number of clusters, k')
        plt.ylabel('Inertia')
        plt.xticks(ks)

        elbow_inertias_matrix = None
        inertias_matrix = None
        elbow_models = []
        elbow_sections = []
        center_elbow_count = dict()
        proximity_elbow_count = dict()

        # Plot ks vs inertias
        for i in range(0,len(inertias)):

            elbow_cluster = KneeLocator(ks,
                                        inertias[i],
                                        curve='convex',
                                        direction='decreasing').knee

            if elbow_cluster == 1 or not elbow_cluster:
                print("Elbow was either one or None for the elbow seq.")
                continue

            plt.plot(ks,
                     inertias[i],
                     '-o',
                     color='#367588',
                     alpha=0.5)

            if str(elbow_cluster) not in center_elbow_count.keys():
                center_elbow_count[str(elbow_cluster)] = 1
            else:
                center_elbow_count[str(elbow_cluster)] += 1

            for k_val in [elbow_cluster - 1, elbow_cluster, elbow_cluster + 1]:
                elbow_sections.append([ks[k_val - 1],inertias[i][k_val - 1]])

                if str(k_val) not in proximity_elbow_count.keys():
                    proximity_elbow_count[str(k_val)] = 1
                else:
                    proximity_elbow_count[str(k_val)] += 1


            if isinstance(elbow_inertias_matrix, type(None)):
                inertias_matrix = np.matrix(inertias[i])
                elbow_inertias_matrix = np.matrix(inertias[i][elbow_cluster - 2:elbow_cluster + 1])

            else:
                inertias_matrix = np.vstack([inertias_matrix, inertias[i]])

                elbow_inertias_matrix = np.vstack(
                    [elbow_inertias_matrix, inertias[i][elbow_cluster - 2:elbow_cluster + 1]])

            elbow_models.append(k_models[i][elbow_cluster - 2:elbow_cluster + 1])

        for elbow in elbow_sections:
            k_val = elbow[0]
            inertia = elbow[1]
            plt.plot(k_val,
                     inertia,
                     'r*')

        del inertias
        del k_models
        del elbow_cluster

        self.save_plot(f"Models/{model_name}",f"All possible {model_name} Elbow's",)

        if display_visuals and self.__notebook_mode:
            plt.show()
        plt.close("all")

        center_elbow_count = pd.DataFrame({"Main Knees": list(center_elbow_count.keys()),
                                           "Counts": list(center_elbow_count.values())})
        center_elbow_count.sort_values(by=['Counts'],
                                       ascending=False,
                                       inplace=True)

        self.save_table_as_plot(
            center_elbow_count,
            sub_dir=f"Models/{model_name}",
            filename="Center Elbow Count")

        proximity_elbow_count = pd.DataFrame({"Proximity Knees": list(proximity_elbow_count.keys()),
                                              "Counts": list(proximity_elbow_count.values())})
        proximity_elbow_count.sort_values(by=['Counts'],
                                          ascending=False,
                                          inplace=True)

        self.save_table_as_plot(
            proximity_elbow_count,
            sub_dir=f"Models/{model_name}",
            filename="Proximity Elbow Count")

        plt.figure(figsize=(13, 6))
        plt.title(f"Best of all {model_name} Elbows", fontsize=15)
        plt.xlabel('Number of clusters, k')
        plt.ylabel('Inertia')
        plt.xticks(ks)

        average_elbow_inertias = elbow_inertias_matrix.mean(0)

        knee_vote = []
        for vector in elbow_inertias_matrix:
            knee_vote.append(
                np.absolute(vector - average_elbow_inertias).sum())

        best_elbow_index = np.array(knee_vote).argmin()

        plt.plot(ks,
                 inertias_matrix[best_elbow_index].tolist()[0],
                 '-o',
                 color='#367588')

        best_clusters = []
        for model in elbow_models[best_elbow_index]:
            k_val = len(model.get_clusters())

            self.__all_cluster_models[f"{model_name}_Cluster_" + str(k_val)] = model

            create_dir_structure(self.folder_path,
                                 f"Models/{model_name}/Clusters={k_val}")

            try:
                pickle_object_to_file(model,
                                      self.folder_path + f"Models/{model_name}/Clusters={k_val}",
                                      f"{model_name}_Cluster_" + str(k_val))
            except Exception:
                print(f"Something went wrong when trying to save the model: {model_name}")
            plt.plot(ks[k_val - 1],
                     inertias_matrix[best_elbow_index].tolist()[0][k_val - 1],
                     'r*')
            best_clusters.append(k_val)

        self.save_plot(f"Models/{model_name}",
                       f"Best of all {model_name} Elbows")

        if display_visuals and self.__notebook_mode:
            plt.show()
        plt.close("all")

        best_clusters.sort()

        if display_visuals and self.__notebook_mode:
            display(proximity_elbow_count)
            display(center_elbow_count)

        return best_clusters
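Elbow detection itself is delegated to KneeLocator from the kneed package. A minimal sketch on a single hypothetical inertia curve:

from kneed import KneeLocator

ks = range(1, 9)                                   # k = 1..8
inertias = [1000, 420, 180, 120, 100, 90, 85, 82]  # hypothetical inertia curve

elbow_cluster = KneeLocator(ks,
                            inertias,
                            curve='convex',
                            direction='decreasing').knee
print(elbow_cluster)                               # expected to land around k = 3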