Exemplo n.º 1
0
class KMeansClustering:
    """
        This class shall be used to divide the data into clusters before training.

        Attributes set by the constructor:
            file_object: handle passed as first argument to every logger call.
            logger_object: object exposing ``log(file_object, message)``.
            awsObj: ``AwsStorageManagement`` instance used to store the elbow plot.
    """
    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object
        self.awsObj = AwsStorageManagement()

    def elbow_plot(self, data):
        """
            Method Name: elbow_plot
            Description: Fits KMeans for 1..10 clusters, plots the inertia (WCSS)
                         of each fit, uploads the plot through ``self.awsObj`` and
                         locates the knee of the curve with ``KneeLocator``.
            Output: The value of ``KneeLocator.knee`` (also kept on ``self.kn``).
            On Failure: Logs the error and raises Exception chained to the cause.
        """
        self.logger_object.log(
            self.file_object,
            'Entered the elbow_plot method of the KMeansClustering class')
        # Single definition of the candidate cluster counts, shared by the
        # fitting loop, the plot's x axis and the knee search.
        cluster_range = range(1, 11)
        wcss = []
        try:
            for n_clusters in cluster_range:
                kmeans = KMeans(n_clusters=n_clusters,
                                init='k-means++',
                                random_state=42)
                kmeans.fit(data)
                wcss.append(kmeans.inertia_)

            img_buffer = io.BytesIO()
            try:
                plt.plot(cluster_range, wcss)
                plt.title('The Elbow Method')
                plt.xlabel('Number of clusters')
                plt.ylabel('WCSS')
                plt.savefig(img_buffer, format='png')
            finally:
                # Close the current figure so that a later call starts from a
                # blank canvas instead of drawing on top of this curve.
                plt.close()
            img_buffer.seek(0)

            # NOTE(review): both storage locations are cleared before the
            # plot is uploaded, including 'models' -- presumably to start a
            # fresh training run; confirm this is intended.
            self.awsObj.deleteFile('models')
            self.awsObj.deleteFile('preprocessing_data')
            self.awsObj.saveObject('preprocessing_data', 'K-Means_Elbow.PNG',
                                   img_buffer, 'image/png')
            print('Saving kneeplot to aws')

            # Find the optimum number of clusters programmatically.
            self.kn = KneeLocator(cluster_range,
                                  wcss,
                                  curve='convex',
                                  direction='decreasing')
            self.logger_object.log(
                self.file_object,
                'The optimum number of clusters is: ' + str(self.kn.knee) +
                ' . Exited the elbow_plot method of the KMeansClustering class'
            )
            return self.kn.knee

        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in elbow_plot method of the KMeansClustering class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Finding the number of clusters failed. Exited the elbow_plot method of the KMeansClustering class'
            )
            # Chain the cause so the original traceback is not lost.
            raise Exception('Finding the number of clusters failed') from e

    def create_clusters(self, data, number_of_clusters):
        """
            Method Name: create_clusters
            Description: Fits KMeans with ``number_of_clusters`` clusters, saves the
                         fitted model under the name 'KMeans' through
                         ``file_methods.File_Operation`` and adds a 'Cluster'
                         column holding each row's cluster label.
            Output: ``data`` itself (modified in place) with the 'Cluster' column
            On Failure: Logs the error and raises Exception chained to the cause.
        """
        self.logger_object.log(
            self.file_object,
            'Entered the create_clusters method of the KMeansClustering class')
        self.data = data
        try:
            self.kmeans = KMeans(n_clusters=number_of_clusters,
                                 init='k-means++',
                                 random_state=42)
            # Divide the data into clusters.
            self.y_kmeans = self.kmeans.fit_predict(data)

            self.file_op = file_methods.File_Operation(self.file_object,
                                                       self.logger_object)
            print('Saving Kmeans Model')
            self.save_model = self.file_op.save_model(self.kmeans, 'KMeans')

            # self.data is the caller's object, so the new column is visible
            # to the caller as well.
            self.data['Cluster'] = self.y_kmeans
            # Report the count that was actually used; self.kn only exists
            # when elbow_plot ran earlier on this same instance.
            self.logger_object.log(
                self.file_object,
                'successfully created ' + str(number_of_clusters) +
                ' clusters. Exited the create_clusters method of the KMeansClustering class'
            )
            return self.data
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occurred in create_clusters method of the KMeansClustering class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Fitting the data to clusters failed. Exited the create_clusters method of the KMeansClustering class'
            )
            # Chain the cause so the original traceback is not lost.
            raise Exception('Fitting the data to clusters failed') from e
class File_Operation:
    """
        This class shall be used to save the model after training
        and load the saved model for prediction.

        Attributes set by the constructor:
            file_object: handle passed as first argument to every logger call.
            logger_object: object exposing ``log(file_object, message)``.
            model_directory: storage location of the model files ('models').
            awsObj: ``AwsStorageManagement`` instance doing the actual storage I/O.
    """
    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object
        self.model_directory = 'models'
        self.awsObj = AwsStorageManagement()

    def save_model(self, model, filename):
        """
            Method Name: save_model
            Description: Pickles ``model`` into memory and stores it through
                         ``self.awsObj`` as ``<filename>.sav`` in the model directory.
            Outcome: Returns the string 'success'
            On Failure: Logs the error and raises Exception chained to the cause.
        """
        self.logger_object.log(
            self.file_object,
            'Entered the save_model method of the File_Operation class')
        try:
            print('Start Saving Model')
            with io.BytesIO() as buffer:
                pickle.dump(model, buffer)
                buffer.seek(0)  # rewind so the upload reads from the start
                # NOTE(review): 'text/html' is an odd content type for a
                # pickle; kept as is because saveObject's contract is not
                # visible here -- confirm before changing.
                self.awsObj.saveObject(self.model_directory, filename + '.sav',
                                       buffer, 'text/html')
            print('Model Saved')
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' saved. Exited the save_model method of the File_Operation class'
            )

            return 'success'
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in save_model method of the File_Operation class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' could not be saved. Exited the save_model method of the File_Operation class'
            )
            # Chain the cause so the original traceback is not lost.
            raise Exception('Model File ' + filename +
                            ' could not be saved') from e

    def load_model(self, filename):
        """
            Method Name: load_model
            Description: Fetches ``<filename>.sav`` from the model directory
                         through ``self.awsObj.loadObject``.
            Output: Whatever ``loadObject`` returns (presumably the unpickled
                    model -- verify against AwsStorageManagement).
            On Failure: Logs the error and raises Exception chained to the cause.
        """
        self.logger_object.log(
            self.file_object,
            'Entered the load_model method of the File_Operation class')
        try:
            loaded = self.awsObj.loadObject(self.model_directory,
                                            filename + '.sav')
            print('Model load done')
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' loaded. Exited the load_model method of the File_Operation class'
            )
            return loaded
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in load_model method of the File_Operation class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object, 'Model File ' + filename +
                ' could not be loaded. Exited the load_model method of the File_Operation class'
            )
            # Chain the cause so the original traceback is not lost.
            raise Exception('Model File ' + filename +
                            ' could not be loaded') from e

    def find_correct_model_file(self, cluster_number):
        """
            Method Name: find_correct_model_file
            Description: Lists the model directory and selects the file whose name
                         contains ``str(cluster_number)``. When several files
                         match, the last one listed wins.
            Output: The matching file name truncated at its first '.'
            On Failure: Logs the error and raises Exception chained to the cause;
                        this includes the case where no file matches.
        """
        self.logger_object.log(
            self.file_object,
            'Entered the find_correct_model_file method of the File_Operation class'
        )
        try:
            self.cluster_number = cluster_number
            self.folder_name = self.model_directory
            self.list_of_model_files = []
            self.list_of_files = self.awsObj.listDirFiles(self.folder_name)
            # Reset first: otherwise a lookup with no match would silently
            # return the name found by an earlier call on this instance.
            self.model_name = None
            cluster_token = str(self.cluster_number)
            for file_name in self.list_of_files:
                # Entries that are not strings are skipped rather than
                # aborting the whole search.
                if isinstance(file_name, str) and cluster_token in file_name:
                    self.model_name = file_name
            if self.model_name is None:
                raise LookupError('No model file found for cluster ' +
                                  cluster_token)
            self.model_name = self.model_name.split('.')[0]
            self.logger_object.log(
                self.file_object,
                'Exited the find_correct_model_file method of the File_Operation class.'
            )
            return self.model_name
        except Exception as e:
            self.logger_object.log(
                self.file_object,
                'Exception occured in find_correct_model_file method of the File_Operation class. Exception message:  '
                + str(e))
            self.logger_object.log(
                self.file_object,
                'Exited the find_correct_model_file method of the File_Operation class with Failure'
            )
            # Chain the cause so the original traceback is not lost.
            raise Exception('Could not find the model file for cluster ' +
                            str(cluster_number)) from e