Пример #1
0
    def trainingModel(self):
        """Run the training pipeline and persist the selected model.

        The method loads the data through ``Data_Getter``, splits off the
        ``'Calories'`` label, imputes missing values when the preprocessor
        reports any, removes the columns returned by
        ``get_columns_with_zero_std_deviation``, holds out one third of the
        rows for testing, asks ``Model_Finder`` for the best model and saves
        it through ``File_operation``.

        ``self.file_object`` is closed exactly once, whether training
        succeeds or fails.

        Raises:
            Exception: the original error raised by any pipeline step. It is
                re-raised unchanged (type, message and traceback preserved)
                after 'Unsuccessful End of Training' has been logged.
        """
        self.log_writer.log(self.file_object, 'Start of Training')
        try:
            data = Data_Getter(self.file_object, self.log_writer).get_data()

            preprocessor = Preprocessor(self.file_object, self.log_writer)
            X, Y = preprocessor.separate_label_feature(
                data, label_column_name='Calories')
            if preprocessor.is_null_present(X):
                X = preprocessor.impute_missing_values(X)
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(X)
            X = preprocessor.remove_columns(X, cols_to_drop)

            # Fixed random_state keeps the train/test split reproducible.
            x_train, x_test, y_train, y_test = train_test_split(
                X, Y, test_size=1 / 3, random_state=355)

            model_finder = Model_Finder(self.file_object, self.log_writer)
            best_model_name, best_model = model_finder.get_best_model(
                x_train, y_train, x_test, y_test)

            file_op = File_operation(self.file_object, self.log_writer)
            file_op.save_model(best_model, best_model_name)

            self.log_writer.log(self.file_object, 'Successful End of Training')
        except Exception:
            self.log_writer.log(self.file_object,
                                'Unsuccessful End of Training')
            # Bare ``raise`` keeps the real cause; ``raise Exception`` would
            # replace it with an empty, message-less exception.
            raise
        finally:
            # Single cleanup point for both the success and failure paths.
            self.file_object.close()
Пример #2
0
    def predictFromModel(self):
        """Predict with the saved model and write the predictions to CSV.

        The previous prediction file is removed via
        ``self.pred_data_val.deletePredictionFile()``. The prediction data is
        then loaded, imputed when the preprocessor reports nulls, and stripped
        of the columns returned by ``get_columns_with_zero_std_deviation``.
        The model stored as ``'my_model'`` predicts on the feature part of
        the data, i.e. everything except the ``'Calories'`` column.

        Returns:
            tuple: ``(path, preview)`` where ``path`` is the CSV file the
            predictions were written to and ``preview`` is a JSON string
            (``orient="records"``) of the first rows of the predictions.

        Raises:
            Exception: any error raised by a step is logged and re-raised
                unchanged.
        """
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            data_getter = Data_Getter_Prediction(self.file_object,
                                                 self.log_writer)
            data = data_getter.get_data()

            preprocessor = Preprocessor(self.file_object, self.log_writer)
            if preprocessor.is_null_present(data):
                data = preprocessor.impute_missing_values(data)
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = File_operation(self.file_object, self.log_writer)
            model = file_loader.load_model('my_model')

            # Only the feature part is needed here; the label is discarded.
            X, _ = preprocessor.separate_label_feature(data, 'Calories')
            result = pd.Series(list(model.predict(X.values)),
                               name='Predictions')

            # One definition of the output path, used both for writing and
            # for the return value, so the two can never disagree.
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise

        return path, result.head().to_json(orient="records")
Пример #3
0
    def predictionFromModel(self):
        """Predict per cluster and publish the combined predictions as CSV.

        Pipeline:
          1. Remove the previous prediction file and load the prediction data.
          2. Drop the ``"Unnamed: 0"`` column, impute missing values when the
             preprocessor reports any, and drop the columns returned by
             ``get_columns_with_zero_std_deviation``.
          3. Assign every row to a cluster with the saved ``'KMeans'`` model
             (the ``'Wafer'`` column is excluded from the clustering input).
          4. For each cluster, load the model chosen by
             ``find_correct_model_file`` and predict on that cluster's rows
             only.
          5. Upload the predictions of all clusters as ``predictions.csv`` to
             the ``predictionoutputfile`` container and read the file back.

        Returns:
            str: JSON (``orient="records"``) of the first rows of the
            uploaded prediction file as returned by ``readingcsvfile``.

        Raises:
            Exception: any error raised by a step is logged and re-raised
                unchanged.
        """
        try:
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, "Prediction_Log",
                                'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            # --- data preprocessing ---
            preprocessor = Preprocessor(self.file_object, self.log_writer)
            data = preprocessor.remove_columns(data, ["Unnamed: 0"])
            if preprocessor.is_null_present(data):
                data = preprocessor.impute_missing_values(data)
            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            # --- clustering ---
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            # 'Wafer' is not a feature, so it is excluded from the input.
            data['clusters'] = kmeans.predict(data.drop(['Wafer'], axis=1))

            # --- per-cluster prediction ---
            cluster_results = []
            for i in data['clusters'].unique():
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # Build the features from this cluster's rows only. Dropping
                # the columns from the full ``data`` frame would make the
                # model predict on every row and mispair the predictions
                # with ``wafer_names``.
                cluster_features = cluster_data.drop(
                    labels=['Wafer', 'clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                predictions = list(model.predict(cluster_features))
                cluster_results.append(
                    pandas.DataFrame(list(zip(wafer_names, predictions)),
                                     columns=['Wafer', 'Prediction']))

            if cluster_results:
                result = pandas.concat(cluster_results, ignore_index=True)
            else:
                # No clusters: still produce a well-formed, empty output.
                result = pandas.DataFrame(columns=['Wafer', 'Prediction'])

            # Upload once, after all clusters are processed. The blob name is
            # fixed, so a per-cluster upload would presumably replace the
            # previous cluster's predictions -- TODO confirm the overwrite
            # semantics of ``uploadBlob``.
            self.AzureFunc.uploadBlob("predictionoutputfile",
                                      "predictions.csv",
                                      result.to_csv(header=True))
            output = self.AzureFunc.readingcsvfile("predictionoutputfile",
                                                   "predictions.csv")

            self.log_writer.log(self.file_object, "Prediction_Log",
                                'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object, "Prediction_Log",
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise
        return output.head().to_json(orient="records")
Пример #4
0
        def trainingModel(self):
            """Train and save one model per cluster of the training data.

            The data is loaded, the ``'Wafer'`` and ``"Unnamed: 0"`` columns
            are removed, and the ``'Good/Bad'`` label is split off. Missing
            values are imputed when the preprocessor reports any, and the
            columns returned by ``get_columns_with_zero_std_deviation`` are
            dropped. The features are then clustered with
            ``KMeansClustering``; for every cluster a one-third test split is
            held out, ``Model_Finder`` picks the best model, and the model is
            saved under its name suffixed with the cluster id.
            """
            self.log_writer.log(self.file_object, "ModelTrainingLog",
                                'Start of Training')

            # --- load ---
            raw_data = data_loader.Data_Getter(self.file_object,
                                               self.log_writer).get_data()

            # --- preprocess ---
            preprocessor = Preprocessor(self.file_object, self.log_writer)
            trimmed = preprocessor.remove_columns(raw_data,
                                                  ['Wafer', "Unnamed: 0"])
            X, Y = preprocessor.separate_label_feature(
                trimmed, label_column_name='Good/Bad')
            if preprocessor.is_null_present(X):
                X = preprocessor.impute_missing_values(X)
            constant_cols = preprocessor.get_columns_with_zero_std_deviation(X)
            X = preprocessor.remove_columns(X, constant_cols)

            # --- cluster ---
            kmeans = clustering.KMeansClustering(self.file_object,
                                                 self.log_writer)
            cluster_count = kmeans.elbow_plot(X)
            X = kmeans.create_clusters(X, cluster_count)

            # Attach the labels so each cluster subset carries its targets.
            X['Labels'] = Y

            # --- one model per cluster ---
            for cluster_id in X['Cluster'].unique():
                members = X[X['Cluster'] == cluster_id]

                # Features are everything except the bookkeeping columns.
                x_train, x_test, y_train, y_test = train_test_split(
                    members.drop(['Labels', 'Cluster'], axis=1),
                    members['Labels'],
                    test_size=1 / 3,
                    random_state=355)

                finder = tuner.Model_Finder(self.file_object, self.log_writer)
                best_model_name, best_model = finder.get_best_model(
                    x_train, y_train, x_test, y_test)

                # The cluster id in the file name keeps the models apart.
                saver = file_methods.File_Operation(self.file_object,
                                                    self.log_writer)
                saver.save_model(best_model, best_model_name + str(cluster_id))

            self.log_writer.log(self.file_object, "ModelTrainingLog",
                                'Successful End of Training')