示例#1
0
class prediction:
    """Run the clustered prediction pipeline and store the predictions as CSV.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict per KMeans cluster and write all predictions to one CSV file.

        Returns:
            The path of the CSV file that received the predictions.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)

            # Impute only when the preprocessor reports missing values.
            has_nulls, null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = prep.impute_missing_values(frame, null_columns)

            features = prep.scale_numerical_columns(frame)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            features['clusters'] = kmeans.predict(features)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            predictions = []
            for cluster_id in features['clusters'].unique():
                in_cluster = features['clusters'] == cluster_id
                members = features[in_cluster].drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                model = model_store.load_model(model_name)
                predictions.extend(model.predict(members))

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # Opened in append mode; the file was deleted at the start.
            final.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Clustered prediction pipeline that also returns a JSON preview.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Drop, encode and scale the data, then predict per KMeans cluster.

        Returns:
            tuple: ``(path, preview)`` where ``path`` is the CSV file that
            received the predictions and ``preview`` is the JSON string
            (``orient="records"``) of ``head()`` of the prediction frame.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)

            unused_columns = [
                'id', 'region', 'url', 'region_url', 'image_url', 'state',
                'type', 'dogs_allowed'
            ]
            frame = prep.dropUnnecessaryColumns(frame, unused_columns)

            # Encode categorical columns before scaling.
            frame = prep.encodeCategoricalValuesPrediction(frame)

            scaled_values = prep.standardScalingData(frame)
            data_scaled = pandas.DataFrame(scaled_values,
                                           columns=frame.columns)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            data_scaled['clusters'] = kmeans.predict(data_scaled)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            collected = []
            for cluster_id in data_scaled['clusters'].unique():
                in_cluster = data_scaled['clusters'] == cluster_id
                members = data_scaled[in_cluster].drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                model = model_store.load_model(model_name)
                collected.extend(model.predict(members.values))

            result = pandas.DataFrame(collected, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            # Opened in append mode; the file was deleted at the start.
            result.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
示例#3
0
class prediction:
    """Clustered prediction pipeline returning the path, JSON and the frame.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Impute, log-transform and scale the data, then predict per cluster.

        Returns:
            tuple: ``(path, json_data, result)`` where ``path`` is the CSV
            file that was written, ``json_data`` is the prediction frame
            serialised with ``orient='values'`` and ``result`` is the
            prediction ``DataFrame`` itself.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_getter(self.file_object,
                                                        self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)

            # Only the flag is used; the column list is not needed here.
            has_nulls, _null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = prep.impute_missing_values(frame)

            frame = prep.logTransformation(frame)

            scaled_values = prep.standardScalingData(frame)
            data_scaled = pandas.DataFrame(scaled_values,
                                           columns=frame.columns)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            data_scaled['clusters'] = kmeans.predict(data_scaled)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            collected = []
            for cluster_id in data_scaled['clusters'].unique():
                in_cluster = data_scaled['clusters'] == cluster_id
                members = data_scaled[in_cluster].drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                model = model_store.load_model(model_name)
                collected.extend(model.predict(members.values))

            result = pandas.DataFrame(collected, columns=['Predictions'])
            json_data = result.to_json(orient='values')
            print(json_data)
            path = "Prediction_Output_File/Predictions.csv"
            # No mode is passed, so to_csv uses its default write mode.
            result.to_csv(path, header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, json_data, result
class prediction:
    """Wafer prediction pipeline: cluster the rows, then predict per cluster.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``. When it
                is ``None`` no validation helper is created.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        # NOTE(review): with path=None, self.pred_data_val is never set and
        # predictionFromModel fails on its first line - confirm that callers
        # always pass a path before relying on the None case.
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict per KMeans cluster and append each cluster's rows to the CSV.

        Returns:
            tuple: ``(path, preview)`` where ``path`` is the CSV file that
            received the predictions and ``preview`` is the JSON string
            (``orient="records"``) of ``head()`` of the frame built for the
            last processed cluster (an empty frame if there was no cluster).

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            # 'Wafer' is kept out of the model inputs; it is only used to
            # label the output rows.
            clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
            data['clusters'] = clusters
            clusters = data['clusters'].unique()

            # Bound before the loop so the return statement is valid even
            # when there is no cluster to iterate over.
            path = "Prediction_Output_File/Predictions.csv"
            result = pandas.DataFrame(columns=['Wafer', 'Prediction'])
            write_header = True
            for i in clusters:
                # Rows that belong to this cluster, and their wafer names.
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # Fix: derive the model input from this cluster's rows. The
                # previous code dropped the columns from the full data set,
                # so every model predicted on all rows and zip() paired the
                # wafer names with unrelated predictions.
                cluster_data = cluster_data.drop(['Wafer', 'clusters'],
                                                 axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                pred_values = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, pred_values)),
                                          columns=['Wafer', 'Prediction'])
                # Append each cluster, writing the header row only once so
                # the file stays a single well-formed CSV table.
                result.to_csv(path, header=write_header, mode='a+')
                write_header = False
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
示例#5
0
class prediction:
    """Clustered prediction pipeline that drops incomplete rows.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Clean, encode and scale the data, then predict per KMeans cluster.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)
            unused_columns = [
                "serial", "rate", "listed_in(type)", "listed_in(city)"
            ]
            frame = prep.dropUnnecessaryColumns(frame, unused_columns)

            # Rows with any missing value are discarded, not imputed.
            has_nulls, _null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = frame.dropna(how='any')

            # Encode categorical columns before scaling.
            frame = prep.encodeCategoricalValues(frame)

            scaled_values = prep.standardScalingData(frame)
            data_scaled = pandas.DataFrame(scaled_values,
                                           columns=frame.columns)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            data_scaled['clusters'] = kmeans.predict(data_scaled)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            collected = []
            for cluster_id in data_scaled['clusters'].unique():
                in_cluster = data_scaled['clusters'] == cluster_id
                members = data_scaled[in_cluster].drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                model = model_store.load_model(model_name)
                collected.extend(model.predict(members.values))

            result = pandas.DataFrame(collected, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # No mode is passed, so to_csv uses its default write mode.
            result.to_csv(path, header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
示例#6
0
class prediction:
    """Clustered prediction pipeline working on a fixed list of columns.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Select the model columns, cluster the rows and predict per cluster.

        Returns:
            tuple: ``(path, preview)`` where ``path`` is the CSV file that
            received the predictions and ``preview`` is the JSON string
            (``orient="records"``) of ``head()`` of the prediction frame.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            # Not used below; kept because the constructor's side effects
            # (if any) are not visible from this file.
            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            data.replace(-1, 1, inplace=True)
            # Fix: take an explicit copy. Adding the 'clusters' column to a
            # frame obtained by column selection triggers pandas'
            # SettingWithCopy handling (a warning, or an error when
            # chained_assignment is set to 'raise').
            new_data = data[[
                'H18', 'F76', 'F46', 'G57', 'C13', 'A71', 'E115', 'F56', 'I59',
                'A91'
            ]].copy()

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            new_data['clusters'] = kmeans.predict(new_data)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            result = []
            for i in new_data['clusters'].unique():
                cluster_data = new_data[new_data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result.extend(model.predict(cluster_data))
            result = pandas.DataFrame(result, columns=['Prediction'])
            path = "Prediction_Output_File/Predictions.csv"
            # Opened in append mode; the file was deleted at the start.
            result.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
示例#7
0
class prediction:
    """Wafer prediction pipeline: cluster the rows, then predict per cluster.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``. When it
                is ``None`` no validation helper is created.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        # NOTE(review): with path=None, self.pred_data_val is never set and
        # predictionFromModel fails on its first line - confirm that callers
        # always pass a path before relying on the None case.
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Predict per KMeans cluster and append each cluster's rows to the CSV.

        Returns:
            tuple: ``(path, preview)`` where ``path`` is the CSV file that
            received the predictions and ``preview`` is the JSON string
            (``orient="records"``) of ``head()`` of the frame built for the
            last processed cluster (an empty frame if there was no cluster).

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            # 'Wafer' is kept out of the model inputs; it is only used to
            # label the output rows.
            clusters = kmeans.predict(data.drop(['Wafer'], axis=1))
            data['clusters'] = clusters
            clusters = data['clusters'].unique()

            # Bound before the loop so the return statement is valid even
            # when there is no cluster to iterate over.
            path = "Prediction_Output_File/Predictions.csv"
            result = pandas.DataFrame(columns=['Wafer', 'Prediction'])
            write_header = True
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                wafer_names = list(cluster_data['Wafer'])
                # Fix: derive the model input from this cluster's rows. The
                # previous code dropped 'Wafer' from the full data set, so
                # every model predicted on all rows and zip() paired the
                # wafer names with unrelated predictions.
                cluster_data = cluster_data.drop(labels=['Wafer'], axis=1)
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                predicted = list(model.predict(cluster_data))
                result = pandas.DataFrame(list(zip(wafer_names, predicted)),
                                          columns=['Wafer', 'Prediction'])
                # Append each cluster, writing the header row only once so
                # the file stays a single well-formed CSV table.
                result.to_csv(path, header=write_header, mode='a+')
                write_header = False
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path, result.head().to_json(orient="records")
class prediction:
    """Prediction pipeline that scores all rows with the saved CatBoost model.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Preprocess the data, predict with 'CatBoost' and write yes/no labels.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            # Drop the 'Unnamed: 0' column when the loaded data has one.
            if 'Unnamed: 0' in data.columns:
                data.drop('Unnamed: 0', axis=1, inplace=True)

            preprocessor_cus = preprocess_cus.Preprocessor_cus(
                self.file_object, self.log_writer)
            data = preprocessor_cus.drop_column(data)

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            data = preprocessor.replaceInvalidValuesWithNull(data)

            # Encode categorical columns.
            data = preprocessor_cus.test_data_encode(data)

            # Only the flag is used; the column list is not needed here.
            is_null_present, _cols_with_missing_values = (
                preprocessor.is_null_present(data))
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)

            model = file_loader.load_model('CatBoost')
            result = pandas.DataFrame(list(model.predict(data)),
                                      columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # Fix: assign the mapped column back. The previous chained
            # `result['Predictions'].replace(..., inplace=True)` acts on a
            # temporary object and leaves `result` unchanged when pandas
            # Copy-on-Write is active.
            result['Predictions'] = result['Predictions'].replace({
                0: "no",
                1: "yes"
            })
            # No mode is passed, so to_csv uses its default write mode.
            result.to_csv(path, header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Clustered prediction pipeline that writes '<=50K' / '>50K' labels.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Clean, impute, scale and encode the data, then predict per cluster.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, ['education'])
            data = preprocessor.remove_unwanted_spaces(data)
            # '?' marks a missing value; turn it into NaN for imputation.
            # Fix: the `np.NaN` alias was removed in NumPy 2.0; `np.nan` is
            # the spelling that works on every NumPy version.
            data.replace('?', np.nan, inplace=True)

            is_null_present, cols_with_missing_values = (
                preprocessor.is_null_present(data))
            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

            # Model input: scaled numerical columns next to the encoded
            # categorical columns.
            scaled_num_df = preprocessor.scale_numerical_columns(data)
            cat_df = preprocessor.encode_categorical_columns(data)
            X = pd.concat([scaled_num_df, cat_df], axis=1)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            X['clusters'] = kmeans.predict(X)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            predictions = []
            for i in X['clusters'].unique():
                cluster_data = X[X['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                # 0 maps to '<=50K'; every other value maps to '>50K'.
                predictions.extend(
                    '<=50K' if res == 0 else '>50K'
                    for res in model.predict(cluster_data))

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # Opened in append mode; the file was deleted at the start.
            final.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
示例#10
0
class prediction:
    """Clustered prediction pipeline that writes tree-species names.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Scale the data, predict per KMeans cluster and write label names.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        # Numeric model output -> name written to the CSV file.
        species_by_code = {
            0: "Lodgepole_Pine",
            1: "Spruce_Fir",
            2: "Douglas_fir",
            3: "Krummholz",
            4: "Ponderosa_Pine",
            5: "Aspen",
            6: "Cottonwood_Willow",
        }
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)
            frame = prep.scaleData(frame)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            frame['clusters'] = kmeans.predict(frame)

            labels = []
            for cluster_id in frame['clusters'].unique():
                in_cluster = frame['clusters'] == cluster_id
                members = frame[in_cluster].drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                model = model_store.load_model(model_name)
                for code in model.predict(members):
                    # A value outside the table adds no row to the output.
                    if code in species_by_code:
                        labels.append(species_by_code[code])

            result = pandas.DataFrame(labels, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # Opened in append mode; the file was deleted at the start.
            result.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')

        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Clustered prediction pipeline with null replacement and encoding.

    The prediction log is opened in append mode when the object is created
    and the handle stays on the instance; nothing in this class closes it.
    """

    def __init__(self, path):
        """Open the prediction log and build the logger and validation helper.

        Args:
            path: Passed unchanged to ``Prediction_Data_validation``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Clean, impute and encode the data, then predict per KMeans cluster.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: Any error raised by a pipeline step is logged to the
                prediction log and then re-raised.
        """
        try:
            # Remove the prediction file left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)
            frame = prep.dropUnnecessaryColumns(frame, ['veiltype'])

            # Invalid entries become nulls so the imputation step below can
            # deal with them.
            frame = prep.replaceInvalidValuesWithNull(frame)

            has_nulls, null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = prep.impute_missing_values(frame, null_columns)

            # Encode categorical columns.
            frame = prep.encodeCategoricalValuesPrediction(frame)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')

            # Tag every row with its cluster so it can be routed to the
            # model saved for that cluster.
            frame['clusters'] = kmeans.predict(frame)

            # NOTE(review): predictions are collected cluster by cluster, so
            # the output rows follow cluster order rather than input order.
            collected = []
            for cluster_id in frame['clusters'].unique():
                in_cluster = frame['clusters'] == cluster_id
                members = frame[in_cluster].drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                model = model_store.load_model(model_name)
                collected.extend(model.predict(members))

            result = pandas.DataFrame(collected, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            # No mode is passed, so to_csv uses its default write mode.
            result.to_csv(path, header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
class prediction:
    """Score the prediction input with the saved 'RandomForestClassifier' model.

    The pipeline loads the input through ``Data_Getter_Pred``, applies the
    project's ``Preprocessor`` steps, scales the features, predicts, and
    writes the result to ``Prediction_Output_File/Predictions.csv``.
    """

    def __init__(self, path):
        """Open the prediction log (append mode) and build the validator for ``path``."""
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            # Remove the output left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()
            self.log_writer.log(self.file_object, 'Let me chk data')
            print(frame.head())

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)

            # Feature engineering; each helper column is dropped once it has
            # been consumed by the step before it.
            frame = prep.binning(frame)
            frame = prep.dropUnnecessaryColumns(frame, ['Ageband'])
            frame = prep.combiningfornewfeature(frame)
            frame = prep.dropUnnecessaryColumns(
                frame, ['Parch', 'Sibsp', 'FamilySize'])
            frame = prep.convertCategoricalfeatureIntonumeric(frame)
            frame = prep.binningfare(frame)
            frame = prep.dropUnnecessaryColumns(
                frame, ['FareBand', 'PassengerId'])
            print(frame.head())

            # Impute only when the null check reports missing values; the
            # list of affected columns is not needed by this imputer call.
            has_nulls, _null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = prep.impute_missing_values(frame)

            self.log_writer.log(self.file_object, '--Fati-03.5--')

            scaled_values = prep.standardScalingData(frame)
            scaled = pandas.DataFrame(scaled_values, columns=frame.columns)

            self.log_writer.log(self.file_object, '--Fati-04--,It worked :)')

            # Kept from the original flow; the resulting array is not read
            # again below (the scaled frame is what gets scored).
            frame = frame.to_numpy()

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            forest = model_store.load_model('RandomForestClassifier')
            self.log_writer.log(self.file_object, '--Fati-05--')

            predicted = forest.predict(scaled)
            self.log_writer.log(self.file_object, '--Fati-06--')
            self.log_writer.log(self.file_object, '--Fati-08--')
            print(predicted)

            path = "Prediction_Output_File/Predictions.csv"
            result = pandas.DataFrame(predicted, columns=['Predictions'])
            result.to_csv(path, header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                f'Error occured while running the prediction!! Error:: {ex}')
            raise ex
        return path
示例#13
0
class prediction:
    """Cluster-wise prediction pipeline producing 'Y'/'N' labels.

    The input is cleaned, encoded and scaled, assigned to clusters by the
    saved 'KMeans' model, and each cluster is scored by the model file that
    ``find_correct_model_file`` selects for it.
    """

    def __init__(self, path):
        """Open the prediction log (append mode) and build the validator for ``path``."""
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            The path of the CSV file the predictions were appended to.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            # Remove the output left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)
            data = preprocessor.remove_columns(data, [
                'policy_number', 'policy_bind_date', 'policy_state',
                'insured_zip', 'incident_location', 'incident_date',
                'incident_state', 'incident_city', 'insured_hobbies',
                'auto_make', 'auto_model', 'auto_year', 'age',
                'total_claim_amount'
            ])
            # '?' marks a missing value in the input. `np.nan` is used
            # instead of the `np.NaN` alias, which NumPy 2.0 removed.
            data.replace('?', np.nan, inplace=True)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)

            if is_null_present:
                data = preprocessor.impute_missing_values(
                    data, cols_with_missing_values)

            data = preprocessor.encode_categorical_columns(data)
            data = preprocessor.scale_numerical_columns(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            # Tag every row with its cluster, then score cluster by cluster.
            data['clusters'] = kmeans.predict(data)
            predictions = []
            # NOTE(review): labels are collected in cluster order, so the
            # output rows do not follow the input row order.
            for i in data['clusters'].unique():
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = model.predict(cluster_data)
                # Class 0 is reported as 'N'; every other class as 'Y'.
                predictions.extend('N' if res == 0 else 'Y' for res in result)

            final = pd.DataFrame(predictions, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            final.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            # Bare raise re-raises the active exception with its traceback intact.
            raise
        return path
class prediction:
    """Cluster-wise prediction pipeline producing ring-count bucket labels.

    Rows are clustered by the saved 'KMeans' model (without the 'Sex'
    column), then each cluster is scored by the model file that
    ``find_correct_model_file`` selects for it.
    """

    def __init__(self,path):
        """Open the prediction log (append mode) and build the validator for ``path``."""
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            A ``(path, final)`` tuple: the CSV path the predictions were
            appended to and the DataFrame holding the 'Predictions' column.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            self.pred_data_val.deletePredictionFile() #deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object,'Start of Prediction')
            data_getter=data_loader_prediction.Data_Getter_Pred(self.file_object,self.log_writer)
            data=data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # Called with an empty column list, as in the original flow.
            data = preprocessor.remove_columns(data,[])
            # '?' marks a missing value in the input. `np.nan` is used
            # instead of the `np.NaN` alias, which NumPy 2.0 removed.
            data.replace('?', np.nan, inplace=True)

            # check if missing values are present in the dataset
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)

            # if missing values are there, replace them appropriately.
            if is_null_present:
                data = preprocessor.impute_missing_values(data, cols_with_missing_values)  # missing value imputation

            # Clustering runs on a copy without 'Sex'; the column stays in
            # `data` and is encoded only after the cluster labels are attached.
            df=data.copy()
            df.drop(labels=['Sex'],axis=1,inplace=True)

            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')

            clusters=kmeans.predict(df)
            data['clusters']=clusters
            data = preprocessor.encode_categorical_columns(data)
            clusters=data['clusters'].unique()
            predictions=[]
            # NOTE(review): labels are collected in cluster order, so the
            # output rows do not follow the input row order.
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'],axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = (model.predict(np.array(cluster_data)))
                # Translate the numeric class into its display label.
                for res in result:
                    if res == 0:
                        predictions.append('1-8 Rings')
                    elif res == 1:
                        predictions.append('11+ Rings')
                    else:
                        predictions.append('9-10 Rings')

            final= pd.DataFrame(list(zip(predictions)),columns=['Predictions'])
            path="Prediction_Output_File/Predictions.csv"
            final.to_csv("Prediction_Output_File/Predictions.csv",header=True,mode='a+') #appends result to prediction file
            self.log_writer.log(self.file_object,'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            # Bare raise re-raises the active exception with its traceback intact.
            raise
        return path , final
示例#15
0
class prediction:
    """Cluster-wise prediction pipeline that stores its output through AWS storage.

    After scoring, the predictions are saved with ``AwsStorageManagement``
    and a completion e-mail is triggered.
    """

    def __init__(self, path):
        """Set up logging, storage and mail helpers.

        The data validator is only created when ``path`` is not ``None``.
        """
        # Here the "file object" handed to the logger is a plain name string.
        self.file_object = 'Prediction_Log'
        self.log_writer = logger.App_Logger()
        self.awsObj = AwsStorageManagement()
        self.emailObj = email()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            The string "Prediction_Output_File/Predictions.csv".

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            # Remove the output left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)
            frame = prep.dropUnnecessaryColumns(frame, ['veil-type'])
            frame = prep.replaceInvalidValuesWithNull(frame)

            has_nulls, null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = prep.impute_missing_values(frame, null_columns)

            # Encode the categorical columns for prediction.
            frame = prep.encodeCategoricalValuesPrediction(frame)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            kmeans = model_store.load_model('KMeans')
            print(kmeans.labels_)

            # Tag every row with its cluster, then score cluster by cluster.
            frame['clusters'] = kmeans.predict(frame)
            predicted = []
            for cluster_id in frame['clusters'].unique():
                subset = frame[frame['clusters'] == cluster_id]
                subset = subset.drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                cluster_model = model_store.load_model(model_name)
                predicted.extend(cluster_model.predict(subset))

            result = pandas.DataFrame(predicted, columns=['Predictions'])
            path = "Prediction_Output_File/Predictions.csv"
            self.awsObj.saveDataframeToCsv('Prediction_Output_File',
                                           'Predictions.csv', result)
            self.log_writer.log(self.file_object, 'End of Prediction')

            # Notify by mail that the run finished.
            subject = 'MushroomTypeClassifier - Prediction Done | ' + str(
                datetime.now())
            body = 'Model Prediction Done Successfully... <br><br> Thanks and Regards, <br> Rahul Garg'
            msg = MIMEMultipart()
            msg['Subject'] = subject
            msg.attach(MIMEText(body, 'html'))
            to_addr = ['*****@*****.**']
            self.emailObj.trigger_mail(to_addr, [], msg)
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                f'Error occured while running the prediction!! Error:: {ex}')
            raise ex
        return path
class prediction:
    """Cluster-wise prediction pipeline with database logging and Azure blob output."""

    def __init__(self, path, execution_id):
        """Set up the DB logger and blob helper for this execution.

        The data validator is only created when ``path`` is not ``None``.
        """
        self.execution_id = execution_id
        self.log_database = "strength_prediction_log"
        self.log_collection = "prediction_log"
        self.log_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path, execution_id)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            A ``(path, payload)`` tuple. When the loaded data is empty,
            ``path`` is ``""`` and ``payload`` is an explanatory message;
            otherwise ``path`` is the output directory name and ``payload``
            is the JSON (records orientation) of the first result rows.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            # Remove the output left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            print("start of prediction")

            loader = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            frame = loader.get_data()

            path = ""
            # Nothing to score: report it and leave early.
            if len(frame) == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction existing prediction method"
                )
                return path, "No data was present to perform prediction"

            prep = preprocessing.Preprocessor(self.log_database,
                                              self.log_collection,
                                              self.execution_id)

            has_nulls, _null_columns = prep.is_null_present(frame)
            if has_nulls:
                frame = prep.impute_missing_values(frame)

            frame = prep.logTransformation(frame)
            print("after log Transformation")
            print(frame)

            # Scale the prediction data, keeping the original column names.
            scaled_values = prep.standardScalingData(frame)
            scaled = pandas.DataFrame(scaled_values, columns=frame.columns)

            print("standard scaling for data completed")
            print(scaled)

            model_store = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            kmeans = model_store.load_model('kkmeans')

            # Tag every row with its cluster, then score cluster by cluster.
            scaled['clusters'] = kmeans.predict(scaled)
            predicted = []
            for cluster_id in scaled['clusters'].unique():
                subset = scaled[scaled['clusters'] == cluster_id]
                subset = subset.drop(['clusters'], axis=1)
                model_name = model_store.find_correct_model_file(cluster_id)
                print(model_name)
                cluster_model = model_store.load_model(model_name)
                predicted.extend(cluster_model.predict(subset.values))

            result = pandas.DataFrame(predicted,
                                      columns=['strength-Predictions'])

            print("results after prediction with prediction columns")
            print(result)

            path = "Prediction-Output-File"
            self.az_blob_mgt.saveDataFrametoCSV(
                path,
                "cement-strength-prediction.csv",
                result,
                header=True,
                mode="a+")

            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of Prediction')
        except Exception as ex:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                f'Error occured while running the prediction!! Error:: {ex}')
            raise ex
        return path, result.head().to_json(orient="records")
示例#17
0
class prediction:
    """Prediction entry points backed by the saved 'XGBOOST' model.

    ``predictionFromModel`` scores the validated batch input and writes a
    CSV; ``prediction_from_user`` scores a single record read from the web
    request form and stores it in MongoDB.
    """

    def __init__(self, path):
        """Open the prediction log (append mode) and build the validator for ``path``."""
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Score the batch input and write the predictions to CSV.

        Returns:
            The path of the CSV file the predictions were written to.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            # Remove the output left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object,
                                                      self.log_writer)

            is_null_present, cols_with_missing_values = preprocessor.is_null_present(
                data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            # NOTE(review): the encoded frame is never used below; the model
            # is fed `data` after the time features are added. Confirm
            # whether the encoded frame was meant to be scored instead.
            data_scaled = preprocessor.encodeCategoricalValuesPrediction(data)
            data = preprocessor.create_timefeatures(data)

            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)

            model = file_loader.load_model('XGBOOST')
            # Fix: the original line was `result.model.predict(data)`, which
            # read `result` before assignment and discarded the predictions.
            result = model.predict(data)

            result = pandas.DataFrame(result, columns=['Predictions'])
            result['Item_Identifier'] = data["Item_Identifier"]
            result["Outlet_Identifier"] = data["Outlet_Identifier"]
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % ex)
            raise
        return path

    def prediction_from_user(self):
        """Score one record taken from the request form and persist it.

        Reads the inputs from ``request.form``, builds a one-row DataFrame,
        one-hot encodes the weather with the pickled encoder in 'OHE.pkl',
        predicts with the 'XGBOOST' model and inserts the inputs together
        with the predicted value into the 'TRAFFIC' collection.

        Returns:
            The array returned by ``model.predict`` for the single row.

        Raises:
            Exception: any error is logged and re-raised.
        """
        try:
            dbConn = pymongo.MongoClient(
                "mongodb://localhost:27017/")  # opening a connection to Mongo
            db = dbConn['HOURLYTRAFFICDETAILS']

            # reading the inputs given by the user
            holiday = request.form['holiday']
            Holiday_True = 0 if holiday == 'None' else 1
            temp = float(request.form['temp'])
            # Fix: rain_1h / snow_1h were used below but their reads had been
            # commented out. They are optional here and default to 0.0 so
            # forms without these fields keep working.
            # NOTE(review): assumes request.form offers a dict-style .get().
            rain_1h = float(request.form.get('rain', 0.0))
            snow_1h = float(request.form.get('snow', 0.0))
            clouds_all = int(request.form['cloud'])
            weather_main = request.form['weather']  # encoded below
            date_time = request.form['Date_time']
            previous_hour = request.form['previoushourtraffic']
            # Renamed from `list` so the builtin is no longer shadowed.
            known_weather = ['CLOUDS', 'MIST', 'RAIN', 'SNOW', 'OTHERS']

            # NOTE(review): the feature key is 'previous_ihr' here but the
            # stored document uses 'previous_1hr'; left unchanged because the
            # model's expected column name is not visible from this code.
            dict_pred = {
                'holiday': Holiday_True,
                'temp': temp,
                'rain_1h': rain_1h,
                'snow_1h': snow_1h,
                'clouds_all': clouds_all,
                'date_time': date_time,
                'weather_main': weather_main,
                'previous_ihr': previous_hour
            }

            df = pd.DataFrame(dict_pred, index=[0])
            # Derive calendar features from the timestamp.
            df['date_time'] = pd.to_datetime(df.date_time)
            df['weekday'] = df.date_time.dt.weekday  # Monday is 0 and Sunday is 6
            df['hour'] = df.date_time.dt.hour
            df['month'] = df.date_time.dt.month
            df['year'] = df.date_time.dt.year
            # Fix: a Series has no .upper(); the string accessor is required.
            # Values outside the known set collapse to 'OTHERS'.
            df['weather_main'] = np.where(
                df['weather_main'].str.upper().isin(known_weather),
                df['weather_main'], 'OTHERS')

            # Applying one-hot encoding with the pickled encoder.
            # NOTE: pickle must only be loaded from a trusted file.
            filename = 'OHE.pkl'
            with open(filename, 'rb') as encoder_file:
                ohe = pickle.load(encoder_file)
            weather_main_df = pd.DataFrame(
                ohe.transform(df[['weather_main']]).toarray())
            # Newer scikit-learn only provides get_feature_names_out; fall
            # back to the legacy name on older releases.
            feature_names = getattr(ohe, 'get_feature_names_out',
                                    None) or ohe.get_feature_names
            weather_main_df.columns = feature_names(['weather'])
            # Join the encoded columns and drop the raw categorical one.
            df = df.join(weather_main_df)
            df.drop('weather_main', axis=1, inplace=True)
            df.set_index('date_time', inplace=True)

            # Fix: `file_loader` and `xgb` were undefined in this method; load
            # the model the same way predictionFromModel does.
            file_loader = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            model = file_loader.load_model('XGBOOST')

            predictionout = model.predict(df)
            table = db['TRAFFIC']
            mydict = {
                'holiday': Holiday_True,
                'temp': temp,
                'rain_1h': rain_1h,
                'snow_1h': snow_1h,
                'clouds_all': clouds_all,
                'date_time': str(date_time),
                'weather_main': weather_main,
                # Fix: the original stored the `prediction` class itself;
                # store the predicted value as a plain float instead.
                'traffic_volume': float(predictionout[0]),
                'previous_1hr': previous_hour
            }
            table.insert_one(mydict)
            return predictionout
        except Exception as e:
            self.log_writer.log(
                self.file_object,
                'Error occured while running the prediction!! Error:: %s' % e)
            raise
class prediction:
    """Single-model prediction pipeline producing 'Yes'/'No' labels.

    The input is trimmed, encoded, scaled and PCA-transformed before being
    scored by the model file that ``find_correct_model_file`` returns.
    """

    def __init__(self, path):
        """Open the prediction log (append mode) and build the validator for ``path``."""
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            The path of the CSV file the predictions were appended to.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            # Remove the output left behind by the previous run.
            self.pred_data_val.deletePredictionFile()
            self.log_writer.log(self.file_object, 'Start of Prediction')

            loader = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer)
            frame = loader.get_data()

            prep = preprocessing.Preprocessor(self.file_object,
                                              self.log_writer)
            # oe_constraint is dropped because it was removed in training.
            unused_columns = ["Index_Product", "sku", "oe_constraint"]
            frame = prep.remove_columns(frame, unused_columns)

            frame = prep.encodeCategoricalValuesPred(frame)
            # Rows with missing values are discarded rather than imputed.
            has_nulls = prep.is_null_present(frame)
            if has_nulls:
                frame = frame.dropna()

            frame = prep.scale_numerical_columns(frame)
            frame = prep.pcaTransformation(frame)

            model_store = file_methods.File_Operation(self.file_object,
                                                      self.log_writer)
            model_name = model_store.find_correct_model_file()
            chosen_model = model_store.load_model(model_name)

            raw_labels = list(chosen_model.predict(frame))
            result = pandas.DataFrame(raw_labels, columns=['Prediction'])
            # Translate the numeric class into its display label.
            label_names = {0: "Yes", 1: "No"}
            result["Prediction"] = result["Prediction"].map(label_names)

            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True, mode='a+')
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                f'Error occured while running the prediction!! Error:: {ex}')
            raise ex
        return path
class prediction:
    """Cluster-wise prediction pipeline keyed by the 'Wafer' column.

    Rows are clustered by the saved 'KMeans' model; each cluster is scored
    by the model file that ``find_correct_model_file`` selects for it and
    the (Wafer, Prediction) pairs are appended to the output CSV.
    """

    def __init__(self, path):
        """Open the prediction log (append mode).

        The data validator is only created when ``path`` is not ``None``.
        """
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", "a+")
        self.log_writer = logger.App_Logger()
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """Run the full prediction pipeline.

        Returns:
            A ``(path, preview)`` tuple: the CSV path the predictions were
            appended to and the JSON (records orientation) of the first rows
            of the last cluster's result.

        Raises:
            Exception: any error raised by a pipeline step is logged and
                re-raised.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, "Start of Prediction")
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.file_object, self.log_writer
            )
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            is_null_present = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(data)
            data = preprocessor.remove_columns(data, cols_to_drop)
            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model("KMeans")

            # The 'Wafer' identifier is excluded from the clustering features.
            clusters = kmeans.predict(data.drop(["Wafer"], axis=1))
            data["clusters"] = clusters
            clusters = data["clusters"].unique()
            # Loop-invariant, so it is set once up front.
            path = "Prediction_Output_File/Predictions.csv"
            for i in clusters:
                cluster_data = data[data["clusters"] == i]
                wafer_names = list(cluster_data["Wafer"])
                # Fix: the original dropped 'Wafer' from the full `data`
                # frame here, so each cluster model scored every row and the
                # zip below paired wafer names with unrelated predictions.
                cluster_data = cluster_data.drop(labels=["Wafer"], axis=1)
                cluster_data = cluster_data.drop(["clusters"], axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                result = pandas.DataFrame(
                    list(zip(wafer_names, result)), columns=["Wafer", "Prediction"]
                )
                # NOTE(review): append mode with header=True writes one
                # header row per cluster into the same file.
                result.to_csv(path, header=True, mode="a+")
            self.log_writer.log(self.file_object, "End of Prediction")
        except Exception as ex:
            self.log_writer.log(
                self.file_object,
                "Error occured while running the prediction!! Error:: %s" % ex,
            )
            raise
        return path, result.head().to_json(orient="records")
class prediction:
    """Cluster-wise prediction pipeline for wafer data.

    Loads the prediction data, pre-processes it, assigns every row to a
    KMeans cluster, runs the model saved for each cluster and hands the
    per-cluster results to ``AzureBlobManagement.saveDataFrametoCSV``.
    Progress and errors are logged through ``App_LoggerDB``.
    """

    def __init__(self, path, execution_id):
        """Set up logging, the blob manager and (optionally) the validator.

        Args:
            path: Passed to ``Prediction_Data_validation``. When ``None`` no
                validator is created.
            execution_id: Identifier forwarded to the logger and to every
                helper created by this class.
        """
        self.execution_id = execution_id
        self.log_database = "wafer_prediction_log"
        self.log_collection = "prediction_log"
        self.log_db_writer = App_LoggerDB(execution_id)
        self.az_blob_mgt = AzureBlobManagement()
        # Always define the attribute so later code can test it instead of
        # failing with AttributeError when no path was supplied.
        self.pred_data_val = None
        if path is not None:
            self.pred_data_val = Prediction_Data_validation(path, execution_id)

    def predictionFromModel(self):
        """Predict for every cluster and persist the results.

        Returns:
            A ``(path, payload)`` tuple. ``path`` is the output location name
            (``""`` when there was no data). ``payload`` is the JSON
            (``orient="records"``) of the first rows of the result frame built
            for the last processed cluster, or an explanatory message when no
            data was available.

        Raises:
            Exception: Any error raised while predicting is logged and then
                re-raised unchanged.
        """
        path = ""
        try:
            # Remove the prediction file left over from the previous run.
            # Skipped when the instance was created without a path.
            if self.pred_data_val is not None:
                self.pred_data_val.deletePredictionFile()
            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(
                self.log_database, self.log_collection, self.execution_id)
            data = data_getter.get_data()
            if len(data) == 0:
                self.log_db_writer.log(
                    self.log_database, self.log_collection,
                    "No data was present to perform prediction existing prediction method"
                )
                return path, "No data was present to perform prediction"

            preprocessor = preprocessing.Preprocessor(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            if preprocessor.is_null_present(data):
                data = preprocessor.impute_missing_values(data)

            cols_to_drop = preprocessor.get_columns_with_zero_std_deviation(
                data)
            data = preprocessor.remove_columns(data, cols_to_drop)

            file_loader = file_methods.File_Operation(self.log_database,
                                                      self.log_collection,
                                                      self.execution_id)
            kmeans = file_loader.load_model('KMeans')

            # The 'Wafer' identifier column is not a feature, so it is
            # excluded before clustering.
            pred_data = data.drop(['Wafer'], axis=1)
            data['clusters'] = kmeans.predict(pred_data)

            for i in data['clusters'].unique():
                # Rows that belong to the current cluster only.
                cluster_rows = data[data['clusters'] == i]
                wafer_names = list(cluster_rows['Wafer'])
                # Build the feature matrix from the *filtered* rows so each
                # prediction lines up with its wafer name; dropping from the
                # full frame here would feed every row to every model.
                cluster_data = cluster_rows.drop(labels=['Wafer', 'clusters'],
                                                 axis=1)
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                result = list(model.predict(cluster_data))
                self.result = pandas.DataFrame(list(zip(wafer_names, result)),
                                               columns=['Wafer', 'Prediction'])
                path = "prediction-output-file"
                # Each cluster's result is written in append mode.
                self.az_blob_mgt.saveDataFrametoCSV(path,
                                                    "prediction.csv",
                                                    self.result,
                                                    header=True,
                                                    mode="a+")

            self.log_db_writer.log(self.log_database, self.log_collection,
                                   'End of prediction')
        except Exception as ex:
            self.log_db_writer.log(
                self.log_database, self.log_collection,
                'Error occured while running the prediction!! Error:: %s' % ex)
            # Bare raise keeps the original traceback intact.
            raise

        return path, self.result.head().to_json(orient="records")