class train_validation:
    """Drive the raw-data validation steps for training files and log progress.

    Progress messages are written through ``logger.App_Logger`` to the file
    ``Training_Logs/Training_Main_Log.txt``.
    """

    def __init__(self, path):
        """Create the helper objects and open the main training log.

        Args:
            path: Passed unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        # Opened in append mode here; train_validation() closes it.
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run the validation steps in order, logging each stage.

        The log file opened in ``__init__`` is closed when this method
        finishes, on success and on failure alike. The handle is not reopened,
        so an instance is meant for a single run.

        Raises:
            Exception: Whatever a step raises is written to the log and then
                re-raised unchanged.
        """
        log = self.log_writer.log
        log_file = self.file_object
        try:
            log(log_file, 'Start of Validation on files for prediction!!')
            # Only the column count is used below; column_names is unpacked to
            # match the two-value return of valuesFromSchema().
            column_names, noofcolumns = self.raw_data.valuesFromSchema()

            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            # validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            log(log_file, "Raw Data Validation Complete!!")

            # NOTE: table creation, insertion and the CSV export through
            # dBOperation are disabled in this variant. The related log
            # messages that were still emitted are kept unchanged.
            log(log_file,
                "Creating Training_Database and tables on the basis of given schema!!!")

            self.raw_data.combinefiles()
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            log(log_file, "Good_Data folder deleted!!!")

            log(log_file, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchiveBad()
            log(log_file, "Bad files moved to archive!! Bad folder Deleted!!")
            log(log_file, "Validation Operation completed!!")
            log(log_file, "Extracting csv file from table")
        except Exception as e:
            log(log_file, str(e))
            raise
        finally:
            # Close on every path so a failing step does not leak the handle.
            log_file.close()
class train_validation:
    """Coordinate validation of raw training files with database-backed logging.

    Every progress message is sent through ``App_LoggerDB`` to the
    ``wafer_training_log`` database, ``training_main_log`` collection.
    """

    def __init__(self, path, execution_id):
        """Build the helper objects used by a validation run.

        Args:
            path: Forwarded to ``Raw_Data_validation``.
            execution_id: Forwarded to each helper that accepts it and stored
                on the instance.
        """
        self.raw_data = Raw_Data_validation(path, execution_id)
        self.dataTransform = dataTransform(execution_id)
        self.dBOperationMongoDB = DbOperationMongoDB(execution_id)

        # Destination of every progress message written by this class.
        self.log_database = "wafer_training_log"
        self.log_collection = "training_main_log"
        self.execution_id = execution_id

        self.logDB_write = App_LoggerDB(execution_id=execution_id)
        self.az_blob_mgt = AzureBlobManagement()

    def train_validation(self):
        """Run validation, transformation, insertion and export in sequence.

        Any exception raised by a step propagates to the caller; nothing is
        caught or logged here.
        """

        def note(message):
            # All messages share the same database/collection target.
            self.logDB_write.log(self.log_database, self.log_collection,
                                 message)

        note('Start of Validation on files!!')

        # valuesFromSchema() yields four values: two stamp lengths used by the
        # file-name check, the column names used by the insert, and the
        # column count used by the width check.
        schema_values = self.raw_data.valuesFromSchema()
        date_stamp_len, time_stamp_len, columns, column_count = schema_values

        filename_pattern = self.raw_data.manualRegexCreation()
        self.raw_data.validationFileNameRaw(filename_pattern, date_stamp_len,
                                            time_stamp_len)
        self.raw_data.validateColumnLength(column_count)
        self.raw_data.validateMissingValuesInWholeColumn()
        note("Raw Data Validation Complete!!")

        note("Starting Data Transforamtion!!")
        self.dataTransform.replaceMissingWithNull()
        print("Missing value with NULL completed")
        note("DataTransformation Completed!!!")

        note("Creating database and collection if not exist then insert record")
        self.dBOperationMongoDB.insertIntoTableGoodData(columns)
        note("Insertion in Table completed!!!")

        # The Good_Data folder deletion call is disabled in this variant; only
        # its two log messages are still emitted.
        note("Deleting Good Data Folder!!!")
        note("Good_Data folder deleted!!!")

        note("Moving bad files to Archive and deleting Bad_Data folder!!!")
        self.raw_data.moveBadFilesToArchiveBad()
        note("Bad files moved to archive!! Bad folder Deleted!!")
        note("Validation Operation completed!!")

        note("Extracting csv file from table")
        self.dBOperationMongoDB.selectingDatafromtableintocsv()
class train_validation:
    """Validate raw training files, load them into the 'Training' database and export a CSV.

    Each stage is delegated to ``Raw_Data_validation``, ``dataTransform`` or
    ``dBOperation``. Progress is written through ``logger.App_Logger`` to
    ``Training_Logs/Training_Main_Log.txt``.
    """

    def __init__(self, path):
        """Create the helper objects and open the main training log.

        Args:
            path: Passed unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        # Opened in append mode here; train_validation() closes it.
        self.file_object = open('Training_Logs/Training_Main_Log.txt', 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, transformation, database load and CSV export.

        The log file opened in ``__init__`` is closed when this method
        finishes, on success and on failure alike. The handle is not reopened,
        so an instance is meant for a single run.

        Raises:
            Exception: Whatever a step raises is written to the log and then
                re-raised unchanged.
        """
        log = self.log_writer.log
        log_file = self.file_object
        try:
            log(log_file, 'Start of Validation')
            # extracting values from the schema
            (LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names,
             noofcolumns) = self.raw_data.valuesFromSchema()
            # getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            # validating filename of the raw files
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            # validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            log(log_file, "Raw Data Validation Complete!!")

            log(log_file, "Starting Data Transforamtion!!")
            # The only transformation in this variant is the quoting step.
            self.dataTransform.addQuotesToStringValuesInColumn()
            log(log_file, "DataTransformation Completed!!!")

            log(log_file,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # create the table from the column names given in the schema
            self.dBOperation.createTableDb('Training', column_names)
            log(log_file, "Table creation Completed!!")

            log(log_file, "Insertion of Data into Table started!!!!")
            # insert csv files in the table
            self.dBOperation.insertIntoTableGoodData('Training')
            log(log_file, "Insertion in Table completed!!!")

            log(log_file, "Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            log(log_file, "Good_Data folder deleted!!!")

            log(log_file, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchiveBad()
            log(log_file, "Bad files moved to archive!! Bad folder Deleted!!")
            log(log_file, "Validation Operation completed!!")

            log(log_file, "Extracting csv file from table")
            # export data in table to csvfile
            self.dBOperation.selectingDatafromtableintocsv('Training')
        except Exception as e:
            # Record the failure so the log does not simply stop mid-run.
            log(log_file, str(e))
            raise
        finally:
            # Close on every path so a failing step does not leak the handle.
            log_file.close()
class train_validation:
    """Validate raw training files, load them into the 'Training' database and export a CSV.

    Each stage is delegated to ``Raw_Data_validation``, ``dataTransform`` or
    ``dBOperation``. Progress is written through ``logger.App_Logger`` to
    ``Training_Logs/Training_Main_Log.txt``.
    """

    def __init__(self, path):
        """Create the helper objects and open the main training log.

        Args:
            path: Passed unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        # Opened in append mode here; train_validation() closes it.
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, transformation, database load and CSV export.

        The log file opened in ``__init__`` is closed when this method
        finishes, on success and on failure alike. The handle is not reopened,
        so an instance is meant for a single run.

        Raises:
            Exception: Whatever a step raises is written to the log and then
                re-raised unchanged.
        """
        log = self.log_writer.log
        log_file = self.file_object
        try:
            log(log_file, 'Start of Validation on files!!')

            # Four schema-derived values: two stamp lengths for the file-name
            # check, the column names for table creation, the column count.
            (LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names,
             noofcolumns) = self.raw_data.valuesFromSchema()
            regex = self.raw_data.manualRegexCreation()

            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            self.raw_data.validateColumnLength(noofcolumns)
            self.raw_data.validateMissingValuesInWholeColumn()
            log(log_file, "Raw Data Validation Complete!!")

            log(log_file, "Starting Data Transforamtion!!")
            self.dataTransform.replaceMissingWithNull()
            log(log_file, "DataTransformation Completed!!!")

            log(log_file,
                "Creating Training_Database and tables on the basis of given schema!!!")
            self.dBOperation.createTableDb('Training', column_names)
            log(log_file, "Table creation Completed!!")

            log(log_file, "Insertion of Data into Table started!!!!")
            self.dBOperation.insertIntoTableGoodData('Training')
            log(log_file, "Insertion in Table completed!!!")

            log(log_file, "Deleting Good Data Folder!!!")
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            log(log_file, "Good_Data folder deleted!!!")

            log(log_file, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            self.raw_data.moveBadFilesToArchiveBad()
            log(log_file, "Bad files moved to archive!! Bad folder Deleted!!")
            log(log_file, "Validation Operation completed!!")

            log(log_file, "Extracting csv file from table")
            self.dBOperation.selectingDatafromtableintocsv('Training')
        except Exception as e:
            # Record the failure so the log does not simply stop mid-run.
            log(log_file, str(e))
            raise
        finally:
            # Close on every path so a failing step does not leak the handle.
            log_file.close()
# Example #5
# 0
class train_validation:
    """Validate raw training files, load them into the 'Training' database and export a CSV.

    Each stage is delegated to ``Raw_Data_validation``, ``dataTransform`` or
    ``dBOperation``. Progress is written through ``logger.App_Logger`` to
    ``Training_Logs/Training_Main_Log.txt``.
    """

    def __init__(self, path):
        """Create the helper objects and open the main training log.

        Args:
            path: Passed unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        # Opened in append mode here; train_validation() closes it.
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, transformation, database load and CSV export.

        The log file opened in ``__init__`` is closed when this method
        finishes, on success and on failure alike. The handle is not reopened,
        so an instance is meant for a single run.

        Raises:
            Exception: Whatever a step raises is written to the log and then
                re-raised unchanged.
        """
        log = self.log_writer.log
        log_file = self.file_object
        try:
            log(log_file, 'Start of Validation on files!!')
            # Extracting values from the schema.
            # Some unnecessary columns were removed from the schema itself so
            # that no DB columns are created for them and no space is wasted.
            # Unnecessary columns: 'url','address','name','dish_liked','phone','reviews_list'
            (LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names,
             noofcolumns) = self.raw_data.valuesFromSchema()
            # getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            # validating filename of the raw files
            self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile,
                                                LengthOfTimeStampInFile)
            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            # validating if any column has all values missing
            self.raw_data.validateMissingValuesInWholeColumn()
            log(log_file, "Raw Data Validation Complete!!")

            log(log_file, "Starting Data Transforamtion!!")
            # below function adds quotes to the '?' values in some columns.
            self.dataTransform.addQuotesToStringValuesInColumn()
            log(log_file, "DataTransformation Completed!!!")

            log(log_file,
                "Creating Training_Database and tables on the basis of given schema!!!")
            # create the table from the column names given in the schema
            self.dBOperation.createTableDb('Training', column_names)
            log(log_file, "Table creation Completed!!")

            log(log_file, "Insertion of Data into Table started!!!!")
            # insert csv files in the table
            self.dBOperation.insertIntoTableGoodData('Training')
            log(log_file, "Insertion in Table completed!!!")

            log(log_file, "Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            log(log_file, "Good_Data folder deleted!!!")

            log(log_file, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchiveBad()
            log(log_file, "Bad files moved to archive!! Bad folder Deleted!!")
            log(log_file, "Validation Operation completed!!")

            log(log_file, "Extracting csv file from table")
            # export data in table to csvfile
            self.dBOperation.selectingDatafromtableintocsv('Training')
        except Exception as e:
            # Record the failure so the log does not simply stop mid-run.
            log(log_file, str(e))
            raise
        finally:
            # Close on every path so a failing step does not leak the handle.
            log_file.close()
class train_validation:
    """Validate raw training files, load them into a collection and export a CSV.

    Each stage is delegated to ``Raw_Data_validation`` or ``dBOperation``.
    Progress is written through ``logger.App_Logger`` to
    ``Training_Logs/Training_Main_Log.txt``.
    """

    def __init__(self, path):
        """Create the helper objects and open the main training log.

        Args:
            path: Passed unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dBOperation = dBOperation()
        # Opened in append mode here; train_validation() closes it.
        self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()

    def train_validation(self):
        """Run validation, collection load and CSV export.

        The log file opened in ``__init__`` is closed when this method
        finishes, on success and on failure alike. The handle is not reopened,
        so an instance is meant for a single run.

        Raises:
            Exception: Whatever a step raises is written to the log and then
                re-raised unchanged.
        """
        log = self.log_writer.log
        log_file = self.file_object
        try:
            log(log_file, 'Start of Validation on files!!')
            # Only the column count is used below; column_names is unpacked to
            # match the two-value return of valuesFromSchema().
            column_names, noofcolumns = self.raw_data.valuesFromSchema()
            # getting the regex defined to validate filename
            regex = self.raw_data.manualRegexCreation()
            # validating filename of the raw files
            self.raw_data.validationFileNameRaw(regex)
            # validating column length in the file
            self.raw_data.validateColumnLength(noofcolumns)
            log(log_file, "Raw Data Validation Complete!!")

            log(log_file,
                "Creating Training_Database and collection on the basis of given schema!!!")
            # The collection handle returned here is reused for insert and export.
            collection = self.dBOperation.createCollection()
            log(log_file, "collection creation Completed!!")

            log(log_file, "Insertion of Data into collection started!!!!")
            # insert csv files in the collection
            self.dBOperation.GoodDatainsertIntoCollection(collection)
            log(log_file, "Insertion in Table completed!!!")

            log(log_file, "Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files into the collection
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            log(log_file, "Good_Data folder deleted!!!")

            log(log_file, "Moving bad files to Archive and deleting Bad_Data folder!!!")
            # Move the bad files to archive folder
            self.raw_data.moveBadFilesToArchive()
            log(log_file, "Bad files moved to archive!! Bad folder Deleted!!")
            log(log_file, "Validation Operation completed!!")

            log(log_file, "Extracting csv file from table")
            # export data in the collection to csvfile
            self.dBOperation.selectingDatafromCollectionintocsv(collection)
        except Exception as e:
            # Record the failure so the log does not simply stop mid-run.
            log(log_file, str(e))
            raise
        finally:
            # Close on every path so a failing step does not leak the handle.
            log_file.close()
class train_validation:
    """Validate raw training files, load them into the database and mail a report.

    In this variant ``file_object`` is the log name ``'Training_Main_Log'``
    handed to ``logger.App_Logger``, not an open file handle.
    """

    def __init__(self, path):
        """Create the validation, transformation, database, mail and storage helpers.

        Args:
            path: Passed unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = 'Training_Main_Log'
        self.log_writer = logger.App_Logger()
        self.emailObj = email()
        self.awsObj = AwsStorageManagement()

    def train_validation(self):
        """Run validation, transformation, database load, CSV export and the report mail.

        Any exception raised by a step propagates to the caller; nothing is
        caught or logged here.
        """

        def note(message):
            # All progress messages go to the same log target.
            self.log_writer.log(self.file_object, message)

        note('Start of Validation on files for Training!!')

        # valuesFromSchema() yields four values; the column names are unpacked
        # but not used by any later step in this variant.
        schema_values = self.raw_data.valuesFromSchema()
        date_stamp_len, time_stamp_len, _columns, column_count = schema_values

        filename_pattern = self.raw_data.manualRegexCreation()
        self.raw_data.validationFileNameRaw(filename_pattern, date_stamp_len,
                                            time_stamp_len)
        self.raw_data.validateColumnLength(column_count)
        self.raw_data.validateMissingValuesInWholeColumn()
        note("Raw Data Validation Complete!!")

        note("Starting Data Transforamtion!!")
        self.dataTransform.addQuotesToStringValuesInColumn()
        note("DataTransformation Completed!!!")

        # No explicit table-creation call is made in this variant; both
        # messages are emitted back to back.
        note("Creating Training_Database and tables on the basis of given schema!!!")
        note("Table creation Completed!!")

        note("Insertion of Data into Table started!!!!")
        self.dBOperation.insertIntoTableGoodData('mushroomClassifierDB')
        note("Insertion in Table completed!!!")

        note("Deleting Good Data Folder!!!")
        self.raw_data.deleteExistingGoodDataTrainingFolder()
        note("Good_Data folder deleted!!!")

        note("Moving bad files to Archive and deleting Bad_Data folder!!!")
        self.raw_data.moveBadFilesToArchiveBad()
        note("Bad files moved to archive!! Bad folder Deleted!!")
        note("Validation Operation completed!!")

        note("Extracting csv file from table")
        self.dBOperation.selectingDatafromtableintocsv('mushroomClassifierDB')

        self._send_validation_report()

    def _send_validation_report(self):
        """Mail an HTML summary listing the files under 'Training_Bad_Raw_Files_Validated'."""
        report = MIMEMultipart()
        report['Subject'] = ('MushroomTypeClassifier - Train Validation | '
                             + str(datetime.now()))

        bad_files = self.awsObj.listDirFiles('Training_Bad_Raw_Files_Validated')
        # An empty listing is reported explicitly instead of as a blank line.
        listing = ','.join(bad_files) if len(bad_files) >= 1 else 'No Bad Files'

        html = ('Model Train Validation Done Successfully... <br><br> Fault File List: <br>'
                + listing
                + '<br><br>Thanks and Regards, <br> Rahul Garg')
        report.attach(MIMEText(html, 'html'))

        recipients = ['*****@*****.**']
        self.emailObj.trigger_mail(recipients, [], report)