class dataTransform:
    """
    Transforms the validated ("good") raw wafer training data held in MongoDB
    before it is loaded into the database.

    Written By: Rajat Bisoi
    Version: 1.0
    Revisions: None
    """
    def __init__(self):
        self.logger = App_Logger('wafer')
        self.mongo = To_mongo_db()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: For every id returned by Get_ID for the 'wafer_good_data'
                     collection of 'temp_db': download the frame, replace its
                     missing values with the string "NULL", drop the first six
                     characters of every 'Wafer' value, send the transformed
                     frame back with send_to_mongo and finally call
                     Delete_obj_in_collection for the id that was processed.
        Output: None
        On Failure: The error is written to 'wafer_log' (single quotes are
                    replaced by '-') and then re-raised.

        Written By: Rajat Bisoi
        Version: 1.0
        Revisions: None
        """
        collection, database = 'wafer_good_data', 'temp_db'
        try:
            for object_id in self.mongo.Get_ID(collection, database):
                frame = self.mongo.downlaod_one_from_mongo(
                    collection,
                    database,
                    object_id,
                    initial_columnname='Wafer')
                frame.fillna('NULL', inplace=True)
                # Keep only what follows the first six characters of the id.
                frame['Wafer'] = frame['Wafer'].str.slice(start=6)
                self.mongo.send_to_mongo(collection,
                                         database,
                                         frame,
                                         initial_columnname='Wafer')
                self.mongo.Delete_obj_in_collection(collection, database,
                                                    object_id)
                printable_id = str(object_id).replace("'", "-")
                self.logger.log(
                    'wafer_log',
                    printable_id + "  File Transformed successfully!!")
        except Exception as error:
            reason = str(error).replace("'", "-")
            self.logger.log('wafer_log',
                            "Data Transformation failed because :" + reason)
            raise error
# Example no. 2
# 0
class dataTransformPredict:
    """
    Transforms the validated ("good") raw prediction files before they are
    loaded into the database.
    """
    def __init__(self):
        self.goodDataPath = "Prediction_Good_Raw_Files_Validated"
        self.logger = App_Logger()
        self.awsObj = AwsStorageManagement()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: For every file listed under ``goodDataPath`` by the
                     storage helper, load it as a frame, wrap each '?' value
                     of the 'stalk-root' column in single quotes and save the
                     frame back under the same name.
        Output: None
        On Failure: Logs the error to 'dataTransformLog' and re-raises it.
        """
        log_name = 'dataTransformLog'
        folder = self.goodDataPath
        try:
            for file_name in self.awsObj.listDirFiles(folder):
                frame = self.awsObj.csvToDataframe(folder, file_name)
                stalk_root = frame['stalk-root']
                frame['stalk-root'] = stalk_root.replace('?', "'?'")
                self.awsObj.saveDataframeToCsv(folder, file_name, frame)
                self.logger.log(
                    log_name, " %s: Quotes added successfully!!" % file_name)
        except Exception as error:
            self.logger.log(
                log_name, "Data Transformation failed because:: %s" % error)
            raise error
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: Rewrites every CSV file found in ``goodDataPath`` with its
                     missing values replaced by the string "NULL".
        Output: None
        On Failure: The error is written to the transformation log and
                    swallowed (this method does not re-raise).
        Raises: OSError if the log file itself cannot be opened.
        """
        # The context manager closes the log on every exit path.
        with open("Training_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)
                    # Non-inplace fill so numeric columns may become object
                    # columns able to hold the "NULL" marker.
                    data = data.fillna('NULL')
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file,
                        " %s: File Transformed successfully!!" % file)
            except Exception as e:
                self.logger.log(
                    log_file, "Data Transformation failed because:: %s" % e)
class dataTransformPredict:
    """
    Transforms the validated ("good") raw wafer prediction files before they
    are loaded into the database.
    """
    def __init__(self):
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: Rewrites every CSV file found in ``goodDataPath`` with its
                     missing values replaced by the string "NULL" and with the
                     first six characters of every 'Wafer' value removed.
        Output: None
        On Failure: Logs the error to the transformation log and re-raises it.
        Raises: OSError if the log file itself cannot be opened. The log is
                opened before the ``try`` so this failure is reported as-is
                rather than being masked by an unbound ``log_file`` in the
                handler.
        """
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in os.listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    csv = pandas.read_csv(path)
                    # Non-inplace fill so numeric columns may become object
                    # columns able to hold the "NULL" marker.
                    csv = csv.fillna('NULL')
                    csv['Wafer'] = csv['Wafer'].str[6:]
                    csv.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file,
                        " %s: File Transformed successfully!!" % file)
            except Exception as e:
                self.logger.log(
                    log_file, "Data Transformation failed because:: %s" % e)
                raise
# Example no. 5
# 0
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.
    """

    def __init__(self):
        self.goodDataPath = "Training_Good_Raw_Files_Validated"
        self.logger = App_Logger()
        self.awsObj = AwsStorageManagement()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: For every file listed under ``goodDataPath`` by the
                     storage helper, wrap each '?' value in single quotes, in
                     every column that contains at least one '?', and save the
                     frame back under the same name.
        Output: None
        On Failure: The error is logged and swallowed (no re-raise).
        """
        log_name = 'addQuotesToStringValuesInColumn'
        folder = self.goodDataPath
        try:
            for file_name in self.awsObj.listDirFiles(folder):
                frame = self.awsObj.csvToDataframe(folder, file_name)
                for label in frame.columns:
                    # Only touch columns that actually hold a '?' marker.
                    if (frame[label] == '?').any():
                        frame[label] = frame[label].replace('?', "'?'")
                self.awsObj.saveDataframeToCsv(folder, file_name, frame)
                self.logger.log(
                    log_name, " %s: Quotes added successfully!!" % file_name)
        except Exception as error:
            self.logger.log(
                log_name, "Data Transformation failed because:: %s" % error)
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Rewrite every CSV file found in ``goodDataPath`` with its missing
        values replaced by the string "NULL".

        A failure is written to the transformation log, including the error
        itself, and is then swallowed; this method does not re-raise.

        :return: None
        :raises OSError: if the log file itself cannot be opened.
        """
        # The context manager closes the log on every exit path.
        with open("Training_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    # Non-inplace fill so numeric columns may become object
                    # columns able to hold the "NULL" marker.
                    data = pandas.read_csv(path).fillna("NULL")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file,
                        "%s: File Transformed successfully!!" % file)
            except Exception as e:
                # Report the error itself; the loop variable may not even be
                # bound when the directory listing is what failed.
                self.logger.log(
                    log_file, "Data Transform failed because:: %s" % e)
# Example no. 7
# 0
class dataTransformPredict:
    """
    Transforms the validated ("good") raw prediction files before they are
    loaded into the database.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """
    def __init__(self):
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Rewrites every CSV file found in ``goodDataPath`` after
                     dropping the columns that are not needed and enclosing
                     every value of the listed string columns in single quotes
                     (values are converted with ``str`` first, so a missing
                     value becomes the quoted text 'nan').
        Output: None
        On Failure: Logs the error to the transformation log and re-raises it.
        Raises: OSError if the log file itself cannot be opened.

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        dropped_columns = [
            'url', "address", "name", 'dish_liked', 'phone', 'reviews_list'
        ]
        quoted_columns = [
            "online_order", "book_table", "rate", "location", "rest_type",
            "cuisines", "menu_item", "listed_in(type)", "listed_in(city)"
        ]
        # One handle for both the success and the failure path; the context
        # manager closes it however the method exits.
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)
                    data.drop(columns=dropped_columns, inplace=True)
                    for col in quoted_columns:
                        data[col] = data[col].apply(
                            lambda x: "'" + str(x) + "'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(
                    log_file, "Data Transformation failed because:: %s" % e)
                raise
# Example no. 8
# 0
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.

    Version: 1.0
    Revisions: None
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    @staticmethod
    def _quote(value):
        """Return ``value`` converted to text and enclosed in single quotes."""
        return "'" + str(value) + "'"

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: Despite its name, this method rewrites every CSV file
                     found in ``goodDataPath`` with each value of the listed
                     string columns enclosed in single quotes.
        Output: None
        On Failure: The error is written to the transformation log and
                    swallowed (no re-raise).

        Version: 1.0
        Revisions: None
        """
        # Columns holding string data.
        string_columns = (
            "policy_bind_date", "policy_state", "policy_csl", "insured_sex",
            "insured_education_level", "insured_occupation",
            "insured_hobbies", "insured_relationship", "incident_state",
            "incident_date", "incident_type", "collision_type",
            "incident_severity", "authorities_contacted", "incident_city",
            "incident_location", "property_damage",
            "police_report_available", "auto_make", "auto_model",
            "fraud_reported",
        )
        with open("Training_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for name in listdir(self.goodDataPath):
                    csv_path = self.goodDataPath + "/" + name
                    frame = pandas.read_csv(csv_path)
                    for label in string_columns:
                        frame[label] = frame[label].apply(self._quote)
                    frame.to_csv(csv_path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % name)
            except Exception as error:
                self.logger.log(
                    log_file,
                    "Data Transformation failed because:: %s" % error)
class dataTransformPredict:
    """
    Transforms the validated ("good") raw prediction files before they are
    loaded into the database.

    Version: 1.0
    Revisions: None
    """

    def __init__(self):
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Rewrites every CSV file found in ``goodDataPath``. Every
                     value of the listed string columns is enclosed in single
                     quotes; in all other columns only the '?' placeholder is
                     enclosed in single quotes.
        Output: None
        On Failure: Logs the error to the transformation log and re-raises it.
        Raises: OSError if the log file itself cannot be opened.
        """
        # Columns holding string data.
        column = ['sex', 'on_thyroxine', 'query_on_thyroxine',
                  'on_antithyroid_medication', 'sick', 'pregnant',
                  'thyroid_surgery', 'I131_treatment', 'query_hypothyroid',
                  'query_hyperthyroid', 'lithium', 'goitre', 'tumor',
                  'hypopituitary', 'psych', 'TSH_measured', 'T3_measured',
                  'TT4_measured', 'T4U_measured', 'FTI_measured',
                  'TBG_measured', 'TBG', 'referral_source', 'Class']
        # One handle for both the success and the failure path; the context
        # manager closes it however the method exits.
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)
                    for col in data.columns:
                        if col in column:
                            # String column: quote every value.
                            data[col] = data[col].apply(
                                lambda x: "'" + str(x) + "'")
                        else:
                            # Other column: quote only the '?' placeholder.
                            data[col] = data[col].replace('?', "'?'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(
                    log_file, "Data Transformation failed because:: %s" % e)
                raise
# Example no. 10
# 0
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.

    Written By: Chethan D
    Version: 1.0
    Revisions: None
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Rewrites every CSV file found in ``goodDataPath`` with
                     each value of the 'Item_Identifier' and
                     'Outlet_Identifier' columns enclosed in single quotes, to
                     avoid errors when the values are inserted as varchar.
        Output: None
        On Failure: The error is written to the log and swallowed (no
                    re-raise).

        Written By: Chethan D
        Version: 1.0
        Revisions: None
        """
        def quote(value):
            return "'" + str(value) + "'"

        with open("Training_Logs/addQuotesToStringValuesInColumn.txt",
                  'a+') as log_file:
            try:
                for name in listdir(self.goodDataPath):
                    csv_path = self.goodDataPath + "/" + name
                    frame = pd.read_csv(csv_path)
                    for label in ('Item_Identifier', 'Outlet_Identifier'):
                        frame[label] = frame[label].apply(quote)
                    frame.to_csv(csv_path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % name)
            except Exception as error:
                self.logger.log(
                    log_file,
                    "Data Transformation failed because:: %s" % error)
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Rewrites every CSV file found in ``goodDataPath``. Every
                     value of the listed string columns is enclosed in single
                     quotes (to avoid errors when inserting them as varchar);
                     in all other columns only the '?' placeholder is enclosed
                     in single quotes.
        Output: None
        On Failure: The error is written to the log and swallowed (no
                    re-raise).
        """
        # Columns holding string data.
        string_columns = (
            'sex', 'on_thyroxine', 'query_on_thyroxine',
            'on_antithyroid_medication', 'sick', 'pregnant',
            'thyroid_surgery', 'I131_treatment', 'query_hypothyroid',
            'query_hyperthyroid', 'lithium', 'goitre', 'tumor',
            'hypopituitary', 'psych', 'TSH_measured', 'T3_measured',
            'TT4_measured', 'T4U_measured', 'FTI_measured', 'TBG_measured',
            'TBG', 'referral_source', 'Class',
        )

        def quote(value):
            return "'" + str(value) + "'"

        with open("Training_Logs/addQuotesToStringValuesInColumn.txt",
                  'a+') as log_file:
            try:
                for name in listdir(self.goodDataPath):
                    csv_path = self.goodDataPath + "/" + name
                    frame = pd.read_csv(csv_path)
                    for label in frame.columns:
                        if label in string_columns:
                            frame[label] = frame[label].apply(quote)
                        else:
                            frame[label] = frame[label].replace('?', "'?'")
                    frame.to_csv(csv_path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % name)
            except Exception as error:
                self.logger.log(
                    log_file,
                    "Data Transformation failed because:: %s" % error)
# Example no. 12
# 0
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Rewrites every CSV file found in ``goodDataPath`` after
                     dropping the unnecessary columns and enclosing every
                     value of the listed string columns in single quotes, to
                     avoid errors when the values are inserted as varchar.
        Output: None
        On Failure: The error is written to the log and swallowed (no
                    re-raise).

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        unwanted = [
            'url', "address", "name", 'dish_liked', 'phone', 'reviews_list'
        ]
        string_columns = (
            "online_order", "book_table", "rate", "location", "rest_type",
            "cuisines", "menu_item", "listed_in(type)", "listed_in(city)",
        )

        def quote(value):
            return "'" + str(value) + "'"

        with open("Training_Logs/addQuotesToStringValuesInColumn.txt",
                  'a+') as log_file:
            try:
                for name in listdir(self.goodDataPath):
                    csv_path = self.goodDataPath + "/" + name
                    frame = pd.read_csv(csv_path)
                    # Drop the columns that are not needed.
                    frame.drop(columns=unwanted, inplace=True)
                    for label in string_columns:
                        frame[label] = frame[label].apply(quote)
                    frame.to_csv(csv_path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % name)
            except Exception as error:
                self.logger.log(
                    log_file,
                    "Data Transformation failed because:: %s" % error)
# Example no. 13
# 0
class dataTransform:
    """
    Transforms the validated ("good") raw training files before they are
    loaded into the database.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """
    def __init__(self):
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: Despite its name, this method rewrites every CSV file
                     found in ``goodDataPath`` with each value of the listed
                     string columns enclosed in single quotes.
        Output: None
        On Failure: The error is written to the transformation log and
                    swallowed (no re-raise).

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        # Columns holding string data.
        string_columns = (
            'Income', 'workclass', 'education', 'marital-status',
            'occupation', 'relationship', 'race', 'sex', 'native-country',
        )

        def quote(value):
            return "'" + str(value) + "'"

        with open("Training_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for name in listdir(self.goodDataPath):
                    csv_path = self.goodDataPath + "/" + name
                    frame = pandas.read_csv(csv_path)
                    for label in string_columns:
                        frame[label] = frame[label].apply(quote)
                    frame.to_csv(csv_path, index=None, header=True)
                    self.logger.log(
                        log_file, " %s: Quotes added successfully!!" % name)
            except Exception as error:
                self.logger.log(
                    log_file,
                    "Data Transformation failed because:: %s" % error)
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in Database!!.

                  Written By: Ajinkya Abhang
                  Version: 1.0
                  Revisions: None

                  """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
                                  Method Name: addQuotesToStringValuesInColumn
                                  Description: For every CSV file in ``self.goodDataPath`` this method wraps each
                                               value of the known string columns in single quotes and writes
                                               the file back to the same path. Columns that are not in the list
                                               are left untouched.
                                  Output: None
                                  On Failure: The error is written to the log and re-raised.

                                   Written By: Ajinkya Abhang
                                  Version: 1.0
                                  Revisions: None

                                          """
        # Columns whose values are quoted; a set gives O(1) membership tests.
        string_columns = {
            'laundry_options', 'parking_options', 'lat', 'long',
            'state', 'image_url', 'type', 'url', 'region', 'region_url'
        }

        # One handle for the whole run; the context manager closes it on both
        # the success and the failure path (the old code leaked it on failure).
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)
                    for col in data.columns:
                        if col in string_columns:
                            # str(x) means missing values end up as the text 'nan' in quotes.
                            data[col] = data[col].apply(
                                lambda x: "'" + str(x) + "'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
                # Bare raise keeps the original traceback for the caller.
                raise
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in Database!!.

                  Written By: iNeuron Intelligence
                  Version: 1.0
                  Revisions: None

                  """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
                                  Method Name: addQuotesToStringValuesInColumn
                                  Description: For every CSV file in ``self.goodDataPath`` this method replaces
                                               each cell whose value is exactly the text na with the same text
                                               wrapped in single quotes ('na') and writes the file back to the
                                               same path. All other cells are left as they are.
                                  Output: None
                                  On Failure: The error is written to the log and re-raised.

                                   Written By: iNeuron Intelligence
                                  Version: 1.0
                                  Revisions: None

                                          """
        # One handle for the whole run; the context manager closes it on both
        # the success and the failure path (the old code leaked it on failure).
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)
                    for column in data.columns:
                        # Only touch columns that actually contain the marker.
                        if (data[column] == 'na').any():
                            data[column] = data[column].replace('na', "'na'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
                # Bare raise keeps the original traceback for the caller.
                raise
Exemplo n.º 16
0
class dataTransform:
    """
               This class shall be used for transforming the Good Raw Training Data before loading it in Database!!.

               Written By: Ajinkya Abhang
               Version: 1.0
               Revisions: None

               """
    def __init__(self):
        # Directory holding the validated training CSV files that are rewritten in place.
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
                                           Method Name: addQuotesToStringValuesInColumn
                                           Description: This method converts all the columns with string datatype such that
                                                       each value for that column is enclosed in quotes. This is done
                                                       to avoid the error while inserting string values in table as varchar.
                                                       Every CSV file in ``self.goodDataPath`` is rewritten in place.
                                           Output: None
                                           On Failure: The error is written to the log and NOT re-raised
                                                       (best-effort behaviour kept from the original).

                                            Written By: Ajinkya Abhang
                                           Version: 1.0
                                           Revisions: None

                                                   """
        # Columns whose values are quoted; a set gives O(1) membership tests.
        string_columns = {
            'laundry_options', 'parking_options', 'lat', 'long',
            'state', 'image_url', 'type', 'url', 'region', 'region_url'
        }

        # The context manager closes the log exactly once on every path.
        with open("Training_Logs/addQuotesToStringValuesInColumn.txt",
                  'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pd.read_csv(path)
                    for col in data.columns:
                        if col in string_columns:
                            data[col] = data[col].apply(
                                lambda x: "'" + str(x) + "'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
Exemplo n.º 17
0
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in Database!!.

                  Written By: Krishna Nanda
                  Version: 1.0
                  Revisions: None

                  """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
                                  Method Name: replaceMissingWithNull
                                  Description: This method replaces the missing values in columns with "NULL" to
                                               store in the table. The first six characters of every value in
                                               the 'Wafer' column are dropped so that only the trailing part
                                               is kept. Every CSV file in ``self.goodDataPath`` is rewritten
                                               in place.
                                  Output: None
                                  On Failure: The error is written to the log and re-raised.

                                   Written By: Krishna Nanda
                                  Version: 1.0
                                  Revisions: None

                                          """
        # Opened before the try block so the handler can always rely on the
        # handle existing; the context manager closes it on every path.
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    frame = pandas.read_csv(path)
                    frame.fillna('NULL', inplace=True)
                    # Drop the six-character prefix of the wafer identifier.
                    frame['Wafer'] = frame['Wafer'].str[6:]
                    frame.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file,
                        " %s: File Transformed successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
                # Bare raise keeps the original traceback for the caller.
                raise
Exemplo n.º 18
0
class preprocessing_beforeDB:
    """
    Prepares the validated training CSV files before they are loaded into
    the database.
    """
    def __init__(self):
        # Directory holding the validated training CSV files that are rewritten in place.
        self.goodData_path = "Training_Raw_Validated_File/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: Replaces every missing value with the text "NULL" in each
                     CSV file found in ``self.goodData_path`` and writes the
                     file back to the same path.
        Output: None
        On Failure: The error is written to the step log and NOT re-raised
                    (best-effort behaviour kept from the original).
        """
        with open('Training_Logs/General_Log.txt', 'a+') as general_log:
            self.logger.log(general_log,'Entered replaceMissingWithNull() method of preprocessing_beforeDB class of training_data_preprocessing_beforeDB package')
        try:
            # The context manager closes the step log even when a file fails
            # half-way through (the old code leaked the handle in that case).
            with open("Training_Logs/data_preprocessing_beforeDB.txt", "a+") as step_log:
                for name in os.listdir(self.goodData_path):
                    path = self.goodData_path + "/" + name
                    frame = pd.read_csv(path)
                    frame.fillna('NULL',inplace=True)
                    frame.to_csv(path,index=None,header=True)
                    self.logger.log(step_log,'Replace Missing values with Null Values in Good Raw Main File Successfully !!')

            with open('Training_Logs/General_Log.txt', 'a+') as general_log:
                self.logger.log(general_log,'Successfully Executed replaceMissingWithNull() method of preprocessing_beforeDB class of training_data_preprocessing_beforeDB package')
        except Exception as e:
            with open("Training_Logs/data_preprocessing_beforeDB.txt", "a+") as step_log:
                self.logger.log(step_log,'Replace missing with Null Values failed in Main File becasue:: %s' % str(e))
Exemplo n.º 19
0
class dataTransform:
    """
    This class shall be used for transforming the Training and new prediction Data before loading it in Database!!.
    """

    def __init__(self):
        # Log handle kept open for the lifetime of the instance; it is passed
        # to the logger on every call below.
        self.file_object = open("../logs/datatransform/log.txt", 'a+')
        self.logger = App_Logger()

    def trainingData(self):
        """
        Method Name: trainingData
        Description: Loads the training data through ``readWriteOps.Data_Getter``,
                     drops the first three columns, ordinal-encodes the
                     'Geography' and 'Gender' columns as int32 and returns a
                     frame with the two encoded columns first, followed by the
                     remaining columns. The fitted encoder is pickled to
                     'encoder.pkl' in the current working directory.
        Output: A pandas DataFrame, or None when any step fails.
        On Failure: The error is written to the log and NOT re-raised.
        """
        self.logger.log(self.file_object,'Entered the trainingData method of the dataTransform class')
        try:
            data = readWriteOps.Data_Getter().get_data()
            # The first three columns are discarded before encoding.
            df_filter = data.iloc[:, 3:]
            categorical = ['Geography', 'Gender']
            oe = OrdinalEncoder(dtype=np.int32)
            # Reuse the source index so the concat below lines rows up by
            # label even when the input frame does not use a 0..n-1 index.
            encoded = pd.DataFrame(data=oe.fit_transform(df_filter[categorical]),
                                   columns=categorical,
                                   index=df_filter.index)
            remaining = df_filter.drop(categorical, axis=1)
            df = pd.concat([encoded, remaining], axis=1)
            # The context manager closes the pickle file even if dump() raises.
            with open('encoder.pkl', 'wb') as output:
                pickle.dump(oe, output)
            self.logger.log(self.file_object,'Data transfomr Successful.Exited trainingData method of the dataTransform class')
            return df
        except Exception as e:
            self.logger.log(self.file_object,
                            'Exception occured in trainingData method of the dataTransform class. Exception message: '+str(e))
            self.logger.log(self.file_object,
                            'ataTransform Unsuccessful.Exited the trainingData method of the dataTransform class')
            # Explicit about the failure contract: callers receive None.
            return None
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in Database!!.


                  """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
                                  Method Name: addQuotesToStringValuesInColumn
                                  Description: For every CSV file in ``self.goodDataPath`` this method wraps each
                                               value of the 'Id' and 'ActivityDate' columns in single quotes
                                               and writes the file back to the same path.
                                  Output: None
                                  On Failure: The error (for example a missing column) is written to
                                              the log and re-raised.

                                          """
        # One handle for the whole run; the context manager closes it on both
        # the success and the failure path (the old code leaked it on failure).
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)

                    for col in ('Id', 'ActivityDate'):
                        data[col] = data[col].apply(
                            lambda x: "'" + str(x) + "'")

                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
                # Bare raise keeps the original traceback for the caller.
                raise
Exemplo n.º 21
0
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in Database!!.

                  Version: 1.0
                  Revisions: None
                  """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
                                  Method Name: addQuotesToStringValuesInColumn
                                  Description: For every CSV file in ``self.goodDataPath`` this method removes
                                               the 'Unnamed: 0' column when it is present and writes the file
                                               back to the same path. Despite the method name and the log
                                               message, no quotes are added here.
                                  Output: None
                                  On Failure: The error is written to the log and re-raised.

                                  Version: 1.0
                                  Revisions: None

                                          """
        # One handle for the whole run; the context manager closes it on both
        # the success and the failure path (the old code leaked it on failure).
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pandas.read_csv(path)

                    # pandas names a header-less leading column 'Unnamed: 0'.
                    if 'Unnamed: 0' in data.columns:
                        data = data.drop('Unnamed: 0', axis=1)

                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
                # Bare raise keeps the original traceback for the caller.
                raise
class dataTransformPredict:
    """
                  This class shall be used for transforming the Good Raw Prediction Data before loading it in Database!!.

                  Written By: iNeuron Intelligence
                  Version: 1.0
                  Revisions: None

                  """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
                                           Method Name: addQuotesToStringValuesInColumn
                                           Description: For every CSV file in ``self.goodDataPath`` this method encloses
                                                       each value of the 'class' column in single quotes and writes the
                                                       file back to the same path. This is done
                                                       to avoid the error while inserting string values in table as varchar.
                                           Output: None
                                           On Failure: The error is written to the log and NOT re-raised
                                                       (best-effort behaviour kept from the original).

                                            Written By: iNeuron Intelligence
                                           Version: 1.0
                                           Revisions: None

                                                   """
        # The context manager closes the log exactly once on every path.
        with open("Prediction_Logs/addQuotesToStringValuesInColumn.txt",
                  'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pd.read_csv(path)
                    data['class'] = data['class'].apply(
                        lambda x: "'" + str(x) + "'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
Exemplo n.º 23
0
class dataTransform:
    """
               This class shall be used for transforming the Good Raw Training Data before loading it in Database!!.


               """
    def __init__(self):
        # Directory holding the validated training CSV files that are rewritten in place.
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self, string_columns=None):
        """
                                           Method Name: addQuotesToStringValuesInColumn
                                           Description: This method converts all the columns with string datatype such that
                                                       each value for that column is enclosed in quotes. This is done
                                                       to avoid the error while inserting string values in table as varchar.
                                                       In every other column only cells that are exactly '?' are
                                                       enclosed in quotes. Every CSV file in ``self.goodDataPath``
                                                       is rewritten in place.
                                           Parameters: string_columns - optional iterable of column names to treat as
                                                       string columns. Defaults to none, in which case only the '?'
                                                       cells are quoted.
                                                       NOTE(review): the original list of string columns is not
                                                       present in this file; callers should supply it.
                                           Output: None
                                           On Failure: The error is written to the log and NOT re-raised
                                                       (best-effort behaviour kept from the original).

                                                   """
        # The old loop bound ``column`` but read the undefined name ``col``,
        # so every run failed with NameError before touching any file.
        quoted = set(string_columns or ())

        # The context manager closes the log exactly once on every path.
        with open("Training_Logs/addQuotesToStringValuesInColumn.txt",
                  'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pd.read_csv(path)

                    for col in data.columns:
                        if col in quoted:  # add quotes in string value
                            data[col] = data[col].apply(
                                lambda x: "'" + str(x) + "'")
                        else:  # add quotes to '?' values in integer/float columns
                            data[col] = data[col].replace('?', "'?'")

                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
Exemplo n.º 24
0
class dataTransform:
    """
    Transforms the validated training CSV files in place before they are
    loaded into the database.
    """

    # Was misspelled ``__int__``, so the constructor never ran and the
    # attributes below were never set on new instances.
    def __init__(self):
        # Directory holding the validated training CSV files that are rewritten in place.
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Encloses every value of the 'DATE' column in single
                     quotes for each CSV file in ``self.goodDataPath`` and
                     writes the file back to the same path.
        Output: None
        On Failure: The error is written to the log and NOT re-raised
                    (best-effort behaviour kept from the original).
        """
        # The context manager closes the log exactly once on every path.
        with open("Training_Logs/addQuotesToStringValuesInColumn.txt", 'a+') as log_file:
            try:
                for file in os.listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    data = pd.read_csv(path)
                    data['DATE'] = data["DATE"].apply(lambda x: "'" + str(x) + "'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(log_file, " %s: Quotes added successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,"Data Transformation failed because:: %s" % e)
Exemplo n.º 25
0
def train_and_evaluate(config_path):
    """
    Train the XGBoost model on the processed dataset and record the run in MLflow.

    The training CSV named in the config is split by date at 2017-05-31,
    the 'transactionRevenue' target is log1p-transformed, the model returned
    by ``run_xgb`` is fitted on the earlier part and its metrics on the
    training data are logged to the configured MLflow tracking server.

    Parameters:
        config_path: path handed to ``read_params``; the resulting mapping
            must provide ``split_data`` (``train_path``, ``test_path``),
            ``model_dir`` and ``mlflow_config`` (``remote_server_uri``,
            ``experiment_name``, ``run_name``, ``registered_model_name``).

    Returns:
        None

    Raises:
        Whatever the underlying readers, trainers or MLflow calls raise; no
        exception is handled here.
    """
    config = read_params(config_path)
    # NOTE(review): these two values are read but not used below; the lookups
    # are kept so a config lacking the keys still fails at the same point.
    test_data_path = config["split_data"]["test_path"]
    train_data_path = config["split_data"]["train_path"]
    model_dir = config["model_dir"]
    logger = App_Logger()

    # The context manager closes the log even when training or MLflow fails.
    with open('Training_log.txt', 'a+') as file_object:
        df = pd.read_csv(train_data_path)  # processed dataset

        # Compare on plain dates so the cut-off below is time-of-day agnostic.
        df["date"] = pd.to_datetime(df["date"]).dt.date
        cutoff = datetime.date(2017, 5, 31)
        X_train = df[df['date'] <= cutoff]  # training part
        val_X = df[df['date'] > cutoff]  # validation part
        logger.log(file_object, "Splitting dataset completed")

        X_train = X_train.drop(['date'], axis=1)
        val_X = val_X.drop(['date'], axis=1)

        y_train = np.log1p((X_train["transactionRevenue"]).values)
        val_y = np.log1p((val_X["transactionRevenue"]).values)
        logger.log(file_object,
                   "Log transformation of transaction Revenue values completed")
        x1 = X_train.drop(['transactionRevenue'], axis=1)
        # NOTE(review): the validation features/target are prepared but never
        # scored in this function.
        val_x1 = val_X.drop(['transactionRevenue'], axis=1)
        y_train = pd.DataFrame(y_train)
        val_y = pd.DataFrame(val_y)

        ################## MLFLOW ######################
        mlflow_config = config["mlflow_config"]
        mlflow.set_tracking_uri(mlflow_config['remote_server_uri'])
        mlflow.set_experiment(mlflow_config["experiment_name"])

        with mlflow.start_run(run_name=mlflow_config["run_name"]):
            model_xgb = run_xgb(x1, y_train)
            y_train_predict = model_xgb.predict(x1)
            rmse, mae, r2 = eval_metrics(y_train, y_train_predict)

            # NOTE(review): hard-coded values, presumably mirroring the
            # settings used inside run_xgb - confirm they stay in sync.
            mlflow.log_param("n_estimators", 1200)
            mlflow.log_param("learning_rate", 0.5)
            mlflow.log_param("max_depth", 8)

            mlflow.log_metric('rmse', rmse)
            mlflow.log_metric("mae", mae)
            mlflow.log_metric("r2", r2)

            tracking_url_type_store = urlparse(mlflow.get_artifact_uri()).scheme
            if tracking_url_type_store != "file":
                # A non-file store supports the model registry.
                mlflow.sklearn.log_model(
                    model_xgb,
                    "model",
                    registered_model_name=mlflow_config["registered_model_name"])
            else:
                # Was load_model(model_xgb, "model"), which expects a model
                # URI and cannot accept a fitted estimator; the model must be
                # logged here as well, just without registering it.
                mlflow.sklearn.log_model(model_xgb, "model")

        logger.log(file_object, "Model file created successfully")
Exemplo n.º 26
0
class dataTransform:
    """
    Transforms the validated training CSV files in place before they are
    loaded into the database.
    """
    def __init__(self):
        # Directory holding the validated training CSV files that are rewritten in place.
        self.goodDataPath = 'Training_Raw_files_validated/Good_Raw'
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: Replaces every missing value with the text "NULL" and
                     drops the first six characters of each 'Wafer' value,
                     for every CSV file in ``self.goodDataPath``; each file
                     is written back to the same path.
        Output: None
        On Failure: The error is written to the log and NOT re-raised
                    (best-effort behaviour kept from the original).
        """
        # The context manager closes the log exactly once on every path.
        with open("Training_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + file
                    # Named ``frame`` so the stdlib ``csv`` module name is not shadowed.
                    frame = pd.read_csv(path)
                    frame.fillna('NULL', inplace=True)
                    frame['Wafer'] = frame['Wafer'].str[6:]
                    frame.to_csv(path, index=None, header=True)
                    self.logger.log(log_file,
                                    f"{file}: File Transformed successfully!!")
            except Exception as e:
                self.logger.log(log_file,
                                f"Data Transformation Failed because:: {e}")
Exemplo n.º 27
0
class dataTransform:
    """
      This class shall be used for transforming the Good Raw Training Data before loading it in Database!!.
    """
    def __init__(self):
        # Directory holding the validated training CSV files that are rewritten in place.
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def replaceMissingWithNull(self):
        """
        Method Name: replaceMissingWithNull
        Description: This method replaces the missing values in columns with "NULL" to
                    store in the table. The first six characters of every 'Wafer'
                    value are dropped so that only the trailing part is kept.
                    Every CSV file in ``self.goodDataPath`` is rewritten in place.
        Output: None
        On Failure: The error is written to the log and NOT re-raised
                    (best-effort behaviour kept from the original).
      """
        # The context manager closes the log exactly once on every path.
        with open("Training_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for filename in listdir(self.goodDataPath):
                    path = self.goodDataPath + "/" + filename
                    df = pandas.read_csv(path)
                    df.fillna('NULL', inplace=True)
                    df['Wafer'] = df['Wafer'].str[6:]
                    df.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file,
                        " %s: File Transformed successfully!!" % filename)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
Exemplo n.º 28
0
class dataTransformPredict:
    """
    Transforms the validated prediction CSV files in place before they are
    loaded into the database.
    """
    def __init__(self):
        # Directory holding the validated prediction CSV files that are rewritten in place.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def replaceSingleQuotesToDouble(self):
        """
        Method Name: replaceSingleQuotesToDouble
        Description: Encloses every value of the listed text columns in
                     single quotes for each CSV file in ``self.goodDataPath``
                     and writes the file back to the same path. Despite the
                     method name, no quote characters are replaced.
        Output: None
        On Failure: The error (for example a missing column) is written to
                    the log and re-raised.
        """
        # Built once instead of once per file; every listed column must be
        # present in each file, otherwise the lookup below raises KeyError.
        columns = (
            "policy_bind_date", "policy_state", "policy_csl",
            "insured_sex", "insured_education_level",
            "insured_occupation", "insured_hobbies",
            "insured_relationship", "incident_state", "incident_date",
            "incident_type", "collision_type", "incident_severity",
            "authorities_contacted", "incident_city",
            "incident_location", "property_damage",
            "police_report_available", "auto_make", "auto_model"
        )

        # Opened before the try block so the handler can always rely on the
        # handle existing; the context manager closes it on every path.
        with open("Prediction_Logs/dataTransformLog.txt", 'a+') as log_file:
            try:
                for file in listdir(self.goodDataPath):
                    path = self.goodDataPath + '/' + file
                    data = pandas.read_csv(path)
                    for col in columns:
                        data[col] = data[col].apply(
                            lambda x: "'" + str(x) + "'")
                    data.to_csv(path, index=None, header=True)
                    self.logger.log(
                        log_file,
                        " %s: File Transformed successfully!!" % file)
            except Exception as e:
                self.logger.log(log_file,
                                "Data Transformation failed because:: %s" % e)
                # Bare raise keeps the original traceback for the caller.
                raise
class Data_Getter:
    """
    This class shall  be used for obtaining the data from the source for training.

    Written By: Piyush
    Version: 1.0
    Revisions: None

    """
    def __init__(self):
        self.training_file='../training_file/Churn_Modelling.csv'
        # Log handle kept open for the lifetime of the instance; it is passed
        # to the logger on every call below.
        self.file_object = open("../logs/filereadlogs/log.txt", 'a+')
        self.logger = App_Logger()

    def get_data(self):
        """
        Method Name: get_data
        Description: This method reads the CSV file at ``self.training_file``
                     and also stores the result on ``self.data``.
        Output: A pandas DataFrame.
        On Failure: Logs the error and raises Exception carrying the original
                    message, with the original error chained as its cause.

         Written By: Piyush
        Version: 1.0
        Revisions: None

        """
        self.logger.log(self.file_object,'Entered the get_data method of the Data_Getter class')
        try:
            self.data= pd.read_csv(self.training_file) # reading the data file
            self.logger.log(self.file_object,'Data Load Successful.Exited the get_data method of the Data_Getter class')
            return self.data
        except Exception as e:
            self.logger.log(self.file_object,'Exception occured in get_data method of the Data_Getter class. Exception message: '+str(e))
            self.logger.log(self.file_object,
                                   'Data Load Unsuccessful.Exited the get_data method of the Data_Getter class')
            # The type stays plain Exception for existing callers, but the
            # message and cause are no longer thrown away.
            raise Exception(str(e)) from e
class dBOperation:
    """
      This class shall be used for handling all the SQL operations.

      Written By: Chethan D
      Version: 1.0
      Revisions: None

      """
    def __init__(self):
        """
        Set the locations used by the database operations and create the logger.

        path:         prefix prepended to the database name when connecting.
        goodFilePath: directory read by insertIntoTableGoodData.
        badFilePath:  directory that rejected files are moved into.
        """
        # Raw-file directories first, then the database prefix.
        self.goodFilePath = "Training_Raw_files_validated/Good_Raw"
        self.badFilePath = "Training_Raw_files_validated/Bad_Raw"
        self.path = 'Training_Database/'
        # Shared logger used by every method of this class.
        self.logger = App_Logger()


    def dataBaseConnection(self,DatabaseName):

        """
                Method Name: dataBaseConnection
                Description: This method creates the database with the given name and if Database already exists then opens the connection to the DB.
                Output: Connection to the DB
                On Failure: Raise ConnectionError

                 Written By: Chethan D
                Version: 1.0
                Revisions: None

                """
        try:
            conn = sqlite3.connect(self.path+DatabaseName+'.db')

            file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Opened %s database successfully" % DatabaseName)
            file.close()
        except ConnectionError:
            file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Error while connecting to database: %s" %ConnectionError)
            file.close()
            raise ConnectionError
        return conn

    def createTableDb(self,DatabaseName,column_names):
        """
                        Method Name: createTableDb
                        Description: This method creates a table in the given database which will be used to insert the Good data after raw data validation.
                        Output: None
                        On Failure: Raise Exception

                         Written By: Chethan D
                        Version: 1.0
                        Revisions: None

                        """
        try:
            conn = self.dataBaseConnection(DatabaseName)
            c=conn.cursor()
            c.execute("SELECT count(name)  FROM sqlite_master WHERE type = 'table'AND name = 'Good_Raw_Data'")
            if c.fetchone()[0] ==1:
                conn.close()
                file = open("Training_Logs/DbTableCreateLog.txt", 'a+')
                self.logger.log(file, "Tables created successfully!!")
                file.close()

                file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
                self.logger.log(file, "Closed %s database successfully" % DatabaseName)
                file.close()

            else:

                for key in column_names.keys():
                    type = column_names[key]

                    #in try block we check if the table exists, if yes then add columns to the table
                    # else in catch block we will create the table
                    try:
                        #cur = cur.execute("SELECT name FROM {dbName} WHERE type='table' AND name='Good_Raw_Data'".format(dbName=DatabaseName))
                        conn.execute('ALTER TABLE Good_Raw_Data ADD COLUMN "{column_name}" {dataType}'.format(column_name=key,dataType=type))
                    except:
                        conn.execute('CREATE TABLE  Good_Raw_Data ({column_name} {dataType})'.format(column_name=key, dataType=type))


                    # try:
                    #     #cur.execute("SELECT name FROM {dbName} WHERE type='table' AND name='Bad_Raw_Data'".format(dbName=DatabaseName))
                    #     conn.execute('ALTER TABLE Bad_Raw_Data ADD COLUMN "{column_name}" {dataType}'.format(column_name=key,dataType=type))
                    #
                    # except:
                    #     conn.execute('CREATE TABLE Bad_Raw_Data ({column_name} {dataType})'.format(column_name=key, dataType=type))


                conn.close()

                file = open("Training_Logs/DbTableCreateLog.txt", 'a+')
                self.logger.log(file, "Tables created successfully!!")
                file.close()

                file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
                self.logger.log(file, "Closed %s database successfully" % DatabaseName)
                file.close()

        except Exception as e:
            file = open("Training_Logs/DbTableCreateLog.txt", 'a+')
            self.logger.log(file, "Error while creating table: %s " % e)
            file.close()
            conn.close()
            file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Closed %s database successfully" % DatabaseName)
            file.close()
            raise e


    def insertIntoTableGoodData(self,Database):

        """
                               Method Name: insertIntoTableGoodData
                               Description: This method inserts the Good data files from the Good_Raw folder into the
                                            above created table.
                               Output: None
                               On Failure: Raise Exception

                                Written By: Chethan D
                               Version: 1.0
                               Revisions: None

        """

        conn = self.dataBaseConnection(Database)
        goodFilePath= self.goodFilePath
        badFilePath = self.badFilePath
        onlyfiles = [f for f in listdir(goodFilePath)]
        log_file = open("Training_Logs/DbInsertLog.txt", 'a+')

        for file in onlyfiles:
            try:
                with open(goodFilePath+'/'+file, "r") as f:
                    next(f)
                    reader = csv.reader(f, delimiter="\n")
                    for line in enumerate(reader):
                        for list_ in (line[1]):
                            try:
                                conn.execute('INSERT INTO Good_Raw_Data values ({values})'.format(values=(list_)))
                                self.logger.log(log_file," %s: File loaded successfully!!" % file)
                                conn.commit()
                            except Exception as e:
                                raise e

            except Exception as e:

                conn.rollback()
                self.logger.log(log_file,"Error while creating table: %s " % e)
                shutil.move(goodFilePath+'/' + file, badFilePath)
                self.logger.log(log_file, "File Moved Successfully %s" % file)
                log_file.close()
                conn.close()

        conn.close()
        log_file.close()


    def selectingDatafromtableintocsv(self,Database):

        """
                               Method Name: selectingDatafromtableintocsv
                               Description: This method exports the data in GoodData table as a CSV file. in a given location.
                                            above created .
                               Output: None
                               On Failure: Raise Exception

                                Written By: Chethan D
                               Version: 1.0
                               Revisions: None

        """

        self.fileFromDb = 'Training_FileFromDB/'
        self.fileName = 'InputFile.csv'
        log_file = open("Training_Logs/ExportToCsv.txt", 'a+')
        try:
            conn = self.dataBaseConnection(Database)
            sqlSelect = "SELECT *  FROM Good_Raw_Data"
            cursor = conn.cursor()

            cursor.execute(sqlSelect)

            results = cursor.fetchall()
            # Get the headers of the csv file
            headers = [i[0] for i in cursor.description]

            #Make the CSV ouput directory
            if not os.path.isdir(self.fileFromDb):
                os.makedirs(self.fileFromDb)

            # Open CSV file for writing.
            csvFile = csv.writer(open(self.fileFromDb + self.fileName, 'w', newline=''),delimiter=',', lineterminator='\r\n',quoting=csv.QUOTE_ALL, escapechar='\\')

            # Add the headers and data to the CSV file.
            csvFile.writerow(headers)
            csvFile.writerows(results)

            self.logger.log(log_file, "File exported successfully!!!")
            log_file.close()

        except Exception as e:
            self.logger.log(log_file, "File exporting failed. Error : %s" %e)
            log_file.close()