def scaling(self):
    """Drive the interactive feature-scaling menu.

    Each pass prints every entry of ``self.tasks`` and asks for an integer:

    * ``1``  -> ``self.normalization()``
    * ``2``  -> ``self.standardization()``
    * ``3``  -> ``DataDescription.showDataset(self)``
    * ``-1`` -> leave the menu

    Any other integer prints an error and shows the menu again.

    Returns:
        ``self.data`` as it stands when the user leaves the menu.
    """
    while True:
        print("\nTasks (Feature Scaling)")
        for entry in self.tasks:
            print(entry)

        # Keep asking until the reply parses as an integer.
        selection = None
        while selection is None:
            try:
                selection = int(input(("\n\nWhat you want to do? (Press -1 to go back) ")))
            except ValueError:
                print("Integer Value required. Try again.....")

        if selection == -1:
            # Hand back the DataFrame with every change made so far.
            return self.data

        if selection == 1:
            self.normalization()
        elif selection == 2:
            self.standardization()
        elif selection == 3:
            DataDescription.showDataset(self)
        else:
            print("\nWrong Integer value!! Try again..")
def categoricalMain(self):
    """Drive the interactive categorical-data menu.

    Each pass prints every entry of ``self.tasks`` and asks for an integer:

    * ``1``  -> ``self.categoricalColumn()``
    * ``2``  -> ``self.categoricalColumn()`` followed by ``self.encoding()``
    * ``3``  -> ``DataDescription.showDataset(self)``
    * ``-1`` -> leave the menu

    Any other integer prints an error and shows the menu again.

    Returns:
        ``self.data`` as it stands when the user leaves the menu.
    """
    while True:
        print("\nTasks")
        for entry in self.tasks:
            print(entry)

        # Keep asking until the reply parses as an integer.
        selection = None
        while selection is None:
            try:
                selection = int(input(("\n\nWhat you want to do? (Press -1 to go back) ")))
            except ValueError:
                print("Integer Value required. Try again...")

        if selection == -1:
            # Hand back the data after any modification.
            return self.data

        if selection == 1:
            self.categoricalColumn()
        elif selection == 2:
            # The categorical columns are listed first, then encoded.
            self.categoricalColumn()
            self.encoding()
        elif selection == 3:
            DataDescription.showDataset(self)
        else:
            print("\nWrong Integer value!! Try again..")
def normalization(self):
    """Run the interactive min-max normalization menu.

    Each pass prints every entry of ``self.tasks_normalization`` and asks for
    an integer:

    * ``1``  -> rescale the columns named by the user to the range [0, 1]
    * ``2``  -> rescale the whole dataset with ``MinMaxScaler``
    * ``3``  -> ``DataDescription.showDataset(self)``
    * ``-1`` -> leave the menu

    Changes are written to ``self.data``; nothing is returned.

    NOTE: the column names typed for option 1 are lower-cased before lookup,
    so only columns whose labels are already lower-case can be matched.
    """
    while True:
        print("\nTasks (Normalization)")
        for task in self.tasks_normalization:
            print(task)

        # Re-prompt until the reply parses as an integer.
        while True:
            try:
                choice = int(input(("\n\nWhat you want to do? (Press -1 to go back) ")))
            except ValueError:
                print("Integer Value required. Try again.....")
                continue
            break

        if choice == -1:
            break

        # Performs normalization on the columns provided.
        elif choice == 1:
            print(self.data.dtypes)
            columns = input("Enter all the column" + "(s)" + "you want to normalize (Press -1 to go back) ").lower()
            if columns == "-1":
                break

            # split() without an argument ignores repeated/leading/trailing
            # blanks, so stray spaces do not turn into empty column names.
            for column in columns.split():
                # Basic min-max scaling: (x - min) / (max - min).
                try:
                    minValue = self.data[column].min()
                    maxValue = self.data[column].max()
                    value_range = maxValue - minValue
                    # A constant column has zero range; dividing by it would
                    # fill the column with NaN. Use 1 instead so the column
                    # becomes all zeros (intended to mirror how scikit-learn's
                    # MinMaxScaler treats zero-range features).
                    if value_range == 0:
                        value_range = 1
                    self.data[column] = (self.data[column] - minValue) / value_range
                except Exception:
                    # Unknown column name or non-numeric data.
                    print("\nNot possible....")
            print("Done....")

        # Performs normalization on whole dataset.
        elif choice == 2:
            try:
                scaled = MinMaxScaler().fit_transform(self.data)
                # Re-attach the original labels: building the DataFrame from
                # the bare scaler output would rename every column to 0..n-1
                # and reset the index.
                self.data = pd.DataFrame(scaled,
                                         columns=self.data.columns,
                                         index=self.data.index)
                print("Done.......")
            except Exception:
                print("\nString Columns are present. So, " + "NOT" + " possible.\nYou can try the first option though.")

        elif choice == 3:
            DataDescription.showDataset(self)

        else:
            print("\nYou pressed the wrong key!! Try again..")

    return
def standardization(self):
    """Run the interactive standardization (z-score) menu.

    Each pass prints every entry of ``self.tasks_standardization`` and asks
    for an integer:

    * ``1``  -> standardize the columns named by the user: (x - mean) / std
    * ``2``  -> standardize the whole dataset with ``StandardScaler``
    * ``3``  -> ``DataDescription.showDataset(self)``
    * ``-1`` -> leave the menu

    Changes are written to ``self.data``; nothing is returned.

    After option 2 the menu is shown again (as in ``normalization``), so the
    user can follow the "try the first option" hint printed on failure.

    NOTE: the column names typed for option 1 are lower-cased before lookup,
    so only columns whose labels are already lower-case can be matched.
    NOTE(review): option 1 uses pandas' ``Series.std()`` while option 2 uses
    ``StandardScaler``; their default degrees of freedom may differ, so the
    two options need not give identical numbers - confirm which is wanted.
    """
    while True:
        print("\nTasks (Standardization)")
        for task in self.tasks_standardization:
            print(task)

        # Re-prompt until the reply parses as an integer.
        while True:
            try:
                choice = int(input(("\n\nWhat you want to do? [enter -1 to go back] ")))
            except ValueError:
                print("Integer Value required. Try again.....")
                continue
            break

        if choice == -1:
            break

        elif choice == 1:
            print(self.data.dtypes)
            # The prompt previously said "normalize", copied from the
            # normalization menu.
            columns = input("Enter all the column" + self.bold_text_start +
                            "(s)" + self.bold_text_end +
                            "you want to standardize [enter -1 to go back] ").lower()
            if columns == "-1":
                break

            # split() without an argument ignores repeated/leading/trailing
            # blanks, so stray spaces do not turn into empty column names.
            for column in columns.split():
                try:
                    mean = self.data[column].mean()
                    standard_deviation = self.data[column].std()
                    # Zero (constant column) or undefined (single row)
                    # deviation would fill the column with NaN/inf; divide by
                    # 1 instead so the column is simply centred on zero.
                    if pd.isna(standard_deviation) or standard_deviation == 0:
                        standard_deviation = 1
                    self.data[column] = (self.data[column] - mean) / standard_deviation
                except Exception:
                    # Unknown column name or non-numeric data.
                    print("\nNot possible....")
            print("Done....")

        elif choice == 2:
            try:
                scaled = StandardScaler().fit_transform(self.data)
                # Re-attach the original labels: building the DataFrame from
                # the bare scaler output would rename every column to 0..n-1
                # and reset the index.
                self.data = pd.DataFrame(scaled,
                                         columns=self.data.columns,
                                         index=self.data.index)
                print("Done.......")
            except Exception:
                print("\nString Columns are present. So, " +
                      self.bold_text_start + "NOT" + self.bold_text_end +
                      " possible. \nYou can try the first option though.")

        elif choice == 3:
            DataDescription.showDataset(self)

        else:
            print("\nWrong choice!! Try again...")

    return
def preprocessorMain(self):
    """Run the top-level preprocessing menu until the user chooses to exit.

    ``self.removeTargetColumn()`` is called once up front. Each pass then
    prints every entry of ``self.tasks`` and asks for an integer:

    * ``1``  -> ``DataDescription(self.data).describe()``
    * ``2``  -> ``self.data = Imputation(self.data).imputer()``
    * ``3``  -> ``self.data = Categorical(self.data).categoricalMain()``
    * ``4``  -> ``self.data = FeatureScaling(self.data).scaling()``
    * ``5``  -> ``Download(self.data).download()``
    * ``-1`` -> terminate the program

    Raises:
        SystemExit: when the user enters ``-1``.
    """
    self.removeTargetColumn()

    while True:
        print("\nTasks (Preprocessing)\n")
        for task in self.tasks:
            print(task)

        # Re-prompt until the reply parses as an integer.
        while True:
            try:
                choice = int(input("\nWhat do you want to do? [enter -1 to exit]: "))
            except ValueError:
                print("Integer Value required. Try again.....")
                continue
            break

        if choice == -1:
            # Raise SystemExit directly rather than calling exit(): that
            # helper is injected by the `site` module for interactive use and
            # is not guaranteed to exist (e.g. `python -S`, frozen builds).
            raise SystemExit

        elif choice == 1:
            DataDescription(self.data).describe()

        elif choice == 2:
            self.data = Imputation(self.data).imputer()

        elif choice == 3:
            self.data = Categorical(self.data).categoricalMain()

        elif choice == 4:
            self.data = FeatureScaling(self.data).scaling()

        elif choice == 5:
            Download(self.data).download()

        else:
            print("\nWrong choice!! Try again...")
def imputer(self):
    """Drive the interactive imputation menu.

    Each pass prints every entry of ``self.tasks`` and asks for an integer:

    * ``1``  -> ``self.printNullValues()``
    * ``2``  -> ``self.removeColumn()``
    * ``3``  -> ``self.fillNullWithMean()``
    * ``4``  -> ``self.fillNullWithMedian()``
    * ``5``  -> ``self.fillNullWithMode()``
    * ``6``  -> ``DataDescription.showDataset(self)``
    * ``-1`` -> leave the menu

    Any other integer prints an error and shows the menu again.

    Returns:
        ``self.data`` as it stands when the user leaves the menu.
    """
    # Menu number -> name of the method to invoke on this object. The
    # attribute is looked up only when the option is actually chosen.
    method_by_choice = {
        1: "printNullValues",
        2: "removeColumn",
        3: "fillNullWithMean",
        4: "fillNullWithMedian",
        5: "fillNullWithMode",
    }

    while True:
        print("\nImputation Tasks")
        for entry in self.tasks:
            print(entry)

        # Keep asking until the reply parses as an integer.
        selection = None
        while selection is None:
            try:
                selection = int(input(("\nWhat you want to do? (Press -1 to go back) ")))
            except ValueError:
                print("Integer Value required. Try again.....")

        if selection == -1:
            return self.data

        if selection in method_by_choice:
            getattr(self, method_by_choice[selection])()
        elif selection == 6:
            DataDescription.showDataset(self)
        else:
            print("\nWrong Integer value!! Try again..")
def main():
    """Command-line entry point of the ML Preprocessor CLI.

    Reads the CSV path from ``sys.argv[1]``, loads it through ``readCSV`` and
    then loops over a top-level menu, each option opening a sub-menu backed by
    ``DataDescription``, ``Imputation``, ``EncodeCategorical``,
    ``FeatureScaling`` or ``Download``. Sub-menus return to the top-level
    menu when their ``getOption()`` yields ``-1``.

    Everything after the banner runs inside a single ``try`` whose handlers
    print a message and end the function:

    * ``IndexError``                -> 'File path missing.'
    * ``IncorrectFileFormatError``  -> 'Incorrect file format.'
    * ``FileNotFoundError``         -> 'File Not Found!'
    * ``ExitError``                 -> raised when the user enters ``-1``
    * any other ``Exception``       -> 'Incorrect option chosen.' (this
      includes the ``ValueError`` from a non-integer top-level reply)
    """
    rule = '-' * 10
    print(rule + 'Welcome to ML Preprocessor CLI' + rule + '\n\n')

    top_menu = (
        '\nTasks(Preprocessing)',
        '1.Data Description',
        '2.Handling NULL values',
        '3.Encoding Categorical Data',
        '4.Feature Scaling of the Dataset',
        '5.Download the modified Dataset\n',
    )

    try:
        csv_path = sys.argv[1]
        if not csv_path.endswith('.csv'):
            raise IncorrectFileFormatError("file is not in CSV format")

        frame = readCSV(csv_path)
        print('\nScreenshot of independent dataframe:\n')
        print(frame.head())
        print('\n' + '-' * 30 + '\n')

        while True:
            for menu_line in top_menu:
                print(menu_line)
            picked = int(input('What do you want to do?(Press -1 to exit):'))

            if picked == -1:
                raise ExitError

            elif picked == 1:
                describer = DataDescription(frame)
                while True:
                    sub_choice = describer.getOption()
                    if sub_choice == -1:
                        break
                    if sub_choice == 1:
                        describer.showProperty()
                    elif sub_choice == 2:
                        describer.showStats()
                    elif sub_choice == 3:
                        describer.showDF()
                    else:
                        print('Incorrect option!Try again.')

            elif picked == 2:
                null_handler = Imputation(frame)
                while True:
                    sub_choice = null_handler.getOption()
                    if sub_choice == -1:
                        break
                    if sub_choice == 1:
                        null_handler.countNULL()
                    elif sub_choice == 2:
                        frame = null_handler.dropColumn()
                    elif sub_choice == 3:
                        frame = null_handler.fillUtil()
                    elif sub_choice == 4:
                        null_handler.showDF()
                    else:
                        print('Incorrect option!Try again.')

            elif picked == 3:
                encoder = EncodeCategorical(frame)
                while True:
                    sub_choice = encoder.getOption()
                    if sub_choice == -1:
                        break
                    if sub_choice == 1:
                        encoder.showCategorical()
                    elif sub_choice == 2:
                        frame = encoder.performOneHotEncodingUtil()
                    elif sub_choice == 3:
                        encoder.showDF()
                    else:
                        print('Incorrect option!Try again.')

            elif picked == 4:
                while True:
                    # Unlike the other sub-menus, the helper is rebuilt from
                    # the current frame on every pass and the scaling results
                    # are not assigned back here.
                    # NOTE(review): presumably the *Util methods modify the
                    # frame in place - confirm against FeatureScaling.
                    scaler = FeatureScaling(frame)
                    sub_choice = scaler.getOption()
                    if sub_choice == -1:
                        break
                    if sub_choice == 1:
                        scaler.normalizeUtil()
                    elif sub_choice == 2:
                        scaler.standardizeUtil()
                    elif sub_choice == 3:
                        scaler.showDF()
                    else:
                        print('Incorrect option!Try again.')

            elif picked == 5:
                exporter = Download(frame)
                exporter.downloadDataframe()

            else:
                print('Incorrect option!Try again.')

    except IndexError as err:
        print('File path missing.', err)
    except IncorrectFileFormatError as err:
        print('Incorrect file format.', err)
    except FileNotFoundError as err:
        print('File Not Found!', err)
    except ExitError as err:
        print(err)
    except Exception as err:
        print('Incorrect option chosen.', err)