def prepare_db(database, pm):
    """Load, repair, and numerically encode the dataset in ``database``.

    The chosen folder is registered with the path manager, the dataset is
    parsed, rows reported as having missing values are filled in, and the
    discrete attributes are converted to floats. The dataset name, its type,
    and every final row are written to the log files along the way.

    Args:
        database: Name of the database folder to select. It is concatenated
            into the log lines, so it is expected to be a string.
        pm: Path manager; receives the selected folder and is passed to
            ``process_data.process_database_file``.

    Returns:
        The database object returned by
        ``process_data.process_database_file``, after its data has been
        replaced by the repaired rows and ``convert_discrete_to_float`` has
        been applied.
    """
    # Register the chosen folder with the path manager so that later
    # lookups can resolve it by full path.
    pm.set_current_selected_folder(database)

    # Turn the files behind that path into a pre-processed database that
    # can serve as a learning/training set.
    db = process_data.process_database_file(pm)

    # Announce the dataset and its type in both the output and debug logs
    # (output_file and debug_file are module-level handles).
    dataset_line = 'CURRENT DATASET: ' + database + '\n'
    output_file.write(dataset_line)
    debug_file.write(dataset_line)
    output_file.write('DATA TYPE: ' + db.get_dataset_type() + '\n')
    debug_file.write('DATA TYPE: ' + db.get_dataset_type() + '\n')

    # Sanity checks: separate the rows with missing values from the rest,
    # then fill the gaps using the database's missing-value symbol.
    complete_rows, incomplete_rows = process_data.identify_missing_data(db)
    filled_rows = process_data.extrapolate_data(
        complete_rows, incomplete_rows, db.get_missing_symbol()
    )

    # The full dataset is the untouched rows plus any rows that were
    # repaired; with nothing repaired, the untouched rows are used as-is.
    all_rows = (
        complete_rows + filled_rows if len(filled_rows) > 0 else complete_rows
    )
    db.set_data(all_rows)

    # Convert the discrete data to type float.
    db.convert_discrete_to_float()

    # TODO: also append the database name to the debug file name, so that
    # every dataset is captured when running over all of them.
    debug_file.write('\n\nFULL DATASET: \n')
    for record in db.get_data():
        debug_file.write(str(record) + '\n')

    return db
def prepare_db(database, pm):
    """Load the dataset in ``database`` and return it repaired and encoded.

    The chosen folder is registered with the path manager, the dataset is
    parsed, a save-state lookup is performed (its result is not used yet),
    rows reported as having missing values are filled in, and the discrete
    attributes are converted to floats.

    Args:
        database: Name of the database folder to select.
        pm: Path manager; receives the selected folder and is passed to
            ``process_data.process_database_file`` and
            ``verify_save_folder``.

    Returns:
        The database object returned by
        ``process_data.process_database_file``, after its data has been
        replaced by the repaired rows and ``convert_discrete_to_float`` has
        been applied.
    """
    # Register the chosen folder with the path manager so that later
    # lookups can resolve it by full path.
    pm.set_current_selected_folder(database)

    # Turn the files behind that path into a pre-processed database that
    # can serve as a learning/training set.
    db = process_data.process_database_file(pm)

    # Look up a saved state for this database. The branch below is a
    # placeholder: a value other than False marks where the loaded
    # save-state object is meant to be used.
    save_state = verify_save_folder(pm, db)
    if save_state is not False:
        pass

    # Sanity checks: separate the rows with missing values from the rest,
    # then fill the gaps using the database's missing-value symbol.
    complete_rows, incomplete_rows = process_data.identify_missing_data(db)
    filled_rows = process_data.extrapolate_data(
        complete_rows, incomplete_rows, db.get_missing_symbol()
    )

    # The full dataset is the untouched rows plus any rows that were
    # repaired; with nothing repaired, the untouched rows are used as-is.
    all_rows = (
        complete_rows + filled_rows if len(filled_rows) > 0 else complete_rows
    )
    db.set_data(all_rows)

    # Convert the discrete data to type float.
    db.convert_discrete_to_float()

    return db
# Print every row of the database, one per line.
for row in database:
    print(row)

# Create the path manager; it starts out with the databases directory as its
# default location.
path_manager = pm()

# Collect the database folders and let the user pick the one to work with.
selected_database = select_database(
    path_manager.find_folders(path_manager.get_databases_dir())
)

# Register the chosen folder with the path manager so that later lookups can
# resolve it by full path.
path_manager.set_current_selected_folder(selected_database)

# Turn the files behind that path into a pre-processed database that can serve
# as a learning/training set.
db = process_data.process_database_file(path_manager)

# Sanity checks: separate the rows with missing values from the rest, then
# fill the gaps using the database's missing-value symbol.
normal_data, irregular_data = process_data.identify_missing_data(db)
corrected_data = process_data.extrapolate_data(
    normal_data, irregular_data, db.get_missing_symbol()
)

# repaired_db is the complete dataset once missing values have been filled in;
# with nothing corrected, the normal rows are used as-is.
repaired_db = (
    normal_data + corrected_data if len(corrected_data) > 0 else normal_data
)
db.set_data(repaired_db)