示例#1
0
def prepare_db(database, pm):
    """Load the dataset `database`, fill in its missing values and return it.

    Args:
        database: Name of the database folder to select. It is concatenated
            into the log lines, so it has to be a string.
        pm: Path manager used to select the folder and to locate the file.

    Returns:
        The object produced by ``process_data.process_database_file`` after
        its data has been replaced with the repaired rows and
        ``convert_discrete_to_float`` has been called on it.

    Side effects:
        Writes two header lines to the externally defined ``output_file``
        and ``debug_file``, then dumps every row to ``debug_file``.
    """
    # Point the path manager at the chosen folder so the database can be
    # referenced via its full path, then load the pre-processed database.
    pm.set_current_selected_folder(database)
    db = process_data.process_database_file(pm)

    # Both sinks get the same header lines, output file first.
    for sink in (output_file, debug_file):
        sink.write('CURRENT DATASET: ' + database + '\n')
    for sink in (output_file, debug_file):
        sink.write('DATA TYPE: ' + db.get_dataset_type() + '\n')

    # Sanity checks: separate the irregular rows from the normal ones and
    # extrapolate replacements using the database's missing-value symbol.
    normal_data, irregular_data = process_data.identify_missing_data(db)
    corrected_data = process_data.extrapolate_data(
        normal_data, irregular_data, db.get_missing_symbol())

    # The complete dataset is the normal rows plus any corrected rows.
    complete_data = (
        (normal_data + corrected_data)
        if len(corrected_data) > 0
        else normal_data
    )
    db.set_data(complete_data)

    # Convert the discrete data to type float.
    db.convert_discrete_to_float()

    # TODO: also append the database name to the debug file, so every
    # dataset is captured when running over all of them.
    debug_file.write('\n\nFULL DATASET: \n')
    for record in db.get_data():
        debug_file.write(str(record) + '\n')

    return db
示例#2
0
def prepare_db(database, pm):
    """Load the dataset `database`, fill in its missing values and return it.

    Args:
        database: Database folder to select in the path manager.
        pm: Path manager used to select the folder and to locate the file.

    Returns:
        The object produced by ``process_data.process_database_file`` after
        its data has been replaced with the repaired rows and
        ``convert_discrete_to_float`` has been called on it.
    """
    # Select the folder so the database can be referenced via its full
    # path, then turn the database file into a pre-processed database.
    pm.set_current_selected_folder(database)
    db = process_data.process_database_file(pm)

    save_state = verify_save_folder(pm, db)
    if save_state is not False:
        # Placeholder: the loaded save state object is meant to be used
        # here; nothing is done with it yet.
        pass

    # Sanity checks: separate the irregular rows from the normal ones and
    # extrapolate replacements using the database's missing-value symbol.
    normal_data, irregular_data = process_data.identify_missing_data(db)
    missing_symbol = db.get_missing_symbol()
    corrected_data = process_data.extrapolate_data(
        normal_data, irregular_data, missing_symbol)

    # Start from the normal rows; add the corrected rows when there are any.
    complete_data = normal_data
    if len(corrected_data) > 0:
        complete_data = normal_data + corrected_data
    db.set_data(complete_data)

    # Convert the discrete data to type float.
    db.convert_discrete_to_float()

    return db
示例#3
0
        for row in database:
            print(row)


# Create the path manager (its default directory is the databases directory).
path_manager = pm()

# Offer the database folders to the user and keep the one they pick.
selected_database = select_database(
    path_manager.find_folders(
        path_manager.get_databases_dir()
    )
)

# Record the chosen folder in the path manager so it can be referenced via
# its full path.
path_manager.set_current_selected_folder(selected_database)

# Turn the database file into a pre-processed database, ready to be used as
# a learning/training set.
db = process_data.process_database_file(path_manager)

# Sanity checks: separate the irregular rows from the normal ones, then
# extrapolate replacements using the database's missing-value symbol.
normal_data, irregular_data = process_data.identify_missing_data(db)
corrected_data = process_data.extrapolate_data(
    normal_data, irregular_data, db.get_missing_symbol())

# repaired_db is the whole database once the missing values are filled in:
# the normal rows plus any corrected rows.
repaired_db = (
    (normal_data + corrected_data)
    if len(corrected_data) > 0
    else normal_data
)

db.set_data(repaired_db)