def main(csv_path: str = SENSOR_CSV_PATH) -> None:
    """
    Parse all files in a given directory and insert them into the sensor table in the database.

    For all the files found recursively in csv_path that match the naming scheme specified by
    CsvImporter.find_csv_files(), attempt to load and insert them into the database. Files which
    do not match the naming scheme will be moved to an archive/failed folder and skipped, and
    files which raise an error during loading/uploading will be moved to the archive/failed
    folder and have the error raised.

    Parameters
    ----------
    csv_path
        Path to folder containing files to load.

    Returns
    -------
    None.
    """
    user, pw = secrets.db.epi
    engine = sqlalchemy.create_engine(f"mysql+pymysql://{user}:{pw}@{secrets.db.host}/{DB_NAME}")

    for filepath, attributes in CsvImporter.find_csv_files(csv_path):
        # Files whose names do not match the expected scheme are archived as failed and skipped.
        if attributes is None:
            move(filepath, filepath.replace("receiving", "archive/failed"))
            continue

        try:
            # Load the CSV into a DataFrame and append it to the sensor table.
            data = load_and_prepare_file(filepath, attributes)
            data.to_sql(TABLE_NAME, engine, if_exists="append", index=False)
        except Exception:
            # Archive the offending file as failed, then re-raise so the error is visible.
            move(filepath, filepath.replace("receiving", "archive/failed"))
            raise
        move(filepath, filepath.replace("receiving", "archive/successful"))
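# A minimal, self-contained sketch (not part of the module above) illustrating the
# receiving -> archive path substitution that main() relies on: archiving is a plain string
# replacement on the file path, so input files are expected to sit under a "receiving"
# directory. The example path below is hypothetical.
example_path = "data/receiving/fb_survey/20200408_state_cli.csv"
assert example_path.replace("receiving", "archive/failed") == \
    "data/archive/failed/fb_survey/20200408_state_cli.csv"
assert example_path.replace("receiving", "archive/successful") == \
    "data/archive/successful/fb_survey/20200408_state_cli.csv"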
def test_find_csv_files(self):
    """Recursively explore and find CSV files."""
    path_prefix = 'prefix/to/the/data/'
    glob_paths = [
        # valid weekly
        path_prefix + 'fb_survey/weekly_202015_county_cli.csv',
        # valid daily
        path_prefix + 'ght/20200408_state_rawsearch.csv',
        # valid national
        path_prefix + 'valid/20200408_nation_sig.csv',
        # valid hhs
        path_prefix + 'valid/20200408_hhs_sig.csv',
        # invalid
        path_prefix + 'invalid/hello_world.csv',
        # invalid day
        path_prefix + 'invalid/22222222_b_c.csv',
        # invalid week
        path_prefix + 'invalid/weekly_222222_b_c.csv',
        # invalid geography
        path_prefix + 'invalid/20200418_province_c.csv',
        # ignored
        path_prefix + 'ignored/README.md',
    ]
    mock_glob = MagicMock()
    mock_glob.glob.return_value = glob_paths

    found = set(CsvImporter.find_csv_files(path_prefix, glob=mock_glob))

    expected_issue_day = int(date.today().strftime("%Y%m%d"))
    expected_issue_week = int(str(epi.Week.fromdate(date.today())))
    time_value_day = 20200408
    expected_day_lag = (date.today() - date(
        year=time_value_day // 10000,
        month=(time_value_day // 100) % 100,
        day=time_value_day % 100)).days

    expected = set([
        (glob_paths[0],
         ('fb_survey', 'cli', 'week', 'county', 202015, expected_issue_week,
          delta_epiweeks(202015, expected_issue_week))),
        (glob_paths[1],
         ('ght', 'rawsearch', 'day', 'state', time_value_day, expected_issue_day,
          expected_day_lag)),
        (glob_paths[2],
         ('valid', 'sig', 'day', 'nation', time_value_day, expected_issue_day,
          expected_day_lag)),
        (glob_paths[3],
         ('valid', 'sig', 'day', 'hhs', time_value_day, expected_issue_day,
          expected_day_lag)),
        (glob_paths[4], None),
        (glob_paths[5], None),
        (glob_paths[6], None),
        (glob_paths[7], None),
    ])
    self.assertEqual(found, expected)
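# A small, self-contained sketch of how the daily lag values asserted above are derived:
# for a daily file, lag is the number of days between the file's YYYYMMDD time_value and
# today's issue date. The helper name below is hypothetical and exists only for illustration.
from datetime import date

def days_between_time_value_and_today(time_value: int) -> int:
    """Return today's date minus the YYYYMMDD time_value, in days."""
    file_date = date(time_value // 10000, (time_value // 100) % 100, time_value % 100)
    return (date.today() - file_date).days

# For example, days_between_time_value_and_today(20200408) equals the lag used for the
# daily expectations in the test above.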