import sqlite3
import sys
import time

import numpy as np
import pandas as pd

import DataPreprocessing as DPre

# Short alias for NaN; kept at module level in case later code refers to it.
nan = np.nan

# init data preprocessing class
covid_data_processing = DPre.CovidDataProcessing()

# data source paths
covid_link = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
population_link = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv'
population_encoding = 'ISO-8859-1'

# create DFs of interest
print('---Beginning Preprocessing and Data Validation---')

# NOTE(review): open_csv is defined in DataPreprocessing; its arguments look
# like (source kind, location, column dtypes, encoding) -- confirm there.
covid_df = covid_data_processing.open_csv(
    'url',
    covid_link,
    {'fips': 'str', 'cases': 'int', 'deaths': 'int'},
    None,
)
covid_df['date'] = pd.to_datetime(covid_df['date'])

# Drop rows that have no FIPS code.  The previous filter, `fips != @nan`,
# kept every row because NaN compares unequal to everything (itself
# included), so the comparison was True for missing and present values alike.
covid_df.dropna(subset=['fips'], inplace=True)
print(len(covid_df))
print('Read covid data')

population_df = covid_data_processing.open_csv(
    'url',
    population_link,
    {'STATE': 'str', 'COUNTY': 'str', 'POPESTIMATE2019': 'int'},
    population_encoding,
)
print('Read population data')

# Create fips column from STATE and COUNTY columns in population df.
# Both columns are requested as 'str' above, so `+` concatenates the codes
# instead of adding them numerically.
population_df['fips'] = population_df['STATE'] + population_df['COUNTY']