def deaths_vs_pop(state, date, output_filename):
    """Write population and death count for each county of a state on a date.

    Parameters
    ----------
    state: string
        Name of state
    date: str
        Date of deaths; compared verbatim with column 0 of the case file
    output_filename: str
        Prefix of the output file; 'dp.txt' is appended to it

    Outputs
    --------
    <output_filename>dp.txt: txt file
        One line '<pop> <deaths>' for every census county that has a
        death record on the given date, in census-file order

    Returns
    -------
    None
    """
    # get counties and pops for a state
    STATECOL = 5
    census_name = 'co-est2019-alldata.csv'
    county_pops = mu.get_columns(census_name, STATECOL, state, [6, 7])

    # gets deaths for each county on the requested date
    date_county_deaths = mu.get_columns('covid-19-data/us-counties.csv',
                                        2, state, [0, 1, 5])
    deaths_by_county = {}
    for case_date, county_name, deaths in date_county_deaths:
        if case_date == date:
            # keep the first record should a county appear more than once
            deaths_by_county.setdefault(county_name, deaths)

    # saves pop, deaths in txt file; the context manager closes the file
    # even if a write fails
    with open(output_filename + 'dp.txt', 'w+') as f:
        for county, pop in county_pops:
            if county == state:
                # skip the row whose name is the state itself
                continue
            # drop the trailing 7 characters (presumably ' County') so the
            # name can be compared with the case-file county names
            county = county[:-7]
            if county in deaths_by_county:
                f.write(pop + ' ' + deaths_by_county[county] + '\n')
    return
def print_cases(file_name, county_column, county, cases_columns):
    """Print, one per line, and return the requested columns for a county.

    Parameters
    ----------
    file_name: string
        The path to the CSV file
    county_column: integer
        The column containing the county strings
    county: string
        The name of the county
    cases_columns: list
        Columns handed to get_columns() as the result columns

    Prints/Returns
    --------
    cases:
        Whatever get_columns() returned for the county; each element is
        also printed on its own line

    Exits
    -----
    Status 6 (after printing a message) when a ValueError is raised
    """
    try:
        county_rows = mu.get_columns(file_name, county_column, county,
                                     cases_columns)
        printable = '\n'.join(str(row) for row in county_rows)
        print(printable)
    except ValueError:
        print('File contains dates that are not sequential')
        sys.exit(6)
    return county_rows
def print_percap_plot(file_name, county, state='Colorado',
                      out_file='percap_cases_boulder.png'):
    """Plot per capita case counts over time for one county.

    Parameters
    ----------
    file_name: string
        Name of case data file
    county: string
        Name of county as it appears in column 1 of the case file;
        ' County' is appended for the census lookup
    state: string
        State whose census rows are searched (default 'Colorado')
    out_file: string
        Path of the png to write (default 'percap_cases_boulder.png')

    Outputs
    --------
    out_file: png file
        Graph of per capita covid cases in the county
    """
    # Get dates and cases
    county_column = 1
    dates_cases_columns = [0, 4]
    date_cases = mu.get_columns(file_name, county_column, county,
                                dates_cases_columns)

    # Get population of the county.  The lookup key is built from the
    # `county` argument so the population matches the plotted cases.
    state_column = 5
    counties_pops = mu.get_columns('co-est2019-alldata.csv', state_column,
                                   state, [6, 7])
    county_pop = mu.binary_search(county + ' County', counties_pops)

    # Calculate Per Capita Rates
    date_percap = mu.calc_per_capita(date_cases, county_pop)

    # Convert the ISO date strings to datetime objects for plotting
    plot_points = [[datetime.strptime(row[0], '%Y-%m-%d'), row[1]]
                   for row in date_percap]

    # Plot
    mu.plot_lines(plot_points, out_file)
def test_get_columns(self):
    # Requesting columns [3, 4, 5] for 'Boulder' must yield 20 rows whose
    # first field is always '8013'; only the last two fields vary.
    varying_fields = [
        ('1', '0'), ('7', '0'), ('7', '0'), ('8', '0'), ('8', '0'),
        ('11', '0'), ('24', '0'), ('30', '0'), ('37', '0'), ('39', '0'),
        ('49', '0'), ('51', '0'), ('66', '0'), ('76', '1'), ('84', '1'),
        ('90', '1'), ('100', '1'), ('107', '2'), ('114', '2'),
        ('132', '2'),
    ]
    expected_rows = [['8013', fourth, fifth]
                     for fourth, fifth in varying_fields]
    actual_rows = my_utils.get_columns('test_counties.csv', 1, 'Boulder',
                                       [3, 4, 5])
    self.assertEqual(actual_rows, expected_rows)
def get_rates(state, target_date='2020-11-02'):
    '''
    Print the per capita case figure of every county of a state on one date
    and return the underlying case and population tables.

    Parameters
    ----------
    state: string
        Name of the state to query in both data files
    target_date: string
        Date whose rows are printed (default '2020-11-02'); compared
        verbatim with the first result column of the case file

    Prints
    ------
    cases / population for each target-date row whose
    '<county> County' key is found in the population table, separated by
    single spaces (no trailing newline)

    Returns
    -------
    [co_cases, co_pops]: list
        co_cases: rows of [date, county, cases]
        co_pops: rows of [county name, population as int], sorted by name
    '''
    case_file = 'covid-19-data/us-counties.csv'
    result = my_utils.get_columns(case_file, 2, state, (0, 1, 4), 0)
    # get_columns is indexed column-first here; turn it into rows
    co_cases = [[day, county, cases]
                for day, county, cases in zip(result[0], result[1],
                                              result[2])]

    population_file = 'co-est2019-alldata.csv'
    result = my_utils.get_columns(population_file, 5, state, (6, 7))
    co_pops = [[name, int(pop)] for name, pop in zip(result[0], result[1])]
    # binary_search needs the table ordered by county name
    co_pops.sort(key=itemgetter(0))

    for day, county, cases in co_cases:
        # Filter on the date first so the search only runs for rows that
        # can actually be printed.
        if day != target_date:
            continue
        pop = my_utils.binary_search(county + ' County', co_pops)
        if pop is not None:
            # NOTE(review): assumes `cases` is numeric here - confirm
            # against what get_columns returns when a date column is given
            print(cases / pop, end=' ')

    return [co_cases, co_pops]
def test_daily_count(self):
    # fixed fixture: column 4 of the 'Boulder' rows
    fixture_column = my_utils.get_columns('test_counties.csv', 1, 'Boulder',
                                          [4])
    expected_daily = [
        1, 6, 0, 1, 0, 3, 13, 6, 7, 2,
        10, 2, 15, 10, 8, 6, 10, 7, 7, 18,
    ]
    self.assertEqual(my_utils.get_daily_count(fixture_column),
                     expected_daily)

    # randomized check: the first entry equals the first data point and
    # every later entry equals the difference to the preceding data point
    for _ in range(1000):
        size = random.randint(100, 1000)
        data = [[random.randint(1, 100)] for _ in range(size)]
        daily = my_utils.get_daily_count(data)
        for k, increment in enumerate(daily):
            if k == 0:
                expected_entry = data[k]
            else:
                current, previous = data[k], data[k - 1]
                expected_entry = [current[0] - previous[0]]
            self.assertListEqual([increment], expected_entry)
def _parse_bool(text):
    """Turn a command-line token into a real boolean for argparse."""
    lowered = str(text).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % (text,))


def main():
    """Parse the command line, fetch the requested column(s), print them.

    --result_column may be a single integer index, a single non-numeric
    string, or a comma separated list.  A comma separated list is fetched
    with mu.get_columns(); anything else with mu.get_column().  Optionally
    the results are replaced by daily increments and/or a running average
    before each result is printed on its own line.
    """
    desc = 'Opens a file and extracts data from a specific column.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--file', dest='file_name', type=str, required=True,
                        help='Name of the file to be opened by the script.')
    parser.add_argument('--result_column', dest='result_column', default=4,
                        help='Column of file to be returned by the script. '
                             'Defaults to 4 and must correspond to an index '
                             'found in the file.')
    parser.add_argument('--county_column', dest='county_column', type=int,
                        required=True,
                        help='Column of file to be queried by the script.')
    parser.add_argument('--county', dest='county', type=str, required=True,
                        help='Name of county to retrieve data from.')
    # type=bool would treat every non-empty string (including "False") as
    # True, so the flags are parsed explicitly.  nargs='?' additionally
    # lets the bare flag mean True.
    parser.add_argument('--return_daily_increment',
                        dest='return_daily_increment', type=_parse_bool,
                        nargs='?', const=True, default=False,
                        help='Decides whether results '
                             'are returned as daily increments.')
    parser.add_argument('--return_running_average',
                        dest='return_running_average', type=_parse_bool,
                        nargs='?', const=True, default=False,
                        help='Decides whether to return '
                             'running averages from results.')
    parser.add_argument('--running_avg_window_size',
                        dest='running_avg_window_size', type=int, default=5,
                        help='Determines the window '
                             'size for the running average.')
    parser.add_argument('--date_column', dest='date_column', type=int,
                        default=0, help='Determines the date column.')
    args = parser.parse_args()

    print()
    print('Results:')
    results = []

    # Only a string can hold a comma separated list.  Testing the type
    # first avoids "',' in <int>" raising TypeError for the integer
    # default or for a plain numeric argument.
    wants_many = (isinstance(args.result_column, str)
                  and ',' in args.result_column)
    if wants_many:
        args.result_column = args.result_column.split(',')
        try:
            results = mu.get_columns(args.file_name, args.county_column,
                                     args.county, args.result_column,
                                     args.date_column)
        except ValueError:
            print('ValueError during get columns')
    else:
        try:
            args.result_column = int(args.result_column)
        except ValueError:
            # not numeric: pass the string through unchanged
            pass
        try:
            results = mu.get_column(args.file_name, args.county_column,
                                    args.county, args.result_column,
                                    args.date_column)
        except ValueError:
            print('ValueError during get column')

    if args.return_daily_increment:
        try:
            # NOTE(review): get_cases is not defined in the visible part of
            # this file - confirm it exists at module level.
            results = mu.get_daily_count(
                get_cases(args.file_name, args.county_column, args.county,
                          args.result_column, args.date_column))
        except ValueError:
            print('Value Error during get daily increment.')

    if args.return_running_average:
        try:
            results, _ = mu.running_average(
                results, window_size=args.running_avg_window_size)
        except ValueError:
            print('ValueError during running average')

    for result in results:
        print(result)
    print()
    print()
def get_daily_rates(state, date, output_filename):
    """Prints daily case rate per capita for a given date

    Parameters
    ----------
    state: string
        Name of state
    date: str
        Date of cases; compared verbatim with column 0 of the case file
    output_filename: str
        Prefix of the output file; '_rates.txt' is appended to it

    Prints/Returns
    --------
    county_names: str list
        Name of the county
    case_rates: float list
        Percap rate for that day
    <output_filename>_rates.txt: txt file
        Saves daily rates for counties in state, one rate per line
    """
    # initialize hash table: one empty bucket per slot
    table_size = 1000
    hcounty_pops = [[] for _ in range(table_size)]

    # get counties and pops for a state
    census_name = 'co-est2019-alldata.csv'
    query_column = 5  # state
    results_columns = [6, 7]
    county_pops = mu.get_columns(census_name, query_column, state,
                                 results_columns)

    # put counties and pops in a hash table; row 0 is skipped (the
    # original comment marks it as the state name row)
    for row in county_pops[1:]:
        # drop the trailing 7 characters (presumably ' County')
        curr_county = row[0][:-7]
        ht.put(hcounty_pops, table_size, curr_county, row[1])

    # get cases for each county, then keep only the requested date
    case_file = 'covid-19-data/us-counties.csv'
    state_column = 2
    date_c_cases = mu.get_columns(case_file, state_column, state, [0, 1, 4])
    c_cases = [[c_date, c_county, c_case]
               for c_date, c_county, c_case in date_c_cases
               if c_date == date]

    county_names = []
    case_rates = []
    output_txt = output_filename + '_rates.txt'
    # The context manager guarantees the file is closed even when a lookup
    # or conversion below raises.
    with open(output_txt, 'w+') as f:
        # print county name and percap case rate
        for _, county_name, cases in c_cases:
            county_names.append(county_name)
            # NOTE(review): if ht.get yields no population for a county
            # absent from the census file, int() below will raise - confirm
            c_pop = ht.get(county_name, hcounty_pops, table_size)
            date_case_rate = mu.calc_per_capita([[date, cases]], int(c_pop))
            case_rate = date_case_rate[0][1]
            case_rates.append(case_rate)
            print(county_name, case_rate)
            f.write(str(case_rate) + '\n')

    return county_names, case_rates
def test_get_columns_dates_cases(self):
    # Columns are requested by header name ('cases', 'date') with the
    # date column given as 0; results are indexed [column][row].
    fetched = mu.get_columns('covid-19-data/us-counties.csv', 1, 'Boulder',
                             ['cases', 'date'], 0)
    spot_checks = (
        (0, 5, '11'),
        (1, 25, '2020-04-08'),
    )
    for column_index, row_index, expected in spot_checks:
        self.assertEqual(fetched[column_index][row_index], expected)
def test_result_col_doesnt_exist(self):
    # Asking for result column 12 must terminate with exit status 4.
    requested_columns = [12]
    with self.assertRaises(SystemExit) as exit_ctx:
        my_utils.get_columns('test_counties.csv', 1, 'Boulder',
                             requested_columns)
    exit_status = exit_ctx.exception.code
    self.assertEqual(exit_status, 4)
def test_file_not_found(self):
    # Reading 'no-data-file.csv' must terminate with exit status 1.
    requested_path = 'no-data-file.csv'
    with self.assertRaises(SystemExit) as exit_ctx:
        my_utils.get_columns(requested_path, 1, 'Boulder', [4])
    exit_status = exit_ctx.exception.code
    self.assertEqual(exit_status, 1)
def test_get_one_column(self):
    # A single requested column still comes back as one-element rows.
    expected_values = (
        '1', '7', '7', '8', '8', '11', '24', '30', '37', '39',
        '49', '51', '66', '76', '84', '90', '100', '107', '114', '132',
    )
    expected_rows = [[value] for value in expected_values]
    actual_rows = my_utils.get_columns('test_counties.csv', 1, 'Boulder',
                                       [4])
    self.assertEqual(actual_rows, expected_rows)