def deaths_vs_pop(state, date, output_filename):
    """Write population and total death count for each county of a state.

    For every county row of the census file that also has a death count on
    ``date`` in the cases file, one line ``"<population> <deaths>"`` is
    written to ``output_filename + 'dp.txt'``.

    Parameters
    ----------
    state: string
            Name of state
    date: str
            Date of deaths, compared verbatim with the date field of the
            cases file
    output_filename: str
            Prefix of the output file; 'dp.txt' is appended to it

    Prints/Returns
    --------
    None
            The only output is the text file described above. Counties
            without a death count on ``date`` are left out of the file.
    """
    # get counties and pops for a state
    STATECOL = 5
    county_pops = mu.get_columns('co-est2019-alldata.csv', STATECOL, state,
                                 [6, 7])

    # gets deaths for each county
    date_county_deaths = mu.get_columns('covid-19-data/us-counties.csv', 2,
                                        state, [0, 1, 5])

    # Index the death counts of the requested date by county name.  The
    # previous positional cursor could run past the end of the list (or hit
    # an empty list when the date had no rows) and raise IndexError.
    deaths_by_county = {}
    for case_date, county_name, deaths in date_county_deaths:
        if case_date == date:
            # keep the first row seen for a county on that date
            deaths_by_county.setdefault(county_name, deaths)

    # saves pop, deaths in txt file; the context manager closes the file
    # even when a row cannot be processed
    with open(output_filename + 'dp.txt', 'w+') as f:
        for county, pop in county_pops:
            if county == state:
                # the row named after the state itself is not a county
                continue
            # drop the trailing 7 characters (' County') so the name can be
            # compared with the names used in the cases file
            deaths = deaths_by_county.get(county[:-7])
            if deaths is not None:
                f.write(f'{pop} {deaths}\n')

    return
def print_cases(file_name, county_column, county, cases_columns):
    """Fetch the requested columns for one county and print them.

    Parameters
    ----------
    file_name: string
            The path to the CSV file
    county_column: integer
            The column containing the county strings
    county: string
            The name of the county
    cases_columns: list
            The columns to return for every matching row

    Prints/Returns
    --------
    cases:
            Whatever mu.get_columns() returned; each item is also printed
            on its own line. On ValueError a message is printed and the
            program exits with status 6.
    """
    try:
        county_rows = mu.get_columns(file_name, county_column, county,
                                     cases_columns)
        # one item per line, followed by a final newline
        print('\n'.join(map(str, county_rows)))
    except ValueError:
        print('File contains dates that are not sequential')
        sys.exit(6)
    else:
        return county_rows
def print_percap_plot(file_name, county, state='Colorado',
                      output_file='percap_cases_boulder.png'):
    """Plot the per capita case counts of a county over time.

    Parameters
    ----------
    file_name: string
            Name of case data file
    county: string
            Name of county, without the ' County' suffix
    state: string
            State used to select the census rows (default 'Colorado')
    output_file: string
            Path of the png written by mu.plot_lines()
            (default 'percap_cases_boulder.png')

    Outputs
    --------
    output_file: png file
            Graph of per capita covid cases in the county
    """
    # Get dates and cases
    county_column = 1
    dates_cases_columns = [0, 4]
    date_cases = mu.get_columns(file_name,
                                county_column,
                                county,
                                dates_cases_columns)

    # Get population of the county
    state_column = 5
    counties_pops = mu.get_columns('co-est2019-alldata.csv',
                                   state_column,
                                   state,
                                   [6, 7])

    # Look up the county that was actually requested; the population used
    # to be taken from 'Boulder County' regardless of the ``county`` argument.
    # NOTE(review): what binary_search returns for an unknown county is not
    # visible here -- confirm calc_per_capita copes with it.
    county_pop = mu.binary_search(county + ' County', counties_pops)

    # Calculate Per Capita Rates
    date_percap = mu.calc_per_capita(date_cases, county_pop)

    # Convert the 'YYYY-MM-DD' date strings to datetime objects for plotting
    plot_points = [[datetime.strptime(entry[0], '%Y-%m-%d'), entry[1]]
                   for entry in date_percap]

    # Plot
    mu.plot_lines(plot_points, output_file)
 def test_get_columns(self):
     # Result columns 3, 4 and 5 of the 'Boulder' rows in the fixture file
     # must come back row by row, as strings.
     col4_values = ['1', '7', '7', '8', '8', '11', '24', '30', '37', '39',
                    '49', '51', '66', '76', '84', '90', '100', '107', '114',
                    '132']
     col5_values = ['0'] * 13 + ['1'] * 4 + ['2'] * 3
     # column 3 holds '8013' on every expected row
     expected_rows = [['8013', col4, col5]
                      for col4, col5 in zip(col4_values, col5_values)]
     actual_rows = my_utils.get_columns('test_counties.csv', 1, 'Boulder',
                                        [3, 4, 5])
     self.assertEqual(actual_rows, expected_rows)
def get_rates(state):
    '''
    Print the per capita covid 19 case rate of every county in a state.

    The rates are printed on one line, separated by spaces, for the rows
    dated '2020-11-02' whose county has a population entry.

    Parameters
    ----------
    state: string
            Name of the state used to query both data files

    Returns
    -------
    list
            [co_cases, co_pops] where co_cases holds [date, county, cases]
            rows for every date and co_pops holds [county name, population]
            rows sorted by county name
    '''
    case_file = 'covid-19-data/us-counties.csv'
    query_column = 2
    query_value = state
    result_columns = (0, 1, 4)
    target_date = '2020-11-02'
    result = my_utils.get_columns(case_file, query_column, query_value,
                                  result_columns, 0)
    # the result is indexed column-first; turn it into one row per record
    co_cases = [[case_date, county, cases]
                for case_date, county, cases
                in zip(result[0], result[1], result[2])]

    population_file = 'co-est2019-alldata.csv'
    query_column = 5
    result_columns = (6, 7)

    result = my_utils.get_columns(population_file, query_column, query_value,
                                  result_columns)
    co_pops = [[name, int(pop)] for name, pop in zip(result[0], result[1])]

    # binary_search is given the list sorted by county name
    co_pops.sort(key=itemgetter(0))

    for case_date, county, cases in co_cases:
        # check the date first so rows of other days never trigger a search
        if case_date != target_date:
            continue
        pop = my_utils.binary_search(county + ' County', co_pops)
        if pop is not None:
            # Populations need int() above, so the fields appear to arrive
            # as raw strings; convert the count as well before dividing.
            print(float(cases) / pop, end=' ')

    return [co_cases, co_pops]
    def test_daily_count(self):
        # Fixed fixture: running totals of the 'Boulder' rows must turn into
        # the known day-to-day increments.
        running_totals = my_utils.get_columns('test_counties.csv', 1,
                                              'Boulder', [4])
        expected_increments = [
            1, 6, 0, 1, 0, 3, 13, 6, 7, 2, 10, 2, 15, 10, 8, 6, 10, 7, 7, 18
        ]
        self.assertEqual(my_utils.get_daily_count(running_totals),
                         expected_increments)

        # Randomized check: the first increment equals the first value, and
        # every later increment equals the difference to the previous row.
        for _ in range(1000):
            row_count = random.randint(100, 1000)
            rows = [[random.randint(1, 100)] for _ in range(row_count)]
            increments = my_utils.get_daily_count(rows)
            for idx, increment in enumerate(increments):
                if idx == 0:
                    self.assertListEqual([increment], rows[idx])
                    continue
                current_row = rows[idx]
                previous_row = rows[idx - 1]
                self.assertListEqual([increment],
                                     [current_row[0] - previous_row[0]])
# Example #7
# 0
def main():
    """Parse the command line, query the data file and print the results.

    The result column may be given as a single value (an integer index or
    any other string) or as a comma separated list. A list is passed to
    mu.get_columns(), a single value to mu.get_column(). Optionally the
    results are replaced by daily increments and/or a running average
    before every result is printed on its own line.

    A ValueError raised by any of the helpers is reported with a short
    message instead of aborting the script.
    """
    desc = 'Opens a file and extracts data from a specific column.'

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--file',
                        dest='file_name',
                        type=str,
                        required=True,
                        help='Name of the file to be opened by the script.')

    parser.add_argument('--result_column',
                        dest='result_column',
                        default=4,
                        help='Column of file to be returned by the script.\
                        Defaults to 4 and must correspond to an index\
                        found in the file.')

    parser.add_argument('--county_column',
                        dest='county_column',
                        type=int,
                        required=True,
                        help='Column of file to be queried by the script.')

    parser.add_argument('--county',
                        dest='county',
                        type=str,
                        required=True,
                        help='Name of county to retrieve data from.')

    # type=bool would turn every non-empty string -- including "False" --
    # into True, so the flags use an explicit text-to-bool conversion.
    parser.add_argument('--return_daily_increment',
                        dest='return_daily_increment',
                        type=_flag_to_bool,
                        default=False,
                        help='Decides whether results\
                        are returned as daily increments.')

    parser.add_argument('--return_running_average',
                        dest='return_running_average',
                        type=_flag_to_bool,
                        default=False,
                        help='Decides whether to return\
                        running averages from results.')

    parser.add_argument('--running_avg_window_size',
                        dest='running_avg_window_size',
                        type=int,
                        default=5,
                        help='Determines the window\
                        size for the running average.')

    parser.add_argument('--date_column',
                        dest='date_column',
                        type=int,
                        default=0,
                        help='Determines the date column.')

    args = parser.parse_args()

    print()
    print('Results:')
    results = []

    # Normalizing first avoids the former "',' in <int>" TypeError that
    # hit every run with a purely numeric (including the default) column.
    args.result_column = _normalize_result_column(args.result_column)

    if isinstance(args.result_column, list):
        try:
            results = mu.get_columns(args.file_name, args.county_column,
                                     args.county, args.result_column,
                                     args.date_column)
        except ValueError:
            print('ValueError during get columns')
    else:
        try:
            results = mu.get_column(args.file_name, args.county_column,
                                    args.county, args.result_column,
                                    args.date_column)
        except ValueError:
            print('ValueError during get column')

    if args.return_daily_increment:
        try:
            # NOTE(review): get_cases is not defined in the visible part of
            # this file -- confirm it is available at module level.
            results = mu.get_daily_count(
                get_cases(args.file_name, args.county_column, args.county,
                          args.result_column, args.date_column))
        except ValueError:
            print('Value Error during get daily increment.')

    if args.return_running_average:
        try:
            results, _ = mu.running_average(
                results, window_size=args.running_avg_window_size)
        except ValueError:
            print('ValueError during running average')

    for result in results:
        print(result)
    print()
    print()


def _flag_to_bool(text):
    """Return False for empty or "false"-like text, True for anything else."""
    return text.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n',
                                        'off')


def _normalize_result_column(raw):
    """Return a list of strings for 'a,b' input, an int if possible, else raw."""
    if isinstance(raw, str) and ',' in raw:
        return raw.split(',')
    try:
        return int(raw)
    except ValueError:
        # not an index; hand the value over unchanged
        return raw
def get_daily_rates(state, date, output_filename):
    """Prints daily case rate per capita for a given date

    Parameters
    ----------
    state: string
            Name of state
    date: str
            Date of cases, compared verbatim with the date field of the
            cases file
    output_filename: str
            Prefix of the output file; '_rates.txt' is appended to it

    Prints/Returns
    --------
    county_names: str list
            Name of the county
    case_rates: float list
            Percap rate for that day
    output_filename_rates.txt: txt file
            Saves daily rates for counties in state, one rate per line

    Counties for which no population is found are skipped, so both
    returned lists and the file stay aligned.
    """
    # initialize hash table: one empty bucket per slot
    table_size = 1000
    hcounty_pops = [[] for _ in range(table_size)]

    # get counties and pops for a state
    census_name = 'co-est2019-alldata.csv'
    query_column = 5  # state
    results_columns = [6, 7]
    county_pops = mu.get_columns(census_name, query_column, state,
                                 results_columns)

    # put counties and pops in a hash table; the first row carries the
    # state name and is skipped
    for census_row in county_pops[1:]:
        # drop the trailing 7 characters (' County') from the census name
        curr_county = census_row[0][:-7]
        ht.put(hcounty_pops, table_size, curr_county, census_row[1])

    # get cases for each county on date
    case_file = 'covid-19-data/us-counties.csv'
    state_column = 2
    counties_cases = [0, 1, 4]
    date_c_cases = mu.get_columns(case_file, state_column, state,
                                  counties_cases)

    # get cases for specific date
    c_cases = [[c_date, c_county, c_case]
               for c_date, c_county, c_case in date_c_cases
               if c_date == date]

    # print county name and percap case rate
    county_names = []
    case_rates = []

    # Write to txt file; the context manager closes it even if a lookup or
    # conversion below raises
    output_txt = output_filename + '_rates.txt'
    with open(output_txt, 'w+') as f:
        for _curr_date, county_name, cases in c_cases:
            c_pop = ht.get(county_name, hcounty_pops, table_size)
            if c_pop is None:
                # assumes ht.get returns None for a missing key -- TODO
                # confirm; int(None) used to abort the whole run here
                continue
            date_case_rate = mu.calc_per_capita([[date, cases]], int(c_pop))
            case_rate = date_case_rate[0][1]

            county_names.append(county_name)
            case_rates.append(case_rate)

            print(county_name, case_rate)
            f.write(str(case_rate) + '\n')

    return county_names, case_rates
 def test_get_columns_dates_cases(self):
     # Result columns are requested by name ('cases', 'date') and the
     # result is indexed column-first.
     fetched = mu.get_columns('covid-19-data/us-counties.csv', 1, 'Boulder',
                              ['cases', 'date'], 0)
     cases_column = fetched[0]
     self.assertEqual(cases_column[5], '11')
     dates_column = fetched[1]
     self.assertEqual(dates_column[25], '2020-04-08')
 def test_result_col_doesnt_exist(self):
     # Asking for result column 12 (per the test name, one the fixture
     # does not have) must end the program with exit status 4.
     with self.assertRaises(SystemExit) as exit_ctx:
         my_utils.get_columns('test_counties.csv', 1, 'Boulder', [12])
     exit_status = exit_ctx.exception.code
     self.assertEqual(exit_status, 4)
 def test_file_not_found(self):
     # Querying 'no-data-file.csv' (per the test name, a file that does
     # not exist) must end the program with exit status 1.
     with self.assertRaises(SystemExit) as exit_ctx:
         my_utils.get_columns('no-data-file.csv', 1, 'Boulder', [4])
     exit_status = exit_ctx.exception.code
     self.assertEqual(exit_status, 1)
 def test_get_one_column(self):
     # A single requested column still comes back as one-element rows.
     col4_values = ('1', '7', '7', '8', '8', '11', '24', '30', '37', '39',
                    '49', '51', '66', '76', '84', '90', '100', '107', '114',
                    '132')
     expected_rows = [[value] for value in col4_values]
     actual_rows = my_utils.get_columns('test_counties.csv', 1, 'Boulder',
                                        [4])
     self.assertEqual(actual_rows, expected_rows)