def print_running_avg(daily_cases, window_size):
    """Calls running_average() and prints running avg
       and window size

    Parameters
    ----------
    daily_cases: list
            List of daily counts in a county
    window_size: int
            Size of window to use in calculation

    Prints
    --------
    running_avg: list of floats
            Running averages for daily counts
    window: int
            Size of the window used in calculation
    """
    try:
        running_avg, window = mu.running_average(daily_cases,
                                                 window_size)
    except TypeError:
        running_avg, window = mu.running_average(daily_cases)
    print(*running_avg, sep='\n')
    print(window)
Example #2
    def test_running_avg(self):
        self.assertTrue(
            all(
                my_utils.running_average(np.array([0, 1, 2, 3, 4]), window=3)
                == np.array([0., 0.5, 1., 2., 3.])))
        # test when values decrease
        self.assertTrue(
            all(
                my_utils.running_average(np.array([0, 9, 2, 6, 1]), window=3)
                == np.array([0., 4.5, 11. / 3., 17. / 3., 3.])))
        # test when window is larger than length of array
        self.assertTrue(
            all(
                my_utils.running_average(np.array([0, 1, 2, 3, 4]), window=10)
                == np.array([0., 0.5, 1., 1.5, 2.])))
    def test_running_avg(self):
        # simple test
        avgs, window = my_utils.running_average([4, 6, 3, 1, 8, 99],
                                                window_size=4)
        self.assertEqual(avgs, [3.5, 4.5, 27.75])

        # randomized test
        for i in range(1000):
            data_size = random.randint(100, 1000)
            data = array('i')
            for j in range(data_size):
                data.append(random.randint(1, 100))
            # random window within range
            test_window = data_size - random.randint(1, 100)
            avgs, window_size = my_utils.running_average(data, test_window)
            for j in range(len(avgs)):
                self.assertEqual(avgs[j], np.mean(data[j:j + test_window]))
                self.assertEqual(window_size, test_window)
    def test_window_too_large(self):
        # simple test
        avgs, window = my_utils.running_average([4, 6, 3, 1, 8, 98],
                                                window_size=10)
        self.assertEqual(avgs, [20])
        self.assertEqual(window, 6)

        # randomized test
        for i in range(1000):
            data_size = random.randint(100, 1000)
            data = array('i')
            for j in range(data_size):
                data.append(random.randint(1, 100))
            # window bigger than data size
            test_window = data_size + random.randint(1, 100)
            avgs, window_size = my_utils.running_average(data, test_window)
            for j in range(len(avgs)):
                self.assertEqual(avgs[j], np.mean(data[j:j + data_size]))
                self.assertEqual(window_size, data_size)
Example #5
    def test_get_running_average_random_mode(self):
        for i in range(10):
            arr = []
            for j in range(100):
                x = random.randint(0, 10000)
                arr.append(x)
            for k in range(100):
                window = random.randint(1, 100)
                test_data, _ = mu.running_average(arr, window)
                for m in range(int(100 / window) - 1):
                    expected_result = np.mean(arr[m:m + window])
                    self.assertEqual(test_data[m], expected_result)
    def test_window_negative(self):
        for i in range(1000):
            data_size = random.randint(100, 1000)
            data = array('i')
            for j in range(data_size):
                data.append(random.randint(1, 100))
            # window negative
            test_window = random.randint(-100, -1)
            avgs, window_size = my_utils.running_average(data, test_window)
            for j in range(len(avgs)):
                self.assertEqual(avgs[j], np.mean(data[j:j + 5]))
                self.assertEqual(window_size, 5)
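
Taken together, the tests on this page pin down what several of these projects expect from my_utils.running_average: a (list_of_window_means, window_used) return value, a too-large window clamped to the length of the data, a negative or invalid window replaced by a default of 5, and a SystemExit with code 3 on None input (the first numpy-based test above uses a different variant that returns a same-length array instead). The sketch below is only an illustration consistent with those assertions, not the original my_utils source; the default value and fallback logic are assumptions.

import sys

import numpy as np


def running_average(data, window_size=5):
    """Sketch only; defaults and fallbacks are inferred from the tests on this page."""
    if data is None:
        sys.exit(3)                              # Example #8 below asserts exit code 3 on None
    if not isinstance(window_size, int) or window_size < 1:
        window_size = 5                          # assumed fallback, per test_window_negative above
    window_size = min(window_size, len(data))    # clamp, per test_window_too_large above
    averages = [float(np.mean(data[i:i + window_size]))
                for i in range(len(data) - window_size + 1)]
    return averages, window_size

With this sketch, running_average([4, 6, 3, 1, 8, 99], 4) returns ([3.5, 4.5, 27.75], 4), matching test_running_avg above.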
Example #7
def main():
    desc = 'Opens a file and extracts data from a specific column.'

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--file',
                        dest='file_name',
                        type=str,
                        required=True,
                        help='Name of the file to be opened by the script.')

    parser.add_argument('--result_column',
                        dest='result_column',
                        default=4,
                        help='Column of file to be returned by the script.\
                        Defaults to 4 and must correspond to an index\
                        found in the file.')

    parser.add_argument('--county_column',
                        dest='county_column',
                        type=int,
                        required=True,
                        help='Column of file to be queried by the script.')

    parser.add_argument('--county',
                        dest='county',
                        type=str,
                        required=True,
                        help='Name of county to retrieve data from.')

    parser.add_argument('--return_daily_increment',
                        dest='return_daily_increment',
                        type=bool,
                        default=False,
                        help='Decides whether results\
                        are returned as daily increments.')

    parser.add_argument('--return_running_average',
                        dest='return_running_average',
                        type=bool,
                        default=False,
                        help='Decides whether to return\
                        running averages from results.')

    parser.add_argument('--running_avg_window_size',
                        dest='running_avg_window_size',
                        type=int,
                        default=5,
                        help='Determines the window\
                        size for the running average.')

    parser.add_argument('--date_column',
                        dest='date_column',
                        type=int,
                        default=0,
                        help='Determines the date column.')

    args = parser.parse_args()

    print()
    print('Results:')
    results = []
    try:
        args.result_column = int(args.result_column)
    except ValueError:
        pass
    if isinstance(args.result_column, str) and ',' in args.result_column:
        result_array = []
        for result in args.result_column.split(','):
            result_array.append(str(result))
        args.result_column = result_array
        try:
            results = mu.get_columns(args.file_name, args.county_column,
                                     args.county, args.result_column,
                                     args.date_column)
        except ValueError:
            print('ValueError during get columns')
    else:
        try:
            results = mu.get_column(args.file_name, args.county_column,
                                    args.county, args.result_column,
                                    args.date_column)
        except ValueError:
            print('ValueError during get column')
    if args.return_daily_increment is True:
        try:
            results = mu.get_daily_count(
                get_cases(args.file_name, args.county_column, args.county,
                          args.result_column, args.date_column))
        except ValueError:
            print('Value Error during get daily increment.')
    if args.return_running_average is True:
        try:
            results, _ = mu.running_average(
                results, window_size=args.running_avg_window_size)
        except ValueError:
            print('ValueError during running average')
    for result in results:
        print(result)
    print()
    print()
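
The --result_column handling in this main() accepts either a single column index or a comma-separated list and dispatches to mu.get_column() or mu.get_columns() accordingly. The stand-alone snippet below just illustrates that parsing step; parse_result_column is a hypothetical helper, not part of the original script.

def parse_result_column(raw):
    """Hypothetical helper mirroring the branch in main() above."""
    try:
        return int(raw)            # single column, e.g. "4" -> 4 -> mu.get_column()
    except ValueError:
        return raw.split(',')      # multiple columns, e.g. "4,5" -> ['4', '5'] -> mu.get_columns()


print(parse_result_column('4'))    # 4
print(parse_result_column('4,5'))  # ['4', '5']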
Example #8
    def test_get_running_average_error_mode(self):
        with self.assertRaises(SystemExit) as cm:
            mu.running_average(None)
        self.assertEqual(cm.exception.code, 3)
Example #9
    def test_get_running_average(self):
        test_results = None
        test_results = mu.running_average([1, 2, 1, 2])
        self.assertAlmostEqual(test_results[0], 1.5)
Example #10
def main():
    """
    calculate the number of covid19 cases per capita\
    for each county in a given State for a given date.
    Cases are per 100,000 people and rounded to 1 decimal

    Required Args:
    ---------------
    state: str        Name of USA State (No abbreviations)
    query_date: str   date in ISO format 'YYYY-MM-DD'

    Optional Args (have defaults): see argparser section
    -------------------------------------------
    covid_file_name: str
    census_file_name: str
    daily_new: bool     default=True
    running_avg: bool   default=False
    window: int
    coviddata_county_column: int *
    cases_column: int *
    date_column: int *
    census_state_column: int *
    census_county_column: int *
    pop_column: int *

    Note: *= only needs to be changed if format of\
         covid19 and census data files are changed

    Returns:
    ---------
    out_lists: list of [str, float]
                        [county_name, county_caserate_at_date]

    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for \
                                     reading covid data CSV file')

    parser.add_argument('--state',
                        type=str,
                        help='Name of the State',
                        required=True)

    parser.add_argument('--query_date',
                        type=str,
                        help='date in ISO format "YYYY-MM-DD"',
                        required=True)

    parser.add_argument('--covid_file_name',
                        type=str,
                        help='Name of the input covid cases data file',
                        default='covid-19-data/us-counties.csv')

    parser.add_argument('--census_file_name',
                        type=str,
                        help='Name of the input census data file',
                        default='census-data/co-est2019-alldata.csv')

    parser.add_argument('--coviddata_county_column',
                        type=int,
                        help='column ind for county names in covid CSV file',
                        default=1)

    parser.add_argument('--cases_column',
                        type=int,
                        help='column ind for number of cases in covid CSV file',
                        default=4)

    parser.add_argument('--date_column',
                        type=int,
                        default=0,
                        help='column ind for date in covid CSV file')

    parser.add_argument('--census_state_column',
                        type=int,
                        help='column ind for state names in census CSV file',
                        default=5)

    parser.add_argument('--census_county_column',
                        type=int,
                        help='column ind for county names in census CSV file',
                        default=6)

    parser.add_argument('--pop_column',
                        type=int,
                        help='column ind for population in census CSV file',
                        default=7)

    parser.add_argument('--daily_new',
                        type=bool,
                        default=True,
                        help='daily new cases. False gives cumulative cases')

    parser.add_argument('--running_avg',
                        type=bool,
                        default=False,
                        help='running average of cases.\
                                default is False, window size is required')

    parser.add_argument('--window',
                        type=int,
                        default=5,
                        help='Window size of running average')

    # parse arguments and store them in args
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_column = args.census_county_column
    pop_column = args.pop_column
    query_date = date.fromisoformat(args.query_date)

    # make CSV file copy of only state covid-19-data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/'+state+'-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    else:
        # warn that county names may collide across states
        import warnings
        warnings.warn('This script must be run on data from a single '
                      'state; counties with the same name in different '
                      'states will collide. If not using the default '
                      'args.covid_file_name, please check that county '
                      'names are not duplicated. Proceeding with '
                      'state_coviddata_file_name = args.covid_file_name; '
                      'watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get state county names and population data from census file
    census_state_data = get_column(census_file_name, census_state_column,
                                   state,
                                   result_columns=[census_county_column,
                                                   pop_column],
                                   date_column=None)
    county_pop_list = census_state_data[1][1:]

    # census file has names as "countyname + County", so rm " County"
    county_names_list_withcounty = census_state_data[0][1:]
    county_names_list = []
    for c in range(len(county_names_list_withcounty)):
        county_names_list.append(county_names_list_withcounty[c][:-7])

    # make hashtable of (key-county_name, value= county_pop)
    N = 260  # hashtable size. Max number counties in a State is Texas with 254
    census_hashtable = [[] for i in range(N)]
    for c in range(len(county_names_list)):
        hash_table.put(census_hashtable, N, county_names_list[c],
                       county_pop_list[c], method='rolling')

    # daily cases option and running avg cases option
    if daily_new is True:
        from my_utils import get_daily_count
    if running_avg is True:
        from my_utils import running_average

    # Loop through each county in state
    out_lists = []
    for c in range(len(county_names_list)):
        county_cases_data_cumulative = get_column(state_coviddata_file_name,
                                                  coviddata_county_column,
                                                  county_names_list[c],
                                                  result_columns=[cases_column],
                                                  date_column=date_column,
                                                  return_dates=True)
        # dates are stored in last index of list, in datetime format
        dates = county_cases_data_cumulative[-1]
        # convert cases from type str to int
        county_cases = list(map(int, county_cases_data_cumulative[0]))

        # daily cases option and running avg options
        if daily_new is True:
            county_cases = get_daily_count(county_cases)
        if running_avg is True:
            county_cases = running_average(county_cases, window)

        # binary search for county cases at date
        county_cases_at_date = binary_search(query_date, [dates, county_cases])
        # case rate per 100,000 people
        if county_cases_at_date is not None:
            county_caserate_at_date = county_cases_at_date * 100000 \
                                      / int(hash_table.get(census_hashtable,
                                            N,
                                            county_names_list[c],
                                            method='rolling'))
            out_lists.append([county_names_list[c],
                             round(county_caserate_at_date, 1)])
    print(out_lists)
    return out_lists
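
For concreteness, the case-rate expression at the end of the county loop scales cases to a per-100,000-person rate and rounds to one decimal. The numbers below are invented purely to show the arithmetic.

county_cases_at_date = 1234     # hypothetical case count at the query date
county_pop = 720403             # hypothetical county population

county_caserate_at_date = county_cases_at_date * 100000 / county_pop
print(round(county_caserate_at_date, 1))  # 171.3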
Example #11
                   county_column,
                   county,
                   result_columns=[cases_column],
                   date_column=date_column)
# convert cases from type str to int
cases = list(map(int, cases[0]))

# print daily cases option
if print_daily is True:
    from my_utils import get_daily_count
    day_cases = get_daily_count(cases)

# print running average cases option
if print_running_avg is True:
    from my_utils import running_average
    running_avg_cases = running_average(day_cases, window)

# print outputs. (print one value per line)
print('cumulative cases by each date:')
for c in range(0, len(cases)):
    print(cases[c])

if print_daily is True:
    print('daily cases:')
    for c in range(0, len(day_cases)):
        print(day_cases[c])

if print_running_avg is True:
    print('running average cases, window = ' + str(window) + " :")
    for c in range(0, len(running_avg_cases)):
        print(running_avg_cases[c])
def main():
    """
    get Covid19 case data and census data and convert to per-capita rates
    data are from two different files.
    Per Capita Rates are per 100,000 people

    Required Args:
    ---------------
    state: str        Name of USA State (No abbreviations)
    coviddata_countys_list: list of str

    Optional Args (have defaults): see argparser section
    -------------------------------------------
    data_out_file: str  name of CSV file to write out, or '[]' for none
    covid_file_name: str
    census_file_name: str
    daily_new: bool     default=True
    running_avg: bool   default=False
    running_sum: bool   default=False
    window: int
    coviddata_county_column: int *
    cases_column: int *
    date_column: int *
    census_state_column: int *
    census_county_column: int *
    pop_column: int *

    Note: *= only needs to be changed if format of
         covid19 and census data files are changed

    Returns:
    ---------
    out_data : list of lists of lists:
                [census_countys_list,
                 [[dates for c1],[dates for c2],..],
                 [per_capita_rates c1],[per_capita_rates c2],...]
    Where:
    ------
    per_capita_rates: list
            list of cases / population
            (these are per 100,000 people)

    dates: list
            list of dates in format datetime.date(YYYY, MM, D)

    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for \
                                     reading covid data CSV file')

    parser.add_argument('--state',
                        type=str,
                        help='Name of the State',
                        required=True)

    parser.add_argument('--coviddata_countys_list',
                        type=str,
                        nargs='+',
                        help='list of strings for \
                        Name(s) of the county(s) in covid CSV file \
                        that we want to look at',
                        required=True)

    parser.add_argument('--data_out_file',
                        type=str,
                        help='Name of the CSV file to write this data \
                                out to. Defaults to "[]", which skips \
                                writing an output file.',
                        default='[]')

    parser.add_argument('--covid_file_name',
                        type=str,
                        help='Name of the input covid cases data file',
                        default='covid-19-data/us-counties.csv')

    parser.add_argument('--census_file_name',
                        type=str,
                        help='Name of the input census data file',
                        default='census-data/co-est2019-alldata.csv')

    parser.add_argument('--coviddata_county_column',
                        type=int,
                        help='column ind for county names in covid CSV file',
                        default=1)

    parser.add_argument('--cases_column',
                        type=int,
                        help='column ind for number of cases in covid CSV file',
                        default=4)

    parser.add_argument('--date_column',
                        type=int,
                        default=0,
                        help='column ind for date in covid CSV file')

    parser.add_argument('--census_state_column',
                        type=int,
                        help='column ind for state names in census CSV file',
                        default=5)

    parser.add_argument('--census_county_column',
                        type=int,
                        help='column ind for county names in census CSV file',
                        default=6)

    parser.add_argument('--pop_column',
                        type=int,
                        help='column ind for population in census CSV file',
                        default=7)

    parser.add_argument('--daily_new',
                        type=bool,
                        default=True,
                        help='daily new cases. default is cumulative daily cases')

    parser.add_argument('--running_avg',
                        type=bool,
                        default=False,
                        help='running average of cases.\
                                default is False, window size is required')

    parser.add_argument('--running_sum',
                        type=bool,
                        default=False,
                        help='running sum of cases over a window.\
                                default is False, window size is required.\
                                cannot be switched on at same \
                                time as running_avg')

    parser.add_argument('--window',
                        type=int,
                        default=5,
                        help='Window size of running average or running sum')

    # parse arguments and store them in args
    args = parser.parse_args()

    # assign arguments
    state = args.state
    coviddata_countys_list = [
        i.replace('-', ' ') for i in args.coviddata_countys_list
    ]
    data_out_file = args.data_out_file
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    running_summation = args.running_sum
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # make CSV file copy of only state covid-19-data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')

    elif coviddata_file_name == 'covid-19-data/' + state + '-counties.csv':
        state_coviddata_file_name = coviddata_file_name
    else:
        # warn that county names may collide across states
        import warnings
        warnings.warn('This script must be run on data from a single '
                      'state; counties with the same name in different '
                      'states will collide. If not using the default '
                      'args.covid_file_name, please check that county '
                      'names are not duplicated. Proceeding with '
                      'state_coviddata_file_name = args.covid_file_name; '
                      'watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get census data for all counties in the state
    census_state_data = get_column(
        census_file_name,
        census_state_column,
        state,
        result_columns=[census_county_column, pop_column],
        date_column=None)

    # sort census_state_data by county name
    # census_state_data is a list: [[county_names], [census2010pops]]
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    tuples = zip(*sorted_pairs)
    list1, list2 = [list(t) for t in tuples]
    census_state_data_sorted = [list1, list2]

    # pre-allocate structure of out_data list of lists of lists
    #   out_data[0] will be coviddata_countys_list
    #   out_data[1] will be list of dates for each county
    #   out_data[2] will be list of per_capita_rates for each county
    out_data = [[], [], []]

    # run for each county
    for county_index in range(0, len(coviddata_countys_list)):
        coviddata_county_name = coviddata_countys_list[county_index]
        out_data[0].append(coviddata_county_name)
        # run get_column() on covid data and census data
        cases_data_cumulative = get_column(state_coviddata_file_name,
                                           coviddata_county_column,
                                           coviddata_county_name,
                                           result_columns=[cases_column],
                                           date_column=date_column,
                                           return_dates=True)
        # convert cases from type str to int
        cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))

        # dates are stored in last index of list, in datetime format
        dates = cases_data_cumulative[-1]

        # daily cases option
        if daily_new is True:
            from my_utils import get_daily_count
            cases = get_daily_count(cases_data_cumulative[0])
        else:
            cases = cases_data_cumulative[0]

        # print running average OR running sum cases option OR neither
        if running_avg is True:
            from my_utils import running_average
            cases = running_average(cases, window)
        elif running_summation is True:
            from my_utils import running_sum
            cases = running_sum(cases, window)

        # use binary search to get county pop census data out of state data
        census_county_name = coviddata_county_name + ' County'
        county_pop = binary_search(census_county_name,
                                   census_state_data_sorted)

        # exit if county census not found
        if county_pop is None:
            print('county census not found')
            sys.exit(1)

        county_pop = int(county_pop)

        # convert cases to per-capita rates by dividing county case by pop
        if isinstance(cases, list):
            cases = np.asarray(cases)

        per_capita_rates = np.round(cases / county_pop * 100000, 2)

        # convert per_capita_rates back from nparray to list
        per_capita_rates = per_capita_rates.tolist()

        # append to out_data lists
        out_data[1].append([dates])
        out_data[2].append([per_capita_rates])

    # write out_data to a CSV file in format 'County','date','per_capita_rate'
    if data_out_file != '[]':
        fout = open(data_out_file, 'w')
        fout.write("county,date,per_capita_rate \n")
        for county_index in range(0, len(out_data[0])):
            for date_ind in range(0, len(out_data[1][county_index][0])):
                fout.write(out_data[0][county_index] + ',' +
                           str(out_data[1][county_index][0][date_ind]) + ',' +
                           str(out_data[2][county_index][0][date_ind]) + '\n')
        fout.close()

    return out_data
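
Note that dates and per_capita_rates are appended wrapped in one-element lists, so callers of this main() need an extra [0] when indexing out_data, just like the CSV-writing loop above. A toy illustration with invented values and a hypothetical county name:

from datetime import date

out_data = [['Boulder'],                                # out_data[0]: county names
            [[[date(2020, 3, 1), date(2020, 3, 2)]]],   # out_data[1]: dates per county
            [[[12.34, 15.67]]]]                         # out_data[2]: per-capita rates per county

county_name = out_data[0][0]        # 'Boulder'
first_date = out_data[1][0][0][0]   # datetime.date(2020, 3, 1)
first_rate = out_data[2][0][0][0]   # 12.34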
def main():
    """
    get Covid19 case data and census data and convert to per-capita rates
    data are from two different files

    Returns:
    ---------
    per_capita_rates: list
            list of cases / population

    dates: list
            list of dates in format datetime.date(YYYY, MM, D)

    """
    # TODO: add main def docstring

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for \
                                     reading covid data CSV file')

    parser.add_argument('--covid_file_name',
                        type=str,
                        help='Name of the input covid cases data file',
                        required=True)

    parser.add_argument('--census_file_name',
                        type=str,
                        help='Name of the input census data file',
                        required=True)

    parser.add_argument('--plot_file_name',
                        type=str,
                        help='output plot file generated',
                        required=True)

    parser.add_argument('--state',
                        type=str,
                        help='Name of the State',
                        required=True)

    parser.add_argument('--coviddata_county',
                        type=str,
                        help='Name of the county in covid CSV file',
                        required=True)

    parser.add_argument('--census_county',
                        type=str,
                        help='Name of the county in census CSV file',
                        required=True)

    parser.add_argument('--coviddata_county_column',
                        type=int,
                        help='column ind for county names in covid CSV file')

    parser.add_argument('--cases_column',
                        type=int,
                        help='column ind for number of cases in covid CSV file')

    parser.add_argument('--date_column',
                        type=int,
                        default=0,
                        help='column ind for date in covid CSV file')

    parser.add_argument('--census_state_column',
                        type=int,
                        help='column ind for state names in census CSV file')

    parser.add_argument('--census_county_column',
                        type=int,
                        help='column ind for county names in census CSV file')

    parser.add_argument('--pop_column',
                        type=int,
                        help='column ind for population in census CSV file')

    parser.add_argument('--daily_new',
                        type=bool,
                        default=False,
                        help='daily new cases. default is cumulative daily cases')

    parser.add_argument('--running_avg',
                        type=bool,
                        default=False,
                        help='running average of cases.\
                                default is False, window size is required')

    parser.add_argument('--window',
                        type=int,
                        default=5,
                        help='Window size of running average')

    # parse arguments and store them in args
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    plot_file_name = args.plot_file_name
    coviddata_county_name = args.coviddata_county
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_name = args.census_county
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # run get_column() on covid data and census data
    cases_data_cumulative = get_column(coviddata_file_name,
                                       coviddata_county_column,
                                       coviddata_county_name,
                                       result_columns=[cases_column],
                                       date_column=date_column,
                                       return_dates=True)

    census_state_data = get_column(
        census_file_name,
        census_state_column,
        state,
        result_columns=[census_county_column, pop_column],
        date_column=None)

    # convert cases from type str to int
    cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))

    # dates are stored in last index of list, in datetime format
    dates = cases_data_cumulative[-1]

    # daily cases option
    if daily_new is True:
        from my_utils import get_daily_count
        cases = get_daily_count(cases_data_cumulative[0])  # not dates column
    else:
        cases = cases_data_cumulative[0]

    # print running average cases option
    if running_avg is True:
        from my_utils import running_average
        cases = running_average(cases, window)

    # census_state_data is a list: [[county_names], [census2010pops]]
    # sort census_state_data by county name
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    tuples = zip(*sorted_pairs)
    list1, list2 = [list(t) for t in tuples]
    census_state_data_sorted = [list1, list2]

    # use binary search to get county pop census data out of state data
    county_pop = binary_search(census_county_name, census_state_data_sorted)

    # exit if county census not found
    if county_pop is None:
        print('county census not found')
        sys.exit(1)

    county_pop = int(county_pop)

    # convert cases to per-capita rates by dividing county case by population
    if isinstance(cases, list):
        cases = np.asarray(cases)

    per_capita_rates = cases / county_pop

    # convert per_capita_rates back from nparray to list
    per_capita_rates = per_capita_rates.tolist()

    # plot using plot_lines
    plot_points = [[]]
    for point in range(0, len(per_capita_rates)):
        plot_points[0].append([dates[point], per_capita_rates[point]])

    plot_labels = ['dates', 'per_capita_rates']

    plot = plot_lines(plot_points, plot_labels, plot_file_name)

    return plot  # NOTE: idk if this line is needed?
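
plot_lines() is handed one inner list per line, each holding [x, y] pairs (dates on x, per-capita rates on y here). A minimal stand-alone illustration with invented values follows; plot_lines itself is the project's own helper and is not reproduced here.

from datetime import date

plot_points = [[
    [date(2020, 3, 1), 0.00012],   # [x, y] = [date, per-capita rate]
    [date(2020, 3, 2), 0.00015],
]]
plot_labels = ['dates', 'per_capita_rates']
# plot_lines(plot_points, plot_labels, 'example_plot.png')  # called as in main() above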