zip_code = zip_json[u'attributes'][u'Join_Zip_Code']
    case_counts.append(case_count)
    zip_codes.append(zip_code)

# ===========================================
# Store previous data in dictionary
# ===========================================

# Build the path to the previously written Oakland County csv and open it.
# NOTE(review): os.path.abspath resolves "../processed_data/..." against the
# current working directory, so this presumably expects the script to be run
# from the scripts directory -- confirm.
us_cases_rel_scripts_path = os.path.abspath(
    "../processed_data/cases/US")  # Path to data relative to scripts
prev_data_fpath = "%s/oakland-county_cases.csv" % us_cases_rel_scripts_path
prev_data_file = open(prev_data_fpath, 'r')
# First line of the csv split on commas; it is searched for the current date
# below.
header = prev_data_file.readline().strip().split(',')

# IMPORTANT: Only append new data if date isn't already there
quoted_date = parse_data_utils.date_string_to_quoted(date)

if quoted_date in header:
    # The date is already a column in the header: close the file and abort
    # the whole script instead of adding the same date a second time.
    prev_data_file.close()
    sys.exit(
        "This date has already been updated. May need to check if multiple updates were made in the same day."
    )

data = {}  # Keys are zip codes, values are lists of case counts

# Every remaining line is one row: the first comma-separated field becomes the
# dictionary key and the rest of the fields become its list of values. Fields
# are kept as the raw strings read from the file (no unquoting or int
# conversion happens here).
# NOTE(review): prev_data_file is not closed in the visible part of the script
# after this loop -- confirm it is closed further down.
for row in prev_data_file:
    values = row.strip().split(',')
    zip_code = values[0]
    case_counts_prev = values[1:]
    data[zip_code] = case_counts_prev
# Example #2
0
def write_data(file_path, date, zips, cases, location=""):
    """Write or append one date's case counts to a csv.

    Rows are zip codes and columns are dates. If the csv already exists its
    contents are loaded, the column for ``date`` is added (or replaced when
    that date is already in the header), and the whole file is rewritten with
    rows sorted by their quoted zip code. If the csv cannot be opened for
    reading, a new file is created.

    Args:
        file_path: The path to the csv to write/append to.
        date: The date we should add data to. It is converted to its header
            form with ``parse_data_utils.date_string_to_quoted``.
        zips: List of zip codes to be updated.
        cases: List of case counts corresponding to the given list of zip
            codes and the given date. Counts for a zip code that appears more
            than once in ``zips`` are summed.
        location: The location for which the data is being scraped. Only used
            as a prefix in printed messages.

    Raises:
        SystemExit: If, after merging, any zip code does not have exactly one
            value per date column. The csv is left untouched in that case.
    """
    new_data = {}  # Keys are quoted zip codes, values are lists of case counts
    dates = [""]  # Header row; the first cell labels the zip code column
    # IMPORTANT: Only append new data if date isn't already there
    quoted_date = parse_data_utils.date_string_to_quoted(date)
    overwrite = False

    # Only the open() call is guarded: a failure while *reading* an existing
    # file must not be mistaken for "file does not exist", because the csv
    # would then be overwritten with partial data.
    try:
        read_csv = open(file_path, 'r')
    except IOError:
        print("%s - Creating csv file at %s" % (location, file_path))
    else:
        with read_csv:
            dates = read_csv.readline().strip().split(',')
            if quoted_date in dates:
                overwrite = True
                print("%s - WARNING: data has already been updated today... "
                      "overwriting data for today with recently fetched data." %
                      location)
            # Fill in new_data with old data from the csv.
            for row in read_csv:
                row = row.strip()
                if not row:
                    # A blank (e.g. trailing) line is not a zip code row.
                    continue
                values = row.split(',')
                # When overwriting, today's old value is dropped.
                # NOTE(review): this assumes today's date is the LAST column
                # of the existing header -- confirm against the callers.
                new_data[values[0]] = values[1:-1] if overwrite else values[1:]

    num_dates = len(dates) - 1  # Excluding today
    if overwrite:
        num_dates -= 1

    expected_entries = num_dates + 1

    # Fill in new_data with today's data.
    for zip_code, case_count in zip(zips, cases):
        quoted_zip_code = '"%s"' % zip_code
        if quoted_zip_code not in new_data:  # New zip code
            new_data[quoted_zip_code] = ['NA'] * num_dates
            new_data[quoted_zip_code].append(str(case_count))
            continue

        row_counts = new_data[quoted_zip_code]
        if len(row_counts) == expected_entries:
            # Today's value is already present, so this zip code is a
            # duplicate within `zips`: accumulate instead of appending.
            row_counts[-1] = str(int(row_counts[-1]) + case_count)
        else:
            row_counts.append(str(case_count))

    # Handle missing zip codes: rows that received no value today get "NA".
    for row_counts in new_data.values():
        if len(row_counts) < expected_entries:
            row_counts.append("NA")

    # Make sure everything has the same length
    # (There might be duplicates!)
    for zip_code in new_data:
        if len(new_data[zip_code]) != expected_entries:
            sys.exit(
                "%s - ERROR: Inconsistency in number of data points for zip code %s."
                " Data failed to update." % (location, zip_code))

    if not overwrite:
        dates.append(quoted_date)

    # Overwrite csv
    with open(file_path, 'w') as write_csv:
        write_csv.write(','.join(dates) + "\n")
        for zip_code in sorted(new_data):
            write_csv.write(
                '%s,%s\n' % (zip_code, ','.join(new_data[zip_code])))