Example No. 1
def collect_ctp(url, output_file):  # COVID Tracking Project, state level
    data_source_file = retrieve_data_source(url, '../data/raw/ctp_s.csv')
    deaths, confirmed, recovered = init_locations("s")
    if data_source_file != "":
        (location_field_index, confirmed_field_index,
         deaths_field_index) = get_fields_indices_ctp(data_source_file)
        with codecs.open(data_source_file, 'r', encoding='utf8') as f:
            f.readline()  # skip the header row
            for line in f.readlines():
                fields = line.strip().split(',')
                if fields[location_field_index] in abbr_to_state:
                    # Some fields come back empty from the file; record them as "NA".
                    if fields[confirmed_field_index].strip() == '':
                        fields[confirmed_field_index] = "NA"
                    if fields[deaths_field_index].strip() == '':
                        fields[deaths_field_index] = "NA"
                    # Convert the postal abbreviation to the full state name.
                    state = abbr_to_state[fields[location_field_index]]
                    confirmed[state] = fields[confirmed_field_index]
                    deaths[state] = fields[deaths_field_index]
                    recovered[state] = "NA"  # recoveries are not collected from this source
        write_temp(deaths, recovered, confirmed, output_file)
    else:
        print("Data file not found from the source: " + url)
Example No. 2
def collect_nyt(url, output_file, level):  # NYT data; level is "s" for state, otherwise county
    data_source_file = retrieve_data_source(
        url, "../data/raw/nyt_" + level + ".csv")
    deaths, confirmed, recovered = init_locations(level)
    if data_source_file != "":
        (state_field_index, county_field_index, confirmed_field_index,
         deaths_field_index) = get_fields_indices_nyt(data_source_file)
        with codecs.open(data_source_file, 'r', encoding='utf8') as tempf:
            tempf.readline()  # skip the header row
            for line in tempf.readlines():
                fields = line.strip().split(',')
                if level == "s":
                    location = fields[state_field_index]
                else:
                    # County keys use the "CountyName-StateName" format.
                    location = (fields[county_field_index].strip() + '-' +
                                fields[state_field_index])
                    if location in countyname_mapping:
                        location = countyname_mapping[location]
                if location in confirmed:
                    # Empty fields in the source are recorded as "NA".
                    if fields[confirmed_field_index].strip() == "":
                        fields[confirmed_field_index] = "NA"
                    if fields[deaths_field_index].strip() == "":
                        fields[deaths_field_index] = "NA"
                    confirmed[location] = fields[confirmed_field_index]
                    deaths[location] = fields[deaths_field_index]
                    recovered[location] = "NA"
            write_temp(deaths, recovered, confirmed, output_file)
    else:
        print("Data file not found from the source: " + url)
Example No. 3
def collect_jhu_c(url, out_f):  # JHU daily report, US county level
    # The source URL is expected to end in "MM-DD-YYYY.csv"; convert the date to YYYY-MM-DD.
    file_date = url[-14:-4]
    date_ = datetime.strptime(file_date, '%m-%d-%Y')
    file_date = date_.strftime('%Y-%m-%d')

    data_source_file = retrieve_data_source(url, "../data/raw/jhu_c.csv")
    deaths, confirmed, recovered = init_locations("c")
    if data_source_file != "":
        (country_field_index, state_field_index, county_field_index,
         confirmed_field_index, deaths_field_index,
         recovered_field_index) = get_fields_indices_jhu(data_source_file)
        with codecs.open(data_source_file, 'r', encoding='utf8') as f:
            f.readline()  # skip the header row
            for line in f.readlines():
                fields = line.strip().split(',')
                if fields[country_field_index] == 'US':  # keep only US rows
                    # county_id uses the "CountyName-StateName" format.
                    county_id = (fields[county_field_index].strip() + "-" +
                                 fields[state_field_index])
                    if county_id in countyname_mapping:
                        county_id = countyname_mapping[county_id]
                    if county_id in confirmed:
                        # Empty fields in the source are recorded as "NA".
                        if fields[confirmed_field_index].strip() == "":
                            fields[confirmed_field_index] = "NA"
                        if fields[deaths_field_index].strip() == "":
                            fields[deaths_field_index] = "NA"
                        if fields[recovered_field_index].strip() == "":
                            fields[recovered_field_index] = "NA"
                        confirmed[county_id] = fields[confirmed_field_index]
                        deaths[county_id] = fields[deaths_field_index]
                        recovered[county_id] = fields[recovered_field_index]
        write_temp(deaths, recovered, confirmed, out_f, file_date)
        return True
    else:
        print("Data file not found from the source: " + url)
        return False
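The date handling at the top of collect_jhu_c (and collect_jhu_g below) assumes the source URL ends in an "MM-DD-YYYY.csv" filename, which is exactly what the slice url[-14:-4] extracts. A self-contained check of that slice-and-reformat step on a made-up URL:

from datetime import datetime

url = "https://example.com/daily_reports/04-01-2020.csv"  # made-up URL in the expected format
file_date = datetime.strptime(url[-14:-4], '%m-%d-%Y').strftime('%Y-%m-%d')
print(file_date)  # -> 2020-04-01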
Example No. 4
def collect_jhu_g(url, out_f):  # JHU daily report, global (per-country totals)
    # The source URL is expected to end in "MM-DD-YYYY.csv"; convert the date to YYYY-MM-DD.
    file_date = url[-14:-4]
    date_ = datetime.strptime(file_date, '%m-%d-%Y')
    file_date = date_.strftime('%Y-%m-%d')

    data_source_file = retrieve_data_source(url, "../data/raw/jhu_g.csv")
    deaths, confirmed, recovered = init_locations("g")
    if data_source_file != "":
        (country_field_index, state_field_index, county_field_index,
         confirmed_field_index, deaths_field_index,
         recovered_field_index) = get_fields_indices_jhu(data_source_file)
        with codecs.open(data_source_file, 'r', encoding='utf8') as f:
            f.readline()  # skip the header row
            for line in f.readlines():
                fields = line.strip().split(',')
                country = fields[country_field_index]
                if country in confirmed:  # aggregate only the tracked countries
                    # Empty cells count as zero so the per-country totals stay numeric.
                    if fields[confirmed_field_index] == "":
                        fields[confirmed_field_index] = "0"
                    if fields[deaths_field_index] == "":
                        fields[deaths_field_index] = "0"
                    if fields[recovered_field_index] == "":
                        fields[recovered_field_index] = "0"
                    confirmed[country] += int(fields[confirmed_field_index])
                    deaths[country] += int(fields[deaths_field_index])
                    recovered[country] += int(fields[recovered_field_index])
        write_temp(deaths, recovered, confirmed, out_f, file_date)
        return True
    else:
        print("Data file not found from the source: " + url)
        return False
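Unlike the state and county collectors, which store the field strings as-is, collect_jhu_g sums rows per country with +=, which presumes the dicts returned by init_locations("g") start at integer 0. A minimal, self-contained sketch of that accumulation pattern on made-up rows:

rows = [
    ("US", "10", "1"),   # (country, confirmed, deaths) -- made-up sample data
    ("US", "", "2"),     # an empty cell counts as zero
    ("France", "5", "0"),
]
confirmed = {"US": 0, "France": 0}
deaths = {"US": 0, "France": 0}
for country, c, d in rows:
    if country in confirmed:  # aggregate only the tracked countries
        confirmed[country] += int(c or "0")
        deaths[country] += int(d or "0")
print(confirmed)  # -> {'US': 10, 'France': 5}
print(deaths)     # -> {'US': 3, 'France': 0}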