import csv
import math


def save_indicators(results):
    # write one single-row CSV per indicator, skipping missing values
    date = results["Date"]
    country = results["Country"]
    for indicator, slug in (
        ("Tests", "tests"),
        ("ConfirmedCases", "confirmed-cases"),
        ("Deaths", "deaths"),
    ):
        value = results[indicator]
        if not math.isnan(value):
            with open(
                "data/daily/indicators/covid-19-{}-{}-{}.csv".format(
                    date, format_country(country), slug
                ),
                "w",
            ) as f:
                f.write("{},{},{},{}\n".format(date, country, indicator, value))
def save_daily_areas(date, country, rows):
    # dump the per-area case rows for one country and date to a CSV file
    csv_file = "data/daily/covid-19-cases-{}-{}.csv".format(
        date, format_country(country)
    )
    with open(csv_file, "w") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(rows)
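# A hedged usage sketch for save_daily_areas. The row layout is produced by
# parse_daily_areas elsewhere in the repo, so the columns shown here (a header
# row plus one record per area) are an assumption for illustration only.
rows = [
    ["Date", "Country", "AreaCode", "Area", "TotalCases"],
    ["2020-04-10", "England", "E09000002", "Barking and Dagenham", "144"],
]
save_daily_areas("2020-04-10", "England", rows)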
def write_indicator_file(date, country, indicator, value):
    with open(
        "data/daily/indicators/covid-19-{}-{}-{}.csv".format(
            date, format_country(country), camel_to_hyphens(indicator)
        ),
        "w",
    ) as f:
        f.write("{},{},{},{}\n".format(date, country, indicator, value))
Example #4
import json
import os
import sqlite3
import sys

import requests


def crawl_json(date, country, check_only):
    if country == "UK":
        local_data_file = "data/raw/phe/coronavirus-covid-19-number-of-cases-in-{}-{}.json".format(
            format_country(country), date)

        if not os.path.exists(local_data_file):
            data_url = get_json_url(date)

            if data_url is None:
                if check_only:
                    return DatasetUpdate.UPDATE_NOT_AVAILABLE
                sys.stderr.write("No data available for {}\n".format(date))
                sys.exit(1)

            if check_only:
                return DatasetUpdate.UPDATE_AVAILABLE

            r = requests.get(data_url)
            with open(local_data_file, "w") as f:
                f.write(r.text)

        if check_only:
            return DatasetUpdate.ALREADY_UPDATED

        with open(local_data_file) as f:
            json_data = json.load(f)

            totalUKCases = json_data["overview"]["K02000001"]["totalCases"]["value"]
            totalUKDeaths = json_data["overview"]["K02000001"]["deaths"]["value"]
            englandCases = json_data["countries"]["E92000001"]["totalCases"]["value"]
            englandDeaths = json_data["countries"]["E92000001"]["deaths"]["value"]

            with sqlite3.connect('data/covid-19-uk.db') as conn:
                c = conn.cursor()
                # parameterized inserts avoid interpolating values into SQL
                c.executemany(
                    "INSERT OR REPLACE INTO indicators VALUES (?, ?, ?, ?)",
                    [
                        (date, "UK", "ConfirmedCases", totalUKCases),
                        (date, "UK", "Deaths", totalUKDeaths),
                        (date, "England", "ConfirmedCases", englandCases),
                        (date, "England", "Deaths", englandDeaths),
                    ],
                )

            # get area data for England
            daily_areas = parse_daily_areas_json(date, "England", json_data)
            if daily_areas is not None:
                #save_daily_areas(date, "England", daily_areas)
                save_daily_areas_to_sqlite(date, "England", daily_areas)
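# DatasetUpdate is returned for the check_only paths above but is not defined
# in this excerpt; a minimal sketch, assuming a plain three-state enum:
from enum import Enum


class DatasetUpdate(Enum):
    ALREADY_UPDATED = "already updated"
    UPDATE_AVAILABLE = "update available"
    UPDATE_NOT_AVAILABLE = "update not available"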
Example #5
import json
import os
import sqlite3
import sys

import dateparser
import requests
import xmltodict


def crawl_json(date, country, check_only):
    if country == "UK":
        # See https://github.com/PublicHealthEngland/coronavirus-dashboard
        blobs_url = "https://publicdashacc.blob.core.windows.net/publicdata?restype=container&comp=list"
        local_data_file = "data/raw/phe/coronavirus-covid-19-number-of-cases-in-{}-{}.json".format(
            format_country(country), date
        )
        
        if not os.path.exists(local_data_file):
            r = requests.get(blobs_url)
            blobs_xml = r.text
            blobs_dict = xmltodict.parse(blobs_xml)
            blob_names = sorted([o["Name"] for o in blobs_dict["EnumerationResults"]["Blobs"]["Blob"] if o["Name"]])
            dt = dateparser.parse(date, date_formats=['%Y-%m-%d'], locales=["en-GB"])
            blob_names_for_date = [name for name in blob_names if name.startswith("data_{}".format(dt.strftime('%Y%m%d')))]

            if len(blob_names_for_date) == 0:
                if check_only:
                    return DatasetUpdate.UPDATE_NOT_AVAILABLE
                sys.stderr.write("No data available for {}\n".format(date))
                sys.exit(1)         

            if check_only:
                return DatasetUpdate.UPDATE_AVAILABLE       

            # Use most recent date
            data_url = "https://c19pub.azureedge.net/{}".format(blob_names_for_date[-1])
            r = requests.get(data_url)
            with open(local_data_file, "w") as f:
                f.write(r.text)

        if check_only:
            return DatasetUpdate.ALREADY_UPDATED

        with open(local_data_file) as f:
            json_data = json.load(f)

            totalUKCases = json_data["overview"]["K02000001"]["totalCases"]["value"]
            totalUKDeaths = json_data["overview"]["K02000001"]["deaths"]["value"]
            englandCases = json_data["countries"]["E92000001"]["totalCases"]["value"]
            englandDeaths = json_data["countries"]["E92000001"]["deaths"]["value"]

            with sqlite3.connect('data/covid-19-uk.db') as conn:
                c = conn.cursor()
                # parameterized inserts avoid interpolating values into SQL
                c.executemany(
                    "INSERT OR REPLACE INTO indicators VALUES (?, ?, ?, ?)",
                    [
                        (date, "UK", "ConfirmedCases", totalUKCases),
                        (date, "UK", "Deaths", totalUKDeaths),
                        (date, "England", "ConfirmedCases", englandCases),
                        (date, "England", "Deaths", englandDeaths),
                    ],
                )

            # get area data for England
            daily_areas = parse_daily_areas_json(date, "England", json_data)
            if daily_areas is not None:
                #save_daily_areas(date, "England", daily_areas)
                save_daily_areas_to_sqlite(date, "England", daily_areas)
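# A hedged usage sketch of the check_only flag: probe for new data first, then
# crawl for real only when an update is reported. The date is illustrative.
if crawl_json("2020-04-10", "UK", check_only=True) == DatasetUpdate.UPDATE_AVAILABLE:
    crawl_json("2020-04-10", "UK", check_only=False)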
Example #6
import sys

import requests


def crawl_html(date, country, check_only):
    html_url = get_html_url(date, country)
    local_html_file = "data/raw/coronavirus-covid-19-number-of-cases-in-{}-{}.html".format(
        format_country(country), date)
    save_html_file = False

    try:
        with open(local_html_file) as f:
            html = f.read()
        if check_only:
            return DatasetUpdate.ALREADY_UPDATED
    except FileNotFoundError:
        r = requests.get(html_url)
        html = r.text
        save_html_file = True

    results = parse_totals(country, html)

    if results is None:
        if check_only:
            return DatasetUpdate.UPDATE_AVAILABLE
        sys.stderr.write(
            "Can't find numbers. Perhaps the page format has changed?\n")
        sys.exit(1)
    elif results["Date"] != date:
        if check_only:
            return DatasetUpdate.UPDATE_NOT_AVAILABLE
        sys.stderr.write("Page is dated {}, but want {}\n".format(
            results["Date"], date))
        sys.exit(1)

    if check_only:
        return DatasetUpdate.UPDATE_AVAILABLE

    daily_areas = parse_daily_areas(date, country, html)

    print_totals(results)
    #save_indicators(results)
    save_indicators_to_sqlite(results)

    if daily_areas is not None:
        #save_daily_areas(date, country, daily_areas)
        save_daily_areas_to_sqlite(date, country, daily_areas)

    if save_html_file:
        with open(local_html_file, "w") as f:
            f.write(html)
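# format_country appears in every filename above but is not defined in this
# excerpt. A minimal sketch, assuming it only slugifies the country name
# ("Northern Ireland" -> "northern-ireland"):
def format_country(country):
    return country.lower().replace(" ", "-")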
Example #7
import pandas as pd


def convert(indicators_csv_file):
    indicators = pd.read_csv(indicators_csv_file)

    for country in ["England", "Northern Ireland", "Scotland", "UK", "Wales"]:
        wide = indicators[indicators["Country"] == country]
        wide = wide.pivot(index="Date", columns="Indicator", values="Value")
        wide = wide.reindex(columns=["Tests", "ConfirmedCases", "Deaths"])

        # don't use to_csv since pandas can't write NA ints
        with open(
                "data/covid-19-totals-{}.csv".format(format_country(country)),
                "w") as f:
            f.write("Date,Tests,ConfirmedCases,Deaths\n")
            for (i, d) in wide.to_dict("index").items():
                f.write("{},{},{},{}\n".format(
                    i,
                    format_int(d["Tests"]),
                    format_int(d["ConfirmedCases"]),
                    format_int(d["Deaths"]),
                ))
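# format_int is not defined in this excerpt. A plausible sketch, given the
# comment above that pandas cannot write NA ints: emit an empty field for a
# missing value and a plain integer otherwise.
import math


def format_int(value):
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return ""
    return str(int(value))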