Exemplos de write_datafile em Python, exemplos de utils.write_datafile em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: tested_lab.py Projeto: PedroRisquez/c19norge-data

def update():
    """Refresh the ``tested_lab`` datafile from the FHI lab-by-time CSV.

    Reads ``data_covid19_lab_by_time`` via ``get_fhi_datafile``, derives the
    daily and cumulative test counts, and compares the result with the
    stored ``tested_lab`` datafile.  When they differ, the sources file is
    flagged as pending, the new frame is written and the graph regenerated.
    """
    now = get_timestr()

    # get fhi datafile
    datafile = get_fhi_datafile("data_covid19_lab_by_time")
    df_new = pd.read_csv(
        datafile,
        usecols=["date", "n_neg", "n_pos", "pr100_pos"],
        parse_dates=["date"],
    )
    df_new = df_new.rename(columns={"n_neg": "new_neg", "n_pos": "new_pos"})

    # Order chronologically *before* accumulating: a cumulative sum taken on
    # unordered rows yields running totals that do not match the dates they
    # end up next to.  A stable sort keeps the file order for equal dates and
    # the fresh index lets ``equals`` match a frame loaded from disk.
    df_new = df_new.sort_values(by=["date"], ascending=True, kind="stable")
    df_new = df_new.reset_index(drop=True)

    df_new["new_total"] = df_new["new_neg"] + df_new["new_pos"]
    df_new["total_neg"] = df_new["new_neg"].cumsum()
    df_new["total_pos"] = df_new["new_pos"].cumsum()
    df_new["total"] = df_new["new_total"].cumsum()
    df_new["source"] = "fhi:git"

    df = load_datafile("tested_lab", parse_dates=["date"])

    if df_new.equals(df):
        # Nothing changed since the last run.
        return

    print(now, "tested_lab: New update")

    sourcefile = load_sources()
    sourcefile["tested_lab.csv"]["last_updated"] = now
    sourcefile["tested_lab.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("tested_lab", df_new)

    # Generate graph
    graphs.tested_lab()

Exemplo n.º 2

0

Exibir arquivo

Arquivo: omicron.py Projeto: frefrik/c19norge-data

def update():
    """Refresh the ``omicron`` datafile from the spreadsheet behind ``get_data``.

    Keeps the 2021 and 2022 rows, computes a running total of confirmed
    cases and, when the result differs from the stored ``omicron`` datafile,
    flags the sources file as pending and writes the new frame.  Does
    nothing when ``get_data`` returns a falsy value.
    """
    now = get_timestr()

    # Spreadsheet header -> datafile column name.
    cols = {
        "År/Year": "year",
        "Uke/week": "week",
        "Tilfeller/cases": "new_confirmed",
    }
    data = get_data()

    if not data:
        return

    df_new = pd.read_excel(data.content, usecols=list(cols))
    df_new = df_new.rename(columns=cols)

    df_new = df_new[df_new["year"].isin([2021, 2022])]
    df_new = df_new[["year", "week", "new_confirmed"]].fillna(0).astype(int)

    # The data spans two calendar years, so ordering by week alone would put
    # early-2022 weeks ahead of late-2021 weeks and corrupt the running
    # total below.  Order by year first, then week.
    df_new = df_new.sort_values(by=["year", "week"]).reset_index(drop=True)

    df_new["total_confirmed"] = df_new["new_confirmed"].cumsum()
    df_new["source"] = "fhi:web"

    df = load_datafile("omicron")

    if df_new.equals(df):
        return

    print(now, "omicron: New update")

    sourcefile = load_sources()
    sourcefile["omicron.csv"]["last_updated"] = now
    sourcefile["omicron.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("omicron", df_new)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: confirmed.py Projeto: PedroRisquez/c19norge-data

def update():
    """Refresh the ``confirmed`` datafile from the FHI MSIS-by-time CSV.

    Builds the national ("Norge") series of new and cumulative confirmed
    cases from 2020-02-21 onwards and compares it with the ``fhi:git`` rows
    of the stored ``confirmed`` datafile.  When they differ, the two frames
    are merged (one row per date), the last row is corrected so the total
    never drops below the previous row's total, the sources file is flagged
    as pending, the merged frame is written and the graph regenerated.
    """
    now = get_timestr()

    datafile = get_fhi_datafile("data_covid19_msis_by_time_location")
    df_new = pd.read_csv(
        datafile,
        usecols=["date", "n", "location_name"],
        parse_dates=["date"],
    )

    df_new = df_new.loc[(df_new["location_name"] == "Norge")]
    df_new = df_new.filter(items=["date", "n"])
    df_new = df_new[df_new.date >= "2020-02-21"]
    df_new = df_new.rename(columns={"n": "new"})

    df_new["total"] = df_new["new"].cumsum()
    df_new["source"] = "fhi:git"
    df_new = df_new.reset_index(drop=True)

    df = load_datafile("confirmed", parse_dates=["date"])
    df = df[df.date >= "2020-02-21"]
    df_filter = df.loc[df["source"] == "fhi:git"]
    df_filter = df_filter.reset_index(drop=True)

    if df_new.equals(df_filter):
        return

    print(now, "fhi_git.confirmed: New update")

    df_new = df_new.merge(df, how="outer")
    df_new = df_new.drop_duplicates(subset=["date"], keep="first")

    # The correction needs both the last and the second-to-last row.
    if len(df_new) >= 2:
        second_last_total = df_new["total"].iloc[-2]
        last_total = df_new["total"].iloc[-1]
        last_new = df_new["new"].iloc[-1]

        if second_last_total > last_total:
            new_today = last_new + (second_last_total - last_total)

            # Write through a single positional indexer.  Chained
            # ``df.iloc[-1:]["col"] = value`` assigns into a temporary
            # object, so the correction may never reach ``df_new``.
            df_new.iloc[-1, df_new.columns.get_loc("total")] = second_last_total
            df_new.iloc[-1, df_new.columns.get_loc("new")] = new_today

    sourcefile = load_sources()
    sourcefile["confirmed.csv"]["last_updated"] = now
    sourcefile["confirmed.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("confirmed", df_new)

    # Generate graph
    graphs.confirmed()

Exemplo n.º 4

0

Exibir arquivo

def update():
    """Apply the MSIS API's running total to the ``confirmed`` datafile.

    Fetches the current total from the MSIS endpoint and, when it exceeds
    the highest stored total, adds the difference to yesterday's row (during
    hours 0 and 1) or to today's row (otherwise), stamps the touched rows
    with the ``msis:api`` source, flags the sources file as pending and
    writes the datafile back.  A failed request is logged and treated as a
    total of 0.
    """
    now = get_timestr()
    today = date.today()
    yesterday = today - timedelta(days=1)

    url = "https://statistikk.fhi.no/api/msis/antallKoronaTotalt"

    df = load_datafile("confirmed")
    last_total = df["total"].max()

    try:
        confirmed_total = requests.get(url).json()
    except Exception as error:
        confirmed_total = 0
        print(now, "- ERROR:", str(error))

    if not (confirmed_total > last_total):
        return

    print(now, "msis_api.confirmed: New update")

    confirmed_diff = confirmed_total - last_total
    is_today = df["date"] == str(today)

    if datetime.now().hour in (0, 1):
        # Shortly after midnight the increase is booked on yesterday's row,
        # while today's row only receives the new total.
        is_yesterday = df["date"] == str(yesterday)
        n_yesterday = df.new.loc[is_yesterday].values[0]

        df.loc[is_yesterday, "new"] = n_yesterday + confirmed_diff
        df.loc[is_yesterday, "total"] = confirmed_total
        df.loc[is_today, "total"] = confirmed_total
        df.loc[is_yesterday, "source"] = "msis:api"
        df.loc[is_today, "source"] = "msis:api"
    else:
        n_today = df.new.loc[is_today].values[0]

        df.loc[is_today, "new"] = n_today + confirmed_diff
        df.loc[is_today, "total"] = confirmed_total
        df.loc[is_today, "source"] = "msis:api"

    sourcefile = load_sources()
    sourcefile["confirmed.csv"]["last_updated"] = now
    sourcefile["confirmed.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("confirmed", df)

Exemplo n.º 5

0

Exibir arquivo

def update():
    """Refresh the ``hospitalized`` datafile from the Helsedirektoratet API.

    Fetches the per-health-region registrations, sums admissions, ICU and
    respirator counts per date, and compares the result with the stored
    ``hospitalized`` datafile.  When they differ, the sources file is
    flagged as pending, the new frame is written and the graph regenerated.
    The API key is read from the ``HELSEDIR_API_KEY`` environment variable.
    """
    now = get_timestr()

    url = "https://api.helsedirektoratet.no/ProduktCovid19/Covid19Statistikk/"

    headers = {
        "Ocp-Apim-Subscription-Key": os.getenv("HELSEDIR_API_KEY"),
    }

    res = requests.get(url + "helseregion", headers=headers).json()

    # Build the frame in one step.  ``DataFrame.append`` was removed in
    # pandas 2.0, and growing a frame row-batch by row-batch copies it on
    # every iteration.
    records = [
        registration
        for region in res
        for registration in region["registreringer"]
    ]
    df_new = pd.DataFrame(records)

    df_new = df_new.rename(
        columns={
            "dato": "date",
            "antInnlagte": "admissions",
            "antCovidIntensiv": "icu",
            "antRespirator": "respiratory",
        })

    # ``.copy()`` so the column assignment below works on an independent
    # frame rather than on a selection of the renamed one.
    df_new = df_new[["date", "admissions", "icu", "respiratory"]].copy()
    df_new["date"] = pd.to_datetime(df_new["date"], format="%Y-%m-%d")

    # Collapse the regions into one row per date.
    df_new = df_new.groupby("date").sum()
    df_new = df_new.reset_index().sort_values(["date"], ascending=True)
    df_new["source"] = "helsedir:api"

    df = load_datafile("hospitalized", parse_dates=["date"])

    if df_new.equals(df):
        return

    print(now, "hospitalized: New update")

    sourcefile = load_sources()
    sourcefile["hospitalized.csv"]["last_updated"] = now
    sourcefile["hospitalized.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("hospitalized", df_new)

    # Generate graph
    graphs.hospitalized()

Exemplo n.º 6

0

Exibir arquivo

def update():
    """Append the latest FHI demographics total to the ``dead`` datafile.

    Reads ``data_covid19_demographics`` and, when its newest
    ``date_of_publishing`` is not yet present in the stored ``dead``
    datafile, appends a row holding the summed ``n`` as the total and its
    difference from the highest stored total as the new count.  The sources
    file is then flagged as pending, the datafile written and the graph
    regenerated.
    """
    now = get_timestr()

    # load current data
    df = load_datafile("dead")

    # get fhi datafile
    datafile = get_fhi_datafile("data_covid19_demographics")
    df_new = pd.read_csv(datafile)

    date_of_publishing = df_new.date_of_publishing.max()

    if date_of_publishing in df.date.values:
        # This publication is already recorded.
        return

    print(now, "dead: New update")

    last_data = df["total"].max()
    fhi_dead = df_new["n"].sum()

    new_row = {
        "date": date_of_publishing,
        "new": fhi_dead - last_data,
        "total": fhi_dead,
        "source": "fhi:git",
    }

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # supported way to add a row.
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    sourcefile = load_sources()
    sourcefile["dead.csv"]["last_updated"] = now
    sourcefile["dead.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("dead", df)

    # Generate graph
    graphs.dead()

Exemplo n.º 7

0

Exibir arquivo

Arquivo: tested.py Projeto: PedroRisquez/c19norge-data

def update():
    """Append the latest FHI test count to the ``tested`` datafile.

    Reads the chart-data endpoint, takes the figure at index 4 as the
    number of tests, and, when its ``updated`` date is not yet present in
    the stored ``tested`` datafile, appends a row with that total and its
    difference from the highest stored total.  The sources file is then
    flagged as pending and the datafile written.
    """
    now = get_timestr()

    # get from fhi api
    url = "https://www.fhi.no/api/chartdata/api/91672"
    res = requests.get(url).json()

    tests = res["figures"][4]
    fhi_tests = tests["number"]
    # The endpoint reports dd/mm/YYYY; the comparison below uses ISO dates.
    fhi_date = str(datetime.strptime(tests["updated"], "%d/%m/%Y").date())

    # get current data
    df = load_datafile("tested")

    # update new data
    if fhi_date in df.date.values:
        return

    print(now, "tested: New update")

    last_data = df["total"].max()

    new_row = {
        "date": fhi_date,
        "new": fhi_tests - last_data,
        "total": fhi_tests,
        "source": "fhi:web",
    }

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # supported way to add a row.
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    sourcefile = load_sources()
    sourcefile["tested.csv"]["last_updated"] = now
    sourcefile["tested.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("tested", df)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: smittestopp.py Projeto: PedroRisquez/c19norge-data

def update():
    """Refresh the ``smittestopp`` datafile from the FHI key-figures page.

    Loads the page in the browser returned by ``get_browser``, reads four
    Highcharts series (new/total downloads, new/total reported), joins them
    on date and compares the result with the stored ``smittestopp``
    datafile.  When the scrape contains rows not present in the stored
    file, the sources file is flagged as pending, the new frame is written
    and the graph regenerated.

    If scraping fails, the error is printed and the function returns
    without touching any file.
    """
    now = get_timestr()
    url = "https://www.fhi.no/om/smittestopp/nokkeltall-fra-smittestopp/"

    browser = get_browser()

    try:
        browser.get(url)
        # Fixed pause before the chart objects are queried.
        time.sleep(1)
        new_downloads = browser.execute_script(
            "return Highcharts.charts[0].series[0].options.data")
        total_downloads = browser.execute_script(
            "return Highcharts.charts[0].series[1].options.data")
        new_reported = browser.execute_script(
            "return Highcharts.charts[1].series[0].options.data")
        total_reported = browser.execute_script(
            "return Highcharts.charts[1].series[1].options.data")
    except Exception as error:
        # Without the series there is nothing to compare; continuing would
        # only fail later on the undefined variables.
        print("- ERROR:", str(error))
        return
    finally:
        # Release the browser on every path, including a failed scrape.
        try:
            browser.close()
            browser.quit()
        except Exception as error:
            print("- ERROR:", str(error))

    def to_datestr(timestamp_ms):
        """Format a millisecond timestamp as a local ``YYYY-MM-DD`` date."""
        return datetime.fromtimestamp(timestamp_ms / 1000).strftime("%Y-%m-%d")

    # ``new_downloads`` defines which dates exist; the other series are
    # matched onto those dates and anything they lack is filled with 0.
    df_new = pd.DataFrame([
        {"date": to_datestr(point[0]), "new_downloads": point[1]}
        for point in new_downloads
    ])

    other_series = (
        ("total_downloads", total_downloads),
        ("new_reported", new_reported),
        ("total_reported", total_reported),
    )

    for column, points in other_series:
        value_by_date = {to_datestr(point[0]): point[1] for point in points}
        df_new[column] = df_new["date"].map(value_by_date)

    df_new = df_new.fillna(0)
    intcolumns = [
        "new_downloads",
        "total_downloads",
        "new_reported",
        "total_reported",
    ]

    df_new[intcolumns] = df_new[intcolumns].astype(int)

    df_new["source"] = "fhi:web"
    df_new["date"] = pd.to_datetime(df_new["date"])

    columns = ["date"] + intcolumns

    # Compare dfs
    df = load_datafile("smittestopp", parse_dates=["date"])
    df_new = df_new.sort_values(by=["date"])

    # Rows found only in the fresh scrape mean there is something to publish.
    merged = pd.merge(df_new, df, how="outer", indicator=True)
    new_data = merged.loc[merged._merge == "left_only", columns]

    if new_data.empty:
        return

    print(now, "smittestopp: New update")

    sourcefile = load_sources()
    sourcefile["smittestopp.csv"]["last_updated"] = now
    sourcefile["smittestopp.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("smittestopp", df_new)

    # Generate graph
    graphs.smittestopp()

Exemplo n.º 9

0

Exibir arquivo

def update():
    """Refresh the ``vaccine_doses`` datafile from the FHI SYSVAK CSV.

    Reads ``data_covid19_sysvak_by_time_location``, renames the dose
    columns, adds combined dose counts, and compares the row index of the
    result with that of the stored ``vaccine_doses`` datafile.  When the
    indexes differ, the sources file is flagged as pending, the new frame
    is written and the graph regenerated.
    """
    now = get_timestr()

    # Currently stored data.
    df = load_datafile("vaccine_doses")

    # FHI column name -> datafile column name.
    renames = {
        "n_dose_1": "new_dose_1",
        "n_dose_2": "new_dose_2",
        "cum_n_dose_1": "total_dose_1",
        "cum_n_dose_2": "total_dose_2",
        "cum_pr100_dose_1": "total_pr100_dose_1",
        "cum_pr100_dose_2": "total_pr100_dose_2",
    }
    sort_keys = ["granularity_geo", "location_name", "date"]

    # Column order of the written datafile.
    columns = [
        "granularity_geo",
        "location_name",
        "date",
        *renames.values(),
        "new_doses",
        "total_doses",
        "source",
    ]

    # Fresh data from FHI.
    datafile = get_fhi_datafile("data_covid19_sysvak_by_time_location")
    fhi_columns = ["date", "granularity_geo", "location_name", *renames]
    df_new = pd.read_csv(datafile, usecols=fhi_columns)

    df_new = df_new.rename(columns=renames)
    df_new["new_doses"] = df_new["new_dose_1"] + df_new["new_dose_2"]
    df_new["total_doses"] = df_new["total_dose_1"] + df_new["total_dose_2"]
    df_new["source"] = "fhi:git"

    # Everything is handled as text; missing values become empty strings in
    # the fresh frame only.
    df_new = df_new[columns].astype(str).replace("nan", "")
    df = df.astype({column: str for column in columns})

    df_new = df_new.sort_values(by=sort_keys).reset_index(drop=True)
    df = df.sort_values(by=sort_keys).reset_index(drop=True)

    # NOTE(review): only the indexes are compared, and both were just reset,
    # so this detects a change in row count but not in values - confirm
    # that this is intended.
    if df_new.index.equals(df.index):
        return

    print(now, "vaccine_doses: New update")

    sourcefile = load_sources()
    sourcefile["vaccine_doses.csv"]["last_updated"] = now
    sourcefile["vaccine_doses.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("vaccine_doses", df_new)

    # Generate graph
    graphs.vaccine_doses()

Exemplo n.º 10

0

Exibir arquivo

Arquivo: transport.py Projeto: PedroRisquez/c19norge-data

def update():
    """Refresh the ``transport`` datafile from the FHI public-transport page.

    Downloads every spreadsheet linked from the page's download blocks,
    turns each non-empty row into a transport record, and merges the
    records with the stored ``transport`` datafile.  When the scrape holds
    records missing from the stored file, they are added, the sources file
    is flagged as pending and the datafile is written.

    Raises:
        ValueError: if a spreadsheet filename has no ``-<type>.`` part from
            which the transport type can be read.
        KeyError: if that type is not one of the known transport types.
    """
    now = get_timestr()

    headers = {
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 11.2; rv:86.0) Gecko/20100101 Firefox/86.0",
        "X-Requested-With": "XMLHttpRequest",
    }

    columns = [
        "tr_type",
        "route",
        "company",
        "tr_from",
        "tr_to",
        "departure",
        "arrival",
        "source",
    ]

    # Filename suffix -> transport type written to the datafile.
    tr_types = {
        "båt": "Skip",
        "bat": "Skip",
        "buss": "Buss",
        "fly": "Fly",
        "tog": "Tog",
    }

    base_url = "https://www.fhi.no"
    url = f"{base_url}/sv/smittsomme-sykdommer/corona/koronavirus-og-covid-19-pa-offentlig-kommunikasjon/"

    html = requests.get(url).text
    soup = BeautifulSoup(html, "lxml")

    # Records are collected in a list and turned into a frame once.
    # ``DataFrame.append`` was removed in pandas 2.0, and appending row by
    # row copies the whole frame each time.
    records = []

    for download in soup.find_all("div", class_="fhi-dynamic-table__download"):
        href = download.find("a").get("href")
        filename = os.path.basename(href)

        res = requests.get(f"{base_url}{href}",
                           allow_redirects=True,
                           headers=headers)

        df_tmp = pd.read_excel(res.content).dropna()
        if df_tmp.empty:
            continue

        # The transport type is encoded in the filename as "<x>-<type>.<ext>".
        try:
            tr_type = filename.split("-")[1].split(".")[0]
        except IndexError as err:
            raise ValueError(
                f"cannot read transport type from filename {filename!r}"
            ) from err
        tr_label = tr_types[tr_type]

        for _, row in df_tmp.iterrows():
            # Cells are addressed by position; the sheet headers are not used.
            from_to = row.iloc[2].replace("\xa0", "")
            from_to_split = from_to.split("-")
            _from = from_to_split[0].strip()
            _to = from_to_split[1].strip() if len(from_to_split) > 1 else ""

            records.append({
                "tr_type": tr_label,
                "route": row.iloc[0],
                "company": row.iloc[1],
                "tr_from": _from,
                "tr_to": _to,
                "departure": fmt_date(row.iloc[3]),
                "arrival": fmt_date(row.iloc[4]),
                "source": "fhi:web",
            })

    # ``dtype=object`` keeps the raw cell values as they were collected.
    df_new = pd.DataFrame(records, columns=columns, dtype=object)

    # Hard-coded correction of one departure whose year the code treats as
    # wrong in the source data.
    df_new.loc[df_new.departure == "2021-12-27 16:05:00",
               ["departure"]] = "2020-12-27 16:05:00"

    df_new["departure"] = pd.to_datetime(df_new["departure"])
    df_new["arrival"] = pd.to_datetime(df_new["arrival"])

    # Compare everything as text so the merge below matches the stored file.
    df_new = df_new.astype(str)

    df_new = df_new.sort_values(by=["departure", "arrival", "route"],
                                ascending=False)

    df = load_datafile("transport")

    df_new = df_new.replace("nan", "")
    for column in columns:
        df[column] = df[column].astype(str).replace("nan", "")

    # Records found only in the fresh scrape are the ones to add.
    merged = pd.merge(df_new, df, how="outer", indicator=True)
    new_data = merged.loc[merged._merge == "left_only", columns]

    if new_data.empty:
        return

    print(now, "transport: New update")
    df = pd.concat([df, new_data])
    df = df.sort_values(by=["departure", "arrival", "route"],
                        ascending=False)

    sourcefile = load_sources()
    sourcefile["transport.csv"]["last_updated"] = now
    sourcefile["transport.csv"]["pending_update"] = 1

    write_sources(sourcefile)
    write_datafile("transport", df)