示例#1
0
文件: common.py 项目: horazont/covid
def generate_counter_samples(
    counters: Counters,
    measurement: str,
    key_labels: typing.Sequence[typing.Sequence[str]],
    field_labels: typing.Sequence[str],
):
    """
    Yield one InfluxDB sample per day and key combination with data.

    ``counters.data`` is reshaped to ``(days, key combinations, fields)``,
    where the key combinations are the cartesian product of
    ``counters.keys``. Day ``i`` along the first axis is stamped with
    midnight of ``counters.first_date + i`` days. Combinations whose values
    are all falsy on a given day produce no sample.

    :param counters: Provides ``data``, ``keys`` and ``first_date``.
    :param measurement: Measurement name stored on every sample.
    :param key_labels: One group of tag names per entry of
        ``counters.keys``. Each group is zipped element-wise with the
        matching key value, so a group is itself a sequence of names.
    :param field_labels: One field name per entry of the last data axis.
    :raises AssertionError: If the label counts do not match the counter
        layout (only checked when assertions are enabled).
    """
    assert len(key_labels) == len(counters.keys)
    data = counters.data
    assert len(field_labels) == data.shape[-1]
    keysets = list(itertools.product(*counters.keys))
    reshaped = data.reshape((data.shape[0], len(keysets), data.shape[-1]))
    # Tags depend only on the key combination, never on the day, so they are
    # built once here instead of once per (day, combination) pair.
    tagsets = [
        tuple(
            (label, value)
            for labels, values in zip(key_labels, keyset)
            for label, value in zip(labels, values)
        )
        for keyset in keysets
    ]
    for i in range(data.shape[0]):
        date = counters.first_date + timedelta(days=i)
        timestamp = datetime(date.year, date.month, date.day)
        for j, tags in enumerate(tagsets):
            row = tuple(reshaped[i, j])
            if not any(row):
                # Skip this sample. There are many of these because the
                # database is not in third normal form: state and district
                # share a column, so the product of all states and districts
                # contains many rows that never have non-zero values.
                continue
            yield influxdb.InfluxDBSample(
                measurement=measurement,
                tags=tags,
                fields=tuple(zip(field_labels, row)),
                timestamp=timestamp,
                ns_part=0,
            )
示例#2
0
def generate_population_samples(population_info, measurement: str,
                                first_date: datetime, ndays: int):
    """
    Yield a population sample for every country on every day of a range.

    One template sample is built per ``(country, population)`` pair of
    ``population_info.items()``. For each day from ``first_date`` through
    ``first_date + ndays`` (both ends included, i.e. ``ndays + 1`` days),
    every template is emitted with its timestamp replaced by midnight of
    that day.

    :param population_info: Mapping of country to population.
    :param measurement: Measurement name stored on every sample.
    :param first_date: First day of the range.
    :param ndays: Number of days after ``first_date`` to cover.
    """
    prototypes = [
        influxdb.InfluxDBSample(
            measurement=measurement,
            tags=(("country", name), ),
            fields=(("population", inhabitants), ),
            timestamp=None,
            ns_part=0,
        )
        for name, inhabitants in population_info.items()
    ]

    for offset in range(ndays + 1):
        day = first_date + timedelta(days=offset)
        # Drop any time-of-day component so every sample sits at midnight.
        midnight = datetime(day.year, day.month, day.day)
        for prototype in prototypes:
            yield prototype._replace(timestamp=midnight)
示例#3
0
def read(f, stationmap):
    """
    Parse a semicolon-separated station file into ``dwd_weather`` samples.

    The first line is the header; column names are matched after stripping
    whitespace. ``MESS_DATUM`` (``%Y%m%d``) and ``STATIONS_ID`` are
    required. Every header column that is a key of ``FIELDMAP`` becomes a
    float field under the mapped name. Values equal to ``-999`` are dropped
    (presumably the missing-value marker; confirm against the data source).

    Rows dated before ``DATE_CUTOFF`` and rows without any remaining field
    are skipped. An empty input yields nothing, and blank lines are ignored.

    :param f: Iterable of text lines, e.g. an open text file.
    :param stationmap: Mapping of integer station id to state name.
    :raises KeyError: If a required header column is missing or a station
        id is not in ``stationmap``.
    :raises AssertionError: If a row does not end in the ``eor`` marker
        (only checked when assertions are enabled).
    """
    reader = csv.reader(f, delimiter=";")
    header = next(reader, None)
    if header is None:
        # Empty input. A bare next() would raise StopIteration, which inside
        # a generator surfaces as a confusing RuntimeError.
        return
    header_index = {k.strip(): i for i, k in enumerate(header)}

    date_index = header_index["MESS_DATUM"]
    station_id_index = header_index["STATIONS_ID"]

    # Column position -> output field name, for the columns actually present.
    fieldmap = {
        header_index[key]: field
        for key, field in FIELDMAP.items() if key in header_index
    }

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to parse and row[-1] below would fail.
            continue
        # parser check
        assert row[-1] == "eor"
        timestamp = datetime.strptime(row[date_index].strip(), "%Y%m%d")
        if timestamp < DATE_CUTOFF:
            continue

        station_id = int(row[station_id_index].strip())
        tags = (
            ("station_id", str(station_id)),
            ("state", stationmap[station_id]),
        )

        fields = []
        for index, field in fieldmap.items():
            value = float(row[index].strip())
            if value == -999:
                continue
            fields.append((field, value))

        if not fields:
            continue

        yield influxdb.InfluxDBSample(
            measurement="dwd_weather",
            timestamp=timestamp,
            ns_part=0,
            fields=tuple(fields),
            tags=tags,
        )
示例#4
0
def import_samples(f):
    """
    Yield one untagged sample per row of a weekly testing CSV.

    Each row must provide ``Kalenderwoche``, ``AnzahlTestungen``,
    ``TestsPositiv`` and ``AnzahlLabore`` as integers. ``Jahr`` is optional
    and falls back to 2020. The timestamp is midnight of the date returned
    by ``monday_of_calenderweek(year, week)``.

    After the consumer resumes the generator past a sample, that sample's
    date is printed to stdout as a single-line progress indicator.

    :param f: Iterable of text lines, e.g. an open text file.
    """
    reader = csv.DictReader(f)
    for record in reader:
        year = int(record.get("Jahr", 2020))
        week = int(record["Kalenderwoche"])
        counts = (
            ("tests", int(record["AnzahlTestungen"])),
            ("positives", int(record["TestsPositiv"])),
            ("sites", int(record["AnzahlLabore"])),
        )
        d = monday_of_calenderweek(year, week)
        yield influxdb.InfluxDBSample(
            measurement=MEASUREMENT,
            timestamp=datetime(d.year, d.month, d.day),
            ns_part=0,
            tags=(),
            fields=counts,
        )
        # ESC[J erases the old output; "\r" keeps the cursor on this line.
        print(f"\x1b[J{d}", end="\r")
示例#5
0
def generate_events(events):
    """
    Yield one sample per event mapping.

    Each event must provide ``date`` (``%Y-%m-%d``) and ``title``. ``text``,
    ``state``, ``district``, ``is_spreader`` and ``is_policy`` are optional.
    ``spreader_class`` is required when ``is_spreader`` is truthy.

    Tags, in order: ``is_spreader`` and ``is_policy`` (translated through
    ``INFLUX_BOOL_NAMES``), then ``state`` and ``district`` when present.
    Fields, in order: ``title``, ``spreader_class`` for spreader events,
    then ``text``. The text field is the event text followed, on a new
    line, by a ``<sup>(district, state)</sup>`` location note if either
    location part is present.

    :param events: Iterable of event mappings.
    :raises KeyError: If a required key is missing from an event.
    """
    for event in events:
        when = datetime.strptime(event["date"], "%Y-%m-%d")
        field_pairs = [("title", event["title"])]
        try:
            text_parts = [event["text"]]
        except KeyError:
            text_parts = []

        state = event.get("state")
        district = event.get("district")
        # District comes first so the note reads from specific to general.
        loc_parts = [part for part in (district, state) if part is not None]
        if loc_parts:
            text_parts.append(f"<sup>({', '.join(loc_parts)})</sup>")

        spreader = bool(event.get("is_spreader", False))
        policy = bool(event.get("is_policy", False))
        tag_pairs = [
            ("is_spreader", INFLUX_BOOL_NAMES[spreader]),
            ("is_policy", INFLUX_BOOL_NAMES[policy]),
        ]
        tag_pairs.extend(
            (name, value)
            for name, value in (("state", state), ("district", district))
            if value is not None
        )

        if spreader:
            field_pairs.append(("spreader_class", event["spreader_class"]))
        field_pairs.append(("text", "\n".join(text_parts)))

        yield influxdb.InfluxDBSample(
            timestamp=when,
            ns_part=0,
            tags=tuple(tag_pairs),
            fields=tuple(field_pairs),
            measurement=MEASUREMENT,
        )
示例#6
0
def generate_samples(f):
    """
    Yield one per-state sample for every row of an intensive-care CSV.

    The ``Bundesland`` column is passed through ``translate_state`` and
    stored as the ``state`` tag. ``Datum`` is passed through ``parse_date``
    to obtain the timestamp. Five integer columns become the fields
    ``reporting``, ``inuse``, ``inuse_covid``, ``emergency_reserve`` and
    ``free``, in that order.

    :param f: Iterable of text lines, e.g. an open text file.
    :raises KeyError: If a row lacks one of the expected columns.
    :raises ValueError: If a count column is not an integer.
    """
    # (field name, CSV column), listed in the order the columns are parsed.
    parse_plan = (
        ("reporting", "Anzahl_Meldebereiche_Erwachsene"),
        ("inuse", "Belegte_Intensivbetten_Erwachsene"),
        ("free", "Freie_Intensivbetten_Erwachsene"),
        ("inuse_covid", "Aktuelle_COVID_Faelle_Erwachsene_ITS"),
        ("emergency_reserve", "7_Tage_Notfallreserve_Erwachsene"),
    )
    # Order in which the fields appear on the emitted sample.
    emit_order = (
        "reporting",
        "inuse",
        "inuse_covid",
        "emergency_reserve",
        "free",
    )
    for record in csv.DictReader(f):
        state = translate_state(record["Bundesland"])
        counts = {}
        for name, column in parse_plan:
            counts[name] = int(record[column])
        yield influxdb.InfluxDBSample(
            timestamp=parse_date(record["Datum"]),
            ns_part=0,
            tags=(("state", state), ),
            fields=tuple((name, counts[name]) for name in emit_order),
            measurement=MEASUREMENT,
        )