示例#1
0
def confirm_or_create_area(area_type: str, area_code: str, area_name: str):
    stmt = (
        insert(AreaReference.__table__)
        .values(
            area_type=area_type,
            area_code=area_code,
            area_name=area_name,
            unique_ref=f"{area_type}|{area_code}"
        )
        .on_conflict_do_nothing(
            index_elements=[
                AreaReference.area_type,
                AreaReference.area_code
            ]
        )
        .compile(dialect=postgres())
    )

    # session = Session(autocommit=True)
    session = Session()
    try:
        session.connection().execute(stmt)
        session.flush()
        # session.begin()
        # session.add(stmt)
        # session.commit()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
示例#2
0
def get_email_recipients():
    session = Session()

    query = session.execute(
        select([ReportRecipient.recipient]).where(
            and_(ReportRecipient.deactivated == False,
                 not_(ReportRecipient.approved_by == None))))

    return [item[0] for item in query]
示例#3
0
def to_sql(df: DataFrame):
    if df.size == 0:
        return None

    df_size = df.shape[0]
    n_chunks = df_size // DB_INSERT_MAX_ROWS + 1
    df.drop_duplicates(
        ["release_id", "area_id", "metric_id", "date"],
        keep="first",
        inplace=True
    )

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()

    except Exception as err:
        session.rollback()
        raise err

    finally:
        session.close()

    return None
示例#4
0
def to_sql(df: DataFrame) -> NoReturn:
    if df.size == 0:
        return None

    df_size = df.shape[0]
    n_chunks = ceil(df_size / DB_INSERT_MAX_ROWS)

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()

    except Exception as err:
        session.rollback()
        raise err

    finally:
        session.close()

    return None
def update_permissions():
    session = Session()
    connection = session.connection()
    try:
        connection.execute(text(PERMISSIONS_QUERY))
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
示例#6
0
def derive_publish_date_metrics(timestamp, area_type):
    query = PUBLISH_DATE_CALCULATION.format(
        today=f"{timestamp:%Y_%-m_%-d}",
        yesterday=get_previous_partition_date(),
        area_type=area_type
    )

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query), )
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    columns = [
        'partition_id', 'area_id', 'area_type', 'area_code',
        'metric_id', 'release_id', 'date', 'payload'
    ]

    to_sql(
        DataFrame(raw_data, columns=columns)
        .pipe(lambda dt: dt.assign(hash=generate_row_hash(dt, hash_only=True)))
        .loc[:, ["metric_id", "area_id", "partition_id", "release_id", "hash", "date", "payload"]]
    )

    return True
示例#7
0
def derive_publish_date_percentages(timestamp, area_type):
    query = PERCENTAGE_DATA.format(
        date=f"{timestamp:%Y_%-m_%-d}",
        area_type=area_type
    )

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query))
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    columns = [
        'area_id', 'partition_id', 'metric_id', 'metric', 'area_type',
        'area_code', 'release_id', 'date', 'payload'
    ]

    to_sql(
        DataFrame(raw_data, columns=columns)
        .pipe(lambda dt: dt.assign(hash=generate_row_hash(dt, hash_only=True)))
        .loc[:, ["metric_id", "area_id", "partition_id", "release_id", "hash", "date", "payload"]]
    )

    return True
示例#8
0
def get_timeseries(date: str, metric: str):
    ts = datetime.strptime(date, "%Y-%m-%d")
    partition = f"{ts:%Y_%-m_%-d}_other"
    partition_id = f"{ts:%Y_%-m_%-d}|other"
    values_query = queries.TIMESRIES_QUERY.format(partition=partition)
    change_query = queries.LATEST_CHANGE_QUERY.format(partition=partition)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(values_query),
                            partition_id=partition_id,
                            metric=metric)
        values = resp.fetchall()

        resp = conn.execute(text(change_query),
                            partition_id=partition_id,
                            datestamp=ts,
                            metric=metric + "Change")
        change = resp.fetchone()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    store_data(date, metric,
               plot_thumbnail(values, metric_name=metric, change=change))

    return True
示例#9
0
def get_vaccinations(date):
    ts = datetime.strptime(date, "%Y-%m-%d")
    partition = f"{ts:%Y_%-m_%-d}"

    vax_query = queries.VACCINATIONS_QUERY.format(partition_date=partition)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(vax_query), datestamp=ts)
        values = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    for item in values:
        store_data(date,
                   "vaccinations",
                   plot_vaccinations(item),
                   area_type=item["area_type"],
                   area_code=item["area_code"])

    return True
示例#10
0
def get_release(timestamp):
    insert_stmt = (
        insert(ReleaseReference.__table__)
        .values(timestamp=timestamp)
    )

    stmt = (
        insert_stmt
        .on_conflict_do_update(
            index_elements=[ReleaseReference.timestamp],
            set_={ReleaseReference.timestamp.name: insert_stmt.excluded.timestamp}
        )
        .returning(ReleaseReference.id)
    )

    # session = Session(autocommit=True)
    session = Session()
    try:
        response = session.execute(stmt)
        result = response.fetchone()[0]
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return result
def create_asset(release_date: str):

    partition_date = datetime.fromisoformat(release_date).strftime(
        "%Y_%-m_%-d")

    query = QUERY.format(date=partition_date)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query))
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = DataFrame(raw_data, columns=["properties", "geometry"])

    geo_data = {
        "type":
        "FeatureCollection",
        "features":
        (data.assign(type="Feature").reset_index().rename(columns={
            "index": "id"
        }).to_dict("records"))
    }

    return store_data(geo_data, "downloads", "maps/vax-data_latest.geojson")
示例#12
0
def add_metric(metric):
    stmt = (
        insert(MetricReference.__table__)
        .values(metric=metric)
        .on_conflict_do_nothing(index_elements=[MetricReference.metric])
        .compile(dialect=postgres())
    )

    session = Session()
    try:
        # session.begin()
        session.connection().execute(stmt)
        session.flush()
        # session.commit()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def update_stats(date, category):
    session = Session()
    connection = session.connection()
    try:
        connection.execute(
            text(
                STATS_QUERY.format(
                    datestamp=date,
                    partitions=
                    f'{{{str.join(",", get_partition_ids(date, category))}}}'))
        )
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
示例#14
0
def get_previous_partition_date():
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(PREVIOUS_PUBLICATION_DATE))
        data = resp.fetchone()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return f"{data[0]:%Y_%-m_%-d}"
def create_asset(data_type):
    params = PARAMETERS[data_type]

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(params["query"]),
                            process_name=params["process_name"])
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    store_data(dict(raw_data), params['container'], params['path'])
示例#16
0
def store_data(data: DataFrame):
    if not data.size:
        return None

    session = Session()
    connection = session.connection()
    try:
        records = data.to_dict(orient="records")

        insert_stmt = insert(PrivateReport.__table__).values(records)
        stmt = insert_stmt.on_conflict_do_update(
            index_elements=[
                PrivateReport.slug_id, PrivateReport.date,
                PrivateReport.metric, PrivateReport.area_id
            ],
            set_={PrivateReport.value.name: insert_stmt.excluded.value})
        connection.execute(stmt)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
示例#17
0
def get_area_data():
    session = Session()
    try:
        results = read_sql(
            f"""\
            SELECT c.id AS "area_id", c.area_type, c.area_code
            FROM covid19.area_reference AS c
            """,
            con=session.connection(),
            index_col=["area_type", "area_code"]
        )
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return results
示例#18
0
def get_data(category: str, timestamp):
    query = text(CATEGORY_TIMESTAMPS)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(query, category=category, timestamp=timestamp)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    df = DataFrame(raw_data, columns=['timestamp'])

    df.timestamp = (df.timestamp.map(to_mins).astype({"timestamp": int}))
    return df
示例#19
0
def get_metrics():
    session = Session()

    try:
        metrics = read_sql(
            f"""\
            SELECT ref.id AS "metric_id", ref.metric
            FROM covid19.metric_reference AS ref;
            """,
            con=session.connection(),
            index_col=["metric"]
        )
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return metrics
def get_latest_scale_records(payload):
    date = datetime.fromisoformat(payload["timestamp"])
    area_type = payload["area_type"]

    query = RATES.format(area_type=AREA_TYPE_PARTITION[area_type],
                         date=f"{date:%Y_%-m_%-d}")

    metric, attr = "newCasesBySpecimenDateRollingRate", "value"
    if area_type == "msoa":
        metric, attr = "newCasesBySpecimenDate", "rollingRate"

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query),
                            metric=metric,
                            attr=attr,
                            area_type=area_type)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = DataFrame(raw_data, columns=["area_type", "area_code", "rate"])

    results = {
        "records": data.to_dict(orient="records"),
        "percentiles": {
            "min": data.loc[:, "rate"].min(),
            "0.1": data.loc[:, "rate"].quantile(0.1).round(1),
            "0.4": data.loc[:, "rate"].quantile(0.4).round(1),
            "median": data.loc[:, "rate"].median().round(1),
            "0.6": data.loc[:, "rate"].quantile(0.6).round(1),
            "0.9": data.loc[:, "rate"].quantile(0.9).round(1),
            "max": data.loc[:, "rate"].max(),
        },
        "timestamp": payload["timestamp"]
    }

    return results
示例#21
0
def generate_html(record_date: datetime):
    slug_id = get_record_id(record_date)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(OUTPUT_DATA), slug_id=slug_id)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    df = DataFrame(raw_data, columns=["area_name", "metric", "date", "value"])
    df.value = df.value.map(format_number)

    area_names = [
        "United Kingdom", "England", "Northern Ireland", "Scotland", "Wales"
    ]

    result = list()

    for section in structure:
        result.append(
            [process_section(df, item, area_names) for item in section])

    template = env.get_template("base.html")

    html_data = template.render(
        results={
            "area_names": area_names,
            "data": result
        },
        timestamp=datetime.now().strftime(r"%A, %d %b %Y at %H:%M:%S GMT"))

    # Uncomment for testing
    # with open("sample.html", "w") as fp:
    #     print(html_data, file=fp)

    # Comment for testing
    store_html(html_data, slug_id=slug_id)
示例#22
0
def get_data(record_date: datetime) -> DataFrame:
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(
            MAIN_QUERY.format(partition_date=f"{record_date:%Y_%-m_%-d}")),
                            metrics=METRICS)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    dt = (DataFrame(raw_data,
                    columns=[
                        "release_id", "area_id", "metric", "date", "value"
                    ]).assign(slug_id=get_record_id(record_date)))

    return dt
示例#23
0
def retrieve_data(timestamp: datetime):
    with open(query_path) as fp:
        query = fp.read()

    query = Template(query).substitute(release_date=f"{timestamp:%Y_%-m_%-d}")

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query))
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    df = DataFrame(raw_data, columns=["key", "value"])

    return df
示例#24
0
def get_metric_id(metric: str):
    stmt = (
        select([
            MetricReference.id
        ])
        .where(
            MetricReference.metric == metric
        )
    )

    session = Session()
    try:
        result = session.execute(stmt)
        return result.fetchone()[0]

    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
示例#25
0
def get_data(query, metric):
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(
            text(query),
            metric=metric
        )
        raw_data = resp.fetchone()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = {
        f'{metric}Date': f'{raw_data[1]:%-d %b}',
        metric: format(int(raw_data[2]), ",d")
    }

    return data
示例#26
0
def get_area_codes(area_type: str):
    query = (
        select([
            AreaReference.id.label("area_id"),
            AreaReference.area_code
        ])
        .where(
            AreaReference.area_type == area_type
        )
    )

    session = Session()
    try:
        result = read_sql(query, con=session.connection())
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return result
示例#27
0
def get_data(timestamp: datetime):
    query = MAIN.format(date=f"{timestamp:%Y_%-m_%-d}")
    metric = "newCasesBySpecimenDateRollingRate"

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(
            text(query),
            metric=metric,
            datestamp=f'{timestamp::%Y-%m-%d}'
        )
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    print(len(raw_data))
    return DataFrame(raw_data, columns=["areaType", "areaCode", metric])
示例#28
0
def get_announcements():
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(ANNOUNCEMENTS))
        values = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    results = list()
    for item in values:
        results.append({
            "body":
            markdown(item["body"].strip()),
            "date":
            datetime.fromisoformat(item["date"]).strftime("%-d %B %Y")
        })

    return results
def main(req: HttpRequest) -> HttpResponse:
    logging.info("Running health checks")

    logging.info("Testing database connection and response")
    session = Session()
    conn = session.connection()
    try:
        q = conn.execute(text("SELECT 1 AS result;"))
        _ = q.fetchone()
        logging.info("Database is alive")
    except Exception as err:
        session.rollback()
        logging.warning("Database is dead")
        raise err
    finally:
        session.close()

    storage_kws = dict(
        container="pipeline",
        path="healthchecks/etl",
        content_type="text/plain",
        compressed=False
    )
    logging.info("Testing storage connection")
    try:
        with StorageClient(**storage_kws) as storage:
            storage.upload("test")
            storage.download()
            storage.delete()
        logging.info("Storage is alive")
    except Exception as err:
        logging.warning("Storage is dead")
        raise err

    logging.info("All tests passed.")

    return HttpResponse(b"ALIVE", status_code=200)
def create_asset(area_type: str, device: str, release_date: str):
    params = PARAMETERS[area_type]
    partition_date = datetime.fromisoformat(release_date).strftime(
        "%Y_%-m_%-d")

    query = params['query'].format(date=partition_date,
                                   area_type=AREA_TYPE_PARTITION[area_type],
                                   attr=params['attribute'])

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query),
                            metric=params["metric"],
                            area_type=area_type)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = DataFrame(raw_data, columns=["date", "properties", "geometry"])

    if device == Device.mobile:
        data = data.loc[data.date == data.date.max(), :]

    geo_data = {
        "type":
        "FeatureCollection",
        "features": (data.drop(columns=["date"]).assign(
            type="Feature").reset_index().rename(columns={
                "index": "id"
            }).to_dict("records"))
    }

    return store_data(geo_data, params['container'], params['path'][device])