示例#1
0
    def query_insights_data():
        """Fetch all insights for the enclosing date window.

        ``start_date_time`` and ``end_date_time`` are not parameters; they are
        read from the enclosing scope.

        Returns:
            dict: a single ``"data"`` key holding the queried insights after
            they have been passed through ``fill_nan``.
        """
        # The local alias avoids re-using this function's own name for the
        # imported project helper.
        from airqo_etl_utils.app_insights_utils import (
            query_insights_data as fetch_insights,
        )

        from airqo_etl_utils.commons import fill_nan

        query_args = {
            "start_date_time": start_date_time,
            "end_date_time": end_date_time,
            "all_data": True,
            "freq": "",
        }
        insights = fetch_insights(**query_args)

        return {"data": fill_nan(data=insights)}
示例#2
0
def insights_daily_insights(start_date_time: str, end_date_time: str):
    """Dump hourly insights and their daily averages to CSV files.

    Queries hourly insights for the given window, writes them to
    ``hourly_insights_airqo_data.csv``, then averages them to a daily
    frequency and writes that result to ``daily_insights_airqo_data.csv``.
    Both files are written relative to the current working directory.

    Args:
        start_date_time: start of the query window, passed through unchanged.
        end_date_time: end of the query window, passed through unchanged.
    """
    from airqo_etl_utils.app_insights_utils import (
        query_insights_data as fetch_insights,
        average_insights_data as aggregate_insights,
    )

    hourly_rows = fetch_insights(
        freq="hourly",
        start_date_time=start_date_time,
        end_date_time=end_date_time,
    )
    hourly_frame = pd.DataFrame(hourly_rows)
    hourly_frame.to_csv(
        path_or_buf="hourly_insights_airqo_data.csv",
        index=False,
    )

    # The daily averages are derived from the hourly rows fetched above.
    daily_rows = aggregate_insights(frequency="daily", data=hourly_rows)
    daily_frame = pd.DataFrame(daily_rows)
    daily_frame.to_csv(
        path_or_buf="daily_insights_airqo_data.csv",
        index=False,
    )
示例#3
0
    def average_insights_data(**kwargs):
        """Query hourly insights and average them to a daily frequency.

        Args:
            **kwargs: forwarded untouched to ``get_date_time_values``, which
                yields the (start, end) pair for the query window.

        Returns:
            dict: a single ``"data"`` key holding the daily averages after
            they have been passed through ``fill_nan``.
        """
        # Local aliases avoid re-using this function's own name for the
        # imported project helper.
        from airqo_etl_utils.app_insights_utils import (
            query_insights_data as fetch_insights,
            average_insights_data as aggregate_insights,
        )

        from airqo_etl_utils.commons import get_date_time_values, fill_nan

        window_start, window_end = get_date_time_values(**kwargs)

        hourly_rows = fetch_insights(
            freq="hourly",
            start_date_time=window_start,
            end_date_time=window_end,
        )
        daily_rows = aggregate_insights(frequency="daily", data=hourly_rows)

        return {"data": fill_nan(data=daily_rows)}
示例#4
0
    def average_insights_data():
        """Average today's (UTC) hourly insights to a daily frequency.

        The query window runs from ``00:00:00Z`` to ``23:59:59Z`` of the
        current UTC date.

        Returns:
            dict: a single ``"data"`` key holding the daily averages after
            they have been passed through ``fill_nan``.
        """
        from airqo_etl_utils.app_insights_utils import (
            query_insights_data,
            average_insights_data,
        )

        from airqo_etl_utils.commons import fill_nan
        from datetime import datetime, timezone

        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # timestamp formats to exactly the same strings here because only the
        # date fields are interpolated.
        now = datetime.now(timezone.utc)
        start_date_time = now.strftime("%Y-%m-%dT00:00:00Z")
        end_date_time = now.strftime("%Y-%m-%dT23:59:59Z")

        hourly_insights_data = query_insights_data(
            freq="hourly",
            start_date_time=start_date_time,
            end_date_time=end_date_time,
        )

        ave_insights_data = average_insights_data(
            frequency="daily",
            data=hourly_insights_data,
        )

        return {"data": fill_nan(data=ave_insights_data)}
示例#5
0
def insights_cleanup_etl():
    """Wire up the tasks that insert placeholder insights for missing slots.

    The window starts at the first day of the week containing the first day
    of the current month and ends at the last day of the week containing the
    last day of the current month. Four tasks are chained:

    1. ``create_empty_insights`` builds an hourly and a daily placeholder
       record for every site over that window.
    2. ``query_insights_data`` fetches the insights that already exist.
    3. ``filter_insights`` keeps only the (siteId, time, frequency)
       combinations that occur exactly once across both sets.
    4. ``load`` inserts the remaining records.

    NOTE(review): ``task`` and ``datetime`` come from the surrounding module;
    ``task`` is presumably Airflow's TaskFlow decorator -- confirm.
    """
    from airqo_etl_utils.date import (
        date_to_str_days,
        first_day_of_week,
        last_day_of_week,
        first_day_of_month,
        last_day_of_month,
    )

    # Read the clock once so both ends of the window derive from the same
    # instant; two separate now() calls could straddle a month rollover.
    now = datetime.now()
    start_date_time = date_to_str_days(
        first_day_of_week(first_day_of_month(date_time=now)))
    end_date_time = date_to_str_days(
        last_day_of_week(last_day_of_month(date_time=now)))

    @task(multiple_outputs=True)
    def create_empty_insights():
        """Build placeholder hourly and daily insights for every site.

        Each record carries random ``pm2_5``/``pm10`` values drawn uniformly
        from [50.0, 150.0], ``empty=True`` and ``forecast=False``. Hourly
        records are generated first, then daily ones.

        Returns:
            dict: a single ``"data"`` key holding the records after they have
            been passed through ``fill_nan``.
        """
        from airqo_etl_utils.airqo_api import AirQoApi

        from airqo_etl_utils.commons import fill_nan
        import random
        import pandas as pd
        from airqo_etl_utils.date import (
            date_to_str_days,
            date_to_str_hours,
        )

        airqo_api = AirQoApi()
        sites = airqo_api.get_sites(tenant="airqo")
        insights = []

        def _append_placeholders(step, frequency, format_time):
            """Append one placeholder per (timestamp, site) at the given step."""
            for date in pd.date_range(start_date_time, end_date_time,
                                      freq=step):
                date_time = format_time(date)
                for site in sites:
                    try:
                        insights.append({
                            "time": date_time,
                            "pm2_5": random.uniform(50.0, 150.0),
                            "pm10": random.uniform(50.0, 150.0),
                            "empty": True,
                            "frequency": frequency,
                            "forecast": False,
                            "siteId": site["_id"],
                        })
                    except (KeyError, TypeError) as ex:
                        # Only the site["_id"] lookup can fail here; report
                        # the malformed site and carry on with the rest.
                        print(ex)

        _append_placeholders("1H", "HOURLY", date_to_str_hours)
        _append_placeholders("24H", "DAILY", date_to_str_days)

        return {"data": fill_nan(data=insights)}

    @task(multiple_outputs=True)
    def query_insights_data():
        """Fetch all existing insights inside the window.

        Returns:
            dict: a single ``"data"`` key holding the queried insights after
            they have been passed through ``fill_nan``.
        """
        from airqo_etl_utils.app_insights_utils import query_insights_data

        from airqo_etl_utils.commons import fill_nan

        all_insights_data = query_insights_data(
            start_date_time=start_date_time,
            end_date_time=end_date_time,
            all_data=True,
            freq="",
        )

        return {"data": fill_nan(data=all_insights_data)}

    @task(multiple_outputs=True)
    def filter_insights(empty_insights_data: dict,
                        available_insights_data: dict):
        """Drop every (siteId, time, frequency) key present more than once.

        Args:
            empty_insights_data: output of ``create_empty_insights``.
            available_insights_data: output of ``query_insights_data``.

        Returns:
            dict: a single ``"data"`` key holding the surviving records,
            passed through ``fill_nan``.
        """
        from airqo_etl_utils.commons import fill_nan, un_fill_nan

        import pandas as pd

        insights_data_df = pd.DataFrame(
            data=un_fill_nan(available_insights_data.get("data")))
        empty_insights_data_df = pd.DataFrame(
            data=un_fill_nan(empty_insights_data.get("data")))

        # keep=False removes *all* rows of a duplicated key, so a placeholder
        # whose slot already holds a stored insight disappears together with
        # that stored insight.
        # NOTE(review): a stored insight with no matching placeholder also
        # survives this step and is handed to ``load`` -- confirm intended.
        insights_data = pd.concat(
            [empty_insights_data_df, insights_data_df]
        ).drop_duplicates(keep=False, subset=["siteId", "time", "frequency"])

        return {"data": fill_nan(data=insights_data.to_dict(orient="records"))}

    @task()
    def load(insights_data: dict):
        """Insert the filtered records via ``save_insights_data``.

        Args:
            insights_data: output of ``filter_insights``.
        """
        from airqo_etl_utils.commons import un_fill_nan

        empty_insights_data = un_fill_nan(insights_data.get("data"))
        from airqo_etl_utils.app_insights_utils import save_insights_data

        # NOTE(review): meaning of partition=2 is defined by
        # save_insights_data and is not visible here -- confirm.
        save_insights_data(insights_data=empty_insights_data,
                           action="insert",
                           partition=2)

    empty_insights = create_empty_insights()
    available_insights = query_insights_data()
    filtered_insights = filter_insights(
        empty_insights_data=empty_insights,
        available_insights_data=available_insights)
    load(insights_data=filtered_insights)