def merge_data(averaged_airqo_data: dict, weather_data: dict) -> dict:
    """Merge hourly AirQo measurements with hourly weather data.

    :param averaged_airqo_data: payload of the form {"data": [...]} whose
        records have had NaNs filled for serialization.
    :param weather_data: payload of the same {"data": [...]} shape.
    :return: {"data": [...]} merged measurements with NaNs re-filled.
    """
    from airqo_etl_utils.commons import un_fill_nan, fill_nan
    from airqo_etl_utils.airqo_utils import merge_airqo_and_weather_data

    # Un-fill NaN placeholders before merging, re-fill on the way out.
    hourly_airqo_data = un_fill_nan(averaged_airqo_data.get("data"))
    hourly_weather_data = un_fill_nan(weather_data.get("data"))

    merged_measurements = merge_airqo_and_weather_data(
        airqo_data=hourly_airqo_data, weather_data=hourly_weather_data
    )

    # Plain dict literal instead of the redundant dict({...}) wrapper.
    return {"data": fill_nan(data=merged_measurements)}
Пример #2
0
    def load(inputs: dict):
        """Persist measurements from the task payload to the AirQo API
        under the "kcca" tenant."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.airqo_api import AirQoApi

        measurements = un_fill_nan(inputs.get("data"))
        AirQoApi().save_events(measurements=measurements, tenant="kcca")
    def average_data_by_hour(raw_data: dict):
        """Average raw AirQo measurements into hourly records.

        :param raw_data: payload of the form {"data": [...]} with NaNs filled.
        :return: {"data": [...]} hourly averages with NaNs re-filled.
        """
        from airqo_etl_utils.airqo_utils import average_airqo_data
        from airqo_etl_utils.commons import fill_nan, un_fill_nan

        raw_airqo_data = un_fill_nan(raw_data.get("data"))
        average_data = average_airqo_data(data=raw_airqo_data, frequency="hourly")

        # Plain dict literal instead of the redundant dict({...}) wrapper.
        return {"data": fill_nan(data=average_data)}
Пример #4
0
    def transform(inputs: dict):
        """Clean raw KCCA measurements into the API-ready shape.

        :param inputs: payload of the form {"data": [...]} with NaNs filled.
        :return: {"data": [...]} cleaned measurements with NaNs re-filled.
        """
        from airqo_etl_utils.kcca_utils import transform_kcca_measurements_for_api
        from airqo_etl_utils.commons import un_fill_nan, fill_nan

        data = un_fill_nan(inputs.get("data"))
        cleaned_data = transform_kcca_measurements_for_api(data)

        # Plain dict literal instead of the redundant dict({...}) wrapper.
        return {"data": fill_nan(data=cleaned_data)}
Пример #5
0
    def load(insights_data: dict):
        """Insert the (possibly empty) insights records from the payload."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.app_insights_utils import save_insights_data

        records = un_fill_nan(insights_data.get("data"))
        save_insights_data(insights_data=records, action="insert", partition=2)
    def average_data(inputs: dict):
        """Average AirQo measurements into daily records.

        :param inputs: payload of the form {"data": [...]} with NaNs filled.
        :return: {"data": [...]} daily averages with NaNs re-filled.
        """
        from airqo_etl_utils.commons import un_fill_nan, fill_nan
        from airqo_etl_utils.airqo_utils import average_airqo_measurements

        data = un_fill_nan(inputs.get("data"))
        averaged_data = average_airqo_measurements(data=data, frequency="daily")

        # Plain dict literal instead of the redundant dict({...}) wrapper.
        return {"data": fill_nan(data=averaged_data)}
Пример #7
0
    def load(kcca_data: dict, **kwargs):
        """Route hourly KCCA data to BigQuery, the message broker, or the API.

        The destination is read from the triggering dag_run's conf and
        defaults to "bigquery" when it is not supplied.

        :param kcca_data: payload of the form {"data": [...]} with NaNs filled.
        :param kwargs: Airflow task context; "dag_run" may carry a
            conf["destination"] override.
        :raises Exception: if the configured destination is unrecognised.
        """
        from airqo_etl_utils.kcca_utils import (
            transform_kcca_measurements_for_api,
            transform_kcca_hourly_data_for_bigquery,
            transform_kcca_data_for_message_broker,
        )
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.airqo_api import AirQoApi
        from airqo_etl_utils.message_broker import KafkaBrokerClient
        from airqo_etl_utils.bigquery_api import BigQueryApi
        from airqo_etl_utils.config import configuration

        data = un_fill_nan(kcca_data.get("data"))

        # kwargs.get("dag_run") may be None (manual invocation / tests); the
        # original `except KeyError` missed the resulting AttributeError.
        try:
            destination = kwargs["dag_run"].conf["destination"]
        except (KeyError, AttributeError, TypeError):
            destination = "bigquery"

        if destination == "bigquery":
            kcca_transformed_data = transform_kcca_hourly_data_for_bigquery(data)

            big_query_api = BigQueryApi()
            big_query_api.save_data(
                data=kcca_transformed_data,
                table=big_query_api.hourly_measurements_table,
            )

        elif destination == "message-broker":
            kcca_transformed_data = transform_kcca_data_for_message_broker(
                data=data, frequency="hourly"
            )

            info = {
                "data": kcca_transformed_data,
                "action": "insert",
                "tenant": "kcca",
            }

            kafka = KafkaBrokerClient()
            kafka.send_data(info=info, topic=configuration.HOURLY_MEASUREMENTS_TOPIC)

        elif destination == "api":
            kcca_transformed_data = transform_kcca_measurements_for_api(data)
            airqo_api = AirQoApi()
            airqo_api.save_events(measurements=kcca_transformed_data, tenant="kcca")

        else:
            raise Exception(
                "Invalid data destination. Valid values are bigquery, message-broker and api"
            )
Пример #8
0
    def load(data: dict):
        """Build insight records from the task payload and persist them."""
        from airqo_etl_utils.app_insights_utils import (
            save_insights_data,
            create_insights_data,
        )
        from airqo_etl_utils.commons import un_fill_nan

        raw_records = un_fill_nan(data.get("data"))
        save_insights_data(
            insights_data=create_insights_data(data=raw_records), action="save"
        )
    def calibrate(inputs: dict):
        """Calibrate hourly AirQo measurements.

        :param inputs: payload of the form {"data": [...]} with NaNs filled.
        :return: {"data": [...]} calibrated measurements with NaNs re-filled.
        """
        from airqo_etl_utils.commons import un_fill_nan, fill_nan
        from airqo_etl_utils.airqo_utils import calibrate_hourly_airqo_measurements

        data = un_fill_nan(inputs.get("data"))

        airqo_calibrated_data = calibrate_hourly_airqo_measurements(measurements=data)

        # Plain dict literal instead of the redundant dict({...}) wrapper.
        return {"data": fill_nan(data=airqo_calibrated_data)}
Пример #10
0
    def filter_insights(empty_insights_data: dict,
                        available_insights_data: dict):
        """Keep only insights rows whose (siteId, time, frequency) appear in
        exactly one of the two inputs.

        ``drop_duplicates(keep=False)`` removes every member of a duplicated
        group, so rows present in both payloads disappear entirely.

        :param empty_insights_data: {"data": [...]} placeholder insights.
        :param available_insights_data: {"data": [...]} existing insights.
        :return: {"data": [...]} unmatched rows with NaNs re-filled.
        """
        from airqo_etl_utils.commons import fill_nan, un_fill_nan

        import pandas as pd

        available_df = pd.DataFrame(
            data=un_fill_nan(available_insights_data.get("data")))
        placeholder_df = pd.DataFrame(
            data=un_fill_nan(empty_insights_data.get("data")))

        # Order preserved from the original: placeholders first, then available.
        filtered = pd.concat([placeholder_df, available_df]).drop_duplicates(
            keep=False, subset=["siteId", "time", "frequency"])

        # Plain dict literal instead of the redundant dict({...}) wrapper.
        return {"data": fill_nan(data=filtered.to_dict(orient="records"))}
Пример #11
0
    def send_raw_measurements_to_api(airqo_data: dict):
        """Restructure raw AirQo measurements and post them to the API."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.airqo_utils import restructure_airqo_data
        from airqo_etl_utils.airqo_api import AirQoApi

        restructured = restructure_airqo_data(
            data=un_fill_nan(airqo_data.get("data")), destination="api"
        )
        AirQoApi().save_events(measurements=restructured, tenant="airqo")
Пример #12
0
    def load(forecast: dict, transformed_forecast: dict):
        """Combine raw and transformed forecast insights and save them."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.app_insights_utils import save_insights_data
        import pandas as pd

        frames = [
            pd.DataFrame(un_fill_nan(forecast.get("data"))),
            pd.DataFrame(un_fill_nan(transformed_forecast.get("data"))),
        ]
        combined = pd.concat(frames, ignore_index=True)

        save_insights_data(
            insights_data=combined.to_dict(orient="records"),
            action="save",
            partition=1,
        )
Пример #13
0
    def send_hourly_measurements_to_api(inputs: dict):
        """Transform hourly KCCA measurements for the API and post them."""
        from airqo_etl_utils.kcca_utils import transform_kcca_measurements_for_api
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.airqo_api import AirQoApi

        measurements = transform_kcca_measurements_for_api(
            un_fill_nan(inputs.get("data"))
        )
        AirQoApi().save_events(measurements=measurements, tenant="kcca")
Пример #14
0
    def send_hourly_measurements_to_bigquery(kcca_data: dict):
        """Restructure hourly KCCA data and write it to BigQuery."""
        from airqo_etl_utils.kcca_utils import transform_kcca_hourly_data_for_bigquery
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.bigquery_api import BigQueryApi

        restructured = transform_kcca_hourly_data_for_bigquery(
            un_fill_nan(kcca_data.get("data"))
        )
        api = BigQueryApi()
        api.save_data(data=restructured, table=api.hourly_measurements_table)
Пример #15
0
    def send_raw_measurements_to_bigquery(airqo_data: dict):
        """Restructure raw AirQo data and write it to BigQuery."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.airqo_utils import restructure_airqo_data
        from airqo_etl_utils.bigquery_api import BigQueryApi

        measurements = un_fill_nan(airqo_data.get("data"))
        restructured = restructure_airqo_data(
            data=measurements, destination="bigquery"
        )
        BigQueryApi().save_raw_measurements(restructured)
Пример #16
0
    def map_site_ids(airqo_data: dict, deployment_logs: dict):
        """Remap site ids on historical AirQo measurements using deployment logs.

        :param airqo_data: payload of the form {"data": [...]} with NaNs filled.
        :param deployment_logs: {"data": [...]} payload of deployment records.
        :return: {"data": [...]} measurements with corrected site ids, NaNs re-filled.
        """
        from airqo_etl_utils.commons import un_fill_nan, fill_nan
        from airqo_etl_utils.airqo_utils import map_site_ids_to_historical_measurements

        data = un_fill_nan(airqo_data.get("data"))
        # NOTE(review): logs are not passed through un_fill_nan here, unlike the
        # measurements — presumably the helper expects the raw list; confirm.
        logs = deployment_logs.get("data")

        restructured_data = map_site_ids_to_historical_measurements(
            data=data, deployment_logs=logs
        )

        # Plain dict literal instead of the redundant dict({...}) wrapper.
        return {"data": fill_nan(data=restructured_data)}
Пример #17
0
    def save_to_bigquery(inputs: dict):
        """Transform weather data and save it to the hourly weather table."""
        from airqo_etl_utils.bigquery_api import BigQueryApi
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.weather_data_utils import (
            transform_weather_data_for_bigquery,
        )

        records = transform_weather_data_for_bigquery(
            data=un_fill_nan(inputs.get("data"))
        )
        api = BigQueryApi()
        api.save_data(data=records, table=api.hourly_weather_table)
Пример #18
0
    def load(airqo_data: dict, **kwargs):
        """Route hourly AirQo data to BigQuery, the message broker, or the API.

        The destination is read from the triggering dag_run's conf and
        defaults to "bigquery" when it is not supplied.

        :param airqo_data: payload of the form {"data": [...]} with NaNs filled.
        :param kwargs: Airflow task context; "dag_run" may carry a
            conf["destination"] override.
        :raises Exception: if the configured destination is unrecognised.
        """
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.bigquery_api import BigQueryApi
        from airqo_etl_utils.airqo_api import AirQoApi
        from airqo_etl_utils.airqo_utils import restructure_airqo_data
        from airqo_etl_utils.config import configuration
        from airqo_etl_utils.message_broker import KafkaBrokerClient

        data = un_fill_nan(airqo_data.get("data"))

        # kwargs.get("dag_run") may be None (manual invocation / tests); the
        # original `except KeyError` missed the resulting AttributeError.
        try:
            destination = kwargs["dag_run"].conf["destination"]
        except (KeyError, AttributeError, TypeError):
            destination = "bigquery"

        if destination == "bigquery":
            airqo_restructured_data = restructure_airqo_data(
                data=data, destination="bigquery"
            )
            big_query_api = BigQueryApi()
            big_query_api.save_data(
                data=airqo_restructured_data,
                table=big_query_api.hourly_measurements_table,
            )

        elif destination == "message-broker":
            airqo_restructured_data = restructure_airqo_data(
                data=data, destination="message-broker"
            )

            info = {
                "data": airqo_restructured_data,
                "action": "insert",
                "tenant": "airqo",
            }
            kafka = KafkaBrokerClient()
            kafka.send_data(info=info, topic=configuration.HOURLY_MEASUREMENTS_TOPIC)

        elif destination == "api":
            airqo_restructured_data = restructure_airqo_data(
                data=data, destination="api"
            )
            airqo_api = AirQoApi()
            airqo_api.save_events(measurements=airqo_restructured_data, tenant="airqo")

        else:
            raise Exception(
                "Invalid data destination. Valid values are bigquery, message-broker and api"
            )
Пример #19
0
    def update_app_insights(airqo_data: dict):
        """Publish AirQo measurements restructured as app insights to Kafka."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.airqo_utils import restructure_airqo_data
        from airqo_etl_utils.message_broker import KafkaBrokerClient
        from airqo_etl_utils.config import configuration

        measurements = un_fill_nan(airqo_data.get("data"))
        payload = {
            "data": restructure_airqo_data(
                data=measurements, destination="app-insights"
            ),
            "action": "save",
        }
        KafkaBrokerClient().send_data(
            info=payload, topic=configuration.INSIGHTS_MEASUREMENTS_TOPIC, partition=0
        )
Пример #20
0
    def send_hourly_measurements_to_message_broker(airqo_data: dict):
        """Publish restructured hourly AirQo measurements to the Kafka broker."""
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.config import configuration
        from airqo_etl_utils.message_broker import KafkaBrokerClient
        from airqo_etl_utils.airqo_utils import restructure_airqo_data

        restructured = restructure_airqo_data(
            data=un_fill_nan(airqo_data.get("data")),
            destination="message-broker",
        )
        payload = {"data": restructured, "action": "insert", "tenant": "airqo"}

        KafkaBrokerClient().send_data(
            info=payload, topic=configuration.HOURLY_MEASUREMENTS_TOPIC
        )
Пример #21
0
    def send_hourly_measurements_to_message_broker(airqo_data: dict):
        """Publish restructured hourly KCCA measurements to the Kafka broker."""
        from airqo_etl_utils.kcca_utils import transform_kcca_data_for_message_broker
        from airqo_etl_utils.commons import un_fill_nan
        from airqo_etl_utils.config import configuration
        from airqo_etl_utils.message_broker import KafkaBrokerClient

        restructured = transform_kcca_data_for_message_broker(
            data=un_fill_nan(airqo_data.get("data")), frequency="hourly"
        )
        payload = {"data": restructured, "action": "insert", "tenant": "kcca"}

        KafkaBrokerClient().send_data(
            info=payload, topic=configuration.HOURLY_MEASUREMENTS_TOPIC
        )
Пример #22
0
def map_site_ids_to_historical_measurements(data: list, deployment_logs: list) -> list:
    """Assign a site_id to every measurement, honouring device deployment history.

    For each measurement row, the device's current site is the default; if a
    deployment-log window (start_time <= time <= end_time) for that device
    matches the measurement time, the logged site_id wins. Rows whose device
    cannot be resolved are dropped from the output.

    :param data: list of measurement dicts (NaN-filled; un-filled below).
    :param deployment_logs: list of log dicts with "device_id", "site_id",
        "start_time" and "end_time" (date strings parsed via str_to_date).
    :return: list of measurement dicts, each with a populated "site_id".
    """

    # Nothing to remap without logs; nothing to do without data.
    if not deployment_logs or not data:
        return data

    airqo_api = AirQoApi()
    devices = airqo_api.get_devices(tenant="airqo")

    mapped_data = []

    devices_logs_df = pd.DataFrame(deployment_logs)
    # Parse log boundaries once so the per-row comparisons below operate on
    # parsed dates rather than strings.
    devices_logs_df["start_time"] = devices_logs_df["start_time"].apply(
        lambda x: str_to_date(x)
    )
    devices_logs_df["end_time"] = devices_logs_df["end_time"].apply(
        lambda x: str_to_date(x)
    )

    data = un_fill_nan(data)
    data_df = pd.DataFrame(data)

    for _, data_row in data_df.iterrows():
        device = get_device(devices, device_id=data_row["device_id"])

        # Measurements from unknown devices are silently skipped.
        if not device:
            continue

        # Default to the device's current site; may be overridden below.
        # NOTE(review): device.get("site") could be None for an undeployed
        # device, which would raise AttributeError here — confirm upstream.
        site_id = device.get("site").get("_id")
        time = str_to_date(data_row["time"])
        device_logs = devices_logs_df[devices_logs_df["device_id"] == device.get("_id")]

        if not device_logs.empty:
            for _, log in device_logs.iterrows():
                # No break: if windows overlap, the last matching log wins.
                if log["start_time"] <= time <= log["end_time"]:
                    site_id = log["site_id"]

        data_row["site_id"] = site_id

        mapped_data.append(data_row.to_dict())

    return mapped_data