def extract_hourly_weather_data():
    """Extract hourly weather data and wrap it in a ``{"data": ...}`` dict.

    ``start_time`` and ``end_time`` are not parameters: they are read from
    an enclosing or module-level scope that is not part of this function.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_airqo_weather_data_from_tahmo`` returned.
    """
    from airqo_etl_utils.airqo_utils import extract_airqo_weather_data_from_tahmo
    from airqo_etl_utils.commons import fill_nan

    weather = extract_airqo_weather_data_from_tahmo(
        start_time=start_time,
        end_time=end_time,
        frequency="hourly",
    )
    return {"data": fill_nan(data=weather)}
def extract_raw_data():
    """Extract raw AirQo device data and wrap it in a ``{"data": ...}`` dict.

    ``start_time`` and ``end_time`` are not parameters: they are read from
    an enclosing or module-level scope that is not part of this function.
    The extraction is requested with ``all_devices=False``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_airqo_data_from_thingspeak`` returned.
    """
    from airqo_etl_utils.airqo_utils import extract_airqo_data_from_thingspeak
    from airqo_etl_utils.commons import fill_nan

    device_readings = extract_airqo_data_from_thingspeak(
        start_time=start_time,
        end_time=end_time,
        all_devices=False,
    )
    return {"data": fill_nan(data=device_readings)}
def average_data(inputs: dict):
    """Average the measurements carried in ``inputs`` at daily frequency.

    Args:
        inputs: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` before averaging.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the output of ``average_airqo_measurements``.
    """
    from airqo_etl_utils.commons import un_fill_nan, fill_nan
    from airqo_etl_utils.airqo_utils import average_airqo_measurements

    measurements = un_fill_nan(inputs.get("data"))
    daily = average_airqo_measurements(data=measurements, frequency="daily")
    return {"data": fill_nan(data=daily)}
def average_data_by_hour(raw_data: dict):
    """Average the measurements carried in ``raw_data`` at hourly frequency.

    Args:
        raw_data: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` before averaging.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the output of ``average_airqo_data``.
    """
    from airqo_etl_utils.airqo_utils import average_airqo_data
    from airqo_etl_utils.commons import fill_nan, un_fill_nan

    readings = un_fill_nan(raw_data.get("data"))
    hourly = average_airqo_data(data=readings, frequency="hourly")
    return {"data": fill_nan(data=hourly)}
def transform(inputs: dict):
    """Run the KCCA measurements in ``inputs`` through the API transform.

    Args:
        inputs: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` and then handed to
            ``transform_kcca_measurements_for_api``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the transformed measurements.
    """
    from airqo_etl_utils.kcca_utils import transform_kcca_measurements_for_api
    from airqo_etl_utils.commons import un_fill_nan, fill_nan

    measurements = un_fill_nan(inputs.get("data"))
    api_ready = transform_kcca_measurements_for_api(measurements)
    return {"data": fill_nan(data=api_ready)}
def extract_hourly_weather_data(**kwargs):
    """Extract hourly weather data for the window derived from ``kwargs``.

    Args:
        **kwargs: forwarded unchanged to ``get_date_time_values``, which
            yields the ``(start_time, end_time)`` pair used for extraction.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_airqo_weather_data_from_tahmo`` returned.
    """
    from airqo_etl_utils.commons import get_date_time_values, fill_nan
    from airqo_etl_utils.airqo_utils import extract_airqo_weather_data_from_tahmo

    window_start, window_end = get_date_time_values(**kwargs)
    weather = extract_airqo_weather_data_from_tahmo(
        start_time=window_start,
        end_time=window_end,
        frequency="hourly",
    )
    return {"data": fill_nan(data=weather)}
def calibrate(inputs: dict):
    """Calibrate the hourly measurements carried in ``inputs``.

    Args:
        inputs: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` and then given to
            ``calibrate_hourly_airqo_measurements`` as ``measurements``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the calibrated measurements.
    """
    from airqo_etl_utils.commons import un_fill_nan, fill_nan
    from airqo_etl_utils.airqo_utils import calibrate_hourly_airqo_measurements

    measurements = un_fill_nan(inputs.get("data"))
    calibrated = calibrate_hourly_airqo_measurements(measurements=measurements)
    return {"data": fill_nan(data=calibrated)}
def extract_airqo_data(**kwargs):
    """Extract hourly AirQo data from the API for the window in ``kwargs``.

    Args:
        **kwargs: forwarded unchanged to ``time_values`` to obtain the
            ``(start_time, end_time)`` pair.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_airqo_hourly_data_from_api`` returned.
    """
    from airqo_etl_utils.commons import fill_nan
    from airqo_etl_utils.airqo_utils import extract_airqo_hourly_data_from_api

    # NOTE(review): `time_values` is neither imported nor defined here; it
    # must be supplied by an enclosing or module-level scope - confirm.
    window_start, window_end = time_values(**kwargs)
    hourly = extract_airqo_hourly_data_from_api(
        start_time=window_start,
        end_time=window_end,
    )
    return {"data": fill_nan(data=hourly)}
def extract(**kwargs):
    """Extract hourly KCCA measurements for the window derived from ``kwargs``.

    Args:
        **kwargs: forwarded unchanged to ``get_date_time_values``, which
            yields the ``(start_time, end_time)`` pair used for extraction.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_kcca_measurements`` returned.
    """
    from airqo_etl_utils.kcca_utils import extract_kcca_measurements
    from airqo_etl_utils.commons import fill_nan, get_date_time_values

    window_start, window_end = get_date_time_values(**kwargs)
    measurements = extract_kcca_measurements(
        start_time=window_start,
        end_time=window_end,
        freq="hourly",
    )
    return {"data": fill_nan(measurements)}
def extract_api_forecast_data():
    """Fetch forecast data for ``"airqo"`` and convert it to insights.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to ``create_insights_data(data=get_forecast_data("airqo"))``.
    """
    from airqo_etl_utils.app_insights_utils import (
        create_insights_data,
        get_forecast_data,
    )
    from airqo_etl_utils.commons import fill_nan

    forecast = get_forecast_data("airqo")
    insights = create_insights_data(data=forecast)
    return {"data": fill_nan(data=insights)}
def create_empty_insights():
    """Build placeholder insight records for every site and time step.

    For each site returned by ``AirQoApi().get_sites(tenant="airqo")`` one
    record is produced per hourly step (``freq="1H"``) and one per daily
    step (``freq="24H"``) between ``start_date_time`` and ``end_date_time``.
    Those two bounds are not parameters: they are read from an enclosing or
    module-level scope that is not part of this function.

    Each record is flagged ``"empty": True`` and ``"forecast": False`` and
    carries random ``pm2_5`` / ``pm10`` values drawn uniformly from
    [50.0, 150.0]. A site that raises while its record is being built is
    skipped after printing the exception.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the list of records (all hourly ones first, then daily).
    """
    from airqo_etl_utils.airqo_api import AirQoApi
    from airqo_etl_utils.commons import fill_nan
    import random
    import pandas as pd
    from airqo_etl_utils.date import (
        date_to_str_days,
        date_to_str_hours,
    )

    sites = AirQoApi().get_sites(tenant="airqo")
    placeholders = []

    def _append_placeholders(step, to_time_str, frequency_label):
        # One pass over the date range at the given step, emitting a record
        # per (timestamp, site) pair into the shared list.
        for moment in pd.date_range(start_date_time, end_date_time, freq=step):
            time_str = to_time_str(moment)
            for site in sites:
                try:
                    placeholders.append(
                        {
                            "time": time_str,
                            "pm2_5": random.uniform(50.0, 150.0),
                            "pm10": random.uniform(50.0, 150.0),
                            "empty": True,
                            "frequency": frequency_label,
                            "forecast": False,
                            "siteId": site["_id"],
                        }
                    )
                except Exception as ex:
                    # Best effort: report the problem and move to the next site.
                    print(ex)

    _append_placeholders("1H", date_to_str_hours, "HOURLY")
    _append_placeholders("24H", date_to_str_days, "DAILY")

    return {"data": fill_nan(data=placeholders)}
def extract(**kwargs):
    """Extract weather data for the window derived from ``kwargs``.

    Args:
        **kwargs: forwarded unchanged to ``get_date_time_values``, which
            yields the ``(start_date_time, end_date_time)`` pair.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_weather_data_from_tahmo`` returned.
    """
    from airqo_etl_utils.weather_data_utils import (
        extract_weather_data_from_tahmo,
    )
    from airqo_etl_utils.commons import fill_nan, get_date_time_values

    window_start, window_end = get_date_time_values(**kwargs)
    weather = extract_weather_data_from_tahmo(
        start_date_time=window_start,
        end_date_time=window_end,
    )
    return {"data": fill_nan(data=weather)}
def extract_airqo_data(**kwargs):
    """Create insights data from BigQuery for the window in ``kwargs``.

    Args:
        **kwargs: forwarded unchanged to ``get_date_time_values``, which
            yields the ``(start_date_time, end_date_time)`` pair.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``create_insights_data_from_bigquery`` returned.
    """
    from airqo_etl_utils.app_insights_utils import (
        create_insights_data_from_bigquery,
    )
    from airqo_etl_utils.commons import get_date_time_values, fill_nan

    window_start, window_end = get_date_time_values(**kwargs)
    insights = create_insights_data_from_bigquery(
        start_date_time=window_start,
        end_date_time=window_end,
    )
    return {"data": fill_nan(data=insights)}
def query_insights_data():
    """Query all insights data between the externally supplied bounds.

    ``start_date_time`` and ``end_date_time`` are not parameters: they are
    read from an enclosing or module-level scope that is not part of this
    function. The query is issued with ``all_data=True`` and ``freq=""``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the query result.
    """
    # Aliased so the imported helper is not confused with this function,
    # which carries the same name.
    from airqo_etl_utils.app_insights_utils import (
        query_insights_data as fetch_insights,
    )
    from airqo_etl_utils.commons import fill_nan

    insights = fetch_insights(
        start_date_time=start_date_time,
        end_date_time=end_date_time,
        all_data=True,
        freq="",
    )
    return {"data": fill_nan(data=insights)}
def map_site_ids(airqo_data: dict, deployment_logs: dict):
    """Map site ids onto historical measurements using deployment logs.

    Args:
        airqo_data: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` before mapping.
        deployment_logs: dict whose ``"data"`` entry is used as-is (it is
            not passed through ``un_fill_nan``).

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``map_site_ids_to_historical_measurements`` returned.
    """
    from airqo_etl_utils.commons import un_fill_nan, fill_nan
    from airqo_etl_utils.airqo_utils import map_site_ids_to_historical_measurements

    measurements = un_fill_nan(airqo_data.get("data"))
    log_entries = deployment_logs.get("data")
    mapped = map_site_ids_to_historical_measurements(
        data=measurements,
        deployment_logs=log_entries,
    )
    return {"data": fill_nan(data=mapped)}
def merge_data(averaged_airqo_data: dict, weather_data: dict):
    """Merge averaged AirQo measurements with weather data.

    Args:
        averaged_airqo_data: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` and used as ``airqo_data``.
        weather_data: dict whose ``"data"`` entry is passed through
            ``un_fill_nan`` and used as ``weather_data``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``merge_airqo_and_weather_data`` returned.
    """
    from airqo_etl_utils.commons import un_fill_nan, fill_nan
    from airqo_etl_utils.airqo_utils import merge_airqo_and_weather_data

    airqo_part = un_fill_nan(averaged_airqo_data.get("data"))
    weather_part = un_fill_nan(weather_data.get("data"))
    merged = merge_airqo_and_weather_data(
        airqo_data=airqo_part,
        weather_data=weather_part,
    )
    return {"data": fill_nan(data=merged)}
def extract():
    """Extract raw KCCA measurements for the hour ending now (UTC).

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_kcca_measurements`` returned for
        ``freq="raw"``.
    """
    from airqo_etl_utils.kcca_utils import extract_kcca_measurements
    from airqo_etl_utils.commons import fill_nan
    from airqo_etl_utils.date import date_to_str
    from datetime import datetime, timedelta

    # Read the clock once so both bounds derive from the same instant and
    # the window is exactly one hour wide; two separate reads could
    # straddle a clock tick and stretch the window.
    now = datetime.utcnow()
    start_time = date_to_str(now - timedelta(hours=1))
    end_time = date_to_str(now)

    kcca_data = extract_kcca_measurements(
        start_time=start_time, end_time=end_time, freq="raw"
    )
    return {"data": fill_nan(data=kcca_data)}
def extract():
    """Extract hourly KCCA measurements for the previous UTC hour.

    The window starts at ``date_to_str_hours`` of (now - 1 hour) and ends
    at minute 59, second 59 of that same hour.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_kcca_measurements`` returned for
        ``freq="hourly"``.
    """
    from airqo_etl_utils.date import date_to_str_hours
    from airqo_etl_utils.kcca_utils import extract_kcca_measurements
    from airqo_etl_utils.commons import fill_nan
    from datetime import datetime, timedelta

    previous_hour = datetime.utcnow() - timedelta(hours=1)
    window_start = date_to_str_hours(previous_hour)
    # Minutes and seconds are fixed in the format string, pinning the end
    # of the window to the last second of the hour.
    window_end = previous_hour.strftime("%Y-%m-%dT%H:59:59Z")

    measurements = extract_kcca_measurements(
        start_time=window_start,
        end_time=window_end,
        freq="hourly",
    )
    return {"data": fill_nan(measurements)}
def extract_hourly_raw_data(**kwargs):
    """Extract raw AirQo device data and average it to hourly values.

    Args:
        **kwargs: forwarded unchanged to ``get_date_time_values``, which
            yields the ``(start_time, end_time)`` pair used for extraction.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the hourly averages computed by ``average_airqo_data``
        from the ``all_devices=True`` extraction.
    """
    from airqo_etl_utils.commons import get_date_time_values, fill_nan
    from airqo_etl_utils.airqo_utils import (
        extract_airqo_data_from_thingspeak,
        average_airqo_data,
    )

    window_start, window_end = get_date_time_values(**kwargs)
    device_readings = extract_airqo_data_from_thingspeak(
        start_time=window_start,
        end_time=window_end,
        all_devices=True,
    )
    hourly = average_airqo_data(data=device_readings, frequency="hourly")
    return {"data": fill_nan(data=hourly)}
def extract():
    """Extract weather data for the previous UTC hour.

    The window starts at ``date_to_str_hours`` of (now - 1 hour) and ends
    at minute 59, second 59 of that same hour.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to what ``extract_weather_data_from_tahmo`` returned.
    """
    # Imported locally so the function does not rely on `datetime` and
    # `timedelta` happening to be available at module level.
    from datetime import datetime, timedelta

    from airqo_etl_utils.date import date_to_str_hours
    from airqo_etl_utils.weather_data_utils import (
        extract_weather_data_from_tahmo,
    )
    from airqo_etl_utils.commons import fill_nan

    hour_of_day = datetime.utcnow() - timedelta(hours=1)
    start_date_time = date_to_str_hours(hour_of_day)
    # Minutes and seconds are fixed in the format string, pinning the end
    # of the window to the last second of the hour.
    end_date_time = datetime.strftime(hour_of_day, "%Y-%m-%dT%H:59:59Z")

    weather_data = extract_weather_data_from_tahmo(
        start_date_time=start_date_time, end_date_time=end_date_time
    )
    return {"data": fill_nan(data=weather_data)}
def average_insights_data(**kwargs):
    """Query hourly insights for the window in ``kwargs`` and average daily.

    Args:
        **kwargs: forwarded unchanged to ``get_date_time_values``, which
            yields the ``(start_date_time, end_date_time)`` pair.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the ``frequency="daily"`` averages of the hourly query.
    """
    # The averaging helper is aliased so it is not confused with this
    # function, which carries the same name.
    from airqo_etl_utils.app_insights_utils import (
        query_insights_data,
        average_insights_data as compute_averages,
    )
    from airqo_etl_utils.commons import get_date_time_values, fill_nan

    window_start, window_end = get_date_time_values(**kwargs)
    hourly = query_insights_data(
        freq="hourly",
        start_date_time=window_start,
        end_date_time=window_end,
    )
    daily = compute_averages(frequency="daily", data=hourly)
    return {"data": fill_nan(data=daily)}
def filter_insights(empty_insights_data: dict, available_insights_data: dict):
    """Keep insight rows whose key occurs exactly once across both inputs.

    Both ``"data"`` entries are passed through ``un_fill_nan``, loaded into
    DataFrames and concatenated (empty insights first). Every row whose
    ``("siteId", "time", "frequency")`` combination appears more than once
    in the concatenation is dropped entirely (``keep=False``).

    Args:
        empty_insights_data: dict with the placeholder insights under
            ``"data"``.
        available_insights_data: dict with the existing insights under
            ``"data"``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the surviving rows as a list of record dicts.
    """
    from airqo_etl_utils.commons import fill_nan, un_fill_nan
    import pandas as pd

    available_df = pd.DataFrame(
        data=un_fill_nan(available_insights_data.get("data"))
    )
    placeholder_df = pd.DataFrame(
        data=un_fill_nan(empty_insights_data.get("data"))
    )

    combined = pd.concat([placeholder_df, available_df])
    unmatched = combined.drop_duplicates(
        subset=["siteId", "time", "frequency"],
        keep=False,
    )
    records = unmatched.to_dict(orient="records")
    return {"data": fill_nan(data=records)}
def average_insights_data():
    """Query today's (UTC) hourly insights and average them daily.

    The window runs from 00:00:00 to 23:59:59 of the current UTC date.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to the ``frequency="daily"`` averages of the hourly query.
    """
    # The averaging helper is aliased so it is not confused with this
    # function, which carries the same name.
    from airqo_etl_utils.app_insights_utils import (
        query_insights_data,
        average_insights_data as compute_averages,
    )
    from airqo_etl_utils.commons import fill_nan
    from datetime import datetime

    today = datetime.utcnow()
    day_start = today.strftime("%Y-%m-%dT00:00:00Z")
    day_end = today.strftime("%Y-%m-%dT23:59:59Z")

    hourly = query_insights_data(
        freq="hourly",
        start_date_time=day_start,
        end_date_time=day_end,
    )
    daily = compute_averages(frequency="daily", data=hourly)
    return {"data": fill_nan(data=daily)}
def extract_insights_forecast_data():
    """Transform old forecast data into insights for the current period.

    The window starts at
    ``first_day_of_week(first_day_of_month(date_time=now))`` and ends at
    ``now``, both rendered with ``date_to_str``.

    Returns:
        dict: a single ``"data"`` key holding the result of ``fill_nan``
        applied to ``create_insights_data`` over the transformed forecast.
    """
    # Imported locally so the function does not rely on `datetime`
    # happening to be available at module level.
    from datetime import datetime

    from airqo_etl_utils.app_insights_utils import (
        create_insights_data,
        transform_old_forecast,
    )
    from airqo_etl_utils.date import (
        date_to_str,
        first_day_of_week,
        first_day_of_month,
    )
    from airqo_etl_utils.commons import fill_nan

    # NOTE(review): this is local time (`now()`), whereas other tasks in
    # this file use `utcnow()` - confirm the difference is intended.
    now = datetime.now()
    start_date_time = date_to_str(
        first_day_of_week(first_day_of_month(date_time=now))
    )
    end_date_time = date_to_str(now)

    forecast_data = transform_old_forecast(
        start_date_time=start_date_time, end_date_time=end_date_time
    )
    insights_data = create_insights_data(data=forecast_data)
    return {"data": fill_nan(data=insights_data)}