def query_insights_data():
    """Fetch every stored insight record for the configured window.

    Reads ``start_date_time`` and ``end_date_time`` from the enclosing
    scope (not visible in this chunk — presumably DAG-level variables;
    TODO confirm against the surrounding file).

    Returns:
        dict: ``{"data": [...]}`` with NaN values sanitised so the
        payload survives XCom serialisation.
    """
    from airqo_etl_utils.app_insights_utils import query_insights_data
    from airqo_etl_utils.commons import fill_nan

    # all_data=True with an empty freq pulls the complete dataset,
    # irrespective of aggregation frequency.
    records = query_insights_data(
        start_date_time=start_date_time,
        end_date_time=end_date_time,
        all_data=True,
        freq="",
    )

    return {"data": fill_nan(data=records)}
def insights_daily_insights(start_date_time: str, end_date_time: str):
    """Query hourly insights for the given window, average them to daily
    frequency, and dump both datasets to CSV files in the working directory.

    Args:
        start_date_time: Window start (string timestamp understood by
            ``query_insights_data``).
        end_date_time: Window end.

    Side effects:
        Writes ``hourly_insights_airqo_data.csv`` and
        ``daily_insights_airqo_data.csv``.
    """
    # Fix: the original referenced `pd` without a local import. Every
    # sibling task in this file imports its dependencies inside the
    # function body, so do the same here to guarantee `pd` is bound.
    import pandas as pd

    from airqo_etl_utils.app_insights_utils import (
        query_insights_data,
        average_insights_data,
    )

    hourly_insights_data = query_insights_data(
        freq="hourly",
        start_date_time=start_date_time,
        end_date_time=end_date_time,
    )
    pd.DataFrame(hourly_insights_data).to_csv(
        path_or_buf="hourly_insights_airqo_data.csv", index=False
    )

    airqo_data = average_insights_data(frequency="daily", data=hourly_insights_data)
    pd.DataFrame(airqo_data).to_csv(
        path_or_buf="daily_insights_airqo_data.csv", index=False
    )
def average_insights_data(**kwargs):
    """Resolve a date window from ``kwargs``, fetch hourly insights for it,
    and average them down to daily frequency.

    Args:
        **kwargs: Forwarded to ``get_date_time_values`` which extracts the
            start/end timestamps (exact keys defined by that helper).

    Returns:
        dict: ``{"data": [...]}`` daily-averaged records, NaN-sanitised
        for XCom serialisation.
    """
    from airqo_etl_utils.app_insights_utils import (
        query_insights_data,
        average_insights_data,
    )
    from airqo_etl_utils.commons import get_date_time_values, fill_nan

    window_start, window_end = get_date_time_values(**kwargs)

    hourly_records = query_insights_data(
        freq="hourly",
        start_date_time=window_start,
        end_date_time=window_end,
    )
    daily_records = average_insights_data(frequency="daily", data=hourly_records)

    return {"data": fill_nan(data=daily_records)}
def average_insights_data():
    """Average today's hourly insights into daily records.

    Builds a window spanning the current UTC day (00:00:00Z to 23:59:59Z),
    queries hourly insights for it, and averages them to daily frequency.

    Returns:
        dict: ``{"data": [...]}`` daily-averaged records, NaN-sanitised
        for XCom serialisation.
    """
    from airqo_etl_utils.app_insights_utils import (
        query_insights_data,
        average_insights_data,
    )
    from airqo_etl_utils.commons import fill_nan
    # Fix: datetime.utcnow() is deprecated (Python 3.12+). Use an aware
    # UTC datetime instead — strftime output is identical because the
    # format strings below contain no %z/%Z directives.
    from datetime import datetime, timezone

    now = datetime.now(timezone.utc)
    start_date_time = datetime.strftime(now, "%Y-%m-%dT00:00:00Z")
    end_date_time = datetime.strftime(now, "%Y-%m-%dT23:59:59Z")

    hourly_insights_data = query_insights_data(
        freq="hourly",
        start_date_time=start_date_time,
        end_date_time=end_date_time,
    )
    ave_insights_data = average_insights_data(
        frequency="daily", data=hourly_insights_data
    )

    return {"data": fill_nan(data=ave_insights_data)}
def insights_cleanup_etl():
    """Back-fill placeholder insights over the current calendar window.

    Generates an "empty" placeholder record for every (site, hour) and
    (site, day) combination in the window, fetches the records already
    stored, and inserts only the combinations not yet present.

    Note: relies on ``datetime`` and the Airflow ``task`` decorator being
    bound at module level (imports not visible in this chunk — confirm
    against the top of the file).
    """
    from airqo_etl_utils.date import (
        date_to_str_days,
        first_day_of_week,
        last_day_of_week,
        first_day_of_month,
        last_day_of_month,
    )

    # Window: first day of the week containing the 1st of this month,
    # through the last day of the week containing the month's last day.
    start_date_time = date_to_str_days(
        first_day_of_week(first_day_of_month(date_time=datetime.now()))
    )
    end_date_time = date_to_str_days(
        last_day_of_week(last_day_of_month(date_time=datetime.now()))
    )

    @task(multiple_outputs=True)
    def create_empty_insights():
        """Build placeholder hourly and daily records for every site."""
        from airqo_etl_utils.airqo_api import AirQoApi
        from airqo_etl_utils.commons import fill_nan
        import random
        import pandas as pd
        from airqo_etl_utils.date import (
            date_to_str_days,
            date_to_str_hours,
        )

        airqo_api = AirQoApi()
        sites = airqo_api.get_sites(tenant="airqo")

        def placeholder_rows(frequency_label, dates, stringify):
            """One placeholder record per (timestamp, site) pair; a bad
            site entry is logged and skipped rather than aborting."""
            rows = []
            for date in dates:
                date_time = stringify(date)
                for site in sites:
                    try:
                        rows.append(
                            {
                                "time": date_time,
                                "pm2_5": random.uniform(50.0, 150.0),
                                "pm10": random.uniform(50.0, 150.0),
                                "empty": True,
                                "frequency": frequency_label,
                                "forecast": False,
                                "siteId": site["_id"],
                            }
                        )
                    except Exception as ex:
                        print(ex)
            return rows

        insights = placeholder_rows(
            "HOURLY",
            pd.date_range(start_date_time, end_date_time, freq="1H"),
            date_to_str_hours,
        )
        insights += placeholder_rows(
            "DAILY",
            pd.date_range(start_date_time, end_date_time, freq="24H"),
            date_to_str_days,
        )

        return {"data": fill_nan(data=insights)}

    @task(multiple_outputs=True)
    def query_insights_data():
        """Fetch all insights currently stored for the window."""
        from airqo_etl_utils.app_insights_utils import query_insights_data
        from airqo_etl_utils.commons import fill_nan

        stored = query_insights_data(
            start_date_time=start_date_time,
            end_date_time=end_date_time,
            all_data=True,
            freq="",
        )

        return {"data": fill_nan(data=stored)}

    @task(multiple_outputs=True)
    def filter_insights(empty_insights_data: dict, available_insights_data: dict):
        """Keep only placeholder rows whose (siteId, time, frequency) is
        absent from the stored data — keep=False drops every duplicated
        combination from the concatenation."""
        from airqo_etl_utils.commons import fill_nan, un_fill_nan
        import pandas as pd

        stored_df = pd.DataFrame(data=un_fill_nan(available_insights_data.get("data")))
        placeholder_df = pd.DataFrame(data=un_fill_nan(empty_insights_data.get("data")))

        missing_df = pd.concat([placeholder_df, stored_df]).drop_duplicates(
            keep=False, subset=["siteId", "time", "frequency"]
        )

        return {"data": fill_nan(data=missing_df.to_dict(orient="records"))}

    @task()
    def load(insights_data: dict):
        """Insert the surviving placeholder records."""
        from airqo_etl_utils.commons import un_fill_nan

        records = un_fill_nan(insights_data.get("data"))

        from airqo_etl_utils.app_insights_utils import save_insights_data

        save_insights_data(insights_data=records, action="insert", partition=2)

    empty_insights = create_empty_insights()
    available_insights = query_insights_data()
    filtered_insights = filter_insights(
        empty_insights_data=empty_insights,
        available_insights_data=available_insights,
    )
    load(insights_data=filtered_insights)