def extract(**kwargs):
    """Pull hourly KCCA measurements for the window described by ``kwargs``.

    The keyword arguments are forwarded unchanged to
    ``get_date_time_values``, which supplies the start and end of the
    extraction window.

    Returns:
        A dict with a single ``"data"`` key holding the extracted
        measurements after they have been passed through ``fill_nan``.
    """
    from airqo_etl_utils.kcca_utils import extract_kcca_measurements
    from airqo_etl_utils.commons import fill_nan, get_date_time_values

    window_start, window_end = get_date_time_values(**kwargs)
    measurements = extract_kcca_measurements(
        start_time=window_start,
        end_time=window_end,
        freq="hourly",
    )
    # presumably fill_nan swaps NaN values for something serialisable --
    # confirm against airqo_etl_utils.commons
    return {"data": fill_nan(measurements)}
def extract():
    """Pull raw KCCA measurements covering the most recent hour.

    The window runs from one hour before the current UTC time up to the
    current UTC time. Both bounds are derived from a single clock reading,
    so the window is exactly one hour wide.

    Returns:
        A dict with a single ``"data"`` key holding the extracted
        measurements after they have been passed through ``fill_nan``.
    """
    from airqo_etl_utils.kcca_utils import extract_kcca_measurements
    from airqo_etl_utils.commons import fill_nan
    from airqo_etl_utils.date import date_to_str
    from datetime import datetime, timedelta, timezone

    # Read the clock once so start and end are consistent with each other.
    # ``datetime.utcnow()`` is deprecated since Python 3.12; dropping tzinfo
    # keeps the value a naive UTC datetime, as ``date_to_str`` received before.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    start_time = date_to_str(now - timedelta(hours=1))
    end_time = date_to_str(now)

    kcca_data = extract_kcca_measurements(
        start_time=start_time,
        end_time=end_time,
        freq="raw",
    )
    return {"data": fill_nan(data=kcca_data)}
def extract():
    """Pull hourly KCCA measurements for the previous clock hour (UTC).

    The reference instant is the current UTC time minus one hour. The window
    start is that instant formatted by ``date_to_str_hours``; the window end
    is the same date and hour with minutes and seconds fixed at ``59:59``.

    Returns:
        A dict with a single ``"data"`` key holding the extracted
        measurements after they have been passed through ``fill_nan``.
    """
    from airqo_etl_utils.date import date_to_str_hours
    from airqo_etl_utils.kcca_utils import extract_kcca_measurements
    from airqo_etl_utils.commons import fill_nan
    from datetime import datetime, timedelta

    previous_hour = datetime.utcnow() - timedelta(hours=1)

    # presumably date_to_str_hours truncates to the top of the hour -- confirm
    window_start = date_to_str_hours(previous_hour)
    # End bound: last second of that same hour.
    window_end = previous_hour.strftime("%Y-%m-%dT%H:59:59Z")

    measurements = extract_kcca_measurements(
        start_time=window_start,
        end_time=window_end,
        freq="hourly",
    )
    return {"data": fill_nan(measurements)}
def kcca_hourly_measurements(start_date_time: str, end_date_time: str):
    """Extract hourly KCCA data and dump every transformed variant to CSV.

    The raw extraction and three derived datasets (API, message broker,
    BigQuery) are each written to their own CSV file without the index
    column. The BigQuery dataset is passed through
    ``BigQueryApi.validate_data`` before being written. Nothing is returned.

    Args:
        start_date_time: Start of the extraction window, passed to
            ``extract_kcca_measurements`` as ``start_time``.
        end_date_time: End of the extraction window, passed to
            ``extract_kcca_measurements`` as ``end_time``.
    """
    from airqo_etl_utils.kcca_utils import (
        extract_kcca_measurements,
        transform_kcca_measurements_for_api,
        transform_kcca_data_for_message_broker,
        transform_kcca_hourly_data_for_bigquery,
    )
    from airqo_etl_utils.bigquery_api import BigQueryApi

    def dump_csv(records, destination):
        """Write ``records`` to ``destination`` as CSV, omitting the index."""
        pd.DataFrame(records).to_csv(path_or_buf=destination, index=False)

    raw_measurements = extract_kcca_measurements(
        start_time=start_date_time,
        end_time=end_date_time,
        freq="hourly",
    )
    # NOTE(review): only this file goes under "outputs/"; the rest land in
    # the working directory. The directory presumably has to exist already
    # -- confirm whether the difference is intentional.
    dump_csv(raw_measurements, "outputs/kcca_unclean_data.csv")

    # API
    api_payload = transform_kcca_measurements_for_api(raw_measurements)
    dump_csv(api_payload, "kcca_cleaned_data.csv")

    # Message Broker
    broker_payload = transform_kcca_data_for_message_broker(
        raw_measurements, frequency="hourly"
    )
    dump_csv(broker_payload, "kcca_message_broker_data.csv")

    # Big Query
    bigquery_frame = pd.DataFrame(
        transform_kcca_hourly_data_for_bigquery(data=raw_measurements)
    )
    bigquery_api = BigQueryApi()
    validated_frame = bigquery_api.validate_data(
        dataframe=bigquery_frame,
        columns=bigquery_api.hourly_measurements_columns,
        numeric_columns=bigquery_api.hourly_measurements_numeric_columns,
        table=bigquery_api.hourly_measurements_table,
    )
    validated_frame.to_csv(path_or_buf="kcca_data_for_bigquery.csv", index=False)