示例#1
0
    def extract(**kwargs):
        """Extract hourly KCCA measurements for a caller-supplied time window.

        All keyword arguments are forwarded unchanged to
        ``get_date_time_values``, which yields the start and end of the
        window that is then requested from ``extract_kcca_measurements``.

        Returns:
            dict: A single-entry mapping whose ``"data"`` key holds the
            extracted measurements after being passed through ``fill_nan``.
        """
        from airqo_etl_utils.kcca_utils import extract_kcca_measurements
        from airqo_etl_utils.commons import fill_nan, get_date_time_values

        window_start, window_end = get_date_time_values(**kwargs)

        measurements = extract_kcca_measurements(
            start_time=window_start,
            end_time=window_end,
            freq="hourly",
        )
        cleaned = fill_nan(measurements)

        return {"data": cleaned}
示例#2
0
    def extract():
        """Extract raw KCCA measurements for the hour ending now (UTC).

        The window runs from one hour before the current UTC time up to the
        current UTC time; both bounds are formatted with ``date_to_str``
        before being handed to ``extract_kcca_measurements``.

        Returns:
            dict: A single-entry mapping whose ``"data"`` key holds the
            extracted measurements after being passed through ``fill_nan``.
        """
        from airqo_etl_utils.kcca_utils import extract_kcca_measurements
        from airqo_etl_utils.commons import fill_nan
        from airqo_etl_utils.date import date_to_str
        from datetime import datetime, timedelta

        # Read the clock exactly once so both bounds derive from the same
        # instant and the window is precisely one hour long. Two separate
        # reads could straddle a second boundary and skew the window.
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
        # It is kept here because how date_to_str treats timezone-aware
        # datetimes is not visible from this function - confirm before
        # switching to datetime.now(timezone.utc).
        now = datetime.utcnow()
        start_time = date_to_str(now - timedelta(hours=1))
        end_time = date_to_str(now)

        kcca_data = extract_kcca_measurements(
            start_time=start_time,
            end_time=end_time,
            freq="raw",
        )

        return {"data": fill_nan(data=kcca_data)}
示例#3
0
    def extract():
        """Extract hourly KCCA measurements for the previous UTC hour.

        The reference instant is the current UTC time minus one hour. The
        start bound is that instant formatted by ``date_to_str_hours``
        (presumably the top of the hour - confirm against that helper); the
        end bound is the same hour with minutes and seconds fixed at 59:59.

        Returns:
            dict: A single-entry mapping whose ``"data"`` key holds the
            extracted measurements after being passed through ``fill_nan``.
        """
        from airqo_etl_utils.date import date_to_str_hours
        from airqo_etl_utils.kcca_utils import extract_kcca_measurements
        from airqo_etl_utils.commons import fill_nan
        from datetime import datetime, timedelta

        previous_hour = datetime.utcnow() - timedelta(hours=1)
        window_start = date_to_str_hours(previous_hour)
        # The format string hard-codes ":59:59", so only the date and hour
        # components are taken from the reference instant.
        window_end = previous_hour.strftime("%Y-%m-%dT%H:59:59Z")

        measurements = extract_kcca_measurements(
            start_time=window_start,
            end_time=window_end,
            freq="hourly",
        )
        cleaned = fill_nan(measurements)

        return {"data": cleaned}
示例#4
0
def kcca_hourly_measurements(start_date_time: str, end_date_time: str):
    """Extract hourly KCCA measurements and dump every transformed form to CSV.

    The raw extraction is written to ``outputs/kcca_unclean_data.csv``; the
    API, message-broker and BigQuery representations are each derived from
    that same raw extraction and written to CSV files in the current working
    directory. The BigQuery frame is passed through
    ``BigQueryApi.validate_data`` before it is written.

    Relies on ``pd`` (pandas) being available at module level.

    Args:
        start_date_time: Start of the extraction window, forwarded as
            ``start_time`` to ``extract_kcca_measurements``.
        end_date_time: End of the extraction window, forwarded as
            ``end_time`` to ``extract_kcca_measurements``.

    Returns:
        None. The function's only outputs are the CSV files it writes.
    """
    from airqo_etl_utils.kcca_utils import (
        extract_kcca_measurements,
        transform_kcca_measurements_for_api,
        transform_kcca_data_for_message_broker,
        transform_kcca_hourly_data_for_bigquery,
    )
    from airqo_etl_utils.bigquery_api import BigQueryApi

    raw_measurements = extract_kcca_measurements(
        start_time=start_date_time,
        end_time=end_date_time,
        freq="hourly",
    )
    raw_frame = pd.DataFrame(raw_measurements)
    raw_frame.to_csv(path_or_buf="outputs/kcca_unclean_data.csv", index=False)

    # API
    api_records = transform_kcca_measurements_for_api(raw_measurements)
    api_frame = pd.DataFrame(api_records)
    api_frame.to_csv(path_or_buf="kcca_cleaned_data.csv", index=False)

    # Message Broker
    broker_records = transform_kcca_data_for_message_broker(
        raw_measurements, frequency="hourly"
    )
    broker_frame = pd.DataFrame(broker_records)
    broker_frame.to_csv(path_or_buf="kcca_message_broker_data.csv", index=False)

    # Big Query
    bigquery_records = transform_kcca_hourly_data_for_bigquery(
        data=raw_measurements
    )
    bigquery_frame = pd.DataFrame(bigquery_records)
    client = BigQueryApi()
    # Validation takes its column lists and target table from the client.
    validated_frame = client.validate_data(
        dataframe=bigquery_frame,
        columns=client.hourly_measurements_columns,
        numeric_columns=client.hourly_measurements_numeric_columns,
        table=client.hourly_measurements_table,
    )
    validated_frame.to_csv(path_or_buf="kcca_data_for_bigquery.csv", index=False)