def extract_kcca_measurements(start_time: str, end_time: str, freq: str) -> list: if freq.lower() == "hourly": interval = "6H" elif freq.lower() == "daily": interval = "48H" else: interval = "1H" dates = pd.date_range(start_time, end_time, freq=interval) measurements = [] last_date_time = dates.values[len(dates.values) - 1] for date in dates: start = date_to_str(date) end_date_time = date + timedelta(hours=dates.freq.n) if np.datetime64(end_date_time) > last_date_time: end = end_time else: end = date_to_str(end_date_time) print(start_time + " : " + end_time) range_measurements = query_kcca_measurements(freq, start, end) measurements.extend(range_measurements) measurements_df = pd.json_normalize(measurements) return measurements_df.to_dict(orient="records")
def insights_forecast(): from airqo_etl_utils.app_insights_utils import ( create_insights_data, get_forecast_data, transform_old_forecast, ) from airqo_etl_utils.date import date_to_str, first_day_of_week, first_day_of_month now = datetime.now() start_date_time = date_to_str( first_day_of_week(first_day_of_month(date_time=now))) end_date_time = date_to_str(now) old_forecast = transform_old_forecast(start_date_time=start_date_time, end_date_time=end_date_time) pd.DataFrame(old_forecast).to_csv(path_or_buf="old_forecast_data.csv", index=False) forecast_data = get_forecast_data("airqo") pd.DataFrame(forecast_data).to_csv(path_or_buf="forecast_data.csv", index=False) insights_data = create_insights_data(data=forecast_data) pd.DataFrame(insights_data).to_csv( path_or_buf="insights_forecast_data.csv", index=False)
def get_hourly_data( self, start_date_time: str, end_date_time: str, columns: list, table: str ) -> pd.DataFrame: try: query = f""" SELECT {', '.join(map(str, columns))} FROM `{table}` WHERE timestamp >= '{start_date_time}' and timestamp <= '{end_date_time}' """ dataframe = self.client.query(query=query).result().to_dataframe() except Exception as ex: print(ex) query = f""" SELECT {', '.join(map(str, columns))} FROM `{table}` WHERE time >= '{start_date_time}' and time <= '{end_date_time}' """ dataframe = self.client.query(query=query).result().to_dataframe() dataframe["timestamp"] = dataframe["timestamp"].apply(lambda x: date_to_str(x)) if "time" in list(dataframe.columns): dataframe["time"] = dataframe["time"].apply(lambda x: date_to_str(x)) return dataframe
def extract_airqo_devices_deployment_history() -> list: airqo_api = AirQoApi() devices = airqo_api.get_devices(tenant="airqo") devices_history = [] for device in devices: try: maintenance_logs = airqo_api.get_maintenance_logs( tenant="airqo", device=device["name"], activity_type="deployment" ) if not maintenance_logs or len(maintenance_logs) <= 1: continue log_df = pd.DataFrame(maintenance_logs) log_df = log_df.dropna(subset=["date"]) log_df["site_id"] = ( log_df["site_id"].fillna(method="bfill").fillna(method="ffill") ) log_df = log_df.dropna(subset=["site_id"]) log_df["start_time"] = pd.to_datetime(log_df["date"]) log_df = log_df.sort_values(by="start_time") log_df["end_time"] = log_df["start_time"].shift(-1) log_df["end_time"] = log_df["end_time"].fillna(datetime.utcnow()) log_df["start_time"] = log_df["start_time"].apply(lambda x: date_to_str(x)) log_df["end_time"] = log_df["end_time"].apply(lambda x: date_to_str(x)) if len(set(log_df["site_id"].tolist())) == 1: continue for _, raw in log_df.iterrows(): device_history = { "device": raw["device"], "device_id": device["_id"], "start_time": raw["start_time"], "end_time": raw["end_time"], "site_id": raw["site_id"], } devices_history.append(device_history) except Exception as ex: print(ex) traceback.print_exc() return devices_history
def extract(): from airqo_etl_utils.kcca_utils import extract_kcca_measurements from airqo_etl_utils.commons import fill_nan from airqo_etl_utils.date import date_to_str from datetime import datetime, timedelta start_time = date_to_str(datetime.utcnow() - timedelta(hours=1)) end_time = date_to_str(datetime.utcnow()) kcca_data = extract_kcca_measurements(start_time=start_time, end_time=end_time, freq="raw") return dict({"data": fill_nan(data=kcca_data)})
def get_airqo_data(freq: str, start_time: str = None, end_time: str = None) -> list: airqo_api = AirQoApi() devices = airqo_api.get_devices(tenant="airqo", all_devices=False) measurements = [] start = (str_to_date(start_time) if start_time else datetime.utcnow() - timedelta(days=7)) end = str_to_date(end_time) if end_time else datetime.utcnow() start_time = (date_to_str_days(start) if freq == "daily" else date_to_str_hours(start)) end_time = date_to_str_days(end) if freq == "daily" else date_to_str_hours( end) frequency = get_airqo_api_frequency(freq=freq) dates = pd.date_range(start_time, end_time, freq=frequency) last_date_time = dates.values[len(dates.values) - 1] for device in devices: for date in dates: start = date_to_str(date) end_date_time = date + timedelta(hours=dates.freq.n) if np.datetime64(end_date_time) > last_date_time: end = end_time else: end = date_to_str(end_date_time) try: events = airqo_api.get_events( tenant="airqo", start_time=start, frequency=freq, end_time=end, device=device["name"], ) measurements.extend(events) except Exception as ex: print(ex) traceback.print_exc() insights = format_measurements_to_insights(data=measurements) return insights
def get_weather_data_from_tahmo(start_time=None, end_time=None, tenant="airqo"): airqo_api = AirQoApi() airqo_sites = airqo_api.get_sites(tenant=tenant) station_codes = [] for site in airqo_sites: try: if "nearest_tahmo_station" in dict(site).keys(): station_codes.append(site["nearest_tahmo_station"]["code"]) except Exception as ex: print(ex) measurements = [] tahmo_api = TahmoApi() frequency = get_frequency(start_time=start_time, end_time=end_time) dates = pd.date_range(start_time, end_time, freq=frequency) last_date_time = dates.values[len(dates.values) - 1] for date in dates: start = date_to_str(date) end_date_time = date + timedelta(hours=dates.freq.n) if np.datetime64(end_date_time) > last_date_time: end = end_time else: end = date_to_str(end_date_time) print(start + " : " + end) range_measurements = tahmo_api.get_measurements( start, end, station_codes) measurements.extend(range_measurements) if len(measurements) != 0: measurements_df = pd.DataFrame(data=measurements) else: measurements_df = pd.DataFrame( [], columns=["value", "variable", "station", "time"]) return measurements_df.to_dict(orient="records") clean_measurements_df = remove_invalid_dates(dataframe=measurements_df, start_time=start_time, end_time=end_time) return clean_measurements_df.to_dict(orient="records")
def resample_data(data: pd.DataFrame, frequency: str) -> pd.DataFrame: data = data.dropna(subset=["time"]) data["time"] = pd.to_datetime(data["time"]) data = data.sort_index(axis=0) if "latitude" in data.columns and "longitude" in data.columns: original_df = data[["time", "latitude", "longitude"]] else: original_df = data[["time"]] resample_value = "24H" if frequency.lower() == "daily" else "1H" averages = pd.DataFrame(data.resample(resample_value, on="time").mean()) averages["time"] = averages.index averages["time"] = averages["time"].apply(lambda x: date_to_str(x)) averages = averages.reset_index(drop=True) if resample_value == "1H": original_df["time"] = original_df["time"].apply( lambda x: date_to_str_hours(x)) elif resample_value == "24H": original_df["time"] = original_df["time"].apply( lambda x: date_to_str_days(x)) else: original_df["time"] = original_df["time"].apply( lambda x: date_to_str(x)) if "latitude" in original_df.columns and "longitude" in original_df.columns: def reset_latitude_or_longitude(time: str, field: str): date_row = pd.DataFrame( original_df.loc[original_df["time"] == time]) if date_row.empty: return time return (date_row.iloc[0]["latitude"] if field == "latitude" else date_row.iloc[0]["longitude"]) averages["latitude"] = averages.apply( lambda row: reset_latitude_or_longitude(row["time"], "latitude"), axis=1) averages["longitude"] = averages.apply( lambda row: reset_latitude_or_longitude(row["time"], "longitude"), axis=1) return averages
def extract_insights_forecast_data(): from airqo_etl_utils.app_insights_utils import ( create_insights_data, transform_old_forecast, ) from airqo_etl_utils.date import ( date_to_str, first_day_of_week, first_day_of_month, ) from airqo_etl_utils.commons import fill_nan now = datetime.now() start_date_time = date_to_str( first_day_of_week(first_day_of_month(date_time=now))) end_date_time = date_to_str(now) forecast_data = transform_old_forecast(start_date_time=start_date_time, end_date_time=end_date_time) insights_data = create_insights_data(data=forecast_data) return dict({"data": fill_nan(data=insights_data)})
def query_insights_data(freq: str, start_date_time: str, end_date_time: str, forecast=False, all_data=False) -> list: airqo_api = AirQoApi() insights = [] frequency = get_frequency(start_time=start_date_time, end_time=end_date_time) dates = pd.date_range(start_date_time, end_date_time, freq=frequency) last_date_time = dates.values[len(dates.values) - 1] for date in dates: start = date_to_str(date) query_end_date_time = date + timedelta(hours=dates.freq.n) if np.datetime64(query_end_date_time) > last_date_time: end = end_date_time else: end = date_to_str(query_end_date_time) try: api_results = airqo_api.get_app_insights( start_time=start, frequency=freq, end_time=end, forecast=forecast, all_data=all_data, ) insights.extend(api_results) except Exception as ex: print(ex) traceback.print_exc() return insights
def remove_invalid_dates(dataframe: pd.DataFrame, start_time: str, end_time: str) -> pd.DataFrame: start = pd.to_datetime(start_time) end = pd.to_datetime(end_time) dataframe["time"] = pd.to_datetime(dataframe["time"]) data_frame = dataframe.set_index(["time"]) time_data_frame = data_frame.loc[(data_frame.index >= start) & (data_frame.index <= end)] time_data_frame["time"] = time_data_frame.index time_data_frame["time"] = time_data_frame["time"].apply( lambda x: date_to_str(x)) time_data_frame = time_data_frame.reset_index(drop=True) return time_data_frame
def extract_airqo_data_from_thingspeak( start_time: str, end_time: str, all_devices: bool ) -> list: thingspeak_base_url = configuration.THINGSPEAK_CHANNEL_URL airqo_api = AirQoApi() airqo_devices = airqo_api.get_devices(tenant="airqo", all_devices=all_devices) read_keys = airqo_api.get_read_keys(devices=airqo_devices) channels_data = [] frequency = get_frequency(start_time=start_time, end_time=end_time) def get_field_8_value(x: str, position: int): try: values = x.split(",") return values[position] except Exception as exc: print(exc) return None dates = pd.date_range(start_time, end_time, freq=frequency) last_date_time = dates.values[len(dates.values) - 1] for device in airqo_devices: try: channel_id = str(device["device_number"]) for date in dates: start = date_to_str(date) end_date_time = date + timedelta(hours=dates.freq.n) if np.datetime64(end_date_time) > last_date_time: end = end_time else: end = date_to_str(end_date_time) read_key = read_keys[str(channel_id)] channel_url = f"{thingspeak_base_url}{channel_id}/feeds.json?start={start}&end={end}&api_key={read_key}" print(f"{channel_url}") data = json.loads( requests.get(channel_url, timeout=100.0).content.decode("utf-8") ) if (data != -1) and ("feeds" in data): dataframe = pd.DataFrame(data["feeds"]) if dataframe.empty: print( f"{channel_id} does not have data between {start} and {end}" ) continue channel_df = pd.DataFrame( data=[], columns=[ "time", "s1_pm2_5", "s2_pm2_5", "s1_pm10", "device_id", "site_id", "s2_pm10", "latitude", "longitude", "altitude", "wind_speed", "satellites", "hdop", "internalTemperature", "internalHumidity", "battery", "temperature", "humidity", "pressure", "externalAltitude", ], ) channel_df["s1_pm2_5"] = dataframe["field1"].apply( lambda x: get_valid_value(x, "pm2_5") ) channel_df["s1_pm10"] = dataframe["field2"].apply( lambda x: get_valid_value(x, "pm10") ) channel_df["s2_pm2_5"] = dataframe["field3"].apply( lambda x: get_valid_value(x, "pm2_5") ) channel_df["s2_pm10"] = dataframe["field4"].apply( lambda x: get_valid_value(x, "pm10") ) channel_df["latitude"] = dataframe["field5"].apply( lambda x: get_valid_value(x, "latitude") ) channel_df["longitude"] = dataframe["field6"].apply( lambda x: get_valid_value(x, "longitude") ) channel_df["battery"] = dataframe["field7"].apply( lambda x: get_valid_value(x, "battery") ) if "field8" in dataframe.columns: try: channel_df["latitude"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 0), "latitude" ) ) channel_df["longitude"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 1), "longitude" ) ) channel_df["altitude"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 2), "altitude" ) ) channel_df["wind_speed"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 3), "wind_speed" ) ) channel_df["satellites"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 4), "satellites" ) ) channel_df["hdop"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 5), "hdop" ) ) channel_df["internalTemperature"] = dataframe[ "field8" ].apply( lambda x: get_valid_value( get_field_8_value(x, 6), "externalTemperature" ) ) channel_df["internalHumidity"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 7), "externalHumidity" ) ) channel_df["temperature"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 8), "externalTemperature" ) ) channel_df["humidity"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 9), "externalHumidity" ) ) channel_df["pressure"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 10), "pressure" ) ) channel_df["externalAltitude"] = dataframe["field8"].apply( lambda x: get_valid_value( get_field_8_value(x, 11), "altitude" ) ) except Exception as ex: traceback.print_exc() print(ex) channel_df["time"] = dataframe["created_at"] channel_df["device_id"] = device["_id"] channel_df["site_id"] = device["site"]["_id"] channel_df["device_number"] = device["device_number"] channel_df["device"] = device["name"] channel_df["frequency"] = "raw" channels_data.extend(channel_df.to_dict(orient="records")) except Exception as ex: print(ex) traceback.print_exc() channel_data_df = pd.DataFrame(channels_data) clean_channel_data_df = remove_invalid_dates( dataframe=channel_data_df, start_time=start_time, end_time=end_time ) return clean_channel_data_df.to_dict(orient="records")
def predict_time_to_string(time: str): date_time = predict_str_to_date(time) return date_to_str(date_time)