def get_facility_first_seen(period: Optional[str] = None) -> List[FacilitySeen]:
    """Check for facility codes that appear in scada data but not in the
    facility table, and return the ones that were found.

    The query can be expensive, so don't run it too often.
    """
    engine = get_database_engine()

    __query = """
        select distinct
            fs.facility_code,
            fs.network_id
        from facility_scada fs
        where fs.facility_code not in (select distinct code from facility)
        {period_query}
    """

    period_query = f"and fs.trading_interval > now() - interval '{period}'" if period else ""

    query = __query.format(period_query=period_query)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    records: List[FacilitySeen] = [FacilitySeen(code=r[0], network_id=r[1]) for r in row]

    return records

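# Usage sketch (not part of the original module): the period string is
# interpolated directly into a Postgres interval literal, so values such as
# "7 days" or "1 month" are assumed valid here.
def _example_check_new_facilities() -> None:
    for seen in get_facility_first_seen(period="7 days"):
        logger.info("New facility %s on network %s", seen.code, seen.network_id)
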
def get_v2_compat_data() -> List[Dict]:
    query = """
        select
            trading_interval at time zone 'AEST' as trading_interval,
            facility_code,
            generated
        from facility_scada
        where
            facility_code in (
                'LD01', 'LD02', 'LD03', 'LD04',
                'BW01', 'BW02', 'BW03', 'BW04',
                'MP1', 'ER01', 'ER02', 'ER03', 'ER04',
                'MM3', 'MM4', 'MP2',
                'REDBANK1', 'VP5', 'VP6', 'WW7', 'WW8'
            )
            and trading_interval >= '2021-01-01 00:00:00+10'
        order by trading_interval asc;
    """

    engine = get_database_engine()

    with engine.connect() as c:
        logger.debug(query)
        res = list(c.execute(query))

    results = [
        ScadaResults(trading_interval=i[0], code=i[1], generated=i[2]).dict() for i in res
    ]

    return results

def get_network_data_ranges() -> list[NetworkDataDateRanges]:
    """Runs a query to get the network data ranges"""
    engine = get_database_engine()

    stmt = sql(
        """
        select
            fs.network_id,
            min(fs.trading_interval) as scada_min,
            max(fs.trading_interval) as scada_max
        from facility_scada fs
        where fs.is_forecast is FALSE
        group by fs.network_id;
        """
    )

    with engine.connect() as c:
        results = list(c.execute(stmt))

    if not results:
        raise Exception("No results for data range query in update_network_data_ranges")

    models = [NetworkDataDateRanges(network=i[0], data_min=i[1], data_max=i[2]) for i in results]

    return models

def get_facility_first_seen() -> List[FacilitySeen]:
    """Check for facility codes that appear in scada data but not in the
    facility table, and return the ones that were found.

    The query can be expensive, so don't run it too often.
    """
    engine = get_database_engine()

    __query = """
        select distinct
            fs.facility_code,
            fs.network_id
        from facility_scada fs
        where fs.facility_code not in (select distinct code from facility);
    """

    with engine.connect() as c:
        logger.debug(__query)
        row = list(c.execute(__query))

    records: List[FacilitySeen] = [FacilitySeen(code=r[0], network_id=r[1]) for r in row]

    return records

def get_generated(
    network_region: str,
    date_min: datetime,
    date_max: datetime,
    network: NetworkSchema,
    fueltech_id: Optional[str] = None,
) -> List[Dict]:
    """Gets generated values for a date range for a network and network region
    and optionally for a single fueltech"""

    # @TODO support refresh energies for a single duid or station
    query = get_generated_query(network_region, date_min, date_max, network, fueltech_id)

    engine = get_database_engine()

    results = []

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            try:
                results = list(c.execute(query))
            except Exception as e:
                logger.error(e)

    logger.debug("Got back {} rows".format(len(results)))

    return results

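# Usage sketch (illustrative, not from the source): fetch a day of generated
# values for a single region and fueltech. NetworkNEM is referenced elsewhere
# in this codebase; the region and fueltech id here are assumed examples.
def _example_get_generated_day() -> None:
    rows = get_generated(
        network_region="NSW1",
        date_min=datetime(2021, 1, 1),
        date_max=datetime(2021, 1, 2),
        network=NetworkNEM,
        fueltech_id="black_coal",
    )
    logger.info("Got %d generated rows", len(rows))
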
def refresh_material_views(
    view_name: Optional[str] = None, concurrently: bool = True, with_data: bool = True
) -> None:
    """Refresh materialized views"""
    __query = "REFRESH MATERIALIZED VIEW {is_concurrent} {view} {data_spec}"

    engine = get_database_engine()

    views = []

    if view_name:
        views.append(view_name)
    else:
        views = get_materialized_view_names()

    with engine.connect() as c:
        for v in views:
            query = __query.format(
                view=v,
                is_concurrent="concurrently" if concurrently else "",
                data_spec="with data" if with_data else "",
            )
            logger.debug(query)

            try:
                c.execution_options(isolation_level="AUTOCOMMIT").execute(query)
            except Exception as e:
                logger.error("Could not run materialized view refresh: {}".format(e))

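# Usage sketch (assumed, not from the source): refresh a single view without
# CONCURRENTLY. The view name is illustrative only.
def _example_refresh_one_view() -> None:
    refresh_material_views(view_name="mv_facility_all", concurrently=False)
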
def _clear_scada_for_range(item: Dict[str, Any]) -> None:
    # We need to purge old records in that date range
    all_dates = [i["trading_interval"] for i in item["records"]]
    min_date = min(all_dates)
    max_date = max(all_dates)

    duids = list(set([i["facility_code"] for i in item["records"]]))

    __sql = """
        update facility_scada
        set generated = null
        where
            network_id = 'NEM'
            and facility_code in ({fac_codes})
            and trading_interval >= '{min_date}'
            and trading_interval <= '{max_date}'
    """

    query = __sql.format(fac_codes=duid_in_case(duids), min_date=min_date, max_date=max_date)

    engine = get_database_engine()

    with engine.connect() as c:
        logger.debug(query)
        c.execute(query)

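# Shape sketch (hypothetical, not from the source): _clear_scada_for_range only
# reads item["records"][n]["trading_interval"] and ["facility_code"], so an
# input of this form is assumed sufficient.
_EXAMPLE_CLEAR_SCADA_ITEM: Dict[str, Any] = {
    "records": [
        {"trading_interval": datetime(2021, 1, 1, 0, 5), "facility_code": "BW01"},
        {"trading_interval": datetime(2021, 1, 1, 0, 10), "facility_code": "BW02"},
    ]
}
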
def exec_aggregates_network_demand_query(
    date_min: datetime, date_max: datetime, network: NetworkSchema
) -> bool:
    resp_code: bool = False
    engine = get_database_engine()
    result = None

    if date_max < date_min:
        raise Exception(
            "exec_aggregates_network_demand_query: date_max ({}) is prior to date_min ({})".format(
                date_max, date_min
            )
        )

    query = aggregates_network_demand_query(date_min=date_min, date_max=date_max, network=network)

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            result = c.execute(query)

    logger.debug(result)

    return resp_code

def init_aggregation_policies() -> None:
    """Initializes the continuous aggregation policies"""
    # @TODO check what exists with query
    engine = get_database_engine()

    for view in _VIEW_MAP:
        if not view.aggregation_policy:
            logger.debug("Skipping {}".format(view.name))
            continue

        with engine.connect() as c:
            drop_query = remove_continuous_aggregation_query(view)

            try:
                logger.debug(drop_query)
                c.execute(drop_query)
            except Exception:
                logger.warning("Could not drop continuous aggregation query: {}".format(view.name))

            create_query = create_continuous_aggregation_query(view)
            logger.debug(create_query)

            try:
                c.execute(create_query)
            except Exception as e:
                logger.warning("Could not create continuous aggregation query: {}".format(e))

def crawlers_get_crawl_metadata() -> List[CrawlMetadata]:
    """Return metadata schemas for all crawlers from the database"""
    engine = get_database_engine()

    __query = """
        select
            cm.spider_name as name,
            cm.data->>'version' as version,
            cm.data->>'last_crawled' as last_crawled,
            cm.data->>'latest_processed' as last_processed,
            cm.data->>'server_latest' as server_latest,
            cm.data->>'force_run' as force_run
        from crawl_meta cm
        order by last_crawled desc;
    """

    _crawler_metas = []

    with engine.connect() as c:
        _crawler_metas = list(c.execute(__query))

    if not _crawler_metas:
        return []

    _crawler_meta_models = [CrawlMetadata(**i) for i in _crawler_metas]

    return _crawler_meta_models

def get_flows(
    date_min: datetime,
    date_max: datetime,
    network_region: str,
    network: NetworkSchema,
    flow: FlowDirection,
) -> List[Dict]:
    """Gets interconnector flow values for a date range, network region and flow direction"""

    query = get_flows_query(network_region, date_min, date_max, network, flow)

    engine = get_database_engine()

    results = []

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            try:
                results = list(c.execute(query))
            except Exception as e:
                logger.error(e)

    logger.debug("Got back {} flow rows".format(len(results)))

    return results

def store_stats_database(statset: StatsSet) -> int:
    s = SessionLocal()

    records_to_store = [i.dict() for i in statset.stats]

    stmt = insert(Stats).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "stat_date",
            "country",
            "stat_type",
        ],
        set_={
            "value": stmt.excluded.value,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    num_records = len(records_to_store)

    logger.info("Wrote {} records to database".format(num_records))

    return num_records

def bulkinsert_mms_items(
    table: ORMTableType,
    records: List[Dict],
    update_fields: Optional[List[Union[str, Column[Any]]]] = None,
) -> int:
    num_records = 0

    if not records:
        return 0

    sql_query = build_insert_query(table, update_fields)
    csv_content = generate_bulkinsert_csv_from_records(
        table, records, column_names=list(records[0].keys())
    )

    # @TODO check the scoping here
    engine = get_database_engine()
    conn = engine.raw_connection()

    try:
        cursor = conn.cursor()
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
        num_records = len(records)
        logger.info(f"Bulk inserted {len(records)} records")
    except Exception as generic_error:
        if hasattr(generic_error, "hide_parameters"):
            generic_error.hide_parameters = True  # type: ignore
        logger.error(generic_error)
    finally:
        engine.dispose()
        conn.close()

    return num_records

def gov_stats_cpi() -> Optional[OpennemDataSet]:
    engine = get_database_engine()

    query = country_stats_query(StatTypes.CPI)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    stats = [
        DataQueryResult(interval=i[0], result=i[1], group_by=i[2] if len(i) > 1 else None)
        for i in row
    ]

    if len(stats) < 1:
        logger.error("No results for gov_stats_cpi, returning blank set")
        return None

    result = stats_factory(
        stats,
        code="au.cpi",
        network=NetworkNEM,
        interval=human_to_interval("1Q"),
        period=human_to_period("all"),
        units=get_unit("cpi"),
        group_field="gov",
    )

    return result

def exec_aggregates_facility_daily_query(
    date_min: datetime, date_max: datetime, network: NetworkSchema
) -> bool:
    resp_code: bool = False
    engine = get_database_engine()
    result = None

    # @TODO should put this check everywhere
    # or place it all in a schema that validates
    if date_max < date_min:
        raise Exception(
            "exec_aggregates_facility_daily_query: date_max ({}) is prior to date_min ({})".format(
                date_max, date_min
            )
        )

    query = aggregates_facility_daily_query(date_min=date_min, date_max=date_max, network=network)

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            result = c.execute(query)

    logger.debug(result)

    # @NOTE rooftop fix for double counts
    if not DRY_RUN:
        run_rooftop_fix()

    return resp_code

def store_bom_records_temp_max(recs: List[Dict]) -> Dict[str, int]:
    # records_to_store = [i for i in recs if i["temp_max"] is not None]
    records_to_store = recs

    first = recs[0]

    update_field = "temp_max"

    if "temp_min" in first:
        update_field = "temp_min"

    engine = get_database_engine()
    session = SessionLocal()

    # insert
    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["observation_time", "station_id"],
        set_={update_field: getattr(stmt.excluded, update_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}

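# Shape sketch (hypothetical, not from the source): records are upserted into
# BomObservation keyed on (observation_time, station_id), carrying either a
# temp_max or temp_min value depending on the batch. Station id and values are
# illustrative only.
_EXAMPLE_BOM_RECORD: Dict = {
    "observation_time": datetime(2021, 1, 1, 9, 0),
    "station_id": "066214",
    "temp_max": 28.4,
}
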
def process_unit_solution(table):
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    records_to_store = []
    records_primary_keys = []

    for record in records:
        trading_interval = parse_date(record["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False)
        facility_code = normalize_duid(record["DUID"])

        if not trading_interval or not facility_code:
            continue

        # Since this can insert 1M+ records at a time we need to
        # do a separate in-memory check of primary key constraints
        # better way of doing this .. @TODO
        _unique_set = (trading_interval, facility_code, "NEM")

        if _unique_set not in records_primary_keys:
            records_to_store.append(
                {
                    "trading_interval": trading_interval,
                    "facility_code": facility_code,
                    "eoi_quantity": float(record["INITIALMW"]),
                    "network_id": "NEM",
                }
            )
            records_primary_keys.append(_unique_set)

    # free
    records_primary_keys = []

    logger.debug("Saving %d records", len(records_to_store))

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)

def update_facility_seen_range(
    include_first_seen: bool = False,
    facility_codes: Optional[List[str]] = None,
) -> bool:
    """Updates last seen and first seen. For each facility updates the date the
    facility was seen for the first and last time in the power data from FacilityScada.

    Args:
        include_first_seen (bool, optional): Include earliest seen time. Defaults to False.
        facility_codes (Optional[List[str]], optional): List of facility codes to update. Defaults to None.

    Returns:
        bool: Ran successfully
    """
    engine = get_database_engine()

    __query = get_update_seen_query(include_first_seen=include_first_seen, facility_codes=facility_codes)

    with engine.connect() as c:
        logger.debug(__query)
        c.execute(__query)

    slack_message("Ran facility_seen_range")

    return True

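# Usage sketch (assumed, not from the source): refresh the seen range for a
# couple of units, including their first-seen times. Facility codes are
# illustrative.
def _example_update_seen_for_units() -> None:
    update_facility_seen_range(include_first_seen=True, facility_codes=["BW01", "BW02"])
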
def test_insert():
    data = get_file_content()

    csv_reader = csv.DictReader(data.split("\n"))

    csv_buffer = StringIO()

    fieldnames = [
        "created_by",
        "created_at",
        "updated_at",
        "network_id",
        "trading_interval",
        "facility_code",
        "generated",
        "eoi_quantity",
    ]

    csvwriter = csv.DictWriter(
        csv_buffer,
        fieldnames=fieldnames,
    )

    for record in facility_scada_generate_records(csv_reader):
        csvwriter.writerow(record)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_buffer.seek(0)

def purge_views() -> None:
    """Remove views that aren't in the view table"""
    engine = get_database_engine()

    all_views_query = get_all_views_query()

    all_views = []

    with engine.connect() as c:
        result = list(c.execute(all_views_query))

        # Don't drop postgis or mapped views
        all_views = [
            i[0] for i in result if i[0] not in POSTGIS_VIEWS + [i.name for i in _VIEW_MAP]
        ]

    for view_name in all_views:
        with engine.connect() as c:
            c.execution_options(isolation_level="AUTOCOMMIT")

            query = "drop materialized view if exists {} cascade;".format(view_name)

            logger.info("Dropping view {}".format(view_name))
            logger.debug(query)

            try:
                c.execute(query)
            except Exception as e:
                logger.error("Error dropping view: {}".format(e))

def get_daily_fueltech_summary() -> DailySummary:
    engine = get_database_engine()
    _result = []

    query = daily_fueltech_summary_query()

    with engine.connect() as c:
        logger.debug(query)
        _result = list(c.execute(query))

    records = [
        DailySummaryResult(
            trading_day=i[0],
            network=i[1],
            fueltech_id=i[2],
            fueltech_label=i[3],
            renewable=i[4],
            energy=i[5],
            generated_total=i[6],
            demand_total=i[7],
            demand_proportion=i[8],
        )
        for i in _result
    ]

    ds = DailySummary(
        trading_day=records[0].trading_day, network=records[0].network, results=records
    )

    return ds

def process_nem_price(table: AEMOTableSchema) -> ControllerReturn:
    """Stores the NEM price for both dispatch price and trading price"""
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    price_field = "price"

    if table.full_name == "dispatch_price":
        price_field = "price_dispatch"

    for record in table.records:
        # @NOTE disable pk track
        trading_interval = parse_date(record["settlementdate"])

        primary_key = (trading_interval, record["regionid"])

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controllers.nem",
                "network_region": record["regionid"],
                "trading_interval": trading_interval,
                price_field: record["rrp"],
            }
        )

        cr.processed_records += 1

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={price_field: getattr(stmt.excluded, price_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting NEM price records")
        logger.error(e)
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr

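# Shape sketch (hypothetical, not from the source): the only keys read from
# each price record are settlementdate, regionid and rrp; the values below are
# illustrative.
_EXAMPLE_NEM_PRICE_RECORD: Dict = {
    "settlementdate": "2021/01/01 00:05:00",
    "regionid": "NSW1",
    "rrp": 45.50,
}
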
def power_flows_week(
    time_series: TimeSeries,
    network_region_code: str,
) -> Optional[OpennemDataSet]:
    engine = get_database_engine()

    query = interconnector_power_flow(time_series=time_series, network_region=network_region_code)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    if len(row) < 1:
        raise Exception("No results from query: {}".format(query))

    imports = [
        DataQueryResult(interval=i[0], result=i[2], group_by="imports" if len(i) > 1 else None)
        for i in row
    ]
    exports = [
        DataQueryResult(interval=i[0], result=i[3], group_by="exports" if len(i) > 1 else None)
        for i in row
    ]

    result = stats_factory(
        imports,
        # code=network_region_code or network.code,
        network=time_series.network,
        period=human_to_period("7d"),
        interval=human_to_interval("5m"),
        units=get_unit("power"),
        region=network_region_code,
        fueltech_group=True,
    )

    if not result:
        raise Exception("No results")

    result_exports = stats_factory(
        exports,
        # code=network_region_code or network.code,
        network=time_series.network,
        period=human_to_period("7d"),
        interval=human_to_interval("5m"),
        units=get_unit("power"),
        region=network_region_code,
        fueltech_group=True,
    )

    result.append_set(result_exports)

    return result

def init_fueltechs() -> None:
    engine = get_database_engine()

    with engine.connect() as c:
        for facility_code, fueltech_id in FUELTECHS.items():
            c.execute(get_update_sql(facility_code, fueltech_id))
            logger.debug("Updated fueltech for {}".format(facility_code))

    logger.info("Done updating facility fueltechs")

def store_mms_table(table: AEMOTableSchema) -> int:
    if not table.name:
        logger.error("Table has no name!: {}".format(table))
        return 0

    # Get the table ORM model
    table_schema = get_mms_model(table)

    if not table_schema:
        logger.error("No table ORM schema for table name {}".format(table.name))
        return 0

    # update all non-primary key fields. get them dynamically.
    update_fields = [i.name for i in table_schema.__table__.columns if not i.primary_key]  # type: ignore

    records_to_store = table.records

    sql_query = ""

    try:
        sql_query = build_insert_query(table_schema, update_fields)
    except Exception as e:
        logger.error(e)
        return 0

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = ""

    try:
        csv_content = generate_csv_from_records(
            table_schema,
            records_to_store,
            column_names=list(records_to_store[0].keys()),
        )
    except Exception as e:
        logger.error(e)
        return 0

    if not csv_content:
        return 0

    logger.debug(csv_content.getvalue().splitlines()[:2])

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    logger.info("{}: Inserted {} records".format(table.full_name, len(records_to_store)))

    return len(records_to_store)

def run_rooftop_fix() -> None:
    query = "delete from at_facility_daily where trading_day < '2018-03-01 00:00:00+00' and network_id='AEMO_ROOFTOP';"

    engine = get_database_engine()

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            c.execute(query)

def store_wem_facility_intervals(balancing_set: WEMFacilityIntervalSet) -> ControllerReturn:
    """Persist WEM facility intervals"""
    engine = get_database_engine()
    session = get_scoped_session()
    cr = ControllerReturn()

    records_to_store = []

    if not balancing_set.intervals:
        return cr

    cr.total_records = len(balancing_set.intervals)
    cr.server_latest = balancing_set.server_latest

    for _rec in balancing_set.intervals:
        records_to_store.append(
            {
                "created_by": "wem.controller",
                "network_id": "WEM",
                "trading_interval": _rec.trading_interval,
                "facility_code": _rec.facility_code,
                "generated": _rec.generated,
                "eoi_quantity": _rec.eoi_quantity,
            }
        )
        cr.processed_records += 1

    if len(records_to_store) < 1:
        return cr

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "facility_code", "is_forecast"],
        set_={
            "generated": stmt.excluded.generated,
            "eoi_quantity": stmt.excluded.eoi_quantity,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = len(records_to_store)
    except Exception as e:
        logger.error("Error: {}".format(e))
        cr.errors = len(records_to_store)
        cr.error_detail.append(str(e))
    finally:
        session.close()
        engine.dispose()

    return cr

def process_dispatch_interconnectorres(table: AEMOTableSchema) -> ControllerReturn:
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    for record in table.records:
        primary_key = (record["settlementdate"], record["interconnectorid"])

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controller",
                "facility_code": record["interconnectorid"],
                "trading_interval": record["settlementdate"],
                "generated": record["mwflow"],
            }
        )

        cr.processed_records += 1

    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "facility_code", "is_forecast"],
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        cr.errors = cr.processed_records
        return cr
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr

def process_item(self, item, spider=None):
    s = SessionLocal()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []
    primary_keys = []

    for row in csvreader:
        trading_interval = parse_date(row["TRADING_DAY_INTERVAL"], network=NetworkWEM, dayfirst=False)

        if trading_interval not in primary_keys:
            forecast_load = clean_float(row["FORECAST_EOI_MW"])

            records_to_store.append(
                {
                    "created_by": spider.name,
                    "trading_interval": trading_interval,
                    "network_id": "WEM",
                    "network_region": "WEM",
                    "forecast_load": forecast_load,
                    # generation_scheduled=row["Scheduled Generation (MW)"],
                    # generation_total=row["Total Generation (MW)"],
                    "price": clean_float(row["PRICE"]),
                }
            )

            primary_keys.append(trading_interval)

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            "price": stmt.excluded.price,
            "forecast_load": stmt.excluded.forecast_load,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    return len(records_to_store)

def crawlers_flush_metadata(days: int | None = None, crawler_name: str | None = None) -> None:
    """Flush the crawler metadata"""
    engine = get_database_engine()

    __meta_query = """
        delete
        from crawl_meta cm
        where
            1=1
            and {crawler_clause};
    """

    meta_crawler_clause = "1=1"

    if crawler_name:
        meta_crawler_clause = f"spider_name = '{crawler_name}'"

    meta_query = __meta_query.format(crawler_clause=meta_crawler_clause)

    __history_query = """
        delete
        from crawl_history
        where
            1=1
            and {crawler_clause_history}
            {days_clause_history};
    """

    crawler_clause_history = "1=1"

    if crawler_name:
        crawler_clause_history = f"crawler_name = '{crawler_name}'"

    days_clause_history = ""

    if days:
        days_clause_history = f"and interval >= now() - interval '{days} days'"

    history_query = __history_query.format(
        crawler_clause_history=crawler_clause_history,
        days_clause_history=days_clause_history,
    )

    logger.debug(dedent(meta_query))
    logger.debug(dedent(history_query))

    with engine.connect() as c:
        c.execute(meta_query)
        c.execute(history_query)

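# Usage sketch (assumed, not from the source): flush the crawl history from the
# last 30 days for a single crawler so it gets re-crawled. The crawler name is
# illustrative only.
def _example_flush_recent_crawls() -> None:
    crawlers_flush_metadata(days=30, crawler_name="au.nem.current.dispatch_scada")
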