def process_item(self, item, spider=None):
    s = SessionLocal()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []
    primary_keys = []

    for row in csvreader:
        trading_interval = parse_date(
            row["TRADING_DAY_INTERVAL"], network=NetworkWEM, dayfirst=False
        )

        if trading_interval not in primary_keys:
            forecast_load = clean_float(row["FORECAST_EOI_MW"])

            records_to_store.append(
                {
                    "created_by": spider.name,
                    "trading_interval": trading_interval,
                    "network_id": "WEM",
                    "network_region": "WEM",
                    "forecast_load": forecast_load,
                    # generation_scheduled=row["Scheduled Generation (MW)"],
                    # generation_total=row["Total Generation (MW)"],
                    "price": clean_float(row["PRICE"]),
                }
            )

            primary_keys.append(trading_interval)

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            "price": stmt.excluded.price,
            "forecast_load": stmt.excluded.forecast_load,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    return len(records_to_store)
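# --- Illustration (not part of the original pipeline) ---
# A minimal, self-contained sketch of the PostgreSQL upsert pattern used in
# process_item() above, assuming SQLAlchemy Core with the postgresql dialect.
# The table and engine here are hypothetical stand-ins for BalancingSummary
# and get_database_engine().
def _example_balancing_upsert(engine, rows):
    from sqlalchemy import Column, DateTime, MetaData, Numeric, Table, Text
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    metadata = MetaData()

    # hypothetical table mirroring the conflict key used above
    balancing_summary = Table(
        "balancing_summary",
        metadata,
        Column("trading_interval", DateTime, primary_key=True),
        Column("network_id", Text, primary_key=True),
        Column("network_region", Text, primary_key=True),
        Column("price", Numeric),
        Column("forecast_load", Numeric),
    )

    stmt = pg_insert(balancing_summary).values(rows)

    # on a conflict with the composite key, update the value columns in place
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "price": stmt.excluded.price,
            "forecast_load": stmt.excluded.forecast_load,
        },
    )

    with engine.begin() as conn:
        conn.execute(stmt)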
def validate_maxcapacity(cls, value: str) -> float:
    f = clean_float(value)

    if not f:
        raise ValueError("Not a valid capacity: {}".format(value))

    return f
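# --- Illustration (design note) ---
# `if not f` treats a parsed 0.0 the same as a failed parse, so a capacity of
# "0" would be rejected by the validator above. If zero is ever a legitimate
# value, an explicit None check keeps it (sketch, assuming clean_float
# returns Optional[float]):
def _example_zero_safe_validate(value: str) -> float:
    f = clean_float(value)

    if f is None:
        raise ValueError("Not a valid capacity: {}".format(value))

    return f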
def parse_cpi_value(cls, value: Any) -> float:
    v = clean_float(value)

    if not v:
        raise ValueError("No inflation value")

    return v
def generate_balancing_summary(
    records: List[Dict],
    interval_field: str = "SETTLEMENTDATE",
    network_region_field: str = "REGIONID",
    price_field: Optional[str] = None,
    network: NetworkSchema = NetworkNEM,
    limit: int = 0,
) -> List[Dict]:
    created_at = datetime.now()
    primary_keys = []
    return_records = []
    created_by = ""

    for row in records:
        trading_interval = parse_date(row[interval_field], network=network, dayfirst=False)

        network_region = None

        if network_region_field and network_region_field in row:
            network_region = row[network_region_field]

        pk = (trading_interval, network.code, network_region)

        if pk in primary_keys:
            continue

        primary_keys.append(pk)

        price = None

        if price_field and price_field in row:
            price = clean_float(row[price_field])

            if price:
                price = str(float_to_str(price))

        __rec = {
            "created_by": created_by,
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "network_region": network_region,
            "trading_interval": trading_interval,
            "price": price,
        }

        return_records.append(__rec)

        if limit > 0 and len(return_records) >= limit:
            break

    return return_records
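# --- Illustration (hypothetical data) ---
# Example invocation of generate_balancing_summary() showing the dedup on the
# (interval, network, region) key. Field names follow the defaults above; the
# RRP price column is an assumption about the source rows.
def _example_generate_balancing_summary():
    rows = [
        {"SETTLEMENTDATE": "2021/01/01 00:05:00", "REGIONID": "NSW1", "RRP": "32.50"},
        # duplicate interval/region: dropped by the primary key check
        {"SETTLEMENTDATE": "2021/01/01 00:05:00", "REGIONID": "NSW1", "RRP": "32.50"},
        {"SETTLEMENTDATE": "2021/01/01 00:05:00", "REGIONID": "QLD1", "RRP": "28.10"},
    ]

    summary = generate_balancing_summary(rows, price_field="RRP")
    assert len(summary) == 2

    return summary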
def import_dump_emissions() -> List[Dict]:
    content = load_data("emissions_output.csv", from_project=True, skip_loaders=True)

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    records = []

    for rec in csvreader:
        records.append(
            {
                "facility_code": rec["DUID"],
                "emissions_factor_co2": clean_float(rec["CO2E_EMISSIONS_FACTOR"]),
            }
        )

    return records
def aemo_gi_capacity_cleaner(cap: Optional[str]) -> Optional[float]:
    """Custom capacity cleaner because sometimes it's parsed as silly text
    like a range (i.e. '150 - 180')"""
    if isinstance(cap, (int, float)):
        return cap

    if not cap:
        return None

    cap = cap.strip()

    # take the leading numeric portion of the string, e.g. '150' from '150 - 180'
    num_part = re.search(r"^[\d\.]+", cap)

    if not num_part:
        return None

    num_extracted = str(num_part.group(0))
    num_extracted_and_clean = clean_float(num_extracted)

    return num_extracted_and_clean
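# --- Illustration ---
# Expected behaviour of aemo_gi_capacity_cleaner() on the kinds of values the
# docstring describes. These sample inputs are made up; the asserts assume
# clean_float("150") returns 150.0.
def _example_capacity_cleaner():
    assert aemo_gi_capacity_cleaner("150 - 180") == 150.0  # leading number of a range
    assert aemo_gi_capacity_cleaner("240.5") == 240.5
    assert aemo_gi_capacity_cleaner("TBA") is None  # no leading digits
    assert aemo_gi_capacity_cleaner(None) is None
    assert aemo_gi_capacity_cleaner(330) == 330  # numerics pass through untouched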
def import_emissions_map(file_name: str) -> None:
    """Import emission factors from CSV files for each network.

    The format of the CSV file is:

    station_name,network_id,network_region,facility_code,emissions_factor_co2,fueltech_id,emission_factor_source
    """
    session = SessionLocal()

    content = load_data(file_name, from_project=True, skip_loaders=True)

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    for rec in csvreader:
        network_id = rec["network_id"]
        facility_code = normalize_duid(rec["facility_code"])
        emissions_intensity = clean_float(rec["emissions_factor_co2"])

        if not facility_code:
            logger.info("No facility code for row: {}".format(rec))
            continue

        facility = (
            session.query(Facility)
            .filter_by(code=facility_code)
            .filter_by(network_id=network_id)
            .one_or_none()
        )

        if not facility:
            logger.info("No stored facility for {}".format(facility_code))
            continue

        facility.emissions_factor_co2 = emissions_intensity
        session.add(facility)
        logger.info("Updated {} to {}".format(facility_code, emissions_intensity))

    session.commit()

    return None
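# --- Illustration (not part of the original importer) ---
# one_or_none() semantics used above: it returns the single matching row, None
# when nothing matches, and raises MultipleResultsFound on duplicates. Sketch
# with a hypothetical in-memory model, assuming SQLAlchemy 1.4+:
def _example_one_or_none():
    from sqlalchemy import Column, String, create_engine
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()

    class DemoFacility(Base):  # hypothetical stand-in for Facility
        __tablename__ = "demo_facility"
        code = Column(String, primary_key=True)
        network_id = Column(String)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add(DemoFacility(code="DEMO1", network_id="NEM"))
        session.commit()

        facility = (
            session.query(DemoFacility)
            .filter_by(code="DEMO1")
            .filter_by(network_id="NEM")
            .one_or_none()
        )
        assert facility is not None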
def process_trading_regionsum(table: Dict[str, Any], spider: Spider) -> Dict:
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    limit = None
    records_to_store = []
    records_processed = 0
    primary_keys = []

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"],
            network=NetworkNEM,
            dayfirst=False,
            date_format="%Y/%m/%d %H:%M:%S",
        )

        if not trading_interval:
            continue

        # use an ordered tuple as the composite key: a set is unordered, so
        # an interval value and a region id could collide across records
        _pk = (trading_interval, record["REGIONID"])

        if _pk in primary_keys:
            continue

        primary_keys.append(_pk)

        demand_total = None

        if "TOTALDEMAND" in record:
            demand_total = clean_float(record["TOTALDEMAND"])

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": spider.name,
                "network_region": record["REGIONID"],
                "trading_interval": trading_interval,
                "demand_total": demand_total,
            }
        )

        records_processed += 1

        if limit and records_processed >= limit:
            logger.info("Reached limit of: {} {}".format(limit, records_processed))
            break

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "demand_total": stmt.excluded.demand_total,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
def unit_scada_generate_facility_scada(
    records,
    spider=None,
    network: NetworkSchema = NetworkNEM,
    interval_field: str = "SETTLEMENTDATE",
    facility_code_field: str = "DUID",
    date_format: Optional[str] = None,
    power_field: Optional[str] = None,
    energy_field: Optional[str] = None,
    is_forecast: bool = False,
    primary_key_track: bool = False,
    groupby_filter: bool = True,
    created_by: Optional[str] = None,
    limit: int = 0,
    duid: Optional[str] = None,
) -> List[Dict]:
    created_at = datetime.now()
    primary_keys = []
    return_records = []

    # prefer the spider name, falling back to any passed-in created_by value
    if spider and hasattr(spider, "name"):
        created_by = spider.name

    if created_by is None:
        created_by = ""

    for row in records:
        trading_interval = parse_date(
            row[interval_field],
            network=network,
            dayfirst=False,
            date_format=date_format,
        )

        # if facility_code_field not in row:
        #     logger.error("Invalid row no facility_code")
        #     continue

        facility_code = normalize_duid(row[facility_code_field])

        if duid and facility_code != duid:
            continue

        if primary_key_track:
            pkey = (trading_interval, facility_code)

            if pkey in primary_keys:
                continue

            primary_keys.append(pkey)

        generated = None

        if power_field and power_field in row:
            generated = clean_float(row[power_field])

            if generated:
                generated = float_to_str(generated)

        energy = None

        if energy_field and energy_field in row:
            energy = clean_float(row[energy_field])

            if energy:
                energy = float_to_str(energy)

        __rec = {
            "created_by": created_by,
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "trading_interval": trading_interval,
            "facility_code": facility_code,
            "generated": generated,
            "eoi_quantity": energy,
            "is_forecast": is_forecast,
        }

        return_records.append(__rec)

        if limit > 0 and len(return_records) >= limit:
            break

    if not groupby_filter:
        return return_records

    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        return_records,
        key=lambda r: (
            r.get("network_id"),
            r.get("trading_interval"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    return_records = list(return_records_grouped.values())

    return return_records
def process_dispatch_interconnectorres(table: Dict, spider: Spider) -> Dict:
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]
    records_to_store = []

    for record in records:
        ti_value = None

        if "SETTLEMENTDATE" in record:
            ti_value = record["SETTLEMENTDATE"]

        if "RUN_DATETIME" in record:
            ti_value = record["RUN_DATETIME"]

        if not ti_value:
            raise Exception("Require a trading interval")

        trading_interval = parse_date(ti_value, network=NetworkNEM, dayfirst=False)

        if not trading_interval:
            continue

        facility_code = normalize_duid(record["INTERCONNECTORID"])
        power_value = clean_float(record["METEREDMWFLOW"])

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": spider.name,
                "facility_code": facility_code,
                "trading_interval": trading_interval,
                "generated": power_value,
            }
        )

    # remove duplicates
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "facility_code", "is_forecast"],
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
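# --- Illustration (not part of the original module) ---
# itertools.groupby only groups *consecutive* records, which is why the dedup
# above pairs it with the `pk_values not in return_records_grouped` check: a
# later, non-adjacent run of the same key is skipped rather than merged. When
# "keep one record per key" is all that is needed, a single dict pass is
# simpler (sketch; keeps the last record seen per key, which matches the
# original whenever duplicates arrive adjacent):
def _example_dedup_by_primary_key(records):
    grouped = {}

    for rec in records:
        pk = (rec.get("trading_interval"), rec.get("network_id"), rec.get("facility_code"))
        grouped[pk] = rec  # a later record with the same key overwrites the earlier one

    return list(grouped.values())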
def process_item(self, item, spider=None):
    if not item:
        logger.error("No item in pipeline")
        return None

    if "content" not in item:
        logger.error("No content in pipeline")
        return None

    content = item["content"]

    if "ELEC_NEM_SUMMARY" not in content:
        logger.error("Invalid summary return")
        return None

    summary = content["ELEC_NEM_SUMMARY"]

    s = SessionLocal()
    records_to_store = []

    for row in summary:
        trading_interval = parse_date(row["SETTLEMENTDATE"], network=NetworkNEM)
        demand_total = clean_float(row["TOTALDEMAND"])
        generation_scheduled = clean_float(row["SCHEDULEDGENERATION"])
        generation_non_scheduled = clean_float(row["SEMISCHEDULEDGENERATION"])

        records_to_store.append(
            {
                "created_by": spider.name,
                "trading_interval": trading_interval,
                "network_id": "NEM",
                "network_region": row["REGIONID"].strip(),
                # "demand_total": demand_total,
                "generation_scheduled": generation_scheduled,
                "generation_non_scheduled": generation_non_scheduled,
            }
        )

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            # "demand_total": stmt.excluded.demand_total,
            "generation_scheduled": stmt.excluded.generation_scheduled,
            "generation_non_scheduled": stmt.excluded.generation_non_scheduled,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        s.close()

    return {"num_records": len(records_to_store)}
def process_trading_regionsum(table: AEMOTableSchema) -> ControllerReturn:
    engine = get_database_engine()

    if not table.records:
        logger.debug(table)
        raise Exception("Invalid table: no records")

    cr = ControllerReturn(total_records=len(table.records))

    limit = None
    records_to_store = []
    records_processed = 0
    primary_keys = []

    for record in table.records:
        if not isinstance(record, dict):
            raise Exception("Invalid record type")

        trading_interval = parse_date(
            record["settlementdate"],
            network=NetworkNEM,
            dayfirst=False,
            date_format="%Y/%m/%d %H:%M:%S",
        )

        if not trading_interval:
            continue

        # ordered tuple rather than a set for the composite primary key
        _pk = (trading_interval, record["regionid"])

        if _pk in primary_keys:
            continue

        primary_keys.append(_pk)

        net_interchange = None

        if "netinterchange" in record:
            net_interchange = clean_float(record["netinterchange"])

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controller.nem",
                "network_region": record["regionid"],
                "net_interchange_trading": net_interchange,
                "trading_interval": trading_interval,
            }
        )

        records_processed += 1

        if limit and records_processed >= limit:
            logger.info("Reached limit of: {} {}".format(limit, records_processed))
            break

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "net_interchange_trading": stmt.excluded.net_interchange_trading,
        },
    )

    session = get_scoped_session()

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        records_to_store = []
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr
def unit_scada_generate_facility_scada(
    records: List[Union[Dict[str, Any], MMSBaseClass]],
    network: NetworkSchema = NetworkNEM,
    interval_field: str = "settlementdate",
    facility_code_field: str = "duid",
    power_field: str = "scadavalue",
    energy_field: Optional[str] = None,
    is_forecast: bool = False,
    primary_key_track: bool = True,
) -> List[Dict]:
    """@NOTE method deprecated"""
    created_at = datetime.now()
    primary_keys = []
    return_records = []

    if not records:
        return []

    fields = ""

    first_record = records[0]

    if isinstance(first_record, MMSBaseClass):
        first_record = asdict(first_record)  # type: ignore

    try:
        fields = ", ".join([f"'{i}'" for i in list(first_record.keys())])
    except Exception as e:
        logger.error("Fields error: {}".format(e))

    for row in records:
        # cast it all to dicts
        if isinstance(row, MMSBaseClass):
            row = asdict(row)  # type: ignore

        if interval_field not in row:
            raise Exception(
                "No such field: '{}'. Fields: {}. Data: {}".format(interval_field, fields, row)
            )

        trading_interval = row[interval_field]

        if facility_code_field not in row:
            raise Exception(
                "No such facility field: {}. Fields: {}".format(facility_code_field, fields)
            )

        facility_code = row[facility_code_field]

        energy_quantity: Optional[float] = None

        if energy_field:
            if energy_field not in row:
                raise Exception("No energy field: {}. Fields: {}".format(energy_field, fields))

            energy_quantity = clean_float(row[energy_field])

        power_quantity: Optional[float] = None

        if power_field not in row:
            raise Exception("No such power field: {}. Fields: {}".format(power_field, fields))

        power_quantity = clean_float(row[power_field])

        # should we track primary keys to remove duplicates?
        # @NOTE this does occur sometimes especially on large
        # imports of data from large sets
        if primary_key_track:
            pk = (trading_interval, network.code, facility_code)

            if pk in primary_keys:
                continue

            primary_keys.append(pk)

        __rec = {
            "created_by": "opennem.controller",
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "trading_interval": trading_interval,
            "facility_code": facility_code,
            "generated": power_quantity,
            "eoi_quantity": energy_quantity,
            "is_forecast": is_forecast,
            "energy_quality_flag": 0,
        }

        return_records.append(__rec)

    return return_records
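# --- Illustration (design note, not part of the original module) ---
# The primary key tracking above appends to a list, so each `pk in primary_keys`
# membership test is O(n) and large imports degrade quadratically. A set gives
# O(1) lookups with the same dedup semantics (sketch; row keys follow the
# defaults above):
def _example_pk_dedup(rows, network_code="NEM"):
    seen = set()
    deduped = []

    for row in rows:
        pk = (row["settlementdate"], network_code, row["duid"])

        if pk in seen:
            continue

        seen.add(pk)
        deduped.append(row)

    return deduped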
def check_emissions_map() -> None:
    content = load_data("opennem_emission_factors.csv", from_project=True, skip_loaders=True)

    mms_emissions = import_mms_emissions()

    def get_emissions_for_code(facility_code: str) -> Optional[Dict]:
        facility_lookup = list(
            filter(lambda x: x["facility_code"] == facility_code, mms_emissions)
        )

        if not facility_lookup or len(facility_lookup) < 1:
            logger.error(
                "Could not find facility {} in MMS emissions data".format(facility_code)
            )
            return None

        facility = facility_lookup.pop()
        return facility

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    csv_out = []

    for rec in csvreader:
        network_id = rec["network_id"]
        facility_code = normalize_duid(rec["facility_code"])
        emissions_intensity = clean_float(rec["emissions_factor_co2"])

        if network_id not in ["NEM"]:
            rec["emission_factor_source"] = "NPI"
            csv_out.append(rec)
            continue

        rec["emission_factor_source"] = ""

        mms_emission_record = get_emissions_for_code(facility_code)

        if not mms_emission_record:
            csv_out.append(rec)
            continue

        if emissions_intensity != mms_emission_record["emissions_factor_co2"]:
            logger.error(
                "Mismatch for {}: {} and {}".format(
                    facility_code,
                    emissions_intensity,
                    mms_emission_record["emissions_factor_co2"],
                )
            )

        if mms_emission_record["emissions_factor_co2"]:
            # take the MMS factor along with its source
            rec["emissions_factor_co2"] = mms_emission_record["emissions_factor_co2"]
            rec["emission_factor_source"] = mms_emission_record["emission_factor_source"]
        else:
            rec["emission_factor_source"] = "Lookup"

        csv_out.append(rec)

    fieldnames = [
        "network_id",
        "network_region",
        "facility_code",
        "station_name",
        "fueltech_id",
        "status_id",
        "emissions_factor_co2",
        "emission_factor_source",
    ]

    with open("emission_factors.csv", "w") as fh:
        csvwriter = csv.DictWriter(fh, fieldnames=fieldnames)
        csvwriter.writeheader()
        csvwriter.writerows(csv_out)