def test_with_basecode(self):
    """With no duid, station "Portland" and unit 1 should give "0NPORT_1"."""
    portland_unit = UnitSchema(id=1, number=1, alias=None)

    generated = get_unit_code(portland_unit, None, "Portland")

    assert generated == "0NPORT_1", "Unit code is right with no duid"
def test_with_alias(self):
    """A unit carrying the alias "GT1" should give "<duid>_GT1"."""
    aliased_unit = UnitSchema(id=1, number=1, alias="GT1")

    generated = get_unit_code(aliased_unit, "TEST1")

    assert generated == "TEST1_GT1", "Unit with GT1 alias"
def test_no_alias(self):
    """A unit built without an alias should give the bare duid as its code."""
    plain_unit = UnitSchema(id=1, number=1)

    generated = get_unit_code(plain_unit, "TEST1")

    assert generated == "TEST1", "No alias means unit code is the same as duid"
def patches():
    """Apply one-off data fixes and register the Singleton Solar Farm.

    Runs every active statement in ``sqls`` on a connection obtained from
    ``engine`` and pretty-prints each result. It then adds a "Singleton"
    station with one solar facility through a session and commits it.

    The session is always closed, including when the insert or the commit
    raises; the exception itself still propagates to the caller.
    """
    sqls = [
        # "update facility set capacity_registered = 2.0, unit_capacity = 2.0 where code = 'GOSNELLS'",
        # "update facility set capacity_registered = 1.1, unit_capacity = 1.1 where code = 'ATLAS'",
        # code GULLRWF2_74 -> Biala
        "update facility set active=false where network_code ='GULLRWF2'",
        "update facility set station_id = (select id from station where name = 'Wivenhoe Small Hydro') where code ='WIVENSH'",
        "update station set name = 'Wivenhoe Mini' where name = 'Wivenhoe Small'",
        # "update facility set fueltech_id = 'pumps' where network_code in ('PUMP2', 'PUMP1')",
        # "update facility set active=false where code='PIONEER'",
        # "update facility set station_id = null where name='Crookwell' and code is null",
        # "update facility set station_id = null where name='Pioneer Sugar Mill' and code is null",
    ]

    with engine.connect() as c:
        for query in sqls:
            rows = c.execute(query)
            pprint(rows)

    s = session()

    try:
        singleton = Station(
            name="Singleton",
            locality="singleton",
            network_name="Singleton Solar Farm",
            network_id="NEM",
            created_by="opennem.patches",
        )
        s.add(singleton)

        # The facility has no DUID and is unit 1 of its station; the code,
        # unit id and unit number are set literally (the previous lookup of
        # these through the unit helpers was unused for the code).
        singleton_facility = Facility(
            code="0NSISF_1",
            status_id="operating",
            network_region="NSW1",
            network_name="Singleton Solar Farm",
            fueltech_id="solar_utility",
            unit_id=parse_unit_duid(1, None).id,
            unit_number=parse_unit_duid(1, None).number,
            unit_capacity=0.4,
            capacity_registered=0.4,
            created_by="opennem.patches",
        )
        singleton_facility.station = singleton
        s.add(singleton_facility)

        s.commit()
    finally:
        # Release the connection whether or not the commit succeeded.
        s.close()
def process_facilities(self, records):
    """Create or update stations and facilities from grouped GI records.

    ``records`` is a mapping whose values are lists of facility record
    dicts that belong to one station; the keys are ignored. The record keys
    read here are ``station_name``, ``duid``, ``FuelType``, ``TechType``,
    ``UnitStatus``, ``Units``, ``NameCapacity``, ``unit_capacity``,
    ``SurveyEffective`` and ``Region``.

    For each group the station is looked up by DUID and region, then by
    name where ``facility_station_join_by_name`` allows it, and created if
    nothing matches. Each facility record is then matched to a ``Facility``
    by unit code, then by DUID and region, or created. Most fields are only
    filled in when still empty on the facility; the status and the station
    are always set from the record.

    Every facility is committed as it is processed. The session is closed
    when the method exits, whether it returns or raises.

    Raises:
        Exception: if a facility ends up without a status.
    """
    s = self.session()

    try:
        for _, facility_records in records.items():
            facility_index = 1
            facility_station = None
            created_station = False

            station_network_name = record_get_station_name(facility_records)
            station_name = station_name_cleaner(station_network_name)

            duid_unique = has_unique_duid(facility_records)
            facility_count = len(facility_records)

            # Step 1. Find the station
            # First by duid if it's unique
            duid = get_unique_duid(facility_records)

            # all GI records should have a region
            station_network_region = get_unique_reqion(facility_records)

            # This is the most suitable unit record to use for the station
            facility_station_record = get_station_record_from_facilities(
                facility_records
            )

            if duid and duid_unique and facility_count == 1:
                facility_lookup = None

                try:
                    facility_lookup = (
                        s.query(Facility)
                        .filter(Facility.network_code == duid)
                        .filter(
                            Facility.network_region == station_network_region
                        )
                        .one_or_none()
                    )
                except MultipleResultsFound:
                    logger.error(
                        "Found multiple duid for station with code {}".format(
                            duid
                        )
                    )
                    continue

                if facility_lookup and facility_lookup.station:
                    facility_station = facility_lookup.station

            # Only look up by duid when there is one. Without the explicit
            # grouping, `and` binds tighter than `or`, so a group with no
            # duid was matched against facilities whose network_code IS NULL
            # and picked up an unrelated station.
            if duid and (
                (duid_unique and facility_count > 1) or not duid_unique
            ):
                facility_lookup = (
                    s.query(Facility)
                    .filter(Facility.network_code == duid)
                    .filter(Facility.network_region == station_network_region)
                    .first()
                )

                if facility_lookup and facility_lookup.station:
                    facility_station = facility_lookup.station

            if not facility_station and facility_station_join_by_name(
                station_name
            ):
                try:
                    facility_station = (
                        s.query(Station)
                        .filter(Station.name == station_name)
                        .one_or_none()
                    )
                except MultipleResultsFound:
                    logger.warning(
                        "Multiple results found for station name : {}".format(
                            station_name
                        )
                    )
                    facility_station = None

            # If we have a station name, and no duid, and it's ok to join by
            # name then find the station (make sure to region lock)
            if (
                station_name
                and not duid
                and not facility_station
                and facility_station_join_by_name(station_name)
            ):
                facility = (
                    s.query(Facility)
                    .join(Facility.station)
                    .filter(Facility.network_region == station_network_region)
                    .filter(Station.name == station_name)
                    .first()
                )

                if facility:
                    facility_station = facility.station

            # Create one as it doesn't exist
            if not facility_station:
                facility_station = Station(
                    name=station_name,
                    network_name=name_normalizer(
                        facility_station_record["station_name"]
                    ),
                    network_id="NEM",
                    created_by="pipeline.aemo.general_information",
                )

                s.add(facility_station)
                s.commit()

                created_station = True
            else:
                facility_station.updated_by = (
                    "pipeline.aemo.general_information"
                )

            # Step 2. Add or update each facility of the station
            for facility_record in facility_records:
                if facility_record["FuelType"] in ["Natural Gas Pipeline"]:
                    continue

                # skip these statuses too
                if facility_record["UnitStatus"] in FACILITY_INVALID_STATUS:
                    continue

                facility = None
                created_facility = False

                facility_network_name = name_normalizer(
                    facility_record["station_name"]
                )
                facility_name = station_name_cleaner(
                    facility_record["station_name"]
                )
                duid = normalize_duid(facility_record["duid"])
                reg_cap = clean_capacity(facility_record["NameCapacity"])

                # A record may describe several units; the unit id passed to
                # the parser is the index of the last unit it covers.
                units_num = facility_record["Units"] or 1
                unit_id = facility_index + (units_num - 1)

                unit = parse_unit_duid(unit_id, duid)
                unit_size = clean_capacity(facility_record["unit_capacity"])
                unit_code = get_unit_code(
                    unit, duid, facility_record["station_name"]
                )

                # The survey date arrives either as a datetime or as a
                # dd/mm/yy string; anything else leaves the date unset.
                facility_comissioned = facility_record["SurveyEffective"]
                facility_comissioned_dt = None

                if isinstance(facility_comissioned, datetime):
                    facility_comissioned_dt = facility_comissioned
                elif isinstance(facility_comissioned, str):
                    try:
                        facility_comissioned_dt = datetime.strptime(
                            facility_comissioned, "%d/%m/%y"
                        )
                    except ValueError:
                        logger.error(
                            "Error parsing date: {}".format(
                                facility_comissioned
                            )
                        )

                facility_status = map_aemo_facility_status(
                    facility_record["UnitStatus"]
                )
                facility_network_region = normalize_aemo_region(
                    facility_record["Region"]
                )
                facility_fueltech = (
                    lookup_fueltech(
                        facility_record["FuelType"],
                        techtype=facility_record["TechType"],
                    )
                    if (
                        "FuelType" in facility_record
                        and facility_record["FuelType"]
                    )
                    else None
                )

                if not facility_fueltech:
                    logger.error(
                        "Error looking up fueltech: {} {} ".format(
                            facility_record["FuelType"],
                            facility_record["TechType"],
                        )
                    )

                # check if we have it by ocode first
                facility = (
                    s.query(Facility)
                    .filter(Facility.code == unit_code)
                    .one_or_none()
                )

                if not facility and duid:
                    try:
                        facility = (
                            s.query(Facility)
                            .filter(Facility.network_code == duid)
                            .filter(
                                Facility.network_region
                                == facility_network_region
                            )
                            # .filter(Facility.nameplate_capacity != None)
                            .one_or_none()
                        )
                    except MultipleResultsFound:
                        logger.warning(
                            "Multiple results found for duid : {}".format(
                                duid
                            )
                        )

                    if facility:
                        if facility.station and not facility_station:
                            facility_station = facility.station

                        logger.info(
                            "GI: Found facility by DUID: code {} station {}".format(
                                facility.code,
                                facility.station.name
                                if facility.station
                                else None,
                            )
                        )

                # Done trying to find existing
                if not facility:
                    facility = Facility(
                        code=unit_code,
                        network_code=duid,
                        created_by="pipeline.aemo.general_information",
                    )
                    facility.station = facility_station
                    created_facility = True

                if duid and not facility.network_code:
                    facility.network_code = duid
                    facility.updated_by = "pipeline.aemo.general_information"

                if not facility.network_region:
                    facility.network_region = facility_network_region
                    facility.updated_by = "pipeline.aemo.general_information"

                if not facility.network_name:
                    facility.network_name = facility_network_name
                    facility.updated_by = "pipeline.aemo.general_information"

                if not facility.fueltech_id and facility_fueltech:
                    facility.fueltech_id = facility_fueltech
                    facility.updated_by = "pipeline.aemo.general_information"

                # Capacity is refreshed while the facility is not operating.
                if not facility.capacity_registered or (
                    facility.status and facility.status != "operating"
                ):
                    facility.capacity_registered = reg_cap
                    facility.updated_by = "pipeline.aemo.general_information"

                # @TODO work this out
                # facility.dispatch_type = facility_dispatch_type

                if not facility.unit_id:
                    facility.unit_id = unit.id
                    facility.unit_number = unit.number
                    facility.unit_size = unit_size
                    facility.unit_alias = unit.alias

                if not facility.unit_capacity or (
                    facility.status and facility.status != "operating"
                ):
                    facility.unit_capacity = unit_size
                    facility.updated_by = "pipeline.aemo.general_information"

                # NOTE(review): the status is always taken from the record
                # (the "only when unset" guard is disabled) -- confirm.
                # if not facility.status_id:
                facility.status_id = facility_status
                # facility.updated_by = "pipeline.aemo.general_information"

                if not facility.registered and facility_comissioned_dt:
                    facility.registered = facility_comissioned_dt
                    facility.updated_by = "pipeline.aemo.general_information"

                # NOTE(review): the station is reassigned unconditionally,
                # which makes the guarded assignment below a no-op -- confirm.
                facility.station = facility_station

                if facility.fueltech_id is None:
                    logger.warning(
                        "Could not find fueltech for: {} {}".format(
                            facility.code, facility.network_code
                        )
                    )

                # facility.status_id = facility_status

                if facility_station and not facility.station:
                    facility.station = facility_station

                if facility.status_id is None:
                    # Report the raw status from the record; the mapped
                    # value is None by definition on this path.
                    raise Exception(
                        "GI: Failed to map status ({}) on row: {}".format(
                            facility_record["UnitStatus"], facility_record
                        )
                    )

                s.add(facility)
                s.commit()

                facility_index += units_num

                if created_station:
                    logger.info(
                        "GI: {} station with name {} ".format(
                            "Created" if created_station else "Updated",
                            station_name,
                            # facility_station.id,
                        )
                    )

                if created_facility:
                    logger.info(
                        "GI: {} facility with duid {} to station {}".format(
                            "Created" if created_facility else "Updated",
                            duid,
                            station_name,
                        )
                    )

        try:
            s.commit()
        except Exception as e:
            logger.error(e)
            raise
    finally:
        # Always release the session, including when a record fails above.
        s.close()
def rel_grouper(records, station_code_map):
    """Parse REL rows and bundle them per station code.

    Each row is turned into a facility dict, its station code is resolved
    with ``lookup_station_code`` from the row's duid and station name, and
    the facility dicts are collected per station code.

    Returns a dict keyed by station code. Each value holds ``name``,
    ``network_name``, ``code``, an ``id`` counted up from 2000 and the list
    of ``facilities``; name and network name come from the first facility of
    the station.

    Raises:
        Exception: if a station code is falsy ("Unmapped station") or is
            already present in the result ("Code conflict").
    """
    parsed = []

    for row in records:
        clean_name = station_name_cleaner(row["station_name"])
        row_duid = normalize_duid(row["duid"])
        row_unit = parse_unit_duid(row["unit_no"], row_duid)
        row_fueltech = lookup_fueltech(
            row["fuel_source_primary"],
            row["fuel_source_descriptor"],
            row["tech_primary"],
            row["tech_primary_descriptor"],
            row["dispatch_type"],
        )
        row_station_code = lookup_station_code(
            [row_duid], row["station_name"], station_code_map
        )

        facility = {
            "name": clean_name,
            "code": row_duid,
            "status": parse_facility_status("operating"),
            "station_code": row_station_code,
            "network_region": row["region"].strip(),
            "network_name": row["station_name"].strip(),
            "unit_size": clean_capacity(row["unit_size"]),
            "unit_code": get_unit_code(row_unit, row_duid, clean_name),
            "dispatch_type": parse_dispatch_type(row["dispatch_type"]),
            "fueltech": parse_facility_fueltech(row_fueltech),
            "capacity_registered": clean_capacity(row["reg_cap"]),
            "capacity_maximum": clean_capacity(row["max_cap"]),
        }
        parsed.append(facility)

    # Collect facilities per station code, keeping first-seen order of codes.
    by_station = {}

    for facility in parsed:
        by_station.setdefault(facility["station_code"], []).append(facility)

    coded = {}

    for station_id, (code, members) in enumerate(by_station.items(), start=2000):
        network_name = members[0]["network_name"]

        if code in coded:
            raise Exception(
                "Code conflict: {}. {} {}".format(code, network_name, coded[code])
            )

        if not code:
            raise Exception("Unmapped station: {}".format(members))

        coded[code] = {
            "name": station_name_cleaner(network_name),
            "network_name": network_name,
            "code": code,
            "id": station_id,
            "facilities": members,
        }

    return coded
def gi_grouper(records, station_code_map):
    """Parse GI rows and bundle them per station code.

    Rows rejected by ``gi_filter`` are dropped. The rest are grouped by
    cleaned station name, turned into facility dicts, and regrouped by the
    station code that ``lookup_station_code`` resolves for each facility. A
    facility with no duid uses its generated unit code as its code.

    Returns a dict keyed by station code. Each value holds ``id`` (counted
    up from 3000), ``code``, ``name``, ``network_name`` and ``facilities``.
    ``name`` and ``network_name`` are taken from the station's last facility
    and removed from every facility dict.

    Raises:
        Exception: if a facility resolves to a falsy station code.
    """
    # filter out records we don't want
    records = list(filter(gi_filter, records))

    records = [
        {"name": station_name_cleaner(i["station_name"]), **i} for i in records
    ]

    # Group by cleaned station name. groupby only merges adjacent rows, so
    # the groups are accumulated into a dict to join non-adjacent ones.
    grouped_records = {}

    for k, v in groupby(
        records, key=lambda v: (v["name"].strip(), v["owner"].strip())
    ):
        key = k[0]

        if key not in grouped_records:
            grouped_records[key] = []

        grouped_records[key] += list(v)

    records_parsed = []

    for station_name, facilities in grouped_records.items():
        facility_index = 0

        for i in facilities:
            name = station_name_cleaner(i["station_name"])
            duid = normalize_duid(i["duid"])

            units_num = i["unit_num"] or 1
            unit_id = facility_index + (units_num - 1)
            unit = parse_unit_duid(unit_id, duid)

            fueltech = lookup_fueltech(i["FuelType"], techtype=i["TechType"])

            # Facilities without a duid fall back to their generated code.
            if not duid:
                duid = get_unit_code(unit, duid, i["station_name"])

            facility_duids = [duid]
            station_code = lookup_station_code(
                facility_duids, station_name, station_code_map
            )

            records_parsed.append(
                {
                    # not a real station id
                    # "id": _id,
                    "name": name,
                    "code": duid,
                    "network_code": duid,
                    "station_code": station_code,
                    "network_region": normalize_aemo_region(i["Region"]),
                    "network_name": i["station_name"].strip(),
                    "dispatch_type": "GENERATOR",
                    "fueltech": parse_facility_fueltech(fueltech)
                    if fueltech
                    else None,
                    "status": parse_facility_status(
                        map_aemo_facility_status(i["status"])
                    ),
                    "registered": parse_comissioned_date(i["SurveyEffective"]),
                    **get_capacities(i),
                }
            )

            facility_index += 1

    coded_records = {}
    _id = 3000

    for station_code, facilities in groupby(
        records_parsed, key=lambda x: x["station_code"]
    ):
        facilities = list(facilities)

        if not station_code:
            raise Exception(
                "Unmapped station {}: {}".format(station_code, facilities)
            )

        if station_code not in coded_records:
            coded_records[station_code] = {
                "id": _id,
                "code": station_code,
                "facilities": [],
            }
            _id += 1

        coded_records[station_code]["facilities"] += facilities

    grouped_records = {}

    for station_code, station_record in coded_records.items():
        station_facilities = station_record["facilities"]

        # Every coded record holds at least one facility. The station takes
        # its names from the last one; read them before they are popped.
        naming_facility = station_facilities[-1]

        station_entry = {
            "id": station_record.get("id"),
            "code": station_code,
            "name": naming_facility["name"],
            "network_name": naming_facility["network_name"],
            "facilities": [],
        }

        for facility in station_facilities:
            # Names live on the station entry, not on its facilities.
            facility.pop("name")
            facility.pop("network_name")
            station_entry["facilities"].append(facility)

        grouped_records[station_code] = station_entry

    return grouped_records
def process_generators(self, generators):
    """Create or update stations and facilities from grouped REL records.

    ``generators`` maps a station key, whose first element is the station
    name, to the list of facility record dicts for that station. The record
    keys read here are ``station_name``, ``region``, ``duid``, ``reg_cap``,
    ``unit_no``, ``unit_size``, ``dispatch_type``, ``fuel_source_primary``,
    ``fuel_source_descriptor``, ``tech_primary`` and
    ``tech_primary_descriptor``.

    For each station the ``Station`` is looked up by DUID, then by name
    where ``facility_station_join_by_name`` allows it, and created if
    nothing matches. Each facility record is matched to a ``Facility`` by
    unit code, then by DUID, or created, and its fields are overwritten from
    the record with status "operating". Loads whose fueltech is neither
    ``battery_charging`` nor ``pumps`` are skipped.

    Each facility is committed as it is processed. The session is closed
    when the method exits, whether it returns or raises.

    Raises:
        Exception: if a generated unit code is shorter than 3 characters.
    """
    s = self.session()

    stations_updated = 0
    stations_added = 0
    generators_updated = 0
    generators_added = 0

    try:
        for station_key, facilities in generators.items():
            facility_station = None
            created_station = False

            station_name = station_key[0]

            duid_unique = has_unique_duid(facilities)
            facility_count = len(facilities)

            # Step 1. Find the station
            # First by duid if it's unique
            duid = get_unique_duid(facilities)

            # This is the most suitable unit record to use for the station
            facility_station_record = get_station_record_from_facilities(
                facilities
            )

            facility_network_region = get_unique_reqion(facilities)

            if duid and duid_unique and facility_count == 1:
                facility_lookup = None

                try:
                    facility_lookup = (
                        s.query(Facility)
                        .filter(Facility.network_code == duid)
                        .one_or_none()
                    )
                except MultipleResultsFound:
                    logger.warning(
                        "REL: Multiple stations found for {} {} with duid {}".format(
                            station_name, facility_network_region, duid
                        )
                    )

                if facility_lookup and facility_lookup.station:
                    facility_station = facility_lookup.station

            # Only look up by duid when there is one. Without the explicit
            # grouping, `and` binds tighter than `or`, so a station with no
            # duid was matched against facilities whose network_code IS NULL
            # and picked up an unrelated station.
            if duid and (
                (duid_unique and facility_count > 1) or not duid_unique
            ):
                facility_lookup = (
                    s.query(Facility)
                    .filter(Facility.network_code == duid)
                    .first()
                )

                if facility_lookup and facility_lookup.station:
                    facility_station = facility_lookup.station

            if not facility_station and facility_station_join_by_name(
                station_name
            ):
                try:
                    facility_station = (
                        s.query(Station)
                        .filter(Station.name == station_name)
                        .one_or_none()
                    )
                except MultipleResultsFound:
                    logger.warning(
                        "REL: Multiple stations found for {} {}".format(
                            station_name, facility_network_region
                        )
                    )

                # Report what the name lookup actually returned.
                logger.debug(
                    "REL: Looked up {} by name and region {} and found {}".format(
                        station_name,
                        facility_network_region,
                        facility_station if facility_station else "nothing",
                    )
                )

            # Create one as it doesn't exist
            if not facility_station:
                facility_station = Station(
                    name=station_name,
                    network_name=name_normalizer(
                        facility_station_record["station_name"]
                    ),
                    network_id="NEM",
                    created_by="pipeline.aemo.registration_exemption",
                )

                s.add(facility_station)

                created_station = True
                stations_added += 1
            else:
                facility_station.updated_by = (
                    "pipeline.aemo.registration_exemption"
                )
                stations_updated += 1

            logger.info(
                "REL: {} station with name {} and code {}".format(
                    "Created" if created_station else "Updated",
                    facility_station.name,
                    facility_station.code,
                )
            )

            # Step 2. Add the facilities/units to the station
            # Now that we have a station or created one ..
            # Step go through the facility records we got ..
            for facility_record in facilities:
                # Reset per record so one new facility does not mark every
                # later facility of the station as created.
                created_facility = False

                network_name = name_normalizer(facility_record["station_name"])
                facility_region = normalize_aemo_region(
                    facility_record["region"]
                )
                duid = normalize_duid(facility_record["duid"])
                reg_cap = clean_capacity(facility_record["reg_cap"])

                unit = parse_unit_duid(facility_record["unit_no"], duid)
                unit_size = clean_capacity(facility_record["unit_size"])
                unit_code = get_unit_code(
                    unit, duid, facility_station_record["station_name"]
                )

                facility_dispatch_type = parse_dispatch_type(
                    facility_record["dispatch_type"]
                )
                fueltech = lookup_fueltech(
                    facility_record["fuel_source_primary"],
                    facility_record["fuel_source_descriptor"],
                    facility_record["tech_primary"],
                    facility_record["tech_primary_descriptor"],
                    facility_record["dispatch_type"],
                )

                # Skip loads that are not batteries or pumps for now
                # @NOTE @TODO better to centralize this as it needs to be consistent
                if (
                    facility_dispatch_type == DispatchType.LOAD
                    and fueltech not in ["battery_charging", "pumps"]
                ):
                    continue

                # check if we have it by ocode first
                facility = (
                    s.query(Facility)
                    .filter(Facility.code == unit_code)
                    .one_or_none()
                )

                # If the duid is unique then we have no issues on which to join/create
                if duid and duid_unique and not facility:
                    try:
                        facility = (
                            s.query(Facility)
                            .filter(Facility.network_code == duid)
                            .one_or_none()
                        )
                    except MultipleResultsFound:
                        logger.warning(
                            "REL: Multiple facilities found for {} {}".format(
                                station_name, duid
                            )
                        )

                # `== None` is deliberate: it is a SQLAlchemy column
                # comparison, not a Python identity test.
                if duid and not duid_unique and not facility:
                    facility = (
                        s.query(Facility)
                        .filter(Facility.network_code == duid)
                        .filter(Facility.unit_number == None)  # noqa: E711
                        .filter(Facility.status_id == "operating")
                        .first()
                    )

                # If the duid is not unique then we need to figure things out ..
                if duid and not duid_unique and not facility:
                    facility_lookup = (
                        s.query(Facility)
                        .filter(Facility.network_code == duid)
                        # Not having a code means we haven't written to this
                        # record yet so we'll use it
                        .filter(Facility.code == None)  # noqa: E711
                        .all()
                    )

                    facility_db_count = len(facility_lookup)

                    logger.debug(
                        "Non unique duid: {} with {} in database and {} in facility duid is {}".format(
                            station_name,
                            facility_db_count,
                            facility_count,
                            duid,
                        )
                    )

                    if len(facility_lookup) > 0:
                        facility = facility_lookup.pop()

                if not facility:
                    facility = Facility(
                        code=unit_code,
                        network_code=duid,
                        created_by="pipeline.aemo.registration_exemption",
                    )
                    created_facility = True
                else:
                    facility.updated_by = (
                        "pipeline.aemo.registration_exemption"
                    )

                # Sanity checking
                if len(unit_code) < 3:
                    raise Exception(
                        "Unit code {} is invalid. For station {} with duid {}".format(
                            unit_code, station_name, duid
                        )
                    )

                # NOTE(review): the code is only set when missing, so a
                # facility matched by duid keeps its existing code -- confirm
                # that this guard is intended.
                if not facility.code:
                    facility.code = unit_code

                # Log that we have a new fueltech. This has to be compared
                # before the assignment below, otherwise it can never differ.
                previous_fueltech = facility.fueltech_id

                if (
                    fueltech
                    and previous_fueltech
                    and fueltech != previous_fueltech
                ):
                    logger.warning(
                        "Fueltech mismatch for {} {}: prev {} new {}".format(
                            station_name,
                            facility.code,
                            previous_fueltech,
                            fueltech,
                        )
                    )

                facility.fueltech_id = fueltech
                facility.network_code = duid
                facility.network_region = facility_region
                facility.network_name = network_name

                facility.capacity_registered = reg_cap
                facility.dispatch_type = facility_dispatch_type

                facility.unit_id = unit.id
                facility.unit_number = unit.number
                facility.unit_alias = unit.alias
                facility.unit_capacity = unit_size

                # Assume all REL's are operating if we don't have a status
                facility.status_id = "operating"

                facility.station = facility_station

                s.add(facility)
                s.commit()

                logger.info(
                    "REL: {} facility with duid {} and id {}".format(
                        "Created" if created_facility else "Updated",
                        facility.code,
                        facility.network_code,
                    )
                )

                if created_facility:
                    generators_added += 1
                else:
                    generators_updated += 1

        logger.info(
            "NEM REL Pipeline: Added {} stations, updated {} stations. Added {}, updated {} generators of {} total".format(
                stations_added,
                stations_updated,
                generators_added,
                generators_updated,
                len(generators),
            )
        )
    finally:
        # Always release the session, including when a record fails above.
        s.close()