def load_bom_stations_json() -> None:
    """
    Imports BOM stations into the database from bom_stations.json

    The json is obtained using scripts/bom_stations.py. Stations listed in
    bom_capitals.json are flagged as capitals and given top priority.
    """
    session = SessionLocal()

    bom_stations = load_data("bom_stations.json", from_project=True)
    bom_capitals = load_data("bom_capitals.json", from_project=True)

    if not bom_stations:
        logger.error("Could not load bom stations")
        # fix: bail out rather than falling through and iterating None
        return

    codes = []
    stations_imported = 0

    for bom_station in bom_stations:
        if "code" not in bom_station:
            # fix: typo "Invalida" in the original log message
            logger.error("Invalid bom station ..")
            continue

        # skip duplicate station codes within the source file
        if bom_station["code"] in codes:
            continue

        codes.append(bom_station["code"])

        station = session.query(BomStation).filter_by(code=bom_station["code"]).one_or_none()

        if not station:
            logger.info("New BOM station: %s", bom_station["name"])
            station = BomStation(code=bom_station["code"])

        station.name = bom_station["name_full"]
        station.name_alias = bom_station["name"]
        station.website_url = bom_station["url"]
        station.feed_url = bom_station["json_feed"]
        station.priority = 5
        station.state = bom_station["state"]
        station.altitude = bom_station["altitude"]

        if "web_code" in bom_station:
            station.web_code = bom_station["web_code"]

        # capital-city stations get flagged and top priority
        if bom_station["code"] in bom_capitals:
            station.is_capital = True
            station.priority = 1

        # WKT points are (lng lat) == (x y)
        station.geom = "SRID=4326;POINT({} {})".format(bom_station["lng"], bom_station["lat"])

        stations_imported += 1
        session.add(station)

    logger.info("Imported {} stations".format(stations_imported))
    session.commit()
def load_bom_stations_csv() -> None:
    """
    Imports the BOM fixed-width stations format

    Made redundant with the new JSON
    """
    s = SessionLocal()

    station_csv = load_data("stations_db.txt", from_fixture=True)
    lines = station_csv.split("\n")

    for line in lines:
        code, state, name, registered, lng, lat = parse_fixed_line(line)

        station = s.query(BomStation).filter_by(code=code).one_or_none()

        if not station:
            station = BomStation(
                code=code,
                state=state,
                name=name,
                registered=registered,
            )

        # fix: WKT points are (lng lat) == (x y); the original passed
        # (lat, lng) which put every station on the wrong side of the globe
        station.geom = "SRID=4326;POINT({} {})".format(lng, lat)

        try:
            s.add(station)
            s.commit()
        except Exception:
            logger.error("Have {}".format(station.code))
def wikidata_join():
    """Join parsed wikidata records onto OpenNEM stations by exact name match."""
    session = SessionLocal()
    wikidata = load_data("wikidata-parsed.json", from_project=True)

    for entry in wikidata:
        station_name = entry.get("name")

        matches = session.query(Station).filter(Station.name == station_name).all()

        if not matches:
            logger.info("Didn't find a station for {}".format(station_name))
        elif len(matches) == 1:
            # exactly one match - safe to attach the wikidata fields
            station = matches[0]
            station.description = entry.get("description")
            station.wikipedia_link = entry.get("wikipedia")
            station.wikidata_id = entry.get("wikidata_id")
            session.add(station)
            logger.info("Updated station {}".format(station_name))
        else:
            # ambiguous name - skip rather than guessing
            logger.info("Found multiple for station {}".format(station_name))

    session.commit()
def load_networks() -> None:
    """
    Load the networks fixture
    """
    fixture = load_data("networks.json", from_fixture=True)
    session = SessionLocal()

    for rec in fixture:
        network = session.query(Network).filter_by(code=rec["code"]).one_or_none()

        if network is None:
            network = Network(code=rec["code"])

        network.label = rec["label"]
        network.country = rec["country"]
        network.timezone = rec["timezone"]
        network.timezone_database = rec["timezone_database"]
        network.offset = rec["offset"]
        network.interval_size = rec["interval_size"]
        network.network_price = rec["network_price"]

        # optional fields - only set when present in the fixture
        if "interval_shift" in rec:
            network.interval_shift = rec["interval_shift"]

        if "export_set" in rec:
            network.export_set = rec["export_set"]

        try:
            session.add(network)
            session.commit()
        except Exception:
            logger.error("Have {}".format(network.code))
def load_network_regions() -> None:
    """
    Load the network region fixture
    """
    fixture = load_data("network_regions.json", from_fixture=True)
    session = SessionLocal()

    for rec in fixture:
        region = (
            session.query(NetworkRegion)
            .filter_by(code=rec["code"], network_id=rec["network_id"])
            .one_or_none()
        )

        if region is None:
            region = NetworkRegion(code=rec["code"])

        region.network_id = rec["network_id"]

        try:
            session.add(region)
            session.commit()
            logger.debug("Loaded network region {}".format(region.code))
        except Exception:
            logger.error("Have {}".format(region.code))
def import_photos_from_fixtures() -> None:
    """
    Import photos to stations

    Downloads each photo record's image, writes the original to s3 and
    attaches a Photo record (keyed by a truncated content hash) to the
    matching station.
    """
    session = SessionLocal()
    photo_records = get_import_photo_data()

    for photo_record in photo_records:
        station = (
            session.query(Station)
            .filter(Station.code == photo_record.station_code)
            .one_or_none()
        )

        if not station:
            logger.error("Could not find station {}".format(photo_record.station_code))
            continue

        img = get_image_from_web(photo_record.image_url)

        if not img:
            logger.error("No image for {}".format(photo_record.image_url))
            continue

        # short content hash de-duplicates photos across runs
        hash_id = image_get_crypto_hash(img)[-8:]
        file_name = "{}_{}_{}.{}".format(
            hash_id, station.name.replace(" ", "_"), "original", "jpeg"
        )

        photo = session.query(Photo).filter_by(hash_id=hash_id).one_or_none()

        if not photo:
            photo = Photo(hash_id=hash_id)

        photo.name = file_name
        photo.width = img.size[0]
        photo.height = img.size[1]
        photo.original_url = photo_record.image_url
        photo.license_type = photo_record.license
        photo.license_link = photo_record.license_link
        photo.author = photo_record.author
        photo.author_link = photo_record.author_link

        if photo_record.is_primary:
            photo.is_primary = True

        # fixture photos are pre-approved
        photo.approved = True
        photo.approved_by = "opennem.importer.photos"
        photo.approved_at = datetime.now()

        img_buff = img_to_buffer(img)
        write_photo_to_s3(file_name, img_buff)

        # fix: station is guaranteed non-None here (checked above with
        # continue), so the original redundant `if station:` guards around
        # these three statements have been dropped
        station.photos.append(photo)
        session.add(photo)
        session.add(station)

    session.commit()
def process_item(self, item, spider=None):
    """
    Parse a WEM participant CSV (UPPER_SNAKE column format) and upsert
    Participant records.

    Returns the number of records committed.
    """
    s = SessionLocal()
    record_count = 0

    # fix: spider defaults to None but spider.name was dereferenced
    # unconditionally, raising AttributeError when called without a spider
    spider_name = spider.name if spider else None

    csvreader = csv.DictReader(item["content"].split("\n"))

    for row in csvreader:
        if "PARTICIPANT_CODE" not in row:
            logger.error("Invalid row")
            continue

        participant_code = normalize_duid(normalize_string(row["PARTICIPANT_CODE"]))
        # prefer the display name, fall back to the full name
        participant_name = participant_name_filter(
            row["PARTICIPANT_DISPLAY_NAME"]
        ) or participant_name_filter(row.get("PARTICIPANT_FULL_NAME", None))

        participant = (
            s.query(Participant)
            .filter(Participant.code == participant_code)
            .one_or_none()
        )

        if not participant:
            participant = Participant(
                code=participant_code,
                name=participant_name,
                created_by=spider_name,
            )
            logger.info("Created new WEM participant: {}".format(participant_code))
        elif participant.name != participant_name:
            participant.name = participant_name
            participant.updated_by = spider_name
            logger.info("Updated WEM participant: {}".format(participant_code))

        try:
            s.add(participant)
            s.commit()
            record_count += 1
        except Exception as e:
            logger.error(e)

    s.close()

    return record_count
def process_item(self, item, spider=None):
    """Upsert WEM participants parsed from a 'Participant Code' CSV."""
    session = SessionLocal()
    committed = 0

    reader = csv.DictReader(item["content"].split("\n"))

    for row in reader:
        if "Participant Code" not in row:
            logger.error("Invalid row")
            continue

        code = normalize_duid(row["Participant Code"])
        name = participant_name_filter(row["Participant Name"])

        participant = (
            session.query(Participant)
            .filter(Participant.code == code)
            .one_or_none()
        )

        if participant is None:
            participant = Participant(
                code=code,
                name=name,
                # @TODO WEM provides these but nem doesn't so ignore for now
                # address=row["Address"],
                # city=row["City"],
                # state=row["State"],
                # postcode=row["Postcode"],
                created_by="pipeline.wem.participant",
            )
            logger.info("Created new WEM participant: {}".format(code))
        elif participant.name != name:
            # name changed upstream - record who touched it
            participant.name = name
            participant.updated_by = "pipeline.wem.participant"
            logger.info("Updated WEM participant: {}".format(code))

        try:
            session.add(participant)
            session.commit()
            committed += 1
        except Exception as e:
            logger.error(e)

    session.close()

    return committed
def rooftop_facilities() -> None:
    """Create or refresh the synthetic rooftop-solar station and facility per state."""
    session = SessionLocal()

    for state_map in STATE_NETWORK_REGION_MAP:
        rooftop_code = "{}_{}_{}".format(
            ROOFTOP_CODE,
            state_map["network"].upper(),
            state_map["state"].upper(),
        )

        station = session.query(Station).filter_by(code=rooftop_code).one_or_none()

        if station is None:
            logger.info("Creating new station {}".format(rooftop_code))
            station = Station(code=rooftop_code)

        station.name = "Rooftop Solar {}".format(state_map["state"])
        station.description = "Solar rooftop facilities for {}".format(state_map["state"])
        station.approved = False
        station.approved_by = ""
        station.created_by = "opennem.importer.rooftop"

        if not station.location:
            station.location = Location(state=state_map["state"])

        facility = session.query(Facility).filter_by(code=rooftop_code).one_or_none()

        if facility is None:
            logger.info("Creating new facility {}".format(rooftop_code))
            facility = Facility(code=rooftop_code)

        network = state_map["network"]

        # map to separate AEMO rooftop network
        if network.upper() == "NEM":
            network = "AEMO_ROOFTOP"

        facility.network_id = network
        facility.network_region = state_map["network_region"]
        facility.fueltech_id = "solar_rooftop"
        facility.status_id = "operating"
        facility.active = True
        facility.dispatch_type = DispatchType.GENERATOR
        facility.approved_by = "opennem.importer.rooftop"
        facility.created_by = "opennem.importer.rooftop"

        station.facilities.append(facility)
        session.add(facility)
        session.add(station)

    session.commit()
def wikidata_join_mapping() -> None:
    """Attempts to join the wikidata to OpenNEM stations using the
    csv file with code -> wikidata_id mappings"""
    session = SessionLocal()
    wikidata = load_data("wikidata-parsed.json", from_project=True)

    with open("opennem/data/wikidata_mappings.csv") as fh:
        csvreader = csv.DictReader(
            fh,
            fieldnames=[
                "code",
                "name",
                "network_id",
                "network_region",
                "fueltech_id",
                "wikidata_id",
            ],
        )
        # skip the header row ("code" == "code") and rows without an id
        wikidata_mappings = {
            row["code"]: row["wikidata_id"]
            for row in csvreader
            if row["wikidata_id"] and row["code"] != "code"
        }

    for station_code, wikidata_id in wikidata_mappings.items():
        matches = [rec for rec in wikidata if rec["wikidata_id"] == wikidata_id]

        if not matches:
            logger.error("Could not find {}".format(wikidata_id))
            continue

        wikidata_record = matches[-1]

        station = (
            session.query(Station)
            .filter(Station.code == station_code)
            .one_or_none()
        )

        if station is None:
            logger.error("Didn't find a station for {}".format(station_code))
            continue

        station.description = wikidata_record.get("description")
        station.wikipedia_link = wikidata_record.get("wikipedia")
        station.wikidata_id = wikidata_record.get("wikidata_id")
        session.add(station)
        logger.info("Updated station {}".format(station_code))

    session.commit()
def import_rooftop_aemo_backfills() -> None:
    """Load the AEMO rooftop backfill CSV and upsert daily aggregate records."""
    backfill_file = Path(__file__).parent / "aemo_rooftop_backfill.csv"

    csvrecords = []

    with backfill_file.open() as fh:
        # first line is the header; feed it to DictReader explicitly
        fieldnames = fh.readline().strip().split(",")
        csvreader = csv.DictReader(fh, fieldnames=fieldnames)
        csvrecords = [BackfillRecord(**row) for row in csvreader]

    logger.debug("Loaded {} records".format(len(csvrecords)))

    export_records = [
        rec.dict(exclude={"energy_v2", "energy_v3", "network_region"})
        for rec in csvrecords
    ]

    session = SessionLocal()
    records_added = 0
    records_updated = 0

    for rec in export_records:
        model = (
            session.query(AggregateFacilityDaily)
            .filter_by(network_id="AEMO_ROOFTOP_BACKFILL")
            .filter_by(trading_day=rec["trading_day"])
            .filter_by(facility_code=rec["facility_code"])
        ).first()

        if model is None:
            model = AggregateFacilityDaily(**rec)
            records_added += 1
        else:
            records_updated += 1

        model.energy = rec["energy"]
        model.market_value = rec["market_value"]
        session.add(model)

    session.commit()
    logger.debug(
        "Added {} records and updated {}".format(records_added, records_updated)
    )
def import_aemo_facility_closure_dates() -> bool:
    """Apply expected closure dates from the AEMO closures spreadsheet to NEM facilities."""
    closure_records = parse_aemo_closures_xls()
    session = SessionLocal()

    for record in closure_records:
        facility: Optional[Facility] = (
            session.query(Facility)
            .filter_by(network_id="NEM")
            .filter_by(code=record.duid)
            .one_or_none()
        )

        if facility is None:
            logger.info(
                "Could not find facility {} - {}".format(record.duid, record.station_name)
            )
            continue

        facility.expected_closure_date = record.expected_closure_date
        facility.expected_closure_year = record.expected_closure_year
        session.add(facility)

    session.commit()

    return True
def load_facilitystatus() -> None:
    """
    Load the facility status fixture
    """
    fixture = load_data("facility_status.json", from_fixture=True)
    session = SessionLocal()

    for entry in fixture:
        status = session.query(FacilityStatus).filter_by(code=entry["code"]).one_or_none()

        if status is None:
            status = FacilityStatus(code=entry["code"])

        status.label = entry["label"]

        try:
            session.add(status)
            session.commit()
        except Exception:
            logger.error("Have {}".format(status.code))
def load_fueltechs() -> None:
    """
    Load the fueltechs fixture
    """
    fixture = load_data("fueltechs.json", from_fixture=True)
    session = SessionLocal()

    for entry in fixture:
        fueltech = session.query(FuelTech).filter_by(code=entry["code"]).one_or_none()

        if fueltech is None:
            fueltech = FuelTech(code=entry["code"])

        fueltech.label = entry["label"]
        fueltech.renewable = entry["renewable"]

        try:
            session.add(fueltech)
            session.commit()
        except Exception:
            logger.error("Have {}".format(fueltech.code))
def wikidata_join() -> None:
    """Attempts to join the wikidata to OpenNEM stations based on the name"""
    session = SessionLocal()
    wikidata = load_data("wikidata-parsed.json", from_project=True)

    # Use a better query
    # engine = get_database_engine()
    # station_lookup_query = """ """

    for entry in wikidata:
        name = entry.get("name")

        matches = session.query(Station).filter(Station.name == name).all()

        if not matches:
            logger.info("Didn't find a station for {}".format(name))
        elif len(matches) == 1:
            # unambiguous match - copy the wikidata fields across
            station = matches[0]
            station.description = entry.get("description")
            station.wikipedia_link = entry.get("wikipedia")
            station.wikidata_id = entry.get("wikidata_id")
            session.add(station)
            logger.info("Updated station {}".format(name))
        else:
            logger.info("Found multiple for station {}".format(name))

    session.commit()
def load_bom_stations_json():
    """
    Imports BOM stations into the database from bom_stations.json

    The json is obtained using scripts/bom_stations.py
    """
    session = SessionLocal()

    bom_stations = load_data("bom_stations.json", from_project=True)
    bom_capitals = load_data("bom_capitals.json", from_project=True)

    # fix: guard against a failed load - the original iterated the result
    # unconditionally and would crash on None
    if not bom_stations:
        logger.error("Could not load bom stations")
        return

    for bom_station in bom_stations:
        station = (
            session.query(BomStation)
            .filter_by(code=bom_station["code"])
            .one_or_none()
        )

        if not station:
            logger.info("New station: %s", bom_station["name"])
            station = BomStation(code=bom_station["code"])

        station.name = bom_station["name_full"]
        station.name_alias = bom_station["name"]
        station.website_url = bom_station["url"]
        station.feed_url = bom_station["json_feed"]
        station.priority = 5
        station.state = bom_station["state"]
        station.altitude = bom_station["altitude"]

        # capital-city stations get flagged and top priority
        if bom_station["code"] in bom_capitals:
            station.is_capital = True
            station.priority = 1

        # WKT points are (lng lat) == (x y)
        station.geom = "SRID=4326;POINT({} {})".format(bom_station["lng"], bom_station["lat"])

        session.add(station)

    session.commit()
def process_item(self, item, spider):
    """Persist a single scraped BOM observation row."""
    session = SessionLocal()

    observation = BomObservation(
        station_id=item["station_id"],
        observation_time=parse_date(item["aifstime_utc"]),
        temp_apparent=item["apparent_t"],
        temp_air=item["air_temp"],
        press_qnh=item["press_qnh"],
        wind_dir=item["wind_dir"],
        wind_spd=item["wind_spd_kmh"],
    )

    try:
        session.add(observation)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        # always release the session, even on commit failure
        session.close()

    return item
def import_osm_ways() -> int:
    """Gets the geometries for each OSM way id and store them against the
    station geom

    Returns the number of stations updated.
    """
    session = SessionLocal()

    stations_with_ways = (
        session.query(Station).join(Location).filter(Location.osm_way_id.isnot(None)).all()
    )

    if not stations_with_ways or len(stations_with_ways) < 1:
        logger.error("No stations with ways!")
        return 0

    station_count = 0

    for station in stations_with_ways:
        location: Location = station.location

        geom_boundary = None

        try:
            geom_boundary = get_osm_geom(station.location.osm_way_id)
        except Exception as e:
            logger.error("get osm wkt error: {}".format(e))

        if not geom_boundary:
            logger.error("Error getting WKT from OSM")
            continue

        location.boundary = geom_boundary

        session.add(location)
        session.commit()

        # fix: the counter was never incremented, so the function always
        # returned 0 regardless of how many stations it updated
        station_count += 1

        logger.info("Updated boundary geom from OSM for station: {}".format(station.code))

    return station_count
def import_emissions_map(file_name: str) -> None:
    """Import emission factors from CSV files for each network

    the format of the csv file is
    station_name,network_id,network_region,facility_code,emissions_factor_co2,fueltech_id,emission_factor_source
    """
    session = SessionLocal()

    content = load_data(file_name, from_project=True, skip_loaders=True)
    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    for rec in csvreader:
        network_id = rec["network_id"]
        facility_code = normalize_duid(rec["facility_code"])
        emissions_intensity = clean_float(rec["emissions_factor_co2"])

        if not facility_code:
            # fix: this branch previously logged "No emissions intensity"
            # which mislabelled the actual problem (a missing facility code)
            logger.info("No facility code for row: {}".format(rec))
            continue

        # fix: don't overwrite a stored factor with None when the csv
        # value fails to parse
        if emissions_intensity is None:
            logger.info("No emissions intensity for {}".format(facility_code))
            continue

        facility = (
            session.query(Facility)
            .filter_by(code=facility_code)
            .filter_by(network_id=network_id)
            .one_or_none()
        )

        if not facility:
            logger.info("No stored facility for {}".format(facility_code))
            continue

        facility.emissions_factor_co2 = emissions_intensity
        session.add(facility)
        logger.info("Updated {} to {}".format(facility_code, emissions_intensity))

    session.commit()

    return None
def import_osm_way_data() -> None:
    """Updates the OSM way ids for stations from the CSV file fixture"""
    session = SessionLocal()
    station_osm_records = get_import_osm_data()

    for station_osm_record in station_osm_records:
        station = (
            session.query(Station)
            .filter(Station.code == station_osm_record.station_code)
            .one_or_none()
        )

        if not station:
            logger.error("Could not find station {}".format(station_osm_record.station_code))
            continue

        if not station.location:
            logger.error(
                "Station {} does not have a location".format(station_osm_record.station_code)
            )
            continue

        station.location.osm_way_id = station_osm_record.osm_way_id
        session.add(station)

        # fix: this log previously ran once after the loop, which raised
        # NameError when there were no records and otherwise only named
        # the last station processed
        logger.info("Updated station: {}".format(station.code))

    session.commit()
def process_item(self, item, spider):
    """Parse the WEM facilities registration CSV and upsert participants,
    stations and facilities.

    Returns the number of facility records added in this pass.
    """
    s = SessionLocal()
    records_added = 0

    csvreader = csv.DictReader(item["content"].split("\n"))

    for row in csvreader:
        if "Participant Code" not in row:
            logger.error("Invalid row")
            continue

        # non-generation registrations are skipped entirely
        if row["Facility Type"] in [
            "Demand Side Program",
            "Non-Dispatchable Load",
            "Network",
        ]:
            continue

        participant = None

        participant_name = participant_name_filter(row["Participant Name"])
        participant_network_name = normalize_string(row["Participant Name"])
        participant_code = normalize_duid(row["Participant Code"])

        participant = (s.query(Participant).filter(
            Participant.code == participant_code).one_or_none())

        if not participant:
            participant = Participant(
                created_by=spider.name,
                approved_at=datetime.now(),
                code=participant_code,
                name=participant_name,
                network_name=participant_network_name,
            )

            # commit immediately so the participant is available for the
            # station relationship below
            s.add(participant)
            s.commit()

            logger.debug(
                "Participant not found created new database entry: %s",
                participant_code,
            )

        station = None
        facility = None

        facility_code = normalize_duid(row["Facility Code"])
        # WEM station codes are derived from the facility code
        station_code = parse_wem_facility_code(facility_code)

        station = (s.query(Station).filter(
            Station.code == station_code).one_or_none())

        if not station:
            station = Station(
                created_by="opennem.wem.facilities",
                approved_at=datetime.now(),
                code=station_code,
                network_code=station_code,
                participant=participant,
            )

            # all WEM stations are in WA
            location = Location(state="WA")
            station.location = location

            logger.debug("Added WEM station: {}".format(station_code))

        facility = (s.query(Facility).filter(
            Facility.code == facility_code).one_or_none())

        if not facility:
            facility = Facility(
                created_by=spider.name,
                approved_at=datetime.now(),
                code=facility_code,
                network_id="WEM",
                network_code=facility_code,
                network_region="WEM",
            )

        # NOTE(review): both capacities read the same CSV column - looks
        # intentional (single-unit facilities) but confirm against source
        capacity_registered = clean_capacity(row["Maximum Capacity (MW)"])
        capacity_unit = clean_capacity(row["Maximum Capacity (MW)"])
        registered_date = row["Registered From"]

        facility.status_id = "operating"
        facility.capacity_registered = capacity_registered
        facility.unit_id = 1
        facility.unit_number = 1
        facility.unit_capacity = capacity_unit

        if registered_date:
            registered_date_dt = datetime.strptime(registered_date, "%Y-%m-%d %H:%M:%S")
            facility.registered = registered_date_dt

        facility.station = station

        s.add(facility)
        records_added += 1

    # single commit for all facilities; session always closed
    try:
        s.commit()
    except IntegrityError as e:
        logger.error(e)
        pass
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        s.close()

    return records_added
def update_existing_geos() -> None:
    """
    Old method to update geos from existing facilities file on OpenNEM
    """
    station_fixture = load_data("facility_registry.json", from_fixture=True)

    stations = [{"station_code": k, **v} for k, v in station_fixture.items()]

    s = SessionLocal()

    for station_data in stations:
        station = None

        station_name = station_name_cleaner(station_data["display_name"])
        station_code = normalize_duid(station_data["station_code"])
        station_state = map_compat_facility_state(station_data["status"]["state"])

        station = s.query(Station).filter(
            Station.network_code == station_code).one_or_none()

        if not station:
            logger.info("Could not find station {}".format(station_code))
            continue

        if ("location" in station_data
                and "latitude" in station_data["location"]
                and station_data["location"]["latitude"]):
            # fix: the original assigned a 1-tuple here (stray trailing
            # comma) and had the coordinates reversed - WKT points are
            # (lng lat) == (x y)
            station.geom = "SRID=4326;POINT({} {})".format(
                station_data["location"]["longitude"],
                station_data["location"]["latitude"],
            )
            station.geocode_processed_at = datetime.now()
            station.geocode_by = "opennem"
            station.geocode_approved = True

        station.updated_by = "fixture.registry"

        s.add(station)

        logger.info("Updated station geo location {} ({})".format(
            station.code,
            station.name,
        ))

    # NOTE(review): only the FIRST station's duid_data is read here - this
    # looks like it should iterate every station's facilities; behaviour
    # preserved, confirm intent before changing
    facilities = [{
        "code": k,
        **v
    } for k, v in stations[0]["duid_data"].items()]

    # update fueltechs
    for facility_data in facilities:
        facility_duid = facility_data["code"]
        facility_fueltech = lookup_fueltech(facility_data["fuel_tech"])

        facility = s.query(Facility).filter(
            Facility.network_code == facility_duid).first()

        if not facility:
            logger.error(
                "Could not find existing facility {} for station {}".format(
                    facility_duid, station_code))
            continue

        if not facility.fueltech_id:
            facility.fueltech_id = facility_fueltech

        if facility.fueltech_id != facility_fueltech:
            # fix: log the facility's stored fueltech - the original read
            # station.fueltech_id, an attribute stations don't carry
            logger.error(
                "Fueltech mismatch for {}. Old is {} and new is {}".format(
                    station_code, facility.fueltech_id, facility_fueltech))

        s.add(facility)

    s.commit()
def opennem_init() -> None:
    """Import the opennem_stations fixture, creating or updating stations
    and their facilities."""
    session = SessionLocal()
    station_data = load_data("opennem_stations.json", from_project=True)
    stations = StationSet()

    for s in station_data:
        stations.add_dict(s)

    for station in stations:
        logger.debug("Adding station: {}".format(station.code))

        station_model = session.query(Station).filter_by(code=station.code).one_or_none()

        if not station_model:
            station_model = Station(code=station.code)

        # fixture stations are pre-approved
        station_model.approved = True
        station_model.approved_at = datetime.now()
        station_model.approved_by = "opennem.init"
        station_model.created_by = "opennem.init"
        station_model.description = station.description
        station_model.name = station.name
        station_model.network_name = station.network_name

        if not station_model.location:
            station_model.location = Location()

        if station.location:
            station_model.location.locality = station.location.locality
            station_model.location.state = station.location.state
            station_model.location.postcode = station.location.postcode
            station_model.location.country = station.location.country

            if station.location.lat and station.location.lng:
                # WKT points are (lng lat) == (x y)
                station_model.location.geom = "SRID=4326;POINT({} {})".format(
                    station.location.lng, station.location.lat)

        for fac in station.facilities:
            facility_model = (
                session.query(Facility)
                .filter_by(code=fac.code)
                .filter_by(network_id=fac.network.code)
                .one_or_none()
            )

            if not facility_model:
                facility_model = Facility(code=fac.code, network_id=fac.network.code)

            facility_model.network_region = fac.network_region
            facility_model.fueltech_id = fac.fueltech.code
            facility_model.status_id = fac.status.code
            facility_model.dispatch_type = fac.dispatch_type
            facility_model.capacity_registered = fac.capacity_registered
            facility_model.registered = fac.registered
            facility_model.unit_id = fac.unit_id
            facility_model.unit_number = fac.unit_number
            facility_model.unit_alias = fac.unit_alias
            facility_model.unit_capacity = fac.unit_capacity
            facility_model.emissions_factor_co2 = fac.emissions_factor_co2
            facility_model.approved = fac.approved
            facility_model.created_by = "opennem.init"
            # fix: the original assigned approved_by = fac.approved_by and
            # then immediately overwrote it with "opennem.init"; the dead
            # first assignment has been removed
            facility_model.approved_by = "opennem.init"

            session.add(facility_model)
            station_model.facilities.append(facility_model)
            logger.debug(" => Added facility {}".format(fac.code))

        # fix: stray debug print() replaced with logger.debug
        logger.debug(station_model)

        session.add(station_model)
        session.commit()
def registry_init() -> None:
    """Import the station registry, creating or updating stations and
    their facilities. Registry records are marked approved."""
    registry = registry_import()
    session = SessionLocal()

    for station in registry:
        station_dict = station.dict(exclude={"id"})
        # pprint(station_dict)

        station_model = session.query(Station).filter_by(
            code=station.code).one_or_none()

        if not station_model:
            # pylint:disable no-member
            station_model = fromdict(Station(), station_dict)

        station_model.approved = True
        station_model.approved_at = datetime.now()
        station_model.approved_by = "opennem.registry"
        station_model.created_by = "opennem.registry"

        # location
        station_model.location = fromdict(
            Location(),
            station_dict["location"],
            exclude=["id"],
        )

        if station.location.lat and station.location.lng:
            # WKT points are (lng lat) == (x y)
            station_model.location.geom = "SRID=4326;POINT({} {})".format(
                station.location.lng, station.location.lat)

        # commit the station first so it has an id for the facilities below
        session.add(station_model)
        session.commit()

        for fac in station.facilities:
            f = (session.query(Facility).filter_by(code=fac.code).filter_by(
                network_id=fac.network.code).one_or_none())

            if not f:
                print("new facility {} {}".format(fac.code, fac.network.code))
                f = Facility(**fac.dict(
                    exclude={
                        "id",
                        "fueltech",
                        "status",
                        "network",
                        "revision_ids",
                        "scada_power",
                    }))

            f.approved_by = "opennem.registry"
            f.created_by = "opennem.registry"
            f.approved_at = datetime.now()
            f.created_at = datetime.now()
            f.network_id = fac.network.code

            if fac.fueltech:
                f.fueltech_id = fac.fueltech.code

            if fac.network.code:
                f.network_code = fac.network.code

            f.status_id = fac.status.code
            f.approved = True

            # attach by id when the station is persisted, otherwise via the
            # relationship collection
            if station_model.id:
                f.station_id = station_model.id
            else:
                station_model.facilities.append(f)
                session.add(station_model)

            session.add(f)

        try:
            session.commit()
        except IntegrityError as e:
            logger.error(e)
def import_station_set(stations: StationSet,
                       only_insert_facilities: bool = False) -> None:
    """Import a StationSet into the database, upserting stations and
    facilities.

    When only_insert_facilities is True, existing facilities are left
    untouched and only new ones are inserted.
    """
    session = SessionLocal()

    for station in stations:
        add_or_update: str = "Updating"

        station_model = session.query(Station).filter_by(
            code=station.code).one_or_none()

        if not station_model:
            add_or_update = "Adding"
            station_model = Station(code=station.code)
            station_model.created_by = "opennem.init"

        logger.debug("{} station: {}".format(add_or_update, station.code))

        # only overwrite fields the incoming record actually provides
        if station.description:
            station_model.description = station.description

        if station.name:
            station_model.name = station.name

        station_model.approved = station.approved

        if station.approved:
            station_model.approved = True
            station_model.approved_at = datetime.now()
            station_model.approved_by = "opennem.init"
        else:
            station_model.approved_at = None
            station_model.approved_by = None

        if station.website_url:
            station_model.website_url = station.website_url

        if station.network_name:
            station_model.network_name = station.network_name

        if not station_model.location:
            station_model.location = Location()

        if station.location:
            station_model.location.locality = station.location.locality
            station_model.location.state = station.location.state
            station_model.location.postcode = station.location.postcode
            station_model.location.country = station.location.country

            if station.location.lat and station.location.lng:
                # WKT points are (lng lat) == (x y)
                station_model.location.geom = "SRID=4326;POINT({} {})".format(
                    station.location.lng, station.location.lat)

        # commit the station first so facilities can reference its id
        session.add(station_model)
        session.commit()

        for fac in station.facilities:
            facility_added = False

            facility_model = (session.query(Facility).filter_by(
                code=fac.code).filter_by(
                    network_id=fac.network.code).one_or_none())

            if facility_model and only_insert_facilities:
                logger.debug(" => skip updating {}".format(
                    facility_model.code))
                continue

            if not facility_model:
                facility_model = Facility(code=fac.code,
                                          network_id=fac.network.code)
                facility_added = True

            if facility_model.station_id != station_model.id:
                facility_model.station_id = station_model.id
                logger.debug(" => Reassigned facility {} to station {}".format(
                    facility_model.code, station_model.code))

            # fueltech - explicit id takes precedence over the related object
            if fac.fueltech:
                facility_model.fueltech_id = fac.fueltech.code

            if fac.fueltech_id:
                facility_model.fueltech_id = fac.fueltech_id

            # network - explicit id takes precedence over the related object
            if fac.network:
                facility_model.network_id = fac.network.code

            if fac.network_id:
                facility_model.network_id = fac.network_id

            # status - explicit id takes precedence over the related object
            if fac.status:
                facility_model.status_id = fac.status.code

            if fac.status_id:
                facility_model.status_id = fac.status_id

            # rest
            if fac.dispatch_type:
                facility_model.dispatch_type = fac.dispatch_type

            if fac.capacity_registered:
                facility_model.capacity_registered = fac.capacity_registered

            if fac.registered:
                facility_model.registered = fac.registered

            if fac.network_region:
                facility_model.network_region = fac.network_region

            # unit fields are always overwritten, even with None
            facility_model.unit_id = fac.unit_id
            facility_model.unit_number = fac.unit_number
            facility_model.unit_alias = fac.unit_alias
            facility_model.unit_capacity = fac.unit_capacity

            if fac.emissions_factor_co2:
                facility_model.emissions_factor_co2 = fac.emissions_factor_co2

            if fac.approved:
                facility_model.approved = fac.approved

            if fac.approved:
                facility_model.approved_by = "opennem.importer"
            else:
                facility_model.approved_by = None

            if not facility_model.created_by:
                facility_model.created_by = "opennem.init"

            session.add(facility_model)
            station_model.facilities.append(facility_model)
            logger.debug(" => {} facility {} to {} {}".format(
                "Added" if facility_added else "Updated",
                fac.code,
                facility_model.network_id,
                facility_model.network_region,
            ))

        session.add(station_model)
        session.commit()
def process_item(self, item, spider=None):
    """Store APVI rooftop solar generation records scraped by a spider.

    Aggregates per-postcode generation percentages into per-state rooftop
    facility totals and upserts them into facility_scada. When the item is
    flagged as the latest dataset, also refreshes the registered capacity
    and unit counts on the per-state rooftop facilities.

    :param item: dict with a "records" payload and optional "meta" flags
    :param spider: originating spider; its name is recorded as created_by
    :returns: number of scada records written (0 when nothing to store)
    """
    # NOTE(review): these validation branches log but do not return, so a
    # missing key still raises KeyError on the lookups below — confirm
    # whether an early return was intended
    if "records" not in item:
        logger.error("Invalid return response")

    records = item["records"]

    is_latest = False
    record_date = None

    if "meta" in item:
        if "is_latest" in item["meta"]:
            is_latest = item["meta"]["is_latest"]

        if "record_date" in item["meta"]:
            record_date = item["meta"]["record_date"]

    if "postcode" not in records:
        logger.error("No postcode data")

    # NOTE(review): message looks copy-pasted — this checks "installations"
    # but reports "No postcode data"
    if "installations" not in records:
        logger.error("No postcode data")

    if "postcodeCapacity" not in records:
        logger.error("No postcode capacity data")

    postcode_gen = records["postcode"]
    postcode_capacity = records["postcodeCapacity"]
    installations = records["installations"]

    engine = get_database_engine()
    session = SessionLocal()

    records_to_store = []

    # Single timestamp/author for the whole batch
    created_at = datetime.now()
    created_by = ""

    if spider and hasattr(spider, "name"):
        created_by = spider.name

    for record in postcode_gen:
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            # Per-state synthetic rooftop facility code, e.g. <ROOFTOP>_APVI_NSW
            facility_code = "{}_{}_{}".format(ROOFTOP_CODE, "apvi".upper(),
                                              state.upper())

            interval_time = parse_date(
                record["ts"],
                dayfirst=False,
                yearfirst=True,
            )

            interval_time = interval_time.astimezone(
                NetworkNEM.get_timezone())

            # Each postcode value is a percentage of that postcode's
            # installed capacity; sum to MW-equivalent for the state,
            # excluding WA postcodes outside the SWIS grid
            generated = sum([
                float(v) / 100 * postcode_capacity[k]
                for k, v in record.items()
                if k.startswith(prefix) and v and k in postcode_capacity
                and k[:2] not in WA_NON_SWIS
            ])

            if not generated:
                continue

            __record = {
                "created_by": created_by,
                "created_at": created_at,
                "network_id": "APVI",
                "trading_interval": interval_time,
                "facility_code": facility_code,
                "generated": generated,
            }

            records_to_store.append(__record)

    STATE_CAPACITIES = {}

    if is_latest:
        # temporarily only run getting capacities on latest
        logger.info("Updating capacities on %s", record_date)

        # Roll per-postcode-prefix capacities up into per-state totals
        for postcode_prefix, capacity_val in postcode_capacity.items():
            for state, prefix in STATE_POSTCODE_PREFIXES.items():
                if state not in STATE_CAPACITIES:
                    STATE_CAPACITIES[state] = 0

                if postcode_prefix.startswith(prefix):
                    STATE_CAPACITIES[state] += capacity_val

        for state, state_capacity in STATE_CAPACITIES.items():
            facility_code = "{}_{}_{}".format(ROOFTOP_CODE, "apvi".upper(),
                                              state.upper())

            state_facility: Facility = (session.query(Facility).filter_by(
                code=facility_code).one_or_none())

            if not state_facility:
                # NOTE(review): Exception doesn't interpolate %s — the
                # facility_code is passed as a second arg, not formatted
                raise Exception("Could not find rooftop facility for %s",
                                facility_code)

            state_facility.capacity_registered = state_capacity

            if state.lower() in installations:
                state_number_units = installations[state.lower()]
                state_facility.unit_number = state_number_units

            session.add(state_facility)
            session.commit()

    if len(records_to_store) < 1:
        return 0

    # Bulk upsert keyed on the scada natural key; updates generated and
    # created_by on conflict
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval", "network_id", "facility_code", "is_forecast"
        ],
        set_={
            "generated": stmt.excluded.generated,
            "created_by": stmt.excluded.created_by,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    """Import WEM live facility CSV rows into participants, stations and
    facilities.

    Parses the CSV payload in ``item["content"]`` row by row, creating any
    missing Participant / Station / Facility records and updating names,
    geometry and registration metadata on each pass.

    :param item: dict containing the raw CSV text under "content"
    :param spider: originating spider; its name is recorded on created rows.
        NOTE(review): defaults to None but ``spider.name`` is accessed
        unconditionally when creating records — would raise AttributeError
    :returns: number of facility rows processed
    """
    s = SessionLocal()

    records_added = 0
    csvreader = csv.DictReader(item["content"].split("\n"))

    for row in csvreader:
        if "PARTICIPANT_CODE" not in row:
            logger.error("Invalid row")
            continue

        participant = None

        participant_code = normalize_duid(row["PARTICIPANT_CODE"])

        participant = (s.query(Participant).filter(
            Participant.code == participant_code).one_or_none())

        if not participant:
            participant = Participant(
                created_by=spider.name,
                approved_by=spider.name,
                approved_at=datetime.now(),
                code=participant_code,
                network_code=participant_code,
                network="WEM",
            )
            s.add(participant)
            s.commit()

            logger.warning(
                "Participant not found created new database entry: {}".
                format(participant_code))

        station = None
        facility = None

        # WEM station codes are derived from the facility code
        facility_code = normalize_duid(row["FACILITY_CODE"])
        station_code = parse_wem_facility_code(facility_code)

        station_name = station_name_cleaner(row["DISPLAY_NAME"])
        station_network_name = normalize_string(row["DISPLAY_NAME"])

        station = (s.query(Station).filter(
            Station.code == station_code).one_or_none())

        if not station:
            station = Station(
                created_by=spider.name,
                approved_by=spider.name,
                approved_at=datetime.now(),
                code=station_code,
                network_code=station_code,
                participant=participant,
            )

            # All WEM stations are in Western Australia
            location = Location(state="WA")
            s.add(location)
            station.location = location

            logger.debug("Added WEM station: {}".format(station_code))

        lat = row["LATITUDE"]
        lng = row["LONGITUDE"]

        station.name = station_name
        station.network_name = station_network_name

        # Geometry is only set once and marked as AEMO-sourced
        # NOTE(review): WKT POINT is (x y) == (lng lat) but this formats
        # lat first — other importers in this file use (lng, lat); confirm
        if lat and lng and not station.location.geom:
            station.location.geom = "SRID=4326;POINT({} {})".format(
                lat, lng)
            station.location.geocode_by = "aemo"
            station.location.geocode_approved = True

        facility = (s.query(Facility).filter(
            Facility.code == facility_code).one_or_none())

        if not facility:
            facility = Facility(
                created_by="opennem.wem.live.facilities",
                approved_at=datetime.now(),
                network_id="WEM",
                code=facility_code,
                network_code=facility_code,
                network_region="WEM",
            )

        registered_date = row["YEAR_COMMISSIONED"]

        if registered_date:
            registered_date_dt = None

            # Commissioning date is year-only in the source CSV
            date_fmt = "%Y"

            try:
                registered_date_dt = datetime.strptime(
                    registered_date, date_fmt)
            except Exception:
                logger.error("Bad date: %s for format %s", registered_date,
                             date_fmt)

            if registered_date_dt:
                facility.registered = registered_date_dt

        fueltech = lookup_fueltech(fueltype=row["PRIMARY_FUEL"],
                                   techtype=row["FACILITY_TYPE"])

        # Never overwrite an existing fueltech assignment
        if fueltech and not facility.fueltech:
            facility.fueltech_id = fueltech

        facility.status_id = "operating"
        facility.station = station

        # Committed per-row; the trailing commit below is then a no-op on
        # the success path
        s.add(station)
        s.add(facility)
        s.commit()
        records_added += 1

    try:
        s.commit()
    except IntegrityError as e:
        logger.error(e)
        pass
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        s.close()

    return records_added
def _station_photo(img, name: str, image_url: str, variant) -> Photo:
    """Build a Photo record for *img* and upload its bytes to S3.

    *variant* is interleaved into the file name: the literal string
    "original" for the full-size image, or the pixel width for a thumbnail.
    """
    # Short content hash deduplicates and namespaces the file name
    hash_id = image_get_crypto_hash(img)[-8:]
    file_name = "{}_{}_{}.{}".format(hash_id, name.replace(" ", "_"),
                                     variant, "jpeg")

    photo = Photo(
        name=file_name,
        hash_id=hash_id,
        width=img.size[0],
        height=img.size[1],
        original_url=image_url,
    )

    write_photo_to_s3(file_name, img_to_buffer(img))

    return photo


def wikidata_photos() -> None:
    """Attach wikidata photos to stations.

    For each entry in wikidata-photos.json, find all stations with the
    matching wikidata id, download the image, and store a full-size copy
    plus a 280x340 thumbnail as Photo records linked to the station.
    """
    session = SessionLocal()
    wikidata = load_data("wikidata-photos.json", from_project=True)

    for entry in wikidata:
        image_url = entry["thumb"]
        name = entry["itemLabel"]
        wiki_id = dataid_from_url(entry["item"])

        stations = session.query(Station).filter(
            Station.wikidata_id == wiki_id).all()

        if not stations:
            logger.error("Could not find station {}".format(name))
            continue

        for station in stations:
            img = get_image(image_url)

            if not img:
                logger.error("No image for {}".format(name))
                continue

            # Full-size copy (shared helper replaces the previous
            # duplicated photo-creation code)
            photo = _station_photo(img, name, image_url, "original")
            station.photos.append(photo)

            # Thumbnail copy — Image.thumbnail resizes img in place
            img.thumbnail((280, 340))
            photo_thumb = _station_photo(img, name, image_url, img.size[0])
            station.photos.append(photo_thumb)

            session.add(photo)
            session.add(photo_thumb)
            session.add(station)
            session.commit()
def setup_network_flow_stations(network: NetworkSchema = NetworkNEM) -> None:
    """Creates a station for each network region and a facility for each of
    imports exports so that energy values in facility_scada can be matched up
    per-region

    :param network: the network schema to create flow stations for
    """
    session = SessionLocal()

    network_regions = get_network_region_schema(network)

    for network_region in network_regions:
        logger.info(
            f"Setting up for network {network.code} and region: {network_region.code}"
        )

        # One synthetic station per region, keyed by the generated flow code
        flow_station_id = generated_flow_station_id(network, network_region)

        flow_station = (session.query(Station).filter_by(
            code=flow_station_id).filter_by(
                network_code=flow_station_id).one_or_none())

        if not flow_station:
            flow_station = Station(code=flow_station_id,
                                   network_code=flow_station_id)

        flow_station.approved = False
        flow_station.created_by = IMPORTER_NAME

        if not flow_station.location:
            flow_station.location = Location(
                state=state_from_network_region(network_region.code))

        flow_station.name = "Flows for {} state {}".format(
            network.code.upper(),
            state_from_network_region(network_region.code.upper()))

        # One facility per flow direction (imports and exports)
        flow_facilities = [(i, generated_flow_station_id(network,
                                                         network_region, i))
                           for i in FlowDirection]

        for (flow_direction, flow_facility_id) in flow_facilities:
            flow_facility_model = (session.query(Facility).filter_by(
                code=flow_facility_id).filter_by(
                    dispatch_type=DispatchType.GENERATOR).filter_by(
                        network_id=network.code).filter_by(
                            network_region=network_region.code).one_or_none())

            if not flow_facility_model:
                flow_facility_model = Facility(  # type: ignore
                    code=flow_facility_id,
                    dispatch_type=DispatchType.GENERATOR,
                    network_id=network.code,
                    network_region=network_region.code,
                )

            flow_facility_model.status_id = "operating"
            flow_facility_model.approved = False
            flow_facility_model.created_by = IMPORTER_NAME
            # The fueltech encodes the flow direction
            flow_facility_model.fueltech_id = flow_direction.value
            flow_facility_model.updated_at = datetime.now()

            flow_station.facilities.append(flow_facility_model)

            # Fix: log inside the loop — previously this statement sat after
            # the loop and reported only the last facility id per region
            logger.info(
                "Created network trading flow station facility: {}".format(
                    flow_facility_id))

        session.add(flow_station)

    session.commit()

    return None
def wikidata_photos():
    """Attach wikidata photos to stations (legacy single-station variant).

    Downloads each image referenced in wikidata-photos.json and stores a
    full-size copy plus a 280x340 thumbnail as Photo records, linking them
    to the matching station when one exists.
    """
    session = SessionLocal()
    wikidata = load_data("wikidata-photos.json", from_project=True)

    for entry in wikidata:
        image_url = entry["thumb"]
        name = entry["itemLabel"]
        wiki_id = dataid_from_url(entry["item"])

        station = (session.query(Station).filter(
            Station.wikidata_id == wiki_id).one_or_none())

        if not station:
            # Fix: use the module logger instead of print, consistent with
            # the rest of the file. Processing deliberately continues so the
            # image is still fetched and uploaded without a station link.
            logger.error("Could not find station {}".format(name))
            # continue

        img = get_image(image_url)

        if not img:
            logger.error("No image for {}".format(name))
            continue

        # file_name = urlparse(image_url).path.split("/")[-1:]
        file_name = "{}_{}.{}".format(name.replace(" ", "_"), "original",
                                      "jpeg")

        photo = Photo(
            name=file_name,
            width=img.size[0],
            height=img.size[1],
            original_url=image_url,
        )

        img_buff = img_to_buffer(img)
        write_photo_to_s3(file_name, img_buff)

        if station:
            station.photos.append(photo)

        # Thumbnail copy — Image.thumbnail resizes img in place
        img.thumbnail((280, 340))

        file_name = "{}_{}.{}".format(name.replace(" ", "_"), img.size[0],
                                      "jpeg")

        photo_thumb = Photo(
            name=file_name,
            width=img.size[0],
            height=img.size[1],
            original_url=image_url,
        )

        img_buff = img_to_buffer(img)
        write_photo_to_s3(file_name, img_buff)

        if station:
            station.photos.append(photo_thumb)

        session.add(photo)
        session.add(photo_thumb)

        if station:
            session.add(station)
            session.commit()