def process_item(self, item, spider=None):
    """Upsert WEM participants from a live participant CSV payload.

    ``item["content"]`` is parsed as CSV text with a header row. Each row's
    participant is looked up by its normalized ``PARTICIPANT_CODE``. A
    missing participant is created; an existing one is renamed when the
    filtered name differs. Every valid row is added and committed on its
    own, including rows that needed no change, so the return value counts
    committed rows rather than changes.

    Args:
        item: Mapping whose ``"content"`` value is the CSV text.
        spider: Not used by this method.

    Returns:
        The number of rows whose commit succeeded.
    """
    s = self.session()
    record_count = 0

    try:
        csvreader = csv.DictReader(item["content"].split("\n"))

        for row in csvreader:
            if "PARTICIPANT_CODE" not in row:
                logger.error("Invalid row")
                continue

            participant_code = normalize_duid(
                normalize_string(row["PARTICIPANT_CODE"])
            )
            # Use the display name, falling back to the full name when the
            # filtered display name is falsy.
            participant_name = participant_name_filter(
                row["PARTICIPANT_DISPLAY_NAME"]
            ) or participant_name_filter(
                row.get("PARTICIPANT_FULL_NAME", None)
            )

            participant = (
                s.query(Participant)
                .filter(Participant.code == participant_code)
                .one_or_none()
            )

            if not participant:
                participant = Participant(
                    code=participant_code,
                    name=participant_name,
                    created_by="pipeline.wem.live.participant",
                )
                logger.info(
                    "Created new WEM participant: {}".format(participant_code)
                )
            elif participant.name != participant_name:
                participant.name = participant_name
                participant.updated_by = "pipeline.wem.live.participant"
                logger.info(
                    "Updated WEM participant: {}".format(participant_code)
                )

            try:
                s.add(participant)
                s.commit()
            except Exception as e:
                logger.error(e)
                # A failed commit leaves the session unusable until it is
                # rolled back; without this every later row would fail too.
                s.rollback()
            else:
                record_count += 1
    finally:
        # Release the session even when parsing or a query raises.
        s.close()

    return record_count
def process_participants(self, participants):
    """Look up or create a Participant for each entry and index it by name.

    For every entry the stripped ``"name"`` is matched against
    ``Participant.name``. When several rows match, the first one is used.
    When none match, a new Participant is added with its cleaned name and
    the entry's ``"abn"``. The resulting object is stored in
    ``self.participant_keys`` under the stripped name, and the session is
    committed once after all entries have been handled.

    Args:
        participants: Iterable of mappings with ``"name"`` and ``"abn"`` keys.
    """
    db = self.session()

    for entry in participants:
        cleaned_name = entry["name"].strip()
        matches = db.query(Participant).filter(
            Participant.name == cleaned_name
        )

        try:
            found = matches.one_or_none()
        except MultipleResultsFound:
            logger.info(
                "Found multiple participants with name {}".format(cleaned_name)
            )
            # Duplicates exist; settle for the first match.
            found = matches.first()

        if not found:
            found = Participant(name=cleaned_name)
            found.name_clean = participant_name_filter(cleaned_name)
            found.abn = entry["abn"]
            db.add(found)
            logger.info(
                "Found new NEM participant: {}".format(cleaned_name)
            )

        self.participant_keys[cleaned_name] = found

    db.commit()
def process_item(self, item, spider=None):
    """Upsert WEM participants from a participant CSV payload.

    ``item["content"]`` is parsed as CSV text with a header row. Each row's
    participant is looked up by its normalized ``Participant Code``. A
    missing participant is created; an existing one is renamed when the
    filtered ``Participant Name`` differs. Every valid row is added and
    committed on its own, including rows that needed no change.

    Args:
        item: Mapping whose ``"content"`` value is the CSV text.
        spider: Not used by this method.

    Returns:
        The number of rows whose commit succeeded.
    """
    s = self.session()
    record_count = 0

    try:
        csvreader = csv.DictReader(item["content"].split("\n"))

        for row in csvreader:
            if "Participant Code" not in row:
                logger.error("Invalid row")
                continue

            participant_code = normalize_duid(row["Participant Code"])
            participant_name = participant_name_filter(row["Participant Name"])

            participant = (
                s.query(Participant)
                .filter(Participant.code == participant_code)
                .one_or_none()
            )

            if not participant:
                # TODO: WEM also provides Address, City, State and Postcode,
                # but NEM doesn't, so they are ignored for now.
                participant = Participant(
                    code=participant_code,
                    name=participant_name,
                    created_by="pipeline.wem.participant",
                )
                logger.info(
                    "Created new WEM participant: {}".format(participant_code)
                )
            elif participant.name != participant_name:
                participant.name = participant_name
                participant.updated_by = "pipeline.wem.participant"
                logger.info(
                    "Updated WEM participant: {}".format(participant_code)
                )

            try:
                s.add(participant)
                s.commit()
            except Exception as e:
                logger.error(e)
                # A failed commit leaves the session unusable until it is
                # rolled back; without this every later row would fail too.
                s.rollback()
            else:
                record_count += 1
    finally:
        # Release the session even when parsing or a query raises.
        s.close()

    return record_count
def process_participants(self, participants):
    """Add a Participant for every entry whose owner is not yet known.

    The ``"Owner"`` value of each entry is normalized and matched against
    ``Participant.name``. Owners with no match get a new Participant, with
    both the normalized and the filtered name, added to the session.

    Args:
        participants: Iterable of mappings with an ``"Owner"`` key.
    """
    # NOTE(review): nothing in this method commits the session, so the caller
    # presumably does -- confirm.
    db = self.session()

    for entry in participants:
        owner_name = name_normalizer(entry["Owner"])

        # NOTE(review): the original comment here was cut off mid-sentence
        # ("Funky case of Todae solar where they put their name in the ...");
        # no special-casing is implemented.
        existing = (
            db.query(Participant)
            .filter(Participant.name == owner_name)
            .one_or_none()
        )
        if existing:
            continue

        created = Participant(
            name=owner_name,
            name_clean=participant_name_filter(owner_name),
        )
        db.add(created)
        logger.info(
            "GI: Added new partipant to NEM database: {}".format(owner_name)
        )
def process_item(self, item, spider=None):
    """Create or update participants, stations and facilities from MMS records.

    Each record in ``item`` supplies an ``"id"`` and a ``"facilities"`` list.
    The participant is taken from the first facility's ``PARTICIPANTID``.
    Every facility entry is then matched to a station (by network code) and
    a facility (by DUID), creating whichever is missing. Facilities are
    committed one at a time so a failing facility does not discard the rest.

    Args:
        item: Iterable of record mappings as described above.
        spider: Not used by this method.

    Returns:
        None. Created/updated totals are reported through the logger.
    """
    source = "au.nem.mms.dudetail_summary"

    s = self.session()
    records_updated = 0
    records_created = 0

    try:
        for record in item:
            participant_code = normalize_duid(
                record["facilities"][0]["PARTICIPANTID"]
            )

            # Step 1. Find participant by code or create
            participant = (
                s.query(ParticipantModel)
                .filter(ParticipantModel.code == participant_code)
                .one_or_none()
            )

            if not participant:
                participant = ParticipantModel(
                    code=participant_code,
                    network_code=participant_code,
                    created_by=source,
                )
                logger.debug("Created participant {}".format(participant_code))
            else:
                participant.updated_by = source

            # Step 3. now create the facilities and associate
            for facility_record in record["facilities"]:
                duid = normalize_duid(facility_record["DUID"])
                station_code = facility_map_station(
                    duid, normalize_duid(record["id"])
                )
                network_region = normalize_aemo_region(
                    facility_record["REGIONID"]
                )
                date_start = facility_record["date_start"]
                date_end = facility_record["date_end"]
                # A facility without an end date is still operating.
                facility_state = "operating" if date_end is None else "retired"

                # Step 2. Find station or create
                station = (
                    s.query(Station)
                    .filter(Station.network_code == station_code)
                    .one_or_none()
                )

                if not station:
                    station = Station(
                        code=station_code,
                        network_code=station_code,
                        network_id="NEM",
                        created_by=source,
                    )
                    logger.debug("Created station {}".format(station_code))
                else:
                    station.updated_by = source

                station.participant = participant

                if "DISPATCHTYPE" not in facility_record:
                    logger.error(
                        "MMS dudetailsummary: Invalid record: {}".format(
                            facility_record
                        )
                    )
                    continue

                dispatch_type = parse_dispatch_type(
                    facility_record["DISPATCHTYPE"]
                )

                facility = (
                    s.query(Facility)
                    .filter(Facility.network_code == duid)
                    .one_or_none()
                )

                # Tracked per facility so the debug line below is accurate
                # for every entry, not just the first one of a record.
                created = not facility

                if created:
                    facility = Facility(
                        code=duid,
                        network_code=duid,
                        dispatch_type=dispatch_type,
                        created_by=source,
                    )
                else:
                    facility.updated_by = source

                facility.network_region = network_region
                facility.deregistered = date_end
                facility.registered = date_start
                facility.status_id = facility_state

                if not facility.dispatch_type:
                    facility.dispatch_type = dispatch_type

                try:
                    # Add and flush the station explicitly so station.id is
                    # populated for a station created in this call
                    # (assumes the id is assigned on flush -- TODO confirm).
                    s.add(station)
                    s.flush()

                    # Associations
                    facility.station_id = station.id

                    s.add(facility)
                    s.commit()
                except Exception as e:
                    logger.error(e)
                    # The session must be rolled back before it can be used
                    # for the next facility.
                    s.rollback()
                    continue

                if created:
                    records_created += 1
                else:
                    records_updated += 1

                logger.debug(
                    "MMS DudetailSummary:{} facility record with id {}".format(
                        "Created" if created else "Updated", duid
                    )
                )

        logger.info(
            "MMS DudetailSummary: Created {} facility records and updated {}".format(
                records_created, records_updated
            )
        )
    finally:
        s.close()
def process_item(self, item, spider=None):
    """Create or update participants from MMS participant records.

    Each record is validated through ``ParticipantSchema``; records that
    fail validation are logged and skipped. The participant is looked up by
    the validated code, then either created or given the record's name and
    network name. Records are committed one at a time.

    Args:
        item: Iterable of mappings with ``"NAME"`` and ``"PARTICIPANTID"``.
        spider: Not used by this method.

    Returns:
        None. Created/updated totals are reported through the logger.

    Raises:
        Exception: If a record lacks ``"NAME"`` or ``"PARTICIPANTID"``.
    """
    s = self.session()
    records_updated = 0
    records_created = 0

    try:
        for record in item:
            if "NAME" not in record or "PARTICIPANTID" not in record:
                logger.error(record)
                raise Exception(
                    "Invalid MMS participant record: {}".format(record)
                )

            try:
                participant_schema = ParticipantSchema(
                    code=record["PARTICIPANTID"],
                    name=record["NAME"],
                    network_name=record["NAME"],
                )
            except Exception:
                logger.error(
                    "Validation error with record: {}".format(record["NAME"])
                )
                continue

            participant = (
                s.query(ParticipantModel)
                .filter(ParticipantModel.code == participant_schema.code)
                .one_or_none()
            )

            created = not participant

            if created:
                participant = ParticipantModel(
                    **{
                        **participant_schema.dict(),
                        "created_by": "au.nem.mms.participant",
                    }
                )
            else:
                participant.name = participant_schema.name
                participant.network_name = participant_schema.network_name

            try:
                s.add(participant)
                s.commit()
            except Exception as e:
                logger.error(e)
                # The session must be rolled back before it can be used for
                # the next record.
                s.rollback()
                continue

            # Only count and report records that were actually persisted.
            if created:
                records_created += 1
            else:
                records_updated += 1

            logger.debug(
                "{} participant record with id {}".format(
                    "Created" if created else "Updated",
                    participant_schema.code,
                )
            )

        logger.info(
            "Created {} records and updated {}".format(
                records_created, records_updated
            )
        )
    finally:
        s.close()
def process_item(self, item, spider):
    """Load WEM facilities, with their stations and participants, from CSV.

    ``item["content"]`` is parsed as CSV text with a header row. Rows whose
    ``Facility Type`` is a demand-side program, non-dispatchable load or
    network are skipped. For the remaining rows the participant, station and
    facility are looked up by code and created when missing. A new
    participant is committed immediately; facilities are committed together
    after the last row.

    Args:
        item: Mapping whose ``"content"`` value is the CSV text.
        spider: Object whose ``name`` is recorded as creator of new
            participants and facilities.

    Returns:
        The number of facility rows added to the session. The count is not
        reduced when the final commit fails.
    """
    skipped_types = ("Demand Side Program", "Non-Dispatchable Load", "Network")
    date_fmt = "%Y-%m-%d %H:%M:%S"

    s = SessionLocal()
    records_added = 0

    try:
        csvreader = csv.DictReader(item["content"].split("\n"))

        for row in csvreader:
            if "Participant Code" not in row:
                logger.error("Invalid row")
                continue

            if row["Facility Type"] in skipped_types:
                continue

            participant_name = participant_name_filter(row["Participant Name"])
            participant_network_name = normalize_string(
                row["Participant Name"]
            )
            participant_code = normalize_duid(row["Participant Code"])

            participant = (
                s.query(Participant)
                .filter(Participant.code == participant_code)
                .one_or_none()
            )

            if not participant:
                participant = Participant(
                    created_by=spider.name,
                    approved_at=datetime.now(),
                    code=participant_code,
                    name=participant_name,
                    network_name=participant_network_name,
                )
                s.add(participant)
                s.commit()
                logger.debug(
                    "Participant not found created new database entry: %s",
                    participant_code,
                )

            facility_code = normalize_duid(row["Facility Code"])
            station_code = parse_wem_facility_code(facility_code)

            station = (
                s.query(Station)
                .filter(Station.code == station_code)
                .one_or_none()
            )

            if not station:
                station = Station(
                    created_by="opennem.wem.facilities",
                    approved_at=datetime.now(),
                    code=station_code,
                    network_code=station_code,
                    participant=participant,
                )
                station.location = Location(state="WA")
                logger.debug("Added WEM station: {}".format(station_code))

            facility = (
                s.query(Facility)
                .filter(Facility.code == facility_code)
                .one_or_none()
            )

            if not facility:
                facility = Facility(
                    created_by=spider.name,
                    approved_at=datetime.now(),
                    code=facility_code,
                    network_id="WEM",
                    network_code=facility_code,
                    network_region="WEM",
                )

            # The same column feeds both the registered and unit capacity.
            capacity = clean_capacity(row["Maximum Capacity (MW)"])
            registered_date = row["Registered From"]

            facility.status_id = "operating"
            facility.capacity_registered = capacity
            facility.unit_id = 1
            facility.unit_number = 1
            facility.unit_capacity = capacity

            if registered_date:
                try:
                    facility.registered = datetime.strptime(
                        registered_date, date_fmt
                    )
                except ValueError:
                    # One malformed date should not abort the whole import.
                    logger.error(
                        "Bad date: %s for format %s", registered_date, date_fmt
                    )

            facility.station = station

            s.add(facility)
            records_added += 1

        try:
            s.commit()
        except IntegrityError as e:
            logger.error(e)
        except Exception as e:
            logger.error("Error: {}".format(e))
    finally:
        # Close the session on every exit path, not only after the commit.
        s.close()

    return records_added
def process_item(self, item, spider=None):
    """Load live WEM facilities, with stations and participants, from CSV.

    ``item["content"]`` is parsed as CSV text with a header row. For every
    row the participant, station and facility are looked up by code and
    created when missing. The station's names are refreshed, and its
    location geometry is set once from ``LATITUDE``/``LONGITUDE``. The
    facility gets its commissioning year and fuel technology. Rows are
    committed one at a time so a failing row does not discard the rest.

    Args:
        item: Mapping whose ``"content"`` value is the CSV text.
        spider: Optional object whose ``name`` is recorded as creator and
            approver of new participants and stations.

    Returns:
        The number of rows whose commit succeeded.
    """
    # spider defaults to None, so fall back to this pipeline's own identifier
    # instead of failing on ``None.name``.
    author = spider.name if spider is not None else "opennem.wem.live.facilities"
    date_fmt = "%Y"

    s = SessionLocal()
    records_added = 0

    try:
        csvreader = csv.DictReader(item["content"].split("\n"))

        for row in csvreader:
            if "PARTICIPANT_CODE" not in row:
                logger.error("Invalid row")
                continue

            participant_code = normalize_duid(row["PARTICIPANT_CODE"])

            participant = (
                s.query(Participant)
                .filter(Participant.code == participant_code)
                .one_or_none()
            )

            if not participant:
                participant = Participant(
                    created_by=author,
                    approved_by=author,
                    approved_at=datetime.now(),
                    code=participant_code,
                    network_code=participant_code,
                    network="WEM",
                )
                s.add(participant)
                s.commit()
                logger.warning(
                    "Participant not found created new database entry: {}".format(
                        participant_code
                    )
                )

            facility_code = normalize_duid(row["FACILITY_CODE"])
            station_code = parse_wem_facility_code(facility_code)
            station_name = station_name_cleaner(row["DISPLAY_NAME"])
            station_network_name = normalize_string(row["DISPLAY_NAME"])

            station = (
                s.query(Station)
                .filter(Station.code == station_code)
                .one_or_none()
            )

            if not station:
                station = Station(
                    created_by=author,
                    approved_by=author,
                    approved_at=datetime.now(),
                    code=station_code,
                    network_code=station_code,
                    participant=participant,
                )
                location = Location(state="WA")
                s.add(location)
                station.location = location
                logger.debug("Added WEM station: {}".format(station_code))

            lat = row["LATITUDE"]
            lng = row["LONGITUDE"]

            station.name = station_name
            station.network_name = station_network_name

            if lat and lng and not station.location.geom:
                # EWKT points are written "X Y", i.e. longitude then latitude.
                station.location.geom = "SRID=4326;POINT({} {})".format(
                    lng, lat
                )
                station.location.geocode_by = "aemo"
                station.location.geocode_approved = True

            facility = (
                s.query(Facility)
                .filter(Facility.code == facility_code)
                .one_or_none()
            )

            if not facility:
                facility = Facility(
                    created_by="opennem.wem.live.facilities",
                    approved_at=datetime.now(),
                    network_id="WEM",
                    code=facility_code,
                    network_code=facility_code,
                    network_region="WEM",
                )

            registered_date = row["YEAR_COMMISSIONED"]

            if registered_date:
                try:
                    facility.registered = datetime.strptime(
                        registered_date, date_fmt
                    )
                except ValueError:
                    logger.error(
                        "Bad date: %s for format %s", registered_date, date_fmt
                    )

            fueltech = lookup_fueltech(
                fueltype=row["PRIMARY_FUEL"], techtype=row["FACILITY_TYPE"]
            )

            # Never overwrite a fuel technology that is already set.
            if fueltech and not facility.fueltech:
                facility.fueltech_id = fueltech

            facility.status_id = "operating"
            facility.station = station

            s.add(station)
            s.add(facility)

            # Guard the commit that actually persists the row; roll back on
            # failure so the session stays usable for the next row.
            try:
                s.commit()
            except IntegrityError as e:
                logger.error(e)
                s.rollback()
            except Exception as e:
                logger.error("Error: {}".format(e))
                s.rollback()
            else:
                records_added += 1
    finally:
        s.close()

    return records_added