Пример #1
0
    def process_item(self, item, spider=None):
        """Group the item's generator records by cleaned station name.

        Every record in ``item["generators"]`` gains two keys:

        * ``name`` -- ``station_name_cleaner`` applied to its
          ``station_name``.
        * ``name_join`` -- ``False`` when ``facility_station_join_by_name``
          returns a truthy value for the cleaned name, otherwise the
          record's ``duid``.

        Records are then bucketed under the ``(name, name_join)`` tuple.

        Args:
            item: Mapping that must contain a ``"generators"`` iterable of
                dicts, each with ``station_name`` and ``duid`` keys.
            spider: Not used by this method.

        Returns:
            A new dict with the item's keys, where ``"generators"`` is
            replaced by a dict mapping ``(name, name_join)`` to the list of
            enriched records. Keys appear in first-seen order and records
            keep their input order inside each bucket.

        Raises:
            Exception: If ``item`` has no ``"generators"`` key.
        """
        if "generators" not in item:
            raise Exception("No generators found in item pipeline failed")

        generators_grouped = {}

        for record in item["generators"]:
            # Clean the name once and reuse it for both derived fields.
            name = station_name_cleaner(record["station_name"])
            name_join = (False if facility_station_join_by_name(name) else
                         record["duid"])

            # Bucket directly by key: the input is not sorted, so equal keys
            # may be non-adjacent and must be merged into one list.
            generators_grouped.setdefault((name, name_join), []).append({
                **record,
                "name": name,
                "name_join": name_join,
            })

        return {**item, "generators": generators_grouped}
Пример #2
0
    def process_facilities(self, records):
        """Upsert stations and facilities from grouped facility records.

        For every group in ``records`` this looks up (or creates) the owning
        ``Station`` and then looks up (or creates) one ``Facility`` per
        record, filling in fields that are still empty on existing
        facilities. Each facility is committed as it is processed and a
        final commit runs after the loop.

        Args:
            records: Mapping whose values are lists of facility record
                dicts; the keys are ignored. Records are read with the keys
                ``station_name``, ``duid``, ``FuelType``, ``TechType``,
                ``UnitStatus``, ``Units``, ``NameCapacity``,
                ``unit_capacity``, ``SurveyEffective`` and ``Region``.

        Raises:
            Exception: If a facility ends up without a status, or whatever
                the final commit raises (logged, then re-raised).
        """
        s = self.session()

        # The session is closed in ``finally`` so it is released even when a
        # lookup, commit or the status check below raises mid-loop.
        try:
            for facility_records in records.values():
                facility_index = 1
                facility_station = None
                created_station = False

                station_network_name = record_get_station_name(
                    facility_records)
                station_name = station_name_cleaner(station_network_name)

                duid_unique = has_unique_duid(facility_records)
                facility_count = len(facility_records)

                # Step 1. Find the station
                # First by duid if it's unique
                duid = get_unique_duid(facility_records)

                # all GI records should have a region
                station_network_region = get_unique_reqion(facility_records)

                # The record used to name a newly created station.
                facility_station_record = get_station_record_from_facilities(
                    facility_records)

                if duid and duid_unique and facility_count == 1:
                    facility_lookup = None

                    try:
                        facility_lookup = (s.query(Facility).filter(
                            Facility.network_code == duid).filter(
                                Facility.network_region ==
                                station_network_region).one_or_none())
                    except MultipleResultsFound:
                        logger.error(
                            "Found multiple duid for station with code {}".
                            format(duid))
                        # Skip the whole group: the station is ambiguous.
                        continue

                    if facility_lookup and facility_lookup.station:
                        facility_station = facility_lookup.station

                # NOTE(review): parenthesised exactly as Python evaluated the
                # original expression. When the duids are not unique this
                # branch runs even if ``duid`` is empty, in which case the
                # query matches on an empty network code -- confirm that is
                # intended.
                if (duid and duid_unique and facility_count > 1) or (
                        not duid_unique):

                    facility_lookup = (s.query(Facility).filter(
                        Facility.network_code == duid).filter(
                            Facility.network_region ==
                            station_network_region).first())

                    if facility_lookup and facility_lookup.station:
                        facility_station = facility_lookup.station

                if not facility_station and facility_station_join_by_name(
                        station_name):
                    try:
                        facility_station = (s.query(Station).filter(
                            Station.name == station_name).one_or_none())
                    except MultipleResultsFound:
                        logger.warning(
                            "Multiple results found for station name : {}".
                            format(station_name))
                        facility_station = None

                # If we have a station name, and no duid, and it's ok to join
                # by name then find the station (make sure to region lock)
                if (station_name and not duid and not facility_station
                        and facility_station_join_by_name(station_name)):
                    facility = (s.query(Facility).join(
                        Facility.station).filter(
                            Facility.network_region ==
                            station_network_region).filter(
                                Station.name == station_name).first())

                    if facility:
                        facility_station = facility.station

                # Create one as it doesn't exist
                if not facility_station:
                    facility_station = Station(
                        name=station_name,
                        network_name=name_normalizer(
                            facility_station_record["station_name"]),
                        network_id="NEM",
                        created_by="pipeline.aemo.general_information",
                    )

                    s.add(facility_station)
                    s.commit()

                    created_station = True
                else:
                    facility_station.updated_by = (
                        "pipeline.aemo.general_information")

                # Step 2. Find or create a facility for every record.
                for facility_record in facility_records:
                    if facility_record["FuelType"] in ["Natural Gas Pipeline"]:
                        continue

                    # skip these statuses too
                    if facility_record[
                            "UnitStatus"] in FACILITY_INVALID_STATUS:
                        continue

                    facility = None
                    created_facility = False

                    facility_network_name = name_normalizer(
                        facility_record["station_name"])
                    facility_name = station_name_cleaner(
                        facility_record["station_name"])
                    duid = normalize_duid(facility_record["duid"])
                    reg_cap = clean_capacity(facility_record["NameCapacity"])

                    # ``facility_index`` advances by the unit count of every
                    # stored record, so unit ids run on across the station.
                    units_num = facility_record["Units"] or 1
                    unit_id = facility_index + (units_num - 1)

                    unit = parse_unit_duid(unit_id, duid)
                    unit_size = clean_capacity(
                        facility_record["unit_capacity"])
                    unit_code = get_unit_code(unit, duid,
                                              facility_record["station_name"])

                    facility_comissioned = facility_record["SurveyEffective"]
                    facility_comissioned_dt = None

                    if isinstance(facility_comissioned, datetime):
                        facility_comissioned_dt = facility_comissioned
                    elif isinstance(facility_comissioned, str):
                        try:
                            facility_comissioned_dt = datetime.strptime(
                                facility_comissioned, "%d/%m/%y")
                        except ValueError:
                            logger.error("Error parsing date: {}".format(
                                facility_comissioned))

                    facility_status = map_aemo_facility_status(
                        facility_record["UnitStatus"])
                    facility_network_region = normalize_aemo_region(
                        facility_record["Region"])
                    facility_fueltech = (lookup_fueltech(
                        facility_record["FuelType"],
                        techtype=facility_record["TechType"],
                    ) if ("FuelType" in facility_record
                          and facility_record["FuelType"]) else None)

                    if not facility_fueltech:
                        logger.error(
                            "Error looking up fueltech: {} {} ".format(
                                facility_record["FuelType"],
                                facility_record["TechType"],
                            ))

                    # check if we have it by code first
                    facility = (s.query(Facility).filter(
                        Facility.code == unit_code).one_or_none())

                    if not facility and duid:
                        try:
                            facility = (s.query(Facility).filter(
                                Facility.network_code == duid).filter(
                                    Facility.network_region ==
                                    facility_network_region).one_or_none())
                        except MultipleResultsFound:
                            logger.warning(
                                "Multiple results found for duid : {}".format(
                                    duid))

                        if facility:
                            if facility.station and not facility_station:
                                facility_station = facility.station

                            logger.info(
                                "GI: Found facility by DUID: code {} station {}"
                                .format(
                                    facility.code,
                                    facility.station.name
                                    if facility.station else None,
                                ))

                    # Done trying to find existing
                    if not facility:
                        facility = Facility(
                            code=unit_code,
                            network_code=duid,
                            created_by="pipeline.aemo.general_information",
                        )
                        facility.station = facility_station

                        created_facility = True

                    # Only fill fields that are still empty so values already
                    # stored on the facility are not overwritten.
                    if duid and not facility.network_code:
                        facility.network_code = duid
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    if not facility.network_region:
                        facility.network_region = facility_network_region
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    if not facility.network_name:
                        facility.network_name = facility_network_name
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    if not facility.fueltech_id and facility_fueltech:
                        facility.fueltech_id = facility_fueltech
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    # Capacities are also refreshed when the stored status is
                    # set and is anything other than "operating".
                    if not facility.capacity_registered or (
                            facility.status
                            and facility.status != "operating"):
                        facility.capacity_registered = reg_cap
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    # @TODO work this out
                    # facility.dispatch_type = facility_dispatch_type

                    if not facility.unit_id:
                        facility.unit_id = unit.id
                        facility.unit_number = unit.number
                        facility.unit_size = unit_size
                        facility.unit_alias = unit.alias

                    if not facility.unit_capacity or (
                            facility.status
                            and facility.status != "operating"):
                        facility.unit_capacity = unit_size
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    # The status is always overwritten with the mapped value.
                    facility.status_id = facility_status

                    if not facility.registered and facility_comissioned_dt:
                        facility.registered = facility_comissioned_dt
                        facility.updated_by = (
                            "pipeline.aemo.general_information")

                    facility.station = facility_station

                    if facility.fueltech_id is None:
                        logger.warning(
                            "Could not find fueltech for: {} {}".format(
                                facility.code, facility.network_code))

                    if facility.status_id is None:
                        raise Exception(
                            "GI: Failed to map status ({}) on row: {}".format(
                                facility.status_id, facility_record))

                    s.add(facility)
                    s.commit()

                    facility_index += units_num

                    if created_station:
                        logger.info("GI: {} station with name {} ".format(
                            "Created",
                            station_name,
                        ))

                    if created_facility:
                        logger.info(
                            "GI: {} facility with duid {} to station {}".
                            format(
                                "Created",
                                duid,
                                station_name,
                            ))

            try:
                s.commit()
            except Exception as e:
                logger.error(e)
                raise
        finally:
            s.close()
Пример #3
0
def opennem_import():
    """Overlay AEMO REL, GI and registry data onto MMS data.

    The MMS stations (``mms.json``) are the base set. Three passes are then
    applied in order:

    * REL (``rel.json``): adds missing stations and facilities; for
      facilities present in both, forces the status to operating and copies
      fueltech and registered capacity when REL provides them.
    * GI (``nem_gi.json``): adds missing stations, optionally joining to an
      existing station by name, and updates status / fills fueltech on
      facilities that already exist.
    * Registry (``registry.json``): adds missing stations whose ``state`` is
      ``"WA"``, copies ``lat``/``lng`` onto existing stations and fills
      missing status / fueltech on existing facilities.

    The merged result is written to ``data/opennem.json``.

    Returns:
        A list of ``StationSchema`` objects, one per merged station.
    """
    nem_mms = station_reindex(load_data("mms.json", from_project=True))
    nem_rel = station_reindex(load_data("rel.json", from_project=True))
    nem_gi = station_reindex(load_data("nem_gi.json", True))
    registry = station_reindex(load_data("registry.json", True))

    # Shallow copy: the station dicts are shared with ``nem_mms`` and are
    # updated in place below.
    opennem = nem_mms.copy()

    # REL
    for station_code, rel_station in nem_rel.items():
        if station_code not in opennem:
            logger.info("REL: New record {}".format(station_code))
            opennem[station_code] = rel_station
            continue

        logger.info("REL: Existing record {}".format(station_code))
        om_station = opennem.get(station_code)

        for rel_facility_duid, rel_facility in rel_station[
            "facilities"
        ].items():

            if not rel_facility_duid:
                continue

            if rel_facility_duid not in om_station["facilities"]:
                logger.info(
                    " ==> Added duid {} to station ".format(rel_facility_duid)
                )
                om_station["facilities"][rel_facility_duid] = rel_facility
                continue

            om_facility = om_station["facilities"][rel_facility_duid]

            # Anything listed in REL is treated as operating.
            if om_facility["status"]["code"] != "operating":
                logger.info(
                    "REL: Set status for {} to {}".format(
                        rel_facility_duid,
                        "operating",
                    )
                )
                om_facility["status"]["code"] = "operating"
                om_facility["status"]["label"] = "Operating"

            if rel_facility["fueltech"]:
                logger.info(
                    "REL: Set fueltech for {} to {}".format(
                        rel_facility_duid,
                        rel_facility["fueltech"],
                    )
                )
                om_facility["fueltech"] = rel_facility["fueltech"]

            if (
                rel_facility["capacity_registered"]
                and rel_facility["capacity_registered"]
                != om_facility["capacity_registered"]
            ):
                logger.info(
                    "REL: Set capacity for %s to %s",
                    rel_facility_duid,
                    rel_facility["capacity_registered"],
                )
                om_facility["capacity_registered"] = rel_facility[
                    "capacity_registered"
                ]

    # GI
    for station_code, gi_station in nem_gi.items():
        station_name = gi_station["name"]

        if station_code not in opennem and not facility_station_join_by_name(
            station_name
        ):
            logger.info("GI: New record {}".format(station_code))
            gi_station["created_by"] = "aemo_gi"
            opennem[station_code] = gi_station
            continue

        station_name_existing = [
            x for x in opennem.values() if x["name"] == station_name
        ]

        if station_name_existing and facility_station_join_by_name(
            station_name
        ):
            # Redirect to the first existing station with the same name.
            station_code = station_name_existing[0]["code"]
            logger.info(
                "GI: found existing station we're joining to: {}".format(
                    station_code
                )
            )
        elif station_code not in opennem:
            opennem[station_code] = gi_station

        logger.info("GI: Existing record {}".format(station_code))
        om_station = opennem.get(station_code)

        for gi_facility_duid, gi_facility in gi_station["facilities"].items():

            if not gi_facility_duid:
                continue

            # NOTE(review): unlike the REL pass, GI facilities that are not
            # already on the station are skipped rather than added. A later
            # "Added duid" branch was unreachable because of this check and
            # has been dropped -- confirm skipping is the intended behavior.
            if gi_facility_duid not in om_station["facilities"]:
                continue

            om_facility = om_station["facilities"][gi_facility_duid]

            # NOTE(review): the existing status is read as a mapping with a
            # "code" entry but is replaced by the raw GI value -- confirm
            # both sources use the same shape.
            if (
                gi_facility["status"]
                and om_facility["status"]["code"] != gi_facility["status"]
            ):
                logger.info(
                    "GI Set status for {} to {}".format(
                        gi_facility_duid,
                        gi_facility["status"],
                    )
                )
                om_facility["status"] = gi_facility["status"]

            elif not om_facility["status"]:
                om_facility["status"] = gi_facility["status"]

            if not om_facility.get("fueltech", None):
                om_facility["fueltech"] = gi_facility["fueltech"]

    # registry
    for station_code, registry_station in registry.items():
        if station_code not in opennem:
            if registry_station.get("state") == "WA":
                logger.info("Registry: New record {}".format(station_code))
                opennem[station_code] = registry_station

            continue

        om_station = opennem.get(station_code)

        lat = registry_station.get("lat", None)
        lng = registry_station.get("lng", None)

        # Written unconditionally, so a registry entry without coordinates
        # resets them to None on the merged station.
        om_station["lat"] = lat
        om_station["lng"] = lng

        if lat and lng:
            logger.info(
                "Registry: set lat and lng for {}".format(station_code)
            )

        for facility_code, registry_facility in registry_station.get(
            "facilities", {}
        ).items():

            if facility_code not in om_station["facilities"]:
                logger.info(
                    "Registry: {} has facility {} not in opennem".format(
                        station_code, facility_code
                    )
                )
                continue

            om_facility = om_station["facilities"][facility_code]

            if registry_facility["status"] and not om_facility["status"]:
                logger.info(
                    "Registry: set status to {}".format(
                        registry_facility["status"]
                    )
                )
                # Use the facility's status (the value checked and logged
                # above), not a station-level "status" entry.
                om_facility["status"] = registry_facility["status"]

            if registry_facility["fueltech"] and not om_facility.get(
                "fueltech", None
            ):
                om_facility["fueltech"] = registry_facility["fueltech"]

            logger.info(
                "registry: {} registry fueltech {} opennem fueltech {}".format(
                    facility_code,
                    registry_facility["fueltech"],
                    om_facility.get("fueltech", None),
                )
            )

    # Flatten each station's facilities mapping into a plain list.
    for station_entry in opennem.values():
        station_entry["facilities"] = list(
            station_entry["facilities"].values()
        )

    with open("data/opennem.json", "w") as fh:
        json.dump(opennem, fh, indent=4, cls=OpenNEMJSONEncoder)

    stations = [StationSchema(**i) for i in opennem.values()]

    return stations
Пример #4
0
    def process_generators(self, generators):
        """Upsert stations and facilities from grouped REL generator records.

        For every ``(station_key, facilities)`` pair this looks up (or
        creates) the owning ``Station`` and then looks up (or creates) one
        ``Facility`` per record, overwriting its fields with the record's
        values. Each facility is committed as it is processed; a final
        commit runs after the loop and the session is always closed.

        Args:
            generators: Mapping from a station key, whose first element is
                the station name, to a list of generator record dicts.
                Records are read with the keys ``station_name``, ``region``,
                ``duid``, ``reg_cap``, ``unit_no``, ``unit_size``,
                ``dispatch_type``, ``fuel_source_primary``,
                ``fuel_source_descriptor``, ``tech_primary`` and
                ``tech_primary_descriptor``.

        Raises:
            Exception: If a derived unit code is shorter than 3 characters.
        """
        s = self.session()

        stations_updated = 0
        stations_added = 0
        generators_updated = 0
        generators_added = 0
        generators_total = 0

        # Close the session even if a lookup, commit or the sanity check
        # below raises part-way through.
        try:
            for station_key, facilities in generators.items():
                facility_station = None
                created_station = False

                station_name = station_key[0]

                duid_unique = has_unique_duid(facilities)
                facility_count = len(facilities)

                # Step 1. Find the station
                # First by duid if it's unique
                duid = get_unique_duid(facilities)

                # The record used to name a newly created station and to
                # derive unit codes.
                facility_station_record = get_station_record_from_facilities(
                    facilities)

                facility_network_region = get_unique_reqion(facilities)

                if duid and duid_unique and facility_count == 1:
                    facility_lookup = None

                    try:
                        facility_lookup = (s.query(Facility).filter(
                            Facility.network_code == duid).one_or_none())
                    except MultipleResultsFound:
                        logger.warning(
                            "REL: Multiple stations found for {} {} with duid {}"
                            .format(station_name, facility_network_region,
                                    duid))

                    if facility_lookup and facility_lookup.station:
                        facility_station = facility_lookup.station

                # NOTE(review): parenthesised exactly as Python evaluated the
                # original expression. When the duids are not unique this
                # branch runs even if ``duid`` is empty -- confirm that is
                # intended.
                if (duid and duid_unique and facility_count > 1) or (
                        not duid_unique):

                    facility_lookup = (s.query(Facility).filter(
                        Facility.network_code == duid).first())

                    if facility_lookup and facility_lookup.station:
                        facility_station = facility_lookup.station

                if not facility_station and facility_station_join_by_name(
                        station_name):
                    try:
                        facility_station = (s.query(Station).filter(
                            Station.name == station_name).one_or_none())
                    except MultipleResultsFound:
                        logger.warning(
                            "REL: Multiple stations found for {} {}".format(
                                station_name, facility_network_region))

                    # Report the station lookup result (not a facility).
                    logger.debug(
                        "REL: Looked up {} by name and region {} and found {}".
                        format(
                            station_name,
                            facility_network_region,
                            facility_station
                            if facility_station else "nothing",
                        ))

                # Create one as it doesn't exist
                if not facility_station:
                    facility_station = Station(
                        name=station_name,
                        network_name=name_normalizer(
                            facility_station_record["station_name"]),
                        network_id="NEM",
                        created_by="pipeline.aemo.registration_exemption",
                    )

                    s.add(facility_station)
                    created_station = True
                    stations_added += 1
                else:
                    facility_station.updated_by = (
                        "pipeline.aemo.registration_exemption")
                    stations_updated += 1

                logger.info("REL: {} station with name {} and code {}".format(
                    "Created" if created_station else "Updated",
                    facility_station.name,
                    facility_station.code,
                ))

                # Step 2. Add the facilities/units to the station
                for facility_record in facilities:
                    generators_total += 1

                    # Reset per record so one new facility does not label
                    # every later facility of the station as created.
                    created_facility = False

                    network_name = name_normalizer(
                        facility_record["station_name"])
                    facility_region = normalize_aemo_region(
                        facility_record["region"])
                    duid = normalize_duid(facility_record["duid"])
                    reg_cap = clean_capacity(facility_record["reg_cap"])
                    unit = parse_unit_duid(facility_record["unit_no"], duid)
                    unit_size = clean_capacity(facility_record["unit_size"])
                    unit_code = get_unit_code(
                        unit, duid, facility_station_record["station_name"])
                    facility_dispatch_type = parse_dispatch_type(
                        facility_record["dispatch_type"])
                    fueltech = lookup_fueltech(
                        facility_record["fuel_source_primary"],
                        facility_record["fuel_source_descriptor"],
                        facility_record["tech_primary"],
                        facility_record["tech_primary_descriptor"],
                        facility_record["dispatch_type"],
                    )

                    # Skip loads that are not batteries or pumps for now
                    # @NOTE @TODO better to centralize this as it needs to be
                    # consistent
                    if (facility_dispatch_type == DispatchType.LOAD
                            and fueltech not in [
                                "battery_charging",
                                "pumps",
                            ]):
                        continue

                    # check if we have it by code first
                    facility = (s.query(Facility).filter(
                        Facility.code == unit_code).one_or_none())

                    # If the duid is unique then we have no issues on which
                    # to join/create
                    if duid and duid_unique and not facility:
                        try:
                            facility = (s.query(Facility).filter(
                                Facility.network_code == duid).one_or_none())
                        except MultipleResultsFound:
                            logger.warning(
                                "REL: Multiple facilities found for {} {}".
                                format(station_name, duid))

                    # ``== None`` is deliberate: it is a SQLAlchemy column
                    # comparison, not a Python identity test.
                    if duid and not duid_unique and not facility:
                        facility = (s.query(Facility).filter(
                            Facility.network_code == duid).filter(
                                Facility.unit_number == None).filter(  # noqa: E711
                                    Facility.status_id == "operating").first())

                    # If the duid is not unique then we need to figure
                    # things out ..
                    if duid and not duid_unique and not facility:
                        facility_lookup = (
                            s.query(Facility).filter(
                                Facility.network_code == duid)
                            # Not having a code means we haven't written to
                            # this record yet so we'll use it
                            .filter(Facility.code == None).all())  # noqa: E711

                        facility_db_count = len(facility_lookup)

                        logger.debug(
                            "Non unique duid: {} with {} in database and {} in facility duid is {}"
                            .format(
                                station_name,
                                facility_db_count,
                                facility_count,
                                duid,
                            ))

                        if facility_lookup:
                            facility = facility_lookup.pop()

                    if not facility:
                        facility = Facility(
                            code=unit_code,
                            network_code=duid,
                            created_by="pipeline.aemo.registration_exemption",
                        )
                        created_facility = True
                    else:
                        facility.updated_by = (
                            "pipeline.aemo.registration_exemption")

                    # Sanity checking
                    if len(unit_code) < 3:
                        raise Exception(
                            "Unit code {} is invalid. For station {} with duid {}"
                            .format(unit_code, station_name, duid))

                    if not facility.code:
                        facility.code = unit_code

                    # Capture the stored fueltech before overwriting it so a
                    # change can actually be detected and reported.
                    previous_fueltech = facility.fueltech_id

                    if (fueltech and previous_fueltech
                            and fueltech != previous_fueltech):
                        logger.warning(
                            "Fueltech mismatch for {} {}: prev {} new {}".
                            format(
                                station_name,
                                facility.code,
                                previous_fueltech,
                                fueltech,
                            ))

                    facility.fueltech_id = fueltech
                    facility.network_code = duid
                    facility.network_region = facility_region
                    facility.network_name = network_name

                    facility.capacity_registered = reg_cap
                    facility.dispatch_type = facility_dispatch_type

                    facility.unit_id = unit.id
                    facility.unit_number = unit.number
                    facility.unit_alias = unit.alias
                    facility.unit_capacity = unit_size

                    # Every REL facility is stored as operating.
                    facility.status_id = "operating"

                    facility.station = facility_station

                    s.add(facility)
                    s.commit()

                    if created_facility:
                        generators_added += 1
                    else:
                        generators_updated += 1

                    logger.info(
                        "REL: {} facility with duid {} and id {}".format(
                            "Created" if created_facility else "Updated",
                            facility.network_code,
                            facility.code,
                        ))

            # Persist stations whose facilities were all skipped above.
            s.commit()

            logger.info(
                "NEM REL Pipeline: Added {} stations, updated {} stations. Added {}, updated {} generators of {} total"
                .format(
                    stations_added,
                    stations_updated,
                    generators_added,
                    generators_updated,
                    generators_total,
                ))
        finally:
            s.close()