Example #1
File: pulse.py Project: zalihat/opennem
    def process_item(self, item, spider=None):

        s = SessionLocal()

        csvreader = csv.DictReader(item["content"].split("\n"))

        records_to_store = []
        primary_keys = []

        for row in csvreader:
            trading_interval = parse_date(row["TRADING_DAY_INTERVAL"],
                                          network=NetworkWEM,
                                          dayfirst=False)

            if trading_interval not in primary_keys:
                forecast_load = clean_float(row["FORECAST_EOI_MW"])

                records_to_store.append({
                    "created_by": spider.name,
                    "trading_interval": trading_interval,
                    "network_id": "WEM",
                    "network_region": "WEM",
                    "forecast_load": forecast_load,
                    # generation_scheduled=row["Scheduled Generation (MW)"],
                    # generation_total=row["Total Generation (MW)"],
                    "price": clean_float(row["PRICE"]),
                })
                primary_keys.append(trading_interval)

        stmt = insert(BalancingSummary).values(records_to_store)
        stmt.bind = get_database_engine()
        stmt = stmt.on_conflict_do_update(
            index_elements=[
                "trading_interval",
                "network_id",
                "network_region",
            ],
            set_={
                "price": stmt.excluded.price,
                "forecast_load": stmt.excluded.forecast_load,
            },
        )

        try:
            s.execute(stmt)
            s.commit()
        except Exception as e:
            logger.error("Error inserting records")
            logger.error(e)
            return 0
        finally:
            s.close()

        return len(records_to_store)
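
A pattern worth noting here, repeated in Examples #8, #10, #11 and #12 below: the upsert is built with SQLAlchemy's PostgreSQL dialect insert, so rows that collide on the index columns are updated in place rather than rejected. A minimal, self-contained sketch of the same pattern follows; the table and columns are illustrative placeholders, not opennem's actual schema.

# Sketch of the PostgreSQL upsert pattern used above (illustrative schema,
# not opennem's). Requires a PostgreSQL connection to execute.
from sqlalchemy import Column, DateTime, MetaData, Numeric, Table, Text
from sqlalchemy.dialects.postgresql import insert

metadata = MetaData()

balancing_summary = Table(
    "balancing_summary",
    metadata,
    Column("trading_interval", DateTime, primary_key=True),
    Column("network_id", Text, primary_key=True),
    Column("price", Numeric),
)

def upsert_rows(session, rows):
    stmt = insert(balancing_summary).values(rows)
    # On conflict with the composite key, overwrite price with the
    # incoming (EXCLUDED) value instead of failing the insert.
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id"],
        set_={"price": stmt.excluded.price},
    )
    session.execute(stmt)
    session.commit()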
Example #2
File: mms.py Project: opennem/opennem
    def validate_maxcapacity(cls, value: str) -> float:
        f = clean_float(value)

        if not f:
            raise ValueError("Not a valid capacity: {}".format(value))

        return f
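
clean_float itself is not shown on this page; judging by these call sites it accepts a raw CSV value (string or number) and returns an Optional[float]. A rough sketch of what such a helper might look like, as an assumption inferred from the call sites rather than opennem's actual implementation:

from typing import Any, Optional

def clean_float(value: Any) -> Optional[float]:
    """Hypothetical sketch, not the opennem implementation: coerce a raw
    CSV value to a float, returning None for empty or unparseable input."""
    if value is None:
        return None

    if isinstance(value, (int, float)):
        return float(value)

    # strip whitespace and thousands separators commonly seen in CSV feeds
    value = str(value).strip().replace(",", "")

    if not value:
        return None

    try:
        return float(value)
    except ValueError:
        return None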
Example #3
File: stats.py Project: zalihat/opennem
    def parse_cpi_value(cls, value: Any) -> float:
        v = clean_float(value)

        if not v:
            raise ValueError("No inflation Value")

        return v
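
One caveat with both this validator and the one in Example #2: the truthiness check ("if not v") rejects a legitimate value of 0.0 along with None, so a zero raises. If zero is valid input for the field, an explicit None check is safer. A suggested variant, not the project's code:

    def parse_cpi_value(cls, value: Any) -> float:
        v = clean_float(value)

        # explicit None check so a real 0.0 is not rejected as missing
        if v is None:
            raise ValueError("No inflation value")

        return v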
Example #4
def generate_balancing_summary(
    records: List[Dict],
    interval_field: str = "SETTLEMENTDATE",
    network_region_field: str = "REGIONID",
    price_field: Optional[str] = None,
    network: NetworkSchema = NetworkNEM,
    limit: int = 0,
) -> List[Dict]:
    created_at = datetime.now()
    primary_keys = []
    return_records = []

    created_by = ""

    for row in records:

        trading_interval = parse_date(row[interval_field],
                                      network=network,
                                      dayfirst=False)

        network_region = None

        if network_region_field and network_region_field in row:
            network_region = row[network_region_field]

        pk = (trading_interval, network.code, network_region)

        if pk in primary_keys:
            continue

        primary_keys.append(pk)

        price = None

        if price_field and price_field in row:
            price = clean_float(row[price_field])

            if price:
                price = str(float_to_str(price))

        __rec = {
            "created_by": created_by,
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "network_region": network_region,
            "trading_interval": trading_interval,
            "price": price,
        }

        return_records.append(__rec)

        if limit > 0 and len(return_records) >= limit:
            break

    return return_records
Example #5
def import_dump_emissions() -> List[Dict]:
    content = load_data("emissions_output.csv",
                        from_project=True,
                        skip_loaders=True)

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)
    records = []

    for rec in csvreader:
        records.append({
            "facility_code": rec["DUID"],
            "emissions_factor_co2": clean_float(rec["CO2E_EMISSIONS_FACTOR"]),
        })

    return records
Example #6
File: gi.py Project: opennem/opennem
def aemo_gi_capacity_cleaner(cap: Optional[str]) -> Optional[float]:
    """Custom capacity cleaner because sometimes its parsed as silly
    text like a range (ie. '150 - 180'"""
    if isinstance(cap, (int, float)):
        return cap

    if not cap:
        return None

    cap = cap.strip()

    num_part = re.search(r"^[\d\.]+", cap)

    if not num_part:
        return None

    num_extracted = str(num_part.group(0))

    num_extracted_and_clean = clean_float(num_extracted)

    return num_extracted_and_clean
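
Based on the code as written, and assuming clean_float parses a plain numeric string (which the other examples suggest), the expected behaviour is:

assert aemo_gi_capacity_cleaner("150 - 180") == 150.0
assert aemo_gi_capacity_cleaner(" 42.5 ") == 42.5
assert aemo_gi_capacity_cleaner("TBA") is None
assert aemo_gi_capacity_cleaner("") is None
assert aemo_gi_capacity_cleaner(100) == 100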
Example #7
def import_emissions_map(file_name: str) -> None:
    """Import emission factors from CSV files for each network
    the format of the csv file is

    station_name,network_id,network_region,facility_code,emissions_factor_co2,fueltech_id,emission_factor_source
    """
    session = SessionLocal()

    content = load_data(file_name, from_project=True, skip_loaders=True)

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    for rec in csvreader:
        network_id = rec["network_id"]
        facility_code = normalize_duid(rec["facility_code"])
        emissions_intensity = clean_float(rec["emissions_factor_co2"])

        if not facility_code:
            logger.info("No emissions intensity for {}".format(facility_code))
            continue

        facility = (session.query(Facility).filter_by(
            code=facility_code).filter_by(network_id=network_id).one_or_none())

        if not facility:
            logger.info("No stored facility for {}".format(facility_code))
            continue

        facility.emissions_factor_co2 = emissions_intensity
        session.add(facility)
        logger.info("Updated {} to {}".format(facility_code,
                                              emissions_intensity))

    session.commit()

    return None
Example #8
def process_trading_regionsum(table: Dict[str, Any], spider: Spider) -> Dict:
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]

    limit = None
    records_to_store = []
    records_processed = 0
    primary_keys = []

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"],
            network=NetworkNEM,
            dayfirst=False,
            date_format="%Y/%m/%d %H:%M:%S",
        )

        if not trading_interval:
            continue

        # composite key as a tuple (a set is unordered and collapses equal values)
        _pk = (trading_interval, record["REGIONID"])

        if _pk in primary_keys:
            continue

        primary_keys.append(_pk)

        demand_total = None

        if "TOTALDEMAND" in record:
            demand_total = clean_float(record["TOTALDEMAND"])

        records_to_store.append({
            "network_id": "NEM",
            "created_by": spider.name,
            "network_region": record["REGIONID"],
            "trading_interval": trading_interval,
            "demand_total": demand_total,
        })

        records_processed += 1

        if limit and records_processed >= limit:
            logger.info("Reached limit of: {} {}".format(
                limit, records_processed))
            break

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "demand_total": stmt.excluded.demand_total,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}

    finally:
        session.close()

    return {"num_records": len(records_to_store)}
Example #9
def unit_scada_generate_facility_scada(
    records,
    spider=None,
    network: NetworkSchema = NetworkNEM,
    interval_field: str = "SETTLEMENTDATE",
    facility_code_field: str = "DUID",
    date_format: Optional[str] = None,
    power_field: Optional[str] = None,
    energy_field: Optional[str] = None,
    is_forecast: bool = False,
    primary_key_track: bool = False,
    groupby_filter: bool = True,
    created_by: Optional[str] = None,
    limit: int = 0,
    duid: Optional[str] = None,
) -> List[Dict]:
    created_at = datetime.now()
    primary_keys = []
    return_records = []

    created_by = ""

    if spider and hasattr(spider, "name"):
        created_by = spider.name

    for row in records:

        trading_interval = parse_date(
            row[interval_field],
            network=network,
            dayfirst=False,
            date_format=date_format,
        )

        # if facility_code_field not in row:
        # logger.error("Invalid row no facility_code")
        # continue

        facility_code = normalize_duid(row[facility_code_field])

        if duid and facility_code != duid:
            continue

        if primary_key_track:
            pkey = (trading_interval, facility_code)

            if pkey in primary_keys:
                continue

            primary_keys.append(pkey)

        generated = None

        if power_field and power_field in row:
            generated = clean_float(row[power_field])

            if generated:
                generated = float_to_str(generated)

        energy = None

        if energy_field and energy_field in row:
            energy = clean_float(row[energy_field])

            if energy:
                energy = float_to_str(energy)

        __rec = {
            "created_by": created_by,
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "trading_interval": trading_interval,
            "facility_code": facility_code,
            "generated": generated,
            "eoi_quantity": energy,
            "is_forecast": is_forecast,
        }

        return_records.append(__rec)

        if limit > 0 and len(return_records) >= limit:
            break

    if not groupby_filter:
        return return_records

    return_records_grouped = {}

    for pk_values, rec_value in groupby(
            return_records,
            key=lambda r: (
                r.get("network_id"),
                r.get("trading_interval"),
                r.get("facility_code"),
            ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    return_records = list(return_records_grouped.values())

    return return_records
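
Note that itertools.groupby only groups adjacent records, so it is the "not in" check on return_records_grouped that actually deduplicates across the whole list, with .pop() keeping the last record of each adjacent run. A plain dict expresses the same dedup more directly; an equivalent sketch, modulo which record within a run of duplicates survives:

# Dedup with a plain dict: the first record seen for each key wins.
deduped: Dict[tuple, Dict] = {}

for rec in return_records:
    key = (rec["network_id"], rec["trading_interval"], rec["facility_code"])
    if key not in deduped:
        deduped[key] = rec

return_records = list(deduped.values())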
Example #10
def process_dispatch_interconnectorres(table: Dict, spider: Spider) -> Dict:
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]

    records_to_store = []

    for record in records:
        ti_value = None

        if "SETTLEMENTDATE" in record:
            ti_value = record["SETTLEMENTDATE"]

        if "RUN_DATETIME" in record:
            ti_value = record["RUN_DATETIME"]

        if not ti_value:
            raise Exception("Require a trading interval")

        trading_interval = parse_date(ti_value,
                                      network=NetworkNEM,
                                      dayfirst=False)

        if not trading_interval:
            continue

        facility_code = normalize_duid(record["INTERCONNECTORID"])
        power_value = clean_float(record["METEREDMWFLOW"])

        records_to_store.append({
            "network_id": "NEM",
            "created_by": spider.name,
            "facility_code": facility_code,
            "trading_interval": trading_interval,
            "generated": power_value,
        })

    # remove duplicates
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
            records_to_store,
            key=lambda r: (
                r.get("trading_interval"),
                r.get("network_id"),
                r.get("facility_code"),
            ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval", "network_id", "facility_code", "is_forecast"
        ],
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
Example #11
    def process_item(self, item, spider=None):
        if not item:
            logger.error("No item in pipeline")
            return None

        if "content" not in item:
            logger.error("No content in pipeline")
            return None

        content = item["content"]

        if "ELEC_NEM_SUMMARY" not in content:
            logger.error("Invalid summary return")
            return None

        summary = content["ELEC_NEM_SUMMARY"]

        s = SessionLocal()
        records_to_store = []

        for row in summary:
            trading_interval = parse_date(row["SETTLEMENTDATE"], network=NetworkNEM)

            demand_total = clean_float(row["TOTALDEMAND"])
            generation_scheduled = clean_float(row["SCHEDULEDGENERATION"])
            generation_non_scheduled = clean_float(row["SEMISCHEDULEDGENERATION"])

            records_to_store.append(
                {
                    "created_by": spider.name,
                    "trading_interval": trading_interval,
                    "network_id": "NEM",
                    "network_region": row["REGIONID"].strip(),
                    # "demand_total": demand_total,
                    "generation_scheduled": generation_scheduled,
                    "generation_non_scheduled": generation_non_scheduled,
                }
            )

        stmt = insert(BalancingSummary).values(records_to_store)
        stmt.bind = get_database_engine()
        stmt = stmt.on_conflict_do_update(
            index_elements=[
                "trading_interval",
                "network_id",
                "network_region",
            ],
            set_={
                # "demand_total": stmt.excluded.demand_total,
                "generation_scheduled": stmt.excluded.generation_scheduled,
                "generation_non_scheduled": stmt.excluded.generation_non_scheduled,
            },
        )

        try:
            s.execute(stmt)
            s.commit()
        except Exception as e:
            logger.error("Error inserting records")
            logger.error(e)
            return {"num_records": 0}
        finally:
            s.close()

        return {"num_records": len(records_to_store)}
Example #12
def process_trading_regionsum(table: AEMOTableSchema) -> ControllerReturn:
    engine = get_database_engine()

    if not table.records:
        logger.debug(table)
        raise Exception("Invalid table no records")

    cr = ControllerReturn(total_records=len(table.records))
    limit = None
    records_to_store = []
    records_processed = 0
    primary_keys = []

    for record in table.records:

        if not isinstance(record, dict):
            raise Exception("Invalid record type")

        trading_interval = parse_date(
            record["settlementdate"],
            network=NetworkNEM,
            dayfirst=False,
            date_format="%Y/%m/%d %H:%M:%S",
        )

        if not trading_interval:
            continue

        # composite key as a tuple (a set is unordered and collapses equal values)
        _pk = (trading_interval, record["regionid"])

        if _pk in primary_keys:
            continue

        primary_keys.append(_pk)

        net_interchange = None

        if "netinterchange" in record:
            net_interchange = clean_float(record["netinterchange"])

        records_to_store.append({
            "network_id": "NEM",
            "created_by": "opennem.controller.nem",
            "network_region": record["regionid"],
            "net_interchange_trading": net_interchange,
            "trading_interval": trading_interval,
        })

        records_processed += 1

        if limit and records_processed >= limit:
            logger.info("Reached limit of: {} {}".format(
                limit, records_processed))
            break

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "net_interchange_trading": stmt.excluded.net_interchange_trading,
        },
    )

    session = get_scoped_session()

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max(
            [i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        records_to_store = []
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr
Example #13
def unit_scada_generate_facility_scada(
    records: List[Union[Dict[str, Any], MMSBaseClass]],
    network: NetworkSchema = NetworkNEM,
    interval_field: str = "settlementdate",
    facility_code_field: str = "duid",
    power_field: str = "scadavalue",
    energy_field: Optional[str] = None,
    is_forecast: bool = False,
    primary_key_track: bool = True,
) -> List[Dict]:
    """@NOTE method deprecated"""
    created_at = datetime.now()
    primary_keys = []
    return_records = []

    if not records:
        return []

    fields = ""

    first_record = records[0]

    if isinstance(first_record, MMSBaseClass):
        first_record = asdict(first_record)  # type: ignore

    try:
        fields = ", ".join([f"'{i}'" for i in list(first_record.keys())])
    except Exception as e:
        logger.error("Fields error: {}".format(e))
        pass

    for row in records:
        # cast it all to dicts
        if isinstance(row, MMSBaseClass):
            row = asdict(row)  # type: ignore

        if interval_field not in row:
            raise Exception("No such field: '{}'. Fields: {}. Data: {}".format(
                interval_field, fields, row))

        trading_interval = row[interval_field]

        if facility_code_field not in row:
            raise Exception("No such facility field: {}. Fields: {}".format(
                facility_code_field, fields))

        facility_code = row[facility_code_field]

        energy_quantity: Optional[float] = None

        if energy_field:
            if energy_field not in row:
                raise Exception("No energy field: {}. Fields: {}".format(
                    energy_field, fields))

            energy_quantity = clean_float(row[energy_field])

        power_quantity: Optional[float] = None

        if power_field not in row:
            raise Exception("No suck power field: {}. Fields: {}".format(
                power_field, fields))

        power_quantity = clean_float(row[power_field])

        # should we track primary keys to remove duplicates?
        # @NOTE this does occur sometimes especially on large
        # imports of data from large sets
        if primary_key_track:
            pk = (trading_interval, network.code, facility_code)

            if pk in primary_keys:
                continue

            primary_keys.append(pk)

        __rec = {
            "created_by": "opennem.controller",
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "trading_interval": trading_interval,
            "facility_code": facility_code,
            "generated": power_quantity,
            "eoi_quantity": energy_quantity,
            "is_forecast": is_forecast,
            "energy_quality_flag": 0,
        }

        return_records.append(__rec)

    return return_records
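
asdict here is presumably dataclasses.asdict, which implies the MMS record types derive from dataclasses. A minimal illustration with a hypothetical stand-in record type:

from dataclasses import asdict, dataclass

@dataclass
class _ExampleDispatchRecord:
    """Hypothetical stand-in for an MMS record type."""
    settlementdate: str
    duid: str
    scadavalue: str

row = asdict(_ExampleDispatchRecord("2021/01/01 00:05:00", "BAYSW1", "660.5"))
assert row["duid"] == "BAYSW1"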
Example #14
def check_emissions_map() -> None:
    content = load_data("opennem_emission_factors.csv",
                        from_project=True,
                        skip_loaders=True)
    mms_emissions = import_mms_emissions()

    def get_emissions_for_code(facility_code: str) -> Optional[Dict]:
        facility_lookup = list(
            filter(lambda x: x["facility_code"] == facility_code,
                   mms_emissions))

        if not facility_lookup:
            logger.error(
                "Could not find facility {} in MMS emissions data".format(
                    facility_code))
            return None

        facility = facility_lookup.pop()
        return facility

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    csv_out = []

    for rec in csvreader:
        network_id = rec["network_id"]
        facility_code = normalize_duid(rec["facility_code"])
        emissions_intensity = clean_float(rec["emissions_factor_co2"])

        if network_id not in ["NEM"]:
            rec["emission_factor_source"] = "NPI"
            csv_out.append(rec)

            continue

        rec["emission_factor_source"] = ""

        mms_emission_record = get_emissions_for_code(facility_code)

        if not mms_emission_record:
            csv_out.append(rec)
            continue

        if emissions_intensity != mms_emission_record["emissions_factor_co2"]:
            logger.error("Mismatch for {}: {} and {}".format(
                facility_code, emissions_intensity,
                mms_emission_record["emissions_factor_co2"]))

            if mms_emission_record["emissions_factor_co2"]:
                rec["emissions_factor_co2"] = mms_emission_record[
                    "emissions_factor_co2"]
            else:
                rec["emission_factor_source"] = "Lookup"

        rec["emission_factor_source"] = mms_emission_record[
            "emission_factor_source"]
        csv_out.append(rec)

    fieldnames = [
        "network_id",
        "network_region",
        "facility_code",
        "station_name",
        "fueltech_id",
        "status_id",
        "emissions_factor_co2",
        "emission_factor_source",
    ]
    with open("emission_factors.csv", "w") as fh:
        csvwriter = csv.DictWriter(fh, fieldnames=fieldnames)
        csvwriter.writeheader()
        csvwriter.writerows(csv_out)