Example #1
def export_historic_intervals(limit: int | None = None) -> None:
    """ """
    session = get_scoped_session()

    networks = [NetworkNEM, NetworkWEM]

    for network in networks:
        network_regions: list[NetworkRegion] = (
            session.query(NetworkRegion).filter(
                NetworkRegion.network_id == network.code).all())

        for network_region in network_regions:
            scada_range: ScadaDateRange = get_scada_range(network=network,
                                                          networks=networks,
                                                          energy=False)

            if not scada_range or not scada_range.start:
                logger.error(
                    "Could not get scada range for network {}".format(network))
                continue

            for week_start, week_end in week_series_datetimes(
                    start=scada_range.end, end=scada_range.start,
                    length=limit):
                if week_start > get_today_opennem():
                    continue

                try:
                    export_network_intervals_for_week(
                        week_start,
                        week_end,
                        network=network,
                        network_region=network_region)
                except Exception as e:
                    logger.error(f"export_historic_intervals error: {e}")
Example #2
def run_crawl(crawler: CrawlerDefinition,
              last_crawled: bool = True,
              limit: bool = False,
              latest: bool = True) -> None:
    """Runs a crawl from the crawl definition with ability to overwrite last crawled and obey the defined
    limit"""

    logger.info(
        "Crawling: {}. (Last Crawled: {}. Limit: {}. Server latest: {})".
        format(crawler.name, crawler.last_crawled, crawler.limit,
               crawler.server_latest))

    # now in opennem time which is Australia/Sydney
    now_opennem_time = get_today_opennem()

    crawler_set_meta(crawler.name, CrawlStatTypes.version, crawler.version)
    crawler_set_meta(crawler.name, CrawlStatTypes.last_crawled,
                     now_opennem_time)

    cr: Optional[ControllerReturn] = crawler.processor(
        crawler=crawler,
        last_crawled=last_crawled,
        limit=crawler.limit,
        latest=latest)

    if not cr:
        return None

    # process the controller return and track whether any errors occurred
    has_errors = False

    logger.info("Inserted {} of {} records".format(cr.inserted_records,
                                                   cr.total_records))

    if cr.errors > 0:
        has_errors = True
        logger.error("Crawl controller error for {}: {}".format(
            crawler.name, cr.error_detail))

    if not has_errors:
        if cr.server_latest:
            crawler_set_meta(crawler.name, CrawlStatTypes.latest_processed,
                             cr.server_latest)
            crawler_set_meta(crawler.name, CrawlStatTypes.server_latest,
                             cr.server_latest)
        else:
            logger.debug("{} has no server_latest return".format(crawler.name))

        logger.info("Set last_processed to {} and server_latest to {}".format(
            crawler.last_processed, cr.server_latest))
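
As a rough usage sketch, run_crawl is handed a CrawlerDefinition and manages the crawl metadata itself; the import path and the registry helper below are assumptions, only the run_crawl signature comes from the example above.

# Sketch only: module path and get_crawl_set are assumed helpers; the
# run_crawl signature is the one defined above.
from opennem.crawl import get_crawl_set, run_crawl  # assumed path

crawl_set = get_crawl_set()  # hypothetical registry of CrawlerDefinition objects

for crawler in crawl_set.crawlers:
    # default run: respect last_crawled and only fetch the latest data
    run_crawl(crawler, last_crawled=True, latest=True)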
Example #3
def set_crawler_history(crawler_name: str, histories: list[CrawlHistoryEntry]) -> int:
    """Sets the crawler history"""
    engine = get_database_engine()
    session = get_scoped_session()

    history_intervals = [i.interval for i in histories]

    logger.debug(f"Have {len(history_intervals)} history intervals for {crawler_name}")

    date_max = max(history_intervals)
    date_min = min(history_intervals)

    logger.debug(f"crawler {crawler_name} date range: {date_min}, {date_max}")

    stmt = sql(
        """
        select
            interval
        from crawl_history
        where
            interval >= :date_min
            and interval <= :date_max
    """
    )

    query = stmt.bindparams(date_max=date_max, date_min=date_min)

    with engine.connect() as c:
        results = list(c.execute(query))

    existing_intervals = [i[0] for i in results]

    logger.debug(f"Got {len(existing_intervals)} existing intervals for crawler {crawler_name}")

    # Persist the crawl history records
    crawl_history_records: list[Dict[str, datetime | str | int | None]] = []

    for ch in histories:
        crawl_history_records.append(
            {
                "source": "nemweb",
                "crawler_name": crawler_name,
                "network_id": "NEM",
                "interval": ch.interval,
                "inserted_records": ch.records,
                "crawled_time": None,
                "processed_time": get_today_opennem(),
            }
        )

    # insert
    stmt = insert(CrawlHistory).values(crawl_history_records)
    stmt.bind = engine  # type: ignore
    stmt = stmt.on_conflict_do_update(  # type: ignore
        index_elements=["source", "crawler_name", "network_id", "interval"],
        set_={
            "inserted_records": stmt.excluded.inserted_records,  # type: ignore
            "crawled_time": stmt.excluded.crawled_time,  # type: ignore
            "processed_time": stmt.excluded.processed_time,  # type: ignore
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error(f"set_crawler_history error updating records: {e}")
        session.rollback()
    finally:
        session.close()
        engine.dispose()

    return len(histories)
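
A hedged sketch of recording history after a crawl pass follows; CrawlHistoryEntry's fields are inferred from the attributes read above (interval, records), while the import path and the crawler name are assumptions.

# Sketch only: import path and crawler name are assumptions; CrawlHistoryEntry
# fields (interval, records) are inferred from their use in set_crawler_history.
from datetime import datetime, timedelta

from opennem.crawlers.history import CrawlHistoryEntry, set_crawler_history  # assumed path

start = datetime(2022, 10, 1, 12, 0)

entries = [
    CrawlHistoryEntry(interval=start + timedelta(minutes=5 * i), records=12)
    for i in range(6)
]

count = set_crawler_history("au.nemweb.dispatch_scada", entries)  # hypothetical crawler name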
Example #4
def get_apvi_rooftop_data(
        day: Optional[datetime] = None) -> Optional[APVIForecastSet]:
    """Obtains and parses APVI forecast data"""

    if not day:
        day = get_today_opennem()

    day_string = get_date_component(format_str=APVI_DATE_QUERY_FORMAT, dt=day)

    apvi_endpoint_url = get_apvi_uri(today=False)

    logger.info("Getting APVI data for day {} from {}".format(
        day_string, apvi_endpoint_url))

    _resp = _apvi_request_session.post(apvi_endpoint_url,
                                       data={"day": day_string})

    if not _resp.ok:
        logger.error("Invalid APVI Return: {}".format(_resp.status_code))
        return None

    _resp_json = None

    try:
        _resp_json = _resp.json()
    except JSONDecodeError as e:
        logger.error("Error decoding APVI response: {}".format(e))
        return None

    _required_keys = ["postcode", "postcodeCapacity", "installations"]

    for _req_key in _required_keys:
        if _req_key not in _resp_json:
            logger.error(f"Invalid APVI response: {_req_key} field not found")
            return None

    postcode_gen = _resp_json["postcode"]
    postcode_capacity = _resp_json["postcodeCapacity"]
    installations = _resp_json["installations"]

    # Brisbane has no DST so it's effectively NEM time
    _run_at = get_today_opennem()
    _interval_records = []

    for record in postcode_gen:
        for state, prefix in STATE_POSTCODE_PREFIXES.items():

            generated = sum([
                float(v) / 100 * postcode_capacity[k]
                for k, v in record.items() if k.startswith(prefix) and v
                and k in postcode_capacity and k[:2] not in WA_NON_SWIS
            ])

            if not generated:
                continue

            _interval_records.append(
                APVIForecastInterval(
                    **{
                        "network_id": "APVI",
                        "trading_interval": record["ts"],
                        "state": state,
                        "generated": generated,
                    }))

    _state_capacities = {}

    # Calculate state capacities
    for postcode_prefix, capacity_val in postcode_capacity.items():
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            if state not in _state_capacities:
                _state_capacities[state] = 0

            if postcode_prefix.startswith(prefix):
                _state_capacities[state] += capacity_val

    # derive state capacity models
    _state_capacity_models = []

    for state, state_capacity in _state_capacities.items():
        capacity_registered = state_capacity
        # default to None so a state missing from installations doesn't raise a
        # NameError or silently reuse the previous iteration's value
        unit_number = None

        if state.lower() in installations:
            unit_number = installations[state.lower()]

        _state_capacity_models.append(
            APVIStateRooftopCapacity(state=state,
                                     capacity_registered=capacity_registered,
                                     unit_number=unit_number))

    apvi_server_latest: Optional[datetime] = None

    trading_intervals = list(
        set([i.trading_interval for i in _interval_records]))

    if trading_intervals:
        apvi_server_latest = max(trading_intervals)

    apvi_forecast_set = APVIForecastSet(crawled=_run_at,
                                        intervals=_interval_records,
                                        capacities=_state_capacity_models)

    try:
        apvi_forecast_set.server_latest = apvi_server_latest
    except ValidationError:
        logger.error("APVI validation error for server_latest: {} <{}>".format(
            apvi_server_latest, repr(apvi_server_latest)))

    return apvi_forecast_set
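
A usage sketch for a specific day; the import path is an assumption, while the fields read from the returned APVIForecastSet (intervals, capacities, server_latest) are the ones populated above.

# Sketch only: import path is an assumption; the model fields used here are
# the ones populated by get_apvi_rooftop_data above.
from datetime import datetime

from opennem.crawlers.apvi import get_apvi_rooftop_data  # assumed path

forecast_set = get_apvi_rooftop_data(day=datetime(2022, 10, 1))

if forecast_set:
    print(f"{len(forecast_set.intervals)} intervals, server latest: {forecast_set.server_latest}")

    for capacity in forecast_set.capacities:
        print(capacity.state, capacity.capacity_registered, capacity.unit_number)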
Example #5
def get_apvi_rooftop_today() -> Optional[APVIForecastSet]:
    """Gets today APVI data and returns a set"""

    apvi_endpoint_url = get_apvi_uri(today=True)

    _resp = _apvi_request_session.get(apvi_endpoint_url)

    if not _resp.ok:
        logger.error("Invalid APVI Return: {}".format(_resp.status_code))
        return None

    _resp_json = None

    try:
        _resp_json = _resp.json()
    except JSONDecodeError as e:
        logger.error("Error decoding APVI response: {}".format(e))
        return None

    _required_keys = ["capacity", "performance", "output"]

    for _req_key in _required_keys:
        if _req_key not in _resp_json:
            logger.error(f"Invalid APVI response: {_req_key} field not found")
            return None

    # capacity = _resp_json["capacity"]
    # performance = _resp_json["performance"]
    output = _resp_json["output"]

    _run_at = get_today_opennem()

    record_set: Dict[str, Dict] = {}

    for postcode_prefix, time_records in output.items():
        # Skip WA postcode prefixes that are outside the SWIS
        if postcode_prefix in WA_NON_SWIS:
            continue

        state = get_state_for_prefix(postcode_prefix)

        if state not in record_set:
            record_set[state] = {}

        for trading_interval, generated in time_records.items():
            if trading_interval not in record_set[state]:
                record_set[state][trading_interval] = 0

            record_set[state][trading_interval] += generated

    interval_models = []

    for state, interval_records in record_set.items():
        for trading_interval, generated in interval_records.items():
            interval_models.append(
                APVIForecastInterval(
                    **{
                        "network_id": "APVI",
                        "trading_interval": trading_interval,
                        "state": state,
                        "generated": generated,
                    }))

    apvi_server_latest: Optional[datetime] = None

    trading_intervals = list(set([i.trading_interval
                                  for i in interval_models]))

    if trading_intervals:
        apvi_server_latest = max(trading_intervals)

    apvi_forecast_set = APVIForecastSet(
        crawled=_run_at,
        intervals=interval_models,
    )

    try:
        apvi_forecast_set.server_latest = apvi_server_latest
    except ValidationError:
        logger.error("APVI validation error for server_latest: {} <{}>".format(
            apvi_server_latest, repr(apvi_server_latest)))

    return apvi_forecast_set
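
Finally, a sketch for the today endpoint, summing generation for the most recent interval per state; the import path is an assumption, and the interval fields match those constructed above.

# Sketch only: import path is an assumption; trading_interval, state and
# generated are the APVIForecastInterval fields populated above.
from collections import defaultdict

from opennem.crawlers.apvi import get_apvi_rooftop_today  # assumed path

forecast_set = get_apvi_rooftop_today()

if forecast_set and forecast_set.intervals:
    latest = max(i.trading_interval for i in forecast_set.intervals)
    latest_by_state: dict[str, float] = defaultdict(float)

    # sum generation across states for the most recent trading interval
    for interval in forecast_set.intervals:
        if interval.trading_interval == latest:
            latest_by_state[interval.state] += interval.generated

    for state, generated in latest_by_state.items():
        print(f"{state}: {generated:.1f}")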