def export_historic_intervals(limit: int | None = None) -> None:
    """Export historic interval data week-by-week for each region of the NEM and WEM networks"""
    session = get_scoped_session()

    networks = [NetworkNEM, NetworkWEM]

    for network in networks:
        network_regions: list[NetworkRegion] = (
            session.query(NetworkRegion).filter(NetworkRegion.network_id == network.code).all()
        )

        for network_region in network_regions:
            scada_range: ScadaDateRange = get_scada_range(network=network, networks=networks, energy=False)

            if not scada_range or not scada_range.start:
                logger.error("Could not get scada range for network {}".format(network))
                continue

            # walk back from the latest scada interval towards the earliest, one week at a time
            for week_start, week_end in week_series_datetimes(
                start=scada_range.end, end=scada_range.start, length=limit
            ):
                # skip weeks that start in the future
                if week_start > get_today_opennem():
                    continue

                try:
                    export_network_intervals_for_week(
                        week_start, week_end, network=network, network_region=network_region
                    )
                except Exception as e:
                    logger.error(f"export_historic_intervals error: {e}")
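# A minimal sketch of the kind of descending week-window generator the export loop above relies
# on. The real week_series_datetimes lives elsewhere in the codebase and may differ (interval
# alignment, inclusivity, timezone handling); the function below is illustrative only.
from datetime import datetime, timedelta
from typing import Iterator


def _week_windows_descending(
    start: datetime, end: datetime, length: int | None = None
) -> Iterator[tuple[datetime, datetime]]:
    """Yield (week_start, week_end) pairs walking backwards from `start` towards `end`."""
    window_end = start
    emitted = 0

    while window_end > end and (length is None or emitted < length):
        # clamp the window start so the final window never extends past `end`
        window_start = max(window_end - timedelta(days=7), end)
        yield window_start, window_end
        window_end = window_start
        emitted += 1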
def run_crawl(
    crawler: CrawlerDefinition, last_crawled: bool = True, limit: bool = False, latest: bool = True
) -> None:
    """Runs a crawl from the crawler definition, with the ability to override the last crawled time and obey the defined limit"""
    logger.info(
        "Crawling: {}. (Last Crawled: {}. Limit: {}. Server latest: {})".format(
            crawler.name, crawler.last_crawled, crawler.limit, crawler.server_latest
        )
    )

    # now in opennem time which is Australia/Sydney
    now_opennem_time = get_today_opennem()

    crawler_set_meta(crawler.name, CrawlStatTypes.version, crawler.version)
    crawler_set_meta(crawler.name, CrawlStatTypes.last_crawled, now_opennem_time)

    cr: Optional[ControllerReturn] = crawler.processor(
        crawler=crawler, last_crawled=last_crawled, limit=crawler.limit, latest=latest
    )

    if not cr:
        return None

    # run here
    has_errors = False

    logger.info("Inserted {} of {} records".format(cr.inserted_records, cr.total_records))

    if cr.errors > 0:
        has_errors = True
        logger.error("Crawl controller error for {}: {}".format(crawler.name, cr.error_detail))

    if not has_errors:
        if cr.server_latest:
            crawler_set_meta(crawler.name, CrawlStatTypes.latest_processed, cr.server_latest)
            crawler_set_meta(crawler.name, CrawlStatTypes.server_latest, cr.server_latest)
        else:
            logger.debug("{} has no server_latest return".format(crawler.name))

        logger.info(
            "Set last_processed to {} and server_latest to {}".format(crawler.last_processed, cr.server_latest)
        )
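# Hedged usage sketch: one way run_crawl might be driven for a set of crawler definitions.
# `all_crawlers` is a hypothetical iterable of CrawlerDefinition instances; how the real
# scheduler assembles that list is outside this excerpt.
def _run_all_crawls(all_crawlers: list[CrawlerDefinition]) -> None:
    for crawler_definition in all_crawlers:
        try:
            run_crawl(crawler_definition, latest=True)
        except Exception as e:
            # keep one failing crawler from aborting the rest of the run
            logger.error(f"run_crawl failed for {crawler_definition.name}: {e}")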
def set_crawler_history(crawler_name: str, histories: list[CrawlHistoryEntry]) -> int:
    """Sets the crawler history"""
    engine = get_database_engine()
    session = get_scoped_session()

    history_intervals = [i.interval for i in histories]

    logger.debug(f"Have {len(history_intervals)} history intervals for {crawler_name}")

    date_max = max(history_intervals)
    date_min = min(history_intervals)

    logger.debug(f"crawler {crawler_name} date range: {date_min}, {date_max}")

    stmt = sql(
        """
        select
            interval
        from crawl_history
        where
            interval >= :date_min
            and interval <= :date_max
        """
    )
    query = stmt.bindparams(date_min=date_min, date_max=date_max)

    with engine.connect() as c:
        results = list(c.execute(query))

    existing_intervals = [i[0] for i in results]

    logger.debug(f"Got {len(existing_intervals)} existing intervals for crawler {crawler_name}")

    # Persist the crawl history records
    crawl_history_records: list[Dict[str, datetime | str | int | None]] = []

    for ch in histories:
        crawl_history_records.append(
            {
                "source": "nemweb",
                "crawler_name": crawler_name,
                "network_id": "NEM",
                "interval": ch.interval,
                "inserted_records": ch.records,
                "crawled_time": None,
                "processed_time": get_today_opennem(),
            }
        )

    # upsert on the crawl history unique key
    stmt = insert(CrawlHistory).values(crawl_history_records)
    stmt.bind = engine  # type: ignore
    stmt = stmt.on_conflict_do_update(  # type: ignore
        index_elements=["source", "crawler_name", "network_id", "interval"],
        set_={
            "inserted_records": stmt.excluded.inserted_records,  # type: ignore
            "crawled_time": stmt.excluded.crawled_time,  # type: ignore
            "processed_time": stmt.excluded.processed_time,  # type: ignore
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error(f"set_crawler_history error updating records: {e}")
        session.rollback()
    finally:
        session.close()
        engine.dispose()

    return len(histories)
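# Hedged usage sketch for set_crawler_history. The keyword constructor on CrawlHistoryEntry is
# assumed from the attributes read above (`interval` and `records`) and the crawler name string
# is illustrative; both may differ from the real schema.
def _record_crawl_history_example() -> int:
    now = get_today_opennem()

    histories = [CrawlHistoryEntry(interval=now, records=12)]

    # returns the number of history entries passed in
    return set_crawler_history("au.nemweb.dispatch_scada.example", histories)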
def get_apvi_rooftop_data(day: Optional[datetime] = None) -> Optional[APVIForecastSet]:
    """Obtains and parses APVI forecast data"""
    if not day:
        day = get_today_opennem()

    day_string = get_date_component(format_str=APVI_DATE_QUERY_FORMAT, dt=day)

    apvi_endpoint_url = get_apvi_uri(today=False)

    logger.info("Getting APVI data for day {} from {}".format(day_string, apvi_endpoint_url))

    _resp = _apvi_request_session.post(apvi_endpoint_url, data={"day": day_string})

    if not _resp.ok:
        logger.error("Invalid APVI Return: {}".format(_resp.status_code))
        return None

    _resp_json = None

    try:
        _resp_json = _resp.json()
    except JSONDecodeError as e:
        logger.error("Error decoding APVI response: {}".format(e))
        return None

    _required_keys = ["postcode", "postcodeCapacity", "installations"]

    for _req_key in _required_keys:
        if _req_key not in _resp_json:
            logger.error(f"Invalid APVI response: {_req_key} field not found")
            return None

    postcode_gen = _resp_json["postcode"]
    postcode_capacity = _resp_json["postcodeCapacity"]
    installations = _resp_json["installations"]

    # brisbane has no DST so it's effectively NEM time
    _run_at = get_today_opennem()

    _interval_records = []

    for record in postcode_gen:
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            # generation is reported as a percentage of the postcode's installed capacity
            generated = sum(
                float(v) / 100 * postcode_capacity[k]
                for k, v in record.items()
                if k.startswith(prefix) and v and k in postcode_capacity and k[:2] not in WA_NON_SWIS
            )

            if not generated:
                continue

            _interval_records.append(
                APVIForecastInterval(
                    **{
                        "network_id": "APVI",
                        "trading_interval": record["ts"],
                        "state": state,
                        "generated": generated,
                    }
                )
            )

    _state_capacities = {}

    # Calculate state capacities
    for postcode_prefix, capacity_val in postcode_capacity.items():
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            if state not in _state_capacities:
                _state_capacities[state] = 0

            if postcode_prefix.startswith(prefix):
                _state_capacities[state] += capacity_val

    # derive state capacity models
    _state_capacity_models = []

    for state, state_capacity in _state_capacities.items():
        capacity_registered = state_capacity
        unit_number = None

        if state.lower() in installations:
            unit_number = installations[state.lower()]

        _state_capacity_models.append(
            APVIStateRooftopCapacity(
                state=state, capacity_registered=capacity_registered, unit_number=unit_number
            )
        )

    apvi_server_latest: Optional[datetime] = None

    trading_intervals = list(set([i.trading_interval for i in _interval_records]))

    if trading_intervals:
        apvi_server_latest = max(trading_intervals)

    apvi_forecast_set = APVIForecastSet(
        crawled=_run_at, intervals=_interval_records, capacities=_state_capacity_models
    )

    try:
        apvi_forecast_set.server_latest = apvi_server_latest
    except ValidationError:
        logger.error(
            "APVI validation error for server_latest: {} <{}>".format(apvi_server_latest, repr(apvi_server_latest))
        )

    return apvi_forecast_set
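# Worked sketch of the postcode aggregation above: APVI reports generation per postcode prefix
# as a percentage of that postcode's installed capacity, so output for a state is the sum of
# pct / 100 * capacity over the matching prefixes. The numbers and keys below are made up and
# the units are whatever the API reports capacity in.
def _example_state_generation() -> float:
    postcode_capacity = {"2000": 1200.0, "2010": 800.0}  # installed capacity per postcode prefix
    record = {"ts": "2022-10-01T12:30:00", "2000": 42.5, "2010": 38.0}  # percent of capacity

    generated = sum(
        float(pct) / 100 * postcode_capacity[prefix]
        for prefix, pct in record.items()
        if prefix in postcode_capacity  # the "ts" key is filtered out here
    )

    return generated  # 0.425 * 1200 + 0.38 * 800 = 510.0 + 304.0 = 814.0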
def get_apvi_rooftop_today() -> Optional[APVIForecastSet]:
    """Gets today's APVI data and returns a forecast set"""
    apvi_endpoint_url = get_apvi_uri(today=True)

    _resp = _apvi_request_session.get(apvi_endpoint_url)

    if not _resp.ok:
        logger.error("Invalid APVI Return: {}".format(_resp.status_code))
        return None

    _resp_json = None

    try:
        _resp_json = _resp.json()
    except JSONDecodeError as e:
        logger.error("Error decoding APVI response: {}".format(e))
        return None

    _required_keys = ["capacity", "performance", "output"]

    for _req_key in _required_keys:
        if _req_key not in _resp_json:
            logger.error(f"Invalid APVI response: {_req_key} field not found")
            return None

    # capacity = _resp_json["capacity"]
    # performance = _resp_json["performance"]
    output = _resp_json["output"]

    _run_at = get_today_opennem()

    record_set: Dict[str, Dict] = {}

    for postcode_prefix, time_records in output.items():
        # Skip WA postcodes that are non-SWIS
        if postcode_prefix in WA_NON_SWIS:
            continue

        state = get_state_for_prefix(postcode_prefix)

        if state not in record_set:
            record_set[state] = {}

        for trading_interval, generated in time_records.items():
            if trading_interval not in record_set[state]:
                record_set[state][trading_interval] = 0

            record_set[state][trading_interval] += generated

    interval_models = []

    for state, interval_records in record_set.items():
        for trading_interval, generated in interval_records.items():
            interval_models.append(
                APVIForecastInterval(
                    **{
                        "network_id": "APVI",
                        "trading_interval": trading_interval,
                        "state": state,
                        "generated": generated,
                    }
                )
            )

    apvi_server_latest: Optional[datetime] = None

    trading_intervals = list(set([i.trading_interval for i in interval_models]))

    if trading_intervals:
        apvi_server_latest = max(trading_intervals)

    apvi_forecast_set = APVIForecastSet(
        crawled=_run_at,
        intervals=interval_models,
    )

    try:
        apvi_forecast_set.server_latest = apvi_server_latest
    except ValidationError:
        logger.error(
            "APVI validation error for server_latest: {} <{}>".format(apvi_server_latest, repr(apvi_server_latest))
        )

    return apvi_forecast_set
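# Hedged usage sketch tying the two APVI fetchers together: prefer the live "today" endpoint and
# fall back to the dated endpoint for the current day if it returns nothing. The fallback policy
# itself is an assumption, not something this excerpt prescribes.
def _fetch_apvi_rooftop() -> Optional[APVIForecastSet]:
    forecast_set = get_apvi_rooftop_today()

    if forecast_set is None or not forecast_set.intervals:
        logger.info("No live APVI data, falling back to dated endpoint")
        forecast_set = get_apvi_rooftop_data(day=get_today_opennem())

    if forecast_set and forecast_set.server_latest:
        logger.info("APVI server latest interval: {}".format(forecast_set.server_latest))

    return forecast_set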