def start_requests(self) -> Generator[scrapy.Request, None, None]:
    """Yield one request per (month, table) combination.

    Months come from ``month_series`` called with January 2020 as the
    first bound and July 2009 as the second; the iteration order is
    whatever ``month_series`` produces for those bounds. For every month,
    each entry of ``self.tables`` is upper-cased and substituted into
    ``MMS_URL`` together with the month and year components.
    """
    first_bound = datetime(2020, 1, 1)
    second_bound = datetime(2009, 7, 1)

    for month_dt in month_series(first_bound, second_bound):
        for table_name in self.tables:
            target_url = MMS_URL.format(
                month=get_date_component("%m", dt=month_dt),
                year=get_date_component("%Y", dt=month_dt),
                table=table_name.upper(),
            )
            yield scrapy.Request(target_url)
class NemXLSSpider(scrapy.Spider):
    """Spider that fetches a single templated URL and yields its body text.

    Subclasses are expected to set ``start_url``; when it is left unset no
    request is issued.
    """

    # URL template; formatted with ``url_params`` before the request is made.
    start_url: Optional[str] = None

    # Evaluated once, when the class body executes. ``get_date_component``
    # is called without a date here -- presumably it falls back to the
    # current date (confirm against its definition).
    url_params = dict(
        day=get_date_component("%d"),
        month=get_date_component("%m"),
        year=get_date_component("%Y"),
    )

    def start_requests(self) -> Generator[scrapy.Request, None, None]:
        """Yield a request for the formatted ``start_url``, if one is set."""
        if self.start_url:
            yield scrapy.Request(self.start_url.format(**self.url_params))

    def parse(self, response: Response) -> Generator[Dict[str, Any], None, None]:
        """Yield the response text under the ``content`` key."""
        item = {"content": response.text}
        yield item
class WemCurrentSpider(scrapy.Spider):
    """Spider that fetches a single templated URL with a cache-busting suffix.

    Subclasses are expected to set ``start_url``; when it is left unset no
    request is issued.
    """

    # URL template; formatted with ``url_params`` before the request is made.
    start_url = None

    # Evaluated once, when the class body executes. ``get_date_component``
    # is called without a date here -- presumably it falls back to the
    # current date (confirm against its definition).
    url_params = {
        "day": get_date_component("%d"),
        "month": get_date_component("%m"),
        "year": get_date_component("%Y"),
    }

    def start_requests(self) -> Generator[scrapy.Request, None, None]:
        """Yield a request for ``start_url`` with a timestamp token appended.

        The token is the current local time down to the microsecond, added
        as a bare query component so intermediate caches see a new URL on
        every run.
        """
        if not self.start_url:
            return

        # Year, month, day, hour, minute, second, microsecond. The month
        # directive is lowercase ``%m``; uppercase ``%M`` is the minute.
        cache_bust = datetime.now().strftime("%Y%m%d%H%M%S%f")

        request_url = self.start_url.format(**self.url_params)

        # Extend an existing query string instead of adding a second "?".
        separator = "&" if "?" in request_url else "?"

        yield scrapy.Request(f"{request_url}{separator}{cache_bust}")

    def parse(self, response) -> Generator[Dict, None, None]:
        """Yield the response text under the ``content`` key."""
        yield {"content": response.text}
def get_apvi_rooftop_data(
        day: Optional[datetime] = None) -> Optional[APVIForecastSet]:
    """Obtain and parse APVI rooftop data for a single day.

    Posts the requested day to the APVI endpoint, then aggregates the
    per-postcode values in the response into per-state generation
    intervals and per-state registered capacities.

    Args:
        day: Day to query. When omitted, the value returned by
            ``get_today_opennem()`` is used.

    Returns:
        An ``APVIForecastSet`` holding the intervals and state capacities,
        or ``None`` when the HTTP response is not OK, the body cannot be
        decoded as JSON, or one of the required top-level keys is missing.
    """
    if not day:
        day = get_today_opennem()

    day_string = get_date_component(format_str=APVI_DATE_QUERY_FORMAT, dt=day)
    apvi_endpoint_url = get_apvi_uri(today=False)

    logger.info("Getting APVI data for day {} from {}".format(
        day_string, apvi_endpoint_url))

    _resp = _apvi_request_session.post(apvi_endpoint_url,
                                       data={"day": day_string})

    if not _resp.ok:
        logger.error("Invalid APVI Return: {}".format(_resp.status_code))
        return None

    try:
        _resp_json = _resp.json()
    except JSONDecodeError as e:
        logger.error("Error decoding APVI response: {}".format(e))
        return None

    # Bail out early if the payload lacks any section used below.
    for _req_key in ("postcode", "postcodeCapacity", "installations"):
        if _req_key not in _resp_json:
            logger.error(f"Invalid APVI response: {_req_key} field not found")
            return None

    postcode_gen = _resp_json["postcode"]
    postcode_capacity = _resp_json["postcodeCapacity"]
    installations = _resp_json["installations"]

    # brisbane has no DST so it's effectively NEM time
    _run_at = get_today_opennem()

    # Per-state generation for every record in the "postcode" section.
    _interval_records = []

    for record in postcode_gen:
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            # Each value is divided by 100 and multiplied by the capacity
            # stored under the same key (presumably a percentage of the
            # postcode's capacity -- confirm against the APVI API). Keys
            # whose first two characters are in WA_NON_SWIS are excluded.
            generated = sum(
                float(v) / 100 * postcode_capacity[k]
                for k, v in record.items()
                if k.startswith(prefix) and v and k in postcode_capacity
                and k[:2] not in WA_NON_SWIS)

            # Skip states with nothing generated in this interval.
            if not generated:
                continue

            _interval_records.append(
                APVIForecastInterval(
                    network_id="APVI",
                    trading_interval=record["ts"],
                    state=state,
                    generated=generated,
                ))

    # Calculate state capacities by summing every postcode capacity whose
    # key starts with the state's prefix.
    _state_capacities = {}

    for postcode_prefix, capacity_val in postcode_capacity.items():
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            _state_capacities.setdefault(state, 0)

            if postcode_prefix.startswith(prefix):
                _state_capacities[state] += capacity_val

    # Derive state capacity models.
    _state_capacity_models = []

    for state, state_capacity in _state_capacities.items():
        # Look the unit count up afresh for every state: a state missing
        # from "installations" gets None rather than an unbound name or
        # the value left over from the previous state.
        unit_number = installations.get(state.lower())

        _state_capacity_models.append(
            APVIStateRooftopCapacity(state=state,
                                     capacity_registered=state_capacity,
                                     unit_number=unit_number))

    # Latest trading interval seen in the response, if there was any.
    apvi_server_latest: Optional[datetime] = max(
        (i.trading_interval for i in _interval_records), default=None)

    apvi_forecast_set = APVIForecastSet(crawled=_run_at,
                                        intervals=_interval_records,
                                        capacities=_state_capacity_models)

    # Best effort: a rejected server_latest is logged and the set is still
    # returned without it.
    try:
        apvi_forecast_set.server_latest = apvi_server_latest
    except ValidationError:
        logger.error("APVI validation error for server_latest: {} <{}>".format(
            apvi_server_latest, repr(apvi_server_latest)))

    return apvi_forecast_set