def import_nemweb_scada() -> None:
    """Import historic NEMWEB dispatch SCADA records, month by month.

    Walks backwards one calendar month at a time from
    ``NEMWEB_DISPATCH_OLD_MAX_DATE`` down to ``NEMWEB_DISPATCH_OLD_MIN_DATE``,
    querying the MySQL source for each month and inserting the rows as
    ``DispatchUnitSolutionOld`` records.

    Returns:
        None. Side effect only: rows are written via ``insert_scada_records``.
    """
    engine_mysql = get_mysql_engine()

    logger.info("Connected to database.")

    for dt in date_series(
        date_trunc(NEMWEB_DISPATCH_OLD_MAX_DATE, "month"),
        date_trunc(NEMWEB_DISPATCH_OLD_MIN_DATE, "month"),
        interval=relativedelta(months=1),
        reverse=True,
    ):
        query = get_scada_old_query(year=dt.year, month=dt.month)

        with engine_mysql.connect() as c:
            logger.debug(query)
            results_raw = list(c.execute(query))

        # Bug fix: previously this logged a year/month captured once from
        # NEMWEB_DISPATCH_OLD_MIN_DATE before the loop, so every iteration
        # reported the same (wrong) month. Log the month actually imported.
        logger.info(
            "Got %s rows for year %s and month %s",
            len(results_raw),
            dt.year,
            dt.month,
        )

        results_schema = [
            DispatchUnitSolutionOld(
                trading_interval=i[0],
                facility_code=i[1],
                generated=i[2],
            )
            for i in results_raw
        ]

        insert_scada_records(results_schema)

    return None
def get_crawler_missing_intervals(
    crawler_name: str,
    interval: TimeInterval,
    days: int = 365 * 3,
) -> list[datetime]:
    """Gets the crawler missing intervals going back a period of days.

    Generates the full series of expected intervals (in SQL) and left-joins
    against ``crawl_history`` for the named crawler; any interval without an
    inserted-records entry is considered missing.
    """
    engine = get_database_engine()

    stmt = sql(
        """
        with intervals as (
            select interval
            from generate_series(
                nemweb_latest_interval() - interval :days,
                nemweb_latest_interval(),
                interval :interval_size
            ) AS interval
        )

        select
            intervals.interval,
            case when ch.inserted_records is NULL then FALSE else TRUE end as has_record
        from intervals
        left join (
            select * from crawl_history
            where
                crawler_name = :crawler_name
                and interval <= nemweb_latest_interval()
                and interval >= nemweb_latest_interval() - interval :days
        ) as ch on ch.interval = intervals.interval
        where ch.inserted_records is null
        order by 1 desc;
        """
    )

    bound_query = stmt.bindparams(
        crawler_name=crawler_name,
        days=f"{days} days",
        interval_size=interval.interval_sql,
    )

    logger.debug(dedent(str(bound_query)))

    with engine.connect() as conn:
        rows = list(conn.execute(bound_query))

    missing = [row[0] for row in rows]

    # truncate
    # @NOTE specific >= as we trunc hour or greater
    if interval.interval >= 60:
        missing = [date_trunc(ts, interval.trunc) for ts in missing]

    return missing
def daily_fueltech_summary_query(day: datetime = DATE_YESTERDAY, network: NetworkSchema = NetworkNEM) -> str:
    """Build the daily fueltech summary SQL for a single network day.

    The day is interpreted in the network's fixed offset, truncated to
    midnight, and the query window covers [date_min, date_min + 1 day).
    Import/export, interconnector and battery-discharging fueltechs are
    excluded from the summary.
    """
    excluded_fueltechs = [
        "imports",
        "exports",
        "interconnector",
        "battery_discharging",
    ]

    # anchor the requested day to the network's fixed UTC offset
    localized_day = day.replace(tzinfo=network.get_fixed_offset())

    window_start = date_trunc(localized_day, truncate_to="day")
    window_end = window_start + timedelta(days=1)

    return dedent(
        __daily_fueltech_query.format(
            date_min=window_start,
            date_max=window_end,
            tz=network.timezone_database,
            network_id=network.code,
            fueltechs_excluded=duid_in_case(excluded_fueltechs),
        )
    )
def stats_factory(
    stats: List[DataQueryResult],
    units: UnitDefinition,
    interval: TimeInterval,
    period: Optional[TimePeriod] = None,
    network: Optional[NetworkSchema] = None,
    timezone: Optional[Union[timezone, str]] = None,
    code: Optional[str] = None,
    region: Optional[str] = None,
    include_group_code: bool = False,
    fueltech_group: Optional[bool] = False,
    group_field: Optional[str] = None,
    data_id: Optional[str] = None,
    localize: Optional[bool] = True,
    include_code: Optional[bool] = True,
    cast_nulls: Optional[bool] = True,
) -> Optional[OpennemDataSet]:
    """
    Takes a list of data query results and returns OpennemDataSets

    Groups the flat DataQueryResult rows by their ``group_by`` value, builds
    one OpennemData series per group (sorted by interval, with trailing/edge
    nulls trimmed and start/end localized to the network timezone), assembles
    a dotted series id from network/region/group/unit components, and wraps
    everything in a single OpennemDataSet.

    @TODO optional groupby field
    @TODO multiple groupings / slight refactor
    """
    # network timezone wins over any explicitly passed timezone
    if network:
        timezone = network.get_timezone()

    group_codes = list(set([i.group_by for i in stats if i.group_by]))

    stats_grouped = []

    for group_code in group_codes:
        # map interval -> result for the rows belonging to this group;
        # later rows with the same interval overwrite earlier ones
        data_grouped: Dict[datetime, Any] = dict()

        for stat in stats:
            if stat.group_by != group_code:
                continue

            if stat.interval not in data_grouped:
                data_grouped[stat.interval] = None

            # if stat.result:
            data_grouped[stat.interval] = stat.result

        data_sorted = OrderedDict(sorted(data_grouped.items()))

        data_value = list(data_sorted.values())

        # Skip null series
        if len([i for i in data_value if i]) == 0:
            continue

        # @TODO possible bring this back
        # Skip zero series
        # if sum([i for i in data_value if i]) == 0:
        #     continue

        # Cast trailing nulls - temperature series keep their trailing nulls
        # unless the unit explicitly opts in via units.cast_nulls
        if (not units.name.startswith("temperature") or (units.cast_nulls is True)) and (cast_nulls is True):
            data_value = cast_trailing_nulls(data_value)

        data_trimmed = dict(zip(data_sorted.keys(), data_value))
        data_trimmed = trim_nulls(data_trimmed)

        # Find start/end dates
        dates = list(data_trimmed.keys())

        if not dates:
            # NOTE(review): this aborts the whole factory (returns None) when a
            # single group trims to empty, rather than skipping the group with
            # `continue` — looks deliberate-ish but worth confirming.
            return None

        start = min(dates)
        end = max(dates)

        # should probably make sure these are the same TZ
        if localize:
            if timezone and not is_aware(start):
                start = make_aware(start, timezone)

            if timezone and not is_aware(end):
                end = make_aware(end, timezone)

        if timezone and localize and network and network.offset:
            tz = network.get_timezone()

            if tz:
                start = start.astimezone(tz)
                end = end.astimezone(tz)

        # Everything needs a timezone even flat dates
        if network and timezone and not is_aware(start):
            start = start.replace(tzinfo=network.get_fixed_offset())

        if network and timezone and not is_aware(end):
            end = end.replace(tzinfo=network.get_fixed_offset())

        # @TODO compose this and make it generic - some intervals
        # get truncated.
        # trunc the date for days and months
        if interval == human_to_interval("1d"):
            start = date_trunc(start, truncate_to="day")
            end = date_trunc(end, truncate_to="day")

        if interval == human_to_interval("1M"):
            start = date_trunc(start, truncate_to="month")
            end = date_trunc(end, truncate_to="month")

        # free
        dates = []

        history = OpennemDataHistory(
            start=start,
            last=end,
            interval=interval.interval_human,
            data=data_trimmed.values(),
        )

        data = OpennemData(
            data_type=units.unit_type,
            units=units.unit,
            # interval=interval,
            # period=period,
            history=history,
        )

        if include_code:
            data.code = group_code

        if network:
            data.network = network.code.lower()

        # *sigh* - not the most flexible model
        # @TODO fix this schema and make it more flexible
        if fueltech_group:
            data.fuel_tech = group_code

            # NOTE(review): assumes network is set whenever fueltech_group is
            # used — network.code is dereferenced unguarded in the region
            # component below; confirm against callers.
            data_comps = [
                # @NOTE disable for now since FE doesn't
                # support it
                network.country if network else None,
                network.code.lower() if network else None,
                region.lower() if region and region.lower() != network.code.lower() else None,
                "fuel_tech",
                group_code,
                units.unit_type,
            ]

            data.id = ".".join(i for i in data_comps if i)
            # @TODO make this an alias
            data.type = units.unit_type

        # group_field id building overwrites any id set by the fueltech
        # branch above — branch order is significant here
        if group_field:
            group_fields = []

            # setattr(data, group_field, group_code)

            if network:
                group_fields.append(network.country.lower())
                group_fields.append(network.code.lower())

            if region:
                if region.lower() != network.code.lower():
                    group_fields.append(region.lower())

            if units.name_alias:
                group_fields.append(units.name_alias)
            elif units.unit_type:
                group_fields.append(units.unit_type)

            if group_code and include_group_code:
                group_fields.append(group_code)
                group_fields.append(group_field)

            data.id = ".".join([f for f in group_fields if f])
            data.type = units.unit_type

        # an explicit data_id always wins
        if data_id:
            data.id = data_id

        # fallback id when none of the branches above produced one
        if not data.id:
            _id_list = []

            # @NOTE disable for now since FE doesn't
            # support it
            # network.country if network else None,

            if network:
                _id_list.append(network.code.lower())

            if region and (region.lower() != network.code.lower()):
                _id_list.append(region.lower())

            if group_code:
                _id_list.append(group_code.lower())

            if units and units.name_alias:
                _id_list.append(units.name_alias)
            elif units and units.name:
                _id_list.append(units.name)

            data.id = ".".join([f for f in _id_list if f])
            data.type = units.unit_type

        if region:
            data.region = region

        stats_grouped.append(data)

    dt_now = datetime.now()

    if network:
        dt_now = dt_now.astimezone(network.get_timezone())

    # @NOTE this should probably be
    # country.network.region
    if not code:
        if network:
            code = network.code

        if region:
            code = region

    stat_set = OpennemDataSet(
        type=units.unit_type,
        data=stats_grouped,
        created_at=dt_now,
        version=get_version(),
    )

    if include_code:
        stat_set.code = code

    if network:
        stat_set.network = network.code

    if region:
        stat_set.region = region

    return stat_set
def get_range(self) -> DatetimeRange:
    """Return a DatetimeRange from the time series for queries.

    Resolves the effective [start, end] window from the series settings in
    priority order: forecast (forward window, returns early), period
    ("all" vs a fixed look-back), then an optional year override, then an
    optional month override. Both endpoints are finally localized to the
    network's fixed offset.

    Returns:
        DatetimeRange with localized start/end and this series' interval.

    Raises:
        ValueError: if ``self.year`` is after the end date's year.
    """
    start = self.start
    end = self.end

    # If its a forward looking forecast
    # jump out early
    if self.forecast:
        start = self.end + timedelta(minutes=self.interval.interval)
        end = self.end + get_human_interval(self.forecast_period)

        start = start.astimezone(self.network.get_fixed_offset())
        end = end.astimezone(self.network.get_fixed_offset())

        return DatetimeRange(start=start, end=end, interval=self.interval)

    # subtract the period (ie. 7d from the end for start if not all)
    if self.period == human_to_period("all"):
        start = date_trunc(start, self.interval.trunc)
        start = start.replace(
            hour=0, minute=0, second=0, tzinfo=self.network.get_fixed_offset()
        )

        # If its all per month take the end of the last month
        if self.interval == human_to_interval("1M"):
            end = date_trunc(get_end_of_last_month(end), "day")
            end = end.replace(
                hour=23, minute=59, second=59, tzinfo=self.network.get_fixed_offset()
            )

        # "all" covers everything so a year override no longer applies
        self.year = None
    else:
        start = self.end - get_human_interval(self.period.period_human)

    if self.year:
        # Bug fix: message previously read "is great than"; also raise the
        # more specific ValueError (still caught by `except Exception`).
        if self.year > end.year:
            raise ValueError("Specified year is greater than end year")

        start = start.replace(
            year=self.year,
            month=1,
            day=1,
            hour=0,
            minute=0,
            second=0,
            tzinfo=self.network.get_fixed_offset(),
        )

        end = datetime(
            year=self.year,
            month=12,
            day=31,
            hour=23,
            minute=59,
            second=59,
            tzinfo=self.network.get_fixed_offset(),
        )

        # current year: clamp the end to yesterday (or the series end if
        # that is earlier than today)
        if self.year == CUR_YEAR:
            today = datetime.now(tz=self.network.get_fixed_offset())
            end = datetime(
                year=CUR_YEAR, month=today.month, day=today.day, hour=23, minute=59, second=59
            )
            end = end - timedelta(days=1)
            end = end.replace(tzinfo=self.network.get_fixed_offset())

            if self.end.date() < today.date():
                end = self.end

    if self.month:
        # a month override spans that whole calendar month
        start = datetime(
            year=self.month.year,
            month=self.month.month,
            day=1,
            hour=0,
            minute=0,
            second=0,
            tzinfo=self.network.get_fixed_offset(),
        )

        end = start + get_human_interval("1M") - timedelta(days=1)
        end = end.replace(
            hour=23,
            minute=59,
            second=59,
        )

    # localize times
    if not start.tzinfo or start.tzinfo != self.network.get_fixed_offset():
        start = start.astimezone(self.network.get_fixed_offset())

    if not end.tzinfo or end.tzinfo != self.network.get_fixed_offset():
        end = end.astimezone(self.network.get_fixed_offset())

    dtr = DatetimeRange(start=start, end=end, interval=self.interval)

    return dtr