示例#1
0
def import_nemweb_scada() -> None:
    engine_mysql = get_mysql_engine()
    logger.info("Connected to database.")

    year = NEMWEB_DISPATCH_OLD_MIN_DATE.year
    month = NEMWEB_DISPATCH_OLD_MIN_DATE.month

    for dt in date_series(
            date_trunc(NEMWEB_DISPATCH_OLD_MAX_DATE, "month"),
            date_trunc(NEMWEB_DISPATCH_OLD_MIN_DATE, "month"),
            interval=relativedelta(months=1),
            reverse=True,
    ):
        query = get_scada_old_query(year=dt.year, month=dt.month)

        with engine_mysql.connect() as c:
            logger.debug(query)

            results_raw = list(c.execute(query))

            logger.info("Got {} rows for year {} and month {}".format(
                len(results_raw), year, month))

        results_schema = [
            DispatchUnitSolutionOld(
                trading_interval=i[0],
                facility_code=i[1],
                generated=i[2],
            ) for i in results_raw
        ]

        insert_scada_records(results_schema)

    return None
示例#2
0
def get_crawler_missing_intervals(
    crawler_name: str,
    interval: TimeInterval,
    days: int = 365 * 3,
) -> list[datetime]:
    """Gets the crawler missing intervals going back a period of days"""
    engine = get_database_engine()

    stmt = sql(
        """
        with intervals as (
            select
                interval
            from generate_series(nemweb_latest_interval() - interval :days, nemweb_latest_interval(), interval  :interval_size) AS interval
        )

        select
            intervals.interval,
            case when ch.inserted_records is NULL then FALSE else TRUE end as has_record
        from intervals
        left join (
            select * from crawl_history
            where crawler_name = :crawler_name
            and interval <= nemweb_latest_interval() and interval >= nemweb_latest_interval() - interval :days
        ) as ch on ch.interval = intervals.interval
        where ch.inserted_records is null
        order by 1 desc;
    """
    )

    query = stmt.bindparams(crawler_name=crawler_name, days=f"{days} days", interval_size=interval.interval_sql)

    logger.debug(dedent(str(query)))

    with engine.connect() as c:
        results = list(c.execute(query))

    models = [i[0] for i in results]

    # truncate
    # @NOTE specific >= as we trunc hour or greater
    if interval.interval >= 60:
        models = [date_trunc(i, interval.trunc) for i in models]

    return models
示例#3
0
def daily_fueltech_summary_query(day: datetime = DATE_YESTERDAY,
                                 network: NetworkSchema = NetworkNEM) -> str:
    EXCLUDE_FUELTECHS = [
        "imports", "exports", "interconnector", "battery_discharging"
    ]

    day_date = day.replace(tzinfo=network.get_fixed_offset())

    date_min = date_trunc(day_date, truncate_to="day")
    date_max = date_min + timedelta(days=1)

    query = __daily_fueltech_query.format(
        date_min=date_min,
        date_max=date_max,
        tz=network.timezone_database,
        network_id=network.code,
        fueltechs_excluded=duid_in_case(EXCLUDE_FUELTECHS),
    )

    return dedent(query)
示例#4
0
def stats_factory(
    stats: List[DataQueryResult],
    units: UnitDefinition,
    interval: TimeInterval,
    period: Optional[TimePeriod] = None,
    network: Optional[NetworkSchema] = None,
    timezone: Optional[Union[timezone, str]] = None,
    code: Optional[str] = None,
    region: Optional[str] = None,
    include_group_code: bool = False,
    fueltech_group: Optional[bool] = False,
    group_field: Optional[str] = None,
    data_id: Optional[str] = None,
    localize: Optional[bool] = True,
    include_code: Optional[bool] = True,
    cast_nulls: Optional[bool] = True,
) -> Optional[OpennemDataSet]:
    """
    Takes a list of data query results and returns OpennemDataSets

    @TODO optional groupby field
    @TODO multiple groupings / slight refactor

    """

    if network:
        timezone = network.get_timezone()

    group_codes = list(set([i.group_by for i in stats if i.group_by]))

    stats_grouped = []

    for group_code in group_codes:

        data_grouped: Dict[datetime, Any] = dict()

        for stat in stats:
            if stat.group_by != group_code:
                continue

            if stat.interval not in data_grouped:
                data_grouped[stat.interval] = None

            # if stat.result:
            data_grouped[stat.interval] = stat.result

        data_sorted = OrderedDict(sorted(data_grouped.items()))

        data_value = list(data_sorted.values())

        # Skip null series
        if len([i for i in data_value if i]) == 0:
            continue

        # @TODO possible bring this back
        # Skip zero series
        # if sum([i for i in data_value if i]) == 0:
        # continue

        # Cast trailing nulls
        if (not units.name.startswith("temperature") or
            (units.cast_nulls is True)) and (cast_nulls is True):
            data_value = cast_trailing_nulls(data_value)

        data_trimmed = dict(zip(data_sorted.keys(), data_value))

        data_trimmed = trim_nulls(data_trimmed)

        # Find start/end dates
        dates = list(data_trimmed.keys())

        if not dates:
            return None

        start = min(dates)
        end = max(dates)

        # should probably make sure these are the same TZ
        if localize:
            if timezone and not is_aware(start):
                start = make_aware(start, timezone)

            if timezone and not is_aware(end):
                end = make_aware(end, timezone)

            if timezone and localize and network and network.offset:
                tz = network.get_timezone()

                if tz:
                    start = start.astimezone(tz)
                    end = end.astimezone(tz)

        # Everything needs a timezone even flat dates
        if network and timezone and not is_aware(start):
            start = start.replace(tzinfo=network.get_fixed_offset())

        if network and timezone and not is_aware(end):
            end = end.replace(tzinfo=network.get_fixed_offset())

        # @TODO compose this and make it generic - some intervals
        # get truncated.
        # trunc the date for days and months
        if interval == human_to_interval("1d"):
            start = date_trunc(start, truncate_to="day")
            end = date_trunc(end, truncate_to="day")

        if interval == human_to_interval("1M"):
            start = date_trunc(start, truncate_to="month")
            end = date_trunc(end, truncate_to="month")

        # free
        dates = []

        history = OpennemDataHistory(
            start=start,
            last=end,
            interval=interval.interval_human,
            data=data_trimmed.values(),
        )

        data = OpennemData(
            data_type=units.unit_type,
            units=units.unit,
            # interval=interval,
            # period=period,
            history=history,
        )

        if include_code:
            data.code = group_code

        if network:
            data.network = network.code.lower()

        # *sigh* - not the most flexible model
        # @TODO fix this schema and make it more flexible
        if fueltech_group:
            data.fuel_tech = group_code

            data_comps = [
                # @NOTE disable for now since FE doesn't
                # support it
                network.country if network else None,
                network.code.lower() if network else None,
                region.lower()
                if region and region.lower() != network.code.lower() else None,
                "fuel_tech",
                group_code,
                units.unit_type,
            ]

            data.id = ".".join(i for i in data_comps if i)
            # @TODO make this an alias
            data.type = units.unit_type

        if group_field:
            group_fields = []

            # setattr(data, group_field, group_code)

            if network:
                group_fields.append(network.country.lower())
                group_fields.append(network.code.lower())

            if region:
                if region.lower() != network.code.lower():
                    group_fields.append(region.lower())

            if units.name_alias:
                group_fields.append(units.name_alias)

            elif units.unit_type:
                group_fields.append(units.unit_type)

            if group_code and include_group_code:
                group_fields.append(group_code)
                group_fields.append(group_field)

            data.id = ".".join([f for f in group_fields if f])
            data.type = units.unit_type

        if data_id:
            data.id = data_id

        if not data.id:
            _id_list = []

            # @NOTE disable for now since FE doesn't
            # support it
            # network.country if network else None,

            if network:
                _id_list.append(network.code.lower())

            if region and (region.lower() != network.code.lower()):
                _id_list.append(region.lower())

            if group_code:
                _id_list.append(group_code.lower())

            if units and units.name_alias:
                _id_list.append(units.name_alias)
            elif units and units.name:
                _id_list.append(units.name)

            data.id = ".".join([f for f in _id_list if f])
            data.type = units.unit_type

        if region:
            data.region = region

        stats_grouped.append(data)

    dt_now = datetime.now()

    if network:
        dt_now = dt_now.astimezone(network.get_timezone())

    # @NOTE this should probably be
    # country.network.region
    if not code:
        if network:
            code = network.code

        if region:
            code = region

    stat_set = OpennemDataSet(
        type=units.unit_type,
        data=stats_grouped,
        created_at=dt_now,
        version=get_version(),
    )

    if include_code:
        stat_set.code = code

    if network:
        stat_set.network = network.code

    if region:
        stat_set.region = region

    return stat_set
示例#5
0
    def get_range(self) -> DatetimeRange:
        """Return a DatetimeRange from the time series for queries"""
        start = self.start
        end = self.end

        # If its a forward looking forecast
        # jump out early
        if self.forecast:
            start = self.end + timedelta(minutes=self.interval.interval)
            end = self.end + get_human_interval(self.forecast_period)

            start = start.astimezone(self.network.get_fixed_offset())
            end = end.astimezone(self.network.get_fixed_offset())

            return DatetimeRange(start=start, end=end, interval=self.interval)

        # subtract the period (ie. 7d from the end for start if not all)
        if self.period == human_to_period("all"):
            start = date_trunc(start, self.interval.trunc)
            start = start.replace(
                hour=0, minute=0, second=0, tzinfo=self.network.get_fixed_offset()
            )

            # If its all per month take the end of the last month
            if self.interval == human_to_interval("1M"):
                end = date_trunc(get_end_of_last_month(end), "day")
                end = end.replace(
                    hour=23, minute=59, second=59, tzinfo=self.network.get_fixed_offset()
                )

            self.year = None

        else:
            start = self.end - get_human_interval(self.period.period_human)

        if self.year:
            if self.year > end.year:
                raise Exception("Specified year is great than end year")

            start = start.replace(
                year=self.year,
                month=1,
                day=1,
                hour=0,
                minute=0,
                second=0,
                tzinfo=self.network.get_fixed_offset(),
            )

            end = datetime(
                year=self.year,
                month=12,
                day=31,
                hour=23,
                minute=59,
                second=59,
                tzinfo=self.network.get_fixed_offset(),
            )

            if self.year == CUR_YEAR:
                today = datetime.now(tz=self.network.get_fixed_offset())

                end = datetime(
                    year=CUR_YEAR, month=today.month, day=today.day, hour=23, minute=59, second=59
                )

                end = end - timedelta(days=1)

                end = end.replace(tzinfo=self.network.get_fixed_offset())

                if self.end.date() < today.date():
                    end = self.end

        if self.month:
            start = datetime(
                year=self.month.year,
                month=self.month.month,
                day=1,
                hour=0,
                minute=0,
                second=0,
                tzinfo=self.network.get_fixed_offset(),
            )

            end = start + get_human_interval("1M") - timedelta(days=1)

            end = end.replace(
                hour=23,
                minute=59,
                second=59,
            )

        # localize times
        if not start.tzinfo or start.tzinfo != self.network.get_fixed_offset():
            start = start.astimezone(self.network.get_fixed_offset())

        if not end.tzinfo or end.tzinfo != self.network.get_fixed_offset():
            end = end.astimezone(self.network.get_fixed_offset())

        dtr = DatetimeRange(start=start, end=end, interval=self.interval)

        return dtr