def test_generate_series_scalar(self, connection):
    x = func.generate_series(1, 2).alias("x")
    y = func.generate_series(1, 2).alias("y")

    stmt = select(x.column, y.column).join_from(x, y, true())

    eq_(connection.execute(stmt).all(), [(1, 1), (1, 2), (2, 1), (2, 2)])
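# The test above exercises the "scalar" table-valued form, where
# generate_series is used as a FROM element with a single anonymous column.
# A minimal standalone sketch of the same statement (assuming SQLAlchemy
# >= 1.4.6, which added FunctionElement.alias(), and the PostgreSQL dialect)
# that renders the SQL without needing a live connection:
from sqlalchemy import func, select, true
from sqlalchemy.dialects import postgresql

x = func.generate_series(1, 2).alias("x")
y = func.generate_series(1, 2).alias("y")
stmt = select(x.column, y.column).join_from(x, y, true())

# Compiles to roughly:
#   SELECT x, y
#   FROM generate_series(:generate_series_1, :generate_series_2) AS x
#   JOIN generate_series(:generate_series_3, :generate_series_4) AS y ON true
print(stmt.compile(dialect=postgresql.dialect()))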
def count_datasets_through_time_query(self, start, end, period, time_field, expressions):
    raw_expressions = self._alchemify_expressions(expressions)

    start_times = select((
        func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'),
    )).alias('start_times')

    time_range_select = (
        select((
            func.tstzrange(
                start_times.c.start_time,
                func.lead(start_times.c.start_time).over()
            ).label('time_period'),
        ))
    ).alias('all_time_ranges')

    # Exclude the trailing (end time to infinite) row. Is there a simpler way?
    time_ranges = (
        select((
            time_range_select,
        )).where(
            ~func.upper_inf(time_range_select.c.time_period)
        )
    ).alias('time_ranges')

    count_query = (
        select(
            (func.count('*'),)
        ).select_from(
            self._from_expression(DATASET, expressions)
        ).where(
            and_(
                time_field.alchemy_expression.overlaps(time_ranges.c.time_period),
                DATASET.c.archived == None,
                *raw_expressions
            )
        )
    )

    return select((
        time_ranges.c.time_period,
        count_query.label('dataset_count')
    ))
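# A standalone sketch of the series-to-ranges step above (SQLAlchemy Core with
# the PostgreSQL dialect; the concrete start/end/period values are hypothetical
# placeholders): lead() pairs each start time with the next one to form
# half-open tstzrange values, and upper_inf() filters out the trailing
# open-ended range.
from datetime import datetime
from sqlalchemy import cast, func, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import INTERVAL

start, end = datetime(2020, 1, 1), datetime(2020, 6, 1)
start_times = select(
    func.generate_series(start, end, cast('1 month', INTERVAL)).label('start_time')
).subquery('start_times')
ranges = select(
    func.tstzrange(
        start_times.c.start_time,
        func.lead(start_times.c.start_time).over(),
    ).label('time_period')
).subquery('all_time_ranges')
stmt = select(ranges.c.time_period).where(~func.upper_inf(ranges.c.time_period))
print(stmt.compile(dialect=postgresql.dialect()))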
def test_with_ordinality_star(self, connection):
    stmt = select("*").select_from(
        func.generate_series(4, 1, -1).table_valued(with_ordinality="ordinality")
    )

    eq_(connection.execute(stmt).all(), [(4, 1), (3, 2), (2, 3), (1, 4)])
def traffic_history_query():
    events = (
        select(func.sum(TrafficVolume.amount).label('amount'),
               literal_column('day'),
               cast(TrafficVolume.type, TEXT).label('type'))
        .select_from(
            func.generate_series(
                func.date_trunc('day', literal_column('arg_start')),
                func.date_trunc('day', literal_column('arg_end')),
                '1 day'
            ).alias('day')
            .outerjoin(TrafficVolume.__table__, and_(
                func.date_trunc('day', TrafficVolume.timestamp) == literal_column('day'),
                TrafficVolume.user_id == literal_column('arg_user_id'))
            )
        )
        .group_by(literal_column('day'), literal_column('type'))
    ).cte()

    events_ingress = select(events).where(
        or_(events.c.type == 'Ingress', events.c.type == None)
    ).cte()
    events_egress = select(events).where(
        or_(events.c.type == 'Egress', events.c.type == None)
    ).cte()

    hist = (
        select(func.coalesce(events_ingress.c.day, events_egress.c.day).label('timestamp'),
               events_ingress.c.amount.label('ingress'),
               events_egress.c.amount.label('egress'))
        .select_from(events_ingress.join(events_egress,
                                         events_ingress.c.day == events_egress.c.day,
                                         # was `full=true` (the SQLAlchemy function object);
                                         # the flag expects a plain boolean
                                         full=True))
        .order_by(literal_column('timestamp'))
    )
    return hist
def get(self):
    now = pendulum.now()
    series = self.query(
        cast(
            func.generate_series(
                cast(now.subtract(weeks=1), type_=DATE),
                cast(now, type_=DATE),
                cast("1 day", type_=Interval),
            ),
            type_=DATE,
        ).label("timestep")
    ).subquery()
    stats = (
        self.query(series.c.timestep,
                   func.count(Scrap.id),
                   func.sum(Scrap.links_count))
        .outerjoin((
            Scrap,
            cast(Scrap.date, type_=DATE) == cast(series.c.timestep, type_=DATE),
        ))
        .group_by(series.c.timestep)
        .order_by(series.c.timestep)
        .all()
    )
    return {
        str(date): {"scraps": int(scraps), "items": int(items)}
        for (date, scraps, items) in stats
    }
def test_with_ordinality_named(self, connection):
    stmt = select(
        func.generate_series(4, 1, -1)
        .table_valued("gs", with_ordinality="ordinality")
        .render_derived()
    )

    eq_(connection.execute(stmt).all(), [(4, 1), (3, 2), (2, 3), (1, 4)])
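# A sketch of what the named form above compiles to (assuming SQLAlchemy
# >= 1.4.6): render_derived() emits PostgreSQL's derived-column-list syntax,
# naming both the series column and the WITH ORDINALITY counter.
from sqlalchemy import func, select
from sqlalchemy.dialects import postgresql

gs = func.generate_series(4, 1, -1).table_valued(
    "gs", with_ordinality="ordinality").render_derived()
stmt = select(gs.c.gs, gs.c.ordinality)

# Compiles to roughly:
#   SELECT anon_1.gs, anon_1.ordinality
#   FROM generate_series(...) WITH ORDINALITY AS anon_1(gs, ordinality)
print(stmt.compile(dialect=postgresql.dialect()))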
def check_integrity(max_entry):
    series = select([
        func.generate_series(1, max_entry).label('seq_no')
    ]).alias('series')
    res = session.query(series.c.seq_no).join(
        LocalLandChargeHistory,
        and_(series.c.seq_no == LocalLandChargeHistory.entry_number),
        isouter=True
    ).filter(LocalLandChargeHistory.entry_number == None).all()
    return [row[0] for row in res]
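# The same gap-detection anti-join as a self-contained Core sketch (the
# `history` table and its column are hypothetical stand-ins for
# LocalLandChargeHistory): generate_series supplies the complete expected
# sequence, and rows with no match on the left join are the gaps.
from sqlalchemy import Column, Integer, MetaData, Table, func, select

metadata = MetaData()
history = Table('history', metadata, Column('entry_number', Integer))

series = select(func.generate_series(1, 100).label('seq_no')).subquery('series')
missing = (
    select(series.c.seq_no)
    .select_from(series.outerjoin(history, series.c.seq_no == history.c.entry_number))
    .where(history.c.entry_number == None)  # noqa: E711 -- SQL "IS NULL" test
)
# SELECT series.seq_no
# FROM (SELECT generate_series(1, 100) AS seq_no) AS series
# LEFT OUTER JOIN history ON series.seq_no = history.entry_number
# WHERE history.entry_number IS NULL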
def compute_totals(summary_form):
    component_ids = component_names_to_ids(summary_form.component_names.data)
    from_date, to_date = summary_form.daterange.data
    resolution = summary_form.resolution.data
    table, date_column = get_history_target(summary_form.resolution.data)

    # Generate sequence of days/weeks/months in the specified range.
    from_date, to_date, delta = interval_delta(from_date, to_date, resolution)
    dates = (db.session.query(
        func.generate_series(from_date, to_date, delta).label('date'))
        .subquery())

    if summary_form.opsysreleases.data:
        # Query only requested opsys releases.
        releases = (db.session.query(OpSysRelease)
                    .filter(OpSysRelease.id.in_(
                        [osr.id for osr in summary_form.opsysreleases.data]))
                    .subquery())
    else:
        # Query all active opsys releases.
        releases = (db.session.query(OpSysRelease)
                    .filter(OpSysRelease.status != 'EOL')
                    .subquery())

    # Sum daily counts for each date in the range and each opsys release.
    history = (db.session.query(date_column.label('date'),
                                func.sum(table.count).label('count'),
                                table.opsysrelease_id)
               .filter(from_date <= date_column)
               .filter(date_column <= to_date)
               .group_by(table.opsysrelease_id, date_column))

    if component_ids:
        history = history.join(Report).filter(
            Report.component_id.in_(component_ids))

    history = history.subquery()

    q = (db.session.query(dates.c.date,
                          func.coalesce(history.c.count, 0).label('count'),
                          OpSys.name,
                          releases.c.version)
         # The tautological ON clause effectively cross-joins every date
         # with every release.
         .outerjoin(releases, dates.c.date == dates.c.date)
         .outerjoin(history,
                    (history.c.date == dates.c.date) &
                    (history.c.opsysrelease_id == releases.c.id))
         .join(OpSys, OpSys.id == releases.c.opsys_id)
         .order_by(OpSys.id, releases.c.version, dates.c.date))

    by_opsys = dict()
    groups = groupby(q.all(), lambda r: f'{r.name} {r.version}')
    for osr, rows in groups:
        counts = [(r.date, r.count) for r in rows]
        by_opsys[osr] = counts

    result = {'by_opsys': by_opsys,
              'from_date': from_date,
              'to_date': to_date}
    return result
def count_datasets_through_time(self, start, end, period, time_field, expressions):
    """
    :type period: str
    :type start: datetime.datetime
    :type end: datetime.datetime
    :type expressions: tuple[datacube.index.postgres._fields.PgExpression]
    :rtype: list[((datetime.datetime, datetime.datetime), int)]
    """
    raw_expressions = self._alchemify_expressions(expressions)

    start_times = select((
        func.generate_series(start, end, cast(period, INTERVAL)).label('start_time'),
    )).alias('start_times')

    time_range_select = (
        select((
            func.tstzrange(
                start_times.c.start_time,
                func.lead(start_times.c.start_time).over()
            ).label('time_period'),
        ))
    ).alias('all_time_ranges')

    # Exclude the trailing (end time to infinite) row. Is there a simpler way?
    time_ranges = (
        select((
            time_range_select,
        )).where(
            ~func.upper_inf(time_range_select.c.time_period)
        )
    ).alias('time_ranges')

    count_query = (
        select(
            (func.count('*'),)
        ).select_from(
            self._from_expression(DATASET, expressions)
        ).where(
            and_(
                time_field.alchemy_expression.overlaps(time_ranges.c.time_period),
                DATASET.c.archived == None,
                *raw_expressions
            )
        )
    )

    results = self._connection.execute(select((
        time_ranges.c.time_period,
        count_query.label('dataset_count')
    )))

    for time_period, dataset_count in results:
        # if not time_period.upper_inf:
        yield Range(time_period.lower, time_period.upper), dataset_count
def timeseries(self, agg_unit, start, end, geom=None, column_filters=None):
    # Reading this blog post
    # http://no0p.github.io/postgresql/2014/05/08/timeseries-tips-pg.html
    # inspired this implementation.
    t = self.point_table

    # Special case for the 'quarter' unit of aggregation.
    step = '3 months' if agg_unit == 'quarter' else '1 ' + agg_unit

    # Create a CTE to represent every time bucket in the timeseries
    # with a default count of 0
    day_generator = func.generate_series(func.date_trunc(agg_unit, start),
                                         func.date_trunc(agg_unit, end),
                                         step)
    defaults = select([sa.literal_column("0").label('count'),
                       day_generator.label('time_bucket')])\
        .alias('defaults')

    where_filters = [t.c.point_date >= start, t.c.point_date <= end]
    if column_filters is not None:
        # Column filters has to be iterable here, because the '+' operator
        # behaves differently for SQLAlchemy conditions. Instead of
        # combining the conditions together, it would try to build
        # something like :param1 + <column_filters> as a new condition.
        where_filters += [column_filters]

    # Create a CTE that grabs the number of records contained in each time
    # bucket. Will only have rows for buckets with records.
    actuals = select([func.count(t.c.hash).label('count'),
                      func.date_trunc(agg_unit, t.c.point_date).label('time_bucket')])\
        .where(sa.and_(*where_filters))\
        .group_by('time_bucket')

    # Also filter by geometry if requested
    if geom:
        contains = func.ST_Within(t.c.geom, func.ST_GeomFromGeoJSON(geom))
        actuals = actuals.where(contains)

    # Need to alias to make it usable in a subexpression
    actuals = actuals.alias('actuals')

    # Outer join the default and observed values
    # to create the timeseries select statement.
    # If no observed value in a bucket, use the default.
    name = sa.literal_column("'{}'".format(self.dataset_name))\
        .label('dataset_name')
    bucket = defaults.c.time_bucket.label('time_bucket')
    count = func.coalesce(actuals.c.count, defaults.c.count).label('count')
    ts = select([name, bucket, count])\
        .select_from(defaults.outerjoin(actuals,
                     actuals.c.time_bucket == defaults.c.time_bucket))

    return ts
def get_usage(db: Session,
              kwh: bool = True,
              count: bool = True,
              bucket: BucketType = BucketType.Days,
              chargepoint: Optional[str] = None,
              from_timestamp: Optional[datetime] = None,
              to_timestamp: Optional[datetime] = None) -> List[KeyedTuple]:
    """ Get the usage of the charging stations """
    args = [func.date(models.Transaction.start_timestamp).label('date')]
    if count:
        args.append(func.count(models.Transaction.id).label('count'))
    if kwh:
        args.append(func.sum(models.Transaction.meter_used).label('kwh'))

    query = db.query(*args)

    if chargepoint:
        # Transaction -> Connector -> ChargePoint
        query = query.filter(
            models.Transaction.connector.has(
                models.Connector.chargepoint.has(
                    models.ChargePoint.identity == chargepoint)))

    # Apply time filters
    if from_timestamp:
        query = query.filter(models.Transaction.start_timestamp > from_timestamp)
    if to_timestamp:
        query = query.filter(models.Transaction.end_timestamp < to_timestamp)

    # Generate date series
    timeseries = db.query(
        func.date(
            func.generate_series(
                func.min(from_timestamp or models.Transaction.start_timestamp),
                func.max(to_timestamp or utc_datetime()),
                timedelta(days=1))).label('date')).subquery()

    # Group values
    data = query.group_by('date').subquery()

    values = []
    if count:
        values.append(func.coalesce(data.c.count, 0).label('count'))
    if kwh:
        values.append(func.coalesce(data.c.kwh / 1000.0, 0).label('kWh'))

    return db.query(timeseries.c.date, *values).\
        outerjoin(data, data.c.date == timeseries.c.date).\
        order_by(timeseries.c.date).all()
def timeseries(self, agg_unit, start, end, geom=None, column_filters=None):
    # Reading this blog post
    # http://no0p.github.io/postgresql/2014/05/08/timeseries-tips-pg.html
    # inspired this implementation.
    t = self.point_table

    if agg_unit == 'quarter':
        step = '3 months'
    else:
        step = '1 ' + agg_unit

    # Create a CTE to represent every time bucket in the timeseries
    # with a default count of 0
    day_generator = func.generate_series(func.date_trunc(agg_unit, start),
                                         func.date_trunc(agg_unit, end),
                                         step)
    defaults = select([sa.literal_column("0").label('count'),
                       day_generator.label('time_bucket')])\
        .alias('defaults')

    # Create a CTE that grabs the number of records
    # contained in each time bucket.
    # Will only have rows for buckets with records.
    where_filters = [t.c.point_date >= start, t.c.point_date <= end]
    if column_filters:
        where_filters += column_filters

    actuals = select([func.count(t.c.hash).label('count'),
                      func.date_trunc(agg_unit, t.c.point_date).label('time_bucket')])\
        .where(sa.and_(*where_filters))\
        .group_by('time_bucket')

    # Also filter by geometry if requested
    if geom:
        contains = func.ST_Within(t.c.geom, func.ST_GeomFromGeoJSON(geom))
        actuals = actuals.where(contains)

    # Need to alias to make it usable in a subexpression
    actuals = actuals.alias('actuals')

    # Outer join the default and observed values
    # to create the timeseries select statement.
    # If no observed value in a bucket, use the default.
    name = sa.literal_column("'{}'".format(self.dataset_name))\
        .label('dataset_name')
    bucket = defaults.c.time_bucket.label('time_bucket')
    count = func.coalesce(actuals.c.count, defaults.c.count).label('count')
    ts = select([name, bucket, count])\
        .select_from(defaults.outerjoin(actuals,
                     actuals.c.time_bucket == defaults.c.time_bucket))

    return ts
def find_missing_row_for_job(job_id, job_size):
    expected_row_numbers = db.session.query(
        func.generate_series(0, job_size - 1).label('row')).subquery()

    query = db.session.query(
        Notification.job_row_number,
        expected_row_numbers.c.row.label('missing_row')
    ).outerjoin(
        Notification,
        and_(expected_row_numbers.c.row == Notification.job_row_number,
             Notification.job_id == job_id)
    ).filter(
        Notification.job_row_number == None  # noqa
    )
    return query.all()
def get_report(self):
    """This report shows the number of issues that cannot be resolved."""
    with self.c.session_context() as session:
        # TODO: add an extra query to guarantee every month appears in the timeline
        # date_list = func.generate_series(first_date, datetime.now(timezone.utc), '1 day').alias('end_month')
        User = self.User
        Issue = self.Issue

        # sq_created_count = self.count_for_month(session, Issue.created)
        # sq_resolved_count = self.count_for_month(session, Issue.resolutiondate)
        # created_year_month_column = sq_created_count.c[self.FIELD_YEAR_MONTH]
        # resolved_count_column = sq_resolved_count.c[self.FIELD_COUNT]
        # created_count_column = sq_created_count.c[self.FIELD_COUNT]
        # growth_column = created_count_column - resolved_count_column

        sq = session.query(
            User.jira_name,
            # begin_column,
            func.generate_series(
                # to get only full periods before current date
                User.working_start_date + self.SHIFT_INTERVAL,
                datetime.now(timezone.utc),
                '30 days',
            ).label(self.PERIOD_END_DATE),
            # (
            #     column(self.PERIOD_END_DATE) - column(self.SHIFT_INTERVAL)
            # ).label(self.PERIOD_START_DATE),
        ).filter(User.type.in_((UserType.backend, ))).subquery()

        column_period_end_date = sq.c[self.PERIOD_END_DATE]
        column_period_start_date = column_period_end_date - self.SHIFT_INTERVAL

        q = session.query(
            sq.c[UserAttrs.jira_name],
            # Number all working periods in the company from the beginning
            func.row_number().over(
                order_by=column_period_end_date,
                partition_by=sq.c[UserAttrs.jira_name],
            ).label(self.PERIOD_NUMBER),
            column_period_end_date,
            column_period_start_date.label(self.PERIOD_START_DATE),
            Issue.resolutiondate,
        ).outerjoin(
            Issue,
            and_(
                Issue.resolutiondate >= column_period_start_date,
                Issue.resolutiondate < column_period_end_date,
            ))

        self._save_report(q)
def missing_blocks(cls, max_block_number=None):
    """
    Return missing blocks in the blocks table between 0 and max_block_number

    :param int max_block_number: Maximum block number that we want to find missing blocks from
    """
    current_session = get_current_session()
    if max_block_number is None:
        max_block_number = cls.get_max_block_number()
    logger.debug(max_block_number)
    with current_session.db_session_scope():
        stmt = current_session.db_session.query(
            func.generate_series(0, max_block_number).label('i')).subquery()
        joined = stmt.outerjoin(cls, cls.block_number == stmt.c.i)
        missing_blocks = current_session.db_session.query(stmt.c.i.label('block_number')).\
            select_from(joined).filter(cls.block_number == None).all()
        return missing_blocks
def get_services_group_data_chart(services_group_id, time_from, time_until, metric_type):
    """
    Return data for billing cpu/vsize charts. If the time_from-time_until
    period contains more than `MAX_POINTS` data points, the data is
    aggregated to reduce the number of points.
    """
    delta = time_until - time_from
    # Compute interval (in hours) for grouping data; it can't be 0
    interval = delta.total_seconds() // (3600 * MAX_POINTS) or 1

    serie = db.session.query(
        func.generate_series(
            time_from, time_until, timedelta(hours=interval)).label("date")
    ).subquery()

    metrics = (db.session.query(
        serie.c.date.label("time"),
        serie.c.date + timedelta(hours=interval),
        func.coalesce(func.avg(column("value")), 0).label("value"),
    ).outerjoin(
        BatchStoryBilling,
        and_(
            BatchStoryBilling.time >= serie.c.date,
            BatchStoryBilling.time < (serie.c.date + timedelta(hours=interval)),
        ),
    ).outerjoin(
        MetricBilling,
        and_(
            MetricBilling.batch_id == BatchStoryBilling.id,
            MetricBilling.type == metric_type,
            MetricBilling.services_group_id == services_group_id,
        ),
    ).group_by(serie.c.date).order_by(serie.c.date))

    return metrics.all()
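# A quick check of the bucketing arithmetic above, in pure Python (MAX_POINTS
# is an assumed value; the real constant lives in the source module): a 7-day
# window still fits in 1-hour buckets, while a 30-day window widens to 7 hours.
from datetime import timedelta

MAX_POINTS = 100  # assumption for illustration

for days in (7, 30):
    delta = timedelta(days=days)
    interval = delta.total_seconds() // (3600 * MAX_POINTS) or 1
    print(days, interval)
# 7  -> 1.0 (604800 // 360000 floors to 1)
# 30 -> 7.0 (2592000 // 360000 floors to 7)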
def get_tweet_counts_by_division(self, div_id=None):
    series = self.session.query(
        func.generate_series(
            func.date_trunc('hour', datetime.utcnow() - timedelta(hours=24)),
            datetime.utcnow(),
            timedelta(hours=1)).label('time')).subquery()

    tw_count = self.session.query(
        func.date_trunc('hour', Tweet.created_at).label('time'),
        func.count('*').over(
            partition_by=func.date_trunc('hour', Tweet.created_at)).label('count')).\
        filter(Tweet.created_at > datetime.utcnow() - timedelta(hours=24)).\
        distinct()

    if div_id is not None:
        tw_count = tw_count.filter(Tweet.division_id == div_id)

    tw_count = tw_count.subquery()

    res = self.session.query(
        series.c.time,
        func.coalesce(tw_count.c.count, 0).label('count')).\
        select_from(series).\
        outerjoin(tw_count, tw_count.c.time == series.c.time).all()

    return map(lambda x: {"time": x[0].isoformat(), "count": x[1]}, res)
def get_task_stats(
    start_date, end_date, org_id, org_name, campaign, project_id, country
):
    """ Creates tasks stats for a period using the TaskStatsDTO """

    query = (
        db.session.query(
            TaskHistory.task_id,
            TaskHistory.project_id,
            TaskHistory.action_text,
            func.DATE(TaskHistory.action_date).label("day"),
        )
        .distinct(
            tuple_(TaskHistory.project_id, TaskHistory.task_id, TaskHistory.action_text)
        )
        .filter(
            TaskHistory.action == "STATE_CHANGE",
            or_(
                TaskHistory.action_text == "MAPPED",
                TaskHistory.action_text == "VALIDATED",
                TaskHistory.action_text == "BADIMAGERY",
            ),
        )
        .order_by(
            TaskHistory.project_id,
            TaskHistory.task_id,
            TaskHistory.action_text,
            TaskHistory.action_date,
        )
    )

    if org_id:
        query = query.join(Project, Project.id == TaskHistory.project_id).filter(
            Project.organisation_id == org_id
        )
    if org_name:
        try:
            organisation_id = OrganisationService.get_organisation_by_name(org_name).id
        except NotFound:
            organisation_id = None
        query = query.join(Project, Project.id == TaskHistory.project_id).filter(
            Project.organisation_id == organisation_id
        )
    if campaign:
        try:
            campaign_id = CampaignService.get_campaign_by_name(campaign).id
        except NotFound:
            campaign_id = None
        query = query.join(
            campaign_projects,
            campaign_projects.c.project_id == TaskHistory.project_id,
        ).filter(campaign_projects.c.campaign_id == campaign_id)
    if project_id:
        query = query.filter(TaskHistory.project_id.in_(project_id))
    if country:
        # Unnest country column array.
        sq = Project.query.with_entities(
            Project.id, func.unnest(Project.country).label("country")
        ).subquery()
        query = query.filter(sq.c.country.ilike("%{}%".format(country))).filter(
            TaskHistory.project_id == sq.c.id
        )

    query = query.subquery()
    date_query = db.session.query(
        func.DATE(
            func.generate_series(start_date, end_date, timedelta(days=1))
        ).label("d_day")
    ).subquery()

    grouped_dates = (
        db.session.query(
            date_query.c.d_day,
            query.c.action_text,
            func.count(query.c.action_text).label("cnt"),
        )
        .join(date_query, date_query.c.d_day == query.c.day)
        .group_by(date_query.c.d_day, query.c.action_text)
        .order_by(date_query.c.d_day)
    ).subquery()

    mapped = (
        db.session.query(
            grouped_dates.c.d_day, grouped_dates.c.action_text, grouped_dates.c.cnt
        )
        .select_from(grouped_dates)
        .filter(grouped_dates.c.action_text == "MAPPED")
        .subquery()
    )
    validated = (
        db.session.query(
            grouped_dates.c.d_day, grouped_dates.c.action_text, grouped_dates.c.cnt
        )
        .select_from(grouped_dates)
        .filter(grouped_dates.c.action_text == "VALIDATED")
        .subquery()
    )
    badimagery = (
        db.session.query(
            grouped_dates.c.d_day, grouped_dates.c.action_text, grouped_dates.c.cnt
        )
        .select_from(grouped_dates)
        .filter(grouped_dates.c.action_text == "BADIMAGERY")
        .subquery()
    )

    result = (
        db.session.query(
            func.to_char(grouped_dates.c.d_day, "YYYY-MM-DD"),
            func.coalesce(mapped.c.cnt, 0).label("mapped"),
            func.coalesce(validated.c.cnt, 0).label("validated"),
            func.coalesce(badimagery.c.cnt, 0).label("badimagery"),
        )
        .select_from(grouped_dates)
        .distinct(grouped_dates.c.d_day)
        # was `.filter(grouped_dates.c.d_day is not None)`, which Python
        # evaluates to filter(True); isnot() performs the NULL check in SQL
        .filter(grouped_dates.c.d_day.isnot(None))
        .outerjoin(mapped, mapped.c.d_day == grouped_dates.c.d_day)
        .outerjoin(validated, validated.c.d_day == grouped_dates.c.d_day)
        .outerjoin(badimagery, badimagery.c.d_day == grouped_dates.c.d_day)
    )

    day_stats_dto = list(map(StatsService.set_task_stats, result))

    results_dto = TaskStatsDTO()
    results_dto.stats = day_stats_dto

    return results_dto
def traffic_history_query():
    timestamptz = TIMESTAMP(timezone=True)

    events = union_all(
        select([TrafficCredit.amount,
                TrafficCredit.timestamp,
                literal("Credit").label('type')])
        .where(TrafficCredit.user_id == literal_column('arg_user_id')),

        select([(-TrafficVolume.amount).label('amount'),
                TrafficVolume.timestamp,
                cast(TrafficVolume.type, TEXT).label('type')])
        .where(TrafficVolume.user_id == literal_column('arg_user_id'))
    ).cte('traffic_events')

    def round_time(time_expr, ceil=False):
        round_func = func.ceil if ceil else func.trunc
        step_epoch = func.extract('epoch', literal_column('arg_step'))
        return cast(func.to_timestamp(
            round_func(func.extract('epoch', time_expr) / step_epoch) * step_epoch
        ), timestamptz)

    balance = select([TrafficBalance.amount, TrafficBalance.timestamp])\
        .select_from(User.__table__.outerjoin(TrafficBalance))\
        .where(User.id == literal_column('arg_user_id'))\
        .cte('balance')

    balance_amount = select([balance.c.amount]).as_scalar()
    balance_timestamp = select([balance.c.timestamp]).as_scalar()

    # Bucket layout
    # n = interval / step
    # 0: Aggregates all prior traffic_events so that the balance value can be calculated
    # 1 - n: Traffic history entry
    # n+1: Aggregates all data after the last point in time, will be discarded
    buckets = select([
        literal_column('bucket'),
        (func.row_number().over(order_by=literal_column('bucket')) - 1).label('index')
    ]).select_from(
        func.generate_series(
            round_time(cast(literal_column('arg_start'), timestamptz))
            - literal_column('arg_step'),
            round_time(cast(literal_column('arg_start'), timestamptz)
                       + literal_column('arg_interval')),
            literal_column('arg_step')
        ).alias('bucket')
    ).order_by(literal_column('bucket')).cte('buckets')

    def cond_sum(condition, label, invert=False):
        return func.sum(case(
            [(condition, events.c.amount if not invert else -events.c.amount)],
            else_=None)).label(label)

    hist = select([
        buckets.c.bucket,
        cond_sum(events.c.type == 'Credit', 'credit'),
        cond_sum(events.c.type == 'Ingress', 'ingress', invert=True),
        cond_sum(events.c.type == 'Egress', 'egress', invert=True),
        func.sum(events.c.amount).label('amount'),
        cond_sum(and_(balance_timestamp != None,
                      events.c.timestamp < balance_timestamp), 'before_balance'),
        cond_sum(or_(balance_timestamp == None,
                     events.c.timestamp >= balance_timestamp), 'after_balance')
    ]).select_from(
        buckets.outerjoin(
            events,
            func.width_bucket(
                events.c.timestamp,
                select([func.array(
                    select([buckets.c.bucket]).select_from(buckets)
                    .where(buckets.c.index != 0).label('dummy'))])
            ) == buckets.c.index
        )
    ).where(
        # Discard bucket n+1
        buckets.c.index < select([func.max(buckets.c.index)])
    ).group_by(buckets.c.bucket).order_by(buckets.c.bucket).cte('traffic_hist')

    # Bucket is located before the balance and no traffic_events exist before it
    first_event_timestamp = select([func.min(events.c.timestamp)]).as_scalar()
    case_before_balance_no_data = (
        and_(balance_timestamp != None,
             hist.c.bucket < balance_timestamp,
             or_(first_event_timestamp == None,
                 hist.c.bucket < first_event_timestamp)),
        None
    )

    # Bucket is located after the balance
    case_after_balance = (
        or_(balance_timestamp == None, hist.c.bucket >= balance_timestamp),
        func.coalesce(balance_amount, 0) +
        func.coalesce(
            func.sum(hist.c.after_balance).over(
                order_by=hist.c.bucket.asc(), rows=(None, 0)), 0)
    )

    # Bucket is located before the balance, but there still exist traffic_events before it
    else_before_balance = (
        func.coalesce(balance_amount, 0) +
        func.coalesce(hist.c.after_balance, 0) -
        func.coalesce(
            func.sum(hist.c.before_balance).over(
                order_by=hist.c.bucket.desc(), rows=(None, -1)), 0)
    )

    agg_hist = select([
        hist.c.bucket,
        hist.c.credit,
        hist.c.ingress,
        hist.c.egress,
        case([case_before_balance_no_data, case_after_balance],
             else_=else_before_balance).label('balance')
    ]).alias('agg_hist')

    # Remove bucket 0
    result = select([agg_hist]).order_by(agg_hist.c.bucket).offset(1)

    return result
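# The join condition above leans on the array form of PostgreSQL's
# width_bucket(operand, thresholds): it returns how many bucket lower bounds
# are <= the operand. A pure-Python emulation sketch of that behaviour:
import bisect

def width_bucket(operand, thresholds):
    # thresholds are the sorted bucket lower bounds, as in the SQL function
    return bisect.bisect_right(sorted(thresholds), operand)

assert width_bucket(5, [1, 3, 4]) == 3  # 5 falls in the bucket starting at 4
assert width_bucket(3, [1, 3, 4]) == 2  # lower bounds are inclusive
assert width_bucket(0, [1, 3, 4]) == 0  # below the first bucket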
def fetch_annual_billing(service_id, year):
    start_date, end_date = get_financial_year_range(year)
    fragments_free_limit = literal(
        fetch_sms_free_allowance_for_financial_year(service_id, year))

    query = dao_get_priced_billing_data().filter(
        FactBilling.service_id == service_id,
        FactBilling.aet_date >= start_date,
        FactBilling.aet_date <= end_date,
    )

    # break down usage by month with empty gaps by generating a series
    months_series = func.generate_series(start_date, end_date, '1 months').alias('month')
    months_series_c = column('month').cast(Date()).label('month')
    query_cte = query.cte()

    # calculate in credits in order to work backwards to get available units
    # after removing cost usage
    credits_free = fragments_free_limit * FRAGMENT_UNIT_RATE

    # coalesce and sum these totals by month
    cost = total(query_cte.c.cost)
    notifications = total(query_cte.c.notifications_sent)
    notifications_email_ = func.sum(query_cte.c.notifications_sent).filter(
        query_cte.c.notification_type == EMAIL_TYPE)
    notifications_sms_ = func.sum(query_cte.c.notifications_sent).filter(
        query_cte.c.notification_type == SMS_TYPE)
    notifications_email = func.coalesce(notifications_email_, 0)
    notifications_sms = func.coalesce(notifications_sms_, 0)
    fragments_domestic = total(query_cte.c.domestic_units)
    fragments_international = total(query_cte.c.international_units)

    # cumulative figures for entire year
    cost_cumulative = func.sum(cost).over(order_by=months_series_c)
    cost_starting = cost_cumulative - cost
    credits_remaining = func.greatest(credits_free - cost_cumulative, 0)
    credits_available = func.greatest(credits_free - cost_starting, 0)
    cost_chargeable = func.greatest(cost - credits_available, 0)
    cost_chargeable_cumulative = func.greatest(cost_cumulative - credits_free, 0)
    credits_used = func.least(cost, credits_available)

    units = cost / FRAGMENT_UNIT_RATE
    units_cumulative = cost_cumulative / FRAGMENT_UNIT_RATE
    units_chargeable = cost_chargeable / FRAGMENT_UNIT_RATE
    units_free_used = credits_used / FRAGMENT_UNIT_RATE
    units_free_available = credits_available / FRAGMENT_UNIT_RATE
    units_free_remaining = credits_remaining / FRAGMENT_UNIT_RATE
    units_chargeable_cumulative = cost_chargeable_cumulative / FRAGMENT_UNIT_RATE

    gapfilled_query = db.session.query(
        months_series_c,
        fragments_free_limit.label('fragments_free_limit'),
        fragments_domestic.label('fragments_domestic'),
        fragments_international.label('fragments_international'),
        notifications.label('notifications'),
        notifications_sms.label('notifications_sms'),
        notifications_email.label('notifications_email'),
        cost.label('cost'),
        cost_chargeable.label('cost_chargeable'),
        cost_cumulative.label('cost_cumulative'),
        cost_chargeable_cumulative.label('cost_chargeable_cumulative'),
        units.label('units'),
        units_cumulative.label('units_cumulative'),
        units_chargeable.label('units_chargeable'),
        units_chargeable_cumulative.label('units_chargeable_cumulative'),
        units_free_available.label('units_free_available'),
        units_free_remaining.label('units_free_remaining'),
        units_free_used.label('units_free_used'),
        literal(DOMESTIC_UNIT_RATE * 100).label('unit_rate_domestic'),
        literal(INTERNATIONAL_UNIT_RATE * 100).label('unit_rate_international'),
    ).select_from(
        months_series,
    ).outerjoin(
        query_cte, query_cte.c.aet_month == months_series_c,
    ).group_by(
        months_series_c,
    ).order_by(
        months_series_c,
    )

    return gapfilled_query
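# The cumulative credit logic above, restated in pure Python with illustrative
# numbers (a free allowance worth 10.0 in cost terms, three months of usage):
# cost_cumulative is the window SUM, cost_starting is the cumulative cost
# *entering* the month, and the free allowance is consumed month by month.
credits_free = 10.0
monthly_costs = [4.0, 4.0, 4.0]

cost_cumulative = 0.0
for cost in monthly_costs:
    cost_cumulative += cost
    cost_starting = cost_cumulative - cost
    credits_available = max(credits_free - cost_starting, 0)
    cost_chargeable = max(cost - credits_available, 0)
    print(cost, cost_chargeable)
# month 1: chargeable 0.0; month 2: 0.0; month 3: 2.0 (allowance exhausted)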