def column_windows(session, column, windowsize):
    """Yield WHERE clauses that break ``column`` into consecutive windows.

    The column is numbered with ``row_number()`` ordered by its own value and
    every ``windowsize``-th value becomes a window boundary.  Each yielded
    clause selects ``start <= column < end``; the clause for the last window
    is open-ended (``column >= start``).

    :param session: session used to run the boundary query
    :param column: column to build the windows on
    :param windowsize: number of rows per window
    :return: generator of SQL expressions, one per window
    """
    # Local import: plain strings are not accepted as filter criteria by
    # current SQLAlchemy releases, so the raw fragment is wrapped in text().
    from sqlalchemy import text

    def int_for_range(start_id, end_id):
        """Create the range clause; ``end_id`` of None means open-ended."""
        # Compare against None so that a falsy boundary (e.g. 0) still
        # closes the window instead of producing an overlapping open range.
        if end_id is not None:
            return and_(column >= start_id, column < end_id)
        return column >= start_id

    qry = session.query(
        column,
        func.row_number().over(order_by=column).label('rownum')
    ).from_self(column)
    if windowsize > 1:
        qry = qry.filter(text("rownum %% %d=1" % windowsize))

    boundaries = [qid for qid, in qry]
    # Pair each boundary with its successor; the last one has no upper bound.
    for start, end in zip(boundaries, boundaries[1:] + [None]):
        yield int_for_range(start, end)
def get_top_scorers(self):
    """Top players by score. Shared by all renderers.

    Returns the rows (rank, player_id, nick, total_score) for the map
    ``self.map_id``, ordered by total score descending, honouring
    ``self.last`` (offset) and ``self.limit`` when they are set.
    """
    # NOTE(review): the look-back window is fixed at 120 days and
    # self.lifetime is not consulted here -- confirm this is intended.
    cutoff = self.now - timedelta(days=120)

    top_scorers_q = DBSession.query(
        fg.row_number().over(
            order_by=expr.desc(func.sum(PlayerGameStat.score))).label("rank"),
        Player.player_id,
        Player.nick,
        func.sum(PlayerGameStat.score).label("total_score"))\
        .filter(Player.player_id == PlayerGameStat.player_id)\
        .filter(Game.game_id == PlayerGameStat.game_id)\
        .filter(Game.map_id == self.map_id)\
        .filter(Player.player_id > 2)\
        .filter(PlayerGameStat.create_dt > cutoff)\
        .order_by(expr.desc(func.sum(PlayerGameStat.score)))\
        .group_by(Player.nick)\
        .group_by(Player.player_id)

    if self.last:
        top_scorers_q = top_scorers_q.offset(self.last)

    if self.limit:
        top_scorers_q = top_scorers_q.limit(self.limit)

    return top_scorers_q.all()
def test_over(self):
    """Check row_number() OVER (ORDER BY strcol) against the expected rows."""
    numbered = select([
        flds.c.intcol,
        func.row_number().over(order_by=flds.c.strcol),
    ])
    rows = numbered.execute().fetchall()
    eq_(rows, [(13, 1), (5, 2)])
def top_players(self):
    """Top players on this server by total playing time.

    Returns rows of (rank, player_id, nick, alivetime).  Any failure while
    building or running the query is logged at debug level and turned into
    ``HTTPNotFound``.
    """
    try:
        ranked = DBSession.query(
            fg.row_number().over(
                order_by=expr.desc(func.sum(PlayerGameStat.alivetime))
            ).label("rank"),
            Player.player_id,
            Player.nick,
            func.sum(PlayerGameStat.alivetime).label("alivetime"),
        )
        ranked = ranked.filter(Player.player_id == PlayerGameStat.player_id)
        ranked = ranked.filter(Game.game_id == PlayerGameStat.game_id)
        ranked = ranked.filter(Game.server_id == self.server_id)
        ranked = ranked.filter(Player.player_id > 2)
        ranked = ranked.filter(
            PlayerGameStat.create_dt >
            (self.now - timedelta(days=self.lifetime)))
        ranked = ranked.order_by(
            expr.desc(func.sum(PlayerGameStat.alivetime)))
        ranked = ranked.group_by(Player.nick)
        ranked = ranked.group_by(Player.player_id)

        # Optional paging window.
        if self.last:
            ranked = ranked.offset(self.last)
        if self.limit:
            ranked = ranked.limit(self.limit)

        top_players = ranked.all()
    except Exception as e:
        log.debug(e)
        raise HTTPNotFound

    return top_players
def top_maps(self):
    """Return the raw data shared by all renderers.

    Rows are (rank, map_id, name, times_played) for games on
    ``self.server_id`` created within the last ``self.lifetime`` days.
    Any failure is logged at debug level and turned into ``HTTPNotFound``.
    """
    try:
        ranked = DBSession.query(
            fg.row_number().over(
                order_by=expr.desc(func.count())).label("rank"),
            Game.map_id,
            Map.name,
            func.count().label("times_played"),
        )
        ranked = ranked.filter(Map.map_id == Game.map_id)
        ranked = ranked.filter(Game.server_id == self.server_id)
        ranked = ranked.filter(
            Game.create_dt > (self.now - timedelta(days=self.lifetime)))
        ranked = ranked.group_by(Game.map_id)
        ranked = ranked.group_by(Map.name)
        ranked = ranked.order_by(expr.desc(func.count()))

        # Optional paging window.
        if self.last:
            ranked = ranked.offset(self.last)
        if self.limit:
            ranked = ranked.limit(self.limit)

        top_maps = ranked.all()
    except Exception as e:
        log.debug(e)
        raise HTTPNotFound

    return top_maps
def test_over(self):
    """String names given to over() render as PARTITION BY / ORDER BY."""
    inner = select([column("foo"), column("bar")])
    windowed = func.row_number().over(order_by="foo", partition_by="bar")
    outer = select([windowed]).select_from(inner)

    expected_sql = (
        "SELECT row_number() OVER (PARTITION BY bar ORDER BY foo) "
        "AS anon_1 FROM (SELECT foo, bar)"
    )
    self.assert_compile(outer, expected_sql)
def test_no_paren_fns(self):
    """Compile a set of SQL functions and compare the rendered text."""
    cases = [
        (func.uid(), "uid"),
        (func.UID(), "UID"),
        (func.sysdate(), "sysdate"),
        (func.row_number(), "row_number()"),
        (func.rank(), "rank()"),
        (func.now(), "CURRENT_TIMESTAMP"),
        (func.current_timestamp(), "CURRENT_TIMESTAMP"),
        (func.user(), "USER"),
    ]
    for sql_function, rendered in cases:
        self.assert_compile(sql_function, rendered)
def get_child_query_by_priority(self, child_name):
    """Query the child model named ``child_name`` for this rules set.

    Each row pairs the model entity with a ``_priority`` column holding its
    row number when ordered by the model's ``priority`` attribute.  Raises
    ``KeyError`` for an unknown ``child_name``.
    """
    models_by_child_name = {
        'skills_categories': SkillCategory,
        'skills': Skill,
        'perks': Perk,
        'items': Item,
        'item_groups': ItemGroup,
        'races': Race,
        'character_class': CharacterClass,
        'dices': Dice,
    }
    child_model = models_by_child_name[child_name]

    # partition_by = rules_id ?
    priority_rank = func.row_number().over(
        order_by=child_model.priority).label('_priority')
    children = DBSession.query(child_model, priority_rank)
    return children.filter_by(rules_id=self.id)
def iter_bounds(db_session, column, batch_size, importlimit):
    """
    Return a list of (lower bound, upper bound) tuples which contain row ids
    to iterate through a table in batches of ``batch_size``. If ``importlimit``
    is greater than zero, return only enough tuples to contain ``importlimit``
    rows.

    The second element of the last tuple in the returned list may be ``None``.
    This happens if the last batch will contain less than ``batch_size`` rows.

    :param sqlalchemy.orm.session.Session db_session:
    :param sqlalchemy.Column column:
    :param int batch_size:
    :param int importlimit:
    :rtype: [(int, int)]
    """
    # Local import: plain strings are not accepted as filter criteria by
    # current SQLAlchemy releases, so raw fragments are wrapped in text().
    from sqlalchemy import text

    q = db_session.query(
        column,
        func.row_number().over(order_by=column).label('rownum')
    ).from_self(column)

    if batch_size > 1:
        q = q.filter(text("rownum %% %d=1" % batch_size))

    if importlimit:
        q = q.filter(text("rownum <= %d" % (importlimit)))

    starts = [row[0] for row in q]
    if not starts:
        return []

    # If there's an importlimit, just add a noop bound (end == start) for the
    # last batch. This way, :func:`sir.indexing.index_entity` doesn't require
    # any information about the limit. Otherwise the last batch is open-ended.
    last_end = starts[-1] if importlimit else None
    return list(zip(starts, starts[1:] + [last_end]))
def test_clause_expansion(self):
    """A Bundle used in ORDER BY expands into its member columns."""
    Data = self.classes.Data
    bundle = Bundle("b1", Data.id, Data.d1, Data.d2)
    session = Session()

    # Bundle passed directly to Query.order_by().
    ordered = session.query(Data).order_by(bundle)
    self.assert_compile(
        ordered,
        "SELECT data.id AS data_id, data.d1 AS data_d1, "
        "data.d2 AS data_d2, data.d3 AS data_d3 FROM data "
        "ORDER BY data.id, data.d1, data.d2",
    )

    # Bundle passed as the ORDER BY of a window function.
    windowed = session.query(func.row_number().over(order_by=bundle))
    self.assert_compile(
        windowed,
        "SELECT row_number() OVER (ORDER BY data.id, data.d1, data.d2) "
        "AS anon_1 FROM data",
    )
def test_clause_expansion(self):
    """Check that ordering by a Bundle renders each bundled column."""
    Data = self.classes.Data
    sess = Session()
    cols = Bundle('b1', Data.id, Data.d1, Data.d2)

    plain_order_sql = (
        "SELECT data.id AS data_id, data.d1 AS data_d1, "
        "data.d2 AS data_d2, data.d3 AS data_d3 FROM data "
        "ORDER BY data.id, data.d1, data.d2"
    )
    self.assert_compile(sess.query(Data).order_by(cols), plain_order_sql)

    window_order_sql = (
        "SELECT row_number() OVER (ORDER BY data.id, data.d1, data.d2) "
        "AS anon_1 FROM data"
    )
    self.assert_compile(
        sess.query(func.row_number().over(order_by=cols)),
        window_order_sql
    )
def column_windows(session, column, windowsize):
    """Yield WHERE clauses against a given column that break it into windows.

    The column is numbered with ``row_number()`` ordered by its own value and
    every ``windowsize``-th value becomes a window boundary.  Each yielded
    clause selects ``start <= column < end``; the clause for the last window
    is open-ended (``column >= start``).

    Requires a database that supports window functions,
    i.e. Postgresql, SQL Server, Oracle.

    Enhance this yourself !  Add a "where" argument
    so that windows of just a subset of rows can
    be computed.

    :param session: session used to run the boundary query
    :param column: column to build the windows on
    :param windowsize: number of rows per window
    :return: generator of SQL expressions, one per window
    """
    def int_for_range(start_id, end_id):
        """Create the range clause; ``end_id`` of None means open-ended."""
        # Compare against None so that a falsy boundary (e.g. 0) still
        # closes the window instead of producing an overlapping open range.
        if end_id is not None:
            return and_(column >= start_id, column < end_id)
        return column >= start_id

    q = session.query(
        column,
        func.row_number().over(order_by=column).label('rownum')
    ).from_self(column)
    if windowsize > 1:
        q = q.filter(sqlalchemy.text("rownum %% %d=1" % windowsize))

    boundaries = [value for value, in q]
    # Pair each boundary with its successor; the last one has no upper bound.
    for start, end in zip(boundaries, boundaries[1:] + [None]):
        yield int_for_range(start, end)
def daily_statistics_agg(session):
    """
    Aggregate statistics for each day in each year.

    Result columns:
    - day_of_year, year
    - bid
    - post_count, edit_count, posts_length, threads_created, active_users
    - active_threads: list of dicts of the most active threads (w.r.t. post
      count) of the day. Each dict consists of json_thread_columns
      (tid, [sub]title) plus "thread_post_count".
    """
    year = func.extract('year', Post.timestamp).label('year')
    per_day_stats = aggregate_stats_segregated_by_time(
        session, func.extract('doy', Post.timestamp),
        'day_of_year').subquery()

    json_thread_columns = (Thread.tid, Thread.title, Thread.subtitle)

    # Number the threads inside each (year, board, day) partition by
    # descending post count, with tid as the secondary sort key.
    thread_rank = func.row_number().over(
        partition_by=tuple_(
            year, Thread.bid, func.extract('doy', Post.timestamp)),
        order_by=tuple_(desc(func.count(Post.pid)), Thread.tid),
    ).label('rank')

    ranked_threads = session.query(
        *json_thread_columns,
        func.count(Post.pid).label('thread_post_count'),
        func.extract('doy', Post.timestamp).label('doy'),
        year,
        Thread.bid,
        thread_rank)
    ranked_threads = ranked_threads.join(Post.thread)
    ranked_threads = ranked_threads.group_by(
        *json_thread_columns, 'doy', Thread.bid, year)
    tadt = ranked_threads.subquery('tadt')

    # Keep the five best-ranked threads per partition and fold them into
    # one JSON array per (doy, bid, year).
    top_threads = session.query(
        tadt.c.doy,
        tadt.c.year,
        tadt.c.bid,
        func.json_agg(column('tadt')).label('active_threads'))
    top_threads = top_threads.select_from(tadt)
    top_threads = top_threads.filter(tadt.c.rank <= 5)
    top_threads = top_threads.group_by('doy', 'bid', 'year').subquery()

    same_day_and_board = and_(
        top_threads.c.doy == per_day_stats.c.day_of_year,
        top_threads.c.year == per_day_stats.c.year,
        top_threads.c.bid == per_day_stats.c.bid)
    combined = session.query(
        *per_day_stats.c, top_threads.c.active_threads)
    return combined.join(top_threads, same_day_and_board)
def resolve_przwin_list_page(root, info, pagination=None):
    """Return one page of rows, newest ``drwt_no`` first.

    ``pagination`` must provide ``"page"`` and ``"rows_per_page"`` values
    convertible to ``int``; ``.get`` is called on it unconditionally, so the
    ``None`` default cannot actually be used.  The page number is multiplied
    directly by the page length, i.e. page 0 is the first page.
    """
    page_index = int(pagination.get("page"))
    page_length = int(pagination.get("rows_per_page"))

    # Rows with rn in (lower_exclusive, upper_inclusive] belong to the page.
    lower_exclusive = page_index * page_length
    upper_inclusive = lower_exclusive + page_length

    model_cls = LottoPrzwinType._meta.model
    row_number = func.row_number().over(
        order_by=desc(model_cls.drwt_no)).label('rn')
    numbered = session.query(model_cls) \
        .add_column(row_number) \
        .cte(recursive=False, name="SRC_OBJ")

    page_query = session.query(model_cls).select_entity_from(numbered)
    page_query = page_query.filter(numbered.c.rn > lower_exclusive)
    page_query = page_query.filter(numbered.c.rn <= upper_inclusive)
    return page_query.all()
def history_data(self, start, end, interval):
    """
    Return every ``interval``-th history row between ``start`` and ``end``.

    Rows of ``self.data`` with ``start <= time <= end`` are numbered by time
    and only those whose row number has the same remainder (modulo
    ``interval``) as the total row count are kept, so the newest row in the
    range is always part of the result.
    """
    time_range = and_(DeviceData.time >= start, DeviceData.time <= end)
    in_range = self.data.filter(time_range)

    # Set proper remainder, make the end always to be the latest data
    remainder = in_range.count() % interval

    row_number_column = func.row_number().over(
        order_by=DeviceData.time).label('row_number')
    numbered = in_range.add_columns(row_number_column)
    sampled = numbered.from_self().filter(
        row_number_column % interval == remainder)
    return sampled.all()
def collections(self):
    """Render the collections page listing product groups with item counts.

    Each row handed to the template carries ``product_id``, ``title``,
    ``created_date``, a running ``number`` (row number ordered by creation
    date) and ``total`` (count of product items).
    """
    # Parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3.
    print('The userid is %d' % g.user.id)
    session = DBSession()
    products = session.query(User) \
        .join(User.products) \
        .join(Product.productitems) \
        .group_by(Product.id, Product.created_date, Product.title) \
        .order_by(Product.created_date) \
        .values(
            Product.id.label('product_id'),
            Product.title.label('title'),
            Product.created_date.label('created_date'),
            (func.row_number().over(
                order_by='products.created_date').label('number')),
            (func.count(ProductItem.id)).label('total'))

    if products is None:
        return None
    # The same template is rendered for every request method.
    return render_template('user_collections.html', products=products)
def column_windows(session, column, windowsize):
    """Yield WHERE clauses against a given column that break it into windows.

    The column is numbered with ``row_number()`` ordered by its own value and
    every ``windowsize``-th value becomes a window boundary.  Each yielded
    clause selects ``start <= column < end``; the clause for the last window
    is open-ended (``column >= start``).

    Requires a database that supports window functions,
    i.e. Postgresql, SQL Server, Oracle.

    Enhance this yourself !  Add a "where" argument
    so that windows of just a subset of rows can
    be computed.

    :param session: session used to run the boundary query
    :param column: column to build the windows on
    :param windowsize: number of rows per window
    :return: generator of SQL expressions, one per window
    """
    # Local import: plain strings are not accepted as filter criteria by
    # current SQLAlchemy releases, so the raw fragment is wrapped in text().
    from sqlalchemy import text

    def int_for_range(start_id, end_id):
        """Create the range clause; ``end_id`` of None means open-ended."""
        # Compare against None so that a falsy boundary (e.g. 0) still
        # closes the window instead of producing an overlapping open range.
        if end_id is not None:
            return and_(column >= start_id, column < end_id)
        return column >= start_id

    q = session.query(
        column,
        func.row_number().over(order_by=column).label('rownum')
    ).from_self(column)
    if windowsize > 1:
        q = q.filter(text("rownum %% %d=1" % windowsize))

    boundaries = [value for value, in q]
    # Pair each boundary with its successor; the last one has no upper bound.
    for start, end in zip(boundaries, boundaries[1:] + [None]):
        yield int_for_range(start, end)
def crime_data2():
    """Return, as JSON, the top offense category for districts '1'-'6'.

    Offense categories are counted per police district and ranked by count
    (descending) within each district; only the first-ranked row of each
    requested district is returned.
    """
    offense_count = func.count(sac_crime_data.Offense_Category)
    rank_in_district = func.row_number().over(
        partition_by=sac_crime_data.Police_District,
        order_by=func.count(sac_crime_data.Offense_Category).desc(),
    ).label("row_num")

    ranked = session.query(
        sac_crime_data.Police_District,
        sac_crime_data.Offense_Category,
        offense_count,
        rank_in_district,
    ).group_by(
        sac_crime_data.Police_District,
        sac_crime_data.Offense_Category,
    ).subquery()

    wanted_districts = ['1', '2', '3', '4', '5', '6']
    top_rows = session.query(ranked) \
        .filter(ranked.c.row_num == 1) \
        .filter(ranked.c.Police_District.in_(wanted_districts)) \
        .all()
    print(str(top_rows))

    payload = [
        {"Police_District": row[0],
         "Offense_Category": row[1],
         "Total_Crimes": row[2]}
        for row in top_rows
    ]
    return jsonify(payload)
def upgrade():
    """Replace ``condor_proc == -1`` with the batch's index within its job.

    The index is the zero-based position of the batch among the batches of
    the same ``job_id``, ordered by ``id``.
    """
    metadata = MetaData(schema='jsil')
    batches = Table(
        'test_batches', metadata,
        Column('id', Integer),
        Column('job_id', Integer),
        Column('condor_proc', SmallInteger),
    )

    # row_number() starts at 1, hence the "- 1".
    zero_based_index = (
        func.row_number().over(
            partition_by=batches.c.job_id, order_by=batches.c.id)
        - op.inline_literal(1)
    ).label('idx')
    indexed_batches = select(
        [batches.c.id, zero_based_index]).alias('indexed_batches')

    statement = batches.update()
    statement = statement.where(
        batches.c.condor_proc == op.inline_literal(-1))
    statement = statement.where(batches.c.id == indexed_batches.c.id)
    statement = statement.values(condor_proc=indexed_batches.c.idx)
    op.execute(statement)
def get_champions():
    """
    Select championships.

    For every year the playoff matchup with the highest ``matchup_id`` is
    taken; its owner is the champion when ``is_win`` is set, otherwise the
    opponent is.

    :return: list of championships
    """
    newest_first = func.row_number().over(
        partition_by=Matchups.year,
        order_by=desc(Matchups.matchup_id),
    ).label("row_number")

    playoff_rows = db.session.query(Matchups, newest_first) \
        .filter(Matchups.is_playoffs.is_(True)) \
        .subquery()
    deciding_matchups = db.session.query(playoff_rows) \
        .filter(playoff_rows.c.row_number == 1) \
        .all()

    return [
        Champions(
            year=matchup.year,
            owner_id=(matchup.owner_id if matchup.is_win
                      else matchup.opponent_owner_id))
        for matchup in deciding_matchups
    ]
def collections(self):
    """Render the user's collections page.

    The template receives one row per product group with ``product_id``,
    ``title``, ``created_date``, ``number`` (row number ordered by creation
    date) and ``total`` (count of product items).
    """
    # Parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3.
    print('The userid is %d' % g.user.id)
    session = DBSession()

    grouped = session.query(User) \
        .join(User.products) \
        .join(Product.productitems) \
        .group_by(Product.id, Product.created_date, Product.title) \
        .order_by(Product.created_date)
    products = grouped.values(
        Product.id.label('product_id'),
        Product.title.label('title'),
        Product.created_date.label('created_date'),
        (func.row_number().over(
            order_by='products.created_date').label('number')),
        (func.count(ProductItem.id)).label('total'))

    if products is None:
        return None
    # GET and non-GET requests render exactly the same template.
    return render_template('user_collections.html', products=products)
def get_groupBy(table_value):
    """
    Count occurrences of each distinct ``table_value`` and rank them.

    Parameters
    ----------
    table_value: column/attribute from a Sqlalchemy model to group by.

    Returns
    -------
    list: rows of (row_number, value, count) with row_number assigned by
    descending count; ``None`` if any exception occurred (it is logged as a
    warning).
    """
    try:
        session = db_connection()[1]
        ranking = func.row_number().over(
            order_by=func.count(table_value)
            .label(str(table_value) + '_numb').desc()
        ).label("row_number")
        occurrences = func.count(table_value).label(
            str(table_value) + '_numb')
        grouped = session.query(ranking, table_value, occurrences)
        return grouped.group_by(table_value).all()
    except Exception as exc:
        logging.warning(exc)
def dashboard(user_id):
    """
    Generate the user's dashboard.

    Shows the ten highest scores and, when the user already has at least one
    score, the user's best score together with its position in the general
    ranking.  Responds with 404 when the user does not exist.
    """
    user_obj: User = User.query.get_or_404(user_id)

    LOGGER.info("Get the the better result made by the user")
    # Fetch the best score once; first() is None when the user has no score.
    best_score = (
        db.session.query(Score)
        .filter(Score.user_id == user_obj.id)
        .order_by(desc(Score.points))
        .first()
    )

    LOGGER.info("Get the 10 highest positions in the ranking")
    first_10 = (
        db.session.query(User.username, Score.points)
        .join(Score, User.id == Score.user_id)
        .order_by(desc(Score.points))
        .limit(10)
        .all()
    )

    if best_score is None:
        return render_template(
            "home/dashboard.html",
            no_result_yet=True,
            first_10=first_10,
            title="Dashboard",
        )

    LOGGER.info("Get the user position in the general ranking")
    sub = db.session.query(
        Score.user_id,
        func.row_number().over(order_by=desc(Score.points)).label("pos"),
    ).subquery()
    # A user may own several scores; order by position so first() always
    # picks the best (lowest) one instead of an arbitrary row.
    position_row = (
        db.session.query(sub.c.pos)
        .filter(sub.c.user_id == user_id)
        .order_by(sub.c.pos)
        .first()
    )

    return render_template(
        "home/dashboard.html",
        no_result_yet=False,
        user_score=best_score.points,
        user_position=position_row[0],
        first_10=first_10,
        title="Dashboard",
    )
def get_movies_list(self, N: int = None, genres: str = None,
                    year_from: int = None, year_to: int = None,
                    regexp: str = None):
    """Return movies grouped by genre, best rated first.

    :param N: keep at most ``N`` movies per genre (all when ``None``)
    :param genres: keep movies whose genre is located inside this string
    :param year_from: earliest movie year (inclusive)
    :param year_to: latest movie year (inclusive)
    :param regexp: keep movies whose name matches this regular expression
    :return: dict mapping genre to a list of dicts with ``movie_id``,
        ``movie_name``, ``movie_year``, ``rating`` and ``count_of_ratings``
    """
    # The window needs its own ORDER BY: without it the numbering inside a
    # genre is arbitrary and "row_num <= N" would not select the top rated.
    ranked = func.row_number().over(
        partition_by=Movie.genre,
        order_by=Movie.rating.desc()).label("row_num")
    from_request = self.session.query(Movie, ranked)

    if genres is not None:
        from_request = from_request.where(
            func.locate(Movie.genre, genres) > 0)
    if year_from is not None:
        from_request = from_request.where(year_from <= Movie.movie_year)
    if year_to is not None:
        from_request = from_request.where(year_to >= Movie.movie_year)
    if regexp is not None:
        from_request = from_request.where(
            func.regexp_instr(Movie.movie_name, regexp) > 0)

    from_request = from_request.order_by(
        Movie.genre, Movie.rating.desc()).cte("temp")

    # NOTE(review): relies on the CTE exposing exactly these six Movie
    # columns, in this order, followed by row_num -- verify against Movie.
    genre, movie_id, movie_name, year, rating, count_of_ratings, _ = \
        from_request.c
    row_num = from_request.c.row_num

    main_request = self.session.query(
        genre, movie_id, movie_name, year, rating, count_of_ratings)
    if N is not None:
        main_request = main_request.where(row_num <= N)
    # An ORDER BY inside the CTE does not order the outer select; sort
    # explicitly so each genre's list is returned best rated first.
    main_request = main_request.order_by(genre, row_num)

    result_dict = {}
    for (genre, movie_id, movie_name, movie_year, rating,
         count_of_ratings) in main_request.all():
        result_dict.setdefault(genre, []).append(
            dict(movie_id=movie_id,
                 movie_name=movie_name,
                 movie_year=movie_year,
                 rating=rating,
                 count_of_ratings=count_of_ratings))
    return result_dict
def __init__(self, order_by, query_alias=None, partition_by=None,
             page_size=10, cursor=0):
    """Build the paged and the counting select over ``query_alias``.

    Every column of ``query_alias`` is selected next to a row number ``rn``
    computed with the given ``order_by`` / ``partition_by``.
    ``self.base_query`` returns up to ``page_size`` rows whose ``rn`` is
    greater than ``cursor``; ``self.count_query`` counts all numbered rows.
    ``query_alias.c`` is read unconditionally, so the ``None`` default
    cannot actually be used.
    """
    row_number = func.row_number().over(
        order_by=order_by, partition_by=partition_by).label('rn')
    # Every public attribute of the column collection is taken as a column.
    alias_columns = [
        getattr(query_alias.c, attr)
        for attr in dir(query_alias.c)
        if not attr.startswith('_')
    ]
    numbered_query = select(
        [row_number] + alias_columns).alias(name='paged_query')

    page = select(['*']).select_from(numbered_query)
    page = page.where(numbered_query.c.rn > cursor)
    self.base_query = page.limit(page_size)

    self.count_query = select([func.count()]).select_from(numbered_query)
def limit_groups(query, model, partition_by, order_by, limit=None, offset=0):
    """Limit the number of rows returned for each group.

    This utility allows you to apply a limit/offset to grouped rows
    of a query. Note that the query will only contain the data from
    `model`; i.e. you cannot add additional entities.

    :param query: The original query, including filters, joins, etc.
    :param model: The model class for `query`
    :param partition_by: The column to group by
    :param order_by: The column to order the partitions by
    :param limit: The maximum number of rows for each partition;
                  a falsy value means no upper bound
    :param offset: The number of rows to skip in each partition
    """
    rownum = over(
        func.row_number(),
        partition_by=partition_by,
        order_by=order_by,
    ).label('rownum')
    inner = query.add_columns(rownum).subquery()

    # rownum starts at 1, so "offset < rownum" skips the first `offset` rows.
    criteria = [offset < inner.c.rownum]
    if limit:
        criteria.append(inner.c.rownum <= (limit + offset))

    return model.query.select_entity_from(inner).filter(*criteria)
def states_daily_query_with_limit(state=None, preview=False, limit=None):
    '''
    Query the CoreData rows of the latest batch per state and date.

    TODO: shortly, this method will eventually replace the top
    `states_daily_query`, but for now, we're using it as a separate method

    :param state: restrict to this state when not None
    :param preview: use unpublished batches when True, published otherwise
    :param limit: keep only the ``limit`` most recent dates per state
    :return: the (unexecuted) query, newest date first, then by state
    '''
    # first retrieve latest published batch per state
    batch_criteria = [Batch.dataEntryType.in_(['daily', 'edit'])]
    if state is not None:
        batch_criteria.append(CoreData.state == state)
    wanted_published = not preview
    batch_criteria.append(Batch.isPublished == wanted_published)

    recency_rank = func.row_number().over(
        partition_by=CoreData.state,
        order_by=CoreData.date.desc()).label('row')
    batches = db.session.query(
        CoreData.state,
        CoreData.date,
        func.max(CoreData.batchId).label('maxBid'),
        recency_rank)
    batches = batches.join(Batch).filter(*batch_criteria)
    batches = batches.group_by(CoreData.date, CoreData.state)
    batches = batches.order_by(CoreData.date.desc(), CoreData.state)
    latest_state_daily_batches = batches.subquery(
        'latest_state_daily_batches')

    row_criteria = []
    if limit is not None:
        row_criteria.append(latest_state_daily_batches.c.row <= limit)

    same_batch_state_date = and_(
        CoreData.batchId == latest_state_daily_batches.c.maxBid,
        CoreData.state == latest_state_daily_batches.c.state,
        CoreData.date == latest_state_daily_batches.c.date)
    latest_daily_data_query = db.session.query(CoreData).join(
        latest_state_daily_batches, same_batch_state_date)
    latest_daily_data_query = latest_daily_data_query.filter(*row_criteria)
    latest_daily_data_query = latest_daily_data_query.order_by(
        CoreData.date.desc()).order_by(CoreData.state)
    return latest_daily_data_query
def generate_grids(config, area=None):
    """Query a 2-D grid over the configured bounding box and build objects.

    Reads ``BOUNDING_BOX``, ``GRID_OBJ``, ``RESOLUTION`` and ``EPSG`` from
    ``config``.  One ``GRID_OBJ`` instance is created per grid cell, but the
    instances are currently neither persisted nor returned (the function
    returns ``None``).  On any exception the error is printed and the
    process exits with status -1.  ``area`` is not used.
    """
    bounding_box = WKTElement(config['BOUNDING_BOX'], srid=4326)
    grid_obj = config['GRID_OBJ']
    resolution = config['RESOLUTION']
    epsg = config['EPSG']

    try:
        # makegrid_2d is a self-defined function in Psql
        cells = func.ST_Dump(
            func.makegrid_2d(bounding_box, resolution, resolution))
        grids = session.query(cells.geom.label('geom')).subquery()

        # using the boundary to crop the area
        # if config['AREA'] == 'los_angeles':
        #     grids = session.query(grids.c.geom) \
        #         .filter(func.ST_Intersects(LosAngelesCountyBoundary.wkb_geometry, grids.c.geom)).subquery()

        def centroid():
            """Fresh centroid expression of the cell geometry."""
            return func.ST_Centroid(grids.c.geom)

        def projected_centroid():
            """Fresh centroid expression transformed to the target EPSG."""
            return func.ST_Transform(func.ST_Centroid(grids.c.geom), epsg)

        results = session.query(
            func.row_number().over().label('gid'),
            centroid().label('centroid'),
            func.ST_X(centroid()).label('lon'),
            func.ST_Y(centroid()).label('lat'),
            grids.c.geom,
            func.ST_X(projected_centroid()).label('lon_proj'),
            func.ST_Y(projected_centroid()).label('lat_proj')).all()

        obj_results = [
            grid_obj(gid=gid, centroid=center, lon=lon, lat=lat, geom=geom,
                     lon_proj=lon_proj, lat_proj=lat_proj)
            for gid, center, lon, lat, geom, lon_proj, lat_proj in results
        ]

        # session.add_all(obj_results)
        # session.commit()
        return

    except Exception as e:
        print(e)
        exit(-1)
def get_latest_fixes(self, max_age=timedelta(hours=6), **kw):
    """Return a query for the most recent tracking fix of every pilot.

    Only fixes with a location that are at most ``max_age`` old and at least
    as old as the pilot's ``tracking_delay`` (in minutes) are considered.
    The result is ordered newest first and eagerly loads the pilot.
    Extra keyword arguments are accepted but not used.
    """
    delay_interval = cast(
        cast(User.tracking_delay, String) + ' minutes', Interval)

    # 1 == newest fix of the pilot.
    newest_first = over(
        func.row_number(),
        partition_by=TrackingFix.pilot_id,
        order_by=desc(TrackingFix.time),
    ).label('row_number')

    numbered = DBSession.query(TrackingFix.id, newest_first)
    numbered = numbered.outerjoin(TrackingFix.pilot)
    numbered = numbered.filter(
        TrackingFix.time >= datetime.utcnow() - max_age)
    numbered = numbered.filter(
        TrackingFix.time <= datetime.utcnow() - delay_interval)
    numbered = numbered.filter(TrackingFix.location_wkt != None)  # noqa: E711
    subq = numbered.subquery()

    latest = DBSession.query(TrackingFix)
    latest = latest.options(joinedload(TrackingFix.pilot))
    latest = latest.filter(TrackingFix.id == subq.c.id)
    latest = latest.filter(subq.c.row_number == 1)
    return latest.order_by(desc(TrackingFix.time))
def _column_windows(self, session, column, windowsize):
    """Yield WHERE clauses against a given column that break it into windows.

    The column is numbered with ``row_number()`` ordered by its own value and
    every ``windowsize``-th value becomes a window boundary.  Each yielded
    clause selects ``start <= column < end``; the clause for the last window
    is open-ended (``column >= start``).

    The code is taken from:
    https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery

    :param session: session used to run the boundary query
    :param column: column to build the windows on
    :param windowsize: number of rows per window
    :return: generator of SQL expressions, one per window
    """
    def int_for_range(start_id, end_id):
        """Create the range clause; ``end_id`` of None means open-ended."""
        # Compare against None so that a falsy boundary (e.g. 0) still
        # closes the window instead of producing an overlapping open range.
        if end_id is not None:
            return and_(column >= start_id, column < end_id)
        return column >= start_id

    q = session.query(
        column,
        func.row_number().over(order_by=column).label('rownum')
    ).from_self(column)
    if windowsize > 1:
        q = q.filter(text("rownum %% %d=1" % windowsize))

    boundaries = [value for value, in q]
    # Pair each boundary with its successor; the last one has no upper bound.
    for start, end in zip(boundaries, boundaries[1:] + [None]):
        yield int_for_range(start, end)
def limit_groups(query, model, partition_by, order_by, limit=None, offset=0):
    """Limits the number of rows returned for each group

    This utility allows you to apply a limit/offset to grouped rows
    of a query. Note that the query will only contain the data from
    `model`; i.e. you cannot add additional entities.

    :param query: The original query, including filters, joins, etc.
    :param model: The model class for `query`
    :param partition_by: The column to group by
    :param order_by: The column to order the partitions by
    :param limit: The maximum number of rows for each partition;
                  a falsy value means no upper bound
    :param offset: The number of rows to skip in each partition
    """
    numbering = over(func.row_number(),
                     partition_by=partition_by,
                     order_by=order_by)
    numbered = query.add_columns(numbering.label('rownum')).subquery()
    per_group = model.query.select_entity_from(numbered)

    # rownum is 1-based: skip the first `offset` rows of every partition.
    after_offset = offset < numbered.c.rownum
    if not limit:
        return per_group.filter(after_offset)

    within_limit = numbered.c.rownum <= (limit + offset)
    return per_group.filter(after_offset, within_limit)
def setup_classes(cls):
    """Declare A, B, C and a relationship from A to a partitioned alias of B.

    ``A.partitioned_bs`` targets B rows selected together with a per-``a_id``
    row number (ordered by ``B.id``) and joins only those whose number is
    below 10.
    """
    Base = cls.DeclarativeBasic

    class A(Base):
        __tablename__ = "a"

        id = Column(Integer, primary_key=True)

    class B(Base):
        __tablename__ = "b"

        id = Column(Integer, primary_key=True)
        a_id = Column(ForeignKey("a.id"))
        cs = relationship("C")

    class C(Base):
        __tablename__ = "c"

        id = Column(Integer, primary_key=True)
        b_id = Column(ForeignKey("b.id"))

    # Position of each B within the group of Bs sharing the same a_id.
    index_in_group = func.row_number().over(
        order_by=B.id, partition_by=B.a_id).label("index")
    partition = select([B, index_in_group]).alias()
    partitioned_b = aliased(B, alias=partition)

    join_condition = and_(
        partitioned_b.a_id == A.id,
        partition.c.index < 10,
    )
    A.partitioned_bs = relationship(
        partitioned_b, primaryjoin=join_condition)
def state(cls):
    """
    Return the expression to get the 'latest' state from DbCalcState,
    to be used in queries, where 'latest' is defined using the state order
    defined in _sorted_datastates.
    """
    # Sort first the latest states: walk the ordering backwards so the last
    # entry of _sorted_datastates gets the lowest sort value.
    whens = {}
    for position, state_value in enumerate(_sorted_datastates[::-1], start=1):
        whens[state_value] = position
    # else: high value to put it at the bottom
    custom_sort_order = case(
        value=DbCalcState.state, whens=whens, else_=100)

    row_number = func.row_number().over(
        partition_by=DbCalcState.dbnode_id,
        order_by=custom_sort_order).label('the_row_number')
    numbered_states = select([
        DbCalcState.id.label('id'),
        DbCalcState.dbnode_id.label('dbnode_id'),
        DbCalcState.state.label('state'),
        row_number,
    ]).cte()

    # Row number 1 is the best-sorted state of each node.
    latest_per_node = select([
        numbered_states.c.dbnode_id.label('dbnode_id'),
        numbered_states.c.state.label('state'),
    ]).select_from(numbered_states).where(
        numbered_states.c.the_row_number == 1).alias()

    latest_state = select([latest_per_node.c.state]).where(
        latest_per_node.c.dbnode_id == cls.id,
    )
    return latest_state.label('laststate')
def group_subscriptions_per_current_status(self):
    """Return a subquery holding each subscription's current status row.

    Status history rows effective on or before ``self.reference_date`` are
    numbered per subscription, newest ``effect_date`` first; only row
    number 1 of every subscription is kept.
    """
    newest_first = func.row_number().over(
        partition_by=SubscriptionStatusHistory.subscription_id,
        order_by=SubscriptionStatusHistory.effect_date.desc(),
    ).label('row_number')

    history = self.session.query(SubscriptionStatusHistory, newest_first)
    history = history.filter(
        SubscriptionStatusHistory.effect_date <= self.reference_date)
    csh_t = history.subquery('csh_t')

    current = self.session.query(csh_t).filter(csh_t.c.row_number == 1)
    return current.subquery('csh')
def get_final_states_query(cls):
    """Return query that filters complete/failed states and includes a 'duration' column.

    ``duration`` is the number of seconds between the first state of a
    request and its complete/failed state; ``time_in_queue`` is the number
    of seconds between the first state and the state that followed it.
    """
    # We enumerate states for each request and get 'updated' field of the
    # following state. It allows to find out time between two states.
    per_request = dict(partition_by=cls.request_id, order_by=cls.updated)
    states = db.session.query(
        cls.request_id,
        cls.updated,
        func.lead(cls.updated, 1).over(**per_request).label("next_updated"),
        func.row_number().over(**per_request).label("num"),
    ).subquery()

    duration = func.extract(
        "epoch",
        cls.updated.cast(TIMESTAMP) - states.c.updated.cast(TIMESTAMP),
    ).label("duration")
    time_in_queue = func.extract(
        "epoch",
        states.c.next_updated.cast(TIMESTAMP)
        - states.c.updated.cast(TIMESTAMP),
    ).label("time_in_queue")

    final_states = [
        RequestStateMapping.complete.value,
        RequestStateMapping.failed.value,
    ]
    query = db.session.query(
        cls.request_id,
        cls.state,
        cls.state_reason,
        cls.updated,
        duration,
        time_in_queue,
    )
    query = query.join(states, states.c.request_id == cls.request_id)
    query = query.filter(cls.state.in_(final_states))
    # We need only 'init' state information here to join it with the
    # final state.
    return query.filter(states.c.num == 1)
def column_windows(session, column, windowsize, start=None):
    """Yield WHERE clauses that break ``column`` into consecutive windows.

    The column is numbered with ``row_number()`` ordered by its own value and
    every ``windowsize``-th value becomes a window boundary.  Each yielded
    clause selects ``lower <= column < upper``; the clause for the last
    window is open-ended (``column >= lower``).

    :param session: session used to run the boundary query
    :param column: column to build the windows on
    :param windowsize: number of rows per window
    :param start: when given, only boundaries ``>= start`` are used
    :return: generator of SQL expressions, one per window
    """
    def int_for_range(start_id, end_id):
        """Create the range clause; ``end_id`` of None means open-ended."""
        # Compare against None so that a falsy boundary (e.g. 0) still
        # closes the window instead of producing an overlapping open range.
        if end_id is not None:
            return and_(column >= start_id, column < end_id)
        return column >= start_id

    q = session.query(
        column, func.row_number().over(order_by=column).label("rownum")
    ).from_self(column)

    # "is not None" so that an explicit start of 0 is honoured as well.
    if start is not None:
        q = q.filter(column >= start)

    if windowsize > 1:
        q = q.filter(sqlalchemy.text("rownum % {}=1".format(windowsize)))

    boundaries = [value for value, in q]
    # Pair each boundary with its successor; the last one has no upper bound.
    for lower, upper in zip(boundaries, boundaries[1:] + [None]):
        yield int_for_range(lower, upper)
def fulltextsearch(self):
    """Run a full-text search and return the hits as a FeatureCollection.

    Request parameters:

    - ``query`` (required): the search text; every space-separated word is
      turned into a prefix term and the terms are AND-ed.
    - ``limit``: maximum number of results (capped by the ``maxlimit``
      setting).
    - ``partitionlimit``: when non-zero, maximum number of results per
      ``layer_name`` (capped by ``maxlimit`` as well).

    Returns an HTTP error response object (it is returned, not raised) when
    the locale is not configured or a parameter is invalid.
    """
    # Local import: lets the raw ``ts`` column take part in SQL expressions.
    from sqlalchemy.sql.expression import literal_column

    try:
        lang = self.request.registry.settings["default_locale_name"]
    except KeyError:
        return HTTPInternalServerError(detail="default_locale_name not defined in settings")
    try:
        lang = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(detail="%s not defined in languages" % lang)

    if "query" not in self.request.params:
        return HTTPBadRequest(detail="no query")
    query = self.request.params.get("query")

    maxlimit = self.settings.get("maxlimit", 200)

    try:
        limit = int(self.request.params.get("limit", self.settings.get("defaultlimit", 30)))
    except ValueError:
        return HTTPBadRequest(detail="limit value is incorrect")
    if limit > maxlimit:
        limit = maxlimit

    try:
        partitionlimit = int(self.request.params.get("partitionlimit", 0))
    except ValueError:
        return HTTPBadRequest(detail="partitionlimit value is incorrect")
    if partitionlimit > maxlimit:
        partitionlimit = maxlimit

    terms = "&".join(w + ":*" for w in query.split(" ") if w != "")

    # The search terms come straight from the request: hand them to the
    # database as bound parameters instead of interpolating them into the
    # SQL text, which allowed SQL injection.
    ts_column = literal_column("ts")
    _filter = ts_column.op("@@")(func.to_tsquery(lang, terms))

    # flake8 does not like `== True`
    if self.request.user is None:
        _filter = and_(_filter, FullTextSearch.public == True)  # NOQA
    else:
        _filter = and_(
            _filter,
            or_(
                FullTextSearch.public == True,  # NOQA
                FullTextSearch.role_id == None,  # NOQA
                FullTextSearch.role_id == self.request.user.role.id,
            ),
        )

    # The numbers used in ts_rank_cd() below indicate a normalization method.
    # Several normalization methods can be combined using |.
    # 2 divides the rank by the document length
    # 8 divides the rank by the number of unique words in document
    # By combining them, shorter results seem to be preferred over longer ones
    # with the same ratio of matching words. But this relies only on testing it
    # and on some assumptions about how it might be calculated
    # (the normalization is applied two times with the combination of 2 and 8,
    # so the effect on at least the one-word-results is therefore stronger).
    rank = func.ts_rank_cd(ts_column, func.to_tsquery(lang, terms), 2 | 8)

    if partitionlimit:
        # Here we want to partition the search results based on
        # layer_name and limit each partition.
        row_number = (
            func.row_number()
            .over(partition_by=FullTextSearch.layer_name,
                  order_by=(desc(rank), FullTextSearch.label))
            .label("row_number")
        )
        subq = DBSession.query(FullTextSearch).add_columns(row_number).filter(_filter).subquery()
        query = DBSession.query(
            subq.c.id, subq.c.label, subq.c.params,
            subq.c.layer_name, subq.c.the_geom)
        query = query.filter(subq.c.row_number <= partitionlimit)
    else:
        query = DBSession.query(FullTextSearch).filter(_filter)

    query = query.order_by(desc(rank))
    query = query.order_by(FullTextSearch.label)
    query = query.limit(limit)
    objs = query.all()

    features = []
    for o in objs:
        # Results without a geometry are skipped.
        if o.the_geom is not None:
            properties = {"label": o.label, "layer_name": o.layer_name, "params": o.params}
            geom = wkb_loads(str(o.the_geom.geom_wkb))
            feature = Feature(id=o.id, geometry=geom, properties=properties, bbox=geom.bounds)
            features.append(feature)

    # TODO: add callback function if provided in self.request, else return geojson
    return FeatureCollection(features)
def paginate(query, page_num, page_size, order_by_column,
             partition_by_column=None, order_by=None, session=None):
    """
    Wrap `query` so that only one page of its rows is returned.

    A ``_row_number`` column (``row_number()`` over the given ordering and
    optional partitioning) is added to the query and the rows whose number
    falls in the window computed from `page_num` and `page_size` are
    selected.

    Args:
        query(object): SQLAlchemy query object or Subquery object.
        page_num(int): Page number
        page_size(int): Number of record of per page
        order_by_column(object or list): SQLAlchemy column(s) object(s).
        partition_by_column(object or list): SQLAlchemy column(s) object(s)
            There is a major assumption that the value in this column should
            be unique per record (not repeating) in the initial input query.
        order_by(str): Order by clause, 'asc' for ascending or 'desc' for
            descending. Default is 'asc'.
        session(object): database session connection object. Only needed
            when `query` has no ``session`` attribute (a subquery).

    Returns:
        An output query object wrapped with paginated where clause based on
        row_number (_row_number), sorted by and partitioned by the
        respective column(s).

    Raises:
        AttributeError: `query` has no ``session`` attribute and no
            `session` argument was supplied.
    """
    if hasattr(query, 'session'):
        # query object is passed.
        session = query.session
    elif not session:
        # subquery object is passed without an explicit session.
        raise AttributeError("query object has no attribute 'session'")

    if partition_by_column is None:
        if order_by:
            order_by_column = _get_order_by_columns(order_by_column, order_by)
        numbering = func.row_number().over(order_by=order_by_column)
    else:
        # NOTE(review): the sort direction is applied to the partition
        # columns here, not to order_by_column -- confirm this is intended.
        if order_by:
            partition_by_column = _get_order_by_columns(
                partition_by_column, order_by)
        numbering = func.row_number().over(
            partition_by=partition_by_column,
            order_by=order_by_column)
    paginate_column = numbering.label('_row_number')

    pagination_subquery = _get_paginated_subquery(
        session, query, paginate_column)
    window_top = _get_window_top(page_num, page_size)
    window_bottom = _get_window_bottom(page_num, page_size)

    return _paged_query_object(
        session, pagination_subquery, window_top, window_bottom)
def fulltextsearch(self):
    """Run a full-text search and return the matches as a FeatureCollection.

    Request parameters read: ``query`` (required), ``limit``,
    ``partitionlimit`` and ``interface``. Invalid input is answered by
    *returning* an ``HTTPBadRequest`` / ``HTTPInternalServerError`` object
    rather than raising it.
    """
    request = self.request
    params = request.params

    lang = locale_negotiator(request)
    try:
        language = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(
            detail="%s not defined in languages" % lang)

    if "query" not in params:
        return HTTPBadRequest(detail="no query")
    terms = params.get("query")

    maxlimit = self.settings.get("maxlimit", 200)

    try:
        limit = int(params.get(
            "limit", self.settings.get("defaultlimit", 30)))
    except ValueError:
        return HTTPBadRequest(detail="limit value is incorrect")
    limit = min(limit, maxlimit)

    try:
        partitionlimit = int(params.get("partitionlimit", 0))
    except ValueError:
        return HTTPBadRequest(detail="partitionlimit value is incorrect")
    partitionlimit = min(partitionlimit, maxlimit)

    # Every non-empty word becomes a prefix term; all terms are AND-ed.
    words = IGNORED_CHARS_RE.sub(" ", terms).split(" ")
    terms_ts = "&".join(word + ":*" for word in words if word != "")

    conditions = [
        FullTextSearch.ts.op("@@")(func.to_tsquery(language, terms_ts)),
    ]

    # Visibility: public entries only, unless the user has a role.
    user = request.user
    if user is None or user.role is None:
        conditions.append(FullTextSearch.public.is_(True))
    else:
        conditions.append(or_(
            FullTextSearch.public.is_(True),
            FullTextSearch.role_id.is_(None),
            FullTextSearch.role_id == user.role.id
        ))

    # Interface: entries without interface, plus the requested one if any.
    if "interface" in params:
        conditions.append(or_(
            FullTextSearch.interface_id.is_(None),
            FullTextSearch.interface_id == self._get_interface_id(
                params["interface"]
            )
        ))
    else:
        conditions.append(FullTextSearch.interface_id.is_(None))

    # Language: language-neutral entries plus those in the request locale.
    conditions.append(or_(
        FullTextSearch.lang.is_(None),
        FullTextSearch.lang == lang,
    ))

    _filter = and_(*conditions)

    # The numbers used in ts_rank_cd() below indicate a normalization method.
    # Several normalization methods can be combined using |.
    # 2 divides the rank by the document length
    # 8 divides the rank by the number of unique words in document
    # By combining them, shorter results seem to be preferred over longer ones
    # with the same ratio of matching words. But this relies only on testing it
    # and on some assumptions about how it might be calculated
    # (the normalization is applied two times with the combination of 2 and 8,
    # so the effect on at least the one-word-results is therefore stronger).
    rank = func.ts_rank_cd(
        FullTextSearch.ts, func.to_tsquery(language, terms_ts), 2 | 8)

    if not partitionlimit:
        query = DBSession.query(FullTextSearch).filter(_filter)
    else:
        # Number the matches inside each layer_name partition and keep at
        # most `partitionlimit` of them per partition.
        row_number = func.row_number().over(
            partition_by=FullTextSearch.layer_name,
            order_by=(desc(rank), FullTextSearch.label)
        ).label("row_number")
        subq = DBSession.query(FullTextSearch) \
            .add_columns(row_number).filter(_filter).subquery()
        query = DBSession.query(
            subq.c.id, subq.c.label, subq.c.params, subq.c.layer_name,
            subq.c.the_geom, subq.c.actions
        ).filter(subq.c.row_number <= partitionlimit)

    query = query.order_by(desc(rank)) \
        .order_by(FullTextSearch.label) \
        .limit(limit)

    features = []
    for o in query.all():
        properties = {"label": o.label}
        if o.layer_name is not None:
            properties["layer_name"] = o.layer_name
        if o.params is not None:
            properties["params"] = o.params
        if o.actions is not None:
            properties["actions"] = o.actions
        elif o.layer_name is not None:
            # No explicit actions: default to adding the matching layer.
            properties["actions"] = [{
                "action": "add_layer",
                "data": o.layer_name,
            }]

        if o.the_geom is None:
            feature = Feature(id=o.id, properties=properties)
        else:
            geom = to_shape(o.the_geom)
            feature = Feature(
                id=o.id, geometry=geom,
                properties=properties, bbox=geom.bounds
            )
        features.append(feature)

    # TODO: add callback function if provided in self.request, else return geojson
    return FeatureCollection(features)
def test_over(self):
    # row_number() is numbered following strcol order, so the row with
    # intcol 13 is expected to get number 1 and the row with intcol 5
    # number 2.
    numbered = select([
        flds.c.intcol,
        func.row_number().over(order_by=flds.c.strcol),
    ])
    rows = numbered.execute().fetchall()
    eq_(rows, [(13, 1), (5, 2)])
def get_top_genre_users(args):
    """Return the users of the requested genres ordered by follower count.

    Each user is assigned the single genre in which they own the most
    tracks (ties broken by genre name). Reads ``genre`` and ``with_users``
    from `args`.

    Returns ``{'users': [...]}`` with populated user metadata when
    ``with_users`` is truthy, otherwise ``{'user_ids': [...]}``.
    """
    genres = args.get("genre") if "genre" in args else []

    # If the with_users url arg is provided, then populate the user metadata
    # else return user ids
    with_users = args.get("with_users", False)

    db = get_db_read_replica()
    with db.scoped_session() as session:
        with_genres = len(genres) > 0

        # Most tracks first, genre name as the secondary sort.
        by_track_count = (desc(func.count(Track.genre)), asc(Track.genre))

        # Rank every (user, genre) pair by the user's track count in it.
        genre_rank = func.row_number().over(
            partition_by=User.user_id,
            order_by=by_track_count,
        ).label("row_number")
        user_genre_count_query = (
            session.query(
                User.user_id.label('user_id'),
                Track.genre.label('genre'),
                genre_rank,
            )
            .join(Track, Track.owner_id == User.user_id)
            .filter(
                User.is_current == True,
                User.is_creator == True,
                Track.is_unlisted == False,
                Track.stem_of == None,
                Track.is_current == True,
                Track.is_delete == False,
            )
            .group_by(User.user_id, Track.genre)
            .order_by(*by_track_count)
            .subquery('user_genre_count_query')
        )

        # Keep only the top-ranked genre of each user.
        user_genre_query = (
            session.query(
                user_genre_count_query.c.user_id.label('user_id'),
                user_genre_count_query.c.genre.label('genre'),
            )
            .filter(user_genre_count_query.c.row_number == 1)
            .subquery('user_genre_query')
        )

        # Using the subquery of user to associated genre,
        # filter by the requested genres and sort by user follower count.
        user_genre_followers_query = (
            session.query(user_genre_query.c.user_id.label('user_id'))
            .join(
                Follow,
                Follow.followee_user_id == user_genre_query.c.user_id)
            .filter(Follow.is_current == True, Follow.is_delete == False)
            .group_by(
                user_genre_query.c.user_id, user_genre_query.c.genre)
            .order_by(desc(func.count(Follow.follower_user_id)))
        )
        if with_genres:
            user_genre_followers_query = user_genre_followers_query.filter(
                user_genre_query.c.genre.in_(genres))

        rows = paginate_query(user_genre_followers_query).all()
        user_ids = [row[0] for row in rows]

        # Without the with_users flag, respond with the user ids only.
        if not with_users:
            return {'user_ids': user_ids}

        # With the flag, retrieve and populate the user metadata.
        users = session.query(User).filter(
            User.user_id.in_(user_ids), User.is_current == True).all()
        users = helpers.query_result_to_list(users)
        queried_user_ids = [user["user_id"] for user in users]
        users = populate_user_metadata(
            session, queried_user_ids, users, None)

        # Restore the follower-count order of the first query.
        users_by_id = {user['user_id']: user for user in users}
        return {'users': [users_by_id[user_id] for user_id in user_ids]}
def connection_block(field: ASTNode, parent_name: typing.Optional[str]) -> Alias:
    """Build the aliased SELECT that renders one connection field as JSONB.

    The statement is assembled from nested, aliased sub-selects whose names
    are derived from ``slugify_path(field.path)``:

    * the core reference: the model's table filtered by the join and
      filter conditions;
    * ``<name>_total``: a row count over that reference;
    * ``<name>_p1``: the requested node columns plus ``_nodeId``,
      ``_cursor`` and ``_row_num``, restricted by the cursor clause,
      ordered on the primary key and limited to ``limit + 1`` rows;
    * ``<name>_p2``: adds ``_has_next_page`` and limits to ``limit`` rows;
    * ``<name>_p3``: re-orders the page on ``_row_num``;
    * the final select aggregates everything into one ``ret_json`` column.

    Args:
        field: AST node of the connection field being resolved.
        parent_name: Name of the enclosing block. When ``None`` no join
            clause against a parent is generated.

    Returns:
        The anonymous alias of the final select.

    Raises:
        ValueError: For incompatible pagination arguments ("first" with
            "last", "before" with "after", "after" with "last", "before"
            with "first") or a cursor that belongs to another table.
    """
    return_type = field.return_type
    sqla_model = return_type.sqla_model
    block_name = slugify_path(field.path)
    if parent_name is None:
        # No parent block: a constant-true join condition.
        join_conditions = [True]
    else:
        join_conditions = to_join_clause(field, parent_name)
    filter_conditions = to_conditions_clause(field)
    limit = to_limit(field)
    has_total = check_has_total(field)
    is_page_after = "after" in field.args
    is_page_before = "before" in field.args
    # Output key names of the JSON document, as aliased in the request.
    totalCount_alias = field.get_subfield_alias(["totalCount"])
    edges_alias = field.get_subfield_alias(["edges"])
    node_alias = field.get_subfield_alias(["edges", "node"])
    cursor_alias = field.get_subfield_alias(["edges", "cursor"])
    pageInfo_alias = field.get_subfield_alias(["pageInfo"])
    hasNextPage_alias = field.get_subfield_alias(["pageInfo", "hasNextPage"])
    hasPreviousPage_alias = field.get_subfield_alias(
        ["pageInfo", "hasPreviousPage"])
    startCursor_alias = field.get_subfield_alias(["pageInfo", "startCursor"])
    endCursor_alias = field.get_subfield_alias(["pageInfo", "endCursor"])
    # Apply Filters
    core_model = sqla_model.__table__
    core_model_ref = (
        select(core_model.c).select_from(core_model).where(
            and_(
                # Join clause
                *join_conditions,
                # Conditions
                *filter_conditions,
            ))).alias(block_name)
    # Column-like selections and nested relationship selections are
    # collected separately but both end up in the p1 column list.
    new_edge_node_selects = []
    new_relation_selects = []
    for subfield in get_edge_node_fields(field):
        # Does anything other than NodeID go here?
        if subfield.return_type == ID:
            elem = to_node_id_sql(sqla_model, core_model_ref).label(subfield.alias)
            new_edge_node_selects.append(elem)
        elif isinstance(subfield.return_type, (ScalarType, CompositeType, EnumType)):
            col_name = field_name_to_column(sqla_model, subfield.name).name
            elem = core_model_ref.c[col_name].label(subfield.alias)
            new_edge_node_selects.append(elem)
        else:
            elem = build_relationship(subfield, block_name)
            new_relation_selects.append(elem)
    # Setup Pagination
    args = field.args
    after_cursor = args.get("after", None)
    before_cursor = args.get("before", None)
    first = args.get("first", None)
    last = args.get("last", None)
    if first is not None and last is not None:
        raise ValueError('only one of "first" and "last" may be provided')
    pkey_cols = get_primary_key_columns(sqla_model)
    if after_cursor or before_cursor:
        local_table_name = get_table_name(field.return_type.sqla_model)
        # When both cursors are given "before" is read here, but that
        # combination is rejected just below.
        cursor_table_name = before_cursor.table_name if before_cursor else after_cursor.table_name
        cursor_values = before_cursor.values if before_cursor else after_cursor.values
        if after_cursor is not None and before_cursor is not None:
            raise ValueError(
                'only one of "before" and "after" may be provided')
        if after_cursor is not None and last is not None:
            raise ValueError(
                '"after" is not compatible with "last". Use "first"')
        if before_cursor is not None and first is not None:
            raise ValueError(
                '"before" is not compatible with "first". Use "last"')
        if cursor_table_name != local_table_name:
            raise ValueError("Invalid cursor for entity type")
        # Row-value comparison of the primary key tuple against the cursor:
        # ">" when paging after a cursor, "<" when paging before one.
        pagination_clause = tuple_(
            *[core_model_ref.c[col.name] for col in pkey_cols]).op(
                ">" if after_cursor is not None else "<")(
                    tuple_(*[cursor_values[col.name] for col in pkey_cols]))
    else:
        pagination_clause = True
    order_clause = [
        asc(core_model_ref.c[col.name])
        for col in get_primary_key_columns(sqla_model)
    ]
    reverse_order_clause = [
        desc(core_model_ref.c[col.name])
        for col in get_primary_key_columns(sqla_model)
    ]
    total_block = (select([func.count(ONE).label("total_count")]).select_from(
        core_model_ref.alias()).where(has_total)).alias(block_name + "_total")
    node_id_sql = to_node_id_sql(sqla_model, core_model_ref)
    cursor_sql = to_cursor_sql(sqla_model, core_model_ref)
    # Select the right stuff. One row more than `limit` is fetched so that
    # p2 can tell whether a further page exists.
    p1_block = (
        select([
            *new_edge_node_selects,
            *new_relation_selects,
            # For internal Use
            node_id_sql.label("_nodeId"),
            cursor_sql.label("_cursor"),
            # For internal Use
            func.row_number().over().label("_row_num"),
        ]).select_from(core_model_ref).where(pagination_clause).order_by(
            *(reverse_order_clause if (is_page_before or last is not None) else order_clause),
            *order_clause).limit(cast(limit + 1, Integer()))).alias(block_name + "_p1")
    # Drop maybe extra row
    p2_block = (select([
        *p1_block.c,
        (func.max(p1_block.c._row_num).over() > limit).label("_has_next_page")
    ]).select_from(p1_block).limit(limit)).alias(block_name + "_p2")
    # Backward pages were fetched in reverse; flip them on the row number.
    ordering = (desc(literal_column("_row_num")) if (is_page_before or last is not None) else asc(
        literal_column("_row_num")))
    p3_block = (select(p2_block.c).select_from(p2_block).order_by(ordering)
                ).alias(block_name + "_p3")
    # NOTE(review): the edge cursor and the start/end cursors below are all
    # taken from `_nodeId`; the `_cursor` column is selected in p1 but not
    # referenced here - confirm this is intended.
    # hasPreviousPage is simply TRUE whenever an "after" argument was given.
    final = (select([
        func.jsonb_build_object(
            literal_string(totalCount_alias),
            func.coalesce(func.min(total_block.c.total_count), ZERO) if has_total else None,
            literal_string(pageInfo_alias),
            func.jsonb_build_object(
                literal_string(hasNextPage_alias),
                func.coalesce(
                    func.array_agg(p3_block.c._has_next_page)[ONE], FALSE),
                literal_string(hasPreviousPage_alias),
                TRUE if is_page_after else FALSE,
                literal_string(startCursor_alias),
                func.array_agg(p3_block.c._nodeId)[ONE],
                literal_string(endCursor_alias),
                func.array_agg(p3_block.c._nodeId)[func.array_upper(
                    func.array_agg(p3_block.c._nodeId), ONE)],
            ),
            literal_string(edges_alias),
            func.coalesce(
                func.jsonb_agg(
                    func.jsonb_build_object(
                        literal_string(cursor_alias),
                        p3_block.c._nodeId,
                        literal_string(node_alias),
                        func.cast(
                            func.row_to_json(literal_column(p3_block.name)),
                            JSONB()),
                    )),
                func.cast(literal("[]"), JSONB()),
            ),
        ).label("ret_json")
    ]).select_from(p3_block).select_from(
        total_block if has_total else select([1]).alias())).alias()
    return final
def _in_or_unknown(column, selected):
    """Build ``column IN (selected)``, also matching NULL for 'Not Sure'.

    SQL ``IN (..., NULL)`` never matches NULL rows, so unknown values are
    selected with an explicit ``IS NULL`` instead. `selected` is not
    modified.
    """
    values = [value for value in selected if value is not None]
    clause = column.in_(values)
    if 'Not Sure' in values:
        clause = db.or_(clause, column.is_(None))
    return clause


def filter_by_form(form, officer_query, department_id=None):
    """Narrow `officer_query` according to the search form values.

    Args:
        form: Mapping of form field names to values. Read only; the lists
            stored under 'race', 'gender' and 'rank' are no longer mutated
            (appending ``None`` to them made a second call with the same
            form fail its validity check and silently skip the filter).
        officer_query: Query over officers to be filtered.
        department_id: Department used for the rank lookup. When falsy and
            the form has a 'dept', that department's id is used and the
            query is restricted to it.

    Returns:
        The filtered query, outer-joined with each officer's most recent
        assignment and with Job.
    """
    # Some SQL acrobatics to left join only the most recent assignment per officer
    row_num_col = func.row_number().over(
        partition_by=Assignment.officer_id,
        order_by=Assignment.star_date.desc()).label('row_num')
    subq = db.session.query(
        Assignment.officer_id, Assignment.job_id, Assignment.star_date,
        Assignment.star_no, Assignment.unit_id).add_columns(
            row_num_col).from_self().filter(row_num_col == 1).subquery()
    officer_query = officer_query.outerjoin(subq)

    if form.get('name'):
        officer_query = officer_query.filter(
            Officer.last_name.ilike('%%{}%%'.format(form['name'])))
    if not department_id and form.get('dept'):
        department_id = form['dept'].id
        officer_query = officer_query.filter(
            Officer.department_id == department_id)
    if form.get('badge'):
        officer_query = officer_query.filter(
            subq.c.assignments_star_no.like('%%{}%%'.format(form['badge'])))
    if form.get('unit'):
        officer_query = officer_query.filter(
            subq.c.assignments_unit_id == form['unit'])
    if form.get('unique_internal_identifier'):
        officer_query = officer_query.filter(
            Officer.unique_internal_identifier.ilike('%%{}%%'.format(
                form['unique_internal_identifier'])))

    # Choice filters are applied only when every submitted value is valid.
    race_values = [x for x, _ in RACE_CHOICES]
    if form.get('race') and all(race in race_values for race in form['race']):
        officer_query = officer_query.filter(
            _in_or_unknown(Officer.race, form['race']))

    gender_values = [x for x, _ in GENDER_CHOICES]
    if form.get('gender') and all(gender in gender_values
                                  for gender in form['gender']):
        officer_query = officer_query.filter(
            _in_or_unknown(Officer.gender, form['gender']))

    if form.get('min_age') and form.get('max_age'):
        current_year = datetime.datetime.now().year
        # The youngest allowed age gives the latest allowed birth year.
        min_birth_year = current_year - int(form['min_age'])
        max_birth_year = current_year - int(form['max_age'])
        officer_query = officer_query.filter(
            db.or_(
                db.and_(Officer.birth_year <= min_birth_year,
                        Officer.birth_year >= max_birth_year),
                Officer.birth_year == None))  # noqa

    officer_query = officer_query.outerjoin(Job, Assignment.job)
    rank_values = [
        x[0] for x in db.session.query(Job.job_title).filter_by(
            department_id=department_id, is_sworn_officer=True).all()
    ]
    if form.get('rank') and all(rank in rank_values for rank in form['rank']):
        officer_query = officer_query.filter(
            _in_or_unknown(Job.job_title, form['rank']))

    return officer_query
def post_transactions_for_membership_fee(membership_fee, processor):
    """
    Posts transactions (and splits) for users where the specified membership
    fee was not posted yet.

    User select: User -> Split (user account) -> Transaction -> Split (fee account)

    Conditions: the user carries the `membership_fee` property in at least
    one of the two property evaluations (begins_on + 1 day, and
    begins_on + grace_period - 1 day), and no transaction valid within the
    fee period exists that links the user's account to the membership fee
    account.

    :param membership_fee: The membership fee which should be posted
    :param processor: Its ``id`` is stored as author of the transactions
    :return: A list of ``{'id': ..., 'name': ...}`` dicts, one per affected
        user, in reverse order of the selected rows
    """
    description = membership_fee_description.format(
        fee_name=membership_fee.name).to_json()

    split_user_account = Split.__table__.alias()
    split_fee_account = Split.__table__.alias()

    # Property evaluations at the start and near the end of the grace period.
    properties_beginning = func.evaluate_properties(
        membership_fee.begins_on + timedelta(1)
    ).alias('properties_beginning')
    properties_grace = func.evaluate_properties(
        membership_fee.begins_on + membership_fee.grace_period - timedelta(1)
    ).alias('properties_grace')
    user_source = (
        User.__table__
        .join(properties_beginning,
              literal_column('properties_beginning.user_id') == User.id)
        .join(properties_grace,
              literal_column('properties_grace.user_id') == User.id)
    )

    # A transaction in the fee period with one split on the user's account
    # and a different split on the membership fee account.
    fee_already_posted = exists(
        select([None])
        .select_from(
            split_user_account
            .join(Transaction,
                  Transaction.id == split_user_account.c.transaction_id)
            .join(split_fee_account,
                  split_fee_account.c.transaction_id == Transaction.id))
        .where(and_(
            split_user_account.c.account_id == User.account_id,
            Transaction.valid_on.between(
                literal(membership_fee.begins_on),
                literal(membership_fee.ends_on)),
            split_fee_account.c.account_id
            == literal(config.membership_fee_account_id),
            split_fee_account.c.id != split_user_account.c.id))
    )

    users = (
        select([User.id.label('user_id'),
                User.name.label('user_name'),
                User.account_id.label('account_id')])
        .select_from(user_source)
        .where(not_(fee_already_posted))
        .where(or_(
            literal_column('properties_beginning.property_name')
            == 'membership_fee',
            literal_column('properties_grace.property_name')
            == 'membership_fee'))
        .distinct()
        .cte('membership_fee_users')
    )

    numbered_users = (
        select([users.c.user_id,
                users.c.account_id,
                func.row_number().over().label('index')])
        .select_from(users)
        .cte("membership_fee_numbered_users")
    )

    # One transaction per selected user.
    transaction_values = select([
        literal(description),
        literal(processor.id),
        func.current_timestamp(),
        literal(membership_fee.ends_on),
    ]).select_from(users)
    transactions = (
        Transaction.__table__.insert()
        .from_select([Transaction.description,
                      Transaction.author_id,
                      Transaction.posted_at,
                      Transaction.valid_on],
                     transaction_values)
        .returning(Transaction.id)
        .cte('membership_fee_transactions')
    )

    numbered_transactions = (
        select([transactions.c.id,
                func.row_number().over().label('index')])
        .select_from(transactions)
        .cte('membership_fee_numbered_transactions')
    )

    split_columns = [Split.amount, Split.account_id, Split.transaction_id]

    # Negative amount on the fee account for every new transaction.
    fee_split_values = select([
        literal(-membership_fee.regular_fee, type_=Money),
        literal(config.membership_fee_account_id),
        transactions.c.id,
    ]).select_from(transactions)
    split_insert_fee_account = (
        Split.__table__.insert()
        .from_select(split_columns, fee_split_values)
        .returning(Split.id)
        .cte('membership_fee_split_fee_account')
    )

    # Positive amount on the user account; users and transactions are
    # paired up through their row numbers.
    user_split_values = select([
        literal(membership_fee.regular_fee, type_=Money),
        numbered_users.c.account_id,
        numbered_transactions.c.id,
    ]).select_from(
        numbered_users.join(
            numbered_transactions,
            numbered_transactions.c.index == numbered_users.c.index))
    split_insert_user = (
        Split.__table__.insert()
        .from_select(split_columns, user_split_values)
        .returning(Split.id)
        .cte('membership_fee_split_user')
    )

    # The affected users are read *before* the inserting statement runs.
    affected_users_raw = session.session.execute(
        select([users.c.user_id, users.c.user_name])).fetchall()

    # TODO: Unite the following two queries into one (the membership_fee_users is called twice currently.
    session.session.execute(
        select([]).select_from(
            split_insert_fee_account.join(
                split_insert_user,
                split_insert_user.c.id == split_insert_fee_account.c.id)))

    return [{'id': row[0], 'name': row[1]}
            for row in reversed(affected_users_raw)]
def fulltextsearch(self):
    """Run a full-text search and return the matches as a FeatureCollection.

    Request parameters read: ``query`` (required), ``limit`` and
    ``partitionlimit``. Invalid input or configuration is answered by
    *returning* an ``HTTPBadRequest`` / ``HTTPInternalServerError`` object
    rather than raising it. Only rows that have a geometry are returned.

    The search terms come straight from the request, so the tsquery and the
    rank are built as SQL expressions with bound parameters instead of
    being interpolated into SQL text.
    """
    try:
        lang = self.request.registry.settings['default_locale_name']
    except KeyError:
        return HTTPInternalServerError(
            detail='default_locale_name not defined in settings')
    try:
        lang = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(
            detail='%s not defined in languages' % lang)

    if 'query' not in self.request.params:
        return HTTPBadRequest(detail='no query')
    search_text = self.request.params.get('query')

    maxlimit = self.settings.get('maxlimit', 200)

    try:
        limit = int(self.request.params.get(
            'limit', self.settings.get('defaultlimit', 30)))
    except ValueError:
        return HTTPBadRequest(detail='limit value is incorrect')
    if limit > maxlimit:
        limit = maxlimit

    try:
        partitionlimit = int(self.request.params.get('partitionlimit', 0))
    except ValueError:
        return HTTPBadRequest(detail='partitionlimit value is incorrect')
    if partitionlimit > maxlimit:
        partitionlimit = maxlimit

    # Every non-empty word becomes a prefix term; all terms are AND-ed.
    # No quote doubling is needed: the value is sent as a bound parameter,
    # which is the same text the escaped SQL literal used to denote.
    terms = '&'.join(w + ':*' for w in search_text.split(' ') if w != '')
    tsquery = func.to_tsquery(lang, terms)
    _filter = FullTextSearch.ts.op('@@')(tsquery)

    if self.request.user is None or self.request.user.role is None:
        _filter = and_(_filter, FullTextSearch.public.is_(True))
    else:
        _filter = and_(
            _filter,
            or_(
                FullTextSearch.public.is_(True),
                FullTextSearch.role_id.is_(None),
                FullTextSearch.role_id == self.request.user.role.id
            )
        )

    # The numbers used in ts_rank_cd() below indicate a normalization method.
    # Several normalization methods can be combined using |.
    # 2 divides the rank by the document length
    # 8 divides the rank by the number of unique words in document
    # By combining them, shorter results seem to be preferred over longer ones
    # with the same ratio of matching words. But this relies only on testing it
    # and on some assumptions about how it might be calculated
    # (the normalization is applied two times with the combination of 2 and 8,
    # so the effect on at least the one-word-results is therefore stronger).
    rank = func.ts_rank_cd(
        FullTextSearch.ts, func.to_tsquery(lang, terms), 2 | 8)

    if partitionlimit:
        # Here we want to partition the search results based on
        # layer_name and limit each partition.
        row_number = func.row_number() \
            .over(
                partition_by=FullTextSearch.layer_name,
                order_by=(desc(rank), FullTextSearch.label)) \
            .label('row_number')
        subq = DBSession.query(FullTextSearch) \
            .add_columns(row_number).filter(_filter).subquery()
        query = DBSession.query(subq.c.id, subq.c.label, subq.c.params,
                                subq.c.layer_name, subq.c.the_geom)
        query = query.filter(subq.c.row_number <= partitionlimit)
    else:
        query = DBSession.query(FullTextSearch).filter(_filter)

    query = query.order_by(desc(rank))
    query = query.order_by(FullTextSearch.label)
    query = query.limit(limit)
    objs = query.all()

    features = []
    for o in objs:
        # Rows without a geometry are skipped.
        if o.the_geom is not None:
            properties = {
                "label": o.label,
                "layer_name": o.layer_name,
                "params": o.params,
            }
            geom = to_shape(o.the_geom)
            feature = Feature(id=o.id, geometry=geom,
                              properties=properties, bbox=geom.bounds)
            features.append(feature)

    # TODO: add callback function if provided in self.request, else return geojson
    return FeatureCollection(features)
def column_windows(session, column, windowsize, filter=None, limit=None):
    """Yield WHERE clauses that split `column` into consecutive windows.

    Requires a database that supports window functions, i.e. Postgresql,
    SQL Server, Oracle.

    Args:
        session: Session used to run the boundary query.
        column: Column whose ordered values are partitioned.
        windowsize: Number of rows per window; with a value of 1 or less
            every row starts its own window.
        filter: Optional criterion applied before the rows are numbered
            and also AND-ed into every yielded clause. (The name shadows
            the builtin but is part of the public signature.)
        limit: Optional cap on the rows of the numbering subquery.

    Yields:
        One clause per window: ``column >= start`` combined with
        ``column < next_start`` for every window but the last, plus
        `filter` when given.
    """
    def int_for_range(start_id, end_id):
        clauses = [column >= start_id]
        # Compare against None explicitly: a falsy boundary value such as
        # 0 or '' is a real upper bound, not "no upper bound".
        if end_id is not None:
            clauses.append(column < end_id)
        if filter is not None:
            clauses.append(filter)
        return and_(*clauses) if len(clauses) > 1 else clauses[0]

    # Use the row_number() window function to order and number all rows
    q = session.query(
        column,
        func.row_number().over(order_by=column).label('rownum')
    )

    # Add any additional filters that will be applied before the window function
    if filter is not None:
        # http://docs.sqlalchemy.org/en/latest/changelog/migration_06.html#an-important-expression-language-gotcha
        q = q.filter(filter)

    # Limit the inner subquery where rows are sorted and numbered
    if limit:
        q = q.limit(limit)

    # Create outer query selecting from inner subquery
    q = q.from_self(column)

    # Collect the column IDs for the rows at the boundary of each window
    if windowsize > 1:
        q = q.filter(sqlalchemy.text(f'rownum % {windowsize}=1'))
    starts = [boundary for boundary, in q]

    # Each window ends where the next one starts; the last one is open.
    for start, end in zip(starts, starts[1:] + [None]):
        yield int_for_range(start, end)
def traffic_history_query():
    """Build the SELECT that yields a bucketed traffic history with balance.

    The statement references ``arg_user_id``, ``arg_start``,
    ``arg_interval`` and ``arg_step`` as literal column names
    (presumably the parameters of an enclosing SQL function - TODO confirm).

    Returns:
        A select over ``agg_hist`` with the columns ``bucket``, ``credit``,
        ``ingress``, ``egress`` and ``balance``, ordered by bucket, with the
        first row (bucket 0) skipped via ``offset(1)``.
    """
    timestamptz = TIMESTAMP(timezone=True)

    # All credit and volume rows of the user as one event stream; volume
    # amounts are negated.
    events = union_all(
        select([TrafficCredit.amount,
                TrafficCredit.timestamp,
                literal("Credit").label('type')]
               ).where(TrafficCredit.user_id == literal_column('arg_user_id')),

        select([(-TrafficVolume.amount).label('amount'),
                TrafficVolume.timestamp,
                cast(TrafficVolume.type, TEXT).label('type')]
               ).where(TrafficVolume.user_id == literal_column('arg_user_id'))
    ).cte('traffic_events')

    def round_time(time_expr, ceil=False):
        # Round `time_expr` down (or up, with ceil=True) to a multiple of
        # arg_step, computed on epoch seconds.
        round_func = func.ceil if ceil else func.trunc
        step_epoch = func.extract('epoch', literal_column('arg_step'))
        return cast(func.to_timestamp(round_func(func.extract('epoch', time_expr) / step_epoch) * step_epoch), timestamptz)

    balance = select([TrafficBalance.amount, TrafficBalance.timestamp])\
        .select_from(User.__table__.outerjoin(TrafficBalance))\
        .where(User.id == literal_column('arg_user_id'))\
        .cte('balance')

    balance_amount = select([balance.c.amount]).as_scalar()
    balance_timestamp = select([balance.c.timestamp]).as_scalar()

    # Bucket layout
    # n = interval / step
    # 0: Aggregates all prior traffic_events so that the balance value can be calculated
    # 1 - n: Traffic history entry
    # n+1: Aggregates all data after the last point in time, will be discarded
    buckets = select([literal_column('bucket'),
                      (func.row_number().over(order_by=literal_column('bucket')) - 1).label('index')]
                     ).select_from(
        func.generate_series(
            round_time(cast(literal_column('arg_start'), timestamptz)) - literal_column('arg_step'),
            round_time(cast(literal_column('arg_start'), timestamptz) + literal_column('arg_interval')),
            literal_column('arg_step')
        ).alias('bucket')
    ).order_by(
        literal_column('bucket')
    ).cte('buckets')

    def cond_sum(condition, label, invert=False):
        # Sum of the event amounts matching `condition` (sign flipped when
        # invert=True); non-matching events contribute NULL.
        return func.sum(case(
            [(condition, events.c.amount if not invert else -events.c.amount)],
            else_=None)).label(label)

    # Events are assigned to buckets with width_bucket() over the array of
    # all bucket boundaries except bucket 0.
    hist = select([buckets.c.bucket,
                   cond_sum(events.c.type == 'Credit', 'credit'),
                   cond_sum(events.c.type == 'Ingress', 'ingress', invert=True),
                   cond_sum(events.c.type == 'Egress', 'egress', invert=True),
                   func.sum(events.c.amount).label('amount'),
                   cond_sum(and_(balance_timestamp != None, events.c.timestamp < balance_timestamp), 'before_balance'),
                   cond_sum(or_(balance_timestamp == None, events.c.timestamp >= balance_timestamp), 'after_balance')]
                  ).select_from(buckets.outerjoin(
        events,
        func.width_bucket(
            events.c.timestamp,
            select([func.array(select([buckets.c.bucket]).select_from(buckets).where(buckets.c.index != 0).label('dummy'))])
        ) == buckets.c.index
    )).where(
        # Discard bucket n+1
        buckets.c.index < select([func.max(buckets.c.index)])
    ).group_by(
        buckets.c.bucket
    ).order_by(
        buckets.c.bucket
    ).cte('traffic_hist')

    # Bucket is located before the balance and no traffic_events exist before it
    first_event_timestamp = select([func.min(events.c.timestamp)]).as_scalar()
    case_before_balance_no_data = (
        and_(balance_timestamp != None, hist.c.bucket < balance_timestamp,
             or_(first_event_timestamp == None,
                 hist.c.bucket < first_event_timestamp
                 )),
        None
    )

    # Bucket is located after the balance: balance amount plus the running
    # sum of `after_balance` up to and including the current bucket.
    case_after_balance = (
        or_(balance_timestamp == None, hist.c.bucket >= balance_timestamp),
        func.coalesce(balance_amount, 0) + func.coalesce(
            func.sum(hist.c.after_balance).over(
                order_by=hist.c.bucket.asc(), rows=(None, 0)),
            0)
    )

    # Bucket is located before the balance, but there still exist traffic_events before it
    else_before_balance = (
        func.coalesce(balance_amount, 0) +
        func.coalesce(hist.c.after_balance, 0) -
        func.coalesce(
            func.sum(hist.c.before_balance).over(
                order_by=hist.c.bucket.desc(), rows=(None, -1)
            ), 0)
    )

    agg_hist = select(
        [hist.c.bucket, hist.c.credit, hist.c.ingress, hist.c.egress, case(
            [case_before_balance_no_data, case_after_balance],
            else_=else_before_balance
        ).label('balance')]).alias('agg_hist')

    # Remove bucket 0
    result = select([agg_hist]).order_by(agg_hist.c.bucket).offset(1)

    return result
QUERY_STATISTICS_SUMMARY_MEAN = [ StatisticsShortTerm.metadata_id, func.avg(StatisticsShortTerm.mean), func.min(StatisticsShortTerm.min), func.max(StatisticsShortTerm.max), ] QUERY_STATISTICS_SUMMARY_SUM = [ StatisticsShortTerm.metadata_id, StatisticsShortTerm.start, StatisticsShortTerm.last_reset, StatisticsShortTerm.state, StatisticsShortTerm.sum, func.row_number().over( partition_by=StatisticsShortTerm.metadata_id, order_by=StatisticsShortTerm.start.desc(), ).label("rownum"), ] QUERY_STATISTICS_SUMMARY_SUM_LEGACY = [ StatisticsShortTerm.metadata_id, StatisticsShortTerm.last_reset, StatisticsShortTerm.state, StatisticsShortTerm.sum, ] QUERY_STATISTIC_META = [ StatisticsMeta.id, StatisticsMeta.statistic_id, StatisticsMeta.source, StatisticsMeta.unit_of_measurement,
def insert_organizations(db: Session):
    '''
    Insert every OSM object into organization whose osm_id is not stored yet.

    The rows are produced by a single INSERT ... FROM SELECT. The session is
    committed on success and closed in every case, including when the
    statement or the commit raises; the exception itself is propagated
    unchanged.

    :param db: session the statement is executed on; closed on return
    '''
    # Sub-request selecting osm_id, name, name_normalized, importance.
    # name_normalized is the name trimmed of surrounding spaces, with the
    # remaining spaces replaced by dashes.
    query_normalized_name = db.query(
        OSMName.osm_id.label('osm_id'),
        OSMName.name.label('name'),
        func.replace(func.trim(OSMName.name, ' '), ' ',
                     '-').label("name_normalized"),
        OSMName.importance.label('importance'))
    query_normalized_name = query_normalized_name.subquery('normalized_name')

    # Sub-request selecting osm_id, name, name_normalized, row_number.
    # row_number is used to generate the slug in the next request; it is
    # partitioned by name_normalized and ordered by "importance" DESC.
    # src: https://osmnames.readthedocs.io/en/latest/introduction.html
    # "importance": Importance of the feature, ranging [0.0-1.0], 1.0 being
    # the most important.
    query_unique_slug = db.query(
        query_normalized_name.c.osm_id.label('osm_id'),
        query_normalized_name.c.name.label('name'),
        query_normalized_name.c.name_normalized.label('name_normalized'),
        func.row_number().over(
            partition_by=query_normalized_name.c.name_normalized,
            order_by=desc(
                query_normalized_name.c.importance)).label("row_number"))
    query_unique_slug = query_unique_slug.subquery('unique_slug')

    # Request used to filter out what is already inserted.
    where_query = db.query(Organization.osm_id.label('osm_id'))

    # Final request building the slug.
    # example
    # we could have many name_normalized
    # name     | name_normalized | osm_id | row_number
    # new york | new-york        | 1254   | 1
    # new-york | new-york        | 215486 | 2
    # will generate
    # name     | osm_id | slug
    # new york | 1254   | new-york
    # new-york | 215486 | new-york-215486
    query = db.query(
        query_unique_slug.c.name.label('name'),
        query_unique_slug.c.osm_id.label('osm_id'),
        case([(query_unique_slug.c.row_number == 1,
               query_unique_slug.c.name_normalized)],
             else_=func.concat(query_unique_slug.c.name_normalized, '-',
                               query_unique_slug.c.osm_id)).label('slug'))
    query = query.filter(query_unique_slug.c.osm_id.notin_(where_query))

    insert_query = insert(Organization)
    insert_query = insert_query.from_select(
        (Organization.name, Organization.osm_id, Organization.slug), query)

    try:
        db.execute(insert_query)
        db.commit()
    finally:
        # Release the session even when the insert or the commit failed.
        db.close()
return [(party, int(count)) for party, count in party_counts] @property def representative_people(self): return [cosponsor for cosponsor in self.cosponsors if cosponsor.name in self.sponsor] def _to_dict_light(self): d = self._columns_to_dict() d['status'] = self.status # TODO: add relation data return d bill_and_status = select([func.row_number().over().label('status_order'), func.unnest(Bill.status_ids).label('bill_status_id'), Bill.id.label('bill_id')]).alias() Bill.statuses = relationship("BillStatus", secondary=bill_and_status, primaryjoin=Bill.id == bill_and_status.c.bill_id, secondaryjoin=bill_and_status.c.bill_status_id == BillStatus.id, order_by=bill_and_status.c.status_order, viewonly=True, backref='bills') def assembly_id_by_bill_id(bill_id): return int(bill_id.lstrip('Z')[:2])
def get_model_stats(session):
    """Return one row per model hash: hash, row number, reward, replay count.

    Each row holds ``Replay.model_hash``, a ``row_number()`` ordered by
    ``Model.model_hash``, ``Model.total_reward`` and the number of replays
    (labelled ``total``). Rows are ordered with the textual clause
    ``models.total_reward ASC``.
    """
    position = func.row_number().over(order_by=Model.model_hash)
    replay_count = func.count(Replay.model_hash).label('total')

    stats = session.query(
        Replay.model_hash, position, Model.total_reward, replay_count)
    stats = stats.join(Model)
    stats = stats.group_by(Replay.model_hash, Model.model_hash)
    stats = stats.order_by('models.total_reward ASC')
    return stats.all()
.filter(Place.coordinates != None)\ places = {} for place, url in place_query: places[place.gid] = {'name': place.name, 'coordinates': place.coordinates, 'commons_link': url, 'events': [] } ev_alias = aliased(Event) stmt = s.query(Place.id.label("place_id"), ev_alias.id.label("event_id"), func.row_number().over(partition_by=Place.id, order_by=ev_alias.begin_date). label("row_num"))\ .outerjoin(LinkEventPlace)\ .outerjoin(ev_alias)\ .order_by(ev_alias.begin_date)\ .cte() event_query = s.query(Place.gid.label("place_gid"), ev_alias)\ .join(stmt, Place.id == stmt.c.place_id)\ .join(LinkEventPlace, Place.id == LinkEventPlace.entity1_id)\ .join(ev_alias, ev_alias.id == LinkEventPlace.entity0_id)\ .filter("row_num < 3")\ .filter(Place.coordinates != None) for place_gid, event in event_query:
def fulltextsearch(self):
    """Search the full-text index and return the hits as a FeatureCollection.

    Request parameters read here:

    * ``query`` (required): the search terms.
    * ``limit``: maximum number of results; defaults to the ``defaultlimit``
      setting (30 when unset) and is capped by the ``maxlimit`` setting
      (200 when unset).
    * ``partitionlimit``: when non-zero, maximum number of results kept per
      ``layer_name``; also capped by ``maxlimit``.
    * ``interface``: restricts results to that interface (plus entries that
      have no interface).
    * ``ranksystem``: ``"ts_rank_cd"`` selects ``ts_rank_cd`` ranking, any
      other value uses ``similarity``.

    On invalid input an HTTP error object is *returned* (not raised).
    """
    request = self.request
    params = request.params

    lang = locale_negotiator(request)
    try:
        language = self.languages[lang]
    except KeyError:
        return HTTPInternalServerError(
            detail="{0!s} not defined in languages".format(lang))

    if "query" not in params:
        return HTTPBadRequest(detail="no query")
    terms = params.get("query")

    maxlimit = self.settings.get("maxlimit", 200)

    try:
        limit = int(params.get("limit", self.settings.get("defaultlimit", 30)))
    except ValueError:
        return HTTPBadRequest(detail="limit value is incorrect")
    limit = min(limit, maxlimit)

    try:
        partitionlimit = int(params.get("partitionlimit", 0))
    except ValueError:
        return HTTPBadRequest(detail="partitionlimit value is incorrect")
    partitionlimit = min(partitionlimit, maxlimit)

    # Turn every remaining word into a prefix match and AND them together.
    words = IGNORED_CHARS_RE.sub(" ", terms).split(" ")
    terms_ts = "&".join(word + ":*" for word in words if word)

    criteria = FullTextSearch.ts.op("@@")(func.to_tsquery(language, terms_ts))

    # Visibility: anonymous requests only see public entries; authenticated
    # ones additionally see role-less entries and those of their own roles.
    user = request.user
    if user is None:
        criteria = and_(criteria, FullTextSearch.public.is_(True))
    else:
        role_ids = [role.id for role in user.roles]
        criteria = and_(
            criteria,
            or_(
                FullTextSearch.public.is_(True),
                FullTextSearch.role_id.is_(None),
                FullTextSearch.role_id.in_(role_ids),
            ),
        )

    # Interface: entries without an interface always match; entries bound to
    # an interface only match when that interface was requested.
    if "interface" in params:
        interface_id = self._get_interface_id(params["interface"])
        criteria = and_(
            criteria,
            or_(
                FullTextSearch.interface_id.is_(None),
                FullTextSearch.interface_id == interface_id,
            ),
        )
    else:
        criteria = and_(criteria, FullTextSearch.interface_id.is_(None))

    # Language: language-neutral entries or entries in the negotiated locale.
    criteria = and_(
        criteria,
        or_(FullTextSearch.lang.is_(None), FullTextSearch.lang == lang))

    if params.get("ranksystem") == "ts_rank_cd":
        # The last argument of ts_rank_cd() selects the normalization method;
        # several methods can be combined with |.
        #   2 divides the rank by the document length
        #   8 divides the rank by the number of unique words in the document
        # Combining both seems to favour shorter results over longer ones
        # with the same ratio of matching words. This relies on testing and
        # on assumptions about how the rank is computed (the normalization is
        # applied twice, so the effect on one-word results is stronger).
        rank = func.ts_rank_cd(
            FullTextSearch.ts, func.to_tsquery(language, terms_ts), 2 | 8)
    else:
        # Similarity ranking from the pg_trgm module.
        rank = func.similarity(FullTextSearch.label, terms)

    if partitionlimit:
        # Number the hits inside each layer_name partition, then keep only
        # the first `partitionlimit` rows of every partition.
        row_number = func.row_number().over(
            partition_by=FullTextSearch.layer_name,
            order_by=(desc(rank), FullTextSearch.label),
        ).label("row_number")
        subq = (
            DBSession.query(FullTextSearch)
            .add_columns(row_number)
            .filter(criteria)
            .subquery()
        )
        query = DBSession.query(
            subq.c.id, subq.c.label, subq.c.params,
            subq.c.layer_name, subq.c.the_geom, subq.c.actions,
        ).filter(subq.c.row_number <= partitionlimit)
    else:
        query = DBSession.query(FullTextSearch).filter(criteria)

    query = (
        query.order_by(desc(rank))
        .order_by(FullTextSearch.label)
        .limit(limit)
    )

    features = []
    for row in query.all():
        properties = {"label": row.label}
        if row.layer_name is not None:
            properties["layer_name"] = row.layer_name
        if row.params is not None:
            properties["params"] = row.params
        if row.actions is not None:
            properties["actions"] = row.actions
        elif row.layer_name is not None:
            # No explicit actions: fall back to adding the layer.
            properties["actions"] = [{
                "action": "add_layer",
                "data": row.layer_name
            }]

        if row.the_geom is None:
            features.append(Feature(id=row.id, properties=properties))
            continue

        geom = to_shape(row.the_geom)
        features.append(Feature(
            id=row.id, geometry=geom, properties=properties, bbox=geom.bounds))

    return FeatureCollection(features)
def post_transactions_for_membership_fee(membership_fee, processor, simulate=False):
    """
    Post transactions (and splits) for users where the specified membership fee
    was not posted yet.

    User select: User -> Split (user account) -> Transaction -> Split (fee account)
                 Conditions: User has `membership_fee` property on
                                begins_on + booking_begin - 1 day or
                                begins_on + booking_end - 1 day
                             and no transaction exists on the user account in the fee timespan

    :param membership_fee: The membership fee which should be posted. Its
        `name`, `begins_on`, `ends_on`, `booking_begin`, `booking_end` and
        `regular_fee` attributes are read.
    :param processor: Its `id` is stored as `author_id` of the created transactions.
    :param simulate: Do not post any transactions, just return the affected users.
    :return: A list with one dict per affected user, holding the keys
        `id`, `name` and `fee_account_id`.
    """
    description = membership_fee_description.format(fee_name=membership_fee.name).to_json()

    # Two aliases of the split table: one for the split on the user's account,
    # one for the matching split on a fee account within the same transaction.
    split_user_account = Split.__table__.alias()
    split_fee_account = Split.__table__.alias()

    # Two aliases of the room history: one evaluated at the end, one at the
    # beginning of the fee interval.
    rhe_end = RoomHistoryEntry.__table__.alias()
    rhe_begin = RoomHistoryEntry.__table__.alias()

    # All accounts joined to a building plus the configured default fee account.
    fee_accounts = Account.q.join(Building).distinct(Account.id).all()
    fee_accounts_ids = set([acc.id for acc in fee_accounts] + [config.membership_fee_account_id])

    # Points in time at which the users' properties are evaluated.
    # NOTE(review): time_min()/time_max() presumably return the earliest/latest
    # time of day - confirm against their definition.
    properties_beginning_timestamp = datetime.combine((membership_fee.begins_on + membership_fee.booking_begin - timedelta(1)), time_min())
    properties_end_timestamp = datetime.combine((membership_fee.begins_on + membership_fee.booking_end - timedelta(1)), time_max())

    # Timestamps used to pick the room history entry valid at the start / end
    # of the fee interval.
    begin_tstz = datetime.combine(membership_fee.begins_on, time_min())
    end_tstz = datetime.combine(membership_fee.ends_on, time_max())

    # Select all users who fulfill the requirements for the fee in the fee timespan
    users = (select([User.id.label('id'),
                     User.name.label('name'),
                     User.account_id.label('account_id'),
                     # Select fee_account_id of the building or the default
                     # fee_account_id if user was not living in a room at booking time
                     func.coalesce(Building.fee_account_id,
                                   literal(config.membership_fee_account_id)).label('fee_account_id')])
             .select_from(User.__table__
                          # Join the users properties at `booking_begin`
                          .outerjoin(func.evaluate_properties(properties_beginning_timestamp)
                                     .alias('properties_beginning'),
                                     literal_column('properties_beginning.user_id') == User.id)
                          # Join the users properties at `booking_end`
                          .outerjoin(func.evaluate_properties(properties_end_timestamp)
                                     .alias('properties_end'),
                                     literal_column('properties_end.user_id') == User.id)
                          # Join RoomHistoryEntry, Room and Building of the user at membership_fee.ends_on
                          .outerjoin(rhe_end,
                                     and_(rhe_end.c.user_id == User.id,
                                          # Only join RoomHistoryEntry that is relevant
                                          # on the fee interval end date; an open-ended
                                          # entry (ends_at IS NULL) is treated as lasting
                                          # until 'infinity'.
                                          literal(end_tstz).op("<@")(
                                              func.tstzrange(rhe_end.c.begins_at,
                                                             func.coalesce(rhe_end.c.ends_at,
                                                                           literal('infinity').cast(DateTime)
                                                                           )
                                                             , '[)')
                                          )))
                          # Join RoomHistoryEntry, Room and Building of the user at membership_fee.begins_on
                          # As second option if user moved out within the month
                          .outerjoin(rhe_begin,
                                     and_(rhe_begin.c.user_id == User.id,
                                          # Only join RoomHistoryEntry that is relevant
                                          # on the fee interval begin date
                                          literal(begin_tstz).op("<@")(
                                              func.tstzrange(rhe_begin.c.begins_at,
                                                             func.coalesce(rhe_begin.c.ends_at,
                                                                           literal('infinity').cast(DateTime)
                                                                           )
                                                             , '[)')
                                          )))
                          # Join with Room from membership_fee.ends_on if available,
                          # if not, join with the Room from membership_fee.begins_on
                          .outerjoin(Room, Room.id == func.coalesce(rhe_end.c.room_id, rhe_begin.c.room_id))
                          .outerjoin(Building, Building.id == Room.building_id)
                          )
             # Check if a booking already exists on the user account in the fee timespan:
             # a transaction valid within [begins_on, ends_on] that has a split on the
             # user's account and another split with a negative amount on a fee account.
             .where(not_(exists(select([None]).select_from(split_user_account
                                                           .join(Transaction, Transaction.id == split_user_account.c.transaction_id)
                                                           .join(split_fee_account, split_fee_account.c.transaction_id == Transaction.id)
                                                           )
                                .where(and_(split_user_account.c.account_id == User.account_id,
                                            Transaction.valid_on.between(literal(membership_fee.begins_on), literal(membership_fee.ends_on)),
                                            split_fee_account.c.account_id.in_(fee_accounts_ids),
                                            split_fee_account.c.amount < 0,
                                            split_fee_account.c.id != split_user_account.c.id))
                                )))
             # Only those users who had the `membership_fee` property on `booking_begin` or
             # `booking_end`
             .where(or_(and_(literal_column('properties_beginning.property_name') == 'membership_fee',
                             not_(literal_column('properties_beginning.denied'))),
                        and_(literal_column('properties_end.property_name') == 'membership_fee',
                             not_(literal_column('properties_end.denied')))))
             # The outer joins can yield several rows per user; keep each combination once.
             .distinct()
             .cte('membership_fee_users'))

    # Fetched before anything is inserted, so the result is the same whether or
    # not `simulate` is set.
    affected_users_raw = session.session.execute(select([users.c.id, users.c.name, users.c.fee_account_id])).fetchall()

    if not simulate:
        # Give every affected user a running index ...
        numbered_users = (select([users.c.id,
                                  users.c.fee_account_id.label('fee_account_id'),
                                  users.c.account_id,
                                  func.row_number().over().label('index')])
                          .select_from(users)
                          .cte("membership_fee_numbered_users"))

        # ... insert one transaction per affected user (all with identical values) ...
        transactions = (Transaction.__table__.insert()
                        .from_select([Transaction.description,
                                      Transaction.author_id,
                                      Transaction.posted_at,
                                      Transaction.valid_on,
                                      Transaction.confirmed],
                                     select([literal(description),
                                             literal(processor.id),
                                             func.current_timestamp(),
                                             literal(membership_fee.ends_on),
                                             True]).select_from(users))
                        .returning(Transaction.id)
                        .cte('membership_fee_transactions'))

        # ... and number the new transactions as well, so users and transactions
        # can be paired one-to-one via `index`. Which transaction a user gets does
        # not matter because the inserted transactions do not differ.
        numbered_transactions = (select([transactions.c.id, func.row_number().over().label('index')])
                                 .select_from(transactions)
                                 .cte('membership_fee_numbered_transactions'))

        # Split on the fee account: negative fee amount.
        split_insert_fee_account = (Split.__table__.insert()
                                    .from_select([Split.amount, Split.account_id, Split.transaction_id],
                                                 select([literal(-membership_fee.regular_fee, type_=Money),
                                                         numbered_users.c.fee_account_id,
                                                         numbered_transactions.c.id])
                                                 .select_from(numbered_users.join(numbered_transactions,
                                                                                  numbered_transactions.c.index == numbered_users.c.index))
                                                 )
                                    .returning(Split.id)
                                    .cte('membership_fee_split_fee_account'))

        # Split on the user's account: positive fee amount.
        split_insert_user = (Split.__table__.insert().from_select(
            [Split.amount, Split.account_id, Split.transaction_id],
            select([literal(membership_fee.regular_fee, type_=Money),
                    numbered_users.c.account_id,
                    numbered_transactions.c.id])
            .select_from(numbered_users.join(numbered_transactions,
                                             numbered_transactions.c.index == numbered_users.c.index)))
                             .returning(Split.id)
                             .cte('membership_fee_split_user'))

        # Execute an empty SELECT that references both insert CTEs.
        # NOTE(review): the join presumably only exists to pull both inserts
        # (and, through them, the transaction insert) into one statement - confirm.
        session.session.execute(select([]).select_from(split_insert_fee_account
                                                       .join(split_insert_user,
                                                             split_insert_user.c.id == split_insert_fee_account.c.id)))

    affected_users = [dict(user) for user in affected_users_raw]

    return affected_users
def collection_preview(self, user_id):
    """Return a user's collections, each with a preview of its newest items.

    :param user_id: Id of the user whose collections are listed.
    :return: A list of dicts, one per collection, with the keys ``safe_id``,
        ``name``, ``slug`` and ``items``. ``items`` is a list of dicts with
        the keys ``user_id``, ``username``, ``safe_id``, ``name``, ``slug``
        and ``photo`` (the first entry of the item's photos). At most
        ``Collection.PREVIEW_SIZE`` items are returned per collection.
    """
    # Number the items inside each collection, most recently updated first.
    subquery = select(
        [
            Item.id,
            Item.safe_id,
            Item.name,
            Item.slug,
            Item.collection_id,
            Item.photos,
            func.row_number()
            .over(partition_by=Item.collection_id, order_by=desc(Item.updated))
            .label("rownumber"),
        ]
    ).alias("subquery")

    # Keep only the user's collections and the first PREVIEW_SIZE items of
    # each partition.
    query = (
        db.session.query(
            Collection.id,
            Collection.safe_id,
            Collection.name,
            Collection.slug,
            User.id,
            User.username,
            subquery.c.id,
            subquery.c.safe_id,
            subquery.c.name,
            subquery.c.slug,
            subquery.c.photos,
        )
        .join(subquery, Collection.id == subquery.c.collection_id)
        .join(User, Collection.user_id == User.id)
        .filter(and_(User.id == user_id, subquery.c.rownumber <= Collection.PREVIEW_SIZE))
        # Most recently updated collections first. The extra sort keys make
        # the result deterministic: Collection.id keeps all rows of one
        # collection adjacent even when two collections share the same
        # `updated` value (groupby below only merges *consecutive* rows), and
        # rownumber lists each collection's items newest first.
        .order_by(desc(Collection.updated), Collection.id, subquery.c.rownumber)
    )
    rows = query.all()

    def collection_fields(row):
        """Map the collection columns of a result row to a dict."""
        return {"safe_id": row[1], "name": row[2], "slug": row[3]}

    def item_fields(row):
        """Map the user and item columns of a result row to a dict."""
        return {
            "user_id": row[4],
            "username": row[5],
            "safe_id": row[7],
            "name": row[8],
            "slug": row[9],
            "photo": row[10][0],
        }

    collections = []
    for key, group in groupby(rows, collection_fields):
        # Build the item list eagerly: a groupby group is only valid until the
        # outer iterator advances, so a lazy map() (Python 3) would come out
        # empty. Copy the key instead of mutating it, because groupby keeps
        # using that object to detect the end of the group.
        items = [item_fields(row) for row in group]
        collections.append(dict(key, items=items))

    return collections