def aggregate(self, table, groupby, filters=None, aggregate='count(*)',
              page=0, page_size=100, orderby=None):
    self.log.info(
        ("table=%s, groupby=%s, filters=%s, aggregate=%s, page=%s,"
         " page_size=%s, orderby=%s"),
        table, groupby, filters, aggregate, page, page_size, orderby)
    filters = filters or {}  # avoid a mutable default argument
    table_ = self._table(table)
    columnd = {col.name: col for col in table_.columns}
    if isinstance(groupby, basestring):
        groupby = [groupby]
    if isinstance(aggregate, basestring):
        aggregate = [aggregate]
    session = self.sessionmaker()
    try:
        groupby_ = [label(c, str2col(c, table_)) for c in groupby]
        aggregate_ = [label(a, str2col(a, table_)) for a in aggregate]
        query = session.query(*(aggregate_ + groupby_))
        query = with_filters(query, table_, filters)
        query = query.group_by(*groupby_)
        query = with_orderby(query, table_, orderby)
        query = with_pagination(query, table_, page, page_size)
        result = result2dict(query.all())
        self.log.info("retrieved %d rows", len(result))
        return result
    finally:
        session.close()
def __call__(self, user_ids, session):
    """
    Parameters:
        user_ids  : list of mediawiki user ids to restrict computation to
        session   : sqlalchemy session open on a mediawiki database

    Returns:
        { user id: 1 if they're a rolling new active editor, 0 otherwise
          for all cohort users, or all users that have edits in the time period }
    """
    number_of_edits = int(self.number_of_edits.data)
    rolling_days = int(self.rolling_days.data)
    end_date = self.end_date.data
    start_date = end_date - timedelta(days=rolling_days)

    newly_registered = session.query(Logging.log_user) \
        .filter(Logging.log_type == 'newusers') \
        .filter(Logging.log_action == 'create') \
        .filter(between(Logging.log_timestamp, start_date, end_date))

    filtered_new = self.filter(
        newly_registered, user_ids, column=Logging.log_user
    ).subquery()

    rev_user = label('user_id', Revision.rev_user)
    ar_user = label('user_id', Archive.ar_user)
    count = label('count', func.count())

    revisions = session.query(rev_user, count)\
        .filter(between(Revision.rev_timestamp, start_date, end_date))\
        .filter(Revision.rev_user.in_(filtered_new))\
        .group_by(Revision.rev_user)

    archived = session.query(ar_user, count)\
        .filter(between(Archive.ar_timestamp, start_date, end_date))\
        .filter(Archive.ar_user.in_(filtered_new))\
        .group_by(Archive.ar_user)

    bot_user_ids = session.query(MediawikiUserGroups.ug_user)\
        .filter(MediawikiUserGroups.ug_group == 'bot')\
        .subquery()

    new_edits = revisions.union_all(archived).subquery()

    new_edits_by_user = session.query(new_edits.c.user_id)\
        .filter(new_edits.c.user_id.notin_(bot_user_ids))\
        .group_by(new_edits.c.user_id)\
        .having(func.SUM(new_edits.c.count) >= number_of_edits)

    metric_results = {r[0]: {self.id: 1} for r in new_edits_by_user.all()}

    if user_ids is None:
        return metric_results
    else:
        return {
            uid: metric_results.get(uid, self.default_result)
            for uid in user_ids
        }
def postgres_aggregates(self, resolution):
    if isinstance(resolution, basestring):
        try:
            resolution = float(resolution)
        except ValueError:
            resolution = self.resolution
    return [
        label('cell_x',
              func.floor(ST_X(Column('cell')) / resolution) * resolution),
        label('cell_y',
              func.floor(ST_Y(Column('cell')) / resolution) * resolution)]
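# A minimal plain-Python sketch of the snapping math used by
# postgres_aggregates above: floor(value / resolution) * resolution maps a
# coordinate to the lower-left corner of its grid cell. The function name
# `snap` is illustrative, not part of the original code.
import math

def snap(value, resolution):
    """Snap a coordinate to the grid line at or below it."""
    return math.floor(value / resolution) * resolution

assert snap(12.7, 5) == 10    # 12.7 falls in the cell [10, 15)
assert snap(-0.1, 5) == -5    # floor (unlike truncation) keeps negative cells consistent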
def build_query_to_report(self, query, aggregate_table, params):
    fk = Column(self.key, Integer)
    geom = Column(self.geometry_column, Geometry())
    join_table = Table(self.table, aggregate_table.metadata, fk, geom)
    if params == "key":
        query = query.column(label(self.key, aggregate_table.c.join_key))
    else:
        query = query.column(
            label("geometry", func.ST_AsGeoJSON(func.ST_Collect(geom))))
    return (query
            .select_from(join_table)
            .where(aggregate_table.c.join_key == fk)
            .group_by(aggregate_table.c.join_key))
def __call__(self, user_ids, session):
    """
    Parameters:
        user_ids  : list of mediawiki user ids to find edits for
        session   : sqlalchemy session open on a mediawiki database

    Returns:
        dictionary from user ids to the number of edits found.
    """
    start_date = self.start_date.data
    end_date = self.end_date.data

    revisions = session\
        .query(
            label('user_id', Revision.rev_user),
            label('timestamp', Revision.rev_timestamp)
        )\
        .filter(Revision.rev_timestamp > start_date)\
        .filter(Revision.rev_timestamp <= end_date)

    archives = session\
        .query(
            label('user_id', Archive.ar_user),
            label('timestamp', Archive.ar_timestamp)
        )\
        .filter(Archive.ar_timestamp > start_date)\
        .filter(Archive.ar_timestamp <= end_date)

    if self.namespaces.data and len(self.namespaces.data) > 0:
        revisions = revisions.join(Page)\
            .filter(Page.page_namespace.in_(self.namespaces.data))
        archives = archives\
            .filter(Archive.ar_namespace.in_(self.namespaces.data))

    revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
    archives = self.filter(archives, user_ids, column=Archive.ar_user)

    both = revisions
    if self.include_deleted.data:
        both = both.union_all(archives)
    both = both.subquery()

    query = session.query(both.c.user_id, func.count())\
        .group_by(both.c.user_id)
    query = self.apply_timeseries(query, column=both.c.timestamp)

    return self.results_by_user(
        user_ids,
        query,
        [(self.id, 1, 0)],
        date_index=2,
    )
def __call__(self, user_ids, session):
    """
    Parameters:
        user_ids  : list of mediawiki user ids to restrict computation to
        session   : sqlalchemy session open on a mediawiki database

    Returns:
        dictionary from user ids to: 1 if they're a rolling active editor, 0 if not
    """
    number_of_edits = int(self.number_of_edits.data)
    rolling_days = int(self.rolling_days.data)
    end_date = self.end_date.data
    start_date = end_date - timedelta(days=rolling_days)

    rev_user = label("user_id", Revision.rev_user)
    ar_user = label("user_id", Archive.ar_user)
    count = label("count", func.count())

    revisions = (
        session.query(rev_user, count)
        .filter(between(Revision.rev_timestamp, start_date, end_date))
        .group_by(Revision.rev_user)
    )
    revisions = self.filter(revisions, user_ids, column=Revision.rev_user)

    archived = (
        session.query(ar_user, count)
        .filter(between(Archive.ar_timestamp, start_date, end_date))
        .group_by(Archive.ar_user)
    )
    archived = self.filter(archived, user_ids, column=Archive.ar_user)

    bot_user_ids = (
        session.query(MediawikiUserGroups.ug_user)
        .filter(MediawikiUserGroups.ug_group == "bot")
        .subquery()
    )

    edits = revisions.union_all(archived).subquery()

    edits_by_user = (
        session.query(edits.c.user_id)
        .filter(edits.c.user_id.notin_(bot_user_ids))
        .group_by(edits.c.user_id)
        .having(func.SUM(edits.c.count) >= number_of_edits)
    )

    metric_results = {r[0]: {self.id: 1} for r in edits_by_user.all()}

    if user_ids is None:
        return metric_results
    else:
        return {uid: metric_results.get(uid, self.default_result)
                for uid in user_ids}
def get_messages_by_domain(self, session, limit=10):
    '''SELECT m.mailing_list_url, lower(p.domain_name) as domain,
           count(m.message_id) as num_messages
       FROM messages m, messages_people mp, people p
       WHERE m.message_ID = mp.message_ID
           AND lower(mp.email_address) = lower(p.email_address)
           AND mp.type_of_recipient = 'From'
       GROUP BY m.mailing_list_url, domain
       ORDER BY num_messages DESC, domain
       LIMIT %s;'''
    mailing_lists = int(self.get_num_of_mailing_lists(session)[0])
    limit = limit * mailing_lists
    m = aliased(db.Messages)
    mp = aliased(db.MessagesPeople)
    p = aliased(db.People)
    ret = session.query(m.mailing_list_url,
                        label('domain', func.lower(p.domain_name)),
                        func.count(m.message_id))\
        .filter(m.message_id == mp.message_id)\
        .filter(func.lower(mp.email_address) == func.lower(p.email_address))\
        .filter(mp.type_of_recipient == 'From')\
        .group_by(m.mailing_list_url, func.lower(p.domain_name))\
        .order_by(func.count(m.message_id).desc(), func.lower(p.domain_name))\
        .limit(limit)
    return ret.all()
def column(self, field, dimension=None):
    """Return a table column for `field` which can be either
    :class:`cubes.Attribute` or a string.

    Possible column names:

    * ``field`` for fact field or flat dimension
    * ``field.locale`` for localized fact field or flat dimension
    * ``dimension.field`` for multi-level dimension field
    * ``dimension.field.locale`` for localized multi-level dimension field
    """
    # FIXME: should use: field.full_name(dimension, self.locale)
    # if there is no localization for field, use default name/first locale
    locale_suffix = ""

    if isinstance(field, cubes.model.Attribute) and field.locales:
        locale = self.locale if self.locale in field.locales else field.locales[0]
        locale_suffix = "." + locale

    if dimension:
        # FIXME: temporary flat dimension hack, not sure about impact of
        # this to other parts of the framework
        if not dimension.is_flat or dimension.has_details:
            logical_name = dimension.name + '.' + str(field)
        else:
            logical_name = str(field)
    else:
        logical_name = field

    self.logger.debug("getting column %s(%s) loc: %s - %s"
                      % (field, type(field), self.locale, locale_suffix))

    localized_name = logical_name + locale_suffix
    column = self.view.c[localized_name]
    return expression.label(logical_name, column)
def get_player_graph_data(server, granularity=15, start_date=None, end_date=None):
    end_date = end_date or datetime.utcnow()
    start_date = start_date or end_date - timedelta(days=7)

    result = db.session.query(
        label(
            'timestamp_group',
            func.round(
                (func.unix_timestamp(ServerStatus.timestamp) - time.timezone)
                / (granularity * 60)
            ),
        ),
        func.avg(ServerStatus.player_count)
    ).filter(
        ServerStatus.server == server,
        ServerStatus.timestamp >= start_date,
        ServerStatus.timestamp <= end_date
    ).group_by('timestamp_group').order_by(
        ServerStatus.timestamp
    ).all()

    points = []
    for chunk, count in result:
        points.append({
            'time': int(chunk * granularity * 60 * 1000),
            'player_count': int(count)
        })

    return {
        'start_time': int(calendar.timegm(start_date.timetuple()) * 1000),
        'end_time': int(calendar.timegm(end_date.timetuple()) * 1000),
        'points': points
    }
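# A plain-Python sketch of the bucketing in get_player_graph_data above.
# Dividing a Unix timestamp by (granularity * 60) collapses every sample in
# the same granularity-minute window onto one group key; multiplying the key
# back out recovers the window's timestamp in milliseconds. The query uses
# SQL round() (nearest window) while this sketch floors, which only shifts
# bucket boundaries by half a window. Names here are illustrative.
def bucket_key(unix_ts, granularity_minutes=15):
    """Map a Unix timestamp to its granularity-minute bucket index."""
    return unix_ts // (granularity_minutes * 60)

assert bucket_key(1700000100) == bucket_key(1700000100 + 14 * 60)  # same window
assert bucket_key(1700000100) != bucket_key(1700000100 + 15 * 60)  # next window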
def SelectSingeMenuPrivilege(strUserID, MidList):
    """
    @note Query privileges for the menu ids contained in MidList.
    :param strUserID:
    :param MidList:
    :return: the list of menu privileges
    """
    project_dic = CommonSession.SelectProject('ProjectDic')
    menu_list = []
    with GetSession() as db_ses:
        privilege = db_ses.query(
            tables.MenuPrivilege.mid,
            tables.Menu.name,
            tables.Menu.url,
            tables.Menu.preid,
            expression.label('privileges',
                             func.group_concat(tables.MenuPrivilege.pid, ";",
                                               tables.MenuPrivilege.r_priv, ";",
                                               tables.MenuPrivilege.w_priv))
        ).join(
            tables.Menu, tables.MenuPrivilege.mid == tables.Menu.mid
        ).filter(
            tables.MenuPrivilege.uid == strUserID,
            tables.MenuPrivilege.mid.in_(MidList)
        ).group_by(tables.MenuPrivilege.mid).all()
        for menu in privilege:
            priv_list = []
            for prjs in str(menu[4]).split(','):
                priv = prjs.split(';')
                prj_dic = {}
                if priv[0] in project_dic.keys():
                    prj_dic[project_dic[priv[0]]] = {
                        'pid': priv[0], 'r_priv': priv[1], 'w_priv': priv[2]}
                    priv_list.append(prj_dic)
            menu_dic = {'menu_id': menu[0], 'menu_name': menu[1],
                        'menu_url': menu[2], 'menu_preid': menu[3],
                        'menu_pri': priv_list}
            menu_list.append(menu_dic)
    return menu_list
def get_people_by_domain(self, session, limit=10):
    '''SELECT mailing_list_url, lower(domain_name) as domain,
           count(lower(p.email_address)) as t
       FROM mailing_lists_people as ml, people as p
       WHERE lower(ml.email_address) = lower(p.email_address)
       GROUP BY mailing_list_url, domain
       ORDER BY t DESC, domain
       LIMIT %s;'''
    mailing_lists = int(self.get_num_of_mailing_lists(session)[0])
    limit = limit * mailing_lists
    mlp = aliased(db.MailingListsPeople)
    p = aliased(db.People)
    ret = session.query(mlp.mailing_list_url,
                        label('domain', func.lower(p.domain_name)),
                        func.count(func.lower(p.email_address)))\
        .filter(func.lower(mlp.email_address) == func.lower(p.email_address))\
        .group_by(mlp.mailing_list_url, func.lower(p.domain_name))\
        .order_by(func.count(func.lower(p.email_address)).desc(),
                  func.lower(p.domain_name))\
        .limit(limit)
    return ret.all()
def select(self, table, columns=None, filters=None, page=0,
           page_size=100, orderby=None):
    self.log.info(
        "table=%s, columns=%s, filters=%s, page=%s, page_size=%s, orderby=%s",
        table, columns, filters, page, page_size, orderby)
    filters = filters or {}  # avoid a mutable default argument
    table_ = self._table(table)
    # get column objects corresponding to names
    columnd = {c.name: c for c in table_.columns}
    if isinstance(columns, basestring):
        columns = [columns]
    if columns is None:
        columns_ = list(table_.columns)
    else:
        columns_ = [label(c, str2col(c, table_)) for c in columns]
    session = self.sessionmaker()
    try:
        query = session.query(*columns_)
        query = with_filters(query, table_, filters)
        query = with_orderby(query, table_, orderby)
        query = with_pagination(query, table_, page, page_size)
        result = result2dict(query.all())
        self.log.info("retrieved %d rows", len(result))
        return result
    finally:
        session.close()
def build_query_to_report(self, query, aggregate_table, params):
    assert params in self._known_units
    res = params
    truncated_time = func.date_trunc(res, aggregate_table.c.time_step)
    return (query
            .column(label("time_slice", func.extract("epoch", truncated_time)))
            .group_by(truncated_time))
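# What the time_slice column above yields, assuming PostgreSQL semantics:
# date_trunc('hour', ts) snaps a timestamp to the start of its hour and
# extract(epoch from ...) converts it to Unix seconds, so all rows in the
# same hour share one numeric group key. A rough plain-Python analogue for
# fixed-size units (names are illustrative):
def time_slice(unix_ts, unit_seconds=3600):
    """Snap a Unix timestamp to the start of its unit-sized window."""
    return unix_ts - (unix_ts % unit_seconds)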
def build_query_to_report(self, query, aggregate_table, res):
    if isinstance(res, basestring):
        try:
            res = float(res)
        except ValueError:
            res = self.resolution
    snapped_geom = func.ST_SnapToGrid(aggregate_table.c.cell, res)
    grid_cell = func.ST_MakeBox2D(snapped_geom,
                                  func.ST_Translate(snapped_geom, res, res))
    return (query
            .column(label('cell', func.ST_AsGeoJSON(grid_cell)))
            .group_by(snapped_geom))
def synsets(session, pos=None):
    """Query for synsets, concatenated ids and lemmas of their LUs.

    Parameters
    ----------
    session : orm.session.Session
    pos : list
        Parts of speech to select (default [2])
    """
    if not pos:
        pos = [2]
    return (session.query(
                Synset.id_,
                Synset.definition,
                label('lex_ids', func.group_concat(UnitSynset.lex_id)),
                label('unitindexes', func.group_concat(UnitSynset.unitindex)))
            .join(UnitSynset)
            .join(LexicalUnit)
            .filter(LexicalUnit.pos.in_(pos))
            .order_by(Synset.id_)
            .group_by(Synset.id_))
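# A hypothetical usage sketch for synsets() above; `session` is assumed to be
# an open SQLAlchemy session bound to a plWordNet database, and pos=[1, 2] is
# an invented example value:
for syn_id, definition, lex_ids, unitindexes in synsets(session, pos=[1, 2]):
    # group_concat returns comma-separated strings, e.g. "10,42,77"
    print(syn_id, lex_ids.split(','), unitindexes.split(','))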
def get_grouped_over_period(
        self, period: PeriodType,
        filter_request: FilterRequest) -> List[QKeyValue]:
    """Gets the transaction amount grouped by period matching the provided filters.

    :param period: the period
    :param filter_request: the filter request
    :return: the list of (key, value) results
    """
    period_expr = self.period_expression(period, TransactionDbo.date_value)
    query = self.__entity_manager.query(
        label('key', period_expr),
        label('value', func.sum(TransactionDbo.amount)))
    query = self.__query_builder.build(query, filters=filter_request)
    query = query.group_by(period_expr)
    query = query.order_by(period_expr)
    logging.debug(query)
    return query.all()
def get_grouped_by_category_over_period(
        self, period: PeriodType,
        filter_request: FilterRequest) -> List[QCompositeKeyValue]:
    """Gets the transaction amount grouped by category and period matching
    the provided filters.

    :param period: the period
    :param filter_request: the filter request
    :return: the list of (key_one, key_two, value) results
    """
    period_expr = self.period_expression(period, TransactionDbo.date_value)
    query = self.__entity_manager.query(
        label('value', func.sum(TransactionDbo.amount)),
        label('key_one', period_expr),
        label('key_two', CategoryDbo.id))
    query = self.__query_builder.build(query,
                                       filters=filter_request,
                                       groups=['label.category.id'])
    query = query.group_by(period_expr)
    logging.debug(query)
    return query.all()
def SelectMenuProjectPrivilege(strUserId, strMenuID='None'):
    """
    @note Query a user's menu privileges.
    :param strUserId:
    :param strMenuID: 'None' returns all menu privileges for this user;
                      any other value returns the privileges for that menu id
    :return:
    """
    project_dic = CommonSession.SelectProject('ProjectDic')
    menu_list = []
    with GetSession() as db_ses:
        query = db_ses.query(
            tables.MenuPrivilege.mid,
            tables.Menu.name,
            tables.Menu.url,
            tables.Menu.preid,
            expression.label('privileges',
                             func.group_concat(tables.MenuPrivilege.pid, ";",
                                               tables.MenuPrivilege.r_priv, ";",
                                               tables.MenuPrivilege.w_priv))
        ).join(tables.Menu, tables.MenuPrivilege.mid == tables.Menu.mid)
        if strMenuID == 'None':
            privilege = query.filter(
                tables.MenuPrivilege.uid == strUserId
            ).group_by(tables.MenuPrivilege.mid).all()
        else:
            privilege = query.filter(
                tables.MenuPrivilege.uid == strUserId,
                tables.MenuPrivilege.mid == strMenuID
            ).group_by(tables.MenuPrivilege.mid).all()
        for menu in privilege:
            priv_list = []
            for prjs in str(menu[4]).split(','):
                priv = prjs.split(';')
                prj_dic = {}
                if priv[0] in project_dic.keys():
                    prj_dic[project_dic[priv[0]]] = {
                        'pid': priv[0], 'r_priv': priv[1], 'w_priv': priv[2]}
                    priv_list.append(prj_dic)
            menu_dic = {'menu_id': menu[0], 'menu_name': menu[1],
                        'menu_url': menu[2], 'menu_preid': menu[3],
                        'menu_pri': priv_list}
            menu_list.append(menu_dic)
    return menu_list
def get(run_id, query, with_stats=True):
    """Return a list of genotypes in a vcf conforming to the given query, as
    well as a dict of stats calculated on them.

    If a truth_vcf is associated with this VCF, stats include true/false,
    positive/negative stats, as well as precision, recall, and f1score. Stats
    also include the number of records, and the number of records once
    filters are applied.

    A query is a dictionary which specifies the range, filters, limit, offset
    and ordering which should be applied against genotypes before genotypes
    and stats are returned. It has structure:

    {range: {contig: "X", start: 0, end: 250000000},
     filters: [{columnName: 'info:DP', filterValue: '50', type: '<'}, ...],
     sortBy: [{columnName: 'contig', order: 'asc'},
              {columnName: 'position', order: 'asc'}, ...],
     page: 10,
     limit: 250}
    """
    query = _annotate_query_with_types(query, spec(run_id))
    compare_to_run_id = query.get('compareToVcfId')
    with tables(db.engine, 'genotypes') as (con, g):
        if compare_to_run_id:
            # We consider a genotype validated if a truth genotype exists at
            # its location (contig/position) with the same ref/alts. This
            # isn't entirely accurate: for example, it handles SVs very
            # poorly.
            gt = g.alias()
            joined_q = outerjoin(g, gt, and_(
                gt.c.vcf_id == compare_to_run_id,
                g.c.contig == gt.c.contig,
                g.c.position == gt.c.position,
                g.c.reference == gt.c.reference,
                g.c.alternates == gt.c.alternates,
                g.c.sample_name == gt.c.sample_name))
            valid_column = label('tag:true-positive', gt.c.contig != None)
            q = (select(g.c + [valid_column])
                 .select_from(joined_q)
                 .where(g.c.vcf_id == run_id))
        else:
            q = select(g.c).where(g.c.vcf_id == run_id)

        q = _add_range(q, g, query.get('range'))
        q = _add_filters(q, g, query.get('filters'))
        q = _add_orderings(q, g, query.get('sortBy'))
        q = _add_paging(q, g, query.get('limit'), query.get('page'))
        q = _add_ordering(q, g, 'String', 'contig', 'asc')
        q = _add_ordering(q, g, 'Integer', 'position', 'asc')

        genotypes = [dict(g) for g in con.execute(q).fetchall()]
        stats = calculate_stats(run_id, compare_to_run_id, query) if with_stats else {}
    return {'records': genotypes, 'stats': stats}
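# An illustrative call to get() above, using the query structure documented
# in its docstring; the run id and values here are invented:
example_query = {
    'range': {'contig': '20', 'start': 0, 'end': 1000000},
    'filters': [{'columnName': 'info:DP', 'filterValue': '50', 'type': '<'}],
    'sortBy': [{'columnName': 'position', 'order': 'asc'}],
    'page': 0,
    'limit': 250,
}
response = get(run_id=1, query=example_query)
records, stats = response['records'], response['stats']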
def pwn_mappings(session, pos=None, pos_en=None):
    """Query plWN for already mapped synsets between plWN and PWN.

    Selects: Polish synset id, English synset unitsstr, POS.
    Source: Polish - Target (child): English.
    RelationType: selects only plWN-PWN mappings; does not take
    'po_pa', 'po_ap' relation types.
    POS: only selects nouns.

    Parameters
    ----------
    session : orm.session.Session
    pos : list of int
    pos_en : list of int
    """
    if not pos:
        pos = [2]
    if not pos_en:
        pos_en = [6]
    rel_types = reltypes_pwn_plwn(session)
    syns_en = orm.aliased(Synset)
    uas_pl = orm.aliased(UnitSynset)
    lunit_pl = orm.aliased(LexicalUnit)
    return (session.query(
                label('pl_uid', Synset.id_),
                label('en_uid', syns_en.id_),
                syns_en.unitsstr,
                LexicalUnit.pos)
            .join(SynsetRelation, Synset.id_ == SynsetRelation.parent_id)
            .join(syns_en, SynsetRelation.child_id == syns_en.id_)
            .join(UnitSynset, syns_en.id_ == UnitSynset.syn_id)
            .join(LexicalUnit, UnitSynset.lex_id == LexicalUnit.id_)
            .join(uas_pl, Synset.id_ == uas_pl.syn_id)
            .join(lunit_pl, uas_pl.lex_id == lunit_pl.id_)
            .join(RelationType, SynsetRelation.rel_id == RelationType.id_)
            .filter(RelationType.id_.in_(rel_types))
            .filter(LexicalUnit.pos.in_(pos_en))
            .filter(lunit_pl.pos.in_(pos))
            .group_by(Synset.id_, syns_en.id_, syns_en.unitsstr,
                      LexicalUnit.pos)
            .order_by(Synset.id_))
def get_years(model):
    if model == "distinct":
        q = DB.session.query(
            label("year", distinct(func.date_part("year", VSynthese.date_min)))
        ).order_by("year")
    elif model == "min-max":
        q = DB.session.query(
            func.min(func.date_part("year", VSynthese.date_min)),
            func.max(func.date_part("year", VSynthese.date_min)),
        )
    else:
        # guard against an unbound `q` when an unknown model is passed
        raise ValueError("model must be 'distinct' or 'min-max'")
    return q.all()
def build_query_to_populate(self, query, full_table, aggregate_table):
    insert_columns = [aggregate_table.c.isp]
    ip_range = Column("ip_range", INT8RANGE)
    isp_name = Column("label", String)
    join_table = Table(self.maxmind_table, full_table.metadata,
                       ip_range, isp_name, keep_existing=True)
    isp_label = label('maxmind_isp', self._sql_rewrite(isp_name))
    select_query = (query.select_from(join_table)
                    .where(ip_range.contains(full_table.c.client_ip))
                    .column(isp_label)
                    .group_by('maxmind_isp'))
    return insert_columns, select_query
def get_total(self, filter_request: FilterRequest) -> float:
    """Gets the total of all transactions matching the provided filters.

    :param filter_request: the filter request
    :return: the total
    """
    query = self.__entity_manager.query(
        label('total', func.sum(TransactionDbo.amount)))
    query = self.__query_builder.build(query, filters=filter_request)
    logging.debug(query)
    total = query.scalar()
    return 0 if total is None else total
def _select_column(self, attribute, locale=None):
    """get select column"""
    if locale:
        localized_alias = attribute.alias + "." + locale
    else:
        localized_alias = attribute.alias

    if self.dimension_table_prefix:
        prefix = self.dimension_table_prefix
    else:
        prefix = ""

    self.logger.debug("looking for mapping %s (%s)"
                      % (localized_alias, attribute.alias))

    if self.cube.mappings and localized_alias in self.cube.mappings:
        mapping = self.cube.mappings[localized_alias]
        original_mapping = mapping
        self.logger.debug("  is in mappings: %s" % mapping)
    elif self.cube.mappings and attribute.alias in self.cube.mappings:
        mapping = self.cube.mappings[attribute.alias]
        original_mapping = mapping
        self.logger.debug("  not in mappings, using default trans: %s" % mapping)
    else:
        original_mapping = None
        if attribute.dimension:
            mapping = prefix + attribute.alias
        else:
            mapping = attribute.alias

        # FIXME: make this work
        if locale:
            mapping = mapping + "_" + locale

        self.logger.debug("  defaulting to: %s" % mapping)

    (table_name, field_name) = self.split_field(mapping)
    if not table_name:
        table_name = self.fact_name

    table = self.table(table_name)

    try:
        column = table.c[field_name]
    except KeyError:
        raise model.ModelError("Mapped column '%s' does not exist (as %s.%s)"
                               % (localized_alias, table_name, field_name))

    self.logger.debug("adding column %s as %s" % (column, localized_alias))

    # self.mappings[localized_alias] = column
    return expression.label(localized_alias, column)
def stations2_filtered_pl(start, end):
    last_10_minutes = datetime.utcnow() - timedelta(minutes=10)
    query = (db.session.query(
                 Receiver.name.label("s"),
                 label("lt", func.round(func.ST_Y(Receiver.location_wkt) * 10000) / 10000),
                 label("lg", func.round(func.ST_X(Receiver.location_wkt) * 10000) / 10000),
                 case([(Receiver.lastseen > last_10_minutes, "U")], else_="D").label("u"),
                 Receiver.lastseen.label("ut"),
                 label("v", Receiver.version + "." + Receiver.platform))
             .order_by(Receiver.lastseen)
             .filter(db.or_(db.and_(start < Receiver.firstseen, end > Receiver.firstseen),
                            db.and_(start < Receiver.lastseen, end > Receiver.lastseen))))

    res = db.session.execute(query)
    stations = json.dumps({"stations": [dict(r) for r in res]},
                          default=alchemyencoder)
    return stations
def _attach_ignored_data(self) -> TopicQuery:
    """Join the data related to whether the user has ignored the topic."""
    query = self.join(
        TopicIgnore,
        and_(
            TopicIgnore.topic_id == Topic.topic_id,
            TopicIgnore.user == self.request.user,
        ),
        isouter=(not self._only_ignored),
    )
    query = query.add_columns(label("ignored_time", TopicIgnore.created_time))
    return query
def get_messages_by_people(self, session, limit=10):
    '''SELECT m.mailing_list_url, lower(mp.email_address) as email,
           count(m.message_ID) as t
       FROM messages m, messages_people mp
       WHERE m.message_ID = mp.message_ID
           AND mp.type_of_recipient = 'From'
       GROUP BY m.mailing_list_url, email
       ORDER BY t desc, email
       LIMIT %s;'''
    m = aliased(db.Messages)
    mp = aliased(db.MessagesPeople)
    ret = session.query(m.mailing_list_url,
                        label('email', func.lower(mp.email_address)),
                        label('t', func.count(m.message_id))) \
        .filter(m.message_id == mp.message_id) \
        .filter(mp.type_of_recipient == 'From') \
        .group_by(m.mailing_list_url, func.lower(mp.email_address)) \
        .order_by(func.count(m.message_id).desc(), func.lower(mp.email_address)) \
        .limit(limit)
    return ret.all()
def _attach_bookmark_data(self) -> "TopicQuery":
    """Join the data related to whether the user has bookmarked the topic."""
    query = self.join(
        TopicBookmark,
        and_(
            TopicBookmark.topic_id == Topic.topic_id,
            TopicBookmark.user == self.request.user,
        ),
        isouter=(not self._only_bookmarked),
    )
    query = query.add_columns(label("bookmarked_time", TopicBookmark.created_time))
    return query
def average(cls, session, *, guild_xid, channel_xid, scope, window_min):
    filters = [
        WaitTime.guild_xid == guild_xid,
        WaitTime.created_at > datetime.utcnow() - timedelta(minutes=window_min),
    ]
    if scope == "channel":
        filters.append(WaitTime.channel_xid == channel_xid)
    row = (session.query(
               label("average", func.sum(WaitTime.seconds) / func.count()))
           .filter(and_(*filters))
           .one_or_none())
    return row.average if row else None
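# A hypothetical call to average() above, assuming it is defined as a
# classmethod on the WaitTime model and `session` is an open SQLAlchemy
# session; the ids and window are invented:
avg_seconds = WaitTime.average(
    session,
    guild_xid=1234,
    channel_xid=5678,
    scope="channel",  # any other value averages across the whole guild
    window_min=30,    # only waits recorded in the last 30 minutes
)
print(avg_seconds if avg_seconds is not None else "no recent wait times")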
def _attach_vote_data(self) -> "TopicQuery":
    """Join the data related to whether the user has voted on the topic."""
    query = self.join(
        TopicVote,
        and_(
            TopicVote.topic_id == Topic.topic_id,
            TopicVote.user == self.request.user,
        ),
        isouter=(not self._only_user_voted),
    )
    query = query.add_columns(label("voted_time", TopicVote.created_time))
    return query
def _attach_vote_data(self) -> CommentQuery:
    """Join the data related to whether the user has voted on the comment."""
    query = self.join(
        CommentVote,
        and_(
            CommentVote.comment_id == Comment.comment_id,
            CommentVote.user == self.request.user,
        ),
        isouter=(not self._only_user_voted),
    )
    query = query.add_columns(label("voted_time", CommentVote.created_time))
    return query
def top(self, page=0, per_page=25):
    # group by post_id (not the upvote row's own id, which would make every
    # count 1) so each post gets its total upvote count
    upvote_counts = s.query(PostUpvote.post_id,
                            func.count(PostUpvote.id).label('count')) \
        .group_by(PostUpvote.post_id) \
        .subquery()
    total_upvotes = coalesce(upvote_counts.c.count, 0)
    query = s.query(Post, label('total_upvotes', total_upvotes))
    query = query.outerjoin(upvote_counts,
                            upvote_counts.c.post_id == Post.id) \
        .order_by(desc('total_upvotes')) \
        .offset(page * per_page) \
        .limit(per_page)
    return [result[0] for result in query]
def _attach_bookmark_data(self) -> "CommentQuery":
    """Join the data related to whether the user has bookmarked the comment."""
    query = self.join(
        CommentBookmark,
        and_(
            CommentBookmark.comment_id == Comment.comment_id,
            CommentBookmark.user == self.request.user,
        ),
        isouter=(not self._only_bookmarked),
    )
    query = query.add_columns(
        label("bookmarked_time", CommentBookmark.created_time))
    return query
def get_summary_distribution(state_code, district_id=None, school_id=None,
                             asmt_type=AssessmentType.SUMMATIVE):
    '''
    Get a bucketed distribution of scores
    '''
    with EdCoreDBConnection(state_code=state_code) as connection:
        fact_asmt_outcome_vw = connection.get_table('fact_asmt_outcome')
        # should it be always for summative?
        query = select([
            label(Constants.SCORE_BUCKET,
                  (fact_asmt_outcome_vw.c.asmt_score / get_bucket_size())
                  * get_bucket_size()),
            count(case([(fact_asmt_outcome_vw.c.asmt_subject == Constants.MATH, 1)],
                       else_=0)).label(Constants.TOTAL_MATH),
            count(case([(fact_asmt_outcome_vw.c.asmt_subject == Constants.ELA, 1)],
                       else_=0)).label(Constants.TOTAL_ELA)
        ], from_obj=[fact_asmt_outcome_vw])
        query = query.where(fact_asmt_outcome_vw.c.state_code == state_code)
        query = query.where(fact_asmt_outcome_vw.c.asmt_type == asmt_type)
        query = query.where(fact_asmt_outcome_vw.c.rec_status == Constants.CURRENT)
        if district_id is not None:
            query = query.where(fact_asmt_outcome_vw.c.district_id == district_id)
        if school_id is not None:
            query = query.where(fact_asmt_outcome_vw.c.school_id == school_id)
        query = query.group_by(Constants.SCORE_BUCKET).order_by(Constants.SCORE_BUCKET)
        return connection.get_result(query)
def generate_show():
    subquery = (db.session.query(LikeModel.media,
                                 func.count(1).label('count'))
                .group_by(LikeModel.media).subquery())
    now = int(time.time() / 7200)
    # rank by (likes + 1) / (age + 2)^2, a Hacker-News-style decay where age
    # is measured in two-hour periods (the 'hacker' label hints at this)
    order = expression.label(
        'hacker',
        (subquery.c.count + 1.0)
        / (now - ShowModel.hour_tagged + 2.0)
        / (now - ShowModel.hour_tagged + 2.0))
    medias = \
        (db.session.query(ShowModel)
         .filter(ShowModel.showable == 0)
         .outerjoin(subquery, ShowModel.mid == subquery.c.media)
         .filter(ShowModel.mid != None)  # NOQA
         .order_by(order.desc())
         .order_by(ShowModel.date_tagged.desc())
         .order_by(ShowModel.date_created.desc())
         .all())
    return [x.mid for x in medias]
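# Worked example of the decay above (invented values): an item with 9 likes
# tagged 3 two-hour periods ago scores (9 + 1) / (3 + 2)^2 = 0.4, while an
# equally liked item tagged this period scores (9 + 1) / (0 + 2)^2 = 2.5,
# so newer media rises to the top until likes accumulate.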
def get_synthese_stat():
    params = request.args
    q = DB.session.query(
        label("year", func.date_part("year", VSynthese.date_min)),
        func.count(VSynthese.id_synthese),
        func.count(distinct(VSynthese.cd_ref)),
    ).group_by("year")
    if ("selectedRegne" in params) and (params["selectedRegne"] != ""):
        q = q.filter(VSynthese.regne == params["selectedRegne"])
    if ("selectedPhylum" in params) and (params["selectedPhylum"] != ""):
        q = q.filter(VSynthese.phylum == params["selectedPhylum"])
    if ("selectedClasse" in params) and (params["selectedClasse"] != ""):
        q = q.filter(VSynthese.classe == params["selectedClasse"])
    if ("selectedOrdre" in params) and (params["selectedOrdre"] != ""):
        q = q.filter(VSynthese.ordre == params["selectedOrdre"])
    if ("selectedFamille" in params) and (params["selectedFamille"] != ""):
        q = q.filter(VSynthese.famille == params["selectedFamille"])
    if ("selectedGroup2INPN" in params) and (params["selectedGroup2INPN"] != ""):
        q = q.filter(VSynthese.group2_inpn == params["selectedGroup2INPN"])
    if ("selectedGroup1INPN" in params) and (params["selectedGroup1INPN"] != ""):
        q = q.filter(VSynthese.group1_inpn == params["selectedGroup1INPN"])
    if ("taxon" in params) and (params["taxon"] != ""):
        q = q.filter(VSynthese.cd_ref == params["taxon"])
    return q.all()
def column(self, field, dimension=None):
    """Return a table column for `field` which can be either
    :class:`cubes.Attribute` or a string.

    Possible column names:

    * ``field`` for fact field or flat dimension
    * ``field.locale`` for localized fact field or flat dimension
    * ``dimension.field`` for multi-level dimension field
    * ``dimension.field.locale`` for localized multi-level dimension field
    """
    # FIXME: should use: field.full_name(dimension, self.locale)
    # if there is no localization for field, use default name/first locale
    locale_suffix = ""

    if isinstance(field, cubes.model.Attribute) and field.locales:
        locale = self.locale if self.locale in field.locales else field.locales[0]
        locale_suffix = "." + locale

    if dimension:
        # FIXME: temporary flat dimension hack, not sure about impact of
        # this to other parts of the framework
        # FIXME: the third condition is a temporary quick fix for
        # https://github.com/Stiivi/cubes/issues/14
        field_name = str(field)
        if not dimension.is_flat or dimension.has_details or dimension.name != field_name:
            logical_name = dimension.name + '.' + field_name
        else:
            logical_name = field_name
    else:
        logical_name = field

    self.logger.debug("getting column %s(%s) loc: %s - %s"
                      % (field, type(field), self.locale, locale_suffix))

    localized_name = logical_name + locale_suffix
    column = self.view.c[localized_name]
    return expression.label(logical_name, column)
def get_slow_pages(path=None):
    l = label("average", func.avg(ViewLog.load_time))
    c = label("count", func.count(ViewLog.id))
    return (
        DBSession.query(
            ViewLog.path,
            l,
            c,
            label("cumulative_time", func.sum(ViewLog.load_time)),
            label("stddev", func.stddev_pop(ViewLog.load_time)),
            label("maximum", func.max(ViewLog.load_time)),
            label("minimum", func.min(ViewLog.load_time)),
        )
        .filter(ViewLog.path == path if path is not None else True)
        .having(c > 2)
        .group_by(ViewLog.path)
        .order_by(l.desc())
    )
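# get_slow_pages() returns an unexecuted query, so callers can refine it
# before fetching. A hypothetical usage sketch:
for row in get_slow_pages().limit(10):
    print(row[0], row[1], row[2])  # path, average load time, view count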
def build_query_to_report(self, query, aggregate_table):
    a = aggregate_table
    return query.column(label("upload_max", func.max(a.c.upload_max)))
def compute_logbook_entries(session=None):
    logger.info("Compute logbook.")

    if session is None:
        session = app.session

    # debug leftover: a one-day restriction, immediately cleared below
    or_args = [between(TakeoffLanding.timestamp,
                       '2016-06-28 00:00:00', '2016-06-28 23:59:59')]
    or_args = []

    # 'wo' is the window order for the sql window function
    wo = and_(func.date(TakeoffLanding.timestamp),
              TakeoffLanding.device_id,
              TakeoffLanding.timestamp,
              TakeoffLanding.airport_id)

    # make a query with current, previous and next "takeoff_landing" event,
    # so we can find complete flights
    sq = session.query(
            TakeoffLanding.device_id,
            func.lag(TakeoffLanding.device_id).over(order_by=wo).label('device_id_prev'),
            func.lead(TakeoffLanding.device_id).over(order_by=wo).label('device_id_next'),
            TakeoffLanding.timestamp,
            func.lag(TakeoffLanding.timestamp).over(order_by=wo).label('timestamp_prev'),
            func.lead(TakeoffLanding.timestamp).over(order_by=wo).label('timestamp_next'),
            TakeoffLanding.track,
            func.lag(TakeoffLanding.track).over(order_by=wo).label('track_prev'),
            func.lead(TakeoffLanding.track).over(order_by=wo).label('track_next'),
            TakeoffLanding.is_takeoff,
            func.lag(TakeoffLanding.is_takeoff).over(order_by=wo).label('is_takeoff_prev'),
            func.lead(TakeoffLanding.is_takeoff).over(order_by=wo).label('is_takeoff_next'),
            TakeoffLanding.airport_id,
            func.lag(TakeoffLanding.airport_id).over(order_by=wo).label('airport_id_prev'),
            func.lead(TakeoffLanding.airport_id).over(order_by=wo).label('airport_id_next')) \
        .filter(*or_args) \
        .subquery()

    # find complete flights (with takeoff and landing on the same day)
    complete_flight_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'),
            sq.c.track.label('takeoff_track'),
            sq.c.airport_id.label('takeoff_airport_id'),
            sq.c.timestamp_next.label('landing_timestamp'),
            sq.c.track_next.label('landing_track'),
            sq.c.airport_id_next.label('landing_airport_id'),
            label('duration', sq.c.timestamp_next - sq.c.timestamp)) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) == func.date(sq.c.timestamp))

    # split complete flights (with takeoff and landing on different days)
    # into one takeoff and one landing
    split_start_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'),
            sq.c.track.label('takeoff_track'),
            sq.c.airport_id.label('takeoff_airport_id'),
            null().label('landing_timestamp'),
            null().label('landing_track'),
            null().label('landing_airport_id'),
            null().label('duration')) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) != func.date(sq.c.timestamp))

    split_landing_query = session.query(
            sq.c.timestamp_next.label('reftime'),
            sq.c.device_id.label('device_id'),
            null().label('takeoff_timestamp'),
            null().label('takeoff_track'),
            null().label('takeoff_airport_id'),
            sq.c.timestamp_next.label('landing_timestamp'),
            sq.c.track_next.label('landing_track'),
            sq.c.airport_id_next.label('landing_airport_id'),
            null().label('duration')) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) != func.date(sq.c.timestamp))

    # find landings without start
    only_landings_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            null().label('takeoff_timestamp'),
            null().label('takeoff_track'),
            null().label('takeoff_airport_id'),
            sq.c.timestamp.label('landing_timestamp'),
            sq.c.track.label('landing_track'),
            sq.c.airport_id.label('landing_airport_id'),
            null().label('duration')) \
        .filter(sq.c.is_takeoff == false()) \
        .filter(or_(sq.c.device_id != sq.c.device_id_prev,
                    sq.c.is_takeoff_prev == false(),
                    sq.c.is_takeoff_prev == null()))

    # find starts without landing
    only_starts_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'),
            sq.c.track.label('takeoff_track'),
            sq.c.airport_id.label('takeoff_airport_id'),
            null().label('landing_timestamp'),
            null().label('landing_track'),
            null().label('landing_airport_id'),
            null().label('duration')) \
        .filter(sq.c.is_takeoff == true()) \
        .filter(or_(sq.c.device_id != sq.c.device_id_next,
                    sq.c.is_takeoff_next == true(),
                    sq.c.is_takeoff_next == null()))

    # unite all computed flights
    union_query = complete_flight_query.union(
            split_start_query,
            split_landing_query,
            only_landings_query,
            only_starts_query) \
        .subquery()

    # if a logbook entry exists --> update it
    upd = update(Logbook) \
        .where(and_(Logbook.device_id == union_query.c.device_id,
                    union_query.c.takeoff_airport_id != null(),
                    union_query.c.landing_airport_id != null(),
                    or_(and_(Logbook.takeoff_airport_id == union_query.c.takeoff_airport_id,
                             Logbook.takeoff_timestamp == union_query.c.takeoff_timestamp,
                             Logbook.landing_airport_id == null()),
                        and_(Logbook.takeoff_airport_id == null(),
                             Logbook.landing_airport_id == union_query.c.landing_airport_id,
                             Logbook.landing_timestamp == union_query.c.landing_timestamp)))) \
        .values({"takeoff_timestamp": union_query.c.takeoff_timestamp,
                 "takeoff_track": union_query.c.takeoff_track,
                 "takeoff_airport_id": union_query.c.takeoff_airport_id,
                 "landing_timestamp": union_query.c.landing_timestamp,
                 "landing_track": union_query.c.landing_track,
                 "landing_airport_id": union_query.c.landing_airport_id,
                 "duration": union_query.c.duration})

    result = session.execute(upd)
    update_counter = result.rowcount
    session.commit()
    logger.debug("Updated logbook entries: {}".format(update_counter))

    # if a logbook entry doesn't exist --> insert it
    new_logbook_entries = session.query(union_query) \
        .filter(~exists().where(
            and_(Logbook.device_id == union_query.c.device_id,
                 or_(and_(Logbook.takeoff_airport_id == union_query.c.takeoff_airport_id,
                          Logbook.takeoff_timestamp == union_query.c.takeoff_timestamp),
                     and_(Logbook.takeoff_airport_id == null(),
                          union_query.c.takeoff_airport_id == null())),
                 or_(and_(Logbook.landing_airport_id == union_query.c.landing_airport_id,
                          Logbook.landing_timestamp == union_query.c.landing_timestamp),
                     and_(Logbook.landing_airport_id == null(),
                          union_query.c.landing_airport_id == null())))))

    ins = insert(Logbook).from_select(
        (Logbook.reftime,
         Logbook.device_id,
         Logbook.takeoff_timestamp,
         Logbook.takeoff_track,
         Logbook.takeoff_airport_id,
         Logbook.landing_timestamp,
         Logbook.landing_track,
         Logbook.landing_airport_id,
         Logbook.duration),
        new_logbook_entries)

    result = session.execute(ins)
    insert_counter = result.rowcount
    session.commit()
    logger.debug("New logbook entries: {}".format(insert_counter))

    return "{}/{}".format(update_counter, insert_counter)
def build_query_to_report(self, query, aggregate_table):
    median = func.median(aggregate_table.c.upload_samples)
    return query.column(label("upload_median", median))
def build_query_to_report(self, query, aggregate_table):
    a = aggregate_table
    return query.column(label("download_min", func.min(a.c.download_min)))
def build_query_to_report(self, query, aggregate_table):
    a = aggregate_table
    return query.column(label("upload_count", func.sum(a.c.upload_count)))
def build_query_to_report(self, query, aggregate_table):
    a = aggregate_table
    mean = func.sum(a.c.upload_octets) / func.sum(a.c.upload_time)
    is_safe = func.sum(a.c.upload_time) > 0
    safe_mean = case([(is_safe, mean)], else_=None)
    return query.column(label("upload_avg", safe_mean))
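# Plain-Python analogue of the guarded mean above: the SQL CASE expression
# keeps the aggregate NULL instead of dividing by zero when no upload time
# was recorded. Function and argument names here are illustrative.
def safe_upload_avg(total_octets, total_time):
    """Mean throughput, or None when there is no time to divide by."""
    return total_octets / total_time if total_time > 0 else None

assert safe_upload_avg(1000, 0) is None
assert safe_upload_avg(1000, 4) == 250.0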
def build_query_to_report(self, query, aggregate_table):
    median = func.median(aggregate_table.c.rtt_samples)
    return query.column(label("MedianRTT", median))
def __call__(self, user_ids, session):
    """
    Parameters:
        user_ids  : list of mediawiki user ids to find pages for
        session   : sqlalchemy session open on a mediawiki database

    Returns:
        dictionary from user ids to the number of pages edited found
    """
    start_date = self.start_date.data
    end_date = self.end_date.data
    deduplicate = self.deduplicate_across_users.data

    revisions = (
        session.query(
            label('user_id', Revision.rev_user),
            label('page_id', Revision.rev_page),
            label('timestamp', Revision.rev_timestamp)
        )
        .filter(Revision.rev_timestamp > start_date)
        .filter(Revision.rev_timestamp <= end_date))

    archives = (
        session.query(
            label('user_id', Archive.ar_user),
            label('page_id', Archive.ar_page_id),
            label('timestamp', Archive.ar_timestamp)
        )
        .filter(Archive.ar_timestamp > start_date)
        .filter(Archive.ar_timestamp <= end_date))

    if self.namespaces.data and len(self.namespaces.data) > 0:
        revisions = (
            revisions
            .join(Page)
            .filter(Page.page_namespace.in_(self.namespaces.data))
        )
        archives = (
            archives
            .filter(Archive.ar_namespace.in_(self.namespaces.data))
        )

    revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
    archives = self.filter(archives, user_ids, column=Archive.ar_user)

    both = revisions
    if self.include_deleted.data:
        both = both.union_all(archives)
    both = both.subquery()

    if deduplicate:
        # Use a constant user id here to deduplicate only by page.
        # A single result will be returned and assigned to
        # user_id = ROLLUP_USER_ID.
        both_grouped = (
            session.query(
                label('user_id', literal_column(str(ROLLUP_USER_ID))),
                both.c.page_id
            )
            .distinct().subquery()
        )
    else:
        # Select distinct user_id-page_id pairs to count edits by the
        # same user on the same page as one
        both_grouped = (
            session.query(both.c.user_id, both.c.page_id)
            .distinct().subquery()
        )

    query = (
        session.query(both_grouped.c.user_id, func.count())
        .group_by(both_grouped.c.user_id)
    )

    # Format the output
    metric_results = {r[0]: {PagesEdited.id: r[1]} for r in query.all()}

    if user_ids is None:
        return metric_results
    elif deduplicate:
        ret = {}
        ret[ROLLUP_USER_ID] = metric_results.get(
            ROLLUP_USER_ID, self.default_result
        )
        return ret
    else:
        return {
            uid: metric_results.get(uid, self.default_result)
            for uid in user_ids
        }