def get_activity_query(user_id=None, session_id=None, test_id=None):
    # pylint: disable=no-member
    from .models import Activity, Comment, User

    _filter = functools.partial(_apply_filters, user_id=user_id,
                                session_id=session_id, test_id=test_id)

    comments = select([
        literal_column("('comment:' || comment.id)").label('id'),
        literal_column(str(ACTION_COMMENTED)).label('action'),
        Comment.user_id.label('user_id'),
        Comment.session_id.label('session_id'),
        Comment.test_id.label('test_id'),
        Comment.timestamp.label('timestamp'),
        Comment.comment.label('text'),
        User.email.label('user_email'),
    ]).select_from(Comment.__table__.join(User, User.id == Comment.user_id))
    comments = _filter(Comment, comments)

    activity = select([
        literal_column("('activity:' || activity.id)").label('id'),
        Activity.action.label('action'),
        Activity.user_id.label('user_id'),
        Activity.session_id.label('session_id'),
        Activity.test_id.label('test_id'),
        Activity.timestamp.label('timestamp'),
        literal_column("NULL").label('text'),
        User.email.label('user_email'),
    ]).select_from(Activity.__table__.join(User, User.id == Activity.user_id))
    activity = _filter(Activity, activity)

    u = union_all(comments, activity).alias('u')
    return select([u]).order_by(u.c.timestamp)
def __init__(self, sess, unfiltered, filt_crit, tt, window_size=WINDOW_SIZE):
    self.sess = sess
    self.unfiltered = unfiltered
    self.filt_crit = filt_crit
    self.tt = tt
    self.window_size = window_size
    self.skipped = []

    # select-only, can't be used for updates
    self.filtered_s = filtered = select(unfiltered.c).where(filt_crit).alias("filtered")
    self.selectable = (
        select([
            filtered.c.size,
            func.count().label("inode_count"),
            func.max(filtered.c.has_updates).label("has_updates"),
        ])
        .group_by(filtered.c.size)
        .having(and_(literal_column("inode_count") > 1,
                     literal_column("has_updates") > 0))
    )

    # This is higher than selectable.first().size, in order to also clear
    # updates without commonality.
    self.upper_bound = self.sess.query(self.unfiltered.c.size)\
        .order_by(-self.unfiltered.c.size).limit(1).scalar()
def load(self, request, response, subject, data):
    candidates = data['identifiers']
    if not candidates:
        return response([])

    identifiers = []
    for i, identifier in enumerate(candidates):
        identifiers.append("(%d, '%s')" % (i, str(identifier)))

    expr = select([column('rank'), column('id')],
                  from_obj="(values %s) as subset(rank, id)" % ', '.join(identifiers))
    query = (self.schema.session.query(self.model)
             .join(expr.cte('__subset__'), literal_column('__subset__.id') == self.model.id)
             .order_by(literal_column('__subset__.rank')))

    resources = []
    instances = list(query.all())
    instance = (instances.pop(0) if instances else None)

    for id in candidates:
        if instance:
            if instance.id == id:
                resources.append(self._construct_resource(request, instance, data))
                if instances:
                    instance = instances.pop(0)
                else:
                    instance = None
            else:
                resources.append(None)
        else:
            resources.append(None)

    response(resources)
def messages_in_narrow_backend(request, user_profile,
                               msg_ids = REQ(validator=check_list(check_int)),
                               narrow = REQ(converter=narrow_parameter)):
    # type: (HttpRequest, UserProfile, List[int], List[Dict[str, Any]]) -> HttpResponse

    # Note that this function will only work on messages the user
    # actually received

    # TODO: We assume that the narrow is a search.  For now this works because
    # the browser only ever calls this function for searches, since it can't
    # apply that narrow operator itself.

    query = select([column("message_id"), column("subject"), column("rendered_content")],
                   and_(column("user_profile_id") == literal(user_profile.id),
                        column("message_id").in_(msg_ids)),
                   join(table("zerver_usermessage"), table("zerver_message"),
                        literal_column("zerver_usermessage.message_id") ==
                        literal_column("zerver_message.id")))

    builder = NarrowBuilder(user_profile, column("message_id"))
    for term in narrow:
        query = builder.add_term(query, term)

    sa_conn = get_sqlalchemy_connection()
    query_result = list(sa_conn.execute(query).fetchall())

    search_fields = dict()
    for row in query_result:
        (message_id, subject, rendered_content, content_matches, subject_matches) = row
        search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                      content_matches, subject_matches)

    return json_success({"messages": search_fields})
def createView(self):
    # filter indexes
    catalog = self.env.catalog.index_catalog
    xmlindex_list = catalog.getIndexes(package_id='seismology',
                                       resourcetype_id='event')
    filter = ['datetime', 'latitude', 'longitude', 'depth', 'magnitude',
              'magnitude_type', 'event_type', 'np1_strike', 'np1_dip',
              'np1_rake', 'mt_mrr', 'mt_mtt', 'mt_mpp', 'mt_mrt', 'mt_mrp',
              'mt_mtp', 'localisation_method']
    xmlindex_list = [x for x in xmlindex_list if x.label in filter]
    if not xmlindex_list:
        return
    # build up query
    query, joins = catalog._createIndexView(xmlindex_list, compact=True)
    options = [
        sql.literal_column("datetime.keyval").label("end_datetime"),
        sql.literal_column("datetime.keyval").label("start_datetime"),
        sql.case(
            value=sql.literal_column("localisation_method.keyval"),
            whens={'manual': 'circle'},
            else_='square').label('gis_localisation_method'),
        sql.func.GeomFromText(
            sql.text("'POINT(' || longitude.keyval || ' ' || " +
                     "latitude.keyval || ')', 4326")).label('geom')
    ]
    for option in options:
        query.append_column(option)
    query = query.select_from(joins)
    return util.compileStatement(query)
def get_query(qtype='none', qobject='none'):
    if qtype != 'none' and qobject != 'none':
        # build queries for the specified subset of patients
        query = db.session.query(label('sid', qobject.c.patient_sid),
                                 label('value_d', qobject.c.double_value),
                                 label('value_s', qobject.c.string_value),
                                 label('attribute', qobject.c.attribute_value))
    elif qtype == 'count' and qobject == 'none':
        # count of patients
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'))
    else:
        # entire population
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'),
                                 literal_column("'complement'").label('attribute'),
                                 literal_column("'0'").label('value_d'),
                                 literal_column("'null'").label('value_s'))
    db.session.commit()
    db.session.close()
    return query
def read_many_byuser(self, request): """ """ username = request.matchdict['username'] page = int(request.params.get("page", 1)) pagesize = int(request.params.get("pagesize", 10)) if self.Session.query(User).filter(User.username == username).first() == None: raise HTTPNotFound("Requested user does not exist.") items = [] activities_sub_query = self.Session.query(Activity.activity_identifier.label("identifier"), Activity.version, Changeset.timestamp, Changeset.fk_user).\ join(Changeset).\ filter(or_(Activity.fk_status == 2, Activity.fk_status == 3)).subquery(name="sub_act") activities_query = self.Session.query(activities_sub_query, User.username).\ join(User).filter(User.username == username).subquery(name="act") # All active and inactive stakeholders stakeholder_active = self.Session.query(Stakeholder).\ filter(or_(Stakeholder.fk_status == 2, Stakeholder.fk_status == 3)).\ subquery("st_active") # Get the five latest stakeholder by changeset stakeholder_sub_query = self.Session.query(stakeholder_active.c.stakeholder_identifier.label("identifier"), \ stakeholder_active.c.version, Changeset.timestamp, Changeset.fk_user).\ join(Changeset, Changeset.id == stakeholder_active.c.fk_changeset).\ subquery(name="sub_st") # Join the resulting set to the user table stakeholder_query = self.Session.query(stakeholder_sub_query, User.username).\ join(User).filter(User.username == username).subquery(name="st") query = self.Session.query(activities_query, literal_column("\'activity\'").label("type")).\ union(self.Session.query(stakeholder_query, literal_column("\'stakeholder\'").label("type"))).\ order_by(desc(activities_query.c.timestamp)).order_by(desc(activities_query.c.version)) for i in query.offset((page-1)*pagesize).limit(pagesize).all(): items.append({ "type": i.type, "author": i.username, "timestamp": i.timestamp, "version": i.version, "identifier": str(i.identifier) }) return { "items": items, "username": username, "totalitems": query.count(), "pagesize": pagesize, "currentpage": page } return {}
def get_server_search_sources():
    return g.db.query(
        ExternalWFSSource.name.op('||')(literal_column("' ('"))
            .op('||')(ExternalWFSSource.search_property)
            .op('||')(literal_column("')'")).label('label'),
        literal_column("'wfs_'").op('||')(ExternalWFSSource.name).label('value')
    ).filter_by(active=True).union_all(g.db.query(
        GBIServer.title.label('label'),
        literal_column("'parcel_'").op('||')(ParcelSearchSource.id).label('value')
    ).filter(ParcelSearchSource.active == True).join(ParcelSearchSource.gbi_server)).all()
def test_render_check_constraint_sqlexpr(self):
    c = column("c")
    five = literal_column("5")
    ten = literal_column("10")
    eq_ignore_whitespace(
        autogenerate.render._render_check_constraint(
            CheckConstraint(and_(c > five, c < ten)), self.autogen_context
        ),
        "sa.CheckConstraint('c > 5 AND c < 10')",
    )
def test_sqlexpr(self):
    m = MetaData()
    t = Table(
        't', m,
        Column('x', Integer,
               server_default=literal_column('a') + literal_column('b'))
    )
    self.assert_compile(
        CreateTable(t),
        "CREATE TABLE t (x INTEGER DEFAULT a + b)"
    )
def execute(self, request, user, name): alliance = Alliance.load(name) if alliance is None: return HttpResponseRedirect(reverse("alliance_ranks")) ph = aliased(PlanetHistory) members = count().label("members") size = sum(ph.size).label("size") value = sum(ph.value).label("value") score = sum(ph.score).label("score") avg_size = size.op("/")(members).label("avg_size") avg_value = value.op("/")(members).label("avg_value") t10v = count(case(whens=((ph.value_rank <= 10 ,1),), else_=None)).label("t10v") t100v = count(case(whens=((ph.value_rank <= 100 ,1),), else_=None)).label("t100v") pho = aliased(PlanetHistory) sizeo = sum(pho.size).label("sizeo") valueo = sum(pho.value).label("valueo") scoreo = sum(pho.score).label("scoreo") Q = session.query(PlanetHistory.tick.label("tick"), Alliance.id.label("id"), literal_column("rank() OVER (PARTITION BY planet_history.tick ORDER BY sum(planet_history.size) DESC)").label("size_rank"), literal_column("rank() OVER (PARTITION BY planet_history.tick ORDER BY sum(planet_history.value) DESC)").label("value_rank"), ) Q = Q.filter(PlanetHistory.active == True) Q = Q.join(PlanetHistory.current) Q = Q.join(Planet.intel) Q = Q.join(Intel.alliance) Q = Q.group_by(PlanetHistory.tick, Alliance.id) ranks = Q.subquery() Q = session.query(ph.tick, members, size, value, avg_size, avg_value, size-sizeo, value-valueo, score-scoreo, t10v, t100v, ) Q = Q.filter(ph.active == True) Q = Q.join(ph.current) Q = Q.join(Planet.intel) Q = Q.join(Intel.alliance) Q = Q.outerjoin((pho, and_(ph.id==pho.id, ph.tick-1==pho.tick),)) Q = Q.filter(Intel.alliance == alliance) Q = Q.group_by(ph.tick) Q = Q.from_self().add_columns(ranks.c.size_rank, ranks.c.value_rank) Q = Q.outerjoin((ranks, and_(ph.tick == ranks.c.tick, alliance.id == ranks.c.id),)) Q = Q.order_by(desc(ph.tick)) history = Q.all() return render("ialliancehistory.tpl", request, alliance=alliance, members=alliance.intel_members, history=history)
def system_utilisation_counts_by_group(grouping, systems):
    retval = defaultdict(lambda: dict((k, 0) for k in
            ['recipe', 'manual', 'idle_automated', 'idle_manual',
             'idle_broken', 'idle_removed']))
    query = systems.outerjoin(System.open_reservation)\
            .with_entities(grouping,
                func.coalesce(Reservation.type,
                    func.concat('idle_', func.lower(System.status))),
                func.count(System.id))\
            .group_by(literal_column("1"), literal_column("2"))
    for group, state, count in query:
        retval[group][state] = count
    return retval
def testlabels2(self):
    metadata = MetaData()
    table = Table("ImATable", metadata,
                  Column("col1", Integer))
    x = select([table.c.col1.label("ImATable_col1")]).alias("SomeAlias")
    assert str(select([x.c.ImATable_col1])) == \
        '''SELECT "SomeAlias"."ImATable_col1" \nFROM (SELECT "ImATable".col1 AS "ImATable_col1" \nFROM "ImATable") AS "SomeAlias"'''

    # note that 'foo' and 'FooCol' are literals already quoted
    x = select([sql.literal_column("'foo'").label("somelabel")], from_obj=[table]).alias("AnAlias")
    x = x.select()
    assert str(x) == \
        '''SELECT "AnAlias".somelabel \nFROM (SELECT 'foo' AS somelabel \nFROM "ImATable") AS "AnAlias"'''

    x = select([sql.literal_column("'FooCol'").label("SomeLabel")], from_obj=[table])
    x = x.select()
    assert str(x) == \
        '''SELECT "SomeLabel" \nFROM (SELECT 'FooCol' AS "SomeLabel" \nFROM "ImATable")'''
def polymorphic_union(table_map, typecolname, aliasname='p_union'):
    """create a UNION statement used by a polymorphic mapper.

    See the SQLAlchemy advanced mapping docs for an example of how this is used."""
    colnames = util.Set()
    colnamemaps = {}
    types = {}
    for key in table_map.keys():
        table = table_map[key]

        # mysql doesnt like selecting from a select; make it an alias of the select
        if isinstance(table, sql.Select):
            table = table.alias()
            table_map[key] = table

        m = {}
        for c in table.c:
            colnames.add(c.name)
            m[c.name] = c
            types[c.name] = c.type
        colnamemaps[table] = m

    def col(name, table):
        try:
            return colnamemaps[table][name]
        except KeyError:
            return sql.cast(sql.null(), types[name]).label(name)

    result = []
    for type, table in table_map.iteritems():
        if typecolname is not None:
            result.append(sql.select([col(name, table) for name in colnames] +
                                     [sql.literal_column("'%s'" % type).label(typecolname)],
                                     from_obj=[table]))
        else:
            result.append(sql.select([col(name, table) for name in colnames],
                                     from_obj=[table]))
    return sql.union_all(*result).alias(aliasname)
def analytics_compability(): user2set = dict([ (user_id, map(operator.itemgetter(0), { "artist" : db.session.query(Scrobble.artist).\ group_by(Scrobble.artist), "track" : db.session.query(func.concat(Scrobble.artist, literal_column('" – "'), Scrobble.track)).\ group_by(Scrobble.artist, Scrobble.track), } [request.args.get("criterion")].\ filter_by(user_id=user_id).\ having(func.count(Scrobble.id) > int(request.args.get("more_than_x_scrobbles"))).\ all())) for user_id in map(int, request.args.getlist("users")) ]) user2username = dict(db.session.query(User.id, User.username).all()) length2groups = [ (length, filter(lambda (users, set): len(set) > 0, sorted([ ( ", ".join(sorted([user2username[i] for i in user2username if i in group], key=lambda username: username.lower())), reduce(set.intersection, map(set, [user2set[user_id] for user_id in group])) ) for group in itertools.combinations(map(int, request.args.getlist("users")), length) if len(group) == length ], key=lambda (users, set): -len(set)))[:10]) for length in range(2, len(user2username) + 1) ]
def adhoc_metric_to_sqla(self, metric, cols):
    """
    Turn an adhoc metric into a sqlalchemy column.

    :param dict metric: Adhoc metric definition
    :param dict cols: Columns for the current table
    :returns: The metric defined as a sqlalchemy column
    :rtype: sqlalchemy.sql.column
    """
    expression_type = metric.get('expressionType')
    db_engine_spec = self.database.db_engine_spec
    label = db_engine_spec.make_label_compatible(metric.get('label'))

    if expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SIMPLE']:
        column_name = metric.get('column').get('column_name')
        sqla_column = column(column_name)
        table_column = cols.get(column_name)

        if table_column:
            sqla_column = table_column.get_sqla_col()

        sqla_metric = self.sqla_aggregations[metric.get('aggregate')](sqla_column)
        sqla_metric = sqla_metric.label(label)
        return sqla_metric
    elif expression_type == utils.ADHOC_METRIC_EXPRESSION_TYPES['SQL']:
        sqla_metric = literal_column(metric.get('sqlExpression'))
        sqla_metric = sqla_metric.label(label)
        return sqla_metric
    else:
        return None
def query(self):
    pq = qualstat_getstatdata(column("eval_type") == "f")
    base = alias(pq)
    query = (select([
        func.array_agg(column("queryid")).label("queryids"),
        "qualid",
        cast(column("quals"), JSONB).label('quals'),
        "occurences",
        "execution_count",
        func.array_agg(column("query")).label("queries"),
        "avg_filter",
        "filter_ratio"
    ]).select_from(
        join(base, powa_databases,
             onclause=(powa_databases.c.oid == literal_column("dbid"))))
        .where(powa_databases.c.datname == bindparam("database"))
        .where(column("avg_filter") > 1000)
        .where(column("filter_ratio") > 0.3)
        .group_by(column("qualid"), column("execution_count"),
                  column("occurences"),
                  cast(column("quals"), JSONB),
                  column("avg_filter"), column("filter_ratio"))
        .order_by(column("occurences").desc())
        .limit(200))
    return query
def visit_select(self, select, **kwargs):
    """Look for ``LIMIT`` and OFFSET in a select statement, and if
    so tries to wrap it in a subquery with ``row_number()`` criterion.

    """
    if not getattr(select, '_mssql_visit', None) and select._offset:
        # to use ROW_NUMBER(), an ORDER BY is required.
        orderby = self.process(select._order_by_clause)
        if not orderby:
            raise exc.InvalidRequestError('MSSQL requires an order_by when '
                                          'using an offset.')

        _offset = select._offset
        _limit = select._limit
        select._mssql_visit = True
        select = select.column(
            sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)"
                               % orderby).label("mssql_rn")
        ).order_by(None).alias()

        limitselect = sql.select([c for c in select.c if c.key != 'mssql_rn'])
        limitselect.append_whereclause("mssql_rn>%d" % _offset)
        if _limit is not None:
            limitselect.append_whereclause("mssql_rn<=%d" % (_limit + _offset))
        return self.process(limitselect, iswrapper=True, **kwargs)
    else:
        return compiler.SQLCompiler.visit_select(self, select, **kwargs)
def visit_select(self, select, **kwargs):
    """Look for ``LIMIT`` and OFFSET in a select statement, and if
    so tries to wrap it in a subquery with ``row_number()`` criterion.

    """
    if self.dialect.has_window_funcs and \
            (not getattr(select, '_mssql_visit', None)) and \
            (select._limit is not None or select._offset is not None):
        # to use ROW_NUMBER(), an ORDER BY is required.
        orderby = self.process(select._order_by_clause)
        if not orderby:
            orderby = list(select.oid_column.proxies)[0]
            orderby = self.process(orderby)

        _offset = select._offset
        _limit = select._limit
        select._mssql_visit = True
        select = select.column(
            sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)"
                               % orderby).label("mssql_rn")
        ).order_by(None).alias()

        limitselect = sql.select([c for c in select.c if c.key != 'mssql_rn'])
        if _offset is not None:
            limitselect.append_whereclause("mssql_rn>=%d" % _offset)
            if _limit is not None:
                limitselect.append_whereclause("mssql_rn<=%d" % (_limit + _offset))
        else:
            limitselect.append_whereclause("mssql_rn<=%d" % _limit)
        return self.process(limitselect, iswrapper=True, **kwargs)
    else:
        return compiler.DefaultCompiler.visit_select(self, select, **kwargs)
def polymorphic_union(table_map, typecolname, aliasname="p_union", cast_nulls=True):
    """Create a ``UNION`` statement used by a polymorphic mapper.

    See :ref:`concrete_inheritance` for an example of how this is used.

    :param table_map: mapping of polymorphic identities to :class:`.Table`
      objects.
    :param typecolname: string name of a "discriminator" column, which will be
      derived from the query, producing the polymorphic identity for each row.
      If ``None``, no polymorphic discriminator is generated.
    :param aliasname: name of the :func:`~sqlalchemy.sql.expression.alias()`
      construct generated.
    :param cast_nulls: if True, non-existent columns, which are represented
      as labeled NULLs, will be passed into CAST.  This is a legacy behavior
      that is problematic on some backends such as Oracle - in which case it
      can be set to False.

    """
    colnames = util.OrderedSet()
    colnamemaps = {}
    types = {}
    for key in table_map.keys():
        table = table_map[key]

        # mysql doesnt like selecting from a select;
        # make it an alias of the select
        if isinstance(table, sql.Select):
            table = table.alias()
            table_map[key] = table

        m = {}
        for c in table.c:
            colnames.add(c.key)
            m[c.key] = c
            types[c.key] = c.type
        colnamemaps[table] = m

    def col(name, table):
        try:
            return colnamemaps[table][name]
        except KeyError:
            if cast_nulls:
                return sql.cast(sql.null(), types[name]).label(name)
            else:
                return sql.type_coerce(sql.null(), types[name]).label(name)

    result = []
    for type, table in table_map.iteritems():
        if typecolname is not None:
            result.append(
                sql.select(
                    [col(name, table) for name in colnames] +
                    [sql.literal_column(sql_util._quote_ddl_expr(type)).label(typecolname)],
                    from_obj=[table],
                )
            )
        else:
            result.append(sql.select([col(name, table) for name in colnames],
                                     from_obj=[table]))
    return sql.union_all(*result).alias(aliasname)
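A minimal usage sketch for the `polymorphic_union()` helper above. The `employee`/`manager` tables are hypothetical and only illustrate the expected shape of `table_map`; the same call shape works with the public `sqlalchemy.orm.polymorphic_union`.

# Hedged sketch: hypothetical concrete-inheritance tables used to show how
# polymorphic_union() pads missing columns with NULLs and tags each branch
# with a 'type' discriminator before combining them with UNION ALL.
from sqlalchemy import MetaData, Table, Column, Integer, String

metadata = MetaData()
employees = Table('employee', metadata,
                  Column('id', Integer, primary_key=True),
                  Column('name', String(50)))
managers = Table('manager', metadata,
                 Column('id', Integer, primary_key=True),
                 Column('name', String(50)),
                 Column('budget', Integer))

pjoin = polymorphic_union(
    {'employee': employees, 'manager': managers},
    typecolname='type',
    aliasname='pjoin',
)
# pjoin is an alias of the UNION ALL; inspect the generated SQL:
print(pjoin.select())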
def sqla_col(self):
    name = self.column_name
    if not self.expression:
        col = column(self.column_name).label(name)
    else:
        col = literal_column(self.expression).label(name)
    return col
def visit_select(self, select): """Look for ``LIMIT`` and OFFSET in a select statement, and if so tries to wrap it in a subquery with ``row_number()`` criterion. """ # TODO: put a real copy-container on Select and copy, or somehow make this # not modify the Select statement if getattr(select, '_oracle_visit', False): # cancel out the compiled order_by on the select if hasattr(select, "order_by_clause"): self.strings[select.order_by_clause] = "" ansisql.ANSICompiler.visit_select(self, select) return if select.limit is not None or select.offset is not None: select._oracle_visit = True # to use ROW_NUMBER(), an ORDER BY is required. orderby = self.strings[select.order_by_clause] if not orderby: orderby = select.oid_column self.traverse(orderby) orderby = self.strings[orderby] select.append_column(sql.literal_column("ROW_NUMBER() OVER (ORDER BY %s)" % orderby).label("ora_rn")) limitselect = sql.select([c for c in select.c if c.key!='ora_rn']) if select.offset is not None: limitselect.append_whereclause("ora_rn>%d" % select.offset) if select.limit is not None: limitselect.append_whereclause("ora_rn<=%d" % (select.limit + select.offset)) else: limitselect.append_whereclause("ora_rn<=%d" % select.limit) self.traverse(limitselect) self.strings[select] = self.strings[limitselect] self.froms[select] = self.froms[limitselect] else: ansisql.ANSICompiler.visit_select(self, select)
def get_sqla_col(self, label=None):
    db_engine_spec = self.table.database.db_engine_spec
    label = db_engine_spec.make_label_compatible(label if label else self.column_name)
    if not self.expression:
        col = column(self.column_name).label(label)
    else:
        col = literal_column(self.expression).label(label)
    return col
def qualstat_get_figures(conn, database, tsfrom, tsto, queries=None, quals=None):
    condition = text("""datname = :database AND coalesce_range && tstzrange(:from, :to)""")

    if queries is not None:
        condition = and_(condition, array([int(q) for q in queries])
                         .any(literal_column("s.queryid")))
    if quals is not None:
        condition = and_(condition, array([int(q) for q in quals])
                         .any(literal_column("qnc.qualid")))

    sql = (select([
        text('most_filtering.quals'),
        text('most_filtering.query'),
        text('to_json(most_filtering) as "most filtering"'),
        text('to_json(least_filtering) as "least filtering"'),
        text('to_json(most_executed) as "most executed"'),
        text('to_json(most_used) as "most used"')])
        .select_from(
            qual_constants("most_filtering", condition)
            .alias("most_filtering")
            .join(
                qual_constants("least_filtering", condition)
                .alias("least_filtering"),
                text("most_filtering.rownumber = "
                     "least_filtering.rownumber"))
            .join(qual_constants("most_executed", condition)
                  .alias("most_executed"),
                  text("most_executed.rownumber = "
                       "least_filtering.rownumber"))
            .join(qual_constants("most_used", condition)
                  .alias("most_used"),
                  text("most_used.rownumber = "
                       "least_filtering.rownumber"))))

    params = {"database": database, "from": tsfrom, "to": tsto}
    quals = conn.execute(sql, params=params)

    if quals.rowcount == 0:
        return None

    row = quals.first()
    return row
def list_packages():
    """
    Return a list of all packages as JSON.  Uses Postgres to generate all the
    JSON in a single query.

    Optional query parameters:
        collection: filter by collection name (list, literal match)
        name: filter by package name (list, literal match)

    Response format:
    [
        {
            "name": "foo",
            "collection": "f29",
            "state": "unresolved",
            "last_complete_build": {
                "task_id": 123
            }
        },
        ...
    ]
    """
    query = (
        db.query(
            Package.name.label('name'),
            Collection.name.label('collection'),
            # pylint:disable=no-member
            Package.state_string.label('state'),
            sql_if(
                Build.id != None,
                db.query(Build.task_id.label('task_id'))
                .correlate(Build)
                .as_record()
            ).label('last_complete_build')
        )
        .join(Collection)
        .outerjoin(
            Build,
            (Package.last_complete_build_id == Build.id) & Build.last_complete
        )
        .order_by(Package.name)
    )
    if 'name' in request.args:
        query = query.filter(Package.name.in_(request.args.getlist('name')))
    if 'collection' in request.args:
        query = query.filter(Collection.name.in_(request.args.getlist('collection')))
    result = (
        db.query(literal_column(
            "coalesce(array_to_json(array_agg(row_to_json(pkg_query)))::text, '[]')"
        ).label('q'))
        .select_from(query.subquery('pkg_query'))
        .scalar()
    )
    return Response(result, mimetype='application/json')
def get_sqla_col(self, label=None):
    label = label or self.column_name
    if not self.expression:
        db_engine_spec = self.table.database.db_engine_spec
        type_ = db_engine_spec.get_sqla_column_type(self.type)
        col = column(self.column_name, type_=type_)
    else:
        col = literal_column(self.expression)
    col = self.table.make_sqla_column_compatible(col, label)
    return col
def visit_extract(self, extract, **kwargs):
    field = self.extract_map.get(extract.field, extract.field)
    affinity = sql_util.determine_date_affinity(extract.expr)

    casts = {
        sqltypes.Date: 'date',
        sqltypes.DateTime: 'timestamp',
        sqltypes.Interval: 'interval',
        sqltypes.Time: 'time',
    }
    cast = casts.get(affinity, None)
    if isinstance(extract.expr, sql.ColumnElement) and cast is not None:
        expr = extract.expr.op('::')(sql.literal_column(cast))
    else:
        expr = extract.expr
    return "EXTRACT(%s FROM %s)" % (field, self.process(expr))
def weighted_ilike(self, value, weight=1):
    """ Calls the ILIKE operator and returns either 0 or the given weight. """

    # Make sure weight is numeric and we can safely
    # pass it to the literal_column()
    assert isinstance(weight, (int, float))

    # Convert weight to a literal_column()
    weight = literal_column(str(weight))

    # Return ilike expression
    return cast(and_(self != None, self.ilike(value)), Integer) * weight
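A hedged usage sketch for `weighted_ilike` above. The `Page` model, its `title`/`body` columns, and the way the method is exposed on them (e.g. via a custom comparator or hybrid) are assumptions for illustration only; only the scoring idea comes from the snippet.

# Hypothetical model and session. Each ILIKE match contributes its weight,
# so rows can be ranked by a simple additive relevance score.
score = (Page.title.weighted_ilike('%sqlalchemy%', 3) +
         Page.body.weighted_ilike('%sqlalchemy%', 1)).label('score')

results = (session.query(Page, score)
           .filter(score > 0)        # keep only rows with at least one match
           .order_by(score.desc())   # highest combined weight first
           .all())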
def get_rows(cls, app_id, group_id):
    q1 = DBSession.query(osModules.id.label('module_id'),
                         osModules.kode.label('module_kode'),
                         osModules.nama.label('module_nama'),
                         literal_column('0').label('reads'),
                         literal_column('0').label('writes'),
                         literal_column('0').label('inserts'),
                         literal_column('0').label('deletes'),
                         ).filter(osModules.app_id == app_id) \
                          .filter(osModules.id.notin_(
                              DBSession.query(cls.module_id).filter(cls.group_id == group_id)))

    q2 = DBSession.query(osGroupModules.module_id.label('module_id'),
                         osModules.kode.label('module_kode'),
                         osModules.nama.label('module_nama'),
                         osGroupModules.reads.label('reads'),
                         osGroupModules.writes.label('writes'),
                         osGroupModules.inserts.label('inserts'),
                         osGroupModules.deletes.label('deletes'),
                         ).join(osModules) \
                          .filter(osGroupModules.group_id == group_id) \
                          .filter(osModules.app_id == app_id)

    return q1.union(q2).all()
def test_literal_column_already_with_quotes(self):
    # Lower case names
    metadata = MetaData()
    table = Table("t1", metadata, Column("col1", Integer))

    # Note that 'col1' is already quoted (literal_column)
    columns = [sql.literal_column("'col1'").label("label1")]
    x = select(columns, from_obj=[table]).alias("alias1")
    x = x.select()
    self.assert_compile(
        x,
        "SELECT "
        "alias1.label1 "
        "FROM ("
        "SELECT "
        "'col1' AS label1 "
        "FROM t1"
        ") AS alias1",
    )

    # Not lower case names
    metadata = MetaData()
    table = Table("T1", metadata, Column("Col1", Integer))

    # Note that 'Col1' is already quoted (literal_column)
    columns = [sql.literal_column("'Col1'").label("Label1")]
    x = select(columns, from_obj=[table]).alias("Alias1")
    x = x.select()
    self.assert_compile(
        x,
        "SELECT "
        '"Alias1"."Label1" '
        "FROM ("
        "SELECT "
        "'Col1' AS \"Label1\" "
        'FROM "T1"'
        ') AS "Alias1"',
    )
def get_measurement(measurement_id, download=None): """Get one measurement by measurement_id, fetching the file from S3 or the fastpath host as needed Returns only the measurement without extra data from the database """ if measurement_id.startswith(FASTPATH_MSM_ID_PREFIX): return get_one_fastpath_measurement(measurement_id, download) # XXX this query is slow due to filtering by report_id and input # It also occasionally return multiple rows and serves only the first one # TODO: add timing metric # TODO: switch to OOID to speed up the query # https://github.com/ooni/pipeline/issues/48 m = RE_MSM_ID.match(measurement_id) if not m: raise BadRequest("Invalid measurement_id") msm_no = int(m.group(1)) cols = [ literal_column("measurement.report_no"), literal_column("frame_off"), literal_column("frame_size"), literal_column("intra_off"), literal_column("intra_size"), literal_column("textname"), literal_column("report.autoclaved_no"), literal_column("autoclaved.filename"), ] table = ( sql.table("measurement") .join( sql.table("report"), sql.text("measurement.report_no = report.report_no"), ) .join( sql.table("autoclaved"), sql.text("autoclaved.autoclaved_no = report.autoclaved_no"), ) ) where = sql.text("measurement.msm_no = :msm_no") query = select(cols).where(where).select_from(table) query_params = dict(msm_no=msm_no) q = current_app.db_session.execute(query, query_params) msmt = q.fetchone() if msmt is None: abort(404) # Usual size of LZ4 frames is 256kb of decompressed text. # Largest size of LZ4 frame was ~55Mb compressed and ~56Mb decompressed. :-/ range_header = "bytes={}-{}".format( msmt.frame_off, msmt.frame_off + msmt.frame_size - 1 ) filename = msmt["autoclaved.filename"] r = requests.get( urljoin(current_app.config["AUTOCLAVED_BASE_URL"], filename), headers={"Range": range_header, REQID_HDR: request_id()}, ) r.raise_for_status() blob = r.content if len(blob) != msmt.frame_size: raise RuntimeError("Failed to fetch LZ4 frame", len(blob), msmt.frame_size) blob = lz4framed.decompress(blob)[msmt.intra_off : msmt.intra_off + msmt.intra_size] if len(blob) != msmt.intra_size or blob[:1] != b"{" or blob[-1:] != b"}": raise RuntimeError( "Failed to decompress LZ4 frame to measurement.json", len(blob), msmt.intra_size, blob[:1], blob[-1:], ) # There is no replacement of `measurement_id` with `msm_no` or anything # else to keep sanity. Maybe it'll happen as part of orchestration update. # Also, blob is not decoded intentionally to save CPU filename = "ooni-msmt-{}-{}".format(measurement_id, msmt.textname.replace("/", "-")) response = make_response(blob) response.headers.set("Content-Type", "application/json") if download is not None: response.headers.set("Content-Disposition", "attachment", filename=filename) return response
def list_measurements( report_id=None, probe_asn=None, probe_cc=None, test_name=None, since=None, until=None, since_index=None, order_by=None, order="desc", offset=0, limit=100, failure=None, anomaly=None, confirmed=None, category_code=None, ): """Search for measurements using only the database. Provide pagination. """ # TODO: list_measurements and get_measurement will be simplified and # made faster by OOID: https://github.com/ooni/pipeline/issues/48 log = current_app.logger ## Workaround for https://github.com/ooni/probe/issues/1034 user_agent = request.headers.get("User-Agent") if user_agent.startswith("okhttp"): bug_probe1034_response = jsonify( { "metadata": { "count": 1, "current_page": 1, "limit": 100, "next_url": None, "offset": 0, "pages": 1, "query_time": 0.001, }, "results": [{"measurement_url": ""}], } ) return bug_probe1034_response ## Prepare query parameters input_ = request.args.get("input") domain = request.args.get("domain") if probe_asn is not None: if probe_asn.startswith("AS"): probe_asn = probe_asn[2:] probe_asn = int(probe_asn) # When the user specifies a list that includes all the possible values for # boolean arguments, that is logically the same of applying no filtering at # all. # TODO: treat it as an error? if failure is not None: if set(failure) == set(["true", "false"]): failure = None else: failure = set(failure) == set(["true"]) if anomaly is not None: if set(anomaly) == set(["true", "false"]): anomaly = None else: anomaly = set(anomaly) == set(["true"]) if confirmed is not None: if set(confirmed) == set(["true", "false"]): confirmed = None else: confirmed = set(confirmed) == set(["true"]) try: if since is not None: since = parse_date(since) except ValueError: raise BadRequest("Invalid since") try: if until is not None: until = parse_date(until) except ValueError: raise BadRequest("Invalid until") if order.lower() not in ("asc", "desc"): raise BadRequest("Invalid order") INULL = "" # Special value for input = NULL to merge rows with FULL OUTER JOIN ## Create measurement+report colums for SQL query cols = [ # sql.text("measurement.input_no"), literal_column("report.test_start_time").label("test_start_time"), literal_column("measurement.measurement_start_time").label( "measurement_start_time" ), func.concat(MSM_ID_PREFIX, "-", sql.text("measurement.msm_no")).label( "measurement_id" ), literal_column("measurement.report_no").label("m_report_no"), func.coalesce(sql.text("measurement.anomaly"), false()).label("anomaly"), func.coalesce(sql.text("measurement.confirmed"), false()).label("confirmed"), sql.text("measurement.exc IS NOT NULL AS failure"), func.coalesce("{}").label("scores"), literal_column("measurement.exc").label("exc"), literal_column("measurement.residual_no").label("residual_no"), literal_column("report.report_id").label("report_id"), literal_column("report.probe_cc").label("probe_cc"), literal_column("report.probe_asn").label("probe_asn"), literal_column("report.test_name").label("test_name"), literal_column("report.report_no").label("report_no"), func.coalesce(sql.text("domain_input.input"), INULL).label("input"), ] ## Create fastpath columns for query fpcols = [ # func.coalesce(0).label("m_input_no"), # We use test_start_time here as the batch pipeline has many NULL measurement_start_times literal_column("measurement_start_time").label("test_start_time"), literal_column("measurement_start_time").label("measurement_start_time"), func.concat(FASTPATH_MSM_ID_PREFIX, sql.text("tid")).label("measurement_id"), 
literal_column("anomaly").label("anomaly"), literal_column("confirmed").label("confirmed"), literal_column("msm_failure").label("failure"), cast(sql.text("scores"), String).label("scores"), literal_column("report_id"), literal_column("probe_cc"), literal_column("probe_asn"), literal_column("test_name"), func.coalesce(sql.text("fastpath.input"), INULL).label("input"), ] mrwhere = [] fpwhere = [] query_params = {} # Populate WHERE clauses and query_params dict if since is not None: query_params["since"] = since mrwhere.append(sql.text("measurement.measurement_start_time > :since")) fpwhere.append(sql.text("measurement_start_time > :since")) if until is not None: query_params["until"] = until mrwhere.append(sql.text("measurement.measurement_start_time <= :until")) fpwhere.append(sql.text("measurement_start_time <= :until")) if report_id: query_params["report_id"] = report_id mrwhere.append(sql.text("report.report_id = :report_id")) fpwhere.append(sql.text("report_id = :report_id")) if probe_cc: query_params["probe_cc"] = probe_cc mrwhere.append(sql.text("report.probe_cc = :probe_cc")) fpwhere.append(sql.text("probe_cc = :probe_cc")) if probe_asn is not None: query_params["probe_asn"] = probe_asn mrwhere.append(sql.text("report.probe_asn = :probe_asn")) fpwhere.append(sql.text("probe_asn = :probe_asn")) if test_name is not None: query_params["test_name"] = test_name mrwhere.append(sql.text("report.test_name = :test_name")) fpwhere.append(sql.text("test_name = :test_name")) # Filter on anomaly, confirmed and failure: # The database stores anomaly and confirmed as boolean + NULL and stores # failures in different columns. This leads to many possible combinations # but only a subset is used. # On anomaly and confirmed: any value != TRUE is treated as FALSE # See test_list_measurements_filter_flags_fastpath if anomaly is True: mrwhere.append(sql.text("measurement.anomaly IS TRUE")) fpwhere.append(sql.text("fastpath.anomaly IS TRUE")) elif anomaly is False: mrwhere.append(sql.text("measurement.anomaly IS NOT TRUE")) fpwhere.append(sql.text("fastpath.anomaly IS NOT TRUE")) if confirmed is True: mrwhere.append(sql.text("measurement.confirmed IS TRUE")) fpwhere.append(sql.text("fastpath.confirmed IS TRUE")) elif confirmed is False: mrwhere.append(sql.text("measurement.confirmed IS NOT TRUE")) fpwhere.append(sql.text("fastpath.confirmed IS NOT TRUE")) if failure is True: # residual_no is never NULL, msm_failure is always NULL mrwhere.append(sql.text("measurement.exc IS NOT NULL")) fpwhere.append(sql.text("fastpath.msm_failure IS TRUE")) elif failure is False: # on success measurement.exc is NULL mrwhere.append(sql.text("measurement.exc IS NULL")) fpwhere.append(sql.text("fastpath.msm_failure IS NOT TRUE")) fpq_table = sql.table("fastpath") mr_table = sql.table("measurement").join( sql.table("report"), sql.text("measurement.report_no = report.report_no"), ) if input_ or domain or category_code: # join in domain_input mr_table = mr_table.join( sql.table("domain_input"), sql.text("domain_input.input_no = measurement.input_no"), ) fpq_table = fpq_table.join( sql.table("domain_input"), sql.text("domain_input.input = fastpath.input") ) if input_: # input_ overrides domain and category_code query_params["input"] = input_ mrwhere.append(sql.text("domain_input.input = :input")) fpwhere.append(sql.text("domain_input.input = :input")) else: # both domain and category_code can be set at the same time if domain: query_params["domain"] = domain mrwhere.append(sql.text("domain_input.domain = :domain")) 
fpwhere.append(sql.text("domain_input.domain = :domain")) if category_code: query_params["category_code"] = category_code mr_table = mr_table.join( sql.table("citizenlab"), sql.text("citizenlab.url = domain_input.input"), ) fpq_table = fpq_table.join( sql.table("citizenlab"), sql.text("citizenlab.url = domain_input.input"), ) mrwhere.append(sql.text("citizenlab.category_code = :category_code")) fpwhere.append(sql.text("citizenlab.category_code = :category_code")) else: mr_table = mr_table.outerjoin( sql.table("domain_input"), sql.text("domain_input.input_no = measurement.input_no"), ) # We runs SELECTs on the measurement-report (mr) tables and faspath independently # from each other and then merge them. # The FULL OUTER JOIN query is using LIMIT and OFFSET based on the # list_measurements arguments. To speed up the two nested queries, # an ORDER BY + LIMIT on "limit+offset" is applied in each of them to trim # away rows that would be removed anyways by the outer query. # # During a merge we can find that a measurement is: # - only in fastpath: get_measurement will pick the JSON msmt from the fastpath host # - in both selects: pick `scores` from fastpath and the msmt from the can # - only in "mr": the msmt from the can # # This implements a failover mechanism where new msmts are loaded from fastpath # but can fall back to the traditional pipeline. mr_query = ( select(cols).where(and_(*mrwhere)).select_from(mr_table).limit(offset + limit) ) fp_query = ( select(fpcols) .where(and_(*fpwhere)) .select_from(fpq_table) .limit(offset + limit) ) if order_by is None: # Use test_start_time or measurement_start_time depending on other # filters in order to avoid heavy joins. # Filtering on anomaly, confirmed, msm_failure -> measurement_start_time # Filtering on probe_cc, probe_asn, test_name -> test_start_time # See test_list_measurements_slow_order_by_* tests if probe_cc or probe_asn or test_name: order_by = "test_start_time" elif anomaly or confirmed or failure or input_ or domain or category_code: order_by = "measurement_start_time" else: order_by = "measurement_start_time" mr_query = mr_query.order_by(text("{} {}".format(order_by, order))) fp_query = fp_query.order_by(text("{} {}".format(order_by, order))) mr_query = mr_query.alias("mr") fp_query = fp_query.alias("fp") j = fp_query.join( mr_query, sql.text("fp.input = mr.input AND fp.report_id = mr.report_id"), full=True, ) def coal(colname): return func.coalesce( literal_column(f"fp.{colname}"), literal_column(f"mr.{colname}") ).label(colname) # Merge data from mr_table and fastpath. # Most of the time we prefer data from fastpath, using coal(). # For measurement_id, we prefer mr_table. See test_list_measurements_shared merger = [ coal("test_start_time"), coal("measurement_start_time"), func.coalesce( literal_column("mr.measurement_id"), literal_column("fp.measurement_id") ).label("measurement_id"), func.coalesce(literal_column("mr.m_report_no"), 0).label("m_report_no"), coal("anomaly"), coal("confirmed"), coal("failure"), func.coalesce(literal_column("fp.scores"), "{}").label("scores"), column("exc"), func.coalesce(literal_column("mr.residual_no"), 0).label("residual_no"), coal("report_id"), coal("probe_cc"), coal("probe_asn"), coal("test_name"), coal("input"), ] # Assemble the "external" query. 
Run a final order by followed by limit and # offset fob = text("{} {}".format(order_by, order)) query = select(merger).select_from(j).order_by(fob).offset(offset).limit(limit) with sentry.configure_scope() as scope: # Set query (without params) in Sentry scope for the rest of the API call # https://github.com/getsentry/sentry-python/issues/184 scope.set_extra("sql_query", query) # Run the query, generate the results list iter_start_time = time.time() try: q = current_app.db_session.execute(query, query_params) tmpresults = [] for row in q: url = urljoin( current_app.config["BASE_URL"], "/api/v1/measurement/%s" % row.measurement_id, ) tmpresults.append( { "measurement_url": url, "measurement_id": row.measurement_id, "report_id": row.report_id, "probe_cc": row.probe_cc, "probe_asn": "AS{}".format(row.probe_asn), "test_name": row.test_name, "measurement_start_time": row.measurement_start_time, "input": row.input, "anomaly": row.anomaly, "confirmed": row.confirmed, "failure": row.failure, "scores": json.loads(row.scores), } ) except OperationalError as exc: if isinstance(exc.orig, QueryCanceledError): # Timeout due to a slow query. Generate metric and do not feed it # to Sentry. abort(504) raise exc # For each report_id / input tuple, we want at most one entry. Measurements # from mr_table and fastpath has already been merged by the FULL OUTER JOIN # but we have duplicate msmts sharing the same report_id / input. results = _merge_results(tmpresults) # Replace the special value INULL for "input" with None for i, r in enumerate(results): if r["input"] == INULL: results[i]["input"] = None pages = -1 count = -1 current_page = math.ceil(offset / limit) + 1 # We got less results than what we expected, we know the count and that we are done if len(results) < limit: count = offset + len(results) pages = math.ceil(count / limit) next_url = None else: # XXX this is too intensive. find a workaround # count_start_time = time.time() # count = q.count() # pages = math.ceil(count / limit) # current_page = math.ceil(offset / limit) + 1 # query_time += time.time() - count_start_time next_args = request.args.to_dict() next_args["offset"] = "%s" % (offset + limit) next_args["limit"] = "%s" % limit next_url = urljoin( current_app.config["BASE_URL"], "/api/v1/measurements?%s" % urlencode(next_args), ) query_time = time.time() - iter_start_time metadata = { "offset": offset, "limit": limit, "count": count, "pages": pages, "current_page": current_page, "next_url": next_url, "query_time": query_time, } return jsonify({"metadata": metadata, "results": results[:limit]})
def coal(colname):
    return func.coalesce(
        literal_column(f"fp.{colname}"), literal_column(f"mr.{colname}")
    ).label(colname)
async def update(cls, conn: SAConn, col: dict, col_upd: dict) -> bool:
    key = list(col.keys())[0]
    query = cls.__table__.update().where(
        literal_column(key) == col[key]).values(**col_upd)
    await conn.execute(query)
    return True
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None): """Querying any sqla table from this common interface""" template_kwargs = { 'from_dttm': from_dttm, 'groupby': groupby, 'metrics': metrics, 'row_limit': row_limit, 'to_dttm': to_dttm, } template_processor = self.get_template_processor(**template_kwargs) # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) for m in metrics: if m not in metrics_dict: raise Exception(_("Metric '{}' is not valid".format(m))) metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics] timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric) timeseries_limit_metric_expr = None if timeseries_limit_metric: timeseries_limit_metric_expr = \ timeseries_limit_metric.sqla_col if metrics: main_metric_expr = metrics_exprs[0] else: main_metric_expr = literal_column("COUNT(*)").label("ccount") select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [] inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] outer = col.sqla_col inner = col.sqla_col.label(col.column_name + '__') groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(cols[s].sqla_col) metrics_exprs = [] if granularity: @compiles(ColumnClause) def visit_column(element, compiler, **kw): """Patch for sqlalchemy bug TODO: sqlalchemy 1.2 release should be doing this on its own. Patch only if the column clause is specific for DateTime set and granularity is selected. 
""" text = compiler.visit_column(element, **kw) try: if (element.is_literal and hasattr(element.type, 'python_type') and type(element.type) is DateTime): text = text.replace('%%', '%') except NotImplementedError: # Some elements raise NotImplementedError for python_type pass return text dttm_col = cols[granularity] time_grain = extras.get('time_grain_sqla') if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs += [timestamp] time_filter = dttm_col.get_time_filter(from_dttm, to_dttm) select_exprs += metrics_exprs qry = sa.select(select_exprs) # Supporting arbitrary SQL statements in place of tables if self.sql: from_sql = template_processor.process_template(self.sql) tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry') else: tbl = self.get_sqla_table() if not columns: qry = qry.group_by(*groupby_exprs) where_clause_and = [] having_clause_and = [] for flt in filter: if not all([flt.get(s) for s in ['col', 'op', 'val']]): continue col = flt['col'] op = flt['op'] eq = flt['val'] col_obj = cols.get(col) if col_obj: if op in ('in', 'not in'): values = [types.strip("'").strip('"') for types in eq] if col_obj.is_num: values = [utils.js_string_to_num(s) for s in values] cond = col_obj.sqla_col.in_(values) if op == 'not in': cond = ~cond where_clause_and.append(cond) elif op == '==': where_clause_and.append(col_obj.sqla_col == eq) elif op == '!=': where_clause_and.append(col_obj.sqla_col != eq) elif op == '>': where_clause_and.append(col_obj.sqla_col > eq) elif op == '<': where_clause_and.append(col_obj.sqla_col < eq) elif op == '>=': where_clause_and.append(col_obj.sqla_col >= eq) elif op == '<=': where_clause_and.append(col_obj.sqla_col <= eq) elif op == 'LIKE': where_clause_and.append(col_obj.sqla_col.like(eq)) if extras: where = extras.get('where') if where: where = template_processor.process_template(where) where_clause_and += [sa.text('({})'.format(where))] having = extras.get('having') if having: having = template_processor.process_template(having) having_clause_and += [sa.text('({})'.format(having))] if granularity: qry = qry.where(and_(*([time_filter] + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if groupby: qry = qry.order_by(desc(main_metric_expr)) elif orderby: for col, ascending in orderby: direction = asc if ascending else desc qry = qry.order_by(direction(col)) qry = qry.limit(row_limit) if is_timeseries and timeseries_limit and groupby: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = main_metric_expr.label('mme_inner__') inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs) subq = subq.select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, ) subq = subq.where(and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric_expr is not None: ob = timeseries_limit_metric_expr subq = subq.order_by(desc(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append(groupby_exprs[i] == column(gb + '__')) tbl = tbl.join(subq.alias(), and_(*on_clause)) return qry.select_from(tbl)
async def delete(cls, conn: SAConn, col: dict) -> bool:
    key = list(col.keys())[0]
    query = cls.__table__.delete().where(literal_column(key) == col[key])
    await conn.execute(query)
    return True
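A hedged usage sketch for the `update`/`delete` classmethods above. The `User` model, the DSN, and the aiopg-sa engine setup are assumptions for illustration only; the snippet itself only defines the two coroutines.

# Assumed setup: an aiopg.sa engine and a declarative User model whose class
# exposes the update()/delete() classmethods shown above.
import asyncio
from aiopg.sa import create_engine

async def main():
    async with create_engine(dsn="postgresql://localhost/example") as engine:
        async with engine.acquire() as conn:
            # The WHERE clause is built from the single key in the dict,
            # via literal_column(key) == value (here: WHERE id = 1).
            await User.update(conn, {"id": 1}, {"name": "renamed"})
            await User.delete(conn, {"id": 1})

asyncio.run(main())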
def visit_select(self, select, **kwargs): """Look for ``LIMIT`` and OFFSET in a select statement, and if so tries to wrap it in a subquery with ``rownum`` criterion. """ if not getattr(select, '_oracle_visit', None): if not self.dialect.use_ansi: if self.stack and 'from' in self.stack[-1]: existingfroms = self.stack[-1]['from'] else: existingfroms = None froms = select._get_display_froms(existingfroms) whereclause = self._get_nonansi_join_whereclause(froms) if whereclause: select = select.where(whereclause) select._oracle_visit = True if select._limit is not None or select._offset is not None: # See http://www.oracle.com/technology/oramag/oracle/06-sep/o56asktom.html # # Generalized form of an Oracle pagination query: # select ... from ( # select /*+ FIRST_ROWS(N) */ ...., rownum as ora_rn from ( # select distinct ... where ... order by ... # ) where ROWNUM <= :limit+:offset # ) where ora_rn > :offset # Outer select and "ROWNUM as ora_rn" can be dropped if limit=0 # TODO: use annotations instead of clone + attr set ? select = select._generate() select._oracle_visit = True # Wrap the middle select and add the hint limitselect = sql.select([c for c in select.c]) if select._limit and self.dialect.optimize_limits: limitselect = limitselect.prefix_with("/*+ FIRST_ROWS(%d) */" % select._limit) limitselect._oracle_visit = True limitselect._is_wrapper = True # If needed, add the limiting clause if select._limit is not None: max_row = select._limit if select._offset is not None: max_row += select._offset limitselect.append_whereclause( sql.literal_column("ROWNUM")<=max_row) # If needed, add the ora_rn, and wrap again with offset. if select._offset is None: select = limitselect else: limitselect = limitselect.column( sql.literal_column("ROWNUM").label("ora_rn")) limitselect._oracle_visit = True limitselect._is_wrapper = True offsetselect = sql.select( [c for c in limitselect.c if c.key!='ora_rn']) offsetselect._oracle_visit = True offsetselect._is_wrapper = True offsetselect.append_whereclause( sql.literal_column("ora_rn")>select._offset) select = offsetselect kwargs['iswrapper'] = getattr(select, '_is_wrapper', False) return compiler.DefaultCompiler.visit_select(self, select, **kwargs)
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None, form_data=None): """Querying any sqla table from this common interface""" template_kwargs = { 'from_dttm': from_dttm, 'groupby': groupby, 'metrics': metrics, 'row_limit': row_limit, 'to_dttm': to_dttm, 'form_data': form_data, } template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec if DTTM_ALIAS in groupby: groupby.remove(DTTM_ALIAS) is_timeseries = True # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) for m in metrics: if m not in metrics_dict: raise Exception(_("Metric '{}' is not valid".format(m))) metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics] timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric) timeseries_limit_metric_expr = None if timeseries_limit_metric: timeseries_limit_metric_expr = \ timeseries_limit_metric.sqla_col if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr = literal_column("COUNT(*)").label("ccount") select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [] inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] outer = col.sqla_col inner = col.sqla_col.label(col.column_name + '__') groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(cols[s].sqla_col) metrics_exprs = [] if granularity: dttm_col = cols[granularity] time_grain = extras.get('time_grain_sqla') time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs += [timestamp] # Use main dttm column to support index with secondary dttm columns if db_engine_spec.time_secondary_columns and \ self.main_dttm_col in self.dttm_cols and \ self.main_dttm_col != dttm_col.column_name: time_filters.append(cols[self.main_dttm_col].get_time_filter( from_dttm, to_dttm)) time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor, db_engine_spec) if not columns: qry = qry.group_by(*groupby_exprs) where_clause_and = [] having_clause_and = [] for flt in filter: if not all([flt.get(s) for s in ['col', 'op', 'val']]): continue col = flt['col'] op = flt['op'] eq = flt['val'] col_obj = cols.get(col) if col_obj: if op in ('in', 'not in'): values = [] for v in eq: # For backwards compatibility and edge cases # where a column data type might have changed if isinstance(v, basestring): v = v.strip("'").strip('"') if col_obj.is_num: v = utils.string_to_num(v) # Removing empty strings and non numeric values # targeting numeric columns if v is not None: values.append(v) cond = col_obj.sqla_col.in_(values) if op == 'not in': cond = ~cond where_clause_and.append(cond) else: if col_obj.is_num: eq = 
utils.string_to_num(flt['val']) if op == '==': where_clause_and.append(col_obj.sqla_col == eq) elif op == '!=': where_clause_and.append(col_obj.sqla_col != eq) elif op == '>': where_clause_and.append(col_obj.sqla_col > eq) elif op == '<': where_clause_and.append(col_obj.sqla_col < eq) elif op == '>=': where_clause_and.append(col_obj.sqla_col >= eq) elif op == '<=': where_clause_and.append(col_obj.sqla_col <= eq) elif op == 'LIKE': where_clause_and.append(col_obj.sqla_col.like(eq)) if extras: where = extras.get('where') if where: where = template_processor.process_template(where) where_clause_and += [sa.text('({})'.format(where))] having = extras.get('having') if having: having = template_processor.process_template(having) having_clause_and += [sa.text('({})'.format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if groupby: qry = qry.order_by(desc(main_metric_expr)) elif orderby: for col, ascending in orderby: direction = asc if ascending else desc qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if is_timeseries and \ timeseries_limit and groupby and not time_groupby_inline: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = main_metric_expr.label('mme_inner__') inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs) subq = subq.select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, ) subq = subq.where(and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric_expr is not None: ob = timeseries_limit_metric_expr subq = subq.order_by(desc(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append(groupby_exprs[i] == column(gb + '__')) tbl = tbl.join(subq.alias(), and_(*on_clause)) return qry.select_from(tbl)
def airspace_image(cache_key, airspace_id): if not mapscript_available: abort(404) # get information from cache... infringements = cache.get('upload_airspace_infringements_' + cache_key) flight_path = cache.get('upload_airspace_flight_path_' + cache_key) # abort if invalid cache key if not infringements \ or not flight_path: abort(404) # Convert the coordinate into a list of tuples coordinates = [(c.location['longitude'], c.location['latitude']) for c in flight_path] # Create a shapely LineString object from the coordinates linestring = LineString(coordinates) # Save the new path as WKB locations = from_shape(linestring, srid=4326) highlight_locations = [] extent_epsg4326 = [180, 85.05112878, -180, -85.05112878] for period in infringements[airspace_id]: # Convert the coordinate into a list of tuples coordinates = [(c['location']['longitude'], c['location']['latitude']) for c in period] # Create a shapely LineString object from the coordinates if len(coordinates) == 1: # a LineString must contain at least two points... linestring = LineString([coordinates[0], coordinates[0]]) else: linestring = LineString(coordinates) highlight_locations.append(linestring) # gather extent (minx, miny, maxx, maxy) = linestring.bounds extent_epsg4326[0] = min(extent_epsg4326[0], minx) extent_epsg4326[1] = min(extent_epsg4326[1], miny) extent_epsg4326[2] = max(extent_epsg4326[2], maxx) extent_epsg4326[3] = max(extent_epsg4326[3], maxy) # Save the new path as WKB highlight_multilinestring = from_shape(MultiLineString(highlight_locations), srid=4326) # increase extent by factor 1.05 width = abs(extent_epsg4326[0] - extent_epsg4326[2]) height = abs(extent_epsg4326[1] - extent_epsg4326[3]) center_x = (extent_epsg4326[0] + extent_epsg4326[2]) / 2 center_y = (extent_epsg4326[1] + extent_epsg4326[3]) / 2 extent_epsg4326[0] = center_x - width / 2 * 1.05 extent_epsg4326[1] = center_y - height / 2 * 1.05 extent_epsg4326[2] = center_x + width / 2 * 1.05 extent_epsg4326[3] = center_y + height / 2 * 1.05 # minimum extent should be 0.3 deg width = abs(extent_epsg4326[0] - extent_epsg4326[2]) height = abs(extent_epsg4326[1] - extent_epsg4326[3]) if width < 0.3: extent_epsg4326[0] = center_x - 0.15 extent_epsg4326[2] = center_x + 0.15 if height < 0.3: extent_epsg4326[1] = center_y - 0.15 extent_epsg4326[3] = center_y + 0.15 # convert extent from EPSG4326 to EPSG3857 epsg4326 = pyproj.Proj(init='epsg:4326') epsg3857 = pyproj.Proj(init='epsg:3857') x1, y1 = pyproj.transform(epsg4326, epsg3857, extent_epsg4326[0], extent_epsg4326[1]) x2, y2 = pyproj.transform(epsg4326, epsg3857, extent_epsg4326[2], extent_epsg4326[3]) extent_epsg3857 = [x1, y1, x2, y2] # load basemap and set size + extent basemap_path = os.path.join(current_app.config.get('SKYLINES_MAPSERVER_PATH'), 'basemap.map') map_object = mapscript.mapObj(basemap_path) map_object.setSize(400, 400) map_object.setExtent(extent_epsg3857[0], extent_epsg3857[1], extent_epsg3857[2], extent_epsg3857[3]) # enable airspace and airports layers num_layers = map_object.numlayers for i in range(num_layers): layer = map_object.getLayer(i) if layer.group == 'Airports': layer.status = mapscript.MS_ON if layer.group == 'Airspace': layer.status = mapscript.MS_ON # get flights layer flights_layer = map_object.getLayerByName('Flights') highlight_layer = map_object.getLayerByName('Flights_Highlight') # set sql query for blue flight one = literal_column('1 as flight_id') flight_query = db.session.query(locations.label('flight_geometry'), one) flights_layer.data = 'flight_geometry FROM (' + 
query_to_sql(flight_query) + ')' + \ ' AS foo USING UNIQUE flight_id USING SRID=4326' # set sql query for highlighted linestrings highlighted_query = db.session.query(highlight_multilinestring.label('flight_geometry'), one) highlight_layer.data = 'flight_geometry FROM (' + query_to_sql(highlighted_query) + ')' + \ ' AS foo USING UNIQUE flight_id USING SRID=4326' highlight_layer.status = mapscript.MS_ON # get osm layer and set WMS url osm_layer = map_object.getLayerByName('OSM') osm_layer.connection = current_app.config.get('SKYLINES_MAP_TILE_URL') + \ '/service?' # draw map map_image = map_object.draw() # get image mapscript.msIO_installStdoutToBuffer() map_image.write() content = mapscript.msIO_getStdoutBufferBytes() # return to client resp = make_response(content) resp.headers['Content-type'] = map_image.format.mimetype return resp
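The interesting trick above is literal_column('1 as flight_id'), which injects a constant id column so MapServer's "USING UNIQUE flight_id" clause has something to key on. A minimal sketch of the same idea with plain Core (1.x-style select([...]) as in the snippets here), assuming a stand-in table and that query_to_sql simply compiles the query with literal binds:

from sqlalchemy import MetaData, Table, Column, Integer, String, select, literal_column

metadata = MetaData()
flights = Table(
    "flights", metadata,
    Column("id", Integer, primary_key=True),
    Column("geom", String),  # stand-in for the real geometry column
)

# constant column so the result set exposes a 'flight_id' MapServer can key on
one = literal_column("1 as flight_id")
qry = select([flights.c.geom.label("flight_geometry"), one])

# roughly what query_to_sql is assumed to do: render bind-free SQL text
sql_text = str(qry.compile(compile_kwargs={"literal_binds": True}))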
def get_lastrowid(self):
    s = sql.select([sql.literal_column('IDENTITY_VAL_LOCAL()')])
    return self.connection.scalar(s)
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None, order_desc=True, ) -> SqlaQuery: """Querying any sqla table from this common interface""" template_kwargs = { "from_dttm": from_dttm, "groupby": groupby, "metrics": metrics, "row_limit": row_limit, "to_dttm": to_dttm, "filter": filter, "columns": {col.column_name: col for col in self.columns}, } template_kwargs.update(self.template_params_dict) extra_cache_keys: List[Any] = [] template_kwargs["extra_cache_keys"] = extra_cache_keys template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec prequeries: List[str] = [] orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols: Dict[str, Column] = {col.column_name: col for col in self.columns} metrics_dict: Dict[str, SqlMetric] = { m.metric_name: m for m in self.metrics } if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) if not groupby and not metrics and not columns: raise Exception(_("Empty query?")) metrics_exprs = [] for m in metrics: if utils.is_adhoc_metric(m): metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif m in metrics_dict: metrics_exprs.append(metrics_dict[m].get_sqla_col()) else: raise Exception( _("Metric '%(metric)s' does not exist", metric=m)) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr, label = literal_column("COUNT(*)"), "ccount" main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, label) select_exprs: List[Column] = [] groupby_exprs_sans_timestamp: OrderedDict = OrderedDict() if groupby: select_exprs = [] for s in groupby: if s in cols: outer = cols[s].get_sqla_col() else: outer = literal_column(f"({s})") outer = self.make_sqla_column_compatible(outer, s) groupby_exprs_sans_timestamp[outer.name] = outer select_exprs.append(outer) elif columns: for s in columns: select_exprs.append( cols[s].get_sqla_col() if s in cols else self. 
make_sqla_column_compatible(literal_column(s))) metrics_exprs = [] groupby_exprs_with_timestamp = OrderedDict( groupby_exprs_sans_timestamp.items()) if granularity: dttm_col = cols[granularity] time_grain = extras.get("time_grain_sqla") time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs_with_timestamp[timestamp.name] = timestamp # Use main dttm column to support index with secondary dttm columns if (db_engine_spec.time_secondary_columns and self.main_dttm_col in self.dttm_cols and self.main_dttm_col != dttm_col.column_name): time_filters.append(cols[self.main_dttm_col].get_time_filter( from_dttm, to_dttm)) time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs labels_expected = [c._df_label_expected for c in select_exprs] select_exprs = db_engine_spec.make_select_compatible( groupby_exprs_with_timestamp.values(), select_exprs) qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if not columns: qry = qry.group_by(*groupby_exprs_with_timestamp.values()) where_clause_and = [] having_clause_and: List = [] for flt in filter: if not all([flt.get(s) for s in ["col", "op"]]): continue col = flt["col"] op = flt["op"] col_obj = cols.get(col) if col_obj: is_list_target = op in ("in", "not in") eq = self.filter_values_handler( flt.get("val"), target_column_is_numeric=col_obj.is_num, is_list_target=is_list_target, ) if op in ("in", "not in"): cond = col_obj.get_sqla_col().in_(eq) if "<NULL>" in eq: cond = or_(cond, col_obj.get_sqla_col() == None) if op == "not in": cond = ~cond where_clause_and.append(cond) else: if col_obj.is_num: eq = utils.string_to_num(flt["val"]) if op == "==": where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == "!=": where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == ">": where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == "<": where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == ">=": where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == "<=": where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == "LIKE": where_clause_and.append( col_obj.get_sqla_col().like(eq)) elif op == "IS NULL": where_clause_and.append(col_obj.get_sqla_col() == None) elif op == "IS NOT NULL": where_clause_and.append(col_obj.get_sqla_col() != None) if extras: where = extras.get("where") if where: where = template_processor.process_template(where) where_clause_and += [sa.text("({})".format(where))] having = extras.get("having") if having: having = template_processor.process_template(having) having_clause_and += [sa.text("({})".format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and not columns: orderby = [(main_metric_expr, not order_desc)] for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) elif col in cols: col = cols[col].get_sqla_col() qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if is_timeseries and timeseries_limit and groupby and not time_groupby_inline: if self.database.db_engine_spec.allows_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. 
vertica, # require a unique inner alias inner_main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, "mme_inner__") inner_groupby_exprs = [] inner_select_exprs = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): inner = self.make_sqla_column_compatible( gby_obj, gby_name + "__") inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs).select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm) subq = subq.where( and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: ob = self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): # in this case the column name, not the alias, needs to be # conditionally mutated, as it refers to the column alias in # the inner query col_name = db_engine_spec.make_label_compatible(gby_name + "__") on_clause.append(gby_obj == column(col_name)) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: if timeseries_limit_metric: orderby = [( self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols), False, )] # run prequery to get top groups prequery_obj = { "is_timeseries": False, "row_limit": timeseries_limit, "groupby": groupby, "metrics": metrics, "granularity": granularity, "from_dttm": inner_from_dttm or from_dttm, "to_dttm": inner_to_dttm or to_dttm, "filter": filter, "orderby": orderby, "extras": extras, "columns": columns, "order_desc": True, } result = self.query(prequery_obj) prequeries.append(result.query) dimensions = [ c for c in result.df.columns if c not in metrics and c in groupby_exprs_sans_timestamp ] top_groups = self._get_top_groups( result.df, dimensions, groupby_exprs_sans_timestamp) qry = qry.where(top_groups) return SqlaQuery( extra_cache_keys=extra_cache_keys, labels_expected=labels_expected, sqla_query=qry.select_from(tbl), prequeries=prequeries, )
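The series-limit branch above restricts the outer query to the top groups found by an inner aggregate subquery, joining on '__'-suffixed aliases. A self-contained sketch of that join shape on a toy table (names are illustrative, not Superset's):

from sqlalchemy import MetaData, Table, Column, Integer, String, select, func, desc

metadata = MetaData()
t = Table(
    "metrics", metadata,
    Column("country", String),
    Column("ts", Integer),
    Column("value", Integer),
)

# inner query: top 5 countries by the metric, exposed under '__' aliases
mme_inner = func.sum(t.c.value).label("mme_inner__")
inner = (
    select([t.c.country.label("country__"), mme_inner])
    .group_by(t.c.country)
    .order_by(desc(mme_inner))
    .limit(5)
    .alias("top_groups")
)

# outer query: per-country, per-timestamp totals, restricted to those groups
outer = (
    select([t.c.country, t.c.ts, func.sum(t.c.value).label("sum__value")])
    .select_from(t.join(inner, t.c.country == inner.c.country__))
    .group_by(t.c.country, t.c.ts)
)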
def do_analyse(): print("\r\n[do_analyse...]") min_to_end_stmnt = static_assessment.maximum_question_length // letters_per_min created_ago = datetime.datetime.now() - datetime.timedelta( minutes=min_to_end_stmnt) session = db_session() stmnts = session.query(Statement.id, Statement.channel_id, Statement.user_id, Statement.first_msg_id, Statement.last_msg_id).\ filter(and_(Statement.created<created_ago, Statement.was_processed==False)).distinct().all() if stmnts is None or len(stmnts) == 0: print("[do_analyse] nothing to process.") return else: print("[do_analyse] to process: ", len(stmnts)) pairs = dict() for stmnt in stmnts: stmnt_id, channel_id, user_id, first_id, last_id = stmnt message_text = session.query(func.string_agg(TelegramTextMessage.message, aggregate_order_by(literal_column("'. '"), TelegramTextMessage.created))).\ filter(and_(TelegramTextMessage.channel_id==channel_id, TelegramTextMessage.user_id==user_id)).\ filter(TelegramTextMessage.message_id.between(first_id, last_id)).\ distinct().\ all() pairs[stmnt_id] = message_text session.close() questions = list() not_question = list() for stmnt_id, message in pairs.items(): if len(message) == 0: print("[Message len error]") not_question.append(stmnt_id) continue is_question = analyser.validate(''.join(message[0])) if is_question: questions.append(stmnt_id) else: not_question.append(stmnt_id) session = db_session() if len(questions) > 0: print("[do_analyse] questions found: ", len(questions)) update_query = Statement.__table__.update().values(is_question=True, was_processed=True).\ where(Statement.id.in_(questions)) session.execute(update_query) if len(not_question) > 0: print("[do_analyse] not questions: ", len(not_question)) update_query_2 = Statement.__table__.update().values(is_question=False, was_processed=True).\ where(Statement.id.in_(not_question)) session.execute(update_query_2) session.commit() session.close() print("[do_analyse] done.")
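The aggregation above leans on PostgreSQL's string_agg with an ORDER BY inside the aggregate, spelled via aggregate_order_by and a quoted literal_column delimiter. In isolation, with a stand-in table, the construct looks like this:

from sqlalchemy import MetaData, Table, Column, Integer, String, DateTime, select, func, literal_column
from sqlalchemy.dialects.postgresql import aggregate_order_by

metadata = MetaData()
msgs = Table(
    "telegram_text_messages", metadata,
    Column("id", Integer, primary_key=True),
    Column("message", String),
    Column("created", DateTime),
)

# renders (on PostgreSQL) as: string_agg(message, '. ' ORDER BY created)
stmt = select([
    func.string_agg(
        msgs.c.message,
        aggregate_order_by(literal_column("'. '"), msgs.c.created),
    )
])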
def get_sqla_query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, columns=None, order_desc=True, prequeries=None, is_prequery=False, ): """Querying any sqla table from this common interface""" template_kwargs = { 'from_dttm': from_dttm, 'groupby': groupby, 'metrics': metrics, 'row_limit': row_limit, 'to_dttm': to_dttm, 'filter': filter, 'columns': {col.column_name: col for col in self.columns}, } template_kwargs.update(self.template_params_dict) template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols = {col.column_name: col for col in self.columns} metrics_dict = {m.metric_name: m for m in self.metrics} if not granularity and is_timeseries: raise Exception(_( 'Datetime column not provided as part table configuration ' 'and is required by this type of chart')) if not groupby and not metrics and not columns: raise Exception(_('Empty query?')) metrics_exprs = [] for m in metrics: if utils.is_adhoc_metric(m): metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif m in metrics_dict: metrics_exprs.append(metrics_dict.get(m).get_sqla_col()) else: raise Exception(_("Metric '{}' is not valid".format(m))) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr = literal_column('COUNT(*)').label( db_engine_spec.make_label_compatible('count')) select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [] inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] outer = col.get_sqla_col() inner = col.get_sqla_col(col.column_name + '__') groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(cols[s].get_sqla_col()) metrics_exprs = [] if granularity: dttm_col = cols[granularity] time_grain = extras.get('time_grain_sqla') time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs += [timestamp] # Use main dttm column to support index with secondary dttm columns if db_engine_spec.time_secondary_columns and \ self.main_dttm_col in self.dttm_cols and \ self.main_dttm_col != dttm_col.column_name: time_filters.append(cols[self.main_dttm_col]. 
get_time_filter(from_dttm, to_dttm)) time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm)) select_exprs += metrics_exprs qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if not columns: qry = qry.group_by(*groupby_exprs) where_clause_and = [] having_clause_and = [] for flt in filter: if not all([flt.get(s) for s in ['col', 'op']]): continue col = flt['col'] op = flt['op'] col_obj = cols.get(col) if col_obj: is_list_target = op in ('in', 'not in') eq = self.filter_values_handler( flt.get('val'), target_column_is_numeric=col_obj.is_num, is_list_target=is_list_target) if op in ('in', 'not in'): cond = col_obj.get_sqla_col().in_(eq) if '<NULL>' in eq: cond = or_(cond, col_obj.get_sqla_col() == None) # noqa if op == 'not in': cond = ~cond where_clause_and.append(cond) else: if col_obj.is_num: eq = utils.string_to_num(flt['val']) if op == '==': where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == '!=': where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == '>': where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == '<': where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == '>=': where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == '<=': where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == 'LIKE': where_clause_and.append(col_obj.get_sqla_col().like(eq)) elif op == 'IS NULL': where_clause_and.append(col_obj.get_sqla_col() == None) # noqa elif op == 'IS NOT NULL': where_clause_and.append( col_obj.get_sqla_col() != None) # noqa if extras: where = extras.get('where') if where: where = template_processor.process_template(where) where_clause_and += [sa.text('({})'.format(where))] having = extras.get('having') if having: having = template_processor.process_template(having) having_clause_and += [sa.text('({})'.format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and not columns: orderby = [(main_metric_expr, not order_desc)] for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if is_timeseries and \ timeseries_limit and groupby and not time_groupby_inline: if self.database.db_engine_spec.inner_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. 
vertica, # require a unique inner alias inner_main_metric_expr = main_metric_expr.label('mme_inner__') inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs) subq = subq.select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, ) subq = subq.where(and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: if utils.is_adhoc_metric(timeseries_limit_metric): ob = self.adhoc_metric_to_sqla(timeseries_limit_metric, cols) elif timeseries_limit_metric in metrics_dict: timeseries_limit_metric = metrics_dict.get( timeseries_limit_metric, ) ob = timeseries_limit_metric.get_sqla_col() else: raise Exception(_("Metric '{}' is not valid".format(m))) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append( groupby_exprs[i] == column(gb + '__')) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: # run subquery to get top groups subquery_obj = { 'prequeries': prequeries, 'is_prequery': True, 'is_timeseries': False, 'row_limit': timeseries_limit, 'groupby': groupby, 'metrics': metrics, 'granularity': granularity, 'from_dttm': inner_from_dttm or from_dttm, 'to_dttm': inner_to_dttm or to_dttm, 'filter': filter, 'orderby': orderby, 'extras': extras, 'columns': columns, 'order_desc': True, } result = self.query(subquery_obj) cols = {col.column_name: col for col in self.columns} dimensions = [ c for c in result.df.columns if c not in metrics and c in cols ] top_groups = self._get_top_groups(result.df, dimensions) qry = qry.where(top_groups) return qry.select_from(tbl)
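In the non-join branch, _get_top_groups presumably turns the prequery's result rows into a WHERE clause that keeps only the winning dimension combinations. A sketch of a clause of that shape, built with or_/and_ over illustrative rows:

from sqlalchemy import MetaData, Table, Column, Integer, String, select, and_, or_

metadata = MetaData()
t = Table(
    "metrics", metadata,
    Column("country", String),
    Column("device", String),
    Column("value", Integer),
)

# rows the prequery might return (illustrative values only)
top_rows = [
    {"country": "US", "device": "mobile"},
    {"country": "BR", "device": "desktop"},
]
dimensions = ["country", "device"]

# one AND per row, OR-ed together: only the top groups survive the filter
top_groups = or_(*[
    and_(*[t.c[dim] == row[dim] for dim in dimensions])
    for row in top_rows
])

qry = select([t]).where(top_groups)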
def query(self, groupby, metrics, granularity, from_dttm, to_dttm, custom_query, limit_spec=None, filter=None, is_timeseries=True, timeseries_limit=15, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, extras=None, columns=None): qry_start_dttm = datetime.now() if not custom_query: # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col cols = {col.column_name: col for col in self.columns} if not self.main_dttm_col: raise Exception( "Datetime column not provided as part table configuration") dttm_expr = cols[granularity].expression if dttm_expr: timestamp = ColumnClause(dttm_expr, is_literal=True).label('timestamp') else: timestamp = literal_column(granularity).label('timestamp') metrics_exprs = [ literal_column(m.expression).label(m.metric_name) for m in self.metrics if m.metric_name in metrics ] if metrics: main_metric_expr = literal_column([ m.expression for m in self.metrics if m.metric_name == metrics[0] ][0]) else: main_metric_expr = literal_column("COUNT(*)") groupby_exprs = [] select_exprs = [] if groupby: inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] expr = col.expression if expr: outer = ColumnClause(expr, is_literal=True).label(s) inner = ColumnClause(expr, is_literal=True).label('__' + s) else: outer = column(s).label(s) inner = column(s).label('__' + s) groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(s) metrics_exprs = [] if is_timeseries: select_exprs += [timestamp] groupby_exprs += [timestamp] select_exprs += metrics_exprs qry = select(select_exprs) from_clause = table(self.table_name) if not columns: qry = qry.group_by(*groupby_exprs) time_filter = [ timestamp >= from_dttm.isoformat(), timestamp <= to_dttm.isoformat(), ] inner_time_filter = copy(time_filter) if inner_from_dttm: inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat() if inner_to_dttm: inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat() where_clause_and = [] having_clause_and = [] for col, op, eq in filter: col_obj = cols[col] if op in ('in', 'not in'): values = eq.split(",") if col_obj.expression: cond = ColumnClause(col_obj.expression, is_literal=True).in_(values) else: cond = column(col).in_(values) if op == 'not in': cond = ~cond where_clause_and.append(cond) if extras and 'where' in extras: where_clause_and += [text(extras['where'])] if extras and 'having' in extras: having_clause_and += [text(extras['having'])] qry = qry.where(and_(*(time_filter + where_clause_and))) qry = qry.having(and_(*having_clause_and)) if groupby: qry = qry.order_by(desc(main_metric_expr)) qry = qry.limit(row_limit) if timeseries_limit and groupby: subq = select(inner_select_exprs) subq = subq.select_from(table(self.table_name)) subq = subq.where(and_(*(where_clause_and + inner_time_filter))) subq = subq.group_by(*inner_groupby_exprs) subq = subq.order_by(desc(main_metric_expr)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append(groupby_exprs[i] == column("__" + gb)) from_clause = from_clause.join(subq.alias(), and_(*on_clause)) qry = qry.select_from(from_clause) engine = self.database.get_sqla_engine() sql = str( qry.compile(engine, compile_kwargs={"literal_binds": True})) df = read_sql_query(sql=sql, con=engine) textwrap.dedent(sql) else: """ Legacy way of querying by building a SQL string without using the sqlalchemy expression API (new approach which 
supports all dialects) """ engine = self.database.get_sqla_engine() sql = custom_query.format(**locals()) df = read_sql_query(sql=sql, con=engine) textwrap.dedent(sql) return QueryResult(df=df, duration=datetime.now() - qry_start_dttm, query=sql)
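Several of these query() variants end by compiling the statement to literal SQL text and handing it to pandas. A minimal standalone version of that step, using an in-memory SQLite engine as a placeholder:

import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, select, func

engine = create_engine("sqlite://")  # placeholder engine
metadata = MetaData()
events = Table(
    "events", metadata,
    Column("name", String),
    Column("value", Integer),
)
metadata.create_all(engine)

qry = (
    select([events.c.name, func.count().label("n")])
    .group_by(events.c.name)
)

# bind parameters are inlined so the string can be passed straight to pandas
sql_text = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
df = pd.read_sql_query(sql=sql_text, con=engine)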
def get_sqla_col(self, label=None):
    label = label or self.metric_name
    sqla_col = literal_column(self.expression)
    return self.table.make_sqla_column_compatible(sqla_col, label)
def query(self, groupby, metrics, granularity, from_dttm, to_dttm, limit_spec=None, filter=None, is_timeseries=True, timeseries_limit=15, row_limit=None): qry_start_dttm = datetime.now() timestamp = literal_column( self.main_datetime_column.column_name).label('timestamp') metrics_exprs = [ literal_column(m.expression).label(m.metric_name) for m in self.metrics if m.metric_name in metrics ] if metrics: main_metric_expr = literal_column([ m.expression for m in self.metrics if m.metric_name == metrics[0] ][0]) else: main_metric_expr = literal_column("COUNT(*)") select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [literal_column(s) for s in groupby] groupby_exprs = [literal_column(s) for s in groupby] inner_groupby_exprs = [ literal_column(s).label('__' + s) for s in groupby ] if granularity != "all": select_exprs += [timestamp] groupby_exprs += [timestamp] select_exprs += metrics_exprs qry = select(select_exprs) from_clause = table(self.table_name) qry = qry.group_by(*groupby_exprs) where_clause_and = [ timestamp >= from_dttm.isoformat(), timestamp < to_dttm.isoformat(), ] for col, op, eq in filter: if op in ('in', 'not in'): values = eq.split(",") cond = literal_column(col).in_(values) if op == 'not in': cond = ~cond where_clause_and.append(cond) qry = qry.where(and_(*where_clause_and)) qry = qry.order_by(desc(main_metric_expr)) qry = qry.limit(row_limit) if timeseries_limit and groupby: subq = select(inner_groupby_exprs) subq = subq.select_from(table(self.table_name)) subq = subq.where(and_(*where_clause_and)) subq = subq.group_by(*inner_groupby_exprs) subq = subq.order_by(desc(main_metric_expr)) subq = subq.limit(timeseries_limit) on_clause = [] for gb in groupby: on_clause.append( literal_column(gb) == literal_column("__" + gb)) from_clause = from_clause.join(subq.alias(), and_(*on_clause)) qry = qry.select_from(from_clause) engine = self.database.get_sqla_engine() sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True})) df = read_sql_query(sql=sql, con=engine) sql = sqlparse.format(sql, reindent=True) return QueryResult(df=df, duration=datetime.now() - qry_start_dttm, query=sql)
async def select_filter_by(cls, conn: SAConn, col: dict) -> RowProxy:
    key = list(col.keys())[0]
    query = cls.__table__.select().where(literal_column(key) == col[key])
    cursor = await conn.execute(query)
    items = await cursor.fetchall()
    return items
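For comparison: literal_column(key) pastes the dict key into the SQL verbatim, so the key must come from trusted code. A variant built on column() (same assumed aiopg-style setup; this is a sketch, not the original project's API) treats the key as an identifier instead:

from sqlalchemy import column

async def select_filter_by_quoted(cls, conn, col: dict):
    # same shape as above, but column() treats the key as a column name rather
    # than injecting its text verbatim the way literal_column() does
    key = list(col.keys())[0]
    query = cls.__table__.select().where(column(key) == col[key])
    cursor = await conn.execute(query)
    return await cursor.fetchall()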
def query( # sqla self, groupby, metrics, granularity, from_dttm, to_dttm, filter=None, # noqa is_timeseries=True, timeseries_limit=15, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, extras=None, columns=None): """Querying any sqla table from this common interface""" # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col cols = {col.column_name: col for col in self.columns} qry_start_dttm = datetime.now() if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) metrics_exprs = [ m.sqla_col for m in self.metrics if m.metric_name in metrics ] if metrics: main_metric_expr = [ m.sqla_col for m in self.metrics if m.metric_name == metrics[0] ][0] else: main_metric_expr = literal_column("COUNT(*)").label("ccount") select_exprs = [] groupby_exprs = [] if groupby: select_exprs = [] inner_select_exprs = [] inner_groupby_exprs = [] for s in groupby: col = cols[s] outer = col.sqla_col inner = col.sqla_col.label('__' + col.column_name) groupby_exprs.append(outer) select_exprs.append(outer) inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) elif columns: for s in columns: select_exprs.append(cols[s].sqla_col) metrics_exprs = [] if granularity: dttm_expr = cols[granularity].sqla_col.label('timestamp') timestamp = dttm_expr # Transforming time grain into an expression based on configuration time_grain_sqla = extras.get('time_grain_sqla') if time_grain_sqla: udf = self.database.grains_dict().get(time_grain_sqla, '{col}') timestamp_grain = literal_column( udf.function.format(col=dttm_expr)).label('timestamp') else: timestamp_grain = timestamp if is_timeseries: select_exprs += [timestamp_grain] groupby_exprs += [timestamp_grain] tf = '%Y-%m-%d %H:%M:%S.%f' time_filter = [ timestamp >= text(self.database.dttm_converter(from_dttm)), timestamp <= text(self.database.dttm_converter(to_dttm)), ] inner_time_filter = copy(time_filter) if inner_from_dttm: inner_time_filter[0] = timestamp >= text( self.database.dttm_converter(inner_from_dttm)) if inner_to_dttm: inner_time_filter[1] = timestamp <= text( self.database.dttm_converter(inner_to_dttm)) else: inner_time_filter = [] select_exprs += metrics_exprs qry = select(select_exprs) tbl = table(self.table_name) if self.schema: tbl.schema = self.schema if not columns: qry = qry.group_by(*groupby_exprs) where_clause_and = [] having_clause_and = [] for col, op, eq in filter: col_obj = cols[col] if op in ('in', 'not in'): values = eq.split(",") cond = col_obj.sqla_col.in_(values) if op == 'not in': cond = ~cond where_clause_and.append(cond) if extras and 'where' in extras: where_clause_and += [text(extras['where'])] if extras and 'having' in extras: having_clause_and += [text(extras['having'])] if granularity: qry = qry.where(and_(*(time_filter + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if groupby: qry = qry.order_by(desc(main_metric_expr)) qry = qry.limit(row_limit) if timeseries_limit and groupby: subq = select(inner_select_exprs) subq = subq.select_from(tbl) subq = subq.where(and_(*(where_clause_and + inner_time_filter))) subq = subq.group_by(*inner_groupby_exprs) subq = subq.order_by(desc(main_metric_expr)) subq = subq.limit(timeseries_limit) on_clause = [] for i, gb in enumerate(groupby): on_clause.append(groupby_exprs[i] == column("__" + gb)) tbl = tbl.join(subq.alias(), and_(*on_clause)) qry = qry.select_from(tbl) engine = 
self.database.get_sqla_engine() sql = "{}".format( qry.compile( engine, compile_kwargs={"literal_binds": True}, ), ) print(sql) df = pd.read_sql_query(sql=sql, con=engine) sql = sqlparse.format(sql, reindent=True) return QueryResult(df=df, duration=datetime.now() - qry_start_dttm, query=sql)
def visit_select(self, select):
    # 'orderby' is assumed to be provided by the enclosing scope; its text is
    # interpolated verbatim into the ROW_NUMBER() window expression.
    select.append_column(
        sql.literal_column(
            "ROW_NUMBER() OVER (ORDER BY %s)" % orderby).label("ora_rn"))
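The same ora_rn column can be produced without string interpolation through SQLAlchemy's window-function support; a small self-contained sketch (table and ordering column are illustrative):

from sqlalchemy import MetaData, Table, Column, Integer, String, select, func

metadata = MetaData()
users = Table(
    "users", metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String),
)

# ROW_NUMBER() OVER (ORDER BY users.id) AS ora_rn
row_num = func.row_number().over(order_by=users.c.id).label("ora_rn")
qry = select([users.c.id, users.c.name, row_num])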
def test_literal_column_default_no_label(self):
    self._run_test(default=literal_column("1", type_=self.MyInteger))
    CoreData.negativeTestsViral,
    CoreData.positiveCasesViral,
    CoreData.deathConfirmed,
    CoreData.deathProbable,
    CoreData.probableCases,
    CoreData.totalTestEncountersViral,
    CoreData.totalTestsPeopleAntibody,
    CoreData.positiveTestsPeopleAntibody,
    CoreData.negativeTestsPeopleAntibody,
    CoreData.totalTestsPeopleAntigen,
    CoreData.positiveTestsPeopleAntigen,
    CoreData.negativeTestsPeopleAntigen,
    CoreData.totalTestsAntigen,
    CoreData.positiveTestsAntigen,
    CoreData.negativeTestsAntigen,
    # Fake Column
    literal_column("''").label('_posNeg'),
    Literal("totalTestResults"),
]


def select(columns):
    return [
        CSVColumn(
            label=COLUMNS_DISPLAY_NAMES.get(c.name) if c.name in COLUMNS_DISPLAY_NAMES else c.name,
            model_column=c.name if c.name in COLUMNS_DISPLAY_NAMES else None,
            blank=c in COLUMNS_DISPLAY_NAMES)
        for c in columns
    ]
def get_sqla_col(self, label=None):
    label = label if label else self.metric_name
    label = self.table.get_label(label)
    return literal_column(self.expression).label(label)
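All of these metric helpers reduce to wrapping a stored SQL expression string in literal_column() and labeling it for the SELECT list; in isolation the pattern looks like this (table and expression are illustrative):

from sqlalchemy import MetaData, Table, Column, Integer, String, select, literal_column

metadata = MetaData()
sales = Table(
    "sales", metadata,
    Column("region", String),
    Column("amount", Integer),
)

# expression text as it would be stored on the metric object
expression = "SUM(amount)"
metric_col = literal_column(expression).label("sum__amount")

qry = select([sales.c.region, metric_col]).group_by(sales.c.region)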
def visit_select(self, select, **kwargs): """Look for ``LIMIT`` and OFFSET in a select statement, and if so tries to wrap it in a subquery with ``rownum`` criterion. """ if not getattr(select, '_oracle_visit', None): if not self.dialect.use_ansi: froms = self._display_froms_for_select( select, kwargs.get('asfrom', False)) whereclause = self._get_nonansi_join_whereclause(froms) if whereclause is not None: select = select.where(whereclause) select._oracle_visit = True limit_clause = select._limit_clause offset_clause = select._offset_clause if limit_clause is not None or offset_clause is not None: # See http://www.oracle.com/technology/oramag/oracle/06-sep/o56asktom.html # # Generalized form of an Oracle pagination query: # select ... from ( # select /*+ FIRST_ROWS(N) */ ...., rownum as ora_rn from ( # select distinct ... where ... order by ... # ) where ROWNUM <= :limit+:offset # ) where ora_rn > :offset # Outer select and "ROWNUM as ora_rn" can be dropped if limit=0 # TODO: use annotations instead of clone + attr set ? select = select._generate() select._oracle_visit = True # Wrap the middle select and add the hint limitselect = sql.select([c for c in select.c]) if limit_clause is not None and \ self.dialect.optimize_limits and \ select._simple_int_limit: limitselect = limitselect.prefix_with( "/*+ FIRST_ROWS(%d) */" % select._limit) limitselect._oracle_visit = True limitselect._is_wrapper = True # If needed, add the limiting clause if limit_clause is not None: if not self.dialect.use_binds_for_limits: # use simple int limits, will raise an exception # if the limit isn't specified this way max_row = select._limit if offset_clause is not None: max_row += select._offset max_row = sql.literal_column("%d" % max_row) else: max_row = limit_clause if offset_clause is not None: max_row = max_row + offset_clause limitselect.append_whereclause( sql.literal_column("ROWNUM") <= max_row) # If needed, add the ora_rn, and wrap again with offset. if offset_clause is None: limitselect._for_update_arg = select._for_update_arg select = limitselect else: limitselect = limitselect.column( sql.literal_column("ROWNUM").label("ora_rn")) limitselect._oracle_visit = True limitselect._is_wrapper = True offsetselect = sql.select( [c for c in limitselect.c if c.key != 'ora_rn']) offsetselect._oracle_visit = True offsetselect._is_wrapper = True if not self.dialect.use_binds_for_limits: offset_clause = sql.literal_column( "%d" % select._offset) offsetselect.append_whereclause( sql.literal_column("ora_rn") > offset_clause) offsetselect._for_update_arg = select._for_update_arg select = offsetselect kwargs['iswrapper'] = getattr(select, '_is_wrapper', False) return compiler.SQLCompiler.visit_select(self, select, **kwargs)
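For reference, the three-level ROWNUM pagination the visitor builds can be reproduced in miniature with plain Core constructs (table, limit and offset below are placeholders):

from sqlalchemy import MetaData, Table, Column, Integer, String, select, literal_column

metadata = MetaData()
users = Table(
    "users", metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String),
)
limit, offset = 10, 20

# innermost: the ordered result set
inner = select([users.c.id, users.c.name]).order_by(users.c.id).alias("q")

# middle: cap at limit + offset rows and expose ROWNUM as ora_rn
middle = (
    select(list(inner.c) + [literal_column("ROWNUM").label("ora_rn")])
    .where(literal_column("ROWNUM") <= limit + offset)
    .alias("lim")
)

# outer: skip the first `offset` rows
paged = select([c for c in middle.c if c.key != "ora_rn"]).where(
    literal_column("ora_rn") > offset)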
def get_old_messages_backend(request, user_profile, anchor=REQ(converter=int), num_before=REQ(converter=to_non_negative_int), num_after=REQ(converter=to_non_negative_int), narrow=REQ('narrow', converter=narrow_parameter, default=None), use_first_unread_anchor=REQ( default=False, converter=ujson.loads), apply_markdown=REQ(default=True, converter=ujson.loads)): # type: (HttpRequest, UserProfile, int, int, int, Optional[List[Dict[str, Any]]], bool, bool) -> HttpResponse include_history = ok_to_include_history(narrow, user_profile.realm) if include_history and not use_first_unread_anchor: query = select([column("id").label("message_id")], None, "zerver_message") inner_msg_id_col = literal_column("zerver_message.id") elif narrow is None: query = select( [column("message_id"), column("flags")], column("user_profile_id") == literal(user_profile.id), "zerver_usermessage") inner_msg_id_col = column("message_id") else: # TODO: Don't do this join if we're not doing a search query = select( [column("message_id"), column("flags")], column("user_profile_id") == literal(user_profile.id), join( "zerver_usermessage", "zerver_message", literal_column("zerver_usermessage.message_id") == literal_column("zerver_message.id"))) inner_msg_id_col = column("message_id") num_extra_messages = 1 is_search = False if narrow is not None: # Add some metadata to our logging data for narrows verbose_operators = [] for term in narrow: if term['operator'] == "is": verbose_operators.append("is:" + term['operand']) else: verbose_operators.append(term['operator']) request._log_data['extra'] = "[%s]" % (",".join(verbose_operators), ) # Build the query for the narrow num_extra_messages = 0 builder = NarrowBuilder(user_profile, inner_msg_id_col) for term in narrow: if term['operator'] == 'search' and not is_search: query = query.column("subject").column("rendered_content") is_search = True query = builder.add_term(query, term) # We add 1 to the number of messages requested if no narrow was # specified to ensure that the resulting list always contains the # anchor message. If a narrow was specified, the anchor message # might not match the narrow anyway. if num_after != 0: num_after += num_extra_messages else: num_before += num_extra_messages sa_conn = get_sqlalchemy_connection() if use_first_unread_anchor: condition = column("flags").op("&")(UserMessage.flags.read.mask) == 0 # We exclude messages on muted topics when finding the first unread # message in this narrow muting_conditions = exclude_muting_conditions(user_profile, narrow) if muting_conditions: condition = and_(condition, *muting_conditions) first_unread_query = query.where(condition) first_unread_query = first_unread_query.order_by( inner_msg_id_col.asc()).limit(1) first_unread_result = list( sa_conn.execute(first_unread_query).fetchall()) if len(first_unread_result) > 0: anchor = first_unread_result[0][0] else: anchor = 10000000000000000 before_query = None after_query = None if num_before != 0: before_anchor = anchor if num_after != 0: # Don't include the anchor in both the before query and the after query before_anchor = anchor - 1 before_query = query.where(inner_msg_id_col <= before_anchor) \ .order_by(inner_msg_id_col.desc()).limit(num_before) if num_after != 0: after_query = query.where(inner_msg_id_col >= anchor) \ .order_by(inner_msg_id_col.asc()).limit(num_after) if num_before == 0 and num_after == 0: # This can happen when a narrow is specified. 
after_query = query.where(inner_msg_id_col == anchor) if before_query is not None: if after_query is not None: query = union_all(before_query.self_group(), after_query.self_group()) else: query = before_query else: query = after_query main_query = alias(query) query = select(main_query.c, None, main_query).order_by(column("message_id").asc()) # This is a hack to tag the query we use for testing query = query.prefix_with("/* get_old_messages */") query_result = list(sa_conn.execute(query).fetchall()) # The following is a little messy, but ensures that the code paths # are similar regardless of the value of include_history. The # 'user_messages' dictionary maps each message to the user's # UserMessage object for that message, which we will attach to the # rendered message dict before returning it. We attempt to # bulk-fetch rendered message dicts from remote cache using the # 'messages' list. search_fields = dict() # type: Dict[int, Dict[str, text_type]] message_ids = [] # type: List[int] user_message_flags = {} # type: Dict[int, List[str]] if include_history: message_ids = [row[0] for row in query_result] # TODO: This could be done with an outer join instead of two queries user_message_flags = dict( (user_message.message_id, user_message.flags_list()) for user_message in UserMessage.objects.filter( user_profile=user_profile, message__id__in=message_ids)) for row in query_result: message_id = row[0] if user_message_flags.get(message_id) is None: user_message_flags[message_id] = ["read", "historical"] if is_search: (_, subject, rendered_content, content_matches, subject_matches) = row search_fields[message_id] = get_search_fields( rendered_content, subject, content_matches, subject_matches) else: for row in query_result: message_id = row[0] flags = row[1] user_message_flags[message_id] = parse_usermessage_flags(flags) message_ids.append(message_id) if is_search: (_, _, subject, rendered_content, content_matches, subject_matches) = row search_fields[message_id] = get_search_fields( rendered_content, subject, content_matches, subject_matches) cache_transformer = lambda row: Message.build_dict_from_raw_db_row( row, apply_markdown) id_fetcher = lambda row: row['id'] message_dicts = generic_bulk_cached_fetch( lambda message_id: to_dict_cache_key_id(message_id, apply_markdown), Message.get_raw_db_rows, message_ids, id_fetcher=id_fetcher, cache_transformer=cache_transformer, extractor=extract_message_dict, setter=stringify_message_dict) message_list = [] for message_id in message_ids: msg_dict = message_dicts[message_id] msg_dict.update({"flags": user_message_flags[message_id]}) msg_dict.update(search_fields.get(message_id, {})) message_list.append(msg_dict) statsd.incr('loaded_old_messages', len(message_list)) ret = {'messages': message_list, "result": "success", "msg": ""} return json_success(ret)
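The anchor handling above boils down to two half-queries stitched together with union_all and re-sorted by message id; a compact sketch of that shape (table and anchor values are illustrative):

from sqlalchemy import MetaData, Table, Column, Integer, String, select, union_all, alias

metadata = MetaData()
messages = Table(
    "messages", metadata,
    Column("message_id", Integer, primary_key=True),
    Column("content", String),
)
anchor, num_before, num_after = 1000, 50, 50

base = select([messages.c.message_id, messages.c.content])

before_query = (base.where(messages.c.message_id < anchor)
                .order_by(messages.c.message_id.desc()).limit(num_before))
after_query = (base.where(messages.c.message_id >= anchor)
               .order_by(messages.c.message_id.asc()).limit(num_after))

combined = alias(union_all(before_query.self_group(), after_query.self_group()))
query = select(list(combined.c)).order_by(combined.c.message_id.asc())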
def list_files( probe_asn=None, probe_cc=None, test_name=None, since=None, until=None, since_index=None, order_by="index", order="desc", offset=0, limit=100, ): log = current_app.logger if probe_asn is not None: if probe_asn.startswith("AS"): probe_asn = probe_asn[2:] probe_asn = int(probe_asn) try: if since is not None: since = parse_date(since) except ValueError: raise BadRequest("Invalid since") try: if until is not None: until = parse_date(until) except ValueError: raise BadRequest("Invalid until") if since_index is not None: since_index = int(since_index) report_no = max(0, since_index - REPORT_INDEX_OFFSET) if order_by in ("index", "idx"): order_by = "report_no" cols = [ literal_column("textname"), literal_column("test_start_time"), literal_column("probe_cc"), literal_column("probe_asn"), literal_column("report_no"), literal_column("test_name"), ] where = [] query_params = {} # XXX maybe all of this can go into some sort of function. if probe_cc: where.append(sql.text("probe_cc = :probe_cc")) query_params["probe_cc"] = probe_cc if probe_asn: where.append(sql.text("probe_asn = :probe_asn")) query_params["probe_asn"] = probe_asn if test_name: where.append(sql.text("test_name = :test_name")) query_params["test_name"] = test_name if since: where.append(sql.text("test_start_time > :since")) query_params["since"] = since if until: where.append(sql.text("test_start_time <= :until")) query_params["until"] = until if since_index: where.append(sql.text("report_no > :report_no")) query_params["report_no"] = report_no query = select(cols).where(and_(*where)).select_from("report") count = -1 pages = -1 current_page = math.ceil(offset / limit) + 1 query = query.order_by(text("{} {}".format(order_by, order))) query = query.limit(limit).offset(offset) results = [] log.debug(query) q = current_app.db_session.execute(query, query_params) for row in q: download_url = urljoin( current_app.config["BASE_URL"], "/files/download/%s" % row.textname ) results.append( { "download_url": download_url, "probe_cc": row.probe_cc, "probe_asn": "AS{}".format(row.probe_asn), "test_name": row.test_name, "index": int(row.report_no) + REPORT_INDEX_OFFSET, "test_start_time": row.test_start_time, } ) # We got less results than what we expected, we know the count and that we are done if len(results) < limit: count = offset + len(results) pages = math.ceil(count / limit) next_url = None else: next_args = request.args.to_dict() next_args["offset"] = "%s" % (offset + limit) next_args["limit"] = "%s" % limit next_url = urljoin( current_app.config["BASE_URL"], "/api/v1/files?%s" % urlencode(next_args) ) metadata = { "offset": offset, "limit": limit, "count": count, "pages": pages, "current_page": current_page, "next_url": next_url, } return jsonify({"metadata": metadata, "results": results})
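The filter assembly above mixes sql.text() fragments with a shared dict of bound parameters that is supplied at execution time; a trimmed-down version of the same technique (table and values are illustrative):

from sqlalchemy import MetaData, Table, Column, Integer, String, select, and_, text

metadata = MetaData()
report = Table(
    "report", metadata,
    Column("probe_cc", String),
    Column("test_name", String),
    Column("report_no", Integer),
)

probe_cc, test_name = "IT", None

where = []
query_params = {}
if probe_cc:
    where.append(text("probe_cc = :probe_cc"))
    query_params["probe_cc"] = probe_cc
if test_name:
    where.append(text("test_name = :test_name"))
    query_params["test_name"] = test_name

query = select([report.c.report_no]).where(and_(*where))
# executed as: session.execute(query, query_params)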
def get_sqla_query( # sqla self, metrics, granularity, from_dttm, to_dttm, columns=None, groupby=None, filter=None, is_timeseries=True, timeseries_limit=15, timeseries_limit_metric=None, row_limit=None, inner_from_dttm=None, inner_to_dttm=None, orderby=None, extras=None, order_desc=True, ) -> SqlaQuery: """Querying any sqla table from this common interface""" template_kwargs = { "from_dttm": from_dttm, "groupby": groupby, "metrics": metrics, "row_limit": row_limit, "to_dttm": to_dttm, "filter": filter, "columns": {col.column_name: col for col in self.columns}, } is_sip_38 = is_feature_enabled("SIP_38_VIZ_REARCHITECTURE") template_kwargs.update(self.template_params_dict) extra_cache_keys: List[Any] = [] template_kwargs["extra_cache_keys"] = extra_cache_keys template_processor = self.get_template_processor(**template_kwargs) db_engine_spec = self.database.db_engine_spec prequeries: List[str] = [] orderby = orderby or [] # For backward compatibility if granularity not in self.dttm_cols: granularity = self.main_dttm_col # Database spec supports join-free timeslot grouping time_groupby_inline = db_engine_spec.time_groupby_inline cols: Dict[str, Column] = {col.column_name: col for col in self.columns} metrics_dict: Dict[str, SqlMetric] = { m.metric_name: m for m in self.metrics } if not granularity and is_timeseries: raise Exception( _("Datetime column not provided as part table configuration " "and is required by this type of chart")) if (not metrics and not columns and (is_sip_38 or (not is_sip_38 and not groupby))): raise Exception(_("Empty query?")) metrics_exprs: List[ColumnElement] = [] for m in metrics: if utils.is_adhoc_metric(m): metrics_exprs.append(self.adhoc_metric_to_sqla(m, cols)) elif m in metrics_dict: metrics_exprs.append(metrics_dict[m].get_sqla_col()) else: raise Exception( _("Metric '%(metric)s' does not exist", metric=m)) if metrics_exprs: main_metric_expr = metrics_exprs[0] else: main_metric_expr, label = literal_column("COUNT(*)"), "ccount" main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, label) select_exprs: List[Column] = [] groupby_exprs_sans_timestamp: OrderedDict = OrderedDict() if (is_sip_38 and metrics and columns) or (not is_sip_38 and groupby): # dedup columns while preserving order groupby = list(dict.fromkeys(columns if is_sip_38 else groupby)) select_exprs = [] for s in groupby: if s in cols: outer = cols[s].get_sqla_col() else: outer = literal_column(f"({s})") outer = self.make_sqla_column_compatible(outer, s) groupby_exprs_sans_timestamp[outer.name] = outer select_exprs.append(outer) elif columns: for s in columns: select_exprs.append( cols[s].get_sqla_col() if s in cols else self. make_sqla_column_compatible(literal_column(s))) metrics_exprs = [] time_range_endpoints = extras.get("time_range_endpoints") groupby_exprs_with_timestamp = OrderedDict( groupby_exprs_sans_timestamp.items()) if granularity: dttm_col = cols[granularity] time_grain = extras.get("time_grain_sqla") time_filters = [] if is_timeseries: timestamp = dttm_col.get_timestamp_expression(time_grain) select_exprs += [timestamp] groupby_exprs_with_timestamp[timestamp.name] = timestamp # Use main dttm column to support index with secondary dttm columns. 
if (db_engine_spec.time_secondary_columns and self.main_dttm_col in self.dttm_cols and self.main_dttm_col != dttm_col.column_name): time_filters.append(cols[self.main_dttm_col].get_time_filter( from_dttm, to_dttm, time_range_endpoints)) time_filters.append( dttm_col.get_time_filter(from_dttm, to_dttm, time_range_endpoints)) select_exprs += metrics_exprs labels_expected = [c._df_label_expected for c in select_exprs] select_exprs = db_engine_spec.make_select_compatible( groupby_exprs_with_timestamp.values(), select_exprs) qry = sa.select(select_exprs) tbl = self.get_from_clause(template_processor) if (is_sip_38 and metrics) or (not is_sip_38 and not columns): qry = qry.group_by(*groupby_exprs_with_timestamp.values()) where_clause_and = [] having_clause_and: List = [] for flt in filter: if not all([flt.get(s) for s in ["col", "op"]]): continue col = flt["col"] op = flt["op"].upper() col_obj = cols.get(col) if col_obj: is_list_target = op in ( utils.FilterOperator.IN.value, utils.FilterOperator.NOT_IN.value, ) eq = self.filter_values_handler( values=flt.get("val"), target_column_is_numeric=col_obj.is_numeric, is_list_target=is_list_target, ) if op in ( utils.FilterOperator.IN.value, utils.FilterOperator.NOT_IN.value, ): cond = col_obj.get_sqla_col().in_(eq) if isinstance(eq, str) and NULL_STRING in eq: cond = or_(cond, col_obj.get_sqla_col() is None) if op == utils.FilterOperator.NOT_IN.value: cond = ~cond where_clause_and.append(cond) else: if col_obj.is_numeric: eq = utils.cast_to_num(flt["val"]) if op == utils.FilterOperator.EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() == eq) elif op == utils.FilterOperator.NOT_EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() != eq) elif op == utils.FilterOperator.GREATER_THAN.value: where_clause_and.append(col_obj.get_sqla_col() > eq) elif op == utils.FilterOperator.LESS_THAN.value: where_clause_and.append(col_obj.get_sqla_col() < eq) elif op == utils.FilterOperator.GREATER_THAN_OR_EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() >= eq) elif op == utils.FilterOperator.LESS_THAN_OR_EQUALS.value: where_clause_and.append(col_obj.get_sqla_col() <= eq) elif op == utils.FilterOperator.LIKE.value: where_clause_and.append( col_obj.get_sqla_col().like(eq)) elif op == utils.FilterOperator.IS_NULL.value: where_clause_and.append(col_obj.get_sqla_col() == None) elif op == utils.FilterOperator.IS_NOT_NULL.value: where_clause_and.append(col_obj.get_sqla_col() != None) else: raise Exception( _("Invalid filter operation type: %(op)s", op=op)) if config["ENABLE_ROW_LEVEL_SECURITY"]: where_clause_and += self._get_sqla_row_level_filters( template_processor) if extras: where = extras.get("where") if where: where = template_processor.process_template(where) where_clause_and += [sa.text("({})".format(where))] having = extras.get("having") if having: having = template_processor.process_template(having) having_clause_and += [sa.text("({})".format(having))] if granularity: qry = qry.where(and_(*(time_filters + where_clause_and))) else: qry = qry.where(and_(*where_clause_and)) qry = qry.having(and_(*having_clause_and)) if not orderby and ((is_sip_38 and metrics) or (not is_sip_38 and not columns)): orderby = [(main_metric_expr, not order_desc)] # To ensure correct handling of the ORDER BY labeling we need to reference the # metric instance if defined in the SELECT clause. 
metrics_exprs_by_label = {m._label: m for m in metrics_exprs} for col, ascending in orderby: direction = asc if ascending else desc if utils.is_adhoc_metric(col): col = self.adhoc_metric_to_sqla(col, cols) elif col in cols: col = cols[col].get_sqla_col() if isinstance(col, Label) and col._label in metrics_exprs_by_label: col = metrics_exprs_by_label[col._label] qry = qry.order_by(direction(col)) if row_limit: qry = qry.limit(row_limit) if (is_timeseries and timeseries_limit and not time_groupby_inline and ((is_sip_38 and columns) or (not is_sip_38 and groupby))): if self.database.db_engine_spec.allows_joins: # some sql dialects require for order by expressions # to also be in the select clause -- others, e.g. vertica, # require a unique inner alias inner_main_metric_expr = self.make_sqla_column_compatible( main_metric_expr, "mme_inner__") inner_groupby_exprs = [] inner_select_exprs = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): inner = self.make_sqla_column_compatible( gby_obj, gby_name + "__") inner_groupby_exprs.append(inner) inner_select_exprs.append(inner) inner_select_exprs += [inner_main_metric_expr] subq = select(inner_select_exprs).select_from(tbl) inner_time_filter = dttm_col.get_time_filter( inner_from_dttm or from_dttm, inner_to_dttm or to_dttm, time_range_endpoints, ) subq = subq.where( and_(*(where_clause_and + [inner_time_filter]))) subq = subq.group_by(*inner_groupby_exprs) ob = inner_main_metric_expr if timeseries_limit_metric: ob = self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols) direction = desc if order_desc else asc subq = subq.order_by(direction(ob)) subq = subq.limit(timeseries_limit) on_clause = [] for gby_name, gby_obj in groupby_exprs_sans_timestamp.items(): # in this case the column name, not the alias, needs to be # conditionally mutated, as it refers to the column alias in # the inner query col_name = db_engine_spec.make_label_compatible(gby_name + "__") on_clause.append(gby_obj == column(col_name)) tbl = tbl.join(subq.alias(), and_(*on_clause)) else: if timeseries_limit_metric: orderby = [( self._get_timeseries_orderby(timeseries_limit_metric, metrics_dict, cols), False, )] # run prequery to get top groups prequery_obj = { "is_timeseries": False, "row_limit": timeseries_limit, "metrics": metrics, "granularity": granularity, "from_dttm": inner_from_dttm or from_dttm, "to_dttm": inner_to_dttm or to_dttm, "filter": filter, "orderby": orderby, "extras": extras, "columns": columns, "order_desc": True, } if not is_sip_38: prequery_obj["groupby"] = groupby result = self.query(prequery_obj) prequeries.append(result.query) dimensions = [ c for c in result.df.columns if c not in metrics and c in groupby_exprs_sans_timestamp ] top_groups = self._get_top_groups( result.df, dimensions, groupby_exprs_sans_timestamp) qry = qry.where(top_groups) return SqlaQuery( extra_cache_keys=extra_cache_keys, labels_expected=labels_expected, sqla_query=qry.select_from(tbl), prequeries=prequeries, )
def sqla_col(self):
    name = self.metric_name
    return literal_column(self.expression).label(name)
def get_sqla_col(self, label=None):
    db_engine_spec = self.table.database.db_engine_spec
    label = db_engine_spec.make_label_compatible(
        label if label else self.metric_name)
    return literal_column(self.expression).label(label)
def polymorphic_union(table_map, typecolname, aliasname='p_union', cast_nulls=True): """Create a ``UNION`` statement used by a polymorphic mapper. See :ref:`concrete_inheritance` for an example of how this is used. :param table_map: mapping of polymorphic identities to :class:`.Table` objects. :param typecolname: string name of a "discriminator" column, which will be derived from the query, producing the polymorphic identity for each row. If ``None``, no polymorphic discriminator is generated. :param aliasname: name of the :func:`~sqlalchemy.sql.expression.alias()` construct generated. :param cast_nulls: if True, non-existent columns, which are represented as labeled NULLs, will be passed into CAST. This is a legacy behavior that is problematic on some backends such as Oracle - in which case it can be set to False. """ colnames = util.OrderedSet() colnamemaps = {} types = {} for key in table_map.keys(): table = table_map[key] # mysql doesnt like selecting from a select; # make it an alias of the select if isinstance(table, sql.Select): table = table.alias() table_map[key] = table m = {} for c in table.c: colnames.add(c.key) m[c.key] = c types[c.key] = c.type colnamemaps[table] = m def col(name, table): try: return colnamemaps[table][name] except KeyError: if cast_nulls: return sql.cast(sql.null(), types[name]).label(name) else: return sql.type_coerce(sql.null(), types[name]).label(name) result = [] for type, table in table_map.iteritems(): if typecolname is not None: result.append( sql.select([col(name, table) for name in colnames] + [ sql.literal_column( sql_util._quote_ddl_expr(type)).label(typecolname) ], from_obj=[table])) else: result.append( sql.select([col(name, table) for name in colnames], from_obj=[table])) return sql.union_all(*result).alias(aliasname)
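As the docstring notes, this helper backs concrete-inheritance mappings; a typical call (with illustrative tables) produces one NULL-padded SELECT per table, UNION ALL-ed and aliased as 'pjoin', with the 'type' column carrying each row's polymorphic identity:

from sqlalchemy import MetaData, Table, Column, Integer, String

metadata = MetaData()
managers = Table(
    "managers", metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String),
    Column("manager_data", String),
)
engineers = Table(
    "engineers", metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String),
    Column("engineer_info", String),
)

pjoin = polymorphic_union({
    "manager": managers,
    "engineer": engineers,
}, "type", "pjoin")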
async def count(self) -> Optional[int]:
    col = sql.func.count(sql.literal_column("*"))
    return await self.from_self(col).scalar()
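Stripped of the query-object wrapper, the construct above is just COUNT(*) selected from the current statement; a plain-Core equivalent against an illustrative table:

from sqlalchemy import MetaData, Table, Column, Integer, String, select, func, literal_column

metadata = MetaData()
users = Table(
    "users", metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String),
)

count_stmt = select([func.count(literal_column("*"))]).select_from(users)
# scalar result: conn.scalar(count_stmt)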