def _sanity_check(executable, hostname, hash_executable=None,
                  expiration_delay=DEFAULT_EXPIRATION_DELAY, session=None):
    """
    Check if processes on the host are still running.

    :param executable: Executable name as a string, e.g., conveyor-submitter.
    :param hostname: Hostname as a string, e.g., rucio-daemon-prod-01.cern.ch.
    :param hash_executable: Hash of the executable.
    :param expiration_delay: Time (in seconds) after which any inactive health check will be removed.
    :param session: The database session in use.
    """
    if executable:
        if not hash_executable:
            hash_executable = calc_hash(executable)
        for pid, in session.query(distinct(Heartbeats.pid)).filter_by(
                executable=hash_executable, hostname=hostname):
            if not pid_exists(pid):
                session.query(Heartbeats).filter_by(executable=hash_executable,
                                                    hostname=hostname,
                                                    pid=pid).delete()
    else:
        for pid, in session.query(distinct(Heartbeats.pid)).filter_by(hostname=hostname):
            if not pid_exists(pid):
                session.query(Heartbeats).filter_by(hostname=hostname, pid=pid).delete()

    if expiration_delay:
        cardiac_arrest(older_than=expiration_delay, session=session)
def get_query(qtype='none', qobject='none'):
    if qtype != 'none' and qobject != 'none':
        # build queries for a specified subset of patients
        query = db.session.query(label('sid', qobject.c.patient_sid),
                                 label('value_d', qobject.c.double_value),
                                 label('value_s', qobject.c.string_value),
                                 label('attribute', qobject.c.attribute_value))
    elif qtype == 'count' and qobject == 'none':
        # count of patients
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'))
    else:
        # entire population
        query = db.session.query(
            distinct(Clinical.patient_sid).label('sid'),
            literal_column("'complement'").label('attribute'),
            literal_column("'0'").label('value_d'),
            literal_column("'null'").label('value_s'))
    db.session.commit()
    db.session.close()
    return query
def organizations_and_counters():
    '''Query organizations with their counters'''
    memberships = aliased(model.Member)

    query = DB.query(model.Group,
                     func.count(distinct(model.Package.id)).label('nb_datasets'),
                     func.count(distinct(memberships.id)).label('nb_members')
                     )
    query = query.outerjoin(CertifiedPublicService)
    query = query.outerjoin(model.Package, and_(
        model.Group.id == model.Package.owner_org,
        ~model.Package.private,
        model.Package.state == 'active',
    ))
    query = query.outerjoin(memberships, and_(
        memberships.group_id == model.Group.id,
        memberships.state == 'active',
        memberships.table_name == 'user'
    ))
    query = query.filter(model.Group.state == 'active')
    query = query.filter(model.Group.approval_status == 'approved')
    query = query.filter(model.Group.is_organization == True)
    query = query.group_by(model.Group.id, CertifiedPublicService.organization_id)
    query = query.order_by(
        CertifiedPublicService.organization_id == null(),
        desc('nb_datasets'),
        desc('nb_members'),
        model.Group.title
    )
    return query
def organizations_and_counters():
    '''Query organizations with their counters'''
    query = DB.query(Group,
                     func.count(distinct(Package.id)).label('nb_datasets'),
                     func.count(distinct(Member.id)).label('nb_members'))
    query = query.outerjoin(CertifiedPublicService)
    query = query.outerjoin(
        Package,
        and_(
            Group.id == Package.owner_org,
            ~Package.private,
            Package.state == 'active',
        ))
    query = query.outerjoin(
        Member,
        and_(Member.group_id == Group.id,
             Member.state == 'active',
             Member.table_name == 'user'))
    query = query.filter(Group.state == 'active')
    query = query.filter(Group.approval_status == 'approved')
    query = query.filter(Group.is_organization == True)
    query = query.group_by(Group.id, CertifiedPublicService.organization_id)
    query = query.order_by(CertifiedPublicService.organization_id == null(),
                           desc('nb_datasets'),
                           desc('nb_members'),
                           Group.title)
    query = query.options(orm.joinedload(Group.certified_public_service))
    return query
def organizations_and_counters():
    '''Query organizations with their counters'''
    memberships = aliased(model.Member)

    query = DB.query(
        model.Group,
        func.count(distinct(model.Package.id)).label('nb_datasets'),
        func.count(distinct(memberships.id)).label('nb_members'))
    query = query.outerjoin(CertifiedPublicService)
    query = query.outerjoin(
        model.Package,
        and_(
            model.Group.id == model.Package.owner_org,
            ~model.Package.private,
            model.Package.state == 'active',
        ))
    query = query.outerjoin(
        memberships,
        and_(memberships.group_id == model.Group.id,
             memberships.state == 'active',
             memberships.table_name == 'user'))
    query = query.filter(model.Group.state == 'active')
    query = query.filter(model.Group.approval_status == 'approved')
    query = query.filter(model.Group.is_organization == True)
    query = query.group_by(model.Group.id,
                           CertifiedPublicService.organization_id)
    query = query.order_by(CertifiedPublicService.organization_id == null(),
                           desc('nb_datasets'),
                           desc('nb_members'),
                           model.Group.title)
    return query
def organizations_and_counters():
    '''Query organizations with their counters'''
    query = DB.query(Group,
                     func.count(distinct(Package.id)).label('nb_datasets'),
                     func.count(distinct(Member.id)).label('nb_members')
                     )
    query = query.outerjoin(CertifiedPublicService)
    query = query.outerjoin(Package, and_(
        Group.id == Package.owner_org,
        ~Package.private,
        Package.state == 'active',
    ))
    query = query.outerjoin(Member, and_(
        Member.group_id == Group.id,
        Member.state == 'active',
        Member.table_name == 'user'
    ))
    query = query.filter(Group.state == 'active')
    query = query.filter(Group.approval_status == 'approved')
    query = query.filter(Group.is_organization == True)
    query = query.group_by(Group.id, CertifiedPublicService.organization_id)
    query = query.order_by(
        CertifiedPublicService.organization_id == null(),
        desc('nb_datasets'),
        desc('nb_members'),
        Group.title
    )
    query = query.options(orm.joinedload(Group.certified_public_service))
    return query
def _sanity_check(executable, hostname, hash_executable=None, session=None):
    """
    Check if processes on the host are still running.

    :param executable: Executable name as a string, e.g., conveyor-submitter.
    :param hostname: Hostname as a string, e.g., rucio-daemon-prod-01.cern.ch.
    :param hash_executable: Hash of the executable.
    :param session: The database session in use.
    """
    if executable:
        if not hash_executable:
            hash_executable = calc_hash(executable)
        for pid, in session.query(distinct(Heartbeats.pid)).filter_by(
                executable=hash_executable, hostname=hostname):
            if not pid_exists(pid):
                session.query(Heartbeats).filter_by(executable=hash_executable,
                                                    hostname=hostname,
                                                    pid=pid).delete()
    else:
        for pid, in session.query(distinct(Heartbeats.pid)).filter_by(hostname=hostname):
            if not pid_exists(pid):
                session.query(Heartbeats).filter_by(hostname=hostname, pid=pid).delete()
def get_query(qtype='none', qobject='none'):
    if qtype != 'none' and qobject != 'none':
        # build queries for a specified subset of patients
        query = db.session.query(label('sid', qobject.c.patient_sid),
                                 label('value_d', qobject.c.double_value),
                                 label('value_s', qobject.c.string_value),
                                 label('attribute', qobject.c.attribute_value))
    elif qtype == 'count' and qobject == 'none':
        # count of patients
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'))
    else:
        # entire population
        query = db.session.query(distinct(Clinical.patient_sid).label('sid'),
                                 literal_column("'complement'").label('attribute'),
                                 literal_column("'0'").label('value_d'),
                                 literal_column("'null'").label('value_s'))
    db.session.commit()
    db.session.close()
    return query
def experiment():
    experiment = request.args['experiment']
    exp = Experiment.query.filter_by(accession=experiment).first_or_404()
    user = User.query.with_entities(User.fullname).filter_by(id=exp.user_id).one()
    samples = Sample.query.with_entities(Sample.id, Sample.name).\
        filter_by(exp_id=exp.id).\
        group_by(Sample.id, Sample.name).all()
    tges = TGE.query.join(Observation).join(Sample).filter_by(exp_id=exp.id).all()

    # organisms = [item for sublist in organisms for item in sublist]
    # sampleNum = Sample.query.filter_by(exp_id=experiment).distinct().count()
    obsNum = Observation.query.join(Sample).join(Experiment).\
        filter_by(id=exp.id).distinct().count()
    tgeNum = TGE.query.join(Observation).join(Sample).join(Experiment).\
        filter_by(id=exp.id).distinct(Observation.tge_id).count()
    trnNum = Transcript.query.with_entities(distinct(Transcript.dna_seq)).\
        join(Observation).join(Sample).join(Experiment).\
        filter_by(id=exp.id).count()
    peptAll = Observation.query.with_entities(func.sum(Observation.peptide_num).label("pepNum")).\
        join(Sample).join(Experiment).\
        filter_by(id=exp.id).one()

    # unique peptide count
    # peptUniq = TgeToPeptide.query.with_entities(distinct(TgeToPeptide.peptide_id)).\
    #     join(Observation).join(Sample).\
    #     filter(Sample.exp_id==exp.id).count()

    # sum of unique peptide counts
    peptUniq = TgeToPeptide.query.with_entities(func.count(distinct(TgeToPeptide.peptide_id))).\
        join(Observation).join(Sample).group_by(Observation.sample_id).\
        filter(Sample.exp_id == exp.id).all()
    peptUniq = [item for sublist in peptUniq for item in sublist]

    summary = {'accession': experiment, 'title': exp.title, 'user': user.fullname,
               'sampleNum': len(samples), 'tgeNum': separators(tgeNum),
               'obsNum': separators(obsNum), 'trnNum': separators(trnNum),
               'peptAll': separators(peptAll), 'peptUniq': separators(sum(peptUniq))}

    sampleList = []
    for sample in samples:
        tgePerSample = Observation.query.filter(Observation.sample_id == sample.id).\
            distinct(Observation.tge_id).count()
        pepPerSample = TgeToPeptide.query.with_entities(distinct(TgeToPeptide.peptide_id)).\
            join(Observation).join(Sample).filter(Observation.sample_id == sample.id).count()
        sampleList.append({'id': sample.id, 'name': sample.name,
                           'tgeNum': separators(tgePerSample),
                           'pepNum': separators(pepPerSample)})

    return render_template('results/experiment.html', summary=summary,
                           sampleList=sampleList, tges=tges)
def mention_frequencies(self, ids):
    """
    Return dict from person ID to a list of how frequently each
    person was mentioned per day, over the period.
    """
    rows = (
        db.session.query(
            Entity.person_id,
            func.date_format(Document.published_at, "%Y-%m-%d").label("date"),
            func.count(distinct(DocumentEntity.doc_id)).label("count"),
        )
        .join(DocumentEntity, Entity.id == DocumentEntity.entity_id)
        .join(Document, DocumentEntity.doc_id == Document.id)
        .filter(Entity.person_id.in_(ids))
        .filter(DocumentEntity.doc_id.in_(self.doc_ids))
        .group_by(Entity.person_id, "date")
        .order_by(Entity.person_id, Document.published_at)
        .all()
    )

    freqs = {}
    for person_id, group in groupby(rows, lambda r: r[0]):
        freqs[person_id] = [0] * (self.days + 1)
        # set day buckets based on date
        for row in group:
            d, n = parse(row[1]).date(), row[2]
            day = (d - self.start_date).days
            freqs[person_id][day] = n

    return freqs
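A self-contained sketch of the day-bucketing step above, with hard-coded tuples standing in for the query rows (the people, dates, counts, and window length here are invented):

# Illustrative only: fills per-day buckets the same way mention_frequencies
# does, from literal (person_id, "YYYY-MM-DD", count) rows instead of a query.
from datetime import date
from itertools import groupby

rows = [(1, "2014-06-02", 3), (1, "2014-06-04", 1), (2, "2014-06-03", 5)]
start_date, days = date(2014, 6, 1), 4

freqs = {}
for person_id, group in groupby(rows, lambda r: r[0]):
    freqs[person_id] = [0] * (days + 1)
    for _, d, n in group:
        freqs[person_id][(date.fromisoformat(d) - start_date).days] = n

print(freqs)  # {1: [0, 3, 0, 1, 0], 2: [0, 0, 5, 0, 0]}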
def global_stats():
    """Return global figures, broken down by cycle"""
    data = {}
    query = DB.session.query(func.count('*')).select_from(TDispositifs) \
        .filter(TDispositifs.placettes.any())
    data['nb_dispositifs'] = query.scalar()

    query = DB.session.query(
        TCycles.num_cycle,
        func.count(CorCyclesPlacettes.id_placette),
        func.count(distinct(TCycles.id_dispositif))) \
        .join(CorCyclesPlacettes) \
        .group_by(TCycles.num_cycle)
    data['cycles'] = {
        pg[0]: {
            'nb_placettes': pg[1],
            'nb_dispositifs': pg[2]
        }
        for pg in query.all()
    }

    query = DB.session.query(
        TCycles.num_cycle,
        func.count(TArbresMesures.id_arbre_mesure)) \
        .join(TArbresMesures) \
        .group_by(TCycles.num_cycle)
    for pg in query.all():
        data['cycles'][pg[0]]['nb_arbres'] = pg[1]

    data['nb_cycles'] = len(data['cycles'])
    return data
def get_underlying(cls, account_id=20):
    # read table to data frame
    tmp = read_sql(
        select([distinct(model_params.c.modelinstance)]).where(
            and_(model_params.c.accountid == account_id,
                 model_params.c.model == 'wing')),
        model_params.bind)
    # expand columns
    tmp1 = tmp.iloc[:, 0].str.split('-', expand=True).iloc[:, :2]
    selected_column_names = ['exchange', 'underlying']
    # set columns' names
    tmp1.columns = selected_column_names
    # drop duplicates
    tmp1 = tmp1.drop_duplicates(selected_column_names).reset_index(
        drop=True)
    # set exchange zh name
    tmp1 = tmp1.assign(exchange_zh=lambda tb: tb['exchange'].apply(
        lambda x: cls.get_exchange_zh(x)))
    # set contract zh name
    tmp1 = tmp1.assign(contract_zh=lambda tb: tb.apply(
        lambda x: cls.get_contract_zh(x['exchange'], x['underlying']),
        axis=1))
    return tmp1
def on_air_bangumi(self):
    session = SessionManager.Session()
    current_day = datetime.today()
    start_time = datetime(current_day.year, current_day.month, 1)
    if current_day.month == 12:
        next_year = current_day.year + 1
        next_month = 1
    else:
        next_year = current_day.year
        next_month = current_day.month + 1
    end_time = datetime(next_year, next_month, 1)
    try:
        result = session.query(distinct(Episode.bangumi_id), Bangumi).\
            join(Bangumi).\
            filter(Episode.airdate >= start_time).\
            filter(Episode.airdate <= end_time)
        bangumi_list = []
        for bangumi_id, bangumi in result:
            bangumi_dict = row2dict(bangumi)
            bangumi_dict['cover'] = utils.generate_cover_link(bangumi)
            bangumi_list.append(bangumi_dict)
        return json_resp({'data': bangumi_list})
    except Exception as error:
        raise error
    finally:
        SessionManager.Session.remove()
def metrics(request):
    context = contexts.Ctx(request)
    _ = context._

    datasets = DB.query(Package).filter(Package.state == 'active', ~Package.private).count()
    reuses = DB.query(Related).count()
    resources = DB.query(Resource).filter(Resource.state == 'active').count()
    file_formats = DB.query(distinct(Resource.format)).count()
    organizations = DB.query(Group).filter(Group.is_organization == True, Group.state == 'active').count()
    certified_organizations = DB.query(CertifiedPublicService).join(Group).filter(Group.state == 'active').count()
    users = DB.query(User).count()

    return templates.render_site('metrics.html', request, ws_url=conf['ws_url'], metrics=(
        ('datasets_count', _('Datasets'), datasets),
        ('related_count', _('Reuses'), reuses),
        ('resources_count', _('Resources'), resources),
        ('organizations_count', _('Organizations'), organizations),
        ('certifieds', _('Certified organizations'), certified_organizations),
        ('users', _('Users'), users),
        ('datasets_total_weight', _('Total quality'), '...'),
        ('datasets_average_weight', _('Average quality'), '...'),
        ('datasets_median_weight', _('Median quality'), '...'),
        ('formats_count', _('File formats'), file_formats),
    ))
def get_strains_distances(self, valid_schema):
    """Gets the strains distances.

    For all the possible pairs of strains, counts how many of their genes
    are different (different seqids so different sequences).

    The compared genes are restricted to the ones given in the valid_schema.
    """
    alias_1 = model.mlst.alias()
    alias_2 = model.mlst.alias()
    result = self.connection.execute(
        select([
            alias_1.c.souche, alias_2.c.souche,
            count(distinct(alias_1.c.gene))
        ]).select_from(
            alias_1.join(
                alias_2,
                and_(alias_1.c.seqid != alias_2.c.seqid,
                     alias_1.c.souche != alias_2.c.souche,
                     alias_1.c.gene == alias_2.c.gene))).where(
                         and_(in_(alias_1.c.gene, valid_schema),
                              alias_1.c.souche != self.__ref,
                              alias_2.c.souche != self.__ref)).group_by(
                                  alias_1.c.souche, alias_2.c.souche)).fetchall()
    distance = {}
    for entry in result:
        dist = distance.setdefault(entry[0], {})
        dist[entry[1]] = entry[2]
    return distance
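The function above returns a nested dict keyed by strain pairs; a hedged usage sketch (strain names and counts are invented, and a missing pair simply means no differing genes were counted):

# Hypothetical shape of get_strains_distances() output:
# distance[a][b] == number of valid-schema genes whose sequences
# differ between strains a and b.
distance = {
    "strain_A": {"strain_B": 12, "strain_C": 3},
    "strain_B": {"strain_A": 12, "strain_C": 9},
}
print(distance.get("strain_A", {}).get("strain_C", 0))  # 3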
def mention_frequencies(self, ids):
    """
    Return dict from person ID to a list of how frequently each
    person was mentioned per day, over the period.
    """
    rows = db.session.query(
        Entity.person_id,
        func.date_format(Document.published_at, '%Y-%m-%d').label('date'),
        func.count(distinct(DocumentEntity.doc_id)).label('count')
    ) \
        .join(DocumentEntity, Entity.id == DocumentEntity.entity_id) \
        .join(Document, DocumentEntity.doc_id == Document.id) \
        .filter(Entity.person_id.in_(ids))\
        .filter(DocumentEntity.doc_id.in_(self.doc_ids))\
        .group_by(Entity.person_id, 'date')\
        .order_by(Entity.person_id, Document.published_at)\
        .all()

    freqs = {}
    for person_id, group in groupby(rows, lambda r: r[0]):
        freqs[person_id] = [0] * (self.days + 1)
        # set day buckets based on date
        for row in group:
            d, n = parse(row[1]).date(), row[2]
            day = (d - self.start_date).days
            freqs[person_id][day] = n

    return freqs
def count_wfs_per_document_corpus_and_lexicon(session, corpus_name, lexicon_name):
    """Count the number of wordforms per document that occur in a lexicon.

    Inputs:
        session: SQLAlchemy session object.
        corpus_name: name of the corpus to count wordforms per document for.
        lexicon_name: name of the lexicon the wordforms should be in.

    Returns:
        SQLAlchemy query result.
    """
    q = select([Document.title,
                func.count(distinct(Wordform.wordform_id)).label('lexicon_freq')]) \
        .select_from(Corpus.__table__.join(corpusId_x_documentId)
                     .join(Document).join(TextAttestation).join(Wordform)
                     .join(lexical_source_wordform).join(Lexicon)) \
        .where(and_(Corpus.name == corpus_name,
                    Lexicon.lexicon_name == lexicon_name)) \
        .group_by(Document.title)

    logger.debug(f'Executing query:\n{q}')

    return session.execute(q)
def pick_words(limit=50, verbose=False):
    """
    Pick etymologically ambiguous nouns for creating manual clustering.
    """
    query = select([
        headword.c.name,
        freqs.c.freq,
    ]).select_from(joined_freq).where(
        word_sense.c.etymology_index.isnot(None)
        & (word_sense.c.pos == "Noun")
        & word_sense.c.inflection_of_id.is_(None)
    ).group_by(
        headword.c.id
    ).having(
        count(
            distinct(word_sense.c.etymology_index)
        ) > 1
    ).order_by(freqs.c.freq.desc()).limit(limit)
    session = get_session()
    candidates = session.execute(query).fetchall()
    for word, freq in candidates:
        print(word + ".Noun", "#", freq)
    if verbose:
        print("\n")
        for word, _ in candidates:
            print("#", word)
            pprint(session.execute(select([
                word_sense.c.sense_id,
                word_sense.c.sense,
            ]).select_from(joined).where(
                headword.c.name == word
            )).fetchall())
def on_air_bangumi(self):
    session = SessionManager.Session()
    current_day = datetime.today()
    start_time = datetime(current_day.year, current_day.month, 1)
    if current_day.month == 12:
        next_year = current_day.year + 1
        next_month = 1
    else:
        next_year = current_day.year
        next_month = current_day.month + 1
    end_time = datetime(next_year, next_month, 1)
    try:
        result = session.query(distinct(Episode.bangumi_id), Bangumi).\
            join(Bangumi).\
            filter(Episode.airdate >= start_time).\
            filter(Episode.airdate <= end_time)
        bangumi_list = [
            row2dict(bangumi) for bangumi_id, bangumi in result
        ]
        return json_resp({'data': bangumi_list})
    except Exception as error:
        raise error
    finally:
        SessionManager.Session.remove()
def activity():
    per_page = 100

    form = ActivityForm(request.args)

    try:
        page = int(request.args.get('page', 1))
    except ValueError:
        page = 1

    if form.format.data == 'chart-json':
        # chart data in json format
        return jsonify(ActivityChartHelper(form).chart_data())

    elif form.format.data == 'places-json':
        # places in json format
        query = Document.query\
            .options(joinedload('places').joinedload('place'))
        query = form.filter_query(query)
        return jsonify(DocumentPlace.summary_for_docs(query.all()))

    elif form.format.data == 'xlsx':
        # excel spreadsheet
        excel = XLSXBuilder(form).build()
        response = make_response(excel)
        response.headers["Content-Disposition"] = "attachment; filename=%s" % form.filename()
        response.headers["Content-Type"] = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        return response

    query = Document.query\
        .options(
            joinedload(Document.created_by),
            joinedload(Document.medium),
            joinedload(Document.topic),
            joinedload(Document.origin),
            joinedload(Document.fairness),
            joinedload(Document.sources).lazyload('*')
        )
    query = form.filter_query(query)

    # do manual pagination
    query = query.order_by(Document.created_at.desc())
    items = query.limit(per_page).offset((page - 1) * per_page).all()
    if not items and page != 1:
        abort(404)
    total = form.filter_query(db.session.query(func.count(distinct(Document.id)))).scalar()
    paged_docs = Pagination(query, page, min(per_page, len(items)), total, items)

    # group by date added
    doc_groups = []
    for date, group in groupby(paged_docs.items, lambda d: d.created_at.date()):
        doc_groups.append([date, list(group)])

    return render_template('dashboard/activity.haml',
                           form=form,
                           paged_docs=paged_docs,
                           doc_groups=doc_groups)
def organism():
    tgeList = []
    organism = request.args['organism']
    obs = Observation.query.filter(Observation.organism.like("%" + organism + "%"))
    tgeClasses = Observation.query.with_entities(Observation.tge_class).\
        filter(Observation.organism.like("%" + organism + "%")).group_by(Observation.tge_class).all()
    tgeClasses = [item for sublist in tgeClasses for item in sublist]

    # NB: organism is interpolated directly into the SQL string below,
    # so it must be trusted input (parameter binding would be safer).
    tges = db.engine.execute(
        "SELECT tge.accession, string_agg(distinct(observation.tge_class), ', ') AS tge_class, "
        "string_agg(distinct(observation.uniprot_id), ', ') AS uniprot_id " +
        " FROM tge JOIN observation ON tge.id = observation.tge_id "
        " WHERE observation.organism LIKE '%%" + organism + "%%' " +
        " GROUP BY tge.accession ORDER BY tge.accession").fetchall()

    tgeNum = separators(obs.distinct(Observation.tge_id).count())
    sampleNum = separators(obs.join(Sample).distinct(Sample.id).count())
    expNum = separators(obs.join(Sample).join(Experiment).distinct(Experiment.id).count())
    trnNum = separators(Transcript.query.with_entities(distinct(Transcript.dna_seq)).
                        join(Observation).filter(Observation.organism.like("%" + organism + "%")).count())
    pepNum = separators(Observation.query.with_entities(func.sum(Observation.peptide_num)).
                        filter(Observation.organism.like("%" + organism + "%")).scalar())

    summary = {'organism': organism, 'tgeNum': tgeNum, 'sampleNum': sampleNum,
               'expNum': expNum, 'trnNum': trnNum, 'pepNum': pepNum}

    # for tge in tges:
    #     tgeList.append({'accession': tge[0], 'tgeClasses': tge[1], 'uniprotIDs': tge[2]})

    return render_template('results/organism.html', summary=summary,
                           tges=tges, tgeClasses=tgeClasses)
def campaign_target_calls(campaign_id):
    start = request.values.get('start')
    end = request.values.get('end')

    campaign = Campaign.query.filter_by(id=campaign_id).first_or_404()

    query_calls = (db.session.query(
        Call.target_id, Call.status,
        func.count(distinct(Call.id)).label('calls_count')).filter(
            Call.campaign_id == int(campaign.id)).group_by(
                Call.target_id).group_by(Call.status))

    if start:
        try:
            startDate = dateutil.parser.parse(start)
        except ValueError:
            abort(400, 'start should be in isostring format')
        query_calls = query_calls.filter(Call.timestamp >= startDate)

    if end:
        try:
            endDate = dateutil.parser.parse(end)
            if endDate < startDate:
                abort(400, 'end should be after start')
            if endDate == startDate:
                endDate = startDate + timedelta(days=1)
        except ValueError:
            abort(400, 'end should be in isostring format')
        query_calls = query_calls.filter(Call.timestamp <= endDate)

    # join with targets for name
    subquery = query_calls.subquery('query_calls')
    query_targets = (db.session.query(Target.title, Target.name,
                                      subquery.c.status,
                                      subquery.c.calls_count).join(
                                          subquery,
                                          subquery.c.target_id == Target.id))

    # in case some calls don't get matched directly to targets
    # they are filtered out by join, so hold on to them
    calls_wo_targets = query_calls.filter(Call.target_id == None)

    targets = defaultdict(dict)
    for status in TWILIO_CALL_STATUS:
        # combine calls status for each target
        for (target_title, target_name, call_status, count) in query_targets.all():
            target = u'{} {}'.format(target_title, target_name)
            if call_status == status:
                targets[target][call_status] = targets.get(target, {}).get(
                    call_status, 0) + count
        # rows without a target come back as (target_id, status, count)
        for (target_id, call_status, count) in calls_wo_targets.all():
            if call_status == status:
                targets['Unknown'][call_status] = targets.get(
                    'Unknown', {}).get(call_status, 0) + count

    return Response(json.dumps(targets), mimetype='application/json')
def count_souches_per_gene(self):
    """Gets the number of distinct strains per gene."""
    res = self.connection.execute(
        select([model.mlst.c.gene,
                count(distinct(model.mlst.c.souche))
                ]).where(model.mlst.c.souche != self.__ref).group_by(
                    model.mlst.c.gene)).fetchall()
    return {r[0]: r[1] for r in res}
def list(self):
    connection = self._client.connect()
    rval = set()
    stmt = select([distinct(self._table.c.name)])
    for row in connection.execute(stmt):
        rval.add(row['name'])
    return list(rval)
def get_schema_names(self, connection, **kw):
    # Just select the distinct creator from all tables. Probably not the
    # best way...
    query = sql.select(
        [sql.distinct(self.sys_tables.c.creator)],
        order_by=[self.sys_tables.c.creator],
    )
    return [self.normalize_name(r[0]) for r in connection.execute(query)]
def test_question_3():
    sql = ("SELECT s.sid, s.sname FROM reserves AS r, sailors AS s "
           "WHERE r.sid = s.sid AND r.bid IN "
           "(SELECT b.bid FROM boats AS b WHERE b.color = 'red') "
           "GROUP BY r.sid HAVING COUNT(DISTINCT r.bid) = "
           "(SELECT COUNT(b.bid) FROM boats AS b WHERE b.color = 'red');")
    sub = s.query(Boat.bid).filter(Boat.color == 'red')
    number = sub.count()
    orm = s.query(Sailor.sid, Sailor.sname).join(Reservation).filter(
        Reservation.bid.in_(sub)).group_by(Reservation.sid).having(
            func.count(distinct(Reservation.bid)) == number)
    sailors_assert(sql, orm)
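The HAVING COUNT(DISTINCT ...) = COUNT(...) pattern this test exercises is relational division ("sailors who reserved every red boat"); a toy in-memory check of the same semantics, with invented names and IDs:

# Relational division by hand: keep sailors whose reserved-boat set
# covers the whole set of red boats.
red_boats = {101, 102}
reservations = {("alice", 101), ("alice", 102), ("bob", 101)}

sailors = {s for s, _ in reservations}
answer = sorted(s for s in sailors
                if {b for s2, b in reservations if s2 == s} >= red_boats)
print(answer)  # ['alice']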
def problems_chart(self):
    counts = {}
    for p in DocumentAnalysisProblem.all():
        query = db.session.query(func.count(distinct(Document.id)))
        query = self.filter(p.filter_query(query))
        counts[p.short_desc] = query.scalar()

    return {'values': counts}
def count_unique_wfs_in_corpus(session, corpus_name):
    q = select([func.count(distinct(Wordform.wordform_id))]) \
        .select_from(Corpus.__table__.join(corpusId_x_documentId)
                     .join(Document).join(TextAttestation).join(Wordform)) \
        .where(Corpus.name == corpus_name)

    logger.debug(f'Executing query:\n{q}')

    return session.execute(q)
def get_future_info(cls, account_id=20):
    res_tmp = select([
        distinct(model_params.c.modelinstance)
    ]).where(model_params.c.accountid == account_id).where(
        model_params.c.model == 'wing').execute().fetchall()
    return list(set(map(lambda x: '-'.join(x[0].split('-')[:-1]), res_tmp)))
def get_genes_by_allele(self, allele):
    """Returns all the distinct genes in the database and their sequences
    for a given allele."""
    genes = self.connection.execute(
        select([distinct(model.mlst.c.gene),
                model.sequences.c.sequence]).select_from(
                    model.mlst.join(
                        model.sequences,
                        model.mlst.c.gene == model.sequences.c.gene)).where(
                            model.sequences.c.allele == allele)).fetchall()
    return {gene.gene: gene.sequence for gene in genes}
def count_genes_per_souche(self, valid_schema):
    """Gets the number of distinct genes per strain.

    The counted genes are restricted to the ones given in the valid_schema.
    """
    res = self.connection.execute(
        select([model.mlst.c.souche,
                count(distinct(model.mlst.c.gene))
                ]).where(in_(model.mlst.c.gene, valid_schema)).group_by(
                    model.mlst.c.souche)).fetchall()
    return {r[0]: r[1] for r in res}
def get_samples_per_report(user, *args, **kwargs):
    data = request.get_json()
    report_id = data.get("report_id")
    sample_names = {
        x[0]: x[1]
        for x in db.session.query(distinct(PlotData.sample_name),
                                  Report.title).join(Report).filter(
                                      PlotData.report_id == report_id).all()
    }
    return jsonify(sample_names)
def campaign_date_calls(campaign_id):
    start = request.values.get('start')
    end = request.values.get('end')
    timespan = request.values.get('timespan', 'day')
    if timespan not in API_TIMESPANS.keys():
        abort(400, 'timespan should be one of %s' % ','.join(API_TIMESPANS))
    else:
        timespan_strf, timespan_to_char = API_TIMESPANS[timespan]

    campaign = Campaign.query.filter_by(id=campaign_id).first_or_404()

    timestamp_to_char = func.to_char(Call.timestamp, timespan_to_char).label(timespan)
    query = (
        db.session.query(
            func.min(Call.timestamp).label('date'),
            timestamp_to_char,
            Call.status,
            func.count(distinct(Call.id)).label('calls_count')
        )
        .filter(Call.campaign_id == int(campaign.id))
        .group_by(timestamp_to_char)
        .order_by(timespan)
        .group_by(Call.status)
    )

    if start:
        try:
            startDate = dateutil.parser.parse(start)
        except ValueError:
            abort(400, 'start should be in isostring format')
        query = query.filter(Call.timestamp >= startDate)

    if end:
        try:
            endDate = dateutil.parser.parse(end)
            if start:
                if endDate < startDate:
                    abort(400, 'end should be after start')
                if endDate == startDate:
                    endDate = startDate + timedelta(days=1)
        except ValueError:
            abort(400, 'end should be in isostring format')
        query = query.filter(Call.timestamp <= endDate)

    dates = defaultdict(dict)
    for (date, timespan, call_status, count) in query.all():
        # combine status values by date
        for status in TWILIO_CALL_STATUS:
            if call_status == status:
                date_string = date.strftime(timespan_strf)
                dates[date_string][status] = count

    sorted_dates = OrderedDict(sorted(dates.items()))

    return jsonify({'objects': sorted_dates})
def destinations(self):
    """
    Provides a list of destinations (queue "addresses") available.

    @return: A list of the destinations available.
    @rtype: C{set}
    """
    session = meta.Session()
    sel = select([distinct(model.frames_table.c.destination)])
    result = session.execute(sel)
    return set([r[0] for r in result.fetchall()])
def problems_chart(self):
    counts = {}
    for p in DocumentAnalysisProblem.all():
        query = db.session.query(func.count(distinct(Document.id)))
        query = self.filter(p.filter_query(query))
        counts[p.short_desc] = query.scalar()

    return {
        'values': counts
    }
def load_people_sources(self):
    """
    Load all people source data for this period.
    """
    rows = db.session.query(distinct(DocumentSource.person_id))\
        .filter(
            DocumentSource.doc_id.in_(self.doc_ids),
            DocumentSource.person_id != None)\
        .group_by(DocumentSource.person_id)\
        .all()
    self.people = self._lookup_people([r[0] for r in rows])
def _load_people_mentions(self):
    """
    Load all people mentions data for this period.
    """
    rows = (
        db.session.query(distinct(Entity.person_id))
        .filter(DocumentEntity.doc_id.in_(self.doc_ids),
                Entity.person_id != None)
        .join(DocumentEntity, DocumentEntity.entity_id == Entity.id)
        .all()
    )
    self.people = self._lookup_people([r[0] for r in rows])
def _load_people_mentions(self):
    """
    Load all people mentions data for this period.
    """
    rows = db.session.query(distinct(Entity.person_id))\
        .filter(
            DocumentEntity.doc_id.in_(self.doc_ids),
            Entity.person_id != None)\
        .join(DocumentEntity, DocumentEntity.entity_id == Entity.id)\
        .all()
    self.people = self._lookup_people([r[0] for r in rows])
def campaign_date_calls(campaign_id):
    start = request.values.get('start')
    end = request.values.get('end')
    timespan = request.values.get('timespan', 'day')
    if timespan not in API_TIMESPANS.keys():
        abort(400, 'timespan should be one of %s' % ','.join(API_TIMESPANS))
    else:
        timespan_strf = API_TIMESPANS[timespan]

    campaign = Campaign.query.filter_by(id=campaign_id).first_or_404()

    timespan_extract = extract(timespan, Call.timestamp).label(timespan)
    query = (
        db.session.query(
            func.min(Call.timestamp).label('date'),
            timespan_extract,
            Call.status,
            func.count(distinct(Call.id)).label('calls_count')
        )
        .filter(Call.campaign_id == int(campaign.id))
        .group_by(timespan_extract)
        .order_by(timespan)
        .group_by(Call.status)
    )

    if start:
        try:
            startDate = dateutil.parser.parse(start)
        except ValueError:
            abort(400, 'start should be in isostring format')
        query = query.filter(Call.timestamp >= startDate)

    if end:
        try:
            endDate = dateutil.parser.parse(end)
            if endDate < startDate:
                abort(400, 'end should be after start')
            if endDate == startDate:
                endDate = startDate + timedelta(days=1)
        except ValueError:
            abort(400, 'end should be in isostring format')
        query = query.filter(Call.timestamp <= endDate)

    dates = defaultdict(dict)
    for (date, timespan, call_status, count) in query.all():
        # combine status values by date
        for status in TWILIO_CALL_STATUS:
            if call_status == status:
                date_string = date.strftime(timespan_strf)
                dates[date_string][status] = count

    sorted_dates = OrderedDict(sorted(dates.items()))

    return Response(json.dumps(sorted_dates), mimetype='application/json')
def analytics_recommendations():
    user = db.session.query(User).get(request.args.get("user"))

    table_header = ["Artist", "Friends", "Friend count", "Plays"]
    table_body = db.session.query(
        func.concat('<a href="http://last.fm/music/', Scrobble.artist, '">', Scrobble.artist, '</a>'),
        func.group_concat(distinct(User.username).op("SEPARATOR")(literal_column('", "'))),
        func.count(distinct(User.username)),
        func.count(Scrobble.id)
    ).\
        join(User).\
        filter(
            Scrobble.user_id.in_(request.args.getlist("users")),
            ~Scrobble.artist.in_([a[0] for a in db.session.query(distinct(Scrobble.artist)).filter_by(user=user).all()])
        ).\
        group_by(Scrobble.artist).\
        order_by(-func.count(Scrobble.id) if request.args.get("target") == "scrobbles" else -func.count(distinct(User.username))).\
        all()[0:1000]

    return dict(title="Recommendations for %s" % user.username,
                table_header=table_header,
                table_body=table_body)
def test_column_uniqueness(self, column):
    if column.unique is True:
        return True

    session = self.parent._session()
    row_count = session.query(column).count()
    if row_count == 0:
        return False

    distinct_count = session.query(distinct(column)).count()
    session.close()

    if row_count == distinct_count:
        return True
    return False
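The two counts above can also be fetched in one round trip; a sketch under the same session and column assumptions (not the original implementation; note that SQL COUNT(col) skips NULLs, so this differs from row counting on nullable columns):

from sqlalchemy import distinct, func

def test_column_uniqueness_single_query(self, column):
    # Sketch: compare COUNT(col) with COUNT(DISTINCT col) in one SELECT.
    if column.unique is True:
        return True
    session = self.parent._session()
    try:
        row_count, distinct_count = session.query(
            func.count(column), func.count(distinct(column))).one()
    finally:
        session.close()
    return row_count > 0 and row_count == distinct_count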
def runs(session: Session) -> List[Run]:
    triaged_issues = (session.query(
        RunColumn.id.label("run_id"),
        func.count(distinct(IssueInstance.id)).label("count"),
    ).group_by(IssueInstance.run_id).join(
        IssueInstance, IssueInstance.run_id == RunColumn.id).join(
            Issue, Issue.id == IssueInstance.issue_id).filter(
                Issue.status != IssueStatus.UNCATEGORIZED).subquery())

    return (session.query(
        RunColumn.id.label("run_id"),
        RunColumn.date,
        RunColumn.commit_hash,
        func.count(distinct(IssueInstance.id)).label("num_issues"),
        triaged_issues.c.count.label("triaged_issues"),
    ).group_by(RunColumn).join(
        IssueInstance, IssueInstance.run_id == RunColumn.id,
        isouter=True).join(
            triaged_issues,
            triaged_issues.c.run_id == RunColumn.id,
            isouter=True).filter(RunColumn.status == "finished").order_by(
                RunColumn.id.desc()).all())
def campaigns_overall():
    start = request.values.get('start')
    end = request.values.get('end')
    timespan = request.values.get('timespan', 'day')
    if timespan not in API_TIMESPANS.keys():
        abort(400, 'timespan should be one of %s' % ','.join(API_TIMESPANS))
    else:
        timespan_strf, timespan_to_char = API_TIMESPANS[timespan]

    timestamp_to_char = func.to_char(Call.timestamp, timespan_to_char).label(timespan)
    query = (db.session.query(
        func.min(Call.timestamp).label('date'),
        Call.campaign_id,
        timestamp_to_char,
        func.count(distinct(Call.id)).label('calls_count')).group_by(
            Call.campaign_id).group_by(timestamp_to_char).order_by(timespan))

    completed_query = db.session.query(Call.timestamp, Call.id).filter_by(status='completed')

    if start:
        try:
            startDate = dateutil.parser.parse(start)
        except ValueError:
            abort(400, 'start should be in isostring format')
        query = query.filter(Call.timestamp >= startDate)
        completed_query = completed_query.filter(Call.timestamp >= startDate)

    if end:
        try:
            endDate = dateutil.parser.parse(end)
            if start:
                if endDate < startDate:
                    abort(400, 'end should be after start')
                if endDate == startDate:
                    endDate = startDate + timedelta(days=1)
        except ValueError:
            abort(400, 'end should be in isostring format')
        query = query.filter(Call.timestamp <= endDate)
        completed_query = completed_query.filter(Call.timestamp <= endDate)

    dates = defaultdict(dict)
    for (date, campaign_id, timespan, count) in query.all():
        date_string = date.strftime(timespan_strf)
        dates[date_string][int(campaign_id)] = count

    sorted_dates = OrderedDict(sorted(dates.items()))

    meta = {'calls_completed': completed_query.count()}

    return jsonify({'meta': meta, 'objects': sorted_dates})
def get_view_names(self, connection, schema=None, **kw):
    current_schema = self.denormalize_name(
        schema or self.default_schema_name)

    query = sql.select(
        # Need distinct since a view may span multiple rows as of DB2 for
        # z/OS 9, where SYSIBM.SYSVIEWS.TEXT is VARCHAR(1500). In DB2 for
        # z/OS 10, this is changed to SYSIBM.SYSVIEWS.STATEMENT, which is
        # CLOB(2M). We only support version 9 for now.
        [sql.distinct(self.sys_views.c.name)],
        self.sys_views.c.creator == current_schema,
        order_by=[self.sys_views.c.name]
    )
    return [self.normalize_name(r[0]) for r in connection.execute(query)]
def autocomplete(request):
    context = contexts.Ctx(request)
    pattern = '{0}%'.format(request.params.get('q', ''))
    num = int(request.params.get('num', 0))
    query = DB.query(distinct(func.lower(Tag.name)).label('name'),
                     func.count(PackageTag.package_id).label('total'))
    query = query.join(PackageTag)
    query = query.filter(Tag.name.ilike(pattern))
    query = query.order_by('total desc', 'name').group_by('name')
    if num:
        query = query.limit(num)
    data = [row[0] for row in query]
    headers = wsgihelpers.handle_cross_origin_resource_sharing(context)
    return wsgihelpers.respond_json(context, data, headers=headers)
def protein():
    genoverse = summary = {}
    uniprot = request.args['uniprot']
    organism = Observation.query.with_entities(distinct(Observation.organism)).filter_by(uniprot_id=uniprot).first_or_404()
    protein = Observation.query.with_entities(Observation.organism, Observation.protein_name,
                                              Observation.protein_descr, Observation.gene_name).\
        filter_by(uniprot_id=uniprot).group_by(Observation.organism, Observation.protein_name,
                                               Observation.protein_descr, Observation.gene_name).one()

    summary = {'protein_name': protein.protein_name, 'gene_name': protein.gene_name,
               'protein_descr': protein.protein_descr, 'organism': protein.organism}

    tges = TGE.query.with_entities(TGE.accession, TGE.tge_class, func.count(Observation.id).label('obsCount')).\
        join(Observation).filter_by(uniprot_id=uniprot).\
        group_by(TGE.accession, TGE.tge_class).all()

    obj = Experiment.query.with_entities(Experiment.title, Sample.name, Sample.id).\
        join(Sample).join(Observation).filter_by(uniprot_id=uniprot).\
        group_by(Experiment.title, Sample.name, Sample.id).all()

    if (organism[0] == "Homo sapiens" or organism[0] == "Mus musculus"):
        for ob in obj:
            file = os.path.dirname(__file__) + "/../static/data/" + ob.title + "/" + ob.name + \
                ".assemblies.fasta.transdecoder.genome.gff3_identified.gff3"
            df = pd.read_table(file, sep="\t", index_col=None)

            obs = Observation.query.with_entities(Observation.long_description).\
                filter_by(uniprot_id=uniprot, sample_id=ob.id).first()
            arr = obs.long_description.split(" ")
            mRNA = arr[0]
            gene = arr[1]

            row = df[df['attributes'].str.contains(re.escape("ID=" + gene + ";") + "|" + re.escape(mRNA) + "[;.]")]

            if (len(row['seqid'].iloc[0]) <= 5):
                chrom = re.search(r'(\d|[X]|[Y])+', row.iloc[0, 0]).group()
                start = row.iloc[0, 3]
                end = row.iloc[0, 4]
            else:
                chrom = row.iloc[0, 0]
                start = row.iloc[0, 3]
                end = row.iloc[0, 4]

            genoverse = {'uniprot': uniprot, 'chr': chrom, 'start': start, 'end': end}
            break
            # chrom = re.search(r'\d+', row.iloc[0,0]).group()

    return render_template('results/protein.html', tges=tges, genoverse=genoverse,
                           uniprot=uniprot, summary=summary, organism=organism[0])
def on_air_bangumi(self, user_id, type):
    session = SessionManager.Session()
    current_day = datetime.today()
    start_time = datetime(current_day.year, current_day.month, 1)
    if current_day.month == 12:
        next_year = current_day.year + 1
        next_month = 1
    else:
        next_year = current_day.year
        next_month = current_day.month + 1
    end_time = datetime(next_year, next_month, 1)
    try:
        result = session.query(distinct(Episode.bangumi_id), Bangumi).\
            join(Bangumi).\
            options(joinedload(Bangumi.cover_image)).\
            filter(Bangumi.delete_mark == None).\
            filter(Bangumi.type == type).\
            filter(Episode.airdate >= start_time).\
            filter(Episode.airdate <= end_time).\
            order_by(desc(getattr(Bangumi, 'air_date')))

        bangumi_list = []
        bangumi_id_list = [bangumi_id for bangumi_id, bangumi in result]

        if len(bangumi_id_list) == 0:
            return json_resp({'data': []})

        favorites = session.query(Favorites).\
            filter(Favorites.bangumi_id.in_(bangumi_id_list)).\
            filter(Favorites.user_id == user_id).\
            all()

        for bangumi_id, bangumi in result:
            bangumi_dict = row2dict(bangumi, Bangumi)
            bangumi_dict['cover'] = utils.generate_cover_link(bangumi)
            utils.process_bangumi_dict(bangumi, bangumi_dict)
            for fav in favorites:
                if fav.bangumi_id == bangumi_id:
                    bangumi_dict['favorite_status'] = fav.status
                    break
            bangumi_list.append(bangumi_dict)

        return json_resp({'data': bangumi_list})
    finally:
        SessionManager.Session.remove()
def all(sess, nodes, flatten=False):
    parent_uri = '/'.join(nodes)
    parent_path = uri_to_ltree(parent_uri)
    depth = uri_depth(parent_uri)
    stmt = sess.query(
        distinct(func.subpath(Node.path, 0, depth + 1).label("subpath"))
    )
    stmt = stmt.filter(
        Node.path.op("<@")(parent_path)
    )
    stmt = stmt.subquery()
    qry = sess.query(
        Node
    )
    qry = qry.filter(
        Node.path.in_(stmt)
    )
    return qry
def fairness_chart(self):
    query = db.session.query(
        Fairness.name.label('t'),
        func.count(distinct(DocumentFairness.doc_id)))\
        .join(DocumentFairness)\
        .join(Document, DocumentFairness.doc_id == Document.id)\
        .group_by('t')
    rows = self.filter(query).all()
    counts = dict(rows)

    counts.setdefault('Fair', 0)  # missing documents are considered fair
    counts['Fair'] += len(self.doc_ids) - sum(counts.itervalues())

    return {
        'values': counts
    }
def autocomplete_formats(request):
    context = contexts.Ctx(request)
    pattern = request.params.get('q', '')
    headers = wsgihelpers.handle_cross_origin_resource_sharing(context)
    if not pattern:
        return wsgihelpers.respond_json(context, [], headers=headers)
    pattern = '{0}%'.format(pattern)
    num = int(request.params.get('num', 0))
    query = DB.query(distinct(func.lower(Resource.format)).label('format'),
                     func.count(Resource.id).label('count'))
    query = query.filter(Resource.format.ilike(pattern))
    query = query.order_by('count', 'format').group_by('format')
    if num:
        query = query.limit(num)
    data = [row[0] for row in query]
    return wsgihelpers.respond_json(context, data, headers=headers)
def directional_item(item_id, db, forward=True, tag=None, rating=0, count=1):
    query = db.query(File).order_by(File.id.asc() if forward else File.id.desc()).filter(
        File.deleted == 0)
    if tag and "untagged" in tag:
        query = query.filter(File.tags == None)
    elif tag:
        search_tags = tag.split(",")
        # Old "any" search (kept for reference; referenced a `terms` variable
        # that no longer exists in this scope):
        # query = query.filter(File.tags.any(Tag.tag.in_(terms.split(" "))))
        query = query.join(File.tags).filter(Tag.tag.in_(search_tags)).group_by(File).having(
            func.count(distinct(Tag.id)) == len(search_tags))
    elif rating:
        query = query.filter(File.rating == rating)
    total = query.count()
    query = query.filter(File.id > int(item_id) if forward else File.id < int(item_id)).limit(count).all()
    return prepare_file_items(query, app.settings, expected=count, total=total)
def refresh_bloom(bot, db):
    """
    Refreshes the bloom filter.

    :param bot: Bot storing the bloom filter
    :param db: Database handle
    :return: New bloom filter.
    """
    # Get filter planning statistics
    count = db.query(sql.func.count(sql.distinct(StarsystemPrefix.first_word))).scalar() or 0
    bits, hashes = BloomFilter.suggest_size_and_hashes(rate=0.01, count=max(32, count), max_hashes=10)
    bloom = BloomFilter(bits, BloomFilter.extend_hashes(hashes))
    with timed() as t:
        bloom.update(x[0] for x in db.query(StarsystemPrefix.first_word).distinct())
    # print(
    #     "Recomputing bloom filter took {} seconds. {}/{} bits, {} hashes, {} false positive chance"
    #     .format(end-start, bloom.setbits, bloom.bits, hashes, bloom.false_positive_chance())
    # )
    bot.memory['ratbot']['starsystem_bloom'] = bloom
    bot.memory['ratbot']['stats']['starsystem_bloom'] = {'entries': count, 'time': t.seconds}
    return bloom
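suggest_size_and_hashes is this project's own helper; presumably (an assumption, not the ratbot code) it implements the standard Bloom filter sizing formulas m = -n*ln(p)/ln(2)^2 and k = (m/n)*ln(2), which this standalone sketch reproduces:

import math

def suggest_size_and_hashes(rate, count, max_hashes):
    # Standard Bloom filter sizing: bits m and hash count k for
    # n = count items at target false-positive rate p = rate.
    m = math.ceil(-count * math.log(rate) / math.log(2) ** 2)
    k = min(max_hashes, max(1, round(m / count * math.log(2))))
    return m, k

print(suggest_size_and_hashes(rate=0.01, count=1000, max_hashes=10))  # (9586, 7)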
def check_network_not_in_use(self, context, t_ctx, network_id):
    # use a different name to avoid overriding _ensure_network_not_in_use
    subnets = self._get_subnets_by_network(context, network_id)
    auto_delete_port_names = []

    for subnet in subnets:
        subnet_id = subnet['id']
        region_names = [e[0] for e in t_ctx.session.query(
            sql.distinct(models.Pod.region_name)).join(
            models.ResourceRouting,
            models.Pod.pod_id == models.ResourceRouting.pod_id).filter(
            models.ResourceRouting.top_id == subnet_id)]
        auto_delete_port_names.extend([t_constants.interface_port_name % (
            region_name, subnet_id) for region_name in region_names])
        dhcp_port_name = t_constants.dhcp_port_name % subnet_id
        snat_port_name = t_constants.snat_port_name % subnet_id
        auto_delete_port_names.append(dhcp_port_name)
        auto_delete_port_names.append(snat_port_name)

    if not auto_delete_port_names:
        # pre-created port not found, any ports left need to be deleted
        # before deleting network
        non_auto_delete_ports = context.session.query(
            models_v2.Port.id).filter_by(network_id=network_id)
        if non_auto_delete_ports.count():
            raise exceptions.NetworkInUse(net_id=network_id)
        return

    t_pod = db_api.get_top_pod(t_ctx)
    auto_delete_port_ids = [e[0] for e in t_ctx.session.query(
        models.ResourceRouting.bottom_id).filter_by(
        pod_id=t_pod['pod_id'], resource_type=t_constants.RT_PORT).filter(
        models.ResourceRouting.top_id.in_(auto_delete_port_names))]

    non_auto_delete_ports = context.session.query(
        models_v2.Port.id).filter_by(network_id=network_id).filter(
        ~models_v2.Port.id.in_(auto_delete_port_ids))
    if non_auto_delete_ports.count():
        raise exceptions.NetworkInUse(net_id=network_id)
def create_mapper(sample_base_mapper, stock_sample_tbl,
                  pooled_supplier_molecule_design_tbl,
                  supplier_molecule_design_tbl):
    "Mapper factory."
    psmd = pooled_supplier_molecule_design_tbl
    smd = supplier_molecule_design_tbl
    sts = stock_sample_tbl.alias()
    prd_sel = \
        select([distinct(smd.c.product_id)],
               and_(sts.c.molecule_design_set_id ==
                    stock_sample_tbl.c.molecule_design_set_id,
                    smd.c.supplier_id == stock_sample_tbl.c.supplier_id,
                    smd.c.is_current),
               from_obj=[sts.join(psmd,
                                  psmd.c.molecule_design_set_id ==
                                  sts.c.molecule_design_set_id)
                         .join(smd,
                               smd.c.supplier_molecule_design_id ==
                               psmd.c.supplier_molecule_design_id)]
               )
    m = mapper(StockSample, stock_sample_tbl,
               inherits=sample_base_mapper,
               properties=dict(
                   molecule_design_pool=relationship(
                       MoleculeDesignPool,
                       uselist=False,
                       innerjoin=True,
                       back_populates='stock_samples'),
                   molecule_design_pool_id=stock_sample_tbl.c.molecule_design_set_id,
                   supplier=relationship(Organization),
                   molecule_type=relationship(MoleculeType),
                   registration=relationship(SampleRegistration,
                                             back_populates='sample',
                                             uselist=False),
                   product_id=column_property(prd_sel.as_scalar(),
                                              deferred=True)
                   ),
               polymorphic_identity=SAMPLE_TYPES.STOCK
               )
    return m
def index():
    expNum = Experiment.query.with_entities(func.count(distinct(Experiment.id))).scalar()
    smlNum = Sample.query.with_entities(func.count(distinct(Sample.id))).scalar()
    tges = TGE.query.with_entities(func.count(distinct(TGE.id))).scalar()

    return render_template('home/index.html', expNum=expNum, smlNum=smlNum,
                           species=4, tges=separators(tges))