def eval(self, builder):
    """Return a query object selecting Sample ids for this expression tree.

    ``self.value`` is an alternating sequence ``[operand, op, operand, op, ...]``;
    operands expose their own ``eval(builder)`` returning a selectable of
    Sample ids, and ``op`` is one of ``'|'`` (union), ``'&'`` (intersection)
    or ``':'`` (symmetric difference).
    """
    dbh = builder._get_dbh()
    tokens = self.value[1:]
    # BUG FIX: the first operand's result was assigned to ``expr_1`` while
    # every later use reads ``eval_1`` -> NameError as soon as an operator
    # was applied. Bind it to ``eval_1`` directly.
    eval_1 = self.value[0].eval(builder)
    while tokens:
        op = tokens[0]
        eval_2 = tokens[1].eval(builder)
        tokens = tokens[2:]
        if op == '|':
            eval_1 = sqla.union(eval_1, eval_2)
        elif op == '&':
            eval_1 = sqla.intersect(eval_1, eval_2)
        elif op == ':':
            # Symmetric difference: (A UNION B) EXCEPT (A INTERSECT B).
            eval_1 = sqla.except_(
                dbh.session().query(dbh.Sample.id).filter(
                    dbh.Sample.id.in_(sqla.union(eval_1, eval_2))),
                # BUG FIX: was ``dbh.Sample.iid.n_(...)`` — a typo for
                # ``dbh.Sample.id.in_(...)`` (AttributeError at runtime).
                dbh.session().query(dbh.Sample.id).filter(
                    dbh.Sample.id.in_(sqla.intersect(eval_1, eval_2))),
            )
    q = dbh.session().query(dbh.Sample.id).filter(dbh.Sample.id.in_(eval_1))
    return q
def test_intersect_unions_2(self):
    """INTERSECT of two aliased UNION subselects sharing t3's rows."""
    left = union(select([t1.c.col3, t1.c.col4]),
                 select([t3.c.col3, t3.c.col4])).alias().select()
    right = union(select([t2.c.col3, t2.c.col4]),
                  select([t3.c.col3, t3.c.col4])).alias().select()
    u = intersect(left, right)
    expected = [("aaa", "ccc"), ("bbb", "aaa"), ("ccc", "bbb")]
    eq_(self._fetchall_sorted(u.execute()), expected)
def test_intersect(self):
    """INTERSECT yields the same rows directly and through an alias."""
    stmt = intersect(
        select([t2.c.col3, t2.c.col4]),
        select([t2.c.col3, t2.c.col4], t2.c.col4 == t3.c.col3),
    )
    expected = [("aaa", "bbb"), ("bbb", "ccc"), ("ccc", "aaa")]
    eq_(self._fetchall_sorted(stmt.execute()), expected)
    eq_(self._fetchall_sorted(stmt.alias("bar").select().execute()), expected)
def test_composite_alias(self):
    """Alias an INTERSECT whose right operand is an aliased 3-way UNION."""
    inner_union = union(
        select([t1.c.col3, t1.c.col4]),
        select([t2.c.col3, t2.c.col4]),
        select([t3.c.col3, t3.c.col4]),
    ).alias().select()
    ua = intersect(select([t2.c.col3, t2.c.col4]), inner_union).alias()
    expected = [("aaa", "bbb"), ("bbb", "ccc"), ("ccc", "aaa")]
    eq_(self._fetchall_sorted(ua.select().execute()), expected)
def test_intersect(self):
    """Rows common to a plain select and a join-filtered select."""
    query = intersect(
        select([t2.c.col3, t2.c.col4]),
        select([t2.c.col3, t2.c.col4], t2.c.col4 == t3.c.col3),
    )
    expected = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]
    eq_(self._fetchall_sorted(query.execute()), expected)
    eq_(self._fetchall_sorted(query.alias('bar').select().execute()), expected)
def test_composite_alias(self):
    """INTERSECT with an aliased three-way UNION operand, aliased itself."""
    unioned = union(
        select([t1.c.col3, t1.c.col4]),
        select([t2.c.col3, t2.c.col4]),
        select([t3.c.col3, t3.c.col4]),
    )
    ua = intersect(select([t2.c.col3, t2.c.col4]),
                   unioned.alias().select()).alias()
    expected = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]
    eq_(self._fetchall_sorted(ua.select().execute()), expected)
def test_composite_alias(self):
    """Verify rows from an aliased intersect-of-union composite."""
    branches = [select([tbl.c.col3, tbl.c.col4]) for tbl in (t1, t2, t3)]
    ua = intersect(select([t2.c.col3, t2.c.col4]),
                   union(*branches).alias().select()).alias()
    wanted = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]
    eq_(self._fetchall_sorted(ua.select().execute()), wanted)
def test_intersect(self):
    """Rows common to the unfiltered and the join-filtered select of t2."""
    lhs = select([t2.c.col3, t2.c.col4])
    rhs = select([t2.c.col3, t2.c.col4], t2.c.col4 == t3.c.col3)
    i = intersect(lhs, rhs)
    wanted = [('aaa', 'bbb'), ('bbb', 'ccc'), ('ccc', 'aaa')]
    eq_(self._fetchall_sorted(i.execute()), wanted)
    eq_(self._fetchall_sorted(i.alias('bar').select().execute()), wanted)
def test_intersect_unions_2(self):
    """INTERSECT of two aliased UNIONs, each union including t3's rows."""
    def union_with_t3(tbl):
        # Helper: UNION of tbl's (col3, col4) with t3's, wrapped in an alias.
        return union(
            select([tbl.c.col3, tbl.c.col4]),
            select([t3.c.col3, t3.c.col4]),
        ).alias().select()

    u = intersect(union_with_t3(t1), union_with_t3(t2))
    wanted = [('aaa', 'ccc'), ('bbb', 'aaa'), ('ccc', 'bbb')]
    eq_(self._fetchall_sorted(u.execute()), wanted)
def test_intersect_unions_2(self):
    """Intersect two aliased unions; only the overlap rows survive."""
    u = intersect(
        union(select([t1.c.col3, t1.c.col4]),
              select([t3.c.col3, t3.c.col4])).alias().select(),
        union(select([t2.c.col3, t2.c.col4]),
              select([t3.c.col3, t3.c.col4])).alias().select(),
    )
    rows = self._fetchall_sorted(u.execute())
    eq_(rows, [('aaa', 'ccc'), ('bbb', 'aaa'), ('ccc', 'bbb')])
def variants_in_region(self, connection: Connection, genomic_interval: GenomicInterval,
                       output_region_attrs: List[Vocabulary], meta_attrs: MetadataAttrs,
                       region_attrs: Optional[RegionAttrs]) -> Selectable:
    """Build a SELECT of the distinct requested attributes for the variants
    located inside ``genomic_interval``, restricted to the samples that match
    ``meta_attrs`` and (when given) ``region_attrs``.

    Returns the statement without executing it.
    """
    # init state
    self.connection = connection
    self._set_meta_attributes(meta_attrs)
    self.create_table_of_meta(['item_id'])
    self._set_region_attributes(region_attrs)
    self.create_table_of_regions(['item_id'])
    # Sample set: metadata matches, intersected with region matches when a
    # region constraint was provided.
    if self.my_region_t is not None:
        only_from_samples = intersect(select([self.my_meta_t.c.item_id]),
                                      select([self.my_region_t.c.item_id]))
    else:
        only_from_samples = select([self.my_meta_t.c.item_id])
    only_from_samples = only_from_samples.alias('samples')
    # Map each requested output attribute onto its column in the genomes table.
    select_columns = [genomes.c[self.region_col_map[att]].label(att.name)
                      for att in output_region_attrs]
    # NOTE(review): the interval test only constrains genomes.c.start —
    # variants that overlap the interval by their end coordinate are
    # excluded; confirm this is intended.
    stmt =\
        select(select_columns).distinct() \
        .select_from(genomes.join(only_from_samples,
                                  only_from_samples.c.item_id == genomes.c.item_id)) \
        .where((genomes.c.start >= genomic_interval.start) &
               (genomes.c.start <= genomic_interval.stop) &
               (genomes.c.chrom == genomic_interval.chrom))
    if self.log_sql_commands:
        utils.show_stmt(connection, stmt, self.logger.debug,
                        f'KGenomes: VARIANTS IN REGION '
                        f'{genomic_interval.chrom}'
                        f'-{genomic_interval.start}-{genomic_interval.stop}')
    return stmt
def build_join(op, lcte, rcte):
    """Combine two CTEs: UNION when ``op`` is OR, INTERSECT otherwise."""
    combinator = union if op == OR else intersect
    return aliased(combinator(lcte, rcte)).select()
# Demo script: SQLAlchemy set operations (UNION / UNION ALL / EXCEPT /
# INTERSECT) against a SQLite database.
# NOTE(review): the email/postal literals below are masked
# ('*****@*****.**') — presumably redacted sample patterns; restore real
# patterns before running.
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, union, union_all, except_, intersect

engine = create_engine('sqlite:///college.db', echo=True)
meta = MetaData()
conn = engine.connect()

# Table definition only — assumes the 'addresses' table already exists in
# college.db with matching columns.
addresses = Table('addresses', meta,
                  Column('id', Integer, primary_key=True),
                  Column('st_id', Integer),
                  Column('postal_add', String),
                  Column('email_add', String))

# UNION: rows matching either email pattern, duplicates removed.
u = union(addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
          addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
result.fetchall()

# UNION ALL: same as above but duplicates are kept.
u = union_all(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
result = conn.execute(u)
result.fetchall()

# EXCEPT: rows matching the email pattern minus those with a '%Pune' address.
u = except_(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Pune')))
result = conn.execute(u)
result.fetchall()

# INTERSECT: rows matching both the email pattern and a '%Delhi' address.
u = intersect(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Delhi')))
result = conn.execute(u)
result.fetchall()
# Demo script: SELECT variants (where/order_by/between), aggregate functions,
# and set operations. 'con', 'students' and 'addresses' are expected to be
# defined earlier in the file.
#stmt = select([students]).where(or_(students.c.name == 'Ravi', students.c.id == 1))
# NOTE(review): the select above is commented out, so this execute relies on a
# 'stmt' defined earlier in the file — confirm.
rows = con.execute(stmt)
print(rows.fetchall())

# Only the last assignment to stmt is executed; the earlier ones are
# overwritten (demo leftovers).
stmt = select([students]).order_by(asc(students.c.id))
stmt = select([students]).order_by(desc(students.c.id))
stmt = select([students]).where(between(students.c.id, 2, 3))
# BUG FIX: was prints(...) — undefined name (NameError); should be print(...).
print(con.execute(stmt))

# Aggregate functions; only the last few results are printed.
result = con.execute(select([func.now()]))
print(result.fetchone())
result = con.execute(select([func.count(students.c.id)]))
result = con.execute(select([func.max(students.c.id)]))
result = con.execute(select([func.min(students.c.id)]))
result = con.execute(select([func.avg(students.c.id).label('avg')]))
print(result.fetchone())

from sqlalchemy import union, union_all, except_, intersect
# u = union(addresses.select().where(addresses.c.email_add.like('%@gmail.com addresses.select().where(addresses.c.email_add.like('*****@*****.**'))))
# Each set operation below overwrites u; only the final intersect is executed.
u = union_all(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')))
u = except_(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Pune')))
u = intersect(
    addresses.select().where(addresses.c.email_add.like('*****@*****.**')),
    addresses.select().where(addresses.c.postal_add.like('%Pune')))
# BUG FIX: was prints(...) — undefined name (NameError); should be print(...).
print(con.execute(u))
print (res) # SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add FROM "ADDRESSES" # WHERE "ADDRESSES".email_add LIKE ? UNION ALL SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add # FROM "ADDRESSES" WHERE "ADDRESSES".email_add LIKE ? u = union_all(addresses.select().where(addresses.c.email_add.like('*****@*****.**')), addresses.select().where(addresses.c.email_add.like('*****@*****.**'))) result = conn.execute(u) for res in result: print (res) # EXCEPT # SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add FROM "ADDRESSES" # WHERE "ADDRESSES".email_add LIKE ? EXCEPT SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add # FROM "ADDRESSES" WHERE "ADDRESSES".email_add LIKE ? u = except_(addresses.select().where(addresses.c.email_add.like('*****@*****.**')), addresses.select().where(addresses.c.email_add.like('*****@*****.**'))) result = conn.execute(u) for res in result: print (res) # INTERSECT # SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add FROM "ADDRESSES" # WHERE "ADDRESSES".email_add LIKE ? INTERSECT SELECT "ADDRESSES".id, "ADDRESSES".st_id, "ADDRESSES".postal_add, "ADDRESSES".email_add # FROM "ADDRESSES" WHERE "ADDRESSES".email_add LIKE ? u = intersect(addresses.select().where(addresses.c.email_add.like('*****@*****.**')), addresses.select().where(addresses.c.email_add.like('*****@*****.**'))) result = conn.execute(u) for res in result: print (res) meta.drop_all(engine)
def rank_variants_by_frequency(self, connection, meta_attrs: MetadataAttrs, region_attrs: RegionAttrs,
                               ascending: bool, freq_threshold: float, limit_result: int,
                               time_estimate_only: bool) -> FromClause:
    """Rank the variants of the selected 1000Genomes sample set by mutation
    frequency and materialize the ranking into a new table, which is returned.

    Raises EmptyResult when no individuals match (or, deliberately, after
    emitting estimates when ``time_estimate_only`` is True) and Notice when
    the selected population is too large.
    """
    # init state
    self.connection = connection
    self._set_meta_attributes(meta_attrs)
    self.create_table_of_meta(['item_id', 'gender'])
    self._set_region_attributes(region_attrs)
    self.create_table_of_regions(['item_id'])
    # if self.my_region_t is None:
    #     raise Notice("1000Genomes data set is too broad. Please restrict the population size by setting at least one "
    #                  "region constraint imposing the presence of one or more variants in a precise locus or genomic "
    #                  "area.")
    # Count individuals per gender within the selected sample set.
    females_and_males_stmt = \
        select([self.my_meta_t.c.gender, func.count(self.my_meta_t.c.item_id)])
    if self.my_region_t is not None:
        females_and_males_stmt = females_and_males_stmt\
            .where(self.my_meta_t.c.item_id.in_(select([self.my_region_t.c.item_id])))
    females_and_males_stmt = females_and_males_stmt.group_by(self.my_meta_t.c.gender)
    gender_of_individuals = [row.values() for row in connection.execute(females_and_males_stmt).fetchall()]
    if len(gender_of_individuals) == 0:
        raise EmptyResult('1000Genomes')
    females = next((el[1] for el in gender_of_individuals if el[0] == 'female'), 0)
    males = next((el[1] for el in gender_of_individuals if el[0] == 'male'), 0)
    population_size = males + females
    # NOTE(review): the check allows population_size == 150 although the
    # message says "smaller than 150" — confirm intended boundary.
    if population_size > 150:
        raise Notice(f"1000Genomes data set is too broad. Please restrict the population size by setting more constraints. "
                     f"This query is allowed for populations smaller than 150. "
                     f"Currently selected is {population_size}")
    if time_estimate_only:
        # Report the time/size estimates through notifications, then abort
        # with EmptyResult so no ranking is actually computed.
        estimated_time = str(9 * population_size) if population_size <= 149 else "2700"  # ~45 min if pop > 149
        self.notify_message(SourceMessage.Type.TIME_TO_FINISH, estimated_time)
        self.notify_message(SourceMessage.Type.GENERAL_WARNING,
                            f'Genomes to analyze in 1000Genomes: {population_size}')
        locale.setlocale(locale.LC_ALL, '')
        estimated_n_variants = 4144924 * population_size
        self.notify_message(SourceMessage.Type.GENERAL_WARNING,
                            f'Estimated number variants to rank in 1000Genomes: ~{estimated_n_variants:n}')
        raise EmptyResult('1000Genomes')
    # reduce size of the join with genomes table
    genomes_red = select(
        [genomes.c.item_id, genomes.c.chrom, genomes.c.start, genomes.c.ref,
         genomes.c.alt, genomes.c.al1, genomes.c.al2])\
        .alias('variants_few_columns')
    # custom functions
    # Occurrence counts alleles: al1 plus al2 (al2 may be NULL for haploid calls).
    func_occurrence = (func.sum(genomes_red.c.al1) +
                       func.sum(func.coalesce(genomes_red.c.al2, 0))).label(Vocabulary.OCCURRENCE.name)
    func_positive_donors = func.count(genomes_red.c.item_id).label(Vocabulary.POSITIVE_DONORS.name)
    # Frequency is computed server-side by assembly-specific SQL functions.
    if meta_attrs.assembly == 'hg19':
        func_frequency_new = func.rr.mut_frequency_new_hg19(func_occurrence, males, females,
                                                            genomes_red.c.chrom, genomes_red.c.start)
    else:
        func_frequency_new = func.rr.mut_frequency_new_grch38(func_occurrence, males, females,
                                                              genomes_red.c.chrom, genomes_red.c.start)
    func_frequency_new = func_frequency_new.label(Vocabulary.FREQUENCY.name)
    # Actually, self.my_region_t already contains only the individuals compatible with meta_attrs, but it can contain
    # duplicated item_id. Since we want to join, it's better to remove them.
    # LIMIT is part of a trick used to speed up the job. See later
    if self.my_region_t is not None:
        sample_set_with_limit = intersect(select([self.my_meta_t.c.item_id]), select([self.my_region_t.c.item_id])) \
            .limit(population_size) \
            .alias('sample_set')
    else:
        sample_set_with_limit = select([self.my_meta_t.c.item_id])\
            .limit(population_size) \
            .alias('sample_set')
    # One output row per distinct variant, aggregated over the sample set.
    stmt = select([genomes_red.c.chrom.label(Vocabulary.CHROM.name),
                   genomes_red.c.start.label(Vocabulary.START.name),
                   genomes_red.c.ref.label(Vocabulary.REF.name),
                   genomes_red.c.alt.label(Vocabulary.ALT.name),
                   cast(literal(population_size), types.Integer).label(Vocabulary.POPULATION_SIZE.name),
                   func_occurrence,
                   func_positive_donors,
                   func_frequency_new]) \
        .select_from(genomes_red.join(sample_set_with_limit,
                                      genomes_red.c.item_id == sample_set_with_limit.c.item_id)) \
        .group_by(genomes_red.c.chrom, genomes_red.c.start, genomes_red.c.ref, genomes_red.c.alt)
    # Threshold filters are inclusive in the direction of the ranking.
    if ascending:
        if freq_threshold:
            stmt = stmt.having(func_frequency_new >= freq_threshold)
        stmt = stmt.order_by(asc(func_frequency_new), asc(func_occurrence))
    else:
        if freq_threshold:
            stmt = stmt.having(func_frequency_new <= freq_threshold)
        stmt = stmt.order_by(desc(func_frequency_new), desc(func_occurrence))
    stmt = stmt.limit(limit_result)
    self.logger.debug(
        f'KGenomes: request /rank_variants_by_frequency for a population of {population_size} individuals')
    # this + LIMIT in sample_set_with_limit make the trick to force using the index, but only up to 149 individuals
    if population_size <= 149:
        # 333 is the population size at which the execution time w index matches that w/o index
        connection.execute('SET SESSION enable_seqscan=false')
    # create result table
    if self.log_sql_commands:
        self.logger.debug('KGenomes: RANKING VARIANTS IN SAMPLE SET')
    t_name = utils.random_t_name_w_prefix('ranked_variants')
    utils.create_table_as(t_name, stmt, default_schema_to_use_name, connection,
                          self.log_sql_commands, self.logger.debug)
    result = Table(t_name, db_meta, autoload=True, autoload_with=connection,
                   schema=default_schema_to_use_name)
    connection.invalidate()  # instead of setting seqscan=true discard this connection
    return result
# Demo of legacy SQLAlchemy "implicit execution": statements are bound to the
# engine through the table metadata, so stmt.execute() works without an
# explicit connection. 'simple_table', 'second_table' and 'intersect' are
# expected to be defined earlier in the file.
# NOTE(review): id is compared against the string '7' — relies on DB-side
# type coercion; confirm intended.
stmt = simple_table.select(simple_table.c.id=='7')
print(stmt)
print(stmt.execute().fetchone())

stmt = simple_table.select(simple_table.c.col1!='update data')
print(stmt.execute().fetchall())
print(stmt.execute().rowcount)

# bindparam
print('============================')
# Placeholder bound at execution time via the 'id' keyword.
stmt = simple_table.select(whereclause=simple_table.c.id==bindparam('id'))
print(stmt.execute(id=6).fetchone())

stmt = simple_table.select(group_by=[simple_table.c.col1])
print(stmt.execute().fetchall())
print('============================')

# join
from_obj = simple_table.join(second_table)
#from_obj = simple_table.outerjoin(second_table)
q = simple_table.select().select_from(from_obj).where(simple_table.c.id == second_table.c.simple_id)
print(q.column('second.simple_id'))
print(q.execute().fetchall())

# set operation
print('============================')
# Rows with 1 < id < 7 are the intersection of the two selects.
q1 = simple_table.select(simple_table.c.id > 1)
q2 = simple_table.select(simple_table.c.id < 7)
print(q1.execute().fetchall())
print(q2.execute().fetchall())
q = intersect(q1, q2)
print(q)
def build_join(op, lcte, rcte):
    """UNION the two CTEs when ``op`` is OR, otherwise INTERSECT them."""
    if op == OR:
        combined = union(lcte, rcte)
    else:
        combined = intersect(lcte, rcte)
    return combined.subquery().select()