def gene2exon_list (cursor, gene_id, db_name=None, verbose=False): exons = [] if (db_name): if not switch_to_db(cursor, db_name): return False qry = "select * from gene2exon where gene_id = %d " % gene_id rows = search_db(cursor, qry) if (not rows): rows = search_db(cursor, 'select database()') if verbose: print "database ", rows[0][0] rows = search_db(cursor, qry, verbose = True) print rows return [] for row in rows: exon = Exon() if (not exon.load_from_gene2exon(row)): continue exons.append(exon) return exons
def gene2exon_list(cursor, gene_id, db_name=None, verbose=False): exons = [] if db_name: if not switch_to_db(cursor, db_name): return False qry = "select * from gene2exon where gene_id = %d " % gene_id rows = search_db(cursor, qry) if not rows: rows = search_db(cursor, "select database()") if verbose: print "database ", rows[0][0] rows = search_db(cursor, qry, verbose=True) print rows return [] for row in rows: exon = Exon() if not exon.load_from_gene2exon(row): continue exons.append(exon) return exons
def get_predicted_exons(cursor, gene_id, species): exons = [] # get the region on the gene ret = get_gene_region(cursor, gene_id) if ret: [gene_seq_id, gene_region_start, gene_region_end, gene_region_strand] = ret else: print "region not retrived for ", species, gene_id return [] qry = "SELECT * FROM prediction_exon WHERE seq_region_id = %d " % gene_seq_id qry += " AND seq_region_start >= %d AND seq_region_start <= %d " % (gene_region_start, gene_region_end) qry += " AND seq_region_end >= %d AND seq_region_end <= %d " % (gene_region_start, gene_region_end) rows = search_db(cursor, qry) if not rows: return [] for row in rows: exon = Exon() exon.gene_id = gene_id exon.load_from_ensembl_prediction(gene_region_start, gene_region_end, row) exons.append(exon) return exons
def get_predicted_exons (cursor, gene_id, species): exons = [] # get the region on the gene ret = get_gene_region (cursor, gene_id) if ret: [gene_seq_id, gene_region_start, gene_region_end, gene_region_strand] = ret else: print "region not retrived for ", species, gene_id return [] qry = "SELECT * FROM prediction_exon WHERE seq_region_id = %d " % gene_seq_id qry += " AND seq_region_start >= %d AND seq_region_start <= %d " % \ (gene_region_start, gene_region_end) qry += " AND seq_region_end >= %d AND seq_region_end <= %d " % \ (gene_region_start, gene_region_end) rows = search_db (cursor, qry) if (not rows): return [] for row in rows: exon = Exon() exon.gene_id = gene_id exon.load_from_ensembl_prediction (gene_region_start, gene_region_end, row) exons.append(exon) return exons
def get_exon (cursor, exon_id, is_known=None, db_name=None):
    """Fetch a single exon by id and return it as an Exon object.

    is_known selects the table: 2 reads sw_exon and 3 reads usearch_exon
    (both loaded with load_from_novel_exon); any other value reads
    gene2exon, with an additional "is_known" filter when is_known is truthy.
    When db_name is given the cursor is switched to that database first.

    A blank Exon() is returned when the switch fails or no row is found.
    """
    exon = Exon ()
    if db_name and not switch_to_db(cursor, db_name):
        return exon

    # (is_known flag, table name handed to the loader, query template)
    novel_sources = (
        (2, "sw_exon", "select * from sw_exon where exon_id = %d"),
        (3, "usearch_exon", "select * from usearch_exon where exon_id = %d"),
    )
    for flag, table, template in novel_sources:
        if is_known == flag:
            hits = search_db(cursor, template % exon_id, verbose=False)
            if hits:
                exon.load_from_novel_exon (hits[0], table)
            return exon

    # everything else lives in gene2exon
    qry = "select * from gene2exon where exon_id = %d" % exon_id
    if is_known:
        qry += " and is_known = %s " % is_known
    hits = search_db(cursor, qry, verbose=False)
    if hits:
        exon.load_from_gene2exon (hits[0])
    return exon
def get_novel_exons (cursor, gene_id, table):
    """Return all exons of a gene stored in the given novel-exon table.

    Arguments:
        cursor   -- database cursor passed to search_db
        gene_id  -- gene identifier; converted with int() before formatting
        table    -- table name, interpolated into the query and also passed
                    to Exon.load_from_novel_exon

    Returns a list of Exon objects, empty when the query yields no rows.
    """
    qry = "select * from %s " % table + " where gene_id = %d " % int(gene_id)

    novel = []
    # a false result from search_db is treated as "no rows"
    for record in search_db (cursor, qry) or ():
        exon = Exon()
        exon.load_from_novel_exon (record, table)
        novel.append(exon)
    return novel
def get_exon(cursor, exon_id, is_known=None, db_name=None):
    """Look up one exon by its id and return it wrapped in an Exon.

    Table choice depends on is_known:
        2      -> sw_exon       (loaded via load_from_novel_exon)
        3      -> usearch_exon  (loaded via load_from_novel_exon)
        other  -> gene2exon     (loaded via load_from_gene2exon); a truthy
                  is_known is added to the query as an extra filter

    If db_name is given, the cursor is switched to it before querying.
    An unloaded Exon() comes back when the switch fails or no row matches.
    """
    exon = Exon()
    if db_name and not switch_to_db(cursor, db_name):
        return exon

    # first decide where to look, then run one shared query/load step
    novel_table = None
    if is_known == 2:
        novel_table = "sw_exon"
        qry = "select * from sw_exon where exon_id = %d" % exon_id
    elif is_known == 3:
        novel_table = "usearch_exon"
        qry = "select * from usearch_exon where exon_id = %d" % exon_id
    else:
        qry = "select * from gene2exon where exon_id = %d" % exon_id
        if is_known:
            qry += " and is_known = %s " % is_known

    rows = search_db(cursor, qry, verbose=False)
    if rows:
        if novel_table is None:
            exon.load_from_gene2exon(rows[0])
        else:
            exon.load_from_novel_exon(rows[0], novel_table)
    return exon
def get_known_exons (cursor, gene_id, species): exons = [] qry = "select distinct exon_transcript.exon_id from exon_transcript, transcript " qry += " where exon_transcript.transcript_id = transcript.transcript_id " qry += " and transcript.gene_id = %d " % gene_id rows = search_db (cursor, qry) if (not rows ): return [] if ('Error' in rows[0]): search_db (cursor, qry, verbose = True) return [] # get the region on the gene ret = get_gene_region (cursor, gene_id) if ret: [gene_seq_id, gene_region_start, gene_region_end, gene_region_strand] = ret else: print "region not retrived for ", species, gene_id return [] exon_ids = [] for row in rows: exon_ids.append(row[0]) for exon_id in exon_ids: qry = "select * from exon where exon_id=%d" % exon_id rows = search_db (cursor, qry) if (not rows or 'Error' in rows[0]): search_db (cursor, qry, verbose = True) continue exon = Exon() exon.gene_id = gene_id exon.load_from_ensembl_exon (gene_region_start, gene_region_end, rows[0]) exons.append(exon) return exons
def extract_regions_from_annotations(self, annotation_lines, region_name):
    '''
    Collect the annotation records of one feature type.

    annotation_lines -- sequence of tab-separated annotation lines
    region_name      -- value the third column must equal for a line
                        to be kept

    Every matching line is split on tabs, wrapped in an Exon and added to
    an ExonsList, which is returned.  Lines with fewer than three columns
    (e.g. blank or header lines) cannot match and are skipped instead of
    raising IndexError.  Progress is reported through self.print_progres
    and a newline is written to stderr once the loop is done.
    '''
    exons = ExonsList()
    total = len(annotation_lines)
    for idx, line in enumerate(annotation_lines):
        fields = line.split('\t')
        # guard the column access: short lines have no feature-type column
        if len(fields) > 2 and fields[2] == region_name:
            exons.add_exon(Exon(fields))
        # NOTE(review): progress is assumed to be reported for every line,
        # not only for matching ones -- confirm against the original layout.
        self.print_progres(idx, total)
    sys.stderr.write("\n")
    return exons
def add_exon(self, chr_map, sequence=''):
    """Build an Exon from this object, chr_map and sequence, and append it
    to self.exons.  Returns None."""
    created = Exon(self, chr_map, sequence)
    self.exons.append(created)
def map2exon(cursor, ensembl_db_name, map, paralogue=False):
    """Build a placeholder Exon describing the second exon of a map entry.

    Arguments:
        cursor          -- database cursor passed to switch_to_db / search_db
        ensembl_db_name -- container indexed by map.species_2 to obtain the
                           database name to switch to
        map             -- object providing exon_id_2, exon_known_2, source
                           and species_2 (the parameter name shadows the
                           builtin map() inside this function)
        paralogue       -- when false, the cursor is first switched to the
                           database of map.species_2

    Returns:
        An Exon with exon_id and is_known copied from the map entry.
        analysis_id is -1 for source 'sw_sharp', -2 for 'usearch' and 1 for
        anything else.  For the two novel sources exon_seq_id is read from
        sw_exon / usearch_exon, or set to -1 when the database switch fails
        or the lookup yields nothing.
    """
    # this is fake exon info! to be passed to get_exon_pepseq
    exon = Exon ()
    exon.exon_id = map.exon_id_2
    exon.is_known = map.exon_known_2
    # NOTE(review): the source layout was flattened; each "else:" below is
    # paired with the nearest "if not rows:" -- confirm against the original.
    # With this pairing exon_seq_id is not assigned here when paralogue is true.
    if map.source == 'sw_sharp':
        exon.analysis_id = -1
        if not paralogue:
            # move to the other species
            rows = switch_to_db (cursor, ensembl_db_name[map.species_2])
            if not rows:
                # could not switch databases: mark the sequence id as unknown
                exon.exon_seq_id = -1
                return exon
            else:
                qry = "select exon_seq_id from sw_exon where exon_id = %d " % exon.exon_id
                rows = search_db (cursor, qry)
                # no row, or an empty first column, also means "unknown"
                if not rows or not rows[0][0]:
                    exon.exon_seq_id = -1
                else:
                    exon.exon_seq_id = int(rows[0][0])
    elif map.source == 'usearch':
        exon.analysis_id = -2
        if not paralogue:
            # same procedure as above, against the usearch_exon table
            rows = switch_to_db (cursor, ensembl_db_name[map.species_2])
            if not rows:
                exon.exon_seq_id = -1
                return exon
            else:
                qry = "select exon_seq_id from usearch_exon where exon_id = %d " % exon.exon_id
                rows = search_db (cursor, qry)
                if not rows or not rows[0][0]:
                    exon.exon_seq_id = -1
                else:
                    exon.exon_seq_id = int(rows[0][0])
    else:
        # any other source: no sequence-id lookup is done
        exon.analysis_id = 1
    return exon