def get_introns(self):
    """Build the list of intron coordinates for this transcript.

    One intron is created for every pair of adjacent exons in
    ``self.exons``.  Each intron is a ``Coord`` on ``self.chrom`` and
    ``self.strand`` that spans the gap between the two exons, and it
    carries references to its flanking exons as ``exon_5p`` (the exon
    earlier in ``self.exons``) and ``exon_3p`` (the following exon).

    When ``self.intron_scores`` is not None, the matching entry is
    stored as ``intron.score``.  When ``self.known_intron_flags`` is
    not None, ``intron.is_known`` is True exactly when the matching
    flag is the string "1".

    Returns:
        A list of intron coordinates, in the same order as the exon
        pairs they separate.
    """
    introns = []

    for idx in range(len(self.exons) - 1):
        exon_5p = self.exons[idx]
        exon_3p = self.exons[idx + 1]

        # On the reverse strand the second exon of the pair has the
        # lower coordinates, so the gap runs from its end to the start
        # of the first exon.
        if self.strand == -1:
            gap_start, gap_end = exon_3p.end + 1, exon_5p.start - 1
        else:
            gap_start, gap_end = exon_5p.end + 1, exon_3p.start - 1

        intron = Coord(self.chrom, gap_start, gap_end, strand=self.strand)
        intron.exon_5p = exon_5p
        intron.exon_3p = exon_3p
        introns.append(intron)

        if self.intron_scores is not None:
            intron.score = self.intron_scores[idx]

        if self.known_intron_flags is not None:
            intron.is_known = self.known_intron_flags[idx] == "1"

    return introns
def get_introns(self):
    """Return the introns of this transcript as a list of coordinates.

    Every consecutive pair of exons in ``self.exons`` yields one
    ``Coord`` covering the bases between them, created on
    ``self.chrom`` with ``self.strand``.  The flanking exons are
    attached to the intron as ``exon_5p`` and ``exon_3p``.  Optional
    per-intron data is copied over when present: ``score`` from
    ``self.intron_scores`` and ``is_known`` (True when the flag equals
    "1") from ``self.known_intron_flags``.
    """
    result = []

    # Walk over adjacent exon pairs; ``n`` is the intron's index.
    adjacent_exons = zip(self.exons, self.exons[1:])
    for n, (first, second) in enumerate(adjacent_exons):
        reverse = self.strand == -1

        # For reverse-strand transcripts the second exon of the pair
        # lies at lower coordinates than the first.
        start = (second.end if reverse else first.end) + 1
        end = (first.start if reverse else second.start) - 1

        intron = Coord(self.chrom, start, end, strand=self.strand)
        intron.exon_5p = first
        intron.exon_3p = second
        result.append(intron)

        if self.intron_scores is not None:
            intron.score = self.intron_scores[n]

        if self.known_intron_flags is not None:
            flag = self.known_intron_flags[n]
            intron.is_known = True if flag == "1" else False

    return result
def read_transcripts(path, chrom_dict):
    """Retrieve all transcripts from the specified transcript file.

    The file is read row by row with ``txtfile.read_rows``.  Each row
    must provide the columns ID, NAME, CDS.START, CDS.END, STRAND,
    CHROM, EXON.STARTS and EXON.ENDS; the columns EXON.SCORES,
    INTRON.SCORES and KNOWN.INTRON are optional.  List-valued columns
    are comma-separated, and the string "NA" denotes a missing value
    for ID, NAME, CDS.START, CDS.END, INTRON.SCORES and KNOWN.INTRON.

    Args:
        path: Path of the transcript file to read.
        chrom_dict: Mapping from the value of the CHROM column to the
            chromosome object handed to ``Coord``.

    Returns:
        A list with one ``Transcript`` per row, in file order.

    Raises:
        ValueError: If the number of exon scores differs from the
            number of exons, or the number of intron scores differs
            from the number of exons minus one.
    """
    transcripts = []

    # Use a context manager so the file is closed even when a
    # malformed row raises part-way through the file.
    with open(path, "r") as f:
        for row in txtfile.read_rows(f):
            tr_id = None if row['ID'] == "NA" else int(row['ID'])
            name = None if row["NAME"] == "NA" else row['NAME']

            # parse CDS start/end
            if row['CDS.START'] == 'NA':
                cds_start = None
            else:
                cds_start = int(row['CDS.START'])

            if row['CDS.END'] == 'NA':
                cds_end = None
            else:
                cds_end = int(row['CDS.END'])

            strand = int(row['STRAND'])
            chrom = chrom_dict[row['CHROM']]

            # parse exons
            exon_starts = [int(x) for x in row['EXON.STARTS'].split(",")]
            exon_ends = [int(x) for x in row['EXON.ENDS'].split(",")]

            if "EXON.SCORES" in row:
                exon_scores = [float(x)
                               for x in row['EXON.SCORES'].split(",")]
                if len(exon_scores) != len(exon_starts):
                    raise ValueError("Expected %d exon scores, got %d" %
                                     (len(exon_starts), len(exon_scores)))
            else:
                exon_scores = None

            if ("INTRON.SCORES" in row) and (row['INTRON.SCORES'] != 'NA'):
                intron_scores = [float(x)
                                 for x in row['INTRON.SCORES'].split(",")]
                # There is one intron between each pair of adjacent exons.
                if len(intron_scores) != len(exon_starts) - 1:
                    raise ValueError("Expected %d intron scores, got %d" %
                                     (len(exon_starts) - 1,
                                      len(intron_scores)))
            else:
                intron_scores = None

            # Flags are kept as strings; they are not converted here.
            if ("KNOWN.INTRON" in row) and (row['KNOWN.INTRON'] != "NA"):
                intron_flags = row['KNOWN.INTRON'].split(",")
            else:
                intron_flags = None

            exons = []
            for i, exon_start in enumerate(exon_starts):
                exon = Coord(chrom, exon_start, exon_ends[i], strand)
                if exon_scores is not None:
                    exon.score = exon_scores[i]
                exons.append(exon)

            tr = Transcript(name=name, exons=exons,
                            cds_start=cds_start, cds_end=cds_end,
                            intron_scores=intron_scores,
                            known_intron_flags=intron_flags,
                            idnum=tr_id)
            transcripts.append(tr)

    return transcripts
def read_transcripts(path, chrom_dict):
    """Retrieve all transcripts from the specified transcript file.

    Rows are obtained from ``txtfile.read_rows`` and each one is
    converted into a ``Transcript`` built from ``Coord`` exons.  The
    columns ID, NAME, CDS.START, CDS.END, STRAND, CHROM, EXON.STARTS
    and EXON.ENDS are required; EXON.SCORES, INTRON.SCORES and
    KNOWN.INTRON are used only when present.  Multi-valued columns are
    comma-separated, and "NA" marks a missing ID, NAME, CDS.START,
    CDS.END, INTRON.SCORES or KNOWN.INTRON value.

    Args:
        path: Path of the transcript file to read.
        chrom_dict: Mapping from the CHROM column value to the
            chromosome object passed to ``Coord``.

    Returns:
        A list of ``Transcript`` objects, one per row, in file order.

    Raises:
        ValueError: If the count of exon scores does not equal the
            number of exons, or the count of intron scores does not
            equal the number of exons minus one.
    """

    def int_or_none(text):
        # "NA" is the file's marker for a missing integer value.
        return None if text == "NA" else int(text)

    transcripts = []

    # The ``with`` block guarantees the file handle is released even if
    # a row fails to parse and an exception propagates to the caller.
    with open(path, "r") as f:
        for row in txtfile.read_rows(f):
            tr_id = int_or_none(row['ID'])
            name = None if row["NAME"] == "NA" else row['NAME']

            # parse CDS start/end
            cds_start = int_or_none(row['CDS.START'])
            cds_end = int_or_none(row['CDS.END'])

            strand = int(row['STRAND'])
            chrom = chrom_dict[row['CHROM']]

            # parse exons
            exon_starts = [int(x) for x in row['EXON.STARTS'].split(",")]
            exon_ends = [int(x) for x in row['EXON.ENDS'].split(",")]
            n_exons = len(exon_starts)

            exon_scores = None
            if "EXON.SCORES" in row:
                exon_scores = [float(x)
                               for x in row['EXON.SCORES'].split(",")]
                if len(exon_scores) != n_exons:
                    raise ValueError("Expected %d exon scores, got %d" %
                                     (n_exons, len(exon_scores)))

            intron_scores = None
            if ("INTRON.SCORES" in row) and (row['INTRON.SCORES'] != 'NA'):
                intron_scores = [float(x)
                                 for x in row['INTRON.SCORES'].split(",")]
                # One intron separates each pair of adjacent exons.
                if len(intron_scores) != n_exons - 1:
                    raise ValueError("Expected %d intron scores, got %d" %
                                     (n_exons - 1, len(intron_scores)))

            # Known-intron flags are passed through as strings.
            intron_flags = None
            if ("KNOWN.INTRON" in row) and (row['KNOWN.INTRON'] != "NA"):
                intron_flags = row['KNOWN.INTRON'].split(",")

            exons = []
            for i in range(n_exons):
                exon = Coord(chrom, exon_starts[i], exon_ends[i], strand)
                if exon_scores is not None:
                    exon.score = exon_scores[i]
                exons.append(exon)

            transcripts.append(
                Transcript(name=name, exons=exons,
                           cds_start=cds_start, cds_end=cds_end,
                           intron_scores=intron_scores,
                           known_intron_flags=intron_flags,
                           idnum=tr_id))

    return transcripts