示例#1
0
    def get_introns(self):
        """Build the introns that lie between consecutive exons.

        One Coord is created for every pair of neighbouring exons, in
        the order the exons are stored. Each intron records the exon
        pair it sits between (exon_5p / exon_3p) and, when the
        transcript carries them, its score and known-intron status.

        Returns a list of Coord objects; the list is empty when there
        are fewer than two exons.
        """
        result = []

        exon_pairs = zip(self.exons, self.exons[1:])
        for idx, (upstream, downstream) in enumerate(exon_pairs):
            # On the -1 strand the intron runs from the end of the
            # second exon to the start of the first; otherwise from the
            # end of the first exon to the start of the second.
            if self.strand == -1:
                left, right = downstream, upstream
            else:
                left, right = upstream, downstream

            gap = Coord(self.chrom, left.end + 1, right.start - 1,
                        strand=self.strand)
            gap.exon_5p = upstream
            gap.exon_3p = downstream
            result.append(gap)

            if self.intron_scores is not None:
                gap.score = self.intron_scores[idx]

            if self.known_intron_flags is not None:
                # Flags are compared as strings: only "1" marks a known intron.
                gap.is_known = self.known_intron_flags[idx] == "1"

        return result
示例#2
0
    def get_introns(self):
        """Return the introns of this transcript as a list of Coord objects.

        An intron is produced for each adjacent pair of exons, following
        the stored exon order. The neighbouring exons are attached to
        the intron as exon_5p and exon_3p; a score and an is_known flag
        are attached when the transcript has intron scores or known
        intron flags.
        """
        found = []

        exons = self.exons
        for pos in range(1, len(exons)):
            five_prime = exons[pos - 1]
            three_prime = exons[pos]

            # For strand -1 the second exon of the pair supplies the
            # intron start and the first supplies the intron end; any
            # other strand value uses the opposite assignment.
            reverse = self.strand == -1
            start_exon = three_prime if reverse else five_prime
            end_exon = five_prime if reverse else three_prime

            intron = Coord(self.chrom,
                           start_exon.end + 1,
                           end_exon.start - 1,
                           strand=self.strand)
            intron.exon_5p = five_prime
            intron.exon_3p = three_prime
            found.append(intron)

            # Scores and flags are indexed by intron number (0-based).
            if self.intron_scores is not None:
                intron.score = self.intron_scores[pos - 1]

            if self.known_intron_flags is not None:
                intron.is_known = self.known_intron_flags[pos - 1] == "1"

        return found
示例#3
0
def read_transcripts(path, chrom_dict):
    """Retrieve all transcripts from the specified transcript file.

    Every row produced by txtfile.read_rows() is turned into a
    Transcript whose exons are Coord objects.

    Arguments:
      path       -- path of the transcript file; opened in text mode.
      chrom_dict -- mapping looked up with each row's CHROM value; the
                    value found is passed to Coord as the chromosome.

    Columns read from each row:
      ID, CDS.START, CDS.END   -- integers, or "NA" (stored as None)
      NAME                     -- string, or "NA" (stored as None)
      STRAND                   -- integer
      CHROM                    -- key into chrom_dict
      EXON.STARTS, EXON.ENDS   -- comma-separated integers
      EXON.SCORES (optional)   -- comma-separated floats, one per exon
      INTRON.SCORES (optional) -- comma-separated floats, one per
                                  intron, or "NA"
      KNOWN.INTRON (optional)  -- comma-separated flags kept as
                                  strings, or "NA"

    Returns a list of Transcript objects in file order.

    Raises ValueError when the number of exon or intron scores does not
    match the number of exons, or when a numeric field cannot be parsed.
    """
    transcripts = []

    # Use a context manager so the file is closed even if a malformed
    # row raises part-way through parsing.
    with open(path, "r") as f:
        for row in txtfile.read_rows(f):
            # "NA" marks a missing value in the optional scalar columns
            tr_id = None if row['ID'] == "NA" else int(row['ID'])
            name = None if row["NAME"] == "NA" else row['NAME']

            # parse CDS start/end
            if row['CDS.START'] == 'NA':
                cds_start = None
            else:
                cds_start = int(row['CDS.START'])

            if row['CDS.END'] == 'NA':
                cds_end = None
            else:
                cds_end = int(row['CDS.END'])

            strand = int(row['STRAND'])
            chrom = chrom_dict[row['CHROM']]

            # parse exons
            exon_starts = [int(x) for x in row['EXON.STARTS'].split(",")]
            exon_ends = [int(x) for x in row['EXON.ENDS'].split(",")]

            if "EXON.SCORES" in row:
                exon_scores = [float(x)
                               for x in row['EXON.SCORES'].split(",")]
                if len(exon_scores) != len(exon_starts):
                    raise ValueError("Expected %d exon scores, got %d" %
                                     (len(exon_starts), len(exon_scores)))
            else:
                exon_scores = None

            if ("INTRON.SCORES" in row) and (row['INTRON.SCORES'] != 'NA'):
                intron_scores = [float(x)
                                 for x in row['INTRON.SCORES'].split(",")]
                # n exons are separated by n-1 introns
                if len(intron_scores) != len(exon_starts) - 1:
                    raise ValueError("Expected %d intron scores, got %d" %
                                     (len(exon_starts)-1, len(intron_scores)))
            else:
                intron_scores = None

            if ("KNOWN.INTRON" in row) and (row['KNOWN.INTRON'] != "NA"):
                # flags are left as strings; no conversion is done here
                intron_flags = row['KNOWN.INTRON'].split(",")
            else:
                intron_flags = None

            exons = []
            for i, exon_start in enumerate(exon_starts):
                exon = Coord(chrom, exon_start, exon_ends[i], strand)
                if exon_scores is not None:
                    exon.score = exon_scores[i]
                exons.append(exon)

            tr = Transcript(name=name, exons=exons,
                            cds_start=cds_start, cds_end=cds_end,
                            intron_scores=intron_scores,
                            known_intron_flags=intron_flags,
                            idnum=tr_id)

            transcripts.append(tr)

    return transcripts
示例#4
0
def read_transcripts(path, chrom_dict):
    """Retrieve all transcripts from the specified transcript file.

    Each row returned by txtfile.read_rows() becomes one Transcript
    built from Coord exons.

    Arguments:
      path       -- path of the transcript file; opened in text mode.
      chrom_dict -- mapping indexed by each row's CHROM value; the
                    looked-up value is handed to Coord as the chromosome.

    Row columns used:
      ID, CDS.START, CDS.END   -- integers, "NA" is stored as None
      NAME                     -- string, "NA" is stored as None
      STRAND                   -- integer
      CHROM                    -- key into chrom_dict
      EXON.STARTS, EXON.ENDS   -- comma-separated integers
      EXON.SCORES (optional)   -- comma-separated floats, one per exon
      INTRON.SCORES (optional) -- comma-separated floats, one per
                                  intron, or "NA"
      KNOWN.INTRON (optional)  -- comma-separated flags kept as
                                  strings, or "NA"

    Returns the Transcript objects as a list, in file order.

    Raises ValueError if the count of exon or intron scores disagrees
    with the number of exons, or if a numeric field cannot be parsed.
    """
    transcripts = []

    # The with-block guarantees the file handle is released even when
    # parsing a row raises an exception.
    with open(path, "r") as f:
        for row in txtfile.read_rows(f):
            # optional scalar columns use "NA" for a missing value
            tr_id = None if row['ID'] == "NA" else int(row['ID'])
            name = None if row["NAME"] == "NA" else row['NAME']

            # parse CDS start/end
            if row['CDS.START'] == 'NA':
                cds_start = None
            else:
                cds_start = int(row['CDS.START'])

            if row['CDS.END'] == 'NA':
                cds_end = None
            else:
                cds_end = int(row['CDS.END'])

            strand = int(row['STRAND'])
            chrom = chrom_dict[row['CHROM']]

            # parse exons
            exon_starts = [int(x) for x in row['EXON.STARTS'].split(",")]
            exon_ends = [int(x) for x in row['EXON.ENDS'].split(",")]

            if "EXON.SCORES" in row:
                exon_scores = [float(x)
                               for x in row['EXON.SCORES'].split(",")]
                if len(exon_scores) != len(exon_starts):
                    raise ValueError("Expected %d exon scores, got %d" %
                                     (len(exon_starts), len(exon_scores)))
            else:
                exon_scores = None

            if ("INTRON.SCORES" in row) and (row['INTRON.SCORES'] != 'NA'):
                intron_scores = [float(x)
                                 for x in row['INTRON.SCORES'].split(",")]
                # there is one intron fewer than there are exons
                if len(intron_scores) != len(exon_starts) - 1:
                    raise ValueError("Expected %d intron scores, got %d" %
                                     (len(exon_starts) - 1,
                                      len(intron_scores)))
            else:
                intron_scores = None

            if ("KNOWN.INTRON" in row) and (row['KNOWN.INTRON'] != "NA"):
                # kept as the raw string flags from the file
                intron_flags = row['KNOWN.INTRON'].split(",")
            else:
                intron_flags = None

            exons = []
            for i, exon_start in enumerate(exon_starts):
                exon = Coord(chrom, exon_start, exon_ends[i], strand)
                if exon_scores is not None:
                    exon.score = exon_scores[i]
                exons.append(exon)

            tr = Transcript(name=name,
                            exons=exons,
                            cds_start=cds_start,
                            cds_end=cds_end,
                            intron_scores=intron_scores,
                            known_intron_flags=intron_flags,
                            idnum=tr_id)

            transcripts.append(tr)

    return transcripts