def test_cds_regions(self):
    """Check cds_regions() on a forward- and a reverse-strand transcript.

    In the expected tuples, a region clipped at its upper edge by a coding
    coordinate ends one past that coordinate (53000 -> 53001 on '+',
    55 -> 56 on '-'), while a region clipped at its lower edge starts
    exactly on the coordinate.
    """
    scenarios = [
        (
            {
                'strand': '+',
                'exons': '10000-20000,30000-40000,50000-60000',
                'coding_start': '12000',
                'coding_end': '53000',
            },
            [(12000, 20000), (30000, 40000), (50000, 53001)],
        ),
        (
            {
                'strand': '-',
                'exons': '50-60, 30-40, 10-20',
                'coding_start': '55',
                'coding_end': '15',
            },
            [(50, 56), (30, 40), (15, 20)],
        ),
    ]
    for db_record, expected_regions in scenarios:
        transcript = Transcript()
        transcript.read_from_database_record(db_record)
        regions = transcript.cds_regions()
        # Compare position by position; only the first three regions are checked.
        for position, expected in enumerate(expected_regions):
            assert regions[position] == expected
def test_add(self):
    """Check that add() files the transcript as a row under its chromosome.

    The first row stored under chromosome '11' must hold the id, the
    chromosome, the comma-joined exon ranges, and the start and end.
    """
    exon_specs = ['100-200', '300-400', '500-600']
    transcript = Transcript(id='xyz', chrom='11', strand='+', start=130, end=580)
    transcript.exons = [Exon(spec) for spec in exon_specs]

    self.tdb_writer.add(transcript)

    stored_row = self.tdb_writer._records['11'][0]
    expected_row = ['xyz', '11', '100-200,300-400,500-600', 130, 580]
    assert stored_row == expected_row
def test_finalize(self):
    """Check the values Transcript.finalize() derives on both strands.

    Each case lists strand, coding start, coding end, exon ranges and the
    expected protein length; cDNA length, start and end are the same for
    both cases.
    """
    cases = [
        ('+', 16, 53, ['10-20', '30-40', '50-60'], 5),
        ('-', 55, 15, ['50-60', '30-40', '10-20'], 6),
    ]
    for strand, coding_start, coding_end, exon_specs, expected_prot in cases:
        transcript = Transcript(
            strand=strand, coding_start=coding_start, coding_end=coding_end)
        transcript.exons = [Exon(spec) for spec in exon_specs]

        transcript.finalize()

        assert transcript.cdna_length == 30
        assert transcript.prot_length == expected_prot
        assert transcript.start == 10
        assert transcript.end == 60
def test_finalize(self):
    """Check that the writer's finalize() produces a sorted, indexed file.

    Four transcripts on chromosome 8 are added out of order. After
    finalize() both the '.gz' file and its '.gz.tbi' companion must
    exist, and the last column of the non-comment lines must read
    40, 580, 880, 900 in that order.
    """
    spans = [('1', 130, 580), ('2', 800, 900), ('3', 800, 880), ('4', 30, 40)]
    for transcript_id, start, end in spans:
        self.tdb_writer.add(
            Transcript(id=transcript_id, chrom='8', strand='+',
                       start=start, end=end, exons=[]))
    self.tdb_writer.finalize()

    gz_path = self.fn + '.gz'
    assert os.path.isfile(gz_path)
    assert os.path.isfile(gz_path + '.tbi')

    order = []
    # Use a context manager so the handle is closed even when parsing or an
    # assertion fails; the original left the gzip file open.
    with gzip.open(gz_path) as handle:
        for raw_line in handle:
            # gzip.open() defaults to binary mode, so decode before comparing
            # against str literals (bytes.startswith('#') raises on Python 3).
            line = raw_line.decode('utf-8').strip()
            if line.startswith('#'):
                continue
            cols = line.split('\t')
            order.append(int(cols[-1]))
    assert order == [40, 580, 880, 900]
def test_set_info(self):
    """Check the slash-separated summary string produced by set_info()."""
    transcript = Transcript(strand='-', start=10000, end=19600)
    transcript.exons = [Exon(spec) for spec in ('1-2', '3-4', '5-6')]
    transcript.cdna_length = 8100
    transcript.prot_length = 1200

    transcript.set_info()

    expected_info = '-/9.6kb/3/8.1kb/1200'
    assert transcript.info == expected_info
def setUp(self):
    """Write a four-transcript database to a uniquely named file and open it.

    The file name is a fresh UUID string kept in ``self.fn``; the reader
    opened on the resulting '.gz' file is kept in ``self.tdb``.
    """
    self.fn = str(uuid.uuid4())
    writer = TranscriptDBWriter(
        self.fn,
        source='xyz',
        build='GRCh37',
        columns=['id', 'chrom', 'strand', 'start', 'end'],
    )
    # (id, start, end) for each transcript, in the order they are added.
    fixtures = (
        ('t1', 130, 580),
        ('t2', 800, 900),
        ('t3', 800, 880),
        ('t4', 30, 40),
    )
    for transcript_id, start, end in fixtures:
        writer.add(
            Transcript(id=transcript_id, chrom='8', strand='+',
                       start=start, end=end))
    writer.finalize()
    self.tdb = TranscriptDB(self.fn + '.gz')
def test_get_protein_length(self):
    """Check get_protein_length() for one '+' and one '-' strand record."""
    scenarios = [
        (
            {
                'strand': '+',
                'exons': '10-20,30-40,50-60',
                'coding_start': '16',
                'coding_end': '53',
            },
            5,
        ),
        (
            {
                'strand': '-',
                'exons': '50-60,30-40,10-20',
                'coding_start': '55',
                'coding_end': '15',
            },
            6,
        ),
    ]
    for db_record, expected_length in scenarios:
        transcript = Transcript()
        transcript.read_from_database_record(db_record)
        assert transcript.get_protein_length() == expected_length
def test_read_from_database_record(self):
    """Check that read_from_database_record() copies a record onto the object.

    Every record key must become an instance attribute; 'start' must come
    back as an int and 'exons' as a list of Exon objects.
    """
    record = {
        'one': 'x',
        'two': 'y',
        'start': '12345',
        'exons': '10000-20000,30000-40000',
    }
    transcript = Transcript()
    transcript.read_from_database_record(record)

    attributes = vars(transcript)
    assert all(key in attributes for key in record)

    assert transcript.one == 'x'
    assert transcript.two == 'y'
    assert transcript.start == 12345

    # Exact type match on purpose: subclasses of Exon would not pass.
    assert all(type(exon) is Exon for exon in transcript.exons)

    first_exon, second_exon = transcript.exons[0], transcript.exons[1]
    assert first_exon.start == 10000
    assert second_exon.end == 40000
def test_sort_records(self):
    """Check that _sort_records() orders the rows stored for a chromosome.

    Four transcripts on chromosome 8 are added out of order. After
    sorting, the last column of the stored rows must read
    40, 580, 880, 900. The original test only inspected the first three
    rows, so a missing or misplaced final row went unnoticed; the whole
    list is compared here.
    """
    spans = [('1', 130, 580), ('2', 800, 900), ('3', 800, 880), ('4', 30, 40)]
    for transcript_id, start, end in spans:
        self.tdb_writer.add(
            Transcript(id=transcript_id, chrom='8', strand='+',
                       start=start, end=end, exons=[]))

    self.tdb_writer._sort_records()

    last_columns = [row[-1] for row in self.tdb_writer._records['8']]
    assert last_columns == [40, 580, 880, 900]
def test_all_have_same_cds(self):
    """Check helper.all_have_same_cds() on transcripts sharing or not sharing a CDS.

    t1-t3 differ only in their outermost exon boundaries while keeping the
    same coding start and end; t4 has a different coding start.
    """

    def build(exons, coding_start):
        # All four fixtures are '+' strand and share the same coding end.
        transcript = Transcript()
        transcript.read_from_database_record({
            'strand': '+',
            'exons': exons,
            'coding_start': coding_start,
            'coding_end': '53000',
        })
        return transcript

    t1 = build('10000-20000,30000-40000,50000-60000', '12000')
    t2 = build('9000-20000,30000-40000,50000-68000', '12000')
    t3 = build('3000-20000,30000-40000,50000-68000', '12000')
    t4 = build('10000-20000,30000-40000,50000-60000', '12500')

    assert not helper.all_have_same_cds([t1, t2, t3, t4])
    assert helper.all_have_same_cds([t1, t2, t3])
def test_utr5_regions(self):
    """Check utr5_regions() on both strands, with and without a 5' UTR.

    Two scenarios expect exactly two regions; the other two expect an
    empty list.
    """

    def utr5_of(strand, exons, coding_start, coding_end):
        # Load a fresh transcript from a database-style record and query it.
        transcript = Transcript()
        transcript.read_from_database_record({
            'strand': strand,
            'exons': exons,
            'coding_start': coding_start,
            'coding_end': coding_end,
        })
        return transcript.utr5_regions()

    # '+' strand, coding starts inside the second exon.
    regions = utr5_of(
        '+', '10000-20000,25000-26000,30000-40000,50000-60000', '25500', '53000')
    assert len(regions) == 2
    assert regions[0] == (10000, 20000)
    assert regions[1] == (25000, 25500)

    # '+' strand, coding starts at the first exon's start.
    assert utr5_of(
        '+', '10000-20000,30000-40000,50000-60000', '10000', '35000') == []

    # '-' strand, coding starts inside the second listed exon.
    regions = utr5_of('-', '70-80, 50-60, 30-40, 10-20', '55', '15')
    assert len(regions) == 2
    assert regions[0] == (70, 80)
    assert regions[1] == (56, 60)

    # '-' strand, coding start 59 in exon 50-60 is expected to leave no UTR.
    assert utr5_of('-', '50-60, 30-40, 10-20', '59', '15') == []
def test_any_unset(self):
    """Check _any_unset() before and after 'start' and 'end' are assigned."""
    field_names = ('start', 'end')
    transcript = Transcript()

    # A freshly constructed transcript reports the fields as unset.
    assert transcript._any_unset(list(field_names))

    transcript.start, transcript.end = 1, 2

    assert not transcript._any_unset(list(field_names))
def test_get_cdna_length(self):
    """Check get_cdna_length() for a three-exon record.

    The expected 1110 equals 10 + 100 + 1000, i.e. presumably end minus
    start summed over the exons.
    """
    transcript = Transcript()
    transcript.read_from_database_record({
        'strand': '+',
        'exons': '10-20,100-200,1000-2000',
    })
    expected_length = 1110
    assert transcript.get_cdna_length() == expected_length