# NOTE(review): another definition named test_goodFiles appears later in this
# file; if both stay in the same module the later one replaces this one.
def test_goodFiles(tmpdir, bamFile):
    """Check countBasesInFile on a freshly written BAM and on ``bamFile``.

    Part 1 writes a single 10-base read ("AAAAATTTTT", reference_start 32)
    to a new, indexed BAM under ``tmpdir`` and expects one '+' strand A in
    each of the first five columns, one '+' strand T in the remaining ones,
    ``n == 1`` throughout, and positions counting up from 32.

    Part 2 compares every column reported for ``bamFile`` (presumably a
    fixture defined elsewhere -- confirm) with hard-coded per-strand counts
    for positions 28..45.

    Both parts also assert how many columns were produced, so an empty or
    truncated result can no longer pass without being checked.
    """
    # ---- Part 1: synthetic single-read BAM ------------------------------
    d = tmpdir.mkdir('dir')
    p = d.join('test.bam')
    header = {'HD': {'VN': '1.0'}, 'SQ': [{'LN': 1000, 'SN': 'ref'}]}
    outFile = pysam.AlignmentFile(str(p), "wb", header=header)
    a = pysam.AlignedSegment()
    a.query_name = "read1"
    a.query_sequence = "AAAAATTTTT"
    a.reference_id = 0
    a.reference_start = 32
    a.mapping_quality = 20
    a.cigar = ((0, 10), )
    # Base qualities are left unset; the original assignment stays disabled:
    #a.query_qualities = pysam.qualitystring_to_array("((((((((((")
    outFile.write(a)
    outFile.close()
    pysam.index(str(p))

    seen = 0
    for offset, col in enumerate(countbases.countBasesInFile(str(p))):
        if offset < 5:
            # First half of the read is all A.
            assert col['+']['A'] == 1
            assert col['+']['G'] + col['+']['T'] + col['+']['C'] == 0
            assert col['n'] == 1
        else:
            # Second half of the read is all T.
            assert col['+']['T'] == 1
            assert col['+']['G'] + col['+']['A'] + col['+']['C'] == 0
            assert col['n'] == 1
        assert col['pos'] == offset + 32
        seen = offset + 1
    # One column per base of the read; without this the loop above would
    # pass silently when nothing is yielded.
    assert seen == 10

    # ---- Part 2: bamFile against predicted per-strand counts -------------
    bases = ['A', 'C', 'G', 'T']
    strands = ['+', '-']
    predictedStrandCounts = {
        '+': {
            'A': [0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'C': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1],
            'G': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            'T': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        },
        '-': {
            'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'C': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'G': [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'T': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        },
    }
    n = [1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1]
    pos = range(28, 46)

    checked = 0
    for ii, col in enumerate(countbases.countBasesInFile(str(bamFile))):
        for base in bases:
            for strand in strands:
                expected = predictedStrandCounts[strand][base][ii]
                assert col[strand][base] == expected
        assert col['pos'] == pos[ii]
        assert col['n'] == n[ii]
        checked = ii + 1
    # Every predicted position must have been reported.
    assert checked == len(n)
def test_goodFiles(tmpdir, bamFile):
    """Verify the per-column counts produced by countBasesInFile.

    First a BAM holding one read ("AAAAATTTTT" starting at reference
    position 32) is written and indexed under ``tmpdir``; the ten columns
    reported for it must show a single '+' strand A (columns 0-4) or T
    (columns 5-9), a depth ``n`` of 1 and consecutive positions from 32.

    Then the columns reported for ``bamFile`` (presumably a fixture defined
    elsewhere -- confirm) are compared with the expected per-strand counts,
    depths and positions 28..45 listed below.

    The number of columns is asserted after each loop; otherwise a result
    with no (or too few) columns would pass without any comparison.
    """
    d = tmpdir.mkdir('dir')
    p = d.join('test.bam')
    header = {
        'HD': {'VN': '1.0'},
        'SQ': [{'LN': 1000, 'SN': 'ref'}],
    }
    outFile = pysam.AlignmentFile(str(p), "wb", header=header)
    a = pysam.AlignedSegment()
    a.query_name = "read1"
    a.query_sequence = "AAAAATTTTT"
    a.reference_id = 0
    a.reference_start = 32
    a.mapping_quality = 20
    a.cigar = ((0, 10), )
    #a.query_qualities = pysam.qualitystring_to_array("((((((((((")
    outFile.write(a)
    outFile.close()
    pysam.index(str(p))

    count = 0
    for col in countbases.countBasesInFile(str(p)):
        plus = col['+']
        if count < 5:
            assert plus['A'] == 1
            assert plus['G'] + plus['T'] + plus['C'] == 0
            assert col['n'] == 1
        else:
            assert plus['T'] == 1
            assert plus['G'] + plus['A'] + plus['C'] == 0
            assert col['n'] == 1
        assert col['pos'] == count + 32
        count += 1
    # The read covers exactly ten reference positions.
    assert count == 10

    bases = ['A', 'C', 'G', 'T']
    strands = ['+', '-']
    predictedStrandCounts = {
        '+': {
            'A': [0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'C': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1],
            'G': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            'T': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        },
        '-': {
            'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'C': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'G': [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            'T': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        },
    }
    n = [1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1]
    pos = range(28, 46)

    ii = 0
    for col in countbases.countBasesInFile(str(bamFile)):
        for base in bases:
            for strand in strands:
                assert col[strand][base] == predictedStrandCounts[strand][base][ii]
        assert col['pos'] == pos[ii]
        assert col['n'] == n[ii]
        ii += 1
    # All predicted columns must have been compared.
    assert ii == len(n)
# NOTE(review): another definition named test_badFiles appears later in this
# file; if both stay in the same module the later one replaces this one.
def test_badFiles(tmpdir):
    """Check the exceptions expected from countBasesInFile for bad inputs.

    Four inputs are tried in turn: a directory, a path that has not been
    created, a text file that is not alignment data, and that same file
    after its owner-read permission bit has been cleared.
    """
    folder = tmpdir.mkdir('dir')
    target = folder.join('test.txt')

    # A directory instead of a file.
    with pytest.raises(ValueError):
        next(countbases.countBasesInFile(str(folder)))

    # test.txt has not been written yet, so the path does not exist.
    with pytest.raises(IOError):
        next(countbases.countBasesInFile(str(target)))

    # The file now exists but its content is incorrectly formatted.
    target.write("test")
    with pytest.raises(ValueError):
        next(countbases.countBasesInFile(str(target)))

    # Clear the owner-read bit to make the file unreadable.
    current_mode = os.stat(str(target)).st_mode
    os.chmod(str(target), current_mode & ~stat.S_IREAD)
    with pytest.raises(ValueError):
        next(countbases.countBasesInFile(str(target)))
def test_badFiles(tmpdir):
    """Exercise countBasesInFile with inputs it is expected to reject.

    Expected outcomes, in order: ValueError for a directory, IOError for a
    path that does not exist yet, ValueError for a file containing the text
    "test", and ValueError again once that file's owner-read bit is removed.
    """

    def first_column(path):
        # Pull one item so the call under test actually has to run.
        return next(countbases.countBasesInFile(path))

    workdir = tmpdir.mkdir('dir')
    text_file = workdir.join('test.txt')

    with pytest.raises(ValueError):
        first_column(str(workdir))

    # Not created yet.
    with pytest.raises(IOError):
        first_column(str(text_file))

    # Incorrectly formatted file.
    text_file.write("test")
    with pytest.raises(ValueError):
        first_column(str(text_file))

    # Make the file unreadable by dropping the owner-read permission.
    without_read = os.stat(str(text_file)).st_mode & ~stat.S_IREAD
    os.chmod(str(text_file), without_read)
    with pytest.raises(ValueError):
        first_column(str(text_file))
# NOTE(review): another definition named test_main appears later in this
# file; if both stay in the same module the later one replaces this one.
def test_main(capsys, tmpdir, bamFile):
    """Check the command-line entry point ``countbases.main``.

    Covers four invocations: no arguments (SystemExit, 'usage' on stderr),
    ``-h`` (SystemExit, 'usage' on stdout), ``-v <bam>`` (combined counts
    per base) and ``-s <bam>`` (separate '+'/'-' counts per base). For the
    last two, every output line after the first is split on ',' and compared
    field by field with the columns yielded by countBasesInFile.

    The ``-s`` comparison builds its own countBasesInFile iterator. The
    result is consumed as a single-pass iterator (this file also calls
    next() on it), so reusing the one already drained by the ``-v`` loop
    would make the second loop run zero times and skip every ``-s`` check.
    """
    bases = ['A', 'C', 'G', 'T']

    # No arguments: exits and prints usage on stderr.
    with pytest.raises(SystemExit):
        countbases.main()
    out, err = capsys.readouterr()
    assert 'usage' in err

    # Help flag: exits and prints usage on stdout.
    with pytest.raises(SystemExit):
        countbases.main(['-h'])
    out, err = capsys.readouterr()
    assert 'usage' in out

    # Verbose run: fields 3..6 hold the per-base totals over both strands.
    countbases.main(['-v', str(bamFile)])
    out, err = capsys.readouterr()
    assert 'Arguments' in err
    compare = countbases.countBasesInFile(str(bamFile))
    for line, col in zip(out.split('\n')[1:], compare):
        fields = line.split(',')
        assert fields[0] == col['ref']
        assert int(fields[1]) == col['pos']
        assert int(fields[2]) == col['n']
        for idx, base in enumerate(bases):
            assert int(fields[3 + idx]) == col['+'][base] + col['-'][base]

    # Strand-specific run: fields alternate '+' and '-' counts per base,
    # i.e. 3/4 for A, 5/6 for C, 7/8 for G and 9/10 for T.
    countbases.main(['-s', str(bamFile)])
    out, err = capsys.readouterr()
    # Fresh iterator: the previous one was exhausted by the loop above.
    compare = countbases.countBasesInFile(str(bamFile))
    for line, col in zip(out.split('\n')[1:], compare):
        fields = line.split(',')
        assert fields[0] == col['ref']
        assert int(fields[1]) == col['pos']
        assert int(fields[2]) == col['n']
        for idx, base in enumerate(bases):
            assert int(fields[3 + 2 * idx]) == col['+'][base]
            assert int(fields[4 + 2 * idx]) == col['-'][base]
def test_main(capsys, tmpdir, bamFile):
    """Run ``countbases.main`` with several argument lists and check output.

    * no arguments -> SystemExit with 'usage' written to stderr
    * ``-h``       -> SystemExit with 'usage' written to stdout
    * ``-v <bam>`` -> 'Arguments' on stderr; each stdout line after the first
      is ``ref,pos,n,A,C,G,T`` with counts summed over both strands
    * ``-s <bam>`` -> each stdout line after the first is
      ``ref,pos,n,+A,-A,+C,-C,+G,-G,+T,-T``

    The expected values come from countBasesInFile on the same file. A new
    iterator is requested for each comparison: the first ``zip`` loop drains
    the iterator it is given, so sharing one would leave the ``-s`` loop with
    nothing to iterate and its assertions would never execute.
    """
    with pytest.raises(SystemExit):
        countbases.main()
    out, err = capsys.readouterr()
    assert 'usage' in err

    with pytest.raises(SystemExit):
        countbases.main(['-h'])
    out, err = capsys.readouterr()
    assert 'usage' in out

    countbases.main(['-v', str(bamFile)])
    out, err = capsys.readouterr()
    assert 'Arguments' in err
    compare = countbases.countBasesInFile(str(bamFile))
    for ii, jj in zip(out.split('\n')[1:], compare):
        ii = ii.split(',')
        assert ii[0] == jj['ref']
        assert int(ii[1]) == jj['pos']
        assert int(ii[2]) == jj['n']
        assert int(ii[3]) == jj['+']['A'] + jj['-']['A']
        assert int(ii[4]) == jj['+']['C'] + jj['-']['C']
        assert int(ii[5]) == jj['+']['G'] + jj['-']['G']
        assert int(ii[6]) == jj['+']['T'] + jj['-']['T']

    countbases.main(['-s', str(bamFile)])
    out, err = capsys.readouterr()
    # Re-create the iterator; the one used above is already exhausted.
    compare = countbases.countBasesInFile(str(bamFile))
    for ii, jj in zip(out.split('\n')[1:], compare):
        ii = ii.split(',')
        assert ii[0] == jj['ref']
        assert int(ii[1]) == jj['pos']
        assert int(ii[2]) == jj['n']
        assert int(ii[3]) == jj['+']['A']
        assert int(ii[5]) == jj['+']['C']
        assert int(ii[7]) == jj['+']['G']
        assert int(ii[9]) == jj['+']['T']
        assert int(ii[4]) == jj['-']['A']
        assert int(ii[6]) == jj['-']['C']
        assert int(ii[8]) == jj['-']['G']
        assert int(ii[10]) == jj['-']['T']