def test_write_sequence_object(self):
    """Write two records to ``self.path`` and check the resulting FASTA text.

    Also asserts that the writer's underlying file is closed once the
    ``with`` block has been left.
    """
    records = [
        SequenceRecord("name", "CCATA"),
        SequenceRecord("name2", "HELLO"),
    ]
    with FastaWriter(self.path) as writer:
        for record in records:
            writer.write(record)
    assert writer._file.closed
    with open(self.path) as handle:
        content = handle.read()
    assert content == '>name\nCCATA\n>name2\nHELLO\n'
def test_reverse_complement(self):
    """Reverse-complement a mixed-case record and compare with the expected one.

    The expected record keeps the name and carries the qualities string
    in reversed order.
    """
    record = SequenceRecord(
        "name1",
        "ACGTUMRWSYKVHDBNacgtumrwsykvhdbn",
        "/AAAA/6E/EEEEEEEEEEEE/EEEEA///E/",
    )
    expected = SequenceRecord(
        "name1",
        "nvhdbmrswykaacgtNVHDBMRSWYKAACGT",
        "/E///AEEEE/EEEEEEEEEEEE/E6/AAAA/",
    )
    assert record.reverse_complement() == expected
def test_reverse_complement_none_qualities(self):
    """Reverse-complement a record whose qualities are ``None``."""
    record = SequenceRecord("name1", "GATTACA", None)
    expected = SequenceRecord("name1", "TGTAATC", None)
    assert record.reverse_complement() == expected
def test_twoheaders(self):
    """Write FASTQ with ``two_headers=True`` and check the name follows '+'.

    Also asserts that the writer's underlying file is closed once the
    ``with`` block has been left.
    """
    records = [
        SequenceRecord("name", "CCATA", "!#!#!"),
        SequenceRecord("name2", "HELLO", "&&&!&"),
    ]
    with FastqWriter(self.path, two_headers=True) as writer:
        for record in records:
            writer.write(record)
    assert writer._file.closed
    with open(self.path) as handle:
        content = handle.read()
    assert content == '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n'
def test_read(self):
    """Read one record from each of two in-memory FASTQ streams as a pair."""
    expected_pairs = [
        (SequenceRecord("r1", "ACG", "HHH"), SequenceRecord("r2", "GTT", "858")),
    ]
    stream1 = BytesIO(b'@r1\nACG\n+\nHHH\n')
    stream2 = BytesIO(b'@r2\nGTT\n+\n858\n')
    with TwoFilePairedEndReader(stream1, stream2) as reader:
        assert expected_pairs == list(reader)
def test_write_to_file_like_object(self):
    """Write FASTA into a ``BytesIO`` and check it is left open afterwards.

    The buffer contents are compared after the ``with`` block has been
    left; neither the buffer nor the writer's ``_file`` may be closed.
    """
    buffer = BytesIO()
    records = (
        SequenceRecord("name", "CCATA"),
        SequenceRecord("name2", "HELLO"),
    )
    with FastaWriter(buffer) as writer:
        for record in records:
            writer.write(record)
    assert buffer.getvalue() == b'>name\nCCATA\n>name2\nHELLO\n'
    assert not buffer.closed
    assert not writer._file.closed
def test(self):
    """Write two read pairs and check they come out interleaved as FASTQ."""
    pairs = [
        (
            SequenceRecord('A/1 comment', 'TTA', '##H'),
            SequenceRecord('A/2 comment', 'GCT', 'HH#'),
        ),
        (
            SequenceRecord('B/1', 'CC', 'HH'),
            SequenceRecord('B/2', 'TG', '#H'),
        ),
    ]
    expected_output = (
        b'@A/1 comment\nTTA\n+\n##H\n'
        b'@A/2 comment\nGCT\n+\nHH#\n'
        b'@B/1\nCC\n+\nHH\n'
        b'@B/2\nTG\n+\n#H\n'
    )
    sink = BytesIO()
    with InterleavedPairedEndWriter(sink) as writer:
        for pair in pairs:
            writer.write(*pair)
    assert sink.getvalue() == expected_output
def test(self):
    """Read the interleaved test file and compare against the known pairs.

    The file is read twice: once through ``InterleavedPairedEndReader``
    directly and once through ``dnaio.open(..., interleaved=True)``; both
    must yield the same list of record pairs.
    """
    expected = [
        (
            SequenceRecord('read1/1 some text', 'TTATTTGTCTCCAGC', '##HHHHHHHHHHHHH'),
            SequenceRecord('read1/2 other text', 'GCTGGAGACAAATAA', 'HHHHHHHHHHHHHHH'),
        ),
        (
            SequenceRecord('read3/1', 'CCAACTTGATATTAATAACA', 'HHHHHHHHHHHHHHHHHHHH'),
            SequenceRecord('read3/2', 'TGTTATTAATATCAAGTTGG', '#HHHHHHHHHHHHHHHHHHH'),
        ),
    ]
    # Resolve the data file relative to this test module (like SIMPLE_FASTQ)
    # so the test does not depend on the current working directory.
    interleaved_path = str(TEST_DATA / "interleaved.fastq")

    with InterleavedPairedEndReader(interleaved_path) as isr:
        reads = list(isr)
    assert reads == expected

    with dnaio.open(interleaved_path, interleaved=True) as f:
        reads = list(f)
    assert reads == expected
def test_set_name_bad(self):
    """Assigning a non-ASCII name must raise ValueError matching 'ASCII'."""
    record = SequenceRecord("name1", "A", "=")
    with pytest.raises(ValueError, match="ASCII"):
        record.name = "näme1"
def test_write_zero_length_sequence_record(self):
    """A record with an empty sequence is written as a header plus empty line."""
    buffer = BytesIO()
    empty_record = SequenceRecord("name", "")
    with FastaWriter(buffer) as writer:
        writer.write(empty_record)
    written = buffer.getvalue()
    # The assertion message shows the bytes actually written on failure.
    assert written == b'>name\n\n', '{!r}'.format(written)
def test_is_mate_succes(self):
    """``is_mate`` is truthy for records named ``name1`` and ``name2``."""
    first = SequenceRecord("name1", "A", "=")
    second = SequenceRecord("name2", "GC", "FF")
    assert first.is_mate(second)
def test_missing_final_newline(self):
    """Parse a FASTQ record whose last line has no trailing newline."""
    # Files with a missing final newline are currently allowed
    expected = [SequenceRecord('r1', 'A', 'H')]
    truncated = BytesIO(b'@r1\nA\n+\nH')
    with dnaio.open(truncated) as reader:
        parsed = list(reader)
    assert parsed == expected
FastqReader, InterleavedPairedEndReader, FastaWriter, FastqWriter, InterleavedPairedEndWriter, TwoFilePairedEndReader, ) from dnaio import record_names_match, SequenceRecord from dnaio.writers import FileWriter from dnaio.readers import BinaryFileReader TEST_DATA = Path(__file__).parent / "data" SIMPLE_FASTQ = str(TEST_DATA / "simple.fastq") # files tests/data/simple.fast{q,a} simple_fastq = [ SequenceRecord("first_sequence", "SEQUENCE1", ":6;;8<=:<"), SequenceRecord("second_sequence", "SEQUENCE2", "83<??:(61") ] simple_fasta = [SequenceRecord(x.name, x.sequence, None) for x in simple_fastq] tiny_fastq = b'@r1\nACG\n+\nHHH\n@r2\nT\n+\n#\n' class TestFastaReader: def test_file(self): with FastaReader("tests/data/simple.fasta") as f: reads = list(f) assert reads == simple_fasta def test_bytesio(self):
def test_set_qualities_none(self):
    """Qualities of an existing record can be reset to ``None``."""
    record = SequenceRecord("name1", "A", "=")
    record.qualities = None
    assert record.qualities is None
def test_set_qualities_bad(self):
    """Assigning non-ASCII qualities must raise ValueError matching 'ASCII'."""
    record = SequenceRecord("name1", "A", "=")
    with pytest.raises(ValueError, match="ASCII"):
        record.qualities = "Ä"
def test_fastq_bytes_two_headers(self):
    """``fastq_bytes(two_headers=True)`` repeats the name after the '+'."""
    record = SequenceRecord("name", "ACGT", "====")
    encoded = record.fastq_bytes(two_headers=True)
    assert encoded == b"@name\nACGT\n+name\n====\n"
def test_init_qualities_none(self):
    """A record constructed with ``None`` qualities reports ``None``."""
    record = SequenceRecord("name1", "A", None)
    assert record.qualities is None
def test_init_qualities_bad(self):
    """Constructing with non-ASCII qualities raises ValueError matching 'ASCII'."""
    with pytest.raises(ValueError, match="ASCII"):
        SequenceRecord("name1", "A", "ä")
def test_too_many_qualities(self):
    """Five quality characters for a four-base sequence raise ValueError."""
    fields = dict(name="name", sequence="ACGT", qualities="#####")
    with pytest.raises(ValueError):
        SequenceRecord(**fields)
def test_init_sequence_none(self):
    """Constructing with ``None`` as sequence raises TypeError matching 'str'."""
    with pytest.raises(TypeError, match="str"):
        SequenceRecord("name1", None, "=")
def test_init_sequence_bad(self):
    """Constructing with a non-ASCII sequence raises ValueError matching 'ASCII'."""
    with pytest.raises(ValueError, match="ASCII"):
        SequenceRecord("name1", "Ä", "=")
def test_set_sequence_none(self):
    """Assigning ``None`` as sequence raises TypeError matching 'str'."""
    record = SequenceRecord("name1", "A", "=")
    with pytest.raises(TypeError, match="str"):
        record.sequence = None
def test_fastq_bytes(self):
    """``fastq_bytes()`` with default arguments leaves the '+' line bare."""
    record = SequenceRecord("name", "ACGT", "====")
    encoded = record.fastq_bytes()
    assert encoded == b"@name\nACGT\n+\n====\n"