def test_parse_file_notation(tmpdir):
    """Check that a ``file:`` adapter specification loads every FASTA record.

    A two-record FASTA file is written into *tmpdir* and handed to
    ``AdapterParser.parse`` as ``'file:<path>'`` with ``cmdline_type='back'``.
    The test asserts that one adapter per record comes back, carrying the
    record's name and sequence, and that each adapter reports the same
    settings the parser was constructed with.
    """
    # NOTE(review): a function with this same name is defined again later in
    # this source. If both live in one module, the later definition replaces
    # this one and only that one is collected by pytest.
    tmp_path = str(tmpdir.join('adapters.fasta'))

    # One FASTA record per header/sequence line pair. The trailing backslash
    # keeps the first header on an indented line so dedent() strips the
    # common indentation from every line.
    fasta_text = dedent("""\
        >first_name
        ADAPTER1
        >second_name
        ADAPTER2
        """)
    with open(tmp_path, 'w') as f:
        f.write(fasta_text)

    parser = AdapterParser(
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=False,
    )
    adapters = list(parser.parse('file:' + tmp_path, cmdline_type='back'))

    assert len(adapters) == 2
    assert adapters[0].name == 'first_name'
    assert adapters[0].sequence == 'ADAPTER1'
    assert adapters[1].name == 'second_name'
    assert adapters[1].sequence == 'ADAPTER2'

    # Every adapter read from the file must carry the parser-level settings.
    for a in adapters:
        assert a.max_error_rate == 0.2
        assert a.min_overlap == 4
        assert not a.read_wildcards
        assert not a.adapter_wildcards
        assert not a.indels
def test_anywhere_parameter():
    """Check the ``;anywhere`` search parameter on a 'back' adapter.

    The parsed adapter must report ``remove == 'suffix'`` and
    ``where == ANYWHERE``, and running it through ``AdapterCutter`` on a read
    that begins with the tail of the adapter must leave an empty sequence.
    """
    # NOTE(review): this name is defined a second time later in this source.
    # If both definitions share a module, only the later one is collected.
    from cutadapt.modifiers import AdapterCutter

    adapter_parser = AdapterParser(
        colorspace=False,
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed = list(adapter_parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'back'))
    adapter = parsed[0]

    assert adapter.remove == 'suffix'
    assert adapter.where == ANYWHERE

    # The read starts with the last 14 bases of the adapter
    # ('TGAAGTACACGGTT'), followed by a poly-A stretch.
    read = Sequence('foo1', 'TGAAGTACACGGTTAAAAAAAAAA')
    cut = AdapterCutter([adapter])
    result = cut(read, [])

    # Presumably the match and everything after it is removed as a suffix,
    # which leaves nothing of the read -- confirm against AdapterCutter.
    assert result.sequence == ''
def test_anywhere_parameter():
    """Check the ``;anywhere`` search parameter on a 'back' adapter.

    The parsed adapter must report ``remove == 'suffix'`` and have
    ``where`` be the ``Where.ANYWHERE`` member, and running it through
    ``AdapterCutter`` on a read that begins with the tail of the adapter must
    leave an empty sequence.
    """
    # NOTE(review): an earlier definition with this same name appears in this
    # source. If both share a module, this later one is the one that runs.
    from cutadapt.modifiers import AdapterCutter

    adapter_parser = AdapterParser(
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed = list(adapter_parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'back'))
    adapter = parsed[0]

    assert adapter.remove == 'suffix'
    assert adapter.where is Where.ANYWHERE

    # The read starts with the last 14 bases of the adapter
    # ('TGAAGTACACGGTT'), followed by a poly-A stretch.
    read = Sequence('foo1', 'TGAAGTACACGGTTAAAAAAAAAA')
    cut = AdapterCutter([adapter])
    result = cut(read, [])

    # Presumably the match and everything after it is removed as a suffix,
    # which leaves nothing of the read -- confirm against AdapterCutter.
    assert result.sequence == ''
def test_parse_file_notation(tmpdir):
    """Check that a ``file:`` adapter specification loads every FASTA record.

    Writes a FASTA file with two records into *tmpdir*, parses it through
    ``AdapterParser.parse('file:<path>', cmdline_type='back')`` and asserts
    that the resulting adapters match the records in order (name and
    sequence) and report the settings given to the parser.
    """
    # NOTE(review): an earlier definition with this same name appears in this
    # source. If both share a module, this later one is the one that runs.
    tmp_path = str(tmpdir.join('adapters.fasta'))

    # Each record needs its header and sequence on separate lines. The
    # leading backslash lets dedent() strip the shared indentation from all
    # lines, including the first header.
    fasta_text = dedent("""\
        >first_name
        ADAPTER1
        >second_name
        ADAPTER2
        """)
    with open(tmp_path, 'w') as f:
        f.write(fasta_text)

    parser = AdapterParser(
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=False,
    )
    adapters = list(parser.parse('file:' + tmp_path, cmdline_type='back'))

    assert len(adapters) == 2
    first, second = adapters
    assert first.name == 'first_name'
    assert first.sequence == 'ADAPTER1'
    assert second.name == 'second_name'
    assert second.sequence == 'ADAPTER2'

    # Parser-level settings must be propagated to every adapter in the file.
    for a in adapters:
        assert a.max_error_rate == 0.2
        assert a.min_overlap == 4
        assert not a.read_wildcards
        assert not a.adapter_wildcards
        assert not a.indels