Exemplo n.º 1
0
def _test_filter_duplicates(paired_reads, n_seqs_packet):
    """Check filter_duplicates against the FASTQ fixtures.

    The input is FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 +
    FASTQ_NO_DUPS3. Every pair read from the duplicate-free fixtures
    (FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3) must appear exactly
    once in the filtered output.

    paired_reads: forwarded to filter_duplicates and _read_pairs.
    n_seqs_packet: an int or None, forwarded to filter_duplicates.
    """
    assert n_seqs_packet is None or isinstance(n_seqs_packet, int)

    fastq_with_dups = (FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2
                       + FASTQ_NO_DUPS3)
    fastq_no_dups = FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3

    # Keep a reference to both temporary files for the whole test: a
    # NamedTemporaryFile is deleted as soon as it is closed or garbage
    # collected, and the with block closes them even if an assert fails.
    with NamedTemporaryFile() as tmp_in, NamedTemporaryFile() as out_fhand:
        tmp_in.write(fastq_with_dups)
        tmp_in.flush()

        # The input is reopened by name, as the original test did, instead
        # of handing the temporary file object itself to the filter.
        with open(tmp_in.name) as in_fhand:
            filter_duplicates([in_fhand], out_fhand, paired_reads,
                              n_seqs_packet)
        flush_fhand(out_fhand)

        with open(out_fhand.name) as result_fhand:
            filtered_pairs = list(_read_pairs([result_fhand], paired_reads))

    expected_pairs = list(_read_pairs([StringIO(fastq_no_dups)],
                                      paired_reads))

    # NOTE(review): the total number of filtered pairs is deliberately not
    # compared with the expected one; that check was disabled in the
    # original test -- confirm whether it should be restored.
    for expected in expected_pairs:
        matches = sum(1 for found in filtered_pairs
                      if _seqitem_pairs_equal(expected, found))
        assert matches == 1
Exemplo n.º 2
0
def _test_filter_duplicates(paired_reads, n_seqs_packet):
    """Check filter_duplicates against the FASTQ fixtures.

    Two things are verified:

    1. With FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3
       as input, every pair read from the duplicate-free fixtures appears
       exactly once in the filtered output.
    2. With FASTQ_DUPS alone as unpaired input, use_length=10 leaves two
       reads in the output and use_length=1 leaves one.

    paired_reads: forwarded to filter_duplicates and _read_pairs in the
        first check; the use_length checks always run unpaired.
    n_seqs_packet: an int or None, forwarded to filter_duplicates.
    """
    assert n_seqs_packet is None or isinstance(n_seqs_packet, int)

    fastq_with_dups = (FASTQ_NO_DUPS1 + FASTQ_DUPS + FASTQ_NO_DUPS2 +
                       FASTQ_NO_DUPS3)
    fastq_no_dups = FASTQ_NO_DUPS1 + FASTQ_NO_DUPS2 + FASTQ_NO_DUPS3

    # Keep a reference to both temporary files while they are in use: a
    # NamedTemporaryFile is deleted as soon as it is closed or garbage
    # collected, and the with block closes them even if an assert fails.
    with NamedTemporaryFile() as tmp_in, NamedTemporaryFile() as out_fhand:
        tmp_in.write(fastq_with_dups)
        tmp_in.flush()

        with open(tmp_in.name) as in_fhand:
            filter_duplicates([in_fhand], out_fhand, paired_reads,
                              n_seqs_packet)
        flush_fhand(out_fhand)

        with open(out_fhand.name) as result_fhand:
            filtered_pairs = list(_read_pairs([result_fhand], paired_reads))

    expected_pairs = list(_read_pairs([StringIO(fastq_no_dups)], paired_reads))

    # NOTE(review): the total number of filtered pairs is deliberately not
    # compared with the expected one; that check was disabled in the
    # original test -- confirm whether it should be restored.
    for expected in expected_pairs:
        matches = sum(1 for found in filtered_pairs
                      if _seqitem_pairs_equal(expected, found))
        assert matches == 1

    # use length: the same duplicated fixture is filtered twice with a
    # different use_length, and only the number of surviving reads changes.
    for use_length, expected_count in ((10, 2), (1, 1)):
        with NamedTemporaryFile() as tmp_in, \
                NamedTemporaryFile() as out_fhand:
            tmp_in.write(FASTQ_DUPS)
            tmp_in.flush()

            with open(tmp_in.name) as in_fhand:
                filter_duplicates([in_fhand],
                                  out_fhand,
                                  paired_reads=False,
                                  n_seqs_packet=n_seqs_packet,
                                  use_length=use_length)
            flush_fhand(out_fhand)

            with open(out_fhand.name) as result_fhand:
                filtered_pairs = list(
                    _read_pairs([result_fhand], paired_reads=False))
        assert len(filtered_pairs) == expected_count
Exemplo n.º 3
0
    def test_seqitem_pairs_equal(self):
        """Exercise _seqitem_pairs_equal with matching and differing pairs.

        seq1 and seq3 carry identical sequence and quality lines under
        different names; seq2 and seq4 differ from them.
        """
        def _fastq_seq(name, lines):
            # Wrap raw FASTQ lines the same way for every fixture read.
            return SeqWrapper(SEQITEM, SeqItem(name, lines), 'fastq')

        seq1 = _fastq_seq('seq1', ['@seq1\n', 'TAATAC\n', '+\n', 'TTTDFG\n'])
        seq2 = _fastq_seq('seq2', ['@seq2\n', 'TCATTA\n', '+\n', 'ABCBEG\n'])
        seq3 = _fastq_seq('seq3', ['@seq3\n', 'TAATAC\n', '+\n', 'TTTDFG\n'])
        seq4 = _fastq_seq('seq4', ['@seq4\n', 'ACGCGT\n', '+\n', 'ABCBEG\n'])

        pair1, pair2 = (seq1, seq2), (seq2, seq4)
        pair3, pair4 = (seq3, seq2), (seq2, seq1)

        # Same reads, only the name of the first one differs.
        assert _seqitem_pairs_equal(pair1, pair3)
        # Different reads in the pair.
        assert not _seqitem_pairs_equal(pair1, pair2)
        # Same reads in swapped order.
        assert not _seqitem_pairs_equal(pair1, pair4)
        # Single-read "pairs".
        assert _seqitem_pairs_equal([seq1], [seq3])
        assert not _seqitem_pairs_equal([seq1], [seq2])
        # A single read against a real pair, and a pair against a bare read.
        assert not _seqitem_pairs_equal([seq1], pair1)
        assert not _seqitem_pairs_equal(pair1, seq2)
Exemplo n.º 4
0
    def test_seqitem_pairs_equal(self):
        """Run _seqitem_pairs_equal over a table of pair comparisons.

        Each case lists the two arguments and whether the comparison is
        expected to be truthy; the cases run in the listed order.
        """
        fixture_lines = (
            ('seq1', ['@seq1\n', 'TAATAC\n', '+\n', 'TTTDFG\n']),
            ('seq2', ['@seq2\n', 'TCATTA\n', '+\n', 'ABCBEG\n']),
            ('seq3', ['@seq3\n', 'TAATAC\n', '+\n', 'TTTDFG\n']),
            ('seq4', ['@seq4\n', 'ACGCGT\n', '+\n', 'ABCBEG\n']),
        )
        seq1, seq2, seq3, seq4 = [
            SeqWrapper(SEQITEM, SeqItem(name, lines), 'fastq')
            for name, lines in fixture_lines
        ]

        pair1 = (seq1, seq2)
        pair2 = (seq2, seq4)
        pair3 = (seq3, seq2)
        pair4 = (seq2, seq1)

        cases = (
            (pair1, pair3, True),
            (pair1, pair2, False),
            (pair1, pair4, False),
            ([seq1], [seq3], True),
            ([seq1], [seq2], False),
            ([seq1], pair1, False),
            (pair1, seq2, False),
        )
        for left, right, should_match in cases:
            # Only the truthiness of the result is checked.
            assert bool(_seqitem_pairs_equal(left, right)) is should_match
Exemplo n.º 5
0
    def test_seqitem_pairs_equal(self):
        """Check _seqitem_pairs_equal on matching and non-matching pairs.

        seq1 and seq3 carry identical sequence and quality lines under
        different names; seq2 and seq4 differ from them. Negative cases are
        asserted with "assert not" rather than by comparing to False.
        """
        seq1 = SeqWrapper(SEQITEM, SeqItem("seq1", ["@seq1\n", "TAATAC\n", "+\n", "TTTDFG\n"]), "fastq")
        seq2 = SeqWrapper(SEQITEM, SeqItem("seq2", ["@seq2\n", "TCATTA\n", "+\n", "ABCBEG\n"]), "fastq")
        seq3 = SeqWrapper(SEQITEM, SeqItem("seq3", ["@seq3\n", "TAATAC\n", "+\n", "TTTDFG\n"]), "fastq")
        seq4 = SeqWrapper(SEQITEM, SeqItem("seq4", ["@seq4\n", "ACGCGT\n", "+\n", "ABCBEG\n"]), "fastq")
        pair1 = (seq1, seq2)
        pair2 = (seq2, seq4)
        pair3 = (seq3, seq2)
        pair4 = (seq2, seq1)

        # Same reads, only the name of the first one differs.
        assert _seqitem_pairs_equal(pair1, pair3)
        # Different reads, and the same reads in swapped order.
        assert not _seqitem_pairs_equal(pair1, pair2)
        assert not _seqitem_pairs_equal(pair1, pair4)
        # Single-read "pairs".
        assert _seqitem_pairs_equal([seq1], [seq3])
        assert not _seqitem_pairs_equal([seq1], [seq2])
        # A single read against a real pair, and a pair against a bare read.
        assert not _seqitem_pairs_equal([seq1], pair1)
        assert not _seqitem_pairs_equal(pair1, seq2)