def filter_duplicates(in_fhands, out_fhand, paired_reads, n_seqs_packet=None, tempdir=None):
    """Write the de-duplicated pairs read from in_fhands to out_fhand.

    Pairs are obtained from ``_read_pairs(in_fhands, paired_reads)`` and
    compared through ``_get_pair_key``.

    Args:
        in_fhands: input file handles; must be truthy (non-empty).
        out_fhand: handle each surviving pair is written to via
            ``write_seqs``.
        paired_reads: forwarded unchanged to ``_read_pairs``.
        n_seqs_packet: when None, duplicates are removed with
            ``unique_unordered``. Otherwise the pairs go through
            ``sorted_items`` (with this value as ``max_items_in_memory``)
            and the sorted stream is filtered with ``unique``.
        tempdir: forwarded to ``sorted_items``; unused when
            ``n_seqs_packet`` is None.

    Raises:
        ValueError: if ``in_fhands`` is falsy.
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    read_pairs = _read_pairs(in_fhands, paired_reads)

    if n_seqs_packet is not None:
        # Sorting path: the in-memory item count is capped by n_seqs_packet.
        ordered = sorted_items(read_pairs, key=_get_pair_key, tempdir=tempdir,
                               max_items_in_memory=n_seqs_packet)
        deduplicated = unique(ordered, key=_get_pair_key)
    else:
        deduplicated = unique_unordered(read_pairs, key=_get_pair_key)

    for kept_pair in deduplicated:
        write_seqs(kept_pair, out_fhand)
def filter_duplicates(in_fhands, out_fhand, paired_reads, use_length=None, n_seqs_packet=None, tempdir=None):
    """Write the de-duplicated pairs read from in_fhands to out_fhand.

    Pairs are obtained from ``_read_pairs(in_fhands, paired_reads)`` and
    compared through a ``_PairKeyGetter`` built with ``use_length``.

    Args:
        in_fhands: input file handles; must be truthy (non-empty).
        out_fhand: handle each surviving pair is written to via
            ``write_seqs``.
        paired_reads: forwarded unchanged to ``_read_pairs``.
        use_length: forwarded unchanged to ``_PairKeyGetter``.
        n_seqs_packet: when None, duplicates are removed with
            ``unique_unordered``. Otherwise the pairs go through
            ``sorted_items`` (with this value as ``max_items_in_memory``)
            and the sorted stream is filtered with ``unique``.
        tempdir: forwarded to ``sorted_items``; unused when
            ``n_seqs_packet`` is None.

    Raises:
        ValueError: if ``in_fhands`` is falsy.
    """
    if not in_fhands:
        raise ValueError('At least one input fhand is required')

    read_pairs = _read_pairs(in_fhands, paired_reads)
    key_of_pair = _PairKeyGetter(use_length=use_length)

    if n_seqs_packet is not None:
        # Sorting path: the in-memory item count is capped by n_seqs_packet.
        ordered = sorted_items(read_pairs, key=key_of_pair, tempdir=tempdir,
                               max_items_in_memory=n_seqs_packet)
        deduplicated = unique(ordered, key=key_of_pair)
    else:
        deduplicated = unique_unordered(read_pairs, key=key_of_pair)

    for kept_pair in deduplicated:
        write_seqs(kept_pair, out_fhand)
def test_unique_unordered_items(self):
    """unique_unordered yields each value once, in first-seen order."""
    values_with_repeats = [1, 2, 3, 4, 4, 3, 2, 1]
    # Materialise the result so it can be compared against a plain list.
    deduplicated = list(unique_unordered(values_with_repeats))
    assert deduplicated == [1, 2, 3, 4]