def __init__(self, linkers=None):
    """Store the linker sequences this instance works with.

    When ``linkers`` is None the 'LINKERS' setting is read and every entry
    is wrapped in a SeqItem named after its position, whose content is a
    FASTA-formatted chunk; the items are then tagged as SEQITEM/'fasta'.
    Whatever ends up in ``linkers`` is materialized into a list.
    """
    # NOTE(review): the source indentation was lost; this assumes the
    # SeqItem wrapping only applies to the default linkers -- confirm.
    if linkers is None:
        default_items = []
        for index, linker in enumerate(get_setting('LINKERS')):
            fasta_chunk = '>%d\n%s\n' % (index, linker)
            default_items.append(SeqItem(str(index), fasta_chunk))
        linkers = assing_kind_to_seqs(SEQITEM, default_items, 'fasta')
    self.linkers = list(linkers)
def _read_seqitems(fhands):
    """Return one iterator over the seq items of all the given files.

    Each file handle has its format detected, is itemized with
    _itemize_fastx and the resulting items are tagged as SEQITEM with that
    format. The per-file streams are chained in the order of ``fhands``.
    """

    def _tagged_items(fhand):
        # Detect the format first, then itemize, as two separate steps.
        detected_format = get_format(fhand)
        items = _itemize_fastx(fhand)
        return assing_kind_to_seqs(SEQITEM, items, detected_format)

    # The list is built eagerly so every file is inspected before the
    # chained iterator is handed back to the caller.
    per_file_streams = [_tagged_items(fhand) for fhand in fhands]
    return chain.from_iterable(per_file_streams)
def __init__(self, linkers=None):
    """Set ``self.linkers`` from the argument or from the settings.

    Explicit ``linkers`` are stored as a list. Without them, each entry of
    the 'LINKERS' setting becomes a SeqItem named by its index and holding
    a FASTA-formatted chunk, tagged as SEQITEM in 'fasta' format.
    """
    # NOTE(review): the source indentation was lost; this assumes the
    # SeqItem wrapping only applies to the default linkers -- confirm.
    if linkers is not None:
        self.linkers = list(linkers)
        return

    raw_linkers = get_setting('LINKERS')
    items = [
        SeqItem(str(position), '>%d\n%s\n' % (position, sequence))
        for position, sequence in enumerate(raw_linkers)
    ]
    self.linkers = list(assing_kind_to_seqs(SEQITEM, items, 'fasta'))
def test_case_change(self):
    """It changes the case of the sequences."""
    # (action, extra SeqRecord keyword arguments, expected sequence string)
    # Only the first record carries letter annotations.
    scenarios = (
        (UPPERCASE, {"letter_annotations": {"dummy": "dddd"}}, "ACCG"),
        (LOWERCASE, {}, "accg"),
        (SWAPCASE, {}, "AccG"),
    )
    for action, record_kwargs, expected in scenarios:
        record = SeqRecord(Seq("aCCg"), **record_kwargs)
        seqs = assing_kind_to_seqs(SEQRECORD, [record], None)
        change_case = ChangeCase(action=action)
        strs = [get_str_seq(seq) for seq in change_case(seqs)]
        assert strs == [expected]
def test_case_change(self):
    """It changes the case of the sequences."""

    def changed_strs(record, action):
        # Wrap the record, run it through ChangeCase and collect the
        # resulting sequences as plain strings.
        wrapped = assing_kind_to_seqs(SEQRECORD, [record], None)
        change_case = ChangeCase(action=action)
        return [get_str_seq(seq) for seq in change_case(wrapped)]

    # The upper-case scenario uses a record with letter annotations.
    annotated = SeqRecord(Seq('aCCg'), letter_annotations={'dummy': 'dddd'})
    assert changed_strs(annotated, UPPERCASE) == ['ACCG']

    assert changed_strs(SeqRecord(Seq('aCCg')), LOWERCASE) == ['accg']

    assert changed_strs(SeqRecord(Seq('aCCg')), SWAPCASE) == ['AccG']
def test_matching_segments(self):
    """It tests the detection of oligos in sequence files."""
    seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
    mate_fhand = create_a_matepair_file()

    # Two FASTA-style linker items, tagged as SEQITEM in 'fasta' format.
    titan_item = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
    flx_item = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
    linkers = assing_kind_to_seqs(SEQITEM, [titan_item, flx_item], 'fasta')

    # The expected segment runs from the end of seq_5 over the length of
    # the titanium linker, with an inclusive end coordinate.
    region_start = len(seq_5)
    region_end = len(seq_5 + TITANIUM_LINKER) - 1
    expected_region = (region_start, region_end)

    matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                    program='blastn',
                                    elongate_for_global=True)
    segments = matcher.get_matched_segments_for_read('seq1')
    linker_region = segments[0]
    assert [expected_region] == linker_region
def test_matching_segments(self):
    """It tests the detection of oligos in sequence files."""
    seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
    mate_fhand = create_a_matepair_file()

    # (name, FASTA header line, linker sequence) for every subject.
    linker_specs = (
        ('titan', '>titan\n', TITANIUM_LINKER),
        ('flx', '>flx\n', FLX_LINKER),
    )
    linkers = assing_kind_to_seqs(
        SEQITEM,
        [SeqItem(name, [header, seq + '\n'])
         for name, header, seq in linker_specs],
        'fasta',
    )

    # Start right after seq_5; the end coordinate is inclusive.
    expected_region = (len(seq_5), len(seq_5 + TITANIUM_LINKER) - 1)

    blaster_options = {'program': 'blastn', 'elongate_for_global': True}
    matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                    **blaster_options)
    linker_region = matcher.get_matched_segments_for_read('seq1')[0]
    assert [expected_region] == linker_region
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    """Return a stream of seqs in different codings: seqrecords, seqitems...

    Parameters:
        fhands: sequence of input file handles; the format of the first
            one is taken as the input format.
        out_format: format the caller intends to write. When it is set
            (neither None nor GUESS_FORMAT) and differs from the input
            format, seq items are not used.
        prefered_seq_classes: seq classes to try, in order of preference.
            Defaults to [SEQITEM, SEQRECORD]. The caller's object is never
            modified.

    Returns:
        An empty list if get_format raises FileIsEmptyError for the first
        file handle; otherwise the seqs produced by the first seq class
        whose reader does not raise NotImplementedError.

    Raises:
        ValueError: if no seq class is left after discarding SEQITEM, or
            if an unknown seq class is requested.
        RuntimeError: if every requested reader raised NotImplementedError.
    """
    # Work on a private copy: SEQITEM may be discarded below and the
    # caller's list must not change as a side effect.
    if prefered_seq_classes:
        seq_classes = list(prefered_seq_classes)
    else:
        seq_classes = [SEQITEM, SEQRECORD]

    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []

    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq
    seqitem_formats = (('fasta',) + SANGER_FASTQ_FORMATS +
                       ILLUMINA_FASTQ_FORMATS)
    changes_format = (out_format not in (None, GUESS_FORMAT) and
                      in_format != out_format)
    if changes_format or in_format not in seqitem_formats:
        # Filtering (instead of list.index + pop) is a no-op when SEQITEM
        # was never requested, so no spurious ValueError is raised.
        seq_classes = [kind for kind in seq_classes if kind != SEQITEM]

    if not seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps the intended ValueError even for non-string
            # classes, which would otherwise fail the concatenation.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')