Пример #1
0
 def __init__(self, linkers=None):
     """Store the linkers to use, falling back to the configured ones.

     When linkers is None the values of the 'LINKERS' setting are wrapped
     into SeqItems named after their position and tagged as fasta
     seqitems. Whatever iterable ends up selected is materialised into a
     list and kept in self.linkers.
     """
     if linkers is None:
         configured = get_setting('LINKERS')
         default_items = []
         for index, linker_seq in enumerate(configured):
             name = str(index)
             # Fasta-formatted text: header with the index, then the linker.
             fasta_text = '>%d\n%s\n' % (index, linker_seq)
             default_items.append(SeqItem(name, fasta_text))
         linkers = assing_kind_to_seqs(SEQITEM, default_items, 'fasta')
     self.linkers = list(linkers)
Пример #2
0
def _read_seqitems(fhands):
    """Return one iterator chaining the seq items of every file handle.

    The format of each handle is detected and its item stream is built
    right away (not lazily), so errors raised while doing so surface
    when this function is called rather than on first iteration.
    """
    def _tagged_items(fhand):
        # Detect the format first, then itemize, then tag the items with it.
        detected_format = get_format(fhand)
        raw_items = _itemize_fastx(fhand)
        return assing_kind_to_seqs(SEQITEM, raw_items, detected_format)

    # A list (not a generator) keeps the per-file setup eager.
    return chain.from_iterable([_tagged_items(fhand) for fhand in fhands])
Пример #3
0
def _read_seqitems(fhands):
    """Chain the seq items read from all the given file handles.

    Every handle gets its format detected and its items tagged as
    SEQITEM with that format; the per-handle streams are prepared
    up front and then concatenated in input order.
    """
    per_file_streams = []
    for handle in fhands:
        handle_format = get_format(handle)
        per_file_streams.append(
            assing_kind_to_seqs(SEQITEM, _itemize_fastx(handle),
                                handle_format))
    return chain.from_iterable(per_file_streams)
Пример #4
0
 def __init__(self, linkers=None):
     """Keep the given linkers, or build them from the 'LINKERS' setting.

     Explicit linkers are simply copied into a list. Otherwise each
     configured linker is turned into a SeqItem whose name is its index
     and whose content is the fasta text '>index\\nlinker\\n'; the items
     are then tagged as fasta seqitems before being stored.
     """
     if linkers is not None:
         self.linkers = list(linkers)
         return

     def _as_seqitem(position, linker_seq):
         return SeqItem(str(position), '>%d\n%s\n' % (position, linker_seq))

     settings_linkers = get_setting('LINKERS')
     seq_items = [_as_seqitem(position, linker_seq)
                  for position, linker_seq in enumerate(settings_linkers)]
     self.linkers = list(assing_kind_to_seqs(SEQITEM, seq_items, 'fasta'))
Пример #5
0
    def test_case_change(self):
        """Check ChangeCase with the upper, lower and swap case actions."""
        # (action, extra SeqRecord keyword arguments, expected sequence)
        cases = (
            (UPPERCASE, {"letter_annotations": {"dummy": "dddd"}}, "ACCG"),
            (LOWERCASE, {}, "accg"),
            (SWAPCASE, {}, "AccG"),
        )
        for action, record_kwargs, expected in cases:
            records = [SeqRecord(Seq("aCCg"), **record_kwargs)]
            records = assing_kind_to_seqs(SEQRECORD, records, None)
            case_changer = ChangeCase(action=action)
            found = [get_str_seq(rec) for rec in case_changer(records)]
            assert found == [expected]
Пример #6
0
    def test_case_change(self):
        'ChangeCase should upper-case, lower-case and swap-case sequences'

        def changed(action, **record_kwargs):
            # Run a single 'aCCg' seqrecord through ChangeCase and return
            # the resulting sequences as plain strings.
            records = [SeqRecord(Seq('aCCg'), **record_kwargs)]
            records = assing_kind_to_seqs(SEQRECORD, records, None)
            case_changer = ChangeCase(action=action)
            return [get_str_seq(rec) for rec in case_changer(records)]

        # The first case also carries per-letter annotations.
        upper = changed(UPPERCASE, letter_annotations={'dummy': 'dddd'})
        assert upper == ['ACCG']

        lower = changed(LOWERCASE)
        assert lower == ['accg']

        swapped = changed(SWAPCASE)
        assert swapped == ['AccG']
Пример #7
0
    def test_matching_segments(self):
        """Check that the titanium linker is located inside read 'seq1'."""
        upstream = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        mate_fhand = create_a_matepair_file()

        titan_item = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
        flx_item = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
        linkers = assing_kind_to_seqs(SEQITEM, [titan_item, flx_item],
                                      'fasta')

        # The expected match starts right after the upstream fragment and
        # covers the whole titanium linker (presumably seq1 in the fixture
        # file is built that way -- confirm against
        # create_a_matepair_file).
        region_start = len(upstream)
        region_end = region_start + len(TITANIUM_LINKER) - 1

        matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                        program='blastn',
                                        elongate_for_global=True)
        segments = matcher.get_matched_segments_for_read('seq1')
        assert [(region_start, region_end)] == segments[0]
Пример #8
0
    def test_matching_segments(self):
        'The blaster should report where the titanium linker matches seq1'
        five_prime = ('CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTG'
                      'AC')
        mate_fhand = create_a_matepair_file()

        # (name, fasta header line, linker sequence)
        linker_specs = (
            ('titan', '>titan\n', TITANIUM_LINKER),
            ('flx', '>flx\n', FLX_LINKER),
        )
        linker_items = [SeqItem(name, [header, sequence + '\n'])
                        for name, header, sequence in linker_specs]
        linker_items = assing_kind_to_seqs(SEQITEM, linker_items, 'fasta')

        first_base = len(five_prime)
        last_base = len(five_prime) + len(TITANIUM_LINKER) - 1
        expected_segments = [(first_base, last_base)]

        blaster_options = {'program': 'blastn', 'elongate_for_global': True}
        matcher = BlasterForFewSubjects(mate_fhand.name, linker_items,
                                        **blaster_options)
        # Only the first element of the returned value is compared.
        found_segments = matcher.get_matched_segments_for_read('seq1')[0]
        assert expected_segments == found_segments
Пример #9
0
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    """Return a stream of seqs in one of the codings: seqitems, seqrecords.

    The input format is detected from the first file handle only. The
    preferred classes are tried in order and the first reader that does
    not raise NotImplementedError provides the result.

    Arguments:
        fhands: indexable collection of input file handles.
        out_format: format the seqs are meant to be written in; None or
            GUESS_FORMAT put no constraint on the reader.
        prefered_seq_classes: ordered collection with SEQITEM and/or
            SEQRECORD. Defaults to [SEQITEM, SEQRECORD]. It is never
            modified.

    Returns:
        An empty list when the format detection raises FileIsEmptyError,
        otherwise whatever the chosen reader returns.

    Raises:
        ValueError: no usable class is left, or an unknown class was given.
        RuntimeError: every reader raised NotImplementedError.
    """
    if not prefered_seq_classes:
        seq_classes = [SEQITEM, SEQRECORD]
    else:
        # Work on a copy so the caller's collection is not altered when
        # SEQITEM has to be discarded below.
        seq_classes = list(prefered_seq_classes)

    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []

    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq.
    # SEQITEM membership is checked first: dropping it when it was never
    # requested used to raise an unrelated ValueError from list.index().
    if SEQITEM in seq_classes:
        format_changes = (out_format not in (None, GUESS_FORMAT) and
                          in_format != out_format)
        seqitem_formats = (('fasta',) + SANGER_FASTQ_FORMATS +
                           ILLUMINA_FASTQ_FORMATS)
        if format_changes or in_format not in seqitem_formats:
            seq_classes.remove(SEQITEM)

    if not seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps the message buildable for non-string classes.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')
Пример #10
0
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    """Return a stream of seqs in one of the codings: seqitems, seqrecords.

    The input format is detected from the first file handle only. The
    preferred classes are tried in order and the first reader that does
    not raise NotImplementedError provides the result.

    Arguments:
        fhands: indexable collection of input file handles.
        out_format: format the seqs are meant to be written in; None or
            GUESS_FORMAT put no constraint on the reader.
        prefered_seq_classes: ordered collection with SEQITEM and/or
            SEQRECORD. Defaults to [SEQITEM, SEQRECORD]. It is never
            modified.

    Returns:
        An empty list when the format detection raises FileIsEmptyError,
        otherwise whatever the chosen reader returns.

    Raises:
        ValueError: no usable class is left, or an unknown class was given.
        RuntimeError: every reader raised NotImplementedError.
    """
    if not prefered_seq_classes:
        seq_classes = [SEQITEM, SEQRECORD]
    else:
        # Work on a copy so the caller's collection is not altered when
        # SEQITEM has to be discarded below.
        seq_classes = list(prefered_seq_classes)

    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []

    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq.
    # SEQITEM membership is checked first: dropping it when it was never
    # requested used to raise an unrelated ValueError from list.index().
    if SEQITEM in seq_classes:
        format_changes = (out_format not in (None, GUESS_FORMAT) and
                          in_format != out_format)
        seqitem_formats = (('fasta',) + SANGER_FASTQ_FORMATS +
                           ILLUMINA_FASTQ_FORMATS)
        if format_changes or in_format not in seqitem_formats:
            seq_classes.remove(SEQITEM)

    if not seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps the message buildable for non-string classes.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')