def get_distances(seq1, seq2,
                  per_site=True,
                  aligned=False,
                  ignore_gaps=True,
                  alphabet=None,
                  aligner_tools=['mafft', 'muscle']):
    """Return the distance from ``seq1`` to ``seq2`` in both orientations.

    Args:
        seq1: First sequence; this is the one that gets reverse complemented.
        seq2: Second sequence.
        per_site: Forwarded unchanged to ``distance``.
        aligned: Forwarded to ``distance`` for the forward comparison only.
        ignore_gaps: Forwarded unchanged to ``distance``.
        alphabet: Forwarded unchanged to ``distance``. When it is truthy and
            ``alphabet.has_state('M')`` is true, the reverse-complement
            comparison is skipped (presumably this marks a protein alphabet
            -- TODO confirm).
        aligner_tools: Forwarded unchanged to ``distance``. NOTE: the list
            default is shared between calls; this function itself never
            mutates it.

    Returns:
        A ``(d, drc)`` tuple. ``d`` is the result of ``distance`` for the
        sequences as given. ``drc`` is the result of ``distance`` between the
        reverse complement of ``seq1`` and ``seq2``, or ``None`` when the
        reverse complement was skipped or could not be produced.
    """
    d = distance(seq1=seq1,
                 seq2=seq2,
                 per_site=per_site,
                 aligned=aligned,
                 ignore_gaps=ignore_gaps,
                 alphabet=alphabet,
                 aligner_tools=aligner_tools)
    rc = None
    drc = None
    if (not alphabet) or (not alphabet.has_state('M')):
        try:
            # No trailing comma here: it used to wrap the result in a
            # one-element tuple, which made the check below always true.
            rc = sequtils.get_reverse_complement(seq1)
        except Exception:
            # Best effort: a sequence that cannot be reverse complemented
            # simply has no reverse-complement distance.
            rc = None
    if rc is not None:
        # ``aligned`` is forced to False for this comparison (presumably
        # because reverse complementing invalidates an existing alignment).
        drc = distance(seq1=rc,
                       seq2=seq2,
                       per_site=per_site,
                       aligned=False,
                       ignore_gaps=ignore_gaps,
                       alphabet=alphabet,
                       aligner_tools=aligner_tools)
    return d, drc
def get_distances(seq1, seq2,
                  per_site=True,
                  aligned=False,
                  ignore_gaps=True,
                  alphabet=None,
                  aligner_tools=['mafft', 'muscle']):
    """Return the distance from ``seq1`` to ``seq2`` in both orientations.

    Args:
        seq1: First sequence; this is the one that gets reverse complemented.
        seq2: Second sequence.
        per_site: Forwarded unchanged to ``distance``.
        aligned: Forwarded to ``distance`` for the forward comparison only.
        ignore_gaps: Forwarded unchanged to ``distance``.
        alphabet: Forwarded unchanged to ``distance``. When it is truthy and
            ``alphabet.has_state('M')`` is true, the reverse-complement
            comparison is skipped (presumably this marks a protein alphabet
            -- TODO confirm).
        aligner_tools: Forwarded unchanged to ``distance``. NOTE: the list
            default is shared between calls; this function itself never
            mutates it.

    Returns:
        A ``(d, drc)`` tuple. ``d`` is the result of ``distance`` for the
        sequences as given. ``drc`` is the result of ``distance`` between the
        reverse complement of ``seq1`` and ``seq2``, or ``None`` when the
        reverse complement was skipped or could not be produced.
    """
    d = distance(seq1=seq1,
                 seq2=seq2,
                 per_site=per_site,
                 aligned=aligned,
                 ignore_gaps=ignore_gaps,
                 alphabet=alphabet,
                 aligner_tools=aligner_tools)
    rc = None
    drc = None
    if (not alphabet) or (not alphabet.has_state('M')):
        try:
            # No trailing comma here: it used to wrap the result in a
            # one-element tuple, which made the check below always true.
            rc = sequtils.get_reverse_complement(seq1)
        except Exception:
            # Best effort: a sequence that cannot be reverse complemented
            # simply has no reverse-complement distance.
            rc = None
    if rc is not None:
        # ``aligned`` is forced to False for this comparison (presumably
        # because reverse complementing invalidates an existing alignment).
        drc = distance(seq1=rc,
                       seq2=seq2,
                       per_site=per_site,
                       aligned=False,
                       ignore_gaps=ignore_gaps,
                       alphabet=alphabet,
                       aligner_tools=aligner_tools)
    return d, drc
def reverse_complement_to_longest_reading_frame(seq_iter,
                                                gap_characters=['-'],
                                                table=1,
                                                allow_partial=True,
                                                require_start_after_stop=True,
                                                log_frequency=0):
    """Yield each sequence in the orientation with the longer reading frame.

    Every item of ``seq_iter`` is first passed through ``remove_gaps``. For
    each resulting sequence, the longest reading frame of the sequence and of
    its reverse complement are compared, and the reverse complement is
    yielded only when its frame is strictly longer (or when only the reverse
    complement has a reading frame at all). Ties keep the original.

    Args:
        seq_iter: Iterable of sequence records (objects exposing ``id``).
        gap_characters: Forwarded to ``remove_gaps``. NOTE: the list default
            is shared between calls; this function itself never mutates it.
        table: Forwarded to ``sequtils.get_longest_reading_frames``.
        allow_partial: Forwarded to ``sequtils.get_longest_reading_frames``.
        require_start_after_stop: Forwarded to
            ``sequtils.get_longest_reading_frames``.
        log_frequency: When greater than zero, an info message is logged
            every ``log_frequency`` sequences.

    Yields:
        Either the gap-stripped sequence or its reverse complement.
    """
    for i, s in enumerate(remove_gaps(seq_iter,
                                      gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(
                seq_record=s,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        p2 = sequtils.get_longest_reading_frames(
                seq_record=rc,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        # Work out the lengths defensively: indexing [0] on an empty result
        # used to raise IndexError in the debug call below, before the
        # emptiness checks could ever run.
        fwd_len = len(p1[0].seq) if p1 else 0
        rc_len = len(p2[0].seq) if p2 else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
                s.id, fwd_len, rc_len))
        if not p2:
            # No reading frame on the reverse strand: keep the original.
            yield s
        elif (not p1) or (rc_len > fwd_len):
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
def reverse_complement_to_first_seq(seq_iter,
                                    per_site=True,
                                    aligned=False,
                                    ignore_gaps=True,
                                    alphabet=None,
                                    aligner_tools=['mafft', 'muscle'],
                                    log_frequency=0):
    """Yield sequences oriented to best match the first sequence.

    The first item of ``seq_iter`` is yielded unchanged and used as the
    reference. Every later item is compared with the reference through
    ``seqstats.get_distances``; its reverse complement is yielded instead of
    the item itself when the reverse-complement distance is strictly smaller
    than the forward distance.

    Args:
        seq_iter: Iterable of sequence records (objects exposing ``id`` and
            ``seq``).
        per_site: Forwarded to ``seqstats.get_distances``.
        aligned: Forwarded to ``seqstats.get_distances``.
        ignore_gaps: Forwarded to ``seqstats.get_distances``.
        alphabet: Forwarded to ``seqstats.get_distances``.
        aligner_tools: Forwarded to ``seqstats.get_distances``. NOTE: the
            list default is shared between calls; this function itself never
            mutates it.
        log_frequency: When greater than zero, an info message is logged
            every ``log_frequency`` sequences.

    Yields:
        Each input sequence, or its reverse complement.
    """
    seq1 = None
    for i, seq2 in enumerate(seq_iter):
        if i == 0:
            # The first sequence is the reference orientation.
            seq1 = seq2
            yield seq2
            continue
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        d, drc = seqstats.get_distances(seq1=seq1,
                                        seq2=seq2,
                                        per_site=per_site,
                                        aligned=aligned,
                                        ignore_gaps=ignore_gaps,
                                        alphabet=alphabet,
                                        aligner_tools=aligner_tools)
        _LOG.debug('{0}: distance {1}, rev comp distance {2}'.format(
                seq2.id, d, drc))
        # The reverse-complement distance may be None when no reverse
        # complement was evaluated. Comparing None with a number raises
        # TypeError on Python 3 and is silently true on Python 2, so guard
        # against it explicitly and keep the sequence as-is in that case.
        if (drc is not None) and (drc < d):
            _LOG.warning(
                    'Reverse complementing sequence {0!r} (length {1})\n\t'
                    'rev comp distance ({2}) < current distance '
                    '({3})'.format(seq2.id, len(seq2.seq), drc, d))
            yield sequtils.get_reverse_complement(seq2)
            continue
        yield seq2
def reverse_complement_to_longest_reading_frame(seq_iter,
                                                gap_characters=['-'],
                                                table=1,
                                                allow_partial=True,
                                                require_start_after_stop=True,
                                                log_frequency=0):
    """Yield each sequence in the orientation with the longer reading frame.

    Every item of ``seq_iter`` is first passed through ``remove_gaps``. For
    each resulting sequence, the longest reading frame of the sequence and of
    its reverse complement are compared, and the reverse complement is
    yielded only when its frame is strictly longer (or when only the reverse
    complement has a reading frame at all). Ties keep the original.

    Args:
        seq_iter: Iterable of sequence records (objects exposing ``id``).
        gap_characters: Forwarded to ``remove_gaps``. NOTE: the list default
            is shared between calls; this function itself never mutates it.
        table: Forwarded to ``sequtils.get_longest_reading_frames``.
        allow_partial: Forwarded to ``sequtils.get_longest_reading_frames``.
        require_start_after_stop: Forwarded to
            ``sequtils.get_longest_reading_frames``.
        log_frequency: When greater than zero, an info message is logged
            every ``log_frequency`` sequences.

    Yields:
        Either the gap-stripped sequence or its reverse complement.
    """
    for i, s in enumerate(remove_gaps(seq_iter,
                                      gap_characters=gap_characters)):
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        rc = sequtils.get_reverse_complement(s)
        p1 = sequtils.get_longest_reading_frames(
                seq_record=s,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        p2 = sequtils.get_longest_reading_frames(
                seq_record=rc,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        # Work out the lengths defensively: indexing [0] on an empty result
        # used to raise IndexError in the debug call below, before the
        # emptiness checks could ever run.
        fwd_len = len(p1[0].seq) if p1 else 0
        rc_len = len(p2[0].seq) if p2 else 0
        _LOG.debug('{0}: read length {1}, rev comp read length {2}'.format(
                s.id, fwd_len, rc_len))
        if not p2:
            # No reading frame on the reverse strand: keep the original.
            yield s
        elif (not p1) or (rc_len > fwd_len):
            _LOG.warning('Reverse complementing sequence {0!r}'.format(rc.id))
            yield rc
        else:
            yield s
def reverse_complement_to_first_seq(seq_iter,
                                    per_site=True,
                                    aligned=False,
                                    ignore_gaps=True,
                                    alphabet=None,
                                    aligner_tools=['mafft', 'muscle'],
                                    log_frequency=0):
    """Yield sequences oriented to best match the first sequence.

    The first item of ``seq_iter`` is yielded unchanged and used as the
    reference. Every later item is compared with the reference through
    ``seqstats.get_distances``; its reverse complement is yielded instead of
    the item itself when the reverse-complement distance is strictly smaller
    than the forward distance.

    Args:
        seq_iter: Iterable of sequence records (objects exposing ``id`` and
            ``seq``).
        per_site: Forwarded to ``seqstats.get_distances``.
        aligned: Forwarded to ``seqstats.get_distances``.
        ignore_gaps: Forwarded to ``seqstats.get_distances``.
        alphabet: Forwarded to ``seqstats.get_distances``.
        aligner_tools: Forwarded to ``seqstats.get_distances``. NOTE: the
            list default is shared between calls; this function itself never
            mutates it.
        log_frequency: When greater than zero, an info message is logged
            every ``log_frequency`` sequences.

    Yields:
        Each input sequence, or its reverse complement.
    """
    seq1 = None
    for i, seq2 in enumerate(seq_iter):
        if i == 0:
            # The first sequence is the reference orientation.
            seq1 = seq2
            yield seq2
            continue
        if (log_frequency > 0) and (((i + 1) % log_frequency) == 0):
            _LOG.info('{0}: Checking reverse complement of seq {1}...'.format(
                    datetime.datetime.now(),
                    (i + 1)))
        d, drc = seqstats.get_distances(seq1=seq1,
                                        seq2=seq2,
                                        per_site=per_site,
                                        aligned=aligned,
                                        ignore_gaps=ignore_gaps,
                                        alphabet=alphabet,
                                        aligner_tools=aligner_tools)
        _LOG.debug('{0}: distance {1}, rev comp distance {2}'.format(
                seq2.id, d, drc))
        # The reverse-complement distance may be None when no reverse
        # complement was evaluated. Comparing None with a number raises
        # TypeError on Python 3 and is silently true on Python 2, so guard
        # against it explicitly and keep the sequence as-is in that case.
        if (drc is not None) and (drc < d):
            _LOG.warning('Reverse complementing sequence {0!r} (length {1})\n\t'
                    'rev comp distance ({2}) < current distance '
                    '({3})'.format(seq2.id, len(seq2.seq), drc, d))
            yield sequtils.get_reverse_complement(seq2)
            continue
        yield seq2
def summarize_longest_read_lengths(seq_iter,
                                   gap_characters=['-'],
                                   table=1,
                                   allow_partial=True,
                                   require_start_after_stop=True):
    """Tabulate longest reading-frame lengths for both strands.

    Every item of ``seq_iter`` is passed through ``seqmod.remove_gaps``; for
    each resulting record the length of the first entry returned by
    ``sequtils.get_longest_reading_frames`` is recorded for the record itself
    and for its reverse complement. A length of 0 is recorded when no reading
    frame is returned.

    Args:
        seq_iter: Iterable of sequence records (objects exposing ``id``).
        gap_characters: Forwarded to ``seqmod.remove_gaps``.
        table: Forwarded to ``sequtils.get_longest_reading_frames``.
        allow_partial: Forwarded to ``sequtils.get_longest_reading_frames``.
        require_start_after_stop: Forwarded to
            ``sequtils.get_longest_reading_frames``.

    Returns:
        A sorted list of ``(length, reverse_complement_length, id)`` tuples.
    """
    summary = []
    stripped = seqmod.remove_gaps(seq_iter, gap_characters=gap_characters)
    for record in stripped:
        forward_frames = sequtils.get_longest_reading_frames(
                record,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        forward_len = len(forward_frames[0].seq) if forward_frames else 0
        revcomp_frames = sequtils.get_longest_reading_frames(
                sequtils.get_reverse_complement(record),
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        revcomp_len = len(revcomp_frames[0].seq) if revcomp_frames else 0
        summary.append((forward_len, revcomp_len, record.id))
    summary.sort()
    return summary
def summarize_longest_read_lengths(seq_iter,
                                   gap_characters=['-'],
                                   table=1,
                                   allow_partial=True,
                                   require_start_after_stop=True):
    """Tabulate longest reading-frame lengths for both strands.

    Every item of ``seq_iter`` is passed through ``seqmod.remove_gaps``; for
    each resulting record the length of the first entry returned by
    ``sequtils.get_longest_reading_frames`` is recorded for the record itself
    and for its reverse complement. A length of 0 is recorded when no reading
    frame is returned.

    Args:
        seq_iter: Iterable of sequence records (objects exposing ``id``).
        gap_characters: Forwarded to ``seqmod.remove_gaps``.
        table: Forwarded to ``sequtils.get_longest_reading_frames``.
        allow_partial: Forwarded to ``sequtils.get_longest_reading_frames``.
        require_start_after_stop: Forwarded to
            ``sequtils.get_longest_reading_frames``.

    Returns:
        A sorted list of ``(length, reverse_complement_length, id)`` tuples.
    """
    summary = []
    stripped = seqmod.remove_gaps(seq_iter, gap_characters=gap_characters)
    for record in stripped:
        forward_frames = sequtils.get_longest_reading_frames(
                record,
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        forward_len = len(forward_frames[0].seq) if forward_frames else 0
        revcomp_frames = sequtils.get_longest_reading_frames(
                sequtils.get_reverse_complement(record),
                table=table,
                allow_partial=allow_partial,
                require_start_after_stop=require_start_after_stop)
        revcomp_len = len(revcomp_frames[0].seq) if revcomp_frames else 0
        summary.append((forward_len, revcomp_len, record.id))
    summary.sort()
    return summary
def reverse_complement(seq_iter):
    """Lazily yield the reverse complement of every item in ``seq_iter``.

    Each item is handed to ``sequtils.get_reverse_complement`` only when the
    generator is advanced, so nothing is computed up front.
    """
    for record in seq_iter:
        flipped = sequtils.get_reverse_complement(record)
        yield flipped