def get_inferred_sequences(pairs, genome_dict, add_softclipped_bases=False):
    """Build the reference-derived sequence spanned by each read pair.

    For every ``(read1, read2)`` pair the reference slice
    ``genome_dict[read1.reference_name][read1.reference_start:read2.reference_end]``
    is taken, optionally extended with the soft-clipped bases reported by
    ``sctools``, trimmed by the context width encoded in the read name (if
    any), and reverse-complemented when the read name ends in ``_2``.

    Read-name convention used here: when ``read1.query_name`` contains
    exactly two underscores, its second-to-last ``_``-separated field is
    parsed as an integer context width. That many items are removed from each
    end of the sequence, and the reported coordinates are moved inwards by the
    same amount. Any other name is treated as having no context (width 0).

    Args:
        pairs: Iterable of ``(read1, read2)`` tuples. The reads must expose
            ``query_name``, ``reference_name``, ``reference_start`` and
            ``reference_end`` (presumably pysam aligned segments -- confirm
            against the caller).
        genome_dict: Mapping from contig name to a sliceable sequence.
        add_softclipped_bases: When true, prepend
            ``sctools.left_softclipped_sequence_strict(read1)`` and append
            ``sctools.right_softclipped_sequence_strict(read2)`` before
            trimming.

    Returns:
        A list with one ``(name, length, contig_edge, sequence)`` tuple per
        pair, where ``name`` is ``"<contig>:<start>-<end>"`` and
        ``contig_edge`` is True when a soft-clipped position reported by
        ``sctools`` lies before position 0 or at/after the contig length.
    """
    inferred_sequences = []
    for read1, read2 in pairs:
        name_fields = read1.query_name.split('_')

        # Exactly two underscores means the name carries a context width in
        # its second-to-last field; otherwise there is nothing to trim.
        if read1.query_name.count('_') == 2:
            context_width = int(name_fields[-2])
        else:
            context_width = 0

        contig = read1.reference_name
        start = read1.reference_start
        end = read2.reference_end

        name = (contig + ':' + str(start + context_width) +
                '-' + str(end - context_width))

        inferred_sequence = genome_dict[contig][start:end]
        if add_softclipped_bases:
            inferred_sequence = (
                sctools.left_softclipped_sequence_strict(read1)
                + inferred_sequence
                + sctools.right_softclipped_sequence_strict(read2)
            )

        # Guard the trim: ``seq[0:-0]`` is ``seq[0:0]`` (empty), so a width of
        # zero must leave the sequence untouched instead of discarding it.
        if context_width:
            inferred_sequence = inferred_sequence[context_width:-context_width]

        if name_fields[-1] == '2':
            inferred_sequence = misc.revcomp(inferred_sequence)

        # Same short-circuit order as an if/elif chain: the right-hand check
        # is only evaluated when the left-hand one does not flag an edge.
        contig_edge = bool(
            (sctools.is_left_softclipped_strict(read1)
             and sctools.left_softclipped_position(read1) < 0)
            or
            (sctools.is_right_softclipped_strict(read2)
             and sctools.right_softclipped_position(read2)
             >= len(genome_dict[read2.reference_name]))
        )

        inferred_sequences.append(
            (name, len(inferred_sequence), contig_edge, inferred_sequence))

    return inferred_sequences
def get_inferred_sequence(self, forward_read, reverse_read, is_reverse):
    """Return the inferred sequence for a read pair and whether it hits a contig edge.

    The reference slice between ``forward_read.reference_start`` and
    ``reverse_read.reference_end`` on the forward read's contig is joined into
    a string, flanked by the soft-clipped bases reported by ``sctools``,
    trimmed by ``self.context_width`` characters on each side, and
    reverse-complemented when ``is_reverse`` is truthy.

    Args:
        forward_read: Read providing ``reference_name`` and
            ``reference_start`` (presumably a pysam aligned segment --
            confirm against the caller).
        reverse_read: Read providing ``reference_end``.
        is_reverse: When truthy, the result is passed through
            ``misc.revcomp``.

    Returns:
        ``(inferred_sequence, contig_edge)``. ``contig_edge`` is True when a
        soft-clipped position reported by ``sctools`` lies before position 0
        or at/after the length of the contig in ``self.genome_dict``.
    """
    contig = forward_read.reference_name
    start = forward_read.reference_start
    end = reverse_read.reference_end

    inferred_sequence = ''.join(self.genome_dict[contig][start:end])
    inferred_sequence = (
        sctools.left_softclipped_sequence_strict(forward_read)
        + inferred_sequence
        + sctools.right_softclipped_sequence_strict(reverse_read)
    )

    # Guard the trim: ``seq[0:-0]`` is ``seq[0:0]`` (empty), so a context
    # width of zero must leave the sequence untouched.
    if self.context_width:
        inferred_sequence = inferred_sequence[
            self.context_width:-self.context_width]

    if is_reverse:
        inferred_sequence = misc.revcomp(inferred_sequence)

    # Same short-circuit order as an if/elif chain: the right-hand check is
    # only evaluated when the left-hand one does not flag an edge.
    contig_edge = bool(
        (sctools.is_left_softclipped_strict(forward_read)
         and sctools.left_softclipped_position(forward_read) < 0)
        or
        (sctools.is_right_softclipped_strict(reverse_read)
         and sctools.right_softclipped_position(reverse_read)
         >= len(self.genome_dict[contig]))
    )

    return inferred_sequence, contig_edge
def get_inferred_sequence(self, forward_read, reverse_read, is_reverse):
    """Return the reference sequence spanned by a read pair, with soft clips.

    The slice of ``self.genome_dict`` on the forward read's contig, running
    from ``forward_read.reference_start`` to ``reverse_read.reference_end``,
    is joined into a string and flanked by the left/right soft-clipped
    sequences reported by ``sctools``. When ``is_reverse`` is truthy the
    result is passed through ``misc.revcomp`` before being returned.
    """
    chrom = forward_read.reference_name
    left_bound = forward_read.reference_start
    right_bound = reverse_read.reference_end

    # Reference portion first, then the clipped flanks, in that order.
    reference_part = ''.join(self.genome_dict[chrom][left_bound:right_bound])
    left_clip = sctools.left_softclipped_sequence_strict(forward_read)
    right_clip = sctools.right_softclipped_sequence_strict(reverse_read)
    full_sequence = left_clip + reference_part + right_clip

    return misc.revcomp(full_sequence) if is_reverse else full_sequence
def get_inferred_sequences(pairs, genome_dict, add_softclipped_bases=False):
    """Collect the reference sequence spanned by each read pair.

    For every ``(read1, read2)`` in ``pairs`` the slice
    ``genome_dict[read1.reference_name][read1.reference_start:read2.reference_end]``
    is taken. If ``add_softclipped_bases`` is true it is flanked by the
    left/right soft-clipped sequences reported by ``sctools``; if
    ``read1.is_read2`` is truthy it is passed through ``misc.revcomp``.

    Returns a list of ``(name, length, sequence)`` tuples, where ``name`` is
    ``"<contig>:<start>-<end>"``.
    """
    results = []
    for first, second in pairs:
        chrom = first.reference_name
        begin = first.reference_start
        stop = second.reference_end

        label = chrom + f':{begin}-{stop}'
        sequence = genome_dict[chrom][begin:stop]

        if add_softclipped_bases:
            left_clip = sctools.left_softclipped_sequence_strict(first)
            right_clip = sctools.right_softclipped_sequence_strict(second)
            sequence = left_clip + sequence + right_clip

        if first.is_read2:
            sequence = misc.revcomp(sequence)

        results.append((label, len(sequence), sequence))
    return results