def set_read1_consensus_to_read2(input_stream, output_stream):
    """Copy a SAM stream, placing each read 2 on the reference of its read 1.

    Header lines (those starting with "@") are copied to ``output_stream``
    unchanged.  The remaining lines are parsed with ``Sam_record`` and
    consecutive records sharing a query name are treated as a pair.  When
    read 1 of a pair is mapped, read 2 is given read 1's reference name, is
    flagged as mapped, is moved to position 1 and gets a CIGAR string of
    ``<query length>M``.  Both records of the pair are then written, read 1
    first.  Records without an adjacent mate are written unchanged.

    :param input_stream: file-like object yielding SAM lines; mates are
        expected on adjacent lines.
    :param output_stream: file-like object with a ``write`` method.
    """
    # Copy the header through untouched.
    line = input_stream.readline()
    while line.startswith("@"):
        output_stream.write(line)
        line = input_stream.readline()

    if not line:
        # Header-only (or empty) input: there is no alignment line to parse.
        return

    # Record waiting for its mate; None right after a pair has been written.
    prev_read = Sam_record(line)
    for line in input_stream:
        read = Sam_record(line)

        if prev_read is None:
            prev_read = read
            continue

        if read.get_query_name() != prev_read.get_query_name():
            # prev_read has no adjacent mate: emit it as it is.
            output_stream.write(str(prev_read))
            prev_read = read
            continue

        # Same query name: work out which record is which end of the pair.
        if read.is_second_read() and prev_read.is_first_read():
            read1, read2 = prev_read, read
        else:
            read1, read2 = read, prev_read

        if not read1.is_unmapped():
            # Anchor read 2 at the start of the reference read 1 mapped to.
            read2.set_reference_name(read1.get_reference_name())
            read2.set_unmapped_flag(False)
            read2.set_position(1)
            read2.set_cigar_string("%sM" % len(read2.get_query_sequence()))

        # NOTE(review): the pair is written whether or not read 1 is mapped;
        # this nesting was reconstructed from whitespace-collapsed source.
        # str(record) is written as-is, so it presumably ends with a newline.
        output_stream.write(str(read1))
        output_stream.write(str(read2))
        prev_read = None

    # Flush a trailing record that never found a mate; without this the last
    # unpaired read of the stream would be lost.
    if prev_read is not None:
        output_stream.write(str(prev_read))
def load_from_sites_generator(stream):
    """Yield ``(read1, read2)`` tuples of ``Sam_record`` mates from ``stream``.

    Each line of ``stream`` is parsed with ``Sam_record``.  A record whose
    mate has not been seen yet is parked in a dictionary keyed by query name;
    when the mate turns up the parked record is removed and the pair is
    yielded with the first read in front.  Every 10000 lines a progress line
    (line count, parked first reads, parked second reads) goes to stderr.

    :param stream: iterable of SAM alignment lines.
    """
    pending_first = {}
    pending_second = {}

    for line_number, raw_line in enumerate(stream, 1):
        if line_number % 10000 == 0:
            sys.stderr.write('%s %s %s\n' % (line_number, len(pending_first), len(pending_second)))

        record = Sam_record(raw_line)
        is_first = record.is_first_read()
        name = record.get_query_name()

        # Look for the mate in the opposite pool; park this record in its own
        # pool when the mate has not been seen yet.
        if is_first:
            own_pool, mate_pool = pending_first, pending_second
        else:
            own_pool, mate_pool = pending_second, pending_first
        mate = mate_pool.pop(name, None)
        if not mate:
            own_pool[name] = record

        first, second = (record, mate) if is_first else (mate, record)
        if first and second:
            yield (first, second)
command ="%s view -bS - | %s sort - %s"%(samtools_bin, samtools_bin, output_bam_file) logging.info(command) output_stream,process_output= utils_commands.get_input_stream_from_command(command) #get the header line = input_stream.readline() while line.startswith("@"): output_stream.write(line) line = input_stream.readline() while line: read1=Sam_record(line) line = input_stream.readline() read2=Sam_record(line) if read1.get_query_name() == read2.get_query_name(): if read1.is_second_read() and read2.is_first_read(): tmp = read1 read1=read2 read2=tmp read2.set_reference_name(read1.get_reference_name()) output_stream.write(str(read1)) output_stream.write(str(read2)) else: logging.critical("bam file is not sorted by read name") input_stream.close() output_stream.close() #os.remove(output_bam_file+'.bam') return line = input_stream.readline() return_code=process_input.wait()