# NOTE(review): the nesting below was rebuilt from a whitespace-collapsed
# source line; confirm it against the original file before merging.
x = 1
for i in contigs:
    if crf:
        # Map each position covered by this contig to its base: i[1] is the
        # contig sequence and i[0][0] is used as the position of its first base.
        string_seq = i[1]
        nuc_index = i[0][0]
        dict_seq = {}
        for nuc in string_seq:
            dict_seq[nuc_index] = nuc
            nuc_index += 1
        # add info for consensus dictionary
        # NOTE(review): none of these arguments is reassigned in the visible
        # part of the loop, so this call looks loop-invariant; it is left in
        # place because its side effects cannot be seen from here.
        mut_events = mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail)
        consensus_single = get_consensus_single(mut_events[mut_events.keys()[0]], hf=hf)
        # alter dict_seq keys for the implementation
        # of the consensus information
        for p_info in consensus_single:
            # Only events whose position falls inside this contig are applied.
            if p_info[0] in dict_seq:
                # maybe I don't need to consider mismatch but I'll do anyway
                if p_info[-1] == 'mism':
                    dict_seq[p_info[0]] = p_info[1][0]  # check THIS
                elif p_info[-1] == 'ins':
                    # in the consensus, the ins is reported as the nuc of pos
                    # of the ins + the inserted bases, so [1:] keeps only the
                    # inserted bases.
                    # The position is numeric here (it just matched a dict_seq
                    # key), so `p_info[0] + '.1'` raised TypeError. A fractional
                    # numeric key keeps the "pos.1" meaning and stays
                    # comparable with the other position keys.
                    dict_seq[p_info[0] + 0.1] = p_info[1][0][1:]
                    # alternatively it could be
# NOTE(review): the nesting below was rebuilt from a whitespace-collapsed
# source line; confirm it against the original file before merging.
if crf:
    # Map each position covered by this contig to its base: i[1] is the
    # contig sequence and i[0][0] is used as the position of its first base.
    string_seq = i[1]
    nuc_index = i[0][0]
    dict_seq = {}
    for nuc in string_seq:
        dict_seq[nuc_index] = nuc
        nuc_index += 1
    # add info for consensus dictionary
    mut_events = mtvcf_main_analysis(mt_table,
                                     sam_file,
                                     sample_name,
                                     tail=tail)
    consensus_single = get_consensus_single(
        mut_events[mut_events.keys()[0]], hf=hf)
    # alter dict_seq keys for the implementation
    # of the consensus information
    for p_info in consensus_single:
        # Only events whose position falls inside this contig are applied.
        if p_info[0] in dict_seq:
            # maybe I don't need to consider mismatch but I'll do anyway
            if p_info[-1] == 'mism':
                dict_seq[p_info[0]] = p_info[1][0]  # check THIS
            elif p_info[-1] == 'ins':
                # in the consensus, the ins is reported as the nuc of pos
                # of the ins + the inserted bases, so [1:] keeps only the
                # inserted bases.
                # The position is numeric here (it just matched a dict_seq
                # key), so `p_info[0] + '.1'` raised TypeError. A fractional
                # numeric key keeps the "pos.1" meaning and stays comparable
                # with the other position keys.
                dict_seq[p_info[0] + 0.1] = p_info[1][0][1:]
                # alternatively it could be
# NOTE(review): this fragment is whitespace-collapsed and is cut off in the
# middle of the `d = df[df[0] == x][` expression, so the code is left untouched.
# Visible steps: write a '>Contig.<x>|<start>-<end>' FASTA header from new_i,
# build dict_seq (position -> base) from i when crf is set, fetch
# consensus_single with hf_max/hf_min, then either report that no variants
# were found or load consensus_single into a pandas DataFrame and collect
# the positions (column 0) that occur more than once.
# NOTE(review): `for x in dup_positions` rebinds `x`, the same name the header
# write and the 'no variants found' message use as the contig number; after
# that loop runs, `x` holds a position instead - confirm and rename the
# loop variable.
new_i = i #write fasta header f.write('>Contig.%i|%i-%i\n' % (x, new_i[0][0], new_i[0][1])) #print "A contig, ", i if crf: string_seq = i[1] #print "String seq is", string_seq nuc_index = i[0][0] dict_seq = {} # the sequence string at for nuc in string_seq: dict_seq[nuc_index] = nuc nuc_index += 1 #print "original dict_seq is", dict_seq # add info for consensus dictionary consensus_single = get_consensus_single( mut_events[mut_events.keys()[0]], hf_max=hf_max, hf_min=hf_min) #print consensus_single # alter dict_seq keys for the implementation # of the consensus information # #print "CONSENSUS SINGLE: ", consensus_single #check if there are repeated positions with different mut type if len(consensus_single) == 0: print 'no variants found in this contig {0}\n'.format(x) pass else: df = pd.DataFrame(consensus_single) positions = df[0] dup_positions = positions[positions.duplicated()].values for x in dup_positions: d = df[df[0] == x][
# NOTE(review): the nesting below was rebuilt from a whitespace-collapsed
# source line; confirm it against the original file before merging.
mut_events_cellar.close()
if crf:
    position = 1
    # The handle stays open past this fragment; no close is visible here.
    f = open(contigfile, 'w')
    # Parenthesised single-string form prints the same text on Python 2 and 3.
    print("Generating fasta output...")
    for i in contigs:
        # Map each position covered by this contig to its base: i[1] is the
        # contig sequence and i[0][0] is used as the position of its first base.
        string_seq = i[1]
        nuc_index = i[0][0]
        dict_seq = {}
        for nuc in string_seq:
            dict_seq[nuc_index] = nuc
            nuc_index += 1
        # This only gathers consensus bases for the mut_events
        consensus_single = mtVariantCaller.get_consensus_single(
            mut_events[mut_events.keys()[0]], hf=hf)
        for p_info in consensus_single:
            # Only events whose position falls inside this contig are applied.
            if p_info[0] not in dict_seq:
                continue
            event_type = p_info[-1]
            if event_type == 'mism':
                dict_seq[p_info[0]] = p_info[1][0]  # check THIS
            elif event_type == 'ins':
                # [1:] drops the first base of the reported allele and stores
                # the remainder under a key that sorts right after the anchor
                # position. The position is numeric here (it just matched a
                # dict_seq key), so the former `p_info[0] + '.1'` raised
                # TypeError; a fractional numeric key keeps the "pos.1"
                # meaning and sorts correctly below.
                dict_seq[p_info[0] + 0.1] = p_info[1][0][1:]
            elif event_type == 'del':
                for deleted_pos in p_info[1]:
                    # Drop the position only if this contig covers it. The
                    # former `deleted_pos < len(dict_seq)` guard compared a
                    # position with the dict size, which skipped valid
                    # deletions or raised KeyError for contigs that do not
                    # start at position 1.
                    dict_seq.pop(deleted_pos, None)
        # sort positions in dict_seq and join to have the sequence
        contig_seq = ''.join(dict_seq[j] for j in sorted(dict_seq))
        new_i = ((i[0][0], i[0][1]), contig_seq)