Exemplo n.º 1
0
# Fragment: for every entry of `contigs`, build a {position: base} map from
# the contig string and overlay the consensus events on it.
# `x` is initialised here but never read in the visible part of the fragment.
x=1
for i in contigs:
	#print "A contig, ", i
	# Everything visible below only runs when `crf` is truthy.
	if crf:
		string_seq = i[1]
		#print "String seq is", string_seq
		nuc_index = i[0][0]
		dict_seq = {}
		# Map consecutive positions, starting at i[0][0], to the characters
		# of the sequence string.
		for nuc in string_seq:
			dict_seq[nuc_index] = nuc
			nuc_index += 1
		#print "original dict_seq is", dict_seq
		# add info for consensus dictionary
		# NOTE(review): none of the arguments of this call depend on `i`, yet
		# it is repeated for every contig -- confirm whether it can be hoisted
		# out of the loop.
		mut_events = mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail)
		# Only the entry under the first key returned by mut_events.keys() is
		# used. Indexing the result of .keys() presumably relies on it being
		# a list (Python 2 dict behaviour) -- TODO confirm.
		consensus_single = get_consensus_single(mut_events[mut_events.keys()[0]],hf=hf)
		#print consensus_single
		# alter dict_seq keys for the implementation
		# of the consensus information
		#
		#print "CONSENSUS SINGLE: ", consensus_single
		for p_info in consensus_single:
			# Ignore events whose position (p_info[0]) is not a key of this
			# contig's map.
			if p_info[0] in dict_seq.keys():
				#print "P_INFO: ", p_info
				# maybe I don't need to consider mismatch but I'll do anyway
				# The event type is read from the last element of p_info.
				if p_info[-1] == 'mism':
					dict_seq[p_info[0]] = p_info[1][0] # check THIS
				elif p_info[-1] == 'ins':
					# in the consensus, the ins is reported as the nuc of pos of the ins + the inserted bases
					# (hence the [1:] slice, which drops the first character).
					# NOTE(review): the keys of dict_seq are produced by
					# `nuc_index += 1`, so a p_info[0] that matched one of them
					# is numeric and `+'.1'` would raise TypeError -- confirm.
					dict_seq[p_info[0]+'.1'] = p_info[1][0][1:]
					# alternatively it could be
Exemplo n.º 2
0
 # Fragment (the enclosing loop that binds `i` is outside the view): when
 # `crf` is truthy, build a {position: base} map for the current contig and
 # overlay the consensus events on it.
 if crf:
     string_seq = i[1]
     #print "String seq is", string_seq
     nuc_index = i[0][0]
     dict_seq = {}
     # Map consecutive positions, starting at i[0][0], to the characters of
     # the sequence string.
     for nuc in string_seq:
         dict_seq[nuc_index] = nuc
         nuc_index += 1
     #print "original dict_seq is", dict_seq
     # add info for consensus dictionary
     # NOTE(review): none of the arguments of this call depend on `i` --
     # confirm whether it needs to be repeated for every contig.
     mut_events = mtvcf_main_analysis(mt_table,
                                      sam_file,
                                      sample_name,
                                      tail=tail)
     # Only the entry under the first key returned by mut_events.keys() is
     # used. Indexing the result of .keys() presumably relies on it being a
     # list (Python 2 dict behaviour) -- TODO confirm.
     consensus_single = get_consensus_single(
         mut_events[mut_events.keys()[0]], hf=hf)
     #print consensus_single
     # alter dict_seq keys for the implementation
     # of the consensus information
     #
     #print "CONSENSUS SINGLE: ", consensus_single
     for p_info in consensus_single:
         # Ignore events whose position (p_info[0]) is not a key of this
         # contig's map.
         if p_info[0] in dict_seq.keys():
             #print "P_INFO: ", p_info
             # maybe I don't need to consider mismatch but I'll do anyway
             # The event type is read from the last element of p_info.
             if p_info[-1] == 'mism':
                 dict_seq[p_info[0]] = p_info[1][0]  # check THIS
             elif p_info[-1] == 'ins':
                 # in the consensus, the ins is reported as the nuc of pos of the ins + the inserted bases
                 # (hence the [1:] slice, which drops the first character).
                 # NOTE(review): the keys of dict_seq are produced by
                 # `nuc_index += 1`, so a p_info[0] that matched one of them
                 # is numeric and `+ '.1'` would raise TypeError -- confirm.
                 dict_seq[p_info[0] + '.1'] = p_info[1][0][1:]
                 # alternatively it could be
Exemplo n.º 3
0
 # Fragment (`i`, `x`, `f`, `mut_events`, `hf_max` and `hf_min` are bound
 # outside the view): write the header line for the current contig, then,
 # when `crf` is truthy, collect the consensus events for it.
 new_i = i
 #write fasta header
 # The header carries the counter `x` and the two values stored in new_i[0].
 f.write('>Contig.%i|%i-%i\n' % (x, new_i[0][0], new_i[0][1]))
 #print "A contig, ", i
 if crf:
     string_seq = i[1]
     #print "String seq is", string_seq
     nuc_index = i[0][0]
     dict_seq = {}
     # Map consecutive positions, starting at i[0][0], to the characters of
     # the sequence string.
     for nuc in string_seq:
         dict_seq[nuc_index] = nuc
         nuc_index += 1
     #print "original dict_seq is", dict_seq
     # add info for consensus dictionary
     # Only the entry under the first key returned by mut_events.keys() is
     # used. Indexing the result of .keys() presumably relies on it being a
     # list (Python 2 dict behaviour) -- TODO confirm.
     consensus_single = get_consensus_single(
         mut_events[mut_events.keys()[0]], hf_max=hf_max, hf_min=hf_min)
     #print consensus_single
     # alter dict_seq keys for the implementation
     # of the consensus information
     #
     #print "CONSENSUS SINGLE: ", consensus_single
     #check if there are repeated positions with different mut type
     if len(consensus_single) == 0:
         print 'no variants found in this contig {0}\n'.format(x)
         # The `pass` below has no effect; the branch already has a statement.
         pass
     else:
         # Assuming `pd` is pandas: one row per consensus event, with column
         # 0 holding the first element of each event (the position).
         df = pd.DataFrame(consensus_single)
         positions = df[0]
         # Values of column 0 that occur again after their first appearance.
         dup_positions = positions[positions.duplicated()].values
         # NOTE(review): the loop that follows rebinds `x`, the name the
         # header line and the message above use as the contig number --
         # confirm that later code does not still expect the counter.
         for x in dup_positions:
             d = df[df[0] == x][
Exemplo n.º 4
0
mut_events_cellar.close()


if crf:
    position=1
    f=open(contigfile,'w')
    print "Generating fasta output..."
    for i in contigs:
        string_seq = i[1]
        nuc_index = i[0][0]
        dict_seq = {}
        for nuc in string_seq:
            dict_seq[nuc_index] = nuc
            nuc_index += 1
        # This only gathers consensus bases for the mut_events
        consensus_single = mtVariantCaller.get_consensus_single(mut_events[mut_events.keys()[0]],hf=hf)
        for p_info in consensus_single:
            if p_info[0] in dict_seq.keys():
                if p_info[-1] == 'mism':
                    dict_seq[p_info[0]] = p_info[1][0] # check THIS
                elif p_info[-1] == 'ins':
                    dict_seq[p_info[0]+'.1'] = p_info[1][0][1:]
                elif p_info[-1] == 'del':
                    for deleted_pos in p_info[1]:
                        if deleted_pos < len(dict_seq):
                            del(dict_seq[deleted_pos])
        # sort positions in dict_seq and join to have the sequence
        contig_seq = ''
        for j in sorted(dict_seq.keys()):
            contig_seq += dict_seq[j]
        new_i = ((i[0][0], i[0][1]), contig_seq)