示例#1
0
# Open the contig output file only when contig reporting (crf) is enabled.
if crf:
    f = open(contigfile, 'w')
x = 1
for i in contigs:
    if crf:
        string_seq = i[1]
        # Build {position: nucleotide} for this contig, numbering the
        # nucleotides consecutively from i[0][0] (presumably the contig start
        # coordinate -- confirm against the code that builds `contigs`).
        nuc_index = i[0][0]
        dict_seq = {}
        for nuc in string_seq:
            dict_seq[nuc_index] = nuc
            nuc_index += 1
        # Collect the mutation events and derive the consensus for the first
        # sample key.
        # NOTE(review): none of these arguments depend on the loop variable, so
        # this call looks loop-invariant; confirm before hoisting it, since
        # sam_file may be a handle that is consumed by the call.
        mut_events = mtvcf_main_analysis(mt_table, sam_file, sample_name, tail=tail)
        consensus_single = get_consensus_single(mut_events[mut_events.keys()[0]], hf=hf)
        # Apply the consensus events to dict_seq.
        for p_info in consensus_single:
            # Only events whose position falls inside this contig are applied.
            # Direct dict membership avoids building and scanning a key list
            # for every event.
            if p_info[0] in dict_seq:
                # maybe I don't need to consider mismatch but I'll do anyway
                if p_info[-1] == 'mism':
                    dict_seq[p_info[0]] = p_info[1][0] # check THIS
                elif p_info[-1] == 'ins':
                    # In the consensus, the insertion is reported as the
                    # nucleotide at the insertion position followed by the
                    # inserted bases; [1:] keeps only the inserted bases.
                    # The position is numeric here (it matched a dict_seq key),
                    # so it must be converted before appending the '.1' suffix;
                    # concatenating a number with a string raises TypeError.
                    dict_seq[str(p_info[0])+'.1'] = p_info[1][0][1:]
示例#2
0
# Walk over every contig and, when contig reporting (crf) is enabled, merge the
# consensus events into a per-contig {position: nucleotide} dictionary.
x = 1
for i in contigs:
    #print "A contig, ", i
    if crf:
        # i[1] is iterated one item at a time below, one nucleotide per item.
        string_seq = i[1]
        #print "String seq is", string_seq
        # presumably i[0][0] is the contig start coordinate -- TODO confirm
        nuc_index = i[0][0]
        dict_seq = {}
        # Assign consecutive keys, starting at nuc_index, to the nucleotides
        # of the sequence string.
        for nuc in string_seq:
            dict_seq[nuc_index] = nuc
            nuc_index += 1
        #print "original dict_seq is", dict_seq
        # Add info for the consensus dictionary.
        # NOTE(review): none of these arguments depend on the loop variable,
        # so this call appears to be repeated unchanged for every contig.
        mut_events = mtvcf_main_analysis(mt_table,
                                         sam_file,
                                         sample_name,
                                         tail=tail)
        # Only the entry under the first key of mut_events is used.
        consensus_single = get_consensus_single(
            mut_events[mut_events.keys()[0]], hf=hf)
        #print consensus_single
        # Alter dict_seq entries to apply the consensus information.
        #
        #print "CONSENSUS SINGLE: ", consensus_single
        for p_info in consensus_single:
            # Events are applied only when their first field matches an
            # existing dict_seq key.
            if p_info[0] in dict_seq.keys():
                #print "P_INFO: ", p_info
                # maybe I don't need to consider mismatch but I'll do anyway
                # The last field of p_info is the event type tag.
                if p_info[-1] == 'mism':
                    dict_seq[p_info[0]] = p_info[1][0]  # check THIS
                elif p_info[-1] == 'ins':
示例#3
0
# [((contig1_start, contig1_end), dict_seq = {pos : nuc, ...}), ((contig2_start, contig2_end), dict_seq = {pos : nuc, ...}), ...]
#
# so that each dict_seq can be handled with the Consensus dict information for ambiguities and indels.

# SAMFILE, MT-TABLE FOR MTVCF_GENERATOR.
# Sample name is defined as sample_name = os.getcwd().split('/')[-1].split('_')[1]
sam_handle = basext + '.sam'
mt_table_handle = tablefile

sam_file = open(basext + '.sam', 'r')
mt_table = open(tablefile, 'r').readlines()
if type(sample_name) == (list):
    sample_name = sample_name[0]
mut_events = mtvcf_main_analysis(mt_table,
                                 sam_file,
                                 sample_name,
                                 tail=tail,
                                 Q=mqual,
                                 minrd=cov)
print "Heteroplasmic range for IUPAC in consensus is = {0} - {1}\n".format(
    hf_min, hf_max)
if os.path.exists('../VCF_dict_tmp'):
    VCF_dict = ast.literal_eval(open('../VCF_dict_tmp',
                                     'r').read())  # global VCF dict
else:
    VCF_dict = {}  # global VCF dict
contigs_wdict = []
if crf: f = open(contigfile, 'w')
x = 1
for i in contigs:
    #initialize new_i
    new_i = i
示例#4
0
    print '============================='
    print ""
#

sam_file = open(basext+'.sam', 'r')
sam = sam_file.readlines()
sam_file.close()

mt_table_file = open(tablefile, 'r')
mt_table = mt_table_file.readlines()
mt_table_file.close()

# Calling of indels and mismatches. In the case of indels, mt_table (file that was generated in a
# previous step) is not used. However, for calling mismatches I think it is.
print " -Calling mtvcf_main_analysis..."
mut_events = mtVariantCaller.mtvcf_main_analysis(mt_table, sam, sample_name, cov, indel_obs, tail)
print " -mtvcf_main_analysis DONE"
if os.path.exists('..'+os.sep+'VCF_dict_tmp'):
    VCF_dict = ast.literal_eval(open('..'+os.sep+'VCF_dict_tmp', 'r').read()) # global VCF dict
    print "Mutation events will be appended to existing global VCF dict ../VCF_dict_tmp"
else:
    VCF_dict = {} # global VCF dict
    print "Creating new global VCF dict ../VCF_dict_tmp"

if mut_events:
    print "Updating the VCF dict..."
    VCF_dict.update(mut_events)

mut_events_cellar = open('..'+os.sep+'VCF_dict_tmp', 'w')
mut_events_cellar.write(str(VCF_dict))
mut_events_cellar.close()