Пример #1
0
def FRR_sam_likelihood(A, sam_path, arg_dict, weights):
    """Sum the per-read log-likelihoods of every non-header row in a SAM file.

    ``sam_path`` is read as tab-separated text. Rows whose first field
    starts with '@' are skipped. For each remaining row, the value returned
    by ``extract_col_sam(row, 'om')`` is scored with
    ``FRR_allele_likelihood``.

    Args:
        A: Allele description forwarded to ``FRR_allele_likelihood``. In
            diploid mode it is indexed as ``A[0]`` and ``A[1]``.
        sam_path: Path of the SAM file to read.
        arg_dict: Settings mapping. Must contain 'locus'; if it contains
            'diploid' with the string value 'True', the two-allele mixture
            is used. It is also forwarded to ``FRR_allele_likelihood``.
        weights: Mapping with keys 'allele_1' and 'allele_2'; only read in
            diploid mode.

    Returns:
        The sum over rows of the natural log of the row likelihood. A row
        whose likelihood is not strictly positive contributes -50 instead.
        Returns 0 when no row is scored.
    """
    # The parsed locus is not used below; the call is kept so that whatever
    # checks the helper performs on the locus string still run.
    _chrom, _locus_start, _locus_end = extract_locus_info(arg_dict['locus'])
    two_alleles = 'diploid' in arg_dict and arg_dict['diploid'] == 'True'

    total = 0
    with open(sam_path, 'r') as sam_handle:
        for fields in csv.reader(sam_handle, dialect='excel-tab'):
            if fields[0][0] == '@':
                continue  # header row
            om_value = extract_col_sam(fields, 'om')
            if two_alleles:
                # Weighted mixture of the two allele likelihoods.
                first_part = weights['allele_1'] * FRR_allele_likelihood(
                    arg_dict, A[0], om_value)
                second_part = weights['allele_2'] * FRR_allele_likelihood(
                    arg_dict, A[1], om_value)
                row_likelihood = first_part + second_part
            else:
                row_likelihood = FRR_allele_likelihood(arg_dict, A, om_value)
            # Non-positive likelihoods have no finite log; use a fixed floor.
            row_log = np.log(row_likelihood) if row_likelihood > 0 else -50
            total = total + row_log
    return total
def span_sam_likelihood(A, sam_path, arg_dict, weights):
    """Sum the per-read log-likelihoods of the scored rows of a SAM file.

    ``sam_path`` is read as tab-separated text. A row is scored only when
    its first field does not start with '@' and ``int(row[8])`` is non-zero
    (index 8 is presumably the SAM TLEN column -- confirm against the SAM
    specification). The value returned by ``extract_col_sam(row, 'is')`` is
    scored with ``span_allele_likelihood``.

    Args:
        A: Allele description forwarded to ``span_allele_likelihood``. In
            diploid mode it is indexed as ``A[0]`` and ``A[1]``.
        sam_path: Path of the SAM file to read.
        arg_dict: Settings mapping. Must contain 'locus'; if it contains
            'diploid' with the string value 'True', the two-allele mixture
            is used. It is also forwarded to ``span_allele_likelihood``.
        weights: Mapping with keys 'allele_1' and 'allele_2'; only read in
            diploid mode.

    Returns:
        The sum over scored rows of the natural log of the row likelihood.
        A row whose likelihood is not strictly positive (zero, negative or
        NaN) contributes -50 instead. Returns 0 when no row is scored.
    """
    locus = arg_dict['locus']
    # The parsed locus is not used below; the call is kept so that whatever
    # checks the helper performs on the locus string still run.
    chrom, locus_start, locus_end = extract_locus_info(locus)
    log_likelihood = 0
    with open(sam_path, 'r') as irr_handle:
        for record in csv.reader(irr_handle, dialect='excel-tab'):
            if record[0][0] != '@' and int(record[8]) != 0:
                sample_ins = extract_col_sam(record, 'is')
                if 'diploid' in arg_dict and arg_dict['diploid'] == 'True':
                    # Weighted mixture of the two allele likelihoods.
                    samp_likelihood = (
                        weights['allele_1'] * span_allele_likelihood(
                            arg_dict, A[0], sample_ins)
                        + weights['allele_2'] * span_allele_likelihood(
                            arg_dict, A[1], sample_ins))
                else:
                    samp_likelihood = span_allele_likelihood(
                        arg_dict, A, sample_ins)
                if samp_likelihood > 0:
                    samp_log_likelihood = np.log(samp_likelihood)
                else:
                    # A plain ``else`` (rather than ``elif <= 0``) also
                    # covers NaN, which would otherwise leave the variable
                    # unassigned or carrying the previous row's value.
                    samp_log_likelihood = -50
                log_likelihood = log_likelihood + samp_log_likelihood
    return log_likelihood
Пример #3
0
def encl_sam_likelihood(A, sam_path, arg_dict, weights):
    """Sum the per-read log-likelihoods of every non-header row in a SAM file.

    ``sam_path`` is read as tab-separated text. Rows whose first field
    starts with '@' are skipped. For each remaining row, the value returned
    by ``extract_col_sam(row, 'nc')`` is scored with
    ``encl_allele_likelihood``.

    Args:
        A: Allele description forwarded to ``encl_allele_likelihood``. In
            diploid mode it is indexed as ``A[0]`` and ``A[1]``.
        sam_path: Path of the SAM file to read.
        arg_dict: Settings mapping. Must contain 'locus'; if it contains
            'diploid' with the string value 'True', the two-allele mixture
            is used. It is also forwarded to ``encl_allele_likelihood``.
        weights: Mapping with keys 'allele_1' and 'allele_2'; only read in
            diploid mode.

    Returns:
        The sum over rows of the natural log of the row likelihood. A row
        whose likelihood is not strictly positive (zero, negative or NaN)
        contributes -50 instead. Returns 0 when no row is scored.
    """
    locus = arg_dict['locus']
    # The parsed locus is not used below; the call is kept so that whatever
    # checks the helper performs on the locus string still run.
    chrom, locus_start, locus_end = extract_locus_info(locus)
    log_likelihood = 0

    with open(sam_path, 'r') as irr_handle:
        for record in csv.reader(irr_handle, dialect='excel-tab'):
            if record[0][0] != '@':
                nCopy = extract_col_sam(record, 'nc')
                if 'diploid' in arg_dict and arg_dict['diploid'] == 'True':
                    # Weighted mixture of the two allele likelihoods.
                    samp_likelihood = (
                        weights['allele_1'] * encl_allele_likelihood(
                            arg_dict, A[0], nCopy)
                        + weights['allele_2'] * encl_allele_likelihood(
                            arg_dict, A[1], nCopy))
                else:
                    samp_likelihood = encl_allele_likelihood(
                        arg_dict, A, nCopy)

                if samp_likelihood > 0:
                    samp_log_likelihood = np.log(samp_likelihood)
                else:
                    # A plain ``else`` (rather than ``elif <= 0``) also
                    # covers NaN, which would otherwise leave the variable
                    # unassigned or carrying the previous row's value.
                    samp_log_likelihood = -50
                log_likelihood = log_likelihood + samp_log_likelihood

    return log_likelihood
Пример #4
0
def IRR_sam_likelihood(A, B, sam_path, arg_dict):
    """Sum the per-read log-likelihoods of every non-header row in a SAM file.

    ``sam_path`` is read as tab-separated text. Rows whose first field
    starts with '@' are skipped. For each remaining row the difference
    ``locus_start - int(row[3])`` is scored with ``IRR_genotype_likelihood``
    (index 3 is presumably the SAM POS column -- confirm against the SAM
    specification).

    Args:
        A: First genotype argument forwarded to ``IRR_genotype_likelihood``.
        B: Second genotype argument forwarded to ``IRR_genotype_likelihood``.
        sam_path: Path of the SAM file to read.
        arg_dict: Settings mapping. Must contain 'locus'; it is also
            forwarded to ``IRR_genotype_likelihood``.

    Returns:
        The sum over rows of the natural log of the row likelihood. A row
        whose likelihood is not strictly positive contributes -250 instead.
        Returns 0 when no row is scored.
    """
    locus = arg_dict['locus']
    chrom, locus_start, locus_end = extract_locus_info(locus)
    # A sum of logs starts at 0. The previous start value of 1 was the
    # identity of the multiplicative form and offset every result by +1.
    log_likelihood = 0
    with open(sam_path, 'r') as irr_handle:
        for record in csv.reader(irr_handle, dialect='excel-tab'):
            if record[0][0] != '@':
                sample_dfl = locus_start - int(record[3])
                samp_likelihood = IRR_genotype_likelihood(
                    arg_dict, A, B, sample_dfl)
                if samp_likelihood > 0:
                    samp_log_likelihood = np.log(samp_likelihood)
                else:
                    # Non-positive likelihoods have no finite log; use a
                    # fixed floor.
                    samp_log_likelihood = -250
                log_likelihood = log_likelihood + samp_log_likelihood
    return log_likelihood
Пример #5
0
def span_sam_likelihood(A, B, sam_path, arg_dict):
    """Sum the per-read log-likelihoods of the scored rows of a SAM file.

    ``sam_path`` is read as tab-separated text. A row is scored only when
    its first field does not start with '@' and ``int(row[8])`` is non-zero
    (index 8 is presumably the SAM TLEN column -- confirm against the SAM
    specification). That integer is scored with ``span_genotype_likelihood``.

    Args:
        A: First genotype argument forwarded to ``span_genotype_likelihood``.
        B: Second genotype argument forwarded to ``span_genotype_likelihood``.
        sam_path: Path of the SAM file to read.
        arg_dict: Settings mapping. Must contain 'locus'; it is also
            forwarded to ``span_genotype_likelihood``.

    Returns:
        The sum over scored rows of the per-row log-likelihood, where:
        a positive likelihood contributes its natural log; an exactly zero
        likelihood contributes -250; a negative likelihood with magnitude
        below 1e-20 is treated as rounding noise and contributes the log of
        its magnitude; any other value (clearly negative, or NaN) is
        reported on stdout and contributes -250. Returns 0 when no row is
        scored.
    """
    locus = arg_dict['locus']
    # The parsed locus is not used below; the call is kept so that whatever
    # checks the helper performs on the locus string still run.
    chrom, locus_start, locus_end = extract_locus_info(locus)
    floor_log_likelihood = -250
    log_likelihood = 0
    with open(sam_path, 'r') as irr_handle:
        for record in csv.reader(irr_handle, dialect='excel-tab'):
            if record[0][0] == '@':
                continue  # header row
            sample_ins = int(record[8])
            if sample_ins == 0:
                continue
            samp_likelihood = span_genotype_likelihood(
                arg_dict, A, B, sample_ins)

            if samp_likelihood > 0:
                samp_log_likelihood = np.log(samp_likelihood)
            elif samp_likelihood == 0:
                # Must be tested before the magnitude check below, which
                # zero also satisfies and which would yield log(0) = -inf.
                samp_log_likelihood = floor_log_likelihood
            elif np.abs(samp_likelihood) < 10**-20:
                # Tiny negative value: accounting for computational errors.
                samp_log_likelihood = np.log(np.abs(samp_likelihood))
            else:
                # Single-argument form prints the same text under Python 2
                # and Python 3.
                print('Error! Negative likelihood: %s' % (samp_likelihood,))
                # Assign explicitly so the sum never reuses the previous
                # row's value or hits an unassigned variable.
                samp_log_likelihood = floor_log_likelihood
            log_likelihood = log_likelihood + samp_log_likelihood
    return log_likelihood
Пример #6
0
# Command-line options. `parser` is created above this excerpt.
# NOTE(review): `args.out_pref` is read below, so an '--out-pref' option is
# presumably registered before these lines -- confirm.
parser.add_argument('--in-pref', type=str, required=True)
parser.add_argument('--exp-dir', type=str, required=True)

args = parser.parse_args()

out_pref = args.out_pref
in_pref = args.in_pref
exp_dir = args.exp_dir

# Experiment settings loaded from the experiment directory.
arg_dict = load_profile(exp_dir)

read_len = arg_dict['read_len']
locus = arg_dict['locus']
motif = arg_dict['motif']

# Split the locus string into its three components and fetch the two
# flanking values returned by the helper for this read length.
chrom, locus_start, locus_end = extract_locus_info(locus)
pre, post = extract_pre_post_flank(exp_dir, read_len)

# Match / mismatch / gap scores; presumably consumed by the realignment
# step further down the script (not visible in this excerpt).
score_dict = { 'match':  3, \
    'mismatch': -1, \
    'gap':   -3}
verbose = False
margin = 2

# Input and output SAM paths are the given prefixes plus '.sam'.
in_sam = in_pref + '.sam'
out_sam = out_pref + '.sam'
# NOTE(review): opened without a `with` block; make sure the handle is
# closed later in the script (not visible in this excerpt).
out_sam_handle = open(out_sam, 'w')
print 'Filtering ' + in_pref + '.sam'
with open(in_sam, 'r') as in_sam_handle:
    for record in in_sam_handle:
        if record[0] == '@':
Пример #7
0
from realignment import expansion_aware_realign, classify_realigned_read
from load_info import load_profile, extract_locus_info
from extract_genome import extract_pre_post_flank

# Hard-coded experiment selection: read class label and copy number used to
# build the file names below.
read_class = 'srp'
nCopy = 70
# SAM file for this copy number and read class.
filt_path = '/storage/nmmsv/expansion-experiments/ATXN3_32_cov60_dist500_hap_viz/aligned_read/nc_'+str(nCopy)+'_'+read_class+'.sam'
# filt_path = '/storage/nmmsv/python_playground/test_filter_IRR/nc_'+str(nCopy)+'.sam'
# Same file name under the 'true_filter' sub-directory.
filt_path_true = '/storage/nmmsv/expansion-experiments/ATXN3_32_cov60_dist500_hap_viz/aligned_read/true_filter/nc_'+str(nCopy)+'_'+read_class+'.sam'
# SAM file for this copy number without the read-class suffix.
sam_path = '/storage/nmmsv/expansion-experiments/ATXN3_32_cov60_dist500_hap_viz/aligned_read/nc_'+str(nCopy)+'.sam'
exp_dir = '/storage/nmmsv/expansion-experiments/ATXN3_32_cov60_dist500_hap_viz/'
# Experiment settings loaded from the experiment directory.
arg_dict = load_profile(exp_dir)
locus = arg_dict['locus']
read_len = arg_dict['read_len']
motif = arg_dict['motif']
# Split the locus string into its three components and fetch the two
# flanking values returned by the helper for this read length.
chrom, locus_start_ref, locus_end_ref = extract_locus_info(locus)
pre, post = extract_pre_post_flank(exp_dir, read_len)
# Match / mismatch / gap scores; presumably passed to the realignment
# functions imported at the top of this script -- confirm.
score_dict = {	'match': 	3, \
				'mismatch': -1, \
				'gap': 		-3}
verbose = False
margin = 2

print locus_start_ref, locus_end_ref
# Accumulators for the loop over `filt_path` that follows: `kk` counts the
# non-header lines read.
true_reads = []
kk = 0
with open (filt_path, 'r') as handle:
	for record in handle:
		if record[0] != '@':
			kk = kk + 1
			QNAME = record.split()[0]