示例#1
0
def detect_rev_seed_match(mirna_id, targetrna_id):
    '''
    X1_seed_match_rev

    Fetch both sequences with get_sequence, reverse-complement the
    targetRNA sequence, hand it to find_mirna_subtarget_candidates and
    return whatever run_result makes of the candidates.
    '''
    mirna_sequence, target_sequence = get_sequence(mirna_id, targetrna_id)
    target_revcomp = utils.reverse_complement(target_sequence)
    candidates = find_mirna_subtarget_candidates(
        mirna_id,
        mirna_sequence,
        targetrna_id,
        target_revcomp,
    )
    return run_result(candidates)
示例#2
0
def detect_seed_match(mirna_id, targetrna_id):
    '''
    1_Find_mirna_target_candidates
    Start find_mirna_target_candidates module

    Reads the four seed settings from seed_infor, then searches the
    reverse complement of the targetRNA sequence for candidates and returns
    run_result(candidates).  If a setting is missing, its error message is
    printed and the process exits with status 1.
    '''
    #seq_data
    mirna_sequence, target_sequence = get_sequence(mirna_id, targetrna_id)
    target_revcomp = utils.reverse_complement(target_sequence)

    #parameters - seed
    # (attribute name, message printed when it is missing), listed in the
    # order the values are passed to find_mirna_target_candidates.
    required_settings = (
        ('MIRNA_START_PAIRING',
         'ERROR: MIRNA_START_PAIRING parameter does not exist in module.analysis.mirna_seed.py file'),
        ('SEED_LENGTH',
         'ERROR: SEED_LENGTH parameters do not exist in module.analysis.mirna_seed.py file'),
        ('ALLOWED_GU_WOBBLES',
         'ERROR: ALLOWED_GU_WOBBLES parameters do not exist in module.analysis.mirna_seed.py file'),
        ('ALLOWED_MISMATCHES',
         'ERROR: ALLOWED_MISMATCHES parameters do not exist in module.analysis.mirna_seed.py file'),
    )

    seed_settings = []
    for setting_name, missing_message in required_settings:
        if not hasattr(seed_infor, setting_name):
            print(missing_message)
            sys.exit(1)
        seed_settings.append(getattr(seed_infor, setting_name))

    candidates = find_mirna_target_candidates(
        mirna_id,
        mirna_sequence,
        targetrna_id,
        target_revcomp,
        *seed_settings
    )
    return run_result(candidates)
示例#3
0
def motif_occurrence(mirna_seq, targetrna_seq, tmp_dict):
    '''
    Append motif occurrence statistics to every seed-match entry in tmp_dict.

    Every value of tmp_dict is a list; element [3] is read as the targetRNA
    motif and element [5] as the motif type (NEED TO CHECK: the index
    positions are taken as-is from the caller).  For each entry with a known
    motif type, eight values are appended in place:

        [count, total_windows, probability, binomial]  for the entry's own motif
        [count, total_windows, probability, binomial]  for its whole seed group

    where probability comes from calc_motif_prob and binomial from
    cumulative_binomial_distribution.  Entries with an unknown motif type
    are reported with a printed error and left unchanged.

    Parameters
        mirna_seq     : not used in this function; kept for the callers.
        targetrna_seq : passed to Markov_Model; its length also fixes the
                        number of candidate windows.
        tmp_dict      : site id => list, extended in place.

    Returns
        tmp_dict (the same object that was passed in).
    '''
    two_nt_motif_prob_dict = Markov_Model(targetrna_seq)

    # seed group => part of the targetRNA motif that belongs to the group
    seed_group_slice = {
        'p1_p8_match': slice(0, 8),
        'p2_p8_match': slice(1, 8),
        'p1_p7_match': slice(0, 7),
        'p2_p7_match': slice(1, 7),
        'p3_p8_match': slice(2, 8),
    }
    # motif type => seed groups it contributes to; the FIRST group is the
    # one the site itself is reported under.
    motif_type_groups = {
        '8mer': ('p1_p8_match', 'p2_p8_match', 'p1_p7_match',
                 'p2_p7_match', 'p3_p8_match'),
        '8mer-1A': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
        '7mer-m8': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
        '7mer-m1': ('p1_p7_match', 'p2_p7_match'),
        '7mer-1A': ('p2_p7_match',),
        '6mer-m7': ('p2_p7_match',),
        '6mer-m8': ('p3_p8_match',),
    }

    motif_type_need = []
    motif_type_dict = {group: [] for group in seed_group_slice}
    primary_motif = {}  # site id => (primary seed group, motif)

    for site_id, record in tmp_dict.items():
        targetrna_motif = record[3]  #NEED TO CHECK!!
        motif_type = record[5]  #NEED TO CHECK!!
        groups = motif_type_groups.get(motif_type)
        if groups is None:
            print('ERROR: motif_type is wrong...')
            continue
        motifs = [
            utils.reverse_complement(targetrna_motif[seed_group_slice[group]])
            for group in groups
        ]
        for group, motif in zip(groups, motifs):
            motif_type_dict[group].append(motif)
        motif_type_need.append(groups[0])
        primary_motif[site_id] = (groups[0], motifs[0])

    motif_type_need = utils.rm_duplicate_list(motif_type_need)

    motif_prob_dict = {}  # seed group => [count, total, prob, binom]
    # Keyed by (seed group, motif), not by motif alone: p2_p8/p1_p7 are both
    # 7 nt and p2_p7/p3_p8 are both 6 nt, so the same sequence can occur in
    # two groups with different counts and must not overwrite the other one.
    motif_prob_dict_each = {}

    for group in motif_type_need:
        group_motifs = motif_type_dict[group]
        test_seed = utils.rm_duplicate_list(group_motifs)

        #All_existed_motifs_calc
        pos_motif_number = len(group_motifs)
        total_motif_number = len(targetrna_seq) - len(test_seed[0]) + 1
        motif_prob, each_motif_prob_dict = calc_motif_prob(
            test_seed, two_nt_motif_prob_dict)
        motif_binom = cumulative_binomial_distribution(
            pos_motif_number, total_motif_number, motif_prob)
        motif_prob_dict[group] = [
            pos_motif_number, total_motif_number, motif_prob, motif_binom
        ]

        #Each_existed_motif_calc
        for motif, pos_motif_number_each in Counter(group_motifs).items():
            motif_prob_each = each_motif_prob_dict[motif]
            motif_binom_each = cumulative_binomial_distribution(
                pos_motif_number_each, total_motif_number, motif_prob_each)
            motif_prob_dict_each[(group, motif)] = [
                pos_motif_number_each, total_motif_number, motif_prob_each,
                motif_binom_each
            ]

    for site_id, record in tmp_dict.items():
        key = primary_motif.get(site_id)
        if key is None:
            # Same sites that were rejected in the first pass.
            print('ERROR: motif_type is wrong...')
            continue
        all_existed_motif_result = motif_prob_dict[key[0]]
        each_existed_motif_result = motif_prob_dict_each[key]
        record.extend(each_existed_motif_result)
        record.extend(all_existed_motif_result)

    return tmp_dict #[each_existed_motif_result], [all_existed_motif_result]

    '''
    motif_type_all = []
    motif_type_dict = {}
    for x in list(tmp_dict.keys()):
        motif_targetrna_revcomp = utils.reverse_complement(str(tmp_dict[x][1]))
        types = tmp_dict[x][3]
        motif_type_all.append(tmp_dict[x][3]) #8-mer, 8mer-1A, 7mer-m8, 7mer-m1, 
        motif_type_dict[] = 
    print (motif_type_all)
    motif_type = utils.rm_duplicate_list(motif_type_all)
    print (motif_type)
    seed_group = seed_grouping(motif_type)
    print (seed_group)
    if 'p1_p8_match' in seed_group: #8mer ||||||||
        pass
    elif 'p2_p8_match' in seed_group: #8mer-1A, 7mer-m8, 8mer x|||||||
        pass
    elif 'p1_p7_match, ' in seed_group: #7mer-m1, 8mer |||||||x
        pass
    elif 'p2_p7_match' in seed_group: #7mer-1A, 6mer-m7, 7mer-m1, 8mer-1A, 7mer-m8, 8mer x||||||x
        pass
    elif 'p3_p8_match' in seed_group: #6mer-m8, 8mer-1A, 7mer-m8, 8mer xx||||||
        pass
    else:
        print ('ERROR: seed_group is wrong...')
    '''
    '''
示例#4
0
def motif_occurrence(mirna_seq, targetrna_seq, tmp_dict):
    '''
    Append motif occurrence statistics to every seed-match entry in tmp_dict.

    Every value of tmp_dict is a list; element [3] is read as the targetRNA
    motif and element [5] as the motif type (NEED TO CHECK: the index
    positions are taken as-is from the caller).  For each entry with a known
    motif type, eight values are appended in place:

        [count, total_windows, probability, binomial]  for the entry's own motif
        [count, total_windows, probability, binomial]  for its whole seed group

    where probability comes from calc_motif_prob and binomial from
    cumulative_binomial_distribution.  Entries with an unknown motif type
    are reported with a printed error and left unchanged.

    Parameters
        mirna_seq     : not used in this function; kept for the callers.
        targetrna_seq : passed to Markov_Model; its length also fixes the
                        number of candidate windows.
        tmp_dict      : site id => list, extended in place.

    Returns
        tmp_dict (the same object that was passed in).
    '''
    two_nt_motif_prob_dict = Markov_Model(targetrna_seq)

    # seed group => part of the targetRNA motif that belongs to the group
    seed_group_slice = {
        'p1_p8_match': slice(0, 8),
        'p2_p8_match': slice(1, 8),
        'p1_p7_match': slice(0, 7),
        'p2_p7_match': slice(1, 7),
        'p3_p8_match': slice(2, 8),
    }
    # motif type => seed groups it contributes to; the FIRST group is the
    # one the site itself is reported under.
    motif_type_groups = {
        '8mer': ('p1_p8_match', 'p2_p8_match', 'p1_p7_match',
                 'p2_p7_match', 'p3_p8_match'),
        '8mer-1A': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
        '7mer-m8': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
        '7mer-m1': ('p1_p7_match', 'p2_p7_match'),
        '7mer-1A': ('p2_p7_match',),
        '6mer-m7': ('p2_p7_match',),
        '6mer-m8': ('p3_p8_match',),
    }

    motif_type_need = []
    motif_type_dict = {group: [] for group in seed_group_slice}
    primary_motif = {}  # site id => (primary seed group, motif)

    for site_id, record in tmp_dict.items():
        targetrna_motif = record[3]  #NEED TO CHECK!!
        motif_type = record[5]  #NEED TO CHECK!!
        groups = motif_type_groups.get(motif_type)
        if groups is None:
            print('ERROR: motif_type is wrong...')
            continue
        motifs = [
            utils.reverse_complement(targetrna_motif[seed_group_slice[group]])
            for group in groups
        ]
        for group, motif in zip(groups, motifs):
            motif_type_dict[group].append(motif)
        motif_type_need.append(groups[0])
        primary_motif[site_id] = (groups[0], motifs[0])

    motif_type_need = utils.rm_duplicate_list(motif_type_need)

    motif_prob_dict = {}  # seed group => [count, total, prob, binom]
    # Keyed by (seed group, motif), not by motif alone: p2_p8/p1_p7 are both
    # 7 nt and p2_p7/p3_p8 are both 6 nt, so the same sequence can occur in
    # two groups with different counts and must not overwrite the other one.
    motif_prob_dict_each = {}

    for group in motif_type_need:
        group_motifs = motif_type_dict[group]
        test_seed = utils.rm_duplicate_list(group_motifs)

        #All_existed_motifs_calc
        pos_motif_number = len(group_motifs)
        total_motif_number = len(targetrna_seq) - len(test_seed[0]) + 1
        motif_prob, each_motif_prob_dict = calc_motif_prob(
            test_seed, two_nt_motif_prob_dict)
        motif_binom = cumulative_binomial_distribution(
            pos_motif_number, total_motif_number, motif_prob)
        motif_prob_dict[group] = [
            pos_motif_number, total_motif_number, motif_prob, motif_binom
        ]

        #Each_existed_motif_calc
        for motif, pos_motif_number_each in Counter(group_motifs).items():
            motif_prob_each = each_motif_prob_dict[motif]
            motif_binom_each = cumulative_binomial_distribution(
                pos_motif_number_each, total_motif_number, motif_prob_each)
            motif_prob_dict_each[(group, motif)] = [
                pos_motif_number_each, total_motif_number, motif_prob_each,
                motif_binom_each
            ]

    for site_id, record in tmp_dict.items():
        key = primary_motif.get(site_id)
        if key is None:
            # Same sites that were rejected in the first pass.
            print('ERROR: motif_type is wrong...')
            continue
        all_existed_motif_result = motif_prob_dict[key[0]]
        each_existed_motif_result = motif_prob_dict_each[key]
        record.extend(each_existed_motif_result)
        record.extend(all_existed_motif_result)

    return tmp_dict  #[each_existed_motif_result], [all_existed_motif_result]
    '''
    motif_type_all = []
    motif_type_dict = {}
    for x in list(tmp_dict.keys()):
        motif_targetrna_revcomp = utils.reverse_complement(str(tmp_dict[x][1]))
        types = tmp_dict[x][3]
        motif_type_all.append(tmp_dict[x][3]) #8-mer, 8mer-1A, 7mer-m8, 7mer-m1, 
        motif_type_dict[] = 
    print (motif_type_all)
    motif_type = utils.rm_duplicate_list(motif_type_all)
    print (motif_type)
    seed_group = seed_grouping(motif_type)
    print (seed_group)
    if 'p1_p8_match' in seed_group: #8mer ||||||||
        pass
    elif 'p2_p8_match' in seed_group: #8mer-1A, 7mer-m8, 8mer x|||||||
        pass
    elif 'p1_p7_match, ' in seed_group: #7mer-m1, 8mer |||||||x
        pass
    elif 'p2_p7_match' in seed_group: #7mer-1A, 6mer-m7, 7mer-m1, 8mer-1A, 7mer-m8, 8mer x||||||x
        pass
    elif 'p3_p8_match' in seed_group: #6mer-m8, 8mer-1A, 7mer-m8, 8mer xx||||||
        pass
    else:
        print ('ERROR: seed_group is wrong...')
    '''
    '''