def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    '''
    Build an un-normalized null distribution of local alignment scores.

    The following process is performed num_trials times:
    1. Generate a random permutation rand_y of seq_y using random.shuffle().
    2. Compute the local alignment matrix of seq_x and rand_y and take its
       maximum entry as the score.
    3. Increment the entry for that score in the result dictionary by one.

    Side effect: the finished dictionary is also pickled to the file
    'scoring_distribution.p' in the current working directory.

    seq_x, seq_y   -- the sequences to compare (seq_y itself is not modified)
    scoring_matrix -- passed through unchanged to compute_alignment_matrix
    num_trials     -- number of shuffles; 0 yields an empty dictionary

    Returns a dict mapping each observed score to the number of trials that
    produced it.
    '''
    scoring_distribution = {}
    for _ in range(num_trials):
        # Shuffle a copy so the caller's seq_y is left untouched.
        list_y = list(seq_y)
        random.shuffle(list_y)
        rand_y = "".join(list_y)

        alignment_matrix = compute_alignment_matrix(
            seq_x=seq_x,
            seq_y=rand_y,
            scoring_matrix=scoring_matrix,
            global_flag=False)

        # Only the best score is needed, so the alignment itself is never
        # traced back; the largest matrix entry is enough.
        score = max(max(row) for row in alignment_matrix)
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1

    # The context manager closes the file even if pickling raises.
    with open('scoring_distribution.p', 'wb') as scoring_file:
        pickle.dump(scoring_distribution, scoring_file)

    return scoring_distribution
# Example #2
0
def consensus_alignment(scoring_matrix, align_sequence):
    '''
    Measure how closely one side of a local alignment matches the consensus
    PAX domain.

    Dashes are stripped from align_sequence, the result is globally aligned
    against the sequence read from 'ConsensusPAXDomain.txt', and the two
    aligned strings are compared with difflib.SequenceMatcher.

    Returns a tuple (globally aligned sequence, similarity ratio).
    '''
    consensus = read_protein('ConsensusPAXDomain.txt')
    ungapped = align_sequence.replace("-", "")

    # Both helper calls take the same three leading keyword arguments.
    shared_args = dict(seq_x=ungapped,
                       seq_y=consensus,
                       scoring_matrix=scoring_matrix)
    global_matrix = compute_alignment_matrix(global_flag=True, **shared_args)
    _, aligned_sequence, aligned_pax = compute_global_alignment(
        alignment_matrix=global_matrix, **shared_args)

    matcher = difflib.SequenceMatcher(None, aligned_sequence, aligned_pax)
    return aligned_sequence, matcher.ratio()
# Example #3
0
def random_alignment(scoring_matrix, length_human=422, length_fruitfly=857):
    '''
    Sanity-check the homework result using two random amino-acid sequences.

    Two random sequences are generated, locally aligned against each other,
    and each side of that local alignment is passed to consensus_alignment().

    scoring_matrix  -- passed through to the alignment helpers
    length_human    -- length of the first random sequence (default 422)
    length_fruitfly -- length of the second random sequence (default 857)
    NOTE(review): the defaults presumably mirror the lengths of the real
    human and fruit fly proteins -- confirm against the data files.

    Returns the tuple (ratio_human_random, ratio_fruitfly_random), i.e. the
    second value returned by consensus_alignment() for each sequence.
    '''
    alphabet = "ACBEDGFIHKMLNQPSRTWVYXZ"

    # str.join avoids the quadratic cost of repeated += on a string; the
    # random.choice calls still happen in the same order as before.
    random_human = "".join(
        random.choice(alphabet) for _ in range(length_human))
    random_fruitfly = "".join(
        random.choice(alphabet) for _ in range(length_fruitfly))

    alignment_matrix = compute_alignment_matrix(seq_x=random_human,
                                                seq_y=random_fruitfly,
                                                scoring_matrix=scoring_matrix,
                                                global_flag=False)
    # The local alignment score is not needed here, only the aligned strings.
    _, align_human, align_fruitfly = compute_local_alignment(
        seq_x=random_human,
        seq_y=random_fruitfly,
        scoring_matrix=scoring_matrix,
        alignment_matrix=alignment_matrix)

    _, ratio_human_random = consensus_alignment(scoring_matrix, align_human)
    _, ratio_fruitfly_random = consensus_alignment(scoring_matrix,
                                                   align_fruitfly)

    return ratio_human_random, ratio_fruitfly_random
def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    '''
    Build an un-normalized null distribution of local alignment scores.

    The following process is performed num_trials times:
    1. Generate a random permutation rand_y of seq_y using random.shuffle().
    2. Compute the local alignment matrix of seq_x and rand_y and take its
       maximum entry as the score.
    3. Increment the entry for that score in the result dictionary by one.

    Side effect: the finished dictionary is also pickled to the file
    'scoring_distribution.p' in the current working directory.

    seq_x, seq_y   -- the sequences to compare (seq_y itself is not modified)
    scoring_matrix -- passed through unchanged to compute_alignment_matrix
    num_trials     -- number of shuffles; 0 yields an empty dictionary

    Returns a dict mapping each observed score to the number of trials that
    produced it.
    '''
    scoring_distribution = {}
    for _ in range(num_trials):
        # Shuffle a copy so the caller's seq_y is left untouched.
        list_y = list(seq_y)
        random.shuffle(list_y)
        rand_y = "".join(list_y)

        alignment_matrix = compute_alignment_matrix(
            seq_x=seq_x, seq_y=rand_y, scoring_matrix=scoring_matrix,
            global_flag=False)

        # Only the best score is needed, so the alignment itself is never
        # traced back; the largest matrix entry is enough.
        score = max(max(row) for row in alignment_matrix)
        scoring_distribution[score] = scoring_distribution.get(score, 0) + 1

    # The context manager closes the file even if pickling raises.
    with open('scoring_distribution.p', 'wb') as scoring_file:
        pickle.dump(scoring_distribution, scoring_file)

    return scoring_distribution
# Example #5
0
def random_alignment(scoring_matrix, length_human=422, length_fruitfly=857):
    '''
    Sanity-check the homework result using two random amino-acid sequences.

    Two random sequences are generated, locally aligned against each other,
    and each side of that local alignment is passed to consensus_alignment().

    scoring_matrix  -- passed through to the alignment helpers
    length_human    -- length of the first random sequence (default 422)
    length_fruitfly -- length of the second random sequence (default 857)
    NOTE(review): the defaults presumably mirror the lengths of the real
    human and fruit fly proteins -- confirm against the data files.

    Returns the tuple (ratio_human_random, ratio_fruitfly_random), i.e. the
    second value returned by consensus_alignment() for each sequence.
    '''
    alphabet = "ACBEDGFIHKMLNQPSRTWVYXZ"

    # str.join avoids the quadratic cost of repeated += on a string; the
    # random.choice calls still happen in the same order as before.
    random_human = "".join(
        random.choice(alphabet) for _ in range(length_human))
    random_fruitfly = "".join(
        random.choice(alphabet) for _ in range(length_fruitfly))

    alignment_matrix = compute_alignment_matrix(
        seq_x=random_human, seq_y=random_fruitfly,
        scoring_matrix=scoring_matrix, global_flag=False)
    # The local alignment score is not needed here, only the aligned strings.
    _, align_human, align_fruitfly = compute_local_alignment(
        seq_x=random_human, seq_y=random_fruitfly,
        scoring_matrix=scoring_matrix, alignment_matrix=alignment_matrix)

    _, ratio_human_random = consensus_alignment(scoring_matrix, align_human)
    _, ratio_fruitfly_random = consensus_alignment(scoring_matrix,
                                                   align_fruitfly)

    return ratio_human_random, ratio_fruitfly_random
# Example #6
0
def get_edit_distance(seq_x, seq_y, scoring_matrix):
    '''
    Derive the edit distance of seq_x and seq_y from their global alignment.

    The global alignment score under scoring_matrix is computed and the
    distance is expressed as:
    |x| + |y| - score(x, y)
    '''
    global_matrix = compute_alignment_matrix(seq_x, seq_y, scoring_matrix, True)
    # Only the score is used; the two aligned strings are discarded.
    best_score, _, _ = compute_global_alignment(seq_x, seq_y, scoring_matrix, global_matrix)
    combined_length = len(seq_x) + len(seq_y)
    return combined_length - best_score
# Example #7
0
def protein_alignment(scoring_matrix):
    '''
    Locally align the human and fruit fly eyeless proteins.

    Both sequences are loaded with read_protein() from
    'HumanEyelessProtein.txt' and 'FruitflyEyelessprotein.txt'.

    Returns a tuple (score, aligned human sequence, aligned fruit fly sequence).
    '''
    human = read_protein('HumanEyelessProtein.txt')
    fruitfly = read_protein('FruitflyEyelessprotein.txt')

    # Both helper calls take the same three leading keyword arguments.
    shared_args = dict(seq_x=human, seq_y=fruitfly, scoring_matrix=scoring_matrix)
    local_matrix = compute_alignment_matrix(global_flag=False, **shared_args)
    best_score, aligned_human, aligned_fruitfly = compute_local_alignment(
        alignment_matrix=local_matrix, **shared_args)
    return best_score, aligned_human, aligned_fruitfly
# Example #8
0
def get_edit_distance(seq_x, seq_y, scoring_matrix):
    '''
    Derive the edit distance of seq_x and seq_y from their global alignment.

    The global alignment score under scoring_matrix is computed and the
    distance is expressed as:
    |x| + |y| - score(x, y)
    '''
    global_matrix = compute_alignment_matrix(
        seq_x, seq_y, scoring_matrix, True)
    # Only the score is used; the two aligned strings are discarded.
    best_score, _, _ = compute_global_alignment(
        seq_x, seq_y, scoring_matrix, global_matrix)
    combined_length = len(seq_x) + len(seq_y)
    return combined_length - best_score
# Example #9
0
def protein_alignment(scoring_matrix):
    '''
    Locally align the human and fruit fly eyeless proteins.

    Both sequences are loaded with read_protein() from
    'HumanEyelessProtein.txt' and 'FruitflyEyelessprotein.txt'.

    Returns a tuple (score, aligned human sequence, aligned fruit fly sequence).
    '''
    human = read_protein('HumanEyelessProtein.txt')
    fruitfly = read_protein('FruitflyEyelessprotein.txt')

    # Both helper calls take the same three leading keyword arguments.
    shared_args = dict(
        seq_x=human,
        seq_y=fruitfly,
        scoring_matrix=scoring_matrix,
    )
    local_matrix = compute_alignment_matrix(global_flag=False, **shared_args)
    best_score, aligned_human, aligned_fruitfly = compute_local_alignment(
        alignment_matrix=local_matrix, **shared_args)
    return best_score, aligned_human, aligned_fruitfly
# Example #10
0
def consensus_alignment(scoring_matrix, align_sequence):
    '''
    Measure how closely one side of a local alignment matches the consensus
    PAX domain.

    Dashes are stripped from align_sequence, the result is globally aligned
    against the sequence read from 'ConsensusPAXDomain.txt', and the two
    aligned strings are compared with difflib.SequenceMatcher.

    Returns a tuple (globally aligned sequence, similarity ratio).
    '''
    consensus = read_protein('ConsensusPAXDomain.txt')
    ungapped = align_sequence.replace("-", "")

    # Both helper calls take the same three leading keyword arguments.
    shared_args = dict(seq_x=ungapped, seq_y=consensus, scoring_matrix=scoring_matrix)
    global_matrix = compute_alignment_matrix(global_flag=True, **shared_args)
    _, aligned_sequence, aligned_pax = compute_global_alignment(alignment_matrix=global_matrix, **shared_args)

    matcher = difflib.SequenceMatcher(None, aligned_sequence, aligned_pax)
    return aligned_sequence, matcher.ratio()