Пример #1
0
def linear_seqs_division(seqs, ge, s):
    """
    Split the two sequences into a leading and a trailing part.

    seqs - the sequences to match; seqs[0] and seqs[1] are the ones split
    ge - gap extension penalty
    s - substitution matrix, or a pair [x, y] where x is the cost of
    matching two equal symbols and y the cost of matching different ones

    seqs[0] is always cut at its midpoint. seqs[1] is cut at every index
    at which the array returned by fill_half reaches its maximum.

    Returns a list with one entry per cut of seqs[1]:
    ([begin_0, begin_1], [end_0 reversed, end_1 reversed]).
    """
    score_matrix = init_matrix_H([len(seq) + 1 for seq in seqs], ge)
    match_cost = create_matching_function(s, ge)
    half_scores = fill_half(score_matrix, seqs, ge, match_cost)

    middle = len(seqs[0]) // 2
    best_positions = np.argwhere(half_scores == np.max(half_scores))

    # Each row of argwhere is turned into a plain Python scalar with
    # .item(), which only works when the row holds a single index.
    cuts = (position.item() for position in best_positions)
    return [
        (
            [seqs[0][:middle], seqs[1][:cut]],
            [seqs[0][middle:][::-1], seqs[1][cut:][::-1]],
        )
        for cut in cuts
    ]
Пример #2
0
def multidimesional_N_W_algoritm(seqs, g, s, max_mathing):
    """
    Match the sequences using a fully filled score matrix.

    seqs - the sequences to match
    g - gap extension penalty
    s - substitution matrix, or a pair [x, y] where x is the cost of
    matching two equal symbols and y the cost of matching different ones
    max_mathing - maximal number of matchings that are returned

    Returns a pair (score, matches): score is the matrix entry indexed by
    the full length of every sequence, matches is the result of matching().
    """
    shape = [len(seq) + 1 for seq in seqs]
    score_matrix = init_matrix(shape, g)
    match_cost = create_matching_function(s, g)
    fill_H_all(score_matrix, seqs, match_cost)

    # The score is read before the matchings are reconstructed.
    final_cell = tuple(len(seq) for seq in seqs)
    score = score_matrix[final_cell]
    matches = matching(score_matrix, seqs, match_cost, max_mathing)
    return score, matches
Пример #3
0
def linear_gap_algorytm(seqs, go, ge, s, max_number_of_matching):
    """
    Match two sequences using the three matrices H, E and F.

    seqs - the sequences to match; the score is read for seqs[0], seqs[1]
    go - passed together with ge to the matrix initialisers, fill_all and
    matching (presumably the gap opening cost - confirm against helpers)
    ge - gap extension cost
    s - substitution matrix, or a pair [x, y] where x is the cost of
    matching two equal symbols and y the cost of matching different ones
    max_number_of_matching - maximal number of matchings that are returned

    Returns a pair (score, matches): score is H at index
    (len(seqs[0]), len(seqs[1])), matches is the result of matching().
    """
    shape = [len(seq) + 1 for seq in seqs]
    # Every initialiser receives its own copy of the shape list.
    H = init_matrix_H(list(shape), go, ge)
    E = init_matrix_E(list(shape), go, ge)
    F = init_matrix_F(list(shape), go, ge)

    match_cost = create_matching_function(s, ge)
    fill_all(H, E, F, seqs, go, ge, match_cost)

    matches = matching(
        H, E, F, seqs, match_cost, go, ge, max_number_of_matching
    )
    final_cell = (len(seqs[0]), len(seqs[1]))
    return (H[final_cell], matches)
Пример #4
0
    """
    #print((len(seqs[0]) + 1) * (len(seqs [1]) + 1))
    #print("elo")
    if((len(seqs[0]) + 1) * (len(seqs [1]) + 1) <= L or len(seqs[0]) < 10):
        return multidimesional_N_W_algoritm(seqs,  ge , s, max_number_of_matching)
    else:
        
        array = linear_seqs_division(seqs,  ge , s)
        maximum = -float("inf")
        for seqs1, seqs_rev1 in array:
            score_begin, matches_begin = linear_algorytm(seqs1,  ge , s, max_number_of_matching, L)
            score_end, matches_end = linear_algorytm(seqs_rev1,  ge , s, max_number_of_matching, L)
            if (maximum < score_begin +score_end):
                maximum = score_begin +score_end
                max_matches_begin = matches_begin
                max_matches_end = matches_end
        return maximum, merge(max_matches_begin, max_matches_end, max_number_of_matching)
        
if __name__ == "__main__":
    # Smoke test: match "AAABB" against "AAA" with match cost 1,
    # mismatch cost -1 and gap penalty -1, and require a score of 1.
    ge = -1
    seqs = ["AAABB", "AAA"]

    # The results of these two calls are not used below; the calls are
    # kept so the helpers are still exercised when the script runs.
    d = create_matching_function([1, -1], ge)
    H = init_matrix_H([len(seq) + 1 for seq in seqs], ge)

    X = linear_algorytm(seqs, ge, [1, -1], 100, 7 * 2)
    score = X[0]
    if score != 1:
        raise AssertionError