Exemplo n.º 1
0
def MPRT(id_list):
    '''
    Finds locations of the N-glycosylation motif, i.e. every 4-residue
    window that reads: 'N', anything but 'P', 'S' or 'T', anything but 'P' \n
    Input: List of Uniprot IDs \n
    Output: Writes ID followed by list of locations to outputs/mprt.txt
    (1-based positions; proteins without any motif are omitted)
    '''
    motif_length = 4

    output_list = []
    fasta_dict = dict()
    for prot_id in id_list:
        # get fasta record from uniprot
        fasta_dict.update(resources.get_uniprot(prot_id))
        protein = fasta_dict[prot_id]

        # Only start positions that leave room for the whole 4-residue motif
        # are examined; reading protein[i+3] near the end of the sequence
        # would otherwise raise IndexError. Windows may overlap.
        locations = [
            i + 1
            for i in range(len(protein) - motif_length + 1)
            if protein[i] == 'N'
            and protein[i + 1] != 'P'
            and protein[i + 2] in ('S', 'T')
            and protein[i + 3] != 'P'
        ]

        # only append if there are motifs
        if locations:
            output_list.append(prot_id)
            output_list.append(resources.list_to_str(locations))

    # write to file
    output_path = 'outputs/mprt.txt'
    resources.write_file(output_path, output_list)
Exemplo n.º 2
0
def CONS(dna_dict):
    '''
    Builds the profile matrix and a consensus string for a set of DNA strings \n
    Input: Fasta Dictionary with ID as key and DNA sequence as value \n
    Output: Writes the consensus string followed by the A/C/G/T count rows
    to outputs/cons.txt
    '''
    sequences = list(dna_dict.values())

    # the first sequence decides how many columns are profiled
    width = len(sequences[0])

    # one count row per nucleotide; key order also decides consensus ties
    profile = {'A': [], 'C': [], 'G': [], 'T': []}
    consensus_chars = []
    for col in range(width):
        column = [seq[col] for seq in sequences]
        column_counts = {nt: column.count(nt) for nt in profile}

        for nt, count in column_counts.items():
            profile[nt].append(count)

        # max() returns the first key holding the highest count
        consensus_chars.append(max(column_counts, key=column_counts.get))

    consensus = ''.join(consensus_chars)

    output_list = [consensus]
    for nt in ('A', 'C', 'G', 'T'):
        output_list.append(nt + ": " + resources.list_to_str(profile[nt]))

    resources.write_file('outputs/cons.txt', output_list)
Exemplo n.º 3
0
def PERM(n):
    '''
    Input: length of permutations \n
    Output: Writes the number of permutations of 1..n followed by one
    permutation per line to 'outputs/perm.txt'
    '''
    num_list = list(range(1, n + 1))

    # Materialize the result: if `permutations` yields lazily (as
    # itertools.permutations does) calling len() on it directly would raise
    # TypeError. Wrapping an existing list in list() is harmless.
    perm_list = list(permutations(num_list))

    # first line is the total count, then one formatted permutation per line
    formatted_list = [str(len(perm_list))]
    formatted_list.extend(
        resources.list_to_str(perm).strip() for perm in perm_list)

    output_path = 'outputs/perm.txt'
    resources.write_file(output_path, formatted_list)
Exemplo n.º 4
0
def SUBS(s, t):
    '''
    Input: 2 DNA strings s,t
    Output: Locations of t as a substring of s (space separated list),
    1-based and including overlapping occurrences. When t does not occur
    in s the list is empty.
    '''
    locs = []
    start = 0
    while True:
        loc = s.find(t, start)
        if loc == -1:
            break
        locs.append(loc + 1)
        # resume one character after the match start so overlaps are found
        start = loc + 1

    return resources.list_to_str(locs)
Exemplo n.º 5
0
def REVP(dna_dict):
    '''
    Locates reverse palindromes (substrings equal to their own reverse
    complement) in a DNA sequence \n
    Input: Dict with description as key and dna sequence as value \n
    Output: Writes list of position and length of all reverse palindromes
    of length 4-12 to outputs/revp.txt
    '''
    dna_seq = resources.fasta_to_string(dna_dict)[1]

    # position-by-position complement (not reversed)
    complement = ''.join(resources.compliments[nt] for nt in dna_seq)
    seq_len = len(dna_seq)

    found = []
    for start in range(seq_len):
        # span is the distance between first and last base, so the
        # candidate substring has span + 1 characters (4 to 12)
        for span in range(3, 12):
            # longer candidates cannot fit either
            if start + span + 1 > seq_len:
                break

            # compare each base with the complement of its mirror position
            is_reverse_palindrome = all(
                dna_seq[start + offset] == complement[start + span - offset]
                for offset in range(span)
            )
            if is_reverse_palindrome:
                found.append([start + 1, span + 1])

    # convert each [position, length] pair to a line of text
    lines = [resources.list_to_str(pair).strip() for pair in found]

    resources.write_file('outputs/revp.txt', lines)
Exemplo n.º 6
0
def GRPH(dna_dict):
    '''
    Builds the overlap graph: an edge goes from one sequence to another
    when the last 3 nt of the first equal the first 3 nt of the second \n
    Input: Fasta Dictionary with ID as key and DNA sequence as value \n
    Output: Writes the adjacency list for suffix 3 to outputs/grph.txt
    '''
    k = 3

    edges = []
    for tail_id, tail_seq in dna_dict.items():
        suffix = tail_seq[len(tail_seq) - k:]
        for head_id, head_seq in dna_dict.items():
            # a sequence is never paired with itself
            if head_id == tail_id:
                continue
            if suffix == head_seq[:k]:
                edges.append([tail_id, head_id])

    lines = [resources.list_to_str(edge) for edge in edges]

    resources.write_file('outputs/grph.txt', lines)
Exemplo n.º 7
0
def old():
    '''
    Earlier attempt at the longest increasing / decreasing subsequence
    task: grows lists of candidate subsequences, keeps the longest of each
    kind and writes both to outputs/lgis.txt. \n
    NOTE(review): `permutation` is neither a parameter nor a local of this
    function, so it must be supplied by an enclosing/global scope for the
    function to run. Presumably superseded by a newer implementation -
    confirm before relying on it.
    '''
    # increasing
    # candidates start with a one-element entry holding only the first item
    increasing_possibilities = [permutation[0]]
    for num in permutation:
        # iterate over a snapshot because the list is extended inside the loop
        for possibility in copy.copy(increasing_possibilities):
            # append num to possibility and check if it works
            # if it does, create a new possiblity with that list
            possible = True
            temp = list(copy.copy(possibility))
            temp.append(num)
            # reject the extended candidate if any adjacent pair decreases
            for i in range(len(temp) - 1):
                if temp[i] > temp[i + 1]:
                    possible = False
                    break

            if possible:
                previous_possibilities = copy.copy(increasing_possibilities)
                increasing_possibilities.append(temp)

                # if there is another possibility of same length with lower/higher last number, remove higher possibility
                # NOTE(review): this inner loop rebinds `possibility`,
                # shadowing the outer loop variable of the same name
                for possibility in previous_possibilities:
                    possibility = list(possibility)
                    # debug output
                    print(possibility, temp)

                    if len(possibility) != len(temp):
                        continue

                    # pops from a throwaway copy, so `possibility` keeps its last item
                    temp1 = list(possibility).pop()
                    # NOTE(review): pops from `temp` itself, which was already
                    # appended to increasing_possibilities above, so the stored
                    # candidate loses its last element too
                    temp2 = temp.pop()

                    if temp1 == temp2:
                        if int(possibility[len(possibility) - 1]) > int(
                                temp[len(temp) - 1]):
                            increasing_possibilities.remove(possibility)
                        else:
                            increasing_possibilities.remove(temp)

        # replace a single-element candidate with [num] when num is smaller
        # NOTE(review): the list is modified while it is being iterated
        for possibility in increasing_possibilities:
            if len(possibility) != 1:
                continue

            if int(possibility[0]) > int(num):
                increasing_possibilities.remove(possibility)
                increasing_possibilities.append([num])

        # debug output
        print(increasing_possibilities)

    # find longest possibility
    # the first candidate with the greatest length wins
    increasing_longest = []
    increasing_length = 0
    for possibility in increasing_possibilities:
        if len(possibility) > increasing_length:
            increasing_longest = possibility
            increasing_length = len(possibility)

    # decreasing
    # every candidate that stays non-increasing after appending num is kept
    # alongside the shorter one it was built from
    decreasing_possibilities = []
    for num in permutation:
        # iterate over a snapshot because the list is extended inside the loop
        for possibility in copy.copy(decreasing_possibilities):
            # append num to possibility and check if it works
            # if it does, create a new possiblity with that list
            possible = True
            temp = copy.copy(possibility)
            temp.append(num)
            # reject the extended candidate if any adjacent pair increases
            for i in range(len(temp) - 1):
                if temp[i] < temp[i + 1]:
                    possible = False
                    break

            if possible:
                decreasing_possibilities.append(temp)

        # num can always start a new candidate on its own
        decreasing_possibilities.append([num])

    # find longest possibility
    # the first candidate with the greatest length wins
    decreasing_longest = []
    decreasing_length = 0
    for possibility in decreasing_possibilities:
        if len(possibility) > decreasing_length:
            decreasing_longest = possibility
            decreasing_length = len(possibility)

    increasing_str = resources.list_to_str(increasing_longest)
    decreasing_str = resources.list_to_str(decreasing_longest)

    # increasing result on the first line, decreasing on the second
    output_path = 'outputs/lgis.txt'
    resources.write_file(output_path, [increasing_str, decreasing_str])
Exemplo n.º 8
0
def LGIS(n, permutation):
    '''
    Input: length of permutation, permutation of pi \n
    Output: Writes a longest increasing subsequence followed by a longest
    decreasing subsequence to outputs/lgis.txt \n
    The n argument is kept for compatibility; the actual length of
    permutation is what gets used.
    '''
    n = len(permutation)

    # LIS: traces[i] holds a longest increasing subsequence starting at i.
    # Filled from the right so every later trace is ready when needed.
    traces = [None] * n
    for i in reversed(range(n)):
        best = None
        best_length = 0
        for j in range(i + 1, n):
            if permutation[i] < permutation[j] and len(traces[j]) > best_length:
                best = j
                best_length = len(traces[j])

        if best is None:
            traces[i] = [permutation[i]]
        else:
            traces[i] = [permutation[i]] + traces[best]

    # max() keeps the first of equally long traces
    lis = max(traces, key=len, default=[])

    # LDS: traces[i] holds a longest decreasing subsequence ending at i.
    # Index 0 is a legitimate predecessor here, so "nothing found" has to
    # be None rather than 0 - otherwise chains starting at the first
    # element would be discarded.
    traces = [None] * n
    for i in range(n):
        best = None
        best_length = 0
        for j in range(i):
            if permutation[i] < permutation[j] and len(traces[j]) > best_length:
                best = j
                best_length = len(traces[j])

        if best is None:
            traces[i] = [permutation[i]]
        else:
            traces[i] = traces[best] + [permutation[i]]

    lds = max(traces, key=len, default=[])

    output_path = 'outputs/lgis.txt'
    output_list = [resources.list_to_str(seq) for seq in [lis, lds]]
    resources.write_file(output_path, output_list)