Example #1
                            item[1]
                        ] not in superstrings:
                    superstrings.append(inner_item + [item[1]])
                    if len(inner_item) == len(fasta_dict) - 1:
                        return superstrings


def build_superstring(fasta_dict, keys_list):
    # walk the path of reads, assuming adjacent reads overlap by more than half their length
    overlap_len = len(fasta_dict[keys_list[0]]) // 2
    superstring = fasta_dict[keys_list[0]]
    for index in range(1, len(keys_list)):
        new_string = fasta_dict[keys_list[index]]
        overlap_start = common.substring_positions(
            new_string, superstring[-overlap_len:])[0]
        superstring += new_string[overlap_start + overlap_len:]
    return superstring


fasta_dict = common.fasta_parser('LONG.txt')
adj_list = list()

# build the adjacency list of overlapping reads
for key in fasta_dict.keys():
    for inner_key in fasta_dict.keys():
        if common.overlapping(fasta_dict[key],
                              fasta_dict[inner_key]) and key != inner_key:
            adj_list.append((key, inner_key))
print(len(adj_list))
longest_path = sorted(get_superstrings(adj_list), key=len)[-1]
print(longest_path)
print(build_superstring(fasta_dict, longest_path))
Example #2
import common
import math
'''
Assuming the sequence contains as many U's as A's and as many C's as G's, the
number of possible perfect matchings is simply AU! * GC!:
! - because the A-U pairs can be formed in any of AU! permutations
* - because the A-U and G-C pairings are independent of each other
'''

fasta_dict = common.fasta_parser('PMCH.txt')
sequence = fasta_dict[list(fasta_dict.keys())[0]]

# A pairs only with U and G only with C, so counting A and G is enough
AU, GC = (sequence.count(base) for base in ('A', 'G'))

matchings = math.factorial(AU) * math.factorial(GC)
print(matchings)
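A quick, self-contained sanity check of the counting argument above (illustrative only; the toy string is not part of the PMCH dataset and does not use the common module): with 2 A-U pairs and 3 G-C pairs the formula gives 2! * 3! = 12.

import math

toy_rna = 'AAGGGUUCCC'   # hypothetical toy sequence: 2 A's, 2 U's, 3 G's, 3 C's
au = toy_rna.count('A')  # the A-U pairs can be arranged in 2! ways
gc = toy_rna.count('G')  # the G-C pairs can be arranged in 3! ways
assert math.factorial(au) * math.factorial(gc) == 12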
Example #3
import common

fdict = common.fasta_parser('GC.txt')
gc_count = dict()
max_gc = 0
max_key = ''

for key in fdict.keys():
    gc_count[key] = round((fdict[key].count('G') + fdict[key].count('C')) *
                          100 / len(fdict[key]), 6)
    if gc_count[key] > max_gc:
        max_gc = gc_count[key]
        max_key = key

print(max_key)
print(max_gc)
Example #4
import common

fasta_dict = common.fasta_parser('ORF.txt')
sequence = fasta_dict[list(fasta_dict.keys())[0]]
forward_orf = [
    common.translate_dna(orf) for orf in common.find_orf_dna(sequence)
]
reverse_orf = [
    common.translate_dna(orf)
    for orf in common.find_orf_dna(common.reverse_comp(sequence))
]

print(forward_orf)
print(reverse_orf)

with open('ORF_result.txt', 'w') as f:
    for orf in set(forward_orf) | set(reverse_orf):
        print(orf, file=f)
Example #5
import common

adj_len = 3
fasta_dict = common.fasta_parser('GRPH.txt')
adj_list = []

for key in fasta_dict.keys():
    for inner_key in fasta_dict.keys():
        if fasta_dict[key][-adj_len:] == fasta_dict[
                inner_key][:adj_len] and key != inner_key:
            adj_list.append((key, inner_key))

with open('GRPH_result.txt', 'w') as output_file:
    for adj in adj_list:
        print(' '.join(adj), file=output_file)
Example #6
import common

fasta_dict = common.fasta_parser('SSEQ.txt')
ref_seq = fasta_dict[list(fasta_dict.keys())[0]]
search_seq = fasta_dict[list(fasta_dict.keys())[1]]
print(ref_seq, search_seq)
indices_list = [common.substring_positions(ref_seq, letter) for letter in search_seq]
#print(indices_list)
spliced_motif = list()
# greedily pick, for each motif letter, the first occurrence in the reference
# that lies to the right of the previously chosen position
for item in indices_list:
    if spliced_motif == []:
        spliced_motif.append(item[0])
        continue
    for index in item:
        if index > spliced_motif[-1]:
            spliced_motif.append(index)
            break

# convert 0-based indices to the 1-based positions expected in the answer
print(' '.join(str(index + 1) for index in spliced_motif))
Example #7
import common


def long_substr(data):
    # brute force: grow substrings of the first sequence and keep the longest
    # one that occurs in every sequence
    substr = ''
    if len(data) > 1 and len(data[0]) > 0:
        for i in range(len(data[0])):
            for j in range(len(data[0]) - i + 1):
                if j > len(substr) and all(data[0][i:i + j] in x
                                           for x in data):
                    substr = data[0][i:i + j]
    return substr


fasta_dict = common.fasta_parser('LCSM.txt')
data = list(fasta_dict.values())

print(long_substr(data))
Example #8
import numpy as np
from level4.HAMM import hamming_distance
import common


def get_dist_table(fasta_dict):
    # p-distance matrix: pairwise Hamming distance divided by sequence length
    fasta_list = list(fasta_dict.values())
    d = np.zeros((len(fasta_list), len(fasta_list)))
    for i in range(len(fasta_list)):
        for j in range(len(fasta_list)):
            d[i][j] = round(
                hamming_distance(fasta_list[i], fasta_list[j]) /
                len(fasta_list[i]), 4)
    return d


if __name__ == '__main__':
    input_fasta_dict = common.fasta_parser('PDST.txt')
    dist_matrix = get_dist_table(input_fasta_dict)
    with open('PDST_result.txt', 'w') as outfile:
        for i in dist_matrix:
            print(' '.join(map(str, i)), file=outfile)
Example #9
import common
import numpy as np

fasta_dict = common.fasta_parser('CONS.txt')
letter_dict = dict()
base_list = ['A', 'C', 'G', 'T']
for base in base_list:
    letter_dict[base] = [0] * len(fasta_dict[list(fasta_dict.keys())[0]])

for key in fasta_dict.keys():
    for position, letter in enumerate(fasta_dict[key]):
        letter_dict[letter][position] += 1

# consensus: the most frequent base at every position
cons_seq = ''
for position in range(len(fasta_dict[list(fasta_dict.keys())[0]])):
    value_list = [letter_dict[base][position] for base in base_list]
    cons_seq += base_list[np.argmax(value_list)]

print(cons_seq)
with open('CONS_result.txt', 'w') as f:
    print(cons_seq, file=f)
    for key in letter_dict.keys():
        print('{}: {}'.format(key, ' '.join(map(str, letter_dict[key]))), file=f)

Example #10
import common

fasta_dict = common.fasta_parser('SPLC.txt')
raw_seq = fasta_dict[list(fasta_dict.keys())[0]]

# every record other than the full sequence is an intron: splice it out
for key in fasta_dict.keys():
    if fasta_dict[key] != raw_seq:
        start_position = raw_seq.index(fasta_dict[key])
        raw_seq = raw_seq[:start_position] + raw_seq[start_position +
                                                     len(fasta_dict[key]):]

print(common.translate_rna(raw_seq.replace('T', 'U')))
Example #11
import common

fasta_dict = common.fasta_parser('TRAN.txt')

seq1, seq2 = (fasta_dict[key] for key in fasta_dict.keys())
transitions = [{'A', 'G'}, {'C', 'T'}]
transitions_num, transversions_num = 0, 0

for index in range(len(seq1)):
    if seq1[index] != seq2[index]:
        # purine<->purine or pyrimidine<->pyrimidine changes are transitions,
        # everything else is a transversion
        if {seq1[index], seq2[index]} in transitions:
            transitions_num += 1
        else:
            transversions_num += 1

print(round(transitions_num / transversions_num, 3))
Example #12
import common

fasta_dict = common.fasta_parser('REVP.txt')
sequence = fasta_dict[list(fasta_dict.keys())[0]]
with open('REVP_result.txt', 'w') as output_file:
    # reverse palindromes of total length 4 to 12, i.e. half-lengths 2 to 6
    for half_pal_len in [2, 3, 4, 5, 6]:
        for position in range(len(sequence) - half_pal_len):
            if sequence[position:position + half_pal_len] == common.reverse_comp(
                    sequence[position + half_pal_len:position + 2 * half_pal_len]):
                print(position + 1, 2 * half_pal_len, file=output_file)
Example #13
import common

kmer_len = 4


def get_kmer_count(string, kmer):
    return len(common.substring_positions(string, kmer))


fasta_dict = common.fasta_parser('KMER.txt')
sequence = fasta_dict[list(fasta_dict.keys())[0]]

all_kmers = common.get_kmers(["A", "T", "G", "C"], kmer_len)
print(' '.join(map(str,
                   [get_kmer_count(sequence, kmer) for kmer in all_kmers])))