예제 #1
0
파일: CORR.py 프로젝트: elwlwlwk/Rosalind
from util.read import read_fasta
from util.func_tools import get_complement

# Second field of every FASTA record, in file order.
seqs = [row[1] for row in read_fasta('rosalind_corr.txt')]

# Pool of the reads plus get_complement() of each read.
# NOTE(review): get_complement comes from util.func_tools; presumably it
# returns the reverse complement -- confirm against that helper.
seqs_with_complements = seqs + [get_complement(seq) for seq in seqs]

# A read occurring exactly once in the pool is treated as erroneous.
err_seqs = [seq for seq in seqs if seqs_with_complements.count(seq) == 1]

# Pool entries occurring more than once are treated as correct
# (duplicates are kept, in pool order).
correct_seqs = [
    candidate for candidate in seqs_with_complements
    if seqs_with_complements.count(candidate) > 1
]

# For every erroneous read pick the first correct entry that differs from it
# in exactly one zipped position; IndexError if there is no such entry.
corrected_seqs = []
for err_seq in err_seqs:
    one_off = [
        candidate for candidate in correct_seqs
        if sum(1 for left, right in zip(err_seq, candidate)
               if left != right) == 1
    ]
    corrected_seqs.append(one_off[0])

# One "old->new" line per correction, emitted with a single print call.
report_lines = ['->'.join(pair) for pair in zip(err_seqs, corrected_seqs)]
print('\n'.join(report_lines))
예제 #2
0
from util.read import read_fasta
from textwrap import wrap
import re
from util.func_tools import DNA_CODON_TABLE

# Sequence of the first FASTA record.
seq = read_fasta('rosalind_orf.txt')[0][1]

# Reverse complement of seq.  Bases are compared by value through a lookup
# table (the previous `base is 'T'` identity test only worked because of
# string interning).  Anything other than A/T/G still maps to 'G', exactly
# as the earlier conditional chain did.
COMPLEMENT_OF = {'A': 'T', 'T': 'A', 'G': 'C'}
complement = ''.join(COMPLEMENT_OF.get(base, 'G') for base in reversed(seq))

# A start codon followed lazily by whole codons, up to (not including) the
# first in-frame stop codon.
ORF_PATTERN = re.compile(r'ATG(...)*?(?=TAA|TAG|TGA)')

result = set()
for strand in (seq, complement):
    pos = 0
    while True:
        # search() scans forward from pos, so this yields the ORF at the
        # next start position that has an in-frame stop codon.
        found = ORF_PATTERN.search(strand, pos)
        if found is None:
            # Nothing at or after pos, hence nothing further along either.
            break
        # Resume just past this start so overlapping ORFs are also found.
        pos = found.start() + 1
        try:
            protein = ''.join(
                DNA_CODON_TABLE[triplet] for triplet in wrap(found.group(), 3))
        except KeyError:
            # Codon missing from the table (e.g. an ambiguous base): skip
            # this candidate, as the previous catch-all handler did.
            continue
        result.add(protein)

# Distinct translations, one per line (set order is unspecified).
print('\n'.join(result))
예제 #3
0
파일: LONG.py 프로젝트: elwlwlwk/Rosalind
from util.read import read_fasta


def weld_seq(seq1, seq2):
    """Append seq2 to seq1, collapsing the longest suffix/prefix overlap.

    The longest non-empty suffix of ``seq1`` that equals a prefix of
    ``seq2`` is shared between the two in the result.  Overlap lengths are
    tried from ``min(len(seq1), len(seq2))`` downwards, so a sequence that
    is entirely a suffix (or prefix) of the other is merged without
    duplication.

    Args:
        seq1: Left-hand sequence.
        seq2: Right-hand sequence.

    Returns:
        The welded sequence, or plain ``seq1 + seq2`` when there is no
        overlap.
    """
    longest_possible = min(len(seq1), len(seq2))
    # Stop at 1: seq1[-0:] would be the whole of seq1, not an empty suffix.
    for weld_len in range(longest_possible, 0, -1):
        if seq1[-weld_len:] == seq2[:weld_len]:
            return seq1 + seq2[weld_len:]
    return seq1 + seq2


# Second field of every FASTA record, in file order.
seqs = [record[1] for record in read_fasta('rosalind_long.txt')]

# Greedy merge: repeatedly weld the ordered pair of unequal sequences with
# the largest overlap until a single sequence is left.
while len(seqs) > 1:
    candidates = [
        (left, right, weld_seq(left, right))
        for left in seqs
        for right in seqs
        if left != right
    ]
    # Overlap size = characters saved by welding; max() keeps the first
    # best candidate in iteration order.
    left, right, merged = max(
        candidates,
        key=lambda item: abs(len(item[0]) + len(item[1]) - len(item[2])))
    seqs.remove(left)
    seqs.remove(right)
    seqs.append(merged)
print(seqs[0])
예제 #4
0
파일: KMER.py 프로젝트: elwlwlwk/Rosalind
from itertools import product

from util.read import read_fasta

# Sequence of the first FASTA record.
seq = read_fasta('rosalind_kmer.txt')[0][1]

bases = ['A', 'T', 'C', 'G']
k = 4

# Every possible k-mer over `bases`, each starting with a count of zero.
k_mers = [''.join(letters) for letters in product(bases, repeat=k)]
k_mers_count = {k_mer: 0 for k_mer in k_mers}

# Slide a window of width k over seq; a window that is not a known k-mer
# raises KeyError.
for start in range(len(seq) - k + 1):
    window = seq[start:start + k]
    k_mers_count[window] += 1

# Counts listed in lexicographic order of their k-mers, space separated.
ordered_counts = [str(k_mers_count[k_mer]) for k_mer in sorted(k_mers_count)]
print(' '.join(ordered_counts))
예제 #5
0
파일: LCSM.py 프로젝트: elwlwlwk/Rosalind
from util.read import read_fasta, read_fasta_dict

# Second field of every FASTA record; the first one is the reference whose
# substrings are tested, the rest are the sequences searched.
all_seqs = [record[1] for record in read_fasta('rosalind_lcsm.txt')]
base_seq = all_seqs[0]
seqs = all_seqs[1:]
# Best common substring found so far.
lcs = ''


def check_subseq(subseq, seqs):
    """Return True when ``subseq`` occurs in every element of ``seqs``.

    An empty ``seqs`` yields True.
    """
    return all(subseq in candidate for candidate in seqs)


# Search for a longest substring of base_seq shared by every sequence in seqs.
for i in range(len(base_seq)):
    # Only candidates strictly longer than the current best are worth
    # testing; the upper bound is len(base_seq) + 1 so that slices reaching
    # the final character are included.
    for j in range(i + len(lcs) + 1, len(base_seq) + 1):
        subseq = base_seq[i:j]
        if not check_subseq(subseq, seqs):
            # If base_seq[i:j] is not common, no longer slice starting at i
            # can be common either.
            break
        lcs = subseq
print(lcs)
예제 #6
0
파일: REVP.py 프로젝트: elwlwlwk/Rosalind
from util.read import read_fasta

# Sequence (second field) of the first FASTA record.
records = read_fasta('rosalind_revp.txt')
data = records[0][1]


def get_complement(seq):
    """Return the base-wise complement of ``seq`` (order is not reversed).

    'A' <-> 'T' and 'G' -> 'C'; every other symbol, including 'C', becomes
    'G'.
    """
    pairing = {'A': 'T', 'T': 'A', 'G': 'C'}
    return ''.join(pairing.get(base, 'G') for base in seq)


# Report every window of even length 4..12 that equals its own reverse
# complement, as "<1-based start> <length>", grouped by length.
for length in (4, 6, 8, 10, 12):
    window_count = len(data) - length + 1
    for start in range(window_count):
        window = data[start:start + length]
        reverse_complement = get_complement(window)[::-1]
        if window != reverse_complement:
            continue
        print(start + 1, length)
예제 #7
0
파일: PMCH.py 프로젝트: elwlwlwk/Rosalind
import math
from util.read import read_fasta

# Sequence (second field) of the first FASTA record.
data = read_fasta('rosalind_pmch.txt')[0][1]

# Product of factorial(count of 'A') and factorial(count of 'C').
# NOTE(review): presumably the number of perfect matchings, assuming 'A'
# and 'C' each have an equally frequent pairing partner -- confirm.
matchings = 1
for base in ('A', 'C'):
    matchings *= math.factorial(data.count(base))

print(matchings)
예제 #8
0
파일: SPLC.py 프로젝트: elwlwlwk/Rosalind
from util.read import read_fasta
from util.func_tools import DNA_CODON_TABLE
from functools import reduce
from textwrap import wrap

fasta = read_fasta('rosalind_splc.txt')

# Second field of every record: the first is the full sequence, each later
# one is a fragment to cut out of it.
dna_seq = [record[1] for record in fasta]

# Remove the first occurrence of each fragment in turn.  str.replace leaves
# the sequence untouched when a fragment is absent, whereas slicing around
# find() == -1 would silently corrupt it.
exon = reduce(lambda remaining, intron: remaining.replace(intron, '', 1),
              dna_seq)

# Translate the remaining sequence three bases at a time.
print(''.join(DNA_CODON_TABLE[codon] for codon in wrap(exon, 3)))