# Reads the FASTA records in 'rosalind_corr.txt' and prints one
# "<suspect read>-><replacement>" line per read that occurs only once.
from util.read import read_fasta
from util.func_tools import get_complement


def _mismatch_count(left, right):
    """Return how many aligned positions of ``left`` and ``right`` differ.

    Positions are paired with ``zip``, so a longer argument's tail is ignored.
    """
    return sum(1 for a, b in zip(left, right) if a != b)


# Item 1 of every record is used as the sequence text.
seqs = [row[1] for row in read_fasta('rosalind_corr.txt')]

# NOTE(review): get_complement is a project helper; presumably it yields the
# reverse complement of a read -- confirm in util.func_tools.
seqs_with_complements = seqs + [get_complement(seq) for seq in seqs]

# A read is suspect when it appears exactly once among reads + complements;
# anything appearing more than once is treated as trustworthy.
err_seqs = [seq for seq in seqs if seqs_with_complements.count(seq) == 1]
correct_seqs = [
    seq
    for seq in seqs_with_complements
    if seqs_with_complements.count(seq) > 1
]

# Pair each suspect read with the first trusted sequence that differs from it
# in exactly one position.  The [0] raises IndexError when there is none.
corrected_seqs = [
    [seq for seq in correct_seqs if _mismatch_count(err_seq, seq) == 1][0]
    for err_seq in err_seqs
]

print('\n'.join('->'.join(pair) for pair in zip(err_seqs, corrected_seqs)))
# Prints every distinct protein string obtained by translating an open
# reading frame found in the first record of 'rosalind_orf.txt' or in its
# reversed, base-swapped counterpart.
from util.read import read_fasta
from textwrap import wrap
import re
from util.func_tools import DNA_CODON_TABLE

# ATG followed by as few whole codons as possible; the lookahead requires a
# stop codon (TAA/TAG/TGA) right after the match without including it.
_ORF_PATTERN = re.compile(r'ATG(...)*?(?=TAA|TAG|TGA)')

# Base swap applied when building the second strand.  'C' is deliberately not
# listed: exactly like the original conditional chain, every base other than
# A/T/G (so C, but also any unexpected symbol) becomes 'G'.
_PAIRING = {'A': 'T', 'T': 'A', 'G': 'C'}


def _first_protein_from(strand, start):
    """Translate the first ORF found in ``strand`` at or after ``start``.

    Returns the translated string, or ``None`` when no ORF is found or when
    one of its codons is missing from ``DNA_CODON_TABLE``.
    """
    # Searching with a start position gives the same match as searching the
    # slice strand[start:] (the pattern has no anchors or look-behind) but
    # avoids copying the tail of the strand on every call.
    match = _ORF_PATTERN.search(strand, start)
    if match is None:
        return None
    try:
        return ''.join(DNA_CODON_TABLE[codon] for codon in wrap(match.group(), 3))
    except KeyError:
        # The original bare ``except`` silently skipped untranslatable ORFs;
        # keep that best-effort behaviour, but only for this specific error.
        return None


seq = read_fasta('rosalind_orf.txt')[0][1]

# Compare bases by value (dict lookup) rather than with ``is``: identity of
# equal strings is an implementation detail and must not be relied upon.
complement = ''.join(_PAIRING.get(base, 'G') for base in reversed(seq))

result = set()
for idx in range(len(seq)):
    # Same order as before: forward strand first, then the second strand.
    for strand in (seq, complement):
        protein = _first_protein_from(strand, idx)
        if protein is not None:
            result.add(protein)

print('\n'.join(result))
# Greedily merges the sequences of 'rosalind_long.txt' into one string: on
# every round the ordered pair with the largest suffix/prefix overlap is
# welded together, until a single sequence is left, which is printed.
from util.read import read_fasta


def weld_seq(seq1, seq2):
    """Join ``seq2`` onto ``seq1``, sharing their longest overlap.

    The overlap is the longest suffix of ``seq1`` that is also a prefix of
    ``seq2``; it appears only once in the result.  Without any overlap the
    two strings are simply concatenated.

    >>> weld_seq('ATTAG', 'TAGCC')
    'ATTAGCC'
    >>> weld_seq('XABC', 'ABC')
    'XABC'
    >>> weld_seq('AB', 'CD')
    'ABCD'
    """
    # Start at the full length of the shorter string (the original range
    # stopped one short of it) and never test length 0: seq1[-0:] is the
    # whole of seq1, not an empty suffix.
    for weld_len in range(min(len(seq1), len(seq2)), 0, -1):
        if seq1.endswith(seq2[:weld_len]):
            return seq1 + seq2[weld_len:]
    return seq1 + seq2


seqs = [fasta[1] for fasta in read_fasta('rosalind_long.txt')]

while len(seqs) > 1:
    weld_result = []
    for i, seq1 in enumerate(seqs):
        for j, seq2 in enumerate(seqs):
            # Skip pairing an entry with itself by position, not by value:
            # two equal reads are distinct entries and must still be merged,
            # otherwise an input of duplicates leaves nothing for max().
            if i == j:
                continue
            welded_seq = weld_seq(seq1, seq2)
            # Number of characters saved by the weld, i.e. the overlap size.
            score = len(seq1) + len(seq2) - len(welded_seq)
            weld_result.append((seq1, seq2, welded_seq, score))
    # max() keeps the first pair among equally good ones.
    max_weld = max(weld_result, key=lambda x: x[3])
    seqs.remove(max_weld[0])
    seqs.remove(max_weld[1])
    seqs.append(max_weld[2])

print(seqs[0])
# Prints, separated by spaces, how often each 4-letter string over A/T/C/G
# occurs in the first record of 'rosalind_kmer.txt', ordered by sorted k-mer.
from itertools import product
from util.read import read_fasta

seq = read_fasta('rosalind_kmer.txt')[0][1]
bases = ['A', 'T', 'C', 'G']
k = 4

# Lazily spell out every length-k combination of the bases ...
k_mers = (''.join(letters) for letters in product(bases, repeat=k))
# ... and give each of them a zero counter.
k_mers_count = {k_mer: 0 for k_mer in k_mers}

# Slide a window of width k over the sequence.  A window that is not one of
# the generated k-mers raises KeyError.
window_starts = range(len(seq) - k + 1)
for start in window_starts:
    window = seq[start:start + k]
    k_mers_count[window] += 1

ordered_counts = [str(k_mers_count[k_mer]) for k_mer in sorted(k_mers_count)]
print(' '.join(ordered_counts))
# Prints a longest substring shared by every sequence in 'rosalind_lcsm.txt'.
from util.read import read_fasta, read_fasta_dict


def check_subseq(subseq, seqs):
    """Return True when ``subseq`` occurs in every sequence of ``seqs``.

    An empty ``seqs`` yields True.
    """
    return all(subseq in seq for seq in seqs)


seqs = [record[1] for record in read_fasta('rosalind_lcsm.txt')]
# Candidates are cut from the first sequence and checked against the rest.
base_seq = seqs[0]
seqs = seqs[1:]
lcs = ''

# Every start position is tried (the original stopped one short).
for i in range(len(base_seq)):
    # Only strictly longer candidates can improve the answer, and the end
    # index must be allowed to reach len(base_seq) so that substrings ending
    # on the last character are considered (the original never built them).
    # Because only longer candidates replace ``lcs``, the earliest longest
    # common substring is the one reported.
    for j in range(i + len(lcs) + 1, len(base_seq) + 1):
        subseq = base_seq[i:j]
        if not check_subseq(subseq, seqs):
            # Any longer slice starting at i contains this one, so it cannot
            # be common either.
            break
        lcs = subseq

print(lcs)
# Prints "<1-based position> <length>" for every even-length window (4 to 12
# characters) of the first record of 'rosalind_revp.txt' that equals its own
# reversed complement.
from util.read import read_fasta

data = read_fasta('rosalind_revp.txt')[0][1]


def get_complement(seq):
    """Return ``seq`` with each base swapped: A<->T, G->C, anything else->G.

    The order of the bases is kept; the caller reverses the result.
    """
    swaps = {'A': 'T', 'T': 'A', 'G': 'C'}
    return ''.join(swaps.get(base, 'G') for base in seq)


for length in (4, 6, 8, 10, 12):
    last_start = len(data) - length
    for idx in range(last_start + 1):
        window = data[idx:idx + length]
        reversed_complement = get_complement(window)[::-1]
        if window == reversed_complement:
            # Positions are reported 1-based.
            print(idx + 1, length)
# Prints (number of 'A')! * (number of 'C')! for the first record of
# 'rosalind_pmch.txt'.
# NOTE(review): presumably this counts perfect matchings and so expects as
# many U as A and as many G as C in the input -- that is not checked here.
import math
from util.read import read_fasta

data = read_fasta('rosalind_pmch.txt')[0][1]

nA, nC = (data.count(base) for base in ('A', 'C'))
a_arrangements = math.factorial(nA)
c_arrangements = math.factorial(nC)
print(a_arrangements * c_arrangements)
# Removes the later records of 'rosalind_splc.txt' from its first record and
# prints the translation of what remains.
from util.read import read_fasta
from util.func_tools import DNA_CODON_TABLE
from functools import reduce
from textwrap import wrap


def _cut_once(sequence, intron):
    """Return ``sequence`` without the first occurrence of ``intron``.

    ``sequence`` is returned unchanged when ``intron`` does not occur in it.
    The original find()-based slicing used the -1 "not found" result as an
    index, which dropped the last base and re-appended a tail fragment.
    """
    return sequence.replace(intron, '', 1)


fasta = read_fasta('rosalind_splc.txt')
dna_seq = [record[1] for record in fasta]

# The first sequence is the starting point; every following sequence is cut
# out of the running result in file order.
exon = reduce(_cut_once, dna_seq)

# Translate three bases at a time; a chunk that is not a key of
# DNA_CODON_TABLE raises KeyError.
print(''.join(DNA_CODON_TABLE[codon] for codon in wrap(exon, 3)))