Exemplo n.º 1
0
def find_N_glycosylation_motif(protein):
    """Collect positions of N-glycosylation motif candidates in *protein*.

    Every position reported by ``substring_find(protein, 'N')`` is kept when
    the four-character window ``protein[pos - 1:pos + 3]`` is complete and
    the characters at indices ``pos``, ``pos + 1`` and ``pos + 2`` are, in
    that order: not ``'P'``, one of ``'S'``/``'T'``, and not ``'P'``.

    NOTE(review): the indexing suggests ``substring_find`` yields 1-based
    positions (so ``protein[pos - 1]`` is the matched ``'N'``) -- confirm
    against its definition.

    Returns the list of accepted positions, in the order they were reported.
    """
    matches = []
    for start in substring_find(protein, 'N'):
        window = protein[start - 1:start + 3]
        if len(window) != 4:
            # Incomplete window: there is no room for a full motif here.
            continue
        if protein[start] == 'P':
            continue
        if protein[start + 1] not in ('S', 'T'):
            continue
        if protein[start + 2] == 'P':
            continue
        matches.append(start)
    return matches
Exemplo n.º 2
0
def adjacency_list(seqs):
    """Build the set of overlap edges between the sequences in *seqs*.

    *seqs* maps an identifier to its sequence data.  For every ordered pair
    of distinct identifiers ``(seq, adj)``, a prefix of ``adj``'s data is
    searched for inside ``seq``'s data with ``substring_find``.  The prefix
    length is the larger of the two values ``int(ceil(len(data) / 2))``
    computed for each sequence of the pair.

    When the search reports at least one position, the triple
    ``(seq, adj, last_position)`` is recorded, where ``last_position`` is the
    final position reported.

    Returns the set of recorded triples.
    """
    edges = set()
    for name, data in seqs.items():
        half_self = int(ceil(len(data) / 2))

        # Every other identifier is a candidate neighbour of ``name``.
        for other in set(seqs) - {name}:
            other_data = seqs[other]
            half_other = int(ceil(len(other_data) / 2))
            prefix_len = max(half_self, half_other)

            hits = list(substring_find(data, other_data[:prefix_len]))
            if hits:
                edges.add((name, other, hits[-1]))
    return edges
Exemplo n.º 3
0
def kmer_composition(dna_string):
    """Count matches in *dna_string* for each item of ``lexf_order(4, 'ACGT')``.

    Each item produced by ``lexf_order`` is joined into a string and searched
    for in *dna_string* with ``substring_find``; the number of reported
    positions is stored as a decimal string.

    Returns a list of count strings, one per enumerated item, in the order
    ``lexf_order`` produced them.
    """
    return [
        str(len(list(substring_find(dna_string, ''.join(kmer)))))
        for kmer in lexf_order(4, 'ACGT')
    ]
Exemplo n.º 4
0
from prot import prepare_codon_table
from revp import read_fasta
from revc import reverse_complement
from rna import rna_transcription
from subs import substring_find

if __name__ == "__main__":
    # ``os.path.join`` is used below, but none of the imports visible in this
    # file bind ``os``; import it here so the script cannot fail with a
    # NameError.
    import os

    # Parse the FASTA dataset; the result is used as a mapping below.
    with open(os.path.join('data', 'rosalind_orf.txt')) as dataset:
        seqs = read_fasta(dataset)

    # Codon lookup table; a value of 'Stop' marks a terminating codon.
    codon_table = prepare_codon_table(os.path.join('data', 'codon_table'))

    output = []
    # Take the sequence (the value) of one record popped from the mapping.
    dna = seqs.popitem()[1]
    # Scan both the strand itself and its reverse complement.
    rna = (rna_transcription(dna), rna_transcription(reverse_complement(dna)))

    for seq in rna:
        for offset in (0, 1, 2):
            # NOTE(review): positions are found in ``seq[offset:]`` but are
            # then used as indices into the full ``seq``; whether that lines
            # up with the actual 'AUG' depends on the position base used by
            # ``substring_find`` -- confirm against its definition.
            for start_pos in substring_find(seq[offset:], 'AUG'):
                current = []
                for i in range(start_pos, len(seq), 3):
                    codon = seq[i:i + 3]
                    if len(codon) != 3:
                        # Trailing partial codon: nothing to translate.
                        continue
                    if codon_table[codon] == 'Stop' and current:
                        # A stop closes the open reading frame; emit it and
                        # keep scanning the same frame for further ones.
                        output.append(''.join(current))
                        current = []
                    elif codon == 'AUG' or current:
                        # Start recording at a start codon, or keep extending
                        # a frame that is already open.
                        current.append(codon_table[codon])

    # De-duplicate before printing; set iteration order is arbitrary.
    print("\n".join(set(output)))
Exemplo n.º 5
0
from prot import prepare_codon_table
from revp import read_fasta
from revc import reverse_complement
from rna import rna_transcription
from subs import substring_find


if __name__ == "__main__":
    # ``os.path.join`` is used below, but none of the imports visible in this
    # file bind ``os``; import it here so the script cannot fail with a
    # NameError.
    import os

    # Parse the FASTA dataset; the result is used as a mapping below.
    with open(os.path.join("data", "rosalind_orf.txt")) as dataset:
        seqs = read_fasta(dataset)

    # Codon lookup table; a value of "Stop" marks a terminating codon.
    codon_table = prepare_codon_table(os.path.join("data", "codon_table"))

    output = []
    # Take the sequence (the value) of one record popped from the mapping.
    dna = seqs.popitem()[1]
    # Scan both the strand itself and its reverse complement.
    rna = (rna_transcription(dna), rna_transcription(reverse_complement(dna)))

    for seq in rna:
        for offset in (0, 1, 2):
            # NOTE(review): positions are found in ``seq[offset:]`` but are
            # then used as indices into the full ``seq``; whether that lines
            # up with the actual "AUG" depends on the position base used by
            # ``substring_find`` -- confirm against its definition.
            for start_pos in substring_find(seq[offset:], "AUG"):
                current = []
                for i in range(start_pos, len(seq), 3):
                    codon = seq[i : i + 3]
                    if len(codon) != 3:
                        # Trailing partial codon: nothing to translate.
                        continue
                    if codon_table[codon] == "Stop" and current:
                        # A stop closes the open reading frame; emit it and
                        # keep scanning the same frame for further ones.
                        output.append("".join(current))
                        current = []
                    elif codon == "AUG" or current:
                        # Start recording at a start codon, or keep extending
                        # a frame that is already open.
                        current.append(codon_table[codon])

    # De-duplicate before printing; set iteration order is arbitrary.
    print("\n".join(set(output)))