Python parse_fasta_alignment示例，helper_funcs.helpers_bio.parse_fasta_alignment Python示例

示例#1

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

 def setUp(self):
     """
     Parses two protein FASTA inputs and keeps the results on the test case.

     ``ser_thr_kin_short.fasta`` is stored as ``self.alignment`` and
     ``protein_annotate_test.fasta`` as ``self.alignment_a``. The sequence of
     every parsed record is given a gapped extended IUPAC protein alphabet.
     """

     def load_protein_alignment(fasta_name):
         # read the named input, parse it, then tag each record's sequence
         parsed = parse_fasta_alignment(io.StringIO(file_to_string(fasta_name)))
         for record in parsed:
             # a fresh alphabet instance is created for every record
             record.seq.alphabet = Gapped(ExtendedIUPACProtein())
         return parsed

     self.alignment = load_protein_alignment("ser_thr_kin_short.fasta")
     self.alignment_a = load_protein_alignment("protein_annotate_test.fasta")

示例#2

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

 def setUp(self):
     """
     Stores the query-sequences page responses for a protein and a DNA alignment.

     For each input the FASTA text is parsed, every record's sequence gets the
     matching gapped alphabet, the alignment is created through
     ``Alignment.objects.create_alignment`` and ``/query-sequences/<slug>/`` is
     requested with the test client. The responses are kept as
     ``self.response_prot`` and ``self.response_dna``.
     """

     def fetch_query_page(title, fasta_name, alphabet_factory):
         records = parse_fasta_alignment(io.StringIO(file_to_string(fasta_name)))
         for record in records:
             # a fresh alphabet instance is created for every record
             record.seq.alphabet = Gapped(alphabet_factory())
         created = Alignment.objects.create_alignment(title, records)
         url = "/query-sequences/" + str(created.slug) + "/"
         return self.client.get(url)

     self.response_prot = fetch_query_page(
         "A. tha. SPA family protein alignment",
         "spa_protein_alignment.fasta",
         ExtendedIUPACProtein,
     )
     self.response_dna = fetch_query_page(
         "A. tha. SPA family DNA alignment",
         "spa_cds_alignment.fasta",
         ExtendedIUPACDNA,
     )

示例#3

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

    def test_align_display_page_displays_correct_consensus(self):
        """
        Tests that align_display displays the correct consensus sequence

        The expected consensus is the last record of the parsed alignment after
        ``consensus_add``. The displayed consensus is rebuilt from the residue
        cells of ``self.response`` and compared residue by residue.
        """
        expected_seqs = file_to_string("spa_protein_alignment.fasta")
        align_expected = io.StringIO(expected_seqs)
        alignment = parse_fasta_alignment(align_expected)
        alignment = consensus_add(alignment)

        # get displayed sequences: one list of residue texts per table row
        with self.assertHTML(self.response, "tr") as elems:
            seq_disp = []
            for els in elems:
                # the last <td> of every row is left out
                seq_disp_line = [
                    e.text
                    for e in els.findall("td")[:-1]
                    if e.attrib["class"] in ["residue S0", "residue S1"]
                ]
                # rows without residue cells are ignored
                if seq_disp_line:
                    seq_disp.append(seq_disp_line)

        # recompose the consensus: take the last row of every block of
        # len(alignment) displayed rows and join them in a single pass
        rows_per_block = len(alignment)
        cat_re_seq = []
        for j in range(rows_per_block - 1, len(seq_disp), rows_per_block):
            cat_re_seq.extend(seq_disp[j])

        # check consensus
        cons_li = list(alignment[-1].seq)
        self.assertEqual(cons_li, cat_re_seq, cat_re_seq)

示例#4

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

    def test_align_display_page_displays_correct_protein_alignment_sequence(self):
        """
        Tests that align_display displays an alignment with correct sequences

        The displayed residue cells of ``self.response`` are regrouped into one
        residue list per row position and compared with the records parsed from
        ``spa_protein_alignment.fasta``.
        """
        expected_seqs = file_to_string("spa_protein_alignment.fasta")
        align_expected = io.StringIO(expected_seqs)
        alignment = parse_fasta_alignment(align_expected)

        # get displayed sequences: one list of residue texts per table row
        with self.assertHTML(self.response, "tr") as elems:
            seq_disp = []
            for els in elems:
                # the last <td> of every row is left out
                seq_disp_line = [
                    e.text
                    for e in els.findall("td")[:-1]
                    if e.attrib["class"] in ["residue S0", "residue S1"]
                ]
                # rows without residue cells are ignored
                if seq_disp_line:
                    seq_disp.append(seq_disp_line)

        # recompose sequences
        # Displayed blocks are treated as len(alignment) + 1 rows long
        # (presumably the extra row is the consensus line -- confirm against the view).
        rows_per_block = len(alignment) + 1
        re_seqs = []
        for i in range(rows_per_block):
            # start from an empty list for every row position so that a position
            # with no displayed rows cannot inherit the previous position's residues
            cat_re_seq = []
            for j in range(i, len(seq_disp), rows_per_block):
                cat_re_seq.extend(seq_disp[j])
            re_seqs.append(cat_re_seq)

        # check sequences against original alignment
        for i, al in enumerate(alignment):
            al_li = list(al.seq)
            self.assertEqual(al_li, re_seqs[i], re_seqs[i])

示例#5

0

显示文件

文件： test_urls.py 项目： globz-eu/formalign

 def setUp(self):
     """
     Creates the SPA family protein alignment and keeps its slug as ``self.slug``.
     """
     fasta_text = file_to_string('spa_protein_alignment.fasta')
     records = parse_fasta_alignment(io.StringIO(fasta_text))
     for record in records:
         # a fresh gapped protein alphabet instance per record
         record.seq.alphabet = Gapped(ExtendedIUPACProtein())
     created = Alignment.objects.create_alignment(
         'A. tha. SPA family protein alignment', records
     )
     self.slug = created.slug

示例#6

0

显示文件

文件： test_tasks.py 项目： globz-eu/formalign

 def setUp(self):
     """
     Prepares ``self.name`` and ``self.data`` for the tests.

     ``self.data`` is the parsed ``spa_protein_alignment.fasta``. One gapped
     extended IUPAC protein alphabet instance is shared by every record's
     sequence and by the parsed object's ``_alphabet`` attribute.
     """
     self.name = 'A. tha. SPA family alignment'
     fasta_text = file_to_string('spa_protein_alignment.fasta')
     self.data = parse_fasta_alignment(io.StringIO(fasta_text))
     shared_alphabet = Gapped(ExtendedIUPACProtein())
     for record in self.data:
         record.seq.alphabet = shared_alphabet
     # the private attribute is set directly on the parsed object
     self.data._alphabet = shared_alphabet

示例#7

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

 def test_parse_fasta_alignment_returns_expected_object(self):
     """
     Tests that parse_fasta_alignment yields records with the expected
     descriptions, in order, for ser_thr_kin_short.fasta
     """
     fasta_text = file_to_string("ser_thr_kin_short.fasta")
     records = parse_fasta_alignment(io.StringIO(fasta_text))
     expected = ["DMD401_1-640", "CER09D1_11-435", "EGFR", "DMDPR2_1-384"]
     found = [record.description for record in records]
     # the found descriptions double as the failure message
     self.assertEqual(expected, found, found)

示例#8

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

 def test_parse_fasta_alignment(self):
     """
     Tests that parse_fasta_alignment returns the expected descriptions and
     sequences for the two records of protein.fasta
     :return:
     """
     parsed = parse_fasta_alignment(io.StringIO(file_to_string("protein.fasta")))
     expected_records = (
         ("sequence1", "MKERBGWAQ--QGKKPWRF--EEW"),
         ("sequence2", "MKERBGWA-SYQGKKPWRFAQ-EW"),
     )
     for index, (description, sequence) in enumerate(expected_records):
         record = parsed[index]
         self.assertEqual(record.description, description)
         self.assertEqual(record.seq, sequence)

示例#9

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

 def setUp(self):
     """
     Creates the "SPA1 protein alignment" from spa1_protein_alignment.fasta and
     stores the created object as ``self.align``
     """
     fasta_text = file_to_string("spa1_protein_alignment.fasta")
     records = parse_fasta_alignment(io.StringIO(fasta_text))
     for record in records:
         # a fresh gapped protein alphabet instance per record
         record.seq.alphabet = Gapped(ExtendedIUPACProtein())
     self.align = Alignment.objects.create_alignment("SPA1 protein alignment", records)

示例#10

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

 def test_display_page_uses_display_seq_template_on_GET(self):
     """
     Tests that a GET request to /query-sequences/<slug>/ for a newly created
     alignment returns a 200 response
     :return:
     """
     # NOTE(review): despite the test name, no template assertion is made here
     fasta_text = file_to_string("spa_protein_alignment.fasta")
     records = parse_fasta_alignment(io.StringIO(fasta_text))
     for record in records:
         # a fresh gapped protein alphabet instance per record
         record.seq.alphabet = Gapped(ExtendedIUPACProtein())
     save = Alignment.objects.create_alignment("A. tha. SPA family alignment", records)
     url = "/query-sequences/" + str(save.slug) + "/"
     self.assertEqual(self.client.get(url).status_code, 200)

示例#11

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

    def test_align_display_page_displays_sequences_in_the_correct_order(self):
        """
        Tests that align_display displays the sequences in the correct order

        The expected order is the record ids of spa_protein_alignment.fasta
        followed by "consensus 70%"; every displayed block of ids must match it.
        """
        fasta_text = file_to_string("spa_protein_alignment.fasta")
        alignment = parse_fasta_alignment(io.StringIO(fasta_text))
        ids = [record.id for record in alignment] + ["consensus 70%"]

        # collect the text of every "seq_id" cell; the last <td> of each row is left out
        with self.assertHTML(self.response, "tr") as elems:
            ids_disp = [
                cell.text
                for row in elems
                for cell in row.findall("td")[:-1]
                if cell.attrib["class"] == "seq_id"
            ]

        # blocks of sequence IDs
        block_size = len(ids)
        for start in range(0, len(ids_disp), block_size):
            block = ids_disp[start : start + block_size]
            for position, expected_id in enumerate(ids):
                self.assertEqual(expected_id, block[position], block[position])

示例#12

0

显示文件

文件： forms.py 项目： globz-eu/formalign

    def clean_align_input(self):
        """
        Returns cleaned and validated alignment sequence data.

        Checks, in order:
        - the submitted text must be non-empty and start with '>' (FORMAT_ERROR otherwise)
        - parse_fasta_alignment must not raise ValueError (ALIGNMENT_ERROR otherwise)
        - the parsed result must hold more than one sequence (LESS_THAN_TWO_SEQS_ERROR otherwise)

        :return: the object returned by parse_fasta_alignment for the submitted text
        :raises forms.ValidationError: when one of the checks above fails
        """
        align_input = self.cleaned_data['align_input']

        # An empty value has no first character: report it as a format error
        # instead of letting an index lookup raise IndexError.
        if not align_input or not align_input.startswith('>'):
            raise forms.ValidationError(FORMAT_ERROR)

        try:
            parsed_data = parse_fasta_alignment(io.StringIO(align_input))
        except ValueError as err:
            # keep the parser's error as the cause for easier debugging
            raise forms.ValidationError(ALIGNMENT_ERROR) from err

        if len(parsed_data) <= 1:
            raise forms.ValidationError(LESS_THAN_TWO_SEQS_ERROR)

        return parsed_data

示例#13

0

显示文件

文件： test_views.py 项目： globz-eu/formalign

    def test_align_display_renders_correct_color_classes(self):
        """
        Tests that align_display assigns the correct color classes (residue S0 or residue S1) to the residues

        The expected classes come from the "eq" letter annotations of the
        alignment after ``consensus_add`` and ``annotate``. The displayed
        classes are read from the residue cells of ``self.response``.
        """
        expected_seqs = file_to_string("spa_protein_alignment.fasta")
        align_expected = io.StringIO(expected_seqs)
        alignment = parse_fasta_alignment(align_expected)
        alignment = consensus_add(alignment)
        alignment = annotate(alignment)

        # get displayed color classes: one list of class names per table row
        with self.assertHTML(self.response, "tr") as elems:
            seq_disp = []
            for els in elems:
                # the last <td> of every row is left out
                seq_disp_line = [
                    e.attrib["class"]
                    for e in els.findall("td")[:-1]
                    if e.attrib["class"] in ["residue S0", "residue S1"]
                ]
                # rows without residue cells are ignored
                if seq_disp_line:
                    seq_disp.append(seq_disp_line)

        # recompose sequences: row i of every block of len(alignment) displayed rows
        rows_per_block = len(alignment)
        re_seqs = []
        for i in range(rows_per_block):
            # start from an empty list for every row position so that a position
            # with no displayed rows cannot inherit the previous position's classes
            cat_re_seq = []
            for j in range(i, len(seq_disp), rows_per_block):
                cat_re_seq.extend(seq_disp[j])
            re_seqs.append(cat_re_seq)

        # check color classes
        for i, al in enumerate(alignment):
            al_li = ["residue S%s" % a for a in al.letter_annotations["eq"]]
            self.assertEqual(al_li, re_seqs[i], re_seqs)