def setUp(self):
    """
    Parses the ser_thr_kin_short and protein_annotate_test FASTA fixtures
    and stores them as self.alignment and self.alignment_a, with every
    record tagged with a gapped extended IUPAC protein alphabet
    """

    def load_protein_alignment(fasta_name):
        # read the fixture, parse it, then tag each record's sequence
        handle = io.StringIO(file_to_string(fasta_name))
        records = parse_fasta_alignment(handle)
        for record in records:
            record.seq.alphabet = Gapped(ExtendedIUPACProtein())
        return records

    self.alignment = load_protein_alignment("ser_thr_kin_short.fasta")
    self.alignment_a = load_protein_alignment("protein_annotate_test.fasta")
def setUp(self):
    """
    Saves the SPA family protein and DNA alignments and stores the GET
    response of each alignment's /query-sequences/<slug>/ page as
    self.response_prot and self.response_dna
    """

    def save_and_fetch(title, fasta_name, alphabet_cls):
        # parse the fixture and tag every record with a gapped alphabet
        records = parse_fasta_alignment(io.StringIO(file_to_string(fasta_name)))
        for record in records:
            record.seq.alphabet = Gapped(alphabet_cls())
        # persist the alignment, then request its page by slug
        saved = Alignment.objects.create_alignment(title, records)
        url = "/query-sequences/{!s}/".format(saved.slug)
        return self.client.get(url)

    self.response_prot = save_and_fetch(
        "A. tha. SPA family protein alignment",
        "spa_protein_alignment.fasta",
        ExtendedIUPACProtein,
    )
    self.response_dna = save_and_fetch(
        "A. tha. SPA family DNA alignment",
        "spa_cds_alignment.fasta",
        ExtendedIUPACDNA,
    )
def test_align_display_page_displays_correct_consensus(self):
    """
    Tests that align_display displays the correct consensus sequence

    The expected consensus is the last record of the alignment returned by
    consensus_add for the spa_protein_alignment fixture. The displayed
    consensus is rebuilt from the residue cells of self.response.
    """
    expected_seqs = file_to_string("spa_protein_alignment.fasta")
    alignment = parse_fasta_alignment(io.StringIO(expected_seqs))
    alignment = consensus_add(alignment)
    rows_per_block = len(alignment)

    # get displayed sequences: one list of residue letters per table row
    # that contains residue cells (the last cell of each row is ignored)
    with self.assertHTML(self.response, "tr") as elems:
        seq_disp = []
        for row in elems:
            seq_disp_line = [
                cell.text
                for cell in row.findall("td")[:-1]
                if cell.attrib["class"] in ("residue S0", "residue S1")
            ]
            if seq_disp_line:
                seq_disp.append(seq_disp_line)

    # recompose the consensus: take the last row of every block of
    # rows_per_block rows and concatenate them in display order.
    # NOTE(review): assumes the page repeats the rows in alignment order
    # for each block of columns -- confirm against the template.
    cat_re_seq = [
        residue
        for j in range(rows_per_block - 1, len(seq_disp), rows_per_block)
        for residue in seq_disp[j]
    ]

    # check consensus
    cons_li = list(alignment[-1].seq)
    self.assertEqual(cons_li, cat_re_seq, cat_re_seq)
def test_align_display_page_displays_correct_protein_alignment_sequence(self):
    """
    Tests that align_display displays an alignment with correct sequences

    Every record of the spa_protein_alignment fixture is compared with the
    sequence rebuilt from the residue cells of self.response.
    """
    expected_seqs = file_to_string("spa_protein_alignment.fasta")
    alignment = parse_fasta_alignment(io.StringIO(expected_seqs))
    # the display stride is one row more than the parsed alignment
    # (presumably the extra row is the consensus -- confirm against the view)
    rows_per_block = len(alignment) + 1

    # get displayed sequences: one list of residue letters per table row
    # that contains residue cells (the last cell of each row is ignored)
    with self.assertHTML(self.response, "tr") as elems:
        seq_disp = []
        for row in elems:
            seq_disp_line = [
                cell.text
                for cell in row.findall("td")[:-1]
                if cell.attrib["class"] in ("residue S0", "residue S1")
            ]
            if seq_disp_line:
                seq_disp.append(seq_disp_line)

    # recompose sequences: row i of every block, concatenated in display
    # order. Each row gets its own fresh list, so a row index with no
    # displayed lines yields [] rather than the previous row's residues.
    re_seqs = [
        [
            residue
            for j in range(i, len(seq_disp), rows_per_block)
            for residue in seq_disp[j]
        ]
        for i in range(rows_per_block)
    ]

    # check sequences against original alignment
    for i, al in enumerate(alignment):
        al_li = list(al.seq)
        self.assertEqual(al_li, re_seqs[i], re_seqs[i])
def setUp(self):
    """
    Saves the SPA family protein alignment fixture and keeps its slug
    as self.slug
    """
    fasta_text = file_to_string('spa_protein_alignment.fasta')
    records = parse_fasta_alignment(io.StringIO(fasta_text))
    # tag every record with a gapped extended IUPAC protein alphabet
    for record in records:
        record.seq.alphabet = Gapped(ExtendedIUPACProtein())
    saved = Alignment.objects.create_alignment(
        'A. tha. SPA family protein alignment', records
    )
    self.slug = saved.slug
def setUp(self):
    """
    Stores an alignment name as self.name and the parsed SPA family
    protein alignment as self.data; one shared gapped protein alphabet
    is set on every record and on the alignment itself
    """
    self.name = 'A. tha. SPA family alignment'
    fasta_handle = io.StringIO(file_to_string('spa_protein_alignment.fasta'))
    self.data = parse_fasta_alignment(fasta_handle)
    # a single alphabet instance is shared by all records and the alignment
    gapped_protein = Gapped(ExtendedIUPACProtein())
    for record in self.data:
        record.seq.alphabet = gapped_protein
    self.data._alphabet = gapped_protein
def test_parse_fasta_alignment_returns_expected_object(self):
    """
    Tests that parse_fasta_alignment yields the records of
    ser_thr_kin_short.fasta with the expected descriptions, in order
    """
    expected = ["DMD401_1-640", "CER09D1_11-435", "EGFR", "DMDPR2_1-384"]
    fasta_handle = io.StringIO(file_to_string("ser_thr_kin_short.fasta"))
    records = parse_fasta_alignment(fasta_handle)
    descriptions = [record.description for record in records]
    # the parsed descriptions double as the failure message
    self.assertEqual(expected, descriptions, descriptions)
def test_parse_fasta_alignment(self):
    """
    Tests that parse_fasta_alignment returns the expected description and
    sequence for the first two records of protein.fasta
    :return:
    """
    expected_records = (
        ("sequence1", "MKERBGWAQ--QGKKPWRF--EEW"),
        ("sequence2", "MKERBGWA-SYQGKKPWRFAQ-EW"),
    )
    fasta_handle = io.StringIO(file_to_string("protein.fasta"))
    parsed = parse_fasta_alignment(fasta_handle)
    for index, (description, residues) in enumerate(expected_records):
        record = parsed[index]
        self.assertEqual(record.description, description)
        self.assertEqual(record.seq, residues)
def setUp(self):
    """
    Saves the SPA1 protein alignment fixture and keeps the created
    Alignment as self.align
    """
    fasta_text = file_to_string("spa1_protein_alignment.fasta")
    records = parse_fasta_alignment(io.StringIO(fasta_text))
    # tag every record with a gapped extended IUPAC protein alphabet
    for record in records:
        record.seq.alphabet = Gapped(ExtendedIUPACProtein())
    self.align = Alignment.objects.create_alignment(
        "SPA1 protein alignment", records
    )
def test_display_page_uses_display_seq_template_on_GET(self):
    """
    Tests that a GET request to /query-sequences/<slug>/ for a freshly
    saved alignment returns a 200 response (only the status code is
    asserted here)
    :return:
    """
    fasta_text = file_to_string("spa_protein_alignment.fasta")
    records = parse_fasta_alignment(io.StringIO(fasta_text))
    for record in records:
        record.seq.alphabet = Gapped(ExtendedIUPACProtein())
    saved = Alignment.objects.create_alignment(
        "A. tha. SPA family alignment", records
    )
    url = "/query-sequences/{!s}/".format(saved.slug)
    self.assertEqual(self.client.get(url).status_code, 200)
def test_align_display_page_displays_sequences_in_the_correct_order(self):
    """
    Tests that align_display lists the sequence IDs of the fixture,
    followed by "consensus 70%", in that order within every block
    """
    fasta_handle = io.StringIO(file_to_string("spa_protein_alignment.fasta"))
    expected_ids = [record.id for record in parse_fasta_alignment(fasta_handle)]
    expected_ids.append("consensus 70%")

    # collect the text of every seq_id cell (last cell of each row ignored)
    with self.assertHTML(self.response, "tr") as rows:
        displayed_ids = [
            cell.text
            for row in rows
            for cell in row.findall("td")[:-1]
            if cell.attrib["class"] == "seq_id"
        ]

    # blocks of sequence IDs, each compared position by position; indexing
    # (not zip) so that a short trailing block still raises
    block_size = len(expected_ids)
    for start in range(0, len(displayed_ids), block_size):
        block = displayed_ids[start:start + block_size]
        for position, expected_id in enumerate(expected_ids):
            self.assertEqual(expected_id, block[position], block[position])
def clean_align_input(self):
    """
    Returns cleaned and validated alignment sequence data.

    Checks that the submitted text starts with '>', that
    parse_fasta_alignment accepts it, and that it holds at least two
    sequences.
    :return: the object returned by parse_fasta_alignment for the input
    :raises forms.ValidationError: with FORMAT_ERROR when the input is
        empty or does not start with '>'; with ALIGNMENT_ERROR when
        parse_fasta_alignment raises ValueError (presumably for invalid
        characters or unequal sequence lengths -- see that function);
        with LESS_THAN_TWO_SEQS_ERROR when fewer than two sequences
        were parsed
    """
    raw_input = self.cleaned_data['align_input']
    # startswith() rather than [0]: an empty submission must be reported
    # as a format error instead of crashing with IndexError
    if not raw_input.startswith('>'):
        raise forms.ValidationError(FORMAT_ERROR)
    try:
        alignment = parse_fasta_alignment(io.StringIO(raw_input))
    except ValueError as err:
        # keep the parser's error as the cause for easier debugging
        raise forms.ValidationError(ALIGNMENT_ERROR) from err
    if len(alignment) <= 1:
        raise forms.ValidationError(LESS_THAN_TWO_SEQS_ERROR)
    return alignment
def test_align_display_renders_correct_color_classes(self):
    """
    Tests that align_display assigns the correct color classes
    (residue S0 or residue S1) to the residues

    The expected classes are built from the "eq" letter annotation of each
    record after consensus_add and annotate have been applied to the
    spa_protein_alignment fixture.
    """
    expected_seqs = file_to_string("spa_protein_alignment.fasta")
    alignment = parse_fasta_alignment(io.StringIO(expected_seqs))
    alignment = consensus_add(alignment)
    alignment = annotate(alignment)
    rows_per_block = len(alignment)

    # get displayed classes: one list of class names per table row that
    # contains residue cells (the last cell of each row is ignored)
    with self.assertHTML(self.response, "tr") as elems:
        seq_disp = []
        for row in elems:
            seq_disp_line = [
                cell.attrib["class"]
                for cell in row.findall("td")[:-1]
                if cell.attrib["class"] in ("residue S0", "residue S1")
            ]
            if seq_disp_line:
                seq_disp.append(seq_disp_line)

    # recompose sequences: row i of every block, concatenated in display
    # order. Each row gets its own fresh list, so a row index with no
    # displayed lines yields [] rather than the previous row's classes.
    re_seqs = [
        [
            css_class
            for j in range(i, len(seq_disp), rows_per_block)
            for css_class in seq_disp[j]
        ]
        for i in range(rows_per_block)
    ]

    # check color classes
    for i, al in enumerate(alignment):
        al_li = ["residue S%s" % a for a in al.letter_annotations["eq"]]
        self.assertEqual(al_li, re_seqs[i], re_seqs)