def test_validate_alignment(self):
    """validate_alignment handles an alignment object, a FASTA string and MINI_ALIGNMENT_FILE.

    Each accepted input must stringify to the same text as the alignment
    obtained from read_alignment(); an integer must raise ParameterError.
    """
    ali_str = "> target\n--AAAA\n> template\nGGGG--\n"
    ali = read_alignment(ali_str)
    exp_ali = read_alignment(MINI_ALIGNMENT_FILE)
    # (input given to validate_alignment, alignment it should match)
    accepted = (
        (ali, ali),
        (ali_str, ali),
        (MINI_ALIGNMENT_FILE, exp_ali),
    )
    for given, reference in accepted:
        self.assertEqual(str(validate_alignment(given)), str(reference))
    self.assertRaises(ParameterError, validate_alignment, 112345)
def validate_alignment(ali):
    """Return *ali* as an alignment object.

    An RNAAlignment instance is handed back unchanged; a str is passed to
    read_alignment() and its result is returned.

    Raises ParameterError for any other kind of argument.
    """
    if isinstance(ali, RNAAlignment):
        return ali
    if not isinstance(ali, str):
        raise ParameterError("Bad parameter: '%s' must be a pairwise Alignment (object or fasta string)."%str(ali))
    return read_alignment(ali)
def test_short_with_insert(self):
    """The recipe for WITH_INSERT holds one add_fragment entry and nothing else."""
    recipe = RecipeMaker(read_alignment(WITH_INSERT)).recipe
    # (recipe attribute, expected number of entries)
    expected_counts = (
        ('difficult', 0),
        ('add_fragment', 1),
        ('add_fragment_5p', 0),
        ('add_fragment_3p', 0),
    )
    for attr_name, count in expected_counts:
        self.assertEqual(len(getattr(recipe, attr_name)), count)
def test_overhang(self):
    """Gaps at begin and end should be used only if in the template.

    For OVERHANG_ALIGN the recipe must hold one 5' fragment of length 3
    and one 3' fragment of length 4.
    """
    alignment = read_alignment(OVERHANG_ALIGN)
    recipe = RecipeMaker(alignment).recipe

    five_prime = recipe.add_fragment_5p
    self.assertEqual(len(five_prime), 1)
    self.assertEqual(len(five_prime[0]), 3)

    three_prime = recipe.add_fragment_3p
    self.assertEqual(len(three_prime), 1)
    self.assertEqual(len(three_prime[0]), 4)
def test_gaps_in_target(self):
    """Moderna should model gaps in the target, too."""
    alignment = read_alignment(ALIGN_TARGET_GAP)
    model = RnaModel(self.t, alignment)
    model.apply_alignment()
    model.insert_all_fragments()
    # '_' characters are stripped before comparing with the expected sequence.
    modeled = model.get_sequence().seq_with_modifications
    self.assertEqual(modeled.replace('_', ''), '..CUGACCU#P')
def test_model_with_5p3p_ends(self):
    """A model built with create_model() must carry the full target sequence.

    The template row of the alignment has gaps at both ends.
    """
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment('''> target CAUGCGGAYYYALCUCAGGUA > mini_template ---GCGGAUUUALCUCAG--- ''')
    model = RnaModel(self.t, alignment)
    model.create_model()
    expected = Sequence('CAUGCGGAYYYALCUCAGGUA')
    self.assertEqual(model.get_sequence(), expected)
def test_build_model_1exd_B(self):
    """Tests building model for 2tra_A on template 1exd_B.pdb

    There are no assertions; the test passes when model creation does not raise.
    """
    chain_id = 'B'
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    ali_string = """> 2tra_A UC-C--G--U---G--A-------U----A---G---U---U-P--AAD---------GGD---------CA-GA-A--U-------G----G---G-C----G-C--------P----------U-----G--------U-----------C----------K------------C-------------G---U--G--C-CAG--------------------------------------------------A---U-?-G--G---G-G--T ---P-C-A-----AU-----------U-----C---C--C---C---G--------U--C---G--C--G-GAG-C-- > 1ehd_B.pdb -G-G--G--G---U--A-------U----C---G---C---C-A--AGC---------GGU----------A-AG-G--C-------A----C---C-G----G-A--------U----------U-----C--------U-----------G----------A------------U-------------U---C--C--G-G-A--------------------------------------------------G--GU-C-G--A---G-G--U ---U-C-G-----AA-----------U-----C---C--U---C---G--------U--A---C--C--C-CAG-CCA"""
    alignment = read_alignment(ali_string)
    template = load_template(RNA_1EXD, chain_id)
    model = create_model(template, alignment, chain_id)
def test_build_model_2bte_B(self):
    """Tests building model for 2j00_W on template 2bte_B.pdb

    There are no assertions; the test passes when model creation does not raise.
    """
    chain_id = 'B'
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    ali_string = """> 2j00_W.pdb CP000026.1/4316393-4316321 GC-C--C--G---G--A-------U----A---G---C---U-C--AGU--------CGGU----------A-GA-G--C-------A----G---G-G----G-A--------U----------U-----G--------A-----------A----------A------------A-------------U---C--C--C-CGU--------------------------------------------------G--UC-C-U--U---G-G--U---U-C-G-----AU-----------U-----C---C--G---A---G--------U--C---C--G--G-GCA-CCA > 2bte_B.pdb GC-C--G--G---G--G-------U----G---G---C---G-G--A-AU-------GGGU----------A-GACG--C-------G----C---A-U----G-A--------C_---------A-----U--------C-----------A----------U------------G-------------U---G--C--G-CAA--------------------------------------------------G-CGU-G-C--G---G-G--U---U-C-A-----AG-----------U-----C---C--C---G---C--------C--C---C--C--G-GCA-CCA"""
    alignment = read_alignment(ali_string)
    template = load_template(RNA_2BTE, chain_id)
    model = create_model(template, alignment, chain_id)
def test_5p_extension(self):
    """After add_missing_5p() the model sequence must equal the alignment's target_seq."""
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment("""> target AAAAAAAAAAGCGGAUUUALCUCAG > template ----------GCGGAUUUALCUCAG """)
    # No template object is passed; the model is loaded from MINI_TEMPLATE.
    model = RnaModel(None, alignment, data_type='file', data=MINI_TEMPLATE)
    model.add_missing_5p()
    self.assertEqual(model.get_sequence(), alignment.target_seq)
def test_complicated_trna_alignment(self):
    """str() of the parsed tRNA alignment must equal the expected text."""
    # NOTE(review): both literals reproduced exactly as shown in this view (one
    # line each); confirm whether they originally contained line breaks.
    expected = """ GGGCCCGUGGUCUAGUU_------GACGCCGCCCUUACGAGGCGGAG-GUCCGGGGUUCAAGUCCCCGCGGGCCCACCA --GCCAGGGUGGCAGAG-GGGCUUUGCGGCGGACUCUAGAUCCGCUUUA-CCCCGGUUCGAAUCCGGGCCCUG-GC--- """
    parsed = read_alignment("""> 1j2b_D.pdb GGGCCCGUGGUCUAGUU_--------G-ACGCC-GCCC-UUACGAGGCGGAG-----------G-UC-CGGGGUUC-A--AG--U-C-CC--C-G-CG-GGCCCA-CCA > 2du6_D.pdb --GCCAGGGUGGCAGA---GGGGCUUU-GCGGC-GGAC-UCUAGAUCCGCUUU----------A--C-CCCGGUUC-G--AA--U-C-CG--G-G-CC-CUG-GC---- """)
    self.assertEqual(str(parsed), expected)
def test_create_model_with_gaps(self):
    """Should create the model automatically."""
    alignment = read_alignment(ALIGN_1B23_1QF6)
    template = Template(RNA_1B23, 'file', 'R')
    model = RnaModel(template, alignment)
    model.apply_alignment()
    model.insert_all_fragments()
    # '_' characters are stripped before comparing with the expected sequence.
    modeled = model.get_sequence().seq_with_modifications.replace('_', '')
    expected = 'GCCGAUAUAGCUCAGDDGGDAGAGCAGCGCAUUCGUEAUGCGAAG7UCGUAGGTPCGACUCCUAUUAUCGGCACCA'
    self.assertEqual(modeled, expected)
def test_fix_example(self):
    """Real-world example from Irina

    The sequence is reported as not identical to the template before
    fix_template_seq() and as identical afterwards.
    """
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment(""">1EIY:C|PDBID|CHAIN|SEQUENCE GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA >1EHZ:A|PDBID|CHAIN|SEQUENCE GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA""")
    matcher = AlignmentMatcher(alignment)
    template_seq = Sequence('GCGGAUUUALCUCAGDDGGGAGAGCRCCAGABU#AAYAP?UGGAG7UC?UGUGTPCG"UCCACAGAAUUCGCACCA')

    self.assertFalse(matcher.is_template_identical(template_seq))
    matcher.fix_template_seq(template_seq)
    self.assertTrue(matcher.is_template_identical(template_seq))
def test_model_with_hydro_template(self):
    """If the template contains hydrogens, modifications should be added."""
    template = Template(RNA_HYDRO, 'file', 'B')
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment("""> 3tra_A.pdb Z73314.1/2358-2429 UPA > 1qru_B.pdb X55374.1/1-72 CAA """)
    model = RnaModel(template, alignment)
    model.create_model()
    expected = Sequence('UPA')
    self.assertEqual(model.get_sequence(), expected)
def test_model_with_alignment_adjustment(self):
    """Introduces small corrections on alignment.

    match_template_with_alignment() is called before the model is built.
    """
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment("""> target ACUGUGAYUA[UACCU#P-G > template with small errors. GCG7A----U.UAGCUCA_G """)
    template = Template(MINI_TEMPLATE, 'file')
    match_template_with_alignment(template, alignment)
    model = RnaModel(template, alignment)
    model.create_model()
    expected = Sequence("ACUGUGAYUA[UACCU#PG")
    self.assertEqual(model.get_sequence(), expected)
def test_number_gap(self):
    """Builds model with numbering gap in the template."""
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment("""> target CCGACCUUCGGCCACCUGACAGUCCUGUGCGGGAAACCGCACAGGACUGUCAACCAGGUAAUAUAACCACCGGGAAACGGUGGUUAUAUUACCUGGUACGCCUUGACGUGGGGGAAACCCCACGUCAAGGCGUGGUGGCCGAAGGUCGG > template CCGACCUUCGGCCACCUGACAGUCCUGUGCGG----CCGCACAGGACUGUCAACCAGGUAAUAUAACCACCGG----CGGUGGUUAUAUUACCUGGUACGCCUUGACGUGGGG----CCCCACGUCAAGGCGUGGUGGCCGAAGGUCGG """)
    template = Template(JMB_TEMPLATE, 'file')
    clean_structure(template)
    model = RnaModel(template, alignment)
    model.create_model()
    # The comparison uses the model sequence's seq_without_breaks attribute.
    modeled = model.get_sequence().seq_without_breaks
    self.assertEqual(modeled, alignment.target_seq)
def test_build_model_1efw_D(self):
    """ Tests building model for 1ehz_A on template 1efw_D. Should work even when 50 fragment candidates are checked

    The model is written to OUTPUT and the test checks that the file exists.
    """
    chain_id = 'D'
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    ali_string = """> 1ehz_A.pdb M14856.1/1-73 GC-G--G--A---U--U-------U----A---L---C---U-C--AGD--------DGGG----------A-GA-G--C-------R----C---C-A----G-A--------B----------U-----#--------A-----------A----------Y------------A-------------P---?--U--G-GAG--------------------------------------------------7--UC-?-U--G---U-G--T ---P-C-G-----"U-----------C-----C---A--C---A---G--------A--A---U--U--C-GCA-CCA > 1efw_D.pdb GG-A--G--C---G--G-------4----A---G---U---U-C--AGD--------CGGD---------DA-GA-A--U-------A----C---C-U----G-C--------C----------U-----Q--------U-----------C----------/------------C-------------G---C--A--G-GGG--------------------------------------------------7--UC-G-C--G---G-G--018U---P-C-G-----AG-----------U-----C---C--C---G---P--------C--C---G--U--U-CC-----"""
    alignment = read_alignment(ali_string)
    template = load_template(RNA_1EFW, chain_id)
    model = create_model(template, alignment, chain_id)

    # Remove any existing OUTPUT file so the final check reflects this run.
    output_exists = os.access(OUTPUT, os.F_OK)
    if output_exists:
        os.remove(OUTPUT)
    write_model(model, OUTPUT)
    self.assertTrue(os.access(OUTPUT, os.F_OK))
def test_doublegap_model(self):
    """Should create a model filling two gaps"""
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment('''> target GGGAUAGUUCCAGABU#A > template GGGA-AG--CCAGABU#A ''')
    template = Template(DOUBLEGAP, 'file', 'A')
    model = RnaModel(template, alignment)
    # Build the model step by step instead of calling create_model().
    model.apply_alignment()
    model.insert_all_fragments()
    model.fix_backbone()
    expected = Sequence('GGGAUAGUUCCAGABU#A')
    self.assertEqual(model.get_sequence(), expected)
def test_fix_alignment_with_gap(self):
    """Real-world example from nettab - 1qf6 modeling on 1c0a

    Also checks that aligned_sequences[0] still equals the value read
    before fix_template_seq() was called.
    """
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment(""">1QF6 DDGGD-AGAGAAA >1C0A UCGGUUAGAA---""")
    target_before = alignment.aligned_sequences[0]
    matcher = AlignmentMatcher(alignment)
    template_seq = Sequence('DCGGDDAGAA')

    self.assertFalse(matcher.is_template_identical(template_seq))
    matcher.fix_template_seq(template_seq)
    self.assertTrue(matcher.is_template_identical(template_seq))
    self.assertEqual(alignment.aligned_sequences[0], target_before)
def test_oppositegap_model(self):
    """Should create a model with close gaps in the other respective sequence"""
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment('''> target GGGAGAGCRUUAG-BU#A > template GGGAGAGCR--AGABU#A ''')
    template = Template(OPPOSITEGAP, 'file', 'A')
    model = RnaModel(template, alignment)
    model.apply_alignment()
    model.insert_all_fragments()
    # '_' characters are stripped before comparing with the expected sequence.
    modeled = model.get_sequence().seq_with_modifications
    self.assertEqual(modeled.replace('_', ''), 'GGGAGAGCRUUAGBU#A')
def test_long_5p_extension(self):
    """add_missing_5p() raises RenumeratorError on the fresh model and succeeds after renumber_chain(m, 100)."""
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment("""> target AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGCGGAUUUALCUCAG > template ----------------------------------------GCGGAUUUALCUCAG """)

    # first number == 1 should not work
    unshifted = RnaModel(None, alignment, data_type='file', data=MINI_TEMPLATE)
    self.assertRaises(RenumeratorError, unshifted.add_missing_5p)

    # first number == 100 should work
    shifted = RnaModel(None, alignment, data_type='file', data=MINI_TEMPLATE)
    renumber_chain(shifted, 100)
    shifted.add_missing_5p()
    self.assertEqual(shifted.get_sequence(), alignment.target_seq)
def test_model_with_close_gaps(self):
    """Insert fragments between residues 944, 946 and 947 copied from the 2du3 excerpt.

    The test returns before the full-model section; the statements after
    the early return are currently unreachable.
    """
    template = Template('test_data/rna_structures/2du3_excerpt.ent', 'file', 'D')
    # NOTE(review): literal reproduced exactly as shown in this view (one line);
    # confirm whether the FASTA records need to be on separate lines.
    alignment = read_alignment('''> 1qru_B.pdb X55374.1/1-72 GCA-UUCCG > 2du3_D.pdb CUUUA-CCC ''')

    model = RnaModel()
    picked = [template['944'], template['946'], template['947']]
    copy_some_residues(picked, model)

    # The first candidate returned by find_fragment() is inserted each time.
    candidates = find_fragment(model, '946', '947', Sequence('U'))
    insert_fragment(model, candidates[0])
    candidates = find_fragment(model, '944', '946', Sequence(''))
    insert_fragment(model, candidates[0])
    return  #TODO: remove

    # generate model
    model = RnaModel(template, alignment)
    model.create_model()
def load_alignment(file_path):
    """*load_alignment(file_path)*

    Reads a sequence alignment from a FASTA file and returns it as an
    Alignment object that can be stored in a variable.

    ModeRNA expects the alignment file to contain exactly two sequences:
    first the target for which the model is to be built, then the
    structural template. Standard RNA bases should be written in upper
    case (ACGU), standard DNA bases in lower case (acgt). For modified
    bases, see the 'concepts' section of the manual.

    :Arguments:
        * path+filename of a FASTA file
    """
    # The path is run through validate_filename() before it is read.
    return read_alignment(validate_filename(file_path))
def test_close_gaps(self):
    """Gaps close to each other should also work.

    The recipe for CLOSEGAPS must hold four add_fragment entries and no
    difficult entries.
    """
    recipe = RecipeMaker(read_alignment(CLOSEGAPS)).recipe
    n_difficult = len(recipe.difficult)
    self.assertEqual(n_difficult, 0)
    n_fragments = len(recipe.add_fragment)
    self.assertEqual(n_fragments, 4)
def setUp(self):
    """Create the alignment, template, model and base recognizer used by the tests.

    Also stores the template's sequence as seq_before.
    """
    alignment = read_alignment(MINI_ALIGNMENT)
    self.a = alignment
    template = Template(MINI_TEMPLATE, seq=Sequence("GCGGAUUUALCUCAG"))
    self.t = template
    self.m = RnaModel(template, alignment)
    self.seq_before = template.get_sequence()
    self.br = BaseRecognizer()
def test_gap_optimization_example_2(self):
    """Build a model from the mini_1h4s gap2 template and alignment.

    There are no assertions; the test passes when create_model() does not raise.
    """
    template = Template('test_data/gaps/mini_1h4s_T_gap2.pdb', 'file', 'T')
    alignment = read_alignment('test_data/gaps/ali_gap2.fasta')
    RnaModel(template, alignment).create_model()
def test_insert_indel_quality_1(self):
    """Insert a fragment without strand break

    Asserts that the model's modified sequence string contains no '_'.
    """
    template = Template('test_data/gaps/mini_1h4s_T_gap2.pdb', 'file', 'T')
    alignment = read_alignment('test_data/gaps/ali_gap2.fasta')
    model = RnaModel(template, alignment)
    modeled = model.get_sequence().seq_with_modifications
    # find() returns -1 when the character is absent.
    self.assertEqual(modeled.find('_'), -1)
def test_no_overhan(self):
    """The recipe for NO_OVERHANG_ALIGN must hold no 5' and no 3' fragments."""
    recipe = RecipeMaker(read_alignment(NO_OVERHANG_ALIGN)).recipe
    for attr_name in ('add_fragment_5p', 'add_fragment_3p'):
        self.assertEqual(len(getattr(recipe, attr_name)), 0)
def test_boxes(self):
    """For every BOX_SAMPLES entry, find_in_box() must be true for each alignment item and its match.

    Items obtained by iterating the alignment are paired with the
    sample's matches in order.
    """
    for first_seq, second_seq, matches in BOX_SAMPLES:
        fasta = "> 1\n%s\n> 2\n%s\n" % (first_seq, second_seq)
        align = read_alignment(fasta)
        recipe = RecipeMaker(align).recipe
        for position, match in zip(align, matches):
            found = self.find_in_box(recipe, position, match)
            self.assertTrue(found)
def setUp(self):
    """Reads MINI_ALIGNMENT and builds the recipe from it."""
    alignment = read_alignment(MINI_ALIGNMENT)
    self.a = alignment
    self.recipe = RecipeMaker(alignment).recipe
def test_create_model_with_unknown(self):
    """Alignment that contains unknown bases in the target."""
    alignment = read_alignment(MINI_ALIGNMENT_WITH_UNK)
    model = RnaModel(self.t, alignment)
    model.create_model()
    expected = Sequence('..CUGUQQUACCU#P')
    self.assertEqual(model.get_sequence(), expected)