Пример #1
0
 def test_validate_alignment(self):
     """validate_alignment accepts alignment objects and strings, and rejects an integer."""
     fasta_text = "> target\n--AAAA\n> template\nGGGG--\n"
     parsed = read_alignment(fasta_text)
     from_constant = read_alignment(MINI_ALIGNMENT_FILE)
     # each accepted input is paired with the alignment it must resolve to
     cases = (
         (parsed, parsed),
         (fasta_text, parsed),
         (MINI_ALIGNMENT_FILE, from_constant),
     )
     for given, wanted in cases:
         self.assertEqual(str(validate_alignment(given)), str(wanted))
     self.assertRaises(ParameterError, validate_alignment, 112345)
Пример #2
0
def validate_alignment(ali):
    """Return an alignment object for *ali*.

    An RNAAlignment instance is handed back unchanged; a string is passed
    to read_alignment and the result is returned. Any other value raises
    ParameterError.
    """
    if isinstance(ali, RNAAlignment):
        return ali
    if not isinstance(ali, str):
        message = "Bad parameter: '%s' must be a pairwise Alignment (object or fasta string)." % str(ali)
        raise ParameterError(message)
    return read_alignment(ali)
Пример #3
0
 def test_short_with_insert(self):
     """The WITH_INSERT recipe has one add_fragment entry and no difficult, 5' or 3' entries."""
     recipe = RecipeMaker(read_alignment(WITH_INSERT)).recipe
     expected_sizes = (
         ('difficult', 0),
         ('add_fragment', 1),
         ('add_fragment_5p', 0),
         ('add_fragment_3p', 0),
     )
     for attribute, size in expected_sizes:
         self.assertEqual(len(getattr(recipe, attribute)), size)
Пример #4
0
 def test_overhang(self):
     """Gaps at begin and end should be used only if in the template."""
     recipe = RecipeMaker(read_alignment(OVERHANG_ALIGN)).recipe
     # (recipe attribute, expected length of its single fragment)
     for attribute, fragment_length in (('add_fragment_5p', 3), ('add_fragment_3p', 4)):
         fragments = getattr(recipe, attribute)
         self.assertEqual(len(fragments), 1)
         self.assertEqual(len(fragments[0]), fragment_length)
Пример #5
0
 def test_gaps_in_target(self):
     """Moderna should model gaps in the target, too."""
     alignment = read_alignment(ALIGN_TARGET_GAP)
     model = RnaModel(self.t, alignment)
     model.apply_alignment()
     model.insert_all_fragments()
     # underscores are stripped from the modified sequence before comparing
     modelled = model.get_sequence().seq_with_modifications
     self.assertEqual(modelled.replace('_', ''), '..CUGACCU#P')
Пример #6
0
    def test_model_with_5p3p_ends(self):
        """The model should cover the target where the template is gapped at both ends."""
        alignment = read_alignment(
            '> target\n'
            'CAUGCGGAYYYALCUCAGGUA\n'
            '> mini_template\n'
            '---GCGGAUUUALCUCAG---\n'
        )
        model = RnaModel(self.t, alignment)
        model.create_model()
        built = model.get_sequence()
        self.assertEqual(built, Sequence('CAUGCGGAYYYALCUCAGGUA'))
Пример #7
0
    def test_build_model_1exd_B(self):
        """Tests building model for 2tra_A on template 1exd_B.pdb"""
        ali_string = """> 2tra_A
UC-C--G--U---G--A-------U----A---G---U---U-P--AAD---------GGD---------CA-GA-A--U-------G----G---G-C----G-C--------P----------U-----G--------U-----------C----------K------------C-------------G---U--G--C-CAG--------------------------------------------------A---U-?-G--G---G-G--T   ---P-C-A-----AU-----------U-----C---C--C---C---G--------U--C---G--C--G-GAG-C--
> 1ehd_B.pdb 
-G-G--G--G---U--A-------U----C---G---C---C-A--AGC---------GGU----------A-AG-G--C-------A----C---C-G----G-A--------U----------U-----C--------U-----------G----------A------------U-------------U---C--C--G-G-A--------------------------------------------------G--GU-C-G--A---G-G--U   ---U-C-G-----AA-----------U-----C---C--U---C---G--------U--A---C--C--C-CAG-CCA"""
        ali = read_alignment(ali_string)
        t = load_template(RNA_1EXD, 'B')
        m = create_model(t, ali, 'B')
Пример #8
0
    def test_build_model_2bte_B(self):
        """ Tests building model for 2j00_W on template 2bte_B.pdb"""
        ali_string = """> 2j00_W.pdb CP000026.1/4316393-4316321
GC-C--C--G---G--A-------U----A---G---C---U-C--AGU--------CGGU----------A-GA-G--C-------A----G---G-G----G-A--------U----------U-----G--------A-----------A----------A------------A-------------U---C--C--C-CGU--------------------------------------------------G--UC-C-U--U---G-G--U---U-C-G-----AU-----------U-----C---C--G---A---G--------U--C---C--G--G-GCA-CCA
> 2bte_B.pdb 
GC-C--G--G---G--G-------U----G---G---C---G-G--A-AU-------GGGU----------A-GACG--C-------G----C---A-U----G-A--------C_---------A-----U--------C-----------A----------U------------G-------------U---G--C--G-CAA--------------------------------------------------G-CGU-G-C--G---G-G--U---U-C-A-----AG-----------U-----C---C--C---G---C--------C--C---C--C--G-GCA-CCA"""
        ali = read_alignment(ali_string)
        t = load_template(RNA_2BTE, 'B')
        m = create_model(t, ali, 'B')
Пример #9
0
    def test_5p_extension(self):
        a = read_alignment("""> target
AAAAAAAAAAGCGGAUUUALCUCAG
> template
----------GCGGAUUUALCUCAG
        """)
        m = RnaModel(None, a, data_type='file', data=MINI_TEMPLATE)
        m.add_missing_5p()
        self.assertEqual(m.get_sequence(), a.target_seq)
Пример #10
0
    def test_complicated_trna_alignment(self):
        ali = read_alignment("""> 1j2b_D.pdb
GGGCCCGUGGUCUAGUU_--------G-ACGCC-GCCC-UUACGAGGCGGAG-----------G-UC-CGGGGUUC-A--AG--U-C-CC--C-G-CG-GGCCCA-CCA
> 2du6_D.pdb
--GCCAGGGUGGCAGA---GGGGCUUU-GCGGC-GGAC-UCUAGAUCCGCUUU----------A--C-CCCGGUUC-G--AA--U-C-CG--G-G-CC-CUG-GC----
""")
        expected = """
GGGCCCGUGGUCUAGUU_------GACGCCGCCCUUACGAGGCGGAG-GUCCGGGGUUCAAGUCCCCGCGGGCCCACCA
--GCCAGGGUGGCAGAG-GGGCUUUGCGGCGGACUCUAGAUCCGCUUUA-CCCCGGUUCGAAUCCGGGCCCUG-GC---
"""
        self.assertEqual(str(ali), expected)
Пример #11
0
 def test_create_model_with_gaps(self):
     """Should create the model automatically."""
     alignment = read_alignment(ALIGN_1B23_1QF6)
     template = Template(RNA_1B23, 'file', 'R')
     model = RnaModel(template, alignment)
     model.apply_alignment()
     model.insert_all_fragments()
     # underscores are stripped from the modified sequence before comparing
     modelled = model.get_sequence().seq_with_modifications.replace('_', '')
     wanted = 'GCCGAUAUAGCUCAGDDGGDAGAGCAGCGCAUUCGUEAUGCGAAG7UCGUAGGTPCGACUCCUAUUAUCGGCACCA'
     self.assertEqual(modelled, wanted)
Пример #12
0
    def test_fix_example(self):
        """Real-world example from Irina"""
        ali = read_alignment(""">1EIY:C|PDBID|CHAIN|SEQUENCE
GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA
>1EHZ:A|PDBID|CHAIN|SEQUENCE
GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA""")
        am = AlignmentMatcher(ali)
        seq = Sequence('GCGGAUUUALCUCAGDDGGGAGAGCRCCAGABU#AAYAP?UGGAG7UC?UGUGTPCG"UCCACAGAAUUCGCACCA')
        self.assertFalse(am.is_template_identical(seq))
        am.fix_template_seq(seq)
        self.assertTrue(am.is_template_identical(seq))
Пример #13
0
    def test_model_with_hydro_template(self):
        """If the template contains hydrogens, modifications should be added."""
        template = Template(RNA_HYDRO, 'file', 'B')
        alignment = read_alignment(
            '> 3tra_A.pdb Z73314.1/2358-2429\n'
            'UPA\n'
            '> 1qru_B.pdb X55374.1/1-72\n'
            'CAA\n'
        )
        model = RnaModel(template, alignment)
        model.create_model()
        built = model.get_sequence()
        self.assertEqual(built, Sequence('UPA'))
Пример #14
0
    def test_model_with_alignment_adjustment(self):
        """Introduces small corrections on alignment."""
        a = read_alignment("""> target
ACUGUGAYUA[UACCU#P-G
> template with small errors.
GCG7A----U.UAGCUCA_G
        """)
        t = Template(MINI_TEMPLATE, 'file')
        match_template_with_alignment(t, a)
        m = RnaModel(t, a)
        m.create_model()
        self.assertEqual(m.get_sequence(), Sequence("ACUGUGAYUA[UACCU#PG"))
Пример #15
0
    def test_number_gap(self):
        """Builds model with numbering gap in the template."""
        a = read_alignment("""> target
CCGACCUUCGGCCACCUGACAGUCCUGUGCGGGAAACCGCACAGGACUGUCAACCAGGUAAUAUAACCACCGGGAAACGGUGGUUAUAUUACCUGGUACGCCUUGACGUGGGGGAAACCCCACGUCAAGGCGUGGUGGCCGAAGGUCGG
> template
CCGACCUUCGGCCACCUGACAGUCCUGUGCGG----CCGCACAGGACUGUCAACCAGGUAAUAUAACCACCGG----CGGUGGUUAUAUUACCUGGUACGCCUUGACGUGGGG----CCCCACGUCAAGGCGUGGUGGCCGAAGGUCGG
""")
        t = Template(JMB_TEMPLATE, 'file')
        clean_structure(t)
        m = RnaModel(t, a)
        m.create_model()
        self.assertEqual(m.get_sequence().seq_without_breaks, a.target_seq)
Пример #16
0
    def test_build_model_1efw_D(self):
        """ Tests building model for 1ehz_A on template 1efw_D. Should work even when 50 fragment candidates are checked"""
        ali_string = """> 1ehz_A.pdb M14856.1/1-73
GC-G--G--A---U--U-------U----A---L---C---U-C--AGD--------DGGG----------A-GA-G--C-------R----C---C-A----G-A--------B----------U-----#--------A-----------A----------Y------------A-------------P---?--U--G-GAG--------------------------------------------------7--UC-?-U--G---U-G--T   ---P-C-G-----"U-----------C-----C---A--C---A---G--------A--A---U--U--C-GCA-CCA
> 1efw_D.pdb
GG-A--G--C---G--G-------4----A---G---U---U-C--AGD--------CGGD---------DA-GA-A--U-------A----C---C-U----G-C--------C----------U-----Q--------U-----------C----------/------------C-------------G---C--A--G-GGG--------------------------------------------------7--UC-G-C--G---G-G--018U---P-C-G-----AG-----------U-----C---C--C---G---P--------C--C---G--U--U-CC-----"""
        ali = read_alignment(ali_string)
        t = load_template(RNA_1EFW, 'D')
        m = create_model(t, ali, 'D')
        if os.access(OUTPUT, os.F_OK): os.remove(OUTPUT)
        write_model(m, OUTPUT)
        self.assertTrue(os.access(OUTPUT, os.F_OK))
Пример #17
0
    def test_doublegap_model(self):
        """Should create a model filling two gaps"""
        alignment = read_alignment(
            '> target\n'
            'GGGAUAGUUCCAGABU#A\n'
            '> template\n'
            'GGGA-AG--CCAGABU#A\n'
        )
        template = Template(DOUBLEGAP, 'file', 'A')
        model = RnaModel(template, alignment)
        model.apply_alignment()
        model.insert_all_fragments()
        model.fix_backbone()
        built = model.get_sequence()
        self.assertEqual(built, Sequence('GGGAUAGUUCCAGABU#A'))
Пример #18
0
    def test_fix_alignment_with_gap(self):
        """Real-world example from nettab - 1qf6 modeling on 1c0a"""
        alignment = read_alignment(
            '>1QF6\n'
            'DDGGD-AGAGAAA\n'
            '>1C0A\n'
            'UCGGUUAGAA---'
        )
        target_before = alignment.aligned_sequences[0]
        matcher = AlignmentMatcher(alignment)
        template_seq = Sequence('DCGGDDAGAA')
        # not identical before the fix, identical afterwards
        self.assertFalse(matcher.is_template_identical(template_seq))
        matcher.fix_template_seq(template_seq)
        self.assertTrue(matcher.is_template_identical(template_seq))
        # the first aligned sequence must still equal what it was before
        self.assertEqual(alignment.aligned_sequences[0], target_before)
Пример #19
0
    def test_oppositegap_model(self):
        """Should create a model with close gaps in the other respective sequence"""
        alignment = read_alignment(
            '> target\n'
            'GGGAGAGCRUUAG-BU#A\n'
            '> template\n'
            'GGGAGAGCR--AGABU#A\n'
        )
        template = Template(OPPOSITEGAP, 'file', 'A')
        model = RnaModel(template, alignment)
        model.apply_alignment()
        model.insert_all_fragments()
        # underscores are stripped from the modified sequence before comparing
        modelled = model.get_sequence().seq_with_modifications
        self.assertEqual(modelled.replace('_', ''), 'GGGAGAGCRUUAGBU#A')
Пример #20
0
    def test_long_5p_extension(self):
        a = read_alignment("""> target
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGCGGAUUUALCUCAG
> template
----------------------------------------GCGGAUUUALCUCAG
        """)
        # first number == 1 should not work
        m = RnaModel(None, a, data_type='file', data=MINI_TEMPLATE)
        self.assertRaises(RenumeratorError, m.add_missing_5p)
        # first number == 100 should work
        m = RnaModel(None, a, data_type='file', data=MINI_TEMPLATE)
        renumber_chain(m, 100)
        m.add_missing_5p()
        self.assertEqual(m.get_sequence(), a.target_seq)
Пример #21
0
    def test_model_with_close_gaps(self):
        """Inserts fragments between residues 944/946/947 copied from the 2du3 excerpt.

        NOTE: the method returns before the create_model() run at the
        bottom, so that part is currently never executed.
        """
        template = Template('test_data/rna_structures/2du3_excerpt.ent', 'file', 'D')
        alignment = read_alignment(
            '> 1qru_B.pdb X55374.1/1-72\n'
            'GCA-UUCCG\n'
            '> 2du3_D.pdb\n'
            'CUUUA-CCC\n'
        )
        model = RnaModel()
        copy_some_residues([template[number] for number in ('944', '946', '947')], model)
        # (start residue, end residue, sequence of the fragment to insert)
        for start, stop, letters in (('946', '947', 'U'), ('944', '946', '')):
            candidates = find_fragment(model, start, stop, Sequence(letters))
            insert_fragment(model, candidates[0])
        return  #TODO: remove
        # generate model
        model = RnaModel(template, alignment)
        model.create_model()
Пример #22
0
def load_alignment(file_path):
    """*load_alignment(file_path)*

Reads a sequence alignment from a FASTA file and returns it as an
Alignment object that can be saved in a variable.

ModeRNA expects the alignment file to contain exactly two sequences:
first the target for which the model is to be built, then the
structural template.

Standard RNA bases should be written in upper case (ACGU).
Standard DNA bases should be written in lower case (acgt).
For modified bases, see the 'concepts' section of the manual.

:Arguments:
    * path+filename of a FASTA file
    """
    # the file name is validated first; the validated value is what gets read
    return read_alignment(validate_filename(file_path))
Пример #23
0
 def test_close_gaps(self):
     """Gaps close to each other should also work."""
     recipe = RecipeMaker(read_alignment(CLOSEGAPS)).recipe
     # no difficult entries, four fragments to add
     for attribute, size in (('difficult', 0), ('add_fragment', 4)):
         self.assertEqual(len(getattr(recipe, attribute)), size)
Пример #24
0
 def setUp(self):
     """Creates the alignment, template, model, initial template sequence and base recognizer fixtures."""
     alignment = read_alignment(MINI_ALIGNMENT)
     template = Template(MINI_TEMPLATE, seq=Sequence("GCGGAUUUALCUCAG"))
     self.a, self.t = alignment, template
     self.m = RnaModel(template, alignment)
     # template sequence as it is right after setup
     self.seq_before = template.get_sequence()
     self.br = BaseRecognizer()
Пример #25
0
 def test_gap_optimization_example_2(self):
     """Runs create_model on the mini_1h4s_T_gap2 example; passes when nothing raises."""
     template = Template('test_data/gaps/mini_1h4s_T_gap2.pdb', 'file', 'T')
     alignment = read_alignment('test_data/gaps/ali_gap2.fasta')
     RnaModel(template, alignment).create_model()
Пример #26
0
 def test_insert_indel_quality_1(self):
     """Insert a fragment without strand break"""
     template = Template('test_data/gaps/mini_1h4s_T_gap2.pdb', 'file', 'T')
     alignment = read_alignment('test_data/gaps/ali_gap2.fasta')
     model = RnaModel(template, alignment)
     # find() returns -1 when the sequence contains no '_' character
     underscore_at = model.get_sequence().seq_with_modifications.find('_')
     self.assertEqual(underscore_at, -1)
Пример #27
0
 def test_no_overhan(self):
     """The NO_OVERHANG_ALIGN recipe should contain no 5' and no 3' fragments."""
     recipe = RecipeMaker(read_alignment(NO_OVERHANG_ALIGN)).recipe
     for attribute in ('add_fragment_5p', 'add_fragment_3p'):
         self.assertEqual(len(getattr(recipe, attribute)), 0)
Пример #28
0
 def test_boxes(self):
     """find_in_box must hold for every alignment position paired with its expected match in BOX_SAMPLES."""
     fasta_layout = "> 1\n%s\n> 2\n%s\n"
     for first, second, expected in BOX_SAMPLES:
         alignment = read_alignment(fasta_layout % (first, second))
         recipe = RecipeMaker(alignment).recipe
         for position, match in zip(alignment, expected):
             found = self.find_in_box(recipe, position, match)
             self.assertTrue(found)
Пример #29
0
 def setUp(self):
     """Reads the MINI_ALIGNMENT alignment and builds its recipe."""
     alignment = read_alignment(MINI_ALIGNMENT)
     self.a = alignment
     self.recipe = RecipeMaker(alignment).recipe
Пример #30
0
 def test_create_model_with_unknown(self):
     """Alignment that contains unknown bases in the target."""
     alignment = read_alignment(MINI_ALIGNMENT_WITH_UNK)
     model = RnaModel(self.t, alignment)
     model.create_model()
     built = model.get_sequence()
     self.assertEqual(built, Sequence('..CUGUQQUACCU#P'))