Exemplo n.º 1
0
 def test_multi_alpha_diagonalness_of_blockdiagonal_blocks(self):
     """Check that each block-diagonal block of the couplings is diagonal.

     Three alignments (protein, DNA and RNA alphabets) are loaded with
     ``invalid_letter_policy='gap'`` and combined through ``Alignment.add``.
     A ``MaxentModel`` is built from ``Statistics`` computed with
     ``regularization_amount=0.5``.  For every index range yielded by
     ``binary_index_map(stats)``, the matching square block of
     ``maxent.couplings`` must have off-diagonal entries whose absolute
     value stays below 1e-10.
     """
     from os.path import join
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.binary import binary_index_map
     from multicov.statistics import Statistics, MaxentModel

     # (file name, alphabet) pairs, loaded in this exact order.
     sources = (('test_aln1.fasta', protein_alphabet),
                ('test_aln2.fasta', dna_alphabet),
                ('test_aln2.fasta', rna_alphabet))
     protein_part, dna_part, rna_part = [
         load_fasta(join('test_data', file_name), alphabet,
                    invalid_letter_policy='gap')
         for file_name, alphabet in sources
     ]

     combined = Alignment(protein_part)
     combined.add(dna_part).add(rna_part)

     stats = Statistics(combined, regularization_amount=0.5)
     model = MaxentModel(stats)

     for index_range in binary_index_map(stats):
         lo, hi = index_range[0], index_range[1]
         block = model.couplings[lo:hi, lo:hi]
         # Subtracting the diagonal leaves only the off-diagonal entries.
         off_diagonal = block - np.diag(np.diag(block))
         self.assertLess(np.max(np.abs(off_diagonal)), 1e-10)
Exemplo n.º 2
0
 def test_multi_alpha_shape_and_symmetry_of_couplings(self):
     """Check that the couplings matrix is symmetric and has the right shape.

     DNA, protein and RNA alignments are combined through ``Alignment.add``
     and a ``MaxentModel`` is built from ``Statistics`` computed with
     ``regularization_amount=0.5``.  The couplings must equal their
     transpose to within 1e-10, and must be square with side
     ``4 * (dna_width + rna_width) + 20 * protein_width``.
     """
     from os.path import join
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.statistics import Statistics, MaxentModel

     dna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           dna_alphabet, invalid_letter_policy='gap')
     protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                               protein_alphabet, invalid_letter_policy='gap')
     rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           rna_alphabet, invalid_letter_policy='uppergap')

     combined = Alignment(dna_part)
     combined.add(protein_part).add(rna_part)

     model = MaxentModel(Statistics(combined, regularization_amount=0.5))

     # Symmetry: the largest deviation from the transpose must be negligible.
     asymmetry = np.max(np.abs(model.couplings - model.couplings.T))
     self.assertLess(asymmetry, 1e-10)

     # Shape: 4 entries per DNA/RNA column, 20 per protein column.
     nucleotide_width = dna_part.get_width() + rna_part.get_width()
     expected_side = 4 * nucleotide_width + 20 * protein_part.get_width()
     self.assertSequenceEqual(np.shape(model.couplings),
                              [expected_side, expected_side])
Exemplo n.º 3
0
 def test_freq2_on_multi_alpha(self):
     """Compare ``Statistics.freq2`` with the slow reference implementation.

     A DNA alignment is extended with a protein alignment (both loaded with
     ``invalid_letter_policy='gap'``), and ``stats.freq2`` must be close
     (``np.allclose``) to the output of ``_slow_get_freq2`` on the same
     alignment.
     """
     from os.path import join
     from multicov.alphabet import protein_alphabet, dna_alphabet
     from multicov.align_io import load_fasta
     from multicov.statistics import Statistics

     combined = load_fasta(join('test_data', 'test_aln2.fasta'),
                           dna_alphabet, invalid_letter_policy='gap')
     protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                               protein_alphabet, invalid_letter_policy='gap')
     combined.add(protein_part)

     stats = Statistics(combined)
     reference = _slow_get_freq2(combined)

     matches = np.allclose(stats.freq2, reference)
     self.assertTrue(matches)
Exemplo n.º 4
0
 def test_mask_upper(self):
     """Check ``mask_fct='upper'`` against a manually truncated alignment.

     The expected alignment is built by loading the same file with
     ``invalid_letter_policy='unchanged'``, keeping only the columns whose
     first-row character is neither lowercase nor ``'.'``, and resetting the
     reference mapping to consecutive indices over the kept columns.
     """
     from multicov.alignment import ReferenceMapping
     from multicov.align_io import load_fasta
     from multicov.alphabet import protein_alphabet

     fasta_path = os.path.join('test_data', 'test_aln3.fasta')
     masked = load_fasta(fasta_path, protein_alphabet,
                         invalid_letter_policy='upper', mask_fct='upper')

     raw_path = os.path.join('test_data', 'test_aln3.fasta')
     raw = load_fasta(raw_path, protein_alphabet,
                      invalid_letter_policy='unchanged')

     # A column is kept unless its first-row letter is lowercase or a dot.
     keep_column = [not (letter.islower() or letter == '.')
                    for letter in raw[0, :]]

     expected = raw.truncate_columns(keep_column)
     n_columns = expected.data.shape[1]
     expected.reference = ReferenceMapping(list(range(n_columns)))

     self.assertEqual(masked, expected)
Exemplo n.º 5
0
 def test_replace_invalid_by_uppercase_then_leave(self):
     """Check loading DNA data with ``invalid_letter_policy='upper'``.

     The loaded alignment must equal one built from the sequences
     ``GATTACA``, ``ACCA--T`` and ``G.C-A-C`` with the names ``one``,
     ``sequence`` and ``one line``.
     """
     from multicov.alignment import Alignment
     from multicov.align_io import load_fasta
     from multicov.alphabet import dna_alphabet

     fasta_path = os.path.join('test_data', 'test_aln2.fasta')
     loaded = load_fasta(fasta_path, dna_alphabet,
                         invalid_letter_policy='upper')

     sequences = ['GATTACA', 'ACCA--T', 'G.C-A-C']
     reference = Alignment(sequences, dna_alphabet)
     reference.annotations['name'] = ['one', 'sequence', 'one line']

     self.assertEqual(loaded, reference)
Exemplo n.º 6
0
 def test_dna_unchanged_invalid(self):
     """Check loading DNA data with ``invalid_letter_policy='unchanged'``.

     The loaded alignment must equal one built from the sequences
     ``GATTACA``, ``ACCA--T`` and ``G.c-a-c`` (lowercase letters preserved)
     with the names ``one``, ``sequence`` and ``one line``.
     """
     from multicov.alignment import Alignment
     from multicov.align_io import load_fasta
     from multicov.alphabet import dna_alphabet

     fasta_path = os.path.join('test_data', 'test_aln2.fasta')
     loaded = load_fasta(fasta_path, dna_alphabet,
                         invalid_letter_policy='unchanged')

     sequences = ['GATTACA', 'ACCA--T', 'G.c-a-c']
     reference = Alignment(sequences, dna_alphabet)
     reference.annotations['name'] = ['one', 'sequence', 'one line']

     self.assertEqual(loaded, reference)
Exemplo n.º 7
0
    def test_multi_alpha(self):
        """Check pseudocount regularization on a two-alphabet alignment.

        A DNA alignment extended with a protein alignment is analysed twice:
        once with default ``Statistics`` and once with
        ``regularizer='pseudocount'`` and ``regularization_amount=alpha``.
        The regularized ``freq1``, ``freq2`` and ``cmat`` must match values
        rebuilt here by mixing the unregularized frequencies with a flat
        background, using weights ``1 - alpha`` and ``alpha``.
        """
        from multicov.alphabet import protein_alphabet, dna_alphabet
        from multicov.align_io import load_fasta
        from multicov.statistics import Statistics
        from os.path import join
        align = load_fasta(join('test_data', 'test_aln2.fasta'),
                           dna_alphabet,
                           invalid_letter_policy='gap')
        align2 = load_fasta(join('test_data', 'test_aln1.fasta'),
                            protein_alphabet,
                            invalid_letter_policy='gap')
        align.add(align2)

        alpha = 0.6

        stats1 = Statistics(align)
        stats2 = Statistics(align,
                            regularization_amount=alpha,
                            regularizer='pseudocount')

        # np.hstack requires a real sequence: passing a generator was
        # deprecated in NumPy 1.16 and is rejected by current releases, so
        # the per-alphabet pieces are collected in a list first.
        bkg_freq1 = np.hstack([
            np.ones(width * alphabet.size(no_gap=True)) / alphabet.size()
            for alphabet, width in align.alphabets])
        bkg_freq2 = np.outer(bkg_freq1, bkg_freq1)

        freq1 = (1 - alpha) * stats1.freq1 + alpha * bkg_freq1
        freq2 = (1 - alpha) * stats1.freq2 + alpha * bkg_freq2

        # One entry per alignment column: the number of non-gap letters of
        # the alphabet that column belongs to.
        n_letts = np.hstack([width * [alphabet.size(no_gap=True)]
                             for alphabet, width in align.alphabets])
        # Starting offset of each column's block in the binary representation.
        idxs0 = np.hstack(([0], np.cumsum(n_letts)[:-1]))
        for idx0, n_lett in zip(idxs0, n_letts):
            idxs = slice(idx0, idx0 + n_lett)
            # Each same-column block of freq2 is overwritten with the
            # diagonal matrix built from that column's freq1 entries.
            # noinspection PyUnresolvedReferences
            freq2[idxs, idxs] = np.diag(freq1[idxs])

        cmat = freq2 - np.outer(freq1, freq1)

        self.assertTrue(np.allclose(freq1, stats2.freq1))
        # noinspection PyTypeChecker
        self.assertTrue(np.allclose(freq2, stats2.freq2))
        # noinspection PyTypeChecker
        self.assertTrue(np.allclose(cmat, stats2.cmat))
Exemplo n.º 8
0
 def test_protein_unchanged_invalid(self):
     """Check loading protein data with ``invalid_letter_policy='unchanged'``.

     The loaded alignment must equal one built from the sequences
     ``IVGGYTCQ``, ``XVGGTEAQ`` and ``IGG-KDT-`` with the names ``seq1``,
     ``seq2`` and ``seq3``.
     """
     from multicov.alignment import Alignment
     from multicov.align_io import load_fasta
     from multicov.alphabet import protein_alphabet

     fasta_path = os.path.join('test_data', 'test_aln1.fasta')
     loaded = load_fasta(fasta_path, protein_alphabet,
                         invalid_letter_policy='unchanged')

     sequences = ['IVGGYTCQ', 'XVGGTEAQ', 'IGG-KDT-']
     reference = Alignment(sequences, alphabet=protein_alphabet)
     reference.annotations['name'] = ['seq1', 'seq2', 'seq3']

     self.assertEqual(loaded, reference)
Exemplo n.º 9
0
 def test_protein_keep_annot_ws(self):
     """Check loading protein data with ``strip_ws_in_annot=False``.

     The loaded alignment must equal one built from the sequences
     ``IVGGYTCQ``, ``-VGGTEAQ`` and ``IGG-KDT-`` whose names keep their
     surrounding whitespace (``'seq1  '``, ``' seq2'``, ``'seq3'``).
     """
     from multicov.alignment import Alignment
     from multicov.align_io import load_fasta
     from multicov.alphabet import protein_alphabet

     fasta_path = os.path.join('test_data', 'test_aln1.fasta')
     loaded = load_fasta(fasta_path, protein_alphabet,
                         strip_ws_in_annot=False)

     sequences = ['IVGGYTCQ', '-VGGTEAQ', 'IGG-KDT-']
     reference = Alignment(sequences, alphabet=protein_alphabet)
     reference.annotations['name'] = ['seq1  ', ' seq2', 'seq3']

     self.assertEqual(loaded, reference)
Exemplo n.º 10
0
 def test_cmat_on_multi_alpha(self):
     """Compare ``Statistics.cmat`` with a reference built from slow helpers.

     DNA, protein and RNA alignments are combined through ``add``.  The
     reference matrix is ``f2 - outer(f1, f1)``, where ``f1`` and ``f2``
     come from ``_slow_get_freq1`` and ``_slow_get_freq2``; ``stats.cmat``
     must be close to it (``np.allclose``).
     """
     from os.path import join
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.statistics import Statistics

     combined = load_fasta(join('test_data', 'test_aln2.fasta'),
                           dna_alphabet, invalid_letter_policy='gap')
     protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                               protein_alphabet, invalid_letter_policy='gap')
     rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           rna_alphabet, invalid_letter_policy='uppergap')
     combined.add(protein_part).add(rna_part)

     stats = Statistics(combined)

     ref_f1 = _slow_get_freq1(combined)
     ref_f2 = _slow_get_freq2(combined)
     ref_cmat = ref_f2 - np.outer(ref_f1, ref_f1)

     matches = np.allclose(stats.cmat, ref_cmat)
     self.assertTrue(matches)
Exemplo n.º 11
0
 def test_gap_gauge(self):
     """Check that an all-gap sequence scores zero under the maxent model.

     DNA, protein and RNA alignments are combined through ``Alignment.add``
     and a ``MaxentModel`` is built from ``Statistics`` computed with
     ``regularization_amount=0.5``.  Scoring a single sequence of ``'-'``
     characters as wide as the alignment must return values whose absolute
     value stays below 1e-10.
     """
     from os.path import join
     from multicov.alignment import Alignment
     from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
     from multicov.align_io import load_fasta
     from multicov.statistics import Statistics, MaxentModel

     dna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           dna_alphabet, invalid_letter_policy='gap')
     protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                               protein_alphabet, invalid_letter_policy='gap')
     rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                           rna_alphabet, invalid_letter_policy='uppergap')

     combined = Alignment(dna_part)
     combined.add(protein_part).add(rna_part)

     model = MaxentModel(Statistics(combined, regularization_amount=0.5))

     # One sequence made only of gaps, spanning the full alignment width.
     all_gaps = combined.get_width() * '-'
     energies = model.score([all_gaps])

     largest_energy = np.max(np.abs(energies))
     self.assertLess(largest_energy, 1e-10)
Exemplo n.º 12
0
 def test_mask_from_first_seq(self):
     """Check loading with a callable ``mask_fct``.

     The mask function returns True for every position of its argument
     that is neither ``'V'`` nor ``'G'``.  The loaded alignment must equal
     one built from ``IYTCQ``, ``XTEAQ`` and ``IKDT-`` with the names
     ``seq1``, ``seq2`` and ``seq3``.
     """
     from multicov.alignment import Alignment
     from multicov.align_io import load_fasta
     from multicov.alphabet import protein_alphabet
     from numpy import in1d

     def not_v_or_g(s):
         # Boolean array: True wherever the letter is neither 'V' nor 'G'.
         return ~in1d(list(s), ['V', 'G'])

     fasta_path = os.path.join('test_data', 'test_aln1.fasta')
     loaded = load_fasta(fasta_path, protein_alphabet,
                         invalid_letter_policy='unchanged',
                         mask_fct=not_v_or_g)

     sequences = ['IYTCQ', 'XTEAQ', 'IKDT-']
     reference = Alignment(sequences, alphabet=protein_alphabet)
     reference.annotations['name'] = ['seq1', 'seq2', 'seq3']

     self.assertEqual(loaded, reference)