def test_against_matlab_example(self):
    """Compare `MaxentModel.score` with the energies stored in the sample file.

    The alignment data and the expected energies are both read from
    ``test_data/maxent_sample.mat``; the model is built from statistics
    computed with ``regularization_amount=0.5``.
    """
    from multicov.alignment import Alignment
    from multicov.alphabet import protein_alphabet
    from multicov.statistics import Statistics, MaxentModel
    from scipy.io import loadmat
    import os.path

    sample_path = os.path.join('test_data', 'maxent_sample.mat')
    reference = loadmat(sample_path, squeeze_me=True)

    # `[()]` unwraps the 0-d array that wraps the stored field.
    sequences = reference['alignment']['data'][()]
    alignment = Alignment(sequences, protein_alphabet)

    model = MaxentModel(Statistics(alignment, regularization_amount=0.5))
    computed_energies = model.score(alignment)

    expected_energies = reference['energies'][()]
    self.assertTrue(np.allclose(computed_energies, expected_energies))
def test_with_matrix(self):
    """Check that scoring `align.data` matches scoring `align` itself.

    A small protein alignment (including a few gap characters) is used to
    build the model with ``regularization_amount=0.1``; `score` is then
    called once with the alignment's ``data`` attribute and once with the
    alignment object, and both results must agree within `np.allclose`
    tolerances.
    """
    from multicov.alignment import Alignment
    from multicov.alphabet import protein_alphabet
    from multicov.statistics import Statistics, MaxentModel

    align = Alignment([
        'WKHNAY',
        'KHRCDA',
        'LGVVGY',
        'LIGDDH',
        'CMPRYW',
        'QWFWRA',
        'VTMPEG',
        'LNYINM',
        'WHV-EW',
        'PIWGGF',
        'PPCWVE',
        'E-MWRG',
        'RFGKFT',
        'CGRCGS',
        'T-PMVW',
        'LNCPYA',
    ], protein_alphabet)
    stats = Statistics(align, regularization_amount=0.1)
    maxent = MaxentModel(stats)

    energies = maxent.score(align.data)
    energies0 = maxent.score(align)
    self.assertTrue(np.allclose(energies, energies0))
def test_multi_alpha_diagonalness_of_blockdiagonal_blocks(self):
    """Check that each diagonal block of the couplings is itself diagonal.

    Three alignments (protein, DNA and RNA alphabets) are concatenated into
    one multi-alphabet alignment.  For every index range returned by
    `binary_index_map`, the matching square block of ``maxent.couplings``
    must have off-diagonal entries smaller than 1e-10 in absolute value.
    """
    from multicov.alignment import Alignment
    from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
    from multicov.align_io import load_fasta
    from multicov.binary import binary_index_map
    from multicov.statistics import Statistics, MaxentModel
    from os.path import join

    protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                              protein_alphabet,
                              invalid_letter_policy='gap')
    dna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                          dna_alphabet,
                          invalid_letter_policy='gap')
    rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                          rna_alphabet,
                          invalid_letter_policy='gap')

    combined = Alignment(protein_part)
    combined.add(dna_part).add(rna_part)

    stats = Statistics(combined, regularization_amount=0.5)
    model = MaxentModel(stats)

    for index_range in binary_index_map(stats):
        block_slice = slice(index_range[0], index_range[1])
        block = model.couplings[block_slice, block_slice]
        # Remove the main diagonal; whatever is left must vanish.
        diagonal_only = np.diag(np.diag(block))
        off_diagonal = block - diagonal_only
        largest_deviation = np.max(np.abs(off_diagonal))
        self.assertLess(largest_deviation, 1e-10)
def test_multi_alpha_shape_and_symmetry_of_couplings(self):
    """Check symmetry and size of the couplings for a multi-alphabet model.

    The couplings matrix must equal its transpose to within 1e-10, and must
    be square with side ``4 * (DNA width + RNA width) + 20 * protein width``.
    """
    from multicov.alignment import Alignment
    from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
    from multicov.align_io import load_fasta
    from multicov.statistics import Statistics, MaxentModel
    from os.path import join

    dna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                          dna_alphabet,
                          invalid_letter_policy='gap')
    protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                              protein_alphabet,
                              invalid_letter_policy='gap')
    rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                          rna_alphabet,
                          invalid_letter_policy='uppergap')

    combined = Alignment(dna_part)
    combined.add(protein_part).add(rna_part)

    model = MaxentModel(Statistics(combined, regularization_amount=0.5))

    # Symmetry: the matrix and its transpose differ by less than 1e-10.
    asymmetry = np.max(np.abs(model.couplings - model.couplings.T))
    self.assertLess(asymmetry, 1e-10)

    # Shape: 4 binary columns per DNA/RNA position, 20 per protein position.
    nucleotide_width = dna_part.get_width() + rna_part.get_width()
    expected_side = 4 * nucleotide_width + 20 * protein_part.get_width()
    self.assertSequenceEqual(np.shape(model.couplings),
                             [expected_side, expected_side])
def test_gap_gauge(self):
    """Check that a sequence made only of gaps scores (numerically) zero.

    A multi-alphabet model is built from DNA, protein and RNA alignments;
    scoring a single sequence of ``'-'`` characters as wide as the combined
    alignment must give energies below 1e-10 in absolute value.
    """
    from multicov.alignment import Alignment
    from multicov.alphabet import protein_alphabet, dna_alphabet, rna_alphabet
    from multicov.align_io import load_fasta
    from multicov.statistics import Statistics, MaxentModel
    from os.path import join

    dna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                          dna_alphabet,
                          invalid_letter_policy='gap')
    protein_part = load_fasta(join('test_data', 'test_aln1.fasta'),
                              protein_alphabet,
                              invalid_letter_policy='gap')
    rna_part = load_fasta(join('test_data', 'test_aln2.fasta'),
                          rna_alphabet,
                          invalid_letter_policy='uppergap')

    combined = Alignment(dna_part)
    combined.add(protein_part).add(rna_part)

    model = MaxentModel(Statistics(combined, regularization_amount=0.5))

    all_gaps = combined.get_width() * '-'
    gap_energies = model.score([all_gaps])
    self.assertLess(np.max(np.abs(gap_energies)), 1e-10)
def test_on_empty(self):
    """Check a model built from the statistics of an empty alignment.

    The model must expose ``stats``, ``alphabets``, ``annotations`` and
    ``reference``; ``stats`` must be the very `Statistics` object passed in,
    the other three must be the same objects as the corresponding
    attributes of that `Statistics` object, and ``couplings`` must be empty.
    """
    from multicov.alignment import Alignment
    from multicov.statistics import Statistics, MaxentModel

    stats = Statistics(Alignment())
    model = MaxentModel(stats)

    # All presence checks run before any identity check.
    for attribute in ('stats', 'alphabets', 'annotations', 'reference'):
        self.assertTrue(hasattr(model, attribute))

    self.assertIs(model.stats, stats)
    for shared in ('alphabets', 'annotations', 'reference'):
        self.assertIs(getattr(model, shared), getattr(stats, shared))

    self.assertEqual(np.size(model.couplings), 0)
def test_against_matlab_example(self):
    """Compare ``stats.cmat`` and the couplings with the stored sample values.

    Using the alignment stored in ``test_data/maxent_sample.mat`` and
    ``regularization_amount=0.5``, ``stats.cmat`` must match the file's
    ``dca.cmat`` and ``maxent.couplings`` must match
    ``params_nogap_nofc_nodiagtrick.couplings``.
    """
    from multicov.alignment import Alignment
    from multicov.alphabet import protein_alphabet
    from multicov.statistics import Statistics, MaxentModel
    from scipy.io import loadmat
    import os.path

    sample_path = os.path.join('test_data', 'maxent_sample.mat')
    reference = loadmat(sample_path, squeeze_me=True)

    # `[()]` unwraps the 0-d array that wraps the stored field.
    sequences = reference['alignment']['data'][()]
    alignment = Alignment(sequences, protein_alphabet)
    stats = Statistics(alignment, regularization_amount=0.5)
    model = MaxentModel(stats)

    expected_cmat = reference['dca']['cmat'][()]
    self.assertTrue(np.allclose(stats.cmat, expected_cmat))

    expected_couplings = (
        reference['params_nogap_nofc_nodiagtrick']['couplings'][()])
    self.assertTrue(np.allclose(model.couplings, expected_couplings))
def test_on_protein(self):
    """Check the couplings against the negated inverse of ``stats.cmat``.

    The expected matrix is ``-inv(stats.cmat)`` with every diagonal block
    (as delimited by `binary_index_map`) set to zero.  It is compared with
    the couplings after removing only their main diagonal, so the
    comparison also requires the remaining entries inside each diagonal
    block of the couplings to be close to zero.
    """
    from multicov.alignment import Alignment
    from multicov.binary import binary_index_map
    from multicov.alphabet import protein_alphabet
    from multicov.statistics import Statistics, MaxentModel

    sequences = [
        'WKHNAY',
        'KHRCDA',
        'LGVVGY',
        'LIGDDH',
        'CMPRYW',
        'QWFWRA',
        'VTMPEG',
        'LNYINM',
        'WHV-EW',
        'PIWGGF',
        'PPCWVE',
        'E-MWRG',
        'RFGKFT',
        'CGRCGS',
        'T-PMVW',
        'LNCPYA',
    ]
    alignment = Alignment(sequences, protein_alphabet)
    stats = Statistics(alignment, regularization_amount=0.1)
    model = MaxentModel(stats)

    expected = -np.linalg.inv(stats.cmat)
    for index_range in binary_index_map(stats):
        start, stop = index_range[0], index_range[1]
        expected[start:stop, start:stop] = 0

    main_diagonal = np.diag(np.diag(model.couplings))
    couplings_without_diagonal = model.couplings - main_diagonal
    self.assertTrue(np.allclose(expected, couplings_without_diagonal))