示例#1
0
 def setUp(self):
     """Create the per-test fixtures.

     Loads the trajectory ``xtcfile`` with ``pdbfile`` as topology, builds a
     fresh ``MDFeaturizer`` on the same topology and stores the tolerance,
     reference frame and atom subset that the tests read from ``self``.
     """
     self.pdbfile = pdbfile
     self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
     self.feat = MDFeaturizer(self.pdbfile)
     self.atol = 1e-5
     self.ref_frame = 0
     # Floor division keeps the indices integer-typed; with true division
     # np.arange returns a float array on Python 3, which is not a valid
     # index dtype.
     self.atom_indices = np.arange(0, self.traj.n_atoms // 2)
示例#2
0
class TestCustomFeature(unittest.TestCase):
    """Checks that a user-supplied function can be registered as a feature."""

    def setUp(self):
        """Build a featurizer, load the trajectory and define bogus inputs."""
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)

        # Three atom pairs whose distances feed the custom function.
        self.pairs = [[0, 1], [0, 2], [1, 2]]
        # Arbitrary means, one per distance.
        self.means = [.5, .75, 1.0]
        # Arbitrary 3x2 matrix: projects the 3 distances onto 2 components.
        projection_rows = [[0, 1], [1, 0], [1, 1]]
        self.U = np.array(projection_rows)

    def _register_custom_func(self):
        """Add the custom function with output dimension ``U.shape[1]``."""
        n_components = self.U.shape[1]
        self.feat.add_custom_func(
            some_call_to_mdtraj_some_operations_some_linalg, n_components,
            self.pairs, self.means, self.U)

    def test_some_feature(self):
        """The featurizer output must equal a direct call of the function."""
        self._register_custom_func()

        via_featurizer = self.feat.transform(self.traj)
        direct_call = some_call_to_mdtraj_some_operations_some_linalg(
            self.traj, self.pairs, self.means, self.U)
        assert np.allclose(via_featurizer, direct_call)

    def test_describe(self):
        """``describe`` must run without raising for a custom feature."""
        self._register_custom_func()
        self.feat.describe()

    def test_dimensionality(self):
        """The reported dimension must be the number of columns of ``U``."""
        self._register_custom_func()

        expected_dim = self.U.shape[1]
        assert self.feat.dimension() == expected_dim
示例#3
0
    def test_ca_distances_with_residues_not_containing_cas_with_exclusions(
            self):
        """Check CA-distance pairs and values for two exclusion settings.

        For each ``excluded_neighbors`` value a fresh featurizer is built on
        the bogus geometry; its pair indices and transformed distances are
        compared against the expected pairs.
        """
        reference_geom = mdtraj.load(self.pdbfile)

        # (excluded_neighbors, expected atom pairs), checked in this order.
        # For excluded_neighbors=1 every CA pair is expected; the original
        # note attributes this to the first neighbor not containing CAs.
        scenarios = (
            (2, [
                [1, 5],
                [1, 7],
                [3, 7],
            ]),
            (1, [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]),
        )

        for n_excluded, expected_pairs in scenarios:
            featurizer = MDFeaturizer(self.bogus_geom_pdbfile)
            featurizer.add_distances_ca(excluded_neighbors=n_excluded)

            # Indices first ...
            actual_pairs = featurizer.active_features[0].distance_indexes
            assert np.allclose(expected_pairs, actual_pairs)

            # ... then the distances themselves.
            expected_dist = mdtraj.compute_distances(reference_geom,
                                                     expected_pairs)
            assert np.allclose(expected_dist,
                               featurizer.transform(reference_geom))
示例#4
0
    def setUp(self):
        """Build a featurizer, load the trajectory and define bogus inputs."""
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)

        # Three atom pairs whose distances are used by the tests.
        self.pairs = [[0, 1], [0, 2], [1, 2]]
        # Arbitrary means, one per distance.
        self.means = [.5, .75, 1.0]
        # Arbitrary 3x2 matrix: projects the 3 distances onto 2 components.
        projection_rows = [[0, 1], [1, 0], [1, 1]]
        self.U = np.array(projection_rows)
示例#5
0
    def test_ca_distances_with_all_atom_geometries(self):
        """CA distances from the all-atom file must match the CA-only file."""
        all_atom_feat = MDFeaturizer(pdbfile_ops_aa)
        all_atom_feat.add_distances_ca(excluded_neighbors=0)
        all_atom_geom = mdtraj.load(pdbfile_ops_aa)
        dist_all_atom = all_atom_feat.transform(all_atom_geom)

        # Reference: distances between all atoms of the CA-only topology.
        ca_only_feat = MDFeaturizer(pdbfile_ops_Ca)
        every_atom = np.arange(ca_only_feat.topology.n_atoms)
        ca_only_feat.add_distances(every_atom)
        ca_only_geom = mdtraj.load(pdbfile_ops_Ca)
        dist_ca_only = ca_only_feat.transform(ca_only_geom)

        assert np.allclose(dist_all_atom, dist_ca_only)
示例#6
0
    def test_backbone_dihedrals_deg(self):
        """Backbone torsions in degrees must lie within [-180, 180]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(Y >= -180.0)
        assert np.all(Y <= 180.0)
        # One description entry is expected per output dimension.
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())
示例#7
0
    def test_backbone_dihedrials_chi(self):
        """Chi1 torsions (radians) must lie within [-pi, pi]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(Y >= -np.pi)
        assert np.all(Y <= np.pi)
        # One description entry is expected per output dimension.
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())
示例#8
0
    def test_backbone_dihedrals_cossin(self):
        """Check shape, bounds and labels of cos/sin backbone torsions."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # (3 phi + 3 psi) * 2 [cos, sin] columns, one row per frame.
        self.assertEqual(Y.shape, (len(traj), 3*4))
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(Y >= -np.pi)
        assert np.all(Y <= np.pi)
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension(), msg=desc)
        # The first two labels are expected to name the COS and SIN parts.
        self.assertIn("COS", desc[0])
        self.assertIn("SIN", desc[1])
示例#9
0
    def __init__(self,
                 trajectories,
                 topologyfile=None,
                 chunksize=1000,
                 featurizer=None):
        """Set up a reader over one or more trajectory files.

        Parameters
        ----------
        trajectories : object or list/tuple of objects
            A single trajectory or a list/tuple of them; each entry is
            converted with ``str`` and stored in ``self.filenames``.
        topologyfile : optional
            Topology used to build a default ``MDFeaturizer`` when no
            ``featurizer`` is given.
        chunksize : int, default 1000
            Forwarded to the parent class constructor.
        featurizer : optional
            Featurizer to use; takes precedence over ``topologyfile`` and
            also supplies ``self.topfile``.

        Raises
        ------
        AssertionError
            If neither ``topologyfile`` nor ``featurizer`` is given.
        """
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        if not isinstance(trajectories, (list, tuple)):
            trajectories = [trajectories]
        self.filenames = copy([str(traj) for traj in trajectories
                               ])  # this is modified in-place in mdtraj.load
        self._return_traj_obj = False

        self._is_random_accessible = all(
            file_suffix(f) in FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        # (iterate the normalised string names, like the check above does)
        if any(file_suffix(f) in ('.xtc', '.trr') for f in self.filenames):
            # distutils (LooseVersion) was removed in Python 3.12, so compare
            # the leading numeric release segment as a tuple of ints instead.
            import re
            release = re.match(r'\d+(?:\.\d+)*', mdtraj.version.version)
            if release:
                installed = tuple(
                    int(part) for part in release.group(0).split('.'))
            else:
                # Unparseable version string: treat as too old.
                installed = ()
            xtc_trr_random_accessible = installed >= (1, 6, 1)
            self._is_random_accessible &= xtc_trr_random_accessible

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(
            self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(
            self, 3)

        # featurizer
        if topologyfile and featurizer:
            self.logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()
示例#10
0
class TestStaticMethods(unittest.TestCase):
    """Checks ``MDFeaturizer.pairs`` for several neighbor exclusions."""

    def setUp(self):
        """Create the featurizer whose ``pairs`` method is tested."""
        self.feat = MDFeaturizer(pdbfile)

    def test_pairs(self):
        """Pairs of 5 atoms for ``excluded_neighbors`` = 3, 2, 1 and 0."""
        n_at = 5
        # (excluded_neighbors, expected pairs), checked in this order.
        expectations = (
            (3, [0, 4]),
            (2, [[0, 3], [0, 4], [1, 4]]),
            (1, [[0, 2], [0, 3], [0, 4], [1, 3], [1, 4], [2, 4]]),
            (0, [[0, 1], [0, 2], [0, 3], [0, 4], [1, 2],
                 [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]),
        )
        for n_excluded, expected in expectations:
            found = self.feat.pairs(np.arange(n_at),
                                    excluded_neighbors=n_excluded)
            assert np.allclose(found, expected)
示例#11
0
    def test_backbone_dihedrals(self):
        """Check bounds, description length and phi/psi index ordering."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(Y >= -np.pi)
        assert np.all(Y <= np.pi)

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

        # test ordering of indices: phi and psi entries must alternate
        backbone_feature = self.feat.active_features[0]
        angle_indices = backbone_feature.angle_indexes
        np.testing.assert_equal(angle_indices[0], backbone_feature._phi_inds[0])
        np.testing.assert_equal(angle_indices[1], backbone_feature._psi_inds[0])
        np.testing.assert_equal(angle_indices[2], backbone_feature._phi_inds[1])
        np.testing.assert_equal(angle_indices[3], backbone_feature._psi_inds[1])
示例#12
0
 def setUp(self):
     """Give each test a fresh featurizer built from ``pdbfile``."""
     featurizer = MDFeaturizer(pdbfile)
     self.feat = featurizer
示例#13
0
class TestFeaturizer(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        import tempfile
        cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")[1]
        with open(cls.asn_leu_pdbfile, 'w') as fh:
            fh.write(asn_leu_pdb)

        cls.asn_leu_traj = tempfile.mktemp(suffix='.xtc')

        cls.bogus_geom_pdbfile = tempfile.mkstemp(suffix=".pdb")[1]
        with open(cls.bogus_geom_pdbfile, 'w') as fh:
            fh.write(bogus_geom_pdbfile)

        # create traj for asn_leu
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        new_xyz = np.empty((n_frames, ref.shape[1], 3))
        noise = np.random.random(new_xyz.shape)
        new_xyz[:, :, :] = noise + ref
        traj.xyz = new_xyz
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

    @classmethod
    def tearDownClass(cls):
        try:
            os.unlink(cls.asn_leu_pdbfile)
        except EnvironmentError:
            pass

        try:
            os.unlink(cls.bogus_geom_pdbfile)
        except EnvironmentError:
            pass

    def setUp(self):
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        self.atom_indices = np.arange(0, self.traj.n_atoms / 2)

    def test_select_backbone(self):
        inds = self.feat.select_Backbone()

    def test_select_non_symmetry_heavy_atoms(self):
        try:
            inds = self.feat.select_Heavy(exclude_symmetry_related=True)
        except RuntimeError as e:
            if "recursion depth" in e.args:
                import sys
                raise Exception(
                    "recursion limit reached. Interpreter limit: {}".format(
                        sys.getrecursionlimit()))

    def test_select_all(self):
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_inverse_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        sel = self.feat.select_Ca()
        assert (np.all(sel == list(range(self.traj.n_atoms)))
                )  # should be all for this Ca-traj
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        self.feat.add_distances_ca(
            periodic=False, excluded_neighbors=0
        )  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_residues_not_containing_cas_no_exclusions(self):
        # Load test geom
        geom = mdtraj.load(self.pdbfile)
        # No exclusions
        feat_EN0 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN0.add_distances_ca(excluded_neighbors=0)
        ENO_pairs = [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]

        # Check indices
        assert (np.allclose(ENO_pairs,
                            feat_EN0.active_features[0].distance_indexes))
        # Check distances
        D = mdtraj.compute_distances(geom, ENO_pairs)
        assert (np.allclose(D, feat_EN0.transform(geom)))

        # excluded_neighbors=1 ## will yield the same as before, because the first neighbor
        # doesn't conting CA's anyway
        feat_EN1 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN1.add_distances_ca(excluded_neighbors=1)
        EN1_pairs = [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]
        assert (np.allclose(EN1_pairs,
                            feat_EN1.active_features[0].distance_indexes))
        D = mdtraj.compute_distances(geom, EN1_pairs)
        assert (np.allclose(D, feat_EN1.transform(geom)))

    def test_ca_distances_with_residues_not_containing_cas_with_exclusions(
            self):
        # Load test geom
        geom = mdtraj.load(self.pdbfile)
        # No exclusions
        feat_EN2 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN2.add_distances_ca(excluded_neighbors=2)
        EN2_pairs = [
            [1, 5],
            [1, 7],
            [3, 7],
        ]

        # Check indices
        assert (np.allclose(EN2_pairs,
                            feat_EN2.active_features[0].distance_indexes))
        # Check distances
        D = mdtraj.compute_distances(geom, EN2_pairs)
        assert (np.allclose(D, feat_EN2.transform(geom)))

        # excluded_neighbors=1 ## will yield the same as before, because the first neighbor
        # doesn't conting CA's anyway
        feat_EN1 = MDFeaturizer(self.bogus_geom_pdbfile)
        feat_EN1.add_distances_ca(excluded_neighbors=1)
        EN1_pairs = [[1, 3], [1, 5], [1, 7], [3, 5], [3, 7], [5, 7]]
        assert (np.allclose(EN1_pairs,
                            feat_EN1.active_features[0].distance_indexes))
        D = mdtraj.compute_distances(geom, EN1_pairs)
        assert (np.allclose(D, feat_EN1.transform(geom)))

    def test_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5, periodic=False, count_contacts=True
        )  # unperiodic distances such that we can compare
        # The dimensionality of the feature is now one
        assert (self.feat.dimension() == 1)
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        # Count the contacts
        C = C.sum(1, keepdims=True)
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_angles(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_angles_deg(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.alltrue(Y >= -180.0))
        assert (np.alltrue(Y <= 180.0))

    def test_angles_cossin(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        self.assertEqual(Y.shape, (self.traj.n_frames, 2 * sel.shape[0]))
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_dihedrals(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrals_deg(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.alltrue(Y >= -180.0))
        assert (np.alltrue(Y <= 180.0))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrials_cossin(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

        # test ordering of indices
        backbone_feature = self.feat.active_features[0]
        angle_indices = backbone_feature.angle_indexes
        np.testing.assert_equal(angle_indices[0],
                                backbone_feature._phi_inds[0])
        np.testing.assert_equal(angle_indices[1],
                                backbone_feature._psi_inds[0])
        np.testing.assert_equal(angle_indices[2],
                                backbone_feature._phi_inds[1])
        np.testing.assert_equal(angle_indices[3],
                                backbone_feature._psi_inds[1])

    def test_backbone_dihedrals_deg(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.alltrue(Y >= -180.0))
        assert (np.alltrue(Y <= 180.0))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self.assertEqual(Y.shape,
                         (len(traj), 3 * 4))  # (3 phi + 3 psi)*2 [cos, sin]
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension(), msg=desc)
        self.assertIn("COS", desc[0])
        self.assertIn("SIN", desc[1])

    def test_backbone_dihedrials_chi(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.alltrue(Y >= -np.pi))
        assert (np.alltrue(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    def test_MinRmsd(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame])
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame])
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile,
                                     ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj,
                            self.traj[self.ref_frame],
                            atom_indices=self.atom_indices)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices_precentered(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile,
                                     ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj,
                            self.traj[self.ref_frame],
                            atom_indices=self.atom_indices,
                            precentered=True)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_Residue_Mindist_Ca_all(self):
        n_ca = self.feat.topology.n_atoms
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1
        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array(self):
        contacts = np.array([[
            20,
            10,
        ], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj,
                                       scheme='ca',
                                       contacts=contacts)[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array_periodic(self):
        traj = mdtraj.load(pdbfile)
        # Atoms most far appart in Z
        atom_minz = traj.xyz.argmin(1).squeeze()[-1]
        atom_maxz = traj.xyz.argmax(1).squeeze()[-1]
        # Residues with the atoms most far appart in Z
        res_minz = traj.topology.atom(atom_minz).residue.index
        res_maxz = traj.topology.atom(atom_maxz).residue.index
        contacts = np.array([[res_minz, res_maxz]])
        # Tweak the trajectory so that a (bogus) PBC exists (otherwise traj._have_unitcell is False)
        traj.unitcell_angles = [90, 90, 90]
        traj.unitcell_lengths = [1, 1, 1]
        self.feat.add_residue_mindist(scheme='ca',
                                      residue_pairs=contacts,
                                      periodic=False)
        D = self.feat.transform(traj)
        Dperiodic_true = mdtraj.compute_contacts(traj,
                                                 scheme='ca',
                                                 contacts=contacts,
                                                 periodic=True)[0]
        Dperiodic_false = mdtraj.compute_contacts(traj,
                                                  scheme='ca',
                                                  contacts=contacts,
                                                  periodic=False)[0]
        # This asserts that the periodic option is having an effect at all
        assert not np.allclose(
            Dperiodic_false,
            Dperiodic_true,
        )
        # This asserts that the periodic option is being handled correctly by pyemma
        assert np.allclose(D, Dperiodic_false)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_One_Group(self):
        group0 = [0, 20, 30, 0]
        self.feat.add_group_mindist(
            group_definitions=[group0])  # Even with duplicates
        D = self.feat.transform(self.traj)
        dist_list = list(combinations(np.unique(group0), 2))
        Dref = mdtraj.compute_distances(self.traj, dist_list)
        assert np.allclose(D.squeeze(), Dref.min(1))
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        # Same three groups as the un-thresholded test, but the feature is
        # expected to be 1 where the minimal distance is <= cutoff, else 0.
        cutoff = .7
        groups = [[0, 20, 30, 0], [1, 21, 31, 1], [2, 22, 32, 2]]
        self.feat.add_group_mindist(group_definitions=groups,
                                    threshold=cutoff)
        computed = self.feat.transform(self.traj)

        # Reference distances: per group pair, minimum over inter-group pairs.
        unique_atoms = [np.unique(group) for group in groups]
        columns = []
        for first, second in ((0, 1), (0, 2), (1, 2)):
            atom_pairs = np.array(
                list(product(unique_atoms[first], unique_atoms[second])))
            columns.append(
                mdtraj.compute_distances(self.traj, atom_pairs).min(1))
        reference = np.vstack(columns).T

        # Binarize the reference with the same cutoff.
        expected = np.zeros_like(reference)
        expected[reference <= cutoff] = 1

        assert np.allclose(computed, expected)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        # Only selected group pairs are requested, including group 2 paired
        # with itself; the output columns must follow the order of the pairs.
        groups = [[0, 20, 30, 0], [1, 21, 31, 1], [2, 22, 32, 2]]
        group_pairs = np.array([[0, 1], [2, 2], [0, 2]])

        self.feat.add_group_mindist(group_definitions=groups,
                                    group_pairs=group_pairs)
        computed = self.feat.transform(self.traj)

        # Reference: inter-group pairs use the cartesian product of the unique
        # atoms; a group paired with itself uses its distinct atom pairs.
        unique_atoms = [np.unique(group) for group in groups]
        columns = []
        for first, second in group_pairs:
            if first == second:
                candidates = combinations(unique_atoms[first], 2)
            else:
                candidates = product(unique_atoms[first], unique_atoms[second])
            atom_pairs = np.array(list(candidates))
            columns.append(
                mdtraj.compute_distances(self.traj, atom_pairs).min(1))
        reference = np.vstack(columns).T

        assert np.allclose(computed.squeeze(), reference)
        assert len(self.feat.describe()) == self.feat.dimension()
示例#14
0
    def test_labels(self):
        """ just checks for exceptions """
        featurizer = MDFeaturizer(pdbfile)
        featurizer.add_angles([[1, 2, 3], [4, 5, 6]])
        # Only the exception type is checked here. Any statement placed after
        # the raising call inside the ``with`` block would never execute, so
        # nothing else belongs in it; the message text is not asserted.
        with self.assertRaises(ValueError):
            featurizer.add_backbone_torsions()
        featurizer.add_contacts([[0, 1], [0, 3]])
        featurizer.add_distances([[0, 1], [0, 3]])
        featurizer.add_inverse_distances([[0, 1], [0, 3]])
        cs = CustomFeature(lambda x: x - 1, dim=3)
        featurizer.add_custom_feature(cs)
        featurizer.add_minrmsd_to_ref(pdbfile)
        featurizer.add_residue_mindist()
        featurizer.add_group_mindist([[0, 1], [0, 2]])

        # describe() must cope with every feature type registered above
        featurizer.describe()
示例#15
0
    def testAddFeaturesWithDuplicates(self):
        """Add several features twice (same arguments) and check, via the
        number of active features, whether the repeated one was rejected."""
        featurizer = MDFeaturizer(pdbfile)

        def assert_active(count):
            self.assertEqual(len(featurizer.active_features), count)

        same_pairs = [[0, 1], [0, 3]]

        # angles, added twice -> one feature
        for _ in range(2):
            featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
        assert_active(1)

        # contacts
        featurizer.add_contacts([[0, 1], [0, 3]])
        assert_active(2)
        featurizer.add_contacts([[0, 1], [0, 3]])
        assert_active(2)

        # try to fool it with ca selection
        ca_atoms = featurizer.select_Ca()
        ca_pairs = featurizer.pairs(ca_atoms, excluded_neighbors=0)
        featurizer.add_distances(ca_pairs)
        assert_active(3)
        featurizer.add_distances_ca(excluded_neighbors=0)
        assert_active(3)

        # inverse distances
        featurizer.add_inverse_distances([[0, 1], [0, 3]])
        assert_active(4)

        # plain distances on the same pairs as the inverse distances
        featurizer.add_distances(same_pairs)
        assert_active(5)
        featurizer.add_distances([[0, 1], [0, 3]])
        assert_active(5)

        def my_func(x):
            return x - 1

        def foo(x):
            return x - 1

        # custom feature wrapping my_func, added twice
        my_feature = CustomFeature(my_func)
        my_feature.dimension = 3
        featurizer.add_custom_feature(my_feature)
        assert_active(6)
        featurizer.add_custom_feature(my_feature)
        assert_active(6)

        # since myfunc and foo are different functions, it should be added
        foo_feat = CustomFeature(foo, dim=3)
        featurizer.add_custom_feature(foo_feat)
        assert_active(7)

        # minrmsd with a trajectory object as reference
        ref = mdtraj.load(xtcfile, top=pdbfile)
        for _ in range(2):
            featurizer.add_minrmsd_to_ref(ref)
        assert_active(8)

        # minrmsd with a file name as reference
        for _ in range(2):
            featurizer.add_minrmsd_to_ref(pdbfile)
        assert_active(9)

        # residue mindist
        for _ in range(2):
            featurizer.add_residue_mindist()
        assert_active(10)

        # group mindist
        for _ in range(2):
            featurizer.add_group_mindist([[0, 1], [0, 2]])
        assert_active(11)
示例#16
0
class TestFeaturizer(unittest.TestCase):
    # Tests of the individual feature types offered by MDFeaturizer. Each test
    # compares the featurizer output against a reference computed directly
    # with numpy/mdtraj, or checks value ranges and description lengths.

    @classmethod
    def setUpClass(cls):
        import tempfile
        # mkstemp hands back an already-open OS level descriptor; wrap it so
        # it is closed again instead of leaking for the whole test run.
        fd, cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")
        with os.fdopen(fd, 'w') as fh:
            fh.write(asn_leu_pdb)

        # mkstemp instead of the deprecated, race-prone mktemp. Only the name
        # is needed, so the descriptor is closed at once; traj.save() below
        # writes to that path.
        fd, cls.asn_leu_traj = tempfile.mkstemp(suffix='.xtc')
        os.close(fd)

        # create traj for asn_leu: reference coordinates plus uniform noise
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        new_xyz = np.empty((n_frames, ref.shape[1], 3))
        noise = np.random.random(new_xyz.shape)
        new_xyz[:, :, :] = noise + ref
        traj.xyz = new_xyz
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

        super(TestFeaturizer, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        # best-effort removal of both temporary files created in setUpClass
        for path in (cls.asn_leu_pdbfile, cls.asn_leu_traj):
            try:
                os.unlink(path)
            except EnvironmentError:
                pass
        super(TestFeaturizer, cls).tearDownClass()

    def setUp(self):
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        # floor division keeps the atom indices integral on Python 3 as well
        self.atom_indices = np.arange(0, self.traj.n_atoms // 2)

    def test_select_backbone(self):
        # smoke test: only checks that the selection does not raise
        self.feat.select_Backbone()

    def test_select_all(self):
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_inverse_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj
        assert (np.all(sel == list(range(self.traj.n_atoms))))
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        self.feat.add_distances_ca(
            periodic=False, excluded_neighbors=0
        )  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5, periodic=False, count_contacts=True
        )  # unperiodic distances such that we can compare
        # The dimensionality of the feature is now one
        assert (self.feat.dimension() == 1)
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        # Count the contacts
        C = C.sum(1, keepdims=True)
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_angles(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_angles_deg(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))

    def test_angles_cossin(self):
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_dihedrals(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrals_deg(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrials_cossin(self):
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_deg(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self.assertEqual(Y.shape,
                         (len(traj), 3 * 4))  # (3 phi + 3 psi)*2 [cos, sin]
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi_cossin(self):
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    def test_MinRmsd(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame])
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame])
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile,
                                     ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj,
                            self.traj[self.ref_frame],
                            atom_indices=self.atom_indices)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices_precentered(self):
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile,
                                     ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj,
                            self.traj[self.ref_frame],
                            atom_indices=self.atom_indices,
                            precentered=True)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_Residue_Mindist_Ca_all(self):
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1
        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array(self):
        contacts = np.array([[20, 10], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj,
                                       scheme='ca',
                                       contacts=contacts)[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_One_Group(self):
        group0 = [0, 20, 30, 0]
        self.feat.add_group_mindist(
            group_definitions=[group0])  # Even with duplicates
        D = self.feat.transform(self.traj)
        dist_list = list(combinations(np.unique(group0), 2))
        Dref = mdtraj.compute_distances(self.traj, dist_list)
        assert np.allclose(D.squeeze(), Dref.min(1))
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        threshold = .7
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    threshold=threshold)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1

        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]

        group_pairs = np.array([[0, 1], [2, 2], [0, 2]])

        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    group_pairs=group_pairs)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_22 = np.array(list(combinations(np.unique(group2), 2)))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_22 = mdtraj.compute_distances(self.traj, dist_list_22).min(1)
        # column order follows group_pairs: (0,1), (2,2), (0,2)
        Dref = np.vstack((Dref_01, Dref_22, Dref_02)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()
示例#17
0
class TestCustomFeature(unittest.TestCase):
    # Checks MDFeaturizer.add_custom_func with a user supplied function:
    # output values, dimension, and the different ways to pass a description.

    def setUp(self):
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)

        # some distances
        self.pairs = [[0, 1], [0, 2], [1, 2]]
        # bogus means
        self.means = [.5, .75, 1.0]
        # bogus transformation, projects from 3 distances to 2 components
        self.U = np.array([[0, 1], [1, 0], [1, 1]])

    def _register(self, dim, **kwargs):
        # Single place for the add_custom_func call shared by all tests;
        # only the declared dimension and optional keywords vary.
        self.feat.add_custom_func(
            some_call_to_mdtraj_some_operations_some_linalg, dim,
            self.pairs, self.means, self.U, **kwargs)

    def test_some_feature(self):
        self._register(self.U.shape[1])

        via_featurizer = self.feat.transform(self.traj)
        # Directly call the function
        via_function = some_call_to_mdtraj_some_operations_some_linalg(
            self.traj, self.pairs, self.means, self.U)
        assert np.allclose(via_featurizer, via_function)

    def test_describe(self):
        self._register(self.U.shape[1])
        labels = self.feat.describe()
        self.assertEqual(len(labels), self.feat.dimension())

    def test_describe_given(self):
        n_out = self.U.shape[1]
        self._register(n_out, description=['foo'] * n_out)
        labels = self.feat.describe()
        self.assertIn('foo', labels)
        self.assertEqual(len(labels), self.feat.dimension())

    def test_describe_given_str(self):
        self._register(self.U.shape[1], description='test')
        labels = self.feat.describe()
        self.assertIn('test', labels)
        self.assertEqual(len(labels), self.feat.dimension())

    def test_describe_given_wrong(self):
        """ either a list matching input dim, or 1 element iterable allowed"""
        with self.assertRaises(ValueError):
            self._register(self.U.shape[1] + 1, description=['ff', 'ff'])

    def test_describe_1_element_expand(self):
        self._register(self.U.shape[1] + 1, description=['test'])
        labels = self.feat.describe()
        self.assertEqual(labels, ['test'] * 3)

    def test_dimensionality(self):
        self._register(self.U.shape[1])

        assert self.feat.dimension() == self.U.shape[1]
示例#18
0
class FeatureReader(DataSource):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        chunk size handed to the base class constructor

    featurizer: MDFeaturizer
        a preconstructed featurizer; if given, it takes precedence over
        ``topologyfile``

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> X = reader.get_output()

    """
    SUPPORTED_RANDOM_ACCESS_FORMATS = (".h5", ".dcd", ".binpos", ".nc", ".xtc",
                                       ".trr")

    def __init__(self,
                 trajectories,
                 topologyfile=None,
                 chunksize=100,
                 featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        self.filenames = trajectories

        # random access needs every file to be in a supported format
        self._is_random_accessible = all(
            f.endswith(FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS)
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        # (iterate self.filenames, like the check above, rather than the raw
        # constructor argument, so both checks look at the same names)
        if any(f.endswith(('.xtc', '.trr')) for f in self.filenames):
            from distutils.version import LooseVersion
            xtc_trr_random_accessible = (
                LooseVersion(mdtraj.version.version) >= LooseVersion('1.6.1'))
            self._is_random_accessible &= xtc_trr_random_accessible

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(
            self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(
            self, 3)

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

    @property
    @deprecated('Please use "filenames" property.')
    def trajfiles(self):
        """Deprecated alias of :attr:`filenames`."""
        return self.filenames

    def _get_traj_info(self, filename):
        """Open ``filename`` with mdtraj and collect its basic properties.

        Returns a ``TrajInfo`` built from the second dimension of the first
        frame read, the number of frames and the file offsets (an empty list
        when the file handle has no ``offsets`` attribute).
        """
        # workaround NotImplementedError __len__ for xyz files
        # Github issue: markovmodel/pyemma#621
        if six.PY2:
            from mock import patch
        else:
            from unittest.mock import patch
        from mdtraj.formats import XYZTrajectoryFile

        def _make_len_func(top):
            def _len_xyz(self):
                assert isinstance(self, XYZTrajectoryFile)
                assert hasattr(
                    self, '_filename'), "structual change in xyzfile class!"
                import warnings
                from pyemma.util.exceptions import EfficiencyWarning
                # Build the message in two steps: the caching hint is
                # optional, the base text must always be emitted. (Written as
                # one conditional expression, the ``if``/``else`` would apply
                # to the whole concatenation and blank out the message.)
                message = ("reading all of your data,"
                           " just to determine number of frames.")
                if config['use_trajectory_lengths_cache']:
                    message += " Happens only once, because this is cached."
                warnings.warn(message, EfficiencyWarning)
                # obtain len by reading whole file!
                mditer = mdtraj.iterload(self._filename, top=top)
                return sum(t.n_frames for t in mditer)

            return _len_xyz

        f = _make_len_func(self.topfile)

        # lookups pre-computed lengths, or compute it on the fly and store it in db.
        with patch.object(XYZTrajectoryFile, '__len__', f):
            with mdtraj.open(filename, mode='r') as fh:
                length = len(fh)
                frame = fh.read(1)[0]
                ndim = np.shape(frame)[1]
                offsets = fh.offsets if hasattr(fh, 'offsets') else []

        return TrajInfo(ndim, length, offsets)

    def _create_iterator(self,
                         skip=0,
                         chunk=0,
                         stride=1,
                         return_trajindex=True,
                         cols=None):
        """Create a ``FeatureReaderIterator`` over this reader, forwarding
        all arguments unchanged."""
        return FeatureReaderIterator(self,
                                     skip=skip,
                                     chunk=chunk,
                                     stride=stride,
                                     return_trajindex=return_trajindex,
                                     cols=cols)

    def describe(self):
        """
        Returns a description of this transformer

        :return: list whose first entry is a header string, followed by the
            entries of ``self.featurizer.describe()``
        """
        return ["Feature reader with following features"
                ] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: ``3 * n_atoms`` of the featurizer topology if no feature is
            active, otherwise ``self.featurizer.dimension()``
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the filenames of the reader have the same n_atoms

        Only the first frame of the first trajectory file is loaded and
        compared against the featurizer's topology.
        """
        traj = mdtraj.load_frame(self.filenames[0], index=0, top=self.topfile)
        desired_n_atoms = self.featurizer.topology.n_atoms
        assert traj.xyz.shape[1] == desired_n_atoms, "Mismatch in the number of atoms between the topology" \
                                                     " and the first trajectory file, %u vs %u" % \
                                                     (desired_n_atoms, traj.xyz.shape[1])
示例#19
0
class FeatureReader(DataSource):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g. distances).

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files; a single path string is wrapped into a
        one-element list

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        how many frames to process in one batch.

    featurizer: MDFeaturizer
        a preconstructed featurizer

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data
    >>> from pyemma.util.contexts import settings

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> with settings(show_progress_bars=False):
    ...    X = reader.get_output()

    """
    SUPPORTED_RANDOM_ACCESS_FORMATS = (".h5", ".dcd", ".binpos", ".nc", ".xtc",
                                       ".trr")

    def __init__(self,
                 trajectories,
                 topologyfile=None,
                 chunksize=1000,
                 featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        # A bare path string would otherwise be iterated character by
        # character in the suffix checks below.
        if isinstance(trajectories, str):
            trajectories = [trajectories]
        self.filenames = trajectories
        self._return_traj_obj = False

        # Random access is only offered if every file has a supported suffix.
        self._is_random_accessible = all(
            f.endswith(FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS)
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        if any(f.endswith(('.xtc', '.trr')) for f in self.filenames):
            # NOTE: distutils is deprecated (PEP 632) and absent from
            # Python >= 3.12; kept here because no replacement is imported.
            from distutils.version import LooseVersion
            xtc_trr_random_accessible = (
                LooseVersion(mdtraj.version.version) >= LooseVersion('1.6.1'))
            self._is_random_accessible &= xtc_trr_random_accessible

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(
            self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(
            self, 3)

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            # the featurizer's topology file overrides the one passed in
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

    @property
    @deprecated('Please use "filenames" property.')
    def trajfiles(self):
        """Deprecated alias of :attr:`filenames`."""
        return self.filenames

    def _get_traj_info(self, filename):
        """
        Open ``filename`` with mdtraj and collect its trajectory metadata.

        :param filename: path of the trajectory file to inspect
        :return: ``TrajInfo(ndim, length, offsets)`` where ``length`` is
            ``len()`` of the opened file, ``ndim`` is the size of the second
            axis of the first frame read, and ``offsets`` is the file
            handle's ``offsets`` attribute if present, else an empty list
        """
        with mdtraj.open(filename, mode='r') as fh:
            length = len(fh)
            frame = fh.read(1)[0]
            ndim = np.shape(frame)[1]
            offsets = fh.offsets if hasattr(fh, 'offsets') else []

        return TrajInfo(ndim, length, offsets)

    def _create_iterator(self,
                         skip=0,
                         chunk=0,
                         stride=1,
                         return_trajindex=True,
                         cols=None):
        """Create a :class:`FeatureReaderIterator` for this reader, forwarding all arguments."""
        return FeatureReaderIterator(self,
                                     skip=skip,
                                     chunk=chunk,
                                     stride=stride,
                                     return_trajindex=return_trajindex,
                                     cols=cols)

    def describe(self):
        """
        Returns a description of this transformer

        :return: list with a header string followed by the entries of
            ``self.featurizer.describe()``
        """
        return ["Feature reader with following features"
                ] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: ``3 * n_atoms`` of the featurizer's topology if no feature
            is active, otherwise ``self.featurizer.dimension()``
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the filenames of the reader have the same n_atoms

        Only the first file in ``self.filenames`` is inspected.
        """
        top = self.featurizer.topology
        traj = mdtraj.load_frame(self.filenames[0], index=0, top=top)
        desired_n_atoms = top.n_atoms
        assert traj.xyz.shape[1] == desired_n_atoms, "Mismatch in the number of atoms between the topology" \
                                                     " and the first trajectory file, %u vs %u" % \
                                                     (desired_n_atoms, traj.xyz.shape[1])
示例#20
0
    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        """Compare ``add_distances_ca`` on ``pdbfile_ops_aa`` with explicit pair
        distances computed on ``pdbfile_ops_Ca``, both excluding 2 neighbors."""
        all_atom_feat = MDFeaturizer(pdbfile_ops_aa)
        all_atom_feat.add_distances_ca(excluded_neighbors=2)
        dists_all_atom = all_atom_feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Reference values: build the pair list by hand from the Ca selection
        # of the second featurizer and add it as plain distances.
        ca_only_feat = MDFeaturizer(pdbfile_ops_Ca)
        ca_indices = ca_only_feat.select_Ca()
        reference_pairs = all_atom_feat.pairs(ca_indices, excluded_neighbors=2)
        ca_only_feat.add_distances(reference_pairs)
        dists_ca_only = ca_only_feat.transform(mdtraj.load(pdbfile_ops_Ca))

        assert np.allclose(dists_all_atom, dists_ca_only)
示例#21
0
class FeatureReader(DataSource, SerializableMixIn):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g. distances).

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files; anything that is not a list or tuple is
        wrapped into a one-element list, and every entry is converted with
        ``str()``

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        how many frames to process in one batch.

    featurizer: MDFeaturizer
        a preconstructed featurizer

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data
    >>> from pyemma.util.contexts import settings

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> with settings(show_progress_bars=False):
    ...    X = reader.get_output()

    """
    SUPPORTED_RANDOM_ACCESS_FORMATS = (".h5", ".dcd", ".binpos", ".nc", ".xtc",
                                       ".trr")
    __serialize_version = 0

    def __init__(self,
                 trajectories,
                 topologyfile=None,
                 chunksize=1000,
                 featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)
        self._is_reader = True
        self.topfile = topologyfile
        if not isinstance(trajectories, (list, tuple)):
            trajectories = [trajectories]
        self.filenames = copy([str(traj) for traj in trajectories
                               ])  # this is modified in-place in mdtraj.load
        self._return_traj_obj = False

        # Both suffix checks work on the normalised string list, so entries
        # that were passed as non-string path objects are handled uniformly.
        self._is_random_accessible = all(
            file_suffix(f) in FeatureReader.SUPPORTED_RANDOM_ACCESS_FORMATS
            for f in self.filenames)
        # check we have at least mdtraj-1.6.1 to efficiently seek xtc, trr formats
        if any(file_suffix(f) in ('.xtc', '.trr') for f in self.filenames):
            # NOTE: distutils is deprecated (PEP 632) and absent from
            # Python >= 3.12; kept here because no replacement is imported.
            from distutils.version import LooseVersion
            xtc_trr_random_accessible = (
                LooseVersion(mdtraj.version.version) >= LooseVersion('1.6.1'))
            self._is_random_accessible &= xtc_trr_random_accessible

        self._ra_cuboid = FeatureReaderCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = FeatureReaderJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = FeatureReaderLinearRandomAccessStrategy(
            self, 2)
        self._ra_linear_itraj_strategy = FeatureReaderLinearItrajRandomAccessStrategy(
            self, 3)

        # featurizer
        if topologyfile and featurizer:
            self.logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            # the featurizer's topology file overrides the one passed in
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

    @property
    @deprecated('Please use "filenames" property.')
    def trajfiles(self):
        """Deprecated alias of :attr:`filenames`."""
        return self.filenames

    def _get_traj_info(self, filename):
        """
        Open ``filename`` with mdtraj and collect its trajectory metadata.

        :param filename: path (string or ``Path``) of the trajectory file
        :return: ``TrajInfo(ndim, length, offsets)`` where ``ndim`` is the
            size of the second axis of the first frame read, ``length`` is
            ``len()`` of the opened file (or the position reported by
            ``tell()`` after reading the whole file when ``len()`` is not
            available), and ``offsets`` is the handle's ``offsets``
            attribute if present, else an empty tuple
        """
        filename = str(filename) if isinstance(filename, Path) else filename
        with mdtraj.open(filename, mode='r') as fh:
            try:
                length = len(fh)
            # certain formats like txt based ones (.gro, .lammpstrj) do not implement len()
            except (NotImplementedError, TypeError):
                length = None

            frame = fh.read(1)[0]
            ndim = np.shape(frame)[1]

            if length is None:
                # fall back to consuming the remaining frames and asking the
                # handle for its current position
                _ = fh.read()
                length = fh.tell()

            offsets = fh.offsets if hasattr(fh, 'offsets') else ()

        return TrajInfo(ndim, length, offsets)

    def _create_iterator(self,
                         skip=0,
                         chunk=0,
                         stride=1,
                         return_trajindex=True,
                         cols=None):
        """
        Create a :class:`FeatureReaderIterator` for this reader.

        All arguments are forwarded to the iterator together with a transform
        callback that consults ``self._return_traj_obj`` on every call.
        """
        def transform(data):
            # trigger to pass mdtraj.Trajectory objects to self.featurizer or not.
            if self._return_traj_obj:
                return data
            else:
                return self.featurizer.transform(data)

        it = FeatureReaderIterator(self,
                                   skip=skip,
                                   chunk=chunk,
                                   stride=stride,
                                   return_trajindex=return_trajindex,
                                   cols=cols,
                                   transform_function=transform)
        return it

    def describe(self):
        """
        Returns a description of this transformer

        :return: list with a header string followed by the entries of
            ``self.featurizer.describe()``
        """
        return ["Feature reader with following features"
                ] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: ``3 * n_atoms`` of the featurizer's topology if no feature
            is active, otherwise ``self.featurizer.dimension()``
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    @staticmethod
    def supports_format(file_name):
        """
        Static method that checks whether the extension of the input file name indicates a file type that can
        potentially be read with a FeatureReader.

        ``.pdb`` and ``.pdb.gz`` names are always rejected; any other name is
        accepted if its last suffix is a key of mdtraj's loader registry.

        :param file_name: the file name or path
        :return: True if the extension indicates a file type that could be read, otherwise False
            (non-string input always yields False)
        """
        import os
        from mdtraj.formats.registry import FormatRegistry

        if not isinstance(file_name, str):
            return False

        # os.path.splitext only yields the last suffix ('.gz'), so the
        # compound '.pdb.gz' suffix has to be tested on the full name.
        if file_name.endswith('.pdb.gz'):
            return False

        # ensure there is something to split
        suffix = os.path.splitext("/dummy" + file_name)[1]
        if suffix == '.pdb':
            return False
        return suffix in FormatRegistry.loaders.keys()

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the filenames of the reader have the same n_atoms

        Only the first file in ``self.filenames`` is inspected.
        """
        top = self.featurizer.topology
        traj = mdtraj.load_frame(self.filenames[0], index=0, top=top)
        desired_n_atoms = top.n_atoms
        assert traj.xyz.shape[1] == desired_n_atoms, "Mismatch in the number of atoms between the topology" \
                                                     " and the first trajectory file, %u vs %u" % \
                                                     (desired_n_atoms, traj.xyz.shape[1])

    def __reduce__(self):
        # serialize only the constructor arguments; the topology file slot is
        # passed as None, so the featurizer is what gets used on re-creation.
        return FeatureReader, (self.filenames, None, self.chunksize,
                               self.featurizer)