示例#1
0
def get_contact_density(contact_list, seq_length):
    """Return a per-position contact density scaled to integers from 0 to 10.

    Credits to Felix Simkovic; code taken from GitHub rigdenlab/conkit.

    Parameters
    ----------
    contact_list : iterable
       Indexable contacts; every integer from ``c[1]`` up to and including
       ``c[0]`` is added to the training sample of the density estimate.
    seq_length : int
       The density is evaluated at the positions ``1 .. seq_length``.

    Returns
    -------
    list
       One integer per evaluated position: the density divided by its
       maximum, rounded to one decimal place and multiplied by ten.

    """
    # Expand each contact into all positions it spans (end point included).
    spanned = []
    for contact in contact_list:
        spanned.extend(np.arange(contact[1], contact[0] + 1))
    samples = np.array(spanned, dtype=np.int64)[:, np.newaxis]

    # Bandwidth is always estimated with the 'amise' estimator here.
    estimator = bandwidth_factory('amise')
    kde = KernelDensity(bandwidth=estimator(samples).bw).fit(samples)

    positions = np.arange(1, seq_length + 1)[:, np.newaxis]
    # score_samples yields log-densities, hence the exponentiation.
    raw_density = np.exp(kde.score_samples(positions)).tolist()

    peak = max(raw_density)
    scaled = []
    for value in raw_density:
        scaled.append(int(round(float(value) / peak, 1) * 10))
    return scaled
示例#2
0
    def get_contact_density(self, bw_method="amise"):
        """Estimate the contact density along the sequence with a kernel density fit

        Every contact contributes all residue numbers from its ``res1_seq`` up to
        and including its ``res2_seq`` to a 1D training sample. A
        :obj:`sklearn.neighbors.KernelDensity` estimator is fitted to that sample
        and evaluated at every integer between the smallest and the largest
        sampled residue number.

        The kernel bandwidth is computed by the estimator that
        ``conkit.misc.bandwidth.bandwidth_factory`` returns for ``bw_method``.

        Notes
        -----
        Carried over from the earlier documentation of this method: in rules 2
        and 3 of the bandwidth algorithms, the value of :math:`\\sigma` is the
        smaller of the standard deviation of ``X`` or the normalized
        interquartile range.

        Parameters
        ----------
        bw_method : str, optional
           The bandwidth estimator to use [default: amise]

        Returns
        -------
        list
           The list of per-residue density estimates

        Raises
        ------
        :exc:`ImportError`
           Cannot find scikit-learn package
        :exc:`ValueError`
           Undefined bandwidth method
        :exc:`ValueError`
           :obj:`~conkit.core.contactmap.ContactMap` is empty

        """
        try:
            import sklearn.neighbors
        except ImportError as err:
            raise ImportError(err)

        if self.empty:
            raise ValueError("ContactMap is empty")

        # Collect every residue number spanned by a contact (end point included).
        spanned = []
        for contact in self:
            spanned.extend(np.arange(contact.res1_seq, contact.res2_seq + 1))
        samples = np.array(spanned, dtype=np.int64)[:, np.newaxis]

        # Evaluate on the closed integer interval covered by the samples.
        lowest, highest = samples.min(), samples.max()
        grid = np.arange(lowest, highest + 1)[:, np.newaxis]

        from conkit.misc.bandwidth import bandwidth_factory

        estimator = bandwidth_factory(bw_method)
        kde = sklearn.neighbors.KernelDensity(bandwidth=estimator(samples).bw)
        kde = kde.fit(samples)
        # score_samples yields log-densities, hence the exponentiation.
        log_density = kde.score_samples(grid)
        return np.exp(log_density).tolist()
示例#3
0
    def calculate_contact_density(self, bw_method="amise"):
        """Estimate the contact density along the sequence with a kernel density fit

        Every contact contributes the residue numbers from its ``res1_seq`` up to
        but NOT including its ``res2_seq`` to a 1D training sample. A
        :obj:`sklearn.neighbors.KernelDensity` estimator is fitted to that sample
        and evaluated at every integer between the smallest and the largest
        sampled residue number.

        The kernel bandwidth is computed by the estimator that
        ``conkit.misc.bandwidth.bandwidth_factory`` returns for ``bw_method``.

        Notes
        -----
        Carried over from the earlier documentation of this method: in rules 2
        and 3 of the bandwidth algorithms, the value of :math:`\\sigma` is the
        smaller of the standard deviation of ``X`` or the normalized
        interquartile range.

        Parameters
        ----------
        bw_method : str, optional
           The bandwidth estimator to use [default: amise]

        Returns
        -------
        list
           The list of per-residue density estimates

        Raises
        ------
        RuntimeError
           Cannot find SciKit package
        ValueError
           Undefined bandwidth method
        ValueError
           ContactMap is empty

        """
        try:
            import sklearn.neighbors
        except ImportError:
            raise RuntimeError("Cannot find SciKit package")

        if self.empty:
            raise ValueError("ContactMap is empty")

        # TODO: Chunan suggested to fix this bug - results are usually marginally better
        # REM: Bug in Sadowski's algorithm, res2 is excluded from list to train KDE
        # REM: Remember to change test cases when corrected implementation benchmarked
        # REM: The corrected range would end at ``res2_seq + 1``; the exclusive
        #      end below is kept on purpose until that benchmark is done.
        spanned = []
        for contact in self:
            spanned.extend(np.arange(contact.res1_seq, contact.res2_seq))
        samples = np.array(spanned)[:, np.newaxis]

        # Evaluate on the closed integer interval covered by the samples.
        lowest, highest = samples.min(), samples.max()
        grid = np.arange(lowest, highest + 1)[:, np.newaxis]

        from conkit.misc.bandwidth import bandwidth_factory
        estimator = bandwidth_factory(bw_method)
        kde = sklearn.neighbors.KernelDensity(bandwidth=estimator(samples).bw)
        kde = kde.fit(samples)
        # score_samples yields log-densities, hence the exponentiation.
        log_density = kde.score_samples(grid)
        return np.exp(log_density).tolist()
示例#4
0
 def test_bandwidth_factory_9(self):
     # The factory is expected to raise ValueError for the name "garbage".
     self.assertRaises(ValueError, bandwidth.bandwidth_factory, "garbage")
示例#5
0
 def test_bandwidth_factory_8(self):
     # The factory is expected to raise ValueError for the name "silvermn"
     # (note the missing "a").
     self.assertRaises(ValueError, bandwidth.bandwidth_factory, "silvermn")
示例#6
0
 def test_bandwidth_factory_6(self):
     # The factory is expected to raise ValueError for the upper-case
     # name "SILVERMAN".
     self.assertRaises(ValueError, bandwidth.bandwidth_factory, "SILVERMAN")
示例#7
0
 def test_bandwidth_factory_5(self):
     # The object returned for "silverman" must stringify as the
     # SilvermanBW class.
     expected = "<class 'conkit.misc.bandwidth.SilvermanBW'>"
     produced = bandwidth.bandwidth_factory("silverman")
     self.assertEqual(str(produced), expected)
示例#8
0
 def test_bandwidth_factory_4(self):
     # The object returned for "scott" must stringify as the ScottBW class.
     self.assertEqual(
         str(bandwidth.bandwidth_factory("scott")),
         "<class 'conkit.misc.bandwidth.ScottBW'>",
     )
示例#9
0
 def test_bandwidth_factory_3(self):
     # The object returned for "linear" must stringify as the LinearBW class.
     expected = "<class 'conkit.misc.bandwidth.LinearBW'>"
     produced = bandwidth.bandwidth_factory("linear")
     self.assertEqual(str(produced), expected)
示例#10
0
 def test_bandwidth_factory_2(self):
     # The object returned for "bowman" must stringify as the BowmanBW class.
     self.assertEqual(
         str(bandwidth.bandwidth_factory("bowman")),
         "<class 'conkit.misc.bandwidth.BowmanBW'>",
     )
示例#11
0
 def test_bandwidth_factory_1(self):
     # The object returned for "amise" must stringify as the AmiseBW class.
     expected = "<class 'conkit.misc.bandwidth.AmiseBW'>"
     produced = bandwidth.bandwidth_factory("amise")
     self.assertEqual(str(produced), expected)