Пример #1
0
    def __init__(self, data, motifs, row_indices=None, background=None):
        """Create a PSSM from counts, frequencies or existing scores.

        Parameters
        ----------
        data
            array-like that is summed along axis 1. It is treated as counts
            when no value is negative and the largest value exceeds 1 (each
            row is then divided by its sum), as frequencies when no value is
            negative and every non-NaN row sum is close to 1, and otherwise
            as scores, which must contain both negative and positive values.
        motifs
            forwarded to the parent constructor; its length determines the
            size of the default background
        row_indices
            forwarded to the parent constructor
        background
            per-motif background frequencies, only consulted for counts or
            frequency input. Defaults to equal frequencies.

        Raises
        ------
        ValueError
            if data is not counts, frequencies or scores as described above
        """
        data = numpy.array(data)
        totals = data.sum(axis=1)

        # counts: nothing negative and at least one value above 1
        if data.min() >= 0 and data.max() > 1:
            # rescale each row by its total so it becomes frequencies
            data = data / numpy.vstack(totals)
            totals = data.sum(axis=1)

        # frequencies: nothing negative and rows sum to 1; rows whose total
        # is NaN are left out of the comparison
        is_freqs = (data >= 0).all() and numpy.allclose(
            totals[~numpy.isnan(totals)], 1
        )

        if not is_freqs:
            # the only other accepted input is scores on both sides of zero
            spans_zero = data.min() < 0 < data.max()
            if not spans_zero:
                raise ValueError("PSSM has been supplied invalid data")

            super(PSSM, self).__init__(
                data, motifs, row_indices=row_indices, dtype=float
            )
            self._indices = numpy.arange(self.shape[0])  # used for scoring
            return

        # frequency input: scores are log(freqs) - log(background)
        if background is None:
            background = numpy.ones(len(motifs), dtype=float) / len(motifs)
        self._background = numpy.array(background)
        assert len(background) == len(
            motifs
        ), "Mismatch between number of motifs and the background"
        validate_freqs_array(self._background)
        log_odds = safe_log(data) - safe_log(self._background)
        super(PSSM, self).__init__(
            log_odds, motifs, row_indices=row_indices, dtype=float
        )
        self._indices = numpy.arange(self.shape[0])  # used for scoring
Пример #2
0
    def test_write_tabular_pssm(self):
        """correctly writes tabular data for PSSM"""

        # data from test_profile
        data = numpy.array(
            [
                [0.1, 0.3, 0.5, 0.1],
                [0.25, 0.25, 0.25, 0.25],
                [0.05, 0.8, 0.05, 0.1],
                [0.7, 0.1, 0.1, 0.1],
                [0.6, 0.15, 0.05, 0.2],
            ]
        )
        pssm = PSSM(data, "ACTG")
        loader = io_app.load_tabular(sep="\t")
        with TemporaryDirectory(dir=".") as dirname:
            writer = io_app.write_tabular(data_path=dirname, format="tsv")
            outpath = join(dirname, "delme.tsv")
            writer.write(pssm, identifier=outpath)
            new = loader(outpath)
            # scores expected for frequency input with an equifrequent
            # background over the four motifs
            expected = safe_log(data) - safe_log(
                numpy.array([0.25, 0.25, 0.25, 0.25])
            )
            # NOTE(review): assumes the written table holds one score per row
            # in row-major order (position, then motif) with the score in
            # column index 2, as the previous `j = i // 4` indexing implied.
            # Every score is checked; previously only the first
            # len(expected) rows were compared and the column index was
            # wrong beyond the first position.
            num_motifs = expected.shape[1]
            for i, value in enumerate(expected.flatten()):
                position, motif = divmod(i, num_motifs)
                self.assertTrue(
                    numpy.isclose(new.array[i][2], value, atol=0.0001),
                    msg=f"row {i} (position {position}, motif {motif}) differs",
                )
Пример #3
0
    def relative_entropy_terms(self, background=None):
        """
        Compute the per-motif relative entropy terms for each row.

        Parameters
        ----------
        background : dict
            {motif_1: prob_1, motif_2: prob_2, ...}, the background
            distribution. A motif missing from the dict is given 0.

        Returns
        -------
        DictArray
            the terms ``background * (log(background) - log(self.array))``,
            wrapped by ``self.template``

        Notes
        -----
        If background is None, every motif gets the same background
        frequency. The background is checked with validate_freqs_array
        before use.
        """
        if background is not None:
            # order the supplied frequencies to match self.motifs
            bg_freqs = array([background.get(m, 0) for m in self.motifs])
        else:
            num_motifs = len(self.motifs)
            uniform = 1 / num_motifs
            bg_freqs = array([uniform] * num_motifs)

        validate_freqs_array(bg_freqs)
        log_ratio = safe_log(bg_freqs) - safe_log(self.array)
        terms = bg_freqs * log_ratio
        return self.template.wrap(terms)
Пример #4
0
    def test_safe_log(self):
        """safe_log: should handle pos/neg/zero/empty arrays"""
        # 2D integer input that includes a zero entry
        mixed = array([[4, 0, 8], [2, 16, 4]])
        mixed_expected = array([[2, 0, 3], [1, 4, 2]])
        assert_equal(safe_log(mixed), mixed_expected)

        # integer input gives float output
        from_ints = safe_log(array([1, 2, 3]))
        assert_allclose(from_ints, array([0, 1, 1.5849625]))

        # input made up entirely of zeros comes back as zeros
        all_zero = array([[0, 0], [0, 0]])
        assert_equal(safe_log(all_zero), array([[0, 0], [0, 0]]))

        # a negative entry must raise
        self.assertRaises(FloatingPointError, safe_log, array([0, 3, -4]))

        # empty 1D input
        empty_1d = array([])
        assert_equal(safe_log(empty_1d), array([]))

        # empty 2D input
        empty_2d = array([[]])
        assert_equal(safe_log(empty_2d), array([[]]))