示例#1
0
    def test_als(self):
        """ Test accuracy of alternating least-squares NMF algorithm
        against the MATLAB-computed version
        """
        #  set data and initializing constants
        keys = [array([i + 1]) for i in range(4)]
        data_local = array([[1.0, 2.0, 6.0], [1.0, 3.0, 0.0], [1.0, 4.0, 6.0],
                            [5.0, 1.0, 4.0]])
        data = self.sc.parallelize(zip(keys, data_local))
        h0 = array([[0.09082617, 0.85490047, 0.57234593],
                    [0.82766740, 0.21301186, 0.90913979]])

        # if the rows of h are not normalized on each iteration:
        h_true = array([[0., 0.6010, 0.9163], [0.8970, 0.1556, 0.7423]])
        w_true = array([[4.5885, 1.5348], [1.3651, 0.2184], [5.9349, 1.0030],
                        [0., 5.5147]])

        # if the columns of h are normalized (as in the current implementation):
        scale_mat = diag(norm(h_true, axis=1))
        h_true = dot(LinAlg.inv(scale_mat), h_true)
        w_true = dot(w_true, scale_mat)

        # calculate NMF using the Thunder implementation
        # (maxiter=9 corresponds with Matlab algorithm)
        nmf_thunder = NMF(k=2, method="als", h0=h0, maxiter=9)
        nmf_thunder.calc(data)
        h_thunder = nmf_thunder.h
        w_thunder = array(nmf_thunder.w.values().collect())

        tol = 1e-03  # allow small error
        assert (allclose(w_thunder, w_true, atol=tol))
        assert (allclose(h_thunder, h_true, atol=tol))
示例#2
0
    def test_als(self):
        """ Test accuracy of alternating least-squares NMF algorithm
        against the MATLAB-computed version
        """
        #  set data and initializing constants
        keys = [array([i + 1]) for i in range(4)]
        data_local = array([[1.0, 2.0, 6.0], [1.0, 3.0, 0.0], [1.0, 4.0, 6.0],
                            [5.0, 1.0, 4.0]])
        data = self.sc.parallelize(zip(keys, data_local))
        h0 = array([[0.09082617, 0.85490047, 0.57234593],
                    [0.82766740, 0.21301186, 0.90913979]])
        w0_local = array([[0.40967962, 0.82131499], [0.98889171, 0.54361205],
                          [0.08074117, 0.44954983], [0.68648657, 0.71598381]])
        w0 = self.sc.parallelize(zip(keys, w0_local))

        # if the rows of h are not normalized on each iteration:
        h_true = array([[0.88909291, 0., 0.45868362],
                        [1.54548542, 6.33549752, 10.65274926]])
        w_true = array([[0.67478533, 0.47374843], [0.08587805, 0.12829046],
                        [0., 0.58218579], [5.34371512, 0.14884191]])

        # if the columns of h are normalized (as in the current implementation):
        scale_mat = diag(norm(h_true, axis=1))
        h_true = dot(LinAlg.inv(scale_mat), h_true)
        w_true = dot(w_true, scale_mat)

        # calculate NMF using the Thunder implementation
        nmf_thunder = NMF(k=2, method="als", h0=h0, w0=w0, maxiter=20)
        nmf_thunder.calc(data)
        h_thunder = nmf_thunder.h
        w_thunder = array(nmf_thunder.w.values().collect())

        tol = 1e-04  # allow small error
        assert (allclose(w_thunder, w_true, atol=tol))
        assert (allclose(h_thunder, h_true, atol=tol))
示例#3
0
    def test_init(self):
        """
        test performance of whole function, including random initialization
        """
        data_local = array([[1.0, 2.0, 6.0], [1.0, 3.0, 0.0], [1.0, 4.0, 6.0],
                            [5.0, 1.0, 4.0]])
        data = self.sc.parallelize(
            zip([array([i]) for i in range(data_local.shape[0])], data_local))

        nmf_thunder = NMF(k=2, recon_hist='final')
        nmf_thunder.calc(data)

        # check to see if Thunder's solution achieves close-to-optimal reconstruction error
        # scikit-learn's solution achieves 2.993952
        # matlab's non-deterministic implementation usually achieves < 2.9950 (when it converges)
        assert (nmf_thunder.recon_err < 2.9950)
示例#4
0
    def test_als(self):
        """ Test accuracy of alternating least-squares NMF algorithm
        against the MATLAB-computed version
        """
        #  set data and initializing constants
        keys = [array([i+1]) for i in range(4)]
        data_local = array([
            [1.0, 2.0, 6.0],
            [1.0, 3.0, 0.0],
            [1.0, 4.0, 6.0],
            [5.0, 1.0, 4.0]])
        data = self.sc.parallelize(zip(keys, data_local))
        h0 = array(
            [[0.09082617,  0.85490047,  0.57234593],
             [0.82766740,  0.21301186,  0.90913979]])
        w0_local = array(
            [[0.40967962,  0.82131499],
             [0.98889171,  0.54361205],
             [0.08074117,  0.44954983],
             [0.68648657,  0.71598381]])
        w0 = self.sc.parallelize(zip(keys, w0_local))

        # if the rows of h are not normalized on each iteration:
        h_true = array(
            [[0.88909291,   0.        ,   0.45868362],
             [1.54548542,   6.33549752,  10.65274926]])
        w_true = array(
            [[0.67478533,  0.47374843],
             [0.08587805,  0.12829046],
             [0.        ,  0.58218579],
             [5.34371512,  0.14884191]])

        # if the columns of h are normalized (as in the current implementation):
        scale_mat = diag(norm(h_true, axis=1))
        h_true = dot(LinAlg.inv(scale_mat), h_true)
        w_true = dot(w_true, scale_mat)

        # calculate NMF using the Thunder implementation
        nmf_thunder = NMF(k=2, method="als", h0=h0, w0=w0, maxiter=20)
        nmf_thunder.calc(data)
        h_thunder = nmf_thunder.h
        w_thunder = array(nmf_thunder.w.values().collect())

        tol = 1e-04  # allow small error
        assert(allclose(w_thunder, w_true, atol=tol))
        assert(allclose(h_thunder, h_true, atol=tol))
示例#5
0
    def test_init(self):
        """
        test performance of whole function, including random initialization
        """
        data_local = array([
            [1.0, 2.0, 6.0],
            [1.0, 3.0, 0.0],
            [1.0, 4.0, 6.0],
            [5.0, 1.0, 4.0]])
        data = self.sc.parallelize(zip([array([i]) for i in range(data_local.shape[0])], data_local))

        nmf_thunder = NMF(k=2)
        nmf_thunder.calc(data)

        # check to see if Thunder's solution achieves close-to-optimal reconstruction error
        # scikit-learn's solution achieves 2.993952
        # matlab's non-deterministic implementation usually achieves < 2.9950 (when it converges)
        assert(nmf_thunder.rec_err < 2.9950)
示例#6
0
    def test_als(self):
        """ Test accuracy of alternating least-squares NMF algorithm
        against the MATLAB-computed version
        """
        #  set data and initializing constants
        keys = [array([i+1]) for i in range(4)]
        data_local = array([
            [1.0, 2.0, 6.0],
            [1.0, 3.0, 0.0],
            [1.0, 4.0, 6.0],
            [5.0, 1.0, 4.0]])
        data = self.sc.parallelize(zip(keys, data_local))
        h0 = array(
            [[0.09082617,  0.85490047,  0.57234593],
             [0.82766740,  0.21301186,  0.90913979]])

        # if the rows of h are not normalized on each iteration:
        h_true = array(
            [[0.    ,    0.6010,    0.9163],
             [0.8970,    0.1556,    0.7423]])
        w_true = array(
            [[4.5885,    1.5348],
             [1.3651,    0.2184],
             [5.9349,    1.0030],
             [0.    ,    5.5147]])

        # if the columns of h are normalized (as in the current implementation):
        scale_mat = diag(norm(h_true, axis=1))
        h_true = dot(LinAlg.inv(scale_mat), h_true)
        w_true = dot(w_true, scale_mat)

        # calculate NMF using the Thunder implementation
        # (maxiter=9 corresponds with Matlab algorithm)
        nmf_thunder = NMF(k=2, method="als", h0=h0, maxiter=9)
        nmf_thunder.calc(data)
        h_thunder = nmf_thunder.h
        w_thunder = array(nmf_thunder.w.values().collect())

        tol = 1e-03  # allow small error
        assert(allclose(w_thunder, w_true, atol=tol))
        assert(allclose(h_thunder, h_true, atol=tol))