def test_normalize_no_group(self):
        # With no log transform requested, every row of the normalized
        # table is expected to sum to the same value.
        normalizer = SCNormalizer(log_base=None)
        normalized = normalizer(self.iris)

        # Use the first row's total as the reference for all rows.
        totals = normalized.X.sum(axis=1)
        reference = totals[0]
        np.testing.assert_almost_equal(totals, reference)
    def test_normalize_works_as_preprocessor(self):
        # Transforming the raw table into the normalized table's domain
        # must reproduce the normalized values and keep the row ids.
        normalizer = SCNormalizer(normalize_cells=False, log_base=2)
        normalized = normalizer(self.iris)

        target_domain = normalized.domain
        replayed = self.iris.transform(target_domain)

        np.testing.assert_almost_equal(normalized.X, replayed.X)
        np.testing.assert_array_equal(normalized.ids, replayed.ids)
 def test_normalize_bin(self):
     # With cell normalization and log transform disabled, the output is
     # expected to be 1 where the raw value exceeds the threshold, else 0.
     threshold = 3.0
     normalizer = SCNormalizer(
         normalize_cells=False,
         log_base=None,
         bin_thresh=threshold,
     )
     binarized = normalizer(self.iris)

     above = self.iris.X > threshold
     expected = above.astype(int)
     np.testing.assert_almost_equal(binarized.X, expected)
    def test_normalize_log(self):
        # With cell normalization disabled, the output is expected to be
        # log(1 + x) expressed in the requested base.
        base = 2.7
        normalizer = SCNormalizer(normalize_cells=False, log_base=base)
        transformed = normalizer(self.iris)

        # Change of base: natural log divided by the natural log of the base.
        natural = np.log(1 + self.iris.X)
        expected = natural / np.log(base)
        np.testing.assert_almost_equal(transformed.X, expected)
    def test_normalized_data_can_be_pickled(self):
        # The normalized table must survive a pickle round trip, and the
        # unpickled domain must still be usable to transform the raw table.
        normalizer = SCNormalizer(normalize_cells=False, log_base=2)
        normalized = normalizer(self.iris)

        # Round trip through pickle.
        blob = pickle.dumps(normalized)
        restored = pickle.loads(blob)
        np.testing.assert_almost_equal(normalized.X, restored.X)
        np.testing.assert_array_equal(normalized.ids, restored.ids)

        # Re-derive the values from the raw table via the restored domain.
        replayed = self.iris.transform(restored.domain)
        np.testing.assert_almost_equal(normalized.X, replayed.X)
        np.testing.assert_array_equal(normalized.ids, replayed.ids)
    def test_normalize_with_group(self):
        # When equalizing by the class variable, the median row total is
        # expected to be the same in each of the three class groups.
        class_var = self.iris.domain.class_var
        normalizer = SCNormalizer(equalize_var=class_var, log_base=None)
        normalized = normalizer(self.iris)

        # Median of the per-row totals, computed separately per class value.
        medians = [
            np.median(normalized.X[self.iris.Y == label].sum(axis=1))
            for label in range(3)
        ]

        first, second, third = medians
        self.assertAlmostEqual(first, second)
        self.assertAlmostEqual(second, third)
    def test_normalize_nans(self):
        # Normalizing a table with NaN entries must give the same total
        # (ignoring NaNs) as normalizing the same table with those entries
        # set to zero.
        normalizer = SCNormalizer(
            log_base=None,
            normalize_cells=True,
            equalize_var="iris",
        )

        zeroed = self.iris.copy()
        with_nans = self.iris.copy()
        # Overwrite the leading diagonal entries X[i, i] in both copies:
        # zeros in one, NaNs in the other.
        for table, filler in ((zeroed, 0), (with_nans, np.nan)):
            for i in range(with_nans.X.shape[1]):
                table.X[i, i] = filler

        out_zeroed = normalizer(zeroed)
        out_nans = normalizer(with_nans)

        total_ignoring_nans = np.nansum(out_nans.X)
        total_zeroed = np.sum(out_zeroed.X)
        self.assertEqual(total_ignoring_nans, total_zeroed)
Пример #8
0
    def commit(self):
        """Build a normalizer from the current settings and send the outputs.

        Sends the normalized table (or ``None`` when there is no input data)
        on ``Outputs.data`` and the normalizer itself on
        ``Outputs.preprocessor``.
        """
        # The log base is used only when ``log_check`` is set.
        if self.log_check:
            base = self.log_base
        else:
            base = None

        table = self.data
        has_data = table is not None

        # Equalize by the selected attribute only when cell normalization is
        # enabled and the attribute is present in the input domain.
        equalize = None
        if has_data and self.normalize_cells:
            if self.selected_attr in table.domain:
                equalize = table.domain[self.selected_attr]

        normalizer = SCNormalizer(
            equalize_var=equalize,
            normalize_cells=self.normalize_cells,
            log_base=base,
        )
        result = normalizer(table) if has_data else None

        self.Outputs.data.send(result)
        self.Outputs.preprocessor.send(normalizer)
    def update_preprocessors(self):
        """Rebuild ``self.pp`` and ``self.pp_batch`` from the current settings."""
        # The log base is used only when ``log_check`` is set.
        base = self.log_base if self.log_check else None
        attr_name = self.attrs_model[self.selected_attr_index]
        link = self.LINK_FUNCTIONS[self.batch_link_index]

        # Equalize by the selected attribute only when cell normalization is
        # enabled and the attribute is present in the input domain.
        equalize = None
        table = self.data
        if table is not None and self.normalize_cells:
            domain = table.domain
            if attr_name in domain:
                equalize = domain[attr_name]

        self.pp = SCNormalizer(
            equalize_var=equalize,
            normalize_cells=self.normalize_cells,
            log_base=base,
        )

        # ``nonzero_only`` is switched on exactly when the log link is chosen.
        self.pp_batch = SCBatchNormalizer(
            link=link,
            nonzero_only=(link == LINK_LOG),
            batch_vars=self.batch_vars_selected,
        )
 def test_normalize_categorical(self):
     # A table whose only attribute is the (categorical) class variable is
     # expected to pass through the default normalizer unchanged.
     normalizer = SCNormalizer()
     class_only_domain = Domain([self.iris.domain.class_var])
     categorical = self.iris.transform(class_only_domain)

     normalized = normalizer(categorical)
     np.testing.assert_array_equal(normalized.X, categorical.X)