def test_transform_series_int(self):
    """Check integer ``ModelSeries`` transforms against the reference ``pp`` models.

    For every model name in the list, the model is built twice: once via
    ``s.preprocessing`` and once via ``pp`` (presumably
    ``sklearn.preprocessing`` -- confirm against the file's imports).
    Both the ``fit`` + ``transform`` path and the ``fit_transform`` path
    must return a ``ModelSeries`` whose values match the flattened output
    of the reference model.
    """
    # Local import keeps this block self-contained; the interpreter version
    # is read from ``sys`` rather than from ``pd.compat.PY3``, which is a
    # private pandas helper that is absent from recent pandas releases.
    import sys

    arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
    s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

    # reshape arr to 2d
    arr = arr.reshape(-1, 1)

    if sys.version_info[0] >= 3:
        # MinMaxScaler raises TypeError in ufunc
        models = ['Binarizer', 'Imputer', 'StandardScaler']
    else:
        models = ['Binarizer', 'Imputer', 'StandardScaler', 'MinMaxScaler']

    for model in models:
        # Two-step path: fit first, then transform.
        mod1 = getattr(s.preprocessing, model)()
        mod2 = getattr(pp, model)()
        s.fit(mod1)
        mod2.fit(arr)

        result = s.transform(mod1)
        expected = mod2.transform(arr).flatten()
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # One-step path on fresh, unfitted models.
        mod1 = getattr(s.preprocessing, model)()
        mod2 = getattr(pp, model)()

        result = s.fit_transform(mod1)
        expected = mod2.fit_transform(arr).flatten()
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
def test_LabelBinarizer2(self):
    """Binarize string labels and the iris target through ``LabelBinarizer``.

    Covers three situations: a small string series, the ``target`` of a
    ``ModelFrame`` built from ``datasets.load_iris()``, and assigning the
    binarized frame back to ``df.target``.
    """
    labels = np.array(['X', 'Y', 'Z', 'X'])
    s = pdml.ModelSeries(labels)

    lb = s.preprocessing.LabelBinarizer()
    s.fit(lb)
    binarized = s.transform(lb)
    self.assertIsInstance(binarized, pdml.ModelFrame)

    small_expected = pd.DataFrame({0: [1, 0, 0, 1],
                                   1: [0, 1, 0, 0],
                                   2: [0, 0, 1, 0]})
    self.assert_frame_equal(binarized, small_expected)

    # Iris target: the expected frame marks rows 0-49, 50-99 and 100-149
    # in columns 0, 1 and 2 respectively.
    iris_expected = pd.DataFrame({0: [1] * 50 + [0] * 100,
                                  1: [0] * 50 + [1] * 50 + [0] * 50,
                                  2: [0] * 100 + [1] * 50})

    df = pdml.ModelFrame(datasets.load_iris())
    df.target.fit(lb)
    binarized = df.target.transform(lb)
    self.assert_frame_equal(binarized, iris_expected)

    # Assigning the binarized result back replaces the target in place.
    df = pdml.ModelFrame(datasets.load_iris())
    df.target.fit(lb)
    df.target = df.target.transform(lb)
    self.assertEqual(df.shape, (150, 7))
    self.assert_frame_equal(df.target, iris_expected)
def test_transform_series_float(self):
    """Check float ``ModelSeries`` transforms against the reference ``pp`` models.

    For every model name in the list, the model is built twice: once via
    ``s.preprocessing`` and once via ``pp`` (presumably
    ``sklearn.preprocessing`` -- confirm against the file's imports).
    Both the ``fit`` + ``transform`` path and the ``fit_transform`` path
    must return a ``ModelSeries`` whose values match the flattened output
    of the reference model.
    """
    # Builtin ``float`` is used for the dtype: ``np.float`` was merely an
    # alias of it and is no longer available in current NumPy releases.
    arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype=float)
    s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

    # Feed the reference models a 2d single-column array; their output is
    # flattened again below before comparing with the series values.
    arr = arr.reshape(-1, 1)

    models = ['Binarizer', 'Imputer', 'StandardScaler', 'MinMaxScaler']
    for model in models:
        # Two-step path: fit first, then transform.
        mod1 = getattr(s.preprocessing, model)()
        mod2 = getattr(pp, model)()
        s.fit(mod1)
        mod2.fit(arr)

        result = s.transform(mod1)
        expected = mod2.transform(arr).flatten()
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # One-step path on fresh, unfitted models.
        mod1 = getattr(s.preprocessing, model)()
        mod2 = getattr(pp, model)()

        result = s.fit_transform(mod1)
        expected = mod2.fit_transform(arr).flatten()
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
def test_transform_series_float(self, algo):
    """Check one float ``ModelSeries`` transform against its ``pp`` reference.

    Parameters
    ----------
    algo : str
        Attribute name looked up on both ``s.preprocessing`` and ``pp``
        to obtain the model class under test.

    Both the ``fit`` + ``transform`` path and the ``fit_transform`` path
    must return a ``ModelSeries`` whose values match the flattened output
    of the reference model.
    """
    # Builtin ``float`` is used for the dtype: ``np.float`` was merely an
    # alias of it and is no longer available in current NumPy releases.
    arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype=float)
    s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

    # reshape arr to 2d
    arr = arr.reshape(-1, 1)

    # Two-step path: fit first, then transform.
    mod1 = getattr(s.preprocessing, algo)()
    mod2 = getattr(pp, algo)()
    s.fit(mod1)
    mod2.fit(arr)

    result = s.transform(mod1)
    expected = mod2.transform(arr).flatten()
    self.assertIsInstance(result, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(result.values, expected)

    # One-step path on fresh, unfitted models.
    mod1 = getattr(s.preprocessing, algo)()
    mod2 = getattr(pp, algo)()

    result = s.fit_transform(mod1)
    expected = mod2.fit_transform(arr).flatten()
    self.assertIsInstance(result, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(result.values, expected)
def test_series_instance(self):
    """A ``ModelSeries`` stays a ``ModelSeries`` after label-list selection."""
    labels = ['A', 'B', 'C', 'D', 'E']
    s = pdml.ModelSeries([1, 2, 3, 4, 5], index=labels)
    self.assertIsInstance(s, pdml.ModelSeries)

    # Selecting two labels must keep the subclass and yield two rows.
    subset = s[['A', 'B']]
    self.assertEqual(len(subset), 2)
    self.assertIsInstance(subset, pdml.ModelSeries)
def test_preprocessing_normalize(self):
    """``s.preprocessing.normalize()`` matches ``pp.normalize`` on the values.

    The reference result is computed by passing the series values, cast
    to float and promoted to a single-row 2d array, to ``pp.normalize``
    and taking the first row. The result must be a ``ModelSeries`` with
    the original index.
    """
    s = pdml.ModelSeries([1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
    self.assertIsInstance(s, pdml.ModelSeries)

    result = s.preprocessing.normalize()

    # Builtin ``float`` is used for the cast: ``np.float`` was merely an
    # alias of it and is no longer available in current NumPy releases.
    expected = pp.normalize(np.atleast_2d(s.values.astype(float)))[0, :]

    self.assertIsInstance(result, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(result.values, expected)
    self.assert_index_equal(result.index, s.index)
def test_series_groupby(self):
    """Grouping a ``ModelSeries`` yields model-aware groupby and group objects."""
    s = pdml.ModelSeries([1, 2, 1, 2], name='X')
    self.assertIsInstance(s, pdml.ModelSeries)

    by_key = s.groupby([1, 1, 1, 2])
    self.assertIsInstance(by_key, pdml.core.groupby.ModelSeriesGroupBy)

    # Key 1 covers the first three rows of the series.
    first_group = by_key.get_group(1)
    self.assertIsInstance(first_group, pdml.ModelSeries)

    wanted = pd.Series([1, 2, 1], index=[0, 1, 2], name='X')
    tm.assert_series_equal(first_group, wanted)
    self.assertEqual(first_group.name, 'X')
def test_series_to_frame(self):
    """``to_frame`` returns a ``ModelFrame`` with the expected column label.

    Checked for an unnamed series (default column ``0``) and a series
    named ``'name'`` (default column ``'name'``); in both cases an
    explicit ``name='x'`` must produce the column ``'x'``.
    """
    scenarios = (
        ({}, 0),
        ({'name': 'name'}, 'name'),
    )
    for ctor_kwargs, default_label in scenarios:
        s = pdml.ModelSeries([1, 2, 3, 4, 5], **ctor_kwargs)
        self.assertIsInstance(s, pdml.ModelSeries)

        # Without an explicit name the column label follows the series.
        frame = s.to_frame()
        self.assertIsInstance(frame, pdml.ModelFrame)
        self.assert_index_equal(frame.columns, pd.Index([default_label]))

        # An explicit name overrides whatever the series carries.
        frame = s.to_frame(name='x')
        self.assertIsInstance(frame, pdml.ModelFrame)
        self.assert_index_equal(frame.columns, pd.Index(['x']))
def test_Imputer(self):
    """``Imputer(axis=1)`` fills the NaN in a ``ModelSeries`` with 2.

    The same expected values are checked for the ``fit`` + ``transform``
    path and for ``fit_transform`` on a fresh imputer.
    """
    s = pdml.ModelSeries(np.array([1, np.nan, 3, 2]))
    expected = np.array([1, 2, 3, 2])

    # Two-step path: fit first, then transform.
    imputer = s.pp.Imputer(axis=1)
    s.fit(imputer)
    filled = s.transform(imputer)
    self.assertIsInstance(filled, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(filled.values, expected)

    # One-step path on a fresh imputer.
    imputer = s.pp.Imputer(axis=1)
    filled = s.fit_transform(imputer)
    self.assertIsInstance(filled, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(filled.values, expected)
def test_LabelEncoder_series(self):
    """``LabelEncoder`` round-trips a string ``ModelSeries`` and keeps its index.

    Encoding is checked through ``fit`` + ``transform`` and through
    ``fit_transform``; ``inverse_transform`` on the encoded series must
    reproduce the original series.
    """
    raw = np.array(['X', 'Y', 'Z', 'X'])
    s = pdml.ModelSeries(raw, index=['a', 'b', 'c', 'd'])
    codes = np.array([0, 1, 2, 0])

    # Two-step path: fit first, then transform.
    encoder = s.pp.LabelEncoder()
    s.fit(encoder)
    encoded = s.transform(encoder)
    self.assertIsInstance(encoded, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(encoded.values, codes)
    tm.assert_index_equal(encoded.index, s.index)

    # One-step path on a fresh encoder.
    encoder = s.pp.LabelEncoder()
    encoded = s.fit_transform(encoder)
    self.assertIsInstance(encoded, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(encoded.values, codes)

    # Decoding must give back the original series.
    decoded = encoded.inverse_transform(encoder)
    self.assertIsInstance(decoded, pdml.ModelSeries)
    tm.assert_series_equal(decoded, s)
def test_LabelEncoder(self):
    """``LabelEncoder`` encodes and decodes a string ``ModelSeries``.

    Encoding is checked through ``fit`` + ``transform`` and through
    ``fit_transform``, including the index of each encoded result;
    ``inverse_transform`` must give back the original label values.
    """
    raw = np.array(['X', 'Y', 'Z', 'X'])
    s = pdml.ModelSeries(raw, index=['a', 'b', 'c', 'd'])
    codes = np.array([0, 1, 2, 0])

    # Two-step path: fit first, then transform.
    encoder = s.pp.LabelEncoder()
    s.fit(encoder)
    encoded = s.transform(encoder)
    self.assertIsInstance(encoded, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(encoded.values, codes)
    self.assert_index_equal(encoded.index, pd.Index(['a', 'b', 'c', 'd']))

    # One-step path on a fresh encoder.
    encoder = s.pp.LabelEncoder()
    encoded = s.fit_transform(encoder)
    self.assertIsInstance(encoded, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(encoded.values, codes)

    # Decoding must give back the original labels.
    decoded = encoded.inverse_transform(encoder)
    self.assertIsInstance(decoded, pdml.ModelSeries)
    self.assert_numpy_array_equal(decoded.values.flatten(), raw)

    # Index of the ``fit_transform`` result.
    self.assert_index_equal(encoded.index, pd.Index(['a', 'b', 'c', 'd']))
def test_LabelBinarizer(self):
    """``LabelBinarizer`` turns an integer ``ModelSeries`` into a ``ModelFrame``.

    Binarization is checked through ``fit`` + ``transform`` and through
    ``fit_transform``; ``inverse_transform`` on the binarized frame must
    give back the original values.
    """
    raw = np.array([1, 2, 3, 2])
    s = pdml.ModelSeries(raw, index=['a', 'b', 'c', 'd'])
    one_hot = np.array([[1, 0, 0],
                        [0, 1, 0],
                        [0, 0, 1],
                        [0, 1, 0]])

    # Two-step path: fit first, then transform.
    binarizer = s.pp.LabelBinarizer()
    s.fit(binarizer)
    binarized = s.transform(binarizer)
    self.assertIsInstance(binarized, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(binarized.values, one_hot)
    tm.assert_index_equal(binarized.index, s.index)

    # One-step path on a fresh binarizer.
    binarizer = s.pp.LabelBinarizer()
    binarized = s.fit_transform(binarizer)
    self.assertIsInstance(binarized, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(binarized.values, one_hot)

    # Decoding must give back the original values.
    restored = binarized.inverse_transform(binarizer)
    self.assertIsInstance(restored, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(restored.values.flatten(), raw)

    # Index of the ``fit_transform`` result.
    tm.assert_index_equal(binarized.index, s.index)