def test_cast_dtypes_inplace(self):
    """Check the state of ``self.dft`` after ``_cast_dtypes`` is run on it.

    The return value of ``_cast_dtypes`` is ignored; every assertion
    inspects the fixture frame itself.
    """
    _cast_dtypes(self.dft)

    expected_dtypes = {np.dtype("int"), np.dtype("float"), np.dtype("O")}
    observed_dtypes = set(self.dft.dtypes)
    self.assertEqual(observed_dtypes, expected_dtypes)

    # The first entry of column "F" is expected to be the string "false".
    first_f = self.dft["F"][0]
    self.assertEqual(first_f, "false")
def test_schema(self):
    """Check the column names and type names returned by ``_get_schema``.

    The frame mixes a plain integer column, a column containing ``None``
    and a nullable ``Int64`` column; it is passed through ``_cast_dtypes``
    before the schema is read.
    """
    columns = {
        "a": [1, 2],
        "b": [None, 3],
        "c": pd.array([4, np.nan], dtype="Int64"),
    }
    frame = pd.DataFrame(columns)
    _cast_dtypes(frame)

    schema = _get_schema(frame)
    col_names, col_types = schema

    self.assertListEqual(col_names, ["a", "b", "c"])
    self.assertListEqual(col_types, ["bigint", "double", "bigint"])
def test_query_builder(self):
    """Check the INSERT statement produced by ``self.writer._build_query``.

    The frame is passed through ``_cast_dtypes`` first, then handed to the
    builder as a list of plain row tuples plus its column index.
    """
    frame = pd.DataFrame(
        {
            "a": [1, 2],
            "b": [None, 3],
            "c": pd.array([4, np.nan], dtype="Int64"),
        }
    )
    _cast_dtypes(frame)

    rows = list(frame.itertuples(index=False, name=None))
    built = self.writer._build_query("foo", "bar", rows, frame.columns)

    # Column "b" contains a null, so it is handled as float64: its 3 is
    # rendered as 3.0, while missing values are rendered as null.
    expected = "INSERT INTO foo.bar (a, b, c) VALUES (1, null, 4), (2, 3.0, null)"
    self.assertEqual(built, expected)
def test_cast_dtypes(self):
    """Check the copy returned by ``_cast_dtypes(self.dft, inplace=False)``.

    Covers the resulting set of column dtypes, the string form of selected
    values, and how missing values are represented per column.
    """
    dft = _cast_dtypes(self.dft, inplace=False)

    self.assertEqual(
        set(dft.dtypes),
        {
            np.dtype("int64"),
            np.dtype("float"),
            np.dtype("O"),
            pd.Int64Dtype(),
        },
    )

    self.assertEqual(dft["F"][0], "false")

    # assertIsInstance reports the offending value and its type on failure,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(dft["H"][1], str)
    self.assertEqual(dft["H"][1], "[1, 2, 3]")

    # Missing entries in these columns are expected to be None.
    self.assertIsNone(dft["H"][2])
    self.assertIsNone(dft["I"][2])
    self.assertIsNone(dft["J"][2])
    self.assertIsNone(dft["K"][1])
    self.assertIsNone(dft["L"][1])
    self.assertIsNone(dft["M"][1])

    # A nullable int column is float dtype by pandas default, so its
    # missing entry is NaN and its present entries are floats.
    self.assertTrue(np.isnan(dft["N"][1]))
    self.assertIsInstance(dft["N"][0], float)

    # _cast_dtypes keeps np.nan/pd.NA when an Int64 column is given None.
    # This is for consistency with _get_schema.
    self.assertTrue(pd.isna(dft["O"][2]))
def test_cast_dtypes_keep_list(self):
    """Check ``self.dft`` after ``_cast_dtypes(self.dft, keep_list=True)``.

    Columns "H", "I" and "J" are expected to contain only ``list`` values,
    and the element types inside those lists are spot-checked.
    """
    _cast_dtypes(self.dft, keep_list=True)

    self.assertEqual(
        set(self.dft.dtypes),
        {np.dtype("int"), np.dtype("float"), np.dtype("O")},
    )

    for column in ("H", "I", "J"):
        self.assertTrue(
            self.dft[column].apply(lambda x: isinstance(x, list)).all(),
            msg="column {!r} contains non-list values".format(column),
        )

    # assertIsInstance / assertIsNone report the offending value on failure,
    # unlike assertTrue(isinstance(...)) / assertTrue(x is None).
    self.assertIsInstance(self.dft["H"].iloc[0][2], int)
    # numpy.ndarray containing numpy.nan will be converted as float type
    self.assertIsInstance(self.dft["I"].iloc[0][2], float)
    self.assertIsInstance(self.dft["I"].iloc[1][2], int)
    self.assertIsNone(self.dft["I"].iloc[0][1])
def test_cast_dtypes(self):
    """Check the copy returned by ``_cast_dtypes(self.dft, inplace=False)``.

    Covers the resulting set of column dtypes and the string form of
    selected values in columns "F" and "H".
    """
    dft = _cast_dtypes(self.dft, inplace=False)

    self.assertEqual(
        set(dft.dtypes),
        {np.dtype("int"), np.dtype("float"), np.dtype("O")},
    )

    self.assertEqual(dft["F"][0], "false")

    # assertIsInstance reports the offending value and its type on failure,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(dft["H"][1], str)
    self.assertEqual(dft["H"][1], "[1, 2, 3]")
def test_cast_dtypes_nullable(self):
    """Check ``_cast_dtypes`` on "boolean" and "string" extension columns.

    Both columns are expected to come back with object dtype, with the
    missing entry in the last row represented as ``None``.
    """
    source = pd.DataFrame(
        {
            "P": pd.Series([True, False, None], dtype="boolean"),
            "Q": pd.Series(["foo", "bar", None], dtype="string"),
        }
    )
    converted = _cast_dtypes(source, inplace=False)

    observed_dtypes = set(converted.dtypes)
    self.assertEqual(observed_dtypes, {np.dtype("O")})

    self.assertIsNone(converted["P"][2])
    self.assertIsNone(converted["Q"][2])
def test_cast_dtypes_keep_list(self):
    """Check ``self.dft`` after ``_cast_dtypes(self.dft, keep_list=True)``.

    Columns "H", "I" and "J" are checked element-wise with
    ``_isinstance_or_null(value, list)``, and the element types inside the
    lists are spot-checked.
    """
    _cast_dtypes(self.dft, keep_list=True)

    self.assertEqual(
        set(self.dft.dtypes),
        {
            np.dtype("int64"),
            np.dtype("float"),
            np.dtype("O"),
            pd.Int64Dtype(),
        },
    )

    for column in ("H", "I", "J"):
        self.assertTrue(
            self.dft[column].apply(_isinstance_or_null, args=(list, )).all(),
            msg="column {!r} failed the list-or-null check".format(column),
        )

    # assertIsInstance reports the offending value and its type on failure,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(self.dft["H"].iloc[0][2], int)
    # numpy.ndarray containing numpy.nan will be converted as float type
    self.assertIsInstance(self.dft["I"].iloc[0][2], float)
    self.assertIsInstance(self.dft["I"].iloc[1][2], int)
    self.assertIsNone(self.dft["I"].iloc[0][1])