def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'], cast to float32, float32, float16 and
    float64 respectively.

    Illustrative output (the concrete values and labels come from
    ``tm.getSeriesData()``):

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    # Target dtype per column; dict order fixes the order of the casts.
    target_dtypes = {
        "A": "float32",
        "B": "float32",
        "C": "float16",
        "D": "float64",
    }
    df = DataFrame(tm.getSeriesData())
    for column, dtype in target_dtypes.items():
        df[column] = df[column].astype(dtype)
    return df
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.

    Illustrative output (the concrete values and labels come from
    ``tm.getSeriesData()``):

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    # Start from the shared series data with every column cast to int.
    int_columns = {
        name: values.astype(int) for name, values in tm.getSeriesData().items()
    }
    df = DataFrame(int_columns)

    df["A"] = df["A"].astype("int32")
    # B is replaced wholesale by a column of ones.
    df["B"] = np.ones(len(df["B"]), dtype="uint64")
    df["C"] = df["C"].astype("uint8")
    # D is built from the already-cast C, so it carries C's values as int64.
    df["D"] = df["C"].astype("int64")
    return df
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; A is int32, B is uint64 (all ones),
    C is uint8 and D is int64.

    Illustrative output (the concrete values and labels come from
    ``tm.getSeriesData()``):

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    df = DataFrame(
        {label: series.astype(int) for label, series in tm.getSeriesData().items()}
    )

    narrowed_a = df["A"].astype('int32')
    df["A"] = narrowed_a

    ones = np.ones(len(df["B"]), dtype='uint64')
    df["B"] = ones

    df["C"] = df["C"].astype('uint8')

    # Note: D is computed from column C after its uint8 cast, not from the
    # original D data.
    widened_c = df["C"].astype('int64')
    df["D"] = widened_c
    return df
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; A and B are float32, C is float16 and
    D is float64.

    Illustrative output (the concrete values and labels come from
    ``tm.getSeriesData()``):

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData())
    # (column, dtype) pairs, applied in order.
    casts = (
        ("A", 'float32'),
        ("B", 'float32'),
        ("C", 'float16'),
        ("D", 'float64'),
    )
    for label, float_type in casts:
        converted = df[label].astype(float_type)
        df[label] = converted
    return df
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D']. A is cast to float16, B to float32 and
    C to float64; D is left as produced by ``tm.getSeriesData()``.
    """
    df = DataFrame(tm.getSeriesData())
    # Only A, B and C are re-cast here; D is deliberately not touched.
    for column, dtype in (("A", 'float16'), ("B", 'float32'), ("C", 'float64')):
        df[column] = df[column].astype(dtype)
    return df
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'], cast to float32, float32, float16 and
    float64 respectively.
    """
    df = DataFrame(tm.getSeriesData())

    # Cast one column at a time, in A-B-C-D order.
    df["A"] = df["A"].astype('float32')
    df["B"] = df["B"].astype('float32')
    df["C"] = df["C"].astype('float16')
    df["D"] = df["D"].astype('float64')

    return df
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.
    """
    as_int = {
        key: column.astype(int) for key, column in tm.getSeriesData().items()
    }
    df = DataFrame(as_int)

    df["A"] = df["A"].astype('int32')
    # B does not keep its data: it becomes a constant column of ones.
    df["B"] = np.ones(len(df["B"]), dtype='uint64')
    df["C"] = df["C"].astype('uint8')
    # D takes its values from the uint8-cast C column, widened to int64.
    df["D"] = df["C"].astype('int64')
    return df
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.
    """
    series_items = compat.iteritems(tm.getSeriesData())
    df = DataFrame({name: data.astype(int) for name, data in series_items})

    df["A"] = df["A"].astype('int32')

    # B is overwritten with ones rather than cast.
    row_count = len(df["B"])
    df["B"] = np.ones(row_count, dtype='uint64')

    df["C"] = df["C"].astype('uint8')
    # D is produced from the already-cast C column.
    df["D"] = df["C"].astype('int64')
    return df
def test_df_where_with_category(self, kwargs): # GH 16979 df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) mask = np.array([[True, False, True], [False, True, True]]) # change type to category df.A = df.A.astype("category") df.B = df.B.astype("category") df.C = df.C.astype("category") result = df.A.where(mask[:, 0], **kwargs) expected = Series(pd.Categorical([0, np.nan], categories=[0, 3]), name="A") tm.assert_series_equal(result, expected)
def test_df_where_with_category(self, kwargs): # GH#16979 df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) mask = np.array([[True, False, False], [False, False, True]]) # change type to category df.A = df.A.astype("category") df.B = df.B.astype("category") df.C = df.C.astype("category") result = df.where(mask, **kwargs) A = pd.Categorical([0, np.nan], categories=[0, 3]) B = pd.Categorical([np.nan, np.nan], categories=[1, 4]) C = pd.Categorical([np.nan, 5], categories=[2, 5]) expected = DataFrame({"A": A, "B": B, "C": C}) tm.assert_frame_equal(result, expected) # Check Series.where while we're here result = df.A.where(mask[:, 0], **kwargs) expected = Series(A, name="A") tm.assert_series_equal(result, expected)
def test_df_where_change_dtype(self): # GH 16979 df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) mask = np.array([[True, False, False], [False, False, True]]) result = df.where(mask) expected = DataFrame([[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC")) tm.assert_frame_equal(result, expected) # change type to category df.A = df.A.astype("category") df.B = df.B.astype("category") df.C = df.C.astype("category") result = df.where(mask) A = pd.Categorical([0, np.nan], categories=[0, 3]) B = pd.Categorical([np.nan, np.nan], categories=[1, 4]) C = pd.Categorical([np.nan, 5], categories=[2, 5]) expected = DataFrame({"A": A, "B": B, "C": C}) tm.assert_frame_equal(result, expected)
# b    2
# c    2
# dtype: int64


def f(x):
    """Return the minimum and maximum of ``x`` as a Series indexed 'min'/'max'."""
    return Series([x.min(), x.max()], index=['min', 'max'])


frame.apply(f)
# result: min and max of each column (DataFrame.apply passes columns to f
# by default, as the column labels in the output show)
#      A  B  C
# min  0  1  2
# max  6  7  8


def _format(x):
    """Format ``x`` as a string with two decimal places."""
    return '%.2f' % x


# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map - confirm the pandas version this script targets.
frame.applymap(_format)
# result: the formatting is applied element-wise to the whole DataFrame
#       A     B     C
# a  0.00  1.00  2.00
# b  3.00  4.00  5.00
# c  6.00  7.00  8.00

frame.A = frame['A'].map(_format)
print(frame)
# result: the formatting is applied to a single Series (column A) only
#       A  B  C
# a  0.00  1  2
# b  3.00  4  5
# c  6.00  7  8
# Name: A, dtype: object