def test_coerce_infer_columns_format_supercedes_try_fallback_columns(self):
    # A format given in "column_formats" must win over the format carried by
    # a same-named fallback column.
    fallback_columns = [Column("A", ColumnType.Number("{:,.2f}"))]
    module_output = {
        "dataframe": pd.DataFrame({"A": [1, 2]}),
        "column_formats": {"A": "{:,d}"},
    }

    coerced = ProcessResult.coerce(
        module_output, try_fallback_columns=fallback_columns
    )

    expected_columns = [Column("A", ColumnType.Number("{:,d}"))]
    self.assertEqual(coerced.columns, expected_columns)
def test_arrow_uint8_column(self):
    # An Arrow uint8 column should come back as a uint8 DataFrame column,
    # with its number format carried over to the returned column list.
    values = [1, 2, 3, 253]
    source = arrow_table(
        {"A": pyarrow.array(values, type=pyarrow.uint8())},
        columns=[atypes.Column("A", ColumnType.Number("{:,d}"))],
    )

    dataframe, columns = arrow_table_to_dataframe(source)

    expected_frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
    assert_frame_equal(dataframe, expected_frame)
    self.assertEqual(columns, [Column("A", ColumnType.Number("{:,d}"))])
def test_dataframe_uint8_column(self):
    # A uint8 DataFrame column should be written as an Arrow uint8 column
    # that keeps the column's number format.
    frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
    actual = dataframe_to_arrow_table(
        frame,
        [Column("A", ColumnType.Number("{:,d}"))],
        self.path,
    )

    expected = arrow_table(
        {"A": pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())},
        [atypes.Column("A", ColumnType.Number("{:,d}"))],
    )
    assert_arrow_table_equals(actual, expected)
def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
    # Fallback columns whose type disagrees with the data (Text for the
    # numeric "A", Number for the textual "B") must not affect the result.
    frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
    mismatched_fallbacks = [
        Column("A", ColumnType.Text()),
        Column("B", ColumnType.Number()),
    ]

    coerced = ProcessResult.coerce(
        frame, try_fallback_columns=mismatched_fallbacks
    )

    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
    ]
    self.assertEqual(coerced.columns, expected_columns)
def test_coerce_infer_columns(self):
    # Coercing a bare DataFrame should infer a Number column for the integer
    # data and a Text column for the string data.
    frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})

    coerced = ProcessResult.coerce(frame)

    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
    ]
    self.assertEqual(coerced.columns, expected_columns)
def test_to_arrow_normal_dataframe(self):
    # Converting a successful (non-error) result with to_arrow() should
    # produce a RenderResult whose ArrowTable points at the given path and
    # carries the table data plus matching metadata.
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    # mkstemp() is only used to pick an unused path: remove the file so we
    # can check below that to_arrow() itself (re)creates it.
    os.unlink(filename)
    try:
        process_result = ProcessResult.coerce(pd.DataFrame({"A": [1, 2]}))
        result = process_result.to_arrow(Path(filename))
        self.assertEqual(
            result,
            atypes.RenderResult(
                atypes.ArrowTable(
                    Path(filename),
                    pyarrow.table({"A": [1, 2]}),
                    atypes.TableMetadata(
                        2,
                        [
                            atypes.Column(
                                "A",
                                ColumnType.Number(
                                    # Whatever .format
                                    # ProcessResult.coerce() gave
                                    process_result.columns[0].type.format
                                ),
                            )
                        ],
                    ),
                ),
                [],
                {},
            ),
        )
        # Previously this was only checked implicitly, by the cleanup's
        # os.unlink() raising if the file was missing.
        self.assertTrue(os.path.exists(filename))
    finally:
        # Tolerate a missing file: if anything above failed before the file
        # was written, a FileNotFoundError raised here would hide the real
        # failure.
        try:
            os.unlink(filename)
        except FileNotFoundError:
            pass
def test_dataframe_uint8_column(self):
    # A uint8 DataFrame column paired with a "{:,d}" Number column should
    # map to an Arrow uint8 column carrying that same format.
    frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
    columns = [Column("A", ColumnType.Number("{:,d}"))]
    expected_column = make_column(
        "A", [1, 2, 3, 253], type=pa.uint8(), format="{:,d}"
    )
    expected_table = make_table(expected_column)

    self._test_dataframe_to_arrow_table(frame, columns, expected_table)
def test_coerce_infer_columns_with_format(self):
    # A "column_formats" entry should be applied to the inferred Number
    # column; the Text column is inferred as usual.
    frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
    module_output = {"dataframe": frame, "column_formats": {"A": "{:,d}"}}

    coerced = ProcessResult.coerce(module_output)

    expected_columns = [
        Column("A", ColumnType.Number(format="{:,d}")),
        Column("B", ColumnType.Text()),
    ]
    self.assertEqual(coerced.columns, expected_columns)
def test_ctor_infer_columns(self):
    # Constructing a ProcessResult from only a DataFrame should infer
    # Number, Text and Timestamp column types from the data.
    frame = pd.DataFrame(
        {
            "A": [1, 2],
            "B": ["x", "y"],
            "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
        }
    )

    built = ProcessResult(frame)

    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
        Column("C", ColumnType.Timestamp()),
    ]
    self.assertEqual(built.columns, expected_columns)
def test_columns(self):
    # column_names and columns should reflect each DataFrame column in
    # order; the categorical column is expected to be reported as Text.
    frame = pd.DataFrame(
        {
            "A": [1],  # number
            "B": ["foo"],  # str
            "C": dt(2018, 8, 20),  # datetime64
        }
    )
    categorical = pd.Series(["cat"], dtype="category")
    frame["D"] = categorical

    built = ProcessResult(frame)

    self.assertEqual(built.column_names, ["A", "B", "C", "D"])
    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
        Column("C", ColumnType.Timestamp()),
        Column("D", ColumnType.Text()),
    ]
    self.assertEqual(built.columns, expected_columns)
def test_ctor_infer_columns(self):
    # Constructing a ProcessResult from only a DataFrame should infer
    # Number, Text, Timestamp and (for the daily Period data) Date("day")
    # column types.
    data = {
        "A": [1, 2],
        "B": ["x", "y"],
        "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
        "D": [pd.Period("2021-01-01", freq="D"), pd.NaT],
    }
    frame = pd.DataFrame(data)

    built = ProcessResult(frame)

    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
        Column("C", ColumnType.Timestamp()),
        Column("D", ColumnType.Date("day")),
    ]
    self.assertEqual(built.columns, expected_columns)
def test_table_metadata(self):
    # table_metadata should report the row count (3) together with the
    # inferred column list.
    frame = pd.DataFrame({"A": [1, 2, 3]})

    built = ProcessResult(frame)

    expected_metadata = TableMetadata(3, [Column("A", ColumnType.Number())])
    self.assertEqual(built.table_metadata, expected_metadata)