def test_to_arrow_normal_dataframe(self):
    """Check ``to_arrow()`` on a result coerced from a plain DataFrame.

    The expected ``RenderResult`` wraps an ``ArrowTable`` at the requested
    path with column "A" holding ``[1, 2]``, followed by ``[]`` and ``{}``
    as the remaining ``RenderResult`` arguments.
    """
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    # mkstemp() is only used to reserve a unique name: remove the file so
    # to_arrow() is handed a path that does not exist yet.
    os.unlink(filename)
    try:
        process_result = ProcessResult.coerce(pd.DataFrame({"A": [1, 2]}))
        result = process_result.to_arrow(Path(filename))
        self.assertEqual(
            result,
            atypes.RenderResult(
                atypes.ArrowTable(
                    Path(filename),
                    pyarrow.table({"A": [1, 2]}),
                    atypes.TableMetadata(
                        2,
                        [
                            atypes.Column(
                                "A",
                                ColumnType.Number(
                                    # Whatever .format
                                    # ProcessResult.coerce() gave
                                    process_result.columns[0].type.format
                                ),
                            )
                        ],
                    ),
                ),
                [],
                {},
            ),
        )
    finally:
        # The file is missing when coerce()/to_arrow() failed before
        # creating it; tolerate that so cleanup never hides the real failure.
        try:
            os.unlink(filename)
        except FileNotFoundError:
            pass
def test_column_from_thrift(self):
    """A thrift text column converts to the matching ``types.Column``."""
    thrift_column = ttypes.Column(
        "A", ttypes.ColumnType(text_type=ttypes.ColumnTypeText())
    )
    actual = types.Column.from_thrift(thrift_column)
    expected = types.Column("A", types.ColumnType.Text())
    self.assertEqual(actual, expected)
def test_params_to_thrift(self):
    """``Params.to_thrift()`` maps each Python value to a ``ParamValue``.

    Covers str, int, float, None, bool, a Column, a list of dicts and a
    tab placeholder string.
    """
    number_format = "{:,.2f}"
    params = types.Params(
        {
            "str": "s",
            "int": 2,
            "float": 1.2,
            "null": None,
            "bool": False,
            "column": types.Column(
                "A", types.ColumnType.Number(format=number_format)
            ),
            "listofmaps": [{"A": "a", "B": "b"}, {"C": "c", "D": "d"}],
            "tab": "TODO tabs",
        }
    )
    actual = params.to_thrift()

    expected_column = ttypes.Column(
        "A",
        ttypes.ColumnType(
            number_type=ttypes.ColumnTypeNumber(format=number_format)
        ),
    )
    first_map = ttypes.ParamValue(
        map_value={
            "A": ttypes.ParamValue(string_value="a"),
            "B": ttypes.ParamValue(string_value="b"),
        }
    )
    second_map = ttypes.ParamValue(
        map_value={
            "C": ttypes.ParamValue(string_value="c"),
            "D": ttypes.ParamValue(string_value="d"),
        }
    )
    expected = {
        "str": ttypes.ParamValue(string_value="s"),
        "int": ttypes.ParamValue(integer_value=2),
        "float": ttypes.ParamValue(float_value=1.2),
        # None is expected to become a ParamValue with no field set.
        "null": ttypes.ParamValue(),
        "bool": ttypes.ParamValue(boolean_value=False),
        "column": ttypes.ParamValue(column_value=expected_column),
        "listofmaps": ttypes.ParamValue(list_value=[first_map, second_map]),
        "tab": ttypes.ParamValue(string_value="TODO tabs"),
    }
    self.assertEqual(actual, expected)
def test_to_arrow(self):
    """``TableShape.to_arrow()`` yields the equivalent ``atypes.TableMetadata``."""
    shape = TableShape(
        3,
        [
            Column("A", ColumnType.NUMBER("{:,d}")),
            Column("B", ColumnType.TEXT()),
        ],
    )
    actual = shape.to_arrow()
    expected = atypes.TableMetadata(
        3,
        [
            atypes.Column("A", atypes.ColumnType.Number("{:,d}")),
            atypes.Column("B", atypes.ColumnType.Text()),
        ],
    )
    self.assertEqual(actual, expected)
def test_arrow_all_null_text_column(self):
    """Convert an Arrow text column that contains a null into a DataFrame.

    NOTE(review): despite the test name, only one of the four values is
    null -- confirm whether an all-null column was intended.
    """
    arrow_input = arrow_table(
        {"A": pyarrow.array(["a", "b", None, "c"])},
        columns=[atypes.Column("A", atypes.ColumnType.Text())],
    )
    dataframe, columns = arrow_table_to_dataframe(arrow_input)

    # The Arrow null is expected to surface as NaN in the DataFrame.
    expected_frame = pd.DataFrame({"A": ["a", "b", np.nan, "c"]})
    assert_frame_equal(dataframe, expected_frame)

    expected_columns = [Column("A", ColumnType.TEXT())]
    self.assertEqual(columns, expected_columns)
def test_arrow_uint8_column(self):
    """A uint8 Arrow column becomes a uint8 DataFrame column."""
    values = pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())
    arrow_input = arrow_table(
        {"A": values},
        columns=[atypes.Column("A", ColumnType.Number("{:,d}"))],
    )
    dataframe, columns = arrow_table_to_dataframe(arrow_input)

    expected_frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
    assert_frame_equal(dataframe, expected_frame)

    expected_columns = [Column("A", ColumnType.Number("{:,d}"))]
    self.assertEqual(columns, expected_columns)
def test_render_happy_path(self):
    """Compile a tiny module, render it over an Arrow table, check the output.

    The module's ``render()`` multiplies column "A" by ``params["m"]`` and
    appends ``params["s"]`` to column "B".
    """
    module = self.kernel.compile(
        MockPath(
            ["foo.py"],
            b"import pandas as pd\ndef render(table, params): return pd.DataFrame({'A': table['A'] * params['m'], 'B': table['B'] + params['s']})",
        ),
        "foo",
    )
    with arrow_table_context(
        {"A": [1, 2, 3], "B": ["a", "b", "c"]},
        columns=[
            types.Column("A", types.ColumnType.Number("{:,d}")),
            types.Column("B", types.ColumnType.Text()),
        ],
        dir=self.basedir,
    ) as input_table:
        # NOTE(review): presumably widens permissions so the kernel's
        # child process can read the input file -- confirm.
        input_table.path.chmod(0o644)
        with self.chroot_context.tempfile_context(
            prefix="output-", dir=self.basedir
        ) as output_path:
            result = self.kernel.render(
                module,
                self.chroot_context,
                self.basedir,
                input_table,
                types.Params({"m": 2.5, "s": "XX"}),
                types.Tab("tab-1", "Tab 1"),
                None,
                output_filename=output_path.name,
            )

            # Checked while the output tempfile context is still open.
            # assertEqual replaces the deprecated alias assertEquals,
            # which was removed in Python 3.12.
            self.assertEqual(
                result.table.table.to_pydict(),
                {"A": [2.5, 5.0, 7.5], "B": ["aXX", "bXX", "cXX"]},
            )
def test_table_metadata_to_thrift(self):
    """``TableMetadata.to_thrift()`` converts the row count and both text columns."""
    metadata = types.TableMetadata(
        4,
        [
            types.Column("A", types.ColumnType.Text()),
            types.Column("B", types.ColumnType.Text()),
        ],
    )
    actual = metadata.to_thrift()

    thrift_a = ttypes.Column(
        "A", ttypes.ColumnType(text_type=ttypes.ColumnTypeText())
    )
    thrift_b = ttypes.Column(
        "B", ttypes.ColumnType(text_type=ttypes.ColumnTypeText())
    )
    expected = ttypes.TableMetadata(4, [thrift_a, thrift_b])
    self.assertEqual(actual, expected)
def test_dataframe_uint8_column(self):
    """A uint8 DataFrame column is written as a uint8 Arrow column."""
    source_frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
    actual = dataframe_to_arrow_table(
        source_frame,
        [Column("A", ColumnType.NUMBER("{:,d}"))],
        self.path,
    )
    expected = arrow_table(
        {"A": pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())},
        [atypes.Column("A", atypes.ColumnType.Number("{:,d}"))],
    )
    assert_arrow_table_equals(actual, expected)
def test_arrow_timestamp_column(self):
    """A nanosecond Arrow timestamp column (with a null) becomes datetime64[ns]."""
    timestamps = pyarrow.array(
        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
        type=pyarrow.timestamp(unit="ns", tz=None),
    )
    arrow_input = arrow_table(
        {"A": timestamps},
        [atypes.Column("A", ColumnType.Timestamp())],
    )
    dataframe, columns = arrow_table_to_dataframe(arrow_input)

    expected_frame = pd.DataFrame(
        {"A": ["2019-09-17T21:21:00.123456Z", None]},
        dtype="datetime64[ns]",
    )
    assert_frame_equal(dataframe, expected_frame)

    expected_columns = [Column("A", ColumnType.Timestamp())]
    self.assertEqual(columns, expected_columns)
def test_dataframe_datetime_column(self):
    """A datetime64[ns] DataFrame column (with a null) is written as an Arrow timestamp."""
    source_frame = pd.DataFrame(
        {"A": ["2019-09-17T21:21:00.123456Z", None]}, dtype="datetime64[ns]"
    )
    actual = dataframe_to_arrow_table(
        source_frame,
        [Column("A", ColumnType.DATETIME())],
        self.path,
    )

    expected_values = pyarrow.array(
        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
        type=pyarrow.timestamp(unit="ns", tz=None),
    )
    expected = arrow_table(
        {"A": expected_values},
        [atypes.Column("A", atypes.ColumnType.Datetime())],
    )
    assert_arrow_table_equals(actual, expected)
def test_column_from_dict(self):
    """``fields._dict_to_column()`` builds a number Column from its dict form."""
    column_dict = {"name": "A", "type": "number", "format": "{:d}"}
    actual = fields._dict_to_column(column_dict)
    expected = types.Column("A", types.ColumnType.Number("{:d}"))
    self.assertEqual(actual, expected)
def test_to_arrow(self):
    """``Column.to_arrow()`` yields the equivalent ``atypes.Column``."""
    column = Column("A", ColumnType.NUMBER("{:,d}"))
    actual = column.to_arrow()
    expected = atypes.Column("A", atypes.ColumnType.Number("{:,d}"))
    self.assertEqual(actual, expected)
def test_from_arrow(self):
    """``Column.from_arrow()`` rebuilds a Column from its ``atypes`` counterpart."""
    arrow_column = atypes.Column("A", atypes.ColumnType.Number("{:,d}"))
    actual = Column.from_arrow(arrow_column)
    expected = Column("A", ColumnType.NUMBER("{:,d}"))
    self.assertEqual(actual, expected)