def test_build_table_schema(self):
    """Check the schema built from ``self.df`` with and without the version key.

    ``self.df`` is provided outside this method; the expectation below implies
    an index named ``idx`` and columns ``A``-``D``.
    """
    # (field name, Table Schema type) in the order the fields must appear.
    name_type_pairs = [
        ("idx", "integer"),
        ("A", "integer"),
        ("B", "string"),
        ("C", "datetime"),
        ("D", "duration"),
    ]
    expected_schema = {
        "fields": [
            {"name": field_name, "type": field_type}
            for field_name, field_type in name_type_pairs
        ],
        "primaryKey": ["idx"],
    }

    unversioned = build_table_schema(self.df, version=False)
    assert unversioned == expected_schema

    # With the default arguments the version key must be present.
    versioned = build_table_schema(self.df)
    assert "pandas_version" in versioned
def test_build_table_schema(self):
    """Check the schema built from ``self.df`` when columns carry ``extDtype``.

    ``self.df`` is provided outside this method; the expectation below implies
    a default-named index and four columns ``A``-``D``.
    """
    expected_fields = [
        dict(name="index", type="integer"),
        dict(name="A", type="any", extDtype="DateDtype"),
        dict(name="B", type="any", extDtype="decimal"),
        dict(name="C", type="any", extDtype="string"),
        dict(name="D", type="integer", extDtype="Int64"),
    ]
    expected_schema = {"fields": expected_fields, "primaryKey": ["index"]}

    unversioned = build_table_schema(self.df, version=False)
    assert unversioned == expected_schema

    # With the default arguments the version key must be present.
    versioned = build_table_schema(self.df)
    assert "pandas_version" in versioned
def test_series(self):
    """A named integer Series yields an index field plus a field named after it."""
    named_series = pd.Series([1, 2, 3], name="foo")

    expected_schema = {
        "fields": [
            {"name": field_name, "type": "integer"}
            for field_name in ("index", "foo")
        ],
        "primaryKey": ["index"],
    }

    unversioned = build_table_schema(named_series, version=False)
    assert unversioned == expected_schema

    # With the default arguments the version key must be present.
    versioned = build_table_schema(named_series)
    assert "pandas_version" in versioned
def test_mi_falsey_name(self):
    """Field names for an unnamed two-level MultiIndex and integer column labels."""
    # GH 16203
    values = np.random.randn(4, 4)
    row_index = pd.MultiIndex.from_product([("A", "B"), ("a", "b")])
    frame = pd.DataFrame(values, index=row_index)

    schema = build_table_schema(frame)
    field_names = [field["name"] for field in schema["fields"]]

    # Unnamed levels are expected as "level_N"; column labels 0..3 stay as ints.
    assert field_names == ["level_0", "level_1", 0, 1, 2, 3]
def test_series_unnamed(self):
    """An unnamed Series is expected to get the data field name ``"values"``."""
    unnamed_series = pd.Series([1, 2, 3])

    expected_schema = {
        "fields": [
            {"name": field_name, "type": "integer"}
            for field_name in ("index", "values")
        ],
        "primaryKey": ["index"],
    }

    schema = build_table_schema(unnamed_series, version=False)
    assert schema == expected_schema
def test_multiindex(self):
    """Schema for a copy of ``self.df`` re-indexed with a two-level MultiIndex.

    Checked twice: first with both levels unnamed, then with only the first
    level named ``idx0``.
    """
    frame = self.df.copy()
    frame.index = pd.MultiIndex.from_product([("a", "b"), (1, 2)])

    def expected_schema(first_level_name):
        # Only the first level's name differs between the two scenarios.
        name_type_pairs = [
            (first_level_name, "string"),
            ("level_1", "integer"),
            ("A", "integer"),
            ("B", "string"),
            ("C", "datetime"),
            ("D", "duration"),
        ]
        return {
            "fields": [
                {"name": field_name, "type": field_type}
                for field_name, field_type in name_type_pairs
            ],
            "primaryKey": [first_level_name, "level_1"],
        }

    schema = build_table_schema(frame, version=False)
    assert schema == expected_schema("level_0")

    # Name only the first level; the second keeps its positional fallback name.
    frame.index.names = ["idx0", None]
    schema = build_table_schema(frame, version=False)
    assert schema == expected_schema("idx0")
def test_build_table_schema(self):
    """Check the schema of a one-row frame whose columns use extension arrays."""
    date_column = DateArray([dt.date(2021, 10, 10)])
    decimal_column = DecimalArray([decimal.Decimal(10)])
    string_column = array(["pandas"], dtype="string")
    int_column = array([10], dtype="Int64")
    frame = DataFrame(
        {
            "A": date_column,
            "B": decimal_column,
            "C": string_column,
            "D": int_column,
        }
    )

    expected_fields = [
        dict(name="index", type="integer"),
        dict(name="A", type="any", extDtype="DateDtype"),
        dict(name="B", type="any", extDtype="decimal"),
        dict(name="C", type="any", extDtype="string"),
        dict(name="D", type="integer", extDtype="Int64"),
    ]
    expected_schema = {"fields": expected_fields, "primaryKey": ["index"]}

    unversioned = build_table_schema(frame, version=False)
    assert unversioned == expected_schema

    # With the default arguments the version key must be present.
    versioned = build_table_schema(frame)
    assert "pandas_version" in versioned
def __init__(
    self,
    obj,
    orient: str | None,
    date_format: str,
    double_precision: int,
    ensure_ascii: bool,
    date_unit: str,
    index: bool,
    default_handler: Callable[[Any], JSONSerializable] | None = None,
    indent: int = 0,
):
    """
    Adds a `schema` attribute with the Table Schema, resets the index
    (can't do in caller, because the schema inference needs to know what
    the index is), forces orient to records, and forces date_format to
    'iso'.

    Raises
    ------
    ValueError
        If `date_format` is not 'iso', or if index names overlap with the
        Series name / column labels.
    NotImplementedError
        If `obj` is two-dimensional with MultiIndex columns.
    """
    super().__init__(
        obj,
        orient,
        date_format,
        double_precision,
        ensure_ascii,
        date_unit,
        index,
        default_handler=default_handler,
        indent=indent,
    )

    if date_format != "iso":
        raise ValueError(
            "Trying to write with `orient='table'` and "
            f"`date_format='{date_format}'`. Table Schema requires dates "
            "to be formatted with `date_format='iso'`"
        )

    # The schema is inferred from the untouched object, before the index is
    # reset below. NOTE(review): self.index is presumably assigned by the
    # parent initializer - confirm.
    self.schema = build_table_schema(obj, index=self.index)

    # NotImplemented on a column MultiIndex
    has_multiindex_columns = obj.ndim == 2 and isinstance(obj.columns, MultiIndex)
    if has_multiindex_columns:
        raise NotImplementedError(
            "orient='table' is not supported for MultiIndex columns"
        )

    # `and` binds tighter than `or`: the column-intersection test is evaluated
    # whenever the one-dimensional name check is falsy.
    series_name_clash = (obj.ndim == 1) and (obj.name in set(obj.index.names))
    if series_name_clash or len(obj.columns.intersection(obj.index.names)):
        raise ValueError("Overlapping names between the index and columns")

    # Work on a copy so the conversions below do not touch the caller's object.
    obj = obj.copy()

    # TODO: Do this timedelta properly in objToJSON.c See GH #15137
    timedelta_columns = obj.select_dtypes(include=["timedelta"]).columns
    if len(timedelta_columns):
        obj[timedelta_columns] = obj[timedelta_columns].applymap(
            lambda x: x.isoformat()
        )

    # Convert PeriodIndex to datetimes before serializing
    if is_period_dtype(obj.index.dtype):
        obj.index = obj.index.to_timestamp()

    # exclude index from obj if index=False
    self.obj = obj.reset_index(drop=not self.index)

    self.date_format = "iso"
    self.orient = "records"
    self.index = index