def ibis_schema_apply_to(schema: sch.Schema, df: dd.DataFrame) -> dd.DataFrame:
    """Coerce the columns of a dask DataFrame to match an ibis schema.

    Parameters
    ----------
    schema : ibis.schema.Schema
        Schema whose ``(column, dtype)`` pairs drive the conversion.
    df : dask.dataframe.DataFrame
        Frame whose columns are converted.

    Returns
    -------
    df : dask.dataframe.DataFrame
        The same object that was passed in.

    Notes
    -----
    Mutates `df`: converted columns are assigned back onto it.
    """
    for name, ibis_type in schema.items():
        target_dtype = ibis_type.to_dask()
        series = df[name]
        current_dtype = series.dtype

        try:
            differs = target_dtype != current_dtype
        except TypeError:
            # Some dtypes coming from dask cannot be compared; treat the
            # column as needing conversion in that case.
            differs = True

        # String columns are always converted, even when the dtypes match.
        if not (differs or isinstance(ibis_type, dt.String)):
            continue

        df[name] = convert(current_dtype, ibis_type, series)

    return df
def fmt_schema(schema: sch.Schema) -> str:
    """Format `schema`.

    Each column is rendered on its own line as ``<name> <type>``, with the
    names left-aligned and padded to the width of the longest name. When
    ``ibis.options.repr.table_columns`` is set and smaller than the number
    of columns, the listing is shortened with ``fmt_truncated``. The result
    is indented by two spaces.

    Parameters
    ----------
    schema
        Ibis schema to format

    Returns
    -------
    str
        Formatted schema
    """
    names = schema.names
    # `default=0` keeps a schema with no columns from raising
    # ``ValueError: max() arg is an empty sequence``.
    maxlen = max(map(len, names), default=0)
    cols = [f"{name:<{maxlen}} {typ}" for name, typ in schema.items()]
    depth = ibis.options.repr.table_columns
    if depth is not None and depth < len(cols):
        first_column_name = names[0]
        raw = fmt_truncated(
            cols,
            depth=depth,
            sep="\n",
            # Center the ellipsis under the first column name.
            ellipsis=util.VERTICAL_ELLIPSIS.center(len(first_column_name)),
        )
    else:
        raw = "\n".join(cols)

    return util.indent(raw, spaces=2)
def fetch_from_cursor(
    self,
    cursor: duckdb.DuckDBPyConnection,
    schema: sch.Schema,
):
    """Fetch the cursor's result and apply `schema` to it.

    Calls ``cursor.cursor.fetch_df()`` and returns whatever
    ``schema.apply_to`` produces for that result.
    """
    fetched = cursor.cursor.fetch_df()
    return schema.apply_to(fetched)
def fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
    """Materialize `cursor` into a DataFrame with `schema` applied.

    The records are loaded with ``pd.DataFrame.from_records`` using
    ``cursor.keys()`` as column names, then passed through
    ``schema.apply_to``. If the resulting frame has at least one row and
    ``geospatial_supported`` is truthy, the frame is handed to
    ``self._to_geodataframe`` and that result is returned instead.
    """
    records = pd.DataFrame.from_records(
        cursor,
        columns=cursor.keys(),
        coerce_float=True,
    )
    typed = schema.apply_to(records)

    # No rows, or no geospatial support: return the plain frame.
    if not len(typed) or not geospatial_supported:
        return typed

    return self._to_geodataframe(typed, schema)
def schema(
    pairs: Iterable[tuple[str, dt.DataType]] | Mapping[str, dt.DataType] | None = None,
    names: Iterable[str] | None = None,
    types: Iterable[str | dt.DataType] | None = None,
) -> sch.Schema:
    """Validate and return a Schema object.

    Parameters
    ----------
    pairs
        List or dictionary of name, type pairs. Mutually exclusive with
        `names` and `types`; when given, `names` and `types` are not
        consulted.
    names
        Field names. Mutually exclusive with `pairs`.
    types
        Field types. Mutually exclusive with `pairs`.

    Returns
    -------
    Schema
        An ibis schema

    Examples
    --------
    >>> from ibis import schema
    >>> sc = schema([('foo', 'string'),
    ...              ('bar', 'int64'),
    ...              ('baz', 'boolean')])
    >>> sc2 = schema(names=['foo', 'bar', 'baz'],
    ...              types=['string', 'int64', 'boolean'])
    """
    if pairs is None:
        return Schema(names, types)
    return Schema.from_dict(dict(pairs))
def create_table(
    self,
    table_name: str,
    obj: dd.DataFrame = None,
    schema: sch.Schema = None,
):
    """Create a table.

    Registers a frame under `table_name` in ``self.dictionary``. When `obj`
    is given it is stored as is. Otherwise an empty single-partition dask
    frame is built with the column names derived from `schema` and passed
    through ``schema.apply_to`` before being stored.

    Raises
    ------
    com.IbisError
        If neither `obj` nor `schema` is provided.
    """
    if obj is None and schema is None:
        raise com.IbisError('Must pass expr or schema')

    if obj is not None:
        frame = obj
    else:
        # Only the first element of each pair is needed for the columns.
        column_names = [
            toolz.first(pair) for pair in ibis_schema_to_dask(schema)
        ]
        empty = pd.DataFrame(columns=column_names)
        frame = schema.apply_to(dd.from_pandas(empty, npartitions=1))

    self.dictionary[table_name] = frame
def _get_table_schema(self, name):
    """Build a Schema from the tuples stored under `name` in ``self._tables``."""
    field_tuples = self._tables[name]
    return Schema.from_tuples(field_tuples)
def _get_table_schema(self, name):
    """Build a Schema for the table `name`, ignoring backtick quoting.

    All backtick characters are removed from `name` before it is used as
    the key into ``self._tables``.
    """
    unquoted = name.replace('`', '')
    field_tuples = self._tables[unquoted]
    return Schema.from_tuples(field_tuples)
def get_schema(self, name):
    """Build a Schema for `name` from ``MOCK_TABLES``.

    All backtick characters are removed from `name` before the lookup.
    """
    unquoted = name.replace('`', '')
    field_tuples = MOCK_TABLES[unquoted]
    return Schema.from_tuples(field_tuples)
def _schema_to_sqlalchemy_columns(schema: sch.Schema) -> list[sa.Column]:
    """Translate each field of an ibis schema into a SQLAlchemy column.

    Every ``(name, type)`` pair from ``schema.items()`` becomes
    ``sa.column(name, to_sqla_type(type))``; the returned list follows the
    iteration order of the schema.
    """
    # NOTE(review): the annotation names ``sa.Column`` while the body calls
    # the lowercase ``sa.column`` -- confirm the intended return type.
    columns = []
    for field_name, ibis_type in schema.items():
        columns.append(sa.column(field_name, to_sqla_type(ibis_type)))
    return columns
def _get_table_schema(self, name):
    """Build a Schema from the tuples stored under `name` in ``self._tables``."""
    # `name` is used verbatim as the key; backticks are not stripped here.
    field_tuples = self._tables[name]
    return Schema.from_tuples(field_tuples)