示例#1
0
文件: client.py 项目: cloudera/ibis
    def get_schema(self, table_name, database=None):
        """
        Return a Schema object for the indicated table and database

        Column metadata is read from ``self.con.get_table_details`` and each
        column type is converted with ``MapDDataType.parse``.

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None
          Not used by this implementation.

        Returns
        -------
        schema : ibis Schema
        """
        # Query the table details only once; the (name, type) pairs are all
        # that is needed to build the schema.
        return sch.schema(
            [
                (col.name, MapDDataType.parse(col.type))
                for col in self.con.get_table_details(table_name)
            ]
        )
示例#2
0
文件: alchemy.py 项目: cloudera/ibis
def schema_from_table(table, schema=None):
    """Build an ibis schema from the columns of a SQLAlchemy ``Table``.

    Parameters
    ----------
    table : sa.Table
        Table whose ``columns`` are translated.
    schema : mapping, optional
        Explicit types keyed by column name. A column found here takes its
        type from this mapping instead of from the SQLAlchemy column type.

    Returns
    -------
    schema
        The result of ``sch.schema`` for the ``(name, dtype)`` pairs, in the
        order the table lists its columns.
    """
    overrides = {} if schema is None else schema

    def _column_dtype(col_name, col):
        # An explicit override wins over the type reported by SQLAlchemy.
        if col_name in overrides:
            return dt.dtype(overrides[col_name])
        # Fall back to a generic dialect when the bind exposes none.
        dialect = getattr(table.bind, 'dialect', SQLAlchemyDialect())
        return dt.dtype(dialect, col.type, nullable=col.nullable)

    return sch.schema(
        [
            (col_name, _column_dtype(col_name, col))
            for col_name, col in table.columns.items()
        ]
    )
示例#3
0
文件: client.py 项目: cloudera/ibis
def infer_pandas_schema(df, schema=None):
    """Infer an ibis schema from the columns of a pandas ``DataFrame``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. Every column name must be a ``str``.
    schema : mapping, optional
        Explicit types keyed by column name. A column found here takes its
        type from this mapping and is not inferred.

    Returns
    -------
    schema
        The result of ``sch.schema`` for the ``(name, dtype)`` pairs, in
        column order.

    Raises
    ------
    TypeError
        If a column name is not a string, or if an object-dtype column is
        inferred as ``'mixed'`` or ``'decimal'``.
    """
    schema = schema if schema is not None else {}

    pairs = []
    # ``Series.iteritems`` was removed in pandas 2.0; ``items`` yields the
    # same (column name, dtype) pairs.
    for column_name, pandas_dtype in df.dtypes.items():
        if not isinstance(column_name, str):
            raise TypeError(
                'Column names must be strings to use the pandas backend'
            )

        if column_name in schema:
            ibis_dtype = dt.dtype(schema[column_name])
        elif pandas_dtype == np.object_:
            # Object columns carry no element type, so inspect the values.
            inferred_dtype = infer_pandas_dtype(df[column_name], skipna=True)
            if inferred_dtype in {'mixed', 'decimal'}:
                # TODO: in principle we can handle decimal (added in pandas
                # 0.23)
                raise TypeError(
                    'Unable to infer type of column {0!r}. Try instantiating '
                    'your table from the client with client.table('
                    "'my_table', schema={{{0!r}: <explicit type>}})".format(
                        column_name
                    )
                )
            ibis_dtype = _inferable_pandas_dtypes[inferred_dtype]
        else:
            ibis_dtype = dt.dtype(pandas_dtype)

        pairs.append((column_name, ibis_dtype))

    return sch.schema(pairs)
示例#4
0
文件: rules.py 项目: cloudera/ibis
def table(schema, arg):
    """A table argument.

    Parameters
    ----------
    schema : Union[sch.Schema, List[Tuple[str, dt.DataType]]]
        A validator for the table's columns. Only column subset validators are
        currently supported. Accepts any arguments that `sch.schema` accepts.
    arg : The validatable argument.

    Returns
    -------
    ir.TableExpr
        ``arg`` itself, when its schema compares ``>=`` to
        ``sch.schema(schema)``.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not an ``ir.TableExpr``, or if its schema does not
        satisfy the comparison above.

    Notes
    -----
    The ``schema`` argument specifies rules for columns that are required to
    be in the table. These must match the types specified in the column
    rules. The table may have extra columns not specified in the schema.
    """
    # Raise the validator's own error type instead of using ``assert``:
    # assertions are stripped under ``python -O`` and an AssertionError is
    # not what callers of a validation rule expect.
    if not isinstance(arg, ir.TableExpr):
        raise com.IbisTypeError(
            'Argument is not a table; got type {}'.format(type(arg).__name__)
        )

    if arg.schema() >= sch.schema(schema):
        return arg

    raise com.IbisTypeError(
        'Argument is not a table with column subset of {}'.format(schema)
    )
示例#5
0
文件: parquet.py 项目: cloudera/ibis
def infer_parquet_schema(schema):
    """Convert a parquet schema into an ibis schema.

    The input is turned into an arrow schema with ``to_arrow_schema`` and
    every field type is translated with ``dt.dtype``. Fields whose name looks
    like ``__index_level_<N>__`` are left out of the result.
    """
    # Translate every field first (including the ones dropped below) so that
    # an untranslatable type is still reported.
    typed_fields = [
        (field.name, dt.dtype(field.type, nullable=field.nullable))
        for field in schema.to_arrow_schema()
    ]

    kept = [
        (field_name, field_dtype)
        for field_name, field_dtype in typed_fields
        if re.match(r'^__index_level_\d+__$', field_name) is None
    ]
    return sch.schema(kept)
示例#6
0
文件: client.py 项目: cloudera/ibis
    def schema(self):
        """Return the schema of ``self.expr``.

        Table expressions, expression lists and ``sch.HasSchema`` objects
        report their own schema; a value expression yields a one-column schema
        built from its name and type. Anything else raises ``ValueError``.
        """
        expr = self.expr

        if isinstance(expr, (ir.TableExpr, ir.ExprList, sch.HasSchema)):
            return expr.schema()

        if isinstance(expr, ir.ValueExpr):
            single_column = (expr.get_name(), expr.type())
            return sch.schema([single_column])

        raise ValueError(
            'Expression with type {} does not have a '
            'schema'.format(type(expr))
        )
示例#7
0
文件: csv.py 项目: wkusnierczyk/ibis
    def table(self, name, path=None, schema=None, **kwargs):
        """Return a table expression for the CSV file called ``name``.

        ``name`` must be one of ``self.list_tables(path)``, otherwise
        ``AttributeError`` is raised. The file is looked up under ``path``
        (``self.root`` when ``path`` is None), its first 50 rows are read and
        the table schema is inferred from that sample. The file location is
        recorded in ``self.dictionary`` under ``name``.
        """
        known_tables = self.list_tables(path)
        if name not in known_tables:
            raise AttributeError(name)

        directory = self.root if path is None else path
        csv_file = directory / "{}.{}".format(name, self.extension)

        # Read a small sample, using the caller's schema if one was given.
        sample_schema = schema or sch.schema([])
        sample = _read_csv(
            csv_file, schema=sample_schema, header=0, nrows=50, **kwargs
        )

        # The table schema comes from the sample, not from ``schema``.
        inferred = sch.infer(sample)
        expr = self.table_class(name, inferred, self, **kwargs).to_expr()

        self.dictionary[name] = csv_file
        return expr
示例#8
0
def schema_from_table(table, schema=None):
    """Build an ibis schema from the columns of a SQLAlchemy ``Table``.

    Parameters
    ----------
    table : sa.Table
        Table whose ``columns`` are translated.
    schema : mapping, optional
        Explicit types keyed by column name; these take precedence over the
        SQLAlchemy column types.

    Returns
    -------
    schema
        The result of ``sch.schema`` for the ``(name, dtype)`` pairs, in the
        order the table lists its columns.
    """
    overrides = {} if schema is None else schema
    return sch.schema(
        [
            (
                col_name,
                dt.dtype(overrides[col_name])
                if col_name in overrides
                else dt.dtype(col.type, nullable=col.nullable),
            )
            for col_name, col in table.columns.items()
        ]
    )
示例#9
0
    def get_schema(self, table_name, database=None):
        """
        Build the Schema of a table from the connection's table details.

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None
          Not used by this implementation.

        Returns
        -------
        schema : ibis Schema
        """
        # Keyed by column name: a repeated name keeps its first position and
        # takes the type seen last.
        dtype_by_column = {}
        for column in self.con.get_table_details(table_name):
            dtype_factory = omniscidb_dtypes.sql_to_ibis_dtypes[column.type]
            dtype_by_column[column.name] = dtype_factory(
                nullable=column.nullable
            )

        return sch.schema(list(dtype_by_column.items()))
示例#10
0
文件: csv.py 项目: cloudera/ibis
    def table(self, name, path=None, schema=None, **kwargs):
        """Return a table expression for the CSV file called ``name``.

        Raises ``AttributeError`` when ``name`` is not listed by
        ``self.list_tables(path)``. Otherwise the file under ``path``
        (defaulting to ``self.root``) is sampled (50 rows), the table schema
        is inferred from the sample, and the file location is stored in
        ``self.dictionary[name]``.
        """
        if name not in self.list_tables(path):
            raise AttributeError(name)

        base_dir = path if path is not None else self.root
        file_name = "{}.{}".format(name, self.extension)
        location = base_dir / file_name

        # A caller-supplied schema only guides how the sample is read.
        read_schema = schema or sch.schema([])
        sample_rows = _read_csv(
            location, schema=read_schema, header=0, nrows=50, **kwargs
        )

        table_schema = sch.infer(sample_rows)
        table_op = self.table_class(name, table_schema, self, **kwargs)
        result = table_op.to_expr()

        self.dictionary[name] = location
        return result
示例#11
0
    def schema(self):
        """Return the schema of ``self.expr``.

        Returns
        -------
        Schema
            The expression's own schema, or a one-column schema (name, type)
            when the expression is a value expression.

        Raises
        ------
        ValueError
            if self.expr doesn't have a schema.
        """
        target = self.expr
        schema_bearing = (ir.TableExpr, ir.ExprList, sch.HasSchema)

        if isinstance(target, schema_bearing):
            return target.schema()

        if not isinstance(target, ir.ValueExpr):
            raise ValueError(
                'Expression with type {} does not have a '
                'schema'.format(type(target))
            )

        return sch.schema([(target.get_name(), target.type())])
示例#12
0
    def ast_schema(self, query_ast):
        """Return the schema of the expression behind ``query_ast``.

        The expression is looked up on ``query_ast.dml`` (or on ``query_ast``
        itself when it has no ``dml`` attribute), taking ``parent_expr`` if
        present and ``table_set`` otherwise.

        Parameters
        ----------
        query_ast
            Query AST object to inspect.

        Returns
        -------
        Schema

        Raises
        ------
        ValueError
            if the resolved expression doesn't have a schema.
        """
        dml = getattr(query_ast, 'dml', query_ast)
        expr = getattr(dml, 'parent_expr', getattr(dml, 'table_set', None))

        if isinstance(expr, (ir.TableExpr, sch.HasSchema)):
            return expr.schema()
        elif isinstance(expr, ir.ValueExpr):
            return sch.schema([(expr.get_name(), expr.type())])
        else:
            # Report the expression that was actually inspected, not
            # ``self.expr``, which is unrelated to ``query_ast``.
            raise ValueError('Expression with type {} does not have a '
                             'schema'.format(type(expr)))
示例#13
0
文件: client.py 项目: gridl/ibis
    def get_schema(self, table_name, database=None):
        """
        Describe a table and return its Schema

        Runs ``DESC`` on the fully qualified table name and parses the
        reported type names with ``ClickhouseDataType.parse``.

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None

        Returns
        -------
        schema : ibis Schema
        """
        target = self._fully_qualified_name(table_name, database)
        described, _, _ = self.raw_sql('DESC {0}'.format(target), results=True)

        # The first two entries of the result are the names and the types.
        names, type_names = described[:2]
        parsed_types = [
            ClickhouseDataType.parse(type_name) for type_name in type_names
        ]

        return sch.schema(names, parsed_types)
示例#14
0
    def get_schema(self, table_name, database=None):
        """
        Return a Schema object for the indicated table and database

        Column metadata is read from ``self.con.get_table_details`` and each
        column type is converted with ``OmniSciDBDataType.parse``.

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None
          Not used by this implementation.

        Returns
        -------
        schema : ibis Schema
        """
        # Query the table details only once; the (name, type) pairs are all
        # that is needed to build the schema.
        return sch.schema([(col.name, OmniSciDBDataType.parse(col.type))
                           for col in self.con.get_table_details(table_name)])
示例#15
0
文件: __init__.py 项目: jelitox/ibis
    def get_schema(
        self,
        table_name: str,
        database: str | None = None,
    ) -> sch.Schema:
        """Describe a table and return its schema.

        Runs ``DESC`` on the fully qualified table name; the first entry of
        the returned data supplies the column names and the second the type
        names, which are parsed with ``ClickhouseDataType.parse``.

        Parameters
        ----------
        table_name
            May be fully qualified
        database
            Database name

        Returns
        -------
        sch.Schema
            Ibis schema
        """
        target = self._fully_qualified_name(table_name, database)
        described, _ = self.raw_sql(f'DESC {target}')

        names = described[0]
        parsed_types = [
            ClickhouseDataType.parse(type_name) for type_name in described[1]
        ]
        return sch.schema(names, parsed_types)
示例#16
0
文件: client.py 项目: martint/ibis
def spark_dataframe_schema(df):
    """Return the ibis schema matching a Spark SQL `DataFrame`.

    ``df.schema`` is converted with ``dt.dtype``; the names and types of the
    resulting struct become the schema's columns.
    """
    # df.schema is a pt.StructType
    struct_dtype = dt.dtype(df.schema)
    names, types = struct_dtype.names, struct_dtype.types
    return sch.schema(names, types)
示例#17
0
文件: client.py 项目: gridl/ibis
 def _get_schema_using_query(self, query):
     """Build a schema from what ``self._execute`` reports for ``query``.

     The second and third items of the result are used as the column names
     and column types; the first item is ignored.
     """
     result = self._execute(query)
     _, names, types = result
     return sch.schema(names, types)
示例#18
0
文件: types.py 项目: djv/ibis
 def schema(self):
     """Return the schema built from ``self.names()`` and ``self.types()``."""
     column_names = self.names()
     column_types = self.types()
     return sch.schema(column_names, column_types)
示例#19
0
 def _get_schema_using_query(self, query, **kwargs):
     """Build a schema from the column metadata ``raw_sql`` returns.

     ``kwargs`` are forwarded to ``self.raw_sql``. The returned columns are
     (name, type name) pairs; type names are parsed with
     ``ClickhouseDataType.parse``.
     """
     _, column_info = self.raw_sql(query, **kwargs)
     # Split the (name, type name) pairs into two parallel tuples.
     names, type_names = zip(*column_info)
     parsed_types = [ClickhouseDataType.parse(t) for t in type_names]
     return sch.schema(names, parsed_types)
示例#20
0
def infer_pyarrow_schema(schema):
    """Convert the fields of ``schema`` into an ibis schema.

    Each field contributes its name and the ``dt.dtype`` translation of its
    type, carrying over the field's nullability.
    """
    pairs = []
    for field in schema:
        ibis_dtype = dt.dtype(field.type, nullable=field.nullable)
        pairs.append((field.name, ibis_dtype))
    return sch.schema(pairs)
示例#21
0
def schema_from_series(s):
    """Build an ibis schema from the ``(index, value)`` pairs of ``s``.

    ``s`` is expected to be Series-like: its ``items()`` pairs are passed to
    ``sch.schema`` as a tuple.
    """
    # ``Series.iteritems`` was removed in pandas 2.0; ``items`` yields the
    # same pairs.
    return sch.schema(tuple(s.items()))
示例#22
0
def infer_pyarrow_schema(schema: pa.Schema) -> sch.Schema:
    """Convert a pyarrow schema into an ibis schema.

    Each field contributes its name and the ``dt.dtype`` translation of its
    type, carrying over the field's nullability.
    """

    def _as_pair(field):
        return field.name, dt.dtype(field.type, nullable=field.nullable)

    return sch.schema(list(map(_as_pair, schema)))
示例#23
0
文件: client.py 项目: cloudera/ibis
def schema_from_series(s):
    """Build an ibis schema from the ``(index, value)`` pairs of ``s``.

    ``s`` is expected to be Series-like: its ``items()`` pairs are passed to
    ``sch.schema`` as a tuple.
    """
    # ``Series.iteritems`` was removed in pandas 2.0; ``items`` yields the
    # same pairs.
    return sch.schema(tuple(s.items()))