def _pieces(self):
    """Yield, in order, the text fragments that make up this statement.

    The fragments are: the formatted column schema, a ``PARTITIONED BY``
    clause when ``self.partition`` is set, either the joined output of
    ``self.table_format.to_ddl()`` or ``self._storage()``, and finally
    ``self._location()``.
    """
    partition = self.partition
    if partition is None:
        yield format_schema(self.schema)
    else:
        columns = self.schema

        if isinstance(partition, sch.Schema):
            partition_schema = partition
        else:
            # Not a Schema yet: each item is used as a column name whose
            # type is looked up in the full schema.
            partition_schema = sch.Schema(
                partition, [self.schema[name] for name in partition]
            )

        # Partition columns that also appear in the main schema are
        # removed from it before it is formatted.
        duplicated = [name for name in partition if name in self.schema]
        if duplicated:
            columns = columns.delete(duplicated)

        yield format_schema(columns)
        yield 'PARTITIONED BY {}'.format(format_schema(partition_schema))

    table_format = self.table_format
    if table_format is None:
        yield self._storage()
    else:
        yield '\n'.join(table_format.to_ddl())

    yield self._location()
def schema(self):
    """Return the schema produced by this node's selections.

    With no selections, the schema of ``self.table`` is returned as-is.
    Otherwise every selection contributes columns in order:

    * ``ir.DestructColumn``: one column per field of its struct type
    * ``ir.Value``: a single column (its name and type)
    * ``ir.Table``: all of that table's columns

    Selections matching none of these contribute nothing.
    """
    projections = self.selections
    if not projections:
        return self.table.schema()

    col_names = []
    col_types = []

    for item in projections:
        # DestructColumn is tested before Value, as in the isinstance
        # chain this replaces.
        if isinstance(item, ir.DestructColumn):
            # Destructure the struct: each field becomes its own column.
            struct = item.type()
            for field in struct.names:
                col_names.append(field)
                col_types.append(struct[field])
            continue

        if isinstance(item, ir.Value):
            col_names.append(item.get_name())
            col_types.append(item.type())
            continue

        if isinstance(item, ir.Table):
            table_schema = item.schema()
            col_names.extend(table_schema.names)
            col_types.extend(table_schema.types)

    return sch.Schema(col_names, col_types)
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None

    Returns
    -------
    schema : ibis Schema

    Raises
    ------
    com.UnsupportedBackendType
      If a type name in the ``DESC`` output has no entry in
      ``clickhouse_to_ibis``.
    """
    qualified_name = self._fully_qualified_name(table_name, database)
    query = 'DESC {0}'.format(qualified_name)
    data, _ = self._execute(query)
    # The first two entries of the result are the names and the types.
    names, types = data[:2]

    # The lookup must be eager: on Python 3 ``map`` is lazy, so a
    # ``map(...)`` inside the ``try`` would defer the KeyError until the
    # iterator is consumed, outside this handler.
    try:
        ibis_types = [clickhouse_to_ibis[type_name] for type_name in types]
    except KeyError as err:
        raise com.UnsupportedBackendType() from err

    return sch.Schema(names, ibis_types)
def get_schema(
    self,
    table_name: str,
    database: str | None = None,
) -> sch.Schema:
    """Return the ibis schema of a table.

    Parameters
    ----------
    table_name
        Table name
    database
        Database name

    Returns
    -------
    Schema
        Ibis schema
    """
    target = self._fully_qualified_name(table_name, database)
    rows = self.con.fetchall(f'DESCRIBE {target}')

    # Only the first two fields of each row (name, type) are used.
    names, type_names = zip(*[row[:2] for row in rows])

    ibis_types = []
    for type_name in type_names:
        # Type names are lower-cased before being parsed.
        ibis_types.append(udf.parse_type(type_name.lower()))

    return sch.Schema(names, ibis_types)
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Column names are lower-cased before the Schema is built.

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None

    Returns
    -------
    schema : ibis Schema
    """
    target = self._fully_qualified_name(table_name, database)
    rows = self.con.fetchall(f'DESCRIBE {target}')

    # Only the first two fields of each row (name, type) are used.
    raw_names, type_names = zip(*[row[:2] for row in rows])

    ibis_types = []
    for type_name in type_names:
        ibis_types.append(udf.parse_type(type_name.lower()))

    lowered_names = []
    for raw_name in raw_names:
        lowered_names.append(raw_name.lower())

    return sch.Schema(lowered_names, ibis_types)
def _get_schema_using_query(self, query):
    """Run ``query`` and build a Schema from the cursor description.

    The cursor returned by ``self.raw_sql`` is always released, including
    when fetching or type adaptation raises.

    Parameters
    ----------
    query : string

    Returns
    -------
    schema : ibis Schema
    """
    cur = self.raw_sql(query)
    try:
        # resets the state of the cursor and closes operation
        cur.fetchall()
        names, ibis_types = self._adapt_types(cur.description)
    finally:
        # Release on the error path as well; otherwise a failure above
        # would leave the cursor unreleased.
        cur.release()
    return sch.Schema(names, ibis_types)
def _get_schema_using_query(self, query):
    """Run ``query`` and build a Schema from the result's row description.

    The column details are extracted from
    ``result.cursor._result.row_set.row_desc`` and passed through
    ``self._adapt_types`` to obtain names and ibis types.
    """
    result = self.raw_sql(query)
    cursor = result.cursor

    # resets the state of the cursor and closes operation
    cursor.fetchall()

    row_desc = cursor._result.row_set.row_desc
    column_details = _extract_column_details(row_desc)
    names, ibis_types = self._adapt_types(column_details)
    return sch.Schema(names, ibis_types)
def _get_schema_using_query(self, limited_query):
    """Run ``limited_query`` and build a Schema from its cursor description.

    For each description row, item 0 is used as the column name and item 1
    is mapped to an ibis type name through a fixed table.

    Parameters
    ----------
    limited_query
        Query passed to ``self._execute`` with ``results=True``.

    Returns
    -------
    schema : ibis Schema

    Raises
    ------
    KeyError
        If a description row reports a type that is not in the mapping.
    """
    type_map = {
        int: 'int64',
        bool: 'boolean',
        float: 'float64',
        str: 'string',
        datetime.datetime: 'timestamp',
    }

    names = []
    ibis_types = []
    with self._execute(limited_query, results=True) as cur:
        # Fetch the description once and fill both lists in a single
        # pass instead of calling _cursor_description() twice.
        for row in cur.proxy._cursor_description():
            names.append(row[0])
            ibis_types.append(type_map[row[1]])
    return sch.Schema(names, ibis_types)
def partition_schema(self):
    """Return the schema for the partition columns.

    The columns of ``self.partitions()`` are scanned in order and kept
    while they are present in this table's schema; scanning stops at the
    first column that is not. If no column qualifies, an empty Schema is
    returned.
    """
    schema = self.schema()
    name_to_type = dict(zip(schema.names, schema.types))

    result = self.partitions()

    pnames = []
    ptypes = []
    for column in result.columns:
        if column not in name_to_type:
            # NOTE(review): presumably everything from here on is
            # non-partition metadata; confirm against partitions().
            break
        pnames.append(column)
        ptypes.append(name_to_type[column])

    # Filling the two lists directly avoids ``zip(*pairs)``, which cannot
    # be unpacked into two names when there are no pairs.
    return sch.Schema(pnames, ptypes)
def _get_schema_using_query(self, query):
    """Run ``query`` and build a Schema with lower-cased column names.

    The cursor returned by ``self.raw_sql`` is always released, including
    when fetching or type adaptation raises.

    Parameters
    ----------
    query : string

    Returns
    -------
    schema : ibis Schema
    """
    cur = self.raw_sql(query)
    try:
        # resets the state of the cursor and closes operation
        cur.fetchall()
        names, ibis_types = self._adapt_types(cur.description)
    finally:
        # Release on the error path as well; otherwise a failure above
        # would leave the cursor unreleased.
        cur.release()

    # per #321; most Impala tables will be lower case already, but Avro
    # data, depending on the version of Impala, might have field names in
    # the metastore cased according to the explicit case in the declared
    # avro schema. This is very annoying, so it's easier to just conform on
    # all lowercase fields from Impala.
    names = [x.lower() for x in names]

    return sch.Schema(names, ibis_types)
def schema(self):
    """Return the schema formed by ``self.by`` followed by ``self.metrics``.

    Each expression contributes one column (its name and type), except
    ``ir.DestructValue`` expressions, which contribute one column per
    field of their struct type.
    """
    col_names, col_types = [], []

    for expr in self.by + self.metrics:
        if not isinstance(expr, ir.DestructValue):
            col_names.append(expr.get_name())
            col_types.append(expr.type())
            continue

        # Destructure the struct: each field becomes its own column.
        struct = expr.type()
        for field in struct.names:
            col_names.append(field)
            col_types.append(struct[field])

    return sch.Schema(col_names, col_types)
def schema_from_table(table):
    """Build an ibis schema from a SQLAlchemy ``Table``.

    Parameters
    ----------
    table : sa.Table

    Returns
    -------
    schema : ibis Schema
        One entry per column of `table`. Each column's type is converted
        with its own nullability and a default timezone of ``'UTC'``.
    """
    columns = table.columns

    ibis_types = []
    for column in columns.values():
        converted = sqlalchemy_type_to_ibis_type(
            column.type,
            nullable=column.nullable,
            default_timezone='UTC',
        )
        ibis_types.append(converted)

    return sch.Schema(columns.keys(), ibis_types)
def partition_schema(self):
    """
    For partitioned tables, return the schema (names and types) for the
    partition columns

    The columns of ``self.partitions()`` are scanned in order and kept
    while they are present in this table's schema; scanning stops at the
    first column that is not. If no column qualifies, an empty Schema is
    returned.

    Returns
    -------
    partition_schema : ibis Schema
    """
    schema = self.schema()
    name_to_type = dict(zip(schema.names, schema.types))

    result = self.partitions()

    pnames = []
    ptypes = []
    for column in result.columns:
        if column not in name_to_type:
            # NOTE(review): presumably everything from here on is
            # non-partition metadata; confirm against partitions().
            break
        pnames.append(column)
        ptypes.append(name_to_type[column])

    # Filling the two lists directly avoids ``zip(*pairs)``, which cannot
    # be unpacked into two names when there are no pairs.
    return sch.Schema(pnames, ptypes)
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None

    Returns
    -------
    schema : ibis Schema
    """
    target = self._fully_qualified_name(table_name, database)
    describe_sql = 'DESC {0}'.format(target)

    result = self._execute(describe_sql)
    described, _ = result
    # The first two entries of the result are the names and the types.
    names, type_names = described[:2]

    # NOTE(review): if clickhouse_to_ibis is a plain dict, a type name
    # missing from it maps to None here instead of raising; confirm that
    # this is intended.
    to_ibis = clickhouse_to_ibis.get
    ibis_types = map(to_ibis, type_names)
    return sch.Schema(names, ibis_types)
def _get_schema_using_query(self, limited_query):
    """Run ``limited_query`` and build a Schema from the cursor description.

    The description is adapted while the context manager returned by
    ``self._execute`` is still open.
    """
    execution = self._execute(limited_query, results=True)
    with execution as cursor:
        adapted = self._adapt_types(cursor.description)
        column_names, column_types = adapted
    return sch.Schema(column_names, column_types)
def _get_schema_using_query(self, limited_query):
    """Run ``limited_query`` and build a Schema from the adapted result.

    The object returned by ``self._execute`` is handed directly to
    ``self._adapt_types``, which supplies the names and ibis types.
    """
    executed = self._execute(limited_query, results=True)
    adapted = self._adapt_types(executed)
    column_names, column_types = adapted
    return sch.Schema(column_names, column_types)
def get_schema(self, name, database=None):
    """Return a Schema for table ``name`` in ``database``.

    The names and types come from the object returned by
    ``self._get_teradata_schema(database, name)``.
    """
    described = self._get_teradata_schema(database, name)
    schema_cls = sch.Schema
    column_names = described.names
    column_types = described.types
    return schema_cls(column_names, column_types)
def schema(self):
    """Build a Schema from this object's ``names()`` and ``types()``."""
    # Imported at call time rather than at module import time.
    import ibis.expr.schema as sch

    schema_cls = sch.Schema
    column_names = self.names()
    column_types = self.types()
    return schema_cls(column_names, column_types)
def _get_schema_using_query(self, query):
    """Run ``query`` and build a Schema from the returned column info.

    The second element of the ``self._execute`` result is unpacked as
    (name, type) pairs; each type is then looked up in
    ``clickhouse_to_ibis``.
    """
    executed = self._execute(query)
    _, column_info = executed
    names, clickhouse_types = zip(*column_info)

    # NOTE(review): if clickhouse_to_ibis is a plain dict, a type name
    # missing from it maps to None here instead of raising; confirm that
    # this is intended.
    to_ibis = clickhouse_to_ibis.get
    ibis_types = map(to_ibis, clickhouse_types)
    return sch.Schema(names, ibis_types)