def test_table_without_partitioning(self, cursor, kudu_client, unique_database):
    """Test a Kudu table created without partitioning (i.e. equivalent to a single
    unbounded partition).

    It is not possible to create such a table in Impala, but it can be created
    directly in Kudu and then loaded as an external table. The test inserts three
    rows, checks the row count, and then checks that SHOW RANGE PARTITIONS is
    rejected with the expected analysis error.

    Regression test for IMPALA-5154.
    """
    schema_builder = SchemaBuilder()
    column_spec = schema_builder.add_column("id", INT64)
    column_spec.nullable(False)
    schema_builder.set_primary_keys(["id"])
    schema = schema_builder.build()
    # An empty list of range partition columns is how the "no partitioning"
    # table described in the docstring is requested from Kudu.
    partitioning = Partitioning().set_range_partition_columns([])
    name = "%s.one_big_unbounded_partition" % unique_database

    try:
        kudu_client.create_table(name, schema, partitioning=partitioning)

        props = "TBLPROPERTIES('kudu.table_name'='%s')" % name
        cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (name, props))
        with self.drop_impala_table_after_context(cursor, name):
            cursor.execute("INSERT INTO %s VALUES (1), (2), (3)" % name)
            cursor.execute("SELECT COUNT(*) FROM %s" % name)
            assert cursor.fetchall() == [(3, )]

            expected_error = (
                "AnalysisException: SHOW RANGE PARTITIONS requested but table does "
                "not have range partitions")
            try:
                cursor.execute("SHOW RANGE PARTITIONS %s" % name)
            except Exception as e:
                assert expected_error in str(e)
            else:
                # Kept outside the 'try' body so that this failure is not caught by
                # the 'except Exception' above and misreported as a message mismatch.
                assert False, \
                    "SHOW RANGE PARTITIONS unexpectedly succeeded on %s" % name
    finally:
        # Always remove the table from Kudu, even when an assertion above failed.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                             unique_database):
    """Verify how SHOW RANGE PARTITIONS renders a single unbounded range partition.

    A table range-partitioned on "id" is created directly through the Kudu client,
    exposed to Impala as an external table, and the statement's column metadata and
    single result row are compared against the expected values.
    """
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.set_primary_keys(["id"])
    table_schema = builder.build()
    name = unique_database + ".unbounded_range_table"

    try:
        range_on_id = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(name, table_schema, partitioning=range_on_id)
        kudu_table = kudu_client.table(name)

        external_name = self.get_kudu_table_base_name(kudu_table.name)
        table_properties = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
            external_name, table_properties)
        cursor.execute(create_stmt)

        with self.drop_impala_table_after_context(cursor, external_name):
            cursor.execute("SHOW RANGE PARTITIONS %s" % external_name)
            # One STRING column named after the range column ...
            assert cursor.description == [
                ('RANGE (id)', 'STRING', None, None, None, None, None)]
            # ... and exactly one row describing the unbounded partition.
            assert cursor.fetchall() == [('UNBOUNDED', )]
    finally:
        # Clean up on the Kudu side regardless of the test outcome.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
                    prepend_db_name=True, db_name=None, num_partitions=2):
    """Create and return a table. This function should be used in a "with" context.

    'kudu' must be a kudu.client.Client. If a table name is not provided, a random
    name will be used. If 'prepend_db_name' is True, the table name will be
    prepended with 'db_name' (or, when that is not given, the lower-cased
    get_db_name()) followed by ".". If column names are not provided, the letters
    "a", "b", "c", ... will be used. The first 'num_key_cols' columns are made
    non-nullable and form the primary key; the table is hash partitioned on them
    into 'num_partitions' partitions (default 2).

    Raises ValueError if column names are not provided and there are more than 26
    column types.

    Example:
      with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
         assert kudu.table_exists(kudu_table.name)
      assert not kudu.table_exists(kudu_table.name)
    """
    # NOTE(review): this is a generator; the "with" usage above presumably relies on
    # a contextlib.contextmanager decorator that is not visible in this block.
    if not col_names:
        if len(col_types) > 26:
            raise ValueError("Too many columns for default naming")
        # chr(97) is "a", so this yields "a", "b", "c", ...
        col_names = [chr(97 + i) for i in range(len(col_types))]
    key_cols = col_names[:num_key_cols]

    schema_builder = SchemaBuilder()
    for i, t in enumerate(col_types):
        column_spec = schema_builder.add_column(col_names[i], type_=t)
        if i < num_key_cols:
            # Key columns are declared non-nullable.
            column_spec.nullable(False)
    schema_builder.set_primary_keys(key_cols)
    schema = schema_builder.build()

    name = name or self.random_table_name()
    if prepend_db_name:
        name = (db_name or self.get_db_name().lower()) + "." + name

    kudu.create_table(
        name, schema,
        partitioning=Partitioning().add_hash_partitions(key_cols, num_partitions))
    try:
        yield kudu.table(name)
    finally:
        # Drop the table when the caller is done with it, unless it is already gone.
        if kudu.table_exists(name):
            kudu.delete_table(name)
def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                             unique_database):
    """Check the SHOW RANGE PARTITIONS output for one unbounded range partition.

    The Kudu table is created outside Impala with "id" as its only range partition
    column, then mapped as an external Impala table for the duration of the check.
    """
    id_only_schema = SchemaBuilder()
    id_column = id_only_schema.add_column("id", INT64)
    id_column.nullable(False)
    id_only_schema.set_primary_keys(["id"])
    schema = id_only_schema.build()

    name = unique_database + ".unbounded_range_table"
    expected_description = [('RANGE (id)', 'STRING', None, None, None, None, None)]
    expected_rows = [('UNBOUNDED',)]

    try:
        kudu_client.create_table(
            name,
            schema,
            partitioning=Partitioning().set_range_partition_columns(["id"]),
        )
        created = kudu_client.table(name)
        impala_name = self.get_kudu_table_base_name(created.name)
        tbl_props = "TBLPROPERTIES('kudu.table_name'='%s')" % created.name
        cursor.execute(
            "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (impala_name, tbl_props))

        # The external Impala table only lives for the duration of this context.
        with self.drop_impala_table_after_context(cursor, impala_name):
            cursor.execute("SHOW RANGE PARTITIONS %s" % impala_name)
            assert cursor.description == expected_description
            assert cursor.fetchall() == expected_rows
    finally:
        # The Kudu table is removed separately from the Impala table.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)
def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
                    prepend_db_name=True, db_name=None, num_partitions=2):
    """Create and return a table. This function should be used in a "with" context.

    'kudu' must be a kudu.client.Client. If a table name is not provided, a random
    name will be used. If 'prepend_db_name' is True, the table name will be
    prepended with 'db_name' (or, when that is not given, the lower-cased
    get_db_name()) followed by ".". If column names are not provided, the letters
    "a", "b", "c", ... will be used. The first 'num_key_cols' columns are made
    non-nullable and form the primary key, which is also the hash partitioning key.
    The number of partitions can be set using 'num_partitions'.

    Raises ValueError if column names are not provided and there are more than 26
    column types.

    Example:
      with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
         assert kudu.table_exists(kudu_table.name)
      assert not kudu.table_exists(kudu_table.name)
    """
    # NOTE(review): this is a generator; the "with" usage above presumably relies on
    # a contextlib.contextmanager decorator that is not visible in this block.
    if not col_names:
        if len(col_types) > 26:
            raise ValueError("Too many columns for default naming")
        # chr(97) is "a", so this yields "a", "b", "c", ...
        col_names = [chr(97 + i) for i in range(len(col_types))]
    key_cols = col_names[:num_key_cols]

    schema_builder = SchemaBuilder()
    for i, t in enumerate(col_types):
        column_spec = schema_builder.add_column(col_names[i], type_=t)
        if i < num_key_cols:
            # Key columns are declared non-nullable.
            column_spec.nullable(False)
    schema_builder.set_primary_keys(key_cols)
    schema = schema_builder.build()

    name = name or self.random_table_name()
    if prepend_db_name:
        name = (db_name or self.get_db_name().lower()) + "." + name

    kudu.create_table(
        name, schema,
        partitioning=Partitioning().add_hash_partitions(key_cols, num_partitions))
    try:
        yield kudu.table(name)
    finally:
        # Drop the table when the caller is done with it, unless it is already gone.
        if kudu.table_exists(name):
            kudu.delete_table(name)
def test_external_timestamp_default_value(self, cursor, kudu_client,
                                          unique_database):
    """Check that Impala can load an externally created Kudu table whose
    UNIXTIME_MICROS column carries a default value, and that DESCRIBE reports
    that column as expected."""
    builder = SchemaBuilder()
    builder.add_column("id", INT64).nullable(False)
    builder.add_column("ts", UNIXTIME_MICROS).default(
        datetime(2009, 1, 1, 0, 0, tzinfo=utc))
    builder.set_primary_keys(["id"])
    schema = builder.build()
    name = unique_database + ".tsdefault"

    # The DESCRIBE row expected for the "ts" column, after whitespace stripping.
    expected_ts_row = ["ts", "timestamp", "", "false", "true", "1230768000000000",
                       "AUTO_ENCODING", "DEFAULT_COMPRESSION", "0"]

    try:
        range_on_id = Partitioning().set_range_partition_columns(["id"])
        kudu_client.create_table(name, schema, partitioning=range_on_id)
        kudu_table = kudu_client.table(name)

        impala_name = self.get_kudu_table_base_name(kudu_table.name)
        tbl_props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_table.name
        cursor.execute(
            "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (impala_name, tbl_props))

        with self.drop_impala_table_after_context(cursor, impala_name):
            cursor.execute("DESCRIBE %s" % impala_name)
            # Strip surrounding whitespace from every non-empty cell; empty or
            # None cells are passed through untouched.
            described = []
            for row in cursor:
                cleaned = []
                for cell in row:
                    cleaned.append(cell.strip() if cell else cell)
                described.append(cleaned)
            # Log the complete DESCRIBE output, since pytest abbreviates it when
            # the assertion below fails.
            LOG.info(described)
            assert expected_ts_row in described
    finally:
        # Remove the Kudu table whether or not the checks passed.
        if kudu_client.table_exists(name):
            kudu_client.delete_table(name)