示例#1
0
    def test_table_without_partitioning(self, cursor, kudu_client,
                                        unique_database):
        """Test a Kudu table created without partitioning (i.e. equivalent to a single
        unbounded partition). It is not possible to create such a table in Impala, but
        it can be created directly in Kudu and then loaded as an external table.
        Regression test for IMPALA-5154.

        The table is created through 'kudu_client', mapped into Impala as an external
        table via 'cursor', and then checked in two ways: rows can be inserted and
        counted, and SHOW RANGE PARTITIONS is rejected with the expected analysis error.
        The Kudu table is deleted on exit if it still exists."""
        schema_builder = SchemaBuilder()
        column_spec = schema_builder.add_column("id", INT64)
        column_spec.nullable(False)
        schema_builder.set_primary_keys(["id"])
        schema = schema_builder.build()
        # No range partition columns are given: this is how the unpartitioned table
        # is requested from Kudu.
        partitioning = Partitioning().set_range_partition_columns([])
        name = "%s.one_big_unbounded_partition" % unique_database

        try:
            kudu_client.create_table(name, schema, partitioning=partitioning)
            # The returned handle is not used; the call is kept so the table is still
            # opened through the Kudu client exactly as before.
            kudu_client.table(name)

            props = "TBLPROPERTIES('kudu.table_name'='%s')" % name
            cursor.execute("CREATE EXTERNAL TABLE %s STORED AS KUDU %s" %
                           (name, props))
            with self.drop_impala_table_after_context(cursor, name):
                cursor.execute("INSERT INTO %s VALUES (1), (2), (3)" % name)
                cursor.execute("SELECT COUNT(*) FROM %s" % name)
                assert cursor.fetchall() == [(3, )]
                try:
                    cursor.execute("SHOW RANGE PARTITIONS %s" % name)
                except Exception as e:
                    assert "AnalysisException: SHOW RANGE PARTITIONS requested but table does "\
                        "not have range partitions" in str(e)
                else:
                    # Fail here rather than inside the 'try': an AssertionError raised
                    # there would be caught by the broad 'except' above and surface as
                    # a confusing error-message mismatch.
                    assert False, \
                        "SHOW RANGE PARTITIONS unexpectedly succeeded on an unpartitioned table"
        finally:
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)
示例#2
0
    def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                                 unique_database):
        """Verify the SHOW RANGE PARTITIONS output for one unbounded range partition.

        A Kudu table that is range partitioned on "id" is created directly through
        'kudu_client' and exposed to Impala as an external table. SHOW RANGE PARTITIONS
        must then describe a single STRING column named 'RANGE (id)' and return the
        single row 'UNBOUNDED'. The Kudu table is deleted on exit if it still exists."""
        builder = SchemaBuilder()
        builder.add_column("id", INT64).nullable(False)
        builder.set_primary_keys(["id"])
        schema = builder.build()

        name = unique_database + ".unbounded_range_table"

        try:
            range_on_id = Partitioning().set_range_partition_columns(["id"])
            kudu_client.create_table(name, schema, partitioning=range_on_id)
            kudu_name = kudu_client.table(name).name

            impala_table_name = self.get_kudu_table_base_name(kudu_name)
            props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_name
            create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
                impala_table_name, props)
            cursor.execute(create_stmt)
            with self.drop_impala_table_after_context(cursor,
                                                      impala_table_name):
                cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
                expected_description = [
                    ('RANGE (id)', 'STRING', None, None, None, None, None)]
                assert cursor.description == expected_description
                assert cursor.fetchall() == [('UNBOUNDED', )]
        finally:
            # Clean up the Kudu-side table regardless of how the checks went.
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)
  def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
      prepend_db_name=True, db_name=None, num_partitions=2):
    """Create and return a table. This function should be used in a "with" context.
       'kudu' must be a kudu.client.Client. If a table name is not provided, a random
       name will be used. If 'prepend_db_name' is True, the table name will be prepended
       with ('db_name' + ".") when 'db_name' is given, otherwise with the lower-cased
       (get_db_name() + "."). If column names are not provided, the letters
       "a", "b", "c", ... will be used. The first 'num_key_cols' columns are made
       non-nullable and form the primary key. The table is hash partitioned on those
       key columns; the number of partitions can be set using 'num_partitions'
       (2 by default).

       Raises an Exception if default column names are needed for more than 26 columns.
       The table is deleted when the "with" block exits, if it still exists.

       Example:
         with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
            assert kudu.table_exists(kudu_table.name)
         assert not kudu.table_exists(kudu_table.name)
    """
    if not col_names:
      if len(col_types) > 26:
        raise Exception("Too many columns for default naming")
      # Default names are consecutive lowercase letters starting at "a".
      col_names = [chr(ord("a") + i) for i, _ in enumerate(col_types)]
    key_col_names = col_names[:num_key_cols]
    schema_builder = SchemaBuilder()
    for i, t in enumerate(col_types):
      column_spec = schema_builder.add_column(col_names[i], type_=t)
      # Only the leading key columns are explicitly marked as non-nullable.
      if i < num_key_cols:
        column_spec.nullable(False)
    schema_builder.set_primary_keys(key_col_names)
    schema = schema_builder.build()
    name = name or self.random_table_name()
    if prepend_db_name:
      name = (db_name or self.get_db_name().lower()) + "." + name
    kudu.create_table(name, schema,
        partitioning=Partitioning().add_hash_partitions(key_col_names, num_partitions))
    try:
      yield kudu.table(name)
    finally:
      # The caller may already have dropped the table, so check before deleting.
      if kudu.table_exists(name):
        kudu.delete_table(name)
示例#4
0
  def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                               unique_database):
    """Verify the SHOW RANGE PARTITIONS output for one unbounded range partition.

    The Kudu table is created outside Impala with "id" as its only range partition
    column, then loaded as an external table. The statement is expected to describe
    one STRING column called 'RANGE (id)' and to return exactly one row, 'UNBOUNDED'.
    The Kudu table is deleted on exit if it still exists."""
    builder = SchemaBuilder()
    id_column = builder.add_column("id", INT64)
    id_column.nullable(False)
    builder.set_primary_keys(["id"])
    schema = builder.build()

    name = unique_database + ".unbounded_range_table"

    try:
      kudu_client.create_table(
          name, schema, partitioning=Partitioning().set_range_partition_columns(["id"]))
      kudu_name = kudu_client.table(name).name

      impala_table_name = self.get_kudu_table_base_name(kudu_name)
      props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_name
      create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
          impala_table_name, props)
      cursor.execute(create_stmt)
      with self.drop_impala_table_after_context(cursor, impala_table_name):
        cursor.execute("SHOW RANGE PARTITIONS %s" % impala_table_name)
        expected_description = [
            ('RANGE (id)', 'STRING', None, None, None, None, None)]
        expected_rows = [('UNBOUNDED',)]
        assert cursor.description == expected_description
        assert cursor.fetchall() == expected_rows
    finally:
      # Clean up the Kudu-side table regardless of how the checks went.
      if kudu_client.table_exists(name):
        kudu_client.delete_table(name)
示例#5
0
    def temp_kudu_table(self,
                        kudu,
                        col_types,
                        name=None,
                        num_key_cols=1,
                        col_names=None,
                        prepend_db_name=True,
                        db_name=None,
                        num_partitions=2):
        """Create and return a table. This function should be used in a "with" context.
        'kudu' must be a kudu.client.Client. If a table name is not provided, a random
        name will be used. If 'prepend_db_name' is True, the table name will be prepended
        with ('db_name' + ".") when 'db_name' is given, otherwise with the lower-cased
        (get_db_name() + "."). If column names are not provided, the letters
        "a", "b", "c", ... will be used. The first 'num_key_cols' columns are made
        non-nullable and form the primary key. The table is hash partitioned on those
        key columns; the number of partitions can be set using 'num_partitions'.

        Raises an Exception if default column names are needed for more than 26 columns.
        The table is deleted when the "with" block exits, if it still exists.

        Example:
          with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
             assert kudu.table_exists(kudu_table.name)
          assert not kudu.table_exists(kudu_table.name)
        """
        if not col_names:
            if len(col_types) > 26:
                raise Exception("Too many columns for default naming")
            # Default names are consecutive lowercase letters starting at "a".
            # 'enumerate' avoids the Python-2-only 'xrange' builtin.
            col_names = [chr(ord("a") + i) for i, _ in enumerate(col_types)]
        key_col_names = col_names[:num_key_cols]
        schema_builder = SchemaBuilder()
        for i, t in enumerate(col_types):
            column_spec = schema_builder.add_column(col_names[i], type_=t)
            # Only the leading key columns are explicitly marked as non-nullable.
            if i < num_key_cols:
                column_spec.nullable(False)
        schema_builder.set_primary_keys(key_col_names)
        schema = schema_builder.build()
        name = name or self.random_table_name()
        if prepend_db_name:
            name = (db_name or self.get_db_name().lower()) + "." + name
        kudu.create_table(name,
                          schema,
                          partitioning=Partitioning().add_hash_partitions(
                              key_col_names, num_partitions))
        try:
            yield kudu.table(name)
        finally:
            # The caller may already have dropped the table, so check before deleting.
            if kudu.table_exists(name):
                kudu.delete_table(name)
示例#6
0
    def test_external_timestamp_default_value(self, cursor, kudu_client,
                                              unique_database):
        """Check that Impala can load a Kudu table whose UNIXTIME_MICROS column has a
        default value, and that DESCRIBE reports that column correctly.

        The table is created outside Impala with an "id" key column and a "ts" column
        defaulting to 2009-01-01 00:00 UTC. After loading it as an external table, the
        DESCRIBE output must contain the expected "ts" row, whose default is rendered as
        1230768000000000. The Kudu table is deleted on exit if it still exists."""
        builder = SchemaBuilder()
        builder.add_column("id", INT64).nullable(False)
        builder.add_column("ts", UNIXTIME_MICROS).default(
            datetime(2009, 1, 1, 0, 0, tzinfo=utc))
        builder.set_primary_keys(["id"])
        schema = builder.build()
        name = unique_database + ".tsdefault"

        try:
            kudu_client.create_table(
                name, schema,
                partitioning=Partitioning().set_range_partition_columns(["id"]))
            kudu_name = kudu_client.table(name).name
            impala_table_name = self.get_kudu_table_base_name(kudu_name)
            props = "TBLPROPERTIES('kudu.table_name'='%s')" % kudu_name
            create_stmt = "CREATE EXTERNAL TABLE %s STORED AS KUDU %s" % (
                impala_table_name, props)
            cursor.execute(create_stmt)
            with self.drop_impala_table_after_context(cursor,
                                                      impala_table_name):
                cursor.execute("DESCRIBE %s" % impala_table_name)
                # Strip whitespace from every non-empty cell so rows compare cleanly.
                table_desc = []
                for row in cursor:
                    table_desc.append([col.strip() if col else col for col in row])
                # Pytest shows truncated output on failure, so print the details just in case.
                LOG.info(table_desc)
                expected_ts_row = ["ts", "timestamp", "", "false", "true",
                                   "1230768000000000", "AUTO_ENCODING",
                                   "DEFAULT_COMPRESSION", "0"]
                assert expected_ts_row in table_desc
        finally:
            # Clean up the Kudu-side table regardless of how the checks went.
            if kudu_client.table_exists(name):
                kudu_client.delete_table(name)