Example #1
    def test_drop_managed_kudu_table(self, cursor, kudu_client,
                                     unique_database):
        """Check that dropping a managed Kudu table should fail if the underlying
       Kudu table has been dropped externally. Increase timeout of individual
       Kudu client rpcs to avoid requests fail due to operation delay in the
       Hive Metastore for managed tables (IMPALA-8856).
    """
        impala_tbl_name = "foo"
        cursor.execute(
            """CREATE TABLE %s.%s (a INT PRIMARY KEY) PARTITION BY HASH (a)
        PARTITIONS 3 STORED AS KUDU""" % (unique_database, impala_tbl_name))
        kudu_tbl_name = KuduTestSuite.to_kudu_table_name(
            unique_database, impala_tbl_name)
        assert kudu_client.table_exists(kudu_tbl_name)
        kudu_client.delete_table(kudu_tbl_name)
        assert not kudu_client.table_exists(kudu_tbl_name)

        # Wait for events to prevent race condition
        EventProcessorUtils.wait_for_event_processing(self)

        try:
            cursor.execute("DROP TABLE %s" % kudu_tbl_name)
            assert False
        except Exception as e:
            LOG.info(str(e))
            "Table does not exist: %s" % kudu_tbl_name in str(e)
Example #2
  def __exec_sql_and_check_selfevent_counter(self, stmt, use_impala_client,
                                             check_self_event_counter=True):
    """
    Method runs a given query statement using a impala client or hive client based on the
    argument use_impala_client and confirms if the self-event related counters are as
    expected based on whether we expect a self-event or not. If the
    check_self_event_counter is False it skips checking the self-events-skipped metric.
    """
    self_events, tbls_refreshed, partitions_refreshed = self.__get_self_event_metrics()
    if not use_impala_client:
      self.run_stmt_in_hive(stmt)
    else:
      self.client.execute(stmt)

    EventProcessorUtils.wait_for_event_processing(self)
    self_events_after, tbls_refreshed_after, partitions_refreshed_after = \
      self.__get_self_event_metrics()
    # We assume that any events generated by statements run from the Impala client are
    # self-events.
    if use_impala_client:
      # The self-events counter must increase for a self-event when
      # check_self_event_counter is set.
      if check_self_event_counter:
        assert self_events_after > self_events
      # if this is a self-event, no table or partitions should be refreshed
      assert tbls_refreshed == tbls_refreshed_after
      assert partitions_refreshed == partitions_refreshed_after
    else:
      # Hive was used to run the statement, so any events generated should not have been
      # deemed self-events.
      assert self_events == self_events_after
Example #3
 def test_drop_table_events(self):
     """IMPALA-10187: Event processing fails on multiple events + DROP TABLE.
 This test issues ALTER TABLE + DROP in quick succession and checks whether event
 processing still works.
 """
     event_proc_timeout = 15
     db_name = ImpalaTestSuite.get_random_name("drop_event_db_")
     with HiveDbWrapper(self, db_name):
         tbl_name = "foo"
         self.run_stmt_in_hive("""
       drop table if exists {db}.{tbl};
       create table {db}.{tbl} (id int);
       insert into {db}.{tbl} values(1);""".format(db=db_name,
                                                   tbl=tbl_name))
         # With MetastoreEventProcessor running, the insert event will be processed. Query
         # the table from Impala.
         EventProcessorUtils.wait_for_event_processing(
             self, event_proc_timeout)
         # Verify that the data is present in Impala.
         data = self.execute_scalar("select * from %s.%s" %
                                    (db_name, tbl_name))
         assert data == '1'
         # Execute ALTER TABLE + DROP in quick succession so they will be processed in the
         # same event batch.
         self.run_stmt_in_hive("""
       alter table {db}.{tbl} set tblproperties ('foo'='bar');
       drop table {db}.{tbl};""".format(db=db_name, tbl=tbl_name))
         EventProcessorUtils.wait_for_event_processing(
             self, event_proc_timeout)
         # Check that the event processor status is still ACTIVE.
         assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
Example #4
 def test_describe_db(self, vector, cluster_properties):
     self.__test_describe_db_cleanup()
     try:
         self.client.execute("create database impala_test_desc_db1")
         self.client.execute("create database impala_test_desc_db2 "
                             "comment 'test comment'")
         self.client.execute("create database impala_test_desc_db3 "
                             "location '" + get_fs_path("/testdb") + "'")
         self.client.execute(
             "create database impala_test_desc_db4 comment 'test comment' "
             "location \"" + get_fs_path("/test2.db") + "\"")
         self.client.execute(
             "create database impala_test_desc_db5 comment 'test comment' "
             "managedlocation \"" + get_fs_path("/test2.db") + "\"")
         self.run_stmt_in_hive(
             "create database hive_test_desc_db comment 'test comment' "
             "with dbproperties('pi' = '3.14', 'e' = '2.82')")
         self.run_stmt_in_hive(
             "create database hive_test_desc_db2 comment 'test comment' "
             "managedlocation '" + get_fs_path("/test2.db") + "'")
         if cluster_properties.is_event_polling_enabled():
             # Using HMS event processor - wait until the database shows up.
             assert EventProcessorUtils.get_event_processor_status(
             ) == "ACTIVE"
             EventProcessorUtils.wait_for_event_processing(self)
             self.confirm_db_exists("hive_test_desc_db")
         else:
             # Invalidate metadata to pick up hive-created db.
             self.client.execute("invalidate metadata")
         self.run_test_case('QueryTest/describe-db', vector)
     finally:
         self.__test_describe_db_cleanup()
Example #5
 def check_self_events(query, skips_events=True):
     tbls_refreshed_before, partitions_refreshed_before, \
         events_skipped_before = self.__get_self_event_metrics()
     self.client.execute(query)
     EventProcessorUtils.wait_for_event_processing(self)
     tbls_refreshed_after, partitions_refreshed_after, \
         events_skipped_after = self.__get_self_event_metrics()
     assert tbls_refreshed_before == tbls_refreshed_after
     assert partitions_refreshed_before == partitions_refreshed_after
     if skips_events:
         assert events_skipped_after > events_skipped_before
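A hypothetical use of the helper above (database and table names are illustrative only): a DDL issued through Impala should be recognised as a self-event and only bump the skipped-events counter, while a REFRESH generates no HMS event at all and can pass skips_events=False.

 # Hypothetical usage; the database/table names below are illustrative only.
 db = "self_events_test_db"
 check_self_events("alter table {0}.tbl_self_events add columns (c1 int)".format(db))
 check_self_events("refresh {0}.tbl_self_events".format(db), skips_events=False)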
Example #6
 def __get_self_event_metrics():
   """
    Gets the self-events-skipped, tables-refreshed and partitions-refreshed metric
    values from the Metastore EventsProcessor.
   """
   tbls_refreshed_count = EventProcessorUtils.get_event_processor_metric(
     'tables-refreshed', 0)
   partitions_refreshed_count = EventProcessorUtils.get_event_processor_metric(
     'partitions-refreshed', 0)
   self_events_count = EventProcessorUtils.get_event_processor_metric(
     'self-events-skipped', 0)
   return int(self_events_count), int(tbls_refreshed_count), int(
     partitions_refreshed_count)
Example #7
 def test_events_on_blacklisted_objects(self):
   """Executes hive queries on blacklisted database and tables and makes sure that
   event processor does not error out
   """
   try:
     self.run_stmt_in_hive("create database testBlackListedDb")
     self.run_stmt_in_hive("create table testBlackListedDb.testtbl (id int)")
     self.run_stmt_in_hive(
       "create table functional_parquet.testBlackListedTbl (id int, val string)"
       " partitioned by (part int) stored as parquet")
     self.run_stmt_in_hive(
       "alter table functional_parquet.testBlackListedTbl add partition (part=1)")
     # wait until all the events generated above are processed
     EventProcessorUtils.wait_for_event_processing(self.hive_client)
     assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
   finally:
     self.run_stmt_in_hive("drop database testBlackListedDb cascade")
     self.run_stmt_in_hive("drop table functional_parquet.testBlackListedTbl")
Example #8
    def test_drop_database(self, vector):
        """
    If a DB is created, then dropped, in Hive, Impala can create one with the
    same name without invalidating metadata.
    """

        test_db = self.unique_string()
        with HiveDbWrapper(self, test_db) as db_name:
            pass
        # If event processing is turned on, we should make sure that the drop
        # database event above is processed to avoid flakiness.
        EventProcessorUtils.wait_for_event_processing(self)
        self.assert_sql_error(
            self.client.execute,
            'create table %s.%s (x int)' % (test_db, self.unique_string()),
            'Database does not exist: %s' % test_db)
        with self.ImpalaDbWrapper(self, test_db) as db_name:
            pass
Example #9
    def __exec_sql_and_check_selfevent_counter(
            self, stmt, use_impala_client, check_events_skipped_counter=True):
        """
    Method runs a given query statement using a impala client or hive client based on the
    argument use_impala_client and confirms if the self-event related counters are as
    expected based on whether we expect a self-event or not. If the
    check_self_event_counter is False it skips checking the events-skipped metric.
    """
        EventProcessorUtils.wait_for_event_processing(self)
        tbls_refreshed, partitions_refreshed, \
          events_skipped = self.__get_self_event_metrics()
        last_synced_event = EventProcessorUtils.get_last_synced_event_id()
        logging.info("Running statement in {1}: {0}".format(
            stmt, "impala" if use_impala_client else "hive"))
        if not use_impala_client:
            self.run_stmt_in_hive(stmt)
        else:
            self.client.execute(stmt)

        EventProcessorUtils.wait_for_event_processing(self)
        tbls_refreshed_after, partitions_refreshed_after, \
          events_skipped_after = self.__get_self_event_metrics()
        last_synced_event_after = EventProcessorUtils.get_last_synced_event_id(
        )
        # We assume that any events generated by statements run from the Impala client
        # are self-events.
        logging.info("Event id before {0} event id after {1}".format(
            last_synced_event, last_synced_event_after))
        if use_impala_client:
            # The events-skipped counter must increase for a self-event when
            # check_events_skipped_counter is set. Some of the test queries generate no
            # events at all; if that is the case, skip the comparison below.
            if last_synced_event_after > last_synced_event:
                if check_events_skipped_counter:
                    assert events_skipped_after > events_skipped, \
                      "Failing query(impala={}): {}".format(use_impala_client, stmt)
            # if this is a self-event, no table or partitions should be refreshed
            assert tbls_refreshed == tbls_refreshed_after, \
              "Failing query(impala={}): {}".format(use_impala_client, stmt)
            assert partitions_refreshed == partitions_refreshed_after, \
              "Failing query(impala={}): {}".format(use_impala_client, stmt)
        else:
            # Hive was used to run the statement, so any events generated should not
            # have been deemed self-events.
            assert events_skipped == events_skipped_after
Example #10
    def test_sanity(self, vector, cluster_properties):
        """Verifies that creating a catalog entity (database, table) in Impala using
    'IF NOT EXISTS' while the entity exists in HMS, does not throw an error."""
        # Create a database in Hive
        self.run_stmt_in_hive("drop database if exists hms_sanity_db cascade")
        self.run_stmt_in_hive("create database hms_sanity_db")
        # Make sure Impala's metadata is in sync.
        # Invalidate metadata to pick up hive-created db.
        if cluster_properties.is_event_polling_enabled():
            assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
            # Using HMS event processor - wait until latest event is processed
            EventProcessorUtils.wait_for_event_processing(self)
            self.confirm_db_exists("hms_sanity_db")
            # assert 'hms_sanity_db' in self.client.execute("show databases").data
        else:
            # Using traditional catalog - need to invalidate to pick up hive-created db.
            self.client.execute("invalidate metadata")

        # Creating a database with the same name using 'IF NOT EXISTS' in Impala should
        # not fail
        self.client.execute("create database if not exists hms_sanity_db")
        # The database should appear in the catalog (IMPALA-2441)
        assert 'hms_sanity_db' in self.all_db_names()
        # Ensure a table can be created in this database from Impala and that it is
        # accessible in both Impala and Hive.
        self.client.execute(
            "create table hms_sanity_db.test_tbl_in_impala(a int)")
        self.run_stmt_in_hive("select * from hms_sanity_db.test_tbl_in_impala")
        self.client.execute("select * from hms_sanity_db.test_tbl_in_impala")
        # Create a table in Hive
        self.run_stmt_in_hive("create table hms_sanity_db.test_tbl (a int)")
        # Creating a table with the same name using 'IF NOT EXISTS' in Impala should
        # not fail
        self.client.execute(
            "create table if not exists hms_sanity_db.test_tbl (a int)")
        # The table should not appear in the catalog *immediately* unless invalidate
        # metadata is executed.
        if cluster_properties.is_event_polling_enabled():
            assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
            EventProcessorUtils.wait_for_event_processing(self)
        else:
            self.client.execute("invalidate metadata hms_sanity_db.test_tbl")
        assert 'test_tbl' in self.client.execute(
            "show tables in hms_sanity_db").data
Example #11
    def _run_test_empty_partition_events(self, unique_database,
                                         is_transactional):
        TBLPROPERTIES = ""
        if is_transactional:
            TBLPROPERTIES = "TBLPROPERTIES ('transactional'='true'," \
                "'transactional_properties'='insert_only')"
        test_tbl = unique_database + ".test_events"
        self.run_stmt_in_hive("create table {0} (key string, value string) \
      partitioned by (year int) stored as parquet {1}".format(
            test_tbl, TBLPROPERTIES))
        EventProcessorUtils.wait_for_event_processing(self.hive_client)
        self.client.execute("describe {0}".format(test_tbl))

        self.run_stmt_in_hive(
            "alter table {0} add partition (year=2019)".format(test_tbl))
        EventProcessorUtils.wait_for_event_processing(self.hive_client)
        assert [('2019', )] == self.get_impala_partition_info(test_tbl, 'year')

        self.run_stmt_in_hive(
            "alter table {0} add if not exists partition (year=2019)".format(
                test_tbl))
        EventProcessorUtils.wait_for_event_processing(self.hive_client)
        assert [('2019', )] == self.get_impala_partition_info(test_tbl, 'year')
        assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"

        self.run_stmt_in_hive(
            "alter table {0} drop partition (year=2019)".format(test_tbl))
        EventProcessorUtils.wait_for_event_processing(self.hive_client)
        assert ('2019',) not in self.get_impala_partition_info(test_tbl, 'year')
        assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"

        self.run_stmt_in_hive(
            "alter table {0} drop if exists partition (year=2019)".format(
                test_tbl))
        EventProcessorUtils.wait_for_event_processing(self.hive_client)
        assert ('2019',) not in self.get_impala_partition_info(test_tbl, 'year')
        assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
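Several of the later examples call self.__get_transactional_tblproperties(), which is not included in this collection; a minimal sketch consistent with the TBLPROPERTIES literal used above (an assumption about the real helper, not its actual implementation):

    def __get_transactional_tblproperties(self, is_transactional):
        # Sketch (assumption): return the insert-only transactional table properties
        # used in these tests, or an empty string for a non-transactional table.
        if not is_transactional:
            return ""
        return "TBLPROPERTIES ('transactional'='true'," \
            "'transactional_properties'='insert_only')"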
Example #12
 def test_passthrough_apis(self):
     """
     This test exercises some of the Catalog HMS APIs which are directly
     passed through to the backing HMS service. This is by no means an
     exhaustive set, but is merely used as a sanity check to make sure that a
     hive_client can connect to the Catalog's metastore service and is
     able to execute calls to the backing HMS service.
     """
     catalog_hms_client = None
     db_name = ImpalaTestSuite.get_random_name("test_passthrough_apis_db")
     try:
         catalog_hms_client, hive_transport = ImpalaTestSuite.create_hive_client(
             5899)
         assert catalog_hms_client is not None
         # get_databases
         databases = catalog_hms_client.get_all_databases()
         assert databases is not None
         assert len(databases) > 0
         assert "functional" in databases
         # get_database
         database = catalog_hms_client.get_database("functional")
         assert database is not None
         assert "functional" == database.name
         # get_tables
         tables = catalog_hms_client.get_tables("functional", "*")
         assert tables is not None
         assert len(tables) > 0
         assert "alltypes" in tables
         # get table
         table = catalog_hms_client.get_table("functional", "alltypes")
         assert table is not None
         assert "alltypes" == table.tableName
         assert table.sd is not None
         # get partitions
         partitions = catalog_hms_client.get_partitions(
             "functional", "alltypes", -1)
         assert partitions is not None
         assert len(partitions) > 0
         # get partition names
         part_names = catalog_hms_client.get_partition_names(
             "functional", "alltypes", -1)
         assert part_names is not None
         assert len(part_names) > 0
         assert "year=2009/month=1" in part_names
         # notification APIs
         event_id = EventProcessorUtils.get_current_notification_id(
             catalog_hms_client)
         assert event_id is not None
         assert event_id > 0
         # DDLs
         catalog_hms_client.create_database(
             self.__get_test_database(db_name))
         database = catalog_hms_client.get_database(db_name)
         assert database is not None
         assert db_name == database.name
         tbl_name = ImpalaTestSuite.get_random_name(
             "test_passthrough_apis_tbl")
         cols = [["c1", "int", "col 1"], ["c2", "string", "col 2"]]
         part_cols = [["part", "string", "part col"]]
         catalog_hms_client.create_table(
             self.__get_test_tbl(db_name, tbl_name, cols, part_cols))
         table = catalog_hms_client.get_table(db_name, tbl_name)
         assert table is not None
         assert tbl_name == table.tableName
         self.__compare_cols(cols, table.sd.cols)
         self.__compare_cols(part_cols, table.partitionKeys)
     finally:
         if catalog_hms_client is not None:
             catalog_hms_client.drop_database(db_name, True, True)
             catalog_hms_client.shutdown()
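The __get_test_database() and __get_test_tbl() helpers are not included in this collection; a rough sketch of the database helper, assuming the standard hive_metastore thrift Database struct (the field names follow the HMS thrift definition; the real helper may set more fields):

 def __get_test_database(self, db_name):
     # Sketch (assumption): build a bare HMS thrift Database object for the
     # passthrough create_database() call above.
     from hive_metastore.ttypes import Database
     return Database(name=db_name, description="test database",
                     locationUri=None, parameters={})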
Example #13
    def run_test_insert_events(self, unique_database, is_transactional=False):
        """Test for insert event processing. Events are created in Hive and processed in
    Impala. The following cases are tested :
    Insert into table --> for partitioned and non-partitioned table
    Insert overwrite table --> for partitioned and non-partitioned table
    Insert into partition --> for partitioned table
    """
        # Test table with no partitions.
        tbl_insert_nopart = 'tbl_insert_nopart'
        self.run_stmt_in_hive("drop table if exists %s.%s" %
                              (unique_database, tbl_insert_nopart))
        tblproperties = ""
        if is_transactional:
            tblproperties = "tblproperties ('transactional'='true'," \
                "'transactional_properties'='insert_only')"
        self.run_stmt_in_hive(
            "create table %s.%s (id int, val int) %s" %
            (unique_database, tbl_insert_nopart, tblproperties))
        EventProcessorUtils.wait_for_event_processing(self)
        # Test CTAS and insert by Impala with empty results (IMPALA-10765).
        self.execute_query(
            "create table {db}.ctas_tbl {prop} as select * from {db}.{tbl}".
            format(db=unique_database,
                   tbl=tbl_insert_nopart,
                   prop=tblproperties))
        self.execute_query(
            "insert into {db}.ctas_tbl select * from {db}.{tbl}".format(
                db=unique_database, tbl=tbl_insert_nopart))
        # Test insert into table, this will fire an insert event.
        self.run_stmt_in_hive("insert into %s.%s values(101, 200)" %
                              (unique_database, tbl_insert_nopart))
        # With MetastoreEventProcessor running, the insert event will be processed. Query the
        # table from Impala.
        EventProcessorUtils.wait_for_event_processing(self)
        # Verify that the data is present in Impala.
        data = self.execute_scalar("select * from %s.%s" %
                                   (unique_database, tbl_insert_nopart))
        assert data.split('\t') == ['101', '200']

        # Test insert overwrite. Overwrite the existing value.
        self.run_stmt_in_hive("insert overwrite table %s.%s values(101, 201)" %
                              (unique_database, tbl_insert_nopart))
        # Make sure the event has been processed.
        EventProcessorUtils.wait_for_event_processing(self)
        # Verify that the data is present in Impala.
        data = self.execute_scalar("select * from %s.%s" %
                                   (unique_database, tbl_insert_nopart))
        assert data.split('\t') == ['101', '201']
        # Test insert overwrite by Impala with empty results (IMPALA-10765).
        self.execute_query(
            "insert overwrite {db}.{tbl} select * from {db}.ctas_tbl".format(
                db=unique_database, tbl=tbl_insert_nopart))
        result = self.execute_query("select * from {db}.{tbl}".format(
            db=unique_database, tbl=tbl_insert_nopart))
        assert len(result.data) == 0

        # Test partitioned table.
        tbl_insert_part = 'tbl_insert_part'
        self.run_stmt_in_hive("drop table if exists %s.%s" %
                              (unique_database, tbl_insert_part))
        self.run_stmt_in_hive(
            "create table %s.%s (id int, name string) "
            "partitioned by(day int, month int, year int) %s" %
            (unique_database, tbl_insert_part, tblproperties))
        EventProcessorUtils.wait_for_event_processing(self)
        # Create a partitioned CTAS table and insert into it by Impala with empty
        # results (IMPALA-10765).
        self.execute_query(
            "create table {db}.ctas_part partitioned by (day, month, year) {prop} as "
            "select * from {db}.{tbl}".format(db=unique_database,
                                              tbl=tbl_insert_part,
                                              prop=tblproperties))
        self.execute_query(
            "insert into {db}.ctas_part partition(day=0, month=0, year=0) select id, "
            "name from {db}.{tbl}".format(db=unique_database,
                                          tbl=tbl_insert_part))
        # Insert data into partitions.
        self.run_stmt_in_hive(
            "insert into %s.%s partition(day=28, month=03, year=2019)"
            "values(101, 'x')" % (unique_database, tbl_insert_part))
        # Make sure the event has been processed.
        EventProcessorUtils.wait_for_event_processing(self)
        # Verify that the data is present in Impala.
        data = self.execute_scalar("select * from %s.%s" %
                                   (unique_database, tbl_insert_part))
        assert data.split('\t') == ['101', 'x', '28', '3', '2019']

        # Test inserting into existing partitions.
        self.run_stmt_in_hive(
            "insert into %s.%s partition(day=28, month=03, year=2019)"
            "values(102, 'y')" % (unique_database, tbl_insert_part))
        EventProcessorUtils.wait_for_event_processing(self)
        # Verify that the data is present in Impala.
        data = self.execute_scalar(
            "select count(*) from %s.%s where day=28 and month=3 "
            "and year=2019" % (unique_database, tbl_insert_part))
        assert data.split('\t') == ['2']
        # Test inserting into existing partitions by Impala with empty results
        # (IMPALA-10765).
        self.execute_query(
            "insert into {db}.{tbl} partition(day=28, month=03, year=2019) "
            "select id, name from {db}.ctas_part".format(db=unique_database,
                                                         tbl=tbl_insert_part))

        # Test insert overwrite into existing partitions
        self.run_stmt_in_hive(
            "insert overwrite table %s.%s partition(day=28, month=03, "
            "year=2019)"
            "values(101, 'z')" % (unique_database, tbl_insert_part))
        EventProcessorUtils.wait_for_event_processing(self)
        # Verify that the data is present in Impala.
        data = self.execute_scalar(
            "select * from %s.%s where day=28 and month=3 and"
            " year=2019 and id=101" % (unique_database, tbl_insert_part))
        assert data.split('\t') == ['101', 'z', '28', '3', '2019']
        # Test insert overwrite into existing partitions by Impala with empty results
        # (IMPALA-10765).
        self.execute_query("insert overwrite {db}.{tbl} "
                           "partition(day=28, month=03, year=2019) "
                           "select id, name from {db}.ctas_part".format(
                               db=unique_database, tbl=tbl_insert_part))
        result = self.execute_query(
            "select * from {db}.{tbl} "
            "where day=28 and month=3 and year=2019".format(
                db=unique_database, tbl=tbl_insert_part))
        assert len(result.data) == 0
Example #14
    def test_hive_impala_interop(self, vector, unique_database,
                                 cluster_properties):
        # Setup source table.
        source_table = "{0}.{1}".format(unique_database, "t1_source")
        self.execute_query_expect_success(
            self.client,
            "create table {0} as select * from functional_parquet.alltypes".
            format(source_table))
        self.execute_query_expect_success(
            self.client,
            "insert into {0}(id) values (7777), (8888), (9999), (11111), (22222), (33333)"
            .format(source_table))

        # Loop through the compression codecs and run interop tests.
        for codec in PARQUET_CODECS:
            # Write data in Impala.
            vector.get_value('exec_option')['compression_codec'] = codec
            impala_table = "{0}.{1}".format(unique_database, "t1_impala")
            self.execute_query_expect_success(
                self.client, "drop table if exists {0}".format(impala_table))
            self.execute_query_expect_success(
                self.client,
                "create table {0} stored as parquet as select * from {1}".
                format(impala_table, source_table),
                vector.get_value('exec_option'))

            # Read data from Impala and write in Hive
            if (codec == 'none'): codec = 'uncompressed'
            elif (codec == 'zstd:7'): codec = 'zstd'
            hive_table = "{0}.{1}".format(unique_database, "t1_hive")
            self.run_stmt_in_hive(
                "drop table if exists {0}".format(hive_table))
            # For Hive 3+, workaround for HIVE-22371 (CTAS puts files in the wrong place) by
            # explicitly creating an external table so that files are in the external warehouse
            # directory. Use external.table.purge=true so that it is equivalent to a Hive 2
            # managed table. Hive 2 stays the same.
            external = ""
            tblproperties = ""
            if HIVE_MAJOR_VERSION >= 3:
                external = "external"
                tblproperties = "TBLPROPERTIES('external.table.purge'='TRUE')"
            self.run_stmt_in_hive("set parquet.compression={0};\
          create {1} table {2} stored as parquet {3} as select * from {4}".
                                  format(codec, external, hive_table,
                                         tblproperties, impala_table))

            # Make sure Impala's metadata is in sync.
            if cluster_properties.is_event_polling_enabled():
                assert EventProcessorUtils.get_event_processor_status(
                ) == "ACTIVE"
                EventProcessorUtils.wait_for_event_processing(self)
                self.confirm_table_exists(unique_database, "t1_hive")
            else:
                self.client.execute(
                    "invalidate metadata {0}".format(hive_table))

            # Read Hive data in Impala and verify results.
            base_result = self.execute_query_expect_success(
                self.client,
                "select * from {0} order by id".format(source_table))
            test_result = self.execute_query_expect_success(
                self.client,
                "select * from {0} order by id".format(hive_table))
            verify_query_result_is_equal(test_result.data, base_result.data)
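PARQUET_CODECS is referenced above but not defined in this collection; a plausible definition consistent with the special handling of 'none' and 'zstd:7' inside the loop (an assumption; the real list may include more codecs):

PARQUET_CODECS = ['none', 'snappy', 'gzip', 'zstd:7']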
Example #15
    def __run_event_based_replication_tests(self, transactional=True):
        """Hive Replication relies on the insert events generated on the tables.
    This test issues some basic replication commands from Hive and makes sure
    that the replicated table has correct data."""
        TBLPROPERTIES = self.__get_transactional_tblproperties(transactional)
        source_db = ImpalaTestSuite.get_random_name("repl_source_")
        target_db = ImpalaTestSuite.get_random_name("repl_target_")
        unpartitioned_tbl = "unpart_tbl"
        partitioned_tbl = "part_tbl"
        try:
            self.run_stmt_in_hive("create database {0}".format(source_db))
            self.run_stmt_in_hive(
                "alter database {0} set dbproperties ('repl.source.for'='xyz')"
                .format(source_db))
            EventProcessorUtils.wait_for_event_processing(self)
            # Use an explicit create table command since "create table like" doesn't
            # allow tblproperties.
            self.client.execute(
                "create table {0}.{1} (a string, b string) stored as parquet"
                " {2}".format(source_db, unpartitioned_tbl, TBLPROPERTIES))
            self.client.execute(
                "create table {0}.{1} (id int, bool_col boolean, tinyint_col tinyint, "
                "smallint_col smallint, int_col int, bigint_col bigint, float_col float, "
                "double_col double, date_string string, string_col string, "
                "timestamp_col timestamp) partitioned by (year int, month int) stored as parquet"
                " {2}".format(source_db, partitioned_tbl, TBLPROPERTIES))

            # case I: insert
            # load the table with some data from impala, this also creates new partitions.
            self.client.execute("insert into {0}.{1}"
                                " select * from functional.tinytable".format(
                                    source_db, unpartitioned_tbl))
            self.client.execute(
                "insert into {0}.{1} partition(year,month)"
                " select * from functional_parquet.alltypessmall".format(
                    source_db, partitioned_tbl))
            rows_in_unpart_tbl = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    source_db, unpartitioned_tbl)).split('\t')[0])
            rows_in_part_tbl = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    source_db, partitioned_tbl)).split('\t')[0])
            assert rows_in_unpart_tbl > 0
            assert rows_in_part_tbl > 0
            # bootstrap the replication
            self.run_stmt_in_hive("repl dump {0}".format(source_db))
            # create a target database where tables will be replicated
            self.client.execute("create database {0}".format(target_db))
            # replicate the table from source to target
            self.run_stmt_in_hive("repl load {0} into {1}".format(
                source_db, target_db))
            EventProcessorUtils.wait_for_event_processing(self)
            assert unpartitioned_tbl in self.client.execute(
                "show tables in {0}".format(target_db)).get_data()
            assert partitioned_tbl in self.client.execute(
                "show tables in {0}".format(target_db)).get_data()
            # Confirm that the number of rows in the target tables matches the source
            # tables.
            rows_in_unpart_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, unpartitioned_tbl)).split('\t')[0])
            rows_in_part_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, partitioned_tbl)).split('\t')[0])
            assert rows_in_unpart_tbl == rows_in_unpart_tbl_target
            assert rows_in_part_tbl == rows_in_part_tbl_target

            # case II: insert into existing partitions.
            self.client.execute("insert into {0}.{1}"
                                " select * from functional.tinytable".format(
                                    source_db, unpartitioned_tbl))
            self.client.execute(
                "insert into {0}.{1} partition(year,month)"
                " select * from functional_parquet.alltypessmall".format(
                    source_db, partitioned_tbl))
            self.run_stmt_in_hive("repl dump {0}".format(source_db))
            # replicate the table from source to target
            self.run_stmt_in_hive("repl load {0} into {1}".format(
                source_db, target_db))
            # we wait until the events catch up in case repl command above did some HMS
            # operations.
            EventProcessorUtils.wait_for_event_processing(self)
            # Confirm that the number of rows in the target tables matches the source
            # tables.
            rows_in_unpart_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, unpartitioned_tbl)).split('\t')[0])
            rows_in_part_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, partitioned_tbl)).split('\t')[0])
            assert 2 * rows_in_unpart_tbl == rows_in_unpart_tbl_target
            assert 2 * rows_in_part_tbl == rows_in_part_tbl_target

            # Case III: insert overwrite
            # Impala does an insert overwrite of the tables.
            self.client.execute("insert overwrite table {0}.{1}"
                                " select * from functional.tinytable".format(
                                    source_db, unpartitioned_tbl))
            self.client.execute(
                "insert overwrite table {0}.{1} partition(year,month)"
                " select * from functional_parquet.alltypessmall".format(
                    source_db, partitioned_tbl))
            self.run_stmt_in_hive("repl dump {0}".format(source_db))
            # replicate the table from source to target
            self.run_stmt_in_hive("repl load {0} into {1}".format(
                source_db, target_db))
            # we wait until the events catch up in case repl command above did some HMS
            # operations.
            EventProcessorUtils.wait_for_event_processing(self)
            # Confirm that the number of rows in the target tables matches the source
            # tables.
            rows_in_unpart_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, unpartitioned_tbl)).split('\t')[0])
            rows_in_part_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, partitioned_tbl)).split('\t')[0])
            assert rows_in_unpart_tbl == rows_in_unpart_tbl_target
            assert rows_in_part_tbl == rows_in_part_tbl_target

            # Case IV: CTAS which creates a transactional table.
            self.client.execute(
                "create table {0}.insertonly_nopart_ctas {1} as "
                "select * from {0}.{2}".format(source_db, TBLPROPERTIES,
                                               unpartitioned_tbl))
            self.client.execute(
                "create table {0}.insertonly_part_ctas partitioned by (year, month) {1}"
                " as select * from {0}.{2}".format(source_db, TBLPROPERTIES,
                                                   partitioned_tbl))
            self.run_stmt_in_hive("repl dump {0}".format(source_db))
            # replicate the table from source to target
            self.run_stmt_in_hive("repl load {0} into {1}".format(
                source_db, target_db))
            # we wait until the events catch up in case repl command above did some HMS
            # operations.
            EventProcessorUtils.wait_for_event_processing(self)
            # Confirm that the number of rows in the target tables matches the source
            # tables.
            rows_in_unpart_tbl_source = int(
                self.execute_scalar("select count(*) from "
                                    "{0}.insertonly_nopart_ctas".format(
                                        source_db)).split('\t')[0])
            rows_in_unpart_tbl_target = int(
                self.execute_scalar("select count(*) from "
                                    "{0}.insertonly_nopart_ctas".format(
                                        target_db)).split('\t')[0])
            assert rows_in_unpart_tbl_source == rows_in_unpart_tbl_target
            rows_in_unpart_tbl_source = int(
                self.execute_scalar("select count(*) from "
                                    "{0}.insertonly_part_ctas".format(
                                        source_db)).split('\t')[0])
            rows_in_unpart_tbl_target = int(
                self.execute_scalar("select count(*) from "
                                    "{0}.insertonly_part_ctas".format(
                                        target_db)).split('\t')[0])
            assert rows_in_unpart_tbl_source == rows_in_unpart_tbl_target

            # Case V: truncate table
            # impala truncates both the tables. Make sure replication sees that.
            self.client.execute("truncate table {0}.{1}".format(
                source_db, unpartitioned_tbl))
            self.client.execute("truncate table {0}.{1}".format(
                source_db, partitioned_tbl))
            self.run_stmt_in_hive("repl dump {0}".format(source_db))
            # replicate the table from source to target
            self.run_stmt_in_hive("repl load {0} into {1}".format(
                source_db, target_db))
            # we wait until the events catch up in case repl command above did some HMS
            # operations.
            EventProcessorUtils.wait_for_event_processing(self)
            # Confirm that the number of rows in the target tables matches the source
            # tables.
            rows_in_unpart_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, unpartitioned_tbl)).split('\t')[0])
            rows_in_part_tbl_target = int(
                self.execute_scalar("select count(*) from {0}.{1}".format(
                    target_db, partitioned_tbl)).split('\t')[0])
            assert rows_in_unpart_tbl_target == 0
            assert rows_in_part_tbl_target == 0
        finally:
            src_db = self.__get_db_nothrow(source_db)
            target_db_obj = self.__get_db_nothrow(target_db)
            if src_db is not None:
                self.run_stmt_in_hive(
                    "alter database {0} set dbproperties ('repl.source.for'='')"
                    .format(source_db))
                self.run_stmt_in_hive(
                    "drop database if exists {0} cascade".format(source_db))
            if target_db_obj is not None:
                self.run_stmt_in_hive(
                    "drop database if exists {0} cascade".format(target_db))
            # workaround for HIVE-24135. the managed db location doesn't get cleaned up
            if src_db is not None and src_db.managedLocationUri is not None:
                self.filesystem_client.delete_file_dir(
                    src_db.managedLocationUri, True)
            if target_db_obj is not None and target_db_obj.managedLocationUri is not None:
                self.filesystem_client.delete_file_dir(
                    target_db_obj.managedLocationUri, True)
Example #16
    def __run_create_drop_test(self, db, type, rename=False, rename_db=False):
        if type == "table":
            if not rename:
                queries = [
                    "create table {0}.test_{1} (i int)".format(db, 1),
                    "drop table {0}.test_{1}".format(db, 1)
                ]
            else:
                db_1 = "{}_1".format(db)
                if rename_db:
                    self.execute_query_expect_success(
                        self.create_impala_client(),
                        "drop database if exists {0} cascade".format(db_1))
                    self.execute_query_expect_success(
                        self.create_impala_client(),
                        "create database {0}".format(db_1))
                self.execute_query_expect_success(
                    self.create_impala_client(),
                    "create table if not exists {0}.rename_test_1 (i int)".
                    format(db))
                if rename_db:
                    queries = [
                        "alter table {0}.rename_test_1 rename to {1}.rename_test_1"
                        .format(db, db_1),
                        "alter table {0}.rename_test_1 rename to {1}.rename_test_1"
                        .format(db_1, db)
                    ]
                else:
                    queries = [
                        "alter table {0}.rename_test_1 rename to {0}.rename_test_2"
                        .format(db),
                        "alter table {0}.rename_test_2 rename to {0}.rename_test_1"
                        .format(db)
                    ]
            create_metric_name = "tables-added"
            removed_metric_name = "tables-removed"
        elif type == "database":
            self.execute_query_expect_success(
                self.create_impala_client(),
                "drop database if exists {0}".format("test_create_drop_db"))
            queries = [
                "create database {db}".format(db="test_create_drop_db"),
                "drop database {db}".format(db="test_create_drop_db")
            ]
            create_metric_name = "databases-added"
            removed_metric_name = "databases-removed"
        else:
            tbl_name = "test_create_drop_partition"
            self.execute_query_expect_success(
                self.create_impala_client(),
                "create table {db}.{tbl} (c int) partitioned by (p int)".
                format(db=db, tbl=tbl_name))
            queries = [
                "alter table {db}.{tbl} add partition (p=1)".format(
                    db=db, tbl=tbl_name),
                "alter table {db}.{tbl} drop partition (p=1)".format(
                    db=db, tbl=tbl_name)
            ]
            create_metric_name = "partitions-added"
            removed_metric_name = "partitions-removed"

        # get the metric before values
        EventProcessorUtils.wait_for_event_processing(self)
        create_metric_val_before = EventProcessorUtils.get_int_metric(
            create_metric_name, 0)
        removed_metric_val_before = EventProcessorUtils.get_int_metric(
            removed_metric_name, 0)
        events_skipped_before = EventProcessorUtils.get_int_metric(
            'events-skipped', 0)
        num_iters = 100
        for iter in xrange(num_iters):
            for q in queries:
                try:
                    self.execute_query_expect_success(
                        self.create_impala_client(), q)
                except Exception as e:
                    print("Failed in {} iterations. Error {}".format(
                        iter, str(e)))
                    raise
        EventProcessorUtils.wait_for_event_processing(self)
        create_metric_val_after = EventProcessorUtils.get_int_metric(
            create_metric_name, 0)
        removed_metric_val_after = EventProcessorUtils.get_int_metric(
            removed_metric_name, 0)
        events_skipped_after = EventProcessorUtils.get_int_metric(
            'events-skipped', 0)
        num_delete_event_entries = EventProcessorUtils.\
            get_int_metric('delete-event-log-size', 0)
        assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
        # None of the queries above should actually trigger an add/remove of objects
        # from the events.
        assert create_metric_val_after == create_metric_val_before
        assert removed_metric_val_after == removed_metric_val_before
        # each query set generates 2 events and both of them should be skipped
        assert events_skipped_after == num_iters * 2 + events_skipped_before
        # make sure that there are no more entries in the delete event log
        assert num_delete_event_entries == 0
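A hypothetical driver for the helper above (the test method and the unique_database fixture are illustrative), exercising the table, database and partition paths as well as the rename variants:

    def test_create_drop_events(self, unique_database):
        # Hypothetical callers; an assumption about how the helper above is driven.
        self.__run_create_drop_test(unique_database, "table")
        self.__run_create_drop_test(unique_database, "table", rename=True)
        self.__run_create_drop_test(unique_database, "table", rename=True, rename_db=True)
        self.__run_create_drop_test(unique_database, "database")
        self.__run_create_drop_test(unique_database, "partition")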
Example #17
 def test_event_batching(self, unique_database):
     """Runs queries which generate multiple ALTER_PARTITION events which must be
 batched by events processor. Runs as a custom cluster test to isolate the metric
 values from other tests."""
     testtbl = "test_event_batching"
     test_acid_tbl = "test_event_batching_acid"
     acid_props = self.__get_transactional_tblproperties(True)
     # create test tables
     self.client.execute(
         "create table {}.{} like functional.alltypes".format(
             unique_database, testtbl))
     self.client.execute(
         "insert into {}.{} partition (year,month) select * from functional.alltypes"
         .format(unique_database, testtbl))
     self.client.execute(
         "create table {}.{} (id int) partitioned by (year int, month int) {}"
         .format(unique_database, test_acid_tbl, acid_props))
     self.client.execute(
         "insert into {}.{} partition (year, month) "
         "select id, year, month from functional.alltypes".format(
             unique_database, test_acid_tbl))
     # Run compute stats from Impala; this should generate 24 ALTER_PARTITION events
     # which should be batched together into one or more batch events.
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_metric = "batch-events-created"
     batch_events_1 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     self.client.execute("compute stats {}.{}".format(
         unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_2 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     assert batch_events_2 > batch_events_1
     # run analyze stats event from hive which generates ALTER_PARTITION event on each
     # partition of the table
     self.run_stmt_in_hive("analyze table {}.{} compute statistics".format(
         unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_3 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     assert batch_events_3 > batch_events_2
     # For the transactional table, since we batch the events together, the number of
     # tables refreshed must be far lower than the number of events generated.
     num_table_refreshes_1 = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     self.client.execute("compute stats {}.{}".format(
         unique_database, test_acid_tbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_4 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     num_table_refreshes_2 = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     # We should generate at least 1 batch event, if not more, due to the 24 consecutive
     # ALTER_PARTITION events.
     assert batch_events_4 > batch_events_3
     # table should not be refreshed since this is a self-event
     assert num_table_refreshes_2 == num_table_refreshes_1
     self.run_stmt_in_hive("analyze table {}.{} compute statistics".format(
         unique_database, test_acid_tbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_5 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     assert batch_events_5 > batch_events_4
     num_table_refreshes_2 = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     # the analyze table from hive generates 24 ALTER_PARTITION events which should be
     # batched into 1-2 batches (depending on timing of the event poll thread).
     assert num_table_refreshes_2 > num_table_refreshes_1
     assert int(num_table_refreshes_2) - int(num_table_refreshes_1) < 24
     EventProcessorUtils.wait_for_event_processing(self)
     # test for batching of insert events
     batch_events_insert = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     tables_refreshed_insert = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     partitions_refreshed_insert = EventProcessorUtils.get_int_metric(
         "partitions-refreshed")
     self.client.execute(
         "insert into {}.{} partition (year,month) select * from functional.alltypes"
         .format(unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_after_insert = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     tables_refreshed_after_insert = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     partitions_refreshed_after_insert = EventProcessorUtils.get_int_metric(
         "partitions-refreshed")
     # This is a self-event, so tables or partitions should not be refreshed.
     assert batch_events_after_insert > batch_events_insert
     assert tables_refreshed_after_insert == tables_refreshed_insert
     assert partitions_refreshed_after_insert == partitions_refreshed_insert
     # run the insert from hive to make sure that batch event is refreshing all the
     # partitions
     self.run_stmt_in_hive(
         "SET hive.exec.dynamic.partition.mode=nonstrict; insert into {}.{} partition"
         " (year,month) select * from functional.alltypes".format(
             unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_after_hive = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     partitions_refreshed_after_hive = EventProcessorUtils.get_int_metric(
         "partitions-refreshed")
     assert batch_events_after_hive > batch_events_insert
     # 24 partitions inserted and hence we must refresh 24 partitions once.
     assert int(partitions_refreshed_after_hive
                ) == int(partitions_refreshed_insert) + 24
Example #18
  def test_events_on_blacklisted_objects(self):
    """Executes hive queries on blacklisted database and tables and makes sure that
    event processor does not error out
    """
    try:
      event_id_before = EventProcessorUtils.get_last_synced_event_id()
      # create a blacklisted database from hive and make sure event is ignored
      self.run_stmt_in_hive("create database TESTblackListedDb")
      # wait until all the events generated above are processed
      EventProcessorUtils.wait_for_event_processing(self)
      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
      assert EventProcessorUtils.get_last_synced_event_id() > event_id_before
      # make sure that the blacklisted db is ignored
      assert "TESTblackListedDb".lower() not in self.all_db_names()

      event_id_before = EventProcessorUtils.get_last_synced_event_id()
      self.run_stmt_in_hive("create table testBlackListedDb.testtbl (id int)")
      # create a table on the blacklisted database with a different case
      self.run_stmt_in_hive("create table TESTBLACKlISTEDDb.t2 (id int)")
      self.run_stmt_in_hive(
        "create table functional_parquet.testBlackListedTbl (id int, val string)"
        " partitioned by (part int) stored as parquet")
      self.run_stmt_in_hive(
        "alter table functional_parquet.testBlackListedTbl add partition (part=1)")
      # wait until all the events generated above are processed
      EventProcessorUtils.wait_for_event_processing(self)
      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
      assert EventProcessorUtils.get_last_synced_event_id() > event_id_before
      # make sure that the black listed table is not created
      table_names = self.client.execute("show tables in functional_parquet").get_data()
      assert "testBlackListedTbl".lower() not in table_names

      event_id_before = EventProcessorUtils.get_last_synced_event_id()
      # generate a table level event with a different case
      self.run_stmt_in_hive("drop table functional_parquet.TESTBlackListedTbl")
      # wait until all the events generated above are processed
      EventProcessorUtils.wait_for_event_processing(self)
      assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
      assert EventProcessorUtils.get_last_synced_event_id() > event_id_before
    finally:
      self.run_stmt_in_hive("drop database testBlackListedDb cascade")
      self.run_stmt_in_hive("drop table functional_parquet.testBlackListedTbl")
Example #19
  def run_test_insert_events(self, is_transactional=False):
    """Test for insert event processing. Events are created in Hive and processed in
    Impala. The following cases are tested:
    Insert into table --> for partitioned and non-partitioned table
    Insert overwrite table --> for partitioned and non-partitioned table
    Insert into partition --> for partitioned table
    """
    db_name = self.__get_random_name("insert_event_db_")
    tblproperties = self.__get_transactional_tblproperties(is_transactional)
    with HiveDbWrapper(self, db_name):
      # Test table with no partitions.
      test_tbl_name = 'tbl_insert_nopart'
      self.run_stmt_in_hive("drop table if exists %s.%s" % (db_name, test_tbl_name))
      self.run_stmt_in_hive("create table %s.%s (id int, val int) %s"
         % (db_name, test_tbl_name, tblproperties))
      # Test insert into table, this will fire an insert event.
      self.run_stmt_in_hive("insert into %s.%s values(101, 200)"
         % (db_name, test_tbl_name))
      # With MetastoreEventProcessor running, the insert event will be processed. Query
      # the table from Impala.
      EventProcessorUtils.wait_for_event_processing(self)
      # Verify that the data is present in Impala.
      data = self.execute_scalar("select * from %s.%s" % (db_name, test_tbl_name))
      assert data.split('\t') == ['101', '200']

      # Test insert overwrite. Overwrite the existing value.
      self.run_stmt_in_hive("insert overwrite table %s.%s values(101, 201)"
         % (db_name, test_tbl_name))
      # Make sure the event has been processed.
      EventProcessorUtils.wait_for_event_processing(self)
      # Verify that the data is present in Impala.
      data = self.execute_scalar("select * from %s.%s" % (db_name, test_tbl_name))
      assert data.split('\t') == ['101', '201']

      # Test partitioned table.
      test_part_tblname = 'tbl_insert_part'
      self.run_stmt_in_hive("drop table if exists %s.%s" % (db_name, test_part_tblname))
      self.run_stmt_in_hive("create table %s.%s (id int, name string) "
         "partitioned by(day int, month int, year int) %s"
         % (db_name, test_part_tblname, tblproperties))
      # Insert data into partitions.
      self.run_stmt_in_hive("insert into %s.%s partition(day=28, month=03, year=2019)"
         "values(101, 'x')" % (db_name, test_part_tblname))
      # Make sure the event has been processed.
      EventProcessorUtils.wait_for_event_processing(self)
      # Verify that the data is present in Impala.
      data = self.execute_scalar("select * from %s.%s" % (db_name, test_part_tblname))
      assert data.split('\t') == ['101', 'x', '28', '3', '2019']

      # Test inserting into existing partitions.
      self.run_stmt_in_hive("insert into %s.%s partition(day=28, month=03, year=2019)"
         "values(102, 'y')" % (db_name, test_part_tblname))
      EventProcessorUtils.wait_for_event_processing(self)
      # Verify that the data is present in Impala.
      data = self.execute_scalar("select count(*) from %s.%s where day=28 and month=3 "
         "and year=2019" % (db_name, test_part_tblname))
      assert data.split('\t') == ['2']

      # Test insert overwrite into existing partitions
      self.run_stmt_in_hive("insert overwrite table %s.%s partition(day=28, month=03, "
         "year=2019)" "values(101, 'z')" % (db_name, test_part_tblname))
      EventProcessorUtils.wait_for_event_processing(self)
      # Verify that the data is present in Impala.
      data = self.execute_scalar("select * from %s.%s where day=28 and month=3 and"
         " year=2019 and id=101" % (db_name, test_part_tblname))
      assert data.split('\t') == ['101', 'z', '28', '3', '2019']
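The random-name helpers used in these examples (ImpalaTestSuite.get_random_name and the private __get_random_name) are not part of this collection; a minimal sketch of what such a helper plausibly does (an assumption, not the real implementation):

import random
import string


def get_random_name(prefix=''):
  # Sketch (assumption): append a short random suffix to the given prefix so each test
  # run gets a unique database/table name.
  return prefix + ''.join(random.choice(string.ascii_lowercase) for _ in range(8))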