def test_hive_udfs_missing_jar(self, vector):
  """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present on HDFS."""
  # Stage a throwaway copy of hive-exec.jar so we can delete it mid-test.
  jar_path = "tmp/" + get_random_id(5) + ".jar"
  self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
  drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
  create_fn_stmt = (
      "create function default.pi_missing_jar() returns double "
      "location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path)
  cluster = ImpalaCluster()
  impalad = cluster.get_any_impalad()
  client = impalad.service.create_beeswax_client()
  # Create and drop functions with sync_ddl to make sure they are reflected
  # in every impalad. Copy the options dict first so flipping sync_ddl does
  # not leak into other tests that share this vector (the original mutated
  # the shared dict in place).
  exec_option = copy(vector.get_value('exec_option'))
  exec_option['sync_ddl'] = 1
  self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
  self.execute_query_expect_success(client, create_fn_stmt, exec_option)
  # Delete the udf jar out from under the registered function.
  self.hdfs_client.delete_file_dir(jar_path)
  different_impalad = cluster.get_different_impalad(impalad)
  client = different_impalad.service.create_beeswax_client()
  # Run a query using the udf from an impalad other than the one we used to
  # create the function. This is to bypass loading from the cache.
  try:
    self.execute_query_using_client(
        client, "select default.pi_missing_jar()", vector)
    assert False, "Query expected to fail"
  except ImpalaBeeswaxException as e:
    assert "Failed to get file info" in str(e)
def test_query_profile_encoded_unknown_query_id(self):
  """Probe /query_profile_encoded with a bogus query id.

  The error text must begin with the expected line and must not carry any
  leading whitespace.
  """
  impalad = ImpalaCluster().get_any_impalad()
  page = impalad.service.read_debug_webpage("query_profile_encoded?query_id=123")
  assert page.startswith("Could not obtain runtime profile: Query id")
def test_hive_udfs_missing_jar(self, vector, unique_database):
  """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present on HDFS."""
  # Stage a uniquely-named copy of hive-exec.jar inside the test database dir.
  jar_path = get_fs_path(
      "/test-warehouse/{0}.db/".format(unique_database) + get_random_id(5) + ".jar")
  hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
  check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
  drop_fn_stmt = ("drop function if exists "
                  "`{0}`.`pi_missing_jar`()".format(unique_database))
  create_fn_stmt = (
      "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
      "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(
          unique_database, jar_path))
  cluster = ImpalaCluster()
  impalad = cluster.get_any_impalad()
  client = impalad.service.create_beeswax_client()
  # Create and drop functions with sync_ddl to make sure they are reflected
  # in every impalad. Copy the options so the shared vector is not mutated.
  exec_option = copy(vector.get_value('exec_option'))
  exec_option['sync_ddl'] = 1
  self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
  self.execute_query_expect_success(client, create_fn_stmt, exec_option)
  # Delete the udf jar out from under the registered function.
  check_call(["hadoop", "fs", "-rm", jar_path])
  different_impalad = cluster.get_different_impalad(impalad)
  client = different_impalad.service.create_beeswax_client()
  # Run a query using the udf from an impalad other than the one we used to
  # create the function. This is to bypass loading from the cache.
  try:
    self.execute_query_using_client(
        client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
    assert False, "Query expected to fail"
  # 'except E, e' is Python-2-only syntax (removed in Python 3); the 'as'
  # form below is equivalent and valid on Python 2.6+ as well.
  except ImpalaBeeswaxException as e:
    assert "Failed to get file info" in str(e)
def test_hive_udfs_missing_jar(self, vector, unique_database): """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present on HDFS""" # Copy hive-exec.jar to a temporary file jar_path = get_fs_path("/test-warehouse/{0}.db/".format(unique_database) + get_random_id(5) + ".jar") hive_jar = get_fs_path("/test-warehouse/hive-exec.jar") check_call(["hadoop", "fs", "-cp", hive_jar, jar_path]) drop_fn_stmt = ( "drop function if exists " "`{0}`.`pi_missing_jar`()".format(unique_database)) create_fn_stmt = ( "create function `{0}`.`pi_missing_jar`() returns double location '{1}' " "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path)) cluster = ImpalaCluster() impalad = cluster.get_any_impalad() client = impalad.service.create_beeswax_client() # Create and drop functions with sync_ddl to make sure they are reflected # in every impalad. exec_option = copy(vector.get_value('exec_option')) exec_option['sync_ddl'] = 1 self.execute_query_expect_success(client, drop_fn_stmt, exec_option) self.execute_query_expect_success(client, create_fn_stmt, exec_option) # Delete the udf jar check_call(["hadoop", "fs", "-rm", jar_path]) different_impalad = cluster.get_different_impalad(impalad) client = different_impalad.service.create_beeswax_client() # Run a query using the udf from an impalad other than the one # we used to create the function. This is to bypass loading from # the cache try: self.execute_query_using_client( client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector) assert False, "Query expected to fail" except ImpalaBeeswaxException, e: assert "Failed to get file info" in str(e)