Example #1
0
  def test_hive_udfs_missing_jar(self, vector):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS.

    Creates a Java UDF backed by a temporary copy of hive-exec.jar, deletes
    the jar, then invokes the UDF from a different impalad (bypassing that
    node's cache) and expects a clean query failure rather than a crash.
    """
    # Copy hive-exec.jar to a temporary, randomly-named file so deleting it
    # later cannot disturb other tests that use the shared jar.
    jar_path = "tmp/" + get_random_id(5) + ".jar"
    self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
    drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
    create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad. Work on a copy so the test vector's shared exec_option
    # dict is not mutated for tests that run after this one.
    exec_option = dict(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    self.hdfs_client.delete_file_dir(jar_path)

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
      self.execute_query_using_client(client,
          "select default.pi_missing_jar()", vector)
      assert False, "Query expected to fail"
    # 'as' form instead of legacy 'except X, e' — valid on Python 2.6+ and 3.
    except ImpalaBeeswaxException as e:
      assert "Failed to get file info" in str(e)
Example #2
0
    def test_hive_udfs_missing_jar(self, vector):
        """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS.

        Creates a Java UDF backed by a temporary copy of hive-exec.jar,
        deletes the jar, then invokes the UDF from a different impalad
        (bypassing that node's cache) and expects a clean query failure
        rather than a crash.
        """
        # Copy hive-exec.jar to a temporary, randomly-named file so deleting
        # it later cannot disturb other tests that use the shared jar.
        jar_path = "tmp/" + get_random_id(5) + ".jar"
        self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
        drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
        create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

        cluster = ImpalaCluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad. Work on a copy so the test vector's shared
        # exec_option dict is not mutated for tests that run after this one.
        exec_option = dict(vector.get_value('exec_option'))
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar
        self.hdfs_client.delete_file_dir(jar_path)

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache
        try:
            self.execute_query_using_client(client,
                                            "select default.pi_missing_jar()",
                                            vector)
            assert False, "Query expected to fail"
        # 'as' form instead of legacy 'except X, e' — valid on Python 2.6+/3.
        except ImpalaBeeswaxException as e:
            assert "Failed to get file info" in str(e)
 def test_query_profile_encoded_unknown_query_id(self):
   """Verify /query_profile_encoded for a nonexistent query id.

   The error response must begin with the expected line and must not
   contain any leading whitespace.
   """
   service = ImpalaCluster().get_any_impalad().service
   page = service.read_debug_webpage("query_profile_encoded?query_id=123")
   assert page.startswith("Could not obtain runtime profile: Query id")
Example #4
0
 def test_query_profile_encoded_unknown_query_id(self):
   """Check /query_profile_encoded's error message for a missing query.

   The page content must start with the expected error line, with no
   leading whitespace.
   """
   cluster = ImpalaCluster()
   any_impalad = cluster.get_any_impalad()
   body = any_impalad.service.read_debug_webpage(
       "query_profile_encoded?query_id=123")
   expected_prefix = "Could not obtain runtime profile: Query id"
   assert body.startswith(expected_prefix)
Example #5
0
    def test_hive_udfs_missing_jar(self, vector, unique_database):
        """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS.

        Creates a Java UDF in the test's unique database, backed by a
        temporary copy of hive-exec.jar, deletes the jar, then invokes the
        UDF from a different impalad (bypassing that node's cache) and
        expects a clean query failure rather than a crash.
        """
        # Copy hive-exec.jar to a temporary, randomly-named file inside the
        # unique database directory so cleanup cannot disturb other tests.
        jar_path = get_fs_path(
            "/test-warehouse/{0}.db/".format(unique_database) +
            get_random_id(5) + ".jar")
        hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
        check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
        drop_fn_stmt = ("drop function if exists "
                        "`{0}`.`pi_missing_jar`()".format(unique_database))
        create_fn_stmt = (
            "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
            "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(
                unique_database, jar_path))

        cluster = ImpalaCluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad. copy() protects the test vector's shared
        # exec_option dict from mutation.
        exec_option = copy(vector.get_value('exec_option'))
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar
        check_call(["hadoop", "fs", "-rm", jar_path])

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache
        try:
            self.execute_query_using_client(
                client,
                "select `{0}`.`pi_missing_jar`()".format(unique_database),
                vector)
            assert False, "Query expected to fail"
        # 'as' form instead of legacy 'except X, e' — valid on Python 2.6+/3.
        except ImpalaBeeswaxException as e:
            assert "Failed to get file info" in str(e)
Example #6
0
  def test_hive_udfs_missing_jar(self, vector, unique_database):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS.

    Creates a Java UDF in the test's unique database, backed by a temporary
    copy of hive-exec.jar, deletes the jar, then invokes the UDF from a
    different impalad (bypassing that node's cache) and expects a clean
    query failure rather than a crash.
    """
    # Copy hive-exec.jar to a temporary, randomly-named file inside the
    # unique database directory so cleanup cannot disturb other tests.
    jar_path = get_fs_path("/test-warehouse/{0}.db/".format(unique_database)
                           + get_random_id(5) + ".jar")
    hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
    check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
    drop_fn_stmt = (
        "drop function if exists "
        "`{0}`.`pi_missing_jar`()".format(unique_database))
    create_fn_stmt = (
        "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
        "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path))

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad. copy() protects the test vector's shared exec_option
    # dict from mutation.
    exec_option = copy(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    check_call(["hadoop", "fs", "-rm", jar_path])

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
      self.execute_query_using_client(
          client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
      assert False, "Query expected to fail"
    # 'as' form instead of legacy 'except X, e' — valid on Python 2.6+/3.
    except ImpalaBeeswaxException as e:
      assert "Failed to get file info" in str(e)