Exemplo n.º 1
0
  def test_role_privilege_case(self, vector):
    """IMPALA-5582: Store sentry privileges in lower case.

    This test grants select privileges to roles assigned to tables/db
    specified in lower, upper and mixed case. It verifies that these
    privileges do not vanish on a SentryProxy thread update.
    """
    # Randomized suffixes keep the databases and role unique across runs.
    db_name = "test_role_privilege_case_x_" + get_random_id(5)
    db_name_upper_case = "TEST_ROLE_PRIVILEGE_CASE_Y_" + get_random_id(5).upper()
    db_name_mixed_case = "TesT_Role_PRIVIlege_case_z" + get_random_id(5)
    role_name = "test_role_" + get_random_id(5)
    try:
      self.client.execute("create role {0}".format(role_name))
      self.client.execute("grant all on server to {0}".format(role_name))
      # Grant the role to the current user's primary group so this session
      # exercises the role's privileges.
      self.client.execute(
          "grant role {0} to group `{1}`".format(
           role_name, grp.getgrnam(getuser()).gr_name))

      self.client.execute("create database " + db_name)
      self.client.execute("create database " + db_name_upper_case)
      self.client.execute("create database " + db_name_mixed_case)
      # Table names deliberately use lower, upper and mixed case.
      self.client.execute(
          "create table if not exists {0}.test1(i int)".format(db_name))
      self.client.execute("create table if not exists {0}.TEST2(i int)".format(db_name))
      self.client.execute("create table if not exists {0}.Test3(i int)".format(db_name))

      self.client.execute(
          "grant select on table {0}.test1 to {1}".format(db_name, role_name))
      self.client.execute(
          "grant select on table {0}.TEST2 to {1}".format(db_name, role_name))
      self.client.execute(
          "grant select on table {0}.TesT3 to {1}".format(db_name, role_name))
      self.client.execute("grant all on database {0} to {1}".format(db_name, role_name))
      self.client.execute(
          "grant all on database {0} to {1}".format(db_name_upper_case, role_name))
      self.client.execute(
          "grant all on database {0} to {1}".format(db_name_mixed_case, role_name))
      result = self.client.execute("show grant role {0}".format(role_name))
      # Privileges should be stored in lower case regardless of the case
      # used in the grant statements above, hence the lower-case probes.
      assert any('test1' in x for x in result.data)
      assert any('test2' in x for x in result.data)
      assert any('test3' in x for x in result.data)
      assert any(db_name_upper_case.lower() in x for x in result.data)
      assert any(db_name_mixed_case.lower() in x for x in result.data)
      # Sleep for 2 seconds and make sure that the privileges
      # on all 3 tables still persist on a sentryProxy thread
      # update. sentry_catalog_polling_frequency_s is set to 1
      # seconds.
      sleep(2)
      result = self.client.execute("show grant role {0}".format(role_name))
      assert any('test1' in x for x in result.data)
      assert any('test2' in x for x in result.data)
      assert any('test3' in x for x in result.data)
      assert any(db_name_upper_case.lower() in x for x in result.data)
      assert any(db_name_mixed_case.lower() in x for x in result.data)
    finally:
      # Best-effort cleanup so later tests start from a clean state.
      self.client.execute("drop database if exists {0}".format(db_name_upper_case))
      self.client.execute("drop database if exists {0}".format(db_name_mixed_case))
      self.client.execute("drop database if exists {0} cascade".format(db_name))
      self.client.execute("drop role {0}".format(role_name))
  def test_role_privilege_case(self, vector):
    """IMPALA-5582: Store sentry privileges in lower case.

    This test grants select privileges to roles assigned to tables/db
    specified in lower, upper and mixed case. It verifies that these
    privileges do not vanish on a SentryProxy thread update.

    NOTE(review): this redefines a method of the same name declared earlier
    in this file; only this later definition is effective — looks like a
    copy/paste duplicate, confirm which version is intended.
    """
    # Randomized suffixes keep the databases and role unique across runs.
    db_name = "test_role_privilege_case_x_" + get_random_id(5)
    db_name_upper_case = "TEST_ROLE_PRIVILEGE_CASE_Y_" + get_random_id(5).upper()
    db_name_mixed_case = "TesT_Role_PRIVIlege_case_z" + get_random_id(5)
    role_name = "test_role_" + get_random_id(5)
    try:
      self.client.execute("create role {0}".format(role_name))
      self.client.execute("grant all on server to {0}".format(role_name))
      # Grant the role to the current user's primary group so this session
      # exercises the role's privileges.
      self.client.execute("grant role {0} to group `{1}`".format(role_name,
          grp.getgrnam(getuser()).gr_name))

      self.client.execute("create database " + db_name)
      self.client.execute("create database " + db_name_upper_case)
      self.client.execute("create database " + db_name_mixed_case)
      # Table names deliberately use lower, upper and mixed case.
      self.client.execute(
          "create table if not exists {0}.test1(i int)".format(db_name))
      self.client.execute("create table if not exists {0}.TEST2(i int)".format(db_name))
      self.client.execute("create table if not exists {0}.Test3(i int)".format(db_name))

      self.client.execute(
          "grant select on table {0}.test1 to {1}".format(db_name, role_name))
      self.client.execute(
          "grant select on table {0}.TEST2 to {1}".format(db_name, role_name))
      self.client.execute(
          "grant select on table {0}.TesT3 to {1}".format(db_name, role_name))
      self.client.execute("grant all on database {0} to {1}".format(db_name, role_name))
      self.client.execute(
          "grant all on database {0} to {1}".format(db_name_upper_case, role_name))
      self.client.execute(
          "grant all on database {0} to {1}".format(db_name_mixed_case, role_name))
      result = self.client.execute("show grant role {0}".format(role_name))
      # Privileges should be stored in lower case regardless of the case
      # used in the grant statements above, hence the lower-case probes.
      assert any('test1' in x for x in result.data)
      assert any('test2' in x for x in result.data)
      assert any('test3' in x for x in result.data)
      assert any(db_name_upper_case.lower() in x for x in result.data)
      assert any(db_name_mixed_case.lower() in x for x in result.data)
      # Sleep for 2 seconds and make sure that the privileges
      # on all 3 tables still persist on a sentryProxy thread
      # update. sentry_catalog_polling_frequency_s is set to 1
      # seconds.
      sleep(2)
      result = self.client.execute("show grant role {0}".format(role_name))
      assert any('test1' in x for x in result.data)
      assert any('test2' in x for x in result.data)
      assert any('test3' in x for x in result.data)
      assert any(db_name_upper_case.lower() in x for x in result.data)
      assert any(db_name_mixed_case.lower() in x for x in result.data)
    finally:
      # Best-effort cleanup so later tests start from a clean state.
      self.client.execute("drop database if exists {0}".format(db_name_upper_case))
      self.client.execute("drop database if exists {0}".format(db_name_mixed_case))
      self.client.execute("drop database if exists {0} cascade".format(db_name))
      self.client.execute("drop role {0}".format(role_name))
Exemplo n.º 3
0
 def __enter__(self):
     """Create uniquely-suffixed staging copies of the ExecutionResults and
     RuntimeProfiles tables, record their names, and return self so this
     object can serve as a context manager."""
     exec_staging = 'ExecutionResultsStaging_' + get_random_id(5)
     profile_staging = 'RuntimeProfilesStaging_' + get_random_id(5)
     self._staging_exec_result_table = exec_staging
     self._staging_profile_table = profile_staging
     self._create_new_table_as('ExecutionResults', exec_staging)
     self._create_new_table_as('RuntimeProfiles', profile_staging)
     return self
 def test_role_update(self, vector):
   """IMPALA-5355: The initial update from the statestore has the privileges and roles in
   reverse order if a role was modified, but not the associated privilege. Verify that
   Impala is able to handle this.
   """
   # Randomized suffix keeps the role unique across test runs.
   role_name = "test_role_" + get_random_id(5)
   try:
     self.client.execute("create role {0}".format(role_name))
     self.client.execute("grant all on server to {0}".format(role_name))
     # Wait a few seconds to make sure the update propagates to the statestore.
     sleep(3)
     # Update the role, increasing its catalog version.
     self.client.execute("grant role {0} to group `{1}`".format(
         role_name, grp.getgrnam(getuser()).gr_name))
     result = self.client.execute("show tables in functional")
     assert 'alltypes' in result.data
     privileges_before = self.client.execute("show grant role {0}".format(role_name))
     # Wait a few seconds before restarting Impalad to make sure that the Catalog gets
     # updated.
     sleep(3)
     self.restart_first_impalad()
     verifier = MetricVerifier(self.cluster.impalads[0].service)
     verifier.wait_for_metric("catalog.ready", True)
     # Verify that we still have the right privileges after the first impalad was
     # restarted.
     result = self.client.execute("show tables in functional")
     assert 'alltypes' in result.data
     privileges_after = self.client.execute("show grant role {0}".format(role_name))
     assert privileges_before.data == privileges_after.data
   finally:
     # Always drop the role, even when an assertion above fails.
     self.client.execute("drop role {0}".format(role_name))
Exemplo n.º 5
0
 def test_role_update(self, vector):
   """IMPALA-5355: The initial update from the statestore has the privileges and roles in
   reverse order if a role was modified, but not the associated privilege. Verify that
   Impala is able to handle this.

   NOTE(review): this redefines a method of the same name declared earlier in
   this file; only this later definition is effective — looks like a
   copy/paste duplicate, confirm which version is intended.
   """
   # Randomized suffix keeps the role unique across test runs.
   role_name = "test_role_" + get_random_id(5)
   try:
     self.client.execute("create role {0}".format(role_name))
     self.client.execute("grant all on server to {0}".format(role_name))
     # Wait a few seconds to make sure the update propagates to the statestore.
     sleep(3)
     # Update the role, increasing its catalog version.
     self.client.execute("grant role {0} to group `{1}`".format(
         role_name, grp.getgrnam(getuser()).gr_name))
     result = self.client.execute("show tables in functional")
     assert 'alltypes' in result.data
     privileges_before = self.client.execute("show grant role {0}".format(role_name))
     # Wait a few seconds before restarting Impalad to make sure that the Catalog gets
     # updated.
     sleep(3)
     self.restart_first_impalad()
     verifier = MetricVerifier(self.cluster.impalads[0].service)
     verifier.wait_for_metric("catalog.ready", True)
     # Verify that we still have the right privileges after the first impalad was
     # restarted.
     result = self.client.execute("show tables in functional")
     assert 'alltypes' in result.data
     privileges_after = self.client.execute("show grant role {0}".format(role_name))
     assert privileges_before.data == privileges_after.data
   finally:
     # Always drop the role, even when an assertion above fails.
     self.client.execute("drop role {0}".format(role_name))
Exemplo n.º 6
0
  def test_hive_udfs_missing_jar(self, vector):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS."""
    # Copy hive-exec.jar to a uniquely-named temporary file so deleting it
    # below cannot affect other tests.
    jar_path = "tmp/" + get_random_id(5) + ".jar"
    self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
    drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
    create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad.
    exec_option = vector.get_value('exec_option')
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar so using the function must fail to find it on HDFS.
    self.hdfs_client.delete_file_dir(jar_path)

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache.
    try:
      self.execute_query_using_client(client,
          "select default.pi_missing_jar()", vector)
      assert False, "Query expected to fail"
    # "except X as e" replaces the Python-2-only "except X, e" form; it is
    # valid on Python 2.6+ and required on Python 3.
    except ImpalaBeeswaxException as e:
      assert "Failed to get file info" in str(e)
Exemplo n.º 7
0
    def test_hive_udfs_missing_jar(self, vector):
        """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
        on HDFS."""
        # Copy hive-exec.jar to a uniquely-named temporary file so deleting
        # it below cannot affect other tests.
        jar_path = "tmp/" + get_random_id(5) + ".jar"
        self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
        drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
        create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

        cluster = ImpalaCluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad.
        exec_option = vector.get_value('exec_option')
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar so using the function must fail to find it on HDFS.
        self.hdfs_client.delete_file_dir(jar_path)

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache.
        try:
            self.execute_query_using_client(client,
                                            "select default.pi_missing_jar()",
                                            vector)
            assert False, "Query expected to fail"
        # "except X as e" replaces the Python-2-only "except X, e" form; it
        # is valid on Python 2.6+ and required on Python 3.
        except ImpalaBeeswaxException as e:
            assert "Failed to get file info" in str(e)
Exemplo n.º 8
0
  def _test_ownership(self):
    """Tests ownership privileges for databases and tables with ranger along with
    some known quirks in the implementation.

    Each self._run_query_as_user call below takes (stmt, user, bool);
    presumably the bool is whether the query is expected to succeed —
    consistent with the False cases being followed by error-message checks.
    """
    test_user = getuser()
    # Randomized lower-case suffix keeps the database unique across runs.
    test_db = "test_ranger_ownership_" + get_random_id(5).lower()
    # Create a test database as "admin" user. Owner is set accordingly.
    self._run_query_as_user("create database {0}".format(test_db), ADMIN, True)
    try:
      # Try to create a table under test_db as current user. It should fail.
      self._run_query_as_user(
          "create table {0}.foo(a int)".format(test_db), test_user, False)

      # Change the owner of the database to the current user.
      self._run_query_as_user(
          "alter database {0} set owner user {1}".format(test_db, test_user), ADMIN, True)

      # Refresh authorization metadata so the ownership change is visible.
      self._run_query_as_user("refresh authorization", ADMIN, True)

      # Create should succeed now.
      self._run_query_as_user(
          "create table {0}.foo(a int)".format(test_db), test_user, True)
      # Run show tables on the db. The resulting list should be empty. This happens
      # because the created table's ownership information is not aggressively cached
      # by the current Catalog implementations. Hence the analysis pass does not
      # have access to the ownership information to verify if the current session
      # user is actually the owner. We need to fix this by caching the HMS metadata
      # more aggressively when the table loads. TODO(IMPALA-8937).
      result = \
          self._run_query_as_user("show tables in {0}".format(test_db), test_user, True)
      assert len(result.data) == 0
      # Run a simple query that warms up the table metadata and repeat SHOW TABLES.
      self._run_query_as_user("select * from {0}.foo".format(test_db), test_user, True)
      result = \
          self._run_query_as_user("show tables in {0}".format(test_db), test_user, True)
      assert len(result.data) == 1
      assert "foo" in result.data
      # Change the owner of the db back to the admin user
      self._run_query_as_user(
          "alter database {0} set owner user {1}".format(test_db, ADMIN), ADMIN, True)
      # SHOW TABLES should now fail for test_user with an authorization error.
      result = self._run_query_as_user(
          "show tables in {0}".format(test_db), test_user, False)
      err = "User '{0}' does not have privileges to access: {1}.*.*". \
          format(test_user, test_db)
      assert err in str(result)
      # test_user is still the owner of the table, so select should work fine.
      self._run_query_as_user("select * from {0}.foo".format(test_db), test_user, True)
      # Change the table owner back to admin.
      self._run_query_as_user(
          "alter table {0}.foo set owner user {1}".format(test_db, ADMIN), ADMIN, True)
      # test_user should not be authorized to run the queries anymore.
      result = self._run_query_as_user(
          "select * from {0}.foo".format(test_db), test_user, False)
      err = ("AuthorizationException: User '{0}' does not have privileges to execute" +
             " 'SELECT' on: {1}.foo").format(test_user, test_db)
      assert err in str(result)
    finally:
      # Drop the database as admin so cleanup succeeds regardless of ownership.
      self._run_query_as_user("drop database {0} cascade".format(test_db), ADMIN, True)
Exemplo n.º 9
0
    def test_hive_udfs_missing_jar(self, vector, unique_database):
        """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
        on HDFS."""
        # Copy hive-exec.jar to a uniquely-named temporary file under the
        # test database's directory so deleting it below is safe.
        jar_path = get_fs_path(
            "/test-warehouse/{0}.db/".format(unique_database) +
            get_random_id(5) + ".jar")
        hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
        self.filesystem_client.copy(hive_jar, jar_path)
        drop_fn_stmt = ("drop function if exists "
                        "`{0}`.`pi_missing_jar`()".format(unique_database))
        create_fn_stmt = (
            "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
            "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(
                unique_database, jar_path))

        cluster = ImpalaCluster.get_e2e_test_cluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad. Copy the exec options so the mutation below does
        # not leak into other tests sharing the vector.
        exec_option = copy(vector.get_value('exec_option'))
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar so using the function must fail to find it.
        check_call(["hadoop", "fs", "-rm", jar_path])

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache.
        try:
            self.execute_query_using_client(
                client,
                "select `{0}`.`pi_missing_jar`()".format(unique_database),
                vector)
            assert False, "Query expected to fail"
        # "except X as e" replaces the Python-2-only "except X, e" form; it
        # is valid on Python 2.6+ and required on Python 3.
        except ImpalaBeeswaxException as e:
            assert "Failed to get file info" in str(e)
Exemplo n.º 10
0
  def test_hive_udfs_missing_jar(self, vector, unique_database):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS."""
    # Copy hive-exec.jar to a uniquely-named temporary file under the test
    # database's directory so deleting it below is safe.
    jar_path = get_fs_path("/test-warehouse/{0}.db/".format(unique_database)
                           + get_random_id(5) + ".jar")
    hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
    check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
    drop_fn_stmt = (
        "drop function if exists "
        "`{0}`.`pi_missing_jar`()".format(unique_database))
    create_fn_stmt = (
        "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
        "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path))

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad. Copy the exec options so the mutation below does not
    # leak into other tests sharing the vector.
    exec_option = copy(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar so using the function must fail to find it.
    check_call(["hadoop", "fs", "-rm", jar_path])

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache.
    try:
      self.execute_query_using_client(
          client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
      assert False, "Query expected to fail"
    # "except X as e" replaces the Python-2-only "except X, e" form; it is
    # valid on Python 2.6+ and required on Python 3.
    except ImpalaBeeswaxException as e:
      assert "Failed to get file info" in str(e)
 def __enter__(self):
   """Create uniquely-suffixed staging copies of the ExecutionResults and
   RuntimeProfiles tables, record their names, and return self so this
   object can serve as a context manager."""
   exec_staging = 'ExecutionResultsStaging_' + get_random_id(5)
   profile_staging = 'RuntimeProfilesStaging_' + get_random_id(5)
   self._staging_exec_result_table = exec_staging
   self._staging_profile_table = profile_staging
   self._create_new_table_as('ExecutionResults', exec_staging)
   self._create_new_table_as('RuntimeProfiles', profile_staging)
   return self