Example 1
  def _get_delegation_tokens(self, username, delegation_token_dir):
    """
    If operating against Kerberized Hadoop, we'll need to have obtained delegation tokens for
    the user we want to run the subprocess as. We have to do it here rather than in the subprocess
    because the subprocess does not have Kerberos credentials in that case.
    """
    delegation_token_files = []
    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()

    LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

    for cluster in all_clusters:
      if cluster.security_enabled:
        current_user = cluster.user
        try:
          cluster.setuser(username)
          token = cluster.get_delegation_token()
          token_file = tempfile.NamedTemporaryFile(dir=delegation_token_dir)
          token_file.write(token.delegationTokenBytes)
          token_file.flush()
          delegation_token_files.append(token_file)
        finally:
          cluster.setuser(current_user)

    return delegation_token_files
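Note that NamedTemporaryFile deletes its backing file as soon as the handle is closed, so the returned handles have to stay referenced until the child process has read the tokens. A minimal caller-side sketch under that assumption; the helper name is hypothetical, and the comma-joined HADOOP_TOKEN_FILE_LOCATION convention is borrowed from the run_bin_hadoop_step examples further down:

import os
import subprocess

def run_step_as_user(runner, username, token_dir, args):
    # Hypothetical helper: obtain per-cluster tokens, expose them to bin/hadoop,
    # and clean them up once the child process has finished.
    token_files = runner._get_delegation_tokens(username, token_dir)
    env = dict(os.environ)
    if token_files:
        # bin/hadoop reads its delegation tokens from this environment variable.
        env['HADOOP_TOKEN_FILE_LOCATION'] = ','.join(f.name for f in token_files)
    try:
        return subprocess.call(args, env=env, close_fds=True)
    finally:
        for f in token_files:
            f.close()  # NamedTemporaryFile removes the file on close.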
Example 2
  def _get_delegation_tokens(self, username, delegation_token_dir):
    """
    If operating against Kerberized Hadoop, we'll need to have obtained delegation tokens for
    the user we want to run the subprocess as. We have to do it here rather than in the subprocess
    because the subprocess does not have Kerberos credentials in that case.
    """
    delegation_token_files = []
    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()

    LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

    for cluster in all_clusters:
      if cluster.security_enabled:
        current_user = cluster.user
        try:
          cluster.setuser(username)
          token = cluster.get_delegation_token(KERBEROS.HUE_PRINCIPAL.get())
          token_file_no, path = tempfile.mkstemp(dir=delegation_token_dir)
          os.write(token_file_no, token)
          os.close(token_file_no)
          delegation_token_files.append(path)
        finally:
          cluster.setuser(current_user)

    return delegation_token_files
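Unlike the NamedTemporaryFile variant above, tempfile.mkstemp leaves the files on disk after os.close, so the returned paths remain readable for a child process but the caller becomes responsible for deleting them. A small cleanup sketch under that assumption (the function name is illustrative):

import os

def cleanup_delegation_tokens(paths):
    # Delete the token files created with tempfile.mkstemp once the job is done.
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass  # Already gone; nothing left to clean up.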
Example 3
def test_non_default_cluster():
    NON_DEFAULT_NAME = 'non_default'
    cluster.clear_caches()
    reset = (
        conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
        conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    )
    try:
        # This is indeed the only hdfs/mr cluster
        assert_equal(1, len(cluster.get_all_hdfs()))
        assert_equal(1, len(cluster.all_mrclusters()))
        assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
        assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

        cli = make_logged_in_client()
        # That we can get to a view without errors means that the middlewares work
        cli.get('/about')
    finally:
        for old_conf in reset:
            old_conf()
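Each set_for_testing call returns a callable that restores the previous configuration value, which is why the test collects them in reset and invokes them in the finally block. The same pattern can be wrapped in a context manager; this wrapper is a sketch and not part of Hue:

from contextlib import contextmanager

@contextmanager
def for_testing(conf_var, value):
    # set_for_testing returns a callable that puts the old value back; run it on exit.
    restore = conf_var.set_for_testing(value)
    try:
        yield
    finally:
        restore()

# Usage sketch:
#   with for_testing(conf.HDFS_CLUSTERS, {'non_default': {}}):
#       assert len(cluster.get_all_hdfs()) == 1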
Example 4
File: tests.py  Project: abayer/hue
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
    conf.MR_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
Example 5
  def run_bin_hadoop_step(self, step):
    """
    user.name is used by FileSystem.getHomeDirectory().
    The environment variables for _USER and _GROUPS are used
    by the aspectj aspect to overwrite Hadoop's notion of 
    users and groups.
    """
    java_properties = {}
    java_properties["hue.suffix"] = "-via-hue"
    java_properties["user.name"] = self.plan.user
    java_prop_str = " ".join("-D%s=%s" % (k,v) for k, v in java_properties.iteritems())
    env = {      
      'HADOOP_HOME': hadoop.conf.HADOOP_HOME.get(), 
      'HADOOP_OPTS': "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
      'HADOOP_CLASSPATH': ':'.join([jobsub.conf.ASPECTPATH.get(),
                                    hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()]),
      'HUE_JOBTRACE_LOG': self.internal_file_name("jobs"),
      'HUE_JOBSUB_USER': self.plan.user,
      'HUE_JOBSUB_GROUPS': ",".join(self.plan.groups),
      'LANG': os.getenv('LANG', i18n.get_site_encoding()),
    }

    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()
    delegation_token_files = []
    merged_token_file = tempfile.NamedTemporaryFile()
    try:
      LOG.debug("all_clusters: %s" % (repr(all_clusters),))
      for cluster in all_clusters:
        if cluster.security_enabled:
          cluster.setuser(self.plan.user)
          token = cluster.get_delegation_token()
          token_file = tempfile.NamedTemporaryFile()
          token_file.write(token.delegationTokenBytes)
          token_file.flush()
          delegation_token_files.append(token_file)
  
      java_home = os.getenv('JAVA_HOME')
      if java_home:
        env["JAVA_HOME"] = java_home
      for k, v in env.iteritems():
        assert v is not None, "Environment key %s missing value." % k
  
      base_args = [ hadoop.conf.HADOOP_BIN.get() ]
      if hadoop.conf.HADOOP_CONF_DIR.get():
        base_args.append("--config")
        base_args.append(hadoop.conf.HADOOP_CONF_DIR.get())
  
      if delegation_token_files:
        args = list(base_args) # Make a copy of the base args.
        args += ['jar', hadoop.conf.CREDENTIALS_MERGER_JAR.get(), merged_token_file.name]
        args += [token_file.name for token_file in delegation_token_files]
        LOG.debug("merging credentials files with comand: '%s'" % (' '.join(args),))
        merge_pipe = subprocess.Popen(args, shell=False, close_fds=True)
        retcode = merge_pipe.wait()
        if 0 != retcode:
          raise Exception("bin/hadoop returned non-zero %d while trying to merge credentials" % (retcode,))
        env['HADOOP_TOKEN_FILE_LOCATION'] = merged_token_file.name
  
      args = list(base_args) # Make a copy of the base args.
      args += step.arguments
      LOG.info("Starting %s.  (Env: %s)", repr(args), repr(env))
      LOG.info("Running: %s" % " ".join(args))
      self.pipe = subprocess.Popen(
        args,
        stdin=None,
        cwd=self.work_dir,
        stdout=self.stdout,
        stderr=self.stderr,
        shell=False,
        close_fds=True,
        env=env)
      retcode = self.pipe.wait()
      if 0 != retcode:
        raise Exception("bin/hadoop returned non-zero %d" % retcode)
      LOG.info("bin/hadoop returned %d" % retcode)
    finally:
      for token_file in delegation_token_files + [merged_token_file]:
        token_file.close()
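For orientation, the credentials-merge step above boils down to one extra bin/hadoop invocation whose argument list has the following shape; every path in this sketch is illustrative rather than a real default:

# Illustrative only: the path after the jar is the merged output file
# (merged_token_file.name above), followed by the per-cluster token files.
merge_args = [
    '/usr/lib/hadoop/bin/hadoop', '--config', '/etc/hadoop/conf',
    'jar', '/usr/share/hue/credentials-merger.jar',
    '/tmp/merged_tokens', '/tmp/token_hdfs', '/tmp/token_mr',
]

HADOOP_TOKEN_FILE_LOCATION is then pointed at the single merged file before the real job step is launched.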
Example 6
    def run_bin_hadoop_step(self, step):
        """
    user.name is used by FileSystem.getHomeDirectory().
    The environment variables for _USER and _GROUPS are used
    by the aspectj aspect to overwrite Hadoop's notion of 
    users and groups.
    """
        java_properties = {}
        java_properties["hue.suffix"] = "-via-hue"
        java_properties["user.name"] = self.plan.user
        java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
        env = {
            "HADOOP_HOME": hadoop.conf.HADOOP_HOME.get(),
            "HADOOP_OPTS": "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
            "HADOOP_CLASSPATH": ":".join(
                [jobsub.conf.ASPECTPATH.get(), hadoop.conf.HADOOP_STATIC_GROUP_MAPPING_CLASSPATH.get()]
            ),
            "HUE_JOBTRACE_LOG": self.internal_file_name("jobs"),
            "HUE_JOBSUB_USER": self.plan.user,
            "HUE_JOBSUB_GROUPS": ",".join(self.plan.groups),
        }

        delegation_token_files = []
        all_clusters = []
        all_clusters += all_mrclusters().values()
        all_clusters += get_all_hdfs().values()
        LOG.info("all_clusters: %s" % (repr(all_clusters),))
        for cluster in all_clusters:
            if cluster.security_enabled:
                cluster.setuser(self.plan.user)
                token = cluster.get_delegation_token()
                token_file = tempfile.NamedTemporaryFile()
                token_file.write(token.delegationTokenBytes)
                token_file.flush()
                delegation_token_files.append(token_file)

        if delegation_token_files:
            env["HADOOP_TOKEN_FILE_LOCATION"] = ",".join([token_file.name for token_file in delegation_token_files])

        java_home = os.getenv("JAVA_HOME")
        if java_home:
            env["JAVA_HOME"] = java_home
        for k, v in env.iteritems():
            assert v is not None, "Environment key %s missing value." % k

        args = [hadoop.conf.HADOOP_BIN.get()]
        if hadoop.conf.HADOOP_CONF_DIR.get():
            args.append("--config")
            args.append(hadoop.conf.HADOOP_CONF_DIR.get())

        args += step.arguments
        LOG.info("Starting %s.  (Env: %s)", repr(args), repr(env))
        LOG.info("Running: %s" % " ".join(args))
        self.pipe = subprocess.Popen(
            args,
            stdin=None,
            cwd=self.work_dir,
            stdout=self.stdout,
            stderr=self.stderr,
            shell=False,
            close_fds=True,
            env=env,
        )
        retcode = self.pipe.wait()
        if 0 != retcode:
            raise Exception("bin/hadoop returned non-zero %d" % retcode)
        LOG.info("bin/hadoop returned %d" % retcode)
        for token_file in delegation_token_files:
            token_file.close()
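This variant skips the merge step and hands bin/hadoop a comma-separated list of token paths, and it only closes the token files after the subprocess has exited cleanly, so an exception leaves their deletion to garbage collection of the NamedTemporaryFile objects. A small standalone sketch of the more defensive arrangement used in the previous example, with hypothetical names:

import subprocess

def run_and_cleanup(args, env, token_files):
    # Close (and thereby delete) every temporary token file even if the
    # subprocess fails or raises before the normal cleanup is reached.
    try:
        return subprocess.call(args, env=env, close_fds=True)
    finally:
        for token_file in token_files:
            token_file.close()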
Example 7
    def run_bin_hadoop_step(self, step):
        """
    user.name is used by FileSystem.getHomeDirectory().
    The environment variables for _USER and _GROUPS are used
    by the aspectj aspect to overwrite Hadoop's notion of 
    users and groups.
    """
        java_properties = {}
        java_properties["hue.suffix"] = "-via-hue"
        java_properties["user.name"] = self.plan.user
        java_prop_str = " ".join("-D%s=%s" % (k, v)
                                 for k, v in java_properties.iteritems())
        env = {
            'HADOOP_HOME':
            hadoop.conf.HADOOP_HOME.get(),
            'HADOOP_OPTS':
            "-javaagent:%s %s" %
            (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
            'HADOOP_CLASSPATH':
            ':'.join([
                jobsub.conf.ASPECTPATH.get(),
                hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()
            ]),
            'HUE_JOBTRACE_LOG':
            self.internal_file_name("jobs"),
            'HUE_JOBSUB_USER':
            self.plan.user,
            'HUE_JOBSUB_GROUPS':
            ",".join(self.plan.groups),
        }

        delegation_token_files = []
        all_clusters = []
        all_clusters += all_mrclusters().values()
        all_clusters += get_all_hdfs().values()
        LOG.info("all_clusters: %s" % (repr(all_clusters), ))
        for cluster in all_clusters:
            if cluster.security_enabled:
                cluster.setuser(self.plan.user)
                token = cluster.get_delegation_token()
                token_file = tempfile.NamedTemporaryFile()
                token_file.write(token.delegationTokenBytes)
                token_file.flush()
                delegation_token_files.append(token_file)

        if delegation_token_files:
            env['HADOOP_TOKEN_FILE_LOCATION'] = ','.join(
                [token_file.name for token_file in delegation_token_files])

        java_home = os.getenv('JAVA_HOME')
        if java_home:
            env["JAVA_HOME"] = java_home
        for k, v in env.iteritems():
            assert v is not None, "Environment key %s missing value." % k

        args = [hadoop.conf.HADOOP_BIN.get()]
        if hadoop.conf.HADOOP_CONF_DIR.get():
            args.append("--config")
            args.append(hadoop.conf.HADOOP_CONF_DIR.get())

        args += step.arguments
        LOG.info("Starting %s.  (Env: %s)", repr(args), repr(env))
        LOG.info("Running: %s" % " ".join(args))
        self.pipe = subprocess.Popen(args,
                                     stdin=None,
                                     cwd=self.work_dir,
                                     stdout=self.stdout,
                                     stderr=self.stderr,
                                     shell=False,
                                     close_fds=True,
                                     env=env)
        retcode = self.pipe.wait()
        if 0 != retcode:
            raise Exception("bin/hadoop returned non-zero %d" % retcode)
        LOG.info("bin/hadoop returned %d" % retcode)
        for token_file in delegation_token_files:
            token_file.close()