def config_validator():
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  status = 'down'
  try:
    status = str(get_oozie().get_oozie_status())
  except Exception:
    # An unreachable server is exactly the condition reported below.
    pass
  if 'NORMAL' not in status:
    res.append((status, _('The Oozie server is not available')))

  class ConfigMock:
    def __init__(self, value):
      self.value = value
    def get(self):
      return self.value
    def get_fully_qualifying_key(self):
      return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(REMOTE_DEPLOYMENT_DIR, is_dir=True, fs=cluster,
                             message=_('The deployment directory of Oozie workflows does not exist. '
                                       'Please run "Setup App" on the Oozie workflow page.')))
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
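validate_path() only needs the small config-variable interface (get() and get_fully_qualifying_key()), which is why the duck-typed ConfigMock above can wrap a literal path. A minimal self-contained sketch of that contract:

class ConfigMock:
  """Duck-typed stand-in for a config variable (illustration only)."""
  def __init__(self, value):
    self.value = value
  def get(self):
    return self.value
  def get_fully_qualifying_key(self):
    return self.value

mock = ConfigMock('/user/oozie/share/lib')
assert mock.get() == '/user/oozie/share/lib'
assert mock.get_fully_qualifying_key() == '/user/oozie/share/lib'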
def _get_delegation_tokens(self, username, delegation_token_dir):
  """
  If operating against Kerberized Hadoop, we'll need to have obtained delegation
  tokens for the user we want to run the subprocess as. We have to do it here
  rather than in the subprocess because the subprocess does not have Kerberos
  credentials in that case.
  """
  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

  for cluster in all_clusters:
    if cluster.security_enabled:
      current_user = cluster.user
      try:
        cluster.setuser(username)
        token = cluster.get_delegation_token()
        token_file = tempfile.NamedTemporaryFile(dir=delegation_token_dir)
        token_file.write(token.delegationTokenBytes)
        token_file.flush()
        delegation_token_files.append(token_file)
      finally:
        cluster.setuser(current_user)

  return delegation_token_files
def _get_delegation_tokens(self, username, delegation_token_dir):
  """
  If operating against Kerberized Hadoop, we'll need to have obtained delegation
  tokens for the user we want to run the subprocess as. We have to do it here
  rather than in the subprocess because the subprocess does not have Kerberos
  credentials in that case.
  """
  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

  for cluster in all_clusters:
    if cluster.security_enabled:
      current_user = cluster.user
      try:
        cluster.setuser(username)
        token = cluster.get_delegation_token(KERBEROS.HUE_PRINCIPAL.get())
        token_file_no, path = tempfile.mkstemp(dir=delegation_token_dir)
        os.write(token_file_no, token)
        os.close(token_file_no)
        delegation_token_files.append(path)
      finally:
        cluster.setuser(current_user)

  return delegation_token_files
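Unlike the NamedTemporaryFile variant above it, this mkstemp() variant returns plain paths that are not deleted automatically, so cleanup falls to the caller. A minimal sketch of caller-side cleanup, assuming the tokens are only needed for the lifetime of the subprocess (names here are illustrative, not a confirmed API):

import os
import tempfile

delegation_token_dir = tempfile.mkdtemp()  # hypothetical staging directory
token_paths = []  # e.g. self._get_delegation_tokens(username, delegation_token_dir)
try:
  pass  # launch the subprocess that consumes the token files here
finally:
  for path in token_paths:
    try:
      os.remove(path)  # mkstemp files must be removed explicitly
    except OSError:
      pass  # already gone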
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs

  res = []

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    class ConfigMock:
      def __init__(self, value):
        self.value = value
      def get(self):
        return self.value
      def get_fully_qualifying_key(self):
        return self.value

    for cluster in get_all_hdfs().values():
      res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                               message=_('Oozie Share Lib not installed in default location.')))

  return res
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs

  res = []

  status = get_oozie_status()
  if 'NORMAL' not in status:
    res.append((status, _('The Oozie server is not available')))

  class ConfigMock:
    def __init__(self, value):
      self.value = value
    def get(self):
      return self.value
    def get_fully_qualifying_key(self):
      return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
def config_validator():
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  # from liboozie.oozie_api import get_oozie

  res = []

  # status = 'down'
  # try:
  #   status = str(get_oozie().get_oozie_status())
  # except:
  #   pass
  # if 'NORMAL' not in status:
  #   res.append((status, _('The Oozie server is not available')))

  class ConfigMock:
    def __init__(self, value):
      self.value = value
    def get(self):
      return self.value
    def get_fully_qualifying_key(self):
      return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(REMOTE_DEPLOYMENT_DIR, is_dir=True, fs=cluster,
                             message=_('The deployment directory of Oozie workflows does not exist. '
                                       'Run "Setup App" on the Oozie workflow page.')))
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  old_caches = clear_sys_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
    restore_sys_caches(old_caches)
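The reset tuple works because set_for_testing() hands back a callable that restores the previous value when invoked; the finally block then unwinds every override. A generic sketch of that undo pattern (the helper below is a hypothetical stand-in, not Hue's implementation):

def set_for_testing(store, key, value):
  """Hypothetical helper: override store[key] and return an undo callable."""
  sentinel = object()
  old = store.get(key, sentinel)
  store[key] = value
  def undo():
    if old is sentinel:
      store.pop(key, None)
    else:
      store[key] = old
  return undo

settings = {'HDFS_CLUSTERS': {'default': {}}}
reset = (set_for_testing(settings, 'HDFS_CLUSTERS', {'non_default': {}}),)
try:
  assert list(settings['HDFS_CLUSTERS']) == ['non_default']
finally:
  for old_conf in reset:
    old_conf()
assert 'default' in settings['HDFS_CLUSTERS']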
def _init_filesystems():
  """Initialize the module-scoped filesystem dictionary."""
  global _filesystems
  if _filesystems is not None:
    return
  _filesystems = {}

  if has_hadoop():
    # Load HDFSes
    _filesystems.update(get_all_hdfs())

  # Load local
  for identifier in conf.LOCAL_FILESYSTEMS.keys():
    local_fs = LocalSubFileSystem(conf.LOCAL_FILESYSTEMS[identifier].PATH.get())
    if identifier in _filesystems:
      raise Exception(("Filesystem '%s' configured twice. First is "
                       "%s, second is local FS %s") %
                      (identifier, _filesystems[identifier], local_fs))
    _filesystems[identifier] = local_fs
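A minimal sketch of how such a module is typically consumed: a lazy accessor that triggers initialization on first use. The body below is an assumption for illustration, not the actual fsmanager implementation:

_filesystems = None  # module-scoped cache filled in by _init_filesystems()

def get_filesystem(identifier='default'):
  """Hypothetical accessor: initialize lazily, then look up by name."""
  _init_filesystems()
  return _filesystems.get(identifier)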
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
def config_validator():
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs

  res = []

  class ConfigMock:
    def __init__(self, value):
      self.value = value
    def get(self):
      return self.value
    def get_fully_qualifying_key(self):
      return self.value

  for cluster in get_all_hdfs().values():
    res.extend(validate_path(REMOTE_DEPLOYMENT_DIR, is_dir=True, fs=cluster,
                             message=_('The deployment directory of Oozie workflows does not exist. '
                                       'Run "Setup Examples" on the Oozie workflow page.')))
    res.extend(validate_path(ConfigMock('/user/oozie/share/lib'), is_dir=True, fs=cluster,
                             message=_('Oozie Share Lib not installed in default location.')))

  return res
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  from hadoop.fs.hadoopfs import Hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    api = get_oozie(user)
    instrumentation = api.get_instrumentation()
    sharelib_url = [param['value'] for group in instrumentation['variables']
                    for param in group['data'] if param['name'] == 'sharelib.system.libpath']

    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]
    if not sharelib_url:
      res.append((status, _('Oozie Share Lib path is not available')))

    class ConfigMock:
      def __init__(self, value):
        self.value = value
      def get(self):
        return self.value
      def get_fully_qualifying_key(self):
        return self.value

    for cluster in get_all_hdfs().values():
      res.extend(validate_path(ConfigMock(sharelib_url), is_dir=True, fs=cluster,
                               message=_('Oozie Share Lib not installed in default location.')))

  return res
def run_bin_hadoop_step(self, step):
  """
  user.name is used by FileSystem.getHomeDirectory(). The environment
  variables for _USER and _GROUPS are used by the aspectj aspect to
  overwrite Hadoop's notion of users and groups.
  """
  java_properties = {}
  java_properties["hue.suffix"] = "-via-hue"
  java_properties["user.name"] = self.plan.user
  java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
  env = {
    'HADOOP_HOME': hadoop.conf.HADOOP_HOME.get(),
    'HADOOP_OPTS': "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
    'HADOOP_CLASSPATH': ':'.join([jobsub.conf.ASPECTPATH.get(),
                                  hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()]),
    'HUE_JOBTRACE_LOG': self.internal_file_name("jobs"),
    'HUE_JOBSUB_USER': self.plan.user,
    'HUE_JOBSUB_GROUPS': ",".join(self.plan.groups),
    'LANG': os.getenv('LANG', i18n.get_site_encoding()),
  }

  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()

  delegation_token_files = []
  merged_token_file = tempfile.NamedTemporaryFile()
  try:
    LOG.debug("all_clusters: %s" % (repr(all_clusters),))
    for cluster in all_clusters:
      if cluster.security_enabled:
        cluster.setuser(self.plan.user)
        token = cluster.get_delegation_token()
        token_file = tempfile.NamedTemporaryFile()
        token_file.write(token.delegationTokenBytes)
        token_file.flush()
        delegation_token_files.append(token_file)

    java_home = os.getenv('JAVA_HOME')
    if java_home:
      env["JAVA_HOME"] = java_home

    for k, v in env.iteritems():
      assert v is not None, "Environment key %s missing value." % k

    base_args = [hadoop.conf.HADOOP_BIN.get()]
    if hadoop.conf.HADOOP_CONF_DIR.get():
      base_args.append("--config")
      base_args.append(hadoop.conf.HADOOP_CONF_DIR.get())

    if delegation_token_files:
      args = list(base_args)  # Make a copy of the base args.
      args += ['jar', hadoop.conf.CREDENTIALS_MERGER_JAR.get(), merged_token_file.name]
      args += [token_file.name for token_file in delegation_token_files]
      LOG.debug("merging credential files with command: '%s'" % (' '.join(args),))
      merge_pipe = subprocess.Popen(args, shell=False, close_fds=True)
      retcode = merge_pipe.wait()
      if 0 != retcode:
        raise Exception("bin/hadoop returned non-zero %d while trying to merge credentials" % (retcode,))
      env['HADOOP_TOKEN_FILE_LOCATION'] = merged_token_file.name

    args = list(base_args)  # Make a copy of the base args.
    args += step.arguments
    LOG.info("Starting %s. (Env: %s)", repr(args), repr(env))
    LOG.info("Running: %s" % " ".join(args))
    self.pipe = subprocess.Popen(args,
                                 stdin=None,
                                 cwd=self.work_dir,
                                 stdout=self.stdout,
                                 stderr=self.stderr,
                                 shell=False,
                                 close_fds=True,
                                 env=env)
    retcode = self.pipe.wait()
    if 0 != retcode:
      raise Exception("bin/hadoop returned non-zero %d" % retcode)
    LOG.info("bin/hadoop returned %d" % retcode)
  finally:
    for token_file in delegation_token_files + [merged_token_file]:
      token_file.close()
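The merge step exists because HADOOP_TOKEN_FILE_LOCATION appears to name a single credentials file, so the per-cluster tokens are first combined by the credentials-merger jar; the two later variants of this function below instead join the file names with commas. A minimal sketch of the env-var handoff, with placeholder paths:

import os
import subprocess

merged_tokens = '/tmp/hue-merged-tokens'  # hypothetical path for illustration
env = dict(os.environ)
env['HADOOP_TOKEN_FILE_LOCATION'] = merged_tokens  # bin/hadoop picks credentials up from here
# subprocess.Popen([hadoop_bin, 'jar', job_jar], env=env, close_fds=True)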
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from desktop.lib.fsmanager import get_filesystem
  from hadoop.cluster import get_all_hdfs
  from hadoop.fs.hadoopfs import Hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  try:
    from oozie.conf import REMOTE_SAMPLE_DIR
  except Exception as e:
    LOG.warn('Config check failed because Oozie app not installed: %s' % e)
    return res

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    fs = get_filesystem()
    NICE_NAME = 'Oozie'

    if fs.do_as_superuser(fs.exists, REMOTE_SAMPLE_DIR.get()):
      stats = fs.do_as_superuser(fs.stats, REMOTE_SAMPLE_DIR.get())
      mode = oct(stats.mode)
      # Flag the workspace if neither group nor others have write permission.
      group_has_write = int(mode[-2]) & 2
      others_has_write = int(mode[-1]) & 2
      if not group_has_write and not others_has_write:
        res.append((NICE_NAME,
                    "The permissions of workspace '%s' are too restrictive" % REMOTE_SAMPLE_DIR.get()))

    api = get_oozie(user, api_version="v2")

    configuration = api.get_configuration()
    if 'org.apache.oozie.service.MetricsInstrumentationService' in [
        c.strip() for c in configuration.get('oozie.services.ext', '').split(',')]:
      metrics = api.get_metrics()
      sharelib_url = ('gauges' in metrics and 'libs.sharelib.system.libpath' in metrics['gauges']
                      and [metrics['gauges']['libs.sharelib.system.libpath']['value']] or [])
    else:
      instrumentation = api.get_instrumentation()
      sharelib_url = [param['value'] for group in instrumentation['variables']
                      for param in group['data'] if param['name'] == 'sharelib.system.libpath']

    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]
    if not sharelib_url:
      res.append((status, _('Oozie Share Lib path is not available')))

    class ConfigMock(object):
      def __init__(self, value):
        self.value = value
      def get(self):
        return self.value
      def get_fully_qualifying_key(self):
        return self.value

    for cluster in list(get_all_hdfs().values()):
      res.extend(validate_path(ConfigMock(sharelib_url), is_dir=True, fs=cluster,
                               message=_('Oozie Share Lib not installed in default location.')))

  return res
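A worked example of the octal permission check above, assuming stats.mode carries standard POSIX bits: for mode 0o40755 (drwxr-xr-x) the last two octal digits are 5 (group) and 5 (others), and 5 & 2 == 0, so this workspace would be flagged:

mode = oct(0o40755)                   # '040755' on Python 2, '0o40755' on Python 3
group_has_write = int(mode[-2]) & 2   # 5 & 2 == 0 -> group cannot write
others_has_write = int(mode[-1]) & 2  # 5 & 2 == 0 -> others cannot write
assert not group_has_write and not others_has_write  # would be reported as too restrictive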
def _init_filesystems():
  """Initialize the module-scoped filesystem dictionary."""
  global _filesystems
  if _filesystems is not None:
    return
  _filesystems = get_all_hdfs()
def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  from hadoop.fs.hadoopfs import Hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if "NORMAL" not in status:
      res.append((status, _("The Oozie server is not available")))

    api = get_oozie(user, api_version="v2")

    configuration = api.get_configuration()
    if "org.apache.oozie.service.MetricsInstrumentationService" in [
        c.strip() for c in configuration.get("oozie.services.ext", "").split(",")]:
      metrics = api.get_metrics()
      sharelib_url = (
        "gauges" in metrics
        and "libs.sharelib.system.libpath" in metrics["gauges"]
        and [metrics["gauges"]["libs.sharelib.system.libpath"]["value"]]
        or []
      )
    else:
      instrumentation = api.get_instrumentation()
      sharelib_url = [
        param["value"]
        for group in instrumentation["variables"]
        for param in group["data"]
        if param["name"] == "sharelib.system.libpath"
      ]

    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]
    if not sharelib_url:
      res.append((status, _("Oozie Share Lib path is not available")))

    class ConfigMock:
      def __init__(self, value):
        self.value = value
      def get(self):
        return self.value
      def get_fully_qualifying_key(self):
        return self.value

    for cluster in get_all_hdfs().values():
      res.extend(
        validate_path(
          ConfigMock(sharelib_url),
          is_dir=True,
          fs=cluster,
          message=_("Oozie Share Lib not installed in default location."),
        )
      )

  return res
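The fallback comprehension implies a particular shape for the v1 instrumentation payload: a list of groups under "variables", each carrying a "data" list of name/value pairs. A minimal sketch of that shape, reconstructed from the lookup itself (any field not referenced by the code is an assumption):

instrumentation = {
  "variables": [
    {
      "group": "libs",  # assumed label; the code only reads 'data'
      "data": [
        {"name": "sharelib.system.libpath",
         "value": "hdfs://namenode:8020/user/oozie/share/lib"},
      ],
    },
  ],
}
sharelib_url = [param["value"]
                for group in instrumentation["variables"]
                for param in group["data"]
                if param["name"] == "sharelib.system.libpath"]
assert sharelib_url == ["hdfs://namenode:8020/user/oozie/share/lib"]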
def run_bin_hadoop_step(self, step):
  """
  user.name is used by FileSystem.getHomeDirectory(). The environment
  variables for _USER and _GROUPS are used by the aspectj aspect to
  overwrite Hadoop's notion of users and groups.
  """
  java_properties = {}
  java_properties["hue.suffix"] = "-via-hue"
  java_properties["user.name"] = self.plan.user
  java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
  env = {
    "HADOOP_HOME": hadoop.conf.HADOOP_HOME.get(),
    "HADOOP_OPTS": "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
    "HADOOP_CLASSPATH": ":".join([jobsub.conf.ASPECTPATH.get(),
                                  hadoop.conf.HADOOP_STATIC_GROUP_MAPPING_CLASSPATH.get()]),
    "HUE_JOBTRACE_LOG": self.internal_file_name("jobs"),
    "HUE_JOBSUB_USER": self.plan.user,
    "HUE_JOBSUB_GROUPS": ",".join(self.plan.groups),
  }

  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.info("all_clusters: %s" % (repr(all_clusters),))
  for cluster in all_clusters:
    if cluster.security_enabled:
      cluster.setuser(self.plan.user)
      token = cluster.get_delegation_token()
      token_file = tempfile.NamedTemporaryFile()
      token_file.write(token.delegationTokenBytes)
      token_file.flush()
      delegation_token_files.append(token_file)

  if delegation_token_files:
    env["HADOOP_TOKEN_FILE_LOCATION"] = ",".join(
      [token_file.name for token_file in delegation_token_files])

  java_home = os.getenv("JAVA_HOME")
  if java_home:
    env["JAVA_HOME"] = java_home

  for k, v in env.iteritems():
    assert v is not None, "Environment key %s missing value." % k

  args = [hadoop.conf.HADOOP_BIN.get()]
  if hadoop.conf.HADOOP_CONF_DIR.get():
    args.append("--config")
    args.append(hadoop.conf.HADOOP_CONF_DIR.get())
  args += step.arguments
  LOG.info("Starting %s. (Env: %s)", repr(args), repr(env))
  LOG.info("Running: %s" % " ".join(args))
  self.pipe = subprocess.Popen(
    args,
    stdin=None,
    cwd=self.work_dir,
    stdout=self.stdout,
    stderr=self.stderr,
    shell=False,
    close_fds=True,
    env=env,
  )
  retcode = self.pipe.wait()
  if 0 != retcode:
    raise Exception("bin/hadoop returned non-zero %d" % retcode)
  LOG.info("bin/hadoop returned %d" % retcode)

  for token_file in delegation_token_files:
    token_file.close()
def run_bin_hadoop_step(self, step):
  """
  user.name is used by FileSystem.getHomeDirectory(). The environment
  variables for _USER and _GROUPS are used by the aspectj aspect to
  overwrite Hadoop's notion of users and groups.
  """
  java_properties = {}
  java_properties["hue.suffix"] = "-via-hue"
  java_properties["user.name"] = self.plan.user
  java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
  env = {
    'HADOOP_HOME': hadoop.conf.HADOOP_HOME.get(),
    'HADOOP_OPTS': "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
    'HADOOP_CLASSPATH': ':'.join([jobsub.conf.ASPECTPATH.get(),
                                  hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()]),
    'HUE_JOBTRACE_LOG': self.internal_file_name("jobs"),
    'HUE_JOBSUB_USER': self.plan.user,
    'HUE_JOBSUB_GROUPS': ",".join(self.plan.groups),
  }

  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.info("all_clusters: %s" % (repr(all_clusters),))
  for cluster in all_clusters:
    if cluster.security_enabled:
      cluster.setuser(self.plan.user)
      token = cluster.get_delegation_token()
      token_file = tempfile.NamedTemporaryFile()
      token_file.write(token.delegationTokenBytes)
      token_file.flush()
      delegation_token_files.append(token_file)

  if delegation_token_files:
    env['HADOOP_TOKEN_FILE_LOCATION'] = ','.join(
      [token_file.name for token_file in delegation_token_files])

  java_home = os.getenv('JAVA_HOME')
  if java_home:
    env["JAVA_HOME"] = java_home

  for k, v in env.iteritems():
    assert v is not None, "Environment key %s missing value." % k

  args = [hadoop.conf.HADOOP_BIN.get()]
  if hadoop.conf.HADOOP_CONF_DIR.get():
    args.append("--config")
    args.append(hadoop.conf.HADOOP_CONF_DIR.get())
  args += step.arguments
  LOG.info("Starting %s. (Env: %s)", repr(args), repr(env))
  LOG.info("Running: %s" % " ".join(args))
  self.pipe = subprocess.Popen(args,
                               stdin=None,
                               cwd=self.work_dir,
                               stdout=self.stdout,
                               stderr=self.stderr,
                               shell=False,
                               close_fds=True,
                               env=env)
  retcode = self.pipe.wait()
  if 0 != retcode:
    raise Exception("bin/hadoop returned non-zero %d" % retcode)
  LOG.info("bin/hadoop returned %d" % retcode)

  for token_file in delegation_token_files:
    token_file.close()