def _get_script_for_user_creation(cluster, instance, user):
    data = files.get_file_text(
        'plugins/resources/create-principal-keytab')
    cron_file = files.get_file_text('plugins/resources/cron-file')
    cron_script = files.get_file_text('plugins/resources/cron-script')
    data = data % {
        'user': user,
        'admin_principal': get_admin_principal(cluster),
        'admin_password': get_server_password(cluster),
        'principal': "%s/sahara-%s@%s" % (
            user, instance.fqdn(), get_realm_name(cluster)),
        'keytab': '%s-sahara-%s.keytab' % (user, instance.fqdn())
    }
    cron_script_location = '/tmp/sahara-kerberos/%s.sh' % _get_short_uuid()
    cron_file = cron_file % {'refresher': cron_script_location, 'user': user}
    cron_script = cron_script % {
        'principal': "%s/sahara-%s@%s" % (
            user, instance.fqdn(), get_realm_name(cluster)),
        'keytab': '%s-sahara-%s.keytab' % (user, instance.fqdn()),
        'user': user,
    }
    return data, cron_file, cron_script, cron_script_location

def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        "hadoop_user": HADOOP_USER,
        "hadoop_group": HADOOP_GROUP,
        "hadoop_conf_dir": HADOOP_CONF_DIR,
        "oozie_conf_dir": OOZIE_CONF_DIR,
        "hadoop_name_dirs": " ".join(dirs["hadoop_name_dirs"]),
        "hadoop_data_dirs": " ".join(dirs["hadoop_data_dirs"]),
        "hadoop_log_dir": dirs["hadoop_log_dir"],
        "hadoop_secure_dn_log_dir": dirs["hadoop_secure_dn_log_dir"],
        "yarn_log_dir": dirs["yarn_log_dir"],
    }
    post_conf_script = f.get_file_text(
        "plugins/vanilla/hadoop2/resources/post_conf.template")
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to("/tmp/post_conf.sh", post_conf_script)
        r.execute_command("chmod +x /tmp/post_conf.sh")
        r.execute_command("sudo /tmp/post_conf.sh")

        if c_helper.is_data_locality_enabled(pctx, instance.cluster):
            t_script = HADOOP_CONF_DIR + "/topology.sh"
            r.write_file_to(
                t_script,
                f.get_file_text(
                    "plugins/vanilla/hadoop2/resources/topology.sh"),
                run_as_root=True
            )
            r.execute_command("chmod +x " + t_script, run_as_root=True)

def _post_configuration(instance):
    node_group = instance.node_group
    dirs = _get_hadoop_dirs(node_group)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = f.get_file_text(
        'plugins/vanilla/v2_3_0/resources/post_conf.template')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if c_helper.is_data_locality_enabled(instance.node_group.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(
                t_script,
                f.get_file_text(
                    'plugins/vanilla/v2_3_0/resources/topology.sh'),
                run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)

def get_config_files(self, cluster_context, configs, instance=None):
    template = 'plugins/mapr/services/hue/resources/hue_%s.template'

    # hue.ini
    hue_ini = bcf.TemplateFile("hue.ini")
    hue_ini.remote_path = self.conf_dir(cluster_context)
    hue_ini.parse(files.get_file_text(template % self.version))
    hue_ini.add_properties(self._get_hue_ini_props(cluster_context))
    hue_ini.add_property("thrift_version",
                         configs[self.THRIFT_VERSION.name])

    # hue.sh
    hue_sh_template = 'plugins/mapr/services/hue/' \
                      'resources/hue_sh_%s.template'
    hue_sh = bcf.TemplateFile("hue.sh")
    hue_sh.remote_path = self.home_dir(cluster_context) + '/bin'
    hue_sh.parse(files.get_file_text(hue_sh_template % self.version))
    hue_sh.add_property('hadoop_version', cluster_context.hadoop_version)
    hue_sh.mode = 777

    hue_instances = cluster_context.get_instances(HUE)
    for instance in hue_instances:
        if instance not in cluster_context.changed_instances():
            cluster_context.should_be_restarted[self] += [instance]

    return [hue_ini, hue_sh]

def get_config_files(self, cluster_context, configs, instance=None):
    hive_default = 'plugins/mapr/services/hive/resources/hive-default.xml'

    hive_site = bcf.HadoopXML("hive-site.xml")
    hive_site.remote_path = self.conf_dir(cluster_context)
    if instance:
        hive_site.fetch(instance)
    hive_site.parse(files.get_file_text(hive_default,
                                        'sahara_plugin_mapr'))
    hive_site.add_properties(self._get_hive_site_props(cluster_context))
    sentry_host = cluster_context.get_instance(sentry.SENTRY)
    if sentry_host:
        sentry_mode = cluster_context._get_cluster_config_value(
            sentry.Sentry().SENTRY_STORAGE_MODE)
        ui_name = sentry.Sentry().ui_name
        sentry_version = cluster_context.get_chosen_service_version(
            ui_name)
        sentry_service = cluster_context. \
            _find_service_instance(ui_name, sentry_version)
        if sentry_service.supports(self, sentry_mode):
            sentry_default = 'plugins/mapr/services/hive/resources/' \
                             'sentry-default.xml'
            sentry_db = \
                'plugins/mapr/services/hive/resources/sentry-db.xml'
            hive_site.parse(
                files.get_file_text(sentry_default, 'sahara_plugin_mapr'))
            hive_site.add_property(
                'hive.sentry.conf.url',
                'file://%s/sentry-site.xml' % sentry_service.conf_dir(
                    cluster_context))
            if sentry_mode == sentry.DB_STORAGE_SENTRY_MODE:
                hive_site.parse(
                    files.get_file_text(sentry_db, 'sahara_plugin_mapr'))
    return [hive_site]

def _edp_pig_test(self):
    pig_job = f.get_file_text(RESOURCES_PATH + 'edp-job.pig')
    pig_lib = f.get_file_text(RESOURCES_PATH + 'edp-lib.jar')
    self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
                     job_data_list=[{'pig': pig_job}],
                     lib_data_list=[{'jar': pig_lib}],
                     swift_binaries=True,
                     hdfs_local_output=True)

def _get_kdc_config(cluster, os):
    if os == "ubuntu":
        data = files.get_file_text('plugins/resources/kdc_conf')
    else:
        data = files.get_file_text('plugins/resources/kdc_conf_redhat')
    return data % {
        'realm_name': get_realm_name(cluster)
    }

def _edp_test(self):
    path = 'tests/integration/tests/resources/'

    # check pig
    pig_job = f.get_file_text(path + 'edp-job.pig')
    pig_lib = f.get_file_text(path + 'edp-lib.jar')
    self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
                     job_data_list=[{'pig': pig_job}],
                     lib_data_list=[{'jar': pig_lib}],
                     swift_binaries=True,
                     hdfs_local_output=True)

    # check mapreduce
    mapreduce_jar = f.get_file_text(path + 'edp-mapreduce.jar')
    mapreduce_configs = {
        'configs': {
            'mapred.mapper.class': 'org.apache.oozie.example.SampleMapper',
            'mapred.reducer.class': 'org.apache.oozie.example.SampleReducer'
        }
    }
    self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE,
                     job_data_list=[],
                     lib_data_list=[{'jar': mapreduce_jar}],
                     configs=mapreduce_configs,
                     swift_binaries=True,
                     hdfs_local_output=True)

    # check mapreduce streaming
    mapreduce_streaming_configs = {
        'configs': {
            'edp.streaming.mapper': '/bin/cat',
            'edp.streaming.reducer': '/usr/bin/wc'
        }
    }
    self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING,
                     job_data_list=[],
                     lib_data_list=[],
                     configs=mapreduce_streaming_configs)

    # check java
    java_jar = f.get_file_text(
        path + 'hadoop-mapreduce-examples-2.3.0.jar')
    java_configs = {
        'configs': {
            'edp.java.main_class':
                'org.apache.hadoop.examples.QuasiMonteCarlo'
        },
        'args': ['10', '10']
    }
    self.edp_testing(utils_edp.JOB_TYPE_JAVA,
                     job_data_list=[],
                     lib_data_list=[{'jar': java_jar}],
                     configs=java_configs)

def _update_jackson_libs(self, context, instances):
    hadoop_lib = context.hadoop_lib
    core_asl = f.get_file_text(JACKSON_CORE_ASL)
    mapper_asl = f.get_file_text(JACKSON_MAPPER_ASL)
    core_asl_path = '%s/%s' % (hadoop_lib, 'jackson-core-asl-1.9.13.jar')
    mapper_path = '%s/%s' % (hadoop_lib, 'jackson-mapper-asl-1.9.13.jar')
    libs = {core_asl_path: core_asl, mapper_path: mapper_asl}
    for instance in instances:
        with instance.remote() as r:
            r.execute_command('rm %s/jackson-*.jar' % hadoop_lib,
                              run_as_root=True)
            r.write_files_to(libs, run_as_root=True)

def _install_swift_jar(self, context, instances):
    LOG.debug('Installing Swift jar')
    jar = f.get_file_text(Swift.HADOOP_SWIFT_JAR)
    path = '%s/swift.jar' % context.hadoop_lib
    for instance in instances:
        with instance.remote() as r:
            r.write_file_to(path, jar, run_as_root=True)

def start_hiveserver_process(pctx, instance, backup=None):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if backup is None:
                _hive_create_warehouse_dir(r)
            _hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path('hadoop'), backup)

            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                oozie = vu.get_oozie(instance.node_group.cluster)
                if not oozie or instance.hostname() != oozie.hostname():
                    _start_mysql(r)

                if backup is None:
                    sql_script = files.get_file_text(
                        'plugins/sandbox/hadoop2/resources/create_hive_db.sql')
                    sql_script = sql_script.replace(
                        '{{password}}',
                        u.get_hive_password(instance.cluster))
                    r.write_file_to('/tmp/create_hive_db.sql', sql_script)
                    _hive_create_db(r)

                _hive_metastore_start(r)
                _hive_hiveserver2_start(r)
                LOG.info(
                    _LI("Hive Metastore server at {host} has been "
                        "started").format(host=instance.hostname()))

def start_oozie_process(pctx, instance, backup=None):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                if backup is None:
                    LOG.debug("Creating Oozie DB Schema")
                    sql_script = files.get_file_text(
                        'plugins/sandbox/hadoop2/resources/create_oozie_db.sql')
                    password = oozie_helper.get_oozie_mysql_configs(
                        instance.cluster)[
                        'oozie.service.JPAService.jdbc.password']
                    sql_script = sql_script.replace("password", password)
                    script_location = "create_oozie_db.sql"
                    r.write_file_to(script_location, sql_script)
                    r.execute_command(
                        'mysql -u root < %(script_location)s && '
                        'rm %(script_location)s' %
                        {"script_location": script_location})

            if backup is None:
                _oozie_share_lib(r)
            _start_oozie(r)

def _get_krb5_config(cluster, server_fqdn):
    data = files.get_file_text('plugins/resources/krb5_config')
    return data % {
        'realm_name': get_realm_name(cluster),
        'server': server_fqdn,
        'node_domain': CONF.node_domain,
    }

def get_config_files(self, cluster_context, configs, instance=None):
    hbase_version = self._get_hbase_version(cluster_context)
    hive_version = self._get_hive_version(cluster_context)

    # spark-env-sh
    template = 'plugins/mapr/services/' \
               'spark/resources/spark-env.template'
    env_sh = bcf.TemplateFile('spark-env.sh')
    env_sh.remote_path = self.conf_dir(cluster_context)
    env_sh.parse(files.get_file_text(template))
    env_sh.add_property('version', self.version)

    # spark-defaults
    conf = bcf.PropertiesFile('spark-defaults.conf', separator=' ')
    conf.remote_path = self.conf_dir(cluster_context)
    if instance:
        conf.fetch(instance)

    # compatibility.version
    versions = bcf.PropertiesFile('compatibility.version')
    versions.remote_path = self.home_dir(cluster_context) + '/mapr-util'
    if instance:
        versions.fetch(instance)

    if hive_version:
        versions.add_property('hive_versions', hive_version + '.0')
        conf.add_properties(self._hive_properties(cluster_context))

    if hbase_version:
        versions.add_property('hbase_versions', hbase_version)
        conf.add_property(
            'spark.executor.extraClassPath',
            '%s/lib/*' % self._hbase(cluster_context).home_dir(
                cluster_context))

    return [conf, versions, env_sh]

def get_builtin_binaries(job, configs):
    if job.type == JOB_TYPE_JAVA:
        if is_adapt_for_oozie_enabled(configs):
            path = 'service/edp/resources/edp-main-wrapper.jar'
            name = 'builtin-%s.jar' % six.text_type(uuid.uuid4())
            return [{'raw': files.get_file_text(path),
                     'name': name}]
    return []

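# Hedged note (not from the original module) on the shape returned by
# get_builtin_binaries above: each entry carries the jar bytes under 'raw'
# and a generated file name under 'name', so an uploader can write it out
# roughly as:
#
#     for builtin in get_builtin_binaries(job, configs):
#         r.write_file_to(os.path.join(job_dir, builtin['name']),
#                         builtin['raw'])
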
def test_start_hiveserver_process(
        self, add_provisioning_step, check_cluster_exists,
        set_current_instance_id, get_oozie, _hive_create_warehouse_dir,
        _hive_copy_shared_conf, _start_mysql, _hive_create_db,
        _hive_metastore_start, is_mysql_enabled, get_hive_password):
    pctx = mock.Mock()
    path = edp.get_hive_shared_conf_path('hadoop')
    is_mysql_enabled.return_value = True
    cluster = self.instance.cluster
    ng_cluster = self.instance.node_group.cluster
    get_oozie.return_value = None
    sql_script = files.get_file_text(
        'plugins/vanilla/hadoop2/resources/create_hive_db.sql')
    get_hive_password.return_value = '123'
    pwd_script = sql_script.replace('{{password}}', '123')

    rs.start_hiveserver_process(pctx, self.instance)

    set_current_instance_id.assert_called_once_with(
        self.instance.instance_id)
    _hive_create_warehouse_dir.assert_called_once_with(self.r)
    _hive_copy_shared_conf.assert_called_once_with(self.r, path)
    is_mysql_enabled.assert_called_once_with(pctx, cluster)
    get_oozie.assert_called_once_with(ng_cluster)
    _start_mysql.assert_called_once_with(self.r)
    get_hive_password.assert_called_once_with(cluster)
    self.r.write_file_to.assert_called_once_with('/tmp/create_hive_db.sql',
                                                 pwd_script)
    _hive_create_db.assert_called_once_with(self.r)
    _hive_metastore_start.assert_called_once_with(self.r)

def _upload_job_files(self, where, job_dir, job, job_configs):

    def upload(r, dir, job_file, proxy_configs):
        dst = os.path.join(dir, job_file.name)
        raw_data = dispatch.get_raw_binary(job_file, proxy_configs)
        r.write_file_to(dst, raw_data)
        return dst

    def upload_builtin(r, dir, builtin):
        dst = os.path.join(dir, builtin['name'])
        r.write_file_to(dst, builtin['raw'])
        return dst

    builtin_libs = []
    if edp.is_adapt_spark_for_swift_enabled(job_configs.get('configs', {})):
        path = 'service/edp/resources/edp-spark-wrapper.jar'
        name = 'builtin-%s.jar' % six.text_type(uuid.uuid4())
        builtin_libs = [{'raw': files.get_file_text(path), 'name': name}]

    uploaded_paths = []
    builtin_paths = []
    with remote.get_remote(where) as r:
        mains = list(job.mains) if job.mains else []
        libs = list(job.libs) if job.libs else []
        for job_file in mains + libs:
            uploaded_paths.append(
                upload(r, job_dir, job_file,
                       job_configs.get('proxy_configs')))
        for builtin in builtin_libs:
            builtin_paths.append(upload_builtin(r, job_dir, builtin))

    return uploaded_paths, builtin_paths

def test_load_template_with_anti_affinity_single_ng(self): """This test checks Heat cluster template with Neutron enabled and anti-affinity feature enabled for single node process in single node group. """ ng1 = tu.make_ng_dict('master', 42, ['namenode'], 1, floating_ip_pool='floating', image_id=None, volumes_per_node=0, volumes_size=0, id=1) ng2 = tu.make_ng_dict('worker', 42, ['datanode'], 2, floating_ip_pool='floating', image_id=None, volumes_per_node=0, volumes_size=0, id=2) cluster = tu.create_cluster("cluster", "tenant1", "general", "1.2.1", [ng1, ng2], user_keypair_id='user_key', neutron_management_network='private_net', default_image_id='1', anti_affinity=['datanode'], image_id=None) aa_heat_template = h.ClusterTemplate(cluster) aa_heat_template.add_node_group_extra(ng1['id'], 1, get_ud_generator('line1\nline2')) aa_heat_template.add_node_group_extra(ng2['id'], 2, get_ud_generator('line2\nline3')) self.override_config("use_neutron", True) main_template = h._load_template( 'main.heat', {'resources': aa_heat_template._serialize_resources()}) self.assertEqual( json.loads(main_template), json.loads(f.get_file_text( "tests/unit/resources/" "test_serialize_resources_aa.heat")))
def test_load_template_use_neutron(self):
    """This test checks Heat cluster template with Neutron enabled.

    Two NodeGroups used: 'master' with Ephemeral drive attached and
    'worker' with 2 attached volumes 10GB size each
    """
    ng1 = tu.make_ng_dict('master', 42, ['namenode'], 1,
                          floating_ip_pool='floating', image_id=None,
                          volumes_per_node=0, volumes_size=0, id=1)
    ng2 = tu.make_ng_dict('worker', 42, ['datanode'], 1,
                          floating_ip_pool='floating', image_id=None,
                          volumes_per_node=2, volumes_size=10, id=2)
    cluster = tu.create_cluster("cluster", "tenant1", "general", "1.2.1",
                                [ng1, ng2], user_keypair_id='user_key',
                                neutron_management_network='private_net',
                                default_image_id='1', anti_affinity=[],
                                image_id=None)
    heat_template = h.ClusterTemplate(cluster)
    heat_template.add_node_group_extra(ng1['id'], 1,
                                       get_ud_generator('line1\nline2'))
    heat_template.add_node_group_extra(ng2['id'], 1,
                                       get_ud_generator('line2\nline3'))

    self.override_config("use_neutron", True)
    main_template = h._load_template(
        'main.heat',
        {'resources': heat_template._serialize_resources()})

    self.assertEqual(
        json.loads(main_template),
        json.loads(f.get_file_text(
            "tests/unit/resources/"
            "test_serialize_resources_use_neutron.heat")))

def test_configure_sentry(self, keymanager, cluster_get,
                          cluster_update, uuid4, cfg_log):
    cluster = get_concrete_cluster()
    manager = cluster.node_groups[0].instances[0]
    cluster_get.return_value = cluster
    db_password = '******'
    uuid4.return_value = db_password
    create_db_script = files.get_file_text(
        'plugins/cdh/db_resources/create_sentry_db.sql'.format(
            version=self.version))
    create_db_script = create_db_script % db_password

    self.plug_utils.configure_sentry(cluster)

    with manager.remote() as r:
        cmd_exe_sql = ('PGPASSWORD=$(sudo head -1'
                       ' /var/lib/cloudera-scm-server-db/data/'
                       'generated_password.txt) psql'
                       ' -U cloudera-scm -h localhost -p 7432 -d scm -f'
                       ' script_to_exec.sql')
        cmd_clean = 'rm script_to_exec.sql'
        self.assertEqual(create_db_script, r.write_file_to.call_args[0][1])
        r.execute_command.assert_has_calls([mock.call(cmd_exe_sql),
                                            mock.call(cmd_clean)])

def get_config_files(self, cluster_context, configs, instance=None):
    defaults = 'plugins/mapr/services/impala/resources/impala-env.sh.j2'

    impala_env = bcf.TemplateFile("env.sh")
    impala_env.remote_path = self.conf_dir(cluster_context)
    if instance:
        impala_env.fetch(instance)
    impala_env.parse(files.get_file_text(defaults))
    impala_env.add_properties(self._get_impala_env_props(cluster_context))
    sentry_host = cluster_context.get_instance(sentry.SENTRY)
    if sentry_host:
        sentry_mode = cluster_context._get_cluster_config_value(
            sentry.Sentry().SENTRY_STORAGE_MODE)
        ui_name = sentry.Sentry().ui_name
        sentry_version = cluster_context.get_chosen_service_version(
            ui_name)
        sentry_service = cluster_context. \
            _find_service_instance(ui_name, sentry_version)
        if sentry_service.supports(self, sentry_mode):
            impala_env.add_properties({
                'sentry_home': sentry_service.home_dir(cluster_context),
                'sentry_db': sentry_mode == sentry.DB_STORAGE_SENTRY_MODE,
                'sentry_policy_file':
                    'maprfs://' + sentry_service.GLOBAL_POLICY_FILE,
            })
    return [impala_env]

def _edp_test(self):
    path = 'tests/integration/tests/resources/'

    # check pig
    pig_job = f.get_file_text(path + 'edp-job.pig')
    pig_lib = f.get_file_text(path + 'edp-lib.jar')
    self.edp_testing('Pig', [{'pig': pig_job}], [{'jar': pig_lib}])

    # check mapreduce
    mapreduce_jar = f.get_file_text(path + 'edp-mapreduce.jar')
    mapreduce_configs = {
        'configs': {
            'mapred.mapper.class': 'org.apache.oozie.example.SampleMapper',
            'mapred.reducer.class': 'org.apache.oozie.example.SampleReducer'
        }
    }
    self.edp_testing('MapReduce', [], [{'jar': mapreduce_jar}],
                     mapreduce_configs)

    # check mapreduce streaming
    mapreduce_streaming_configs = {
        'configs': {
            'edp.streaming.mapper': '/bin/cat',
            'edp.streaming.reducer': '/usr/bin/wc'
        }
    }
    self.edp_testing('MapReduce.Streaming', [], [],
                     mapreduce_streaming_configs)

    # check java
    java_jar = f.get_file_text(
        path + 'hadoop-mapreduce-examples-2.3.0.jar')
    java_configs = {
        'configs': {
            'edp.java.main_class':
                'org.apache.hadoop.examples.QuasiMonteCarlo'
        },
        'args': ['10', '10']
    }
    self.edp_testing('Java', [], lib_data_list=[{'jar': java_jar}],
                     configs=java_configs)

def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        '/etc/hadoop/conf/core-site.xml': ng_extra['xml']['core-site'],
        '/etc/hadoop/conf/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): ng_extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): ng_extra['sp_slaves']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    storage_paths = instance.node_group.storage_paths()
    dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
    nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))

    hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s &&'
                    'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s &&'
                    'sudo chmod 755 %(nn_path)s %(dn_path)s' %
                    {"nn_path": nn_path, "dn_path": dn_path})

    with remote.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')

        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                f.get_file_text('plugins/spark/resources/topology.sh'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)
        self._push_cleanup_job(r, cluster, extra, instance)

def load_properties_file(path):
    predicate = fu.and_predicate(lambda i: len(i) != 0,
                                 lambda i: not i.isspace(),
                                 lambda i: not i.startswith('#'))
    mapper = fu.chain_function(lambda i: tuple(i.split('=')),
                               lambda i: (i[0].strip(), i[1].strip()))
    lines = f.get_file_text(path).splitlines()
    return dict(map(mapper, filter(predicate, lines)))

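# A minimal, self-contained sketch of the parsing behaviour implemented by
# load_properties_file above. The fu/f helpers are module-level utilities in
# the original code, so this illustration (the function name and the sample
# content are hypothetical) inlines equivalent plain-Python logic instead of
# reading a real resource file.
def _parse_properties_example(text):
    result = {}
    for line in text.splitlines():
        # same filtering as the predicate: skip empty, blank and '#' lines
        if len(line) == 0 or line.isspace() or line.startswith('#'):
            continue
        # same mapping as the mapper: split on '=' and strip both sides
        parts = line.split('=')
        result[parts[0].strip()] = parts[1].strip()
    return result


assert _parse_properties_example(
    "# comment\n\ncldb.port = 7222\nnet.topology.script.file.name=/opt/x.sh"
) == {'cldb.port': '7222', 'net.topology.script.file.name': '/opt/x.sh'}
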
def _create_script_obj(filename, template, **kwargs):
    script = cf.TemplateFile(filename)
    script.remote_path = '/tmp/'
    script.parse(f.get_file_text(
        'plugins/mapr/services/mysql/resources/%s' % template))
    for k, v in six.iteritems(kwargs):
        script.add_property(k, v)
    return script

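# Hedged usage sketch for _create_script_obj above; the template name and the
# substitution values here are illustrative only, not resources shipped with
# the plugin:
#
#     script = _create_script_obj('create_metastore_db.sql',
#                                 'create_metastore_db.sql.template',
#                                 db_name='metastore', db_user='hive')
#     # script.remote_path is '/tmp/', so once pushed to the node the
#     # rendered file lands at /tmp/create_metastore_db.sql.
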
def _push_hive_configs(self, cluster, ng_extra, hive_mysql_passwd, r):
    files = {'/opt/hive/conf/hive-site.xml': ng_extra['xml']['hive-site']}
    if c_helper.is_mysql_enable(cluster):
        sql_script = f.get_file_text(
            'plugins/vanilla/v1_2_1/resources/create_hive_db.sql')
        sql_script = sql_script.replace('pass', hive_mysql_passwd)
        files.update({'/tmp/create_hive_db.sql': sql_script})
    r.write_files_to(files)

def configure_topology(self, topology_str, r):
    r.write_file_to(
        '/etc/hadoop/conf/topology.sh',
        f.get_file_text(
            'plugins/hdp/versions/version_1_3_2/resources/topology.sh'))
    r.execute_command('chmod +x /etc/hadoop/conf/topology.sh',
                      run_as_root=True)
    r.write_file_to('/etc/hadoop/conf/topology.data', topology_str)

def _push_configs_to_new_node(self, cluster, extra, instance): ng_extra = extra[instance.node_group.id] files_hadoop = { "/etc/hadoop/conf/core-site.xml": ng_extra["xml"]["core-site"], "/etc/hadoop/conf/hdfs-site.xml": ng_extra["xml"]["hdfs-site"], } sp_home = self._spark_home(cluster) files_spark = { os.path.join(sp_home, "conf/spark-env.sh"): ng_extra["sp_master"], os.path.join(sp_home, "conf/slaves"): ng_extra["sp_slaves"], } files_init = { "/tmp/sahara-hadoop-init.sh": ng_extra["setup_script"], "id_rsa": cluster.management_private_key, "authorized_keys": cluster.management_public_key, } # pietro: This is required because the (secret) key is not stored in # .ssh which hinders password-less ssh required by spark scripts key_cmd = ( "sudo cp $HOME/id_rsa $HOME/.ssh/; " "sudo chown $USER $HOME/.ssh/id_rsa; " "sudo chmod 600 $HOME/.ssh/id_rsa" ) for ng in cluster.node_groups: dn_path = c_helper.extract_hadoop_path(ng.storage_paths(), "/dfs/dn") nn_path = c_helper.extract_hadoop_path(ng.storage_paths(), "/dfs/nn") hdfs_dir_cmd = ("sudo mkdir -p %s %s;" "sudo chown -R hdfs:hadoop %s %s;" "sudo chmod 755 %s %s;") % ( nn_path, dn_path, nn_path, dn_path, nn_path, dn_path, ) with remote.get_remote(instance) as r: r.execute_command("sudo chown -R $USER:$USER /etc/hadoop") r.execute_command("sudo chown -R $USER:$USER %s" % sp_home) r.write_files_to(files_hadoop) r.write_files_to(files_spark) r.write_files_to(files_init) r.execute_command("sudo chmod 0500 /tmp/sahara-hadoop-init.sh") r.execute_command("sudo /tmp/sahara-hadoop-init.sh " ">> /tmp/sahara-hadoop-init.log 2>&1") r.execute_command(hdfs_dir_cmd) r.execute_command(key_cmd) if c_helper.is_data_locality_enabled(cluster): r.write_file_to("/etc/hadoop/topology.sh", f.get_file_text("plugins/spark/resources/topology.sh")) r.execute_command("sudo chmod +x /etc/hadoop/topology.sh") self._write_topology_data(r, cluster, extra) self._push_master_configs(r, cluster, extra, instance)
def get_config_files(self, cluster_context, configs, instance=None):
    hive_default = 'plugins/mapr/services/hive/resources/hive-default.xml'

    hive_site = bcf.HadoopXML("hive-site.xml")
    hive_site.remote_path = self.conf_dir(cluster_context)
    if instance:
        hive_site.fetch(instance)
    hive_site.parse(files.get_file_text(hive_default))
    hive_site.add_properties(self._get_hive_site_props(cluster_context))

    return [hive_site]

def run_script(instance, script, run_as=None, *args, **kwargs):
    with instance.remote() as r:
        path = '/tmp/%s.sh' % uuid.uuid4()
        script = files.get_file_text(script) % kwargs
        r.write_file_to(path, script, run_as_root=(run_as == 'root'))
        r.execute_command(_run_as(run_as, 'chmod +x %s' % path))
        r.execute_command(_run_as(run_as, '%s %s' % (path, ' '.join(args))))
        # FIXME(aosadchyi): reuse existing remote
        remove(instance, path, run_as=run_as)

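# Hedged usage sketch for run_script above. The resource path, the positional
# arguments and the 'mirror' keyword are hypothetical; the point is that
# **kwargs are interpolated into the script text via '%' (so the script must
# contain matching %(name)s placeholders), while *args are appended to the
# generated command line:
#
#     run_script(instance, 'plugins/resources/example-setup.sh', 'root',
#                'first-arg', 'second-arg', mirror='http://repo.example.org')
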
def _push_oozie_configs(self, cluster, ng_extra, r):
    r.write_file_to('/opt/oozie/conf/oozie-site.xml',
                    ng_extra['xml']['oozie-site'])

    if c_helper.is_mysql_enable(cluster):
        sql_script = f.get_file_text(
            'plugins/vanilla/v1_2_1/resources/create_oozie_db.sql')
        files = {'/tmp/create_oozie_db.sql': sql_script}
        r.write_files_to(files)

def install_mysql(instance, distro_name):
    with instance.remote() as r:
        script = '/tmp/install_mysql.sh'
        data = f.get_file_text(MySQL.MYSQL_INSTALL_SCRIPT)
        r.write_file_to(script, data, run_as_root=True)
        r.execute_command('chmod +x %s' % script, run_as_root=True)
        r.execute_command('%s %s' % (script, distro_name),
                          run_as_root=True,
                          timeout=MySQL.INSTALL_PACKAGES_TIMEOUT)

def oozie_create_db(remote):
    LOG.debug("Creating Oozie DB Schema...")
    sql_script = files.get_file_text(
        'plugins/vanilla/v1_2_1/resources/create_oozie_db.sql')
    script_location = "create_oozie_db.sql"
    remote.write_file_to(script_location, sql_script)
    remote.execute_command('mysql -u root < %(script_location)s && '
                           'rm %(script_location)s' %
                           {"script_location": script_location})

def hive_create_db(remote, hive_mysql_passwd):
    LOG.debug("Creating Hive metastore db...")
    sql_script = files.get_file_text(
        'plugins/vanilla/v1_2_1/resources/create_hive_db.sql')
    sql_script = sql_script.replace('pass', hive_mysql_passwd)
    script_location = "create_hive_db.sql"
    remote.write_file_to(script_location, sql_script)
    remote.execute_command('mysql -u root < %(script_location)s && '
                           'rm %(script_location)s' %
                           {"script_location": script_location})

def generate_job_cleanup_config(cluster):
    args = {
        'minimum_cleanup_megabytes': utils.get_config_value_or_default(
            "Spark", "Minimum cleanup megabytes", cluster),
        'minimum_cleanup_seconds': utils.get_config_value_or_default(
            "Spark", "Minimum cleanup seconds", cluster),
        'maximum_cleanup_seconds': utils.get_config_value_or_default(
            "Spark", "Maximum cleanup seconds", cluster)
    }
    job_conf = {'valid': (args['maximum_cleanup_seconds'] > 0 and
                          (args['minimum_cleanup_megabytes'] > 0 and
                           args['minimum_cleanup_seconds'] > 0))}
    if job_conf['valid']:
        # Note: the trailing comma below makes job_conf['cron'] a
        # one-element tuple; callers unpack it via '%' string formatting.
        job_conf['cron'] = f.get_file_text(
            'plugins/spark/resources/spark-cleanup.cron'),
        job_cleanup_script = f.get_file_text(
            'plugins/spark/resources/tmp-cleanup.sh.template')
        job_conf['script'] = job_cleanup_script.format(**args)
    return job_conf

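# Hedged sketch (not part of the original module) of how the dict returned by
# generate_job_cleanup_config is typically consumed on a master node; `r` is
# an open remote to the instance and the target paths are illustrative:
#
#     job_conf = generate_job_cleanup_config(cluster)
#     if job_conf['valid']:
#         r.write_file_to('/etc/hadoop/tmp-cleanup.sh', job_conf['script'])
#         r.execute_command('sudo chmod 755 /etc/hadoop/tmp-cleanup.sh')
#         # job_conf['cron'] is a one-element tuple, so '%' unpacks it here
#         cmd = 'sudo sh -c \'echo "%s" > /etc/cron.d/spark-cleanup\''
#         r.execute_command(cmd % job_conf['cron'])
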
def get_config_files(self, cluster_context, configs, instance=None):
    default_path = 'plugins/mapr/services/maprfs/resources/cldb.conf'
    cldb_conf = bcf.PropertiesFile("cldb.conf")
    cldb_conf.remote_path = self.conf_dir(cluster_context)
    if instance:
        cldb_conf.fetch(instance)
    cldb_conf.parse(files.get_file_text(default_path))
    cldb_conf.add_properties(self._get_cldb_conf_props(cluster_context))
    return [cldb_conf]

def start_oozie_process(instance):
    with instance.remote() as r:
        if c_helper.is_mysql_enabled(instance.node_group.cluster):
            _start_mysql(r)
            sql_script = files.get_file_text(
                'plugins/vanilla/v2_3_0/resources/create_oozie_db.sql')
            r.write_file_to('/tmp/create_oozie_db.sql', sql_script)
            _oozie_create_db(r)
        _oozie_share_lib(r)
        _start_oozie(r)

def test_get_ng_plugin_configs(self):
    actual_configs = c_h._get_ng_plugin_configs()

    expected_configs = []
    for json_file in json_files:
        expected_configs += json.loads(
            f.get_file_text(path_to_config + json_file))

    # compare names
    expected_names = set(i["name"] for i in expected_configs)
    actual_names = set(i.to_dict()["name"] for i in actual_configs)
    self.assertEqual(expected_names, actual_names)

def get_config_files(self, cluster_context, configs, instance=None): defaults = "plugins/mapr/services/impala/resources/impala-env.sh" impala_env = bcf.TemplateFile("env.sh") impala_env.remote_path = self.conf_dir(cluster_context) if instance: impala_env.fetch(instance) impala_env.parse(files.get_file_text(defaults)) impala_env.add_properties(self._get_impala_env_props(cluster_context)) return [impala_env]
def start_oozie_process(pctx, instance):
    with instance.remote() as r:
        if c_helper.is_mysql_enabled(pctx, instance.node_group.cluster):
            _start_mysql(r)
            sql_script = files.get_file_text(
                'plugins/vanilla/hadoop2/resources/create_oozie_db.sql')
            r.write_file_to('/tmp/create_oozie_db.sql', sql_script)
            _oozie_create_db(r)
        _oozie_share_lib(r)
        _start_oozie(r)

def test_load_template_with_anti_affinity_single_ng(self): """Checks Heat cluster template with Neutron enabled. Checks also anti-affinity feature enabled for single node process in single node group. """ ng1 = tu.make_ng_dict( "master", 42, ["namenode"], 1, floating_ip_pool="floating", image_id=None, volumes_per_node=0, volumes_size=0, id=1, ) ng2 = tu.make_ng_dict( "worker", 42, ["datanode"], 2, floating_ip_pool="floating", image_id=None, volumes_per_node=0, volumes_size=0, id=2, ) cluster = tu.create_cluster( "cluster", "tenant1", "general", "1.2.1", [ng1, ng2], user_keypair_id="user_key", neutron_management_network="private_net", default_image_id="1", anti_affinity=["datanode"], image_id=None, ) aa_heat_template = h.ClusterTemplate(cluster) aa_heat_template.add_node_group_extra(ng1["id"], 1, get_ud_generator("line1\nline2")) aa_heat_template.add_node_group_extra(ng2["id"], 2, get_ud_generator("line2\nline3")) self.override_config("use_neutron", True) main_template = h._load_template("main.heat", {"resources": aa_heat_template._serialize_resources()}) self.assertEqual( json.loads(main_template), json.loads(f.get_file_text("tests/unit/resources/" "test_serialize_resources_aa.heat")), )
def set_cluster_mode(self, instances):
    if not instances:
        instances = u.get_instances(self.cluster)
    LOG.info(_LI('Setting cluster mode to classic'))
    hv_template = f.get_file_text(self.hadoop_version_local)
    hv = hv_template % {"mode": self.hadoop_mode}
    for i in instances:
        with i.remote() as r:
            LOG.debug('Writing file %(f_name)s to node %(node)s',
                      {'f_name': self.hadoop_version_path,
                       'node': i.management_ip})
            r.write_file_to(self.hadoop_version_path, hv,
                            run_as_root=True)
