def _push_configs_to_new_node(self, cluster, extra, instance):
    """Write the generated Hadoop and Spark configuration files to a
    newly provisioned instance and run its setup scripts."""
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        os.path.join(c_helper.HADOOP_CONF_DIR, 'core-site.xml'):
            ng_extra['xml']['core-site'],
        os.path.join(c_helper.HADOOP_CONF_DIR, 'hdfs-site.xml'):
            ng_extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): ng_extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): ng_extra['sp_slaves'],
        os.path.join(sp_home, 'conf/spark-defaults.conf'):
            ng_extra['sp_defaults']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }
    if 'zeppelin_setup_script' in ng_extra:
        files_init.update({
            '/tmp/zeppelin-conf.sh': ng_extra['zeppelin_setup_script']})

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    # Create the NameNode and DataNode directories on every configured
    # storage path and hand them over to the hdfs user.
    storage_paths = instance.node_group.storage_paths()
    dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
    nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))
    hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s && '
                    'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s && '
                    'sudo chmod 755 %(nn_path)s %(dn_path)s'
                    % {'nn_path': nn_path, 'dn_path': dn_path})

    with remote.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                f.get_file_text('plugins/spark/resources/topology.sh'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        if 'zeppelin_setup_script' in ng_extra:
            r.execute_command('sudo chmod 0500 /tmp/zeppelin-conf.sh')
            r.execute_command('sudo /tmp/zeppelin-conf.sh '
                              '>> /tmp/zeppelin-conf.log 2>&1')

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)
        self._push_cleanup_job(r, cluster, extra, instance)
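For orientation, this is roughly the shape of the per-node-group entry in `extra` that the method above consumes. It is a hypothetical sketch inferred from the keys the method reads; the values are placeholders, not real Sahara output:

ng_extra = {
    'xml': {
        # rendered XML bodies for the two Hadoop config files
        'core-site': '<configuration>...</configuration>',
        'hdfs-site': '<configuration>...</configuration>',
    },
    'sp_master': '# rendered conf/spark-env.sh contents',
    'sp_slaves': 'worker-1\nworker-2',      # conf/slaves, one host per line
    'sp_defaults': '# conf/spark-defaults.conf contents',
    'setup_script': '#!/bin/bash\n# node init steps',
    # optional; pushed and executed only when present
    'zeppelin_setup_script': '#!/bin/bash\n# zeppelin setup steps',
}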
def test_make_hadoop_path(self):
    storage_paths = ['/mnt/one', '/mnt/two']
    paths = c_helper.make_hadoop_path(storage_paths, '/spam')
    expected = ['/mnt/one/spam', '/mnt/two/spam']
    self.assertEqual(expected, paths)
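The helper exercised by this test is not shown in this section. Judging from the expected output, it joins every storage mount point with the given Hadoop subdirectory; a minimal sketch consistent with the test follows (the real `c_helper.make_hadoop_path` may differ in details):

def make_hadoop_path(paths, hadoop_dir):
    # ['/mnt/one', '/mnt/two'] + '/spam' -> ['/mnt/one/spam', '/mnt/two/spam']
    return [path + hadoop_dir for path in paths]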