Example #1
    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if ng.count > existing[ng.id] and ("datanode"
                                                   in ng.node_processes):
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name,
                        _("Spark plugin cannot scale nodegroup"
                          " with processes: %s") % ' '.join(ng.node_processes))

        dn_amount = len(utils.get_instances(cluster, "datanode"))
        rep_factor = utils.get_config_value_or_default('HDFS',
                                                       "dfs.replication",
                                                       cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name,
                _("Spark plugin cannot shrink cluster because "
                  "there would be not enough nodes for HDFS "
                  "replicas (replication factor is %s)") % rep_factor)

    def validate_job_execution(self, cluster, job, data):
        if not self.edp_supported(cluster.hadoop_version):
            raise ex.PluginInvalidDataException(
                _('Spark {base} or higher required to run {type} jobs').format(
                    base=EdpEngine.edp_base_version, type=job.type))

        super(EdpEngine, self).validate_job_execution(cluster, job, data)
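
The shrink check in _validate_existing_ng_scaling reduces to simple arithmetic: after the requested DataNodes are removed, at least dfs.replication DataNodes must remain. A minimal standalone sketch of that rule, using plain integers and a hypothetical function name rather than Sahara objects:

# Illustrative sketch only -- not part of the plugin. It reproduces the
# arithmetic of the shrink check with plain integers.
def can_shrink_datanodes(current_dn, dn_to_delete, replication_factor):
    """True if removing dn_to_delete DataNodes still leaves at least
    replication_factor DataNodes for HDFS replicas."""
    if dn_to_delete <= 0:
        return True
    return current_dn - dn_to_delete >= replication_factor


# With 4 DataNodes and dfs.replication = 3, removing 2 nodes is rejected,
# while removing 1 node is still allowed.
assert can_shrink_datanodes(4, 2, 3) is False
assert can_shrink_datanodes(4, 1, 3) is True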
Example #3
    def _validate_additional_ng_scaling(self, cluster, additional):
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = utils.get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name,
                    _("Spark plugin cannot scale nodegroup"
                      " with processes: %s") % ' '.join(ng.node_processes))
Example #4
    def validate(self, cluster):
        nn_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "namenode")])
        if nn_count != 1:
            raise ex.InvalidComponentCountException("namenode", 1, nn_count)

        dn_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "datanode")])
        if dn_count < 1:
            raise ex.InvalidComponentCountException("datanode", _("1 or more"),
                                                    nn_count)

        rep_factor = utils.get_config_value_or_default('HDFS',
                                                       "dfs.replication",
                                                       cluster)
        if dn_count < rep_factor:
            raise ex.InvalidComponentCountException(
                'datanode',
                _('%s or more') % rep_factor, dn_count,
                _('Number of %(dn)s instances should not be less '
                  'than %(replication)s') % {
                      'dn': 'datanode',
                      'replication': 'dfs.replication'
                  })

        # validate Spark Master Node and Spark Slaves
        sm_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "master")])

        if sm_count < 1:
            raise ex.RequiredServiceMissingException("Spark master")

        if sm_count >= 2:
            raise ex.InvalidComponentCountException("Spark master", "1",
                                                    sm_count)

        sl_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "slave")])

        if sl_count < 1:
            raise ex.InvalidComponentCountException("Spark slave",
                                                    _("1 or more"), sl_count)
Example #5
def await_datanodes(cluster):
    datanodes_count = len(utils.get_instances(cluster, "datanode"))
    if datanodes_count < 1:
        return

    log_msg = _("Waiting on %d DataNodes to start up") % datanodes_count
    with utils.get_instance(cluster, "namenode").remote() as r:
        utils.plugin_option_poll(
            cluster, _check_datanodes_count,
            c_helper.DATANODES_STARTUP_TIMEOUT,
            log_msg, 1, {"remote": r, "count": datanodes_count})
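
utils.plugin_option_poll is handed a predicate (_check_datanodes_count), a timeout option, a log message, a poll interval, and the keyword arguments for the predicate. Conceptually that is a poll-until-true loop; the sketch below is a simplified stand-in assuming only that general shape, not a reproduction of the real helper:

import time


# Simplified stand-in: call predicate(**kwargs) until it returns True or the
# timeout expires. This only illustrates the shape of the await_datanodes
# call; it is not utils.plugin_option_poll.
def poll(predicate, timeout, sleep_seconds, kwargs):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if predicate(**kwargs):
            return
        time.sleep(sleep_seconds)
    raise TimeoutError("condition not met within %s seconds" % timeout)


# Example: wait until a counter reaches the expected DataNode count.
state = {"started": 3}
poll(lambda count: state["started"] >= count,
     timeout=1, sleep_seconds=0.1, kwargs={"count": 3})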
Example #6
    def _push_configs_to_nodes(self, cluster, extra, new_instances):
        all_instances = utils.get_instances(cluster)
        utils.add_provisioning_step(cluster.id, _("Push configs to nodes"),
                                    len(all_instances))
        with context.PluginsThreadGroup() as tg:
            for instance in all_instances:
                extra = self._add_instance_ng_related_to_extra(
                    cluster, instance, extra)
                if instance in new_instances:
                    tg.spawn('spark-configure-%s' % instance.instance_name,
                             self._push_configs_to_new_node, cluster, extra,
                             instance)
                else:
                    tg.spawn('spark-reconfigure-%s' % instance.instance_name,
                             self._push_configs_to_existing_node, cluster,
                             extra, instance)
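
context.PluginsThreadGroup() fans the per-instance config pushes out in parallel and joins them when the with block exits. As a rough analogue of that spawn-and-join pattern (this is not Sahara's implementation), the same fan-out can be sketched with concurrent.futures:

from concurrent.futures import ThreadPoolExecutor


# Rough analogue of the spawn-and-join pattern above: one task per instance,
# all joined when the executor context exits. Names here are illustrative.
def push_configs(instances, new_instances, configure_new, reconfigure_existing):
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = []
        for instance in instances:
            if instance in new_instances:
                futures.append(executor.submit(configure_new, instance))
            else:
                futures.append(executor.submit(reconfigure_existing, instance))
        # Re-raise any exception from a worker, as the thread group would.
        for future in futures:
            future.result()


push_configs(["node-1", "node-2"], {"node-2"},
             configure_new=lambda i: print("configure", i),
             reconfigure_existing=lambda i: print("reconfigure", i))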
Example #7
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with utils.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(utils.get_remote(nn), "dfsadmin")
        context.sleep(3)

        utils.plugin_option_poll(nn.cluster, _is_decommissioned,
                                 c_helper.DECOMMISSIONING_TIMEOUT,
                                 _("Decommission %s") % "DataNodes", 3, {
                                     'r': r,
                                     'inst_to_be_deleted': inst_to_be_deleted
                                 })

        r.write_files_to({
            '/etc/hadoop/dn.incl':
            utils.generate_fqdn_host_names(survived_inst),
            '/etc/hadoop/dn.excl':
            ""
        })
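
decommission_dn drives HDFS's include/exclude mechanism: the nodes being removed are written to dn.excl, dfsadmin -refreshNodes is issued, the code polls until those nodes report as decommissioned, and only then is dn.incl rewritten with the survivors while dn.excl is cleared. The sketch below only illustrates the file payloads at each step, assuming the helper newline-joins fully qualified hostnames; the hostnames and helper are hypothetical, not utils.generate_fqdn_host_names:

# Illustrative only: the payloads written at each step of the decommission.
def fqdn_host_names(hostnames):
    return "\n".join(hostnames) + "\n"


to_delete = ["dn-3.example.local"]
survivors = ["dn-1.example.local", "dn-2.example.local"]

# Step 1: exclude the nodes being removed, refresh, then poll until they
# are fully decommissioned.
exclude_payload = fqdn_host_names(to_delete)

# Step 2: keep only the survivors in the include file and clear the
# exclude file again.
final_files = {
    "/etc/hadoop/dn.incl": fqdn_host_names(survivors),
    "/etc/hadoop/dn.excl": "",
}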
Example #8
    def get_description(self):
        return _("This plugin provides an ability to launch Spark on Hadoop "
                 "CDH cluster without any management consoles.")
Example #9
                               "{split($NF,a,\"/\"); print a[1]}'"
                               "| xargs sudo kill -9"))


def start_spark_master(nn_remote, sp_home):
    nn_remote.execute_command("bash " + os.path.join(sp_home,
                                                     "sbin/start-all.sh"))


def stop_spark(nn_remote, sp_home):
    nn_remote.execute_command("bash " + os.path.join(sp_home,
                                                     "sbin/stop-all.sh"))


@utils.event_wrapper(
    True, step=_("Await DataNodes start up"), param=("cluster", 0))
def await_datanodes(cluster):
    datanodes_count = len(utils.get_instances(cluster, "datanode"))
    if datanodes_count < 1:
        return

    log_msg = _("Waiting on %d DataNodes to start up") % datanodes_count
    with utils.get_instance(cluster, "namenode").remote() as r:
        utils.plugin_option_poll(
            cluster, _check_datanodes_count,
            c_helper.DATANODES_STARTUP_TIMEOUT,
            log_msg, 1, {"remote": r, "count": datanodes_count})


def _check_datanodes_count(remote, count):
    if count < 1:
Example #10
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import six

from sahara.plugins import context
from sahara.plugins import utils
from sahara_plugin_spark.i18n import _
from sahara_plugin_spark.plugins.spark import config_helper as c_helper
from sahara_plugin_spark.plugins.spark import run_scripts as run


@utils.event_wrapper(True, step=_("Decommission %s") % "Slaves")
def decommission_sl(master, inst_to_be_deleted, survived_inst):
    if survived_inst is not None:
        slavenames = []
        for slave in survived_inst:
            slavenames.append(slave.hostname())
        slaves_content = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        slaves_content = "\n"

    cluster = master.cluster
    sp_home = utils.get_config_value_or_default("Spark", "Spark home", cluster)
    r_master = utils.get_remote(master)
    run.stop_spark(r_master, sp_home)

    # write new slave file to master
                               "{split($NF,a,\"/\"); print a[1]}'"
                               "| xargs sudo kill -9"))


def start_spark_master(nn_remote, sp_home):
    nn_remote.execute_command("bash " +
                              os.path.join(sp_home, "sbin/start-all.sh"))


def stop_spark(nn_remote, sp_home):
    nn_remote.execute_command("bash " +
                              os.path.join(sp_home, "sbin/stop-all.sh"))


@utils.event_wrapper(True,
                     step=_("Await DataNodes start up"),
                     param=("cluster", 0))
def await_datanodes(cluster):
    datanodes_count = len(utils.get_instances(cluster, "datanode"))
    if datanodes_count < 1:
        return

    log_msg = _("Waiting on %d DataNodes to start up") % datanodes_count
    with utils.get_instance(cluster, "namenode").remote() as r:
        utils.plugin_option_poll(cluster, _check_datanodes_count,
                                 c_helper.DATANODES_STARTUP_TIMEOUT, log_msg,
                                 1, {
                                     "remote": r,
                                     "count": datanodes_count
                                 })