示例#1
0
    def test_get_jobtracker(self):
        """get_jobtracker yields instance 'jt1', or None without that group."""
        with_jt = tu.create_cluster(
            'cl1', 't1', 'vanilla', '1.2.1',
            [self.ng_manager, self.ng_jobtracker])
        jobtracker = u.get_jobtracker(with_jt)
        self.assertEqual('jt1', jobtracker.instance_id)

        # Same cluster definition, but built without the jobtracker group.
        without_jt = tu.create_cluster(
            'cl1', 't1', 'vanilla', '1.2.1', [self.ng_manager])
        self.assertIsNone(u.get_jobtracker(without_jt))
示例#2
0
    def test_get_jobtracker(self):
        """Check get_jobtracker with and without the jobtracker node group."""
        cluster_args = ('cl1', 't1', 'vanilla', '1.2.1')

        # Cluster that includes the jobtracker node group.
        node_groups = [self.ng_manager, self.ng_jobtracker]
        cluster = tu.create_cluster(*cluster_args + (node_groups,))
        found = u.get_jobtracker(cluster)
        self.assertEqual('jt1', found.instance_id)

        # Cluster with the manager node group only.
        node_groups = [self.ng_manager]
        cluster = tu.create_cluster(*cluster_args + (node_groups,))
        self.assertIsNone(u.get_jobtracker(cluster))
示例#3
0
    def _set_cluster_info(self, cluster):
        """Build the service endpoint map and save it as the cluster info.

        A section is added only for a service found in the cluster:
        "MapReduce" (jobtracker), "HDFS" (namenode), "JobFlow" (oozie).
        The result is stored through ``conductor.cluster_update``.
        """
        namenode = vu.get_namenode(cluster)
        jobtracker = vu.get_jobtracker(cluster)
        oozie_instance = vu.get_oozie(cluster)

        def port_of(service, option):
            # Port configured for ``option`` of ``service`` in this cluster.
            return c_helper.get_port_from_config(service, option, cluster)

        endpoints = {}

        if jobtracker:
            web_port = port_of("MapReduce", "mapred.job.tracker.http.address")
            rpc_port = port_of("MapReduce", "mapred.job.tracker")
            web_ui = "http://%s:%s" % (jobtracker.management_ip, web_port)
            address = "%s:%s" % (jobtracker.hostname(), rpc_port)
            endpoints["MapReduce"] = {"Web UI": web_ui, "JobTracker": address}

        if namenode:
            web_port = port_of("HDFS", "dfs.http.address")
            rpc_port = port_of("HDFS", "fs.default.name")
            web_ui = "http://%s:%s" % (namenode.management_ip, web_port)
            address = "hdfs://%s:%s" % (namenode.hostname(), rpc_port)
            endpoints["HDFS"] = {"Web UI": web_ui, "NameNode": address}

        if oozie_instance:
            # TODO(yrunts) change from hardcode value
            oozie_url = "http://%s:11000" % oozie_instance.management_ip
            endpoints["JobFlow"] = {"Oozie": oozie_url}

        conductor.cluster_update(context.ctx(), cluster, {"info": endpoints})
示例#4
0
def generate_sahara_configs(cluster, node_group=None):
    """Build the config dict derived from the cluster's instances.

    The HDFS settings are always present.  MapReduce, Oozie and Hive
    settings are added only when the corresponding hostname lookup returns
    a truthy value.  Storage directories are taken from
    ``node_group.storage_paths()`` when a node group is given, otherwise
    ``None`` is passed to ``extract_hadoop_path``.

    :returns: dict mapping config names to values
    """
    namenode_host = vu.get_instance_hostname(vu.get_namenode(cluster))
    jobtracker_host = vu.get_instance_hostname(vu.get_jobtracker(cluster))
    oozie_host = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_host = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    if node_group:
        paths = node_group.storage_paths()
    else:
        paths = None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    configs = {}
    configs['fs.default.name'] = 'hdfs://%s:8020' % namenode_host
    configs['dfs.name.dir'] = extract_hadoop_path(
        paths, '/lib/hadoop/hdfs/namenode')
    configs['dfs.data.dir'] = extract_hadoop_path(
        paths, '/lib/hadoop/hdfs/datanode')
    configs['dfs.hosts'] = '/etc/hadoop/dn.incl'
    configs['dfs.hosts.exclude'] = '/etc/hadoop/dn.excl'

    if jobtracker_host:
        configs.update({
            'mapred.job.tracker': '%s:8021' % jobtracker_host,
            'mapred.system.dir': extract_hadoop_path(
                paths, '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(
                paths, '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        })

    if oozie_host:
        configs.update({
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_host,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        })
        LOG.debug('Applied Oozie configs for core-site.xml')
        configs.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_host:
        configs.update({
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true',
        })
        LOG.debug('Applied Hive config for hive metastore server')

    return configs
示例#5
0
    def scale_cluster(self, cluster, instances):
        """Set up the new ``instances`` and refresh the master node lists.

        Calls ``run.refresh_nodes`` with "dfsadmin" on the namenode and,
        when the cluster has a jobtracker, with "mradmin" on it, then hands
        the new instances to ``_start_tt_dn_processes``.
        """
        self._setup_instances(cluster, instances)

        namenode_remote = remote.get_remote(vu.get_namenode(cluster))
        run.refresh_nodes(namenode_remote, "dfsadmin")

        jobtracker = vu.get_jobtracker(cluster)
        if jobtracker:
            jobtracker_remote = remote.get_remote(jobtracker)
            run.refresh_nodes(jobtracker_remote, "mradmin")

        self._start_tt_dn_processes(instances)
示例#6
0
    def scale_cluster(self, cluster, instances):
        """Configure added ``instances`` and refresh nodes on the masters."""
        self._setup_instances(cluster, instances)

        # The namenode refresh is unconditional; the jobtracker refresh
        # happens only when the cluster has a jobtracker.
        namenode = vu.get_namenode(cluster)
        run.refresh_nodes(remote.get_remote(namenode), "dfsadmin")

        jobtracker = vu.get_jobtracker(cluster)
        if jobtracker:
            run.refresh_nodes(remote.get_remote(jobtracker), "mradmin")

        self._start_tt_dn_processes(instances)
示例#7
0
def generate_sahara_configs(cluster, node_group=None):
    """Return the configs computed from the cluster's service hostnames.

    HDFS settings are always included; MapReduce, Oozie and Hive settings
    are merged in only when the respective hostname lookup is truthy.
    ``node_group.storage_paths()`` supplies the storage locations when a
    node group is passed, otherwise ``None`` is used.
    """
    nn_host = vu.get_instance_hostname(vu.get_namenode(cluster))
    jt_host = vu.get_instance_hostname(vu.get_jobtracker(cluster))
    oozie_host = vu.get_instance_hostname(vu.get_oozie(cluster))
    hive_host = vu.get_instance_hostname(vu.get_hiveserver(cluster))

    storage = None
    if node_group:
        storage = node_group.storage_paths()

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    result = {
        'fs.default.name': 'hdfs://%s:8020' % nn_host,
        'dfs.name.dir':
            extract_hadoop_path(storage, '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir':
            extract_hadoop_path(storage, '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_host:
        result['mapred.job.tracker'] = '%s:8021' % jt_host
        result['mapred.system.dir'] = extract_hadoop_path(
            storage, '/mapred/mapredsystem')
        result['mapred.local.dir'] = extract_hadoop_path(
            storage, '/lib/hadoop/mapred')
        result['mapred.hosts'] = '/etc/hadoop/tt.incl'
        result['mapred.hosts.exclude'] = '/etc/hadoop/tt.excl'

    if oozie_host:
        result['hadoop.proxyuser.hadoop.hosts'] = "localhost," + oozie_host
        result['hadoop.proxyuser.hadoop.groups'] = 'hadoop'
        LOG.debug('Applied Oozie configs for core-site.xml')
        result.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_host:
        result['hive.warehouse.subdir.inherit.perms'] = True
        result['javax.jdo.option.ConnectionURL'] = (
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true')
        LOG.debug('Applied Hive config for hive metastore server')

    return result
示例#8
0
    def start_cluster(self, cluster):
        """Start the services of ``cluster`` and publish its info.

        Steps, in order: format and start the namenode, start every
        secondary namenode, start the jobtracker if the cluster has one,
        call ``_start_tt_dn_processes`` on all cluster instances and
        ``_await_datanodes``, then set up Oozie and Hive when present.
        Finally ``_set_cluster_info`` is called.

        :param cluster: cluster whose services should be started
        """
        nn_instance = vu.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = vu.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info(_LI("Hadoop services in cluster %s have been started"),
                 cluster.name)

        oozie = vu.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                # The share lib call is given the namenode's hostname.
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                # Report the host Oozie runs on, not the namenode host.
                LOG.info(_LI("Oozie service at '%s' has been started"),
                         oozie.hostname())

        hive_server = vu.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(hive_server) as r:
                run.hive_create_warehouse_dir(r)
                run.hive_copy_shared_conf(
                    r, edp.get_hive_shared_conf_path('hadoop'))

                if c_helper.is_mysql_enable(cluster):
                    # Skip mysql_start when Oozie shares this host: it was
                    # already called for that host in the Oozie step above.
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(r, hive_server)
                    run.hive_create_db(r)
                    run.hive_metastore_start(r)
                    LOG.info(_LI("Hive Metastore server at %s has been "
                                 "started"),
                             hive_server.hostname())

        LOG.info(_LI('Cluster %s has been started successfully'), cluster.name)
        self._set_cluster_info(cluster)
示例#9
0
    def start_cluster(self, cluster):
        """Start the services of ``cluster`` and publish its info.

        Steps, in order: format and start the namenode, start every
        secondary namenode, start the jobtracker if the cluster has one,
        call ``_start_tt_dn_processes`` on all cluster instances and
        ``_await_datanodes``, then set up Oozie and Hive when present.
        Finally ``_set_cluster_info`` is called.

        :param cluster: cluster whose services should be started; when Hive
            is set up with MySQL enabled, ``cluster.extra`` must contain
            the ``'hive_mysql_passwd'`` key
        """
        nn_instance = vu.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = vu.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info(_LI("Hadoop services in cluster %s have been started"),
                 cluster.name)

        oozie = vu.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                # The share lib call is given the namenode's hostname.
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                # Report the host Oozie runs on, not the namenode host.
                LOG.info(_LI("Oozie service at '%s' has been started"),
                         oozie.hostname())

        hive_server = vu.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(hive_server) as r:
                run.hive_create_warehouse_dir(r)
                run.hive_copy_shared_conf(
                    r, edp.get_hive_shared_conf_path('hadoop'))

                if c_helper.is_mysql_enable(cluster):
                    # Skip mysql_start when Oozie shares this host: it was
                    # already called for that host in the Oozie step above.
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(r, hive_server)
                    run.hive_create_db(r, cluster.extra['hive_mysql_passwd'])
                    run.hive_metastore_start(r)
                    LOG.info(
                        _LI("Hive Metastore server at %s has been "
                            "started"), hive_server.hostname())

        LOG.info(_LI('Cluster %s has been started successfully'), cluster.name)
        self._set_cluster_info(cluster)
示例#10
0
    def _validate_additional_ng_scaling(self, cluster, additional):
        """Check that every node group listed in ``additional`` can scale.

        :param cluster: cluster the node groups belong to
        :param additional: iterable of node group ids to validate
        :raises NodeGroupCannotBeScaled: if a group runs processes outside
            ``_get_scalable_processes()``, or runs a tasktracker while the
            cluster has no jobtracker
        """
        jobtracker = vu.get_jobtracker(cluster)
        allowed = self._get_scalable_processes()

        for group_id in additional:
            group = self._get_by_id(cluster.node_groups, group_id)
            processes = group.node_processes

            if not set(processes).issubset(allowed):
                reason = ("Vanilla plugin cannot scale nodegroup"
                          " with processes: " + ' '.join(processes))
                raise ex.NodeGroupCannotBeScaled(group.name, reason)

            if 'tasktracker' in processes and not jobtracker:
                reason = ("Vanilla plugin cannot scale node group with "
                          "processes which have no master-processes run "
                          "in cluster")
                raise ex.NodeGroupCannotBeScaled(group.name, reason)
示例#11
0
    def _validate_additional_ng_scaling(self, cluster, additional):
        """Reject node groups from ``additional`` that cannot be scaled.

        A group is rejected with ``NodeGroupCannotBeScaled`` when its
        processes are not a subset of ``_get_scalable_processes()``, or
        when it runs a tasktracker and the cluster has no jobtracker.
        """
        has_jobtracker = bool(vu.get_jobtracker(cluster))
        scalable = self._get_scalable_processes()

        for group_id in additional:
            group = self._get_by_id(cluster.node_groups, group_id)
            processes = group.node_processes

            if not set(processes).issubset(scalable):
                message = _("Vanilla plugin cannot scale nodegroup"
                            " with processes: %s") % ' '.join(processes)
                raise ex.NodeGroupCannotBeScaled(group.name, message)

            if 'tasktracker' in processes and not has_jobtracker:
                message = _("Vanilla plugin cannot scale node group with "
                            "processes which have no master-processes run "
                            "in cluster")
                raise ex.NodeGroupCannotBeScaled(group.name, message)
示例#12
0
    def decommission_nodes(self, cluster, instances):
        """Decommission ``instances`` through the ``sc`` helpers.

        Each instance is removed from the cluster's tasktracker and/or
        datanode list according to its node group's processes; the
        matching decommission helper is then called with the instances
        that remain.
        """
        surviving_tts = vu.get_tasktrackers(cluster)
        surviving_dns = vu.get_datanodes(cluster)
        has_dns = False
        has_tts = False

        for instance in instances:
            processes = instance.node_group.node_processes
            if 'datanode' in processes:
                surviving_dns.remove(instance)
                has_dns = True
            if 'tasktracker' in processes:
                surviving_tts.remove(instance)
                has_tts = True

        namenode = vu.get_namenode(cluster)
        jobtracker = vu.get_jobtracker(cluster)

        # Tasktrackers are decommissioned before datanodes.
        if has_tts:
            sc.decommission_tt(jobtracker, instances, surviving_tts)
        if has_dns:
            sc.decommission_dn(namenode, instances, surviving_dns)
示例#13
0
    def decommission_nodes(self, cluster, instances):
        """Remove ``instances`` from the cluster's worker lists.

        The datanode/tasktracker lists are pruned of the given instances
        and passed, together with the namenode/jobtracker, to
        ``sc.decommission_dn`` / ``sc.decommission_tt``.
        """
        tasktrackers = vu.get_tasktrackers(cluster)
        datanodes = vu.get_datanodes(cluster)
        dn_affected = tt_affected = False

        for node in instances:
            node_processes = node.node_group.node_processes
            if "datanode" in node_processes:
                datanodes.remove(node)
                dn_affected = True
            if "tasktracker" in node_processes:
                tasktrackers.remove(node)
                tt_affected = True

        namenode = vu.get_namenode(cluster)
        jobtracker = vu.get_jobtracker(cluster)

        if tt_affected:
            sc.decommission_tt(jobtracker, instances, tasktrackers)
        if dn_affected:
            sc.decommission_dn(namenode, instances, datanodes)
示例#14
0
    def _set_cluster_info(self, cluster):
        """Build the service endpoint map and save it as the cluster info.

        A section is added only for a service found in the cluster:
        'MapReduce' (jobtracker), 'HDFS' (namenode), 'JobFlow' (oozie).
        The result is stored through ``conductor.cluster_update``.
        """
        namenode = vu.get_namenode(cluster)
        jobtracker = vu.get_jobtracker(cluster)
        oozie_instance = vu.get_oozie(cluster)

        def port_of(service, option):
            # Port configured for ``option`` of ``service`` in this cluster.
            return c_helper.get_port_from_config(service, option, cluster)

        endpoints = {}

        if jobtracker:
            web_port = port_of('MapReduce', 'mapred.job.tracker.http.address')
            rpc_port = port_of('MapReduce', 'mapred.job.tracker')
            web_ui = 'http://%s:%s' % (jobtracker.management_ip, web_port)
            address = '%s:%s' % (jobtracker.hostname(), rpc_port)
            endpoints['MapReduce'] = {'Web UI': web_ui, 'JobTracker': address}

        if namenode:
            web_port = port_of('HDFS', 'dfs.http.address')
            rpc_port = port_of('HDFS', 'fs.default.name')
            web_ui = 'http://%s:%s' % (namenode.management_ip, web_port)
            address = 'hdfs://%s:%s' % (namenode.hostname(), rpc_port)
            endpoints['HDFS'] = {'Web UI': web_ui, 'NameNode': address}

        if oozie_instance:
            # TODO(yrunts) change from hardcode value
            oozie_url = 'http://%s:11000' % oozie_instance.management_ip
            endpoints['JobFlow'] = {'Oozie': oozie_url}

        conductor.cluster_update(context.ctx(), cluster, {'info': endpoints})
示例#15
0
    def _set_cluster_info(self, cluster):
        """Save the endpoints of the cluster's services as its info.

        'MapReduce', 'HDFS' and 'JobFlow' sections are filled in for the
        jobtracker, namenode and oozie instances respectively, each only
        when that instance exists in the cluster.
        """
        nn_instance = vu.get_namenode(cluster)
        jt_instance = vu.get_jobtracker(cluster)
        oozie_instance = vu.get_oozie(cluster)
        read_port = c_helper.get_port_from_config
        sections = {}

        if jt_instance:
            http_port = read_port(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            tracker_port = read_port(
                'MapReduce', 'mapred.job.tracker', cluster)
            mapreduce = {}
            mapreduce['Web UI'] = (
                'http://%s:%s' % (jt_instance.management_ip, http_port))
            mapreduce['JobTracker'] = (
                '%s:%s' % (jt_instance.hostname(), tracker_port))
            sections['MapReduce'] = mapreduce

        if nn_instance:
            http_port = read_port('HDFS', 'dfs.http.address', cluster)
            fs_port = read_port('HDFS', 'fs.default.name', cluster)
            hdfs = {}
            hdfs['Web UI'] = (
                'http://%s:%s' % (nn_instance.management_ip, http_port))
            hdfs['NameNode'] = (
                'hdfs://%s:%s' % (nn_instance.hostname(), fs_port))
            sections['HDFS'] = hdfs

        if oozie_instance:
            # TODO(yrunts) change from hardcode value
            sections['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie_instance.management_ip,
            }

        conductor.cluster_update(context.ctx(), cluster, {'info': sections})
示例#16
0
 def start_jobtracker(self, cluster):
     """Call ``_start_jobtracker`` when the cluster has a jobtracker."""
     jobtracker = vu.get_jobtracker(cluster)
     if not jobtracker:
         # No jobtracker instance in this cluster; nothing to start.
         return
     self._start_jobtracker(jobtracker)
示例#17
0
 def start_jobtracker(self, cluster):
     """Hand the cluster's jobtracker, if any, to ``_start_jobtracker``."""
     instance = vu.get_jobtracker(cluster)
     if instance:
         self._start_jobtracker(instance)
     return None