示例#1
0
def _await_networks(cluster, instances):
    if not instances:
        return

    ips_assigned = set()
    while len(ips_assigned) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in ips_assigned:
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
    instances = _get_instances(cluster, ips_assigned)

    with context.ThreadGroup() as tg:
        for instance in instances:
            tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                     _wait_until_accessible, instance)

    LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
示例#2
0
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        ips_assigned = set()
        while len(ips_assigned) != len(instances):
            if not g.check_cluster_exists(instances[0].node_group.cluster):
                return
            for instance in instances:
                if instance.id not in ips_assigned:
                    if networks.init_instances_ips(instance):
                        ips_assigned.add(instance.id)

            context.sleep(1)

        LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

        ctx = context.ctx()
        cluster = conductor.cluster_get(ctx, instances[0].node_group.cluster)
        instances = g.get_instances(cluster, ips_assigned)

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
示例#3
0
    def _await_active(self, cluster, instances):
        """Await all instances are in Active status and available."""
        if not instances:
            return

        active_ids = set()
        while len(active_ids) != len(instances):
            if not g.check_cluster_exists(instances[0].node_group.cluster):
                return
            for instance in instances:
                if instance.id not in active_ids:
                    if self._check_if_active(instance):
                        active_ids.add(instance.id)

            context.sleep(1)

        LOG.info("Cluster '%s': all instances are active" % cluster.id)
示例#4
0
def _await_active(cluster, instances):
    """Await all instances are in Active status and available."""
    if not instances:
        return

    active_ids = set()
    while len(active_ids) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            return
        for instance in instances:
            if instance.id not in active_ids:
                if _check_if_active(instance):
                    active_ids.add(instance.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances are active" % cluster.id)
示例#5
0
def _wait_until_accessible(instance):
    """Block until ``instance`` accepts a remote command or its cluster is gone.

    Each attempt runs ``ls .ssh/authorized_keys`` through
    ``instance.remote()``. An exit code of 0 ends the wait. Any exception
    raised by the attempt is logged at debug level and treated as "not
    reachable yet". After every unsuccessful attempt the function sleeps
    five seconds and then returns silently if ``g.check_cluster_exists``
    gives a false value for the instance's cluster.
    """
    while True:
        try:
            # check if ssh is accessible and cloud-init
            # script is finished generating authorized_keys
            exit_code, _stdout = instance.remote().execute_command(
                "ls .ssh/authorized_keys", raise_when_error=False)

            if exit_code == 0:
                # Pass the argument to the logger instead of formatting
                # eagerly, matching the call in the except branch.
                LOG.debug("Instance %s is accessible",
                          instance.instance_name)
                return
        except Exception as ex:
            # Deliberately broad: any failure only means "retry later".
            LOG.debug("Can't login to node %s (%s), reason %s",
                      instance.instance_name, instance.management_ip, ex)

        context.sleep(5)

        if not g.check_cluster_exists(instance.node_group.cluster):
            return
示例#6
0
    def _await_datanodes(self, cluster):
        """Wait until the expected number of datanodes is reported.

        The expected number is the length of ``utils.get_datanodes(cluster)``;
        nothing happens when it is below one. Otherwise a remote to the
        node returned by ``utils.get_namenode`` is opened and
        ``run.check_datanodes_count`` is polled, sleeping one second
        between polls. Polling stops once the check passes, or once
        ``g.check_cluster_exists`` returns a false value for the cluster.
        """
        expected = len(utils.get_datanodes(cluster))
        if expected < 1:
            return

        LOG.info("Waiting %s datanodes to start up" % expected)
        with remote.get_remote(utils.get_namenode(cluster)) as nn_remote:
            while not run.check_datanodes_count(nn_remote, expected):
                context.sleep(1)

                if g.check_cluster_exists(cluster):
                    continue

                LOG.info(
                    'Stop waiting datanodes on cluster %s since it has '
                    'been deleted' % cluster.name)
                return

            LOG.info('Datanodes on cluster %s has been started' %
                     cluster.name)
示例#7
0
    def _wait_until_accessible(self, instance):
        """Block until ``instance`` accepts a remote command or its cluster is gone.

        Each attempt runs ``ls .ssh/authorized_keys`` through
        ``instance.remote()``. An exit code of 0 ends the wait. Any
        exception raised by the attempt is logged at debug level and
        treated as "not reachable yet". After every unsuccessful attempt
        the method sleeps five seconds and then returns silently if
        ``g.check_cluster_exists`` gives a false value for the instance's
        cluster.
        """
        while True:
            try:
                # check if ssh is accessible and cloud-init
                # script is finished generating authorized_keys
                exit_code, _stdout = instance.remote().execute_command(
                    "ls .ssh/authorized_keys", raise_when_error=False)

                if exit_code == 0:
                    # Pass the argument to the logger instead of formatting
                    # eagerly, matching the call in the except branch.
                    LOG.debug('Instance %s is accessible',
                              instance.instance_name)
                    return
            except Exception as ex:
                # Deliberately broad: any failure only means "retry later".
                LOG.debug("Can't login to node %s (%s), reason %s",
                          instance.instance_name, instance.management_ip, ex)

            context.sleep(5)

            if not g.check_cluster_exists(instance.node_group.cluster):
                return
示例#8
0
文件: plugin.py 项目: rrader/savanna
    def _await_datanodes(self, cluster):
        """Wait until the expected number of datanodes is reported.

        The expected number is the length of ``utils.get_datanodes(cluster)``;
        nothing happens when it is below one. Otherwise a remote to the
        node returned by ``utils.get_namenode`` is opened and
        ``run.check_datanodes_count`` is polled, sleeping one second
        between polls. Polling stops once the check passes, or once
        ``g.check_cluster_exists`` returns a false value for the cluster.
        """
        expected = len(utils.get_datanodes(cluster))
        if expected < 1:
            return

        LOG.info("Waiting %s datanodes to start up" % expected)
        with remote.get_remote(utils.get_namenode(cluster)) as nn_remote:
            while not run.check_datanodes_count(nn_remote, expected):
                context.sleep(1)

                if g.check_cluster_exists(cluster):
                    continue

                LOG.info(
                    'Stop waiting datanodes on cluster %s since it has '
                    'been deleted' % cluster.name)
                return

            LOG.info(
                'Datanodes on cluster %s has been started' %
                cluster.name)