def _await_networks(cluster, instances):
    """Wait for IP assignment on all instances, then for SSH access.

    Each round calls ``networks.init_instances_ips`` for every instance
    that has not succeeded yet and then calls ``context.sleep(1)``. The
    wait is abandoned without logging as soon as
    ``g.check_cluster_exists`` returns a falsy value. Once every instance
    is done, the cluster is fetched again through the conductor and one
    ``wait-for-ssh-*`` thread per instance runs ``_wait_until_accessible``.

    :param cluster: cluster whose id is used in the first log message
    :param instances: instances to wait for; a falsy value is a no-op
    """
    if not instances:
        return

    with_ip = set()
    while len(with_ip) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            # Nothing left to wait for.
            return

        for inst in instances:
            if inst.id in with_ip:
                continue
            if networks.init_instances_ips(inst):
                with_ip.add(inst.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

    # Fetch the cluster again and look the instances up by the collected ids.
    current_ctx = context.ctx()
    cluster = conductor.cluster_get(
        current_ctx, instances[0].node_group.cluster)
    fresh_instances = _get_instances(cluster, with_ip)

    with context.ThreadGroup() as thread_group:
        for inst in fresh_instances:
            thread_group.spawn(
                "wait-for-ssh-%s" % inst.instance_name,
                _wait_until_accessible,
                inst)

    LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
def _await_networks(self, cluster, instances):
    """Block until all instances have IPs and can be reached over SSH.

    Loops over the instances, calling ``networks.init_instances_ips`` for
    each one that has not yet returned a truthy result, with
    ``context.sleep(1)`` between rounds. Returns early, without logging,
    when ``g.check_cluster_exists`` reports the cluster as missing.
    Afterwards the cluster is re-read via the conductor and
    ``self._wait_until_accessible`` is spawned in a thread group for every
    instance returned by ``g.get_instances``.

    :param cluster: cluster whose id appears in the first log message
    :param instances: instances to wait for; a falsy value is a no-op
    """
    if not instances:
        return

    done_ids = set()
    while len(done_ids) != len(instances):
        cluster_ref = instances[0].node_group.cluster
        if not g.check_cluster_exists(cluster_ref):
            return

        for node in instances:
            if node.id not in done_ids and networks.init_instances_ips(node):
                done_ids.add(node.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances have IPs assigned" % cluster.id)

    request_ctx = context.ctx()
    cluster = conductor.cluster_get(
        request_ctx, instances[0].node_group.cluster)
    reloaded = g.get_instances(cluster, done_ids)

    with context.ThreadGroup() as tg:
        for node in reloaded:
            tg.spawn("wait-for-ssh-%s" % node.instance_name,
                     self._wait_until_accessible, node)

    LOG.info("Cluster '%s': all instances are accessible" % cluster.id)
def _await_active(self, cluster, instances):
    """Wait until every instance passes ``self._check_if_active``.

    Instances are re-checked round by round, with ``context.sleep(1)``
    after each round. An instance that has passed once is not checked
    again. The wait ends early, without logging, when
    ``g.check_cluster_exists`` returns a falsy value.

    :param cluster: cluster whose id is written to the log on success
    :param instances: instances to wait for; a falsy value is a no-op
    """
    if not instances:
        return

    ready = set()
    while len(ready) != len(instances):
        if not g.check_cluster_exists(instances[0].node_group.cluster):
            return

        for inst in instances:
            if inst.id in ready:
                continue
            if self._check_if_active(inst):
                ready.add(inst.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances are active" % cluster.id)
def _await_active(cluster, instances):
    """Block until ``_check_if_active`` succeeds for all instances.

    Every round visits the instances that have not yet been marked
    active, then calls ``context.sleep(1)``. If
    ``g.check_cluster_exists`` returns a falsy value the function returns
    immediately without logging.

    :param cluster: cluster whose id is written to the log on success
    :param instances: instances to wait for; a falsy value is a no-op
    """
    if not instances:
        return

    seen_active = set()
    while len(seen_active) != len(instances):
        owner = instances[0].node_group.cluster
        if not g.check_cluster_exists(owner):
            return

        for node in instances:
            if node.id not in seen_active and _check_if_active(node):
                seen_active.add(node.id)

        context.sleep(1)

    LOG.info("Cluster '%s': all instances are active" % cluster.id)
def _wait_until_accessible(instance):
    """Poll an instance until ``.ssh/authorized_keys`` can be listed on it.

    Any exception raised while running the remote command is logged at
    debug level and treated as "not ready yet". Between attempts the
    function calls ``context.sleep(5)`` and stops silently if
    ``g.check_cluster_exists`` returns a falsy value.

    :param instance: instance to probe through ``instance.remote()``
    """
    while True:
        try:
            # Exit code 0 means ssh is accessible and the cloud-init
            # script has finished generating authorized_keys.
            probe = instance.remote().execute_command(
                "ls .ssh/authorized_keys", raise_when_error=False)
            exit_code, _output = probe

            if exit_code == 0:
                LOG.debug("Instance %s is accessible" % instance.instance_name)
                return
        except Exception as err:
            LOG.debug("Can't login to node %s (%s), reason %s",
                      instance.instance_name, instance.management_ip, err)

        context.sleep(5)

        cluster_alive = g.check_cluster_exists(instance.node_group.cluster)
        if not cluster_alive:
            return
def _await_datanodes(self, cluster):
    """Wait until the expected number of datanodes is reported as started.

    The expected count is ``len(utils.get_datanodes(cluster))``; nothing
    happens when it is below one. Otherwise ``run.check_datanodes_count``
    is polled through a remote to the namenode, with ``context.sleep(1)``
    between attempts. Polling stops, with a log message, when
    ``g.check_cluster_exists`` returns a falsy value.

    :param cluster: cluster whose datanodes are awaited
    """
    expected = len(utils.get_datanodes(cluster))
    if expected < 1:
        return

    LOG.info("Waiting %s datanodes to start up" % expected)

    with remote.get_remote(utils.get_namenode(cluster)) as nn_remote:
        while not run.check_datanodes_count(nn_remote, expected):
            context.sleep(1)

            if not g.check_cluster_exists(cluster):
                LOG.info(
                    'Stop waiting datanodes on cluster %s since it has '
                    'been deleted' % cluster.name)
                return

        # Reached only once the count check has succeeded.
        LOG.info('Datanodes on cluster %s has been started' % cluster.name)
def _wait_until_accessible(self, instance):
    """Retry a remote ``ls .ssh/authorized_keys`` until it exits with 0.

    A failure to run the command (any exception) is logged at debug level
    and retried. After each unsuccessful attempt the function calls
    ``context.sleep(5)`` and then returns silently if
    ``g.check_cluster_exists`` returns a falsy value.

    :param instance: instance to probe through ``instance.remote()``
    """
    command = "ls .ssh/authorized_keys"
    while True:
        try:
            # check if ssh is accessible and cloud-init
            # script is finished generating authorized_keys
            exit_code, _stdout = instance.remote().execute_command(
                command, raise_when_error=False)

            reachable = exit_code == 0
            if reachable:
                LOG.debug('Instance %s is accessible' % instance.instance_name)
                return
        except Exception as exc:
            LOG.debug("Can't login to node %s (%s), reason %s",
                      instance.instance_name, instance.management_ip, exc)

        context.sleep(5)

        if not g.check_cluster_exists(instance.node_group.cluster):
            return
def _await_datanodes(self, cluster):
    """Poll the namenode until all of the cluster's datanodes have started.

    Returns immediately when ``utils.get_datanodes(cluster)`` is empty.
    Otherwise ``run.check_datanodes_count`` is called repeatedly over a
    remote to the namenode, with ``context.sleep(1)`` after each failed
    check. If ``g.check_cluster_exists`` returns a falsy value, a message
    is logged and the wait is given up.

    :param cluster: cluster whose datanodes are awaited
    """
    wanted = len(utils.get_datanodes(cluster))
    if wanted < 1:
        return

    LOG.info("Waiting %s datanodes to start up" % wanted)

    with remote.get_remote(utils.get_namenode(cluster)) as r:
        while True:
            all_started = run.check_datanodes_count(r, wanted)
            if all_started:
                break

            context.sleep(1)

            if g.check_cluster_exists(cluster):
                continue

            LOG.info(
                'Stop waiting datanodes on cluster %s since it has '
                'been deleted' % cluster.name)
            return

        LOG.info('Datanodes on cluster %s has been started' % cluster.name)