def wait_for_instance_state(ec2c, instance_ids, state):
    """Block until every instance in ``instance_ids`` reaches ``state``.

    Polls ``ec2c.describe_instances`` every 15 seconds, rendering a progress
    bar of how many instances have reached the target state so far.

    Args:
        ec2c: boto3 EC2 client used for the ``describe_instances`` calls.
        instance_ids (list): EC2 instance IDs to wait on.
        state (str): Target instance state name (e.g. ``'running'``).
    """
    print('')
    print("Waiting for instance{} to be in a '{}' state".format(
        's' if len(instance_ids) > 1 else '', state))
    start = datetime.now()
    # Fix: previously `total` was only bound inside the polling loop, so if
    # every instance was already in the target state the final progress_bar
    # call raised NameError. Compute it up front from the requested IDs.
    total = len(instance_ids)
    while True:
        response = ec2c.describe_instances(InstanceIds=instance_ids)
        states = [i['State']['Name']
                  for r in response['Reservations']
                  for i in r['Instances']]
        finished = sum(s == state for s in states)
        if finished == total:
            break
        progbar.progress_bar(finished, total, start)
        time.sleep(15)
    # Render the bar one last time at 100%.
    progbar.progress_bar(total, total, start)
def configure(self):
    """Provision and configure the whole cluster after the instances are up.

    Sequence: build the base image on every node, distribute the master's
    SSH key for passwordless SSH, populate /etc/hosts, set up the master's
    EBS storage (RAID or single volume) and share it over NFS, then start
    optional services (Celery/Redis/Flower, Jupyter, MongoDB) and write the
    cluster config to the master node.
    """
    # Master first, then workers — several steps below rely on instances[0]
    # being the master.
    instances = [self.master_instance] + self.worker_instances
    instance_lookup = dict(self.master, **self.workers)
    # NOTE(review): instance_names is never used in this method — confirm
    # whether it can be removed.
    instance_names = sorted(instance_lookup.keys())
    # update Ab[x] tools
    self.update_abx(instances)
    # build base image
    print('')
    if len(instances) == 1:
        # Single node: configure it directly, no pool needed.
        print('Building base image...')
        configure_base_image(instances[0].public_ip_address,
                             self.opts.user,
                             self.opts.identity_file)
    else:
        # Multi-node: configure all nodes in parallel, one worker process
        # per instance, and monitor progress until all jobs complete.
        print('Building base image on all nodes...')
        p = mp.Pool(len(instances))
        async_results = []
        for instance in instances:
            async_results.append(p.apply_async(configure_base_image,
                                               args=(instance.public_ip_address,
                                                     self.opts.user,
                                                     self.opts.identity_file)))
        monitor_mp_jobs(async_results)
        p.close()
        p.join()
    # deploy SSH key to nodes for passwordless SSH
    print('')
    print("Generating cluster's SSH key on master...")
    # Only generate a keypair if one doesn't already exist on the master.
    key_setup = """ [ -f ~/.ssh/id_rsa ] || (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)"""
    self.run(self.master_instance, key_setup)
    # Tar up the master's ~/.ssh dir (to stdout) so it can be piped to each
    # worker's stdin and unpacked there, copying key + authorized_keys.
    get_ssh_tar = 'tar c - .ssh'
    dot_ssh_tar, _ = self.run(self.master_instance, get_ssh_tar)
    if self.worker_instances:
        print("Transferring SSH key to workers:")
        put_ssh_tar = 'tar x'
        for i, worker in enumerate(self.worker_instances):
            progbar.progress_bar(i, len(self.worker_instances))
            self.run(worker, put_ssh_tar, stdin=dot_ssh_tar)
        progbar.progress_bar(len(self.worker_instances),
                             len(self.worker_instances))
        print('')
    # modify /etc/hosts on all nodes so nodes can address each other by name
    print('Updating /etc/hosts on all nodes...')
    hosts = ['{} {}'.format(self.get_ip(i), n)
             for n, i in instance_lookup.items()]
    host_string = '\n'.join(hosts)
    host_cmd = """sudo -- sh -c 'echo "{}" >> /etc/hosts'""".format(host_string)
    for instance in instances:
        self.run(instance, host_cmd)
    # build and share an EBS volume on the master node
    # Device names are /dev/xvdaa, /dev/xvdab, ... one per requested volume.
    devices = ['/dev/xvda' + string.ascii_lowercase[i]
               for i in range(self.opts.master_ebs_vol_num)]
    if len(devices) > 1:
        volume = self.build_ebs_raid_volume(devices)
    elif len(devices) == 1:
        volume = self.format_single_ebs_device(devices[0])
    # NOTE(review): if master_ebs_vol_num is 0, `volume` is never bound and
    # the share_nfs_volume call below would raise NameError when workers
    # exist — confirm whether vol_num == 0 with workers is a valid config.
    if len(self.worker_instances) > 0:
        self.share_nfs_volume(volume)
    # start Celery workers on all nodes (Redis broker + Flower monitor on
    # the master); only meaningful when there are worker nodes.
    if self.opts.celery and len(self.worker_instances) > 0:
        self.start_redis_server(self.master_instance)
        self.start_celery_workers(self.worker_instances)
        self.start_flower()
    # upload BaseSpace credentials file
    if self.opts.basespace_credentials:
        print('')
        print('Uploading BaseSpace credentials file...')
        cred_file = os.path.expanduser('~/.abstar/basespace_credentials')
        remote_path = '/home/{}/.abstar/basespace_credentials'.format(self.opts.user)
        if os.path.exists(cred_file):
            # NOTE(review): self.put takes self.master_name here while
            # self.run elsewhere takes self.master_instance — verify the
            # expected first-argument type of put().
            self.put(self.master_name, cred_file, remote_path)
        else:
            print('ERROR: Local credentials file was not found. No credentials were uploaded.')
    # configure and start a Jupyter Notebook server
    if self.opts.jupyter:
        self.setup_jupyter_notebook()
    # configure and start a MongoDB server; otherwise make sure mongod is
    # not left running.
    if self.opts.mongodb:
        self.setup_mongodb()
    else:
        self.stop_mongod()
    # write config information to master
    self.write_config_info()
    print('')