def _gen_name( self, master_name, prefix ):
    """
    Build a cluster name of the form ``prefix + <5 random characters>``.

    The suffix is drawn from uppercase ASCII letters and digits. Candidates
    are checked with ``wkr.get_ANWorker`` and redrawn for as long as that
    lookup returns something truthy for the given ``master_name``.

    master_name - forwarded to the lookup as ``master_name``
    prefix - string placed in front of the random suffix

    Returns the first candidate name for which the lookup came back falsy.
    """
    suffix_len = 5
    # random.sample draws without replacement, so every character is put in
    # the pool suffix_len times to let it repeat within one suffix.
    pool = string.ascii_uppercase * suffix_len + string.digits * suffix_len

    while True:
        candidate = prefix + ''.join( random.sample( pool, suffix_len ) )
        # a collision is extremely unlikely, but possible, so check anyway
        if not wkr.get_ANWorker( master_name=master_name, cluster_name=candidate ):
            return candidate
def worker_id(self):
    """
    Return this worker's id, resolving and caching it on first use.

    While ``self._worker_id`` is None the id is looked up by
    ``self.cluster_name`` through ``wkr_mdl.get_ANWorker``. On a hit the id
    is cached and the command/response queues are attached to the worker
    record with ``wkr_mdl.add_sqs_queues_ANWorker``.

    If the id still cannot be resolved, the instance sets
    ``self._terminated`` and calls ``self.terminate()``.

    Failures in either step are logged and not re-raised. Only ``Exception``
    subclasses are handled, so KeyboardInterrupt / SystemExit are no longer
    swallowed.

    Returns the cached worker id, or None when it could not be resolved.
    """
    if self._worker_id is None:
        try:
            wm = wkr_mdl.get_ANWorker( cluster_name = self.cluster_name )
            if len(wm) > 0:
                self._worker_id = wm[0]['worker_id']
                wkr_mdl.add_sqs_queues_ANWorker( self._worker_id,
                        [self.command_q, self.response_q] )
        except Exception:
            self.logger.exception("Unable to get worker_model for %s",
                    self.cluster_name)
        if self._worker_id is None:
            # Without an id there is nothing to operate on: shut down.
            try:
                self._terminated = True
                self.terminate()
            except Exception:
                self.logger.exception("Inconsistent state")
    return self._worker_id
def _terminate_single_worker( self, worker_id):
    """
    Terminate one worker and describe the outcome.

    worker_id - id passed to ``wkr.get_ANWorker`` / ``wkr.update_ANWorker``

    Returns a ``(msg, status)`` tuple where ``msg`` is a dict with a
    'status' key ('complete' or 'error') and ``status`` is an HTTP-style
    code:

      * 404 - no worker record was found for ``worker_id``
      * 200 - the record status was wkr.CONFIG or wkr.NA and is now
              wkr.TERMINATED
      * 200 - ``wkr.confirm_worker_running`` was truthy and an active master
              exists: the record is set to wkr.MARKED_FOR_TERMINATION and a
              'terminate' message is written to the launcher queue
      * 409 - ``wkr.confirm_worker_running`` was truthy but no active master
              was found
      * 200 - anything else: the record is set to wkr.TERMINATED_WITH_ERROR
    """
    worker = wkr.get_ANWorker( worker_id=worker_id )
    self.app.logger.info("%r" % (worker,) )

    if not worker:
        # Same response GET gives for an unknown id; without this guard the
        # status lookup below raises TypeError on a missing record.
        msg = {'status': 'error',
                'data' : {'worker_id': worker_id},
                'message' : 'Worker not found' }
        return (msg, 404)

    if worker['status'] in [wkr.CONFIG, wkr.NA]:
        worker = wkr.update_ANWorker( worker_id, status=wkr.TERMINATED)
        msg = {'status':'complete',
                'data' : json_prep( worker )}
        status = 200
    elif wkr.confirm_worker_running( worker ):
        #we have an active cluster
        master = mstr.get_active_master()
        if master:
            launcher_message = {'action':'terminate',
                                'worker_id': worker_id}
            launcher_config = sys_def_mdl.get_system_defaults(
                    setting_name = 'launcher_config', component='Master' )
            conn = boto.sqs.connect_to_region('us-east-1')
            lq = conn.create_queue( launcher_config['launcher_sqs_in'] )
            # The record is marked before the message is written to the queue.
            worker = wkr.update_ANWorker( worker_id,
                    status=wkr.MARKED_FOR_TERMINATION)
            mess = Message(body=json.dumps( launcher_message ))
            lq.write( mess )
            msg = {'status':'complete',
                    'data' : json_prep( worker ) }
            status = 200
        else:
            msg = {'status': 'error',
                    'data' : {'worker_id': worker_id},
                    'message' : 'Running Cluster without an active master' }
            status = 409 #Conflict
    else:
        worker = wkr.update_ANWorker( worker_id,
                status=wkr.TERMINATED_WITH_ERROR)
        msg = {'status':'complete',
                'data' : json_prep( worker )}
        status = 200
    return (msg, status)
def GET( self, request):
    """
    Report on a single worker, or list the active workers.

    With ``self.worker_id`` set, the matching record from
    ``wkr.get_ANWorker`` is returned, or a 404 'Worker not found' error.

    Without it, the workers from ``wkr.get_active_workers(branch)`` are
    returned, or a 404 'No workers available' error when there are none.
    ``branch`` is None when the request carries a truthy 'branch' argument,
    otherwise it is the 'branch' entry of the Master 'local_settings'.

    Returns a ``(msg, status)`` tuple; record data is passed through
    ``json_prep``.
    """
    if self.worker_id is not None:
        found = wkr.get_ANWorker( worker_id=self.worker_id )
        if found:
            return ( {'status' : 'complete',
                      'data' : json_prep( found ) }, 200 )
        return ( {'status': 'error',
                  'data' : {'worker_id' : self.worker_id},
                  'message' : 'Worker not found' }, 404 )

    #return active workers
    # NOTE(review): the value of the 'branch' argument is never used; its
    # presence only switches the filter to None - confirm this is intended.
    if request.args.get('branch'):
        branch = None
    else:
        branch = sys_def_mdl.get_system_defaults('local_settings',
                'Master')['branch']
    self.app.logger.info("GETting workers for the %s branch" % ( branch ))

    active = [json_prep( entry ) for entry in wkr.get_active_workers(branch)]
    if not active:
        return ( { 'status' : 'error',
                   'data' : [],
                   'message': 'No workers available' }, 404 )
    return ( { 'status' : 'complete',
               'data' : active }, 200 )
def config(worker_id):
    """
    Render the cluster configuration text for one worker.

    The worker's 'starcluster_config' dict is fed to the 'sc-main.cfg'
    template; 'sc-plugins.cfg' and 'sc-security-group.cfg' are appended and
    the result is returned as a text/plain Response.

    example config
    {
        'cluster_name':'dummy-cluster',
        'aws_region':'us-east-1',
        'key_name': 'somekey',
        'key_location': '/home/sgeadmin/somekey.key',
        'cluster_size': 1,
        'node_instance_type': 'm1.xlarge',
        'node_image_id': 'ami-1234567',
        'iam_profile':'some-profile',
        'force_spot_master':True,
        'spot_bid':2.00,
        'plugins':'p1,p2,p3'
    }

    When the Master 'local_settings' branch is 'develop', the plugin list is
    rewritten: 'dev-tgr' is put first and 'gpu-bootstrap' / 'data-bootstrap'
    are swapped for their '-dev-' variants.

    worker_id - id used to look up the worker record

    Returns a text/plain Response, with status 404 when no worker record
    exists for ``worker_id``.
    """
    import masterdirac.models.worker as wrkr
    import masterdirac.models.systemdefaults as sys_def
    local_settings = sys_def.get_system_defaults('local_settings', 'Master')
    worker_model = wrkr.get_ANWorker( worker_id = worker_id )
    if not worker_model:
        # Previously fell through to the render with config_settings unbound.
        return Response( 'Worker not found', status=404,
                mimetype="text/plain" )

    # Work on a copy so the plugin rewrite never touches the stored record.
    config_settings = dict( worker_model['starcluster_config'] )
    if local_settings['branch'] == 'develop':
        dev_variants = {'gpu-bootstrap': 'gpu-dev-bootstrap',
                        'data-bootstrap': 'data-dev-bootstrap'}
        def devify( pl ):
            names = ['dev-tgr']
            for plugin in pl.split(','):
                name = plugin.strip()
                # drop empty entries (e.g. from '' or a trailing comma)
                if name:
                    names.append( dev_variants.get( name, name ) )
            return ', '.join(names)
        config_settings['plugins'] = devify( config_settings['plugins'] )
    return Response( render_template('sc-main.cfg', **config_settings) +
            render_template('sc-plugins.cfg') +
            render_template('sc-security-group.cfg'),
            mimetype="text/plain" )
def worker_model(self):
    """
    Look up this worker's record by ``self.worker_id``.

    Returns whatever ``wkr_mdl.get_ANWorker`` returns for that id; nothing
    is cached here, so every call performs a fresh lookup.
    """
    fetch = wkr_mdl.get_ANWorker
    return fetch( worker_id = self.worker_id )