class Worker(Process): def __init__(self, opts, id, availability): super(Worker, self).__init__() self.opts= opts self.id= id self.availability= availability self.connect() self.alive= True self.sleep= self.opts.sleep def connect(self): (qHost, qPort, qKey)= self.opts.queue.split(':') self.queue= SyncManager(address= (qHost, int(qPort)), authkey= qKey) self.queue.connect() self.pipeline= self.queue.getPipeline() self.store= self.queue.getStore() # register with DFS self.dfs = None self.instances= dict() if self.opts.dfs != None: SyncManager.register('getInstances') (dHost, dPort, dKey)= self.opts.dfs.split(':') self.dfs= SyncManager(address= (dHost, int(dPort)), authkey= dKey) self.dfs.connect() self.instances= self.dfs.getInstances() def handleTransport(self, processId, transport, results): ''' Handles requested transport types ''' if transport == 's3' and self.opts.s3 != None: try: (accessKey, secretKey, bucket)= self.opts.s3.split(':') s3file= S3File( accessKey= accessKey, secretKey= secretKey, bucket= bucket, processId= processId, mode= 'w' ) s3file.write(results) results= s3file.getName() s3file.close() transport= 's3' except Exception, e: print >> stderr, "s3 transport failure using data store instead: %s" % (str(e)) elif transport == 'file' and self.opts.taskDir != None: try: fileStore= FileStore(proxy= self.queue, processId= processId, mode= 'w') fileStore.write(results) results= fileStore.getName() fileStore.close() transport= 'file' except Exception, e: print >> stderr, "fileStore transport failure using data store instead: %s" % (str(e))
class Manager(object):
    '''
    Supervises a pool of Worker processes: reaps dead workers, publishes this
    instance's capacity/availability to the DFS, and spawns new workers while
    the pipeline has queued tasks and slots are free.
    '''

    def __init__(self, opts):
        # opts -- parsed options; must provide queue, dfs, maxProcesses, sleep
        self.opts= opts
        super(Manager, self).__init__()
        self.sleep= self.opts.sleep
        self.alive= True
        self.workers= dict()  # pid -> Worker, for live workers only
        self.connect()
        ''' The fully qualified domain name for the aws ec2 instance should match what the instance private_dns_name is '''
        self.id= getfqdn()

    def connect(self):
        '''
        Register the remote proxies and connect to the queue manager (and,
        when configured, the DFS manager).  Also called from run() to
        re-establish the session after EOFError/IOError on the proxies.
        Endpoints are 'host:port:authkey' strings.
        '''
        # register with queue
        SyncManager.register('getPipeline')
        SyncManager.register('getStore')
        SyncManager.register('setFileContents')
        SyncManager.register('getFileContents')
        SyncManager.register('deleteFile')
        (qHost, qPort, qKey)= self.opts.queue.split(':')
        self.queue= SyncManager(address= (qHost, int(qPort)), authkey= qKey)
        self.queue.connect()
        self.pipeline= self.queue.getPipeline()
        self.store= self.queue.getStore()
        # register with dfs (optional -- skipped when no dfs endpoint given)
        self.dfs = None
        self.instances= dict()
        if self.opts.dfs != None:
            SyncManager.register('getInstances')
            (dHost, dPort, dKey)= self.opts.dfs.split(':')
            self.dfs= SyncManager(address= (dHost, int(dPort)), authkey= dKey)
            self.dfs.connect()
            self.instances= self.dfs.getInstances()

    def run(self):
        '''
        Main supervision loop; iterates every self.sleep seconds until stop()
        clears self.alive, then waits for the remaining workers to finish.
        '''
        while self.alive:
            try:
                # stop tracking dead workers
                # (safe in Python 2: items() snapshots the dict into a list)
                [self.workers.pop(pid) for (pid, worker) in self.workers.items() if not worker.is_alive()]
                instanceStore= self.instances.get(self.id, dict())
                # update dfs worker availability; lastTask is preserved if the
                # dfs already has one, otherwise seeded with the current UTC time
                availability= self.opts.maxProcesses - len(self.workers)
                self.instances.update([(self.id, dict(
                    id= self.id,
                    status= 'running',
                    capacity= self.opts.maxProcesses,
                    availability= availability,
                    lastTask= instanceStore.get('lastTask', datetime.strftime(datetime.utcnow(), '%Y-%m-%dT%H:%M:%S.000Z'))
                ))])
                print "========================================================"
                print "Queue:", self.pipeline.qsize()
                print "Store:", len(self.store)
                print "Capacity:", self.opts.maxProcesses
                print 'Workers:', len(self.workers)
                print "Availability:", self.opts.maxProcesses - len(self.workers)
                print "--------------------------------------------------------"
                # create workers: one per queued task, capped by free slots
                for i in range(min(self.pipeline.qsize(),
                                   self.opts.maxProcesses - len(self.workers))):
                    worker= Worker(self.opts, self.id, availability)
                    worker.start()
                    self.workers[worker.pid]= worker
            # proxy connection dropped -- re-register and reconnect, keep looping
            except EOFError:
                self.connect()
            except IOError:
                self.connect()
            sleep(self.sleep)
        # if manager is shutting down -- then wait for workers to finish
        print "manager shutting down"
        map(lambda (pid, worker): worker.join(), self.workers.items())

    def stop(self):
        '''
        Tell the dfs we are shutting down and have no capacity/availability;
        if the dfs doesn't know who we are then this creates a default stub.
        Clearing self.alive lets run() fall out of its loop and join workers.
        '''
        print "de-registering with dfs -- all workers down"
        # NOTE(review): status stays 'running' here even though we are
        # de-registering -- looks like it should be a terminal status; confirm
        # what status values the dfs consumers expect before changing.
        self.instances.update([(self.id, dict(
            id= self.id,
            status= 'running',
            capacity= 0,
            availability= 0
        ))])
        self.alive= False