def startUpInstance(args):
    # Start an EC2 instance from the YAML config, optionally overriding the AMI.
    yamlConfig = YAML.load(args.yaml)
    botoConn = BotoConnection(yamlConfig)
    if args.ami:
        botoConn.getInstance({'amiID': args.ami})
    else:
        botoConn.getInstance()
def __init__(self, configFile=None, configDict=None, username=None, hostname=None,
             key=None, port=22, onDemand=True, cwd=None, testSSH=False):
    # Initialize from a YAML config file, a pre-built config dict, or explicit
    # keyword arguments, in that order of precedence.
    if configFile:
        yamlDict = YAML.load(configFile)
        self.__dictInit(yamlDict)
    elif configDict:
        self.__dictInit(configDict)
    else:
        self.__varInit(username, hostname, key=key, port=port, onDemand=onDemand)
    self.cwd = cwd
    # test the connection by opening and closing an SSH session
    if testSSH:
        self.open()
        self.close()
    #self.ssh = None  # set ssh based on connection type
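# Usage sketch for the constructor above (illustrative only): the three
# initialization paths follow the signature; the hostname, key path, and the
# dict keys below are placeholders, not settings taken from this project.
def _remoteConnectionUsageExample():
    # from a YAML config file, verifying the SSH connection immediately
    rc1 = RemoteConnection(configFile='remote.yaml', testSSH=True)
    # from an already-parsed dict (keys here are hypothetical)
    rc2 = RemoteConnection(configDict={'Hostname': 'host.example.com',
                                       'Username': 'user',
                                       'Key': '~/.ssh/id_rsa'})
    # from explicit keyword arguments
    rc3 = RemoteConnection(username='user', hostname='host.example.com',
                           key='~/.ssh/id_rsa', port=22, onDemand=True)
    return rc1, rc2, rc3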
def __init__(self, configFile=None, configDict=None, logFile='job.log'):
    super(RemoteJob, self).__init__()  # to implement threading
    if configFile:
        self.rc = RemoteConnection(configFile=configFile)
        self.yaml = YAML.load(configFile)
        # use the config file's path for the local working dir unless otherwise specified
        self.localWD, self.configFile = os.path.split(configFile)
    elif configDict:
        self.rc = RemoteConnection(configDict=configDict)
        self.yaml = configDict
        self.localWD, self.configFile = os.path.split(self.yaml['Filename'])
    else:
        sys.exit("Either the configFile or configDict kwarg needs to be used.")
    if 'LocalWorkingDir' in self.yaml:
        self.localWD = self._expandTilde(self.yaml['LocalWorkingDir'])
    if 'LocalOutputDir' in self.yaml:
        d = self.yaml['LocalOutputDir']
        if d[0] == '.':
            # a relative path ('./foo') is taken relative to the local working dir
            self.localOD = os.path.join(self.localWD, d[2:])
        else:
            self.localOD = self._expandTilde(d)
    else:
        self.localOD = self.localWD
    if not os.path.exists(self.localOD):
        os.mkdir(self.localOD)
    self.remoteWD = self._expandTilde(self.yaml['RemoteWorkingDir'])
    if 'RemoteOutputDir' in self.yaml:
        self.remoteOD = self._expandTilde(self.yaml['RemoteOutputDir'])
    else:
        # output dir defaults to the working dir
        self.remoteOD = self.remoteWD
    self.logFile = logFile
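# Illustrative job config for the constructor above, a minimal sketch with
# placeholder values. The keys are the ones the constructor (and the
# JobManager below) actually reads: 'Filename', 'ClassType', 'Hostname',
# 'RemoteWorkingDir', and the optional 'LocalWorkingDir', 'LocalOutputDir',
# and 'RemoteOutputDir'.
def _remoteJobConfigExample():
    jobConfig = {
        'Filename': '~/jobs/job01.yaml',        # used to derive the local working dir
        'ClassType': 'Remote',
        'Hostname': 'host.example.com',
        'LocalWorkingDir': '~/jobs',            # overrides the derived working dir
        'LocalOutputDir': './results',          # './' paths are relative to LocalWorkingDir
        'RemoteWorkingDir': '~/runs/job01',
        'RemoteOutputDir': '~/runs/job01/out',  # defaults to RemoteWorkingDir if omitted
    }
    return RemoteJob(configDict=jobConfig, logFile='job01.log')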
def createImage(args):
    # Create a new AMI from a running instance and report its ID.
    yamlConfig = YAML.load(args.yaml)
    botoConn = BotoConnection(yamlConfig)
    newAMI = botoConn.createImage(args.instId, args.name)
    print "Image created. The new AMI follows: %s" % newAMI
def deregisterAMI(args):
    yamlConfig = YAML.load(args.yaml)
    botoConn = BotoConnection(yamlConfig)
    botoConn.deregisterImage(args.ami)
def listImages(args):
    yamlConfig = YAML.load(args.yaml)
    botoConn = BotoConnection(yamlConfig)
    botoConn.listImages()
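# A minimal sketch of how the helpers above could be wired up as argparse
# subcommands. The subcommand and flag names are assumptions; only the
# attribute names the functions read (args.yaml, args.ami, args.instId,
# args.name) come from the code itself.
def _buildParser():
    import argparse
    parser = argparse.ArgumentParser(description="EC2 instance/AMI helpers")
    sub = parser.add_subparsers()

    p = sub.add_parser('start', help='start an instance')
    p.add_argument('yaml')
    p.add_argument('--ami', default=None)
    p.set_defaults(func=startUpInstance)

    p = sub.add_parser('create-image', help='create an AMI from a running instance')
    p.add_argument('yaml')
    p.add_argument('instId')
    p.add_argument('name')
    p.set_defaults(func=createImage)

    p = sub.add_parser('deregister', help='deregister an AMI')
    p.add_argument('yaml')
    p.add_argument('ami')
    p.set_defaults(func=deregisterAMI)

    p = sub.add_parser('list', help='list available images')
    p.add_argument('yaml')
    p.set_defaults(func=listImages)

    return parser

# usage sketch: args = _buildParser().parse_args(); args.func(args)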
def start(self, args):
    # consider making all keys lower case (we do this in the autobhans)
    self.yamlConfig = YAML.load(args.yaml)
    super(JobManager, self).start()
def run(self):
    """JobManager - starts worker processes and manages redis data structures."""
    sys.stdout = UnbufferedOutput(sys.stdout)
    sys.stderr = sys.stdout
    print "JobManager - starting."
    os.chdir(self.cwd)  # restore cwd (this gets changed elsewhere)
    # validate that redis is installed and running
    self.validateRedis()
    # set up the EC2 connection (we assume EC2 with this JobManager)
    boto_conn = BotoConnection(self.yamlConfig)
    # start up workers
    self.JobStarter = MP.Process(target=JobStarter, args=(self.yamlConfig,))
    self.JobStarter.start()
    self.ResultFetcher = MP.Process(target=ResultFetcher, args=(self.yamlConfig,))
    self.ResultFetcher.start()
    # set clean-up handler for SIGTERM
    signal(SIGTERM, self.sigterm_handler)
    # keeps track of which queues need to be considered for blocking
    # NOTE: these starting values may not necessarily be the only option
    self.queueTracker = QueueTracker(
        ["Job_Queue", "Result_Queue", "Local_Queue", "EC2_Queue"],
        [True, False, False, False])

    # *** MAIN LOOP ***
    while True:
        # multi-queue blocking with priority to the left-most keys (in list order);
        # this list changes throughout execution
        queueName, poppedData = self.r_server.blpop(self.queueTracker.getQueueList())

        if queueName == "Job_Queue":
            # need to add data to the tracker, but first we parse the poppedData
            runFiles = poppedData[1:-1].replace('\'', '').split(', ')
            print "New list of files to run received: %s" % repr(runFiles)
            # add files to holding queues
            for runParamFile in runFiles:
                try:
                    # http://stackoverflow.com/questions/8930915/python-append-dictionary-to-dictionary
                    runParamDict = dict(**self.yamlConfig)
                    runParamDict.update(YAML.load(runParamFile))
                    runParamDict['Filename'] = runParamFile
                    print "Queuing '%s' of ClassType" % runParamFile,  # line completed below
                    if runParamDict['ClassType'] == 'EC2':
                        print "EC2 into 'EC2_Queue'"
                        self.r_server.rpush("EC2_Queue", pickle.dumps(runParamDict))
                        if boto_conn.isInstanceAvailable():
                            self.queueTracker.setQueueFlag("EC2_Queue", True)
                    elif runParamDict['ClassType'] == 'Remote':
                        self.r_server.rpush("RemoteJobs", runParamDict['Hostname'])
                        remoteQueue = "%s_Queue" % runParamDict['Hostname']
                        print "Remote into '%s'" % remoteQueue
                        self.r_server.rpush(remoteQueue, pickle.dumps(runParamDict))
                        self.queueTracker.append(remoteQueue, flag=True)
                    elif runParamDict['ClassType'] == 'Local':
                        print "Local into 'Local_Queue'"
                        self.r_server.rpush("Local_Queue", pickle.dumps(runParamDict))
                        if self.r_server.llen("LocalJobs") < self.yamlConfig['LocalMaxJobs']:
                            self.queueTracker.setQueueFlag("Local_Queue", True)
                except Exception:
                    print "Error with input file:", runParamFile
                    print "Skipping file."

        elif queueName == "EC2_Queue":
            runParamDict = pickle.loads(poppedData)
            # create EC2 instance
            instId, instAddr = boto_conn.getInstance(instDict=runParamDict)
            runParamDict['InstanceID'] = instId  # new key
            runParamDict['Hostname'] = instAddr  # replace old host value
            print "New EC2 job. Instance (%s) starting up at '%s'" % (instId, instAddr)
            dq = EC2DelayedQueue()
            dq.start(120, runParamDict)
            # start blocking on the Result_Queue
            self.queueTracker.setQueueFlag("Result_Queue", True)
            # if too many instances, stop blocking
            if not boto_conn.isInstanceAvailable():
                self.queueTracker.setQueueFlag("EC2_Queue", False)

        elif queueName == "Local_Queue":
            runParamDict = pickle.loads(poppedData)
            print "New local job."
            # Use the filename as a unique identifier for the job
            self.r_server.rpush("LocalJobs", runParamDict['Filename'])
            # Queue the job up for processing
            self.r_server.rpush("Granted_Queue", pickle.dumps(runParamDict))
            # check whether we DO NOT have room for another local job within constraints
            if self.r_server.llen("LocalJobs") >= self.yamlConfig['LocalMaxJobs']:
                self.queueTracker.setQueueFlag("Local_Queue", False)
            # start blocking on the Result_Queue
            self.queueTracker.setQueueFlag("Result_Queue", True)

        elif queueName == "Result_Queue":
            runParamDict = pickle.loads(poppedData)
            print "Receiving results."
            # NOTE: need to look at validating results
            if runParamDict['ClassType'] == 'EC2':
                # NOTE: need to add recycling later
                boto_conn.terminateInstance(runParamDict['InstanceID'])
                self.queueTracker.setQueueFlag("EC2_Queue", True)
            elif runParamDict['ClassType'] == 'Remote':
                remoteQueue = "%s_Queue" % runParamDict['Hostname']
                cnt = self.r_server.llen(runParamDict['Hostname'])
                if cnt == 1:
                    # current machine no longer needs to be tracked
                    self.r_server.delete(runParamDict['Hostname'])
                    self.r_server.lrem("RemoteJobs", runParamDict['Hostname'])
                    self.queueTracker.remove(remoteQueue)
                else:
                    self.r_server.lrem(runParamDict['Hostname'], runParamDict['Filename'])
                    # a slot opened on this host, so resume watching its queue
                    self.queueTracker.setQueueFlag(remoteQueue, True)
            elif runParamDict['ClassType'] == 'Local':
                self.r_server.lrem("LocalJobs", runParamDict['Filename'])
                self.queueTracker.setQueueFlag("Local_Queue", True)

        # otherwise assume a per-host remote queue created on demand
        else:
            runParamDict = pickle.loads(poppedData)
            # Add the filename of the new job to the Hostname list
            self.r_server.rpush(runParamDict['Hostname'], runParamDict['Filename'])
            print "New Remote job from %s" % queueName
            # Queue the job up for processing
            self.r_server.rpush("Granted_Queue", pickle.dumps(runParamDict))
            # check whether we DO NOT have room for another remote job on this machine within constraints
            if self.r_server.llen(runParamDict['Hostname']) >= self.yamlConfig['RemoteMaxJobsPerHost']:
                self.queueTracker.setQueueFlag(queueName, False)  # stop watching queue
            # start blocking on the Result_Queue
            self.queueTracker.setQueueFlag("Result_Queue", True)
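# The main loop above relies on a QueueTracker helper that is not shown here.
# The sketch below is a guess at its shape, based only on the calls made in
# run() (getQueueList, setQueueFlag, append, remove): an ordered list of queue
# names with a per-queue flag, where getQueueList() returns only the flagged
# queues so blpop() ignores the rest.
class QueueTracker(object):
    def __init__(self, queueNames, flags):
        # preserve order so blpop priority follows the original list order
        self.order = list(queueNames)
        self.flags = dict(zip(queueNames, flags))

    def getQueueList(self):
        # only queues currently flagged True are handed to blpop
        return [q for q in self.order if self.flags[q]]

    def setQueueFlag(self, queueName, flag):
        self.flags[queueName] = flag

    def append(self, queueName, flag=True):
        if queueName not in self.flags:
            self.order.append(queueName)
        self.flags[queueName] = flag

    def remove(self, queueName):
        if queueName in self.flags:
            self.order.remove(queueName)
            del self.flags[queueName]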