def prepareData(envLLs):
    # Accept either a single envelope or an iterable of envelopes.
    if not hasattr(envLLs, '__iter__'):
        envLLs = (envLLs,)
    manager = JobManager()
    for envLL in envLLs:
        tiles = NED.getTiles(envLL)
        for tile in tiles:
            manager.addJob("Preparing %s" % (tile[0]), NED.prepDataFile, tile)
    manager.finish()
    console.printMessage("Postprocessing contours...")
    NED.removeSeaLevelContours()
    NED.simplifyContours(1.0)
    NED.convertContourElevationsToFt()
    NED.clusterContoursOnGeoColumn()
    NED.analyzeContoursTable()
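# prepareData above assumes a JobManager exposing addJob(title, func, arg)
# and finish(). The following is a minimal sketch of that interface backed by
# worker threads -- an illustration only; the project's real JobManager may
# well be implemented differently (e.g. with a bounded pool):
import threading

class SimpleJobManager(object):
    """Runs each queued job on its own thread; finish() joins them all."""

    def __init__(self):
        self.threads = []

    def addJob(self, title, func, arg):
        # Each job is a callable plus a single argument; title labels the thread.
        t = threading.Thread(name=title, target=func, args=(arg,))
        t.start()
        self.threads.append(t)

    def finish(self):
        # Block until every queued job has completed.
        for t in self.threads:
            t.join()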
class RESTfulHTTPRequestHandler(DefaultHTTPRequestHandler):
    """Default HTTP Request Handler Interface class."""

    # Shared across handler instances; each request object is short-lived.
    jobManager = JobManager()

    def _handle_OPTIONS(self):
        """Handle OPTIONS function."""
        try:
            self.send_response(200, "OK")
            self.send_header('Access-Control-Allow-Origin', '*')
            self.send_header('Access-Control-Allow-Methods', 'POST, OPTIONS')
            self.end_headers()
            logger.debug("Sent response: \"200\"")
        except Exception as ex:
            logger.error(str(ex))
            raise ex

    def _handle_GET(self):
        """Handle GET function. Override this method."""
        try:
            parsed_path = urlparse(self.path)
            if parsed_path.path == "/api/jobs":
                if parsed_path.query == "status=running":
                    self._handle_get_jobs_running()
                elif parsed_path.query == "status=stopped":
                    self._handle_get_jobs_stopped()
                elif parsed_path.query == "status=completed":
                    self._handle_get_jobs_completed()
                else:
                    self._handle_get_jobs_all()
                # /api/jobs is fully handled; returning here prevents the id
                # route below from sending a second (404) response.
                return
            m = re.match(r"/api/jobs/(\d+)", parsed_path.path)
            if m:
                self._handle_get_job(int(m[1]))
            else:
                self.send_response(404)
                self.end_headers()
                logger.debug("Sent response: \"404\"")
        except Exception as ex:
            logger.error(str(ex))
            raise ex

    def _handle_POST(self):
        """Handle POST function."""
        try:
            parsed_path = urlparse(self.path)
            if parsed_path.path == "/api/jobs":
                self._handle_post_job_start()
            else:
                self.send_response(404)
                self.end_headers()
        except Exception as ex:
            logger.error(str(ex))
            raise ex

    def _handle_DELETE(self):
        """Handle DELETE function."""
        try:
            self.send_response(404)
            self.end_headers()
        except Exception as ex:
            logger.error(str(ex))
            raise ex

    def _handle_PUT(self):
        """Handle PUT function."""
        try:
            parsed_path = urlparse(self.path)
            m = re.match(r"/api/jobs/(\d+)/stop", parsed_path.path)
            if m:
                self._handle_put_job_stop(int(m[1]))
            else:
                self.send_response(404)
                self.end_headers()
        except Exception as ex:
            logger.error(str(ex))
            raise ex

    def _handle_post_job_start(self):
        """Handle POST to /api/jobs"""
        try:
            content_length = int(self.headers['Content-Length'])
            body = self.rfile.read(content_length)
            self.send_response(200)
            self.end_headers()
            # Parse the JSON payload and pull out the job arguments.
            jobData = json.loads(body)
            cmd = jobData["command"]
            if cmd != "":
                id = self.jobManager.create_job(cmd)
                jobMap = {"id": id, "status": "running"}
            else:
                jobMap = {"id": -1, "status": "stopped"}
            jsonString = json.dumps(jobMap)
            self.wfile.write(bytes(jsonString, "utf8"))
        except Exception as ex:
            logger.error(str(ex))
            raise ex

    def _handle_put_job_stop(self, id):
        """Handle PUT to /api/jobs/<id>/stop"""
        if self.jobManager.kill_job(id):
            self.send_response(200)
        else:
            self.send_response(500)
        self.end_headers()  # finish the response in both branches

    def _send_json_response(self, code, map):
        """Sends HTTP response with JSON in the body"""
        jsonString = json.dumps(map)
        self.send_response(code)
        self.send_header('Content-type', 'application/json;charset=utf-8')
        self.send_header('Content-length', len(jsonString))
        self.end_headers()
        self.wfile.write(bytes(jsonString, "utf8"))

    def _handle_get_job(self, id):
        """Handle GET to /api/jobs/<id>"""
        job = self.jobManager.get_job(id)
        if job:
            jobstatus = jobStatusAsString(job.status)
            jobout = job.stdout.decode("utf-8")
            jobMap = {"id": job.id, "status": jobstatus,
                      "command": job.command, "stdout": jobout}
            code = 200
        else:
            jobMap = {"id": -1}
            code = 404
        self._send_json_response(code, jobMap)

    def _handle_get_jobs_all(self):
        """Handle GET to /api/jobs"""
        joblist = self.jobManager.get_jobs()
        self._send_json_response(200, {"jobs": joblist})

    def _handle_get_jobs_running(self):
        """Handle GET to /api/jobs?status=running"""
        joblist = self.jobManager.get_jobs_running()
        self._send_json_response(200, {"jobs": joblist})

    def _handle_get_jobs_stopped(self):
        """Handle GET to /api/jobs?status=stopped"""
        joblist = self.jobManager.get_jobs_stopped()
        self._send_json_response(200, {"jobs": joblist})

    def _handle_get_jobs_completed(self):
        """Handle GET to /api/jobs?status=completed"""
        joblist = self.jobManager.get_jobs_completed()
        self._send_json_response(200, {"jobs": joblist})
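# A hedged sketch of serving the handler above. It assumes
# DefaultHTTPRequestHandler is a BaseHTTPRequestHandler subclass that routes
# do_GET/do_POST/... to the _handle_* methods; the port and function name here
# are illustrative:
from http.server import HTTPServer

def serve(port=8080):
    server = HTTPServer(("", port), RESTfulHTTPRequestHandler)
    try:
        server.serve_forever()  # handle /api/jobs requests until interrupted
    except KeyboardInterrupt:
        server.server_close()

# Example request against the running server:
#   curl -X POST -d '{"command": "sleep 5"}' http://localhost:8080/api/jobs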
class JobExecutionLoop(object):
    '''
    This class executes jobs from a job queue taking into account dependencies.
    '''

    def __init__(self, log_file, job_csv_file, disk_csv_file,
                 service_account_email_address, project_id, pem_file,
                 data_center, auth_type, metadata_url, storage_directory,
                 cycle_period=60, max_instances=23, restart=False,
                 commandFile=""):
        '''
        Constructor
        '''
        self.log_file = log_file
        self.job_csv_file = job_csv_file
        self.disk_csv_file = disk_csv_file
        self.service_account_email_address = service_account_email_address
        self.pem_file = pem_file
        self.project_id = project_id
        self.data_center = data_center
        self.auth_type = auth_type
        self.metadata_url = metadata_url
        self.cycle_period = cycle_period
        self.storage_directory = storage_directory
        self.max_instances = max_instances
        self.restart = restart
        self.commandFile = commandFile

    def run(self):
        # get logfile reader
        self.log = LogFile.LogFile(self.log_file)
        # get google compute engine driver to interact with the compute engine
        self.myDriver = GCEManager(self.service_account_email_address,
                                   self.pem_file, project=self.project_id)
        # start job manager, which has some useful functions for checking on
        # the status and starting jobs
        self.jobManager = JobManager(self.job_csv_file, self.disk_csv_file,
                                     self.myDriver, self.log,
                                     self.storage_directory,
                                     self.max_instances,
                                     update=(not self.restart))
        # hard restart if restart is true to remove previous instances and disks
        if self.restart:
            self.log.write(
                "performing hard reset of all running instances and disks")
            self.jobManager.shutDown(wait=False)
            # need to reboot manager too so it knows the jobs are gone
            self.jobManager = JobManager(self.job_csv_file, self.disk_csv_file,
                                         self.myDriver, self.log,
                                         self.storage_directory,
                                         self.max_instances)
        # run jobs
        while self.jobManager.remainingJobs():
            print "updating job status for all jobs"
            self.jobManager.updateJobStatus()
            print "starting new jobs"
            self.jobManager.startNewJobs()
            print "waiting for jobs to complete"
            # cycle every minute, or whatever period is specified
            time.sleep(self.cycle_period)
            self.jobManager.writeInstanceSummary()
        # clean up
        self.jobManager.shutDown()
def __init__(self):
    self.m_oEventSvr = LibEvent.LibEvent()  # object for sync
    self.m_oJobManagerSvr = JobManager()
class JobExecutionLoop(object):
    '''
    This class executes jobs from a job queue taking into account dependencies.
    '''

    def __init__(self, log_file, job_csv_file, disk_csv_file,
                 service_account_email_address, project_id, pem_file,
                 data_center, auth_type, metadata_url, storage_directory,
                 rootdir, auth_account, cycle_period=60, max_instances=23,
                 restart=False, commandFile="", StackdriverAPIKey="",
                 activateStackDriver=False):
        '''
        Constructor
        '''
        self.log_file = log_file
        self.job_csv_file = job_csv_file
        self.disk_csv_file = disk_csv_file
        self.service_account_email_address = service_account_email_address
        self.pem_file = pem_file
        self.project_id = project_id
        self.data_center = data_center
        self.auth_type = auth_type
        self.metadata_url = metadata_url
        self.cycle_period = cycle_period
        self.storage_directory = storage_directory
        self.max_instances = max_instances
        self.restart = restart
        self.commandFile = commandFile
        self.rootdir = rootdir
        self.auth_account = auth_account
        self.StackdriverAPIKey = StackdriverAPIKey
        self.activateStackDriver = activateStackDriver

    def run(self):
        # get logfile reader
        self.log = LogFile.LogFile(self.log_file)
        # get google compute engine driver to interact with the compute engine
        self.myDriver = GCEManager(self.service_account_email_address,
                                   self.pem_file, self.auth_account,
                                   project=self.project_id)
        # start job manager, which has some useful functions for checking on
        # the status and starting jobs
        self.jobManager = JobManager(self.job_csv_file, self.disk_csv_file,
                                     self.myDriver, self.log,
                                     self.storage_directory,
                                     self.max_instances, self.rootdir,
                                     update=(not self.restart),
                                     StackdriverAPIKey=self.StackdriverAPIKey,
                                     activateStackDriver=self.activateStackDriver)
        # hard restart if restart is true to remove previous instances and disks
        if self.restart:
            self.log.write(
                "performing hard reset of all running instances and disks")
            self.jobManager.shutDown(wait=False)
            # need to reboot manager too so it knows the jobs are gone
            self.jobManager = JobManager(self.job_csv_file, self.disk_csv_file,
                                         self.myDriver, self.log,
                                         self.storage_directory,
                                         self.max_instances, self.rootdir,
                                         StackdriverAPIKey=self.StackdriverAPIKey,
                                         activateStackDriver=self.activateStackDriver)
        # run jobs
        while self.jobManager.remainingJobs():
            print "updating job status for all jobs"
            self.jobManager.updateJobStatus()
            print "starting new jobs"
            self.jobManager.startNewJobs()
            print "waiting for jobs to complete"
            # cycle every minute, or whatever period is specified
            time.sleep(self.cycle_period)
            self.jobManager.writeInstanceSummary()
        # clean up
        self.jobManager.shutDown()
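# A hedged usage sketch of the loop above; every argument value is a
# placeholder (file names, project id, service account, zone, etc. are
# illustrative, not taken from the real deployment):
loop = JobExecutionLoop(
    log_file="jobs.log",
    job_csv_file="jobs.csv",
    disk_csv_file="disks.csv",
    service_account_email_address="worker@example-project.iam.gserviceaccount.com",
    project_id="example-project",
    pem_file="key.pem",
    data_center="us-central1-a",
    auth_type="SA",
    metadata_url="http://metadata/computeMetadata/v1/",
    storage_directory="/tmp/results",
    rootdir="/opt/pipeline",
    auth_account="admin@example.com",
    cycle_period=60,
    restart=False)
loop.run()  # blocks until no jobs remain, then shuts all instances down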
from OutputModule import OutputModule
# Assumed sibling modules, following the OutputModule import pattern above;
# the original snippet omits these imports:
from InputParser import InputParser
from TagExpander import TagExpander
from JobManager import JobManager

#Python modules
import string
import sys

########################## Begin ############################

if len(sys.argv) == 2 and sys.argv[1] == "--help":
    print "Usage: ./COCOAss cocoaInputFile.txt baseCOCOADescriptionFile output.txt"
    sys.exit()
if len(sys.argv) != 4:
    print "Bad usage. Type ./COCOAss --help"
    sys.exit()

#Names
nameOfCocoa = sys.argv[2]
nameOfInput = sys.argv[1]
nameOfCVS = sys.argv[3]

#InputParser
myParser = InputParser(nameOfInput, nameOfCocoa)

#TagExpander
myExpander = TagExpander(myParser.read())
expanded = myExpander.expand()

#JobManager
manager = JobManager(nameOfCocoa, expanded)
output = manager.loop()

#WriteOutput
outputObject = OutputModule(nameOfCVS, output)
outputObject.write()
class EC2JobsDaemon(object):
    """
    Greenlet responsible for monitoring the jobs queue to pull off
    and run any new jobs found.
    TODO: Proper logging
    """

    # Wait 5 seconds between polls
    PAUSE_TIME = 5

    def __init__(self):
        """
        Initializes the managers for the jobs daemon.
        """
        # Init the Greenlet
        # Greenlet.__init__(self)

        # Create amazon handles
        self.amazonS3Manager = AmazonS3Manager()
        self.amazonSQSManager = AmazonSQSManager()
        self.amazonETManager = AmazonETManager()

        # Create the job manager
        self.jobManager = JobManager(self.amazonSQSManager)

    def run(self):
        """
        Loops checking for new jobs and runs them if it gets one.
        """
        self.running = True
        while self.running:
            try:
                print "Checking for new jobs..."

                # Read from the queue
                job = self.jobManager.getJobFromJobsQueue()

                # Create an empty response message
                responseMessage = None

                # Run the job if we got one
                if job is not None and job is not False:
                    job.run(self)
                    # Add a success response
                    responseMessage = ResponseMessage(
                        name="Response", success=True,
                        message="Completed job %s successfully." % str(job))
                # If job is false it means it failed at converting a message to a job
                elif job is False:
                    # Add a failure response
                    responseMessage = ResponseMessage(
                        name="Response", success=False,
                        message="Failed to convert job message.")

                if responseMessage:
                    self.amazonSQSManager.addQueueMessage(
                        self.amazonSQSManager.responsesQueue, responseMessage)
            except Exception, jobsDaemonException:
                print "Jobs daemon failed with exception %s." % str(jobsDaemonException)
                traceback.print_exc()
                try:
                    # Add a failure response
                    responseMessage = ResponseMessage(
                        name="Response", success=False,
                        message="Failed to complete job %s." % str(job))
                    self.amazonSQSManager.addQueueMessage(
                        self.amazonSQSManager.responsesQueue, responseMessage)
                except Exception, jobsDaemonFailureResponseException:
                    print "Jobs daemon failed to add failure response with exception %s." % str(jobsDaemonFailureResponseException)
            finally:
                # Pause between polls; assumes `time` is imported alongside
                # traceback (PAUSE_TIME and its comment above imply this sleep).
                time.sleep(self.PAUSE_TIME)
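# Minimal, hedged usage of the daemon above; it assumes AWS credentials and
# the Amazon*Manager classes are configured elsewhere:
if __name__ == "__main__":
    daemon = EC2JobsDaemon()
    daemon.run()  # polls the jobs queue every PAUSE_TIME seconds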
class Node:
    """
    Class that represents a cluster node with computation and storage
    functionalities.
    """

    def __init__(self, node_id, matrix_size):
        """
        Constructor.

        @type node_id: Integer
        @param node_id: an integer less than 'matrix_size' uniquely
            identifying the node
        @type matrix_size: Integer
        @param matrix_size: the size of the matrix A
        """
        self.node_id = node_id
        self.matrix_size = matrix_size
        self.datastore = None
        self.nodes = None
        self.jobManager = None
        self.mainProcess = None
        self.received_queue = Queue()
        self.synchronous_request_buffer = Queue(1)

    def __str__(self):
        """
        Pretty prints this node.

        @rtype: String
        @return: a string containing this node's id
        """
        return "Node %d" % self.node_id

    def set_datastore(self, datastore):
        """
        Gives the node a reference to its datastore. Guaranteed to be called
        before the first call to 'get_x'.

        @type datastore: Datastore
        @param datastore: the datastore associated with this node
        """
        self.datastore = datastore
        self.jobManager = JobManager(self, datastore)
        self.jobManager.start()

    def set_nodes(self, nodes):
        """
        Informs the current node of the other nodes in the cluster. Guaranteed
        to be called before the first call to 'get_x'.

        @type nodes: List of Node
        @param nodes: a list containing all the nodes in the cluster
        """
        self.nodes = nodes

    def get_x(self):
        """
        Computes the x value corresponding to this node. This method is
        invoked by the tester. This method must block until the result is
        available. 'mainProcess' is the EquationSystemSolver thread executing
        the main algorithm.

        @rtype: (Float, Integer)
        @return: the x value and the index of this variable in the solution
            vector
        """
        self.mainProcess = EquationSystemSolver(self, self.received_queue)
        self.mainProcess.start()
        self.mainProcess.join()
        return (self.mainProcess.x, self.node_id)

    def shutdown(self):
        """
        Instructs the node to shutdown (terminate all threads). This method is
        invoked by the tester. This method must block until all the threads
        started by this node terminate.
        """
        # Close the connection threads
        self.jobManager.close()

    def get_A(self, column):
        """
        Synchronously (blocking operation) returns an element from the row of
        the A matrix that is stored in this datastore.

        @type column: Integer
        @param column: the column of the element

        @rtype: Float
        @return: the element of matrix A at the requested position
        """
        request = Request(type="Get_A", column=column, src=self)
        self.process_request(request)
        return self.synchronous_request_buffer.get()

    def put_A(self, column, A):
        """
        Synchronously (blocking operation) updates an element from the row of
        the A matrix that is stored in this datastore.

        @type column: Integer
        @param column: the column of the element

        @type A: Float
        @param A: the new element value
        """
        request = Request(type="Put_A", value=A, column=column, src=self)
        self.process_request(request)
        self.synchronous_request_buffer.get()  # Wait for operation to complete

    def get_b(self):
        """
        Synchronously (blocking operation) returns the element of b stored in
        this datastore.

        @rtype: Float
        @return: the element of b stored in this datastore
        """
        request = Request(type="Get_b", src=self)
        self.process_request(request)
        return self.synchronous_request_buffer.get()

    def put_b(self, b):
        """
        Synchronously (blocking operation) updates the element of b stored in
        this datastore.

        @type b: Float
        @param b: the new value of b
        """
        request = Request(type="Put_b", value=b, src=self)
        self.process_request(request)
        self.synchronous_request_buffer.get()  # Wait for operation to complete

    def process_request(self, request):
        """
        Route the request to the appropriate handler (jobManager, or directly
        pass to the received_queue for direct use in the mainProcess instance).
        """
        if request.type == "Barrier":
            self.received_queue.put(request)
        else:
            self.jobManager.process_request(request)
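# A hedged sketch of wiring up a cluster of these nodes. The Datastore
# constructor signature and the one-thread-per-node get_x() calls are
# assumptions based on the docstrings above (each node's solve step depends
# on the others, so get_x() must run concurrently across nodes):
from threading import Thread

def solve(matrix_size):
    nodes = [Node(i, matrix_size) for i in range(matrix_size)]
    for node in nodes:
        node.set_datastore(Datastore(node))  # assumed constructor
        node.set_nodes(nodes)

    results = [None] * matrix_size

    def worker(node):
        # Blocks until this node's component of the solution is ready.
        x, index = node.get_x()
        results[index] = x

    threads = [Thread(target=worker, args=(n,)) for n in nodes]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    for node in nodes:
        node.shutdown()
    return results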