class LocalSystem:
    """A ResourceCoordinator that uses the current machine,
    up to max_cores_to_use cores of it.

    It uses multiprocessing and the LocalSlave.
    """

    def __init__(self, max_cores_to_use=util.CPUs(), profile=False):
        # NOTE(review): the default is evaluated once at import time —
        # confirm util.CPUs() is stable for the process lifetime.
        self.max_cores_to_use = max_cores_to_use  # todo: update to local cpu count...
        self.slave = LocalSlave(self)
        self.cores_available = max_cores_to_use
        self.physical_memory, self.swap_memory = get_memory_available()
        self.timeout = 5  # seconds to block on the result queue per loop turn
        self.profile = profile

    def spawn_slaves(self):
        """Return the slave-id -> slave mapping (just the one local slave)."""
        return {'LocalSlave': self.slave}

    def get_resources(self):
        """Report this machine's resources, keyed by slave id.

        This is always the maximum available - the graph is handling the
        bookkeeping of running jobs.
        """
        res = {
            'LocalSlave': {
                'cores': self.cores_available,
                'physical_memory': self.physical_memory,
                'swap_memory': self.swap_memory,
            }
        }
        logger.info('get_resources, result %s - %s' % (id(res), res))
        return res

    def enter_loop(self):
        """Main scheduling loop.

        Starts the first batch of jobs, then repeatedly pulls finished-job
        messages off the multiprocessing queue, records their outcome on the
        job objects, hands newly generated jobs to the pipegraph, and starts
        further jobs until the pipegraph reports nothing is left to run.
        """
        self.spawn_slaves()
        self.que = MPQueueFixed()
        logger.info("Starting first batch of jobs")
        self.pipegraph.start_jobs()
        while True:
            # whether time out or job was done, let's check this...
            self.slave.check_for_dead_jobs()
            self.see_if_output_is_requested()
            try:
                logger.info("Listening to que")
                # was there a job done?
                slave_id, was_ok, job_id_done, stdout, stderr, exception, \
                    trace, new_jobs = self.que.get(
                        block=True, timeout=self.timeout)
                logger.info("Job returned: %s, was_ok: %s" %
                            (job_id_done, was_ok))
                logger.info("Remaining in que (approx): %i" %
                            self.que.qsize())
                job = self.pipegraph.jobs[job_id_done]
                job.was_done_on.add(slave_id)
                job.stdout = stdout
                job.stderr = stderr
                job.exception = exception
                job.trace = trace
                job.failed = not was_ok
                job.stop_time = time.time()
                if job.start_time:
                    logger.info("%s runtime: %is" %
                                (job_id_done, job.stop_time - job.start_time))
                if job.failed:
                    try:
                        # A b'STR' prefix marks exceptions that could not be
                        # pickled and were transmitted as plain strings.
                        if job.exception.startswith('STR'.encode('UTF-8')):
                            job.exception = job.exception[3:]
                            raise pickle.UnpicklingError(
                                "String Transmission"
                            )  # what an ugly control flow...
                        logger.info("Before depickle %s" % type(exception))
                        # NOTE(review): pickle.loads on data from the slave
                        # process — trusted here, but not safe for external input.
                        job.exception = pickle.loads(exception)
                        logger.info("After depickle %s" % type(job.exception))
                        logger.info("exception stored at %s" % (job))
                    except (pickle.UnpicklingError, EOFError):
                        # some exceptions can't be pickled,
                        # so we send a string instead
                        pass
                    if job.exception:
                        logger.info("Exception: %s" % repr(exception))
                        logger.info("Trace: %s" % trace)
                        logger.info("stdout: %s" % stdout)
                        logger.info("stderr: %s" % stderr)
                if new_jobs is not False:
                    # only jobs that declare modifies_jobgraph() may inject
                    # new jobs at runtime
                    if not job.modifies_jobgraph():
                        job.exception = ppg_exceptions.JobContractError(
                            "%s created jobs, but was not a job with modifies_jobgraph() returning True"
                            % job)
                        job.failed = True
                    else:
                        new_jobs = pickle.loads(new_jobs)
                        logger.info("We retrieved %i new jobs from %s" %
                                    (len(new_jobs), job))
                        self.pipegraph.new_jobs_generated_during_runtime(
                            new_jobs)
                more_jobs = self.pipegraph.job_executed(job)
                # core accounting is handled by the graph, not here
                if not more_jobs:
                    # all jobs are done and none are still running
                    break
                self.pipegraph.start_jobs()
            except (queue.Empty, IOError):
                # either timeout, or the que failed
                pass
        self.que.close()
        self.que.join_thread()  # wait for the que to close
        logger.info("Leaving loop")

    def see_if_output_is_requested(self):
        """If the user pressed a key on stdin, print the running jobs."""
        import select
        # zero timeout: poll without blocking the scheduling loop
        if select.select([sys.stdin], [], [], 0)[0]:
            sys.stdin.read(1)  # consume the pressed key
            self.pipegraph.print_running_jobs()
class LocalSystem:
    """A ResourceCoordinator that uses the current machine,
    up to max_cores_to_use cores of it

    It uses multiprocessing and the LocalSlave
    """

    def __init__(self, max_cores_to_use=util.CPUs(), profile=False):
        # todo: update to local cpu count...
        self.max_cores_to_use = max_cores_to_use
        self.slave = LocalSlave(self)
        self.cores_available = max_cores_to_use
        self.physical_memory, self.swap_memory = get_memory_available()
        self.timeout = 5
        self.profile = profile

    def spawn_slaves(self):
        """There is exactly one slave: the local one."""
        return {'LocalSlave': self.slave}

    def get_resources(self):
        """Describe this machine's capacity, keyed by slave id.

        This is always the maximum available - the graph is handling the
        bookeeping of running jobs.
        """
        resources = {
            'LocalSlave': {
                'cores': self.cores_available,
                'physical_memory': self.physical_memory,
                'swap_memory': self.swap_memory,
            }
        }
        logger.info('get_resources, result %s - %s' % (id(resources), resources))
        return resources

    def enter_loop(self):
        """Drive the job graph: start jobs, then keep consuming results
        from the queue until the pipegraph has nothing left to run."""
        self.spawn_slaves()
        self.que = MPQueueFixed()
        logger.info("Starting first batch of jobs")
        self.pipegraph.start_jobs()
        keep_going = True
        while keep_going:
            # whether time out or or job was done, let's check this...
            self.slave.check_for_dead_jobs()
            self.see_if_output_is_requested()
            try:
                logger.info("Listening to que")
                # was there a job done?
                message = self.que.get(block=True, timeout=self.timeout)
                (slave_id, was_ok, job_id_done, stdout, stderr,
                 exception, trace, new_jobs) = message
                logger.info("Job returned: %s, was_ok: %s" % (job_id_done, was_ok))
                logger.info("Remaining in que (approx): %i" % self.que.qsize())
                job = self.pipegraph.jobs[job_id_done]
                job.was_done_on.add(slave_id)
                job.stdout = stdout
                job.stderr = stderr
                job.exception = exception
                job.trace = trace
                job.failed = not was_ok
                job.stop_time = time.time()
                if job.start_time:
                    elapsed = job.stop_time - job.start_time
                    logger.info("%s runtime: %is" % (job_id_done, elapsed))
                if job.failed:
                    try:
                        if job.exception.startswith('STR'.encode('UTF-8')):
                            # string-transmitted exception: strip the marker
                            job.exception = job.exception[3:]
                            # what an ugly control flow...
                            raise pickle.UnpicklingError("String Transmission")
                        logger.info("Before depickle %s" % type(exception))
                        job.exception = pickle.loads(exception)
                        logger.info("After depickle %s" % type(job.exception))
                        logger.info("exception stored at %s" % (job))
                    except (pickle.UnpicklingError, EOFError):
                        # some exceptions can't be pickled,
                        # so we send a string instead
                        pass
                    if job.exception:
                        logger.info("Exception: %s" % repr(exception))
                        logger.info("Trace: %s" % trace)
                        logger.info("stdout: %s" % stdout)
                        logger.info("stderr: %s" % stderr)
                if new_jobs is not False:
                    if job.modifies_jobgraph():
                        new_jobs = pickle.loads(new_jobs)
                        logger.info("We retrieved %i new jobs from %s"
                                    % (len(new_jobs), job))
                        self.pipegraph.new_jobs_generated_during_runtime(new_jobs)
                    else:
                        job.exception = ppg_exceptions.JobContractError(
                            "%s created jobs, but was not a job with modifies_jobgraph() returning True"
                            % job)
                        job.failed = True
                if self.pipegraph.job_executed(job):
                    self.pipegraph.start_jobs()
                else:
                    # all jobs are done and there are no longer
                    # any more running...
                    keep_going = False
            except (queue.Empty, IOError):
                # either timeout, or the que failed
                pass
        self.que.close()
        self.que.join_thread()  # wait for the que to close
        logger.info("Leaving loop")

    def see_if_output_is_requested(self):
        """Poll stdin; on any keypress, report the currently running jobs."""
        import select
        try:
            readable = select.select([sys.stdin], [], [], 0)[0]
            if readable:
                ch = sys.stdin.read(1)  # enter pressed...
                self.pipegraph.print_running_jobs()
        finally:
            pass
def enter_loop(self):
    """Main scheduling loop of the coordinator.

    Starts the first batch of jobs, then repeatedly pulls result messages
    off the multiprocessing queue, records each job's outcome, feeds any
    runtime-generated jobs back into the pipegraph, and continues until
    the pipegraph reports no runnable or running jobs remain.
    """
    self.spawn_slaves()
    self.que = MPQueueFixed()
    logger.info("Starting first batch of jobs")
    self.pipegraph.start_jobs()
    while True:
        # whether time out or job was done, let's check this...
        self.slave.check_for_dead_jobs()
        self.see_if_output_is_requested()
        try:
            logger.info("Listening to que")
            # was there a job done?
            slave_id, was_ok, job_id_done, stdout, stderr, exception, \
                trace, new_jobs = self.que.get(
                    block=True, timeout=self.timeout)
            logger.info("Job returned: %s, was_ok: %s" %
                        (job_id_done, was_ok))
            logger.info("Remaining in que (approx): %i" % self.que.qsize())
            job = self.pipegraph.jobs[job_id_done]
            job.was_done_on.add(slave_id)
            job.stdout = stdout
            job.stderr = stderr
            job.exception = exception
            job.trace = trace
            job.failed = not was_ok
            job.stop_time = time.time()
            if job.start_time:
                logger.info("%s runtime: %is" %
                            (job_id_done, job.stop_time - job.start_time))
            if job.failed:
                try:
                    # A b'STR' prefix marks exceptions that could not be
                    # pickled and were transmitted as plain strings.
                    if job.exception.startswith('STR'.encode('UTF-8')):
                        job.exception = job.exception[3:]
                        raise pickle.UnpicklingError(
                            "String Transmission"
                        )  # what an ugly control flow...
                    logger.info("Before depickle %s" % type(exception))
                    # NOTE(review): pickle.loads on data from the slave
                    # process — trusted here, not safe for external input.
                    job.exception = pickle.loads(exception)
                    logger.info("After depickle %s" % type(job.exception))
                    logger.info("exception stored at %s" % (job))
                except (pickle.UnpicklingError, EOFError):
                    # some exceptions can't be pickled,
                    # so we send a string instead
                    pass
                if job.exception:
                    logger.info("Exception: %s" % repr(exception))
                    logger.info("Trace: %s" % trace)
                    logger.info("stdout: %s" % stdout)
                    logger.info("stderr: %s" % stderr)
            if new_jobs is not False:
                # only jobs declaring modifies_jobgraph() may inject jobs
                if not job.modifies_jobgraph():
                    job.exception = ppg_exceptions.JobContractError(
                        "%s created jobs, but was not a job with modifies_jobgraph() returning True"
                        % job)
                    job.failed = True
                else:
                    new_jobs = pickle.loads(new_jobs)
                    logger.info("We retrieved %i new jobs from %s" %
                                (len(new_jobs), job))
                    self.pipegraph.new_jobs_generated_during_runtime(new_jobs)
            more_jobs = self.pipegraph.job_executed(job)
            # core accounting is handled by the graph, not here
            if not more_jobs:
                # all jobs are done and none are still running
                break
            self.pipegraph.start_jobs()
        except (queue.Empty, IOError):
            # either timeout, or the que failed
            pass
    self.que.close()
    self.que.join_thread()  # wait for the que to close
    logger.info("Leaving loop")
def enter_loop(self):
    """Consume job results from the queue until the pipegraph is drained."""
    self.spawn_slaves()
    self.que = MPQueueFixed()
    logger.info("Starting first batch of jobs")
    self.pipegraph.start_jobs()
    running = True
    while running:
        # whether time out or or job was done, let's check this...
        self.slave.check_for_dead_jobs()
        self.see_if_output_is_requested()
        try:
            logger.info("Listening to que")
            # was there a job done?
            msg = self.que.get(block=True, timeout=self.timeout)
            (slave_id, was_ok, job_id_done, stdout, stderr,
             exception, trace, new_jobs) = msg
            logger.info("Job returned: %s, was_ok: %s" % (job_id_done, was_ok))
            logger.info("Remaining in que (approx): %i" % self.que.qsize())
            job = self.pipegraph.jobs[job_id_done]
            job.was_done_on.add(slave_id)
            job.stdout = stdout
            job.stderr = stderr
            job.exception = exception
            job.trace = trace
            job.failed = not was_ok
            job.stop_time = time.time()
            if job.start_time:
                duration = job.stop_time - job.start_time
                logger.info("%s runtime: %is" % (job_id_done, duration))
            if job.failed:
                try:
                    if job.exception.startswith('STR'.encode('UTF-8')):
                        # string-transmitted exception: strip the marker
                        job.exception = job.exception[3:]
                        # what an ugly control flow...
                        raise pickle.UnpicklingError("String Transmission")
                    logger.info("Before depickle %s" % type(exception))
                    job.exception = pickle.loads(exception)
                    logger.info("After depickle %s" % type(job.exception))
                    logger.info("exception stored at %s" % (job))
                except (pickle.UnpicklingError, EOFError):
                    # some exceptions can't be pickled,
                    # so we send a string instead
                    pass
                if job.exception:
                    logger.info("Exception: %s" % repr(exception))
                    logger.info("Trace: %s" % trace)
                    logger.info("stdout: %s" % stdout)
                    logger.info("stderr: %s" % stderr)
            if new_jobs is not False:
                if job.modifies_jobgraph():
                    new_jobs = pickle.loads(new_jobs)
                    logger.info("We retrieved %i new jobs from %s"
                                % (len(new_jobs), job))
                    self.pipegraph.new_jobs_generated_during_runtime(new_jobs)
                else:
                    job.exception = ppg_exceptions.JobContractError(
                        "%s created jobs, but was not a job with modifies_jobgraph() returning True"
                        % job)
                    job.failed = True
            if self.pipegraph.job_executed(job):
                self.pipegraph.start_jobs()
            else:
                # all jobs are done and there are no longer
                # any more running...
                running = False
        except (queue.Empty, IOError):
            # either timeout, or the que failed
            pass
    self.que.close()
    self.que.join_thread()  # wait for the que to close
    logger.info("Leaving loop")