def delete_job(self, job):
    """
    Remove a job from the scheduler and erase its folder from disk

    Arguments:
    job -- a Job instance, or a job id given as str/unicode

    Returns True if the job (or a leftover folder named after its id)
    was found and deleted, False otherwise

    Raises ValueError if `job` is neither a string nor a Job, and
    errors.DeleteError if a DatasetJob still has ModelJobs that use it
    """
    if isinstance(job, (str, unicode)):
        job_id = str(job)
    elif isinstance(job, Job):
        job_id = job.id()
    else:
        raise ValueError('called delete_job with a %s' % type(job))

    dependent_jobs = []

    # first look through the jobs the scheduler knows about
    for index, candidate in enumerate(self.jobs):
        if candidate.id() != job_id:
            continue

        if isinstance(candidate, DatasetJob):
            # a dataset must stay while any model still refers to it
            for other in self.jobs:
                if not isinstance(other, ModelJob):
                    continue
                if other.dataset_id != candidate.id():
                    continue
                logger.error(
                    'Cannot delete "%s" (%s) because "%s" (%s) depends on it.'
                    % (candidate.name(), candidate.id(), other.name(), other.id()))
                dependent_jobs.append(other.name())

        if dependent_jobs:
            count = len(dependent_jobs)
            quoted_names = ', '.join('"%s"' % name for name in dependent_jobs)
            error_message = 'Cannot delete "%s" because %d model%s depend%s on it: %s' % (
                candidate.name(),
                count,
                ('s' if count != 1 else ''),
                ('s' if count == 1 else ''),
                quoted_names)
            raise errors.DeleteError(error_message)

        self.jobs.pop(index)
        candidate.abort()
        if os.path.exists(candidate.dir()):
            shutil.rmtree(candidate.dir())
        logger.info('Job deleted.', job_id=job_id)
        return True

    # not a known job: maybe a folder with that id is still on disk.
    # Only a direct child of the jobs directory may be removed.
    leftover = os.path.normpath(os.path.join(config_option('jobs_dir'), job_id))
    is_direct_child = os.path.dirname(leftover) == config_option('jobs_dir')
    if is_direct_child and os.path.exists(leftover):
        shutil.rmtree(leftover)
        return True

    return False
def delete_job(self, job):
    """
    Deletes an entire job folder from disk

    `job` may be a Job object or the id of a job (str or unicode).
    Any other type raises ValueError.

    A DatasetJob that is still referenced by one or more ModelJobs is
    not deleted; errors.DeleteError is raised instead.

    Returns True if the Job was found and deleted, False otherwise
    """
    if isinstance(job, Job):
        job_id = job.id()
    elif isinstance(job, (str, unicode)):
        job_id = str(job)
    else:
        raise ValueError('called delete_job with a %s' % type(job))

    dependent_jobs = []

    # locate the first registered job with this id (None when unknown)
    found = next(
        ((pos, entry) for pos, entry in enumerate(self.jobs)
         if entry.id() == job_id),
        None)

    if found is not None:
        pos, entry = found

        if isinstance(entry, DatasetJob):
            # collect every model that was trained on this dataset
            for model in self.jobs:
                if isinstance(model, ModelJob) and model.dataset_id == entry.id():
                    logger.error('Cannot delete "%s" (%s) because "%s" (%s) depends on it.' % (
                        entry.name(), entry.id(), model.name(), model.id()))
                    dependent_jobs.append(model.name())

        if len(dependent_jobs) > 0:
            noun_suffix = 's' if len(dependent_jobs) != 1 else ''
            verb_suffix = 's' if len(dependent_jobs) == 1 else ''
            listing = ', '.join(['"%s"' % name for name in dependent_jobs])
            raise errors.DeleteError(
                'Cannot delete "%s" because %d model%s depend%s on it: %s' % (
                    entry.name(), len(dependent_jobs),
                    noun_suffix, verb_suffix, listing))

        self.jobs.pop(pos)
        entry.abort()
        if os.path.exists(entry.dir()):
            shutil.rmtree(entry.dir())
        logger.info('Job deleted.', job_id=job_id)
        return True

    # see if the folder exists on disk; it has to sit directly inside
    # the jobs directory to be eligible for removal
    path = os.path.normpath(os.path.join(config_option('jobs_dir'), job_id))
    if os.path.dirname(path) != config_option('jobs_dir'):
        return False
    if not os.path.exists(path):
        return False
    shutil.rmtree(path)
    return True
def handle_exception(e, status_code=500):
    """
    Render the '500.html' error page for an exception

    Arguments:
    e -- the exception to report

    Keyword arguments:
    status_code -- the HTTP status code returned next to the page

    When the 'level' config option is 'test' the exception is re-raised
    instead of being rendered. The traceback is only included in the
    page when app.debug is set.
    """
    if config_option('level') == 'test':
        raise e

    page_fields = {
        'title': type(e).__name__,
        'message': str(e),
        'trace': traceback.format_exc() if app.debug else None,
    }
    return render_template('500.html', **page_fields), status_code
def serve_file(path):
    """
    Return a file in the jobs directory as an attachment

    Arguments:
    path -- the requested path, relative to the 'jobs_dir' config option

    Aborts with 403 if the path resolves outside the jobs directory or
    names a directory, and with 404 if it does not exist.
    """
    jobs_dir = os.path.normpath(config_option('jobs_dir'))
    path = os.path.normpath(os.path.join(jobs_dir, path))

    # Don't allow path manipulation.
    # os.path.commonprefix compares character by character, so it would
    # accept a sibling such as "<jobs_dir>_other/file". Compare against
    # the directory name followed by a separator instead.
    jobs_prefix = os.path.join(jobs_dir, '')
    if path != jobs_dir and not path.startswith(jobs_prefix):
        abort(403)
    if not os.path.exists(path):
        abort(404)
    if os.path.isdir(path):
        abort(403)

    # Job files are not necessarily text, so read the raw bytes
    with open(path, 'rb') as infile:
        response = make_response(infile.read())
    response.headers["Content-Disposition"] = "attachment; filename=%s" % os.path.basename(path)
    return response
def path(self, filename, relative=False):
    """
    Build the path of a file that belongs to this object

    Arguments:
    filename -- the requested file; an absolute filename is used as is,
        anything else is joined onto self.job_dir

    Keyword arguments:
    relative -- If False, return the path as built above
        If True, return it relative to the jobs directory

    Returns None when filename is empty or None, otherwise a str
    """
    if not filename:
        return None

    if os.path.isabs(filename):
        location = filename
    else:
        location = os.path.join(self.job_dir, filename)

    if not relative:
        return str(location)
    return str(os.path.relpath(location, config_option('jobs_dir')))
def serve_file(path):
    """
    Return a file in the jobs directory

    If you install the nginx.site file, nginx will serve files instead
    and this path will never be used

    Arguments:
    path -- the requested path, relative to the 'jobs_dir' config option

    Aborts with 403 if the path resolves outside the jobs directory or
    names a directory, and with 404 if it does not exist.
    """
    jobs_dir = os.path.normpath(config_option('jobs_dir'))
    path = os.path.normpath(os.path.join(jobs_dir, path))

    # Don't allow path manipulation.
    # A plain os.path.commonprefix check is character based and lets
    # "<jobs_dir>_other/file" through, so require the jobs directory
    # followed by a path separator.
    inside_jobs_dir = (
        path == jobs_dir or path.startswith(os.path.join(jobs_dir, '')))
    if not inside_jobs_dir:
        abort(403)
    if not os.path.exists(path):
        abort(404)
    if os.path.isdir(path):
        abort(403)

    # Read bytes: served files are not guaranteed to be text
    with open(path, 'rb') as infile:
        response = make_response(infile.read())
    response.headers["Content-Disposition"] = "attachment; filename=%s" % os.path.basename(path)
    return response
import os
import sys

from flask import Flask
from flask.ext.socketio import SocketIO

from digits import utils
from config import config_option
import digits.scheduler

### Create Flask, Scheduler and SocketIO objects

app = Flask(__name__)
# Debug mode is switched off explicitly; the secret key comes from the config
app.config['DEBUG'] = False
app.config['SECRET_KEY'] = config_option('secret_key')
socketio = SocketIO(app)
# The scheduler is built from the 'gpu_list' config option
scheduler = digits.scheduler.Scheduler(config_option('gpu_list'))

### Register filters and views

# Time formatting helpers made available to the Jinja templates
app.jinja_env.filters['print_time'] = utils.time_filters.print_time
app.jinja_env.filters['print_time_diff'] = utils.time_filters.print_time_diff
app.jinja_env.filters['print_time_since'] = utils.time_filters.print_time_since
app.jinja_env.trim_blocks = True
app.jinja_env.lstrip_blocks = True

# NOTE(review): imported here rather than at the top, presumably because
# digits.views needs `app` to exist already -- confirm before moving it
import digits.views

### Setup the environment
def load_past_jobs(self):
    """
    Look in the jobs directory and load all valid jobs

    Every sub-directory of the 'jobs_dir' config option whose name is not
    already the id of a job in self.jobs is loaded with Job.load(). Jobs
    and tasks still marked as running are switched to Status.ABORT and
    the job is saved again.

    DatasetJobs are appended to self.jobs first, then the ModelJobs for
    which job.load_dataset() succeeds.

    Failures are counted; they are only printed when self.verbose is set.
    """
    failed = 0
    loaded_jobs = []

    def report_failure(error, job_name):
        # Print the exception type (and message, if any) for one job
        if str(error):
            print('Caught %s while loading job "%s":' % (type(error).__name__, job_name))
            print('\t%s' % error)
        else:
            print('Caught %s while loading job "%s"' % (type(error).__name__, job_name))

    jobs_dir = config_option('jobs_dir')
    for dir_name in sorted(os.listdir(jobs_dir)):
        if not os.path.isdir(os.path.join(jobs_dir, dir_name)):
            continue

        # Make sure it hasn't already been loaded
        if any(known.id() == dir_name for known in self.jobs):
            continue

        try:
            job = Job.load(dir_name)
            # The server might have crashed
            if job.status.is_running():
                job.status = Status.ABORT
            for task in job.tasks:
                if task.status.is_running():
                    task.status = Status.ABORT

            # We might have changed some attributes here or in __setstate__
            job.save()
            loaded_jobs.append(job)
        except Exception as e:
            failed += 1
            if self.verbose:
                # Job.load() itself may have raised, in which case `job`
                # is unbound or still refers to an earlier job. The
                # directory name is the only reliable identifier here.
                report_failure(e, dir_name)

    # add DatasetJobs
    for job in loaded_jobs:
        if isinstance(job, DatasetJob):
            self.jobs.append(job)

    # add ModelJobs
    for job in loaded_jobs:
        if isinstance(job, ModelJob):
            try:
                # load the DatasetJob
                job.load_dataset()
                self.jobs.append(job)
            except Exception as e:
                failed += 1
                if self.verbose:
                    report_failure(e, job.id())

    if failed > 0 and self.verbose:
        print('%s %s %s' % ('WARNING:', failed, 'jobs failed to load.'))
def main_thread(self):
    """
    Monitors the jobs in self.jobs, updates their statuses,
    and puts their tasks in queues to be processed by other threads

    Runs until self.shutdown is set or a KeyboardInterrupt arrives.
    On the way out every job is aborted and saved, and self.running
    is set to False.
    """
    # Route SIGTERM to the scheduler's own handler
    signal.signal(signal.SIGTERM, self.sigterm_handler)

    try:
        last_saved = None
        while not self.shutdown.is_set():
            # Iterate backwards so we can delete jobs
            for job in reversed(self.jobs):
                if job.status == Status.INIT:
                    def start_this_job(job):
                        # A ModelJob may only run once its dataset is DONE;
                        # it is aborted if the dataset was aborted or failed,
                        # and waits otherwise. Other jobs run right away.
                        if isinstance(job, ModelJob):
                            if job.dataset.status == Status.DONE:
                                job.status = Status.RUN
                            elif job.dataset.status in [Status.ABORT, Status.ERROR]:
                                job.abort()
                            else:
                                job.status = Status.WAIT
                        else:
                            job.status = Status.RUN
                    if config_option('level') == 'test':
                        start_this_job(job)
                    else:
                        # Delay start by one second for initial page load
                        # NOTE(review): the job is still INIT until the
                        # callback runs, so later passes of this loop may
                        # schedule further callbacks -- confirm harmless
                        gevent.spawn_later(1, start_this_job, job)

                if job.status == Status.WAIT:
                    # Re-check the dataset of a waiting ModelJob
                    if isinstance(job, ModelJob):
                        if job.dataset.status == Status.DONE:
                            job.status = Status.RUN
                        elif job.dataset.status in [Status.ABORT, Status.ERROR]:
                            job.abort()
                    else:
                        job.status = Status.RUN

                if job.status == Status.RUN:
                    # Stays True only if no task is INIT, WAIT, RUN or ERROR
                    alldone = True
                    for task in job.tasks:
                        if task.status == Status.INIT:
                            alldone = False
                            if task.ready_to_queue():
                                logger.debug('%s task queued.' % task.name(), job_id=job.id())
                                task.status = Status.WAIT
                                # Pick the queue that matches the task type
                                if isinstance(task, dataset_tasks.ParseFolderTask):
                                    self.split_queue.put( (job, task) )
                                elif isinstance(task, dataset_tasks.CreateDbTask):
                                    self.create_queue.put( (job, task) )
                                elif isinstance(task, model_tasks.TrainTask):
                                    self.train_queue.put( (job, task) )
                                else:
                                    logger.error('Task type %s not recognized' % type(task).__name__, job_id=job.id())
                                    task.exception = Exception('Task type not recognized')
                                    task.status = Status.ERROR
                        elif task.status == Status.WAIT or task.status == Status.RUN:
                            alldone = False
                        elif task.status == Status.DONE:
                            pass
                        elif task.status == Status.ABORT:
                            pass
                        elif task.status == Status.ERROR:
                            # One failed task fails the whole job; stop looking
                            job.status = Status.ERROR
                            alldone = False
                            break
                        else:
                            logger.warning('Unrecognized task status: "%s"', task.status, job_id=job.id())
                    if alldone:
                        job.status = Status.DONE
                        logger.info('Job complete.', job_id=job.id())
                        job.save()

            # save running jobs every 15 seconds
            if not last_saved or time.time()-last_saved > 15:
                for job in self.jobs:
                    if job.status.is_running():
                        job.save()
                last_saved = time.time()

            time.sleep(utils.wait_time())
    except KeyboardInterrupt:
        pass

    # Shutdown
    for job in self.jobs:
        job.abort()
        job.save()
    self.running = False
def setUpClass(cls):
    """
    Create one Scheduler, stored on the class as `s`, and start it

    The Scheduler is built from the 'gpu_list' config option. An
    AssertionError is raised when start() returns a falsy value.
    """
    scheduler_class = _.Scheduler
    cls.s = scheduler_class(config_option('gpu_list'))
    started = cls.s.start()
    assert started, 'failed to start'
import os
import sys

from flask import Flask
from flask.ext.socketio import SocketIO

from digits import utils
from config import config_option
import digits.scheduler

### Create Flask, Scheduler and SocketIO objects

app = Flask(__name__)
# Debug mode is switched off explicitly; the secret key comes from the config
app.config['DEBUG'] = False
app.config['SECRET_KEY'] = config_option('secret_key')
# NOTE(review): presumably stops Flask from redirecting to a route's
# default-argument URL -- confirm against the Flask documentation
app.url_map.redirect_defaults = False
socketio = SocketIO(app)
# The scheduler is built from the 'gpu_list' config option
scheduler = digits.scheduler.Scheduler(config_option('gpu_list'))

# Set up flask API documentation, if installed
try:
    from flask.ext.autodoc import Autodoc
    _doc = Autodoc(app)
    autodoc = _doc.doc # decorator
except ImportError:
    # Autodoc is not available: `autodoc(...)` accepts any arguments and
    # gives back a decorator that returns the function unchanged
    def autodoc(*args, **kwargs):
        def _doc(f):
            # noop decorator
            return f
        return _doc
def get_scheduler(self):
    """
    Return a new Scheduler built from the 'gpu_list' config option
    """
    scheduler_class = _.Scheduler
    return scheduler_class(config_option('gpu_list'))
def load_past_jobs(self):
    """
    Scan the jobs directory and register every job that can be loaded

    Directories whose name matches the id of a job already in self.jobs
    are skipped. A loaded job, or any of its tasks, that still claims to
    be running is marked Status.ABORT before the job is saved again.

    DatasetJobs are added to self.jobs before ModelJobs; a ModelJob is
    only added when its load_dataset() call succeeds.

    Jobs that fail to load are counted and, in verbose mode, reported
    on stdout.
    """
    def describe(error, name):
        # Report one failed job; the message line is only printed when
        # the exception has a non-empty string form
        kind = type(error).__name__
        if str(error):
            print('Caught %s while loading job "%s":' % (kind, name))
            print('\t%s' % error)
        else:
            print('Caught %s while loading job "%s"' % (kind, name))

    failed = 0
    loaded_jobs = []

    for dir_name in sorted(os.listdir(config_option('jobs_dir'))):
        if not os.path.isdir(os.path.join(config_option('jobs_dir'), dir_name)):
            continue

        # Skip anything the scheduler already knows about
        already_loaded = any(known.id() == dir_name for known in self.jobs)
        if already_loaded:
            continue

        try:
            job = Job.load(dir_name)

            # Nothing can really be running at load time:
            # the server might have crashed
            if job.status.is_running():
                job.status = Status.ABORT
            for task in job.tasks:
                if task.status.is_running():
                    task.status = Status.ABORT

            # Persist whatever changed here or in __setstate__
            job.save()
            loaded_jobs.append(job)
        except Exception as e:
            failed += 1
            if self.verbose:
                describe(e, dir_name)

    # DatasetJobs go first ...
    self.jobs.extend(
        job for job in loaded_jobs if isinstance(job, DatasetJob))

    # ... then the ModelJobs
    for job in loaded_jobs:
        if not isinstance(job, ModelJob):
            continue
        try:
            # load the DatasetJob
            job.load_dataset()
            self.jobs.append(job)
        except Exception as e:
            failed += 1
            if self.verbose:
                describe(e, job.id())

    if self.verbose and failed > 0:
        print('%s %s %s' % ('WARNING:', failed, 'jobs failed to load.'))