def finish(engine):
    """Release everything a training run holds for ``engine``.

    Revokes the background task, force-kills the training process, frees
    the GPU and commits the cleared fields. Each step is skipped when the
    corresponding field on ``engine`` is already ``None``.

    Args:
        engine: Object exposing ``bg_task_id``, ``pid`` and ``gid``; each is
            reset to ``None`` once its resource has been released.
    """
    import os
    import signal

    if engine.bg_task_id is not None:
        revoke(engine.bg_task_id, terminate=True)
        engine.bg_task_id = None

    if engine.pid is not None:
        try:
            # Signal the process directly rather than spawning `kill -9`
            # through a shell: no command string to build, no child process
            # left behind unwaited.
            os.kill(int(engine.pid), signal.SIGKILL)
        except OSError:
            # Best effort, as before: the process may already have exited
            # or may not be ours to signal.
            pass
        engine.pid = None

    if engine.gid is not None:
        GPUManager.free_device(engine.gid)
        engine.gid = None

    db.session.commit()
def train_engine(self, engine_id, is_admin):
    """Train an engine by running JoeyNMT as a subprocess and supervising it.

    Marks the engine as "launching", waits for a GPU, starts
    ``python3 -m joeynmt train`` on the engine's ``config.yaml`` and polls
    the process every 10 seconds. Training is stopped once it has been
    running for one hour. The GPU is released when the function exits,
    whether it returns normally or raises.

    Args:
        engine_id: ``id`` of the ``Engine`` row to train.
        is_admin: Forwarded to ``GPUManager.wait_for_available_device``.
    """
    max_training_seconds = 3600  # Trainings are limited to 1 hour
    poll_interval_seconds = 10

    engine = Engine.query.filter_by(id=engine_id).first()
    engine.status = "launching"
    db.session.commit()

    gpu_id = GPUManager.wait_for_available_device(is_admin=is_admin)
    engine.gid = gpu_id
    db.session.commit()

    try:
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

        running_joey = subprocess.Popen(
            [
                "python3", "-m", "joeynmt", "train", "-t",
                os.path.join(engine.path, "config.yaml"),
            ],
            cwd=app.config['JOEYNMT_FOLDER'],
            env=env,
        )

        engine.status = "training"
        engine.pid = running_joey.pid
        db.session.commit()

        # Monotonic clock: the time limit must not shrink or stretch if the
        # system clock is adjusted while training runs.
        started = time.monotonic()
        while time.monotonic() - started < max_training_seconds:
            time.sleep(poll_interval_seconds)
            if running_joey.poll() is not None:
                # JoeyNMT finished (or died) before timeout
                db.session.refresh(engine)
                if engine.status not in ("stopped", "stopped_admin"):
                    Trainer.stop(engine_id)
                # The GPU is released by the finally clause below; freeing
                # it here as well would release the same device twice.
                return

        # Time limit reached: stop the training if it is still running
        if running_joey.poll() is None:
            Trainer.stop(engine_id)
    finally:
        # Release the GPU first so a database error below cannot leak it
        GPUManager.free_device(gpu_id)
        # Keep "stopped_admin" distinguishable from a regular stop
        if engine.status != "stopped_admin":
            engine.status = "stopped"
        db.session.commit()
def load(self):
    """Build the model, load its checkpoint weights and place it on a GPU.

    Returns:
        ``True`` when the model is ready. ``False`` when CUDA is requested
        but ``GPUManager.wait_for_available_device`` returned ``None``.
    """
    # Restore the saved parameters into a freshly built model
    checkpoint = load_checkpoint(self.ckpt, self.use_cuda)
    self.model = build_model(
        self.model_data,
        src_vocab=self.src_vocab,
        trg_vocab=self.trg_vocab,
    )
    self.model.load_state_dict(checkpoint["model_state"])

    # CPU-only: nothing more to do
    if not self.use_cuda:
        return True

    self.gpu_id = GPUManager.wait_for_available_device(is_admin=self.is_admin)
    if self.gpu_id is None:
        return False

    self.model.cuda(self.gpu_id)
    return True
def __del__(self):
    """Release the GPU held by this instance, if any, on finalization."""
    # A finalizer can run on an object whose gpu_id attribute was never
    # set, so look it up defensively instead of raising AttributeError.
    gpu_id = getattr(self, "gpu_id", None)
    if gpu_id is not None:
        GPUManager.free_device(gpu_id)
        # Forget the device so a repeated call cannot free it twice
        self.gpu_id = None
# Remove every RunningEngines row
for running_engine in models.RunningEngines.query.all():
    db.session.delete(running_engine)
db.session.commit()

TOPICS = [
    "General", "Technical", "Legal", "Financial", "Medical", "Religion",
    "Politics", "Administrative", "Subtitles", "Patents", "News", "Books",
    "Other"
]

# Add any topic from TOPICS that is not in the database yet
for topic in TOPICS:
    if models.Topic.query.filter_by(name=topic).first() is None:
        topic_obj = models.Topic(name=topic)
        db.session.add(topic_obj)
db.session.commit()

folders = [
    'USERSPACE_FOLDER', 'STORAGE_FOLDER', 'FILES_FOLDER', 'ENGINES_FOLDER',
    'USERS_FOLDER'
]

# Make sure each configured folder exists. exist_ok replaces the previous
# stat-then-mkdir under a bare `except:`, which swallowed every exception
# raised by the stat call.
for folder in folders:
    os.makedirs(app.config[folder], exist_ok=True)

from app.utils.GPUManager import GPUManager
GPUManager.scan_devices(reset=True, is_admin=True)