def launch_process(cls, index, opt, model_agent, process_queue):
    """
    Entry point for one background preprocessing worker.

    Deep-copies ``opt`` (so mutation stays local to this process), records this
    worker's index under ``opt['background_index']``, builds the world, and runs
    ``world.parley()`` forever. Any exception is logged with its full traceback
    and re-raised so the parent process can observe the failure.

    :param index: integer index identifying this background worker.
    :param opt: options dict; deep-copied before mutation.
    :param model_agent: agent handed to the constructed world.
    :param process_queue: queue handed to the constructed world
        (presumably for inter-process handoff — semantics live in ``cls``).
    """
    import torch

    # Prevent torch from spawning extra threads inside this worker.
    torch.set_num_threads(1)
    logging.info(f"Launching background on Index {index}")
    opt = copy.deepcopy(opt)
    opt['background_index'] = index
    try:
        world = cls(opt, model_agent=model_agent, process_queue=process_queue)
        while True:
            world.parley()
    except Exception:
        import traceback

        error = traceback.format_exc()
        # Fixed typo in the log message: "preprocesser" -> "preprocessor".
        logging.critical(
            f'Exception on background preprocessor index {index}!\n' + error
        )
        raise
def multiprocess_train(
    rank, opt, port=61337, rank_offset=0, gpu=None, hostname='localhost'
):
    """
    Run a single distributed-training worker.

    Builds the TCP rendezvous address from ``hostname``/``port``, enters the
    distributed context for this rank, marks the resulting opt as
    multiprocessing, and runs the standard ``TrainLoop``. On any exception the
    full traceback is logged before re-raising, since a dead worker may leave
    its siblings hanging.

    :param rank: this worker's rank within the group.
    :param opt: training options dict (rebound by the distributed context).
    :param port: TCP port for the init rendezvous.
    :param rank_offset: added to ``rank`` when reporting which worker failed.
    :param gpu: GPU selection forwarded to the distributed context.
    :param hostname: host of the rendezvous endpoint.
    """
    address = f"tcp://{hostname}:{port}"
    with distributed_utils.distributed_context(
        rank, opt, rank_offset, gpu, init_method=address
    ) as opt:
        # Run the actual training loop for this worker.
        opt['multiprocessing'] = True
        try:
            return single_train.TrainLoop(opt).train()
        except Exception:
            import parlai.utils.logging as logging

            logging.critical(traceback.format_exc())
            logging.critical(
                f"Got the above exception on worker {rank + rank_offset}. "
                "This may cause hangs requiring manual killing of processes."
            )
            raise