Exemplo n.º 1
0
    def launch_process(cls, index, opt, model_agent, process_queue):
        """
        Entry point for one background worker: build a world and parley forever.

        The incoming ``opt`` is deep-copied before being tagged with this
        worker's index, so the caller's options are left untouched.

        :param index:
            identifier of this worker; stored under ``opt['background_index']``
            and included in log messages.
        :param opt:
            options used to construct the world (copied, not mutated).
        :param model_agent:
            forwarded to the world constructor as ``model_agent``.
        :param process_queue:
            forwarded to the world constructor as ``process_queue``.

        :raises Exception:
            any exception raised while constructing the world or parleying is
            logged at critical level with its traceback and then re-raised.
        """
        import torch

        # keep this worker single-threaded as far as torch is concerned
        torch.set_num_threads(1)
        logging.info(f"Launching background on Index {index}")

        worker_opt = copy.deepcopy(opt)
        worker_opt['background_index'] = index

        try:
            background_world = cls(
                worker_opt, model_agent=model_agent, process_queue=process_queue
            )
            # The loop has no exit condition of its own; it only ends when an
            # exception propagates out of parley().
            while True:
                background_world.parley()
        except Exception:
            from traceback import format_exc

            trace_text = format_exc()
            message = (
                f'Exception on background preprocesser index {index}!\n' + trace_text
            )
            logging.critical(message)
            raise
Exemplo n.º 2
0
def multiprocess_train(rank,
                       opt,
                       port=61337,
                       rank_offset=0,
                       gpu=None,
                       hostname='localhost'):
    """
    Run one training loop inside a distributed context.

    :param rank:
        rank handed to ``distributed_utils.distributed_context``; also used
        (together with ``rank_offset``) to identify the worker in error logs.
    :param opt:
        options passed to the distributed context; the options object the
        context yields is the one used for training.
    :param port:
        TCP port used to build the ``tcp://hostname:port`` init method.
    :param rank_offset:
        forwarded to the distributed context and added to ``rank`` when
        reporting the failing worker.
    :param gpu:
        forwarded unchanged to the distributed context.
    :param hostname:
        host used to build the ``tcp://hostname:port`` init method.

    :return:
        whatever ``single_train.TrainLoop(opt).train()`` returns.

    :raises Exception:
        any exception from the training loop is logged at critical level
        (traceback first, then a hang warning) and re-raised.
    """
    tcp_address = f"tcp://{hostname}:{port}"
    context = distributed_utils.distributed_context(
        rank, opt, rank_offset, gpu, init_method=tcp_address
    )
    with context as dist_opt:
        # Mark the options so the training loop knows it runs as a worker.
        dist_opt['multiprocessing'] = True
        try:
            outcome = single_train.TrainLoop(dist_opt).train()
        except Exception:
            import parlai.utils.logging as logging

            logging.critical(traceback.format_exc())
            worker_id = rank + rank_offset
            logging.critical(
                f"Got the above exception on worker {worker_id}. "
                "This may cause hangs requiring manual killing of processes.")
            raise
        else:
            return outcome