Example #1
def init_dist(launcher, backend='nccl', **kwargs):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError(f'Invalid launcher type: {launcher}')
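Most snippets on this page omit their imports. A minimal sketch of the module-level setup they assume (the names os, torch, dist and mp are taken from the code itself):

import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp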
Example #2
def init_dist(backend='nccl', **kwargs):
    ''' initialization for distributed training'''
    # if mp.get_start_method(allow_none=True) is None:
    if mp.get_start_method(allow_none=True) != 'spawn':  # returns the name of the start method used for starting processes
        mp.set_start_method('spawn', force=True)  # 'spawn' is the default on Windows
    rank = int(os.environ['RANK'])  # process rank from the system environment
    num_gpus = torch.cuda.device_count()  # returns the number of GPUs available
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)  # initializes the default distributed process group
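A hypothetical end-to-end sketch of how this environment-variable variant is typically driven: a launcher such as torchrun --nproc_per_node=4 train.py exports RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT, and the script only has to call init_dist(). The __main__ wiring below is an assumption, not part of the original example:

# Assumed launch command (one process per GPU):
#   torchrun --nproc_per_node=4 train.py
import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def init_dist(backend='nccl', **kwargs):
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn', force=True)
    rank = int(os.environ['RANK'])  # exported by the launcher
    torch.cuda.set_device(rank % torch.cuda.device_count())
    dist.init_process_group(backend=backend, **kwargs)  # rendezvous via env://


if __name__ == '__main__':
    init_dist()
    print('rank {} of {}'.format(dist.get_rank(), dist.get_world_size()))
    dist.destroy_process_group()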
Example #3
def init_dist(backend='nccl', **kwargs):
    ''' initialization for distributed training'''
    # if mp.get_start_method(allow_none=True) is None:
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn')
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #4
def init_dist(backend='nccl', **kwargs):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    #os.environ['MASTER_ADDR'] = '10.1.114.10'
    #os.environ['MASTER_PORT'] = '29500'
    #os.environ['RANK'] = '0'
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #5
def init_dist(launcher, backend='nccl', **kwargs):
    ''' initialization for distributed training'''
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError('Invalid launcher type: {}'.format(launcher))
Example #6
def init_dist(backend, **kwargs):
    # These packages have globals that screw with Windows, so only import them if needed.
    import torch.distributed as dist
    import torch.multiprocessing as mp
    """initialization for distributed training"""
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn')
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #7
def init_dist(backend='nccl', master_ip='127.0.0.1', port=29500):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    os.environ['MASTER_ADDR'] = master_ip
    os.environ['MASTER_PORT'] = str(port)
    rank = int(os.environ['RANK'])
    world_size = int(os.environ['WORLD_SIZE'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend)
    return rank, world_size
Example #8
def init_dist(launcher='pytorch', backend='nccl', **kwargs):
    if dist.is_initialized():
        return torch.cuda.current_device()
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    gpu_id = rank % num_gpus
    torch.cuda.set_device(gpu_id)
    dist.init_process_group(backend=backend, **kwargs)
    return gpu_id
Example #9
def init_distributed_mode():
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')

    is_slurm_job = "SLURM_JOB_ID" in os.environ
    if is_slurm_job:
        _init_dist_slurm()
    else:
        _init_dist_pytorch()

    return get_dist_info()
Example #10
def init_dist(launcher, backend="nccl", **kwargs):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method("spawn")
    if launcher == "pytorch":
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == "mpi":
        _init_dist_mpi(backend, **kwargs)
    elif launcher == "slurm":
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError("Invalid launcher type: {}".format(launcher))
Example #11
def init_environment(cfg):
    @master_only
    def _pprint_cfg():
        pprint(dict(cfg))

    if mp.get_start_method(allow_none=True) != "forkserver":
        mp.set_start_method("forkserver")
    colorama.init()
    _init_dist_and_device(cfg)
    torch.manual_seed(cfg.seed)
    np.random.seed(cfg.seed)
    _pprint_cfg()
Example #12
def init_dist(backend='nccl', rank=0):
    ''' initialization for distributed training'''
    # if mp.get_start_method(allow_none=True) is None:
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn')
    # rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend,
                            init_method="tcp://127.0.0.1:23571",
                            world_size=num_gpus,
                            rank=rank)
Example #13
def init_dist_pytorch(tcp_port, local_rank, backend='nccl'):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')

    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(local_rank % num_gpus)
    dist.init_process_group(backend=backend,
                            init_method='tcp://127.0.0.1:%d' % tcp_port,
                            rank=local_rank,
                            world_size=num_gpus)
    rank = dist.get_rank()
    return num_gpus, rank
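A hypothetical call site for the helper above, assuming one process per GPU is spawned by torch.distributed.launch/torchrun and the local rank arrives as a command-line argument; the argparse wiring and the port value are assumptions:

import argparse

# init_dist_pytorch is the function defined in the example above
parser = argparse.ArgumentParser()
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()

num_gpus, rank = init_dist_pytorch(tcp_port=18888, local_rank=args.local_rank)
print('initialized rank {} of {} GPUs'.format(rank, num_gpus))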
Example #14
def init_dist(launcher, backend='nccl', **kwargs):
    if (mp.get_start_method(allow_none=True) is None):
        mp.set_start_method('spawn')
    if (launcher == 'pytorch'):
        _init_dist_pytorch(backend, **kwargs)
    elif (launcher == 'mpi'):
        _init_dist_mpi(backend, **kwargs)
    elif (launcher == 'slurm'):
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError('Invalid launcher type: {}'.format(launcher))
Example #15
def main():
    args = Options().parse()
    torch.backends.cudnn.benchmark = True
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    init_dist('pytorch', backend=args.dist_backend)
    logger = get_root_logger('INFO')
    args.lr = args.lr * dist.get_world_size()
    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)
    main_worker(args)
Example #16
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = torch.device('cpu')
    num_action = 2
    num_state = 4
    num_process = 5

    global_Actor = NeuralNet.ActorNet(inputs=num_state,
                                      outputs=num_action,
                                      num_hidden_layers=2,
                                      hidden_dim=8).to(device)
    #summary(global_Actor, input_size=(10,num_state))
    global_Critic = NeuralNet.CriticNet(inputs=num_state,
                                        outputs=1,
                                        num_hidden_layers=2,
                                        hidden_dim=8).to(device)
    #summary(global_Critic, input_size=(10,num_state))
    batch_size = 64
    GAMMA = 0.95
    max_episodes = 5000
    max_step = 1000
    global_Actor.share_memory()
    global_Critic.share_memory()

    processes = []
    processes_socket = []
    processes_agent = []
    mp.set_start_method('spawn')
    print("MP start method:", mp.get_start_method())

    ip = '110.76.78.109'
    port = 1111
    for rank in range(num_process):
        processes_socket.append(0)
        processes_socket[rank] = ClientSocket.MySocket(port, 'f', 'ffff?f')
        processes_agent.append(0)
        processes_agent[rank] = Agent.Brain(GlobalActorNet=global_Actor,
                                            GlobalCriticNet=global_Critic,
                                            device=device,
                                            socket=processes_socket[rank],
                                            num_action=num_action,
                                            max_episodes=max_episodes,
                                            max_step=max_step,
                                            batch_size=batch_size,
                                            GAMMA=GAMMA)
        p = mp.Process(target=processes_agent[rank].train, args=())
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
Example #17
def init_dist_pytorch(batch_size, tcp_port, local_rank, backend='nccl'):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')

    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(local_rank % num_gpus)
    dist.init_process_group(backend=backend,
                            init_method='tcp://127.0.0.1:%d' % tcp_port,
                            rank=local_rank,
                            world_size=num_gpus)
    assert batch_size % num_gpus == 0, 'Batch size should be matched with GPUS: (%d, %d)' % (
        batch_size, num_gpus)
    batch_size_each_gpu = batch_size // num_gpus
    rank = dist.get_rank()
    return batch_size_each_gpu, rank
Example #18
def init_dist(launcher, backend='nccl', **kwargs):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
    elif launcher == 'infimpi':
        set_environment_variables_for_nccl_backend(
            ompi_size() == ompi_local_size())
        _init_dist_infimpi(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError('Invalid launcher type: {}'.format(launcher))
Example #19
def init_dist_pytorch(tcp_port, local_rank, backend='nccl'):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    # os.environ['MASTER_PORT'] = str(tcp_port)
    # os.environ['MASTER_ADDR'] = 'localhost'
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(local_rank % num_gpus)

    dist.init_process_group(
        backend=backend,
        # init_method='tcp://127.0.0.1:%d' % tcp_port,
        # rank=local_rank,
        # world_size=num_gpus
    )
    rank = dist.get_rank()
    return num_gpus, rank
Example #20
    def __init__(
        self,
        env,
        policy,
        num_workers: int,
        *,
        min_rollouts: int = None,
        min_steps: int = None,
        show_progress_bar: bool = True,
        seed: int = NO_SEED_PASSED,
    ):
        """
        Constructor

        :param env: environment to sample from
        :param policy: policy to act in the environment (can also be an exploration strategy)
        :param num_workers: number of parallel samplers
        :param min_rollouts: minimum number of complete rollouts to sample
        :param min_steps: minimum total number of steps to sample
        :param show_progress_bar: if `True`, display a progress bar using `tqdm`
        :param seed: seed value for the random number generators, pass `None` for no seeding; defaults to the last seed
                     that was set with `pyrado.set_seed`
        """
        Serializable._init(self, locals())
        super().__init__(min_rollouts=min_rollouts, min_steps=min_steps)

        self.env = env
        self.policy = policy
        self.show_progress_bar = show_progress_bar

        # Set method to spawn if using cuda
        if mp.get_start_method(allow_none=True) != "spawn":
            mp.set_start_method("spawn", force=True)

        # Create parallel pool. We use one thread per env because it's easier.
        self.pool = SamplerPool(num_workers)

        if seed is NO_SEED_PASSED:
            seed = pyrado.get_base_seed()
        self._seed = seed
        # Initialize with -1 such that we start with the 0-th sample. Incrementing after sampling may cause issues when
        # the sampling crashes and the sample count is not incremented.
        self._sample_count = -1

        # Distribute environments. We use pickle to make sure a copy is created for n_envs=1
        self.pool.invoke_all(_ps_init, pickle.dumps(self.env),
                             pickle.dumps(self.policy))
Example #21
    def test_dataloader(self):
        dataset = Dataset(
            data=[{"img": np.array([[[0.0, 1.0], [2.0, 3.0]]])}, {"img": np.array([[[0.0, 1.0], [2.0, 3.0]]])}],
            transform=IntensityStatsd(keys="img", ops=["max", "mean"], key_prefix="orig"),
        )
        # set num workers = 0 for mac / win
        num_workers = 2 if sys.platform == "linux" else 0
        dataloader = DataLoader(dataset=dataset, num_workers=num_workers, batch_size=2)
        orig_method = mp.get_start_method()
        mp.set_start_method("spawn", force=True)

        for d in dataloader:
            meta = d[PostFix.meta("img")]
            np.testing.assert_allclose(meta["orig_max"], [3.0, 3.0], atol=1e-3)
            np.testing.assert_allclose(meta["orig_mean"], [1.5, 1.5], atol=1e-3)
        # restore the mp method
        mp.set_start_method(orig_method, force=True)
Example #22
def init_dist(opt, local_rank):
    """ Adopted from BasicSR
    """
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl')

    rank, world_size = get_dist_info()

    opt.update({
        'dist': True,
        'device': 'cuda',
        'local_rank': local_rank,
        'world_size': world_size,
        'rank': rank
    })
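A hypothetical call site for the BasicSR-style helper above, assuming one process per GPU with RANK, WORLD_SIZE, MASTER_ADDR, MASTER_PORT and LOCAL_RANK exported by the launcher, and assuming get_dist_info comes from the surrounding project; opt here is a plain dict that init_dist fills in place:

import os

opt = {}
local_rank = int(os.environ.get('LOCAL_RANK', 0))
init_dist(opt, local_rank)
print(opt['rank'], opt['world_size'], opt['device'])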
Example #23
def setup_multi_processes(cfg, workers_per_gpu):
    """Setup multi-processing environment variables."""
    logger = get_root_logger()

    # set multi-process start method
    if platform.system() != 'Windows':
        mp_start_method = cfg.get('mp_start_method', None)
        current_method = mp.get_start_method(allow_none=True)
        if mp_start_method in ('fork', 'spawn', 'forkserver'):
            logger.info(
                f'Multi-processing start method `{mp_start_method}` is '
                f'different from the previous setting `{current_method}`. '
                f'It will be force set to `{mp_start_method}`.')
            mp.set_start_method(mp_start_method, force=True)
        else:
            logger.info(
                f'Multi-processing start method is `{mp_start_method}`')

    # disable opencv multithreading to avoid system being overloaded
    opencv_num_threads = cfg.get('opencv_num_threads', None)
    if isinstance(opencv_num_threads, int):
        logger.info(f'OpenCV num_threads is `{opencv_num_threads}`')
        cv2.setNumThreads(opencv_num_threads)
    else:
        logger.info(f'OpenCV num_threads is `{cv2.getNumThreads()}`')

    if workers_per_gpu > 1:
        # setup OMP threads
        # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py  # noqa
        omp_num_threads = cfg.get('omp_num_threads', None)
        if 'OMP_NUM_THREADS' not in os.environ:
            if isinstance(omp_num_threads, int):
                logger.info(f'OMP num threads is {omp_num_threads}')
                os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)
        else:
            logger.info(f'OMP num threads is {os.environ["OMP_NUM_THREADS"] }')

        # setup MKL threads
        if 'MKL_NUM_THREADS' not in os.environ:
            mkl_num_threads = cfg.get('mkl_num_threads', None)
            if isinstance(mkl_num_threads, int):
                logger.info(f'MKL num threads is {mkl_num_threads}')
                os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)
        else:
            logger.info(f'MKL num threads is {os.environ["MKL_NUM_THREADS"]}')
Example #24
def setup_multi_processes(cfg):
    """Setup multi-processing environment variables."""
    # set multi-process start method as `fork` to speed up the training
    if platform.system() != 'Windows':
        mp_start_method = cfg.get('mp_start_method', 'fork')
        current_method = mp.get_start_method(allow_none=True)
        if current_method is not None and current_method != mp_start_method:
            warnings.warn(
                f'Multi-processing start method `{mp_start_method}` is '
                f'different from the previous setting `{current_method}`. '
                f'It will be force set to `{mp_start_method}`. You can change '
                f'this behavior by changing `mp_start_method` in your config.')
        mp.set_start_method(mp_start_method, force=True)

    # disable opencv multithreading to avoid system being overloaded
    opencv_num_threads = cfg.get('opencv_num_threads', 0)
    cv2.setNumThreads(opencv_num_threads)

    # setup OMP threads
    # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py  # noqa
    workers_per_gpu = cfg.data.get('workers_per_gpu', 1)
    if 'train_dataloader' in cfg.data:
        workers_per_gpu = \
            max(cfg.data.train_dataloader.get('workers_per_gpu', 1),
                workers_per_gpu)

    if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1:
        omp_num_threads = 1
        warnings.warn(
            f'Setting OMP_NUM_THREADS environment variable for each process '
            f'to be {omp_num_threads} in default, to avoid your system being '
            f'overloaded, please further tune the variable for optimal '
            f'performance in your application as needed.')
        os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)

    # setup MKL threads
    if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1:
        mkl_num_threads = 1
        warnings.warn(
            f'Setting MKL_NUM_THREADS environment variable for each process '
            f'to be {mkl_num_threads} in default, to avoid your system being '
            f'overloaded, please further tune the variable for optimal '
            f'performance in your application as needed.')
        os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)
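A minimal sketch of how this helper might be invoked, assuming an mmcv-style Config whose keys mirror the ones read above (mp_start_method, opencv_num_threads, data.workers_per_gpu); the concrete values are placeholders:

from mmcv import Config

cfg = Config(dict(
    mp_start_method='fork',        # forced on non-Windows platforms
    opencv_num_threads=0,          # disable OpenCV threading
    data=dict(workers_per_gpu=2),  # > 1 triggers the OMP/MKL defaults
))
setup_multi_processes(cfg)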
Example #25
def initialSettings(port, backend='nccl'):
    method = mp.get_start_method(allow_none=True)
    if method is None:
        mp.set_start_method('spawn')
    
    logger.info('multiprocessing start method:{}'.format(method))
    procId = int(os.environ.get('SLURM_PROCID'))
    numOfTasks = int(os.environ.get('SLURM_NTASKS'))
    nodeList = os.environ.get('SLURM_JOB_NODELIST')

    numOfGPUs = torch.cuda.device_count()
    torch.cuda.set_device(procId % numOfGPUs)

    if '[' in nodeList:
        beg = nodeList.find('[')
        pos1 = nodeList.find('-', beg)
        
        if pos1 < 0:
            pos1 = 1000
        pos2 = nodeList.find(',', beg)
        if pos2 < 0:
            pos2 = 1000
        
        nodeList = nodeList[:min(pos1, pos2)].replace('[', '')
    
    addr = nodeList[8:].replace('-', '.')
    os.environ['MASTER_PORT'] = port
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(numOfTasks)
    os.environ['RANK'] = str(procId)

    if backend == 'nccl':
        distributed.init_process_group(backend='nccl')
    else:
        distributed.init_process_group(backend='gloo', rank=procId, world_size=numOfTasks)
    
    rank = distributed.get_rank()
    worldSize = distributed.get_world_size()

    return rank, worldSize
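A hypothetical SLURM launch for this variant; the srun flags below are assumptions, while the function itself only reads SLURM_PROCID, SLURM_NTASKS and SLURM_JOB_NODELIST and expects port as a string:

# Assumed launch, one task per GPU across two nodes:
#   srun -N 2 --ntasks-per-node=4 --gres=gpu:4 python train.py
rank, worldSize = initialSettings(port='29500', backend='nccl')
print('rank {} / world size {}'.format(rank, worldSize))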
Example #26
def main():
    parser = utils.prepare_parser()
    parser = utils.add_dgp_parser(parser)
    config = vars(parser.parse_args())
    utils.dgp_update_config(config)
    print(config)

    rank = 0
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn', force=True)
    if config['dist']:
        rank, world_size = dist_init(config['port'])

    # Seed RNG
    utils.seed_rng(rank + config['seed'])

    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    # train
    trainer = Trainer(config)
    trainer.run()
Example #27
def init_dist(distributed=True,
              backend='nccl',
              master_ip='tcp://127.0.0.1',
              port=6501):
    if not distributed:
        return

    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    global gpu_id
    os.environ['MASTER_ADDR'] = master_ip
    os.environ['MASTER_PORT'] = str(port)
    rank = int(os.environ['RANK'])
    world_size = int(os.environ['WORLD_SIZE'])
    num_gpus = torch.cuda.device_count()
    gpu_id = rank % num_gpus
    torch.cuda.set_device(gpu_id)
    dist_url = master_ip + ':' + str(port)
    dist.init_process_group(backend=backend, init_method=dist_url, \
        world_size=world_size, rank=rank)
    print("dist initialized. master_ip: %s, port: %s, rank: %d/%d" % \
        (master_ip, str(port), rank, world_size))
    return rank, world_size
Example #28
def main():

    # get local rank from distributed launcher
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int)
    parser.add_argument('--world_size', type=int)
    args = parser.parse_args()
    print('what is the rank of the current program: ')
    print(args.local_rank)
    print('world size: ')
    print(args.world_size)

    # initialize dist
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    torch.cuda.set_device(int(args.local_rank))
    dist.init_process_group(backend='nccl', init_method='env://')

    # define dataset
    dataset = DentalDataset(
        num_class=NUM_CLASS,
        ann_file=ANN_FILE,
        img_prefix=IMG_PREFIX,
        img_scale=IMG_SCALE,
        img_norm_cfg=IMG_TRANSFORM_CONFIG,
        multiscale_mode='value',
        flip_ratio=FLIP_RATIO,
        with_label=False,
        extra_aug=None,
        test_mode=True,
    )

    # sampler for make number of samples % number of gpu == 0
    sampler = NewDistributedSampler(dataset=dataset,
                                    num_replicas=args.world_size,
                                    images_per_gpu=IMGS_PER_GPU,
                                    rank=args.local_rank,
                                    shuffle=False)

    # data loader. Note this is the code for one (each) gpu.
    data_loader = DataLoader(
        dataset=dataset,
        batch_size=IMGS_PER_GPU,
        # when sampler is given, shuffle must be False.
        shuffle=False,
        sampler=sampler,
        batch_sampler=None,
        num_workers=WORKERS_PER_GPU,
        collate_fn=partial(collate, samples_per_gpu=IMGS_PER_GPU),
        pin_memory=False,
        drop_last=False,
        timeout=0,
        worker_init_fn=None,
    )

    # define the model and restore checkpoint
    model = SSDDetector(
        # basic
        input_size=IMG_SCALE,
        num_classes=NUM_CLASS,
        in_channels=(512, 1024, 512, 256, 256),
        use_dropout=False,
        dropout_rate=None,
        # anchor generate
        anchor_ratios=([1 / 2.0, 1.0,
                        2.0], [1 / 3.0, 1 / 2.0, 1.0, 2.0,
                               3.0], [1 / 3.0, 1 / 2.0, 1.0, 2.0,
                                      3.0], [1 / 3.0, 1 / 2.0, 1.0, 2.0,
                                             3.0], [1 / 2.0, 1.0, 2.0]),
        anchor_strides=((16, 16), (16, 16), (30, 30), (60, 60), (100, 100)),
        basesizes=((12, 12), (16, 16), (24, 24), (30, 30), (36, 36)),
        allowed_border=-1,
        # regression
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2),
        # box assign
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        gt_max_assign_all=False,
        # sampling
        sampling=False,
        # balancing the loss
        neg_pos_ratio=3,
        # loss
        smoothl1_beta=1.,
        # inference nms
        nms_pre=-1,
        score_thr=0.02,
        min_size=100.0,
        max_scale_ratio=10.0,
        nms_cfg=['nms', 0.45, None],
        max_per_img=200,
        # device
        device=None,
    )
    model.cuda(args.local_rank)
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.local_rank], output_device=args.local_rank)
    if hasattr(model, 'module'):
        model = model.module

    # load checkpoint
    loc = 'cuda:{}'.format(args.local_rank)
    checkpoint = torch.load(CHECKPOINT_FILE, map_location=loc)
    # optimizer.state_dict -> state, param_groups
    # state -> var series number -> step / exp_avg / exp_avg_sq
    # param_groups -> lr / betas / eps / weight_decay / amsgrad / params
    model.load_state_dict(checkpoint['state_dict'], strict=True)

    # enable dropout during inference
    model.eval()
    # for m in model.modules():
    #     if m.__class__.__name__.startswith('Dropout'):
    #         m.train()

    # results and progress bar
    results = []
    if args.local_rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))

    # enumerate all data
    for i, data_pair in enumerate(data_loader):
        data_pair_img = data_pair['img'].data[0].cuda(args.local_rank,
                                                      non_blocking=True)
        data_pair_img_meta = data_pair['img_meta'].data[0]

        with torch.no_grad():
            result = model(
                is_test=True,
                img=data_pair_img,
                img_meta=data_pair_img_meta,
                rescale=True,
            )
        results.extend(result)

        # update program bar only if it is rank 0.
        if args.local_rank == 0:
            for _ in range(IMGS_PER_GPU * args.world_size):
                prog_bar.update()

    # collect results from all gpus
    results = collect_results(result_part=results,
                              dataset_real_size=len(dataset),
                              tmpdir=TMPDIR)

    # write results to file
    # [Number of images, Number of classes, (k, 5)].
    # 5 for t, l, b, r, and prob.
    if args.local_rank == 0:
        print('\nwriting results to {}'.format(OUT_FILE))
        mmcv.dump(results, OUT_FILE)
Example #29
    def __init__(self, args, model, optimizer, train_loader, val_loader,
                 input_train_transform, input_val_transform, output_transform, losses, scheduler=None):

        # Allow multiple processes to access tensors on GPU. Add checking for multiple continuous runs.
        if multiprocessing.get_start_method(allow_none=True) is None:
            multiprocessing.set_start_method(method='spawn')

        self.logger = get_logger(name=__name__, save_file=args.log_path / args.run_name)

        # Checking whether inputs are correct.
        assert isinstance(model, nn.Module), '`model` must be a Pytorch Module.'
        assert isinstance(optimizer, optim.Optimizer), '`optimizer` must be a Pytorch Optimizer.'
        assert isinstance(train_loader, DataLoader) and isinstance(val_loader, DataLoader), \
            '`train_loader` and `val_loader` must be Pytorch DataLoader objects.'

        assert callable(input_train_transform) and callable(input_val_transform), \
            'input_transforms must be callable functions.'
        # I think this would be best practice.
        assert isinstance(output_transform, nn.Module), '`output_transform` must be a Pytorch Module.'

        # 'losses' is expected to be a dictionary.
        # Even composite losses should be a single loss module with a dictionary output.
        losses = nn.ModuleDict(losses)

        if scheduler is not None:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                self.metric_scheduler = True
            elif isinstance(scheduler, optim.lr_scheduler._LRScheduler):
                self.metric_scheduler = False
            else:
                raise TypeError('`scheduler` must be a Pytorch Learning Rate Scheduler.')

        # Display interval of 0 means no display of validation images on TensorBoard.
        if args.max_images <= 0:
            self.display_interval = 0
        else:
            self.display_interval = int(len(val_loader.dataset) // (args.max_images * args.batch_size))

        self.checkpointer = CheckpointManager(model, optimizer, mode='min', save_best_only=args.save_best_only,
                                              ckpt_dir=args.ckpt_path, max_to_keep=args.max_to_keep)

        # loading from checkpoint if specified.
        if vars(args).get('prev_model_ckpt'):
            self.checkpointer.load(load_dir=args.prev_model_ckpt, load_optimizer=False)

        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.input_train_transform = input_train_transform
        self.input_val_transform = input_val_transform
        self.output_transform = output_transform
        self.losses = losses
        self.scheduler = scheduler

        self.verbose = args.verbose
        self.num_epochs = args.num_epochs
        self.smoothing_factor = args.smoothing_factor
        self.use_slice_metrics = args.use_slice_metrics
        self.img_lambda = torch.tensor(args.img_lambda, dtype=torch.float32, device=args.device)
        self.writer = SummaryWriter(str(args.log_path))
Example #30
def main():

    # get local rank from distributed launcher
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int)
    args = parser.parse_args()
    print('what is the rank of the current program: ')
    print(args.local_rank)

    # initialize dist
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    rank = int(args.local_rank)
    torch.cuda.set_device(rank)
    dist.init_process_group(backend='nccl', init_method='env://')

    # define dataset
    dataset = DentalClassDataset(
        ann_file=ann_file,
        img_prefix=img_prefix,
        img_scale=img_scale,
        img_norm_cfg=img_transform_cfg,
        multiscale_mode='value',   # select a scale, rather than random from a range.
        flip_ratio=flip_ratio,
        with_label=False,
        extra_aug=None,
        test_mode=True,
    )

    # sampler for make number of samples % number of gpu == 0
    rank, world_size = get_dist_info()
    sampler = NewDistributedSampler(
        dataset=dataset,
        num_replicas=world_size,
        images_per_gpu=imgs_per_gpu,
        rank=rank,
        shuffle=False
    )

    # data loader. Note this is the code for one (each) gpu.
    batch_size = imgs_per_gpu
    num_workers = workers_per_gpu
    data_loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        # when sampler is given, shuffle must be False.
        shuffle=False,
        sampler=sampler,
        batch_sampler=None,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
        pin_memory=False,
        drop_last=False,
        timeout=0,
        worker_init_fn=None,
    )

    # define the model and restore checkpoint
    model = VGGClassifier(
        with_bn=False,
        num_classes=len(dataset.CLASSES),
        num_stages=5,
        dilations=(1, 1, 1, 1, 1),
        out_indices=(30,),
        frozen_stages=-1,
        bn_eval=True,
        bn_frozen=False,
        ceil_mode=True,
        with_last_pool=True,
        dimension_before_fc=(10, 15),
        dropout_rate=0.5,
        pos_loss_weights=torch.tensor((15, 8), dtype=torch.float32, device=torch.device('cuda', rank)),
    )

    checkpoint = load_checkpoint(
        model=model,
        filename=checkpoint_file,
        map_location='cpu',
        strict=False,
        logger=None
    )

    # define classes
    model.CLASSES = checkpoint['meta']['CLASSES']

    # parallelize model
    model = model.cuda()
    model = MMDistributedDataParallel(
        module=model,
        dim=0,
        broadcast_buffers=True,
        bucket_cap_mb=25
    )
    model.eval()

    # results and progress bar
    results = []
    dataset = data_loader.dataset

    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))

    # enumerate all data
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(is_test=True, rescale=True, **data)
        results.extend(result)

        # update program bar only if it is rank 0.
        if rank == 0:
            batch_size = data['img'].size(0)
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all gpus
    results = collect_results(
        result_part=results,
        dataset_real_size=len(dataset),
        tmpdir=tmpdir
    )

    # write results to file
    # [Number of images, Number of classes, (k, 5)].
    # 5 for t, l, b, r, and prob.
    if rank == 0:
        print('\nwriting results to {}'.format(out_file))
        mmcv.dump(results, out_file+'.pickle')