def __init__(
    self,
    use_gpu: Optional[bool] = None,
    num_dataloader_workers: int = 0,
    dataloader_mp_context: Optional[str] = None,
):
    """Constructor for DistributedTrainer.

    Args:
        use_gpu: If true, then use GPU for training (each worker uses the CUDA
            device matching its local rank). If None, then check if we have GPUs
            available, if we do then use GPU for training.
        num_dataloader_workers: Number of CPU processes doing dataloading
            per GPU. If 0, then dataloading is done on main thread.
        dataloader_mp_context: Determines how to launch
            new processes for dataloading. Must be one of "fork", "forkserver",
            "spawn". If None, process launching is inherited from parent.
    """
    super().__init__(
        use_gpu=use_gpu,
        num_dataloader_workers=num_dataloader_workers,
        dataloader_mp_context=dataloader_mp_context,
    )

    _init_env_vars()
    _init_distributed(self.use_gpu)
    logging.info(
        f"Done setting up distributed process_group with rank {get_rank()}"
        + f", world_size {get_world_size()}"
    )
    local_rank = int(os.environ["LOCAL_RANK"])
    if self.use_gpu:
        logging.info("Using GPU, CUDA device index: {}".format(local_rank))
        set_cuda_device_index(local_rank)
    else:
        logging.info("Using CPU")
        set_cpu_device()
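# Hedged usage sketch (an addition, not part of the source above): inside a worker process
# the trainer can be constructed directly. `build_task` exists in classy_vision.tasks, but
# the config dict here is only a placeholder for whatever task configuration a project uses.
from classy_vision.tasks import build_task
from classy_vision.trainer import DistributedTrainer

my_config = {...}  # placeholder: dataset / model / loss / optimizer configuration
task = build_task(my_config)
trainer = DistributedTrainer(use_gpu=None, num_dataloader_workers=2)  # auto-detects GPU
trainer.train(task)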
def __init__(
    self,
    use_gpu,
    num_dataloader_workers,
    elastic_coordinator,
    input_args,
    local_rank,
    dataloader_mp_context=None,
):
    super().__init__(
        use_gpu=use_gpu,
        num_dataloader_workers=num_dataloader_workers,
        dataloader_mp_context=dataloader_mp_context,
    )

    pid = os.getpid()
    if use_gpu:
        set_cuda_device_index(local_rank)
        device_idx = torch.cuda.current_device()
        log.info(f"initialized worker {local_rank} (pid={pid}, gpu={device_idx})")
        device_properties = torch.cuda.get_device_properties(device_idx)
        log.info(f"gpu device properties: {device_properties}")
    else:  # cpu
        set_cpu_device()
        log.info(f"initialized worker {local_rank} (pid={pid}, cpu)")

    self.elastic_coordinator = elastic_coordinator
    self.input_args = input_args
def __init__(
    self,
    use_gpu: Optional[bool] = None,
    num_dataloader_workers: int = 0,
    dataloader_mp_context: Optional[str] = None,
):
    """Constructor for LocalTrainer.

    Args:
        use_gpu: If true, then use GPU 0 for training.
            If None, then check if we have GPUs available, if we do
            then use GPU for training.
        num_dataloader_workers: Number of CPU processes doing dataloading
            per GPU. If 0, then dataloading is done on main thread.
        dataloader_mp_context: Determines how to launch
            new processes for dataloading. Must be one of "fork", "forkserver",
            "spawn". If None, process launching is inherited from parent.
    """
    super().__init__(
        use_gpu=use_gpu,
        num_dataloader_workers=num_dataloader_workers,
        dataloader_mp_context=dataloader_mp_context,
    )

    if self.use_gpu:
        logging.info("Using GPU, CUDA device index: {}".format(0))
        set_cuda_device_index(0)
    else:
        logging.info("Using CPU")
        set_cpu_device()
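# Hedged usage sketch for the single-process trainer above: same shape as the distributed
# example earlier, but no launcher or environment variables are involved.
from classy_vision.trainer import LocalTrainer

task = build_task(my_config)  # my_config: placeholder config, as in the earlier sketch
trainer = LocalTrainer(use_gpu=None, num_dataloader_workers=2)
trainer.train(task)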
def train(self, task):
    if task.use_gpu:
        logging.info("Using GPU, CUDA device index: {}".format(0))
        set_cuda_device_index(0)
    else:
        logging.info("Using CPU")
        set_cpu_device()

    super().train(task)
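# Hedged note on the variant above: the device choice now travels with the task rather than
# the trainer. Assuming a task type that exposes a setter for it, such as set_use_gpu on
# recent ClassyVision ClassificationTask (an assumption here), usage would look like:
#
#   task.set_use_gpu(torch.cuda.is_available())
#   LocalTrainer().train(task)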
def train(self, task):
    _init_env_vars(task.use_gpu)
    _init_distributed(task.use_gpu)
    logging.info(
        f"Done setting up distributed process_group with rank {get_rank()}"
        + f", world_size {get_world_size()}"
    )
    local_rank = int(os.environ["LOCAL_RANK"])
    if task.use_gpu:
        logging.info("Using GPU, CUDA device index: {}".format(local_rank))
        set_cuda_device_index(local_rank)
    else:
        logging.info("Using CPU")
        set_cpu_device()

    super().train(task)
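# Hedged launch note (an addition): LOCAL_RANK is read from the environment above, so this
# train() is meant to run under a per-process launcher that sets it, for example
#
#   torchrun --nnodes=1 --nproc_per_node=8 your_training_script.py
#
# where your_training_script.py is a placeholder; the _init_env_vars / _init_distributed
# calls at the top handle the rest of the process-group setup.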
def setup_distributed(self, use_gpu: bool):
    """
    Set up distributed training. VISSL supports both GPU and CPU-only training.

    (1) Initialize the process group with torch.distributed.init_process_group if it
        is not already initialized. The init_method and backend are specified by the
        user in the yaml config file. See vissl/defaults.yaml for a description of how
        to set init_method and backend.
    (2) Set the global device index, using torch.cuda.set_device for GPU runs or the
        cpu device otherwise.
    """
    # we overwrite the distributed trainer setup here with our config options
    distributed_world_size = int(os.environ["WORLD_SIZE"])
    assert distributed_world_size % self.cfg.DISTRIBUTED.NUM_NODES == 0
    init_method = f"{self.cfg.DISTRIBUTED.INIT_METHOD}://{self.dist_run_id}"
    logging.info(
        f"Using Distributed init method: {init_method}, "
        f"world_size: {distributed_world_size}, rank: {self.distributed_rank}"
    )

    if not torch.distributed.is_initialized():
        torch.distributed.init_process_group(
            backend=self.cfg.DISTRIBUTED.BACKEND,
            init_method=init_method,
            world_size=distributed_world_size,
            rank=self.distributed_rank,
        )
    else:
        logging.warning(
            "Torch distributed has already been initialized, "
            "reusing existing configuration"
        )

    logging.info(
        "| initialized host {} as rank {} ({})".format(
            socket.gethostname(),
            self.distributed_rank,
            torch.distributed.get_rank(),
        )
    )

    if use_gpu:
        set_cuda_device_index(self.local_rank)
        # perform a dummy all-reduce to initialize the NCCL communicator
        if torch.cuda.is_available() and (self.cfg.DISTRIBUTED.BACKEND == "nccl"):
            dist.all_reduce(torch.zeros(1).cuda())
    else:
        set_cpu_device()
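# Hedged illustration (an addition): the config attributes consumed above, written out as a
# plain dict. VISSL actually reads them from the YAML configuration (see vissl/defaults.yaml);
# the values here are examples only.
distributed_cfg = {
    "DISTRIBUTED": {
        "BACKEND": "nccl",     # "gloo" would be the CPU-only choice
        "INIT_METHOD": "tcp",  # joined with dist_run_id into e.g. "tcp://<host>:<port>"
        "NUM_NODES": 1,        # WORLD_SIZE must be divisible by this (see the assert)
    }
}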