def compute_scores(self, logits_B_K_C, available_loader, device):
    """Compute acquisition scores for the sampled predictions in logits_B_K_C."""
    scorer = self.scorer

    if self == AcquisitionFunction.random:
        return scorer(logits_B_K_C, None).double()

    B, K, C = logits_B_K_C.shape

    # Score the already-sampled predictions in memory-sized chunks.
    with torch.no_grad():
        scores_B = torch.empty((B,), dtype=torch.float64)

        if device.type == "cuda":
            torch_utils.gc_cuda()
            KC_memory = K * C * 8  # Bytes per sample (float64).
            batch_size = min(torch_utils.get_cuda_available_memory() // KC_memory, 8192)
        else:
            batch_size = 4096

        for scores_b, logits_b_K_C in with_progress_bar(
            torch_utils.split_tensors(scores_B, logits_B_K_C, batch_size), unit_scale=batch_size
        ):
            scores_b.copy_(scorer(logits_b_K_C.to(device)), non_blocking=True)

    return scores_B
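# torch_utils.split_tensors is not shown in this file. A minimal sketch of the
# assumed behavior: yield aligned chunks of the output and input tensors so the
# scores can be written in place, one memory-sized chunk at a time.
def split_tensors_sketch(out_B, in_B_K_C, chunk_size):
    for lower in range(0, out_B.shape[0], chunk_size):
        yield out_B[lower:lower + chunk_size], in_B_K_C[lower:lower + chunk_size]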
def torch_device(request):
    use_cuda = request.param
    if use_cuda:
        assert torch.cuda.is_available()
        torch_utils.gc_cuda()
        return torch.device("cuda")
    return torch.device("cpu")
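# This fixture reads request.param, so it only runs when parametrized. A
# hypothetical wiring (the params/ids values are assumptions, not shown here):
#
#     @pytest.fixture(params=[False, True], ids=["cpu", "cuda"])
#     def torch_device(request):
#         ...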
def forward(self, input: torch.Tensor):
    global sampler_model_cuda_chunk_size

    if self.training:
        return torch_utils.logit_mean(self.bayesian_net(input, self.k), dim=1, keepdim=False)

    mc_output_B_C = torch.zeros((input.shape[0], self.num_classes), dtype=torch.float64, device=input.device)

    k = self.k
    chunk_size = sampler_model_cuda_chunk_size if input.device.type == "cuda" else 32

    k_lower = 0
    while k_lower < k:
        try:
            k_upper = min(k_lower + chunk_size, k)

            # This resets the dropout masks before each chunk of samples.
            self.bayesian_net.eval()
            mc_output_B_K_C = self.bayesian_net(input, k_upper - k_lower)
        except RuntimeError as exception:
            if torch_utils.should_reduce_batch_size(exception):
                chunk_size //= 2
                if chunk_size <= 0:
                    raise

                if sampler_model_cuda_chunk_size != chunk_size:
                    print(f"New sampler_model_cuda_chunk_size={chunk_size} ({exception})")
                    sampler_model_cuda_chunk_size = chunk_size
                torch_utils.gc_cuda()
            else:
                raise
        else:
            # Accumulate the summed probabilities over this chunk of samples.
            mc_output_B_C += torch.sum(mc_output_B_K_C.double().exp_(), dim=1, keepdim=False)
            k_lower += chunk_size

    # Average the probabilities over all k samples and return log-probabilities.
    return (mc_output_B_C / k).log_()
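# torch_utils.logit_mean is not shown in this file. A plausible sketch, assuming
# the logits are log-probabilities: the mean of the probabilities computed in
# log space, i.e. log-mean-exp over the sample dimension.
def logit_mean_sketch(logits, dim, keepdim=False):
    return torch.logsumexp(logits, dim=dim, keepdim=keepdim) - math.log(logits.shape[dim])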
def eval_bayesian_model_consistent(
    bayesian_model: mc_dropout.BayesianModule, available_loader, num_classes, k=20, device=None
):
    global eval_bayesian_model_consistent_cuda_chunk_size

    with torch.no_grad():
        B = len(available_loader.dataset)
        logits_B_K_C = torch.empty((B, k, num_classes), dtype=torch.float64)

        chunk_size = eval_bayesian_model_consistent_cuda_chunk_size if device.type == "cuda" else 64

        torch_utils.gc_cuda()

        k_lower = 0
        while k_lower < k:
            try:
                k_upper = min(k_lower + chunk_size, k)

                # This resets the dropout masks.
                bayesian_model.eval()

                for i, (batch, _) in enumerate(
                    with_progress_bar(available_loader, unit_scale=available_loader.batch_size)
                ):
                    lower = i * available_loader.batch_size
                    upper = min(lower + available_loader.batch_size, B)

                    batch = batch.to(device)
                    # batch_size x ws x classes
                    mc_output_B_K_C = bayesian_model(batch, k_upper - k_lower)
                    logits_B_K_C[lower:upper, k_lower:k_upper].copy_(mc_output_B_K_C.double(), non_blocking=True)
            except RuntimeError as exception:
                if torch_utils.should_reduce_batch_size(exception):
                    if chunk_size <= 1:
                        raise
                    chunk_size //= 2
                    print(f"New eval_bayesian_model_consistent_cuda_chunk_size={chunk_size} ({exception})")
                    eval_bayesian_model_consistent_cuda_chunk_size = chunk_size
                    torch_utils.gc_cuda()
                else:
                    raise
            else:
                k_lower += chunk_size

    return logits_B_K_C
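# torch_utils.should_reduce_batch_size is not shown here either. A minimal
# sketch of the assumed behavior: treat CUDA out-of-memory RuntimeErrors as the
# signal to halve the chunk size and retry, and let everything else propagate.
def should_reduce_batch_size_sketch(exception):
    return isinstance(exception, RuntimeError) and "out of memory" in str(exception)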
def compute_multi_bald_batch(
    bayesian_model: nn.Module,
    available_loader,
    num_classes,
    k,
    b,
    target_size,
    initial_percentage,
    reduce_percentage,
    device=None,
):
    # Returns (AcquisitionBatch, time_taken).
    result = reduced_eval_consistent_bayesian_model(
        bayesian_model=bayesian_model,
        acquisition_function=AcquisitionFunction.bald,
        num_classes=num_classes,
        k=k,
        initial_percentage=initial_percentage,
        reduce_percentage=reduce_percentage,
        target_size=target_size,
        available_loader=available_loader,
        device=device,
    )

    start_time = time.process_time()

    subset_split = result.subset_split
    partial_multi_bald_B = result.scores_B

    # Now we can compute the conditional entropy.
    conditional_entropies_B = joint_entropy_exact.batch_conditional_entropy_B(result.logits_B_K_C)

    # We turn the logits into probabilities.
    probs_B_K_C = result.logits_B_K_C.exp_()

    # Don't need the result anymore.
    result = None
    torch_utils.gc_cuda()

    with torch.no_grad():
        num_samples_per_ws = 40000 // k
        num_samples = num_samples_per_ws * k

        if device.type == "cuda":
            sample_MK_memory = num_samples * k * 8
            MC_memory = num_samples * num_classes * 8
            copy_buffer_memory = 256 * num_samples * num_classes * 8
            slack_memory = 2 * 2 ** 30
            multi_bald_batch_size = (
                torch_utils.get_cuda_available_memory() - (sample_MK_memory + copy_buffer_memory + slack_memory)
            ) // MC_memory

            global compute_multi_bald_bag_multi_bald_batch_size
            if compute_multi_bald_bag_multi_bald_batch_size != multi_bald_batch_size:
                compute_multi_bald_bag_multi_bald_batch_size = multi_bald_batch_size
                print(f"New compute_multi_bald_bag_multi_bald_batch_size = {multi_bald_batch_size}")
        else:
            multi_bald_batch_size = 16

        subset_acquisition_bag = []
        global_acquisition_bag = []
        acquisition_bag_scores = []

        # We use this for early-out in the b == 0 case.
        MIN_SPREAD = 0.1

        if b == 0:
            b = 100
            early_out = True
        else:
            early_out = False

        prev_joint_probs_M_K = None
        prev_samples_M_K = None
        for i in range(b):
            torch_utils.gc_cuda()

            if i > 0:
                # Compute the joint entropy.
                joint_entropies_B = torch.empty((len(probs_B_K_C),), dtype=torch.float64)

                exact_samples = num_classes ** i
                if exact_samples <= num_samples:
                    prev_joint_probs_M_K = joint_entropy_exact.joint_probs_M_K(
                        probs_B_K_C[subset_acquisition_bag[-1]][None].to(device),
                        prev_joint_probs_M_K=prev_joint_probs_M_K,
                    )

                    batch_exact_joint_entropy(
                        probs_B_K_C, prev_joint_probs_M_K, multi_bald_batch_size, device, joint_entropies_B
                    )
                else:
                    if prev_joint_probs_M_K is not None:
                        prev_joint_probs_M_K = None
                        torch_utils.gc_cuda()

                    # Gather new traces for the new subset_acquisition_bag.
                    prev_samples_M_K = joint_entropy_sampling.sample_M_K(
                        probs_B_K_C[subset_acquisition_bag].to(device), S=num_samples_per_ws
                    )

                    for joint_entropies_b, probs_b_K_C in with_progress_bar(
                        torch_utils.split_tensors(joint_entropies_B, probs_B_K_C, multi_bald_batch_size),
                        unit_scale=multi_bald_batch_size,
                    ):
                        joint_entropies_b.copy_(
                            joint_entropy_sampling.batch(probs_b_K_C.to(device), prev_samples_M_K),
                            non_blocking=True,
                        )

                    prev_samples_M_K = None
                    torch_utils.gc_cuda()

                partial_multi_bald_B = joint_entropies_B - conditional_entropies_B
                joint_entropies_B = None

            # Don't allow reselection.
            partial_multi_bald_B[subset_acquisition_bag] = -math.inf

            winner_index = partial_multi_bald_B.argmax().item()

            # The actual BatchBALD score of the winner also subtracts the summed
            # conditional entropies of the current bag.
            actual_multi_bald = (
                partial_multi_bald_B[winner_index] - torch.sum(conditional_entropies_B[subset_acquisition_bag])
            ).item()
            print(f"Actual MultiBALD: {actual_multi_bald}")

            # If we early out, we don't take the point that triggers the early out.
            # Only allow early-out after acquiring at least 1 sample.
            if early_out and i > 1:
                # The spread is invariant to the constant bag term, so we can
                # measure it on partial_multi_bald_B directly.
                current_spread = (partial_multi_bald_B[winner_index] - partial_multi_bald_B.median()).item()
                if current_spread < MIN_SPREAD:
                    print("Early out")
                    break

            acquisition_bag_scores.append(actual_multi_bald)
            subset_acquisition_bag.append(winner_index)
            # We need to map the index back to the actual dataset.
            global_acquisition_bag.append(subset_split.get_dataset_indices([winner_index]).item())

            print(f"Acquisition bag: {sorted(global_acquisition_bag)}, num_ack: {i}")

    end_time = time.process_time()
    time_taken = end_time - start_time
    print("ack time taken", time_taken)

    return AcquisitionBatch(global_acquisition_bag, acquisition_bag_scores, None), time_taken
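# Illustration only: a brute-force version of the exact joint entropy that
# joint_entropy_exact computes for small acquisition bags. probs_N_K_C is
# assumed to hold p(y_n | w_k) for the N points in the bag; the real code
# vectorizes this and reuses prev_joint_probs_M_K instead of re-enumerating
# all C**N configurations from scratch. torch and math are already imported
# by this module.
import itertools

def exact_joint_entropy_bruteforce(probs_N_K_C):
    N, K, C = probs_N_K_C.shape
    entropy = 0.0
    for ys in itertools.product(range(C), repeat=N):
        # Given sampled parameters w_k, the joint factorizes into a product.
        p_K = torch.ones(K, dtype=torch.float64)
        for n, y in enumerate(ys):
            p_K = p_K * probs_N_K_C[n, :, y]
        p = p_K.mean().item()  # Average over the K MC-dropout samples.
        if p > 0.0:
            entropy -= p * math.log(p)
    return entropy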
def reduced_eval_consistent_bayesian_model(
    bayesian_model: mc_dropout.BayesianModule,
    acquisition_function: AcquisitionFunction,
    num_classes: int,
    k: int,
    initial_percentage: int,
    reduce_percentage: int,
    target_size: int,
    available_loader,
    device=None,
) -> SubsetEvalResults:
    """Performs a scoring step with k inference samples while progressively
    reducing the dataset down to target_size.

    Before anything is computed, the initially available dataset is randomly
    culled to initial_percentage. Every `chunk_size` inference samples, the
    scores are recomputed and the bottom `reduce_percentage` of samples is
    dropped."""
    global reduced_eval_consistent_bayesian_model_cuda_chunk_size

    # TODO: ActiveLearningData should be renamed to be a more modular SplitDataset.
    # Here, we need to use available_dataset because it allows us to easily recover the original indices.
    # We start with all data in the acquired data.
    subset_split = active_learning_data.ActiveLearningData(available_loader.dataset)

    initial_length = len(available_loader.dataset)
    initial_split_length = initial_length * initial_percentage // 100

    # By acquiring [initial_split_length:], we make the tail unavailable.
    subset_split.acquire(torch.randperm(initial_length)[initial_split_length:])

    subset_dataloader = data.DataLoader(
        subset_split.available_dataset, shuffle=False, batch_size=available_loader.batch_size
    )

    print(f"Scoring subset of {len(subset_dataloader.dataset)} items:")

    # We're done with available_loader in this function.
    available_loader = None

    with torch.no_grad():
        B = len(subset_split.available_dataset)
        C = num_classes

        # We stay on the CPU.
        logits_B_K_C = None

        k_lower = 0
        torch_utils.gc_cuda()
        chunk_size = reduced_eval_consistent_bayesian_model_cuda_chunk_size if device.type == "cuda" else 32
        while k_lower < k:
            try:
                k_upper = min(k_lower + chunk_size, k)

                old_logits_B_K_C = logits_B_K_C
                # This also stays on the CPU.
                logits_B_K_C = torch.empty((B, k_upper, C), dtype=torch.float64)

                # Copy the old data over.
                if k_lower > 0:
                    logits_B_K_C[:, 0:k_lower, :].copy_(old_logits_B_K_C)
                old_logits_B_K_C = None

                # This resets the dropout masks.
                bayesian_model.eval()

                for i, (batch, _) in enumerate(
                    with_progress_bar(subset_dataloader, unit_scale=subset_dataloader.batch_size)
                ):
                    lower = i * subset_dataloader.batch_size
                    upper = min(lower + subset_dataloader.batch_size, B)

                    batch = batch.to(device)
                    # batch_size x ws x classes
                    mc_output_B_K_C = bayesian_model(batch, k_upper - k_lower)
                    logits_B_K_C[lower:upper, k_lower:k_upper].copy_(mc_output_B_K_C.double(), non_blocking=True)
            except RuntimeError as exception:
                if torch_utils.should_reduce_batch_size(exception):
                    if chunk_size <= 1:
                        raise
                    chunk_size = chunk_size // 2
                    print(f"New reduced_eval_consistent_bayesian_model_cuda_chunk_size={chunk_size} ({exception})")
                    reduced_eval_consistent_bayesian_model_cuda_chunk_size = chunk_size
                    torch_utils.gc_cuda()
                else:
                    raise
            else:
                if k_upper == k:
                    next_size = target_size
                elif k_upper < 50:
                    next_size = B
                else:
                    next_size = max(target_size, B * (100 - reduce_percentage) // 100)

                # Compute the scores if they are needed: when we are about to
                # reduce the dataset or we are in the last iteration.
                if next_size < B or k_upper == k:
                    # Compute the scores (mutual information for BALD) of logits_B_K_C.
                    scores_B = acquisition_function.compute_scores(
                        logits_B_K_C, available_loader=subset_dataloader, device=device
                    )
                else:
                    scores_B = None

                if next_size < B:
                    print("Reducing size", next_size)

                    # Sort candidate indices by decreasing score.
                    sorted_indices = torch.argsort(scores_B, descending=True)
                    # Keep the next_size highest-scoring samples, restored to ascending index order.
                    new_indices = torch.sort(sorted_indices[:next_size], descending=False)[0]

                    B = next_size
                    logits_B_K_C = logits_B_K_C[new_indices]
                    if k_upper == k:
                        logits_B_K_C = logits_B_K_C.clone().detach()
                    scores_B = scores_B[new_indices].clone().detach()

                    # Acquire all the low scorers, which removes them from the available dataset.
                    subset_split.acquire(sorted_indices[next_size:])

                k_lower += chunk_size

    return SubsetEvalResults(
        subset_split=subset_split,
        subset_dataloader=subset_dataloader,
        scores_B=scores_B,
        logits_B_K_C=logits_B_K_C,
    )
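# The BALD scorer dispatched through acquisition_function.compute_scores is not
# shown in this file. A minimal sketch of the mutual information
# I[y; w] = H(E_w[p(y|w)]) - E_w[H(p(y|w))], assuming logits_b_K_C holds
# log-probabilities (as the .exp_() calls elsewhere suggest):
def bald_scores_sketch(logits_b_K_C):
    K = logits_b_K_C.shape[1]
    # log E_w[p(y|w)] via log-mean-exp over the K parameter samples.
    mean_log_probs_b_C = torch.logsumexp(logits_b_K_C, dim=1) - math.log(K)
    # Entropy of the mean prediction.
    entropy_of_mean_b = -(mean_log_probs_b_C.exp() * mean_log_probs_b_C).sum(dim=1)
    # Mean entropy of the individual predictions.
    mean_entropy_b = -(logits_b_K_C.exp() * logits_b_K_C).sum(dim=2).mean(dim=1)
    return entropy_of_mean_b - mean_entropy_b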
def compute_multi_bald_batch(
    bayesian_model: nn.Module,
    available_loader,
    num_classes,
    k,  # Number of Monte Carlo samples drawn from the model
    b,  # Acquisition batch size (how many samples we want to label next)
    target_size,
    initial_percentage,
    reduce_percentage,
    device=None,
) -> AcquisitionBatch:
    result = reduced_eval_consistent_bayesian_model(
        bayesian_model=bayesian_model,
        acquisition_function=AcquisitionFunction.bald,  # This is mutual information
        num_classes=num_classes,
        k=k,
        initial_percentage=initial_percentage,
        reduce_percentage=reduce_percentage,
        target_size=target_size,
        available_loader=available_loader,
        device=device,
    )
    # result contains the remaining candidate samples with the highest mutual
    # information (the low scorers were dropped during the reduction).

    subset_split = result.subset_split
    partial_multi_bald_B = result.scores_B
    # partial_multi_bald_B contains H(y_1, ..., y_n, y_m) - E_p(w)[H(y_m|w)],
    # n being the samples already in the acquisition bag and m ranging over all
    # available samples that are candidates for the acquisition bag. For the
    # first sample to be selected, this is equivalent to
    # H(y_m) - E_p(w)[H(y_m|w)], i.e. the mutual information between y_m and
    # the model parameters w. Since the term E_p(w)[H(y_1, ..., y_n|w)] that
    # has to be subtracted to get the true value of a_BatchBALD is the same for
    # all candidates, we can ignore it when picking the best candidate.

    # Now we can compute the conditional entropy.
    conditional_entropies_B = joint_entropy_exact.batch_conditional_entropy_B(result.logits_B_K_C)
    # conditional_entropies_B = E_p(w)[H(y_i|w)]. Summed together, these give
    # E_p(w)[H(y_1, ..., y_n|w)], which is the right-hand side of Equation 8
    # for computing BatchBALD.

    # We turn the logits into probabilities.
    probs_B_K_C = result.logits_B_K_C.exp_()

    # Don't need the result anymore.
    result = None
    torch_utils.gc_cuda()

    with torch.no_grad():
        # Number of samples used to estimate the joint entropy, per model sample.
        num_samples_per_ws = 40000 // k
        num_samples = num_samples_per_ws * k

        # Decide how many candidates to process at once when computing the joint entropy.
        if device.type == "cuda":
            sample_MK_memory = num_samples * k * 8
            MC_memory = num_samples * num_classes * 8
            copy_buffer_memory = 256 * num_samples * num_classes * 8
            slack_memory = 2 * 2 ** 30
            multi_bald_batch_size = (
                torch_utils.get_cuda_available_memory() - (sample_MK_memory + copy_buffer_memory + slack_memory)
            ) // MC_memory

            global compute_multi_bald_bag_multi_bald_batch_size
            if compute_multi_bald_bag_multi_bald_batch_size != multi_bald_batch_size:
                compute_multi_bald_bag_multi_bald_batch_size = multi_bald_batch_size
                print(f"New compute_multi_bald_bag_multi_bald_batch_size = {multi_bald_batch_size}")
        else:
            multi_bald_batch_size = 16

        subset_acquisition_bag = []  # Samples currently selected for labeling (local indices)
        global_acquisition_bag = []  # Samples currently selected for labeling (global indices)
        acquisition_bag_scores = []

        # We use this for early-out in the b == 0 case.
        MIN_SPREAD = 0.1

        if b == 0:
            b = 100
            early_out = True
        else:
            early_out = False

        prev_joint_probs_M_K = None
        prev_samples_M_K = None

        # Iteratively select b samples for labeling and put them in the acquisition bag.
        for i in range(b):  # Algorithm 1, line 2
            torch_utils.gc_cuda()

            if i > 0:  # Only run this starting from the second sample.
                # Compute the joint entropies. Depending on the size of n in
                # (y_1, ..., y_n), we either solve this analytically with
                # joint_entropy_exact or approximate it with joint_entropy_sampling.
                # The entropies can be computed iteratively, reusing information
                # obtained when the last sample was added to the bag.
                joint_entropies_B = torch.empty((len(probs_B_K_C),), dtype=torch.float64)

                # If we can afford it, compute the joint entropy exactly; otherwise sample.
                exact_samples = num_classes ** i
                if exact_samples <= num_samples:
                    # Exact joint entropy (no sampling).
                    # Computes P^_{1:n-1}, the joint probabilities of the current bag.
                    prev_joint_probs_M_K = joint_entropy_exact.joint_probs_M_K(
                        probs_B_K_C[subset_acquisition_bag[-1]][None].to(device),
                        prev_joint_probs_M_K=prev_joint_probs_M_K,
                    )

                    batch_exact_joint_entropy(
                        probs_B_K_C,  # Class probabilities from logits_B_K_C
                        prev_joint_probs_M_K,  # Joint probabilities of the current bag (P^_{1:n-1})
                        multi_bald_batch_size,  # Number of candidates to process at once
                        device,  # Compute on GPU or CPU
                        joint_entropies_B,  # Filled with the resulting joint entropies
                    )
                else:
                    # Use sampling to estimate the joint entropy.
                    if prev_joint_probs_M_K is not None:
                        prev_joint_probs_M_K = None
                        torch_utils.gc_cuda()

                    # Gather new traces for the new subset_acquisition_bag.
                    prev_samples_M_K = joint_entropy_sampling.sample_M_K(
                        probs_B_K_C[subset_acquisition_bag].to(device), S=num_samples_per_ws
                    )
                    # prev_samples_M_K holds the probability of sampled label
                    # configurations for all samples in the current acquisition
                    # bag, i.e. p(y^_1:n-1|w^_j), and therefore P^_{1:n-1}.

                    for joint_entropies_b, probs_b_K_C in with_progress_bar(
                        torch_utils.split_tensors(joint_entropies_B, probs_B_K_C, multi_bald_batch_size),
                        unit_scale=multi_bald_batch_size,
                    ):
                        joint_entropies_b.copy_(
                            joint_entropy_sampling.batch(probs_b_K_C.to(device), prev_samples_M_K),
                            non_blocking=True,
                        )

                    prev_samples_M_K = None
                    torch_utils.gc_cuda()

                partial_multi_bald_B = joint_entropies_B - conditional_entropies_B
                joint_entropies_B = None

            # Don't allow reselection.
            partial_multi_bald_B[subset_acquisition_bag] = -math.inf

            winner_index = partial_multi_bald_B.argmax().item()  # Algorithm 1, line 4

            # The actual BatchBALD score of the winner also subtracts the summed
            # conditional entropies of the current bag.
            actual_multi_bald = (
                partial_multi_bald_B[winner_index] - torch.sum(conditional_entropies_B[subset_acquisition_bag])
            ).item()
            print(f"Actual MultiBALD: {actual_multi_bald}")

            # If we early out, we don't take the point that triggers the early out.
            # Only allow early-out after acquiring at least 1 sample.
            if early_out and i > 1:
                # The spread is invariant to the constant bag term, so we can
                # measure it on partial_multi_bald_B directly.
                current_spread = (partial_multi_bald_B[winner_index] - partial_multi_bald_B.median()).item()
                if current_spread < MIN_SPREAD:
                    print("Early out")
                    break

            acquisition_bag_scores.append(actual_multi_bald)

            subset_acquisition_bag.append(winner_index)  # Algorithm 1, line 5
            # We need to map the index back to the actual dataset.
            global_acquisition_bag.append(subset_split.get_dataset_indices([winner_index]).item())

            print(f"Acquisition bag: {sorted(global_acquisition_bag)}")

    return AcquisitionBatch(global_acquisition_bag, acquisition_bag_scores, None)
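# Hypothetical call site (sketch): acquiring b points with BatchBALD inside an
# active-learning loop. `model` and `pool_loader` are placeholders, and the
# field name `indices` on AcquisitionBatch is an assumption.
#
#     acquisition = compute_multi_bald_batch(
#         bayesian_model=model,
#         available_loader=pool_loader,
#         num_classes=10,
#         k=20,                  # MC-dropout samples per point
#         b=10,                  # acquisition batch size
#         target_size=4096,
#         initial_percentage=100,
#         reduce_percentage=0,
#         device=torch.device("cuda"),
#     )
#     labeled_data.acquire(acquisition.indices)  # hypothetical ActiveLearningData instance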