Example #1
def select_classification_errors(batch, loss_terms):
    nb_samples = trw.utils.len_batch(batch)
    indices_errors = collections.defaultdict(list)
    for name, loss_term in loss_terms.items():
        ref = loss_term.get('output_ref')
        if ref is None or not isinstance(ref,
                                         outputs_trw.OutputClassification):
            continue

        truth_name = ref.classes_name
        truth_values = to_value(batch[truth_name])
        found_values = to_value(loss_term['output'])
        samples_with_errors = np.where(found_values != truth_values)[0]
        for i in samples_with_errors:
            indices_errors[i].append(name)

    samples = []
    samples_error = [''] * nb_samples
    for index, values in indices_errors.items():
        samples.append(index)
        samples_error[index] = '|'.join(values)

    # add additional data in the batch so that we can easily display the errors
    batch['samples_error'] = samples_error
    return samples
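The heart of the selection is the elementwise comparison between predicted and true classes. A minimal sketch of that step in isolation (hypothetical values, with both tensors already converted to numpy arrays by `to_value`):

import numpy as np

truth_values = np.array([1, 0, 2, 1])
found_values = np.array([1, 2, 2, 0])
samples_with_errors = np.where(found_values != truth_values)[0]
print(samples_with_errors)  # [1 3]: indices of the misclassified samples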
Example #2
def extract_metrics(p: nn.parameter.Parameter):
    return collections.OrderedDict([
        ('mean', to_value(p.mean())),
        ('max', to_value(p.max())),
        ('min', to_value(p.min())),
        ('std', to_value(p.std())),
        ('norm2', to_value(p.norm())),
    ])
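A minimal usage sketch, assuming `to_value` simply converts a tensor to a numpy value (the stand-in below is an assumption; the real implementation lives in trw):

import collections

import torch
import torch.nn as nn

def to_value(t):
    # stand-in for trw's to_value (assumption: tensor -> numpy conversion)
    return t.detach().cpu().numpy()

p = nn.parameter.Parameter(torch.randn(10, 10))
metrics = extract_metrics(p)
print(metrics['mean'], metrics['norm2'])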
Example #3
def forward_hook(module, inputs, outputs):
    # note: defined inside an enclosing function that owns `batch_stats`
    nonlocal batch_stats
    if isinstance(outputs, torch.Tensor):
        batch_stats[module] = to_value(outputs)
    else:
        # cater for the most common use case (a module with a single
        # tensor output); if really needed, the user can add an
        # intermediate debug module
        warnings.warn(
            f'module={module} with output type={type(outputs)} is not handled!'
        )
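The `nonlocal` statement implies the hook is defined inside an enclosing function that owns `batch_stats`. A standalone sketch of how such a hook is typically attached (standard PyTorch `register_forward_hook` API, with `batch_stats` demoted to a plain dict):

import torch
import torch.nn as nn

batch_stats = {}

def capture_hook(module, inputs, outputs):
    if isinstance(outputs, torch.Tensor):
        batch_stats[module] = outputs.detach().cpu().numpy()

model = nn.Linear(4, 2)
handle = model.register_forward_hook(capture_hook)
model(torch.randn(8, 4))  # the hook fires on every forward pass
handle.remove()           # detach the hook once the statistics are collected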
Example #4
def collect_gradient(model, gradient_store):
    """
    Collect the gradient of each parameter of a given model
    Args:
        model: the model
        gradient_store: where to store the parameter gradients

    Returns:

    """
    for p in model.parameters():
        if p.requires_grad:
            gradient_store[p] = to_value(p.grad)
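A usage sketch: gradients only exist after a backward pass, so the store is filled between `loss.backward()` and the optimizer step (assuming `to_value` is available, e.g. the stand-in from Example #2's sketch):

import torch
import torch.nn as nn

model = nn.Linear(4, 1)
loss = model(torch.randn(8, 4)).sum()
loss.backward()

gradient_store = {}
collect_gradient(model, gradient_store)
for p, g in gradient_store.items():
    print(tuple(p.shape), g.shape)  # each gradient mirrors its parameter shape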
Example #5
def make_pair_indices(targets, same_target_ratio=0.5):
    """
    Make random indices of pairs of samples that belong (or not) to the same target.

    Args:
        targets: a 1D integral tensor in range [0..C] used to group the samples
            into same or different targets
        same_target_ratio: the ratio of same-target pairs to generate

    Returns:
        a tuple with (samples_0 indices, samples_1 indices, same_target)
    """
    # group samples by class
    samples_by_class = collections.defaultdict(list)
    classes = to_value(targets)
    for index, c in enumerate(classes):
        samples_by_class[c].append(index)
    samples_by_class = {name: np.asarray(value) for name, value in samples_by_class.items()}

    # create the (sample, sample+, sample-) groups
    samples_0 = []
    samples_1 = []
    same_target = []
    for c, c_indexes in samples_by_class.items():
        samples = c_indexes.copy()
        np.random.shuffle(c_indexes)
        nb_same_targets = int(same_target_ratio * len(c_indexes))

        other = [idx for cc, idx in samples_by_class.items() if cc != c]
        other = np.concatenate(other)
        np.random.shuffle(other)

        samples_positive = c_indexes[:nb_same_targets]
        same_target += [1] * len(samples_positive)
        # expect to have more negatives than positives, so for the negatives
        # pick the remaining pairs from the other classes
        samples_negative = other[:len(c_indexes) - len(samples_positive)]
        same_target += [0] * len(samples_negative)
        samples_1.append(np.concatenate((samples_positive, samples_negative)))

        # in case the assumption was wrong (fewer negatives available than
        # requested), shorten this class's anchors so the pairs stay aligned
        samples_0.append(samples[:len(samples_positive) + len(samples_negative)])

    return np.concatenate(samples_0), np.concatenate(samples_1), np.asarray(same_target)
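A usage sketch with hypothetical targets (assuming the helpers used above, `to_value` among them, are available); each `(samples_0[i], samples_1[i])` pair is labeled by `same_target[i]`:

import torch

targets = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1])
samples_0, samples_1, same_target = make_pair_indices(targets, same_target_ratio=0.5)
for i0, i1, s in zip(samples_0, samples_1, same_target):
    # pairs labeled 1 share a target, pairs labeled 0 do not
    assert bool(targets[i0] == targets[i1]) == bool(s)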
Example #6
def expand_classification_mapping(batch,
                                  loss_term_name,
                                  loss_term,
                                  classification_mappings,
                                  suffix='_str'):
    """
    Expand the class name of each classification output as a string

    Args:
        batch: the batch to be updated
        loss_term_name: the name of the loss term
        loss_term: the loss term from which to expand the output
        classification_mappings: a nested dict recording the class name/value
            associated with a set of ``output_name``:

            {``output_name``: {
                'mapping': {name: value},
                'mappinginv': {value: name},
            }}

        suffix: the suffix appended to output or target name
    """
    output_ref = loss_term.get('output_ref')
    if isinstance(output_ref, outputs_trw.OutputClassification):
        target_name = output_ref.classes_name
        if target_name is not None and classification_mappings is not None:
            mapping = classification_mappings.get(target_name)
            if mapping is not None:
                output = to_value(loss_term['output'])
                if len(output.shape) == 1:
                    output_str = [
                        utilities.get_class_name(mapping, o) for o in output
                    ]
                    batch[loss_term_name + suffix] = output_str

                    # if we record the loss term output as strings, also
                    # record the target values as strings
                    target_name_str = target_name + suffix
                    if target_name_str not in batch:
                        target_values = batch.get(target_name)
                        if target_values is not None and len(
                                target_values.shape) == 1:
                            target_values = [
                                utilities.get_class_name(mapping, o)
                                for o in target_values
                            ]
                            batch[target_name_str] = target_values
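A sketch of the `classification_mappings` structure the function expects (hypothetical class names; `utilities.get_class_name` presumably resolves a class value through `'mappinginv'`):

classification_mappings = {
    'digit': {                                 # keyed by target/output name
        'mapping': {'zero': 0, 'one': 1},      # class name -> class value
        'mappinginv': {0: 'zero', 1: 'one'},   # class value -> class name
    }
}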
Example #7
def make_triplet_indices(targets):
    """
    Make random index triplets (anchor, positive, negative) such that ``anchor`` and ``positive``
        belong to the same target while ``negative`` belongs to a different target

    Args:
        targets: a 1D integral tensor in range [0..C]

    Returns:
        a tuple of indices (samples, samples_positive, samples_negative)
    """
    # group samples by class
    samples_by_class = collections.defaultdict(list)
    targets = to_value(targets)
    for index, c in enumerate(targets):
        samples_by_class[c].append(index)

    # create the (sample, sample+, sample-) groups
    samples_all = []
    samples_positive_all = []
    samples_negative_all = []
    for c, c_indexes in samples_by_class.items():
        samples = c_indexes.copy()
        samples_positive = c_indexes
        np.random.shuffle(c_indexes)

        other = [idx for cc, idx in samples_by_class.items() if cc != c]
        other = np.concatenate(other)

        # sample with replacement in case there are fewer ``negative``
        # samples than ``positive`` samples
        samples_negative = np.random.choice(other, len(samples))

        samples_all.append(samples)
        samples_positive_all.append(samples_positive)
        samples_negative_all.append(samples_negative)

    samples_all = np.concatenate(samples_all)
    samples_positive_all = np.concatenate(samples_positive_all)
    samples_negative_all = np.concatenate(samples_negative_all)
    min_samples = min(len(samples_all), len(samples_negative_all))
    return samples_all[:min_samples], samples_positive_all[:min_samples], samples_negative_all[:min_samples]
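A usage sketch with hypothetical targets (assuming `to_value` is available); the returned triplets satisfy the anchor/positive/negative contract:

import torch

targets = torch.tensor([0, 0, 1, 1, 2, 2])
anchors, positives, negatives = make_triplet_indices(targets)
for a, p, n in zip(anchors, positives, negatives):
    assert targets[a] == targets[p] and targets[a] != targets[n]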
Example #8
def get_translation_from_4x4(tfm: Tensor) -> np.ndarray:
    assert tfm.shape == (4, 4)
    tfm = to_value(tfm)
    return tfm[0:3, 3]
Example #9
def get_spacing_from_4x4(tfm: Tensor) -> List[float]:
    assert tfm.shape == (4, 4)
    tfm = to_value(tfm)
    spacing = [np.linalg.norm(tfm[0:3, n]) for n in range(3)]
    return spacing
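A sketch with a hypothetical affine matrix: the translation lives in the last column, and the spacing is the norm of each of the first three columns, so scaling an axis changes its spacing (again assuming `to_value`):

import torch

tfm = torch.eye(4)
tfm[0:3, 3] = torch.tensor([10.0, 20.0, 30.0])  # translation part
tfm[0, 0] = 2.0                                 # scale (spacing) along x
print(get_translation_from_4x4(tfm))  # [10. 20. 30.]
print(get_spacing_from_4x4(tfm))      # [2.0, 1.0, 1.0]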
Example #10
def callbacks_per_loss_term(dataset_name, split_name, batch, loss_terms, root,
                            datasets_infos, loss_terms_inclusion,
                            feature_exclusions, dataset_exclusions,
                            split_exclusions, exported_cases, max_samples,
                            epoch, sql_table, format, select_fn):
    # process the exclusions
    if dataset_name in dataset_exclusions:
        raise StopIteration()
    if split_name in split_exclusions:
        raise StopIteration()

    # copy to the current batch the specified loss terms
    classification_mappings = utilities.get_classification_mappings(
        datasets_infos, dataset_name, split_name)
    for loss_term_name, loss_term in loss_terms.items():
        for loss_term_inclusion in loss_terms_inclusion:
            if loss_term_inclusion in loss_term:
                name = f'term_{loss_term_name}_{loss_term_inclusion}'
                value = loss_term[loss_term_inclusion]
                batch[name] = to_value(value)

                # special handling of `losses`: in 2D regression, the output
                # will be a 2D error map, but it can be useful to have the
                # average error instead (e.g., to plot the worst samples)
                if loss_term_inclusion == 'losses' and len(value.shape) > 2:
                    batch[name + '_avg'] = to_value(
                        torch.mean(flatten(value), dim=1))
                expand_classification_mapping(batch, loss_term_name, loss_term,
                                              classification_mappings)

    for feature_exclusion in feature_exclusions:
        if feature_exclusion in batch:
            del batch[feature_exclusion]

    # force recording of epoch
    batch['epoch'] = epoch

    # calculate how many samples to export
    nb_batch_samples = trw.utils.len_batch(batch)
    nb_samples_exported = len(exported_cases)
    nb_samples_to_export = min(max_samples - nb_samples_exported,
                               nb_batch_samples)
    if nb_samples_to_export <= 0:
        raise StopIteration()

    # export the features
    samples_to_export = select_fn(batch, loss_terms)
    samples_to_export = samples_to_export[:nb_samples_to_export]
    for n in samples_to_export:
        sample_id = n + nb_samples_exported
        exported_cases.append(sample_id)
        name = format.format(dataset_name=dataset_name,
                             split_name=split_name,
                             id=sample_id,
                             epoch=epoch)
        reporting.export_sample(
            root,
            sql_table,
            base_name=name,
            batch=batch,
            sample_ids=[n],
            name_expansions=[],  # we already expanded in the basename!
        )
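The `format` argument is applied with `str.format` using the fields shown above; a sketch with a hypothetical pattern (the real pattern is supplied by the caller), noting that Example #1's `select_classification_errors` matches the `select_fn` contract of returning the sample indices to export:

name_format = '{dataset_name}_{split_name}_s{id}_e{epoch}'  # hypothetical pattern
print(name_format.format(dataset_name='mnist', split_name='train', id=42, epoch=3))
# -> mnist_train_s42_e3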