def select_classification_errors(batch, loss_terms):
    """
    Select the samples that have at least one classification error among the recorded loss terms.

    Also stores a per-sample ``samples_error`` string in the batch listing the outputs in error.
    """
    nb_samples = trw.utils.len_batch(batch)
    indices_errors = collections.defaultdict(list)
    for name, loss_term in loss_terms.items():
        ref = loss_term.get('output_ref')
        if ref is None or not isinstance(ref, outputs_trw.OutputClassification):
            continue
        truth_name = ref.classes_name
        truth_values = to_value(batch[truth_name])
        found_values = to_value(loss_term['output'])
        samples_with_errors = np.where(found_values != truth_values)[0]
        for i in samples_with_errors:
            indices_errors[i].append(name)

    samples = []
    samples_error = [''] * nb_samples
    for index, values in indices_errors.items():
        samples.append(index)
        samples_error[index] = '|'.join(values)

    # add additional data in the batch so that we can easily display the errors
    batch['samples_error'] = samples_error
    return samples

def extract_metrics(p: nn.parameter.Parameter):
    """
    Extract summary statistics (mean, max, min, std, L2 norm) of a parameter tensor.
    """
    return collections.OrderedDict([
        ('mean', to_value(p.mean())),
        ('max', to_value(p.max())),
        ('min', to_value(p.min())),
        ('std', to_value(p.std())),
        ('norm2', to_value(p.norm())),
    ])

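# A minimal usage sketch, not part of the library: ``_demo_extract_metrics`` is a hypothetical
# name and ``to_value`` is assumed to convert a tensor to a numpy value.
def _demo_extract_metrics():
    import torch
    from torch import nn
    p = nn.Parameter(torch.randn(4, 4))
    metrics = extract_metrics(p)
    # e.g., OrderedDict([('mean', ...), ('max', ...), ('min', ...), ('std', ...), ('norm2', ...)])
    print(metrics)
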
def forward_hook(module, inputs, outputs):
    nonlocal batch_stats
    if isinstance(outputs, torch.Tensor):
        batch_stats[module] = to_value(outputs)
    else:
        # only the most common use case (module with a single tensor output) is handled;
        # if really needed, the user can add an intermediate debug module
        warnings.warn(
            f'module={module} with output type={type(outputs)} is not handled!'
        )

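# A minimal sketch, not part of the library, showing how a forward hook like the one above is
# typically attached with PyTorch's standard ``register_forward_hook`` API; the closure over
# ``stats`` mirrors the ``nonlocal batch_stats`` above. ``_demo_collect_activations`` is hypothetical.
def _demo_collect_activations(model, dummy_input):
    import torch
    stats = {}

    def hook(module, inputs, outputs):
        if isinstance(outputs, torch.Tensor):
            stats[module] = outputs.detach().cpu().numpy()

    handles = [m.register_forward_hook(hook) for m in model.modules()]
    with torch.no_grad():
        model(dummy_input)
    for handle in handles:
        handle.remove()
    return stats
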
def collect_gradient(model, gradient_store):
    """
    Collect the gradient of each parameter of a given model

    Args:
        model: the model
        gradient_store: where to store the parameter gradients
    """
    for p in model.parameters():
        if p.requires_grad:
            gradient_store[p] = to_value(p.grad)

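# A minimal usage sketch, not part of the library: ``collect_gradient`` reads ``p.grad``, so a
# backward pass must have been run first. ``_demo_collect_gradient`` is a hypothetical name.
def _demo_collect_gradient():
    import torch
    from torch import nn
    model = nn.Linear(4, 2)
    loss = model(torch.randn(8, 4)).sum()
    loss.backward()
    gradient_store = {}
    collect_gradient(model, gradient_store)
    # one entry per trainable parameter, keyed by the parameter itself
    for p, g in gradient_store.items():
        print(p.shape, g.shape)
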
def make_pair_indices(targets, same_target_ratio=0.5):
    """
    Make random indices of pairs of samples that belong or not to the same target.

    Args:
        same_target_ratio: specify the ratio of same target to be generated for sample pairs
        targets: a 1D integral tensor in range [0..C] to be used to group the samples
            into same or different target

    Returns:
        a tuple with (samples_0 indices, samples_1 indices, same_target)
    """
    # group samples by class
    samples_by_class = collections.defaultdict(list)
    classes = to_value(targets)
    for index, c in enumerate(classes):
        samples_by_class[c].append(index)
    samples_by_class = {name: np.asarray(value) for name, value in samples_by_class.items()}

    # create the (sample_0, sample_1, same_target) groups
    samples_0 = []
    samples_1 = []
    same_target = []
    for c, c_indexes in samples_by_class.items():
        samples = c_indexes.copy()
        np.random.shuffle(c_indexes)
        nb_same_targets = int(same_target_ratio * len(c_indexes))

        other = [idx for cc, idx in samples_by_class.items() if cc != c]
        other = np.concatenate(other)
        np.random.shuffle(other)

        samples_0.append(samples)
        samples_positive = c_indexes[:nb_same_targets]
        same_target += [1] * len(samples_positive)

        # we expect to have more negatives than positives, so for the negatives
        # pick the remaining pairs
        samples_negative = other[:len(c_indexes) - len(samples_positive)]
        same_target += [0] * len(samples_negative)
        samples_1.append(np.concatenate((samples_positive, samples_negative)))

    # in case the assumption was wrong (we, in fact, have more positives than negatives),
    # shorten the batch
    samples_0 = samples_0[:len(samples_1)]

    return np.concatenate(samples_0), np.concatenate(samples_1), np.asarray(same_target)

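# A minimal usage sketch, not part of the library: build sample pairs from integer targets.
# With ``same_target_ratio=0.5``, roughly half the pairs share the same target.
# ``_demo_make_pair_indices`` and the target values are purely illustrative.
def _demo_make_pair_indices():
    import torch
    targets = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2])
    samples_0, samples_1, same_target = make_pair_indices(targets, same_target_ratio=0.5)
    # samples_0[i] and samples_1[i] form a pair; same_target[i] is 1 if they share a target
    print(samples_0, samples_1, same_target)
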
def expand_classification_mapping(batch, loss_term_name, loss_term, classification_mappings, suffix='_str'):
    """
    Expand as string the class name for the classification outputs

    Args:
        batch: the batch of data
        loss_term_name: the name of the loss term to expand
        loss_term: the loss term to be expanded
        classification_mappings: a nested dict recording the class name/value
            associated with a set of ``output_name``:

            {``output_name``:
                {'mapping': {name, value}},
                {'mappinginv': {value, name}}
            }

        suffix: the suffix appended to output or target name
    """
    output_ref = loss_term.get('output_ref')
    if isinstance(output_ref, outputs_trw.OutputClassification):
        target_name = output_ref.classes_name
        if target_name is not None and classification_mappings is not None:
            mapping = classification_mappings.get(target_name)
            if mapping is not None:
                output = to_value(loss_term['output'])
                if len(output.shape) == 1:
                    output_str = [
                        utilities.get_class_name(mapping, o) for o in output
                    ]
                    batch[loss_term_name + suffix] = output_str

                    # if we record the loss term output, also record the
                    # target name as a string
                    target_name_str = target_name + suffix
                    if target_name_str not in batch:
                        target_values = batch.get(target_name)
                        if target_values is not None and len(target_values.shape) == 1:
                            target_values = [
                                utilities.get_class_name(mapping, o) for o in target_values
                            ]
                            batch[target_name_str] = target_values

def make_triplet_indices(targets):
    """
    Make random index triplets (anchor, positive, negative) such that ``anchor`` and ``positive``
    belong to the same target while ``negative`` belongs to a different target

    Args:
        targets: a 1D integral tensor in range [0..C]

    Returns:
        a tuple of indices (samples, samples_positive, samples_negative)
    """
    # group samples by class
    samples_by_class = collections.defaultdict(list)
    targets = to_value(targets)
    for index, c in enumerate(targets):
        samples_by_class[c].append(index)

    # create the (sample, sample+, sample-) groups
    samples_all = []
    samples_positive_all = []
    samples_negative_all = []
    for c, c_indexes in samples_by_class.items():
        samples = c_indexes.copy()
        samples_positive = c_indexes
        np.random.shuffle(c_indexes)

        other = [idx for cc, idx in samples_by_class.items() if cc != c]
        other = np.concatenate(other)

        # sample with replacement in case there are fewer ``negative`` samples
        # than ``positive`` samples
        samples_negative = np.random.choice(other, len(samples))

        samples_all.append(samples)
        samples_positive_all.append(samples_positive)
        samples_negative_all.append(samples_negative)

    samples_all = np.concatenate(samples_all)
    samples_positive_all = np.concatenate(samples_positive_all)
    samples_negative_all = np.concatenate(samples_negative_all)

    min_samples = min(len(samples_all), len(samples_negative_all))
    return samples_all[:min_samples], samples_positive_all[:min_samples], samples_negative_all[:min_samples]

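# A minimal usage sketch, not part of the library: build (anchor, positive, negative) triplets
# from integer targets, e.g., for a triplet loss. ``_demo_make_triplet_indices`` and the target
# values are purely illustrative.
def _demo_make_triplet_indices():
    import torch
    targets = torch.tensor([0, 0, 1, 1, 2, 2])
    anchors, positives, negatives = make_triplet_indices(targets)
    # anchors[i] and positives[i] share a target, negatives[i] has a different target
    print(anchors, positives, negatives)
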
def get_translation_from_4x4(tfm: Tensor) -> np.ndarray:
    """
    Return the translation component (last column) of a 4x4 affine transformation matrix.
    """
    assert tfm.shape == (4, 4)
    tfm = to_value(tfm)
    return tfm[0:3, 3]

def get_spacing_from_4x4(tfm: Tensor) -> List[float]:
    """
    Return the spacing (norm of each of the first three columns) of a 4x4 affine transformation matrix.
    """
    assert tfm.shape == (4, 4)
    tfm = to_value(tfm)
    spacing = [np.linalg.norm(tfm[0:3, n]) for n in range(3)]
    return spacing

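# A minimal usage sketch, not part of the library: extract translation and per-axis spacing
# from a 4x4 affine matrix. ``_demo_4x4_decomposition`` and the matrix values are illustrative.
def _demo_4x4_decomposition():
    import torch
    tfm = torch.tensor([
        [2.0, 0.0, 0.0, 10.0],
        [0.0, 3.0, 0.0, 20.0],
        [0.0, 0.0, 4.0, 30.0],
        [0.0, 0.0, 0.0, 1.0],
    ])
    print(get_translation_from_4x4(tfm))  # [10. 20. 30.]
    print(get_spacing_from_4x4(tfm))      # [2.0, 3.0, 4.0]
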
def callbacks_per_loss_term(
        dataset_name,
        split_name,
        batch,
        loss_terms,
        root,
        datasets_infos,
        loss_terms_inclusion,
        feature_exclusions,
        dataset_exclusions,
        split_exclusions,
        exported_cases,
        max_samples,
        epoch,
        sql_table,
        format,
        select_fn):
    # process the exclusions
    if dataset_name in dataset_exclusions:
        raise StopIteration()
    if split_name in split_exclusions:
        raise StopIteration()

    # copy to the current batch the specified loss terms
    classification_mappings = utilities.get_classification_mappings(
        datasets_infos, dataset_name, split_name)
    for loss_term_name, loss_term in loss_terms.items():
        for loss_term_inclusion in loss_terms_inclusion:
            if loss_term_inclusion in loss_term:
                name = f'term_{loss_term_name}_{loss_term_inclusion}'
                value = loss_term[loss_term_inclusion]
                batch[name] = to_value(value)

                # special handling of `losses`: in 2D regression the output will be 2D error
                # maps, but it is also useful to record the average error per sample
                # (e.g., to plot the worst samples)
                if loss_term_inclusion == 'losses' and len(value.shape) > 2:
                    batch[name + '_avg'] = to_value(
                        torch.mean(flatten(value), dim=1))

                expand_classification_mapping(batch, loss_term_name, loss_term, classification_mappings)

    for feature_exclusion in feature_exclusions:
        if feature_exclusion in batch:
            del batch[feature_exclusion]

    # force recording of the epoch
    batch['epoch'] = epoch

    # calculate how many samples to export
    nb_batch_samples = trw.utils.len_batch(batch)
    nb_samples_exported = len(exported_cases)
    nb_samples_to_export = min(max_samples - nb_samples_exported, nb_batch_samples)
    if nb_samples_to_export <= 0:
        raise StopIteration()

    # export the features
    samples_to_export = select_fn(batch, loss_terms)
    samples_to_export = samples_to_export[:nb_samples_to_export]
    for n in samples_to_export:
        id = n + nb_samples_exported
        exported_cases.append(id)
        name = format.format(dataset_name=dataset_name, split_name=split_name, id=id, epoch=epoch)
        reporting.export_sample(
            root,
            sql_table,
            base_name=name,
            batch=batch,
            sample_ids=[n],
            name_expansions=[],  # we already expanded in the base name!
        )