def test_takes_no_log_without_nllloss(self, net_cls, module_cls, data):
    net = net_cls(module_cls, criterion=nn.BCELoss, max_epochs=1)
    net.initialize()

    mock_loss = Mock(side_effect=lambda x, y: nn.NLLLoss()(x, y))
    net.criterion_.forward = mock_loss
    net.partial_fit(*data)  # call partial_fit to avoid re-initialization

    # check that loss was called with raw probabilities
    for (y_out, _), _ in mock_loss.call_args_list:
        assert not (y_out < 0).all()
        assert torch.isclose(torch.ones(len(y_out)), y_out.sum(1)).all()
def assert_allclose(actual, expected, rtol=None, atol=None, equal_nan=True):
    if not isinstance(actual, torch.Tensor):
        actual = torch.tensor(actual)
    if not isinstance(expected, torch.Tensor):
        expected = torch.tensor(expected, dtype=actual.dtype)
    if expected.shape != actual.shape:
        expected = expected.expand_as(actual)

    if rtol is None or atol is None:
        if rtol is not None or atol is not None:
            raise ValueError("rtol and atol must both be specified or both be unspecified")
        rtol, atol = _get_default_tolerance(actual, expected)

    close = torch.isclose(actual, expected, rtol, atol, equal_nan)
    if close.all():
        return

    # Find the worst offender
    error = (expected - actual).abs()
    expected_error = atol + rtol * expected.abs()
    delta = error - expected_error
    delta[close] = 0  # mask out NaN/inf

    _, index = delta.reshape(-1).max(0)

    # TODO: consider adding torch.unravel_index
    def _unravel_index(index, shape):
        res = []
        for size in shape[::-1]:
            res.append(int(index % size))
            index = int(index // size)
        return tuple(res[::-1])

    index = _unravel_index(index.item(), actual.shape)

    # Count number of offenders
    count = (~close).long().sum()

    msg = ('Not within tolerance rtol={} atol={} at input{} ({} vs. {}) and {}'
           ' other locations ({:2.2f}%)')
    raise AssertionError(msg.format(
        rtol, atol, list(index), actual[index].item(), expected[index].item(),
        count - 1, 100 * count / actual.numel()))
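# Hypothetical usage sketch for the assert_allclose helper above (tensor values
# invented for illustration): on a mismatch it raises an AssertionError that
# points at the worst offending element and reports how many others failed.
import torch

actual = torch.tensor([1.0, 2.0, 3.0])
expected = torch.tensor([1.0, 2.0, 3.5])
try:
    assert_allclose(actual, expected, rtol=1e-5, atol=1e-8)
except AssertionError as err:
    print(err)  # reports the index and values of the largest violation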
    return new_noise

# UNIT TEST
# Check that the basic function works
opt.zero_grad()
noise = torch.ones(20, 20) * 2
noise.requires_grad_()
fake_classes = (noise ** 2).mean()
fake_classes.backward()
new_noise = calculate_updated_noise(noise, 0.1)
assert type(new_noise) == torch.Tensor
assert tuple(new_noise.shape) == (20, 20)
assert new_noise.max() == 2.0010
assert new_noise.min() == 2.0010
assert torch.isclose(new_noise.sum(), torch.tensor(0.4) + 20 * 20 * 2)
print("Success!")

# Check that it works for generated images
opt.zero_grad()
noise = get_noise(32, z_dim).to(device).requires_grad_()
fake = gen(noise)
fake_classes = classifier(fake)[:, 0]
fake_classes.mean().backward()
noise.data = calculate_updated_noise(noise, 0.01)
fake = gen(noise)
fake_classes_new = classifier(fake)[:, 0]
assert torch.all(fake_classes_new > fake_classes)
print("Success!")

# First generate a bunch of images with the generator
def test_large_margin_softmax_and_sphereface_loss(self):
    margin = 10
    scale = 2
    for dtype in TEST_DTYPES:
        loss_funcA = LargeMarginSoftmaxLoss(margin=margin, scale=scale, num_classes=10, embedding_size=2)
        loss_funcB = SphereFaceLoss(margin=margin, scale=scale, num_classes=10, embedding_size=2)

        embedding_angles = torch.arange(0, 180)
        # multiply by 10 to make the embeddings unnormalized
        embeddings = torch.tensor(
            np.array([c_f.angle_to_coord(a) for a in embedding_angles]) * 10,
            requires_grad=True,
            dtype=dtype).to(self.device)  # 2D embeddings
        labels = torch.randint(low=0, high=10, size=(180,)).to(self.device)

        lossA = loss_funcA(embeddings, labels)
        lossB = loss_funcB(embeddings, labels)

        weightsA = loss_funcA.W
        weightsB = torch.nn.functional.normalize(loss_funcB.W, dim=0)
        product_of_magnitudesA = torch.norm(weightsA, p=2, dim=0).unsqueeze(0) * torch.norm(embeddings, p=2, dim=1).unsqueeze(1)
        product_of_magnitudesB = torch.norm(weightsB, p=2, dim=0).unsqueeze(0) * torch.norm(embeddings, p=2, dim=1).unsqueeze(1)
        cosinesA = torch.matmul(embeddings, weightsA) / product_of_magnitudesA
        cosinesB = torch.matmul(embeddings, weightsB) / product_of_magnitudesB

        coefficients = [scipy.special.binom(margin, 2 * n) for n in range((margin // 2) + 1)]

        for i, j in enumerate(labels):
            curr_cosineA = cosinesA[i, j]
            curr_cosineB = cosinesB[i, j]
            cos_with_marginA = torch.zeros(len(coefficients))
            cos_with_marginB = torch.zeros(len(coefficients))
            for z, c in enumerate(coefficients):
                curr_valA = c * (curr_cosineA ** (margin - (2 * z))) * ((1 - curr_cosineA ** 2) ** z)
                curr_valB = c * (curr_cosineB ** (margin - (2 * z))) * ((1 - curr_cosineB ** 2) ** z)
                if z % 2 == 1:
                    curr_valA *= -1
                    curr_valB *= -1
                cos_with_marginA[z] = curr_valA
                cos_with_marginB[z] = curr_valB
            cos_with_marginA = torch.sum(cos_with_marginA)
            cos_with_marginB = torch.sum(cos_with_marginB)

            angleA = torch.acos(torch.clamp(curr_cosineA, -1 + 1e-7, 1 - 1e-7))
            angleB = torch.acos(torch.clamp(curr_cosineB, -1 + 1e-7, 1 - 1e-7))
            # Equation 6: the angle needs to be between [k*pi/m and (k+1)*pi/m]
            kA = (angleA / (math.pi / margin)).floor()
            kB = (angleB / (math.pi / margin)).floor()

            cosinesA[i, j] = ((-1) ** kA) * cos_with_marginA - (2 * kA)
            cosinesB[i, j] = ((-1) ** kB) * cos_with_marginB - (2 * kB)

        cosinesA *= product_of_magnitudesA
        cosinesB *= product_of_magnitudesB
        correct_lossA = torch.nn.functional.cross_entropy(cosinesA * scale, labels)
        correct_lossB = torch.nn.functional.cross_entropy(cosinesB * scale, labels)

        rtol = 1e-2 if dtype == torch.float16 else 1e-5
        self.assertTrue(torch.isclose(lossA, correct_lossA, rtol=rtol))
        self.assertTrue(torch.isclose(lossB, correct_lossB, rtol=rtol))
def test_max(model_test, data):
    "Test that argmax score is the same as max"
    gen = Gen(model_test, data, MaxSemiring)
    score = gen.struct.sum(gen.vals)
    marginals = gen.struct.marginals(gen.vals)
    assert torch.isclose(score, gen.struct.score(gen.vals, marginals)).all()
def test_contrastive_loss(self):
    loss_funcA = ContrastiveLoss(pos_margin=0.25, neg_margin=1.5, use_similarity=False, avg_non_zero_only=True, squared_distances=True)
    loss_funcB = ContrastiveLoss(pos_margin=1.5, neg_margin=0.6, use_similarity=True, avg_non_zero_only=True)
    loss_funcC = ContrastiveLoss(pos_margin=0.25, neg_margin=1.5, use_similarity=False, avg_non_zero_only=False, squared_distances=True)
    loss_funcD = ContrastiveLoss(pos_margin=1.5, neg_margin=0.6, use_similarity=True, avg_non_zero_only=False)

    embedding_angles = [0, 20, 40, 60, 80]
    embeddings = torch.FloatTensor([c_f.angle_to_coord(a) for a in embedding_angles])  # 2D embeddings
    labels = torch.LongTensor([0, 0, 1, 1, 2])

    lossA = loss_funcA(embeddings, labels)
    lossB = loss_funcB(embeddings, labels)
    lossC = loss_funcC(embeddings, labels)
    lossD = loss_funcD(embeddings, labels)

    pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
    neg_pairs = [(0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1),
                 (2, 4), (3, 0), (3, 1), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3)]

    correct_pos_losses = [0, 0, 0, 0]
    correct_neg_losses = [0, 0, 0, 0]
    num_non_zero_pos = [0, 0, 0, 0]
    num_non_zero_neg = [0, 0, 0, 0]
    for a, p in pos_pairs:
        anchor, positive = embeddings[a], embeddings[p]
        correct_lossA = torch.relu(torch.sum((anchor - positive) ** 2) - 0.25)
        correct_lossB = torch.relu(1.5 - torch.matmul(anchor, positive))
        correct_pos_losses[0] += correct_lossA
        correct_pos_losses[1] += correct_lossB
        correct_pos_losses[2] += correct_lossA
        correct_pos_losses[3] += correct_lossB
        if correct_lossA > 0:
            num_non_zero_pos[0] += 1
            num_non_zero_pos[2] += 1
        if correct_lossB > 0:
            num_non_zero_pos[1] += 1
            num_non_zero_pos[3] += 1

    for a, n in neg_pairs:
        anchor, negative = embeddings[a], embeddings[n]
        correct_lossA = torch.relu(1.5 - torch.sum((anchor - negative) ** 2))
        correct_lossB = torch.relu(torch.matmul(anchor, negative) - 0.6)
        correct_neg_losses[0] += correct_lossA
        correct_neg_losses[1] += correct_lossB
        correct_neg_losses[2] += correct_lossA
        correct_neg_losses[3] += correct_lossB
        if correct_lossA > 0:
            num_non_zero_neg[0] += 1
            num_non_zero_neg[2] += 1
        if correct_lossB > 0:
            num_non_zero_neg[1] += 1
            num_non_zero_neg[3] += 1

    for i in range(2):
        if num_non_zero_pos[i] > 0:
            correct_pos_losses[i] /= num_non_zero_pos[i]
        if num_non_zero_neg[i] > 0:
            correct_neg_losses[i] /= num_non_zero_neg[i]

    for i in range(2, 4):
        correct_pos_losses[i] /= len(pos_pairs)
        correct_neg_losses[i] /= len(neg_pairs)

    correct_losses = [0, 0, 0, 0]
    for i in range(4):
        correct_losses[i] = correct_pos_losses[i] + correct_neg_losses[i]

    self.assertTrue(torch.isclose(lossA, correct_losses[0]))
    self.assertTrue(torch.isclose(lossB, correct_losses[1]))
    self.assertTrue(torch.isclose(lossC, correct_losses[2]))
    self.assertTrue(torch.isclose(lossD, correct_losses[3]))
def _compare_tensors_internal(
        a: torch.Tensor, b: torch.Tensor, *,
        rtol, atol, equal_nan: Union[str, bool]) -> _compare_return_type:
    assert equal_nan in {True, False, "relaxed"}
    debug_msg: Optional[str]
    # Integer (including bool) comparisons are identity comparisons
    # when rtol is zero and atol is less than one
    if ((is_integral(a.dtype) and rtol == 0 and atol < 1)
            or a.dtype is torch.bool or is_quantized(a.dtype)):
        if (a == b).all().item():
            return (True, None)

        # Gathers debug info for failed integer comparison
        # NOTE: converts to long to correctly represent differences
        # (especially between uint8 tensors)
        identity_mask = a != b
        a_flat = a.to(torch.long).flatten()
        b_flat = b.to(torch.long).flatten()
        count_non_identical = torch.sum(identity_mask, dtype=torch.long)
        diff = torch.abs(a_flat - b_flat)
        greatest_diff_index = torch.argmax(diff)

        debug_msg = ("Found {0} different element(s) (out of {1}), with the greatest "
                     "difference of {2} ({3} vs. {4}) occurring at index "
                     "{5}.".format(count_non_identical.item(),
                                   a.numel(),
                                   diff[greatest_diff_index],
                                   a_flat[greatest_diff_index],
                                   b_flat[greatest_diff_index],
                                   _unravel_index(greatest_diff_index, a.shape)))
        return (False, debug_msg)

    # Compares complex tensors' real and imaginary parts separately.
    # (see NOTE Test Framework Tensor "Equality")
    if a.is_complex():
        a = a.resolve_conj()
        b = b.resolve_conj()
        if equal_nan == "relaxed":
            a = a.clone()
            b = b.clone()
            a.real[a.imag.isnan()] = math.nan
            a.imag[a.real.isnan()] = math.nan
            b.real[b.imag.isnan()] = math.nan
            b.imag[b.real.isnan()] = math.nan

        real_result, debug_msg = _compare_tensors_internal(
            a.real, b.real, rtol=rtol, atol=atol, equal_nan=equal_nan)
        if not real_result:
            debug_msg = "Real parts failed to compare as equal! " + cast(str, debug_msg)
            return (real_result, debug_msg)

        imag_result, debug_msg = _compare_tensors_internal(
            a.imag, b.imag, rtol=rtol, atol=atol, equal_nan=equal_nan)
        if not imag_result:
            debug_msg = "Imaginary parts failed to compare as equal! " + cast(str, debug_msg)
            return (imag_result, debug_msg)

        return (True, None)

    # All other comparisons use torch.allclose directly
    if torch.allclose(a, b, rtol=rtol, atol=atol,
                      equal_nan=(equal_nan in {"relaxed", True})):
        return (True, None)

    # Gathers debug info for failed float tensor comparison
    # NOTE: converts to float64 to best represent differences
    a_flat = a.to(torch.float64).flatten()
    b_flat = b.to(torch.float64).flatten()
    diff = torch.abs(a_flat - b_flat)

    # Masks close values
    # NOTE: this avoids (inf - inf) oddities when computing the difference
    close = torch.isclose(a_flat, b_flat, rtol, atol, (equal_nan in {"relaxed", True}))
    diff[close] = 0
    nans = torch.isnan(diff)
    num_nans = nans.sum()

    outside_range = (diff > (atol + rtol * torch.abs(b_flat))) | (diff == math.inf)
    count_outside_range = torch.sum(outside_range, dtype=torch.long)
    greatest_diff_index = torch.argmax(diff)
    debug_msg = ("With rtol={0} and atol={1}, found {2} element(s) (out of {3}) whose "
                 "difference(s) exceeded the margin of error (including {4} nan comparisons). "
                 "The greatest difference was {5} ({6} vs. {7}), which "
                 "occurred at index {8}.".format(rtol, atol,
                                                 count_outside_range + num_nans,
                                                 a.numel(),
                                                 num_nans,
                                                 diff[greatest_diff_index],
                                                 a_flat[greatest_diff_index],
                                                 b_flat[greatest_diff_index],
                                                 _unravel_index(greatest_diff_index, a.shape)))
    return (False, debug_msg)
def _check_classification_inputs(
    preds: torch.Tensor,
    target: torch.Tensor,
    threshold: float,
    num_classes: Optional[int],
    is_multiclass: bool,
    top_k: Optional[int],
) -> str:
    """Performs error checking on inputs for classification.

    This ensures that preds and target take one of the shape/type combinations that are
    specified in the ``_input_format_classification`` docstring. It also checks the cases of
    overrides with ``is_multiclass`` by checking (for multi-class and multi-dim multi-class
    cases) that there are only up to 2 distinct labels.

    In case where preds are floats (probabilities), it is checked whether they are in the
    [0,1] interval.

    When ``num_classes`` is given, it is checked that it is consistent with the input cases
    (binary, multi-label, ...), and that, if available, the implied number of classes in the
    ``C`` dimension is consistent with it (as well as that the max label in target is smaller
    than it).

    When ``num_classes`` is not specified in these cases, consistency of the highest target
    value against the ``C`` dimension is checked for (multi-dimensional) multi-class cases.

    If ``top_k`` is set (not None) for inputs that do not have probability predictions (and
    are not binary), an error is raised. Similarly, if ``top_k`` is set to a number that is
    higher than or equal to the ``C`` dimension of ``preds``, an error is raised.

    Preds and target tensors are expected to be squeezed already - all dimensions should be
    greater than 1, except perhaps the first one (``N``).

    Args:
        preds: Tensor with predictions (labels or probabilities)
        target: Tensor with ground truth labels, always integers (labels)
        threshold:
            Threshold probability value for transforming probability predictions to binary
            (0,1) predictions, in the case of binary or multi-label inputs.
        num_classes:
            Number of classes. If not explicitly set, the number of classes will be inferred
            either from the shape of inputs, or the maximum label in the ``target`` and
            ``preds`` tensor, where applicable.
        top_k:
            Number of highest probability entries for each sample to convert to 1s - relevant
            only for inputs with probability predictions. The default value (``None``) will be
            interpreted as 1 for these inputs. If this parameter is set for multi-label inputs,
            it will take precedence over threshold.

            Should be left unset (``None``) for inputs with label predictions.
        is_multiclass:
            Used only in certain special cases, where you want to treat inputs as a different
            type than what they appear to be. See the parameter's
            :ref:`documentation section <pages/overview:using the is_multiclass parameter>`
            for a more detailed explanation and examples.

    Return:
        case: The case the inputs fall in, one of 'binary', 'multi-class', 'multi-label' or
            'multi-dim multi-class'
    """

    # Basic validation (that does not need case/type information)
    _basic_input_validation(preds, target, threshold, is_multiclass)

    # Check that shape/types fall into one of the cases
    case, implied_classes = _check_shape_and_type_consistency(preds, target)

    # For (multi-dim) multi-class case with prob preds, check that preds sum up to 1
    if case in (DataType.MULTICLASS, DataType.MULTIDIM_MULTICLASS) and preds.is_floating_point():
        if not torch.isclose(preds.sum(dim=1), torch.ones_like(preds.sum(dim=1))).all():
            raise ValueError("Probabilities in `preds` must sum up to 1 across the `C` dimension.")
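# Standalone sketch (not part of the helper above, values made up) of the same
# "probabilities sum to 1 across the class dimension" check done with torch.isclose.
import torch

preds = torch.softmax(torch.randn(8, 4), dim=1)  # (N, C) probability predictions
row_sums = preds.sum(dim=1)
assert torch.isclose(row_sums, torch.ones_like(row_sums)).all()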
def test_vif_p_one_for_equal_tensors(x) -> None:
    y = x.clone()
    measure = vif_p(x, y)
    assert torch.isclose(measure, torch.tensor(1.0)), \
        f'VIF for equal tensors should be 1.0, got {measure}.'
def test_bundleivp(x0, y0, y1, ones, lin, net11, net21, net31, net41):
    # Regular IVP with no bundle:
    x = x0 * ones
    cond = BundleIVP(x0, y0)
    y = cond.enforce(net11, x)
    assert torch.isclose(y, y0 * ones).all(), "y(x_0) != y_0"

    cond = BundleIVP(x0, y0, y1)
    y = cond.enforce(net11, x)
    assert all_close(y, y0), "y(x_0) != y_0"
    assert all_close(diff(y, x), y1), "y'(x_0) != y'_0"

    # Bundle in u_0:
    y_bundle = y0 * lin
    cond = BundleIVP(t_0=x0, bundle_conditions={'u_0': 0})
    y = cond.enforce(net21, x, y_bundle)
    assert torch.isclose(y, y0 * lin).all(), "y(x_0) != y_0"

    cond = BundleIVP(t_0=x0, u_0_prime=y1, bundle_conditions={'u_0': 0})
    y = cond.enforce(net21, x, y_bundle)
    assert torch.isclose(y, y0 * lin).all(), "y(x_0) != y_0"
    assert all_close(diff(y, x), y1), "y'(x_0) != y'_0"

    # Bundle in u_0_prime:
    y_prime_bundle = y1 * lin
    cond = BundleIVP(t_0=x0, u_0=y0, bundle_conditions={'u_0_prime': 0})
    y = cond.enforce(net21, x, y_prime_bundle)
    assert all_close(y, y0), "y(x_0) != y_0"
    assert torch.isclose(diff(y, x), y1 * lin).all(), "y'(x_0) != y'_0"

    # Bundle in u_0 and u_0_prime:
    cond = BundleIVP(t_0=x0, bundle_conditions={'u_0': 0, 'u_0_prime': 1})
    y = cond.enforce(net31, x, y_bundle, y_prime_bundle)
    assert torch.isclose(y, y0 * lin).all(), "y(x_0) != y_0"
    assert torch.isclose(diff(y, x), y1 * lin).all(), "y'(x_0) != y'_0"

    # Bundle in t_0:
    x = x0 * lin
    x_bundle = x0 * lin
    cond = BundleIVP(u_0=y0, bundle_conditions={'t_0': 0})
    y = cond.enforce(net21, x, x_bundle)
    assert torch.isclose(y, y0 * ones).all(), "y(x_0) != y_0"

    cond = BundleIVP(u_0=y0, u_0_prime=y1, bundle_conditions={'t_0': 0})
    y = cond.enforce(net21, x, x_bundle)
    assert all_close(y, y0), "y(x_0) != y_0"
    assert all_close(diff(y, x), y1), "y'(x_0) != y'_0"

    # Bundle in t_0 and u_0:
    cond = BundleIVP(bundle_conditions={'t_0': 0, 'u_0': 1})
    y = cond.enforce(net31, x, x_bundle, y_bundle)
    assert torch.isclose(y, y0 * lin).all(), "y(x_0) != y_0"

    cond = BundleIVP(u_0_prime=y1, bundle_conditions={'t_0': 0, 'u_0': 1})
    y = cond.enforce(net31, x, x_bundle, y_bundle)
    assert torch.isclose(y, y0 * lin).all(), "y(x_0) != y_0"
    assert all_close(diff(y, x), y1), "y'(x_0) != y'_0"

    # Bundle in t_0, u_0 and u_0_prime:
    cond = BundleIVP(bundle_conditions={'t_0': 0, 'u_0': 1, 'u_0_prime': 2})
    y = cond.enforce(net41, x, x_bundle, y_bundle, y_prime_bundle)
    assert torch.isclose(y, y0 * lin).all(), "y(x_0) != y_0"
    assert torch.isclose(diff(y, x), y1 * lin).all(), "y'(x_0) != y'_0"
def test_configure_optimizers(self, mock_atom_model):
    optimizer = mock_atom_model.configure_optimizers()
    assert isinstance(optimizer, torch.optim.Adam)
    assert torch.isclose(torch.tensor(optimizer.defaults["lr"]), torch.tensor(0.01))
def dice_score(pred: torch.Tensor, label: torch.Tensor, bg=False,
               cls_logging=False, nan_score=0.0, no_fg_score=0.0,
               apply_argmax: bool = True):
    """
    Compute dice score 1/n_classes * (2*tp)/(2*tp + fp + fn)

    Parameters
    ----------
    pred : torch.Tensor
        probability for each class
    label : torch.Tensor
        ground truth annotation. Classes are given by numeric value (not onehot encoded)
    bg : bool, optional
        compute dice for background class, by default False
    cls_logging : int, optional
        logging for individual class results
    nan_score : float, optional
        if the denominator is zero, `nan_score` is used instead.
    no_fg_score : float, optional
        if the foreground class is not present, `no_fg_score` is used instead.

    Returns
    -------
    float
        dice score
    """
    if not (pred > 0).any():
        logger.warning("Prediction only contains zeros. Dice score might be ambiguous.")

    # invert background value
    bg = (1 - int(bool(bg)))

    n_classes = pred.shape[1]
    score = 0.
    for i in range(bg, n_classes):
        tp, fp, tn, fn = compute_stat_score(pred, label, i, do_argmax=apply_argmax)

        denom = (2 * tp + fp + fn).to(torch.float)
        if not (label == i).any():
            # no foreground class
            score_cls = no_fg_score
        elif torch.isclose(denom, torch.zeros_like(denom)).any():
            # nan result
            score_cls = nan_score
        else:
            score_cls = (2 * tp).to(torch.float) / denom

        if cls_logging:
            logger.info({'value': {'value': score_cls, 'name': 'dice_cls_' + str(i)}})

        score += score_cls
    return score / (n_classes - bg)
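# Standalone sketch of the zero-denominator guard used in dice_score above:
# torch.isclose against a zero tensor catches denominators that would produce NaN.
# The tp/fp/fn values here are invented for illustration.
import torch

tp, fp, fn = torch.tensor(0.0), torch.tensor(0.0), torch.tensor(0.0)
denom = 2 * tp + fp + fn
nan_score = 0.0
score_cls = nan_score if torch.isclose(denom, torch.zeros_like(denom)) else (2 * tp) / denom
print(score_cls)  # 0.0 instead of NaN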
def test_local_remote_gradient_clipping(workers):
    """
    Real test case of gradient clipping for the remote and local parameters of an RNN
    """
    alice = workers["alice"]

    class RNN(nn.Module):
        def __init__(self, input_size, hidden_size, output_size):
            super(RNN, self).__init__()
            self.hidden_size = hidden_size
            self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
            self.i2o = nn.Linear(input_size + hidden_size, output_size)
            self.softmax = nn.LogSoftmax(dim=1)

        def forward(self, input, hidden):
            combined = torch.cat((input, hidden), 1)
            hidden = self.i2h(combined)
            output = self.i2o(combined)
            output = self.softmax(output)
            return output, hidden

        def initHidden(self):
            return torch.zeros(1, self.hidden_size)

    # let's initialize a simple RNN
    n_hidden = 128
    n_letters = 57
    n_categories = 18
    rnn = RNN(n_letters, n_hidden, n_categories)

    # Let's send the model to alice, who will be responsible for the tiny computation
    alice_model = rnn.copy().send(alice)

    # Simple input for the Recurrent Neural Network
    input_tensor = torch.zeros(size=(1, 57))
    # Just set a random category for it
    input_tensor[0][20] = 1
    alice_input = input_tensor.copy().send(alice)

    label_tensor = torch.randint(low=0, high=(n_categories - 1), size=(1,))
    alice_label = label_tensor.send(alice)

    hidden_layer = alice_model.initHidden()
    alice_hidden_layer = hidden_layer.send(alice)

    # Forward pass into the NN and its hidden layers, notice how it goes sequentially
    output, alice_hidden_layer = alice_model(alice_input, alice_hidden_layer)
    criterion = nn.NLLLoss()
    loss = criterion(output, alice_label)

    # time to backpropagate...
    loss.backward()

    # Remote gradient clipping
    remote_parameters = alice_model.parameters()
    total_norm_remote = nn.utils.clip_grad_norm_(remote_parameters, 2)

    # Local gradient clipping
    local_alice_model = alice_model.get()
    local_parameters = local_alice_model.parameters()
    total_norm_local = nn.utils.clip_grad_norm_(local_parameters, 2)

    # Is the output of the remote gradient clipping version equal to
    # the output of the local gradient clipping version?
    assert torch.isclose(total_norm_remote.get(), total_norm_local, atol=1e-4)
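# Self-contained sketch (plain PyTorch, no PySyft, model and data invented) of the
# comparison above: the total norm returned by clip_grad_norm_ should match the
# norm computed by hand from the per-parameter gradient norms.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()

manual_norm = torch.norm(torch.stack([p.grad.norm(2) for p in model.parameters()]), 2)
clipped_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
assert torch.isclose(clipped_norm, manual_norm, atol=1e-6)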
def test_rolling(agent_class: mantrap.agents.base.DTAgent.__class__):
    agent = agent_class(position=torch.zeros(2))
    controls = torch.tensor([[1, 1], [2, 2], [4, 4]]).float()
    trajectory = agent.unroll_trajectory(controls, dt=1.0)
    assert torch.all(torch.isclose(controls, agent.roll_trajectory(trajectory, dt=1.0)))
def test_snr_contrastive_loss(self):
    pos_margin, neg_margin, embedding_reg_weight = 0, 0.1, 0.1
    loss_func = SignalToNoiseRatioContrastiveLoss(
        pos_margin=pos_margin,
        neg_margin=neg_margin,
        embedding_regularizer=ZeroMeanRegularizer(),
        embedding_reg_weight=embedding_reg_weight,
    )

    for dtype in TEST_DTYPES:
        embedding_angles = [0, 20, 40, 60, 80]
        embeddings = torch.tensor(
            [c_f.angle_to_coord(a) for a in embedding_angles],
            requires_grad=True,
            dtype=dtype,
        ).to(TEST_DEVICE)  # 2D embeddings
        labels = torch.LongTensor([0, 0, 1, 1, 2])

        loss = loss_func(embeddings, labels)
        loss.backward()

        pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
        neg_pairs = [(0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4),
                     (2, 0), (2, 1), (2, 4), (3, 0), (3, 1), (3, 4),
                     (4, 0), (4, 1), (4, 2), (4, 3)]

        correct_pos_loss = 0
        correct_neg_loss = 0
        num_non_zero = 0
        for a, p in pos_pairs:
            anchor, positive = embeddings[a], embeddings[p]
            curr_loss = torch.relu(torch.var(anchor - positive) / torch.var(anchor) - pos_margin)
            correct_pos_loss += curr_loss
            if curr_loss > 0:
                num_non_zero += 1
        if num_non_zero > 0:
            correct_pos_loss /= num_non_zero

        num_non_zero = 0
        for a, n in neg_pairs:
            anchor, negative = embeddings[a], embeddings[n]
            curr_loss = torch.relu(neg_margin - torch.var(anchor - negative) / torch.var(anchor))
            correct_neg_loss += curr_loss
            if curr_loss > 0:
                num_non_zero += 1
        if num_non_zero > 0:
            correct_neg_loss /= num_non_zero

        reg_loss = torch.mean(torch.abs(torch.sum(embeddings, dim=1)))

        correct_total = correct_pos_loss + correct_neg_loss + embedding_reg_weight * reg_loss
        rtol = 1e-2 if dtype == torch.float16 else 1e-5
        self.assertTrue(torch.isclose(loss, correct_total, rtol=rtol))
def isclose(a, b):
    return torch.isclose(a, b, rtol=1e-4, atol=1e-7)
def test_return_simple(self):
    loss = self.criterion(self.outputs, self.targets)
    self.assertTrue(torch.isclose(loss, torch.tensor(0.5092423)))
def test_vif_p_works_for_zeros_tensors() -> None:
    x = torch.zeros(4, 3, 256, 256)
    y = torch.zeros(4, 3, 256, 256)
    measure = vif_p(x, y, data_range=1.)
    assert torch.isclose(measure, torch.tensor(1.0)), \
        f'VIF for 2 zero tensors should be 1.0, got {measure}.'
def test_gdc():
    edge_index = torch.tensor([[0, 0, 1, 1, 2, 2, 2, 3, 3, 4],
                               [1, 2, 0, 2, 0, 1, 3, 2, 4, 3]])

    data = Data(edge_index=edge_index, num_nodes=5)
    gdc = GDC(self_loop_weight=1, normalization_in='sym',
              normalization_out='sym',
              diffusion_kwargs=dict(method='ppr', alpha=0.15),
              sparsification_kwargs=dict(method='threshold', avg_degree=2),
              exact=True)
    data = gdc(data)
    mat = to_dense_adj(data.edge_index, edge_attr=data.edge_attr).squeeze()
    assert torch.all(mat >= -1e-8)
    assert torch.allclose(mat, mat.t(), atol=1e-4)

    data = Data(edge_index=edge_index, num_nodes=5)
    gdc = GDC(self_loop_weight=1, normalization_in='sym',
              normalization_out='sym',
              diffusion_kwargs=dict(method='heat', t=10),
              sparsification_kwargs=dict(method='threshold', avg_degree=2),
              exact=True)
    data = gdc(data)
    mat = to_dense_adj(data.edge_index, edge_attr=data.edge_attr).squeeze()
    assert torch.all(mat >= -1e-8)
    assert torch.allclose(mat, mat.t(), atol=1e-4)

    data = Data(edge_index=edge_index, num_nodes=5)
    gdc = GDC(self_loop_weight=1, normalization_in='col',
              normalization_out='col',
              diffusion_kwargs=dict(method='heat', t=10),
              sparsification_kwargs=dict(method='topk', k=2, dim=0),
              exact=True)
    data = gdc(data)
    mat = to_dense_adj(data.edge_index, edge_attr=data.edge_attr).squeeze()
    col_sum = mat.sum(0)
    assert torch.all(mat >= -1e-8)
    assert torch.all(
        torch.isclose(col_sum, torch.tensor(1.0))
        | torch.isclose(col_sum, torch.tensor(0.0)))
    assert torch.all((~torch.isclose(mat, torch.tensor(0.0))).sum(0) == 2)

    data = Data(edge_index=edge_index, num_nodes=5)
    gdc = GDC(self_loop_weight=1, normalization_in='row',
              normalization_out='row',
              diffusion_kwargs=dict(method='heat', t=5),
              sparsification_kwargs=dict(method='topk', k=2, dim=1),
              exact=True)
    data = gdc(data)
    mat = to_dense_adj(data.edge_index, edge_attr=data.edge_attr).squeeze()
    row_sum = mat.sum(1)
    assert torch.all(mat >= -1e-8)
    assert torch.all(
        torch.isclose(row_sum, torch.tensor(1.0))
        | torch.isclose(row_sum, torch.tensor(0.0)))
    assert torch.all((~torch.isclose(mat, torch.tensor(0.0))).sum(1) == 2)

    data = Data(edge_index=edge_index, num_nodes=5)
    gdc = GDC(self_loop_weight=1, normalization_in='row',
              normalization_out='row',
              diffusion_kwargs=dict(method='coeff', coeffs=[0.8, 0.3, 0.1]),
              sparsification_kwargs=dict(method='threshold', eps=0.1),
              exact=True)
    data = gdc(data)
    mat = to_dense_adj(data.edge_index, edge_attr=data.edge_attr).squeeze()
    row_sum = mat.sum(1)
    assert torch.all(mat >= -1e-8)
    assert torch.all(
        torch.isclose(row_sum, torch.tensor(1.0))
        | torch.isclose(row_sum, torch.tensor(0.0)))

    data = Data(edge_index=edge_index, num_nodes=5)
    gdc = GDC(self_loop_weight=1, normalization_in='sym',
              normalization_out='col',
              diffusion_kwargs=dict(method='ppr', alpha=0.15, eps=1e-4),
              sparsification_kwargs=dict(method='threshold', avg_degree=2),
              exact=False)
    data = gdc(data)
    mat = to_dense_adj(data.edge_index, edge_attr=data.edge_attr).squeeze()
    col_sum = mat.sum(0)
    assert torch.all(mat >= -1e-8)
    assert torch.all(
        torch.isclose(col_sum, torch.tensor(1.0))
        | torch.isclose(col_sum, torch.tensor(0.0)))
def test_triplet_margin_loss(self):
    margin = 0.2
    loss_funcA = TripletMarginLoss(margin=margin)
    loss_funcB = TripletMarginLoss(margin=margin, reducer=MeanReducer())
    loss_funcC = TripletMarginLoss(margin=margin, distance=CosineSimilarity())
    loss_funcD = TripletMarginLoss(margin=margin, reducer=MeanReducer(), distance=CosineSimilarity())
    for dtype in TEST_DTYPES:
        embedding_angles = [0, 20, 40, 60, 80]
        embeddings = torch.tensor(
            [c_f.angle_to_coord(a) for a in embedding_angles],
            requires_grad=True,
            dtype=dtype,
        ).to(self.device)  # 2D embeddings
        labels = torch.LongTensor([0, 0, 1, 1, 2])

        lossA = loss_funcA(embeddings, labels)
        lossB = loss_funcB(embeddings, labels)
        lossC = loss_funcC(embeddings, labels)
        lossD = loss_funcD(embeddings, labels)

        triplets = [
            (0, 1, 2), (0, 1, 3), (0, 1, 4),
            (1, 0, 2), (1, 0, 3), (1, 0, 4),
            (2, 3, 0), (2, 3, 1), (2, 3, 4),
            (3, 2, 0), (3, 2, 1), (3, 2, 4),
        ]

        correct_loss = 0
        correct_loss_cosine = 0
        num_non_zero_triplets = 0
        num_non_zero_triplets_cosine = 0
        for a, p, n in triplets:
            anchor, positive, negative = embeddings[a], embeddings[p], embeddings[n]
            curr_loss = torch.relu(
                torch.sqrt(torch.sum((anchor - positive) ** 2))
                - torch.sqrt(torch.sum((anchor - negative) ** 2))
                + margin)
            curr_loss_cosine = torch.relu(
                torch.sum(anchor * negative) - torch.sum(anchor * positive) + margin)
            if curr_loss > 0:
                num_non_zero_triplets += 1
            if curr_loss_cosine > 0:
                num_non_zero_triplets_cosine += 1
            correct_loss += curr_loss
            correct_loss_cosine += curr_loss_cosine

        rtol = 1e-2 if dtype == torch.float16 else 1e-5
        self.assertTrue(torch.isclose(lossA, correct_loss / num_non_zero_triplets, rtol=rtol))
        self.assertTrue(torch.isclose(lossB, correct_loss / len(triplets), rtol=rtol))
        self.assertTrue(torch.isclose(lossC, correct_loss_cosine / num_non_zero_triplets_cosine, rtol=rtol))
        self.assertTrue(torch.isclose(lossD, correct_loss_cosine / len(triplets), rtol=rtol))
def _compare_tensors_internal(a: torch.Tensor, b: torch.Tensor, *,
                              rtol, atol, equal_nan) -> _compare_return_type:
    debug_msg: Optional[str]
    # Integer (including bool) comparisons are identity comparisons
    # when rtol is zero and atol is less than one
    if ((is_integral(a.dtype) and rtol == 0 and atol < 1)
            or a.dtype is torch.bool or is_quantized(a.dtype)):
        if (a == b).all().item():
            return (True, None)

        # Gathers debug info for failed integer comparison
        # NOTE: converts to long to correctly represent differences
        # (especially between uint8 tensors)
        identity_mask = a != b
        a_flat = a.to(torch.long).flatten()
        b_flat = b.to(torch.long).flatten()
        count_non_identical = torch.sum(identity_mask, dtype=torch.long)
        diff = torch.abs(a_flat - b_flat)
        greatest_diff_index = torch.argmax(diff)

        debug_msg = ("Found {0} different element(s) (out of {1}), with the greatest "
                     "difference of {2} ({3} vs. {4}) occurring at index "
                     "{5}.".format(count_non_identical.item(),
                                   a.numel(),
                                   diff[greatest_diff_index],
                                   a_flat[greatest_diff_index],
                                   b_flat[greatest_diff_index],
                                   _unravel_index(greatest_diff_index, a.shape)))
        return (False, debug_msg)

    # All other comparisons use torch.allclose directly
    if torch.allclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan):
        return (True, None)

    # Gathers debug info for failed float tensor comparison
    # NOTE: converts to float64 to best represent differences
    a_flat = a.to(torch.float64 if not a.dtype.is_complex else torch.complex128).flatten()
    b_flat = b.to(torch.float64 if not a.dtype.is_complex else torch.complex128).flatten()
    diff = torch.abs(a_flat - b_flat)

    # Masks close values
    # NOTE: this avoids (inf - inf) oddities when computing the difference
    close = torch.isclose(a_flat, b_flat, rtol, atol, equal_nan)
    diff[close] = 0
    nans = torch.isnan(diff)
    num_nans = nans.sum()

    outside_range = (diff > (atol + rtol * torch.abs(b_flat))) | (diff == math.inf)
    count_outside_range = torch.sum(outside_range, dtype=torch.long)
    greatest_diff_index = torch.argmax(diff)
    debug_msg = ("With rtol={0} and atol={1}, found {2} element(s) (out of {3}) whose "
                 "difference(s) exceeded the margin of error (including {4} nan comparisons). "
                 "The greatest difference was {5} ({6} vs. {7}), which "
                 "occurred at index {8}.".format(rtol, atol,
                                                 count_outside_range + num_nans,
                                                 a.numel(),
                                                 num_nans,
                                                 diff[greatest_diff_index],
                                                 a_flat[greatest_diff_index],
                                                 b_flat[greatest_diff_index],
                                                 _unravel_index(greatest_diff_index, a.shape)))
    return (False, debug_msg)
def is_near_zero(tens: torch.Tensor, rtol: float = 1e-05, atol: float = 1e-08,
                 equal_nan: bool = False) -> torch.Tensor:
    z = torch.zeros(1, dtype=tens.dtype, device=tens.device)
    return torch.isclose(tens, other=z, rtol=rtol, atol=atol, equal_nan=equal_nan)
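# Minimal usage sketch for is_near_zero above (example values invented): the
# single-element zero tensor broadcasts against `tens`, so an element-wise
# boolean mask comes back.
import torch

tens = torch.tensor([0.0, 1e-9, 0.5])
print(is_near_zero(tens))            # tensor([ True,  True, False])
print(is_near_zero(tens).nonzero())  # indices of the (near-)zero entries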
def isclose(x, y, rtol=rtol, atol=atol):
    if not torch.is_tensor(x):
        x = torch.tensor(x)
    if not torch.is_tensor(y):
        y = torch.tensor(y)
    return torch.isclose(x, y, atol=atol, rtol=rtol)
def test_ntxent_loss(self):
    temperature = 0.1
    loss_funcA = NTXentLoss(temperature=temperature)
    loss_funcB = NTXentLoss(temperature=temperature, distance=LpDistance())
    loss_funcC = NTXentLoss(temperature=temperature, reducer=PerAnchorReducer(AvgNonZeroReducer()))
    loss_funcD = SupConLoss(temperature=temperature)
    loss_funcE = SupConLoss(temperature=temperature, distance=LpDistance())

    for dtype in TEST_DTYPES:
        embedding_angles = [0, 10, 20, 50, 60, 80]
        embeddings = torch.tensor(
            [c_f.angle_to_coord(a) for a in embedding_angles],
            requires_grad=True,
            dtype=dtype,
        ).to(TEST_DEVICE)  # 2D embeddings
        labels = torch.LongTensor([0, 0, 0, 1, 1, 2])

        obtained_losses = [
            x(embeddings, labels)
            for x in [loss_funcA, loss_funcB, loss_funcC, loss_funcD, loss_funcE]
        ]

        pos_pairs = [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1), (3, 4), (4, 3)]
        neg_pairs = [
            (0, 3), (0, 4), (0, 5),
            (1, 3), (1, 4), (1, 5),
            (2, 3), (2, 4), (2, 5),
            (3, 0), (3, 1), (3, 2), (3, 5),
            (4, 0), (4, 1), (4, 2), (4, 5),
            (5, 0), (5, 1), (5, 2), (5, 3), (5, 4),
        ]

        total_lossA, total_lossB, total_lossC, total_lossD, total_lossE = (
            0,
            0,
            torch.zeros(5, device=TEST_DEVICE, dtype=dtype),
            torch.zeros(5, device=TEST_DEVICE, dtype=dtype),
            torch.zeros(5, device=TEST_DEVICE, dtype=dtype),
        )
        for a1, p in pos_pairs:
            anchor, positive = embeddings[a1], embeddings[p]
            numeratorA = torch.exp(torch.matmul(anchor, positive) / temperature)
            numeratorB = torch.exp(-torch.sqrt(torch.sum((anchor - positive) ** 2)) / temperature)
            denominatorA = numeratorA.clone()
            denominatorB = numeratorB.clone()
            denominatorD = 0
            denominatorE = 0
            for a2, n in pos_pairs + neg_pairs:
                if a2 == a1:
                    negative = embeddings[n]
                    curr_denomD = torch.exp(torch.matmul(anchor, negative) / temperature)
                    curr_denomE = torch.exp(-torch.sqrt(torch.sum((anchor - negative) ** 2)) / temperature)
                    denominatorD += curr_denomD
                    denominatorE += curr_denomE
                    if (a2, n) not in pos_pairs:
                        denominatorA += curr_denomD
                        denominatorB += curr_denomE
                else:
                    continue
            curr_lossA = -torch.log(numeratorA / denominatorA)
            curr_lossB = -torch.log(numeratorB / denominatorB)
            curr_lossD = -torch.log(numeratorA / denominatorD)
            curr_lossE = -torch.log(numeratorB / denominatorE)
            total_lossA += curr_lossA
            total_lossB += curr_lossB
            total_lossC[a1] += curr_lossA
            total_lossD[a1] += curr_lossD
            total_lossE[a1] += curr_lossE

        total_lossA /= len(pos_pairs)
        total_lossB /= len(pos_pairs)
        pos_pair_per_anchor = torch.tensor([2, 2, 2, 1, 1], device=TEST_DEVICE, dtype=dtype)
        total_lossC, total_lossD, total_lossE = [
            torch.mean(x / pos_pair_per_anchor)
            for x in [total_lossC, total_lossD, total_lossE]
        ]

        rtol = 1e-2 if dtype == torch.float16 else 1e-5
        self.assertTrue(torch.isclose(obtained_losses[0], total_lossA, rtol=rtol))
        self.assertTrue(torch.isclose(obtained_losses[1], total_lossB, rtol=rtol))
        self.assertTrue(torch.isclose(obtained_losses[2], total_lossC, rtol=rtol))
        self.assertTrue(torch.isclose(obtained_losses[3], total_lossD, rtol=rtol))
        self.assertTrue(torch.isclose(obtained_losses[4], total_lossE, rtol=rtol))
def ignore_alignment(data):
    # log_potentials = torch.ones(2, 2, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 3, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 6, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 7, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 8, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)
    # assert False

    # model = data.draw(sampled_from([Alignment]))
    # semiring = data.draw(sampled_from([StdSemiring]))
    # struct = model(semiring)
    # vals, (batch, N) = model._rand()
    # print(batch, N)
    # struct = model(semiring)
    # # , max_gap=max(3, abs(vals.shape[1] - vals.shape[2]) + 1))
    # vals.fill_(1)
    # alpha = struct.sum(vals)

    model = data.draw(sampled_from([Alignment]))
    semiring = data.draw(sampled_from([StdSemiring]))
    test = test_lookup[model](semiring)
    struct = model(semiring, sparse_rounds=10)
    vals, (batch, N) = test._rand()
    alpha = struct.sum(vals)
    count = test.enumerate(vals)[0]
    assert torch.isclose(count, alpha).all()

    model = data.draw(sampled_from([Alignment]))
    semiring = data.draw(sampled_from([LogSemiring]))
    struct = model(semiring, sparse_rounds=10)
    vals, (batch, N) = model._rand()
    alpha = struct.sum(vals)
    count = test_lookup[model](semiring).enumerate(vals)[0]
    assert torch.isclose(count, alpha).all()

    # model = data.draw(sampled_from([Alignment]))
    # semiring = data.draw(sampled_from([MaxSemiring]))
    # struct = model(semiring)
    # log_potentials = torch.ones(2, 2, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)

    log_potentials = torch.ones(2, 2, 8, 3)
    v = Alignment(MaxSemiring).sum(log_potentials)
    # print(v)
    # assert False
    m = Alignment(MaxSemiring).marginals(log_potentials)
    score = Alignment(MaxSemiring).score(log_potentials, m)
    assert torch.isclose(v, score).all()

    semiring = data.draw(sampled_from([MaxSemiring]))
    struct = model(semiring, local=True)
    test = test_lookup[model](semiring)
    vals, (batch, N) = test._rand()
    vals[..., 0] = -2 * vals[..., 0].abs()
    vals[..., 1] = vals[..., 1].abs()
    vals[..., 2] = -2 * vals[..., 2].abs()
    alpha = struct.sum(vals)
    count = test.enumerate(vals)[0]
    mx = struct.marginals(vals)
    print(alpha, count)
    print(mx[0].nonzero())
    # assert torch.isclose(count, alpha).all()

    struct = model(semiring, max_gap=1)
    alpha = struct.sum(vals)
def __init__(
    self,
    loc: torch.Tensor,
    concentration: torch.Tensor,
    change_magnitude_sampling_algorithm: str = "wood",
):
    if loc.dim() < 1:
        raise ValueError("loc must be at least one-dimensional.")
    if concentration.dim() > 2 or (concentration.dim() == 2 and concentration.shape[-1] != 1):
        raise ValueError("""
            `concentration` should be a tensor of a single value with shape (1,) or batched
            with shapes (batch_size,) or (batch_size, 1); got {} instead
        """.format(concentration.size()))

    # For single batches, unsqueeze to (batch_size, dimension) where batch_size = 1.
    if loc.dim() == 1:
        loc = loc.unsqueeze(0)

    # TODO: Some torch distributions will repeat a parameter like this if only one is defined.
    if loc.shape[0] != concentration.shape[0]:
        raise ValueError("""
            batch size for loc ({}) and concentration ({}) differ; concentration should be
            defined for each mean
        """.format(loc.shape[0], concentration.shape[0]))

    # Invariant: `self.concentration` should always have the shape (batch_size,).
    # Feedforward layers may project to a single dimension and produce shape (batch_size, 1).
    # Computing batched latent representations (w; sqrt(1 - w^t) v.T)^T however requires
    # (batch_size,) for proper matrix multiply.
    if concentration.dim() > 1:
        concentration = concentration.squeeze(-1)

    if change_magnitude_sampling_algorithm.lower() not in ("wood", "ulrich"):
        raise ValueError("unsupported change magnitude sampling algorithm: {}".format(
            change_magnitude_sampling_algorithm))

    loc_norm = loc.norm(dim=-1)
    if not torch.all(torch.isclose(loc_norm, torch.ones(loc_norm.size()))):
        raise ValueError("""
            loc is not normalized; loc should be either a normalized tensor or a batched tensor
            normalized in the final dimension, instead L2 norm(s) of loc is {}
        """.format(loc_norm))

    self.loc = loc  # Shape: (batch_size, m)
    self.concentration = concentration  # Shape: (batch_size,)

    change_magnitude_sampling_algorithms = {
        "wood": self._rejection_sample_wood,
        "ulrich": self._rejection_sample_ulrich,
    }
    self._rejection_sample = change_magnitude_sampling_algorithms[change_magnitude_sampling_algorithm]

    # Distribution is set on the `(self._m - 1)` sphere.
    self._m = self.loc.shape[-1]

    batch_shape = loc.shape
    event_shape = torch.Size()
    super(VonMisesFisher, self).__init__(batch_shape=batch_shape, event_shape=event_shape)
def check(self, value):
    square_check = super().check(value)
    if not square_check.all():
        return square_check
    return torch.isclose(value, value.mT, atol=1e-6).all(-2).all(-1)
def test_multi_similarity_loss(self):
    for dtype in TEST_DTYPES:
        if dtype == torch.float16:
            alpha, beta, base = 0.1, 10, 0.5
        else:
            alpha, beta, base = 0.1, 40, 0.5
        loss_func = MultiSimilarityLoss(alpha=alpha, beta=beta, base=base)

        embedding_angles = [0, 20, 40, 60, 80]
        embeddings = torch.tensor(
            [c_f.angle_to_coord(a) for a in embedding_angles],
            requires_grad=True,
            dtype=dtype,
        ).to(TEST_DEVICE)  # 2D embeddings
        labels = torch.LongTensor([0, 0, 1, 1, 2])

        loss = loss_func(embeddings, labels)
        loss.backward()

        pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
        neg_pairs = [(0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4),
                     (2, 0), (2, 1), (2, 4), (3, 0), (3, 1), (3, 4),
                     (4, 0), (4, 1), (4, 2), (4, 3)]

        correct_total = 0
        for i in range(len(embeddings)):
            correct_pos_loss = 0
            correct_neg_loss = 0
            for a, p in pos_pairs:
                if a == i:
                    anchor, positive = embeddings[a], embeddings[p]
                    correct_pos_loss += torch.exp(-alpha * (torch.matmul(anchor, positive) - base))
            if correct_pos_loss > 0:
                correct_pos_loss = (1 / alpha) * torch.log(1 + correct_pos_loss)

            for a, n in neg_pairs:
                if a == i:
                    anchor, negative = embeddings[a], embeddings[n]
                    correct_neg_loss += torch.exp(beta * (torch.matmul(anchor, negative) - base))
            if correct_neg_loss > 0:
                correct_neg_loss = (1 / beta) * torch.log(1 + correct_neg_loss)

            correct_total += correct_pos_loss + correct_neg_loss

        correct_total /= embeddings.size(0)
        rtol = 1e-2 if dtype == torch.float16 else 1e-5
        self.assertTrue(torch.isclose(loss, correct_total, rtol=rtol))
SA = S.matmul(AM)
TAR = T.matmul(AR)
TAW = T.matmul(AM).matmul(W)
SAW = SA.matmul(W)

C = TAR
D = SAW
G = TAW

# Removes batch entries with rank-deficient C or D
U_c, Sig_c, V_c = torch.svd(C)
U_d, Sig_d, V_d = torch.svd(D.permute(0, 2, 1))

Sig_c_cpu = Sig_c.cpu()
bool_array = torch.isclose(Sig_c_cpu, torch.zeros_like(Sig_c_cpu), atol=1e-4)
zero_inds = torch.nonzero(bool_array)
unique_c, counts = np.unique(zero_inds[:, 0], return_counts=True)

Sig_d_cpu = Sig_d.cpu()
bool_array = torch.isclose(Sig_d_cpu, torch.zeros_like(Sig_d_cpu), atol=1e-4)
zero_inds = torch.nonzero(bool_array)
unique_d, counts = np.unique(zero_inds[:, 0], return_counts=True)

good_ind = np.arange(args.bs)
good_ind = np.delete(good_ind, unique_c)
good_ind = np.delete(good_ind, unique_d)
def all_close(x_tensor, y_tensor, rtol=5e-4, atol=1e-6, equal_nan=False):
    if isinstance(y_tensor, (float, int)):
        y_tensor = torch.ones_like(x_tensor) * y_tensor
    return torch.isclose(x_tensor, y_tensor, rtol=rtol, atol=atol, equal_nan=equal_nan).all()
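# Minimal usage sketch for all_close above (values made up): a scalar second
# argument is broadcast to the shape of the first tensor before comparing.
import torch

x = torch.full((3,), 2.0)
print(all_close(x, 2.0))                             # tensor(True)
print(all_close(x, torch.tensor([2.0, 2.0, 2.1])))   # tensor(False)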
def sgd_train_linear_model(
    model: LinearModel,
    dataloader: DataLoader,
    construct_kwargs: Dict[str, Any],
    max_epoch: int = 100,
    reduce_lr: bool = True,
    initial_lr: float = 0.01,
    alpha: float = 1.0,
    loss_fn: Callable = l2_loss,
    reg_term: Optional[int] = 1,
    patience: int = 10,
    threshold: float = 1e-4,
    running_loss_window: Optional[int] = None,
    device: Optional[str] = None,
    init_scheme: str = "zeros",
    debug: bool = False,
) -> Dict[str, float]:
    r"""
    Trains a linear model with SGD. This will continue to iterate your
    dataloader until we converged to a solution or alternatively until we have
    exhausted `max_epoch`.

    Convergence is defined by the loss not changing by `threshold` amount for
    `patience` number of iterations.

    Args:
        model
            The model to train
        dataloader
            The data to train it with. We will assume the dataloader produces
            either pairs or triples of the form (x, y) or (x, y, w). Where x and
            y are typical pairs for supervised learning and w is a weight
            vector.

            We will call `model._construct_model_params` with construct_kwargs
            and the input features set to `x.shape[1]` (`x.shape[0]` corresponds
            to the batch size). We assume that `len(x.shape) == 2`, i.e. the
            tensor is flat. The number of output features will be set to
            y.shape[1] or 1 (if `len(y.shape) == 1`); we require
            `len(y.shape) <= 2`.
        max_epoch
            The maximum number of epochs to exhaust
        reduce_lr
            Whether or not to reduce the learning rate as iterations progress.
            Halves the learning rate when the training loss does not move. This
            uses torch.optim.lr_scheduler.ReduceLROnPlateau and uses the
            parameters `patience` and `threshold`
        initial_lr
            The initial learning rate to use.
        alpha
            A constant for the regularization term.
        loss_fn
            The loss to optimise for. This must accept three parameters:
            x1 (predicted), x2 (labels) and a weight vector
        reg_term
            Regularization is defined by the `reg_term` norm of the weights.
            Please use `None` if you do not wish to use regularization.
        patience
            Defines the number of iterations in a row the loss must remain
            within `threshold` in order to be classified as converged.
        threshold
            Threshold for convergence detection.
        running_loss_window
            Used to report the training loss once we have finished training and
            to determine when we have converged (along with reducing the
            learning rate).

            The reported training loss will take the last `running_loss_window`
            iterations and average them.

            If `None` we will approximate this to be the number of examples in
            an epoch.
        init_scheme
            Initialization to use prior to training the linear model.
        device
            The device to send the model and data to. If None then no `.to` call
            will be used.
        debug
            Whether to print the loss, learning rate per iteration

    Returns
        This will return the final training loss (averaged with
        `running_loss_window`)
    """
    loss_window: List[torch.Tensor] = []
    min_avg_loss = None
    convergence_counter = 0
    converged = False

    def get_point(datapoint):
        if len(datapoint) == 2:
            x, y = datapoint
            w = None
        else:
            x, y, w = datapoint

        if device is not None:
            x = x.to(device)
            y = y.to(device)
            if w is not None:
                w = w.to(device)

        return x, y, w

    # get a point and construct the model
    data_iter = iter(dataloader)
    x, y, w = get_point(next(data_iter))

    model._construct_model_params(
        in_features=x.shape[1],
        out_features=y.shape[1] if len(y.shape) == 2 else 1,
        **construct_kwargs,
    )
    model.train()

    assert model.linear is not None

    if init_scheme is not None:
        assert init_scheme in ["xavier", "zeros"]

        with torch.no_grad():
            if init_scheme == "xavier":
                torch.nn.init.xavier_uniform_(model.linear.weight)
            else:
                model.linear.weight.zero_()

            if model.linear.bias is not None:
                model.linear.bias.zero_()

    optim = torch.optim.SGD(model.parameters(), lr=initial_lr)
    if reduce_lr:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optim, factor=0.5, patience=patience, threshold=threshold
        )
    else:
        scheduler = None  # so the `if scheduler` check below is well-defined

    t1 = time.time()
    epoch = 0
    i = 0
    while epoch < max_epoch:
        while True:  # for x, y, w in dataloader
            if running_loss_window is None:
                running_loss_window = x.shape[0] * len(dataloader)

            y = y.view(x.shape[0], -1)
            if w is not None:
                w = w.view(x.shape[0], -1)

            i += 1

            out = model(x)

            loss = loss_fn(y, out, w)
            if reg_term is not None:
                reg = torch.norm(model.linear.weight, p=reg_term)
                loss += reg.sum() * alpha

            if len(loss_window) >= running_loss_window:
                loss_window = loss_window[1:]
            loss_window.append(loss.clone().detach())
            assert len(loss_window) <= running_loss_window

            average_loss = torch.mean(torch.stack(loss_window))
            if min_avg_loss is not None:
                # if we haven't improved by at least `threshold`
                if average_loss > min_avg_loss or torch.isclose(
                    min_avg_loss, average_loss, atol=threshold
                ):
                    convergence_counter += 1
                    if convergence_counter >= patience:
                        converged = True
                        break
                else:
                    convergence_counter = 0
            if min_avg_loss is None or min_avg_loss >= average_loss:
                min_avg_loss = average_loss.clone()

            if debug:
                print(
                    f"lr={optim.param_groups[0]['lr']}, Loss={loss},"
                    + f"Aloss={average_loss}, min_avg_loss={min_avg_loss}"
                )

            loss.backward()

            optim.step()
            model.zero_grad()
            if scheduler:
                scheduler.step(average_loss)

            temp = next(data_iter, None)
            if temp is None:
                break
            x, y, w = get_point(temp)

        if converged:
            break

        epoch += 1
        data_iter = iter(dataloader)
        x, y, w = get_point(next(data_iter))

    t2 = time.time()
    return {
        "train_time": t2 - t1,
        "train_loss": torch.mean(torch.stack(loss_window)).item(),
        "train_iter": i,
        "train_epoch": epoch,
    }
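# Standalone sketch of the convergence test used above (loss values invented):
# the running average only counts as "improved" when it beats the best average
# seen so far by more than `threshold`, which torch.isclose expresses via atol.
import torch

threshold = 1e-4
min_avg_loss = torch.tensor(0.52310)
average_loss = torch.tensor(0.52305)
stalled = average_loss > min_avg_loss or torch.isclose(min_avg_loss, average_loss, atol=threshold)
print(bool(stalled))  # True: the change is within the threshold, so it counts toward patience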
def _log_prob_with_subsetting(self,
                              obs: Tensor,
                              group_idx: Selector,
                              time_idx: Selector,
                              measure_idx: Selector,
                              method: str = 'independent',
                              lower: Optional[Tensor] = None,
                              upper: Optional[Tensor] = None) -> Tensor:
    self._check_lp_sub_input(group_idx, time_idx)

    idx_no_measure = bmat_idx(group_idx, time_idx)
    idx_3d = bmat_idx(group_idx, time_idx, measure_idx)
    idx_4d = bmat_idx(group_idx, time_idx, measure_idx, measure_idx)

    # subset obs, lower, upper:
    obs, lower, upper = obs[idx_3d], lower[idx_3d], upper[idx_3d]

    if method.lower() == 'update':
        means = self.means[idx_no_measure]
        covs = self.covs[idx_no_measure]
        H = self.H[idx_3d]
        R = self.R[idx_4d]
        measured_means = H.matmul(means.unsqueeze(-1)).squeeze(-1)

        # calculate prob-obs:
        prob_lo, prob_up = tobit_probs(mean=measured_means, cov=R, lower=lower, upper=upper)
        prob_obs = torch.diag_embed(1 - prob_up - prob_lo)

        # calculate adjusted measure mean and cov:
        mm_adj, R_adj = tobit_adjustment(mean=measured_means,
                                         cov=R,
                                         lower=lower,
                                         upper=upper,
                                         probs=(prob_lo, prob_up))

        # system uncertainty:
        Ht = H.permute(0, 1, 3, 2)
        system_uncertainty = prob_obs.matmul(H).matmul(covs).matmul(Ht).matmul(prob_obs) + R_adj

        # log prob:
        dist = torch.distributions.MultivariateNormal(mm_adj, system_uncertainty)
        return dist.log_prob(obs)

    elif method.lower() == 'independent':
        #
        pred_mean = self.predictions[idx_3d]
        pred_cov = self.prediction_uncertainty[idx_4d]
        #
        cens_up = torch.isclose(obs, upper)
        cens_lo = torch.isclose(obs, lower)
        #
        loglik_uncens = torch.zeros_like(obs)
        loglik_cens_up = torch.zeros_like(obs)
        loglik_cens_lo = torch.zeros_like(obs)
        for m in range(pred_mean.shape[-1]):
            std = pred_cov[..., m, m].sqrt()
            z = (pred_mean[..., m] - obs[..., m]) / std

            # pdf is well behaved at tails:
            loglik_uncens[..., m] = std_normal.log_prob(z) - std.log()

            # but cdf is not, clamp:
            z = torch.clamp(z, -5., 5.)
            loglik_cens_up[..., m] = std_normal.cdf(z).log()
            loglik_cens_lo[..., m] = (1. - std_normal.cdf(z)).log()

        loglik = torch.zeros_like(obs)
        loglik[cens_up] = loglik_cens_up[cens_up]
        loglik[cens_lo] = loglik_cens_lo[cens_lo]
        loglik[~(cens_up | cens_lo)] = loglik_uncens[~(cens_up | cens_lo)]

        # take the product of the dimension probs (i.e., assume independence)
        return torch.sum(loglik, -1)
    else:
        raise RuntimeError("Expected method to be one of: {}.".format({'update', 'independent'}))
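# Tiny standalone illustration (made-up values, not part of the class above) of
# the censoring masks used in the 'independent' branch: observations that sit
# exactly at the bounds are flagged with torch.isclose.
import torch

obs = torch.tensor([0.0, 2.3, 5.0])
lower, upper = torch.zeros(3), torch.full((3,), 5.0)
cens_lo = torch.isclose(obs, lower)
cens_up = torch.isclose(obs, upper)
print(cens_lo, cens_up)  # tensor([ True, False, False]) tensor([False, False,  True])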