Exemplo n.º 1
0
    def test_takes_no_log_without_nllloss(self, net_cls, module_cls, data):
        """Check that a non-NLLLoss criterion is fed raw probabilities.

        The net is built with ``nn.BCELoss`` as criterion, whose ``forward``
        is then replaced by a spy that delegates to ``nn.NLLLoss``. Every
        prediction the spy receives must not be all-negative and must sum
        to one along dimension 1.
        """
        def nll_forward(x, y):
            return nn.NLLLoss()(x, y)

        forward_spy = Mock(side_effect=nll_forward)

        net = net_cls(module_cls, criterion=nn.BCELoss, max_epochs=1)
        net.initialize()
        net.criterion_.forward = forward_spy
        # partial_fit (rather than fit) avoids re-initialization, which
        # would discard the patched criterion
        net.partial_fit(*data)

        # every recorded call must have received raw probabilities
        for recorded_call in forward_spy.call_args_list:
            positional, _ = recorded_call
            y_out, _ = positional
            row_sums = y_out.sum(1)
            assert not (y_out < 0).all()
            assert torch.isclose(torch.ones(len(y_out)), row_sums).all()
Exemplo n.º 2
0
def assert_allclose(actual, expected, rtol=None, atol=None, equal_nan=True):
    """Raise ``AssertionError`` unless ``actual`` is element-wise close to ``expected``.

    Non-tensor inputs are converted to tensors (``expected`` takes the dtype
    of ``actual``) and ``expected`` is expanded to the shape of ``actual``
    when the shapes differ. ``rtol`` and ``atol`` must be given together;
    when both are omitted they are obtained from ``_get_default_tolerance``.

    Raises:
        ValueError: if exactly one of ``rtol`` / ``atol`` is specified.
        AssertionError: if any element is outside the tolerance; the message
            reports the worst offending index and how many elements failed.
    """
    actual = actual if isinstance(actual, torch.Tensor) else torch.tensor(actual)
    if not isinstance(expected, torch.Tensor):
        expected = torch.tensor(expected, dtype=actual.dtype)
    if expected.shape != actual.shape:
        expected = expected.expand_as(actual)

    if (rtol is None) != (atol is None):
        raise ValueError("rtol and atol must both be specified or both be unspecified")
    if rtol is None:
        rtol, atol = _get_default_tolerance(actual, expected)

    close = torch.isclose(actual, expected, rtol, atol, equal_nan)
    if close.all():
        return

    # Locate the element that exceeds its allowed error by the largest amount
    excess = (expected - actual).abs() - (atol + rtol * expected.abs())
    excess[close] = 0  # mask out NaN/inf
    flat_index = excess.reshape(-1).max(0)[1].item()

    # TODO: consider adding torch.unravel_index
    # Turn the flat offset into one coordinate per dimension, last dim first
    coords = []
    for size in reversed(actual.shape):
        flat_index, coord = divmod(flat_index, size)
        coords.append(coord)
    index = tuple(reversed(coords))

    # Number of elements outside the tolerance
    count = (~close).long().sum()

    template = ('Not within tolerance rtol={} atol={} at input{} '
                '({} vs. {}) and {} other locations ({:2.2f}%)')
    raise AssertionError(template.format(
        rtol,
        atol,
        list(index),
        actual[index].item(),
        expected[index].item(),
        count - 1,
        100 * count / actual.numel(),
    ))
Exemplo n.º 3
0
    return new_noise


# UNIT TEST
# Check that the basic function works: one update step with weight 0.1 on a
# constant noise tensor whose score is mean(noise**2).
opt.zero_grad()
noise = torch.ones(20, 20) * 2
noise.requires_grad_()
fake_classes = (noise**2).mean()
fake_classes.backward()
new_noise = calculate_updated_noise(noise, 0.1)
# isinstance is the idiomatic type check (also accepts Tensor subclasses)
assert isinstance(new_noise, torch.Tensor)
assert tuple(new_noise.shape) == (20, 20)
# Compare floats with a tolerance instead of exact equality, consistent with
# the sum check below; exact `==` depends on float32 rounding of the update.
assert torch.isclose(new_noise.max(), torch.tensor(2.0010))
assert torch.isclose(new_noise.min(), torch.tensor(2.0010))
assert torch.isclose(new_noise.sum(), torch.tensor(0.4) + 20 * 20 * 2)
print("Success!")

# Check that it works for generated images: after one update step with
# weight 0.01, every sample's classifier output in column 0 must increase.
opt.zero_grad()
noise = get_noise(32, z_dim).to(device).requires_grad_()
fake = gen(noise)
fake_classes = classifier(fake)[:, 0]
fake_classes.mean().backward()
noise.data = calculate_updated_noise(noise, 0.01)
fake = gen(noise)
fake_classes_new = classifier(fake)[:, 0]
assert torch.all(fake_classes_new > fake_classes)
print("Success!")

# First generate a bunch of images with the generator
    def test_large_margin_softmax_and_sphereface_loss(self):
        """Check both margin losses against a reference computed in this test.

        For every dtype in ``TEST_DTYPES`` the test builds a
        ``LargeMarginSoftmaxLoss`` (A) and a ``SphereFaceLoss`` (B) with the
        same margin and scale, evaluates them on 180 two-dimensional
        embeddings with random labels, and then rebuilds the expected logits
        by hand: the cosine of each sample with its target class is replaced
        by ``(-1)**k * cos_with_margin - 2 * k``, the result is multiplied by
        the product of weight and embedding norms, scaled, and passed to
        cross-entropy. The only difference between the two references is
        that B uses the loss weights normalized along ``dim=0``.
        """
        margin = 10
        scale = 2
        for dtype in TEST_DTYPES:
            loss_funcA = LargeMarginSoftmaxLoss(margin=margin,
                                                scale=scale,
                                                num_classes=10,
                                                embedding_size=2)
            loss_funcB = SphereFaceLoss(margin=margin,
                                        scale=scale,
                                        num_classes=10,
                                        embedding_size=2)

            # one embedding per integer angle 0..179
            # (presumably degrees; depends on c_f.angle_to_coord - TODO confirm)
            embedding_angles = torch.arange(0, 180)
            # multiply by 10 to make the embeddings unnormalized
            embeddings = torch.tensor(
                np.array([c_f.angle_to_coord(a)
                          for a in embedding_angles]) * 10,
                requires_grad=True,
                dtype=dtype).to(self.device)  # 2D embeddings
            # random class label in [0, 10) for each of the 180 embeddings
            labels = torch.randint(low=0, high=10,
                                   size=(180, )).to(self.device)

            # losses under test
            lossA = loss_funcA(embeddings, labels)
            lossB = loss_funcB(embeddings, labels)

            # reference weights: raw for A, normalized along dim 0 for B
            weightsA = loss_funcA.W
            weightsB = torch.nn.functional.normalize(loss_funcB.W, dim=0)

            # |w_j| * |x_i| for every (sample i, class j) pair
            product_of_magnitudesA = torch.norm(
                weightsA, p=2, dim=0).unsqueeze(0) * torch.norm(
                    embeddings, p=2, dim=1).unsqueeze(1)
            product_of_magnitudesB = torch.norm(
                weightsB, p=2, dim=0).unsqueeze(0) * torch.norm(
                    embeddings, p=2, dim=1).unsqueeze(1)
            # cosine between every embedding and every class weight vector
            cosinesA = torch.matmul(embeddings,
                                    weightsA) / (product_of_magnitudesA)
            cosinesB = torch.matmul(embeddings,
                                    weightsB) / (product_of_magnitudesB)
            # binomial coefficients C(margin, 2n) for n = 0 .. margin // 2
            coefficients = [
                scipy.special.binom(margin, 2 * n)
                for n in range((margin // 2) + 1)
            ]

            # replace the target-class cosine of each sample i (label j)
            for i, j in enumerate(labels):
                curr_cosineA = cosinesA[i, j]
                curr_cosineB = cosinesB[i, j]
                cos_with_marginA = torch.zeros(len(coefficients))
                cos_with_marginB = torch.zeros(len(coefficients))
                # terms c * cos**(margin - 2z) * (1 - cos**2)**z with
                # alternating sign (odd z is negated)
                for z, c in enumerate(coefficients):
                    curr_valA = c * (curr_cosineA**(margin - (2 * z))) * (
                        (1 - curr_cosineA**2)**z)
                    curr_valB = c * (curr_cosineB**(margin - (2 * z))) * (
                        (1 - curr_cosineB**2)**z)
                    if z % 2 == 1:
                        curr_valA *= -1
                        curr_valB *= -1
                    cos_with_marginA[z] = curr_valA
                    cos_with_marginB[z] = curr_valB

                cos_with_marginA = torch.sum(cos_with_marginA)
                cos_with_marginB = torch.sum(cos_with_marginB)
                # clamp strictly inside (-1, 1) before taking acos
                angleA = torch.acos(
                    torch.clamp(curr_cosineA, -1 + 1e-7, 1 - 1e-7))
                angleB = torch.acos(
                    torch.clamp(curr_cosineB, -1 + 1e-7, 1 - 1e-7))
                kA = (angleA / (math.pi / margin)).floor(
                )  # Equation 6: angles needs to be between [k*pi/m and (k+1)*pi/m]
                kB = (angleB / (math.pi / margin)).floor(
                )  # Equation 6: angles needs to be between [k*pi/m and (k+1)*pi/m]
                # written in place into the cosine matrices
                cosinesA[i, j] = ((-1)**kA) * cos_with_marginA - (2 * kA)
                cosinesB[i, j] = ((-1)**kB) * cos_with_marginB - (2 * kB)

            # turn (modified) cosines back into unnormalized logits
            cosinesA *= product_of_magnitudesA
            cosinesB *= product_of_magnitudesB

            correct_lossA = torch.nn.functional.cross_entropy(
                cosinesA * scale, labels)
            correct_lossB = torch.nn.functional.cross_entropy(
                cosinesB * scale, labels)

            # looser relative tolerance for half precision
            rtol = 1e-2 if dtype == torch.float16 else 1e-5
            self.assertTrue(torch.isclose(lossA, correct_lossA, rtol=rtol))
            self.assertTrue(torch.isclose(lossB, correct_lossB, rtol=rtol))
Exemplo n.º 5
0
def test_max(model_test, data):
    """Check that scoring the max-semiring marginals reproduces the max-semiring sum."""
    sample = Gen(model_test, data, MaxSemiring)
    best_score = sample.struct.sum(sample.vals)
    argmax_marginals = sample.struct.marginals(sample.vals)
    argmax_score = sample.struct.score(sample.vals, argmax_marginals)
    assert torch.isclose(best_score, argmax_score).all()
Exemplo n.º 6
0
    def test_contrastive_loss(self):
        """Check four ``ContrastiveLoss`` configurations against hand-computed values.

        Configurations (index used in the reference lists below):
          * A (0): distance-based, squared distances, averaged over non-zero pairs.
          * B (1): similarity-based, averaged over non-zero pairs.
          * C (2): same as A but averaged over all pairs.
          * D (3): same as B but averaged over all pairs.

        The reference losses are accumulated pair by pair over the explicit
        positive and negative index pairs of five 2D embeddings.
        """
        loss_funcA = ContrastiveLoss(pos_margin=0.25,
                                     neg_margin=1.5,
                                     use_similarity=False,
                                     avg_non_zero_only=True,
                                     squared_distances=True)
        loss_funcB = ContrastiveLoss(pos_margin=1.5,
                                     neg_margin=0.6,
                                     use_similarity=True,
                                     avg_non_zero_only=True)
        loss_funcC = ContrastiveLoss(pos_margin=0.25,
                                     neg_margin=1.5,
                                     use_similarity=False,
                                     avg_non_zero_only=False,
                                     squared_distances=True)
        loss_funcD = ContrastiveLoss(pos_margin=1.5,
                                     neg_margin=0.6,
                                     use_similarity=True,
                                     avg_non_zero_only=False)

        embedding_angles = [0, 20, 40, 60, 80]
        embeddings = torch.FloatTensor(
            [c_f.angle_to_coord(a) for a in embedding_angles])  # 2D embeddings
        labels = torch.LongTensor([0, 0, 1, 1, 2])

        # losses under test
        lossA = loss_funcA(embeddings, labels)
        lossB = loss_funcB(embeddings, labels)
        lossC = loss_funcC(embeddings, labels)
        lossD = loss_funcD(embeddings, labels)

        # ordered (anchor, other) index pairs with equal / different labels
        pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
        neg_pairs = [(0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 0),
                     (2, 1), (2, 4), (3, 0), (3, 1), (3, 4), (4, 0), (4, 1),
                     (4, 2), (4, 3)]

        # one accumulator slot per configuration, in the order A, B, C, D
        correct_pos_losses = [0, 0, 0, 0]
        correct_neg_losses = [0, 0, 0, 0]
        num_non_zero_pos = [0, 0, 0, 0]
        num_non_zero_neg = [0, 0, 0, 0]
        for a, p in pos_pairs:
            anchor, positive = embeddings[a], embeddings[p]
            # distance variant: penalize squared distance above pos_margin 0.25
            correct_lossA = torch.relu(
                torch.sum((anchor - positive)**2) - 0.25)
            # similarity variant: penalize dot product below pos_margin 1.5
            correct_lossB = torch.relu(1.5 - torch.matmul(anchor, positive))
            correct_pos_losses[0] += correct_lossA
            correct_pos_losses[1] += correct_lossB
            correct_pos_losses[2] += correct_lossA
            correct_pos_losses[3] += correct_lossB
            if correct_lossA > 0:
                num_non_zero_pos[0] += 1
                num_non_zero_pos[2] += 1
            if correct_lossB > 0:
                num_non_zero_pos[1] += 1
                num_non_zero_pos[3] += 1

        for a, n in neg_pairs:
            anchor, negative = embeddings[a], embeddings[n]
            # distance variant: penalize squared distance below neg_margin 1.5
            correct_lossA = torch.relu(1.5 - torch.sum((anchor - negative)**2))
            # similarity variant: penalize dot product above neg_margin 0.6
            correct_lossB = torch.relu(torch.matmul(anchor, negative) - 0.6)
            correct_neg_losses[0] += correct_lossA
            correct_neg_losses[1] += correct_lossB
            correct_neg_losses[2] += correct_lossA
            correct_neg_losses[3] += correct_lossB
            if correct_lossA > 0:
                num_non_zero_neg[0] += 1
                num_non_zero_neg[2] += 1
            if correct_lossB > 0:
                num_non_zero_neg[1] += 1
                num_non_zero_neg[3] += 1

        # A and B (avg_non_zero_only=True): average over non-zero pairs only
        for i in range(2):
            if num_non_zero_pos[i] > 0:
                correct_pos_losses[i] /= num_non_zero_pos[i]
            if num_non_zero_neg[i] > 0:
                correct_neg_losses[i] /= num_non_zero_neg[i]

        # C and D (avg_non_zero_only=False): average over every pair
        for i in range(2, 4):
            correct_pos_losses[i] /= len(pos_pairs)
            correct_neg_losses[i] /= len(neg_pairs)

        # total reference loss = positive part + negative part
        correct_losses = [0, 0, 0, 0]
        for i in range(4):
            correct_losses[i] = correct_pos_losses[i] + correct_neg_losses[i]

        self.assertTrue(torch.isclose(lossA, correct_losses[0]))
        self.assertTrue(torch.isclose(lossB, correct_losses[1]))
        self.assertTrue(torch.isclose(lossC, correct_losses[2]))
        self.assertTrue(torch.isclose(lossD, correct_losses[3]))
Exemplo n.º 7
0
def _compare_tensors_internal(
        a: torch.Tensor, b: torch.Tensor, *, rtol, atol,
        equal_nan: Union[str, bool]) -> _compare_return_type:
    """Compare two tensors and explain the mismatch when they differ.

    Three strategies are used, chosen from ``a``'s dtype:
      * exact comparison for bool and quantized tensors, and for integral
        tensors when ``rtol == 0`` and ``atol < 1``;
      * for complex tensors, a recursive comparison of the real parts and
        then the imaginary parts;
      * ``torch.allclose`` for everything else.

    Args:
        a: first tensor.
        b: second tensor.
        rtol: relative tolerance.
        atol: absolute tolerance.
        equal_nan: ``True``, ``False`` or ``"relaxed"``. For the tolerance
            comparison ``"relaxed"`` is treated like ``True``; for complex
            inputs it additionally makes a NaN in either component of an
            element count as NaN in both components.

    Returns:
        ``(True, None)`` when the tensors compare equal, otherwise
        ``(False, debug_msg)`` where ``debug_msg`` describes how many
        elements differ and where the greatest difference is.
    """
    assert equal_nan in {True, False, "relaxed"}
    debug_msg: Optional[str]
    # Integer (including bool) comparisons are identity comparisons
    # when rtol is zero and atol is less than one
    if ((is_integral(a.dtype) and rtol == 0 and atol < 1)
            or a.dtype is torch.bool or is_quantized(a.dtype)):
        if (a == b).all().item():
            return (True, None)

        # Gathers debug info for failed integer comparison
        # NOTE: converts to long to correctly represent differences
        # (especially between uint8 tensors)
        identity_mask = a != b
        a_flat = a.to(torch.long).flatten()
        b_flat = b.to(torch.long).flatten()
        count_non_identical = torch.sum(identity_mask, dtype=torch.long)
        diff = torch.abs(a_flat - b_flat)
        greatest_diff_index = torch.argmax(diff)
        debug_msg = (
            "Found {0} different element(s) (out of {1}), with the greatest "
            "difference of {2} ({3} vs. {4}) occuring at index "
            "{5}.".format(count_non_identical.item(), a.numel(),
                          diff[greatest_diff_index],
                          a_flat[greatest_diff_index],
                          b_flat[greatest_diff_index],
                          _unravel_index(greatest_diff_index, a.shape)))
        return (False, debug_msg)

    # Compares complex tensors' real and imaginary parts separately.
    # (see NOTE Test Framework Tensor "Equality")
    if a.is_complex():
        a = a.resolve_conj()
        b = b.resolve_conj()
        if equal_nan == "relaxed":
            # Work on copies so the callers' tensors are not modified, then
            # propagate NaN between components: the real part becomes NaN
            # wherever the imaginary part is NaN, and vice versa.
            a = a.clone()
            b = b.clone()
            a.real[a.imag.isnan()] = math.nan
            a.imag[a.real.isnan()] = math.nan
            b.real[b.imag.isnan()] = math.nan
            b.imag[b.real.isnan()] = math.nan

        real_result, debug_msg = _compare_tensors_internal(a.real,
                                                           b.real,
                                                           rtol=rtol,
                                                           atol=atol,
                                                           equal_nan=equal_nan)

        # Stop at the first failing component and prefix its message
        if not real_result:
            debug_msg = "Real parts failed to compare as equal! " + cast(
                str, debug_msg)
            return (real_result, debug_msg)

        imag_result, debug_msg = _compare_tensors_internal(a.imag,
                                                           b.imag,
                                                           rtol=rtol,
                                                           atol=atol,
                                                           equal_nan=equal_nan)

        if not imag_result:
            debug_msg = "Imaginary parts failed to compare as equal! " + cast(
                str, debug_msg)
            return (imag_result, debug_msg)

        return (True, None)

    # All other comparisons use torch.allclose directly
    if torch.allclose(a,
                      b,
                      rtol=rtol,
                      atol=atol,
                      equal_nan=(equal_nan in {"relaxed", True})):
        return (True, None)

    # Gathers debug info for failed float tensor comparison
    # NOTE: converts to float64 to best represent differences
    a_flat = a.to(torch.float64).flatten()
    b_flat = b.to(torch.float64).flatten()
    diff = torch.abs(a_flat - b_flat)

    # Masks close values
    # NOTE: this avoids (inf - inf) oddities when computing the difference
    close = torch.isclose(a_flat, b_flat, rtol, atol,
                          (equal_nan in {"relaxed", True}))
    diff[close] = 0
    # NaN differences that remain after masking are counted separately
    nans = torch.isnan(diff)
    num_nans = nans.sum()

    # Elements whose difference exceeds atol + rtol * |b|, or is infinite
    outside_range = (diff >
                     (atol + rtol * torch.abs(b_flat))) | (diff == math.inf)
    count_outside_range = torch.sum(outside_range, dtype=torch.long)
    greatest_diff_index = torch.argmax(diff)
    debug_msg = (
        "With rtol={0} and atol={1}, found {2} element(s) (out of {3}) whose "
        "difference(s) exceeded the margin of error (including {4} nan comparisons). "
        "The greatest difference was {5} ({6} vs. {7}), which "
        "occurred at index {8}.".format(
            rtol, atol, count_outside_range + num_nans, a.numel(), num_nans,
            diff[greatest_diff_index], a_flat[greatest_diff_index],
            b_flat[greatest_diff_index],
            _unravel_index(greatest_diff_index, a.shape)))
    return (False, debug_msg)
Exemplo n.º 8
0
def _check_classification_inputs(
    preds: torch.Tensor,
    target: torch.Tensor,
    threshold: float,
    num_classes: Optional[int],
    is_multiclass: bool,
    top_k: Optional[int],
) -> str:
    """Performs error checking on inputs for classification.

    This ensures that preds and target take one of the shape/type combinations that are
    specified in ``_input_format_classification`` docstring. It also checks the cases of
    over-rides with ``is_multiclass`` by checking (for multi-class and multi-dim multi-class
    cases) that there are only up to 2 distinct labels.

    In case where preds are floats (probabilities), it is checked whether they are in [0,1] interval.

    When ``num_classes`` is given, it is checked that it is consistent with input cases (binary,
    multi-label, ...), and that, if available, the implied number of classes in the ``C``
    dimension is consistent with it (as well as that max label in target is smaller than it).

    When ``num_classes`` is not specified in these cases, consistency of the highest target
    value against ``C`` dimension is checked for (multi-dimensional) multi-class cases.

    If ``top_k`` is set (not None) for inputs that do not have probability predictions (and
    are not binary), an error is raised. Similarly if ``top_k`` is set to a number that
    is higher than or equal to the ``C`` dimension of ``preds``, an error is raised.

    Preds and target tensors are expected to be squeezed already - all dimensions should be
    greater than 1, except perhaps the first one (``N``).

    Args:
        preds: Tensor with predictions (labels or probabilities)
        target: Tensor with ground truth labels, always integers (labels)
        threshold:
            Threshold probability value for transforming probability predictions to binary
            (0,1) predictions, in the case of binary or multi-label inputs.
        num_classes:
            Number of classes. If not explicitly set, the number of classes will be inferred
            either from the shape of inputs, or the maximum label in the ``target`` and ``preds``
            tensor, where applicable.
        top_k:
            Number of highest probability entries for each sample to convert to 1s - relevant
            only for inputs with probability predictions. The default value (``None``) will be
            interpreted as 1 for these inputs. If this parameter is set for multi-label inputs,
            it will take precedence over threshold.

            Should be left unset (``None``) for inputs with label predictions.
        is_multiclass:
            Used only in certain special cases, where you want to treat inputs as a different type
            than what they appear to be. See the parameter's
            :ref:`documentation section <pages/overview:using the is_multiclass parameter>`
            for a more detailed explanation and examples.


    Return:
        case: The case the inputs fall in, one of 'binary', 'multi-class', 'multi-label' or
            'multi-dim multi-class'
    """

    # Basic validation (that does not need case/type information)
    _basic_input_validation(preds, target, threshold, is_multiclass)

    # Check that shape/types fall into one of the cases
    case, implied_classes = _check_shape_and_type_consistency(preds, target)

    # For (multi-dim) multi-class case with prob preds, check that preds sum up to 1
    # along dim 1 (within torch.isclose's default tolerances)
    if case in (DataType.MULTICLASS,
                DataType.MULTIDIM_MULTICLASS) and preds.is_floating_point():
        if not torch.isclose(preds.sum(dim=1), torch.ones_like(
                preds.sum(dim=1))).all():
            raise ValueError(
                "Probabilities in `preds` must sum up to 1 accross the `C` dimension."
            )
    # NOTE(review): this excerpt stops here. The docstring describes further
    # checks (num_classes, top_k) and a returned `case`, none of which are
    # visible in this snippet - confirm against the full source.
Exemplo n.º 9
0
def test_vif_p_one_for_equal_tensors(x) -> None:
    """Check that ``vif_p`` of a tensor against its own clone is close to 1."""
    expected = torch.tensor(1.0)
    measure = vif_p(x, x.clone())
    is_one = torch.isclose(measure, expected)
    assert is_one, f'VIF for equal tensors shouls be 1.0, got {measure}.'
Exemplo n.º 10
0
def test_bundleivp(x0, y0, y1, ones, lin, net11, net21, net31, net41):
    """Check that ``BundleIVP.enforce`` reproduces the initial conditions.

    Each section builds a condition with a different subset of ``t_0``,
    ``u_0`` and ``u_0_prime`` supplied through ``bundle_conditions``
    (presumably mapping each bundled quantity to the position of the extra
    network input that carries it - verify against ``BundleIVP``), evaluates
    the re-parametrized network at the initial time, and asserts that the
    value and, when a derivative is prescribed, the first derivative match.
    """
    # --- Plain IVP, nothing bundled ---
    t = x0 * ones
    condition = BundleIVP(x0, y0)
    u = condition.enforce(net11, t)
    assert torch.isclose(u, y0 * ones).all(), "y(x_0) != y_0"

    condition = BundleIVP(x0, y0, y1)
    u = condition.enforce(net11, t)
    assert all_close(u, y0), "y(x_0) != y_0"
    assert all_close(diff(u, t), y1), "y'(x_0) != y'_0"

    # --- u_0 bundled ---
    u0_bundle = y0 * lin
    condition = BundleIVP(t_0=x0, bundle_conditions={'u_0': 0})
    u = condition.enforce(net21, t, u0_bundle)
    assert torch.isclose(u, y0 * lin).all(), "y(x_0) != y_0"

    condition = BundleIVP(t_0=x0, u_0_prime=y1, bundle_conditions={'u_0': 0})
    u = condition.enforce(net21, t, u0_bundle)
    assert torch.isclose(u, y0 * lin).all(), "y(x_0) != y_0"
    assert all_close(diff(u, t), y1), "y'(x_0) != y'_0"

    # --- u_0_prime bundled ---
    u0_prime_bundle = y1 * lin
    condition = BundleIVP(t_0=x0, u_0=y0, bundle_conditions={'u_0_prime': 0})
    u = condition.enforce(net21, t, u0_prime_bundle)
    assert all_close(u, y0), "y(x_0) != y_0"
    assert torch.isclose(diff(u, t), y1 * lin).all(), "y'(x_0) != y'_0"

    # --- u_0 and u_0_prime bundled ---
    condition = BundleIVP(t_0=x0, bundle_conditions={'u_0': 0, 'u_0_prime': 1})
    u = condition.enforce(net31, t, u0_bundle, u0_prime_bundle)
    assert torch.isclose(u, y0 * lin).all(), "y(x_0) != y_0"
    assert torch.isclose(diff(u, t), y1 * lin).all(), "y'(x_0) != y'_0"

    # --- t_0 bundled: the evaluation points now vary per sample ---
    t = x0 * lin
    t0_bundle = x0 * lin
    condition = BundleIVP(u_0=y0, bundle_conditions={'t_0': 0})
    u = condition.enforce(net21, t, t0_bundle)
    assert torch.isclose(u, y0 * ones).all(), "y(x_0) != y_0"

    condition = BundleIVP(u_0=y0, u_0_prime=y1, bundle_conditions={'t_0': 0})
    u = condition.enforce(net21, t, t0_bundle)
    assert all_close(u, y0), "y(x_0) != y_0"
    assert all_close(diff(u, t), y1), "y'(x_0) != y'_0"

    # --- t_0 and u_0 bundled ---
    condition = BundleIVP(bundle_conditions={'t_0': 0, 'u_0': 1})
    u = condition.enforce(net31, t, t0_bundle, u0_bundle)
    assert torch.isclose(u, y0 * lin).all(), "y(x_0) != y_0"

    condition = BundleIVP(u_0_prime=y1, bundle_conditions={'t_0': 0, 'u_0': 1})
    u = condition.enforce(net31, t, t0_bundle, u0_bundle)
    assert torch.isclose(u, y0 * lin).all(), "y(x_0) != y_0"
    assert all_close(diff(u, t), y1), "y'(x_0) != y'_0"

    # --- t_0, u_0 and u_0_prime all bundled ---
    condition = BundleIVP(
        bundle_conditions={'t_0': 0, 'u_0': 1, 'u_0_prime': 2})
    u = condition.enforce(net41, t, t0_bundle, u0_bundle, u0_prime_bundle)
    assert torch.isclose(u, y0 * lin).all(), "y(x_0) != y_0"
    assert torch.isclose(diff(u, t), y1 * lin).all(), "y'(x_0) != y'_0"
Exemplo n.º 11
0
    def test_configure_optimizers(self, mock_atom_model):
        """Check that the model configures an Adam optimizer with a default lr of 0.01."""
        optimizer = mock_atom_model.configure_optimizers()
        assert isinstance(optimizer, torch.optim.Adam)

        configured_lr = torch.tensor(optimizer.defaults["lr"])
        expected_lr = torch.tensor(0.01)
        assert torch.isclose(configured_lr, expected_lr)
Exemplo n.º 12
0
def dice_score(pred: torch.Tensor,
               label: torch.Tensor,
               bg=False,
               cls_logging=False,
               nan_score=0.0,
               no_fg_score=0.0,
               apply_argmax: bool = True):
    """
    Compute the class-averaged dice score (2*tp) / (2*tp + fp + fn)

    Parameters
    ----------
    pred : torch.Tensor
        prediction for each class; the number of classes is read from
        ``pred.shape[1]``
    label : torch.Tensor
        ground truth annotation. Classes are given by numeric value
        (not onehot encoded)
    bg : bool, optional
        include class 0 (background) in the average, by default False
    cls_logging : bool, optional
        log the score of every individual class, by default False
    nan_score : float, optional
        score used for a class whose denominator is (close to) zero
    no_fg_score : float, optional
        score used for a class that does not occur in ``label``
    apply_argmax : bool, optional
        forwarded to ``compute_stat_score`` as ``do_argmax``

    Returns
    -------
    float or torch.Tensor
        sum of the per-class scores divided by the number of evaluated
        classes
    """
    has_positive = (pred > 0).any()
    if not has_positive:
        logger.warning(
            "Prediction only contains zeros. Dice score might be ambigious.")

    # class 0 is skipped unless the background is explicitly requested
    first_cls = 0 if bg else 1
    n_classes = pred.shape[1]

    total = 0.
    for cls_idx in range(first_cls, n_classes):
        tp, fp, _, fn = compute_stat_score(
            pred, label, cls_idx, do_argmax=apply_argmax)
        denom = (2 * tp + fp + fn).to(torch.float)

        if not (label == cls_idx).any():
            # class is absent from the ground truth
            cls_score = no_fg_score
        elif torch.isclose(denom, torch.zeros_like(denom)).any():
            # division would produce nan
            cls_score = nan_score
        else:
            cls_score = (2 * tp).to(torch.float) / denom

        if cls_logging:
            entry = {'value': cls_score, 'name': 'dice_cls_' + str(cls_idx)}
            logger.info({'value': entry})

        total += cls_score

    return total / (n_classes - first_cls)
Exemplo n.º 13
0
def test_local_remote_gradient_clipping(workers):
    """
    Clip the gradients of a small RNN once while it lives on a remote
    worker and once after fetching it back, and check that both calls
    report (almost) the same total gradient norm.
    """
    alice = workers["alice"]

    # Single-step recurrent cell with a log-softmax output head
    class RNN(nn.Module):
        def __init__(self, input_size, hidden_size, output_size):
            super(RNN, self).__init__()
            self.hidden_size = hidden_size
            fan_in = input_size + hidden_size
            self.i2h = nn.Linear(fan_in, hidden_size)
            self.i2o = nn.Linear(fan_in, output_size)
            self.softmax = nn.LogSoftmax(dim=1)

        def forward(self, input, hidden):
            joined = torch.cat((input, hidden), 1)
            next_hidden = self.i2h(joined)
            log_probs = self.softmax(self.i2o(joined))
            return log_probs, next_hidden

        def initHidden(self):
            return torch.zeros(1, self.hidden_size)

    # model dimensions
    n_hidden, n_letters, n_categories = 128, 57, 18

    rnn = RNN(n_letters, n_hidden, n_categories)

    # a copy of the model is sent to alice, who runs the computation
    remote_model = rnn.copy().send(alice)

    # one-hot style input: a single active position
    one_hot = torch.zeros(size=(1, n_letters))
    one_hot[0][20] = 1
    remote_input = one_hot.copy().send(alice)

    # random target category
    target = torch.randint(low=0, high=(n_categories - 1), size=(1, ))
    remote_target = target.send(alice)

    remote_hidden = remote_model.initHidden().send(alice)

    # forward pass, loss and backward pass
    output, remote_hidden = remote_model(remote_input, remote_hidden)
    loss = nn.NLLLoss()(output, remote_target)
    loss.backward()

    # clip while the parameters are still remote ...
    remote_norm = nn.utils.clip_grad_norm_(remote_model.parameters(), 2)

    # ... and again after fetching the model back
    fetched_model = remote_model.get()
    local_norm = nn.utils.clip_grad_norm_(fetched_model.parameters(), 2)

    # both clipping calls must report the same total norm
    assert torch.isclose(remote_norm.get(), local_norm, atol=1e-4)
Exemplo n.º 14
0
 def test_rolling(agent_class: mantrap.agents.base.DTAgent.__class__):
     """Check that rolling an unrolled trajectory gives back the controls."""
     step = 1.0
     controls = torch.tensor([[1, 1], [2, 2], [4, 4]]).float()
     agent = agent_class(position=torch.zeros(2))

     trajectory = agent.unroll_trajectory(controls, dt=step)
     recovered = agent.roll_trajectory(trajectory, dt=step)
     assert torch.all(torch.isclose(controls, recovered))
Exemplo n.º 15
0
    def test_snr_contrastive_loss(self):
        """Check ``SignalToNoiseRatioContrastiveLoss`` against a hand-computed value.

        For each dtype in ``TEST_DTYPES`` the reference is the sum of
          * the positive-pair term ``relu(var(a - p) / var(a) - pos_margin)``,
          * the negative-pair term ``relu(neg_margin - var(a - n) / var(a))``,
            each averaged over its non-zero entries, and
          * ``embedding_reg_weight`` times the mean absolute row sum of the
            embeddings.
        """
        pos_margin = 0
        neg_margin = 0.1
        embedding_reg_weight = 0.1
        loss_func = SignalToNoiseRatioContrastiveLoss(
            pos_margin=pos_margin,
            neg_margin=neg_margin,
            embedding_regularizer=ZeroMeanRegularizer(),
            embedding_reg_weight=embedding_reg_weight,
        )

        embedding_angles = [0, 20, 40, 60, 80]
        label_values = [0, 0, 1, 1, 2]

        # ordered (anchor, other) pairs, split by whether the labels agree
        indices = range(len(label_values))
        ordered_pairs = [(a, b) for a in indices for b in indices if a != b]
        pos_pairs = [(a, b) for a, b in ordered_pairs
                     if label_values[a] == label_values[b]]
        neg_pairs = [(a, b) for a, b in ordered_pairs
                     if label_values[a] != label_values[b]]

        def noise_to_signal(anchor, other):
            return torch.var(anchor - other) / torch.var(anchor)

        def average_over_non_zero(pair_losses):
            # sum everything, then divide by the count of positive entries
            total = sum(pair_losses)
            non_zero = sum(1 for value in pair_losses if value > 0)
            if non_zero > 0:
                total = total / non_zero
            return total

        for dtype in TEST_DTYPES:
            embeddings = torch.tensor(
                [c_f.angle_to_coord(a) for a in embedding_angles],
                requires_grad=True,
                dtype=dtype,
            ).to(TEST_DEVICE)  # 2D embeddings
            labels = torch.LongTensor(label_values)

            loss = loss_func(embeddings, labels)
            loss.backward()

            pos_losses = [
                torch.relu(
                    noise_to_signal(embeddings[a], embeddings[p]) - pos_margin)
                for a, p in pos_pairs
            ]
            neg_losses = [
                torch.relu(
                    neg_margin - noise_to_signal(embeddings[a], embeddings[n]))
                for a, n in neg_pairs
            ]
            correct_pos_loss = average_over_non_zero(pos_losses)
            correct_neg_loss = average_over_non_zero(neg_losses)

            reg_loss = torch.mean(torch.abs(torch.sum(embeddings, dim=1)))

            correct_total = (correct_pos_loss + correct_neg_loss +
                             embedding_reg_weight * reg_loss)
            # looser relative tolerance for half precision
            rtol = 1e-2 if dtype == torch.float16 else 1e-5
            self.assertTrue(torch.isclose(loss, correct_total, rtol=rtol))
Exemplo n.º 16
0
def isclose(a, b):
    """Element-wise closeness of ``a`` and ``b`` with rtol=1e-4 and atol=1e-7."""
    tolerances = {"rtol": 1e-4, "atol": 1e-7}
    return torch.isclose(a, b, **tolerances)
Exemplo n.º 17
0
 def test_return_simple(self):
     """Check the criterion's value on the fixture outputs and targets."""
     expected = torch.tensor(0.5092423)
     actual = self.criterion(self.outputs, self.targets)
     self.assertTrue(torch.isclose(actual, expected))
Exemplo n.º 18
0
def test_vif_p_works_for_zeros_tensors() -> None:
    """Check that ``vif_p`` of two all-zero tensors is close to 1."""
    shape = (4, 3, 256, 256)
    x = torch.zeros(*shape)
    y = torch.zeros(*shape)
    measure = vif_p(x, y, data_range=1.)
    is_one = torch.isclose(measure, torch.tensor(1.0))
    assert is_one, f'VIF for 2 zero tensors shouls be 1.0, got {measure}.'
Exemplo n.º 19
0
def test_gdc():
    """Exercise GDC with six normalization/diffusion/sparsification setups.

    Every setup densifies the transformed graph with ``to_dense_adj`` and
    checks that no entry is below ``-1e-8``. Depending on the setup, the dense
    matrix must additionally be symmetric (``atol=1e-4``), have column or row
    sums close to 0 or 1, or keep exactly two non-zero entries per column/row.
    """
    edge_index = torch.tensor([[0, 0, 1, 1, 2, 2, 2, 3, 3, 4],
                               [1, 2, 0, 2, 0, 1, 3, 2, 4, 3]])

    def dense_result(norm_in, norm_out, diffusion, sparsification, exact=True):
        # Build a fresh 5-node graph, apply GDC and densify the outcome.
        graph = Data(edge_index=edge_index, num_nodes=5)
        transform = GDC(self_loop_weight=1,
                        normalization_in=norm_in,
                        normalization_out=norm_out,
                        diffusion_kwargs=diffusion,
                        sparsification_kwargs=sparsification,
                        exact=exact)
        graph = transform(graph)
        dense = to_dense_adj(graph.edge_index,
                             edge_attr=graph.edge_attr).squeeze()
        # Shared by all setups: entries may only be negative by rounding noise.
        assert torch.all(dense >= -1e-8)
        return dense

    def all_zero_or_one(sums):
        near_one = torch.isclose(sums, torch.tensor(1.0))
        near_zero = torch.isclose(sums, torch.tensor(0.0))
        return torch.all(near_one | near_zero)

    def nonzero_count(dense, dim):
        return (~torch.isclose(dense, torch.tensor(0.0))).sum(dim)

    # Symmetric normalization on both sides: the result must be symmetric.
    for diffusion in ({'method': 'ppr', 'alpha': 0.15},
                      {'method': 'heat', 't': 10}):
        mat = dense_result('sym', 'sym', diffusion,
                           {'method': 'threshold', 'avg_degree': 2})
        assert torch.allclose(mat, mat.t(), atol=1e-4)

    # Column normalization with top-2 sparsification along dim 0.
    mat = dense_result('col', 'col', {'method': 'heat', 't': 10},
                       {'method': 'topk', 'k': 2, 'dim': 0})
    assert all_zero_or_one(mat.sum(0))
    assert torch.all(nonzero_count(mat, 0) == 2)

    # Row normalization with top-2 sparsification along dim 1.
    mat = dense_result('row', 'row', {'method': 'heat', 't': 5},
                       {'method': 'topk', 'k': 2, 'dim': 1})
    assert all_zero_or_one(mat.sum(1))
    assert torch.all(nonzero_count(mat, 1) == 2)

    # Explicit diffusion coefficients with a fixed threshold.
    mat = dense_result('row', 'row',
                       {'method': 'coeff', 'coeffs': [0.8, 0.3, 0.1]},
                       {'method': 'threshold', 'eps': 0.1})
    assert all_zero_or_one(mat.sum(1))

    # Non-exact PPR path (exact=False) with column output normalization.
    mat = dense_result('sym', 'col',
                       {'method': 'ppr', 'alpha': 0.15, 'eps': 1e-4},
                       {'method': 'threshold', 'avg_degree': 2},
                       exact=False)
    assert all_zero_or_one(mat.sum(0))
    def test_triplet_margin_loss(self):
        """Check TripletMarginLoss against a hand-computed reference.

        Four configurations are covered: the default one, one with a
        ``MeanReducer``, one with a ``CosineSimilarity`` distance, and one
        with both. The reference hinge terms are summed over the listed
        (anchor, positive, negative) triplets and divided either by the number
        of non-zero terms or by the total number of triplets.
        """
        margin = 0.2
        loss_funcA = TripletMarginLoss(margin=margin)
        loss_funcB = TripletMarginLoss(margin=margin, reducer=MeanReducer())
        loss_funcC = TripletMarginLoss(margin=margin,
                                       distance=CosineSimilarity())
        loss_funcD = TripletMarginLoss(margin=margin,
                                       reducer=MeanReducer(),
                                       distance=CosineSimilarity())

        embedding_angles = [0, 20, 40, 60, 80]
        # (anchor, positive, negative) index triples for labels [0, 0, 1, 1, 2].
        triplets = [
            (0, 1, 2), (0, 1, 3), (0, 1, 4),
            (1, 0, 2), (1, 0, 3), (1, 0, 4),
            (2, 3, 0), (2, 3, 1), (2, 3, 4),
            (3, 2, 0), (3, 2, 1), (3, 2, 4),
        ]

        for dtype in TEST_DTYPES:
            embeddings = torch.tensor(
                [c_f.angle_to_coord(angle) for angle in embedding_angles],
                requires_grad=True,
                dtype=dtype,
            ).to(self.device)  # 2D embeddings
            labels = torch.LongTensor([0, 0, 1, 1, 2])

            lossA = loss_funcA(embeddings, labels)
            lossB = loss_funcB(embeddings, labels)
            lossC = loss_funcC(embeddings, labels)
            lossD = loss_funcD(embeddings, labels)

            euclid_total = 0
            cosine_total = 0
            euclid_active = 0
            cosine_active = 0
            for a, p, n in triplets:
                anchor = embeddings[a]
                positive = embeddings[p]
                negative = embeddings[n]

                # Euclidean hinge: d(a, p) - d(a, n) + margin, clamped at 0.
                dist_ap = torch.sqrt(torch.sum((anchor - positive)**2))
                dist_an = torch.sqrt(torch.sum((anchor - negative)**2))
                euclid_term = torch.relu(dist_ap - dist_an + margin)

                # Similarity hinge: s(a, n) - s(a, p) + margin, clamped at 0.
                sim_an = torch.sum(anchor * negative)
                sim_ap = torch.sum(anchor * positive)
                cosine_term = torch.relu(sim_an - sim_ap + margin)

                if euclid_term > 0:
                    euclid_active += 1
                if cosine_term > 0:
                    cosine_active += 1
                euclid_total += euclid_term
                cosine_total += cosine_term

            rtol = 1e-2 if dtype == torch.float16 else 1e-5

            expectedA = euclid_total / euclid_active
            self.assertTrue(torch.isclose(lossA, expectedA, rtol=rtol))

            expectedB = euclid_total / len(triplets)
            self.assertTrue(torch.isclose(lossB, expectedB, rtol=rtol))

            expectedC = cosine_total / cosine_active
            self.assertTrue(torch.isclose(lossC, expectedC, rtol=rtol))

            expectedD = cosine_total / len(triplets)
            self.assertTrue(torch.isclose(lossD, expectedD, rtol=rtol))
Exemplo n.º 21
0
def _compare_tensors_internal(a: torch.Tensor, b: torch.Tensor, *, rtol, atol,
                              equal_nan) -> _compare_return_type:
    """Compare two tensors and describe the worst mismatch on failure.

    Returns ``(True, None)`` when the tensors compare equal/close, otherwise
    ``(False, debug_msg)`` where ``debug_msg`` reports how many elements
    differ and where the greatest difference is.
    """
    debug_msg: Optional[str]

    # Integer (including bool) and quantized comparisons are identity
    # comparisons; for integers this requires rtol == 0 and atol < 1.
    exact_integral = is_integral(a.dtype) and rtol == 0 and atol < 1
    if exact_integral or a.dtype is torch.bool or is_quantized(a.dtype):
        if (a == b).all().item():
            return (True, None)

        # Debug info for the failed identity comparison. Values are widened
        # to long so differences (e.g. between uint8 tensors) are represented
        # correctly.
        mismatch_mask = a != b
        a_long = a.to(torch.long).flatten()
        b_long = b.to(torch.long).flatten()
        num_mismatched = torch.sum(mismatch_mask, dtype=torch.long)
        abs_diff = torch.abs(a_long - b_long)
        worst = torch.argmax(abs_diff)
        debug_msg = (
            "Found {0} different element(s) (out of {1}), with the greatest "
            "difference of {2} ({3} vs. {4}) occuring at index "
            "{5}.".format(num_mismatched.item(), a.numel(),
                          abs_diff[worst],
                          a_long[worst],
                          b_long[worst],
                          _unravel_index(worst, a.shape)))
        return (False, debug_msg)

    # Every other dtype goes through torch.allclose.
    if torch.allclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan):
        return (True, None)

    # Debug info for the failed floating/complex comparison, computed in the
    # widest matching dtype to best represent the differences.
    wide_dtype = torch.complex128 if a.dtype.is_complex else torch.float64
    a_flat = a.to(wide_dtype).flatten()
    b_flat = b.to(wide_dtype).flatten()
    diff = torch.abs(a_flat - b_flat)

    # Zero out entries that are already close; this avoids (inf - inf)
    # oddities in the difference.
    close = torch.isclose(a_flat, b_flat, rtol, atol, equal_nan)
    diff[close] = 0
    num_nans = torch.isnan(diff).sum()

    tolerance = atol + rtol * torch.abs(b_flat)
    outside_range = (diff > tolerance) | (diff == math.inf)
    count_outside_range = torch.sum(outside_range, dtype=torch.long)
    worst = torch.argmax(diff)
    debug_msg = (
        "With rtol={0} and atol={1}, found {2} element(s) (out of {3}) whose "
        "difference(s) exceeded the margin of error (including {4} nan comparisons). "
        "The greatest difference was {5} ({6} vs. {7}), which "
        "occurred at index {8}.".format(
            rtol, atol, count_outside_range + num_nans, a.numel(), num_nans,
            diff[worst], a_flat[worst],
            b_flat[worst],
            _unravel_index(worst, a.shape)))
    return (False, debug_msg)
Exemplo n.º 22
0
def is_near_zero(
    tens: torch.Tensor,
    rtol: float = 1e-05,
    atol: float = 1e-08,
    equal_nan: bool = False,
) -> torch.Tensor:
    """Return a boolean mask marking the entries of ``tens`` close to zero.

    The comparison is ``torch.isclose`` against a one-element zero tensor
    created with the dtype and device of ``tens``; ``rtol``, ``atol`` and
    ``equal_nan`` are forwarded unchanged.
    """
    zero = torch.zeros((1,), device=tens.device, dtype=tens.dtype)
    return torch.isclose(
        tens,
        zero,
        rtol=rtol,
        atol=atol,
        equal_nan=equal_nan,
    )
Exemplo n.º 23
0
def isclose(x, y, rtol=rtol, atol=atol):
    """Elementwise ``torch.isclose`` that also accepts non-tensor inputs.

    Any argument that is not already a tensor is first wrapped with
    ``torch.tensor``; the default tolerances come from the module-level
    ``rtol`` and ``atol``.
    """
    x, y = (v if torch.is_tensor(v) else torch.tensor(v) for v in (x, y))
    return torch.isclose(x, y, atol=atol, rtol=rtol)
    def test_ntxent_loss(self):
        """Compare NTXentLoss / SupConLoss outputs with pairwise references.

        Five loss objects are evaluated on the same embeddings and labels:
        A (NTXentLoss), B (NTXentLoss with LpDistance), C (NTXentLoss with a
        per-anchor reducer), D (SupConLoss) and E (SupConLoss with
        LpDistance). The expected values are rebuilt below from explicit
        positive/negative pair lists.
        """
        temperature = 0.1
        loss_funcA = NTXentLoss(temperature=temperature)
        loss_funcB = NTXentLoss(temperature=temperature, distance=LpDistance())
        loss_funcC = NTXentLoss(
            temperature=temperature, reducer=PerAnchorReducer(AvgNonZeroReducer())
        )
        loss_funcD = SupConLoss(temperature=temperature)
        loss_funcE = SupConLoss(temperature=temperature, distance=LpDistance())

        for dtype in TEST_DTYPES:
            embedding_angles = [0, 10, 20, 50, 60, 80]
            embeddings = torch.tensor(
                [c_f.angle_to_coord(a) for a in embedding_angles],
                requires_grad=True,
                dtype=dtype,
            ).to(
                TEST_DEVICE
            )  # 2D embeddings

            labels = torch.LongTensor([0, 0, 0, 1, 1, 2])

            # Same order as the expected values asserted at the end: A..E.
            obtained_losses = [
                x(embeddings, labels)
                for x in [loss_funcA, loss_funcB, loss_funcC, loss_funcD, loss_funcE]
            ]

            # (anchor, other) index pairs sharing a label / not sharing a label.
            pos_pairs = [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1), (3, 4), (4, 3)]
            neg_pairs = [
                (0, 3),
                (0, 4),
                (0, 5),
                (1, 3),
                (1, 4),
                (1, 5),
                (2, 3),
                (2, 4),
                (2, 5),
                (3, 0),
                (3, 1),
                (3, 2),
                (3, 5),
                (4, 0),
                (4, 1),
                (4, 2),
                (4, 5),
                (5, 0),
                (5, 1),
                (5, 2),
                (5, 3),
                (5, 4),
            ]

            # A and B are scalar sums; C, D and E are accumulated per anchor.
            # Only anchors 0-4 appear in pos_pairs, hence the length-5 buffers.
            total_lossA, total_lossB, total_lossC, total_lossD, total_lossE = (
                0,
                0,
                torch.zeros(5, device=TEST_DEVICE, dtype=dtype),
                torch.zeros(5, device=TEST_DEVICE, dtype=dtype),
                torch.zeros(5, device=TEST_DEVICE, dtype=dtype),
            )
            for a1, p in pos_pairs:
                anchor, positive = embeddings[a1], embeddings[p]
                # Numerator A uses the dot product, numerator B the negated
                # Euclidean distance, both scaled by the temperature.
                numeratorA = torch.exp(torch.matmul(anchor, positive) / temperature)
                numeratorB = torch.exp(
                    -torch.sqrt(torch.sum((anchor - positive) ** 2)) / temperature
                )
                # The A/B denominators start from the current positive term
                # (cloned, because they are updated in place below); the D/E
                # denominators start from zero.
                denominatorA = numeratorA.clone()
                denominatorB = numeratorB.clone()
                denominatorD = 0
                denominatorE = 0
                for a2, n in pos_pairs + neg_pairs:
                    if a2 == a1:
                        negative = embeddings[n]
                        curr_denomD = torch.exp(
                            torch.matmul(anchor, negative) / temperature
                        )
                        curr_denomE = torch.exp(
                            -torch.sqrt(torch.sum((anchor - negative) ** 2))
                            / temperature
                        )
                        # D/E sum over every pair of this anchor ...
                        denominatorD += curr_denomD
                        denominatorE += curr_denomE
                        # ... while A/B only add the negative pairs.
                        if (a2, n) not in pos_pairs:
                            denominatorA += curr_denomD
                            denominatorB += curr_denomE
                    else:
                        continue

                curr_lossA = -torch.log(numeratorA / denominatorA)
                curr_lossB = -torch.log(numeratorB / denominatorB)
                curr_lossD = -torch.log(numeratorA / denominatorD)
                curr_lossE = -torch.log(numeratorB / denominatorE)
                total_lossA += curr_lossA
                total_lossB += curr_lossB
                total_lossC[a1] += curr_lossA
                total_lossD[a1] += curr_lossD
                total_lossE[a1] += curr_lossE

            # A and B: mean over all positive pairs.
            total_lossA /= len(pos_pairs)
            total_lossB /= len(pos_pairs)
            # C, D and E: mean over each anchor's positive pairs, then mean
            # over the anchors (number of positives per anchor 0..4).
            pos_pair_per_anchor = torch.tensor(
                [2, 2, 2, 1, 1], device=TEST_DEVICE, dtype=dtype
            )
            total_lossC, total_lossD, total_lossE = [
                torch.mean(x / pos_pair_per_anchor)
                for x in [total_lossC, total_lossD, total_lossE]
            ]

            # Looser relative tolerance for half precision.
            rtol = 1e-2 if dtype == torch.float16 else 1e-5
            self.assertTrue(torch.isclose(obtained_losses[0], total_lossA, rtol=rtol))
            self.assertTrue(torch.isclose(obtained_losses[1], total_lossB, rtol=rtol))
            self.assertTrue(torch.isclose(obtained_losses[2], total_lossC, rtol=rtol))
            self.assertTrue(torch.isclose(obtained_losses[3], total_lossD, rtol=rtol))
            self.assertTrue(torch.isclose(obtained_losses[4], total_lossE, rtol=rtol))
Exemplo n.º 25
0
def ignore_alignment(data):
    """Exploratory checks of ``Alignment`` under several semirings.

    ``data`` must provide a ``draw`` method (used with ``sampled_from``).
    The function compares ``struct.sum`` with the first element returned by
    the matching ``test_lookup`` helper's ``enumerate`` for the Std and Log
    semirings, checks that the Max-semiring sum equals the score of its own
    marginals, and finally runs a local-alignment configuration whose
    comparison is currently disabled (values are only printed).

    NOTE(review): the ``ignore_`` prefix presumably keeps the test runner from
    collecting this function -- confirm before renaming.
    """

    # --- Disabled debugging experiments (kept for reference) ---------------
    # log_potentials = torch.ones(2, 2, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)
    # log_potentials = torch.ones(2, 3, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 6, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 7, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)

    # log_potentials = torch.ones(2, 8, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)
    # print("FINAL", v)
    # assert False

    # model = data.draw(sampled_from([Alignment]))
    # semiring = data.draw(sampled_from([StdSemiring]))
    # struct = model(semiring)
    # vals, (batch, N) = model._rand()
    # print(batch, N)
    # struct = model(semiring)
    # # , max_gap=max(3, abs(vals.shape[1] - vals.shape[2]) + 1))
    # vals.fill_(1)
    # alpha = struct.sum(vals)

    # --- StdSemiring: sum must match the enumerated value ------------------
    model = data.draw(sampled_from([Alignment]))
    semiring = data.draw(sampled_from([StdSemiring]))
    test = test_lookup[model](semiring)
    struct = model(semiring, sparse_rounds=10)
    vals, (batch, N) = test._rand()
    alpha = struct.sum(vals)
    count = test.enumerate(vals)[0]
    assert torch.isclose(count, alpha).all()

    # --- LogSemiring: same comparison --------------------------------------
    # NOTE(review): random values come from `model._rand()` here but from
    # `test._rand()` in the block above -- confirm this difference is intended.
    model = data.draw(sampled_from([Alignment]))
    semiring = data.draw(sampled_from([LogSemiring]))
    struct = model(semiring, sparse_rounds=10)
    vals, (batch, N) = model._rand()
    alpha = struct.sum(vals)
    count = test_lookup[model](semiring).enumerate(vals)[0]
    assert torch.isclose(count, alpha).all()

    # model = data.draw(sampled_from([Alignment]))
    # semiring = data.draw(sampled_from([MaxSemiring]))
    # struct = model(semiring)
    # log_potentials = torch.ones(2, 2, 2, 3)
    # v = Alignment(StdSemiring).sum(log_potentials)

    # --- MaxSemiring: the sum must equal the score of its own marginals ----
    log_potentials = torch.ones(2, 2, 8, 3)
    v = Alignment(MaxSemiring).sum(log_potentials)
    # print(v)
    # assert False
    m = Alignment(MaxSemiring).marginals(log_potentials)
    score = Alignment(MaxSemiring).score(log_potentials, m)
    assert torch.isclose(v, score).all()

    # --- MaxSemiring with local=True ---------------------------------------
    semiring = data.draw(sampled_from([MaxSemiring]))
    struct = model(semiring, local=True)
    test = test_lookup[model](semiring)
    vals, (batch, N) = test._rand()
    # Force the sign of each channel of the last dimension: channels 0 and 2
    # become non-positive (scaled by 2), channel 1 non-negative.
    vals[..., 0] = -2 * vals[..., 0].abs()
    vals[..., 1] = vals[..., 1].abs()
    vals[..., 2] = -2 * vals[..., 2].abs()
    alpha = struct.sum(vals)
    count = test.enumerate(vals)[0]
    mx = struct.marginals(vals)
    # Values are printed for manual inspection; the check below is disabled.
    print(alpha, count)
    print(mx[0].nonzero())
    # assert torch.isclose(count, alpha).all()
    # The max_gap=1 result is computed but not checked.
    struct = model(semiring, max_gap=1)
    alpha = struct.sum(vals)
Exemplo n.º 26
0
    def __init__(
        self,
        loc: torch.Tensor,
        concentration: torch.Tensor,
        change_magnitude_sampling_algorithm: str = "wood",
    ):
        """Validate the parameters and set up the distribution.

        Args:
            loc: Mean direction(s). A 1-D tensor is treated as a batch of one.
                Must have unit L2 norm along the last dimension.
            concentration: One value per row of ``loc``, with shape
                ``(batch_size,)`` or ``(batch_size, 1)``.
            change_magnitude_sampling_algorithm: ``"wood"`` or ``"ulrich"``
                (matched case-insensitively); selects the rejection sampler.

        Raises:
            ValueError: If ``loc`` is 0-dimensional or not normalized, if
                ``concentration`` has an unsupported shape or a batch size
                different from ``loc``, or if the sampling algorithm is
                unknown.
        """
        if loc.dim() < 1:
            raise ValueError("loc must be at least one-dimensional.")

        # A 0-dim concentration is rejected here as well; otherwise the batch
        # size comparison below would fail with an IndexError.
        if (concentration.dim() < 1 or concentration.dim() > 2
                or (concentration.dim() == 2
                    and concentration.shape[-1] != 1)):
            raise ValueError("""
                `concentration` should be a tensor of a single value with shape (1,) 
                or batched with shapes (batch_size,) or (batch_size, 1); got {} instead
                """.format(concentration.size()))

        # For single batches, unsqueeze to (batch_size, dimension) where batch_size = 1.
        if loc.dim() == 1:
            loc = loc.unsqueeze(0)

        # TODO: Some torch distributions will repeat a parameter like this if only one is defined.
        if loc.shape[0] != concentration.shape[0]:
            raise ValueError("""
                batch size for loc ({}) and concentration ({}) differ; 
                concentration should be defined for each mean
                """.format(loc.shape[0], concentration.shape[0]))

        # Invariant: `self.concentration` should always have the shape (batch_size,).
        # Feedforward layers may project to a single dimension and produce shape (batch_size, 1).
        # Computing batched latent representations (w; sqrt(1 - w^t) v.T)^T however requires (batch_size,) for proper
        # matrix multiply.
        if concentration.dim() > 1:
            concentration = concentration.squeeze(-1)

        # Normalize the name once so validation and lookup agree; previously a
        # mixed-case name passed validation and then raised KeyError below.
        algorithm = change_magnitude_sampling_algorithm.lower()
        if algorithm not in ("wood", "ulrich"):
            raise ValueError(
                "unsupported change magnitude sampling algorithm: {}".format(
                    change_magnitude_sampling_algorithm))

        loc_norm = loc.norm(dim=-1)
        # ones_like keeps the reference on the device/dtype of `loc`, so the
        # check also works for CUDA or float64 inputs.
        if not torch.all(torch.isclose(loc_norm, torch.ones_like(loc_norm))):
            raise ValueError("""
                loc is not normalized; loc should be either a normalized tensor or
                a batched tensor normalized in the final dimension, instead L2 norm(s) of loc is {}
                """.format(loc_norm))

        self.loc = loc  # Shape: (batch_size, m)
        self.concentration = concentration  # Shape: (batch_size,)

        change_magnitude_sampling_algorithms = {
            "wood": self._rejection_sample_wood,
            "ulrich": self._rejection_sample_ulrich,
        }
        self._rejection_sample = change_magnitude_sampling_algorithms[algorithm]

        # Distribution is set on the `(self._m - 1)` sphere.
        self._m = self.loc.shape[-1]

        batch_shape = loc.shape
        event_shape = torch.Size()

        super(VonMisesFisher, self).__init__(batch_shape=batch_shape,
                                             event_shape=event_shape)
Exemplo n.º 27
0
 def check(self, value):
     """Return the parent check, refined by a symmetry test when it fully passes.

     If any entry of the parent result is false, that result is returned
     unchanged. Otherwise ``value`` is compared with ``value.mT``
     (``atol=1e-6``) and the comparison is reduced over the last two
     dimensions.
     """
     parent_result = super().check(value)
     if parent_result.all():
         symmetric = torch.isclose(value, value.mT, atol=1e-6)
         return symmetric.all(-2).all(-1)
     return parent_result
    def test_multi_similarity_loss(self):
        """Check MultiSimilarityLoss against a per-anchor reference value.

        For every anchor the reference sums the exponentiated, shifted dot
        products over its positive and negative pairs, applies the scaled
        ``log(1 + sum)`` to each non-zero sum, and averages the result over
        all embeddings.
        """
        embedding_angles = [0, 20, 40, 60, 80]
        pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
        neg_pairs = [
            (0, 2), (0, 3), (0, 4),
            (1, 2), (1, 3), (1, 4),
            (2, 0), (2, 1), (2, 4),
            (3, 0), (3, 1), (3, 4),
            (4, 0), (4, 1), (4, 2), (4, 3),
        ]

        for dtype in TEST_DTYPES:
            # Half precision uses a smaller beta than the other dtypes.
            alpha, base = 0.1, 0.5
            beta = 10 if dtype == torch.float16 else 40
            loss_func = MultiSimilarityLoss(alpha=alpha, beta=beta, base=base)

            embeddings = torch.tensor(
                [c_f.angle_to_coord(angle) for angle in embedding_angles],
                requires_grad=True,
                dtype=dtype,
            ).to(TEST_DEVICE)  # 2D embeddings
            labels = torch.LongTensor([0, 0, 1, 1, 2])

            loss = loss_func(embeddings, labels)
            loss.backward()

            correct_total = 0
            for i in range(len(embeddings)):
                pos_term = 0
                for a, p in pos_pairs:
                    if a != i:
                        continue
                    similarity = torch.matmul(embeddings[a], embeddings[p])
                    pos_term += torch.exp(-alpha * (similarity - base))
                if pos_term > 0:
                    pos_term = (1 / alpha) * torch.log(1 + pos_term)

                neg_term = 0
                for a, n in neg_pairs:
                    if a != i:
                        continue
                    similarity = torch.matmul(embeddings[a], embeddings[n])
                    neg_term += torch.exp(beta * (similarity - base))
                if neg_term > 0:
                    neg_term = (1 / beta) * torch.log(1 + neg_term)

                correct_total += pos_term + neg_term

            correct_total /= embeddings.size(0)
            rtol = 1e-2 if dtype == torch.float16 else 1e-5
            self.assertTrue(torch.isclose(loss, correct_total, rtol=rtol))
Exemplo n.º 29
0
            SA = S.matmul(AM)
            TAR = T.matmul(AR)
            TAW = T.matmul(AM).matmul(W)
            SAW = SA.matmul(W)

            C = TAR
            D = SAW
            G = TAW

            # Removes batch entries with rank-deficient C or D
            U_c, Sig_c, V_c = torch.svd(C)
            U_d, Sig_d, V_d = torch.svd(D.permute(0, 2, 1))

            # Batch indices whose C has a singular value close to zero.
            Sig_c_cpu = Sig_c.cpu()
            bool_array = torch.isclose(Sig_c_cpu,
                                       torch.zeros_like(Sig_c_cpu),
                                       atol=1e-4)
            zero_inds = torch.nonzero(bool_array)
            unique_c, counts = np.unique(zero_inds[:, 0], return_counts=True)

            # Batch indices whose D has a singular value close to zero.
            Sig_d_cpu = Sig_d.cpu()
            bool_array = torch.isclose(Sig_d_cpu,
                                       torch.zeros_like(Sig_d_cpu),
                                       atol=1e-4)
            zero_inds = torch.nonzero(bool_array)
            unique_d, counts = np.unique(zero_inds[:, 0], return_counts=True)

            # Drop the union of both index sets in a single delete. Deleting
            # in two passes is wrong: after the first pass the positions no
            # longer equal the batch indices, so the second pass removes the
            # wrong entries or indexes out of bounds.
            bad_ind = np.union1d(unique_c, unique_d)
            good_ind = np.delete(np.arange(args.bs), bad_ind)
Exemplo n.º 30
0
def all_close(x_tensor, y_tensor, rtol=5e-4, atol=1e-6, equal_nan=False):
    """Return a scalar bool tensor: are all entries of ``x_tensor`` close to ``y_tensor``?

    A Python ``float`` or ``int`` reference is first expanded to a tensor
    shaped like ``x_tensor``; the elementwise ``torch.isclose`` result is then
    reduced with ``.all()``.
    """
    reference = y_tensor
    if isinstance(reference, (float, int)):
        reference = torch.ones_like(x_tensor) * reference
    elementwise = torch.isclose(
        x_tensor,
        reference,
        rtol=rtol,
        atol=atol,
        equal_nan=equal_nan,
    )
    return elementwise.all()
Exemplo n.º 31
0
Arquivo: train.py Projeto: xvdp/captum
def sgd_train_linear_model(
    model: LinearModel,
    dataloader: DataLoader,
    construct_kwargs: Dict[str, Any],
    max_epoch: int = 100,
    reduce_lr: bool = True,
    initial_lr: float = 0.01,
    alpha: float = 1.0,
    loss_fn: Callable = l2_loss,
    reg_term: Optional[int] = 1,
    patience: int = 10,
    threshold: float = 1e-4,
    running_loss_window: Optional[int] = None,
    device: Optional[str] = None,
    init_scheme: str = "zeros",
    debug: bool = False,
) -> Dict[str, float]:
    r"""
    Trains a linear model with SGD. This will continue to iterate your
    dataloader until we converged to a solution or alternatively until we have
    exhausted `max_epoch`.

    Convergence is defined by the loss not changing by `threshold` amount for
    `patience` number of iterations.

    Args:
        model
            The model to train
        dataloader
            The data to train it with. We will assume the dataloader produces
            either pairs or triples of the form (x, y) or (x, y, w). Where x and
            y are typical pairs for supervised learning and w is a weight
            vector.

            We will call `model._construct_model_params` with construct_kwargs
            and the input features set to `x.shape[1]` (`x.shape[0]` corresponds
            to the batch size). We assume that `len(x.shape) == 2`, i.e. the
            tensor is flat. The number of output features will be set to
            y.shape[1] or 1 (if `len(y.shape) == 1`); we require `len(y.shape)
            <= 2`.
        construct_kwargs
            Extra keyword arguments forwarded to
            `model._construct_model_params`.
        max_epoch
            The maximum number of epochs to exhaust
        reduce_lr
            Whether or not to reduce the learning rate as iterations progress.
            Halves the learning rate when the training loss does not move. This
            uses torch.optim.lr_scheduler.ReduceLROnPlateau and uses the
            parameters `patience` and `threshold`
        initial_lr
            The initial learning rate to use.
        alpha
            A constant for the regularization term.
        loss_fn
            The loss to optimise for. It is called as `loss_fn(y, out, w)`,
            i.e. with the labels, the model output and the weight vector
            (`None` when the dataloader yields pairs).
        reg_term
            Regularization is defined by the `reg_term` norm of the weights.
            Please use `None` if you do not wish to use regularization.
        patience
            Defines the number of iterations in a row the loss must remain
            within `threshold` in order to be classified as converged.
        threshold
            Threshold for convergence detection.
        running_loss_window
            Used to report the training loss once we have finished training and
            to determine when we have converged (along with reducing the
            learning rate).

            The reported training loss will take the last `running_loss_window`
            iterations and average them.

            If `None` we will approximate this to be the number of examples in
            an epoch.
        device
            The device to send the model and data to. If None then no `.to` call
            will be used.
        init_scheme
            Initialization to use prior to training the linear model
            ("xavier" or "zeros"; `None` skips the initialization).
        debug
            Whether to print the loss, learning rate per iteration

    Returns
        A dict with the keys `train_time` (seconds spent in the training
        loop), `train_loss` (the loss averaged over the last
        `running_loss_window` iterations), `train_iter` (number of
        iterations run) and `train_epoch` (number of completed epochs).
    """

    loss_window: List[torch.Tensor] = []
    min_avg_loss = None
    convergence_counter = 0
    converged = False

    def get_point(datapoint):
        # Unpack (x, y) or (x, y, w) and move the tensors to `device` if set.
        if len(datapoint) == 2:
            x, y = datapoint
            w = None
        else:
            x, y, w = datapoint

        if device is not None:
            x = x.to(device)
            y = y.to(device)
            if w is not None:
                w = w.to(device)

        return x, y, w

    # get a point and construct the model
    data_iter = iter(dataloader)
    x, y, w = get_point(next(data_iter))

    model._construct_model_params(
        in_features=x.shape[1],
        out_features=y.shape[1] if len(y.shape) == 2 else 1,
        **construct_kwargs,
    )
    model.train()

    assert model.linear is not None

    if init_scheme is not None:
        assert init_scheme in ["xavier", "zeros"]

        with torch.no_grad():
            if init_scheme == "xavier":
                torch.nn.init.xavier_uniform_(model.linear.weight)
            else:
                model.linear.weight.zero_()

            if model.linear.bias is not None:
                model.linear.bias.zero_()

    optim = torch.optim.SGD(model.parameters(), lr=initial_lr)
    # Always bind `scheduler`; it used to be unbound when `reduce_lr` was
    # False, which raised UnboundLocalError at the check inside the loop.
    scheduler = None
    if reduce_lr:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optim, factor=0.5, patience=patience, threshold=threshold
        )

    t1 = time.time()
    epoch = 0
    i = 0
    while epoch < max_epoch:
        while True:  # for x, y, w in dataloader
            if running_loss_window is None:
                running_loss_window = x.shape[0] * len(dataloader)

            y = y.view(x.shape[0], -1)
            if w is not None:
                w = w.view(x.shape[0], -1)

            i += 1

            out = model(x)

            loss = loss_fn(y, out, w)
            if reg_term is not None:
                reg = torch.norm(model.linear.weight, p=reg_term)
                loss += reg.sum() * alpha

            # Keep only the most recent `running_loss_window` losses.
            if len(loss_window) >= running_loss_window:
                loss_window = loss_window[1:]
            loss_window.append(loss.clone().detach())
            assert len(loss_window) <= running_loss_window

            average_loss = torch.mean(torch.stack(loss_window))
            if min_avg_loss is not None:
                # if we haven't improved by at least `threshold`
                if average_loss > min_avg_loss or torch.isclose(
                    min_avg_loss, average_loss, atol=threshold
                ):
                    convergence_counter += 1
                    if convergence_counter >= patience:
                        converged = True
                        break
                else:
                    convergence_counter = 0
            if min_avg_loss is None or min_avg_loss >= average_loss:
                min_avg_loss = average_loss.clone()

            if debug:
                # Both halves are f-strings now; the second one used to print
                # its placeholders literally.
                print(
                    f"lr={optim.param_groups[0]['lr']}, Loss={loss},"
                    + f"Aloss={average_loss}, min_avg_loss={min_avg_loss}"
                )

            loss.backward()

            optim.step()
            model.zero_grad()
            if scheduler is not None:
                scheduler.step(average_loss)

            # Advance manually so the first batch (fetched above to build the
            # model) is also trained on.
            temp = next(data_iter, None)
            if temp is None:
                break
            x, y, w = get_point(temp)

        if converged:
            break

        epoch += 1
        data_iter = iter(dataloader)
        x, y, w = get_point(next(data_iter))

    t2 = time.time()
    return {
        "train_time": t2 - t1,
        "train_loss": torch.mean(torch.stack(loss_window)).item(),
        "train_iter": i,
        "train_epoch": epoch,
    }
Exemplo n.º 32
0
    def _log_prob_with_subsetting(self,
                                  obs: Tensor,
                                  group_idx: Selector,
                                  time_idx: Selector,
                                  measure_idx: Selector,
                                  method: str = 'independent',
                                  lower: Optional[Tensor] = None,
                                  upper: Optional[Tensor] = None) -> Tensor:
        """
        Log-probability of ``obs`` restricted to a subset of groups, times and
        measures, taking censoring at ``lower`` / ``upper`` into account.

        :param obs: Observed values; subset with the (group, time, measure)
          index built from the three selectors.
        :param group_idx: Selector for the group dimension.
        :param time_idx: Selector for the time dimension.
        :param measure_idx: Selector for the measure dimension.
        :param method: Either 'update' or 'independent' (case-insensitive).
          'update' evaluates a multivariate normal built from the
          tobit-adjusted measured mean and covariance. 'independent' sums
          per-measure univariate log-likelihoods, using the normal CDF for
          entries that sit on a censoring bound.
        :param lower: Lower censoring limits, indexed the same way as ``obs``.
          If None, no lower censoring is applied (treated as -inf).
        :param upper: Upper censoring limits, indexed the same way as ``obs``.
          If None, no upper censoring is applied (treated as +inf).
        :return: Log-probabilities with the measure (last) dimension reduced.
        :raises RuntimeError: If ``method`` is not a recognized option.
        """
        self._check_lp_sub_input(group_idx, time_idx)

        idx_no_measure = bmat_idx(group_idx, time_idx)
        idx_3d = bmat_idx(group_idx, time_idx, measure_idx)
        idx_4d = bmat_idx(group_idx, time_idx, measure_idx, measure_idx)

        # subset obs, lower, upper. The bounds are optional in the signature, so
        # a missing bound becomes an infinite one instead of failing on `None[...]`.
        obs = obs[idx_3d]
        if lower is None:
            lower = torch.full_like(obs, float('-inf'))
        else:
            lower = lower[idx_3d]
        if upper is None:
            upper = torch.full_like(obs, float('inf'))
        else:
            upper = upper[idx_3d]

        method = method.lower()
        if method == 'update':
            # NOTE(review): with omitted bounds this path passes +/-inf tensors to
            # tobit_probs / tobit_adjustment; presumably they treat infinite
            # bounds as "uncensored" -- confirm against those helpers.
            means = self.means[idx_no_measure]
            covs = self.covs[idx_no_measure]
            H = self.H[idx_3d]
            R = self.R[idx_4d]
            measured_means = H.matmul(means.unsqueeze(-1)).squeeze(-1)

            # calculate prob-obs:
            prob_lo, prob_up = tobit_probs(mean=measured_means,
                                           cov=R,
                                           lower=lower,
                                           upper=upper)
            prob_obs = torch.diag_embed(1 - prob_up - prob_lo)

            # calculate adjusted measure mean and cov:
            mm_adj, R_adj = tobit_adjustment(mean=measured_means,
                                             cov=R,
                                             lower=lower,
                                             upper=upper,
                                             probs=(prob_lo, prob_up))

            # system uncertainty:
            Ht = H.permute(0, 1, 3, 2)
            system_uncertainty = prob_obs.matmul(H).matmul(covs).matmul(
                Ht).matmul(prob_obs) + R_adj

            # log prob:
            dist = torch.distributions.MultivariateNormal(
                mm_adj, system_uncertainty)
            return dist.log_prob(obs)
        elif method == 'independent':
            pred_mean = self.predictions[idx_3d]
            pred_cov = self.prediction_uncertainty[idx_4d]

            # an observation equal to a bound is treated as censored at that bound;
            # a finite observation is never "close" to an infinite bound.
            cens_up = torch.isclose(obs, upper)
            cens_lo = torch.isclose(obs, lower)
            uncens = ~(cens_up | cens_lo)

            # per-measure std from the diagonal of the prediction covariance,
            # computed for all measures at once:
            std = torch.diagonal(pred_cov, dim1=-2, dim2=-1).sqrt()
            z = (pred_mean - obs) / std

            # pdf is well behaved at tails:
            loglik_uncens = std_normal.log_prob(z) - std.log()

            # but cdf is not, clamp:
            z = torch.clamp(z, -5., 5.)
            # P(value >= upper) = cdf(z); P(value <= lower) = 1 - cdf(z)
            loglik_cens_up = std_normal.cdf(z).log()
            loglik_cens_lo = (1. - std_normal.cdf(z)).log()

            # masked assignment (lower-censoring wins if both masks are set);
            # results are cast to the dtype of `obs`, as the output buffer is.
            loglik = torch.zeros_like(obs)
            loglik[cens_up] = loglik_cens_up[cens_up].to(loglik.dtype)
            loglik[cens_lo] = loglik_cens_lo[cens_lo].to(loglik.dtype)
            loglik[uncens] = loglik_uncens[uncens].to(loglik.dtype)

            # take the product of the dimension probs (i.e., assume independence)
            return torch.sum(loglik, -1)
        else:
            raise RuntimeError("Expected method to be one of: {}.".format(
                {'update', 'independent'}))