Example #1
def test_embed_dim(estimator, build_dataset):
    # Checks that the dimension of the output space is as expected
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    assert model.transform(X).shape == X.shape

    # assert that ValueError is thrown if input shape is 1D
    context = make_context(estimator)
    err_msg = ("2D array of formed points expected{}. Found 1D array "
               "instead:\ninput={}. Reshape your data and/or use a "
               "preprocessor.\n".format(context, X[0]))
    with pytest.raises(ValueError) as raised_error:
        model.score_pairs(model.transform(X[0, :]))
    assert str(raised_error.value) == err_msg
    # we test that the shape is also OK when doing dimensionality reduction
    if type(model).__name__ in {'LFDA', 'MLKR', 'NCA', 'RCA'}:
        # TODO:
        #  avoid this enumeration and rather test if hasattr n_components
        #  as soon as we have made the arguments names as such (issue #167)
        model.set_params(num_dims=2)
        model.fit(*remove_y_quadruplets(estimator, input_data, labels))
        assert model.transform(X).shape == (X.shape[0], 2)
        # assert that ValueError is thrown if input shape is 1D
        with pytest.raises(ValueError) as raised_error:
            model.transform(model.transform(X[0, :]))
        assert str(raised_error.value) == err_msg
def test_embed_dim(estimator, build_dataset):
  # Checks that the dimension of the output space is as expected
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  assert model.transform(X).shape == X.shape

  # assert that ValueError is thrown if input shape is 1D
  context = make_context(estimator)
  err_msg = ("2D array of formed points expected{}. Found 1D array "
             "instead:\ninput={}. Reshape your data and/or use a "
             "preprocessor.\n".format(context, X[0]))
  with pytest.raises(ValueError) as raised_error:
    model.score_pairs(model.transform(X[0, :]))
  assert str(raised_error.value) == err_msg
  # we test that the shape is also OK when doing dimensionality reduction
  if hasattr(model, 'n_components'):
    model.set_params(n_components=2)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    assert model.transform(X).shape == (X.shape[0], 2)
    # assert that ValueError is thrown if input shape is 1D
    with pytest.raises(ValueError) as raised_error:
      model.transform(model.transform(X[0, :]))
    assert str(raised_error.value) == err_msg
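Both variants above, like every snippet below, lean on two helpers from the shared test utilities that are not reproduced here. A minimal sketch of plausible implementations, assuming quadruplets_learners is a list of (estimator, dataset-builder) pairs and that error messages are suffixed with ' by <EstimatorName>' (both are assumptions, not the library's verbatim code):

def remove_y_quadruplets(estimator, X, y):
  # Quadruplets learners take no y in fit; drop it for those estimators.
  if estimator.__class__.__name__ in [e.__class__.__name__
                                      for (e, _) in quadruplets_learners]:
    return (X,)
  return (X, y)


def make_context(estimator):
  # Builds the ' by <Estimator>' suffix used in the ValueError messages.
  return '' if estimator is None else ' by ' + estimator.__class__.__name__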
Example #3
def test_transformer_is_2D(estimator, build_dataset):
    """Tests that the transformer of metric learners is 2D"""
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    # test that it works for X.shape[1] features
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    assert model.transformer_.shape == (X.shape[1], X.shape[1])

    # test that it works for 1 feature
    trunc_data = input_data[..., :1]
    # we drop duplicates that might have been formed, i.e. of the form
    # aabc or abcc or aabb for quadruplets, and aa for pairs.
    if isinstance(estimator, _QuadrupletsClassifierMixin):
        for slice_idx in [slice(0, 2), slice(2, 4)]:
            pairs = trunc_data[:, slice_idx, :]
            diffs = pairs[:, 1, :] - pairs[:, 0, :]
            to_keep = np.where(np.abs(diffs.ravel()) > 1e-9)
            trunc_data = trunc_data[to_keep]
            labels = labels[to_keep]
    elif isinstance(estimator, _PairsClassifierMixin):
        diffs = trunc_data[:, 1, :] - trunc_data[:, 0, :]
        to_keep = np.where(np.abs(diffs.ravel()) > 1e-9)
        trunc_data = trunc_data[to_keep]
        labels = labels[to_keep]
    model.fit(*remove_y_quadruplets(estimator, trunc_data, labels))
    assert model.transformer_.shape == (1, 1)  # the transformer must be 2D
def test_cross_validation_is_finite(estimator, build_dataset):
    """Tests that validation on metric-learn estimators returns something finite
  """
    input_data, labels, preprocessor, _ = build_dataset()
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    set_random_state(estimator)
    assert np.isfinite(
        cross_val_score(estimator,
                        *remove_y_quadruplets(estimator, input_data,
                                              labels))).all()
    assert np.isfinite(
        cross_val_predict(estimator,
                          *remove_y_quadruplets(estimator, input_data,
                                                labels))).all()
def test_pipeline_consistency(estimator, build_dataset,
                              with_preprocessor):
  # Adapted from scikit-learn
  # check that make_pipeline(est) gives same score as est
  # we do this test on all except quadruplets (since they don't have a y
  # in fit):
  if estimator.__class__.__name__ not in [e.__class__.__name__
                                          for (e, _) in
                                          quadruplets_learners]:
    input_data, y, preprocessor, _ = build_dataset(with_preprocessor)

    def make_random_state(estimator, in_pipeline):
      rs = {}
      name_estimator = estimator.__class__.__name__
      if name_estimator[-11:] == '_Supervised':
        name_param = 'random_state'
        if in_pipeline:
          name_param = name_estimator.lower() + '__' + name_param
        rs[name_param] = check_random_state(0)
      return rs

    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    pipeline = make_pipeline(estimator)
    estimator.fit(*remove_y_quadruplets(estimator, input_data, y),
                  **make_random_state(estimator, False))
    pipeline.fit(*remove_y_quadruplets(estimator, input_data, y),
                 **make_random_state(estimator, True))

    if hasattr(estimator, 'score'):
      result = estimator.score(*remove_y_quadruplets(estimator,
                                                     input_data,
                                                     y))
      result_pipe = pipeline.score(*remove_y_quadruplets(estimator,
                                                         input_data,
                                                         y))
      assert_allclose_dense_sparse(result, result_pipe)

    if hasattr(estimator, 'predict'):
      result = estimator.predict(input_data)
      result_pipe = pipeline.predict(input_data)
      assert_allclose_dense_sparse(result, result_pipe)

    if issubclass(estimator.__class__, TransformerMixin):
      if hasattr(estimator, 'transform'):
        result = estimator.transform(input_data)
        result_pipe = pipeline.transform(input_data)
        assert_allclose_dense_sparse(result, result_pipe)
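The double-underscore name built by make_random_state is scikit-learn's standard syntax for routing fit parameters to a named step inside a pipeline. For an estimator class named ITML_Supervised, say, the routed call would look like this (illustrative only):

# make_pipeline names the step after the lowercased class name, so the
# fit parameter gets the 'itml_supervised__' prefix
pipeline.fit(input_data, y,
             itml_supervised__random_state=check_random_state(0))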
Example #7
def test_embed_finite(estimator, build_dataset):
    # Checks that embed returns vectors with finite values
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    assert np.isfinite(model.transform(X)).all()
def test_cross_validation_is_finite(estimator, build_dataset):
  """Tests that validation on metric-learn estimators returns something finite
  """
  input_data, labels, preprocessor, _ = build_dataset()
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  set_random_state(estimator)
  assert np.isfinite(cross_val_score(estimator,
                                     *remove_y_quadruplets(estimator,
                                                           input_data,
                                                           labels))).all()
  assert np.isfinite(cross_val_predict(estimator,
                                       *remove_y_quadruplets(estimator,
                                                             input_data,
                                                             labels)
                                       )).all()
Example #9
def test_score_pairs_finite(estimator, build_dataset):
    # tests that the score is finite
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    pairs = np.array(list(product(X, X)))
    assert np.isfinite(model.score_pairs(pairs)).all()
Example #10
def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset):
    """Check that the metric returned by get_metric is compatible with
  scikit-learn's algorithms using a custom metric, DBSCAN for instance"""
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    clustering = DBSCAN(metric=model.get_metric())
    clustering.fit(X)
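The callable returned by get_metric works with any scikit-learn estimator that accepts a callable metric, not just DBSCAN. A usage sketch with NearestNeighbors, assuming model has been fitted as above:

from sklearn.neighbors import NearestNeighbors

neighbors = NearestNeighbors(n_neighbors=5, metric=model.get_metric())
neighbors.fit(X)
distances, indices = neighbors.kneighbors(X[:3])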
def test_estimators_fit_returns_self(estimator, build_dataset,
                                     with_preprocessor):
    """Check if self is returned when calling fit"""
    # Adapted from scikit-learn
    input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    assert estimator.fit(
        *remove_y_quadruplets(estimator, input_data, labels)) is estimator
def test_cross_validation_manual_vs_scikit(estimator, build_dataset,
                                           with_preprocessor):
    """Tests that if we make a manual cross-validation, the result will be the
  same as scikit-learn's cross-validation (some code for generating the
  folds is taken from scikit-learn).
  """
    if any(hasattr(estimator, method) for method in ["predict", "score"]):
        input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
        estimator = clone(estimator)
        estimator.set_params(preprocessor=preprocessor)
        set_random_state(estimator)
        n_splits = 3
        kfold = KFold(shuffle=False, n_splits=n_splits)
        n_samples = input_data.shape[0]
        fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=int)
        fold_sizes[:n_samples % n_splits] += 1
        current = 0
        scores, predictions = [], np.zeros(input_data.shape[0])
        for fold_size in fold_sizes:
            start, stop = current, current + fold_size
            current = stop
            test_slice = slice(start, stop)
            train_mask = np.ones(input_data.shape[0], bool)
            train_mask[test_slice] = False
            y_train, y_test = labels[train_mask], labels[test_slice]
            estimator.fit(*remove_y_quadruplets(
                estimator, input_data[train_mask], y_train))
            if hasattr(estimator, "score"):
                scores.append(
                    estimator.score(*remove_y_quadruplets(
                        estimator, input_data[test_slice], y_test)))
            if hasattr(estimator, "predict"):
                predictions[test_slice] = estimator.predict(
                    input_data[test_slice])
        if hasattr(estimator, "score"):
            assert all(scores == cross_val_score(
                estimator,
                *remove_y_quadruplets(estimator, input_data, labels),
                cv=kfold))
        if hasattr(estimator, "predict"):
            assert all(predictions == cross_val_predict(
                estimator,
                *remove_y_quadruplets(estimator, input_data, labels),
                cv=kfold))
Example #13
def test_embed_toy_example(estimator, build_dataset):
    # Checks that embed works on a toy example
    input_data, labels, _, X = build_dataset()
    n_samples = 20
    X = X[:n_samples]
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    embedded_points = X.dot(model.transformer_.T)
    assert_array_almost_equal(model.transform(X), embedded_points)
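The identity behind this toy check (and behind the distance checks in Example #18 below) is that transformer_ holds the linear map L of the learned metric. As a working assumption throughout these tests:

# with L = model.transformer_:
#   M = L.T @ L                              # the learned Mahalanobis matrix
#   d(a, b) = sqrt((a - b) @ M @ (a - b))    # metric distance
#           = np.linalg.norm(L @ a - L @ b)  # euclidean distance of embeddings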
def test_estimators_fit_returns_self(estimator, build_dataset,
                                     with_preprocessor):
  """Check if self is returned when calling fit"""
  # Adapted from scikit-learn
  input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  assert estimator.fit(*remove_y_quadruplets(estimator,
                                             input_data,
                                             labels)) is estimator
def test_cross_validation_manual_vs_scikit(estimator, build_dataset,
                                           with_preprocessor):
  """Tests that if we make a manual cross-validation, the result will be the
  same as scikit-learn's cross-validation (some code for generating the
  folds is taken from scikit-learn).
  """
  if any(hasattr(estimator, method) for method in ["predict", "score"]):
    input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    set_random_state(estimator)
    n_splits = 3
    kfold = KFold(shuffle=False, n_splits=n_splits)
    n_samples = input_data.shape[0]
    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=int)
    fold_sizes[:n_samples % n_splits] += 1
    current = 0
    scores, predictions = [], np.zeros(input_data.shape[0])
    for fold_size in fold_sizes:
      start, stop = current, current + fold_size
      current = stop
      test_slice = slice(start, stop)
      train_mask = np.ones(input_data.shape[0], bool)
      train_mask[test_slice] = False
      y_train, y_test = labels[train_mask], labels[test_slice]
      estimator.fit(*remove_y_quadruplets(estimator,
                                          input_data[train_mask],
                                          y_train))
      if hasattr(estimator, "score"):
        scores.append(estimator.score(*remove_y_quadruplets(
            estimator, input_data[test_slice], y_test)))
      if hasattr(estimator, "predict"):
        predictions[test_slice] = estimator.predict(input_data[test_slice])
    if hasattr(estimator, "score"):
      assert all(scores == cross_val_score(
          estimator, *remove_y_quadruplets(estimator, input_data, labels),
          cv=kfold))
    if hasattr(estimator, "predict"):
      assert all(predictions == cross_val_predict(
          estimator,
          *remove_y_quadruplets(estimator, input_data, labels),
          cv=kfold))
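The fold-size arithmetic above reproduces how KFold sizes its splits: every fold gets n_samples // n_splits points, and the first n_samples % n_splits folds get one extra. A quick worked example:

import numpy as np

n_samples, n_splits = 10, 3
fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=int)  # [3 3 3]
fold_sizes[:n_samples % n_splits] += 1
print(fold_sizes)  # [4 3 3]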
Example #16
def test_embed_is_linear(estimator, build_dataset):
    # Checks that the embedding is linear
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    assert_array_almost_equal(
        model.transform(X[:10] + X[10:20]),
        model.transform(X[:10]) + model.transform(X[10:20]))
    assert_array_almost_equal(model.transform(5 * X[:10]),
                              5 * model.transform(X[:10]))
def test_array_like_inputs(estimator, build_dataset, with_preprocessor):
    """Test that metric-learners can have as input (of all functions that are
  applied on data) any array-like object."""
    input_data, labels, preprocessor, X = build_dataset(with_preprocessor)

    # we subsample the data for the test to be more efficient
    input_data, _, labels, _ = train_test_split(input_data,
                                                labels,
                                                train_size=20)
    X = X[:10]

    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    set_random_state(estimator)
    input_variants, label_variants = generate_array_like(input_data, labels)
    for input_variant in input_variants:
        for label_variant in label_variants:
            estimator.fit(
                *remove_y_quadruplets(estimator, input_variant, label_variant))
        if hasattr(estimator, "predict"):
            estimator.predict(input_variant)
        if hasattr(estimator, "predict_proba"):
            estimator.predict_proba(input_variant)  # not implemented yet, but
            # if it is added some day, or contributed in a new algorithm, it
            # will be checked automatically
        if hasattr(estimator, "decision_function"):
            estimator.decision_function(input_variant)
        if hasattr(estimator, "score"):
            for label_variant in label_variants:
                estimator.score(*remove_y_quadruplets(estimator, input_variant,
                                                      label_variant))

    X_variants, _ = generate_array_like(X)
    for X_variant in X_variants:
        estimator.transform(X_variant)

    pairs = np.array([[X[0], X[1]], [X[0], X[2]]])
    pairs_variants, _ = generate_array_like(pairs)
    for pairs_variant in pairs_variants:
        estimator.score_pairs(pairs_variant)
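generate_array_like is another shared utility that is not shown. A plausible minimal sketch (the exact set of variants is an assumption; the real helper may cover more container types):

import numpy as np

def generate_array_like(input_data, labels=None):
  # Returns simple array-like variants of the data and labels:
  # ndarray, nested lists, and nested tuples.
  def tuplify(a):
    return tuple(tuplify(i) for i in a) if isinstance(a, list) else a
  as_list = np.asarray(input_data).tolist()
  data_variants = [np.asarray(input_data), as_list, tuplify(as_list)]
  label_variants = [labels]
  if labels is not None:
    label_variants += [np.asarray(labels).tolist(),
                       tuple(np.asarray(labels).tolist())]
  return data_variants, label_variants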
Example #18
def test_score_pairs_toy_example(estimator, build_dataset):
    # Checks that score_pairs works on a toy example
    input_data, labels, _, X = build_dataset()
    n_samples = 20
    X = X[:n_samples]
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    pairs = np.stack([X[:10], X[10:20]], axis=1)
    embedded_pairs = pairs.dot(model.transformer_.T)
    distances = np.sqrt(
        np.sum((embedded_pairs[:, 1] - embedded_pairs[:, 0])**2, axis=-1))
    assert_array_almost_equal(model.score_pairs(pairs), distances)
Example #19
def test_metric_raises_deprecation_warning(estimator, build_dataset):
    """assert that a deprecation warning is raised if someones wants to call
  the `metric` function"""
    # TODO: remove this method in version 0.6.0
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))

    with pytest.warns(DeprecationWarning) as raised_warning:
        model.metric()
    assert (str(raised_warning[0].message) == (
        "`metric` is deprecated since version 0.5.0 and will be removed "
        "in 0.6.0. Use `get_mahalanobis_matrix` instead."))
def test_simple_estimator(estimator, build_dataset, with_preprocessor):
  """Tests that fit, predict and scoring works.
  """
  if any(hasattr(estimator, method) for method in ["predict", "score"]):
    input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
    (tuples_train, tuples_test, y_train,
     y_test) = train_test_split(input_data, labels, random_state=RNG)
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    set_random_state(estimator)

    estimator.fit(*remove_y_quadruplets(estimator, tuples_train, y_train))
    check_score(estimator, tuples_test, y_test)
    check_predict(estimator, tuples_test)
Example #22
def test_get_squared_metric(estimator, build_dataset):
    """Test that the squared metric returned is indeed the square of the
  metric"""
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    metric = model.get_metric()

    n_features = X.shape[1]
    for seed in range(10):
        rng = np.random.RandomState(seed)
        a, b = (rng.randn(n_features) for _ in range(2))
        assert_allclose(metric(a, b, squared=True),
                        metric(a, b, squared=False)**2,
                        rtol=1e-15)
def test_get_metric_equivalent_to_explicit_mahalanobis(estimator,
                                                       build_dataset):
  """Tests that using the get_metric method of mahalanobis metric learners is
  equivalent to explicitly calling scipy's mahalanobis metric
  """
  rng = np.random.RandomState(42)
  input_data, labels, _, X = build_dataset()
  model = clone(estimator)
  set_random_state(model)
  model.fit(*remove_y_quadruplets(estimator, input_data, labels))
  metric = model.get_metric()
  n_features = X.shape[1]
  a, b = (rng.randn(n_features), rng.randn(n_features))
  expected_dist = mahalanobis(a[None], b[None],
                              VI=model.get_mahalanobis_matrix())
  assert_allclose(metric(a, b), expected_dist, rtol=1e-13)
Example #24
def test_score_pairs_dim(estimator, build_dataset):
    # scoring of 3D arrays should return 1D array (several tuples),
    # and scoring of 2D arrays (one tuple) should return an error (like
    # scikit-learn's error when scoring 1D arrays)
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    tuples = np.array(list(product(X, X)))
    assert model.score_pairs(tuples).shape == (tuples.shape[0], )
    context = make_context(estimator)
    msg = ("3D array of formed tuples expected{}. Found 2D array "
           "instead:\ninput={}. Reshape your data and/or use a preprocessor.\n"
           .format(context, tuples[1]))
    with pytest.raises(ValueError) as raised_error:
        model.score_pairs(tuples[1])
    assert str(raised_error.value) == msg
def test_dont_overwrite_parameters(estimator, build_dataset,
                                   with_preprocessor):
    # Adapted from scikit-learn
    # check that fit method only changes or sets private attributes
    input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
    estimator = clone(estimator)
    estimator.set_params(preprocessor=preprocessor)
    if hasattr(estimator, "n_components"):
        estimator.n_components = 1
    dict_before_fit = estimator.__dict__.copy()

    estimator.fit(*remove_y_quadruplets(estimator, input_data, labels))
    dict_after_fit = estimator.__dict__

    public_keys_after_fit = [
        key for key in dict_after_fit.keys() if is_public_parameter(key)
    ]

    attrs_added_by_fit = [
        key for key in public_keys_after_fit
        if key not in dict_before_fit.keys()
    ]

    # check that fit doesn't add any public attribute
    assert not attrs_added_by_fit, (
        "Estimator adds public attribute(s) during"
        " the fit method."
        " Estimators are only allowed to add private "
        "attributes"
        " either started with _ or ended"
        " with _ but %s added" % ', '.join(attrs_added_by_fit))

    # check that fit doesn't change any public attribute
    attrs_changed_by_fit = [
        key for key in public_keys_after_fit
        if (dict_before_fit[key] is not dict_after_fit[key])
    ]

    assert not attrs_changed_by_fit, (
        "Estimator changes public attribute(s) during"
        " the fit method. Estimators are only allowed"
        " to change attributes started"
        " or ended with _, but"
        " %s changed" % ', '.join(attrs_changed_by_fit))
Example #26
def test_score_pairs_pairwise(estimator, build_dataset):
    # Computing pairwise scores should return a euclidean distance matrix.
    input_data, labels, _, X = build_dataset()
    n_samples = 20
    X = X[:n_samples]
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))

    pairwise = model.score_pairs(np.array(list(product(X, X))))\
        .reshape(n_samples, n_samples)

    check_is_distance_matrix(pairwise)

    # a necessary condition for euclidean distance matrices: (see
    # https://en.wikipedia.org/wiki/Euclidean_distance_matrix)
    assert np.linalg.matrix_rank(pairwise**2) <= min(X.shape) + 2

    # assert that this distance is coherent with pdist on embeddings
    assert_array_almost_equal(squareform(pairwise), pdist(model.transform(X)))
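check_is_distance_matrix is defined elsewhere in the suite; a minimal sketch of the properties it plausibly asserts (the tolerance is an assumption):

import numpy as np

def check_is_distance_matrix(pairwise):
  assert (pairwise >= 0).all()              # non-negativity
  assert np.allclose(pairwise, pairwise.T)  # symmetry
  assert np.allclose(np.diag(pairwise), 0)  # zero diagonal
  # triangle inequality: d(i, k) <= d(i, j) + d(j, k) for all i, j, k
  assert (pairwise[:, None, :] <=
          pairwise[:, :, None] + pairwise[None, :, :] + 1e-10).all()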
def test_dont_overwrite_parameters(estimator, build_dataset,
                                   with_preprocessor):
  # Adapted from scikit-learn
  # check that fit method only changes or sets private attributes
  input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  if hasattr(estimator, "num_dims"):
    estimator.num_dims = 1
  dict_before_fit = estimator.__dict__.copy()

  estimator.fit(*remove_y_quadruplets(estimator, input_data, labels))
  dict_after_fit = estimator.__dict__

  public_keys_after_fit = [key for key in dict_after_fit.keys()
                           if is_public_parameter(key)]

  attrs_added_by_fit = [key for key in public_keys_after_fit
                        if key not in dict_before_fit.keys()]

  # check that fit doesn't add any public attribute
  assert not attrs_added_by_fit, (
      "Estimator adds public attribute(s) during"
      " the fit method."
      " Estimators are only allowed to add private "
      "attributes"
      " either started with _ or ended"
      " with _ but %s added" % ', '.join(attrs_added_by_fit))

  # check that fit doesn't change any public attribute
  attrs_changed_by_fit = [key for key in public_keys_after_fit
                          if (dict_before_fit[key]
                              is not dict_after_fit[key])]

  assert not attrs_changed_by_fit, (
      "Estimator changes public attribute(s) during"
      " the fit method. Estimators are only allowed"
      " to change attributes started"
      " or ended with _, but"
      " %s changed" % ', '.join(attrs_changed_by_fit))
Example #28
def test_get_metric_is_pseudo_metric(estimator, build_dataset):
    """Tests that the get_metric method of mahalanobis metric learners returns a
  pseudo-metric (a metric, except that only one direction of the identity of
  indiscernibles property holds)
  """
    input_data, labels, _, X = build_dataset()
    model = clone(estimator)
    set_random_state(model)
    model.fit(*remove_y_quadruplets(estimator, input_data, labels))
    metric = model.get_metric()

    n_features = X.shape[1]
    for seed in range(10):
        rng = np.random.RandomState(seed)
        a, b, c = (rng.randn(n_features) for _ in range(3))
        assert metric(a, b) >= 0  # positivity
        assert metric(a, b) == metric(b, a)  # symmetry
        # one side of identity of indiscernibles: x == y => d(x, y) == 0. The
        # other direction does not always hold for Mahalanobis distances.
        assert metric(a, a) == 0
        # triangular inequality
        assert (metric(a, c) < metric(a, b) + metric(b, c) or np.isclose(
            metric(a, c), metric(a, b) + metric(b, c), rtol=1e-20))
def test_dict_unchanged(estimator, build_dataset, with_preprocessor):
  # Adapted from scikit-learn
  (input_data, labels, preprocessor,
   to_transform) = build_dataset(with_preprocessor)
  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  if hasattr(estimator, "num_dims"):
    estimator.num_dims = 1
  estimator.fit(*remove_y_quadruplets(estimator, input_data, labels))

  def check_dict():
    assert estimator.__dict__ == dict_before, (
        "Estimator changes __dict__ during %s" % method)
  for method in ["predict", "decision_function", "predict_proba"]:
    if hasattr(estimator, method):
      dict_before = estimator.__dict__.copy()
      getattr(estimator, method)(input_data)
      check_dict()
  if hasattr(estimator, "transform"):
    dict_before = estimator.__dict__.copy()
    # we transform only dataset of points
    estimator.transform(to_transform)
    check_dict()
Example #30
def check_score(estimator, tuples, y):
  if hasattr(estimator, "score"):
    score = estimator.score(*remove_y_quadruplets(estimator, tuples, y))
    assert np.isfinite(score)
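check_predict, used alongside check_score in test_simple_estimator, is not shown in these examples. A minimal sketch under the assumption that it only verifies the prediction count:

def check_predict(estimator, tuples):
  if hasattr(estimator, "predict"):
    predictions = estimator.predict(tuples)
    assert len(predictions) == len(tuples)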