Exemplo n.º 1
0
def test_fit_replicable():
    """Two models built from the same seed and fit on the same data must agree.

    Both models are created with ``RandomState(45)`` and fit on one shared
    dataset; their ``sgd_weights`` and ``sgd_bias`` are then compared for
    exact equality.
    """
    n_features = 20
    X, y = get_fake_data(100, n_features, RandomState(4))

    def fit_fresh_model():
        # A new RandomState(45) per call gives each model the same seed.
        model = get_new_model(n_features, RandomState(45))
        return binary_fit(model, X, y)

    first = fit_fresh_model()
    second = fit_fresh_model()

    assert_array_equal(first.sgd_weights, second.sgd_weights)
    assert_array_equal(first.sgd_bias, second.sgd_bias)
Exemplo n.º 2
0
def test_fit_replicable():
    """Fitting is reproducible when the model seed and the data are fixed.

    Two models are created with ``RandomState(45)`` and a third argument of
    100, each is fit on the same ``(X, y)`` pair, and the resulting
    ``sgd_weights`` and ``sgd_bias`` must be exactly equal.
    """
    n_features = 20
    X, y = get_fake_data(100, n_features, RandomState(4))

    fitted = []
    for _ in range(2):
        # Re-seed per model so both start from the same random state.
        model = get_new_model(n_features, RandomState(45), 100)
        fitted.append(binary_fit(model, (X, y)))
    first, second = fitted

    assert_array_equal(first.sgd_weights, second.sgd_weights)
    assert_array_equal(first.sgd_bias, second.sgd_bias)
Exemplo n.º 3
0
def test_binary_fit():
    """Check binary_fit's return value and step size across dataset sizes.

    For each size, ``binary_fit`` must return the very model it was given,
    and the model's ``sgd_step_size0`` afterwards must be positive and no
    larger than the value ``find_sgd_step_size0`` returned before fitting.
    """
    rstate = RandomState(42)
    n_features = 20

    sizes = (
        100,
        DEFAULT_MAX_EXAMPLES,
        int(DEFAULT_MAX_EXAMPLES * 1.5),
        int(DEFAULT_MAX_EXAMPLES * 3),
    )

    for n_examples in sizes:
        # The same rstate object is passed to every call, in this order.
        model = get_new_model(n_features, rstate, n_examples)
        X, y = get_fake_data(n_examples, n_features, rstate, separation=0.1)
        dataset = (X, y)

        upper_bound = find_sgd_step_size0(model, dataset)
        returned = binary_fit(model, dataset)

        assert returned is model
        assert 0 < model.sgd_step_size0 <= upper_bound
Exemplo n.º 4
0
def train_svm(Xyd, l2_regularization, max_observations):
    """
    Fit and return a MarginBinaryASGD model on the given data.

    Xyd is a triple ``(train_X, train_y, decisions)``. ``train_X`` must be
    two-dimensional, and all three must have the same length. Before
    fitting, the decisions are shifted by the mean of ``train_y * decisions``
    times ``train_y``.

    Raises ValueError if ``train_X`` is not two-dimensional.
    """
    train_X, train_y, decisions = Xyd

    if train_X.ndim != 2:
        raise ValueError('train_X must be matrix')

    n_rows = len(train_X)
    assert n_rows == len(train_y), (n_rows, len(train_y))
    assert n_rows == len(decisions), (n_rows, len(decisions))

    # doctor the decisions so that there is always something to learn:
    # subtracting the mean margin (times the label) from each decision
    mean_margin = (train_y * decisions).mean()
    shifted_decisions = decisions - mean_margin * train_y

    model_kwargs = dict(
        n_features=train_X.shape[1],
        l2_regularization=l2_regularization,
        dtype=train_X.dtype,
        rstate=np.random.RandomState(1234),
        max_observations=max_observations,
    )
    svm = MarginBinaryASGD(**model_kwargs)

    binary_fit(svm, (train_X, train_y, np.asarray(shifted_decisions)))
    return svm
Exemplo n.º 5
0
def test_binary_fit():
    """Check that binary_fit returns its input model and pins the step sizes.

    Three dataset sizes are fit in sequence (100, DEFAULT_MAX_EXAMPLES and
    2000 examples). For each, ``binary_fit`` must return the very object it
    was given, and the model's ``sgd_step_size0`` must match a recorded
    value to four decimal places.
    """
    rstate = RandomState(42)
    n_features = 20

    # (number of examples, expected sgd_step_size0). The order matters:
    # the same ``rstate`` object is passed to every call below.
    cases = [
        (100, 0.04812),
        (DEFAULT_MAX_EXAMPLES, 0.0047),
        (2000, 0.0067),
    ]

    for n_examples, expected_step_size in cases:
        clf = get_new_model(n_features, rstate)
        X, y = get_fake_data(n_examples, n_features, rstate)
        fitted = binary_fit(clf, X, y)
        # Identity, not equality: the fit must hand back the same object.
        assert fitted is clf
        assert_almost_equal(clf.sgd_step_size0, expected_step_size, decimal=4)
Exemplo n.º 6
0
def test_binary_fit():
    """Check that binary_fit returns its input model and pins the step sizes.

    Models are fit on 100, DEFAULT_MAX_EXAMPLES and 2000 examples in turn,
    all sharing one ``RandomState(42)``. Each fit must return the same
    object it was given, and ``sgd_step_size0`` is compared against a
    recorded value to four decimal places.
    """
    rstate = RandomState(42)
    n_features = 20

    clf100 = get_new_model(n_features, rstate)
    X, y = get_fake_data(100, n_features, rstate)
    _clf100 = binary_fit(clf100, X, y)
    assert _clf100 is clf100
    assert_almost_equal(clf100.sgd_step_size0, 0.04812, decimal=4)

    # same checks at the default maximum number of examples
    clf1000 = get_new_model(n_features, rstate)
    X, y = get_fake_data(DEFAULT_MAX_EXAMPLES, n_features, rstate)
    _clf1000 = binary_fit(clf1000, X, y)
    assert _clf1000 is clf1000
    assert_almost_equal(clf1000.sgd_step_size0, 0.0047, decimal=4)

    # same checks with 2000 examples
    clf2000 = get_new_model(n_features, rstate)
    X, y = get_fake_data(2000, n_features, rstate)
    _clf2000 = binary_fit(clf2000, X, y)
    # Identity, not equality, to match the two checks above.
    assert _clf2000 is clf2000
    assert_almost_equal(clf2000.sgd_step_size0, 0.0067, decimal=4)