Пример #1
0
def test_vectorizing_and_similar_terms():
    """Vectorize weighted terms in a k=3 space and check similarity ranking."""
    space = AssocSpace.from_entries(ENTRIES, k=3)
    # NOTE(review): 'not a term' is presumably absent from ENTRIES and is
    # expected to contribute nothing to the vector -- the residual check
    # below only passes if that is the case.
    term_weights = [('apple', 5), ('banana', 22), ('not a term', 17)]
    apple_row = space.row_named('apple')
    banana_row = space.row_named('banana')
    combined = space.vector_from_terms(term_weights)

    # A term associated with itself should score (approximately) 1.
    self_score = space.assoc_between_two_terms('apple', 'apple')
    assert abs(self_score - 1.0) < 1e-3

    # Two distinct terms must score noticeably lower than that (below 0.9).
    cross_score = space.assoc_between_two_terms('apple', 'banana')
    assert cross_score < 0.9

    # The combined vector should lie in the plane spanned by the apple and
    # banana rows. Build an orthonormal pair for that plane (unit apple, then
    # the unit part of banana orthogonal to apple), project both directions
    # out of the combined vector, and require that almost nothing remains.
    apple_unit = normalize(apple_row)
    banana_unit = normalize(
        banana_row - apple_unit * apple_unit.dot(banana_row)
    )
    residual = combined - apple_unit * apple_unit.dot(combined)
    residual -= banana_unit * banana_unit.dot(residual)
    assert norm(residual) < 1e-3

    # Similar-term lookup must return its results in descending score order.
    ranked_labels, ranked_scores = zip(
        *space.terms_similar_to_vector(combined)
    )
    eq_(list(ranked_scores), sorted(ranked_scores, reverse=True))

    # The single best match must agree with the head of the ranked list.
    best = space.most_similar_to_vector(combined)
    eq_(best[0], ranked_labels[0])
    eq_(best[1], ranked_scores[0])

    # Expected relative ordering: banana, then apple, then green and celery.
    position_of = ranked_labels.index
    assert position_of('banana') < position_of('apple')
    assert position_of('apple') < position_of('green')
    assert position_of('apple') < position_of('celery')
Пример #2
0
def test_vectorizing_and_similar_terms():
    """Check weighted-term vectors and the ordering of similar terms."""
    # --- Vectorizing weighted terms -------------------------------------
    assoc_space = AssocSpace.from_entries(ENTRIES, k=3)
    weights = [('apple', 5), ('banana', 22), ('not a term', 17)]
    apple_vec = assoc_space.row_named('apple')
    banana_vec = assoc_space.row_named('banana')
    mixed = assoc_space.vector_from_terms(weights)

    # 'apple' compared with itself: within 1e-3 of a perfect score of 1.0.
    deviation = abs(
        assoc_space.assoc_between_two_terms('apple', 'apple') - 1.0
    )
    assert deviation < 1e-3

    # 'apple' compared with 'banana': at least 10% short of a perfect score.
    apple_banana = assoc_space.assoc_between_two_terms('apple', 'banana')
    assert apple_banana < 0.9

    # `mixed` is expected to be a linear combination of the two term rows.
    # e1 is the apple direction; e2 is what is left of banana once its e1
    # component is removed, rescaled to unit length. After stripping the e1
    # and e2 components from `mixed`, the leftover should be negligible.
    e1 = normalize(apple_vec)
    e2 = normalize(banana_vec - e1 * e1.dot(banana_vec))
    leftover = mixed - e1 * e1.dot(mixed)
    leftover -= e2 * e2.dot(leftover)
    assert norm(leftover) < 1e-3

    # --- Finding similar terms ------------------------------------------
    similar = assoc_space.terms_similar_to_vector(mixed)
    labels, scores = zip(*similar)
    # Scores come back already sorted from highest to lowest.
    eq_(list(scores), sorted(scores, reverse=True))

    # most_similar_to_vector yields the same (label, score) as the top entry.
    top_match = assoc_space.most_similar_to_vector(mixed)
    eq_(top_match[0], labels[0])
    eq_(top_match[1], scores[0])

    # banana ranks ahead of apple (consistent with its larger weight above),
    # and apple ranks ahead of both 'green' and 'celery'.
    assert labels.index('banana') < labels.index('apple')
    for lower_ranked in ('green', 'celery'):
        assert labels.index('apple') < labels.index(lower_ranked)
Пример #3
0
def test_truncation():
    """Truncating a k=3 space to 2 dimensions keeps the leading factors."""
    full = AssocSpace.from_entries(ENTRIES, k=3)
    cut = full.truncated_to(2)

    # The truncated factors are the first two columns / values of the full
    # ones, and the label list is carried over unchanged.
    assert np.allclose(cut.u, full.u[:, :2])
    assert np.allclose(cut.sigma, full.sigma[:2])
    eq_(cut.labels, full.labels)

    # The first row of the truncated `assoc` matrix should be just short of
    # unit length: strictly between 0.999 and 1.0.
    first_row_length = norm(cut.assoc[0])
    assert 0.999 < first_row_length < 1.0
Пример #4
0
def test_truncation():
    # Build a rank-3 space, then drop it down to rank 2.
    source_space = AssocSpace.from_entries(ENTRIES, k=3)
    truncated_space = source_space.truncated_to(2)

    # u keeps only its first two columns.
    expected_u = source_space.u[:, :2]
    assert np.allclose(truncated_space.u, expected_u)

    # sigma keeps only its first two entries.
    expected_sigma = source_space.sigma[:2]
    assert np.allclose(truncated_space.sigma, expected_sigma)

    # Labels are not affected by truncation.
    eq_(truncated_space.labels, source_space.labels)

    # Row 0 of the truncated assoc matrix has a norm in the open interval
    # (0.999, 1.0).
    # NOTE(review): presumably truncation sheds a small amount of a
    # unit-length row -- confirm against AssocSpace.assoc.
    row_norm = norm(truncated_space.assoc[0])
    assert 0.999 < row_norm < 1.0
Пример #5
0
def test_filter():
    """Filter a k=5 space with `_filter` and check the resulting space."""
    original = AssocSpace.from_entries(ENTRIES, k=5)
    reduced = original.filter(_filter)

    # The filtered space keeps k=5 and exposes exactly these labels, in
    # this order.
    eq_(reduced.k, 5)
    eq_(' '.join(reduced.labels), 'red green celery banana lemon')

    # Evidence that the space was redecomposed: column 1 of u is unit-length.
    second_column_norm = norm(reduced.u[:, 1])
    assert np.allclose(second_column_norm, 1.0)

    # Redecomposition can shift scores in odd ways, but this much is
    # intuitive: after filtering, red/banana is higher than it was before,
    # yet still below the original yellow/banana score.
    red_banana_before = original.assoc_between_two_terms('red', 'banana')
    red_banana_after = reduced.assoc_between_two_terms('red', 'banana')
    assert (
        red_banana_before
        < red_banana_after
        < original.assoc_between_two_terms('yellow', 'banana')
    )
Пример #6
0
def test_filter():
    # Construct the unfiltered space and its filtered counterpart.
    unfiltered = AssocSpace.from_entries(ENTRIES, k=5)
    kept = unfiltered.filter(_filter)

    # Basic shape of the result: same k, and only the surviving labels.
    eq_(kept.k, 5)
    joined_labels = ' '.join(kept.labels)
    eq_(joined_labels, 'red green celery banana lemon')

    # A unit-norm second column of u indicates the filtered space was
    # decomposed again rather than merely sliced.
    assert np.allclose(norm(kept.u[:, 1]), 1.0)

    # The red/banana score after filtering should land strictly between the
    # unfiltered red/banana score (lower bound) and the unfiltered
    # yellow/banana score (upper bound).
    lower_bound = unfiltered.assoc_between_two_terms('red', 'banana')
    filtered_score = kept.assoc_between_two_terms('red', 'banana')
    assert (lower_bound < filtered_score <
            unfiltered.assoc_between_two_terms('yellow', 'banana'))
Пример #7
0
def test_norm_and_normalize():
    """Check `norm` and `normalize` on a known vector and on the zero vector."""
    # 8^2 + 9^2 + 12^2 = 289 = 17^2, so the expected norm is exactly 17.
    sample = np.asarray([8.0, 9.0, 12.0])
    expected_length = 17.0
    assert np.allclose(norm(sample), expected_length)
    assert np.allclose(normalize(sample), sample / expected_length)

    # The zero vector normalizes to itself instead of raising an error.
    matches = np.zeros(5) == normalize(np.zeros(5))
    assert matches.all()
Пример #8
0
def test_norm_and_normalize():
    # A vector whose length is a whole number: sqrt(64 + 81 + 144) == 17.
    vector = np.asarray([8.0, 9.0, 12.0])

    length = norm(vector)
    assert np.allclose(length, 17.0)

    unit = normalize(vector)
    assert np.allclose(unit, vector / 17.0)

    # Normalizing an all-zero vector must hand back all zeros, not fail.
    expected_zeros = np.zeros(5)
    normalized_zeros = normalize(np.zeros(5))
    assert (expected_zeros == normalized_zeros).all()