Exemplo n.º 1
0
def test_unique_tokens():
    """Check ``nvtext.unique_tokens`` with the default and a custom delimiter.

    The input mixes two sentences with a ``None`` entry and an empty string.
    Results are compared as sets, so the order in which tokens come back is
    not part of what this test checks.
    """
    sentences = [
        "this is my favorite book",
        "Your Favorite book is different",
        None,
        "",
    ]
    strs = nvstrings.to_device(sentences)

    # No delimiter argument: the expected tokens are the individual words.
    # "Favorite" and "favorite" are both expected, and neither None nor ""
    # appears among the expected tokens.
    tokens = nvtext.unique_tokens(strs)
    assert set(tokens.to_host()) == {
        "Favorite",
        "Your",
        "book",
        "different",
        "favorite",
        "is",
        "my",
        "this",
    }

    # Delimiter "my": only the first sentence contains it, so that sentence
    # is expected in two pieces while the second one is expected whole.
    tokens = nvtext.unique_tokens(strs, delimiter="my")
    assert set(tokens.to_host()) == {
        " favorite book",
        "Your Favorite book is different",
        "this is ",
    }
Exemplo n.º 2
0
def test_tokens_counts():
    """Check ``nvtext.tokens_counts`` via its return value and via ``devptr``.

    The query tokens are the unique tokens of the input strings themselves.
    The same expected matrix is checked twice: once against the value the
    call returns, and once against a device buffer the call fills in.
    """
    corpus = nvstrings.to_device(
        ["apples are green", "apples are a fruit", None, ""]
    )
    tokens = nvtext.unique_tokens(corpus)

    # One row per input string, one column per query token.
    # NOTE(review): the column order looks like sorted token order
    # (a, apples, are, fruit, green) -- confirm against the nvtext docs.
    expected = [
        [0, 1, 1, 0, 1],
        [1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
    ]

    # Returned-to-host variant.
    host_counts = nvtext.tokens_counts(corpus, tokens)
    assert host_counts == expected

    # Device variant: hand the call a pre-allocated int32 buffer to fill.
    shape = (corpus.size(), tokens.size())
    device_counts = rmm.device_array(shape, dtype=np.int32)
    buffer_address = device_counts.device_ctypes_pointer.value
    nvtext.tokens_counts(corpus, tokens, devptr=buffer_address)
    assert np.array_equal(device_counts.copy_to_host(), expected)
Exemplo n.º 3
0
def test_unique_tokens():
    """Exercise ``nvtext.unique_tokens`` on a small batch of strings.

    Two cases are covered: the call without a delimiter argument and the
    call with ``delimiter='my'``. Each result is turned into a set before
    comparison, so ordering of the returned tokens is not asserted.
    """
    strs = nvstrings.to_device([
        "this is my favorite book",
        "Your Favorite book is different",
        None,
        "",
    ])

    # Case 1: no delimiter argument -- individual words are expected.
    words = set(nvtext.unique_tokens(strs).to_host())
    assert words == {
        'Favorite', 'Your', 'book', 'different',
        'favorite', 'is', 'my', 'this',
    }

    # Case 2: 'my' as delimiter -- the first sentence is expected in two
    # pieces, the second sentence (which has no 'my') is expected whole.
    pieces = set(nvtext.unique_tokens(strs, delimiter='my').to_host())
    assert pieces == {
        ' favorite book',
        'Your Favorite book is different',
        'this is ',
    }