def test_word2idx_return_unknown_tag(vocabulary: dp.Vocabulary) -> None:
    """Check that an unknown word gets the same index as the ``<UNK>`` token."""
    # given: the index of the unknown tag and a word expected to be missing
    unknown_index = vocabulary.word2idx("<UNK>")
    missing_word = "testnonexistingword"

    # when
    actual_index = vocabulary.word2idx(missing_word)

    # then
    assert actual_index == unknown_index
def test_idx2word_raises_index_error(vocabulary: dp.Vocabulary) -> None:
    """Check that ``idx2word(-1)`` raises ``IndexError`` with the exact message."""
    # given
    invalid_index = -1

    # when / then
    with pytest.raises(IndexError) as exc_info:
        vocabulary.idx2word(invalid_index)

    # The message is compared for exact equality, not as a pattern.
    assert str(exc_info.value) == "No word is mapped to -1"
def test_decode_caption(vocabulary: dp.Vocabulary) -> None:
    """Check that the indices of "a" and "man" decode back to the text "a man"."""
    # given: a caption expressed as vocabulary indices
    tokens = ("a", "man")
    caption = [vocabulary.word2idx(token) for token in tokens]

    # when
    decoded = dp.TextPipeline.decode_caption(vocabulary, caption)

    # then
    assert decoded == "a man"
# Example #4
# 0
def plot_embeddings(embeddings: np.ndarray,
                    words: List[str],
                    vocab: dp.Vocabulary = None,
                    out_path: str = None) -> None:
    """Plot word embeddings in 2D.

    The embeddings are passed through ``reduce_to_k_dim``; each requested word
    is then drawn as a red point with a blue text label next to it, and the
    figure is shown with ``plt.show()``.

    Args:
        embeddings (np.ndarray): Embeddings matrix.
        words (List[str]): List of words to be plotted. Each word is
            lower-cased before it is looked up in ``vocab``; the label keeps
            the original casing.
        vocab (dp.Vocabulary, optional): Vocabulary used to map a word to its
            index in the reduced matrix. Defaults to None, in which case a
            new ``dp.Vocabulary()`` is created for this call.
        out_path (str, optional): If given, the figure is saved to this path
            (dpi=100) before it is shown. Defaults to None (no file written).
    """
    # Build the default vocabulary lazily: a ``dp.Vocabulary()`` call in the
    # signature would run once at definition time and be shared by all calls.
    if vocab is None:
        vocab = dp.Vocabulary()

    E_reduced = reduce_to_k_dim(embeddings)

    x = []
    y = []
    for word in words:
        # The entry selected by the word's index is unpacked as (x, y).
        word_x, word_y = E_reduced[vocab.word2idx(word.lower())]

        x.append(word_x)
        y.append(word_y)

        # Offset the label slightly so it does not sit on top of the marker.
        plt.annotate(word, (word_x + 0.01, word_y + 0.01),
                     color="blue",
                     fontsize=12)

    plt.scatter(x, y, c="red")

    if out_path is not None:
        plt.savefig(out_path, dpi=100)

    plt.show()
def test_word2idx(vocabulary: dp.Vocabulary) -> None:
    """Check that the word "a" maps to index 0 in the supplied vocabulary."""
    # given
    known_word = "a"

    # when
    actual_index = vocabulary.word2idx(known_word)

    # then
    assert actual_index == 0
def test_idx2word(vocabulary: dp.Vocabulary) -> None:
    """Check that index 0 maps back to the word "a" in the supplied vocabulary."""
    # given
    known_index = 0

    # when
    actual_word = vocabulary.idx2word(known_index)

    # then
    assert actual_word == "a"