Python VocabEntry.words2charindices 예제들, vocab.VocabEntry.words2charindices Python 예제들

예제 #1

0

파일 보기

파일: sanity_check.py 프로젝트: arkhalid/XCS224N-A5

def question_1a_sanity_check():
    """Sanity check for ``VocabEntry.words2charindices``.

    Runs two comparisons against a freshly constructed ``VocabEntry``:

    1. A small hand-written batch whose expected index lists are spelled
       out inline.
    2. A larger batch of target sentences whose expected index lists are
       unpickled from ``./sanity_check_en_es_data/1a_tgt.pkl``.

    Raises:
        AssertionError: if either result differs from its gold value.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1a: words2charindices()")
    print("-" * 80)
    vocab = VocabEntry()

    print('Running test on small list of sentences')
    sentences = [["a", "b", "c?"], ["~d~", "c", "b", "a"]]
    small_ind = vocab.words2charindices(sentences)
    small_ind_gold = [[[1, 30, 2], [1, 31, 2], [1, 32, 70, 2]],
                      [[1, 85, 33, 85, 2], [1, 32, 2], [1, 31, 2], [1, 30, 2]]]
    assert (small_ind == small_ind_gold), \
        "small test resulted in indices list {:}, expected {:}".format(small_ind, small_ind_gold)

    print('Running test on large list of sentences')
    tgt_sents = [
        ['<s>', "Let's", 'start', 'by', 'thinking', 'about', 'the', 'member', 'countries', 'of', 'the', 'OECD,', 'or',
         'the', 'Organization', 'of', 'Economic', 'Cooperation', 'and', 'Development.', '</s>'],
        ['<s>', 'In', 'the', 'case', 'of', 'gun', 'control,', 'we', 'really', 'underestimated', 'our', 'opponents.',
         '</s>'],
        ['<s>', 'Let', 'me', 'share', 'with', 'those', 'of', 'you', 'here', 'in', 'the', 'first', 'row.', '</s>'],
        ['<s>', 'It', 'suggests', 'that', 'we', 'care', 'about', 'the', 'fight,', 'about', 'the', 'challenge.', '</s>'],
        ['<s>', 'A', 'lot', 'of', 'numbers', 'there.', 'A', 'lot', 'of', 'numbers.', '</s>']]
    tgt_ind = vocab.words2charindices(tgt_sents)
    # Context manager guarantees the gold file is closed even if unpickling
    # raises; a bare open() inside pickle.load() leaves the handle dangling.
    with open('./sanity_check_en_es_data/1a_tgt.pkl', 'rb') as gold_file:
        tgt_ind_gold = pickle.load(gold_file)
    assert (tgt_ind == tgt_ind_gold), "target vocab test resulted in indices list {:}, expected {:}".format(tgt_ind,
                                                                                                            tgt_ind_gold)

    print("All Sanity Checks Passed for Question 1a: words2charindices()!")
    print("-" * 80)

예제 #2

0

파일 보기

파일: sanity_check.py 프로젝트: zwh930712/cs224n_2019

def question_1g_sanity_check(model):
    """Sanity check for the size of ``VocabEntry.to_input_tensor_char`` output.

    Builds the input tensor for a fixed batch of four sentences (the
    longest has six words) and checks that its size is ``(6, 4, 21)``.

    Args:
        model: Object exposing a ``device`` attribute; it is passed to
            ``to_input_tensor_char`` as the target device.

    Raises:
        AssertionError: if the tensor size is not ``(6, 4, 21)``.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]

    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # Expected size is stated directly instead of allocating a random tensor
    # just to read its size.
    # NOTE(review): presumably (max sentence length, batch size, max word
    # length); the 21 is taken as-is from the original check — confirm.
    expected_size = torch.Size([6, 4, 21])
    print(expected_size)
    print(padded_sentences.size())
    assert padded_sentences.size() == expected_size, \
        "to_input_tensor_char size is incorrect: it should be:\n {} but is:\n{}".format(
            expected_size, padded_sentences.size())

    print("Sanity Check Passed for Question 1g: Padding!")
    print("-" * 80)

예제 #3

0

파일 보기

파일: sanity_check.py 프로젝트: arkhalid/XCS224N-A5

def question_1c_sanity_check():
    """Sanity check for ``VocabEntry.to_input_tensor_char``.

    First verifies that ``pad_sents_char`` reproduces the gold padded
    sentences loaded from disk, then verifies that the tensor built on the
    CPU has size (len(gold[0]), len(gold), len(gold[0][0])).

    Raises:
        AssertionError: if the padding or the tensor size is wrong.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: To input tensor")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)

    padded_sentences = pad_sents_char(char_ids, 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    padding_error = "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
        gold_padded_sentences, padded_sentences)
    assert padded_sentences == gold_padded_sentences, padding_error

    # Dimensions of the gold data, outermost to innermost.
    n_sentences = len(gold_padded_sentences)
    n_words = len(gold_padded_sentences[0])
    n_chars = len(gold_padded_sentences[0][0])
    expected_size = (n_words, n_sentences, n_chars)

    cpu = torch.device('cpu')
    padded_sentences_tensor = vocab.to_input_tensor_char(sentences, device=cpu)

    assert (padded_sentences_tensor.size() == expected_size)

    print("Sanity Check Passed for Question 1c: To input tensor")
    print(rule)

예제 #4

0

파일 보기

def question_1b_sanity_check():
    """Sanity check for ``pad_sents_char``.

    Pads the index lists of a fixed batch and compares the result with the
    gold padded sentences loaded from disk. It then prints the number of
    padded sentences and the shape of the padded data as an int tensor
    with its first two dimensions swapped.

    Raises:
        AssertionError: if the padded sentences differ from the gold data.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1b: Padding")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)

    padded_sentences = pad_sents_char(char_ids, 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    padding_error = "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
        gold_padded_sentences, padded_sentences)
    assert padded_sentences == gold_padded_sentences, padding_error

    # Report the batch length next to the shape after swapping dims 0 and 1.
    sentence_count = len(padded_sentences)
    as_tensor = torch.tensor(padded_sentences, dtype=torch.int)
    swapped = torch.transpose(as_tensor, 0, 1)
    print(sentence_count, swapped.shape)

    print("Sanity Check Passed for Question 1b: Padding!")
    print(rule)

예제 #5

0

파일 보기

파일: sanity_check.py 프로젝트: zwh930712/cs224n_2019

def question_1h_sanity_check(model):
    """Sanity check for the highway network.

    Builds the input tensor for a fixed batch of sentences via
    ``VocabEntry.to_input_tensor_char``, runs one random input of size
    ``(1, 1, 21)`` through a ``Highway`` module, and prints both sizes.
    There are no assertions; the check passes if nothing raises.

    Args:
        model: Object exposing a ``device`` attribute; it is passed to
            ``to_input_tensor_char`` as the target device.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]

    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # Test with batch size 1
    x = torch.rand(1, 1, 21)
    hw = Highway(21, 21, 21, 0.5)
    # Keep the output so its size can be reported below. Previously an
    # undefined name `a` was printed here, which raised NameError on every run.
    # NOTE(review): assumes Highway.forward returns a tensor — confirm.
    hw_out = hw.forward(x)
    # TODO: test with batch size 4 (never implemented).

    print(hw_out.size())
    print(padded_sentences.size())

    print("Sanity Check Passed for Question 1h: Padding!")
    print("-" * 80)

예제 #6

0

파일 보기

파일: test_sanity.py 프로젝트: sshleifer/a5-conv-translate

    def test_question_1e_sanity_check(self):
        """Sanity check for ``VocabEntry.words2charindices``.

        Compares the result for a small inline batch with inline gold
        index lists, then compares the result for a larger batch with gold
        index lists unpickled from ``./sanity_check_en_es_data/1e_tgt.pkl``.

        Raises:
            AssertionError: if either result differs from its gold value.
        """
        vocab = VocabEntry()


        sentences = [["a", "b", "c?"], ["~d~", "c", "b", "a"]]
        small_ind = vocab.words2charindices(sentences)
        small_ind_gold = [[[1, 30, 2], [1, 31, 2], [1, 32, 70, 2]], [[1, 85, 33, 85, 2], [1, 32, 2], [1, 31, 2], [1, 30, 2]]]
        assert(small_ind == small_ind_gold), \
            "small test resulted in indices list {:}, expected {:}".format(small_ind, small_ind_gold)

        # print('Running test on single sentence')
        # sentence = ["right", "arcs", "only"]
        # single_ind = vocab.words2charindices(sentence)
        # single_ind_gold = [[[1, 47, 2], [1, 38, 2], [1, 36, 2], [1, 37, 2], [1, 49, 2]], [[1, 30, 2], [1, 47, 2], [1, 32, 2], [1, 48, 2]], [[1, 44, 2], [1, 43, 2], [1, 41, 2], [1, 54, 2]]]
        # assert(single_ind == single_ind_gold), \
        #     "single sentence test resulted in indices list {:}, expected {:}".format(single_ind, single_ind_gold)

        print('Running test on large list of sentences')
        tgt_sents = [['<s>', "Let's", 'start', 'by', 'thinking', 'about', 'the', 'member', 'countries', 'of', 'the', 'OECD,', 'or', 'the', 'Organization', 'of', 'Economic', 'Cooperation', 'and', 'Development.', '</s>'], ['<s>', 'In', 'the', 'case', 'of', 'gun', 'control,', 'we', 'really', 'underestimated', 'our', 'opponents.', '</s>'], ['<s>', 'Let', 'me', 'share', 'with', 'those', 'of', 'you', 'here', 'in', 'the', 'first', 'row.', '</s>'], ['<s>', 'It', 'suggests', 'that', 'we', 'care', 'about', 'the', 'fight,', 'about', 'the', 'challenge.', '</s>'], ['<s>', 'A', 'lot', 'of', 'numbers', 'there.', 'A', 'lot', 'of', 'numbers.', '</s>']]
        tgt_ind = vocab.words2charindices(tgt_sents)
        # Context manager guarantees the gold file is closed even if
        # unpickling raises; a bare open() would leave the handle dangling.
        with open('./sanity_check_en_es_data/1e_tgt.pkl', 'rb') as gold_file:
            tgt_ind_gold = pickle.load(gold_file)
        assert(tgt_ind == tgt_ind_gold), "target vocab test resulted in indices list {:}, expected {:}".format(tgt_ind, tgt_ind_gold)

예제 #7

0

파일 보기

파일: test_sanity.py 프로젝트: sshleifer/a5-conv-translate

    def test_question_1f_sanity_check(self):
        """Sanity check for ``pad_sents_char``.

        Pads the index lists of a fixed batch and compares them with the
        gold padded sentences loaded from disk: first by length, then
        sentence by sentence (reporting the first mismatch), and finally
        as a whole.

        Raises:
            AssertionError: if the padded output differs from the gold data.
        """
        vocab = VocabEntry()

        print("Running test on a list of sentences")
        sentences = [
            ['Human:', 'What', 'do', 'we', 'want?'],
            ['Computer:', 'Natural', 'language', 'processing!'],
            ['Human:', 'When', 'do', 'we', 'want', 'it?'],
            ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
        ]
        char_ids = vocab.words2charindices(sentences)

        padded_sentences = pad_sents_char(char_ids, 0)
        gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
        assert len(gold_padded_sentences) == len(padded_sentences)

        # Find the first sentence pair that disagrees, if any.
        first_mismatch = next(
            ((got, expected)
             for expected, got in zip(gold_padded_sentences, padded_sentences)
             if got != expected),
            None,
        )
        if first_mismatch is not None:
            got, expected = first_mismatch
            raise AssertionError('got {}: expected: {}'.format(got, expected))

        whole_batch_error = "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences)
        assert padded_sentences == gold_padded_sentences, whole_batch_error

예제 #8

0

파일 보기

def question_1c_sanity_check():
    """Smoke test for ``VocabEntry.to_input_tensor_char``.

    Runs ``words2charindices`` and ``pad_sents_char`` on a fixed batch,
    then builds the input tensor on ``"cpu"`` and prints its shape.
    Nothing is asserted; the check passes if nothing raises.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: Input tensor")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)

    # The padded result is not inspected; the call only has to succeed.
    padded_sentences = pad_sents_char(char_ids, 0)
    input_tensor = vocab.to_input_tensor_char(sentences, "cpu")
    print(input_tensor.shape)

예제 #9

0

파일 보기

def question_1f_sanity_check():
    """Sanity check for ``pad_sents_char``.

    Compares the padded index lists of a fixed batch with the gold padded
    sentences loaded from disk, then checks that a single 33-element word
    comes back with length 21.

    Raises:
        AssertionError: if either check fails.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1f: Padding")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)

    padded_sentences = pad_sents_char(char_ids, 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    padding_error = "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
        gold_padded_sentences, padded_sentences)
    assert padded_sentences == gold_padded_sentences, padding_error

    # One sentence holding one over-long word of 33 identical indices.
    long_word = [4 for _ in range(33)]
    padded_sent = pad_sents_char([[long_word]], 0)
    first_word = padded_sent[0][0]
    assert len(first_word) == 21
    print("Sanity Check Passed for Question 1f: Padding!")
    print(rule)

예제 #10

0

파일 보기

파일: sanity_check.py 프로젝트: The-Punk-and-The-Monk/cs224n-Assignments

def question_1g_sanity_check():
    """
    Sanity check for to_input_tensor_char() function.

    Builds a reference tensor by padding the index lists with
    pad_sents_char, swapping the first two axes with numpy and wrapping
    the array in torch.Tensor, then requires
    VocabEntry.to_input_tensor_char on "cpu" to equal it.

    :return: None
    :raises AssertionError: if the two tensors are not equal
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1g")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)
    padded_sentences = pad_sents_char(char_ids, 0)

    # Reference: swap axes 0 and 1 of the padded data.
    swapped = np.transpose(np.asarray(padded_sentences), (1, 0, 2))
    expected = torch.Tensor(swapped)
    actual = vocab.to_input_tensor_char(sentences, device="cpu")

    assert expected.equal(actual), "Wrong!"

    print("Sanity Check Passed for Question 1g")
    print(rule)

예제 #11

0

파일 보기

파일: tests.py 프로젝트: maxence-b/char-mnt

def test2():
    """Pad the index lists of ``sentences`` and print the second entry.

    ``sentences`` is not defined in this function; it is read from the
    enclosing scope.
    """
    char_ids = VocabEntry().words2charindices(sentences)
    padded = pad_sents_char(char_ids, 0)
    second_entry = padded[1]
    print(second_entry)

예제 #12

0

파일 보기

파일: tests.py 프로젝트: maxence-b/char-mnt

def test1():
    """Print a fresh ``VocabEntry`` and its index lists for ``sentences``.

    ``sentences`` is not defined in this function; it is read from the
    enclosing scope.
    """
    entry = VocabEntry()
    print('vocab', entry)
    char_ids = entry.words2charindices(sentences)
    print('Sentences in Chars', char_ids)