Пример #1
0
    def test_NMT_3_read_train(self):
        """Check the vocabularies and id pairs loaded from a tiny parallel corpus.

        The reference vocabularies only receive the words that occur in both
        sentences of their side; every other token position is expected to
        carry the unknown-word id, and every sentence is expected to end
        with the end-of-sentence id.
        """
        source_sentences = ["I am Philip", "I am a student"]
        target_sentences = ["私 は フィリップ です", "私 は 学生 です"]
        src_vocab, trg_vocab, loaded = load_nmt_train_data(
            source_sentences, target_sentences, cut_threshold=1
        )

        # Reference vocabularies, filled by looking each word up once.
        # NOTE(review): presumably a lookup registers the word -- confirm
        # against the Vocabulary implementation.
        expected_src = Vocabulary(unk=True, eos=True)
        expected_trg = Vocabulary(unk=True, eos=True)
        for token in ("i", "am"):
            expected_src[token]
        for token in ("私", "は", "です"):
            expected_trg[token]

        # Source side: the third word (and the fourth one of the second
        # sentence) is expected to be encoded as unknown.
        expected_src_ids = [
            [
                expected_src["i"],
                expected_src["am"],
                expected_src.unk_id(),
                expected_src.eos_id(),
            ],
            [
                expected_src["i"],
                expected_src["am"],
                expected_src.unk_id(),
                expected_src.unk_id(),
                expected_src.eos_id(),
            ],
        ]

        # Target side: both sentences share the same expected id pattern.
        expected_trg_ids = [
            [
                expected_trg["私"],
                expected_trg["は"],
                expected_trg.unk_id(),
                expected_trg["です"],
                expected_trg.eos_id(),
            ]
            for _ in range(2)
        ]

        expected_pairs = list(zip(expected_src_ids, expected_trg_ids))
        self.assertVocEqual(src_vocab, expected_src)
        self.assertVocEqual(trg_vocab, expected_trg)
        self.assertEqual(loaded, expected_pairs)
Пример #2
0
    def test_read_train(self):
        """Check the vocabularies and (word ids, tag ids) pairs from POS training data.

        Input tokens have the form ``word_TAG``. The reference word
        vocabulary only receives "I" and "am"; the third word of each
        sentence is expected to be encoded with the unknown-word id.
        """
        tagged_sentences = ["I_NNP am_VBZ Philip_NNP", "I_NNP am_VBZ student_NN"]
        word_vocab, tag_vocab, loaded = load_pos_train_data(tagged_sentences)
        loaded = list(loaded)

        # Build the reference vocabularies by looking the entries up in order.
        # NOTE(review): presumably a lookup registers the entry -- confirm
        # against the Vocabulary implementation.
        expected_words = Vocabulary()
        expected_tags = Vocabulary(unk=False)
        for word in ("I", "am"):
            expected_words[word]
        for tag in ("NNP", "VBZ", "NNP", "NN"):
            expected_tags[tag]

        self.assertVocEqual(word_vocab, expected_words)
        self.assertVocEqual(tag_vocab, expected_tags)

        # Both sentences are expected to yield the same word-id sequence.
        expected_word_ids = [
            [expected_words["I"], expected_words["am"], expected_words.unk_id()]
            for _ in range(2)
        ]

        # Tag ids follow the tags written in each input sentence.
        expected_tag_ids = [
            [expected_tags[tag] for tag in sentence_tags]
            for sentence_tags in (("NNP", "VBZ", "NNP"), ("NNP", "VBZ", "NN"))
        ]

        expected_pairs = list(zip(expected_word_ids, expected_tag_ids))

        self.assertEqual(loaded, expected_pairs)
Пример #3
0
    def test_read_test(self):
        """Check how a test sentence is encoded against a pre-filled vocabulary.

        "Japan" is never looked up in the vocabulary beforehand, so its
        position is expected to carry the unknown-word id.
        """
        sentences = ["I live in Japan"]

        # Look up the known words before the loader sees the vocabulary.
        # NOTE(review): presumably a lookup registers the word -- confirm
        # against the Vocabulary implementation.
        vocab = Vocabulary()
        for word in ("I", "live", "in"):
            vocab[word]

        # Only the first element of the first loaded item is compared.
        loaded_items = list(load_pos_test_data(sentences, vocab))
        first_item = loaded_items[0]
        encoded = first_item[0]

        expected_ids = [vocab[word] for word in ("I", "live", "in")]
        expected_ids.append(vocab.unk_id())
        self.assertEqual(encoded, expected_ids)
Пример #4
0
    def test_read_train(self):
        """Check the vocabulary and (input ids, next-word ids) pairs for LM data.

        Each expected input row starts with "<s>" and each expected target
        row ends with "</s>"; the third word of each sentence is expected to
        be encoded with the unknown-word id.
        """
        sentences = ["I am Philip", "I am student"]
        vocab, loaded = load_lm_data(sentences, cut_threshold=1)

        # Reference vocabulary: sentence markers first, then the kept words.
        # NOTE(review): presumably a lookup registers the word -- confirm
        # against the Vocabulary implementation.
        expected_vocab = Vocabulary()
        for token in ("<s>", "</s>", "i", "am"):
            expected_vocab[token]

        # Both sentences are expected to produce identical id sequences.
        expected_inputs = [
            [
                expected_vocab["<s>"],
                expected_vocab["i"],
                expected_vocab["am"],
                expected_vocab.unk_id(),
            ]
            for _ in range(2)
        ]

        # Targets are the inputs shifted by one position, closed by "</s>".
        expected_targets = [
            [
                expected_vocab["i"],
                expected_vocab["am"],
                expected_vocab.unk_id(),
                expected_vocab["</s>"],
            ]
            for _ in range(2)
        ]

        expected_pairs = [
            (inputs, targets)
            for inputs, targets in zip(expected_inputs, expected_targets)
        ]

        self.assertVocEqual(vocab, expected_vocab)
        self.assertEqual(loaded, expected_pairs)