def test_minbatch_randomized_from_corpus(test_conf, test_corp):
    """Check the index arrays returned by ``MinBatch.randomized_from_corpus``.

    The call is made twice with the same arguments. The first result must
    have train indices of shape ``(4, 2)`` and test indices of shape
    ``(2, 2)``. No test index from the second call may equal any train
    index from the first call.

    NOTE(review): presumably ``test_conf`` and ``test_corp`` are pytest
    fixtures and the train/test partition is expected to stay stable across
    calls while only the ordering is shuffled -- confirm against MinBatch.
    """
    first_split = MinBatch.randomized_from_corpus(test_conf, test_corp, 2)
    train_idxs, test_idxs, _, _ = first_split
    assert train_idxs.shape == (4, 2)
    assert test_idxs.shape == (2, 2)

    second_split = MinBatch.randomized_from_corpus(test_conf, test_corp, 2)
    _, second_test_idxs, _, _ = second_split

    # Every (test index, train index) pair across the two calls must differ.
    assert all(
        test_idx != train_idx
        for test_idx in second_test_idxs.reshape(4)
        for train_idx in train_idxs.reshape(8)
    )
def test_minbatch_from_corpus(test_conf, test_corp):
    """Check the token batches produced by ``MinBatch.from_corpus``.

    Builds one batch list from corpus indices ``[[1, 3]]`` and another from
    ``[[0, 2]]``, then converts the ids returned by ``data_batch_at`` /
    ``teach_batch_at`` back to tokens with ``test_corp.ids_to_tokens`` and
    compares them with the expected tokens.

    NOTE(review): presumably ``test_conf`` and ``test_corp`` are pytest
    fixtures, and the sentences quoted in the comments below are the corpus
    entries selected by those indices -- confirm against the fixture data.
    """
    train_idxs = [[1, 3]]
    test_idxs = [[0, 2]]
    trains = MinBatch.from_corpus(test_conf, test_corp, train_idxs)
    tests = MinBatch.from_corpus(test_conf, test_corp, test_idxs)

    def to_tokens(ids):
        # Materialise the ids as a list before the lookup, as the original
        # helper did.
        return test_corp.ids_to_tokens(list(ids))

    # I'm James.
    # He hasn't
    assert to_tokens(trains[0].data_batch_at(1)) == ["i", "he"]
    assert to_tokens(trains[0].data_batch_at(2)) == ["am", "has"]

    # <sj>James</sj> <v>is</v> a teacher.
    # I haven't
    assert to_tokens(tests[0].teach_batch_at(1)) == ["<sj>", "i"]
    assert to_tokens(tests[0].teach_batch_at(2)) == ["james", "have"]
    assert to_tokens(tests[0].teach_batch_at(7)) == ["a", "<pad>"]
    assert to_tokens(tests[0].teach_batch_at(8)) == ["teacher", "<pad>"]