def test_mark_teach_minbatch(test_conf, test_corp):
    """Check the per-position batches produced by ``MarkTeacherMinBatch``.

    Two rows of different length are fed in ("i am" and "i") together with
    their tag-annotated teacher rows. The test then inspects the batch at
    positions 0 and 1: the data side is decoded back to tokens, the teacher
    side is compared with the vectors from ``mark.convert_types_to_vec``.
    """
    plain_sentences = (["i", "am"], ["i"])
    tagged_sentences = (
        ["<sj>", "i", "</sj>", "<v>", "am", "</v>"],
        ["<sj>", "i", "</sj>"],
    )
    data_rows = [test_corp.tokens_to_ids(words) for words in plain_sentences]
    teach_rows = [test_corp.tokens_to_ids(words) for words in tagged_sentences]

    batch = MarkTeacherMinBatch(test_conf, test_corp, data_rows, teach_rows)

    def as_tokens(id_column):
        # Materialise the column as a list before decoding it to tokens.
        return test_corp.ids_to_tokens(list(id_column))

    # Data side: position 0 holds "i" for both rows; at position 1 the
    # shorter row is expected to decode to "<pad>".
    assert as_tokens(batch.data_batch_at(0)) == ["i", "i"]
    assert as_tokens(batch.data_batch_at(1)) == ["am", "<pad>"]

    # Teacher side, position 0: both rows are expected to carry the <sj> mark.
    assert (batch.teach_batch_at(0)[0] == mark.convert_types_to_vec(['<sj>'])).all()
    assert (batch.teach_batch_at(0)[1] == mark.convert_types_to_vec(['<sj>'])).all()

    # Teacher side, position 1: the first row carries <v>; the second row has
    # no token there and the test expects -1 (presumably a padding/ignore
    # marker -- confirm against MarkTeacherMinBatch).
    assert (batch.teach_batch_at(1)[0] == mark.convert_types_to_vec(['<v>'])).all()
    assert batch.teach_batch_at(1)[1] == -1
def test_convert_types_to_vec(test_corp):
    """Check that the type list ``['<sj>', '<v>']`` converts to a value equal to 1.

    ``test_corp`` is requested as a fixture but is not used in the body.
    """
    assert mark.convert_types_to_vec(['<sj>', '<v>']) == 1
def test_convert_teach_id_row(test_corp):
    """Check ``mark.convert_teach_id_row`` on adjacent, nested and crossing tags.

    This test used to be defined twice in the module under the same name with
    identical bodies; the later definition silently replaced the earlier one,
    so only one copy was ever collected. The two copies are merged here.

    Each case pairs a tagged token sequence with the tag types expected at
    each index of the returned row. Per the expectations below, the row is
    indexed by the non-tag tokens only. Every expected entry is compared with
    ``mark.convert_types_to_vec`` of the listed types.
    """
    cases = [
        # Adjacent, non-overlapping spans.
        (
            ["<bos>", "<sj>", "james", "</sj>", "<v>", "is", "</v>", "<eos>"],
            [
                [],
                ['<sj>'],  # james
                ['<v>'],  # is
                [],
            ],
        ),
        # <v> nested completely inside <sj>.
        (
            ["<bos>", "<sj>", "A", "<v>", "B", "</v>", "</sj>", "<eos>"],
            [
                [],
                ['<sj>'],  # A
                ['<sj>', '<v>'],  # B
                [],
            ],
        ),
        # Crossing spans: <sj> closes while <v> is still open.
        (
            ["<bos>", "<sj>", "james", "<v>", "is", "</sj>", "men", "</v>", "<eos>"],
            [
                [],
                ['<sj>'],  # james
                ['<sj>', '<v>'],  # is
                ['<v>'],  # men
                [],
            ],
        ),
    ]

    for tokens, expected_types in cases:
        mark_vec = mark.convert_teach_id_row(test_corp.tokens_to_ids(tokens), test_corp)
        # Index explicitly (rather than zip) so that a too-short result still
        # fails with an IndexError instead of being silently truncated.
        for position, types in enumerate(expected_types):
            assert mark_vec[position] == mark.convert_types_to_vec(types)