def test_character_level_from_text():
    """Check input validation, empty-line removal and UNK/EOS tagging.

    Exercises ``make_character_level_from_text`` and the mapping function
    it returns as the second element of its result.

    Raises
    ------
    AssertionError
        If the empty input line is not removed, if a character that never
        appeared in the input is not mapped to the ``"UNK"`` tag, or if the
        last tag produced by the mapping function is not ``"EOS"``.
    """
    test_strs = ["All work and no play makes jack a dull boy!@#**-~`", ""]

    # Make sure that it raises if it gets the wrong input
    # (a bare string, then a single character, instead of a list of strings)
    assert_raises(ValueError, make_character_level_from_text, test_strs[0])
    assert_raises(ValueError, make_character_level_from_text, test_strs[0][0])

    clean, mf, imf, m = make_character_level_from_text(test_strs)
    # Exactly one entry of test_strs is empty, so one line should be dropped
    if len(clean) != len(test_strs) - 1:
        raise AssertionError("Failed to remove empty line")

    # "z" does not occur anywhere in test_strs, so every char is unknown
    new_str = "zzzzzzzzzzzzzz"
    new_clean = mf(new_str)

    # Make sure all the unknown chars get UNK tags.
    # Compare tag by tag: comparing sums could pass even when individual
    # tags are wrong, as long as the errors cancel out.
    unk_tag = m["UNK"]
    if any(tag != unk_tag for tag in new_clean[:-1]):
        raise AssertionError("Failed to handle unknown char")

    # Make sure last tag is EOS
    if new_clean[-1] != m["EOS"]:
        raise AssertionError("Failed to add EOS tag")
from dagbldr.nodes import masked_cost, categorical_crossentropy
from dagbldr.nodes import softmax_layer, shift_layer
from dagbldr.nodes import gru_recurrent_layer, conditional_gru_recurrent_layer
from dagbldr.nodes import bidirectional_gru_recurrent_layer
from dagbldr.nodes import conditional_attention_gru_recurrent_layer

# Module-level fixtures, built once when this module is imported.
# minibatch size
minibatch_size = 10

# Get data for lovecraft experiments
mountains = load_mountains()
text = mountains["data"]
# Get a tiny subset
text = text[:10]
# Four values come back; the code visible here only uses `cleaned` and
# `mapper` afterwards.
cleaned, mfunc, inv_mfunc, mapper = make_character_level_from_text(text)
# Number of entries in the mapper; passed to the minibatch helper below and
# reused as n_out in test_conditional_gru_recurrent.
n_chars = len(mapper.keys())

# Necessary setup since text is done on per minibatch basis
text_minibatch_func = gen_make_list_one_hot_minibatch(n_chars)
# Inputs are the first three items of each cleaned entry; targets are the
# next two (indices 3 and 4).
X = [l[:3] for l in cleaned]
y = [l[3:5] for l in cleaned]
# Each call returns a (minibatch, mask) pair.
# NOTE(review): slice(0, minibatch_size) presumably selects the first
# minibatch_size entries -- confirm against gen_make_list_one_hot_minibatch.
X_mb, X_mask = text_minibatch_func(X, slice(0, minibatch_size))
y_mb, y_mask = text_minibatch_func(y, slice(0, minibatch_size))


def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars