def sample_rnn_text_classification_family_cfg(seed):
    """Draws a random configuration for an RNN text classification task.

    The returned structure is meant to carry enough information to build an
    instance of the problem. All randomness comes from a single
    `np.random.RandomState` seeded with `seed`.

    Args:
      seed: int Random seed used to generate the task.

    Returns:
      A dictionary with the keys "embed_dim", "w_init", "dataset",
      "vocab_size", "core", "trainable_init" and "loss_compute". Values
      produced by the `utils` samplers may themselves be nested structures.
    """
    rng = np.random.RandomState(seed)

    # Entries of a dict literal are evaluated top to bottom, so the order of
    # the keys below is also the order in which random draws are consumed.
    # Reordering them would change the config produced for a given seed.
    return {
        "embed_dim": utils.sample_log_int(rng, 8, 128),
        "w_init": utils.sample_initializer(rng),
        "dataset": utils.sample_text_dataset(rng),
        # TODO(lmetz) trim this if using characters...
        "vocab_size": utils.sample_log_int(rng, 100, 10000),
        "core": utils.sample_rnn_core(rng),
        # Cast so the config holds a plain Python bool, not a numpy bool.
        "trainable_init": bool(rng.choice([True, False])),
        "loss_compute": rng.choice(["last", "avg", "max"]),
    }
def sample_word_language_model_family_cfg(seed):
    """Draws a random configuration for a word-level language model task.

    All randomness comes from a single `np.random.RandomState` seeded with
    `seed`.

    Args:
      seed: int Random seed used to generate the task.

    Returns:
      A dictionary with the keys "embed_dim", "w_init", "vocab_size", "core",
      "trainable_init" and "dataset".
    """
    rng = np.random.RandomState(seed)

    # Entries of a dict literal are evaluated top to bottom, so the order of
    # the keys below is also the order in which random draws are consumed.
    # Reordering them would change the config produced for a given seed.
    return {
        "embed_dim": utils.sample_log_int(rng, 8, 128),
        "w_init": utils.sample_initializer(rng),
        "vocab_size": utils.sample_log_int(rng, 1000, 30000),
        "core": utils.sample_rnn_core(rng),
        # Cast so the config holds a plain Python bool, not a numpy bool.
        "trainable_init": bool(rng.choice([True, False])),
        "dataset": utils.sample_word_lm_dataset(rng),
    }
def sample_char_rnn_language_model_family_cfg(seed):
    """Draws a random configuration for a character-level RNN language model.

    All randomness comes from a single `np.random.RandomState` seeded with
    `seed`.

    Args:
      seed: int Random seed used to generate the task.

    Returns:
      A dictionary with the keys "embed_dim", "w_init", "vocab_size", "core",
      "trainable_init" and "dataset".
    """
    rng = np.random.RandomState(seed)

    # Entries of a dict literal are evaluated top to bottom, so the order of
    # the keys below is also the order in which random draws are consumed.
    # Reordering them would change the config produced for a given seed.
    return {
        "embed_dim": utils.sample_log_int(rng, 8, 128),
        "w_init": utils.sample_initializer(rng),
        # Either use the full vocabulary of 256, or sample a smaller size so
        # only a subset is operated on. The second draw only happens when the
        # boolean comes out False. (Presumably 0.8 is the probability of
        # choosing the full vocabulary -- confirm against utils.sample_bool.)
        "vocab_size": (
            256
            if utils.sample_bool(rng, 0.8)
            else utils.sample_log_int(rng, 100, 256)
        ),
        "core": utils.sample_rnn_core(rng),
        # Cast so the config holds a plain Python bool, not a numpy bool.
        "trainable_init": bool(rng.choice([True, False])),
        "dataset": utils.sample_char_lm_dataset(rng),
    }