def transformer_teeny():
  hparams = transformer.transformer_base()
  hparams.num_rec_steps = 2
  hparams.hidden_size = 128
  hparams.filter_size = 128
  hparams.num_heads = 2
  return hparams
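# The hparams helpers in these snippets assume `from tensor2tensor.models
# import transformer` and are normally registered with tensor2tensor's
# registry so they can be selected by name at training time. A minimal
# sketch of that pattern (the decorator and the t2t-trainer flags are
# standard tensor2tensor usage; the function name `transformer_teeny_example`
# is hypothetical):
from tensor2tensor.models import transformer
from tensor2tensor.utils import registry


@registry.register_hparams
def transformer_teeny_example():
  """Tiny Transformer hparams, registered so t2t-trainer can find them."""
  hparams = transformer.transformer_base()
  hparams.hidden_size = 128
  hparams.filter_size = 128
  hparams.num_heads = 2
  return hparams

# Selected on the command line with, e.g.:
#   t2t-trainer --model=transformer --hparams_set=transformer_teeny_example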
def transformer_test():
  hparams = transformer.transformer_base()
  hparams.batch_size = 10
  hparams.hidden_size = 10
  hparams.num_hidden_layers = 1
  hparams.num_heads = 2
  hparams.max_length = 16
  return hparams
def transformer_poetry():
  hparams = transformer.transformer_base()
  hparams.num_hidden_layers = 2
  hparams.hidden_size = 128
  hparams.filter_size = 512
  hparams.num_heads = 4
  hparams.attention_dropout = 0.6
  hparams.layer_prepostprocess_dropout = 0.6
  hparams.learning_rate = 0.05
  return hparams
def transformer_base_sketch():
  """Parameters based on base."""
  hparams = transformer_base()
  hparams.batch_size = 2048
  hparams.max_length = 784
  hparams.clip_grad_norm = 5.
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate_warmup_steps = 8000
  hparams.learning_rate = 0.2
  hparams.num_hidden_layers = 6
  hparams.initializer = "orthogonal"
  hparams.sampling_method = "random"
  return hparams
def transformer_aux_base():
  """Set of hyperparameters."""
  hparams = transformer.transformer_base()
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("shift_values", "1,2,3,4")
  return hparams
def transformer_base_bs5():
  hparams = transformer.transformer_base()
  hparams.add_hparam("block_size", 5)
  return hparams
def universal_transformer_small():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  return hparams
def score2perf_transformer_base():
  hparams = transformer.transformer_base()
  hparams.bottom['inputs'] = modalities.bottom
  return hparams
def transformer_teeny():
  hparams = transformer.transformer_base()
  hparams.hidden_size = 128
  hparams.filter_size = 128
  hparams.num_heads = 2
  return hparams
import numpy as np
import tensorflow as tf

import tensor2tensor.models.transformer as transformer
import tensor2tensor.models.transformer_with_context as transformer_with_context

# Shrink the base hparams to a toy size so the example runs quickly.
hparams = transformer.transformer_base()
hparams.hidden_size = 3
hparams.num_heads = 1
hparams.use_target_space_embedding = False

model = transformer_with_context.TransformerWithContext(hparams)

# Toy features with batch size 2, sequence length 3 and hidden size 3.
inputs_context_np = [[[[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]]],
                     [[[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]]]]
inputs_context = tf.convert_to_tensor(inputs_context_np, np.float32)

inputs_np = [[[[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]]],
             [[[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]]]]
inputs = tf.convert_to_tensor(inputs_np, np.float32)

target_space_id = 0

targets_np = [[[[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]]],
              [[[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]], [[0.3, 0.2, 0.1]]]]
targets = tf.convert_to_tensor(targets_np, np.float32)

features = {
    "inputs_context": inputs_context,
    "inputs": inputs,
    "target_space_id": target_space_id,
    "targets": targets,
}

output = model.body(features)

init = tf.global_variables_initializer()
sess = tf.Session()
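# The snippet above only constructs the graph; the variables are never
# initialized and the body output is never evaluated. A minimal TF1-style
# continuation (assuming model.body returns a Tensor, or a structure of
# Tensors, that Session.run can fetch) would be:
sess.run(init)
body_output = sess.run(output)
print(body_output)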
def transformer_wmt17_base():
  # transformer v2
  hparams = transformer_base()
  return hparams
def __init__(self, is_train, model_config, data):
  self.is_train = is_train
  self.model_config = model_config
  self.data = data
  self.hparams = transformer.transformer_base()
  self.setup_hparams()
def transformer_base_h256():
  hparams = transformer_base()
  hparams.hidden_size = 256
  hparams.batch_size = 4096
  return hparams
def stacked_universal_transformer_base():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.num_stacked_universal_transformers = 6
  hparams.num_rec_steps = 4
  return hparams
def transformer_scan():
  hparams = transformer_base()
  return hparams
def get_learning_rate():
  hparams = transformer.transformer_base()
  return learning_rate_schedule(hparams)
def __init__(self, is_train):
  self.hparams = transformer.transformer_base()
  self.is_train = is_train
def my_transformer_base_single_gpu():
  """HParams for transformer base model for single gpu."""
  hparams = transformer.transformer_base()
  hparams.batch_size = 2048
  hparams.learning_rate_warmup_steps = 16000
  return hparams
def chatbot_cornell_base():
  hparams = transformer.transformer_base()
  hparams.learning_rate_warmup_steps = 16000
  return hparams
def r_transformer_base():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_r_transformer(hparams)
  return hparams
def evolved_transformer_base():
  """Base parameters for Evolved Transformer model."""
  return add_evolved_transformer_hparams(transformer.transformer_base())
def universal_transformer_base_fp16():
  hparams = transformer.transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  hparams.activation_dtype = "float16"
  return hparams
def transformer_dorka_big():
  hparams = transformer.transformer_base()
  return hparams
def transformer_teeny():
  hparams = transformer.transformer_base()
  hparams.model_d = 128
  hparams.d_ff = 128
  hparams.num_heads = 2
  return hparams
def wmt_enro_tpu():
  """HParams for Transformer model on TPU."""
  hparams = transformer.transformer_base()
  hparams = transformer.update_hparams_for_tpu(hparams)
  hparams.batch_size = 512
  return hparams
def transformer_base_12gb_gpu():
  """HParams for transformer base model for a single 12GB gpu."""
  hparams = transformer_base()
  hparams.learning_rate_warmup_steps = 8000
  hparams.batch_size = 8192
  return hparams
def __init__(self, data, is_train, model_config):
  super(TransformerGraph, self).__init__(data, is_train, model_config)
  self.hparams = transformer.transformer_base()
  self.setup_hparams()
  self.model_fn = self.transformer_fn