Example #1
    def testLoad(self):
        # Vocabulary and index lookup tables.
        vdict, idict = get_dict()

        # Load the dev split of the seq2seq dataset.
        ds = S2SDataSet(vdict, idict, 'bobsue-data/bobsue.seq2seq.dev.tsv')

        # Every batch should carry a [source, target] pair of arrays whose
        # batch (first) dimensions agree.
        for batch in ds.batches(30):
            self.assertEqual(2, len(batch.data))
            self.assertEqual(batch.data[0].shape[0], batch.data[1].shape[0])
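For reference, the same loading API can be exercised outside the test harness. The sketch below only peeks at one batch; it assumes the get_dict/S2SDataSet/batches interface used above, and the variable names are illustrative:

from lstm_dataset import S2SDataSet
from vocab_dict import get_dict

# Build the lookup tables and load the dev split, mirroring the test above.
vdict, idict = get_dict()
dev = S2SDataSet(vdict, idict, 'bobsue-data/bobsue.seq2seq.dev.tsv')

# Inspect a single batch of 30 sequence pairs; batch.data is assumed to hold
# the [source, target] arrays that the assertions above check.
batch = next(iter(dev.batches(30)))
print(len(batch.data), batch.data[0].shape, batch.data[1].shape)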
Example #2
import string
from collections import defaultdict

import vocab_dict
# RESERVED_TOKENS (padding/end-of-sequence markers) is assumed to be provided
# by the project's tokenizer module.


def transformer_params():
    """Get the default Transformer hyperparameters.

    Returns:
        defaultdict: parameter name -> value; missing keys resolve to None.
    """
    dic, vocab_size = vocab_dict.get_dict()

    viseme_dic = RESERVED_TOKENS +\
                 ['b', 'f', 'd', 'l', 'g', 'j', 'zh', 'z',
                  'B', 'F', 'D', 'L', 'G', 'J', 'ZH', 'Z',
                  'a', 'an', 'ao', 'o', 'ou', 'e', 'en', 'er', 'i', 'u', 'v', 'i1', 'i2', ' ']

    pinyin_dic = RESERVED_TOKENS + list(string.ascii_lowercase) + [' ']

    return defaultdict(
        lambda: None,
        # Model params
        viseme_dic=viseme_dic,
        pinyin_dic=pinyin_dic,
        label_dic=dic,
        initializer_gain=1.0,  # Used in trainable variable initialization.
        hidden_size=512,  # Model dimension in the hidden layers.
        num_hidden_layers=3,  # Number of layers in the encoder and decoder stacks.
        num_heads=8,  # Number of heads to use in multi-headed attention.
        filter_size=1024,  # Inner layer dimension in the feedforward network.

        # Dropout values (only used when training)
        layer_postprocess_dropout=0.1,
        attention_dropout=0.1,
        relu_dropout=0.1,

        # Training params
        label_smoothing=0.1,
        learning_rate=2.0,
        learning_rate_decay_rate=1.0,
        learning_rate_warmup_steps=16000,

        # Optimizer params
        optimizer_adam_beta1=0.9,
        optimizer_adam_beta2=0.997,
        optimizer_adam_epsilon=1e-09,

        # Default prediction params
        extra_decode_length=50,
        beam_size=4,
        alpha=0.6,  # used to calculate length normalization in beam search
        allow_ffn_pad=True,
    )
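Because the function returns a defaultdict with a lambda: None factory, any parameter that is never set explicitly reads back as None instead of raising KeyError. A short usage sketch (the params name is illustrative; the printed values follow from the defaults above):

params = transformer_params()

print(params['hidden_size'])     # 512
print(params['num_heads'])       # 8
print(params['not_configured'])  # None -- the default factory handles unknown keys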
Example #3
from common_train import Trainer
from lm_loss import LogLoss
from lstm_dataset import S2SDataSet
from lstm_graph import BiLSTMEncodeGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Shared vocabulary and index lookup tables.
vocab_dict, idx_dict = get_dict()

# Train / dev / test splits of the seq2seq dataset.
train_ds = S2SDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.train.tsv")
dev_ds = S2SDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.dev.tsv")
test_ds = S2SDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.test.tsv")

# Model and training hyperparameters.
dict_size = len(vocab_dict)
hidden_dim = 200
batch_size = 50

# Bidirectional LSTM encoder graph trained with log loss and the Adam optimizer.
trainer = Trainer()
graph = BiLSTMEncodeGraph(LogLoss(), Adam(eta=0.001, decay=0.99), dict_size, hidden_dim)
trainer.train(idx_dict, 100, 's2s_bilstm', graph, train_ds, dev_ds, test_ds, batch_size)