Example #1
import copy

import torch.nn as nn


def make_model(opt,
               src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab), opt)

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)  # in-place init; plain xavier_uniform is deprecated
    return model
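A minimal usage sketch, assuming the Annotated Transformer building blocks used above (MultiHeadedAttention, EncoderDecoder, and friends) are in scope. The vocabulary size is illustrative, and passing None for this variant's extra opt argument is an assumption:

V = 1000  # illustrative shared vocabulary size
tmp_model = make_model(None, src_vocab=V, tgt_vocab=V, N=2)
# Rough sanity check: count trainable parameters.
print(sum(p.numel() for p in tmp_model.parameters()))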
Example #2
import copy

import torch.nn as nn


def BuildModel(vocab_size, encoder_emb, decoder_emb,
               d_model=512, N=6, d_ff=2048, h=8, dropout=0.1):

    target_vocab = vocab_size
    c = copy.deepcopy

    attention = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)

    encoder_layer = EncoderLayer(d_model, c(attention), c(feed_forward), dropout)
    decoder_layer = DecoderLayer(d_model, c(attention), c(attention), c(feed_forward), dropout)

    encoder = Encoder(encoder_layer, N)
    decoder = Decoder(decoder_layer, N)

    model = EncoderDecoder(encoder, decoder,
        # Embeddings expects (d_model, vocab_size).
        nn.Sequential(Embeddings(d_model, encoder_emb), c(position)),
        nn.Sequential(Embeddings(d_model, decoder_emb), c(position)),
        Generator(d_model, target_vocab))

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
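Both helpers bind c = copy.deepcopy so that every stacked layer receives its own independently trained weights, rather than sharing one module N times. A minimal sketch of why the deep copy matters:

import copy

import torch.nn as nn

layer = nn.Linear(4, 4)
clone = copy.deepcopy(layer)
# Deep-copied modules hold separate parameter tensors, so gradient updates
# on one clone never touch the other.
print(layer.weight is clone.weight)  # False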
Example #3
import tensorflow as tf
import tensorflow_datasets as tfds

from input_path import file_path

# config, Transformer, Generator, and optimizer are assumed to be defined or
# imported elsewhere in this project.

tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(
    file_path.subword_vocab_path)

transformer = Transformer(num_layers=config.num_layers,
                          d_model=config.d_model,
                          num_heads=config.num_heads,
                          dff=config.dff,
                          input_vocab_size=config.input_vocab_size,
                          target_vocab_size=config.target_vocab_size,
                          rate=config.dropout_rate)

generator = Generator()


def restore_chkpt(checkpoint_path):
    ckpt = tf.train.Checkpoint(transformer=transformer,
                               optimizer=optimizer,
                               generator=generator)

    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_path,
                                              max_to_keep=20)

    # If a checkpoint exists, restore the latest one.
    latest = tf.train.latest_checkpoint(checkpoint_path)
    if latest:
        ckpt.restore(latest)
        print(ckpt_manager.latest_checkpoint, 'checkpoint restored!!')
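A hedged usage sketch: the checkpoint directory below is an illustrative path, and transformer, optimizer, and generator must already be constructed as above before calling this:

restore_chkpt('./checkpoints/train')  # hypothetical checkpoint directory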