Example #1
def get_model():

    embedin = Embedin(vocab_size, embed_size)
    embedout = Embedout(vocab_size, hidden_size)

    enc = Encoder(embedin, embed_size, hidden_size, n_layers)
    dec5 = Decoder(embedin, embed_size, hidden_size, n_layers)
    #enc7 = Encoder(embedin, embed_size, hidden_size, n_layers)
    dec7 = Decoder(embedin, embed_size, hidden_size, n_layers)
    atten = Attention(hidden_size)
    #atten7 = Attention(hidden_size)
    ae5 = Autoencoder(enc, dec5, atten, embedout, 13)
    ae7 = Autoencoder(enc, dec7, atten, embedout, 17)

    discriminator = Discriminator(hidden_size)
    discriminator2 = Discriminator2(vocab_size, embed_size, hidden_size)

    seq2seq57 = Autoencoder(enc, dec7, atten, embedout, 17)
    seq2seq75 = Autoencoder(enc, dec5, atten, embedout, 13)

    lm5 = Lstm(vocab_size, embed_size, hidden_size, n_layers, drop_out=0)
    lm7 = Lstm(vocab_size, embed_size, hidden_size, n_layers, drop_out=0)
    lm5.load_state_dict(torch.load('models/lm5_lstm_dropout.th'))
    lm7.load_state_dict(torch.load('models/lm7_lstm_dropout.th'))

    ae5 = ae5.cuda()
    ae7 = ae7.cuda()
    discriminator = discriminator.cuda()
    discriminator2 = discriminator2.cuda()
    # move the shared seq2seq models to GPU like the other modules
    seq2seq57 = seq2seq57.cuda()
    seq2seq75 = seq2seq75.cuda()
    lm5 = lm5.cuda()
    lm7 = lm7.cuda()
    return ae5, ae7, discriminator, discriminator2, seq2seq57, seq2seq75, lm5, lm7
Example #2
    def __init__(self, inp_dim, out_dim, emb_dim, enc_hid, dec_hid, enc_drop,
                 dec_drop, epoch, clip, sparse_max, tf, max_length, vocab,
                 batch, device):
        self.inp_dim = inp_dim
        self.out_dim = out_dim
        self.emb_dim = emb_dim
        self.enc_hid = enc_hid
        self.dec_hid = dec_hid
        self.enc_drop = enc_drop
        self.dec_drop = dec_drop
        self.tf = tf
        self.max_length = max_length
        self.batch = batch
        self.device = device
        self.vocab = vocab

        self.attn = Attention(enc_hid, dec_hid, sparse_max=sparse_max)
        self.enc = Encoder(inp_dim, emb_dim, enc_hid, dec_hid, enc_drop)
        self.dec = Decoder(out_dim, emb_dim, enc_hid, dec_hid, dec_drop,
                           self.attn)
        self.model = Seq2Seq(self.enc, self.dec, device).to(device)

        self.model.apply(self.init_weights)
        self.count_parameters()
        self.optimizer = optim.Adam(self.model.parameters())
        if sparse_max:
            self.criterion = SparsemaxLoss(ignore_index=0)
        else:
            self.criterion = nn.CrossEntropyLoss(ignore_index=0)  # pad_idx 0
        self.epoch = epoch
        self.clip = clip
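A minimal sketch of the single training step this wrapper might run, assuming the usual `Seq2Seq(src, trg, teacher_forcing_ratio)` forward signature and batches shaped [seq_len, batch]; the method name and tensor shapes are assumptions, not part of the original example.

    def train_step(self, src, trg):
        """One hypothetical optimization step using the objects built in __init__."""
        self.model.train()
        self.optimizer.zero_grad()
        output = self.model(src, trg, self.tf)            # [trg_len, batch, out_dim]
        output = output[1:].view(-1, output.shape[-1])    # drop the <sos> position
        trg = trg[1:].view(-1)
        loss = self.criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        self.optimizer.step()
        return loss.item()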
Example #3

train_iterator, valid_iterator, test_iterator, SRC, TRG = Return_Data_Loaders()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, device).to(device)

##########################################################################

model.apply(init_weights)

optimizer = optim.Adam(model.parameters())

TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

##########################################################################
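A minimal sketch of the epoch loop these objects are typically wired into, assuming each batch from train_iterator exposes .src and .trg tensors shaped [seq_len, batch] (torchtext BucketIterator style); the function below is illustrative and not part of the original example.

def train_epoch(model, iterator, optimizer, criterion, clip=1.0):
    model.train()
    epoch_loss = 0
    for batch in iterator:
        src, trg = batch.src, batch.trg
        optimizer.zero_grad()
        output = model(src, trg)                           # [trg_len, batch, OUTPUT_DIM]
        output = output[1:].view(-1, output.shape[-1])     # skip the <sos> step
        trg = trg[1:].view(-1)
        loss = criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)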
Example #4
import torch
import numpy as np
import pickle
from torch.utils.data import DataLoader
from my_dataloader import *
from create_vocabulary import *
from Model import Encoder, Decoder, Seq2Seq
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


#encoder = Encoder(input_dim=2999, name='emb_inspec.npy')
#decoder = Decoder(output_dim=2999, name='emb_inspec.npy')
encoder = Encoder()
decoder = Decoder()
model = Seq2Seq(encoder, decoder, device).to(device)
#model.load_state_dict(torch.load('train.pt'))

def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.normal_(param.data, mean=0, std=0.01)

batch=64

tot_epoch = 100

vocab = np.load('vocab_kp20k2.npy', allow_pickle=True).item()
#vocab = np.load('vocab_inspec.npy', allow_pickle=True).item()
TRG_PAD_IDX = vocab('<pad>')
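A minimal sketch of how the remaining training objects might be wired up; the learning rate and StepLR settings are assumptions, and only model, init_weights, StepLR, and TRG_PAD_IDX come from the snippet above.

model.apply(init_weights)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)   # StepLR is imported above but otherwise unused
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)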
Example #5
    def _create_graph(self, DECODER_TYPE):
        self.raw_state = tf.placeholder(tf.float32, shape=[None, Config.NUM_OF_CUSTOMERS+1, 2], name='State')
        self.current_location = self.raw_state[:, -1]
        self.sampled_cost = tf.placeholder(tf.float32, [None, 1], name='Sampled_Cost')
        if Config.SEQUENCE_COST == 1:
            self.sampled_cost = tf.placeholder(tf.float32, [None, Config.NUM_OF_CUSTOMERS], name='Sampled_Cost')
        self.batch_size = tf.shape(self.raw_state)[0]
        self.keep_prob = tf.placeholder(tf.float32)
        self.global_step = tf.Variable(0, trainable=False, name='step')
        self.input_lengths = tf.convert_to_tensor([Config.NUM_OF_CUSTOMERS]*(self.batch_size))
        self.or_route = tf.placeholder(tf.int32, shape=[None, Config.NUM_OF_CUSTOMERS+1])
        self.or_cost = tf.placeholder(tf.float32, shape=[None, 1])
        self.difference_in_length = tf.reduce_mean(self.sampled_cost - self.or_cost)
        self.relative_length = tf.reduce_mean(self.sampled_cost/self.or_cost)
        if Config.SEQUENCE_COST == 1:
            self.relative_length = tf.reduce_mean(self.sampled_cost[:, 0]/self.or_cost)
        self.start_tokens = tf.placeholder(tf.int32, shape=[None])
        self.end_token = -1
        self.MA_baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        if Config.SEQUENCE_COST == 1:
            self.MA_baseline = tf.Variable(tf.tile([0.0], [Config.NUM_OF_CUSTOMERS]), dtype=tf.float32, trainable=False)
            self.assign_init_MA = tf.assign(self.MA_baseline, tf.reduce_mean(self.sampled_cost, axis=0))
        else:
            self.assign_init_MA = tf.assign(self.MA_baseline, tf.reduce_mean(self.sampled_cost))
        if Config.STATE_EMBED == 1:
            self.with_depot_state = self.raw_state
            for i in range(0):
                self.with_depot_state = tf.layers.conv1d(self.with_depot_state, Config.RNN_HIDDEN_DIM, 1,
                                                         padding="SAME", activation=tf.nn.relu)
            self.with_depot_state = tf.layers.conv1d(self.with_depot_state, Config.RNN_HIDDEN_DIM, 1,
                                                     padding="VALID")
        else:
            self.with_depot_state = self.raw_state
        self.state = self.with_depot_state[:, :-1, :]
        self.old_probs = tf.placeholder(tf.float32, shape=[None, Config.NUM_OF_CUSTOMERS, Config.NUM_OF_CUSTOMERS])

        # ENCODER
        if Config.DIRECTION == 4 or Config.DIRECTION == 5 or Config.DIRECTION == 6:
            self.encoder_outputs = self.state
            self.encoder_state = None
        if Config.DIRECTION < 6 and Config.DIRECTION != 4 and Config.DIRECTION != 5 and Config.DIRECTION != 6:
            self.encoder_outputs, self.encoder_state = Encoder(self.state, self.keep_prob)

        # HELPERS
        self.training_index = tf.concat([tf.expand_dims(self.start_tokens, -1), self.or_route], axis=1)
        self.training_index = self.training_index[:, :-1]
        self.gather_ids = tf.concat([tf.expand_dims(
            tf.reshape(tf.tile(tf.reshape(tf.range(self.batch_size), [-1, 1]), [1, tf.shape(self.with_depot_state)[1]]), [-1]), -1),
                                     tf.reshape(self.training_index, [-1, 1])], -1)
        if Config.STATE_EMBED == 0:
            self.training_inputs = tf.reshape(tf.gather_nd(self.with_depot_state, self.gather_ids),
                                              [self.batch_size, tf.shape(self.with_depot_state)[1], 2])
        else:
            self.training_inputs = tf.reshape(tf.gather_nd(self.with_depot_state, self.gather_ids),
                                              [self.batch_size, tf.shape(self.with_depot_state)[1], Config.RNN_HIDDEN_DIM])
        train_helper, pred_helper = Helper(self.with_depot_state, self.batch_size, self.training_inputs,
                                           self.start_tokens, self.end_token)

        # DECODER
        if Config.DIRECTION < 6:
            train_decoder, pred_decoder, critic_network_pred = Decoder(self.batch_size, self.encoder_state, self.encoder_outputs,
                                                                       train_helper, pred_helper, self.state, self.start_tokens,
                                                                       self.end_token, self.keep_prob, self.raw_state, DECODER_TYPE)

            self.train_final_output, self.train_final_state, train_final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
                train_decoder, impute_finished=False, maximum_iterations=tf.shape(self.state)[1])
            self.train_final_action = self.train_final_output.sample_id

            self.pred_final_output, self.pred_final_state, pred_final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
                pred_decoder, impute_finished=False, maximum_iterations=tf.shape(self.state)[1])
            self.pred_final_action = self.pred_final_output.sample_id
            self.base_line_est = critic_network_pred
            self.logits = self.train_final_output.rnn_output

        if Config.DIRECTION == 6:
            self.train_final_action, self.pred_final_action, self.base_line_est, self.logits = Beam_Search(
                self.batch_size, self.encoder_state, self.encoder_outputs,
                train_helper, pred_helper, self.with_depot_state, self.start_tokens,
                self.end_token, self.keep_prob, self.raw_state, DECODER_TYPE)
            # self.pred_final_action = tf.squeeze(self.pred_final_action)

        if Config.DIRECTION == 9:
            self.train_final_action, self.pred_final_action, self.base_line_est, self.logits = Reza_Model(self.batch_size,
                                                                                                          self.with_depot_state)
        if Config.DIRECTION == 10:
            self.train_final_action, self.pred_final_action, self.base_line_est, self.logits = Wyatt_Model(self.batch_size,
                                                                                                           self.state,
                                                                                                           self.raw_state)
        self.probs = self.logits
        self.probs = self.probs + tf.to_float(tf.less(self.probs, -.8*Config.LOGIT_PENALTY))*Config.LOGIT_PENALTY
        self.probs = tf.clip_by_value(tf.nn.softmax(self.probs), 1e-7, 1e7)
        gather_ind = tf.concat([
            tf.reshape(tf.tile(tf.reshape(tf.range(0, self.batch_size), [-1, 1]), [1, Config.NUM_OF_CUSTOMERS]), [-1, 1]),
            tf.tile(tf.reshape(tf.range(0, Config.NUM_OF_CUSTOMERS), [-1, 1]), [self.batch_size, 1]),
            tf.reshape(self.pred_final_action, [-1, 1])], axis=1)
        self.new_probs_with_pi = tf.reshape(tf.gather_nd(self.probs, gather_ind), [self.batch_size, Config.NUM_OF_CUSTOMERS])
        self.old_probs_with_pi = tf.reshape(tf.gather_nd(self.old_probs, gather_ind), [self.batch_size, Config.NUM_OF_CUSTOMERS])
        self.ratio = tf.divide(self.new_probs_with_pi, self.old_probs_with_pi)

        if DECODER_TYPE == 0:
            # x = tf.range(0, 19, dtype=tf.int32)
            # x = [tf.random_shuffle(x)]

            # for i in range(499):
            #     y = tf.range(0, 19, dtype=tf.int32)
            #     y = [tf.random_shuffle(y)]
            #     x = tf.concat((x, y), axis=0)
            # self.pred_final_action = x[:self.batch_size, :]
            if Config.SEQUENCE_COST == 0:
                self.critic_loss = tf.losses.mean_squared_error(self.sampled_cost, self.base_line_est)
            else:
                self.critic_loss = tf.losses.mean_squared_error(tf.reshape(self.sampled_cost[:, 0], [-1, 1]), self.base_line_est)

            if Config.LOGIT_CLIP_SCALAR != 0:
                self.logits = Config.LOGIT_CLIP_SCALAR*tf.nn.tanh(self.logits)

            if Config.REINFORCE == 0:
                # self.weights = tf.to_float(tf.tile(tf.reshape(tf.range(
                #     1, tf.divide(1, tf.shape(self.state)[1]), -tf.divide(1, tf.shape(self.state)[1])),
                #                                               [1, -1]), [self.batch_size, 1]))
                self.actor_loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.logits,
                    targets=self.or_route[:, :-1],
                    weights=tf.ones([self.batch_size, tf.shape(self.state)[1]])
                    # weights=self.weights
                )
            else:
                self.neg_log_prob = -1*tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                                                      labels=self.train_final_action)
                self.R = tf.stop_gradient(self.sampled_cost)
                if Config.SEQUENCE_COST == 1 and Config.USE_PPO == 0:
                    assign = tf.assign(self.MA_baseline, self.MA_baseline*.999 + tf.reduce_mean(self.R, axis=0)*.001)
                    with tf.control_dependencies([assign]):
                        V = self.MA_baseline
                        self.actor_loss = tf.reduce_mean(tf.multiply(self.neg_log_prob, self.R-V))
                elif Config.USE_PPO == 1:
                    assign = tf.assign(self.MA_baseline, self.MA_baseline*.999 + tf.reduce_mean(self.R, axis=0)*.001)
                    with tf.control_dependencies([assign]):
                        V = self.MA_baseline
                        adv = self.R - V
                        epsilon = 0.1
                        self.actor_loss = -tf.reduce_mean(tf.reduce_sum(
                            tf.minimum(tf.multiply(self.ratio, adv),
                                       tf.clip_by_value(self.ratio, 1.0-epsilon, 1.0+epsilon)*adv), axis=1))
                elif Config.MOVING_AVERAGE == 1:
                    assign = tf.assign(self.MA_baseline, self.MA_baseline*.999 + tf.reduce_mean(self.R)*.001)
                    with tf.control_dependencies([assign]):
                        V = self.MA_baseline
                        self.actor_loss = tf.reduce_mean(tf.multiply(tf.reduce_sum(self.neg_log_prob, axis=1), self.R-V))
                elif Config.USE_OR_COST == 1:
                    V = tf.stop_gradient(self.or_cost)
                    self.actor_loss = tf.reduce_mean(tf.multiply(tf.reduce_sum(self.neg_log_prob, axis=1), (self.R-V)/5))
                else:
                    V = tf.stop_gradient(self.base_line_est)
                    self.actor_loss = tf.reduce_mean(tf.multiply(tf.reduce_sum(self.neg_log_prob, axis=1), self.R-V))

            with tf.name_scope("Train"):
                if Config.GPU == 1:
                    colocate = True
                else:
                    colocate = False
                if Config.LR_DECAY_OFF == 0:
                    self.lr = tf.train.exponential_decay(
                        Config.LEARNING_RATE, self.global_step, 200000,
                        .9, staircase=True, name="learning_rate")
                else:
                    self.lr = Config.LEARNING_RATE
                self.train_critic_op = tf.train.AdamOptimizer(self.lr).minimize(self.critic_loss)
                if Config.MAX_GRAD != 0:
                    self.params = tf.trainable_variables()
                    self.gradients = tf.gradients(self.actor_loss, self.params, colocate_gradients_with_ops=colocate)
                    opt = tf.train.AdamOptimizer(self.lr)
                    self.clipped_gradients, gradient_norm = tf.clip_by_global_norm(self.gradients, Config.MAX_GRAD)
                    self.train_actor_op = opt.apply_gradients(zip(self.clipped_gradients, self.params), global_step=self.global_step)
                    tf.summary.scalar("grad_norm", gradient_norm)
                    tf.summary.scalar("LearningRate", self.lr)
                else:
                    self.train_actor_op = tf.train.AdamOptimizer(self.lr).minimize(self.actor_loss,
                                                                                   global_step=self.global_step,
                                                                                   colocate_gradients_with_ops=colocate)
            # # for gradient clipping https://github.com/tensorflow/nmt/blob/master/nmt/model.py

            with tf.name_scope("Loss"):
                tf.summary.scalar("Loss", self.actor_loss)
                tf.summary.scalar("Critic_Loss", self.critic_loss)
            with tf.name_scope("Performace"):
                tf.summary.scalar("Relative Critic Loss", tf.reduce_mean(self.base_line_est/self.or_cost))
                tf.summary.scalar("Relative Critic Loss to Sampled", tf.reduce_mean(self.base_line_est/self.sampled_cost))
                tf.summary.scalar("difference_in_length", self.difference_in_length)
                tf.summary.scalar("relative_length", self.relative_length)
                tf.summary.scalar("Avg_or_cost", tf.reduce_mean(self.or_cost))
                if Config.SEQUENCE_COST == 0:
                    tf.summary.scalar("Avg_sampled_cost", tf.reduce_mean(self.sampled_cost))
                else:
                    tf.summary.scalar("Avg_sampled_cost", tf.reduce_mean(self.sampled_cost[:, 0]))
                # tf.summary.histogram("LocationStartDist", tf.transpose(self.pred_final_action, [1, 0])[0])
                # tf.summary.histogram("LocationEndDist", tf.transpose(self.pred_final_action, [1, 0])[-1])
            with tf.name_scope("Config"):
                tf.summary.scalar("REINFORCE", Config.REINFORCE)
                tf.summary.scalar("DIRECTION", Config.DIRECTION)
                tf.summary.scalar("NUM_OF_CUSTOMERS", Config.NUM_OF_CUSTOMERS)
                tf.summary.scalar("StateEmbed", tf.cast(Config.STATE_EMBED, tf.int32))
                tf.summary.scalar("MAX_GRAD", Config.MAX_GRAD)
                tf.summary.scalar("LogitPen", Config.LOGIT_PENALTY)
                tf.summary.scalar("batch_size", self.batch_size)
                tf.summary.scalar("Config.LAYERS_STACKED_COUNT", Config.LAYERS_STACKED_COUNT)
                tf.summary.scalar("RNN_HIDDEN_DIM", Config.RNN_HIDDEN_DIM)
                tf.summary.scalar("RUN_TIME", Config.RUN_TIME)
                tf.summary.scalar("LOGIT_CLIP_SCALAR", Config.LOGIT_CLIP_SCALAR)
                tf.summary.scalar("Droput", tf.cast(Config.DROPOUT, tf.int32))
                tf.summary.scalar("GPU", Config.GPU)
Example #6
File: Train.py  Project: bothe/NMT
    def __init__(self, **kwargs):
        dataset_folder = Path(kwargs["dataset_folder"]).resolve()
        check_valid_path(dataset_folder)
        result_folder = kwargs["result_folder"]

        self.initial_epoch = 1
        self.test_mode = kwargs["test"]
        self.epochs = kwargs["epochs"]
        self.use_label_smoothing = kwargs["label_smoothing"]

        self.ckpt_path = kwargs["ckpt_path"]
        self.ckpt_epoch = kwargs["ckpt_epoch"]

        # create the folders and files the model needs
        self.log_folder, self.ckpt_folder, self.image_folder = create_folder(
            result_folder)
        if not self.test_mode:
            self.training_result_file = self.log_folder / "training_result.txt"
        self.test_result_file = None

        # save the kwargs values
        msg = ""
        for k, v in list(kwargs.items()):
            msg += "{} = {}\n".format(k, v)
        msg += "new model checkpoint path = {}\n".format(self.ckpt_folder)
        with (self.log_folder / "model_settings.txt").open(
                "w", encoding="utf-8") as fp:
            fp.write(msg)

        # load the required data
        self.src_word2id, self.src_id2word, self.src_vocab_size = load_word_dic(
            dataset_folder / "src_word2id.pkl")
        self.tar_word2id, self.tar_id2word, self.tar_vocab_size = load_word_dic(
            dataset_folder / "tar_word2id.pkl")

        if not self.test_mode:
            train_src, num_train_src = get_dataset(
                self.src_word2id, dataset_folder / "train_src.txt", False,
                True, True)
            train_tar, num_train_tar = get_dataset(
                self.tar_word2id, dataset_folder / "train_tar.txt", True, True,
                True)
            if num_train_src != num_train_tar:
                raise Exception(
                    "source 데이터셋({})과 target 데이터셋({})의 크기가 다릅니다.".format(
                        num_train_src, num_train_tar))

            self.num_train = num_train_src
            self.train_dataset = tf.data.Dataset.from_generator(
                lambda: zip(train_src, train_tar), (tf.int32, tf.int32))
            self.train_dataset = self.train_dataset.cache().shuffle(
                self.num_train + 1).padded_batch(
                    batch_size=kwargs["batch_size"],
                    padded_shapes=(tf.TensorShape([None]),
                                   tf.TensorShape([None])),
                    padding_values=(self.src_word2id["<PAD>"],
                                    self.tar_word2id["<PAD>"])).prefetch(1)

        test_src_path = dataset_folder / "test.txt"
        if test_src_path.exists():
            test_src, self.num_test = get_dataset(self.src_word2id,
                                                  test_src_path, False, True,
                                                  False)

            # self.test_src_max_len = max([len(sentence) for sentence in test_src])
            # padded_test_src = tf.keras.preprocessing.sequence.pad_sequences(
            #    test_src, maxlen = self.test_src_max_len, padding = 'post',
            #    dtype = 'int32', value = self.src_word2id["<PAD>"])

            self.test_dataset = tf.data.Dataset.from_generator(
                lambda: test_src, tf.int32)
            self.test_dataset = self.test_dataset.cache().batch(1).prefetch(1)
            self.test_result_file = self.log_folder / "test_result.txt"

        elif self.test_mode:
            raise FileNotFoundError(
                "[ {} ] 경로가 존재하지 않습니다.".format(test_src_path))

        self.encoder = Encoder(self.src_vocab_size, kwargs["embedding_size"],
                               kwargs["hidden_size"], kwargs["dropout_rate"],
                               kwargs["gru"], kwargs["bi"])
        self.decoder = Decoder(self.tar_vocab_size, kwargs["embedding_size"],
                               kwargs["hidden_size"], kwargs["attention_size"],
                               kwargs["dropout_rate"], kwargs["gru"],
                               kwargs["bi"])

        # The 6 lines below keep Korean text from rendering as broken glyphs in Colab and can be omitted.
        # %config InlineBackend.figure_format = 'retina'
        # !apt -qq -y install fonts-nanum
        fontpath = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf'
        font = fm.FontProperties(fname=fontpath, size=9)
        plt.rc('font', family='NanumBarunGothic')
        mpl.font_manager._rebuild()
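    # A minimal sketch, assuming eager execution (TF 2.x), of a helper this class
    # could use to sanity-check the padded pipeline built above; illustrative only,
    # not part of the original constructor.
    def inspect_one_batch(self):
        for src_batch, tar_batch in self.train_dataset.take(1):
            # Both tensors are padded with the <PAD> ids configured in padded_batch above.
            print(src_batch.shape, tar_batch.shape)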
Example #7

def _get_default_params():
    return {
        "cell_class": "tensorflow.contrib.rnn.BasicLSTMCell",
        "cell_params": {
            "num_units": 32
        },
        "dropout_input_keep_prob": 1.0,
        "dropout_output_keep_prob": 1.0
    }


input_data, source_sequence_length, targets, target_sequence_length = _get_inputs(batch_size)
encoder_embed_input = tf.contrib.layers.embed_sequence(input_data, source_vocab_size, encoding_embedding_size)
encoder = Encoder(tf.contrib.learn.ModeKeys.TRAIN, _get_default_params())
outputs, final_state = encoder(encoder_embed_input, source_sequence_length)

layers = Dense(source_vocab_size)
# The encoder outputs could also be fed to a decoder to produce the real output.
outputs = layers(outputs)  # b * t * source_v_size
masks = tf.sequence_mask(source_sequence_length, max_sequence_length)  # b * t
masks = tf.cast(masks, tf.float32)
cost = sequence_loss(outputs, targets, masks)
optimizer = tf.train.AdamOptimizer(lr)
gradients = optimizer.compute_gradients(cost)
capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("start trainning")
Example #8
    assert args.version is not None, "wrong --version argument."

    #-------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------
    Ps = args2Ps(args)
    #-------------------------------------------------------------------------
    # Vocabulary
    #-------------------------------------------------------------------------
    vocab = Vocabulary()
    vocab.make(dataset="flickr8k", min_word_freq=5)

    #-------------------------------------------------------------------------
    # models
    #-------------------------------------------------------------------------
    encoder = Encoder()
    encoder.fine_tune(Ps["fine_tune_encoder"])
    decoder = Decoder(attention_dim   = Ps["attention_dim"],
                      embed_dim       = Ps["embed_dim"],
                      decoder_dim     = Ps["decoder_dim"],
                      encoder_dim     = encoder.encoder_dim,
                      vocab_size      = len(vocab),
                      device          = Ps["device"],
                      dropout         = Ps["dropout"] )
    encoder = encoder.to(Ps["device"])
    decoder = decoder.to(Ps["device"])
    # whether to load a saved state_dict from checkpoint file
    if Ps["parent"] is not None:
        pass

    #-------------------------------------------------------------------------
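A minimal sketch of what loading from a parent checkpoint might look like in place of the `pass` above; the checkpoint keys "encoder" and "decoder" are assumptions, while Ps["parent"], Ps["device"], encoder, and decoder come from the example.

    if Ps["parent"] is not None:
        checkpoint = torch.load(Ps["parent"], map_location=Ps["device"])
        encoder.load_state_dict(checkpoint["encoder"])   # assumed key
        decoder.load_state_dict(checkpoint["decoder"])   # assumed key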
Example #9
def train(train_dataset,
          validation_dataset=None,
          iterations=150,
          hidden_size=64,
          batch_size=16):
    print("Training...")
    train = DataLoader(train_dataset,
                       batch_size=batch_size,
                       shuffle=True,
                       collate_fn=collate)
    validation = DataLoader(validation_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            collate_fn=collate)

    encoder = Encoder(1, hidden_size).to(device)
    decoder = Decoder(hidden_size, 1).to(device)

    encoder_optimizer = optim.Adam(encoder.parameters())
    decoder_optimizer = optim.Adam(decoder.parameters())
    criterion = nn.MSELoss()

    train_losses = []
    validation_losses = []

    for iter in range(iterations):
        encoder.train()
        decoder.train()

        loss_acc = 0
        for input_tensor, target_tensor, _, max_len, lens in train:
            _, encoder_hidden = encoder(input_tensor, None)
            decoder_hidden = encoder_hidden

            decoder_input = target_tensor[:, 0].view(batch_size, 1, 1)
            outputs = torch.zeros(batch_size, max_len)

            for di in range(1, max_len):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)
                outputs[:, di] = decoder_output.view(batch_size)
                decoder_input = decoder_output.detach()

            for i in range(len(lens)):
                outputs[i, lens[i]:] = 0
            """ if iter == iterations-1:
                print(target_tensor[:,1:].squeeze())
                print(outputs[:,1:].squeeze())
                print() """

            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            batch_loss = criterion(outputs[:, 1:].squeeze(),
                                   target_tensor[:, 1:].squeeze())
            batch_loss.backward(retain_graph=True)
            loss_acc += batch_loss.item()

            encoder_optimizer.step()
            decoder_optimizer.step()

        train_losses.append(loss_acc)

        with torch.no_grad():
            val_loss_acc = 0
            for input_tensor, target_tensor, _, max_len, lens in validation:
                val_batch_size = len(target_tensor)

                _, encoder_hidden = encoder(input_tensor, None)
                decoder_hidden = encoder_hidden

                decoder_input = target_tensor[:, 0].view(val_batch_size, 1, 1)
                decoder_hidden = encoder_hidden
                outputs = torch.zeros(val_batch_size, max_len)

                for di in range(1, max_len):
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden)
                    outputs[:, di] = decoder_output.view(val_batch_size)
                    decoder_input = decoder_output

                for i in range(len(lens)):
                    outputs[i, lens[i]:] = 0

                val_loss = criterion(outputs[:, 1:].squeeze(),
                                     target_tensor[:, 1:].squeeze())
                val_loss_acc += val_loss.item()

            validation_losses.append(val_loss_acc)

        if iter % 1 == 0:
            print("Iteration:", iter, " Train loss: ",
                  "{0:.5f}".format(loss_acc / len(train)),
                  " Validation loss: ",
                  "{0:.5f}".format(validation_losses[-1]))

    showPlot(train_losses, validation_losses)
    torch.save(encoder, "models/encoder.pt")
    torch.save(decoder, "models/decoder.pt")
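A minimal sketch of reloading the saved modules for inference; since the example saves whole modules rather than state_dicts, torch.load returns the module objects directly (the helper name and device argument are assumptions).

def load_models(device):
    encoder = torch.load("models/encoder.pt", map_location=device)
    decoder = torch.load("models/decoder.pt", map_location=device)
    encoder.eval()
    decoder.eval()
    return encoder, decoder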