Example #1
    def __init__(self, config: HiDDenConfiguration, noiser: Noiser):

        super(EncoderDecoder, self).__init__()

        self.encoder = Encoder(config)
        self.encoder = nn.DataParallel(self.encoder)

        self.encode_rnn = EncoderRNN(config)
        #self.encode_rnn = nn.DataParallel(self.encode_rnn)

        self.noiser = noiser

        self.decoder = Decoder(config)
        self.decoder = nn.DataParallel(self.decoder)

        self.decode_rnn = DecoderRNN(config)
        self.adversarial_decode_rnn = DecoderRNN(config)
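
A note on the nn.DataParallel wrapping above: the wrapper replicates the module across the visible GPUs and splits the batch along dim 0, and it also nests the wrapped module under a 'module.' prefix in the state dict, a common source of load_state_dict key mismatches. A minimal sketch of both effects on a stand-in layer (not code from this project):

import torch.nn as nn

conv = nn.Conv2d(3, 8, kernel_size=3)
conv = nn.DataParallel(conv)         # replicates across visible GPUs, splits batch on dim 0
print(list(conv.state_dict())[:2])   # keys gain a 'module.' prefix: ['module.weight', 'module.bias']
# With no CUDA device present, DataParallel simply forwards to the wrapped
# module, so this sketch also runs on CPU.
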
Example #2
class Model(nn.Module):
    def __init__(self, cfg):
        super(Model, self).__init__()
        self.cfg = cfg

        self.embeddings_src = nn.Embedding(
            cfg.svoc.size, cfg.emb_src_size)  ### embeddings for encoder
        self.encoder = EncoderRNN(self.embeddings_src, self.cfg)

        if self.cfg.reuse_words:
            self.embeddings_tgt = self.embeddings_src  ### same embeddings for encoder and decoder
        else:
            self.embeddings_tgt = nn.Embedding(
                self.cfg.tvoc.size,
                self.cfg.emb_tgt_size)  ### new embeddings for decoder
        self.decoder = DecoderRNN_Attn(self.embeddings_tgt, self.cfg)

        sys.stderr.write('Initializing model params\n')
        for param in self.encoder.parameters():
            param.data.uniform_(-0.08, 0.08)
        for param in self.decoder.parameters():
            param.data.uniform_(-0.08, 0.08)

    def forward(self,
                src_batch,
                tgt_batch,
                len_src_batch,
                len_tgt_batch,
                teacher_forcing=1.0):
        enc_outputs, enc_final = self.encoder(src_batch, len_src_batch)
        if self.cfg.par.beam_size > 1:
            dec_outputs, dec_output_words = self.decoder.beam_search(
                self.cfg, len_src_batch, self.cfg.par.max_tgt_len, enc_final,
                enc_outputs, teacher_forcing)
        else:
            dec_outputs, dec_output_words = self.decoder(
                tgt_batch, len_src_batch, len_tgt_batch, enc_final,
                enc_outputs, teacher_forcing)
        return dec_outputs, dec_output_words
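
The uniform initialization loop above writes into param.data directly; torch.nn.init gives the same in-place effect. A self-contained sketch on a stand-in GRU, so it runs without the project's EncoderRNN/DecoderRNN_Attn classes:

import torch.nn as nn

layer = nn.GRU(32, 64)                    # stand-in for the encoder/decoder
for param in layer.parameters():
    nn.init.uniform_(param, -0.08, 0.08)  # in-place, same effect as param.data.uniform_
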
Example #3
File: s2s.py Project: sb-nmt-team/sb-nmt
class Seq2Seq(nn.Module):
    @log_func
    def __init__(self,
                 source_lang,
                 target_lang,
                 hps,
                 training_hps,
                 writer=None,
                 searchengine=None):
        super(Seq2Seq, self).__init__()
        self.hps = hps
        self.writer = writer
        self.training_hps = training_hps
        self.source_lang = source_lang
        self.target_lang = target_lang
        self.i_cont = 0
        self.encoder = EncoderRNN(source_lang.input_size(),
                                  self.hps,
                                  self.training_hps,
                                  writer=writer)
        self.decoder = DecoderRNN(target_lang.input_size(),
                                  target_lang.input_size(),
                                  self.hps,
                                  self.training_hps,
                                  writer=writer)

        self.max_length = self.training_hps.max_length
        self.criterion = nn.NLLLoss(reduce=False, size_average=False)
        if hps.tm_init:
            self.translationmemory = TranslationMemory(
                self, writer=writer, hps=hps, searchengine=searchengine)
        else:
            self.translationmemory = None

    @log_func
    def eval(self):
        return self.train(False)

    @log_func
    def train(self, mode=True):
        super(Seq2Seq, self).train(mode)
        if self.translationmemory is not None:
            self.translationmemory.train(mode)

        return self

    @log_func
    def translate(self, input_batch, mask, use_search=False):
        batch_size = input_batch.size()[0]
        encoder_outputs = self.encoder(input_batch)

        if use_search:
            assert self.translationmemory is not None, "use_search=True but no translation memory was initialized (hps.tm_init is off)"
            self.translationmemory.fit(input_batch)
        hidden = None

        dec_input = Variable(torch.LongTensor([lang.BOS_TOKEN] * batch_size))

        if self.training_hps.use_cuda:
            dec_input = dec_input.cuda()

        translations = [[lang.BOS_TOKEN] for _ in range(batch_size)]
        converged = np.zeros(shape=(batch_size, ))
        for i in range(self.max_length):
            if use_search:
                output, hidden, _ = self.decoder(dec_input, encoder_outputs,
                                                 mask=mask, hidden=hidden,
                                                 translation_memory=self.translationmemory,
                                                 position=i)
            else:
                output, hidden, _ = self.decoder(dec_input,
                                                 encoder_outputs,
                                                 mask=mask,
                                                 hidden=hidden)
            _, output_idx = torch.max(output, -1)

            for j in range(batch_size):
                if translations[j][-1] != self.target_lang.get_eos():
                    translations[j].append(output_idx[j].data[0])
                else:
                    converged[j] = True
                if output_idx[j].data[0] == self.target_lang.get_eos():
                    converged[j] = True
            dec_input = Variable(
                torch.LongTensor([tr[-1] for tr in translations]))

            if self.training_hps.use_cuda:
                dec_input = dec_input.cuda()

            if np.all(converged):
                break

        if use_search:
            self.translationmemory.dump_logs([
                tuple(map(self.target_lang.get_word, elem))
                for elem in translations
            ], "../dumped_translation_logs.pkl")
        return [
            ' '.join(map(self.target_lang.get_word, elem))
            for elem in translations
        ]

    @log_func
    def forward(self,
                input_batch,
                mask,
                output_batch,
                out_mask,
                use_search=False):
        encoder_outputs = self.encoder(input_batch)

        if use_search:
            assert self.translationmemory is not None, "use_search=True but no translation memory was initialized (hps.tm_init is off)"
            self.translationmemory.fit(input_batch)

        hidden = None

        loss = 0.0
        contexts = Variable(torch.zeros((out_mask.size()[0], out_mask.size()[1] - 1,
                                         (self.hps.enc_bidirectional + 1) *
                                         self.hps.enc_hidden_size)))
        for i in range(out_mask.size()[1] - 1):
            if use_search:
                output, hidden, context = self.decoder(output_batch[:, i], encoder_outputs,
                                                       mask=mask, hidden=hidden,
                                                       translation_memory=self.translationmemory,
                                                       position=i)
            else:
                output, hidden, context = self.decoder(output_batch[:, i],
                                                       encoder_outputs,
                                                       mask=mask,
                                                       hidden=hidden)
                contexts[:, i, :] = context
            loss += (self.criterion(output, output_batch[:, i + 1]) *
                     out_mask[:, i + 1]).sum()
        if not use_search:
            self.writer.add_scalar("normal/context",
                                   (contexts.max(1)[0] -
                                    contexts.min(1)[0]).sum(-1).mean(),
                                   self.i_cont)
            self.i_cont += 1
        loss /= out_mask.sum()
        return loss

    @log_func
    def get_hiddens_and_contexts(self, input_batch, mask, output_batch,
                                 out_mask):
        """
        input_batch: [B, T]
        encoder_outputs:  [B, T, DE * HE]
    """
        encoder_outputs = self.encoder(input_batch)
        B, *_ = input_batch.shape
        hidden = None

        loss = 0.0
        hiddens = Variable(torch.zeros((self.hps.dec_layers * (self.hps.dec_bidirectional + 1),
                                        out_mask.size()[1] - 1, B, self.hps.dec_hidden_size)))
        contexts = Variable(torch.zeros((B, out_mask.size()[1] - 1,
                                         (self.hps.enc_bidirectional + 1) *
                                         self.hps.enc_hidden_size)))

        if self.training_hps.use_cuda:
            hiddens = hiddens.cuda()
            contexts = contexts.cuda()

        for i in range(out_mask.size()[1] - 1):
            output, hidden, context = self.decoder(output_batch[:, i],
                                                   encoder_outputs,
                                                   mask=mask,
                                                   hidden=hidden)
            hiddens[:, i, :, :] = hidden
            contexts[:, i, :] = context

        return hiddens, contexts

    def state_dict(self, destination=None, prefix='', keep_vars=False):
        destination = super(Seq2Seq, self).state_dict(destination, prefix,
                                                      keep_vars)
        if self.translationmemory:
            self.translationmemory.state_dict(destination, prefix, keep_vars)

        return destination

    def load_state_dict(self, state_dict, strict=True):
        if self.translationmemory is not None:
            self.translationmemory.load_state_dict(state_dict)
        super(Seq2Seq, self).load_state_dict(state_dict, strict)

    def cuda(self):
        if self.translationmemory:
            self.translationmemory = self.translationmemory.cuda()
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        return super(Seq2Seq, self).cuda()

    def cpu(self):
        if self.translationmemory is not None:
            self.translationmemory = self.translationmemory.cpu()
        self.encoder = self.encoder.cpu()
        self.decoder = self.decoder.cpu()
        return super(Seq2Seq, self).cpu()

    @staticmethod
    def get_default_hparams():
        return merge_hparams(EncoderRNN.get_default_hparams(),
                             DecoderRNN.get_default_hparams(),
                             TranslationMemory.get_default_hparams(),
                             lang.Lang.get_default_hparams())
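
The loss in forward() is per-token NLL, masked by out_mask and normalized by the mask sum. A self-contained sketch of that pattern in current PyTorch, where reduction='none' replaces the deprecated reduce=False/size_average arguments used above:

import torch
import torch.nn as nn

criterion = nn.NLLLoss(reduction='none')                    # per-element losses
log_probs = torch.log_softmax(torch.randn(8, 100), dim=-1)  # [batch, vocab]
targets = torch.randint(0, 100, (8,))
mask = torch.tensor([1., 1., 1., 1., 1., 1., 0., 0.])       # 0 marks padding positions
loss = (criterion(log_probs, targets) * mask).sum() / mask.sum()
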
Example #4
def run_1tm(n_cluster, n_exp, ed, hs):
    embedding_dim = ed
    hidden_size = hs
    encoder_n_layers = 2
    decoder_n_layers = 2

    save_dir = os.path.join("..", "data", "save", "cluster" + str(n_cluster),
                            "exp" + str(n_exp))
    option = 'story_fp_1tm'
    model_name = option + '_model_Attn_embedding_dim' + str(ed)
    attn_model = 'general'
    checkpoint_iter = 160000
    k = n_cluster
    loadFilename = os.path.join(
        save_dir, model_name, '{}-{}_{}'.format(encoder_n_layers,
                                                decoder_n_layers, hidden_size),
        '{}_checkpoint.tar'.format(checkpoint_iter))
    # Load model if a loadFilename is provided
    if not os.path.isfile(loadFilename):
        print('Cannot find model file: ' + loadFilename)
    else:
        print('Loading model file: ' + loadFilename)
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))

        encoder_sd = checkpoint['en']
        decoder_sd = [checkpoint['de' + str(i + 1)] for i in range(k)]

        # encoder_optimizer_sd = checkpoint['en_opt']
        # decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']

        # prepare vocabulary and test data
        voc, _, test_pairs = prepareData_1tm(
            os.path.join("..", "data", "save", 'vocab.pickle'),
            os.path.join(save_dir, option + '.pickle'))
        # test_pairs = read_stories_from_xls(os.path.join('..', 'data', 'compare.xlsx'))

        # test_pairs = [random.choice(test_pairs) for _ in range(10)]
        print("Read {!s} testing pairs".format(len(test_pairs)))

        print('Building encoder and decoder ...')

        # Initialize word embeddings
        embedding = torch.nn.Embedding(voc.num_words, embedding_dim)
        embedding.load_state_dict(embedding_sd)

        # Initialize encoder & decoder models
        encoder = EncoderRNN(embedding_dim, hidden_size, embedding,
                             encoder_n_layers)
        decoder = [
            LuongAttnDecoderRNN(attn_model, embedding, embedding_dim,
                                hidden_size, voc.num_words, decoder_n_layers)
            for _ in range(k)
        ]

        # decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers)
        encoder.load_state_dict(encoder_sd)
        for i in range(k):
            decoder[i].load_state_dict(decoder_sd[i])

        # Use appropriate device
        encoder = encoder.to(device)
        for i in range(k):
            decoder[i] = decoder[i].to(device)

        # Set dropout layers to eval mode
        encoder.eval()
        for i in range(k):
            decoder[i].eval()

        # Initialize search module
        searcher = GreedySearchDecoder_1tm(encoder, decoder, k)

        print('Evaluating {!s} test pairs ...'.format(len(test_pairs)))
        OMEG_score = 0
        OMEG_dis = 0
        SAFE_score = 0
        SAFE_dis = 0
        MOSES_score = 0
        MOSES_dis = 0
        num = 0
        # fps_list = read_predicted_fps(save_dir)
        for i, test_pair in enumerate(test_pairs):
            num += 1
            # try:
            OMEG_s, OMEG_d = evaluate_1tm(searcher, voc, test_pair[0],
                                          test_pair[1], k)
            # SAFE_s, SAFE_d = evaluate_SAFE(test_pair[0], test_pair[1])
            # MOSES_s, MOSES_d = evaluate_MOSES(test_pair[0], test_pair[1], fps_list[i])
            # s, d = evaluate_beam_pretrain(encoder, decoder, voc, test_pair[0], test_pair[3])
            OMEG_score += OMEG_s
            OMEG_dis += OMEG_d
            # SAFE_score += SAFE_s
            # SAFE_dis += SAFE_d
            # MOSES_score += MOSES_s
            # MOSES_dis += MOSES_d
            # except ZeroDivisionError:
            #     continue
        print(
            '\n\n' + '-' * 40 + "cluster" + str(n_cluster),
            "exp" + str(n_exp) + '_embedding' + str(ed) + '_hidden' + str(hs) +
            '-' * 40)
        print('OMEG Total Precision: ' + str(OMEG_score / num))
        print('OMEG Total Recall: ' + str(OMEG_dis / num))

        # print('SAFE Total Precision: ' + str(SAFE_score / num))
        # print('SAFE Total Recall: ' + str(SAFE_dis / num))
        #
        # print('MOSES Total Precision: ' + str(MOSES_score / num))
        # print('MOSES Total Recall: ' + str(MOSES_dis / num))
        print(
            '-' * 40 + "cluster" + str(n_cluster), "exp" + str(n_exp) +
            '_embedding' + str(ed) + '_hidden' + str(hs) + '-' * 40 + '\n\n')
Example #5
def run_pretrain():
    hidden_size = 100
    encoder_n_layers = 2
    decoder_n_layers = 2

    save_dir = os.path.join("..", "data", "save")
    option = 'story_fp'
    model_name = option + '_nd_model_Attn'
    attn_model = 'general'
    checkpoint_iter = 40000
    loadFilename = os.path.join(
        save_dir, model_name, '{}-{}_{}'.format(encoder_n_layers,
                                                decoder_n_layers, hidden_size),
        '{}_checkpoint.tar'.format(checkpoint_iter))
    # Load model if a loadFilename is provided
    if not os.path.isfile(loadFilename):
        print('Cannot find model file: ' + loadFilename)
    else:
        print('Loading model file: ' + loadFilename)
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        # encoder_optimizer_sd = checkpoint['en_opt']
        # decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']

        # prepare vocabulary and test data
        voc, _, test_pairs = prepareData(
            os.path.join(save_dir, 'vocab.pickle'),
            os.path.join(save_dir, option + '.pickle'), option)
        # test_pairs = read_stories_from_xls(os.path.join('..', 'data', 'compare.xlsx'))

        # test_pairs = [random.choice(test_pairs) for _ in range(10)]
        print("Read {!s} testing pairs".format(len(test_pairs)))

        print('Building encoder and decoder ...')

        # Initialize word embeddings
        embedding = torch.nn.Embedding(voc.num_words, hidden_size)
        embedding.load_state_dict(embedding_sd)

        # Initialize encoder & decoder models
        encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                      voc.num_words, decoder_n_layers)
        # decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers)
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)

        # Use appropriate device
        encoder = encoder.to(device)
        decoder = decoder.to(device)

        # Set dropout layers to eval mode
        encoder.eval()
        decoder.eval()

        # Initialize search module
        searcher = GreedySearchDecoder_pretrain(encoder, decoder)

        print('Evaluating {!s} test pairs ...'.format(len(test_pairs)))
        score = 0
        dis = 0
        num = 0
        for test_pair in test_pairs:
            try:
                s, d = evaluate_pretain(searcher, voc, test_pair[0],
                                        test_pair[3])
                # s, d = evaluate_beam_pretrain(encoder, decoder, voc, test_pair[0], test_pair[3])
                score += s
                dis += d
                num += 1
            except ZeroDivisionError:
                continue
        print('Total BLEU score: ' + str(score / num))
        print('Total levenshtein distance: ' + str(dis / num))
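
run_pretrain() expects a checkpoint dict with keys 'en', 'de', 'en_opt', 'de_opt' and 'embedding'. The save side is not shown in these examples, so the following is an inferred counterpart on stand-in modules, not the project's actual training code:

import torch
import torch.nn as nn

embedding = nn.Embedding(100, 16)
encoder = nn.GRU(16, 16)
decoder = nn.GRU(16, 16)
torch.save({
    'iteration': 40000,
    'en': encoder.state_dict(),
    'de': decoder.state_dict(),
    'embedding': embedding.state_dict(),
}, 'demo_checkpoint.tar')

checkpoint = torch.load('demo_checkpoint.tar')
encoder.load_state_dict(checkpoint['en'])  # the same keys the load path reads
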
Example #6
def run():
    hidden_size = 100
    encoder_n_layers = 2
    decoder_n_layers = 2

    save_dir = os.path.join("..", "data", "save")
    option = 'story_fp'
    model_name = option + '_model_Attn'
    attn_model = 'general'
    checkpoint_iter = 120000
    loadFilename = os.path.join(
        save_dir, model_name, '{}-{}_{}'.format(encoder_n_layers,
                                                decoder_n_layers, hidden_size),
        '{}_checkpoint.tar'.format(checkpoint_iter))
    # Load model if a loadFilename is provided
    if not os.path.isfile(loadFilename):
        print('Cannot find model file: ' + loadFilename)
    else:
        print('Loading model file: ' + loadFilename)
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        transfer_encoder_sd = checkpoint['transfer_en']
        # smr_encoder_sd = checkpoint['smr_en']
        # des_encoder_sd = checkpoint['des_en']
        # accp_encoder_sd = checkpoint['accp_en']
        fp_decoder_sd = checkpoint['fp_de']
        # encoder_optimizer_sd = checkpoint['en_opt']
        # decoder_optimizer_sd = checkpoint['de_opt']
        # embedding_sd = checkpoint['embedding']

        # prepare vocabulary and test data
        voc, _, test_pairs = prepareData(
            os.path.join(save_dir, 'vocab.pickle'),
            os.path.join(save_dir, option + '.pickle'), option)
        print("Read {!s} testing pairs".format(len(test_pairs)))

        print('Building encoder and decoder ...')

        # Initialize word embeddings
        embedding = torch.nn.Embedding(voc.num_words, hidden_size)
        # embedding.load_state_dict(embedding_sd)

        # Initialize encoder & decoder models
        # smr_encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        smr_encoder = rnn.RNNModel('GRU', voc.num_words, 100, 100, 2)
        des_encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        accp_encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        fp_decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                         voc.num_words, decoder_n_layers)
        # fp_decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers)
        fp_decoder.load_state_dict(fp_decoder_sd)

        # transfer_encoder = TransferEncoder(smr_encoder, des_encoder, accp_encoder, fp_decoder.hidden_size)
        transfer_encoder = TransferEncoder(smr_encoder, smr_encoder,
                                           smr_encoder, fp_decoder.hidden_size)
        transfer_encoder.load_state_dict(transfer_encoder_sd)
        # smr_encoder.load_state_dict(smr_encoder_sd)
        # des_encoder.load_state_dict(des_encoder_sd)
        # accp_encoder.load_state_dict(accp_encoder_sd)

        # for name, para in encoder_decoder.named_parameters():
        #     print(name, ':', para)

        # encoder.load_state_dict(encoder_sd)
        # decoder.load_state_dict(decoder_sd)

        # Use appropriate device
        transfer_encoder = transfer_encoder.to(device)
        # smr_encoder = smr_encoder.to(device)
        # des_encoder = des_encoder.to(device)
        # accp_encoder = accp_encoder.to(device)
        fp_decoder = fp_decoder.to(device)

        # Set dropout layers to eval mode
        transfer_encoder.eval()
        # smr_encoder.train()
        # des_encoder.train()
        # accp_encoder.train()
        fp_decoder.eval()

        # Initialize search module
        searcher = GreedySearchDecoder(transfer_encoder, fp_decoder)

        print('Evaluating {!s} test pairs ...'.format(len(test_pairs)))
        score = 0
        dis = 0
        num = 0
        for test_pair in test_pairs:
            try:
                s, d = evaluate(searcher, voc, test_pair[0], test_pair[1],
                                test_pair[2], test_pair[3])
                # score += evaluate_beam(transfer_encoder, fp_decoder, voc, test_pair[0], test_pair[1], test_pair[2], test_pair[3])
                score += s
                dis += d
                num += 1
            except ZeroDivisionError:
                continue
        print('Total BLEU score: ' + str(score / num))
        print('Total levenshtein distance: ' + str(dis / num))
Example #7
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]


def evaluateRandomly(encoder, decoder, n=10):
    counter = 0
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        if output_sentence[:-6] == pair[1]:
            counter += 1
        print('<', output_sentence)
        print('')
    print("Correct Examples : {} out of {}".format(counter, n))


hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
                               dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 100000, print_every=500, plot_every=1000)
evaluateRandomly(encoder1, attn_decoder1, n=1000)
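
The decoding loop above (the snippet starts mid-function) feeds the argmax token back as the next decoder input, detached so no gradient flows through the sampled path. The step in isolation:

import torch

decoder_output = torch.log_softmax(torch.randn(1, 100), dim=-1)  # [1, vocab]
topv, topi = decoder_output.topk(1)       # greedy: highest-probability token
decoder_input = topi.squeeze().detach()   # token id fed to the next step
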
Example #8
style_tok_test_reverse = [[getword(input_lang, a1)] if i else [getword(input_lang, a2)]
                          for i in y_test_reverse]
attribute_train.append(torch.LongTensor(style_tok_train))
attribute_train.append(torch.LongTensor(style_tok_train_reverse))
attribute_valid.append(torch.LongTensor(style_tok_valid))
attribute_valid.append(torch.LongTensor(style_tok_valid_reverse))
attribute_test.append(torch.LongTensor(style_tok_test))
attribute_test.append(torch.LongTensor(style_tok_test_reverse))
label_train.append(torch.LongTensor(y_train))
label_train.append(torch.LongTensor(y_train_reverse))
label_valid.append(torch.LongTensor(y_valid))
label_valid.append(torch.LongTensor(y_valid_reverse))
label_test.append(torch.LongTensor(y_test))
label_test.append(torch.LongTensor(y_test_reverse))
print(input_lang.n_words)
encoder = EncoderRNN(input_lang.n_words, config['hidden_size'], config['num_layers'],
                     embedding_weights, config['embedding_dim'],
                     config['dropout']).to(device)
decoder1 = DecoderRNN(config['hidden_size'], input_lang.n_words, config['MAX_LENGTH'],
                      config['num_layers'], embedding_weights,
                      config['embedding_dim'], config['dropout']).to(device)
#decoder1 = AttnDecoderRNN(config['hidden_size'], input_lang.n_words, config['MAX_LENGTH'], config['num_layers'], embedding_weights, config['embedding_dim'], config['dropout']).to(device)
decoder2 = AttnDecoderRNN(config['hidden_size'], input_lang.n_words, config['MAX_LENGTH'],
                          config['num_layers'], embedding_weights,
                          config['embedding_dim'], config['dropout']).to(device)
classifier = WordAttention(input_lang.n_words, tag_lang.n_words, dep_lang.n_words,
                           config['embedding_dim'], config['tag_dim'], config['dep_dim'],
                           config['hidden_size'], config['classifer_class_size'],
                           config['num_layers'], config['dropout'], embedding_weights,
                           config['structural'])
classifier.load_state_dict(torch.load(config['classifier_name'] + '.pt'))
def init_weights(m):
    for name, param in m.named_parameters():
        if name != 'embedding.weight':
            if 'weight' in name:
                #nn.init.uniform_(param.data, -0.1, 0.1)
                nn.init.normal_(param.data, mean=0, std=0.1)
            else:
                nn.init.constant_(param.data, 0)
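
init_weights walks named_parameters() of whatever module it receives and skips embedding.weight, so it is meant to be called once on a whole model rather than through Module.apply (which would revisit every submodule). Usage on a stand-in module:

import torch.nn as nn

m = nn.GRU(8, 8)
init_weights(m)  # normal(0, 0.1) for weights, zeros for biases
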
Example #9
def run(j, i, k, ed, hs):
    save_dir = os.path.join("..", "data", "save", "cluster" + str(k),
                            "exp" + str(j))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    option = 'story_fp_1tm'
    # if option not in ['story_summary', 'story_description', 'story_acceptance', 'fp', 'story_fp', 'story_fp_1tm']:
    #     raise ValueError(option, "is not an appropriate corpus type.")
    voc, pairs, _ = prepareData_1tm(
        os.path.join("..", "data", "save", 'vocab.pickle'),
        os.path.join(save_dir, option + '.pickle'))
    print("Read {!s} training pairs".format(len(pairs)))

    # Configure models
    model_name = option + '_model_Attn_embedding_dim' + str(ed)
    # attn_model = 'dot'
    attn_model = 'general'
    # attn_model = 'concat'
    hidden_size = hs
    embedding_dim = ed
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch
    checkpoint_iter = i * 40000
    n_iteration = checkpoint_iter + 40000
    if i == 0:
        loadFilename = None
    else:
        loadFilename = os.path.join(
            save_dir, model_name,
            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
            '{}_checkpoint.tar'.format(checkpoint_iter))

    checkpoint_iteration = 0
    # Load model if a loadFilename is provided
    if loadFilename:
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd1 = checkpoint['de1']
        decoder_sd2 = checkpoint['de2']
        decoder_sd3 = checkpoint['de3']
        #decoder_sd4 = checkpoint['de4']

        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd1 = checkpoint['de_opt1']
        decoder_optimizer_sd2 = checkpoint['de_opt2']
        decoder_optimizer_sd3 = checkpoint['de_opt3']
        #decoder_optimizer_sd4 = checkpoint['de_opt4']

        embedding_sd = checkpoint['embedding']

        checkpoint_iteration = checkpoint['iteration']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = torch.nn.Embedding(voc.num_words, embedding_dim)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(embedding_dim, hidden_size, embedding,
                         encoder_n_layers, dropout)
    decoder1 = LuongAttnDecoderRNN(attn_model, embedding, embedding_dim,
                                   hidden_size, voc.num_words,
                                   decoder_n_layers)
    decoder2 = LuongAttnDecoderRNN(attn_model, embedding, embedding_dim,
                                   hidden_size, voc.num_words,
                                   decoder_n_layers)
    decoder3 = LuongAttnDecoderRNN(attn_model, embedding, embedding_dim,
                                   hidden_size, voc.num_words,
                                   decoder_n_layers)
    #decoder4 = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)

    # decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder1.load_state_dict(decoder_sd1)
        decoder2.load_state_dict(decoder_sd2)
        decoder3.load_state_dict(decoder_sd3)
        #decoder4.load_state_dict(decoder_sd4)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder1 = decoder1.to(device)
    decoder2 = decoder2.to(device)
    decoder3 = decoder3.to(device)
    #decoder4 = decoder4.to(device)

    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    teacher_forcing_ratio = 1.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    print_every = 100
    save_every = 40000

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder1.train()
    decoder2.train()
    decoder3.train()
    #decoder4.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer1 = optim.Adam(decoder1.parameters(),
                                    lr=learning_rate * decoder_learning_ratio)
    decoder_optimizer2 = optim.Adam(decoder2.parameters(),
                                    lr=learning_rate * decoder_learning_ratio)
    decoder_optimizer3 = optim.Adam(decoder3.parameters(),
                                    lr=learning_rate * decoder_learning_ratio)
    #decoder_optimizer4 = optim.Adam(decoder4.parameters(), lr=learning_rate * decoder_learning_ratio)

    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer1.load_state_dict(decoder_optimizer_sd1)
        decoder_optimizer2.load_state_dict(decoder_optimizer_sd2)
        decoder_optimizer3.load_state_dict(decoder_optimizer_sd3)
        #decoder_optimizer4.load_state_dict(decoder_optimizer_sd4)

    # Run training iterations
    print("Starting Training!")
    trainIters(model_name, voc, pairs, encoder, decoder1, decoder2, decoder3,
               encoder_optimizer, decoder_optimizer1, decoder_optimizer2,
               decoder_optimizer3, embedding, encoder_n_layers,
               decoder_n_layers, hidden_size, save_dir, n_iteration,
               batch_size, print_every, save_every, teacher_forcing_ratio,
               clip, loadFilename, checkpoint_iter, checkpoint_iteration,
               option)
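
clip = 50.0 is configured here but applied inside trainIters, whose body is not shown; the conventional placement (an assumption, not taken from this project) is between backward() and optimizer.step():

import torch
import torch.nn as nn

model = nn.Linear(4, 4)
opt = torch.optim.Adam(model.parameters(), lr=0.0001)
loss = model(torch.randn(2, 4)).sum()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=50.0)  # clip before stepping
opt.step()
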
Example #10
File: main.py Project: ddhruvkr/Seq2Seq
from utils import *
from model.encoder import EncoderRNN
from model.decoder import DecoderRNN
from model.decoder_attn import AttnDecoderRNN
from test import *
from evaluate import *

hidden_size = 256
num_layers = 1
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

encoder1 = EncoderRNN(input_lang.n_words, hidden_size, num_layers).to(device)
#decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size,
                               output_lang.n_words,
                               MAX_LENGTH,
                               num_layers,
                               dropout_p=0.1).to(device)

trainIters(encoder1,
           attn_decoder1,
           10000,
           pairs,
           input_lang,
           output_lang,
           print_every=5000)

evaluateRandomly(encoder1, attn_decoder1, pairs, input_lang, output_lang)
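
The EncoderRNN constructors above vary by project; the simplest form, EncoderRNN(vocab_size, hidden_size, num_layers) as in this last example, is an embedding followed by a recurrent stack. A minimal self-contained sketch of that interface (an illustration, not any of the projects' actual classes):

import torch
import torch.nn as nn

class MinimalEncoderRNN(nn.Module):
    """Embedding + GRU, the shape shared by the EncoderRNN variants above."""

    def __init__(self, vocab_size, hidden_size, num_layers=1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)

    def forward(self, input_ids, hidden=None):
        # input_ids: [batch, seq_len] -> outputs: [batch, seq_len, hidden_size]
        outputs, hidden = self.gru(self.embedding(input_ids), hidden)
        return outputs, hidden

enc = MinimalEncoderRNN(vocab_size=1000, hidden_size=256, num_layers=1)
outputs, hidden = enc(torch.randint(0, 1000, (4, 12)))
print(outputs.shape, hidden.shape)  # torch.Size([4, 12, 256]), torch.Size([1, 4, 256])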