示例#1
0
    def __init__(self, args, word_dict, feature_dict,
                 state_dict=None, normalize_q=True,normalize_s=True):
        self.args = args
        self.word_dict = word_dict
        self.args.vocab_size = len(word_dict)
        # feature to idx
        self.args.num_features = len(feature_dict)
        self.feature_dict = feature_dict            
        self.updates = 0
        self.use_cuda = False
        self.parallel = False
        self.normalize_q=normalize_q
        self.normalize_s=normalize_s

        # Building network. If normalize if false, scores are not normalized
        # 0-1 per paragraph (no softmax).
        if args.model_type == 'rnn':
            if args.train_mode=='string_match':
                self.network = RnnDocReader(args, normalize_s)
            else:
                self.network = RnnDocReader_Q(args, normalize_s)
        else:
            raise RuntimeError('Unsupported model: %s' % args.model_type)
        # Load old saved state ,(if use pretrain/ from checkpoint)  
        # include 'fix-embedding' and model state-dict  / just return model state-dict
        if state_dict:
            # Load buffer separately
            if 'fixed_embedding' in state_dict:
                fixed_embedding = state_dict.pop('fixed_embedding')                 # pop fix embeddings
                self.network.load_state_dict(state_dict)                            # model parameters
                self.network.register_buffer('fixed_embedding', fixed_embedding)    # name of the buffer.buffer can be accessed from this module using the given name
            else:
                self.network.load_state_dict(state_dict)    
示例#2
0
    def __init__(self, args, word_dict, char_dict, feature_dict,
                 state_dict=None, normalize=True):
        # Book-keeping.
        self.args = args
        self.word_dict = word_dict
        self.char_dict = char_dict
        self.args.vocab_size = len(word_dict)
        self.args.char_size = len(char_dict)
        self.feature_dict = feature_dict
        self.args.num_features = len(feature_dict)
        self.updates = 0
        self.use_cuda = False
        self.parallel = False

        # Building network. If normalize if false, scores are not normalized
        # 0-1 per paragraph (no softmax).
        if args.model_type == 'rnn':
            self.network = RnnDocReader(args, normalize)
        elif args.model_type == 'r_net':
            self.network = R_Net(args, normalize)
        elif args.model_type == 'mnemonic':
            self.network = MnemonicReader(args, normalize)
        else:
            raise RuntimeError('Unsupported model: %s' % args.model_type)

        # Load saved state
        if state_dict:
            # Load buffer separately
            if 'fixed_embedding' in state_dict:
                fixed_embedding = state_dict.pop('fixed_embedding')
                self.network.load_state_dict(state_dict)
                self.network.register_buffer('fixed_embedding', fixed_embedding)
            else:
                self.network.load_state_dict(state_dict)
    def __init__(self, opt, embedding=None, state_dict=None):
        # Book-keeping.
        self.opt = opt
        self.device = torch.cuda.current_device(
        ) if opt['cuda'] else torch.device('cpu')
        self.updates = state_dict['updates'] if state_dict else 0
        self.train_loss = AverageMeter()
        if state_dict:
            self.train_loss.load(state_dict['loss'])

        # Building network.
        self.network = RnnDocReader(opt, embedding=embedding)
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            self.network.load_state_dict(state_dict['network'])
        self.network.to(self.device)

        # Building optimizer.
        self.opt_state_dict = state_dict['optimizer'] if state_dict else None
        self.build_optimizer()
示例#4
0
#!/usr/bin/env python
# coding: utf-8

import argparse
import torch
from torch.nn import functional as F
from torch.autograd import Variable
from rnn_reader import RnnDocReader

parser = argparse.ArgumentParser()
args = parser.parse_args()
args.char_vocab_size = 100
args.vocab_size = 1000
args.embedding_dim = 300

reader = RnnDocReader(args).cuda()


def get_n_params(model):
    pp = 0
    for p in list(model.parameters()):
        nn = 1
        for s in list(p.size()):
            nn = nn * s
        pp += nn
    return pp


print(get_n_params(reader))
for _ in range(1000):
    x1 = Variable(torch.LongTensor(10, 40).cuda())
class DocReaderModel(object):
    """High level model that handles intializing the underlying network
    architecture, saving, updating examples, and predicting examples.
    """
    def __init__(self, opt, embedding=None, state_dict=None):
        # Book-keeping.
        self.opt = opt
        self.device = torch.cuda.current_device(
        ) if opt['cuda'] else torch.device('cpu')
        self.updates = state_dict['updates'] if state_dict else 0
        self.train_loss = AverageMeter()
        if state_dict:
            self.train_loss.load(state_dict['loss'])

        # Building network.
        self.network = RnnDocReader(opt, embedding=embedding)
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            self.network.load_state_dict(state_dict['network'])
        self.network.to(self.device)

        # Building optimizer.
        self.opt_state_dict = state_dict['optimizer'] if state_dict else None
        self.build_optimizer()

    def build_optimizer(self):
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if self.opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters,
                                       self.opt['learning_rate'],
                                       momentum=self.opt['momentum'],
                                       weight_decay=self.opt['weight_decay'])
        elif self.opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(
                parameters, weight_decay=self.opt['weight_decay'])
        else:
            raise RuntimeError('Unsupported optimizer: %s' %
                               self.opt['optimizer'])
        if self.opt_state_dict:
            self.optimizer.load_state_dict(self.opt_state_dict)

    def update(self, ex):
        #print("======")
        #print(ex)
        # Train mode
        self.network.train()

        # Transfer to GPU
        inputs = [e.to(self.device) for e in ex[:7]]
        target_s = ex[7].to(self.device)
        target_e = ex[8].to(self.device)

        # Run forward
        score_s, score_e = self.network(*inputs)

        # Compute loss and accuracies
        loss = F.nll_loss(score_s, target_s) + F.nll_loss(score_e, target_e)
        self.train_loss.update(loss.item())

        # Clear gradients and run backward
        self.optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        torch.nn.utils.clip_grad_norm_(self.network.parameters(),
                                       self.opt['grad_clipping'])

        # Update parameters
        self.optimizer.step()
        self.updates += 1

    def predict(self, ex):
        # Eval mode
        self.network.eval()

        # Transfer to GPU
        if self.opt['cuda']:
            inputs = [Variable(e.cuda()) for e in ex[:7]]
        else:
            inputs = [Variable(e) for e in ex[:7]]

        # Run forward
        with torch.no_grad():
            score_s, score_e = self.network(*inputs)

        # Transfer to CPU/normal tensors for numpy ops
        score_s = score_s.data.cpu()
        score_e = score_e.data.cpu()

        # Get argmax text spans
        text = ex[-2]
        spans = ex[-1]
        predictions = []
        max_len = self.opt['max_len'] or score_s.size(1)
        for i in range(score_s.size(0)):
            scores = torch.ger(score_s[i], score_e[i])
            scores.triu_().tril_(max_len - 1)
            scores = scores.numpy()
            s_idx, e_idx = np.unravel_index(np.argmax(scores), scores.shape)
            s_offset, e_offset = spans[i][s_idx][0], spans[i][e_idx][1]
            predictions.append(text[i][s_offset:e_offset])

        return predictions

    def save(self, filename, epoch, scores):
        em, f1, best_eval = scores
        params = {
            'state_dict': {
                'network': self.network.state_dict(),
                'embeddings': self.network.embedding.state_dict(),
                'doc_encoder': self.network.doc_rnn.state_dict(),
                'q_encoder': self.network.question_rnn.state_dict(),
                'self_attn_layer': self.network.self_attn.state_dict(),
                'qemb_layer': self.network.qemb_match.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'updates': self.updates,
                'loss': self.train_loss.state_dict()
            },
            'config': self.opt,
            'epoch': epoch,
            'em': em,
            'f1': f1,
            'best_eval': best_eval,
            'random_state': random.getstate(),
            'torch_state': torch.random.get_rng_state(),
            'torch_cuda_state': torch.cuda.get_rng_state()
        }
        try:
            torch.save(params, filename)
            #logger.info('model saved to {}'.format(filename))
        except BaseException:
            #logger.warning('[ WARN: Saving failed... continuing anyway. ]')
            pass