Example #1
0
    def __init__(self, config):
        """Set up logging, compute device, data loaders, and the model.

        Reads everything it needs from *config* (a dict-like object with keys
        'dir', 'cuda', 'cuda_id', 'batch_size', 'shuffle', ...).
        """
        self.logger = ModelLogger(config, dirname=config['dir'])
        self.dirname = self.logger.dirname

        # Resolve the compute device: CPU, default CUDA device, or a
        # specific GPU when a non-negative id is given.
        use_cuda = config['cuda']
        gpu_index = config['cuda_id']
        if not use_cuda:
            self.device = torch.device('cpu')
        else:
            self.device = torch.device('cuda:%d' % gpu_index
                                       if gpu_index >= 0 else 'cuda')

        splits = prepare_datasets(config)
        train_set = splits['train']
        dev_set = splits['dev']
        test_set = splits['test']

        # Running metrics updated during training / evaluation.
        self._train_loss = AverageMeter()
        self._train_f1 = AverageMeter()
        self._train_em = AverageMeter()
        self._dev_f1 = AverageMeter()
        self._dev_em = AverageMeter()

        batch_size = config['batch_size']

        def make_loader(dataset, shuffle):
            # Identity collate_fn: each batch stays a plain list of examples.
            return DataLoader(dataset,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              collate_fn=lambda x: x,
                              pin_memory=True)

        # A split may be absent; its loader is then None and the matching
        # batch counters are simply not defined.
        if train_set:
            self.train_loader = make_loader(train_set, config['shuffle'])
            self._n_train_batches = len(train_set) // batch_size
        else:
            self.train_loader = None

        if dev_set:
            self.dev_loader = make_loader(dev_set, False)
            self._n_dev_batches = len(dev_set) // batch_size
        else:
            self.dev_loader = None

        if test_set:
            self.test_loader = make_loader(test_set, False)
            self._n_test_batches = len(test_set) // batch_size
            self._n_test_examples = len(test_set)
        else:
            self.test_loader = None

        self._n_train_examples = 0
        self.model = Model(config, train_set)
        self.tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
        # Move the underlying network onto the chosen device.
        self.model.network = self.model.network.to(self.device)
        self.config = config
        self.is_test = False
Example #2
0
 def __init__(self, config):
     """Prepare data loaders and select the compute device from *config*.

     Expects config keys 'model_name' (a key into MODELS) and 'cuda'.
     """
     self.config = config
     tokenizer_model = MODELS[config['model_name']]
     self.train_loader, self.dev_loader, tokenizer = prepare_datasets(
         config, tokenizer_model)
     if config['cuda']:
         # Requested CUDA: fall back to CPU if no GPU is actually available.
         self.device = torch.device(
             'cuda' if torch.cuda.is_available() else 'cpu')
     else:
         # Bug fix: self.device was previously left unassigned when CUDA was
         # disabled, so any later `.to(self.device)` raised AttributeError.
         self.device = torch.device('cpu')
Example #3
0
    def __init__(self, config):
        """Build data loaders, device, model, and optimizer from *config*.

        Expected config keys: 'model_name', 'batch_size', 'cuda', 'lr',
        'adam_epsilon', 'pretrained_dir', 'mode' (plus whatever
        prepare_datasets consumes).
        """
        self.config = config
        tokenizer_model = MODELS[config['model_name']]

        self.train_loader, self.dev_loader, tokenizer = prepare_datasets(
            config, tokenizer_model)

        # Whole batches per epoch; a trailing partial batch is not counted.
        self._n_dev_batches = len(
            self.dev_loader.dataset) // config['batch_size']
        self._n_train_batches = len(
            self.train_loader.dataset) // config['batch_size']

        # Fall back to CPU when CUDA is requested but not available.
        if config['cuda']:
            self.device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device('cpu')
        print("use device: ", self.device)

        # Running evaluation metrics.
        self._train_loss = AverageMeter()
        self._train_f1 = AverageMeter()
        self._train_em = AverageMeter()
        self._dev_f1 = AverageMeter()
        self._dev_em = AverageMeter()

        self.model = Model(config, MODELS[config['model_name']], self.device,
                           tokenizer).to(self.device)
        # NOTE(review): a `t_total` step count (len(train_loader) //
        # gradient_accumulation_steps * max_epochs) was computed here but
        # never used — presumably intended for an LR scheduler that was never
        # wired up. Removed as dead code; restore it if a scheduler is added.
        self.optimizer = AdamW(self.model.parameters(),
                               lr=config['lr'],
                               eps=config['adam_epsilon'])
        self.optimizer.zero_grad()
        self._n_train_examples = 0
        self._epoch = self._best_epoch = 0
        self._best_f1 = 0
        self._best_em = 0
        self.restored = False
        # Optionally resume: full training state in 'train' mode, otherwise
        # load model weights only.
        if config['pretrained_dir'] is not None:
            if config['mode'] == 'train':
                self.restore()
            else:
                self.load_model()
Example #4
0
    def __init__(self, config):
        """Initialize logger, device, dataset loaders, model, and text logger.

        All settings come from *config* ('dir', 'pretrained', 'cuda',
        'cuda_id', 'batch_size', 'shuffle', ...).
        """
        self.logger = ModelLogger(config,
                                  dirname=config['dir'],
                                  pretrained=config['pretrained'])
        self.dirname = self.logger.dirname

        # Choose the compute device: CPU, default CUDA device, or a specific
        # GPU when a non-negative id is configured.
        use_cuda = config['cuda']
        gpu_index = config['cuda_id']
        if not use_cuda:
            self.device = torch.device('cpu')
        else:
            self.device = torch.device('cuda:%d' % gpu_index
                                       if gpu_index >= 0 else 'cuda')

        # Load the dataset splits.  (Translated from Japanese comments.)
        splits = prepare_datasets(config)
        train_set = splits['train']
        dev_set = splits['dev']
        test_set = splits['test']

        # Running evaluation metrics.
        self._train_loss = AverageMeter()
        self._train_f1 = AverageMeter()
        self._train_em = AverageMeter()
        self._dev_f1 = AverageMeter()
        self._dev_em = AverageMeter()

        batch = config['batch_size']

        def loader_for(dataset, shuffle):
            # Identity collate_fn keeps each batch as a raw list of examples.
            return DataLoader(dataset,
                              batch_size=batch,
                              shuffle=shuffle,
                              collate_fn=lambda x: x,
                              pin_memory=True)

        # Missing splits leave their loader as None (and skip batch counters).
        if train_set:
            self.train_loader = loader_for(train_set, config['shuffle'])
            self._n_train_batches = len(train_set) // batch
        else:
            self.train_loader = None

        if dev_set:
            self.dev_loader = loader_for(dev_set, False)
            self._n_dev_batches = len(dev_set) // batch
        else:
            self.dev_loader = None

        if test_set:
            self.test_loader = loader_for(test_set, False)
            self._n_test_batches = len(test_set) // batch
            self._n_test_examples = len(test_set)
        else:
            self.test_loader = None

        # Build the model and move its network onto the chosen device.
        self._n_train_examples = 0
        self.model = Model(config, train_set)
        self.model.network = self.model.network.to(self.device)
        # Note: adopts the model's (possibly augmented) config, not the raw one.
        self.config = self.model.config
        self.is_test = False
        self.textlogger = get_logger("log.txt")