def __init__(self, model, **kwargs):
    super(TaggerTrainerPyTorch, self).__init__()
    self.gpu = not bool(kwargs.get('nogpu', False))
    # By default support IOB1/IOB2
    self.span_type = kwargs.get('span_type', 'iob')
    self.verbose = kwargs.get('verbose', False)
    logger.info('Setting span type %s', self.span_type)
    self.model = model
    self.idx2label = revlut(self.model.labels)
    self.clip = float(kwargs.get('clip', 5))
    self.optimizer = OptimizerManager(self.model, **kwargs)
    if self.gpu:
        self.model = model.to_gpu()
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
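# NOTE (added for illustration, not part of the original trainers): the `clip`
# value captured above feeds torch.nn.utils.clip_grad_norm_ in the _train loops
# below. A minimal sketch of what that call does, using a throwaway linear
# model; the model and numbers here are hypothetical.
def _gradient_clipping_sketch(clip=5.0):
    import torch

    toy = torch.nn.Linear(4, 2)
    loss = toy(torch.randn(8, 4)).pow(2).sum()
    loss.backward()
    # Rescales all gradients in place so their combined L2 norm is at most
    # `clip`; returns the norm measured before clipping.
    total_norm = torch.nn.utils.clip_grad_norm_(toy.parameters(), clip)
    return float(total_norm)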
class TaggerTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpu = not bool(kwargs.get('nogpu', False))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)
        if self.verbose:
            logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpu:
            self.model = model.to_gpu()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self, guess, truth, sentence_lengths, ids, handle=None, txts=None):
        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For fscore
        gold_count = 0
        guess_count = 0
        overlap_count = 0
        # For each sentence
        for b in range(len(guess)):
            sentence = guess[b].cpu().numpy()
            sentence_length = sentence_lengths[b]
            gold = truth_n[b, :sentence_length]
            correct_labels += np.sum(np.equal(sentence, gold))
            total_labels += sentence_length
            gold_chunks = to_spans(gold, self.idx2label, self.span_type, self.verbose)
            gold_count += len(gold_chunks)
            guess_chunks = to_spans(sentence, self.idx2label, self.span_type, self.verbose)
            guess_count += len(guess_chunks)
            overlap_chunks = gold_chunks & guess_chunks
            overlap_count += len(overlap_chunks)
            # Should we write a file out? If so, we have to have txts
            if handle is not None:
                id = ids[b]
                txt = txts[id]
                write_sentence_conll(handle, sentence, gold, txt, self.idx2label)
        return correct_labels, total_labels, overlap_count, gold_count, guess_count

    def _test(self, ts, **kwargs):
        self.model.eval()
        total_correct = 0
        total_sum = 0
        total_gold_count = 0
        total_guess_count = 0
        total_overlap_count = 0
        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            pred = self.model(inputs)
            correct, count, overlaps, golds, guesses = self.process_output(pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            total_gold_count += golds
            total_guess_count += guesses
            total_overlap_count += overlaps
        total_acc = total_correct / float(total_sum)
        # Only show the fscore if requested
        metrics['f1'] = f_score(total_overlap_count, total_gold_count, total_guess_count)
        metrics['acc'] = total_acc
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            self.optimizer.zero_grad()
            loss = self.model.compute_loss(inputs)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        return metrics
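# NOTE (illustrative, not the library's f_score implementation): the trainer
# above reduces tagging quality to three counts, overlapping spans, gold spans,
# and guessed spans, and hands them to f_score. Under the usual definition that
# is micro-averaged span F1; a self-contained sketch:
def _span_f1_from_counts_sketch(overlap_count, gold_count, guess_count):
    # precision: fraction of guessed spans that exactly match a gold span
    precision = overlap_count / float(guess_count) if guess_count else 0.0
    # recall: fraction of gold spans that were guessed exactly
    recall = overlap_count / float(gold_count) if gold_count else 0.0
    if precision + recall == 0.0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)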
class TaggerTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super(TaggerTrainerPyTorch, self).__init__()
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)
        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpus > 1:
            logger.info("Trainer for PyTorch tagger currently doesn't support multiple GPUs. Setting to 1")
            self.gpus = 1
        if self.gpus > 0:
            self.model = model.to_gpu()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    @staticmethod
    def _get_batchsz(batch_dict):
        return batch_dict['y'].shape[0]

    def process_output(self, guess, truth, sentence_lengths, ids, handle=None, txts=None):
        # For acc
        correct_labels = 0
        total_labels = 0
        truth_n = truth.cpu().numpy()
        # For f1
        gold_chunks = []
        pred_chunks = []
        # For each sentence
        for b in range(len(guess)):
            sentence = guess[b].cpu().numpy()
            sentence_length = sentence_lengths[b]
            gold = truth_n[b, :sentence_length]
            correct_labels += np.sum(np.equal(sentence, gold))
            total_labels += sentence_length
            gold_chunks.append(set(to_spans(gold, self.idx2label, self.span_type, self.verbose)))
            pred_chunks.append(set(to_spans(sentence, self.idx2label, self.span_type, self.verbose)))
            # Should we write a file out? If so, we have to have txts
            if handle is not None and txts is not None:
                txt_id = ids[b]
                txt = txts[txt_id]
                write_sentence_conll(handle, sentence, gold, txt, self.idx2label)
        return correct_labels, total_labels, gold_chunks, pred_chunks

    def _test(self, ts, **kwargs):
        self.model.eval()
        total_sum = 0
        total_correct = 0
        gold_spans = []
        pred_spans = []
        metrics = {}
        steps = len(ts)
        conll_output = kwargs.get('conll_output', None)
        txts = kwargs.get('txts', None)
        handle = None
        if conll_output is not None and txts is not None:
            handle = open(conll_output, "w")
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            lengths = inputs['lengths']
            ids = inputs['ids']
            pred = self.model(inputs)
            correct, count, golds, guesses = self.process_output(pred, y.data, lengths, ids, handle, txts)
            total_correct += correct
            total_sum += count
            gold_spans.extend(golds)
            pred_spans.extend(guesses)
        total_acc = total_correct / float(total_sum)
        metrics['acc'] = total_acc
        metrics['f1'] = span_f1(gold_spans, pred_spans)
        if self.verbose:
            # TODO: Add programmatic access to these metrics?
            conll_metrics = per_entity_f1(gold_spans, pred_spans)
            conll_metrics['acc'] = total_acc * 100
            conll_metrics['tokens'] = total_sum.item()
            logger.info(conlleval_output(conll_metrics))
        return metrics

    def _train(self, ts, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        epoch_loss = 0
        epoch_norm = 0
        steps = len(ts)
        pg = create_progress_bar(steps)
        for batch_dict in pg(ts):
            inputs = self.model.make_input(batch_dict)
            self.optimizer.zero_grad()
            loss = self.model.compute_loss(inputs)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            bsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = self.calc_metrics(epoch_loss, epoch_norm)
        return metrics
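# NOTE (illustrative only; span_f1 and per_entity_f1 come from the surrounding
# library): this variant of the tagger trainer keeps one set of spans per
# sentence and defers scoring to span_f1. A hedged sketch of how such a
# function could compute the same micro-averaged F1 from those lists of sets:
def _span_f1_sketch(gold_spans, pred_spans):
    overlap = gold = guess = 0
    for gold_set, pred_set in zip(gold_spans, pred_spans):
        overlap += len(gold_set & pred_set)
        gold += len(gold_set)
        guess += len(pred_set)
    precision = overlap / float(guess) if guess else 0.0
    recall = overlap / float(gold) if gold else 0.0
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0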
class Seq2SeqTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super(Seq2SeqTrainerPyTorch, self).__init__()
        self.gpu = bool(kwargs.get('gpu', True))
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.crit = model.create_loss()
        self.tgt_rlut = kwargs['tgt_rlut']
        if self.gpu:
            self.model = torch.nn.DataParallel(model).cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

    @staticmethod
    def _num_toks(tgt_lens):
        return np.sum(tgt_lens)

    def calc_metrics(self, agg, norm):
        metrics = super(Seq2SeqTrainerPyTorch, self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase, **kwargs):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns, **kwargs)
        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []
        start = time.time()
        pg = create_progress_bar(steps)
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [p[0] for p in self._predict(input_, beam=1, make_input=False)]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))
        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds)[0]
        self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
        return metrics

    def _evaluate(self, es, reporting_fns, **kwargs):
        self.model.eval()
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.time()
        for batch_dict in pg(es):
            tgt = batch_dict['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = [p[0] for p in self._predict(batch_dict, **kwargs)]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds)[0]}
        self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
        return metrics

    def train(self, ts, reporting_fns):
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        start = time.time()
        self.nstep_start = start
        for batch_dict in ts:
            start_time = time.time()
            self.optimizer.zero_grad()
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            tgt_lens = batch_dict['tgt_lengths']
            tok_count = self._num_toks(tgt_lens)
            reporting_loss = loss.item() * tok_count
            epoch_loss += reporting_loss
            epoch_toks += tok_count
            self.nstep_agg += reporting_loss
            self.nstep_div += tok_count
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(self.train_epochs, metrics, start, 'Train', 'EPOCH', reporting_fns)
        return metrics
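# NOTE (illustrative): both the seq2seq and language-model trainers report
# perplexity as exp(avg_loss), where avg_loss is the token-weighted mean
# cross-entropy accumulated as sum(loss_i * toks_i) / sum(toks_i). A tiny
# self-contained sketch of that reduction with made-up numbers:
def _perplexity_sketch():
    import numpy as np

    batch_losses = [2.31, 2.05, 1.98]   # per-token loss for each batch (hypothetical)
    batch_toks = [512, 480, 500]        # tokens per batch (hypothetical)
    agg = sum(l * t for l, t in zip(batch_losses, batch_toks))
    norm = sum(batch_toks)
    avg_loss = agg / norm
    return np.exp(avg_loss)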
class LanguageModelTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super(LanguageModelTrainerPyTorch, self).__init__()
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpu = not bool(kwargs.get('nogpu', False))
        self.crit = model.create_loss()
        if self.gpu:
            self.model = self.model.cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)
        self.optimizer = OptimizerManager(self.model, **kwargs)

    def repackage_hidden(self, h):
        """Wraps hidden states in new Variables, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(self.repackage_hidden(v) for v in h)

    @staticmethod
    def _get_dims(batch_dict):
        return batch_dict['y'].shape

    @staticmethod
    def _num_toks(batch_dict):
        return np.prod(LanguageModelTrainerPyTorch._get_dims(batch_dict))

    def calc_metrics(self, agg, norm):
        metrics = super(LanguageModelTrainerPyTorch, self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase='Valid'):
        epoch = 0
        if phase == 'Valid':
            self.valid_epochs += 1
            epoch = self.valid_epochs
        start = time.time()
        self.model.eval()
        total_loss = 0
        total_toks = 0
        metrics = {}
        batchsz, nctx = self._get_dims(vs[0])
        hidden = self.model.init_hidden(batchsz)
        for batch_dict in vs:
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            output, hidden = self.model(inputs, hidden)
            toks = self._num_toks(batch_dict)
            total_loss += self.crit(output, y).item() * toks
            total_toks += toks
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
        metrics = self.calc_metrics(total_loss, total_toks)
        self.report(epoch, metrics, start, phase, 'EPOCH', reporting_fns)
        return metrics

    def train(self, ts, reporting_fns):
        start = time.time()
        self.nstep_start = start
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        batchsz, nctx = self._get_dims(ts[0])
        hidden = self.model.init_hidden(batchsz)
        for batch_dict in ts:
            if hidden is not None:
                hidden = self.repackage_hidden(hidden)
            inputs = self.model.make_input(batch_dict)
            y = inputs.pop('y')
            self.optimizer.zero_grad()
            output, hidden = self.model(inputs, hidden)
            loss = self.crit(output, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            toks = self._num_toks(batch_dict)
            report_loss = loss.item() * toks
            epoch_loss += report_loss
            epoch_toks += toks
            self.nstep_agg += report_loss
            self.nstep_div += toks
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(self.train_epochs, metrics, start, 'Train', 'EPOCH', reporting_fns)
        return metrics
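# NOTE (illustrative): repackage_hidden above detaches the recurrent state
# between batches so backpropagation is truncated at batch boundaries instead
# of flowing through the whole corpus. A minimal sketch with a throwaway LSTM
# (sizes are hypothetical) showing that the detached state keeps its values
# but drops its autograd history:
def _repackage_hidden_sketch():
    import torch

    lstm = torch.nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
    out, (h, c) = lstm(torch.randn(4, 10, 8))
    detached = tuple(t.detach() for t in (h, c))
    # Same values, but no longer connected to the graph that produced them.
    assert not detached[0].requires_grad
    return detached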
class ClassifyTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super(ClassifyTrainerPyTorch, self).__init__()
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.optimizer = OptimizerManager(model, **kwargs)
        self.crit = model.create_loss().cuda()
        self.model = torch.nn.DataParallel(model).cuda()
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _make_input(self, batch_dict):
        return self.model.module.make_input(batch_dict)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['y'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)
        for batch_dict in pg(loader):
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            total_loss += loss.item() * batchsz
            total_norm += batchsz
            _add_to_cm(cm, y, pred)
        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)
        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
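# NOTE (illustrative): because this trainer always wraps the model in
# torch.nn.DataParallel, custom attributes and methods of the underlying model
# are only reachable through the wrapper's .module attribute, which is why
# _make_input above calls self.model.module.make_input. A small sketch; the
# toy class is hypothetical:
def _dataparallel_module_sketch():
    import torch

    class ToyModel(torch.nn.Module):
        def __init__(self):
            super(ToyModel, self).__init__()
            self.proj = torch.nn.Linear(4, 2)

        def make_input(self, batch_dict):
            return {k: torch.as_tensor(v) for k, v in batch_dict.items()}

    wrapped = torch.nn.DataParallel(ToyModel())
    # wrapped.make_input(...) would raise AttributeError; the original module
    # lives at wrapped.module.
    return wrapped.module.make_input({'x': [[1.0, 2.0, 3.0, 4.0]]})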
class ClassifyTrainerPyTorch(EpochReportingTrainer):

    def __init__(self, model, **kwargs):
        super(ClassifyTrainerPyTorch, self).__init__()
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))
        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU. This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    def _make_input(self, batch_dict):
        if self.gpus > 1:
            return self.model.module.make_input(batch_dict)
        return self.model.make_input(batch_dict)

    @staticmethod
    def _get_batchsz(batch_dict):
        return len(batch_dict['y'])

    def _test(self, loader, **kwargs):
        self.model.eval()
        total_loss = 0
        total_norm = 0
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        verbose = kwargs.get("verbose", None)
        output = kwargs.get('output')
        txts = kwargs.get('txts')
        handle = None
        line_number = 0
        if output is not None and txts is not None:
            handle = open(output, "w")
        for batch_dict in pg(loader):
            example = self._make_input(batch_dict)
            ys = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, ys)
            if handle is not None:
                for p, y in zip(pred, ys):
                    handle.write('{}\t{}\t{}\n'.format(" ".join(txts[line_number]), self.model.labels[p], self.model.labels[y]))
                    line_number += 1
            batchsz = self._get_batchsz(batch_dict)
            total_loss += loss.item() * batchsz
            total_norm += batchsz
            _add_to_cm(cm, ys, pred)
        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = total_loss / float(total_norm)
        verbose_output(verbose, cm)
        if handle is not None:
            handle.close()
        return metrics

    def _train(self, loader, **kwargs):
        self.model.train()
        reporting_fns = kwargs.get('reporting_fns', [])
        steps = len(loader)
        pg = create_progress_bar(steps)
        cm = ConfusionMatrix(self.labels)
        epoch_loss = 0
        epoch_div = 0
        for batch_dict in pg(loader):
            self.optimizer.zero_grad()
            example = self._make_input(batch_dict)
            y = example.pop('y')
            pred = self.model(example)
            loss = self.crit(pred, y)
            batchsz = self._get_batchsz(batch_dict)
            report_loss = loss.item() * batchsz
            epoch_loss += report_loss
            epoch_div += batchsz
            self.nstep_agg += report_loss
            self.nstep_div += batchsz
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            _add_to_cm(cm, y, pred)
            self.optimizer.step()
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = cm.get_all_metrics()
        metrics['avg_loss'] = epoch_loss / float(epoch_div)
        return metrics
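# NOTE (illustrative): with gpus=-1 the constructor above infers the device
# count from CUDA_VISIBLE_DEVICES (falling back to NV_GPU, then '0'). A small
# self-contained sketch of that lookup:
def _infer_gpu_count_sketch():
    import os

    visible = os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0'))
    # e.g. CUDA_VISIBLE_DEVICES="0,1,3" -> 3 devices
    return len(visible.split(','))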
class Seq2SeqTrainerPyTorch(Trainer):

    def __init__(self, model, **kwargs):
        super(Seq2SeqTrainerPyTorch, self).__init__()
        self.gpu = bool(kwargs.get('gpu', True))
        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.crit = model.create_loss()
        self.tgt_rlut = kwargs['tgt_rlut']
        if self.gpu:
            self.model = torch.nn.DataParallel(model).cuda()
            self.crit.cuda()
        self.nsteps = kwargs.get('nsteps', 500)

    @staticmethod
    def _num_toks(tgt_lens):
        return np.sum(tgt_lens)

    def calc_metrics(self, agg, norm):
        metrics = super(Seq2SeqTrainerPyTorch, self).calc_metrics(agg, norm)
        metrics['perplexity'] = np.exp(metrics['avg_loss'])
        return metrics

    def test(self, vs, reporting_fns, phase):
        if phase == 'Test':
            return self._evaluate(vs, reporting_fns)
        self.model.eval()
        total_loss = total_toks = 0
        steps = len(vs)
        self.valid_epochs += 1
        preds = []
        golds = []
        start = time.time()
        pg = create_progress_bar(steps)
        for batch_dict in pg(vs):
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            toks = self._num_toks(tgt_lens)
            total_loss += loss.item() * toks
            total_toks += toks
            greedy_preds = [p[0] for p in self._predict(input_, beam=1, make_input=False)]
            preds.extend(convert_seq2seq_preds(greedy_preds, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt.cpu().numpy(), tgt_lens, self.tgt_rlut))
        metrics = self.calc_metrics(total_loss, total_toks)
        metrics['bleu'] = bleu(preds, golds)[0]
        self.report(self.valid_epochs, metrics, start, phase, 'EPOCH', reporting_fns)
        return metrics

    def _evaluate(self, es, reporting_fns):
        self.model.eval()
        pg = create_progress_bar(len(es))
        preds = []
        golds = []
        start = time.time()
        for batch_dict in pg(es):
            tgt = batch_dict['tgt']
            tgt_lens = batch_dict['tgt_lengths']
            pred = [p[0] for p in self._predict(batch_dict)]
            preds.extend(convert_seq2seq_preds(pred, self.tgt_rlut))
            golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
        metrics = {'bleu': bleu(preds, golds)[0]}
        self.report(0, metrics, start, 'Test', 'EPOCH', reporting_fns)
        return metrics

    def train(self, ts, reporting_fns):
        self.model.train()
        epoch_loss = 0
        epoch_toks = 0
        start = time.time()
        self.nstep_start = start
        for batch_dict in ts:
            start_time = time.time()
            self.optimizer.zero_grad()
            input_ = self._input(batch_dict)
            tgt = input_['tgt']
            pred = self.model(input_)
            loss = self.crit(pred, tgt)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
            self.optimizer.step()
            tgt_lens = batch_dict['tgt_lengths']
            tok_count = self._num_toks(tgt_lens)
            reporting_loss = loss.item() * tok_count
            epoch_loss += reporting_loss
            epoch_toks += tok_count
            self.nstep_agg += reporting_loss
            self.nstep_div += tok_count
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        metrics = self.calc_metrics(epoch_loss, epoch_toks)
        self.train_epochs += 1
        self.report(self.train_epochs, metrics, start, 'Train', 'EPOCH', reporting_fns)
        return metrics
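# NOTE (illustrative): all of the training loops above share the same step-level
# reporting cadence: accumulate a loss numerator (nstep_agg) and a normalizer
# (nstep_div, batches or tokens), report every `nsteps` optimizer steps, then
# reset the accumulators. A stripped-down sketch of that pattern with a
# hypothetical report function:
def _nstep_reporting_sketch(losses, sizes, nsteps=100, report=print):
    agg = 0.0
    div = 0
    for step, (loss, size) in enumerate(zip(losses, sizes), start=1):
        agg += loss * size
        div += size
        if step % nsteps == 0:
            report({'avg_loss': agg / div})
            agg, div = 0.0, 0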