def __init__(self, original_data: List[List[torch.Tensor]], max_seq_len: int,
             batch_size: int, model: Seq2Seq, translator: Translator,
             teacher: Seq2Seq) -> None:
    super().__init__()
    self.device = select_device(None)

    # Both the student model and the teacher run in inference mode.
    self.teacher = teacher.to(self.device).eval()
    self.translator = translator
    self.model = model.to(self.device).eval()
    self.translator.initialize(self.model)

    # Drop example groups containing any sequence longer than max_seq_len,
    # then pad and batch the remainder.
    original_data = [lst for lst in original_data
                     if all(len(x) <= max_seq_len for x in lst)]
    original_data = collate_fn(original_data, pad=0)
    self.data = original_data
    self.batch_size = batch_size

    self.old_words = None
    self.new_contexts = None
    self.new_words = None
    self.new_confs = None
    self.register_attrs('old_words', 'new_contexts', 'new_words', 'new_confs')
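
# Small, self-contained illustration of the length filter above (the tensors
# and the threshold are made up for the example): any example group containing
# a sequence longer than max_seq_len is dropped before collation.
import torch

_pairs = [[torch.arange(3), torch.arange(5)], [torch.arange(12), torch.arange(4)]]
_max_seq_len = 10
_kept = [p for p in _pairs if all(len(x) <= _max_seq_len for x in p)]
assert len(_kept) == 1  # the second pair is dropped: its first sequence has length 12
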
def __init__(self, dataset: Dataset, sampler: Sampler, translator: Translator,
             base_dir: str, train_file: str, val_file: str, test_file: str,
             others_file: str, split: str = 'train', use_seqinter: bool = False,
             batch_saving: Optional[int] = None) -> None:
    self.dataset = dataset
    self.sampler = sampler
    self.device = select_device(None)
    self.translator = translator.to(self.device)
    self.base_dir = base_dir
    self.train_file = train_file
    self.val_file = val_file
    self.test_file = test_file
    self.others_file = others_file
    self.split = split
    self.use_seqinter = use_seqinter
    self.batch_saving = batch_saving
    self.register_attrs('base_dir', 'train_file', 'val_file',
                        'test_file', 'others_file')
def __init__(self, alias: str, beam_size: int, max_seq_len: int,
             tgt_sos_idx: int = 2, tgt_eos_idx: int = 3,
             device: Optional[str] = None, **kwargs) -> None:
    super().__init__()
    interface = torch.hub.load('pytorch/fairseq', alias, **kwargs)

    self.beam_size = beam_size
    self.max_seq_len = max_seq_len
    self.tgt_sos_idx = tgt_sos_idx
    self.tgt_eos_idx = tgt_eos_idx
    self.interface = interface

    args = copy.copy(self.interface.args)
    args.beam = beam_size
    args.max_len_b = max_seq_len - 1  # one extra token for <SOS>
    self.generator = self.interface.task.build_generator(args)

    self.src_pad_idx = interface.models[0].encoder.padding_idx
    self.tgt_pad_idx = interface.models[0].decoder.padding_idx
    self.device = select_device(device)
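
# Hedged usage sketch for the fairseq hub interface used above. The alias and
# keyword arguments follow fairseq's published torch.hub examples; whether this
# exact checkpoint is available depends on your fairseq version, so treat the
# model choice as an assumption. Guarded so it only runs (and downloads a
# large model) when executed directly.
if __name__ == '__main__':
    import torch

    en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model',
                           tokenizer='moses', bpe='fastbpe')
    print(en2de.translate('Machine learning is great!'))
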
def __init__(self, dataset: Dataset, model: Module, metric_fn: Metric,
             eval_sampler: Optional[Sampler] = None, eval_data: str = 'test',
             device: Optional[str] = None, save_preds: bool = False,
             teacher: Optional[Module] = None, save_targets: bool = False,
             gen_style: str = 'greedy') -> None:
    self.eval_sampler = eval_sampler or BaseSampler(batch_size=16, shuffle=False)
    self.model = model
    self.metric_fn = metric_fn
    self.eval_metric = None
    self.dataset = dataset
    self.device = select_device(device)

    data = getattr(dataset, eval_data)
    self._eval_iterator = self.eval_sampler.sample(data)

    # By default, no prefix applied to tb logs
    self.tb_log_prefix = None

    self.save_preds = save_preds
    self.decode_data = None
    self.teacher = teacher
    self.save_targets = save_targets
    self.targets = None
    self.gen_style = gen_style
    self.register_attrs('decode_data', 'targets')
def compute(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # Compare log-probabilities against a label-smoothed target distribution.
    pred = torch.log_softmax(pred, dim=-1)
    device = select_device(None)

    # Start from the precomputed smoothed one-hot template, place the
    # confidence mass on the true label, and zero out padded positions.
    prob = self.one_hot.repeat(target.size(0), 1).to(device)
    prob.scatter_(1, target.unsqueeze(1), self.confidence)
    prob.masked_fill_((target == self.ignore_index).unsqueeze(1), 0)

    return F.kl_div(pred, prob, reduction='batchmean')
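
# Hedged sketch of the state `compute` above relies on: `one_hot`, `confidence`
# and `ignore_index` are assumed to follow standard label smoothing (smoothing
# mass spread over non-target, non-pad entries). The vocabulary size, smoothing
# value and example batch below are made up for illustration.
import torch
import torch.nn.functional as F

_vocab_size, _smoothing, _ignore_index = 10, 0.1, 0
_confidence = 1.0 - _smoothing
_one_hot = torch.full((1, _vocab_size), _smoothing / (_vocab_size - 2))
_one_hot[0, _ignore_index] = 0

_pred = torch.log_softmax(torch.randn(4, _vocab_size), dim=-1)
_target = torch.tensor([1, 2, 3, _ignore_index])
_prob = _one_hot.repeat(_target.size(0), 1)
_prob.scatter_(1, _target.unsqueeze(1), _confidence)
_prob.masked_fill_((_target == _ignore_index).unsqueeze(1), 0)
_loss = F.kl_div(_pred, _prob, reduction='batchmean')  # same computation as `compute`
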
def __init__(self, beam_size: int, max_seq_len: int,
             device: Optional[str] = None) -> None:
    super().__init__()
    self.beam_size = beam_size
    self.max_seq_len = max_seq_len
    self._model = BartForConditionalGeneration.from_pretrained('bart-large-cnn')
    self.tgt_sos_idx = self._model.config.bos_token_id
    self.tgt_eos_idx = self._model.config.eos_token_id
    self.tgt_pad_idx = self._model.config.pad_token_id
    self.device = select_device(device)
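
# Hedged usage sketch for the BART model loaded above, calling the underlying
# transformers API directly. `generate` with `num_beams` and `max_length` is
# standard; note that newer transformers releases expect the
# 'facebook/bart-large-cnn' identifier rather than the bare name used above,
# so the checkpoint string is version-dependent. Guarded behind __main__
# because it downloads a large checkpoint.
if __name__ == '__main__':
    from transformers import BartForConditionalGeneration, BartTokenizer

    _tok = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
    _bart = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
    _inputs = _tok('A long news article to summarize ...', return_tensors='pt')
    _ids = _bart.generate(_inputs['input_ids'], num_beams=5, max_length=100)
    print(_tok.decode(_ids[0], skip_special_tokens=True))
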
def __init__(self, max_seq_len, min_seq_len=0, translator=None, teacher=None,
             device=None, sample_factor=1, scheduler_type='exponential', **kwargs):
    self.max_seq_len = max_seq_len
    self.min_seq_len = min_seq_len
    self.device = select_device(None)
    self.sample_factor = sample_factor
    self.scheduler_type = scheduler_type
    self.initialize(translator, teacher)
    super().__init__(**kwargs)
def __init__(self, dataset: Dataset, model: Module, metric_fn: Metric,
             eval_sampler: Optional[Sampler] = None, eval_data: str = 'test',
             device: Optional[str] = None) -> None:
    """Initialize the evaluator.

    Parameters
    ----------
    dataset : Dataset
        The dataset to run evaluation on
    model : Module
        The model to evaluate
    metric_fn : Metric
        The metric to use for evaluation
    eval_sampler : Optional[Sampler]
        The sampler to use over validation examples. By default it
        will use `BaseSampler` with batch size 16 and without shuffling.
    eval_data : str
        The data split to evaluate on: one of train, val or test
    device : str, optional
        The device to use in the computation.

    """
    self.eval_sampler = eval_sampler or BaseSampler(batch_size=16, shuffle=False)
    self.model = model
    self.metric_fn = metric_fn
    self.dataset = dataset
    self.device = select_device(device)

    data = getattr(dataset, eval_data)
    self._eval_iterator = self.eval_sampler.sample(data)

    # By default, no prefix applied to tb logs
    self.tb_log_prefix = None

    self.eval_metric: Union[float, None] = None
    self.register_attrs('eval_metric')
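
# Minimal illustration of how `eval_data` picks a split above: the evaluator
# simply reads the attribute of that name off the dataset. `_ToySplits` is a
# stand-in assumption, not a class from this codebase.
class _ToySplits:
    train = [('src train', 'tgt train')]
    val = [('src val', 'tgt val')]
    test = [('src test', 'tgt test')]

assert getattr(_ToySplits(), 'test') == [('src test', 'tgt test')]
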
def __init__(self,
             dataset: Dataset,
             train_sampler: Sampler,
             val_sampler: Sampler,
             model: Module,
             loss_fn: Metric,
             metric_fn: Metric,
             optimizer: Optimizer,
             scheduler: Optional[_LRScheduler] = None,
             iter_scheduler: Optional[_LRScheduler] = None,
             device: Optional[str] = None,
             max_steps: int = 10,
             epoch_per_step: float = 1.0,
             iter_per_step: Optional[int] = None,
             batches_per_iter: int = 1,
             lower_is_better: bool = False,
             max_grad_norm: Optional[float] = None,
             max_grad_abs_val: Optional[float] = None,
             extra_validation_metrics: Optional[List[Metric]] = None) -> None:
    """Initialize an instance of Trainer.

    Parameters
    ----------
    dataset : Dataset
        The dataset to use in training the model
    train_sampler : Sampler
        The sampler to use over training examples during training
    val_sampler : Sampler
        The sampler to use over validation examples
    model : Module
        The model to train
    loss_fn : Metric
        The loss function to use in training the model
    metric_fn : Metric
        The metric function to use in evaluation
    optimizer : torch.optim.Optimizer
        The optimizer to use
    scheduler : torch.optim.lr_scheduler._LRScheduler, optional
        An optional learning rate scheduler to run after each step
    iter_scheduler : torch.optim.lr_scheduler._LRScheduler, optional
        An optional learning rate scheduler to run after each batch
        (i.e. iteration)
    device : str, optional
        The device to use in the computation.
    max_steps : int, optional
        The maximum number of training steps to run
    epoch_per_step : float, optional
        Fraction of an epoch to perform in a single training step
        (i.e. before a checkpoint). Defaults to 1.
        Overridden by `iter_per_step`, if given.
    iter_per_step : int, optional
        Number of iterations to perform in a single training step.
        Overrides `epoch_per_step` if given.
    batches_per_iter : int, optional
        Number of batches to pass through the model before calling
        optimizer.step. Requires the sampler to have drop_last set
        to True. (default set to 1 so optimizer.step is called after
        every batch)
    lower_is_better : bool, optional
        If true, the lowest val metric is considered best,
        otherwise the highest. Defaults to False.
    max_grad_norm : float, optional
        Maximum Euclidean norm of gradient after clipping.
    max_grad_abs_val : float, optional
        Maximum absolute value of all gradient vector components
        after clipping.
    extra_validation_metrics : Optional[List[Metric]]
        A list of extra metrics to show in each step, which do not
        guide the training procedure (i.e. model selection through
        early stopping)

    """
    self.dataset = dataset
    self.train_sampler = train_sampler
    self.val_sampler = val_sampler
    self.model = model
    self.loss_fn = loss_fn
    self.metric_fn = metric_fn
    self.optimizer = optimizer
    self.scheduler = scheduler
    self.iter_scheduler = iter_scheduler
    self.lower_is_better = lower_is_better
    self.max_grad_norm = max_grad_norm
    self.max_grad_abs_val = max_grad_abs_val
    self.extra_validation_metrics = extra_validation_metrics or []

    # By default, no prefix applied to tb logs
    self.tb_log_prefix = None

    # Select right device
    self.device = select_device(device)

    if (not getattr(self.train_sampler, 'drop_last', False)
            and batches_per_iter != 1):
        raise ValueError(
            f'batches_per_iter cannot be set to {batches_per_iter} '
            'if the sampler does not have `drop_last` set to True')
    self.batches_per_iter = batches_per_iter

    n_batches = self.train_sampler.length(dataset.train)
    if iter_per_step is None:
        # Derive iterations per step from epoch_per_step
        if self.batches_per_iter > n_batches:
            raise Exception(
                f'Please set batches_per_iter ({self.batches_per_iter}) '
                f'to be ≤ the length of your train_sampler '
                f'({n_batches})')
        iter_per_epoch = n_batches // self.batches_per_iter
        iter_per_step = math.ceil(epoch_per_step * iter_per_epoch)
    else:
        # iter_per_step takes precedence over epoch_per_step
        epoch_per_step = iter_per_step / n_batches
    self.iter_per_step = iter_per_step

    self.max_steps = max_steps

    self._step = 0
    self._best_metric = None
    self._best_model = None
    self.register_attrs('_step', '_best_metric', '_best_model')

    n_epochs = math.ceil(epoch_per_step * max_steps)
    self._train_iterator = self.train_sampler.sample(dataset.train, n_epochs)
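
# Worked example of the step bookkeeping above (pure arithmetic, no framework
# dependencies; the numbers are made up): with 1000 training batches,
# batches_per_iter=4 and epoch_per_step=0.5, one training step covers 125
# optimizer iterations, i.e. half an epoch of gradient updates.
import math

_n_batches = 1000        # what train_sampler.length(dataset.train) would return
_batches_per_iter = 4
_epoch_per_step = 0.5

_iter_per_epoch = _n_batches // _batches_per_iter              # 250 optimizer steps per epoch
_iter_per_step = math.ceil(_epoch_per_step * _iter_per_epoch)  # 125 iterations per training step
assert _iter_per_step == 125
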