Example #1
    def __init__(self, original_data: List[List[torch.Tensor]],
                 max_seq_len: int, batch_size: int, model: Seq2Seq,
                 translator: Translator, teacher: Seq2Seq):

        super().__init__()

        device = select_device(None)
        self.device = device
        self.teacher = teacher.to(self.device).eval()

        self.translator = translator
        self.model = model.to(self.device).eval()
        self.translator.initialize(self.model)

        # Keep only examples whose sequences all fit within max_seq_len,
        # then pad and batch the remainder.
        original_data = [
            lst for lst in original_data
            if all(len(x) <= max_seq_len for x in lst)
        ]
        original_data = collate_fn(original_data, pad=0)

        self.data = original_data
        self.batch_size = batch_size

        self.old_words = None
        self.new_contexts = None
        self.new_words = None
        self.new_confs = None

        self.register_attrs('old_words', 'new_contexts', 'new_words',
                            'new_confs')
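
All of these examples lean on select_device, whose implementation is not shown on this page. A minimal sketch of the presumed behavior (an assumption, not the project's actual code): honor an explicit device string, otherwise prefer CUDA when available.

    import torch

    def select_device(device=None):
        # Assumed behavior: fall back to CUDA if available, else CPU,
        # unless the caller names a device explicitly.
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        return torch.device(device)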
Example #2
    def __init__(self,
                 dataset: Dataset,
                 sampler: Sampler,
                 translator: Translator,
                 base_dir: str,
                 train_file: str,
                 val_file: str,
                 test_file: str,
                 others_file: str,
                 split: str = 'train',
                 use_seqinter: bool = False,
                 batch_saving: Optional[int] = None) -> None:

        self.dataset = dataset
        self.sampler = sampler
        self.device = select_device(None)
        self.translator = translator.to(self.device)

        self.base_dir = base_dir
        self.train_file = train_file
        self.val_file = val_file
        self.test_file = test_file
        self.others_file = others_file
        self.split = split
        self.use_seqinter = use_seqinter
        self.batch_saving = batch_saving

        self.register_attrs('base_dir', 'train_file', 'val_file',
                            'test_file', 'others_file')
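
register_attrs also recurs across these examples. It presumably marks attributes for inclusion in the object's saved state; a minimal sketch under that assumption (not flambe's actual implementation):

    class Component:
        def __init__(self):
            self._registered = set()

        def register_attrs(self, *names):
            # Record which attribute names should be serialized with the
            # component and restored when it is loaded.
            self._registered.update(names)

        def state_dict(self):
            return {name: getattr(self, name) for name in self._registered}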
Example #3
File: seq2seq.py  Project: al5250/imitkd
    def __init__(self,
                 alias: str,
                 beam_size: int,
                 max_seq_len: int,
                 tgt_sos_idx: int = 2,
                 tgt_eos_idx: int = 3,
                 device: Optional[str] = None,
                 **kwargs) -> None:
        super().__init__()

        interface = torch.hub.load('pytorch/fairseq', alias, **kwargs)

        self.beam_size = beam_size
        self.max_seq_len = max_seq_len
        self.tgt_sos_idx = tgt_sos_idx
        self.tgt_eos_idx = tgt_eos_idx

        self.interface = interface
        args = copy.copy(self.interface.args)
        args.beam = beam_size
        args.max_len_b = max_seq_len - 1  # one extra token for <SOS>
        self.generator = self.interface.task.build_generator(args)

        self.src_pad_idx = interface.models[0].encoder.padding_idx
        self.tgt_pad_idx = interface.models[0].decoder.padding_idx

        self.device = select_device(device)
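
A hypothetical instantiation of this wrapper (the class name is assumed; the alias and keyword arguments follow fairseq's documented torch.hub entries):

    model = FairseqSeq2Seq(alias='transformer.wmt19.en-de.single_model',
                           beam_size=5,
                           max_seq_len=128,
                           tokenizer='moses',
                           bpe='fastbpe')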
Example #4
    def __init__(self,
                 dataset: Dataset,
                 model: Module,
                 metric_fn: Metric,
                 eval_sampler: Optional[Sampler] = None,
                 eval_data: str = 'test',
                 device: Optional[str] = None,
                 save_preds: bool = False,
                 teacher: Optional[Module] = None,
                 save_targets: bool = False,
                 gen_style: str = 'greedy') -> None:
        self.eval_sampler = eval_sampler or BaseSampler(batch_size=16,
                                                        shuffle=False)
        self.model = model
        self.metric_fn = metric_fn
        self.eval_metric = None
        self.dataset = dataset

        self.device = select_device(device)

        data = getattr(dataset, eval_data)
        self._eval_iterator = self.eval_sampler.sample(data)

        # By default, no prefix applied to tb logs
        self.tb_log_prefix = None

        self.save_preds = save_preds
        self.decode_data = None
        self.teacher = teacher
        self.save_targets = save_targets
        self.targets = None
        self.gen_style = gen_style
        self.register_attrs('decode_data', 'targets')
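
The evaluation split is resolved with getattr(dataset, eval_data), so any object exposing its splits as attributes will do. A minimal stand-in (hypothetical, not the project's Dataset class):

    class ToyDataset:
        # Splits are plain attributes, so getattr(dataset, 'train'),
        # getattr(dataset, 'val'), and getattr(dataset, 'test') resolve.
        def __init__(self, train, val, test):
            self.train = train
            self.val = val
            self.test = test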
Example #5
File: metric.py  Project: al5250/imitkd
    def compute(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        pred = torch.log_softmax(pred, dim=-1)

        device = select_device(None)
        # Build the label-smoothed target distribution: start from the
        # uniform smoothing template, put the confidence mass on the true
        # label, and zero out rows whose target is the ignore index.
        prob = self.one_hot.repeat(target.size(0), 1).to(device)
        prob.scatter_(1, target.unsqueeze(1), self.confidence)
        prob.masked_fill_((target == self.ignore_index).unsqueeze(1), 0)

        return F.kl_div(pred, prob, reduction='batchmean')
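
compute assumes one_hot, confidence, and ignore_index were prepared elsewhere. A sketch of an __init__ that would make it runnable, following the standard label-smoothing construction (an assumption about this project, not its actual code):

    def __init__(self, vocab_size, smoothing=0.1, ignore_index=0):
        self.ignore_index = ignore_index
        self.confidence = 1.0 - smoothing
        # Spread the smoothing mass uniformly over the vocabulary,
        # excluding the true-label slot and the ignored (padding) slot.
        one_hot = torch.full((1, vocab_size), smoothing / (vocab_size - 2))
        one_hot[0, ignore_index] = 0
        self.one_hot = one_hot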
Example #6
    def __init__(self,
                 beam_size: int,
                 max_seq_len: int,
                 device: Optional[str] = None) -> None:
        super().__init__()

        self.beam_size = beam_size
        self.max_seq_len = max_seq_len

        self._model = BartForConditionalGeneration.from_pretrained(
            'bart-large-cnn')

        self.tgt_sos_idx = self._model.config.bos_token_id
        self.tgt_eos_idx = self._model.config.eos_token_id
        self.tgt_pad_idx = self._model.config.pad_token_id

        self.device = select_device(device)
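
Note that 'bart-large-cnn' is the checkpoint name accepted by early releases of transformers; current releases expect the namespaced identifier:

    model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')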
Example #7
File: sampler.py  Project: al5250/imitkd
    def __init__(self,
                 max_seq_len: int,
                 min_seq_len: int = 0,
                 translator: Optional[Translator] = None,
                 teacher: Optional[Seq2Seq] = None,
                 device: Optional[str] = None,
                 sample_factor: int = 1,
                 scheduler_type: str = 'exponential',
                 **kwargs) -> None:
        self.max_seq_len = max_seq_len
        self.min_seq_len = min_seq_len
        self.device = select_device(device)

        self.sample_factor = sample_factor
        self.scheduler_type = scheduler_type

        self.initialize(translator, teacher)
        super().__init__(**kwargs)
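
A hypothetical instantiation (the class name and the batch_size keyword, presumably consumed by the parent sampler via **kwargs, are assumptions):

    sampler = ScheduledSampler(max_seq_len=100,
                               min_seq_len=5,
                               translator=translator,
                               teacher=teacher,
                               scheduler_type='exponential',
                               batch_size=32)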
Example #8
File: eval.py  Project: yukw777/flambe
    def __init__(self,
                 dataset: Dataset,
                 model: Module,
                 metric_fn: Metric,
                 eval_sampler: Optional[Sampler] = None,
                 eval_data: str = 'test',
                 device: Optional[str] = None) -> None:
        """Initialize the evaluator.

        Parameters
        ----------
        dataset : Dataset
            The dataset to run evaluation on
        model : Module
            The model to train
        metric_fn: Metric
            The metric to use for evaluation
        eval_sampler : Optional[Sampler]
            The sampler to use over validation examples. By default
            it will use `BaseSampler` with batch size 16 and without
            shuffling.
        eval_data: str
            The data split to evaluate on: one of train, val or test
        device: str, optional
            The device to use in the computation.

        """
        self.eval_sampler = eval_sampler or BaseSampler(batch_size=16,
                                                        shuffle=False)
        self.model = model
        self.metric_fn = metric_fn
        self.dataset = dataset

        self.device = select_device(device)

        data = getattr(dataset, eval_data)
        self._eval_iterator = self.eval_sampler.sample(data)

        # By default, no prefix applied to tb logs
        self.tb_log_prefix = None

        self.eval_metric: Union[float, None] = None
        self.register_attrs('eval_metric')
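
A hypothetical usage, assuming this __init__ belongs to an Evaluator class (the class and metric names are illustrative):

    evaluator = Evaluator(dataset=dataset,
                          model=model,
                          metric_fn=Accuracy(),
                          eval_data='val')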
Example #9
    def __init__(
            self,
            dataset: Dataset,
            train_sampler: Sampler,
            val_sampler: Sampler,
            model: Module,
            loss_fn: Metric,
            metric_fn: Metric,
            optimizer: Optimizer,
            scheduler: Optional[_LRScheduler] = None,
            iter_scheduler: Optional[_LRScheduler] = None,
            device: Optional[str] = None,
            max_steps: int = 10,
            epoch_per_step: float = 1.0,
            iter_per_step: Optional[int] = None,
            batches_per_iter: int = 1,
            lower_is_better: bool = False,
            max_grad_norm: Optional[float] = None,
            max_grad_abs_val: Optional[float] = None,
            extra_validation_metrics: Optional[List[Metric]] = None) -> None:
        """Initialize an instance of Trainer

        Parameters
        ----------
        dataset : Dataset
            The dataset to use in training the model
        train_sampler : Sampler
            The sampler to use over training examples during training
        val_sampler : Sampler
            The sampler to use over validation examples
        model : Module
            The model to train
        loss_fn: Metric
            The loss function to use in training the model
        metric_fn: Metric
            The metric function to use in evaluation
        optimizer : torch.optim.Optimizer
            The optimizer to use
        scheduler : torch.optim.lr_scheduler._LRScheduler, optional
            An optional learning rate scheduler to run after each step
        iter_scheduler : torch.optim.lr_scheduler._LRScheduler, optional
            An optional learning rate scheduler to run after each batch
            (i.e. iteration)
        device: str, optional
            The device to use in the computation.
        max_steps : int, optional
            The maximum number of training steps to run
        epoch_per_step : float, optional
            Fraction of an epoch to perform in a single training step
            (i.e. before a checkpoint). Defaults to 1.
            Overridden by `iter_per_step`, if given.
        iter_per_step : int, optional
            Number of iterations to perform in a single training step.
            Overrides `epoch_per_step` if given.
        batches_per_iter : int, optional
            Number of batches to pass through the model before calling
            optimizer.step. Values other than 1 require the sampler to
            have drop_last set to True. Defaults to 1, so optimizer.step
            is called after every batch.
        lower_is_better : bool, optional
            If true, the lowest val metric is considered best,
            otherwise the highest. Defaults to False.
        max_grad_norm : float, optional
            Maximum Euclidean norm of gradient after clipping.
        max_grad_abs_val: float, optional
            Maximum absolute value of all gradient vector components
            after clipping.
        extra_validation_metrics: Optional[List[Metric]]
            A list of additional metrics to compute at each step. These
            are reported but do not guide the training procedure
            (i.e. model selection through early stopping).

        """
        self.dataset = dataset
        self.train_sampler = train_sampler
        self.val_sampler = val_sampler
        self.model = model
        self.loss_fn = loss_fn
        self.metric_fn = metric_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.iter_scheduler = iter_scheduler
        self.lower_is_better = lower_is_better
        self.max_grad_norm = max_grad_norm
        self.max_grad_abs_val = max_grad_abs_val
        self.extra_validation_metrics = extra_validation_metrics or []

        # By default, no prefix applied to tb logs
        self.tb_log_prefix = None

        # Select right device
        self.device = select_device(device)

        if (not getattr(self.train_sampler, 'drop_last', False)
                and batches_per_iter != 1):
            raise ValueError(
                f'batches_per_iter cannot be set to {batches_per_iter} '
                'if the sampler does not have `drop_last` set to True')

        self.batches_per_iter = batches_per_iter
        n_batches = self.train_sampler.length(dataset.train)

        if iter_per_step is None:
            # Compute epoch per step
            if self.batches_per_iter > n_batches:
                raise ValueError(
                    f'Please set batches_per_iter ({self.batches_per_iter}) '
                    f'to be ≤ the length of your train_sampler '
                    f'({n_batches})')
            iter_per_epoch = n_batches // self.batches_per_iter
            iter_per_step = math.ceil(epoch_per_step * iter_per_epoch)
        else:
            # Iter per step takes precedent over epoch_per_step
            epoch_per_step = iter_per_step / n_batches

        self.iter_per_step = iter_per_step
        self.max_steps = max_steps

        self._step = 0
        self._best_metric = None
        self._best_model = None
        self.register_attrs('_step', '_best_metric', '_best_model')

        n_epochs = math.ceil(epoch_per_step * max_steps)

        self._train_iterator = self.train_sampler.sample(
            dataset.train, n_epochs)
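
To make the step-size arithmetic concrete, a worked example of the branch where iter_per_step is not given:

    # n_batches = 1000 and batches_per_iter = 4
    #   -> iter_per_epoch = 1000 // 4 = 250
    # with epoch_per_step = 0.5
    #   -> iter_per_step = ceil(0.5 * 250) = 125
    # i.e. each training step (checkpoint) covers half an epoch.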