def lr_step(self, val_loss=None, epoch=None):
    """Adjust the learning rate depending on the validation loss."""
    # Fan the request out to every replica and block until all respond.
    pending = [
        self.call_async(rank, '_async_lr_step', val_loss=val_loss, epoch=epoch)
        for rank in range(self.num_replicas)
    ]
    new_lrs = Future.gen_list(pending)
    # Replicas stay in lockstep, so the first result is representative.
    return new_lrs[0]
def _scatter_samples(self, samples, volatile=False, replace_empty_samples=False):
    """Split and distribute a sample across GPUs."""
    if replace_empty_samples:
        # pad by cycling through the given samples
        padded = list(islice(cycle(samples), self.num_replicas))
    else:
        # pad with None until its size is equal to the number of replicas
        padded = samples + [None] * (self.num_replicas - len(samples))
    # Hand each replica its shard and wait for all transfers to finish.
    Future.gen_list([
        self.call_async(rank, '_async_prepare_sample',
                        sample=padded[rank], volatile=volatile)
        for rank in range(self.num_replicas)
    ])
def __init__(self, args, model, criterion, device_ids=None, multiprocessing_method='spawn'):
    """Spawn one worker process per device and initialize each replica."""
    if device_ids is None:
        device_ids = tuple(range(torch.cuda.device_count()))
    super().__init__(device_ids, multiprocessing_method)
    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')
    # Move parameters to shared memory so child processes see the same tensors.
    shared_model = model.share_memory()
    uid = nccl.get_unique_id()
    self.criterion = criterion
    # Initialize every replica and wait for all of them to come up.
    init_futures = [
        self.call_async(
            rank, '_async_init',
            args=args, model=shared_model, criterion=criterion, nccl_uid=uid,
        )
        for rank in range(self.num_replicas)
    ]
    Future.gen_list(init_futures)
    self._grads_initialized = False
def valid_step(self, samples, criterion):
    """Do forward pass in parallel."""
    # distribute one shard of the batch to each GPU
    self._scatter_samples(samples, volatile=True)
    criterion.prepare(samples)
    # run the forward pass on every replica and collect the results
    per_replica_losses = Future.gen_list([
        self.call_async(rank, '_async_valid_step', criterion=criterion)
        for rank in range(self.num_replicas)
    ])
    # combine the per-replica losses into a single value
    return criterion.aggregate(per_replica_losses)
def valid_step(self, samples):
    """Do forward pass in parallel."""
    # distribute one shard of the batch to each GPU
    self._scatter_samples(samples, volatile=True)
    # evaluation-mode forward pass on every replica
    pending = [
        self.call_async(rank, '_async_forward', eval=True)
        for rank in range(self.num_replicas)
    ]
    _sample_sizes, logging_outputs, ooms_fwd = Future.gen_tuple_list(pending)
    # validation forward passes are expected never to OOM
    assert sum(ooms_fwd) == 0
    # merge the per-replica logging outputs into one dict
    return self.criterion.__class__.aggregate_logging_outputs(logging_outputs)
def train_step(self, samples, criterion):
    """Do forward, backward and gradient step in parallel."""
    assert isinstance(criterion, FairseqCriterion)
    # distribute one shard of the batch to each GPU
    self._scatter_samples(samples)
    criterion.prepare(samples)
    # run forward, backward and the optimizer step on every replica
    pending = [
        self.call_async(rank, '_async_train_step', criterion=criterion)
        for rank in range(self.num_replicas)
    ]
    per_replica_losses, grad_norms = Future.gen_tuple_list(pending)
    # gradients are synchronized, so every replica reports the same norm
    return criterion.aggregate(per_replica_losses), grad_norms[0]
def valid_step(self, samples, criterion):
    """Do forward pass in parallel."""
    # distribute one shard of the batch to each GPU
    self._scatter_samples(samples, volatile=True)
    criterion.prepare(samples)
    # forward pass on every replica
    pending = [
        self.call_async(rank, '_async_valid_step', criterion=criterion)
        for rank in range(self.num_replicas)
    ]
    # each replica returns (loss, rouge_greedy, rouge_sampled)
    per_losses, rouge_greedy, rouge_sampled = Future.gen_tuple_list(pending)
    loss = criterion.aggregate(per_losses)
    return (
        loss,
        utils.sum_if_not_none(rouge_greedy),
        utils.sum_if_not_none(rouge_sampled),
    )
def valid_step(self, samples, criterion):
    """Do forward pass in parallel."""
    # distribute one shard of the batch to each GPU; events signal data readiness
    samples, data_events = self._scatter_samples(samples, volatile=True)
    criterion.prepare(samples)
    # launch the forward pass on each replica, gated on its data event
    pending = []
    for rank, event in enumerate(data_events):
        pending.append(self.call_async(
            rank, '_async_valid_step',
            sample=samples[rank], criterion=criterion, data_event=event,
        ))
    # combine the per-replica losses into a single value
    return criterion.aggregate(Future.gen_list(pending))
def set_seed(self, seed):
    """Seed the RNG on every replica and wait until all have finished."""
    pending = [
        self.call_async(rank, '_async_set_seed', seed=seed)
        for rank in range(self.num_replicas)
    ]
    Future.gen_list(pending)
def train_step(self, samples, criterion):
    """Do forward, backward and gradient step in parallel."""
    assert isinstance(criterion, FairseqCriterion)
    # distribute one shard of the batch to each GPU
    self._scatter_samples(samples)
    criterion.prepare(samples)
    # forward pass, backward pass and gradient step; each replica
    # returns a namedtuple of per-replica statistics
    pending = [
        self.call_async(rank, '_async_train_step', criterion=criterion)
        for rank in range(self.num_replicas)
    ]
    (losses, grad_norms, ml_losses, rl_losses, mean_rouge_greedy,
     mean_rouge_sampled, mean_sum_log_probs) = Future.gen_tuple_list(pending)
    # aggregate every statistic into a single Results record
    return Results(
        criterion.aggregate(losses),
        grad_norms[0],  # gradients are synchronized, norms match across replicas
        criterion.aggregate(ml_losses),
        utils.sum_if_not_none(rl_losses),
        utils.sum_if_not_none(mean_rouge_greedy),
        utils.sum_if_not_none(mean_rouge_sampled),
        utils.sum_if_not_none(mean_sum_log_probs),
    )