def benchmark_torch_function(iters: int, f, *args) -> float:
    """Estimates the average duration of a single inference call in seconds.

    If the input is batched, the estimate is for one batched inference call.

    Args:
        iters: number of inference iterations to run
        f: a function that performs a single inference call
        *args: arguments passed to ``f``

    Returns:
        estimated average duration in seconds of a single inference call
    """
    # Warm-up call outside of the timed region
    with torch.inference_mode():
        f(*args)
    torch.cuda.synchronize()
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    print("== Start benchmark iterations")
    with torch.inference_mode():
        start_event.record()
        for _ in range(iters):
            f(*args)
        end_event.record()
    torch.cuda.synchronize()
    print("== End benchmark iterations")
    return (start_event.elapsed_time(end_event) * 1.0e-3) / iters
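# Hypothetical usage sketch (not from the original source): timing a torchvision
# ResNet-50 forward pass on GPU. The model choice, batch size, and iteration count
# are illustrative assumptions.
import torch
import torchvision

model = torchvision.models.resnet50().cuda().eval()
dummy_input = torch.randn(8, 3, 224, 224, device="cuda")
avg_seconds = benchmark_torch_function(100, model, dummy_input)
print(f"Average inference time: {avg_seconds * 1e3:.2f} ms")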
def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            output = output["out"]

            confmat.update(target.flatten(), output.argmax(1).flatten())
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            num_processed_samples += image.shape[0]

        confmat.reduce_from_all_processes()

    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if (
        hasattr(data_loader.dataset, "__len__")
        and len(data_loader.dataset) != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    return confmat
def eval_step(self, dataloader: torch.utils.data.DataLoader):
    """Evaluation (val / test) step.

    Args:
        dataloader (torch.utils.data.DataLoader): Torch dataloader to load batches from.
    """
    # Set model to eval mode
    self.model.eval()
    loss = 0.0
    y_trues, y_probs = [], []

    # Iterate over val batches
    with torch.inference_mode():
        for i, batch in enumerate(dataloader):
            # Step
            batch = [item.to(self.device) for item in batch]  # Set device
            inputs, y_true = batch[:-1], batch[-1]
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, y_true).item()

            # Cumulative Metrics
            loss += (J - loss) / (i + 1)

            # Store outputs
            y_prob = torch.sigmoid(z).cpu().numpy()
            y_probs.extend(y_prob)
            y_trues.extend(y_true.cpu().numpy())

    return loss, np.vstack(y_trues), np.vstack(y_probs)
def render_rotating_volume(volume_model, device, n_frames=50, video_size=400,
                           n_pts_per_ray=192):
    renderer = get_renderer(video_size, n_pts_per_ray)

    # Render frames.
    with torch.inference_mode():
        print("Generating rotating volume ...")
        elev = 30
        azimuths = torch.linspace(0., 360., n_frames, device=device)
        frames = []
        for azim in tqdm(azimuths):
            R, T = look_at_view_transform(dist=args.camera_radius, elev=elev, azim=azim)
            batch_cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
            rgbo = volume_model(batch_cameras, renderer)
            rgb = rgbo[Ellipsis, :3]
            opacity = rgbo[Ellipsis, 3:4]
            frame = opacity * rgb + 1 - opacity
            frame = frame.clamp(0.0, 1.0)
            frames.append(frame)

        frames = torch.cat(frames).clamp(0., 1.)
        frames = frames.movedim(-1, 1)  # THWC to TCHW.

    return frames.cpu().numpy()
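# Hypothetical follow-up (not from the original source): writing the rendered frames to a
# GIF with imageio. Assumes `volume_model` and `device` exist and that the function above
# returns a TCHW float array in [0, 1]; the output filename is illustrative.
import imageio
import numpy as np

frames = render_rotating_volume(volume_model, device)               # (T, C, H, W)
frames_hwc = (np.moveaxis(frames, 1, -1) * 255).astype(np.uint8)    # back to (T, H, W, C)
imageio.mimsave("rotating_volume.gif", list(frames_hwc))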
def encode(self, docs: DocumentArray, **kwargs):
    with torch.inference_mode():
        _input = torch.from_numpy(docs.blobs.astype('float32'))
        _features = self._get_features(_input).detach()
        _features = _features.numpy()
        _features = self._get_pooling(_features)
        docs.embeddings = _features
def train_step(self, optimizer, objective, sparse_depth, sparse_intensity, gt_depth,
               rgb, dirs, offsets, im_shape):
    output = self.model(d=sparse_depth, rgb=rgb, r=sparse_intensity, dirs=dirs,
                        offsets=offsets, im_shape=im_shape)

    # Calculate loss for valid pixels in the ground truth
    loss = objective(output['d'], gt_depth, output['cd'], self.epoch)
    loss.backward()

    # Skip the optimizer step if any gradient contains NaNs
    skip = False
    for param in self.model.parameters():
        if param.requires_grad and param.grad.isnan().any():
            print(param)
            skip = True
    if skip:
        with torch.inference_mode():
            self.model.cpu().visualize_weights()
            plt.show()
    else:
        optimizer.step()
    for param in self.model.parameters():
        param.grad = None

    return loss.detach().item(), sparse_depth.detach().size(0)
def predict_step(self, dataloader: torch.utils.data.DataLoader):
    """Prediction (inference) step.

    Note:
        Loss is not calculated for this loop.

    Args:
        dataloader (torch.utils.data.DataLoader): Torch dataloader to load batches from.
    """
    # Set model to eval mode
    self.model.eval()
    y_trues, y_probs = [], []

    # Iterate over batches
    with torch.inference_mode():
        for i, batch in enumerate(dataloader):
            # Forward pass w/ inputs
            batch = [item.to(self.device) for item in batch]  # Set device
            inputs, y_true = batch[:-1], batch[-1]
            z = self.model(inputs)

            # Store outputs
            y_prob = torch.sigmoid(z).cpu().numpy()
            y_probs.extend(y_prob)
            y_trues.extend(y_true.cpu().numpy())

    return np.vstack(y_trues), np.vstack(y_probs)
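# Hypothetical usage sketch (not from the original source): turning the sigmoid
# probabilities returned by predict_step into hard multi-label predictions.
# `trainer` and `test_dataloader` are assumed to exist.
import numpy as np

y_true, y_prob = trainer.predict_step(test_dataloader)
y_pred = (y_prob >= 0.5).astype(int)        # threshold each label independently
label_accuracy = (y_pred == y_true).mean()  # element-wise label accuracy
print(f"label accuracy: {label_accuracy:.3f}")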
def test_model(
    args: Arguments,
    model: nn.Module,
    test_loader: TensorDataLoader,
    criterion: nn.Module,
) -> None:
    model.eval()
    test_loss, correct = 0.0, 0.0
    test_len = len(test_loader)
    with torch.inference_mode(), tqdm(desc="Test", total=test_len, ncols=120) as pbar:
        for data, target in test_loader:
            if isinstance(data, (list, tuple)):
                output = model(*data)
                batch_size = data[0].size(args.batch_dim)
            else:
                output = model(data)
                batch_size = data.size(args.batch_dim)
            loss = criterion(output, target)
            test_loss += loss.item() * batch_size
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            pbar.update()

    test_loss /= test_len
    print(
        f"\nTest set: Average loss: {test_loss:.4f},",
        f"Accuracy: {correct}/{test_len} ({100. * correct / test_len:.2f}%)\n",
    )
def compute_query_rank(model, preprocess, rendering, query, queries_r, device):
    if query not in queries_r:
        print(f"WARN: query \"{query}\" not in retrieval set. Adding it.")
        queries_r = queries_r + [query]
        query_idx = len(queries_r) - 1
    else:
        query_idx = queries_r.index(query)

    with torch.inference_mode():
        # Embed the retrieval set of captions.
        queries_tok = clip.tokenize(queries_r).to(device)
        z_queries = model.encode_text(queries_tok).detach()
        z_queries = F.normalize(z_queries, dim=-1)

        # Embed render.
        assert rendering.ndim == 4
        assert rendering.shape[1] == 3
        x = preprocess(rendering)
        z_rendering = model.encode_image(x)
        z_rendering = F.normalize(z_rendering, dim=-1)

    sim = torch.sum(z_rendering * z_queries, dim=-1)
    ranks = torch.argsort(sim, dim=0, descending=True)
    return torch.nonzero(ranks == query_idx)[0].item(), sim[query_idx].item()
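# Hypothetical usage sketch (not part of the original source): loading CLIP and scoring a
# rendered image batch against a small retrieval set. The model variant, captions,
# preprocessing transform, and rendering tensor are illustrative assumptions.
import clip
import torch
from torchvision import transforms

device = "cuda" if torch.cuda.is_available() else "cpu"
model, _ = clip.load("ViT-B/32", device=device)
model = model.float()  # keep everything in fp32 for this sketch

# Tensor-friendly stand-in for CLIP preprocessing (the PIL-based transform returned by
# clip.load() does not accept batched tensors); the constants are CLIP's published stats.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=(0.48145466, 0.4578275, 0.40821073),
                         std=(0.26862954, 0.26130258, 0.27577711)),
])

queries_r = ["a photo of a chair", "a photo of a dog", "a photo of a tree"]
rendering = torch.rand(1, 3, 256, 256, device=device)  # stand-in for a rendered view
rank, similarity = compute_query_rank(model, preprocess, rendering,
                                      "a photo of a chair", queries_r, device)
print(rank, similarity)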
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    with torch.inference_mode():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()

    print(" * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format(
        top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as input_dict,
    after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.inference_mode():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
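# Hypothetical usage sketch (not from the original source): averaging per-loss values
# across distributed workers before logging. The loss names and values are illustrative.
loss_dict = {
    "loss_classifier": torch.tensor(0.31, device="cuda"),
    "loss_box_reg": torch.tensor(0.12, device="cuda"),
}
loss_dict_reduced = reduce_dict(loss_dict)           # averaged over all ranks
total_loss = sum(loss_dict_reduced.values()).item()  # scalar for logging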
def compute_scores(model, batch, beam_width=32, beam_cut=100.0, scale=1.0, offset=0.0,
                   blank_score=2.0, reverse=False):
    """
    Compute scores for model.
    """
    with torch.inference_mode():
        device = next(model.parameters()).device
        dtype = torch.float16 if half_supported() else torch.float32
        scores = model(batch.to(dtype).to(device))
        if reverse:
            scores = model.seqdist.reverse_complement(scores)

        sequence, qstring, moves = beam_search(
            scores, beam_width=beam_width, beam_cut=beam_cut,
            scale=scale, offset=offset, blank_score=blank_score
        )

    return {
        'qstring': qstring,
        'sequence': sequence,
        'moves': np.array(moves, dtype=bool),
    }
def align_utterance(utterance, utterance_ID, wav_file, model):
    labels = tuple(finnish_labels_dict.keys())
    with torch.inference_mode():
        waveform, _ = torchaudio.load(wav_file)
        emissions, _ = model(waveform.to(device))
        emissions = torch.log_softmax(emissions, dim=-1)
    emission = emissions[0].cpu().detach()

    transcript_raw = utterance
    transcript = transcript_raw.upper().replace(" ", "|")
    dictionary = {c: i for i, c in enumerate(labels)}
    tokens = [dictionary[c] for c in transcript]

    trellis = get_trellis(emission, tokens)
    path = backtrack(trellis, emission, tokens)
    segments = merge_repeats(path, transcript)
    word_segments = merge_words(segments)

    ratio = waveform.size(1) / (trellis.size(0) - 1)
    return alignment2ctm(word_segments, utterance_ID, 1, ratio)
def test_resnet18(self):
    import torchvision
    EXAMPLE_IMAGE_TENSORS = [torch.randn(3, 10, 10) for _ in range(3)]
    model = torchvision.models.resnet.resnet18(pretrained=True).eval()
    with torch.inference_mode():
        result_model_nt = model(ntnt_nograd(EXAMPLE_IMAGE_TENSORS)).unbind()
        result_model = model(torch.stack(EXAMPLE_IMAGE_TENSORS)).unbind()
    for t0, t1 in zip(result_model_nt, result_model):
        self.assertEqual(t0, t1)

    # non-regular shape smoke test
    EXAMPLE_IMAGE_TENSORS = [torch.randn(3, 100 * i, 100) for i in range(1, 4)]
    with torch.inference_mode():
        model(ntnt_nograd(EXAMPLE_IMAGE_TENSORS))
def visualize_weights(self):
    checkpoint_path = '{}/weights_run{:04d}_ep{:04d}.pt'.format(
        self.params_dir, self.run, self.epoch)
    img_file_path = 'images/{}_weights.png'.format(
        checkpoint_path.replace('.pt', '').replace('/weights', '').replace(
            '/', '_').replace('\\', '_'))
    with torch.inference_mode():
        self.model.cpu().visualize_weights(img_file_path)
def update(self, a, b):
    n = self.num_classes
    if self.mat is None:
        self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
    with torch.inference_mode():
        k = (a >= 0) & (a < n)
        inds = n * a[k].to(torch.int64) + b[k]
        self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)
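# Hypothetical companion method (not shown in the original excerpt) for the same
# confusion-matrix class: deriving global accuracy, per-class accuracy, and per-class
# IoU from the accumulated matrix `self.mat`.
def compute(self):
    h = self.mat.float()
    acc_global = torch.diag(h).sum() / h.sum()                  # overall pixel accuracy
    acc = torch.diag(h) / h.sum(1)                              # per-class accuracy
    iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))  # per-class IoU
    return acc_global, acc, iu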
def _evaluate(self, checkpoint: int, data_iter,
              checkpoint_decoder: Optional[checkpoint_decoder_pt.CheckpointDecoder]
              ) -> List[loss_pt.LossMetric]:
    """
    Computes loss(es) on validation data and returns their metrics.

    :param checkpoint: Current checkpoint number, used to name decoder output.
    :param data_iter: Validation data iterator.
    :param checkpoint_decoder: Optional checkpoint decoder run on the primary worker.
    :return: List of validation metrics, same order as self.loss_functions.
    """
    # Switch model to eval mode (disable dropout, etc.) to score validation
    # set and run checkpoint decoder.
    self.sockeye_model.eval()

    data_iter.reset()
    val_metrics = [lf.create_metric() for lf in self.loss_functions]
    for batch in data_iter:
        batch = batch.load(device=self.device)
        with torch.inference_mode():
            # Forward: use sockeye_model because (traced) training_model
            # doesn't support eval mode (still runs dropout, etc.)
            outputs = self.sockeye_model(batch.source, batch.source_length,
                                         batch.target, batch.target_length)
            # Loss
            loss_outputs = [loss_function(outputs, batch.labels)
                            for loss_function in self.loss_functions]
        # Update validation metrics for batch
        for loss_metric, (loss_value, num_samples) in zip(val_metrics, loss_outputs):
            loss_metric.update(loss_value.item(), num_samples.item())

    # Primary worker optionally runs the checkpoint decoder
    decoder_metrics = {}  # type: Dict[str, float]
    if utils.is_primary_worker() and checkpoint_decoder is not None:
        output_name = os.path.join(self.config.output_dir,
                                   C.DECODE_OUT_NAME.format(checkpoint=checkpoint))
        decoder_metrics = checkpoint_decoder.decode_and_evaluate(output_name=output_name)
    # Broadcast decoder metrics (if any) from primary worker to secondary workers
    if utils.is_distributed():
        decoder_metrics = utils.broadcast_object(decoder_metrics)
    # Add decoder metrics (if any) to validation metrics
    for metric_name, metric_value in decoder_metrics.items():
        assert metric_name not in val_metrics, "Duplicate validation metric %s" % metric_name
        metric = loss_pt.LossMetric(name=metric_name)
        metric.update(metric_value, num_samples=1)
        val_metrics.append(metric)

    logger.info('Checkpoint [%d]\t%s', self.state.checkpoint,
                "\t".join("Validation-%s" % str(lm) for lm in val_metrics))

    # Switch model back to train mode to continue training
    self.sockeye_model.train()

    return val_metrics
def test_torchscript_script():
    openpifpaf.network.heads.CompositeField3.inplace_ops = False
    openpifpaf.network.heads.CompositeField4.inplace_ops = False

    datamodule = openpifpaf.datasets.factory('cocokp')
    model, _ = openpifpaf.network.Factory(
        base_name='shufflenetv2k16',
    ).factory(head_metas=datamodule.head_metas)

    with torch.inference_mode():
        torch.jit.script(model)
def learn(self, iteratable_data):
    '''
    Learn the observed data points for vector representation of the input images.

    Args:
        iteratable_data:     is-a `IteratableData`.
    '''
    if isinstance(iteratable_data, IteratableData) is False:
        raise TypeError("The type of `iteratable_data` must be `IteratableData`.")

    self.__loss_list = []
    learning_rate = self.__learning_rate
    try:
        epoch = 0
        iter_n = 0
        for batch_observed_arr, batch_target_arr, test_batch_observed_arr, test_batch_target_arr in iteratable_data.generate_learned_samples():
            self.epoch = epoch
            self.batch_size = batch_observed_arr.shape[0]
            self.encoder_optimizer.zero_grad()
            self.decoder_optimizer.zero_grad()
            # rank-3
            pred_arr = self.inference(batch_observed_arr)
            loss = self.compute_loss(pred_arr, batch_target_arr)
            loss.backward()
            self.encoder_optimizer.step()
            self.decoder_optimizer.step()
            self.regularize()

            if (iter_n + 1) % int(iteratable_data.iter_n / iteratable_data.epochs) == 0:
                with torch.inference_mode():
                    # rank-3
                    test_pred_arr = self.inference(test_batch_observed_arr)
                    test_loss = self.compute_loss(test_pred_arr, test_batch_target_arr)

                _loss = loss.to('cpu').detach().numpy().copy()
                _test_loss = test_loss.to('cpu').detach().numpy().copy()
                self.__loss_list.append((_loss, _test_loss))
                self.__logger.debug("Epochs: " + str(epoch + 1) + " Train loss: "
                                    + str(_loss) + " Test loss: " + str(_test_loss))
                epoch += 1
            iter_n += 1

    except KeyboardInterrupt:
        self.__logger.debug("Interrupt.")

    self.__logger.debug("end. ")
def learn(self, iteratable_data):
    '''
    Learn samples drawn by `IteratableData.generate_learned_samples()`.

    Args:
        iteratable_data:     is-a `IteratableData`.
    '''
    if isinstance(iteratable_data, IteratableData) is False:
        raise TypeError("The type of `iteratable_data` must be `IteratableData`.")

    self.__loss_list = []
    try:
        epoch = self.epoch
        iter_n = 0
        for batch_observed_arr, batch_target_arr, test_batch_observed_arr, test_batch_target_arr in iteratable_data.generate_learned_samples():
            batch_size = batch_observed_arr.shape[0]
            seq_len = batch_observed_arr.shape[1]
            self.initialize_params(
                input_dim=batch_observed_arr.reshape((batch_size, seq_len, -1)).shape[2],
                input_seq_len=seq_len
            )
            self.encoder_optimizer.zero_grad()
            self.decoder_optimizer.zero_grad()
            # rank-3
            pred_arr = self.inference(batch_observed_arr)
            loss = self.compute_loss(
                pred_arr,
                batch_target_arr
            )
            loss.backward()
            self.encoder_optimizer.step()
            self.decoder_optimizer.step()
            self.regularize()

            if (iter_n + 1) % int(iteratable_data.iter_n / iteratable_data.epochs) == 0:
                with torch.inference_mode():
                    # rank-3
                    test_pred_arr = self.inference(test_batch_observed_arr)
                    test_loss = self.compute_loss(
                        test_pred_arr,
                        test_batch_target_arr
                    )

                _loss = loss.to('cpu').detach().numpy().copy()
                _test_loss = test_loss.to('cpu').detach().numpy().copy()
                self.__loss_list.append((_loss, _test_loss))
                self.__logger.debug("Epochs: " + str(epoch + 1) + " Train loss: "
                                    + str(_loss) + " Test loss: " + str(_test_loss))
                epoch += 1
            iter_n += 1

    except KeyboardInterrupt:
        self.__logger.debug("Interrupt.")

    self.epoch = epoch
    self.__logger.debug("end. ")
def test_fake_dispatch_keys(self):
    with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
        x = torch.rand([4])
        f = FileCheck().check("CPU").check("ADInplaceOrView").check("AutogradCPU").check("AutocastCPU")
        f.run(torch._C._dispatch_key_set(x))

        with torch.inference_mode():
            x = torch.rand([4])
            y = x + x
            FileCheck().check("CPU").check("AutocastCPU").run(torch._C._dispatch_key_set(y))
            FileCheck().check_not("ADInplaceOrView").check_not("Autograd").run(torch._C._dispatch_key_set(y))
def get_wer_feat(mfst, asr, tokens_per_chunk, delay, model_stride_in_secs, batch_size):
    hyps = []
    refs = []
    audio_filepaths = []

    with open(mfst, "r") as mfst_f:
        print("Parsing manifest files...")
        for l in mfst_f:
            row = json.loads(l.strip())
            audio_filepaths.append(row['audio_filepath'])
            refs.append(row['text'])

    with torch.inference_mode():
        with torch.cuda.amp.autocast():
            batch = []
            asr.sample_offset = 0
            for idx in tqdm.tqdm(range(len(audio_filepaths)), desc='Sample:', total=len(audio_filepaths)):
                batch.append((audio_filepaths[idx], refs[idx]))

                if len(batch) == batch_size:
                    audio_files = [sample[0] for sample in batch]

                    asr.reset()
                    asr.read_audio_file(audio_files, delay, model_stride_in_secs)
                    hyp_list = asr.transcribe(tokens_per_chunk, delay)
                    hyps.extend(hyp_list)

                    batch.clear()
                    asr.sample_offset += batch_size

            if len(batch) > 0:
                asr.batch_size = len(batch)
                asr.frame_bufferer.batch_size = len(batch)
                asr.reset()

                audio_files = [sample[0] for sample in batch]
                asr.read_audio_file(audio_files, delay, model_stride_in_secs)
                hyp_list = asr.transcribe(tokens_per_chunk, delay)
                hyps.extend(hyp_list)

                batch.clear()
                asr.sample_offset += len(batch)

    if os.environ.get('DEBUG', '0') in ('1', 'y', 't'):
        for hyp, ref in zip(hyps, refs):
            print("hyp:", hyp)
            print("ref:", ref)

    wer = word_error_rate(hypotheses=hyps, references=refs)
    return hyps, refs, wer
def run(evaluator, seed=None):
    environment = get_environment(evaluator.config)
    if seed is not None:
        environment.seed(seed)
        set_all_seeds(seed)

    with torch.inference_mode():
        game = evaluator.play_game(environment)
    game.history.observations = []
    game.environment = None
    return game
def test_onnxruntime(tmpdir, test_batch_dim):
    """Export an onnx model and test outputs.

    This test predicts the outputs of a model with standard OpenPifPaf
    and using onnxruntime from an exported ONNX graph.
    """
    if test_batch_dim == 2 and torch.__version__.startswith('1.7'):
        pytest.skip()

    onnx_model_file = str(tmpdir.join('openpifpaf-shufflenetv2k16.onnx'))
    assert not os.path.exists(onnx_model_file)

    # create model
    openpifpaf.plugins.coco.CocoKp.upsample_stride = 2  # create a model with PixelShuffle
    datamodule = openpifpaf.datasets.factory('cocokp')
    model, _ = openpifpaf.network.Factory(
        base_name='shufflenetv2k16',
    ).factory(head_metas=datamodule.head_metas)
    print(model)

    # export to onnx file
    openpifpaf.export_onnx.apply(model, onnx_model_file, verbose=False)

    # pytorch prediction
    dummy_input = torch.randn(test_batch_dim, 3, 97, 129, dtype=torch.float32)
    model.eval()
    with torch.inference_mode():
        pred_pytorch = model(dummy_input)

    # onnxruntime prediction
    so = onnxruntime.SessionOptions()
    so.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    ort_session = onnxruntime.InferenceSession(onnx_model_file, so)
    input_name = ort_session.get_inputs()[0].name
    cif_name = ort_session.get_outputs()[0].name
    caf_name = ort_session.get_outputs()[1].name
    pred_onnx = ort_session.run([cif_name, caf_name], {input_name: dummy_input.numpy()})

    # compare shapes
    assert pred_pytorch[0].shape == pred_onnx[0].shape
    assert pred_pytorch[1].shape == pred_onnx[1].shape

    # compare values
    np.testing.assert_allclose(pred_pytorch[0].numpy(), pred_onnx[0], rtol=1e-03, atol=1e-05)
    np.testing.assert_allclose(pred_pytorch[1].numpy(), pred_onnx[1], rtol=1e-03, atol=1e-05)
def evaluate(self, args, model, criterion, data_loader, device, log_suffix=""):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = f"Test: {log_suffix}"

    num_processed_samples = 0
    start_time = time.time()
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, -1, header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            image = self.preprocess_test_sample(args, image)
            output = self.process_model_output(args, model(image))
            loss = criterion(output, target)

            acc1, acc5 = self.cal_acc1_acc5(output, target)
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = target.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
            functional.reset_net(model)

    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if (
        hasattr(data_loader.dataset, "__len__")
        and len(data_loader.dataset) != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    metric_logger.synchronize_between_processes()

    test_loss, test_acc1, test_acc5 = metric_logger.loss.global_avg, metric_logger.acc1.global_avg, metric_logger.acc5.global_avg
    print(
        f'Test: test_acc1={test_acc1:.3f}, test_acc5={test_acc5:.3f}, '
        f'test_loss={test_loss:.6f}, samples/s={num_processed_samples / (time.time() - start_time):.3f}'
    )
    return test_loss, test_acc1, test_acc5
def render_validation_view(volume_model, render_size, device):
    with torch.inference_mode():
        test_renderer = get_renderer(render_size, n_pts_per_ray=192)
        R, T = look_at_view_transform(dist=4.0, elev=45, azim=30)
        camera = FoVPerspectiveCameras(device=device, R=R, T=T)
        rgbo = volume_model(camera, test_renderer)
        rgb = rgbo[Ellipsis, :3]
        opacity = rgbo[Ellipsis, 3:4]
        rendering = opacity * rgb + (1 - opacity)
        rendering = rendering.clamp(0.0, 1.0)
    return rendering.squeeze(0)
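# Hypothetical usage sketch (not from the original source): displaying the validation
# render with matplotlib. `volume_model` and `device` are assumed to exist; the render
# size is illustrative.
import matplotlib.pyplot as plt

rendering = render_validation_view(volume_model, render_size=256, device=device)
plt.imshow(rendering.cpu().numpy())  # (H, W, 3) floats in [0, 1]
plt.axis("off")
plt.show()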
def evaluate(model, batches, device=None, transform=None):
    model = yann.resolve.model(model, required=True)

    if isinstance(batches, str):
        batches = yann.loader(batches, transform=transform)

    for x, y in batches:
        if device:
            x, y = x.to(device), y.to(device)
        model.eval()
        with torch.inference_mode():
            pred = model(x)
        yield x, y, pred
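# Hypothetical usage sketch (not from the original source): consuming the evaluate()
# generator to compute top-1 accuracy. `model` and `val_loader` are assumed to exist.
correct, total = 0, 0
for x, y, pred in evaluate(model, val_loader, device='cuda'):
    correct += (pred.argmax(dim=1) == y).sum().item()
    total += y.size(0)
print(f"top-1 accuracy: {correct / total:.3f}")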
def forward(
    self,
    encoder_output: torch.Tensor,
    encoded_lengths: torch.Tensor,
    partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None,
):
    """Returns a list of hypotheses given an input batch of the encoder hidden embedding.
    Output tokens are generated auto-regressively.

    Args:
        encoder_output: A tensor of size (batch, features, timesteps).
        encoded_lengths: list of int representing the length of each output sequence.

    Returns:
        packed list containing batch number of sentences (Hypotheses).
    """
    # Preserve decoder and joint training state
    decoder_training_state = self.decoder.training
    joint_training_state = self.joint.training

    with torch.inference_mode():
        # Apply optional preprocessing
        encoder_output = encoder_output.transpose(1, 2)  # (B, T, D)

        self.decoder.eval()
        self.joint.eval()

        hypotheses = []
        # Process each sequence independently
        with self.decoder.as_frozen(), self.joint.as_frozen():
            for batch_idx in range(encoder_output.size(0)):
                inseq = encoder_output[batch_idx, :, :].unsqueeze(1)  # [T, 1, D]
                logitlen = encoded_lengths[batch_idx]

                partial_hypothesis = partial_hypotheses[batch_idx] if partial_hypotheses is not None else None
                hypothesis = self._greedy_decode(inseq, logitlen, partial_hypotheses=partial_hypothesis)
                hypotheses.append(hypothesis)

        # Pack results into Hypotheses
        packed_result = pack_hypotheses(hypotheses, encoded_lengths)

    self.decoder.train(decoder_training_state)
    self.joint.train(joint_training_state)

    return (packed_result,)
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size

    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if isinstance(data_loader.sampler, DistributedSampler):
        # Get the len of UniformClipSampler inside DistributedSampler
        num_data_from_sampler = len(data_loader.sampler.dataset)
    else:
        num_data_from_sampler = len(data_loader.sampler)

    if (
        hasattr(data_loader.dataset, "__len__")
        and num_data_from_sampler != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    metric_logger.synchronize_between_processes()

    print(
        " * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format(
            top1=metric_logger.acc1, top5=metric_logger.acc5
        )
    )
    return metric_logger.acc1.global_avg
def sample_flow(
    flow,
    n: int = 50000,
    context: Optional[Union[np.ndarray, torch.Tensor]] = None,
    batch_size: int = 512,
    output_device: Union[str, torch.device] = 'cpu',
    dtype=torch.float64,
):
    """Draw samples from the posterior.

    The nsf package concatenates on the wrong dimension (dim=0 instead of dim=1).

    Arguments:
        flow {Flow} -- NSF model
        n {int} -- number of samples desired

    Keyword Arguments:
        context {array or Tensor} -- conditioning context, e.g. strain data (default: {None})
        batch_size {int} -- batch size for sampling (default: {512})
        output_device {str or torch.device} -- device for the returned samples (default: {'cpu'})
        dtype {torch.dtype} -- dtype of the returned samples (default: {torch.float64})

    Returns:
        Tensor -- samples
    """
    if flow.training:
        print("WARNING: Flows not in eval mode may generate incorrect samples.")
    with torch.inference_mode():
        if context is not None:
            if not isinstance(context, torch.Tensor):
                context = torch.from_numpy(context)
            if len(context.shape) == 1:
                # if 1 context tensor provided, unsqueeze batch dim
                context = context.unsqueeze(0)

        num_batches = n // batch_size
        num_leftover = n % batch_size

        samples = [flow.sample(batch_size, context).to(output_device, dtype)
                   for _ in range(num_batches)]
        if num_leftover > 0:
            samples.append(flow.sample(num_leftover, context).to(output_device, dtype))

        samples = torch.cat(samples, dim=1)
    return samples
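# Hypothetical usage sketch (not from the original source): drawing posterior samples
# conditioned on a single observed strain vector. `flow` and `y_observed` are assumed
# to exist; the sample count is illustrative.
flow.eval()
samples = sample_flow(flow, n=10_000, context=y_observed, batch_size=512)
# Per the note in the docstring, samples are concatenated along dim=1, giving a tensor
# of shape (context_batch, n, parameter_dim); index the single context to get (n, parameter_dim).
posterior = samples[0]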