def _get_cluster_assignment_for_split(self, task: ClassyTask, split: str):
    """
    Map every image of the given data split to its best-scoring prototype.

    The model is switched to eval mode, then the dataloader registered under
    ``split.lower()`` is consumed batch by batch. For each batch the model
    output element ``[0][1]`` is read as the prototype scores and the argmax
    over its last dimension is recorded for each image index.

    Args:
        task: task whose ``model`` is run; the batch iterator is stored on
            ``task.data_iterator``.
        split: name of the data split; lower-cased before the dataloader
            lookup.

    Returns:
        dict mapping each image index (as found in the batch's ``data_idx``)
        to the integer index of its highest-scoring prototype.
    """
    task.model.eval()
    logging.info("Model set to eval mode during feature extraction...")

    # NOTE(review): the loader comes from ``self.task`` while the iterator is
    # stored on the ``task`` argument -- presumably the same object; confirm.
    task.data_iterator = iter(self.task.dataloaders[split.lower()])

    assignments = {}
    for batch in task.data_iterator:
        assert isinstance(batch, dict)
        assert "data_idx" in batch, "Indices not passed"

        images = torch.cat(batch["data"]).cuda(non_blocking=True)
        indices = torch.cat(batch["data_idx"]).cpu().numpy()

        with torch.no_grad():
            model_output = task.model(images)
            head_output = model_output[0]
            scores = head_output[1]
            winners = scores.argmax(dim=-1)

        # One entry per image: key is the dataset index, value the prototype id.
        for position, image_index in enumerate(indices):
            assignments[image_index] = winners[position].item()

    return assignments
def _get_cluster_assignment_for_split(
    self, task: ClassyTask, split: str, output_folder: str
):
    """
    Assign every image of a data split to its best-scoring prototype and
    hand the per-image prototype scores to ``_save_extracted_prototypes``.

    The model is switched to eval mode, then the dataloader registered under
    ``split.lower()`` is consumed batch by batch. Element ``[0][1]`` of the
    model output is read as the prototype scores of the batch. For each image
    the argmax over the last dimension is kept as its hard assignment, and the
    image's own score row is buffered as its soft assignment.

    The buffer is passed to ``self._save_extracted_prototypes`` whenever it
    holds at least ``cfg.EXTRACT_FEATURES.CHUNK_THRESHOLD`` images (only when
    that threshold is >= 0), and once more at the end if anything is left.

    Args:
        task: task whose ``model`` is run; the batch iterator is stored on
            ``task.data_iterator``.
        split: name of the data split; lower-cased for the dataloader lookup
            and forwarded unchanged to the save helper.
        output_folder: forwarded unchanged to the save helper.

    Returns:
        dict mapping each image index (as found in the batch's ``data_idx``)
        to the integer index of its highest-scoring prototype. The soft
        assignments are not returned; they only go to the save helper.
    """
    task.model.eval()
    logging.info("Model set to eval mode during feature extraction...")

    # get_rank() raises when no process group exists; use rank 0 in that case
    # so the method also works in a plain single-process run.
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        dist_rank = torch.distributed.get_rank()
    else:
        dist_rank = 0

    cluster_assignments = {}
    soft_cluster_assignments = {}
    image_indices = []
    chunk_index, buffer_size = 0, 0

    def _flush_buffer():
        # Hand the currently buffered soft assignments to the save helper.
        self._save_extracted_prototypes(
            soft_assignments=soft_cluster_assignments,
            out_indices=image_indices,
            dist_rank=dist_rank,
            chunk_index=chunk_index,
            split=split,
            output_folder=output_folder,
        )

    # NOTE(review): the loader comes from ``self.task`` while the iterator is
    # stored on the ``task`` argument -- presumably the same object; confirm.
    task.data_iterator = iter(self.task.dataloaders[split.lower()])

    # A plain ``for`` loop ends only when the iterator itself is exhausted; a
    # StopIteration escaping from the model or the save helper is no longer
    # mistaken for the end of the data.
    for sample in task.data_iterator:
        assert isinstance(sample, dict)
        assert "data_idx" in sample, "Indices not passed"
        images = torch.cat(sample["data"]).cuda(non_blocking=True)
        indices = torch.cat(sample["data_idx"]).cpu().numpy()

        with torch.no_grad():
            outputs = task.model(images)
        prototype_score = outputs[0][1]

        # Convert once per batch instead of once per image.
        hard_assignments = prototype_score.argmax(dim=-1).tolist()
        soft_assignments = prototype_score.detach().cpu().numpy()

        for idx, image_index in enumerate(indices):
            cluster_assignments[image_index] = hard_assignments[idx]
            # Keep only this image's score row, not the whole batch matrix.
            soft_cluster_assignments[image_index] = soft_assignments[idx]
            image_indices.append(image_index)
        buffer_size += indices.shape[0]

        # A negative threshold disables chunking: everything is saved at the end.
        if buffer_size >= self.cfg.EXTRACT_FEATURES.CHUNK_THRESHOLD >= 0:
            _flush_buffer()
            soft_cluster_assignments.clear()
            image_indices.clear()
            chunk_index += 1
            buffer_size = 0

    if buffer_size:
        _flush_buffer()

    return cluster_assignments