def setup_model(self):
    """Construct the VINCE model, momentum-queue model, and embedding queue.

    Restores weights from the checkpoint directory, moves everything onto the
    first configured GPU, and kicks off data prefetching plus initial queue fill.
    """
    gpu_ids = self.args.pytorch_gpu_ids
    device = "cuda:" + str(gpu_ids[0])

    model_args = copy.deepcopy(self.args)
    model_args.title = os.path.join(model_args.title, "VinceModel")

    # Split the ORIGINAL paths once up front; checkpoint_dir is rewritten below,
    # and tensorboard_dir must be derived from its pre-rewrite components.
    checkpoint_parts = model_args.checkpoint_dir.split(os.sep)
    long_save_parts = model_args.long_save_checkpoint_dir.split(os.sep)

    model_args.tensorboard_dir = os.path.join(
        model_args.base_logdir, model_args.title,
        *checkpoint_parts[2:-1], constants.TIME_STR)
    model_args.checkpoint_dir = os.path.join(
        model_args.base_logdir, model_args.title,
        *checkpoint_parts[2:])
    model_args.long_save_checkpoint_dir = os.path.join(
        model_args.base_logdir, model_args.title,
        *long_save_parts[2:-1], constants.TIME_STR)

    self.model = VinceModel(model_args)
    print(self.model)
    self.iteration = self.model.restore()
    self.model.to(device)

    self.queue_model = VinceQueueModel(model_args, self.model)
    self.queue_model.to(device)
    self.vince_queue = StorageQueue(
        model_args.vince_queue_size, model_args.vince_embedding_size, device=device)

    # Derive epoch from the restored iteration counter.
    self.epoch = self.iteration // (self.args.iterations_per_epoch * self.args.batch_size)
    if self.iteration > 0:
        print("Resuming epoch", self.epoch)

    self.start_prefetch()
    self.fill_queue_repeat()
def main():
    """Compute and render nearest-neighbor visualizations.

    Extracts features with a restored VinceModel over (1) the R2V2 (YouTube)
    validation set and (2) the ImageNet validation set, then draws NN grids
    within and across the two datasets.
    """
    with torch.no_grad():
        torch_devices = args.pytorch_gpu_ids
        device = "cuda:" + str(torch_devices[0])
        model = VinceModel(args)
        model.restore()
        model.eval()
        model.to(device)

        # FIX: `data_subset` was referenced below (valdir join) without ever
        # being defined in this function, which raises NameError unless a
        # same-named global happened to exist. Everything here uses "val".
        data_subset = "val"

        yt_dataset = R2V2Dataset(
            args,
            data_subset,
            transform=StandardVideoTransform(args.input_size, data_subset),
            num_images_to_return=1,
        )
        # Seed all RNGs so the shuffled loader order is reproducible.
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        data_loader = PersistentDataLoader(
            yt_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            pin_memory=True,
            collate_fn=R2V2Dataset.collate_fn,
            worker_init_fn=R2V2Dataset.worker_init_fn,
        )
        yt_features, yt_images = dataset_nn(model, data_loader)
        del data_loader  # release workers/pinned memory before the next loader
        draw_nns(yt_features, yt_images, "youtube")

        # Re-seed so the ImageNet pass sees the same shuffle order.
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        valdir = os.path.join(args.imagenet_data_path, data_subset)
        transform = RepeatedImagenetTransform(args.input_height, data_subset=data_subset, repeats=1)
        imagenet_dataset = datasets.ImageFolder(valdir, transform)
        data_loader = DataLoader(
            imagenet_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            pin_memory=True,
        )
        imagenet_features, imagenet_images = dataset_nn(model, data_loader)
        del data_loader
        draw_nns(imagenet_features, imagenet_images, "imagenet")
        # Cross-dataset neighbors in both directions.
        draw_nns(imagenet_features, imagenet_images, "imagenet", yt_features, yt_images, "youtube")
        draw_nns(yt_features, yt_images, "youtube", imagenet_features, imagenet_images, "imagenet")
def setup_feature_extractor(self):
    """Build the VinceModel feature extractor, restore its weights, and set
    its train/eval mode according to ``self.freeze_feature_extractor``.
    """
    args = copy.deepcopy(self.args)
    args.title = os.path.join(args.title, "VinceModel")
    # FIX: tensorboard_dir must be derived from the ORIGINAL checkpoint_dir,
    # i.e. before checkpoint_dir is rewritten below. The previous ordering
    # split the already-rewritten path (base_logdir/title/...), so the [2:-1]
    # slice picked the wrong components. This matches setup_model's ordering.
    args.tensorboard_dir = os.path.join(
        args.base_logdir, args.title,
        *(args.checkpoint_dir.split(os.sep)[2:-1]), constants.TIME_STR)
    args.checkpoint_dir = os.path.join(
        args.base_logdir, args.title,
        *(args.checkpoint_dir.split(os.sep)[2:]))
    args.long_save_checkpoint_dir = os.path.join(
        args.base_logdir, args.title,
        *(args.long_save_checkpoint_dir.split(os.sep)[2:-1]), constants.TIME_STR)

    self.feature_extractor = VinceModel(args)
    print(self.feature_extractor)
    self.feature_extractor.restore()
    self.feature_extractor.to(self.device)
    # Frozen extractor stays in eval mode (stops dropout / BN running-stat updates).
    if self.freeze_feature_extractor:
        self.feature_extractor.eval()
    else:
        self.feature_extractor.train()
# --- t-SNE preamble: extract embeddings for R2V2 "val" images ---
# Collects images and their embeddings into all_images / all_features,
# presumably consumed by a t-SNE fit further down (not visible here).
print("starting TSNE")
dataset = R2V2Dataset(args, "val", transform=StandardVideoTransform(args.input_size, "val"), num_images_to_return=1)
data_loader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    pin_memory=True,
    collate_fn=R2V2Dataset.collate_fn,
)
with torch.no_grad():
    torch_devices = args.pytorch_gpu_ids
    device = "cuda:" + str(torch_devices[0])
    model = VinceModel(args)
    model.restore()
    model.eval()
    model.to(device)
    all_images = []
    all_features = []
    # tqdm `total` caps the progress bar at ~NUM_IMAGES_IN_TSNE images worth of
    # batches; NOTE(review): the loop itself is not truncated here — if the
    # loader yields more batches they are still consumed. Verify intent.
    for batch in tqdm.tqdm(data_loader, total=NUM_IMAGES_IN_TSNE // args.batch_size):
        batch = process_video_data(batch)
        features = model.get_embeddings(batch)["embeddings"]
        images = to_uint8(batch["data"])
        # Flatten the batch into per-image records (features moved to numpy on CPU).
        for image, feature in zip(images, features):
            all_images.append(image)
            all_features.append(pt_util.to_numpy(feature))