def explain(self):
    """Visualize the model's attention for every question asked so far.

    Rebuilds the dialog dataset from the JSON dumped by ``chat`` (/tmp/J.json),
    runs the model once to obtain per-question attention weights, overlays each
    8x8 attention map on the current image, and sends one annotated picture per
    (question, answer) pair via ``self.send_pic``.
    """
    val_dataset = VisDialDataset(
        config["dataset"],
        "/tmp/J.json",
        "hello",
        return_options=True,
        add_boundary_toks=False if config["model"]["decoder"] == "disc" else True,
    )
    sample = val_dataset[0]
    # Add a batch dimension of 1 to every tensor in the sample.
    _, attention_weights = model({k: val.unsqueeze(0) for k, val in sample.items()})
    attention_weights = attention_weights.detach().cpu().numpy()

    pil_image = Image.open(available_image_ids[self.correct_img_id])
    w, h = pil_image.size
    for ix_ques, (ques, ans) in enumerate(zip(self.questions, self.answers)):
        # Reshape the flat attention vector into an 8x8 spatial grid.
        weights = np.resize(attention_weights[ix_ques], (8, 8))

        # Frameless figure sized so 1 pixel == 1/100 inch (image fills canvas).
        fig = plt.figure(frameon=False)
        fig.set_size_inches(w / 100, h / 100)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)

        img = ax.imshow(np.array(pil_image))
        # Overlay the attention map, stretched to the image extent.
        ax.imshow(weights, cmap='gray', alpha=0.6, extent=img.get_extent())
        fig.savefig('/tmp/pic.png')
        # Fix: close the figure — pyplot keeps figures alive until closed,
        # so the original leaked one figure per question.
        plt.close(fig)
        self.send_pic('/tmp/pic.png', caption='{}({})'.format(ques, ans))
def chat(self, query):
    """Answer one user question about the current image.

    Appends the (question, predicted answer) pair to the dialog history and
    sends the answer back via ``self.send_msg``. After 10 questions the user
    is told to make a prediction instead.
    """
    if len(self.questions) == 10:
        # Fix: message typo ("question" -> "questions").
        self.send_msg('You ran out of questions... Please predict with /predict <number_of_image> (1-4)')
        return

    # Build the dialog JSON the dataset expects and persist it for reuse
    # (``explain`` re-reads /tmp/J.json).
    J = jon(query=query, answers=self.answers, questions=self.questions, image_id=self.correct_img_id)
    with open('/tmp/J.json', 'w') as file:
        json.dump(J, file)

    val_dataset = VisDialDataset(
        config["dataset"],
        "/tmp/J.json",
        "hello",
        return_options=True,
        add_boundary_toks=False if config["model"]["decoder"] == "disc" else True,
    )
    sample = val_dataset[0]
    # Add a batch dimension of 1 to every tensor in the sample.
    forward = model({k: val.unsqueeze(0) for k, val in sample.items()})

    # Scores for the current round live at index len(self.questions);
    # pick the highest-scoring candidate answer.
    best_answer_index = int(forward[0][0][len(self.questions)].argmax())
    # Fix: read the candidates from the dict we just serialized instead of
    # re-opening and re-parsing the file we wrote two statements ago.
    best_answer = J['possible_answers'][best_answer_index]

    self.questions.append(query)
    self.answers.append(best_answer)
    self.send_msg(best_answer)
) # Print config and args. print(yaml.dump(config, default_flow_style=False)) for arg in vars(args): print("{:<20}: {}".format(arg, getattr(args, arg))) # ============================================================================= # SETUP DATASET, DATALOADER, MODEL, CRITERION, OPTIMIZER, SCHEDULER # ============================================================================= val_dataset = VisDialDataset( config["dataset"], args.val_json, args.val_dense_json, overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=False, sample_flag=False ) val_dataloader = DataLoader( val_dataset, batch_size=config["solver"]["batch_size"], num_workers=args.cpu_workers, shuffle=True, ) # Pass vocabulary to construct Embedding layer. encoder = Encoder(config["model"], val_dataset.vocabulary) decoder = Decoder(config["model"], val_dataset.vocabulary) print("Encoder: {}".format(config["model"]["encoder"]))
# Normalize gpu_ids to a list so indexing below always works.
if isinstance(args.gpu_ids, int):
    args.gpu_ids = [args.gpu_ids]

# A non-negative first GPU id selects CUDA; anything else falls back to CPU.
if args.gpu_ids[0] >= 0:
    device = torch.device("cuda", args.gpu_ids[0])
else:
    device = torch.device("cpu")

# Echo the resolved config and CLI arguments for reproducibility.
print(yaml.dump(config, default_flow_style=False))
for arg in vars(args):
    print(f"{arg:<20}: {getattr(args, arg)}")

# ================================================================================================
# SETUP DATASET, DATALOADER, MODEL, CRITERION, OPTIMIZER
# ================================================================================================

train_dataset = VisDialDataset(
    config["dataset"],
    args.train_json,
    overfit=args.overfit,
    in_memory=args.in_memory,
)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)

val_dataset = VisDialDataset(
    config["dataset"],
    args.val_json,
    args.val_dense_json,
    overfit=args.overfit,
    in_memory=args.in_memory,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)

# Both halves of the model share the training vocabulary for nn.Embedding.
encoder = Encoder(config["model"], train_dataset.vocabulary)
decoder = Decoder(config["model"], train_dataset.vocabulary)
if args.gpu_ids[0] >= 0 else torch.device("cpu")) # Print config and args. print(yaml.dump(config, default_flow_style=False)) for arg in vars(args): print("{:<20}: {}".format(arg, getattr(args, arg))) # ============================================================================= # SETUP DATASET, DATALOADER, MODEL, CRITERION, OPTIMIZER, SCHEDULER # ============================================================================= train_dataset = VisDialDataset( config["dataset"], args.train_json, overfit=args.overfit, in_memory=args.in_memory, num_workers=args.cpu_workers, return_options=True if config["model"]["decoder"] == "disc" else False, add_boundary_toks=False if config["model"]["decoder"] == "disc" else True, ) train_dataloader = DataLoader( train_dataset, batch_size=config["solver"]["batch_size"], num_workers=args.cpu_workers, shuffle=True, ) val_dataset = VisDialDataset( config["dataset"], args.val_json, args.val_dense_json,
pin_memory = config["solver"].get("pin_memory", True) print(f"Pin memory is set to {pin_memory}") # ============================================================================= # SETUP DATASET, DATALOADER, MODEL # ============================================================================= if args.split == "val": val_dataset = VisDialDataset( config["dataset"], args.val_json, args.val_dense_json, use_pretrained_emb=args.use_pretrained_emb, overfit=args.overfit, in_memory=args.in_memory, use_caption=args.use_caption, return_options=True, add_boundary_toks=False if config["model"]["decoder"] != "gen" else True, ) else: val_dataset = VisDialDataset( config["dataset"], args.test_json, use_pretrained_emb=args.use_pretrained_emb, overfit=args.overfit, in_memory=args.in_memory, use_caption=args.ignore_caption, return_options=True,
if args.gpu_ids[0] >= 0 else torch.device("cpu")) # Print config and args. print(yaml.dump(config, default_flow_style=False)) for arg in vars(args): print("{:<20}: {}".format(arg, getattr(args, arg))) # ============================================================================= # SETUP DATASET, DATALOADER, MODEL, CRITERION, OPTIMIZER, SCHEDULER # ============================================================================= train_sample_dataset = VisDialDataset( config["dataset"], args.train_json, overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=False, sample_flag=True # only train on data with dense annotations ) train_sample_dataloader = DataLoader( train_sample_dataset, batch_size=config["solver"]["batch_size"], num_workers=args.cpu_workers, shuffle=True, ) val_dataset = VisDialDataset(config["dataset"], args.val_json, args.val_dense_json, overfit=args.overfit,
def get_dataloader(config, args, finetune: bool = False, use_augment_dense: bool = False):
    """Build train/val datasets and dataloaders for visual dialog.

    Args:
        config: parsed YAML config with "dataset", "model" and "solver" keys.
        args: CLI namespace (paths, emb_type, cpu_workers, overfit, ...).
        finetune: forwarded to both datasets.
        use_augment_dense: when True, pass ``args.augment_train_dense_json``
            as extra dense annotations for the training set.

    Returns:
        dict with keys "train_dataloader", "val_dataloader",
        "train_dataset", "val_dataset".
    """
    # SA: pin memory for speed up
    # https://discuss.pytorch.org/t/when-to-set-pin-memory-to-true/19723/2
    pin_memory = config["solver"].get("pin_memory", True)
    print(f"Pin memory is set to {pin_memory}")
    # Fix: the original computed and printed pin_memory a second time further
    # down — the duplicate has been removed.

    # This should be emb dir.
    emb_dir_file_path = get_emb_dir_file_path(args.data_dir, args.emb_type)

    # SA: todo should be emb_dir_file_path
    # config["dataset"]["qa_emb_file_path"] = "{}/{}".format(args.data_dir, args.qa_emb_file_path)
    # config["dataset"]["hist_emb_file_path"] = "{}/{}".format(args.data_dir, args.hist_emb_file_path)

    # =============================================================================
    #   SETUP DATASET, DATALOADER, MODEL, CRITERION, OPTIMIZER, SCHEDULER
    # =============================================================================
    qa_emb_train_file_path = get_qa_embeddings_file_path(
        args.data_dir, data_type="train", emb_type=args.emb_type)
    qa_emb_val_file_path = get_qa_embeddings_file_path(
        args.data_dir, data_type="val", emb_type=args.emb_type)
    print(f"Embedding file path for train: {qa_emb_train_file_path}")
    print(f"Embedding file path for valid: {qa_emb_val_file_path}")

    hist_emb_train_file_path = get_hist_embeddings_file_path(
        emb_dir_file_path, data_type="train",
        concat=config["dataset"]["concat_history"], emb_type=args.emb_type)
    hist_emb_val_file_path = get_hist_embeddings_file_path(
        emb_dir_file_path, data_type="val",
        concat=config["dataset"]["concat_history"], emb_type=args.emb_type)

    if use_augment_dense:
        augment_dense_annotations_jsonpath = args.augment_train_dense_json
    else:
        augment_dense_annotations_jsonpath = None

    # Fix: removed the dead expression statement
    # `config["model"]["decoder"] == "disc"` that the original evaluated and
    # discarded (a no-op left over from a TODO about generalizing "disc").
    train_dataset = VisDialDataset(
        config["dataset"],
        args.train_json,
        args.train_dense_json,
        augment_dense_annotations_jsonpath=augment_dense_annotations_jsonpath,
        qa_emb_file_path=qa_emb_train_file_path,
        hist_emb_file_path=hist_emb_train_file_path,
        use_pretrained_emb=args.use_pretrained_emb,
        # NOTE(review): args.ignore_caption driving use_caption looks like a
        # naming mismatch — confirm against the argument parser.
        use_caption=args.ignore_caption,
        finetune=finetune,
        overfit=args.overfit,
        in_memory=args.in_memory,
        num_workers=args.cpu_workers,
        # Discriminative decoders score answer options; generative ones need
        # boundary tokens instead.
        return_options=config["model"]["decoder"] != "gen",
        add_boundary_toks=config["model"]["decoder"] == "gen")
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config["solver"]["batch_size"],
                                  num_workers=args.cpu_workers,
                                  shuffle=True,
                                  pin_memory=pin_memory)

    val_dataset = VisDialDataset(
        config["dataset"],
        args.val_json,
        args.val_dense_json,
        qa_emb_file_path=qa_emb_val_file_path,
        hist_emb_file_path=hist_emb_val_file_path,
        use_pretrained_emb=args.use_pretrained_emb,
        use_caption=args.ignore_caption,
        finetune=finetune,
        overfit=args.overfit,
        in_memory=args.in_memory,
        num_workers=args.cpu_workers,
        return_options=True,
        add_boundary_toks=config["model"]["decoder"] == "gen")
    val_dataloader = DataLoader(
        val_dataset,
        # Generative evaluation uses a small fixed batch size of 5.
        batch_size=config["solver"]["batch_size"] if config["model"]["decoder"] != "gen" else 5,
        num_workers=args.cpu_workers,
        pin_memory=pin_memory)

    # SA: best practice to return dic instead of variables
    dataloader_dic = {
        "train_dataloader": train_dataloader,
        "val_dataloader": val_dataloader,
        "train_dataset": train_dataset,
        "val_dataset": val_dataset,
    }
    return dataloader_dic
proj_to_senq_id=config["model"]["decoder"] == "gen") val_dataset = BertVisDialDataset( config["dataset"], args.val_json, args.val_dense_json, overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=config["model"]["decoder"] == "gen", proj_to_senq_id=config["model"]["decoder"] == "gen") else: train_dataset = VisDialDataset( config["dataset"], args.train_json, overfit=args.overfit, in_memory=args.in_memory, return_options=config["model"]["decoder"] == "disc", add_boundary_toks=config["model"]["decoder"] == "gen") val_dataset = VisDialDataset( config["dataset"], args.val_json, args.val_dense_json, overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=config["model"]["decoder"] == "gen") assert ((config["solver"]['batch_size'] % config["solver"]["accumulation_steps"]) == 0)
device = torch.device( "cuda", args.gpu_ids[0]) if args.gpu_ids[0] >= 0 else torch.device("cpu") # Print config and args. print(yaml.dump(config, default_flow_style=False)) for arg in vars(args): print("{:<20}: {}".format(arg, getattr(args, arg))) # ================================================================================================ # SETUP DATASET, DATALOADER, MODEL # ================================================================================================ if args.split == "val": val_dataset = VisDialDataset(config["dataset"], args.val_json, args.captions_val_json, args.val_dense_json, overfit=args.overfit, in_memory=args.in_memory) else: val_dataset = VisDialDataset(config["dataset"], args.test_json, caption_jsonpath=args.captions_test_json, overfit=args.overfit, in_memory=args.in_memory) val_dataloader = DataLoader(val_dataset, batch_size=config["solver"]["batch_size"], num_workers=args.cpu_workers) with open(config["dataset"]["glovepath"], "r") as glove_file: glove = json.load(glove_file) glovevocabulary = Vocabulary(config["dataset"]["word_counts_json"],
'test2014', 'val2014', 'VisualDialog_test2018', 'VisualDialog_val2018', ) for f in os.listdir(f'../visdial-challenge-starter-pytorch/data/images/{split}/') } # keys: {"dataset", "model", "solver"} config = yaml.load(open('checkpoints/new_features_baseline/config.yml')) val_dataset = VisDialDataset( config["dataset"], "data/visdial_1.0_val.json", "data/visdial_1.0_val_dense_annotations.json", return_options=True, add_boundary_toks=False if config["model"]["decoder"] == "disc" else True, ) # Pass vocabulary to construct Embedding layer. encoder = Encoder(config["model"], val_dataset.vocabulary) decoder = Decoder(config["model"], val_dataset.vocabulary) print("Encoder: {}".format(config["model"]["encoder"])) print("Decoder: {}".format(config["model"]["decoder"])) model = EncoderDecoderModel(encoder, decoder) model_state_dict, _ = load_checkpoint('checkpoints/new_features_baseline/checkpoint_10.pth') if isinstance(model, nn.DataParallel): model.module.load_state_dict(model_state_dict)
# keys: {"dataset", "model", "solver"}
# Fix: yaml.load(open(...)) leaked the file handle and omitted the Loader
# argument (mandatory since PyYAML 6); safe_load in a context manager does both.
with open(args.config_yml) as config_file:
    config = yaml.safe_load(config_file)

# Normalize gpu_ids to a list so indexing below always works
# (isinstance instead of `type(...) == int`).
if isinstance(args.gpu_ids, int):
    args.gpu_ids = [args.gpu_ids]
device = torch.device("cuda", args.gpu_ids[0]) if args.gpu_ids[0] >= 0 else torch.device("cpu")

# print config and args
print(yaml.dump(config, default_flow_style=False))
for arg in vars(args):
    print("{:<20}: {}".format(arg, getattr(args, arg)))

# ================================================================================================
#   SETUP DATASET, DATALOADER, MODEL
# ================================================================================================

val_dataset = VisDialDataset(args.eval_json, config["dataset"], args.overfit)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)

# Ground-truth indices only exist for train/val splits.
# Fix: the original tested `dataset.split`, but no `dataset` is defined here —
# NOTE(review): `val_dataset` is the obvious referent; confirm nothing else
# in the file defines a bare `dataset`.
if args.use_gt and "test" in val_dataset.split:
    print("Warning: No ground truth for test split, changing use_gt to False.")
    args.use_gt = False

# pass vocabulary to construct nn.Embedding
encoder = Encoder(config["model"], val_dataset.vocabulary)
decoder = Decoder(config["model"], val_dataset.vocabulary)
print("Encoder: {}".format(config["model"]["encoder"]))
print("Decoder: {}".format(config["model"]["decoder"]))

# share word embedding between encoder and decoder
proj_to_senq_id=config["model"]["decoder"] == "gen") val_dataset = BertVisDialDataset( config["dataset"], args.val_json, args.val_dense_json, overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=config["model"]["decoder"] == "gen", proj_to_senq_id=config["model"]["decoder"] == "gen") else: train_dataset = VisDialDataset( config["dataset"], args.train_json, args.train_dense_json, return_adjusted_gt_relevance=config["dataset"]["use_adjusted"], overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=config["model"]["decoder"] == "gen") val_dataset = VisDialDataset( config["dataset"], args.val_json, args.val_dense_json, overfit=args.overfit, in_memory=args.in_memory, return_options=True, add_boundary_toks=config["model"]["decoder"] == "gen") assert ((config["solver"]['batch_size'] % config["solver"]["accumulation_steps"]) == 0)