def __init__(
    self,
    labels_header,
    dataroot,
    tokenizer,
    bert_model,
    seq_len,
    encoding="utf-8",
    visual_target=0,
    batch_size=512,
    shuffle=False,
    num_workers=25,
    cache=5000,
    drop_last=False,
    cuda=False,
    objective=0,
    visualization=False,
):
    self.seq_len = seq_len
    self.region_len = 101
    # self.labels_header = labels_header

    lmdb_file = os.path.join(dataroot)
    caption_path = os.path.join(dataroot, "captions_all_json.json")

    print("Loading from %s" % lmdb_file)

    ds = ImageFeaturesH5Reader(lmdb_file, True)
    # Reuse the same reader instead of opening the LMDB file a second time.
    self.image_reader = ds
    self.image_name = self.image_reader.keys()
    # ds = td.LMDBSerializer.load(lmdb_file, shuffle=False)
    self.num_dataset = len(ds)

    # preprocess_function = BertPreprocessBatch(
    #     caption_path,
    #     tokenizer,
    #     bert_model,
    #     seq_len,
    #     101,
    #     self.num_dataset,
    #     encoding="utf-8",
    #     visual_target=visual_target,
    #     visualization=visualization,
    #     objective=objective,
    # )
    # self.ds = td.MapData(ds, preprocess_function)

    self.tokenizer = tokenizer
    # self.ds = td.BatchData(ds, batch_size, remainder=True)
    # self.ds.reset_state()

    self.captions = json.load(open(caption_path, "r"))
    self.batch_size = batch_size
    self.num_workers = num_workers
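# Hedged usage sketch (not part of the original code): the __init__ above only needs
# ImageFeaturesH5Reader to expose keys(), __len__ and item access. The helper name and
# the LMDB path below are hypothetical; the import of ImageFeaturesH5Reader is assumed
# to be the same one used by the code above.
def _inspect_feature_lmdb(lmdb_file="path/to/validation_feat_all.lmdb"):
    reader = ImageFeaturesH5Reader(lmdb_file, True)  # in_memory=True, as above
    print("number of images:", len(reader))          # what __init__ stores as num_dataset
    first_key = reader.keys()[0]
    return reader[first_key]                          # region features for one image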
def LoadDatasetTransfer(args, task_cfg, id):
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=True)

    task_feature_reader1 = {}
    task_feature_reader2 = {}
    task = "TASK" + id
    if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
        task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
    if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
        task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # initialize the feature readers
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)
    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_val = {}
    task_dataloader_val = {}
    task_batch_size = {}
    task_num_iters = {}
    task_ids = task
    task_name = task_cfg[task]["name"]

    batch_size = args.batch_size
    if args.local_rank != -1:
        batch_size = int(batch_size / dist.get_world_size())

    logger.info("Loading %s Dataset with batch size %d" %
                (task_cfg[task]["name"], batch_size))
    task_datasets_val[task] = DatasetMapTrans[task_name](
        task=task_cfg[task]["name"],
        dataroot=task_cfg[task]["dataroot"],
        annotations_jsonpath=task_cfg[task]["trans_annotations_jsonpath"],
        image_features_reader=task_feature_reader1[task_cfg[task]["features_h5path1"]],
        gt_image_features_reader=task_feature_reader2[task_cfg[task]["features_h5path2"]],
        tokenizer=tokenizer,
        bert_model=args.bert_model,
        padding_index=0,
        max_seq_length=task_cfg[task]["max_seq_length"],
        max_region_num=task_cfg[task]["max_region_num"],
    )

    task_dataloader_val[task] = DataLoader(
        task_datasets_val[task],
        shuffle=False,
        batch_size=batch_size,
        pin_memory=True,
    )

    task_num_iters[task] = len(task_dataloader_val[task])
    task_batch_size[task] = batch_size

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_val,
        task_dataloader_val,
    )
def LoadDatasets(args, task_cfg, task_id, split="trainval"):
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    task_feature_reader1 = {}
    task_feature_reader2 = {}
    task = "TASK" + task_id
    if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
        task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
    if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
        task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # initialize the feature readers
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)
    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_train = {}
    task_datasets_val = {}
    task_dataloader_train = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    task = "TASK" + task_id
    task_name = task_cfg[task]["name"]
    task_ids.append(task)

    batch_size = task_cfg[task]["batch_size"] // args.gradient_accumulation_steps
    num_workers = args.num_workers
    if args.local_rank != -1:
        batch_size = int(batch_size / dist.get_world_size())
        num_workers = int(num_workers / dist.get_world_size())
        # num_workers = int(num_workers / len(ids))

    logger.info("Loading %s Dataset with batch size %d" %
                (task_cfg[task]["name"], batch_size))

    task_datasets_train[task] = None
    if "train" in split:
        task_datasets_train[task] = DatasetMapTrain[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["train_annotations_jsonpath"],
            split=task_cfg[task]["train_split"],
            image_features_reader=task_feature_reader1[task_cfg[task]["features_h5path1"]],
            gt_image_features_reader=task_feature_reader2[task_cfg[task]["features_h5path2"]],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
        )

    task_datasets_val[task] = None
    if "val" in split:
        task_datasets_val[task] = DatasetMapTrain[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["val_annotations_jsonpath"],
            split=task_cfg[task]["val_split"],
            image_features_reader=task_feature_reader1[task_cfg[task]["features_h5path1"]],
            gt_image_features_reader=task_feature_reader2[task_cfg[task]["features_h5path2"]],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
        )

    task_num_iters[task] = 0
    task_batch_size[task] = 0
    if "train" in split:
        if args.local_rank == -1:
            train_sampler = RandomSampler(task_datasets_train[task])
        else:
            # TODO: check if this works with the current data generator from disk
            # that relies on next(file) (it doesn't return an item back by index)
            train_sampler = DistributedSampler(task_datasets_train[task])

        task_dataloader_train[task] = DataLoader(
            task_datasets_train[task],
            sampler=train_sampler,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
        )
        task_num_iters[task] = len(task_dataloader_train[task])
        task_batch_size[task] = batch_size

    if "val" in split:
        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=2,
            pin_memory=True,
        )

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_train,
        task_datasets_val,
        task_dataloader_train,
        task_dataloader_val,
    )
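# Hedged usage sketch (assumption, not part of the original code): LoadDatasets is
# normally driven by the task config that main() hints at in its commented-out block
# (vilbert_tasks.yml loaded into an EasyDict) plus the parsed argparse namespace.
# The helper name and the task id "1" are illustrative only.
def _example_load_task_1(args):
    import yaml
    from easydict import EasyDict as edict

    with open("vilbert_tasks.yml", "r") as f:
        task_cfg = edict(yaml.safe_load(f))

    (task_batch_size, task_num_iters, task_ids,
     task_datasets_train, task_datasets_val,
     task_dataloader_train, task_dataloader_val) = LoadDatasets(
        args, task_cfg, "1", split="trainval")
    # loaders are keyed by the "TASK" + id string built inside LoadDatasets
    return task_dataloader_train["TASK1"], task_dataloader_val["TASK1"]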
def LoadDatasetEval(args, task_cfg, ids):
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=True)

    task_feature_reader1 = {}
    task_feature_reader2 = {}
    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
            task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
        if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
            task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # initialize the feature readers
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)
    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_val = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        task_ids.append(task)
        task_name = task_cfg[task]["name"]
        batch_size = args.batch_size
        if args.local_rank != -1:
            batch_size = int(batch_size / dist.get_world_size())

        num_workers = int(args.num_workers / len(ids))
        logger.info("Loading %s Dataset with batch size %d" %
                    (task_cfg[task]["name"], batch_size))

        if args.split:
            eval_split = args.split
        else:
            eval_split = task_cfg[task]["val_split"]

        task_datasets_val[task] = DatasetMapEval[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["val_annotations_jsonpath"],
            split=eval_split,
            image_features_reader=task_feature_reader1[task_cfg[task]["features_h5path1"]],
            gt_image_features_reader=task_feature_reader2[task_cfg[task]["features_h5path2"]],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            clean_datasets=args.clean_train_sets,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
            captions_dir=task_cfg[task]["captions_dir"],
        )

        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=10,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_val[task])
        task_batch_size[task] = batch_size

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_val,
        task_dataloader_val,
    )
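# Hedged usage sketch (assumption, not part of the original code): evaluation typically
# walks every task returned by LoadDatasetEval and iterates its DataLoader. The batch
# layout depends on the task's dataset class, so it is treated as an opaque tuple here;
# the helper name and the default ids are illustrative only.
def _example_eval_loop(args, task_cfg, ids=("1",)):
    (task_batch_size, task_num_iters, task_ids,
     task_datasets_val, task_dataloader_val) = LoadDatasetEval(args, task_cfg, list(ids))

    for task in task_ids:
        for step, batch in enumerate(task_dataloader_val[task]):
            # a forward pass / metric computation would go here
            if step == 0:
                print(task, "first batch has", len(batch), "fields")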
def main():
    parser = argparse.ArgumentParser()

    # Data files for FOIL task.
    parser.add_argument(
        "--features_h5path",
        default="data/datasets/refcoco/refcoco_unc/refcoco_resnext152_faster_rcnn_genome.lmdb",
    )
    parser.add_argument(
        "--gt_features_h5path",
        default="data/datasets/refcoco/refcoco_unc/refcoco_gt_resnext152_faster_rcnn_genome.lmdb",
    )
    parser.add_argument("--instances-jsonpath", default="data/referExpression")
    parser.add_argument("--task", default="refcoco+")

    # Required parameters
    parser.add_argument(
        "--in_memory",
        default=False,
        type=bool,
        help="whether to load the image features into memory.",
    )
    parser.add_argument(
        "--bert_model",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.",
    )
    parser.add_argument(
        "--pretrained_weight",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.",
    )
    parser.add_argument(
        "--output_dir",
        default="save",
        type=str,
        help="The output directory where the model checkpoints will be written.",
    )
    parser.add_argument(
        "--config_file",
        default="config/bert_base_6layer_6conect.json",
        type=str,
        help="The config file which specifies the model details.",
    )

    # Other parameters
    parser.add_argument(
        "--clean_train_sets",
        default=True,
        type=bool,
        help="whether to clean train sets for multitask data.",
    )
    parser.add_argument(
        "--max_seq_length",
        default=30,
        type=int,
        help="The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.",
    )
    parser.add_argument("--tasks",
                        default="",
                        type=str,
                        help="1-2-3... training tasks separated by -")
    parser.add_argument(
        "--train_batch_size",
        default=128,
        type=int,
        help="Total batch size for training.",
    )
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Whether not to use CUDA when available")
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Whether to lower case the input text. True for uncased models, False for cased models.",
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="local_rank for distributed training on gpus",
    )
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of update steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit float precision instead of 32-bit",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=20,
        help="Number of workers in the dataloader.",
    )
    parser.add_argument(
        "--from_pretrained",
        action="store_true",
        help="Whether the tensor is from pretrained.",
    )
    parser.add_argument(
        "--baseline",
        action="store_true",
        help="Whether to use the baseline model (single bert).",
    )
    parser.add_argument(
        "--use_chunk",
        default=0,
        type=float,
        help="whether to use chunks for parallel training.",
    )
    parser.add_argument(
        "--split",
        default="test",
        type=str,
        help="which dataset split to evaluate on.",
    )

    args = parser.parse_args()
    # with open("vilbert_tasks.yml", "r") as f:
    #     task_cfg = edict(yaml.safe_load(f))

    # Declare path to save checkpoints.
    config = BertConfig.from_json_file(args.config_file)
    # print(config)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    n_gpu = torch.cuda.device_count()
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
    num_train_optimization_steps = None

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    image_features_reader = ImageFeaturesH5Reader(args.features_h5path, True)
    gt_image_features_reader = ImageFeaturesH5Reader(args.gt_features_h5path, True)

    # dataset = FlickrGroundingDataset(task="FlickrGrounding",
    #                                  dataroot="data/datasets/flickr30k/",
    #                                  annotations_jsonpath="",
    #                                  split="val",
    #                                  image_features_reader=image_features_reader,
    #                                  gt_image_features_reader=gt_image_features_reader,
    #                                  tokenizer=tokenizer,
    #                                  bert_model=args.bert_model,
    #                                  clean_datasets=True,
    #                                  padding_index=0,
    #                                  max_seq_length=24,
    #                                  max_region_num=200,
    #                                  )

    dataset = ReferExpressionDataset(
        task="refcoco",
        dataroot="data/datasets/refcoco/",
        annotations_jsonpath="",
        split="val",
        image_features_reader=image_features_reader,
        gt_image_features_reader=gt_image_features_reader,
        tokenizer=tokenizer,
        bert_model=args.bert_model,
        clean_datasets=True,
        padding_index=0,
        max_seq_length=20,
        max_region_num=201,
    )

    dataloader = DataLoader(dataset=dataset,
                            batch_size=4,
                            shuffle=False,
                            pin_memory=True)
    dataset_iter = iter(dataloader)

    (features, spatials, image_mask, caption, target, input_mask, segment_ids,
     co_attention_mask, image_id, mask) = next(dataset_iter)

    pprint(
        f"features: {features.shape}, spatials: {spatials.shape}, "
        f"image_mask: {image_mask.shape}, caption: {caption.shape}, "
        f"target: {target.shape}, input_mask: {input_mask.shape}, "
        f"segment_ids: {segment_ids.shape}, co_attention_mask: {co_attention_mask.shape}, "
        f"image_id: {image_id.shape}, mask: {mask.shape}"
    )

    for mask_ in mask:
        plt.imshow(mask_)
        plt.show()
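# The fragment defines main() but never invokes it; a standard entry-point guard
# (an assumption about how the script is meant to be run) would be:
if __name__ == "__main__":
    main()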
def LoadDatasetEval(args, task_cfg, ids):
    # args.in_memory decides whether data should be loaded in RAM or not
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=True)

    task_feature_reader1 = {}
    task_feature_reader2 = {}

    # vikram printed this
    # print('Number of IDs: ', len(ids))
    # print('Id[:10] = ', ids[:10])
    # print('args.in_memory: ', args.in_memory)

    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        # print('************************')
        # print('task_cfg[task]["features_h5path1"]: ', task_cfg[task]["features_h5path1"])
        # print('task_cfg[task]["features_h5path2"]: ', task_cfg[task]["features_h5path2"])
        # print('************************')
        if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
            task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
        if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
            task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # initialize the feature readers
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory
            )
    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory
            )

    # Note: the dataset is not yet loaded; only the loading procedure has been
    # initialized up to this point.
    task_datasets_val = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        task_ids.append(task)
        task_name = task_cfg[task]["name"]
        batch_size = args.batch_size
        if args.local_rank != -1:
            batch_size = int(batch_size / dist.get_world_size())

        num_workers = int(args.num_workers / len(ids))
        logger.info(
            "Loading %s Dataset with batch size %d"
            % (task_cfg[task]["name"], batch_size)
        )

        if args.split:
            eval_split = args.split
        else:
            eval_split = task_cfg[task]["val_split"]

        # Here the data gets loaded.
        # DatasetMapEval is defined in __init__.py
        # Task name is RetrievalFlickr30k
        print('TASK NAME: ', task_name)
        # raise NotImplementedError
        task_datasets_val[task] = DatasetMapEval[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["val_annotations_jsonpath"],
            split=eval_split,
            image_features_reader=task_feature_reader1[
                task_cfg[task]["features_h5path1"]
            ],
            gt_image_features_reader=task_feature_reader2[
                task_cfg[task]["features_h5path2"]
            ],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            clean_datasets=args.clean_train_sets,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
        )
        print('Dataset finished loading!')
        print('Type of task_datasets_val[task]: ', type(task_datasets_val[task]))

        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=10,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_val[task])
        task_batch_size[task] = batch_size

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_val,
        task_dataloader_val,
    )
def LoadDatasetEval(args, task_cfg, ids):
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=True)

    task_feature_reader1 = {}
    task_feature_reader2 = {}
    for i, task_id in enumerate(ids):
        task = 'TASK' + task_id
        if task_cfg[task]['features_h5path1'] not in task_feature_reader1:
            task_feature_reader1[task_cfg[task]['features_h5path1']] = None
        if task_cfg[task]['features_h5path2'] not in task_feature_reader2:
            task_feature_reader2[task_cfg[task]['features_h5path2']] = None

    # initialize the feature readers
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != '':
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)
    for features_h5path in task_feature_reader2.keys():
        if features_h5path != '':
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_val = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    for i, task_id in enumerate(ids):
        task = 'TASK' + task_id
        task_ids.append(task)
        batch_size = args.batch_size
        if args.local_rank != -1:
            batch_size = int(batch_size / dist.get_world_size())

        num_workers = int(args.num_workers / len(ids))
        logger.info("Loading %s Dataset with batch size %d" %
                    (task_cfg[task]['name'], batch_size))

        if args.split:
            eval_split = args.split
        else:
            eval_split = task_cfg[task]['val_split']

        if args.split == 'train':
            annotations_jsonpath = task_cfg[task]['train_annotations_jsonpath']
        else:
            annotations_jsonpath = task_cfg[task]['val_annotations_jsonpath']

        task_datasets_val[task] = DatasetMapEval[task](
            task=task_cfg[task]['name'],
            dataroot=task_cfg[task]['dataroot'],
            annotations_jsonpath=annotations_jsonpath,
            split=eval_split,
            image_features_reader=task_feature_reader1[task_cfg[task]['features_h5path1']],
            gt_image_features_reader=task_feature_reader2[task_cfg[task]['features_h5path2']],
            tokenizer=tokenizer,
            padding_index=0,
            max_seq_length=task_cfg[task]['max_seq_length'],
            max_region_num=task_cfg[task]['max_region_num'])

        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_val[task])
        task_batch_size[task] = batch_size

    return task_batch_size, task_num_iters, task_ids, task_datasets_val, task_dataloader_val