def __init__(
            self,
            labels_header,
            dataroot,
            tokenizer,
            bert_model,
            seq_len,
            encoding="utf-8",
            visual_target=0,
            batch_size=512,
            shuffle=False,
            num_workers=25,
            cache=5000,
            drop_last=False,
            cuda=False,
            objective=0,
            visualization=False,
    ):
        """Open the LMDB image-feature store and caption file for this dataset.

        Args:
            labels_header: unused here; kept for interface compatibility.
            dataroot: directory that is both the LMDB path and the location
                of ``captions_all_json.json``.
            tokenizer: text tokenizer stored for later use.
            bert_model: unused here; kept for interface compatibility.
            seq_len: maximum text sequence length.
            encoding: text encoding used to read the caption JSON.
            batch_size / num_workers: stored for downstream dataloader setup.

        The remaining keyword arguments (visual_target, shuffle, cache,
        drop_last, cuda, objective, visualization) are accepted but not used
        by this constructor.
        """
        self.seq_len = seq_len
        # Fixed region count per image.
        # NOTE(review): 101 presumably means 100 boxes + 1 global feature —
        # confirm against the feature extraction pipeline.
        self.region_len = 101

        lmdb_file = os.path.join(dataroot)
        caption_path = os.path.join(dataroot, "captions_all_json.json")
        print("Loading from %s" % lmdb_file)

        # The original code opened the same LMDB twice (a throwaway reader
        # used only for len()); one reader serves both purposes.
        self.image_reader = ImageFeaturesH5Reader(lmdb_file, True)
        self.image_name = self.image_reader.keys()
        self.num_dataset = len(self.image_reader)

        self.tokenizer = tokenizer

        # Close the caption file deterministically instead of leaking the
        # handle; honor the `encoding` parameter, which was previously unused.
        with open(caption_path, "r", encoding=encoding) as f:
            self.captions = json.load(f)

        self.batch_size = batch_size
        self.num_workers = num_workers
# ----- Example #2 -----
def LoadDatasetTransfer(args, task_cfg, id):
    """Build the transfer-split dataset and dataloader for a single task.

    Returns a 5-tuple (task_batch_size, task_num_iters, task_ids,
    task_datasets_val, task_dataloader_val); the dicts are keyed by the
    "TASK<id>" string, and task_ids is that string itself.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=True)

    task = "TASK" + id
    cfg = task_cfg[task]

    path1 = cfg["features_h5path1"]
    path2 = cfg["features_h5path2"]

    # An empty path means "no features of that kind"; otherwise open a
    # reader keyed by the path.
    task_feature_reader1 = {
        path1: ImageFeaturesH5Reader(path1, args.in_memory)
        if path1 != "" else None
    }
    task_feature_reader2 = {
        path2: ImageFeaturesH5Reader(path2, args.in_memory)
        if path2 != "" else None
    }

    task_datasets_val = {}
    task_dataloader_val = {}
    task_batch_size = {}
    task_num_iters = {}

    task_ids = task
    task_name = cfg["name"]

    batch_size = args.batch_size
    if args.local_rank != -1:
        # Distributed run: split the global batch across processes.
        batch_size = int(batch_size / dist.get_world_size())

    logger.info("Loading %s Dataset with batch size %d" %
                (cfg["name"], batch_size))

    task_datasets_val[task] = DatasetMapTrans[task_name](
        task=cfg["name"],
        dataroot=cfg["dataroot"],
        annotations_jsonpath=cfg["trans_annotations_jsonpath"],
        image_features_reader=task_feature_reader1[path1],
        gt_image_features_reader=task_feature_reader2[path2],
        tokenizer=tokenizer,
        bert_model=args.bert_model,
        padding_index=0,
        max_seq_length=cfg["max_seq_length"],
        max_region_num=cfg["max_region_num"],
    )

    task_dataloader_val[task] = DataLoader(
        task_datasets_val[task],
        shuffle=False,
        batch_size=batch_size,
        pin_memory=True,
    )

    task_num_iters[task] = len(task_dataloader_val[task])
    task_batch_size[task] = batch_size

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_val,
        task_dataloader_val,
    )
# ----- Example #3 -----
def LoadDatasets(args, task_cfg, task_id, split="trainval"):
    """Build the train and/or val datasets and dataloaders for one task.

    Args:
        args: parsed namespace (bert_model, do_lower_case, in_memory,
            num_workers, local_rank, gradient_accumulation_steps, ...).
        task_cfg: task configuration mapping keyed by "TASK<id>".
        task_id: task identifier appended to "TASK".
        split: any string containing "train" and/or "val"; the default
            "trainval" builds both splits.

    Returns:
        Tuple (task_batch_size, task_num_iters, task_ids,
        task_datasets_train, task_datasets_val, task_dataloader_train,
        task_dataloader_val); every dict is keyed by the "TASK<id>" string.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    task_feature_reader1 = {}
    task_feature_reader2 = {}

    task = "TASK" + task_id
    if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
        task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
    if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
        task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # Initialize the feature readers; an empty path means "no features".
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)
    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_train = {}
    task_datasets_val = {}
    task_dataloader_train = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    # (A redundant re-assignment of `task` was removed here.)
    task_name = task_cfg[task]["name"]
    task_ids.append(task)
    # The per-step batch shrinks with gradient accumulation.
    batch_size = task_cfg[task][
        "batch_size"] // args.gradient_accumulation_steps
    num_workers = args.num_workers
    if args.local_rank != -1:
        # Distributed run: split batch and worker budget across processes.
        batch_size = int(batch_size / dist.get_world_size())
        num_workers = int(num_workers / dist.get_world_size())

    logger.info("Loading %s Dataset with batch size %d" %
                (task_cfg[task]["name"], batch_size))

    task_datasets_train[task] = None
    if "train" in split:
        task_datasets_train[task] = DatasetMapTrain[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["train_annotations_jsonpath"],
            split=task_cfg[task]["train_split"],
            image_features_reader=task_feature_reader1[task_cfg[task]
                                                       ["features_h5path1"]],
            gt_image_features_reader=task_feature_reader2[
                task_cfg[task]["features_h5path2"]],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
        )

    task_datasets_val[task] = None
    if "val" in split:
        task_datasets_val[task] = DatasetMapTrain[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["val_annotations_jsonpath"],
            split=task_cfg[task]["val_split"],
            image_features_reader=task_feature_reader1[task_cfg[task]
                                                       ["features_h5path1"]],
            gt_image_features_reader=task_feature_reader2[
                task_cfg[task]["features_h5path2"]],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
        )

    task_num_iters[task] = 0
    task_batch_size[task] = 0
    if "train" in split:
        if args.local_rank == -1:
            train_sampler = RandomSampler(task_datasets_train[task])
        else:
            # TODO: check if this works with current data generator from disk that relies on next(file)
            # (it doesn't return item back by index)
            train_sampler = DistributedSampler(task_datasets_train[task])

        task_dataloader_train[task] = DataLoader(
            task_datasets_train[task],
            sampler=train_sampler,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_train[task])
        task_batch_size[task] = batch_size

    if "val" in split:
        # NOTE(review): the val loader hardcodes 2 workers instead of the
        # computed `num_workers` — confirm whether that is intentional.
        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=2,
            pin_memory=True,
        )

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_train,
        task_datasets_val,
        task_dataloader_train,
        task_dataloader_val,
    )
# ----- Example #4 -----
def LoadDatasetEval(args, task_cfg, ids):
    """Build evaluation datasets and dataloaders for each task id in `ids`.

    Returns:
        Tuple (task_batch_size, task_num_iters, task_ids,
        task_datasets_val, task_dataloader_val); every dict is keyed by the
        "TASK<id>" string.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=True)

    # Collect the distinct feature paths first so a path shared by several
    # tasks is only opened once.
    task_feature_reader1 = {}
    task_feature_reader2 = {}
    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
            task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
        if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
            task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # Initialize the feature readers; an empty path means "no features".
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_val = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        task_ids.append(task)
        task_name = task_cfg[task]["name"]
        batch_size = args.batch_size
        if args.local_rank != -1:
            # Distributed run: split the global batch across processes.
            batch_size = int(batch_size / dist.get_world_size())

        # Divide the worker budget evenly across the evaluated tasks.
        num_workers = int(args.num_workers / len(ids))
        logger.info("Loading %s Dataset with batch size %d" %
                    (task_cfg[task]["name"], batch_size))

        # An explicit --split overrides the configured val split.
        if args.split:
            eval_split = args.split
        else:
            eval_split = task_cfg[task]["val_split"]

        task_datasets_val[task] = DatasetMapEval[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["val_annotations_jsonpath"],
            split=eval_split,
            image_features_reader=task_feature_reader1[task_cfg[task]
                                                       ["features_h5path1"]],
            gt_image_features_reader=task_feature_reader2[
                task_cfg[task]["features_h5path2"]],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            clean_datasets=args.clean_train_sets,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
            captions_dir=task_cfg[task]["captions_dir"])

        # Fix: use the computed per-task worker budget; the original
        # hardcoded num_workers=10 and left `num_workers` unused.
        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_val[task])
        task_batch_size[task] = batch_size

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_val,
        task_dataloader_val,
    )
# ----- Example #5 -----
def str2bool(value):
    """Parse a command-line boolean.

    argparse's ``type=bool`` is a well-known pitfall: ``bool("False")`` is
    True because every non-empty string is truthy. This helper accepts the
    usual textual spellings instead.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() in ("yes", "true", "t", "1")


def main():
    """Parse CLI arguments, load the RefCOCO val split, and visualize the
    masks from the first batch."""
    parser = argparse.ArgumentParser()

    # Data files for FOIL task.
    parser.add_argument(
        "--features_h5path",
        default=
        "data/datasets/refcoco/refcoco_unc/refcoco_resnext152_faster_rcnn_genome.lmdb",
    )
    parser.add_argument(
        "--gt_features_h5path",
        default=
        "data/datasets/refcoco/refcoco_unc/refcoco_gt_resnext152_faster_rcnn_genome.lmdb",
    )

    parser.add_argument("--instances-jsonpath", default="data/referExpression")
    parser.add_argument("--task", default="refcoco+")

    # Required parameters
    parser.add_argument(
        "--in_memory",
        default=False,
        type=str2bool,  # fix: type=bool parses "--in_memory False" as True
        help="whether use chunck for parallel training.",
    )
    parser.add_argument(
        "--bert_model",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.",
    )

    parser.add_argument(
        "--pretrained_weight",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.",
    )

    parser.add_argument(
        "--output_dir",
        default="save",
        type=str,
        help=
        "The output directory where the model checkpoints will be written.",
    )

    parser.add_argument(
        "--config_file",
        default="config/bert_base_6layer_6conect.json",
        type=str,
        help="The config file which specified the model details.",
    )
    # Other parameters
    parser.add_argument(
        "--clean_train_sets",
        default=True,
        type=str2bool,  # fix: type=bool parses "False" as True
        help="whether clean train sets for multitask data.",
    )
    parser.add_argument(
        "--max_seq_length",
        default=30,
        type=int,
        help=
        "The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.",
    )
    parser.add_argument("--tasks",
                        default="",
                        type=str,
                        help="1-2-3... training task separate by -")
    parser.add_argument(
        "--train_batch_size",
        default=128,
        type=int,
        help="Total batch size for training.",
    )
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Whether not to use CUDA when available")
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help=
        "Whether to lower case the input text. True for uncased models, False for cased models.",
    )

    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="local_rank for distributed training on gpus",
    )

    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumualte before performing a backward/update pass.",
    )
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit float precision instead of 32-bit",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=20,
        help="Number of workers in the dataloader.",
    )
    parser.add_argument(
        "--from_pretrained",
        action="store_true",
        help="Wheter the tensor is from pretrained.",
    )
    parser.add_argument(
        "--baseline",
        action="store_true",
        help="Wheter to use the baseline model (single bert).",
    )

    parser.add_argument(
        "--use_chunk",
        default=0,
        type=float,
        help="whether use chunck for parallel training.",
    )

    parser.add_argument(
        "--split",
        default="test",
        type=str,
        help="whether use chunck for parallel training.",
    )

    args = parser.parse_args()

    # Load (and thereby validate) the model configuration file.
    config = BertConfig.from_json_file(args.config_file)

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    n_gpu = torch.cuda.device_count()
    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    # Effective per-step batch shrinks with gradient accumulation.
    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    image_features_reader = ImageFeaturesH5Reader(args.features_h5path, True)
    gt_image_features_reader = ImageFeaturesH5Reader(args.gt_features_h5path,
                                                     True)

    dataset = ReferExpressionDataset(
        task="refcoco",
        dataroot="data/datasets/refcoco/",
        annotations_jsonpath="",
        split="val",
        image_features_reader=image_features_reader,
        gt_image_features_reader=gt_image_features_reader,
        tokenizer=tokenizer,
        bert_model=args.bert_model,
        clean_datasets=True,
        padding_index=0,
        max_seq_length=20,
        max_region_num=201,
    )

    dataloader = DataLoader(dataset=dataset,
                            batch_size=4,
                            shuffle=False,
                            pin_memory=True)

    dataset_iter = iter(dataloader)

    # Pull a single batch and report its tensor shapes.
    features, spatials, image_mask, caption, target, input_mask, segment_ids, co_attention_mask, image_id, mask = next(
        dataset_iter)

    pprint(
        f'features: {features.shape}, spatials: {spatials.shape}, image_mask: {image_mask.shape}, caption: {caption.shape}, target: {target.shape}, input_mask: {input_mask.shape}, segment_ids: {segment_ids.shape}, co_attention_mask: {co_attention_mask.shape}, image_id: {image_id.shape}, mask: {mask.shape}'
    )

    # Display each mask in the first batch.
    for mask_ in mask:
        plt.imshow(mask_)
        plt.show()
# ----- Example #6 -----
def LoadDatasetEval(args, task_cfg, ids):
    """Build evaluation datasets and dataloaders for each task id in `ids`.

    args.in_memory decides whether the LMDB feature data is loaded into RAM
    or read lazily from disk.

    Returns:
        Tuple (task_batch_size, task_num_iters, task_ids,
        task_datasets_val, task_dataloader_val); every dict is keyed by the
        "TASK<id>" string.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=True)

    # Collect the distinct feature paths first so a path shared by several
    # tasks is only opened once.
    task_feature_reader1 = {}
    task_feature_reader2 = {}
    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        if task_cfg[task]["features_h5path1"] not in task_feature_reader1:
            task_feature_reader1[task_cfg[task]["features_h5path1"]] = None
        if task_cfg[task]["features_h5path2"] not in task_feature_reader2:
            task_feature_reader2[task_cfg[task]["features_h5path2"]] = None

    # Initialize the feature readers; an empty path means "no features".
    # Only the loading procedure is set up here — no data is read yet.
    for features_h5path in task_feature_reader1.keys():
        if features_h5path != "":
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory
            )

    for features_h5path in task_feature_reader2.keys():
        if features_h5path != "":
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory
            )

    task_datasets_val = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    for i, task_id in enumerate(ids):
        task = "TASK" + task_id
        task_ids.append(task)
        task_name = task_cfg[task]["name"]
        batch_size = args.batch_size
        if args.local_rank != -1:
            # Distributed run: split the global batch across processes.
            batch_size = int(batch_size / dist.get_world_size())

        # Divide the worker budget evenly across the evaluated tasks.
        num_workers = int(args.num_workers / len(ids))
        logger.info(
            "Loading %s Dataset with batch size %d"
            % (task_cfg[task]["name"], batch_size)
        )

        # An explicit --split overrides the configured val split.
        if args.split:
            eval_split = args.split
        else:
            eval_split = task_cfg[task]["val_split"]

        # DatasetMapEval maps a task *name* to its dataset class; the data
        # is actually loaded here.  (Stray debug prints removed.)
        task_datasets_val[task] = DatasetMapEval[task_name](
            task=task_cfg[task]["name"],
            dataroot=task_cfg[task]["dataroot"],
            annotations_jsonpath=task_cfg[task]["val_annotations_jsonpath"],
            split=eval_split,
            image_features_reader=task_feature_reader1[
                task_cfg[task]["features_h5path1"]
            ],
            gt_image_features_reader=task_feature_reader2[
                task_cfg[task]["features_h5path2"]
            ],
            tokenizer=tokenizer,
            bert_model=args.bert_model,
            clean_datasets=args.clean_train_sets,
            padding_index=0,
            max_seq_length=task_cfg[task]["max_seq_length"],
            max_region_num=task_cfg[task]["max_region_num"],
        )

        # Fix: use the computed per-task worker budget; the original
        # hardcoded num_workers=10 and left `num_workers` unused.
        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_val[task])
        task_batch_size[task] = batch_size

    return (
        task_batch_size,
        task_num_iters,
        task_ids,
        task_datasets_val,
        task_dataloader_val,
    )
# ----- Example #7 -----
def LoadDatasetEval(args, task_cfg, ids):
    """Construct evaluation datasets and dataloaders for every task in `ids`.

    Returns (task_batch_size, task_num_iters, task_ids, task_datasets_val,
    task_dataloader_val); the dicts are keyed by the "TASK<id>" string.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=True)

    # Gather the distinct feature paths across all requested tasks so a
    # path shared by several tasks is only opened once.
    task_feature_reader1 = {}
    task_feature_reader2 = {}
    for task_id in ids:
        cfg = task_cfg['TASK' + task_id]
        task_feature_reader1.setdefault(cfg['features_h5path1'], None)
        task_feature_reader2.setdefault(cfg['features_h5path2'], None)

    # Open a reader per non-empty path ('' marks an absent feature file).
    for features_h5path in task_feature_reader1:
        if features_h5path != '':
            task_feature_reader1[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    for features_h5path in task_feature_reader2:
        if features_h5path != '':
            task_feature_reader2[features_h5path] = ImageFeaturesH5Reader(
                features_h5path, args.in_memory)

    task_datasets_val = {}
    task_dataloader_val = {}
    task_ids = []
    task_batch_size = {}
    task_num_iters = {}

    for task_id in ids:
        task = 'TASK' + task_id
        cfg = task_cfg[task]
        task_ids.append(task)

        batch_size = args.batch_size
        if args.local_rank != -1:
            # Distributed run: split the global batch across processes.
            batch_size = int(batch_size / dist.get_world_size())

        # Divide the worker budget evenly across the evaluated tasks.
        num_workers = int(args.num_workers / len(ids))
        logger.info("Loading %s Dataset with batch size %d" %
                    (cfg['name'], batch_size))

        # An explicit --split overrides the configured val split.
        eval_split = args.split if args.split else cfg['val_split']

        if args.split == 'train':
            annotations_jsonpath = cfg['train_annotations_jsonpath']
        else:
            annotations_jsonpath = cfg['val_annotations_jsonpath']

        # NOTE(review): DatasetMapEval is indexed here by the "TASK<id>" key
        # rather than by the task *name* as the sibling loaders do — confirm
        # this matches how DatasetMapEval is declared in this codebase.
        task_datasets_val[task] = DatasetMapEval[task](
            task=cfg['name'],
            dataroot=cfg['dataroot'],
            annotations_jsonpath=annotations_jsonpath,
            split=eval_split,
            image_features_reader=task_feature_reader1[
                cfg['features_h5path1']],
            gt_image_features_reader=task_feature_reader2[
                cfg['features_h5path2']],
            tokenizer=tokenizer,
            padding_index=0,
            max_seq_length=cfg['max_seq_length'],
            max_region_num=cfg['max_region_num'])

        task_dataloader_val[task] = DataLoader(
            task_datasets_val[task],
            shuffle=False,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=True,
        )

        task_num_iters[task] = len(task_dataloader_val[task])
        task_batch_size[task] = batch_size

    return task_batch_size, task_num_iters, task_ids, task_datasets_val, task_dataloader_val