def val_data_loader(val_file):
    """Build the validation DataLoader for the text-only MemeDataset.

    Args:
        val_file: Path to the validation jsonl file.

    Returns:
        A torch DataLoader over the text-only validation dataset, batched
        and collated according to the module-level ``config``.
    """
    dataset = MemeDataset(filepath=val_file,
                          text_only=True,
                          text_padding=tokenizer_func)
    loader = data.DataLoader(dataset,
                             batch_size=config['batch_size'],
                             num_workers=config['num_workers'],
                             collate_fn=dataset.get_collate_fn())
    return loader
def test_data_loader(test_file):
    """Build the test DataLoader for the text-only MemeDataset.

    Unlike the validation loader, the dataset is constructed with
    ``return_ids=True`` so each batch carries sample identifiers
    (needed to write out predictions).

    Args:
        test_file: Path to the test jsonl file.

    Returns:
        A torch DataLoader over the text-only test dataset.
    """
    dataset = MemeDataset(filepath=test_file,
                          text_only=True,
                          text_padding=tokenizer_func,
                          return_ids=True)
    loader = data.DataLoader(dataset,
                             batch_size=config['batch_size'],
                             num_workers=config['num_workers'],
                             collate_fn=dataset.get_collate_fn())
    return loader
def val_data_loader(val_file):
    """Build the validation DataLoader over precomputed image features.

    Args:
        val_file: Path to the validation jsonl file.

    Returns:
        A torch DataLoader over the feature-based validation dataset,
        batched and collated according to the module-level ``config``.
    """
    val_dataset = MemeDataset(
        filepath=val_file,
        feature_dir=config['feature_path'],
        preload_images=False,
        # Fix: was hard-coded to debug=True, which forced debug-mode dataset
        # loading on every validation run. Honor the run configuration
        # instead (other loaders in this file gate on config['debug']).
        debug=config.get('debug', False),
        text_padding=tokenizer_func,
        confidence_threshold=config['object_conf_thresh'])
    return data.DataLoader(val_dataset,
                           batch_size=config['batch_size'],
                           num_workers=config['num_workers'],
                           collate_fn=val_dataset.get_collate_fn())
def train_data_loader(train_file):
    """Build the training DataLoader (text-only) with confounder oversampling.

    In debug mode the full training set is swapped for the small
    ``dev_seen`` split so iterations are fast.

    Args:
        train_file: Path to the training jsonl file (ignored when
            ``config['debug']`` is set).

    Returns:
        A torch DataLoader driven by a ConfounderSampler.
    """
    if config['debug']:
        train_file = os.path.join(config["data_path"], "dev_seen.jsonl")

    dataset = MemeDataset(filepath=train_file,
                          text_only=True,
                          text_padding=tokenizer_func)

    # shuffle is mutually exclusive with sampler; the sampler shuffles anyway.
    sampler = ConfounderSampler(dataset,
                                repeat_factor=config["confounder_repeat"])
    return data.DataLoader(dataset,
                           batch_size=config['batch_size'],
                           num_workers=config['num_workers'],
                           collate_fn=dataset.get_collate_fn(),
                           pin_memory=True,
                           sampler=sampler)
def train_data_loader(train_file):
    """Build the training DataLoader over precomputed image features.

    Uses a ConfounderSampler to oversample confounder examples; the
    sampler performs its own shuffling (shuffle= is mutually exclusive
    with sampler=).

    Args:
        train_file: Path to the training jsonl file.

    Returns:
        A torch DataLoader driven by a ConfounderSampler.
    """
    train_dataset = MemeDataset(
        filepath=train_file,
        feature_dir=config['feature_path'],
        preload_images=False,
        # Fix: was hard-coded to debug=True, which would silently build the
        # *training* dataset in debug mode on every run. Honor the run
        # configuration instead (see the text-only train loader, which
        # gates on config['debug']).
        debug=config.get('debug', False),
        text_padding=tokenizer_func,
        confidence_threshold=config['object_conf_thresh'])
    return data.DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        num_workers=config['num_workers'],
        collate_fn=train_dataset.get_collate_fn(),
        pin_memory=True,
        # shuffle is mutually exclusive with sampler; the sampler shuffles.
        sampler=ConfounderSampler(train_dataset,
                                  repeat_factor=config["confounder_repeat"]))
val_dataset = MemeDataset(filepath=os.path.join(config['data_path'], 'dev_seen.jsonl'), feature_dir=config['feature_path'], text_padding=tokenizer_func, filter_text=config["filter_text"]) test_dataset = MemeDataset(filepath=os.path.join(config['data_path'], 'test_seen.jsonl'), feature_dir=config['feature_path'], text_padding=tokenizer_func, filter_text=config["filter_text"]) config['train_loader'] = data.DataLoader( train_dataset, batch_size=config['batch_size'], num_workers=config['num_workers'], collate_fn=train_dataset.get_collate_fn(), shuffle=True, pin_memory=True) config['val_loader'] = data.DataLoader( val_dataset, batch_size=config['batch_size'], num_workers=config['num_workers'], collate_fn=val_dataset.get_collate_fn()) config['test_loader'] = data.DataLoader( test_dataset, batch_size=config['batch_size'], num_workers=config['num_workers'], collate_fn=test_dataset.get_collate_fn()) try: trainer = TrainerUniter(config)