def create_payloads(database_tasks):
    """Create train/valid/test Payloads for every task in ``database_tasks``.

    Fixes vs. the previous version: removed the unused local ``splits`` list
    and collapsed the three copy-pasted per-split branches into one loop.

    Args:
        database_tasks: mapping of task name -> task object exposing
            ``batch_size`` and ``train_inputs`` / ``dev_inputs`` /
            ``test_inputs``; each inputs object is indexed [0], [1] (fed to
            ``create_BERT_tensor``) and [2] (labels) — presumably
            (tokens, segments, labels); confirm against the caller.

    Returns:
        list of Payload objects, three per task, in train/dev/test order
        (matching the original append order).
    """
    # (name suffix, per-split inputs attribute, split label given to Payload).
    # Note the "dev" payload is registered under the split name "valid".
    split_specs = [
        ("train", "train_inputs", "train"),
        ("dev", "dev_inputs", "valid"),
        ("test", "test_inputs", "test"),
    ]
    payloads = []
    for i, task_name in enumerate(database_tasks):
        input_task = database_tasks[task_name]
        batch_size = input_task.batch_size
        for suffix, inputs_attr, split in split_specs:
            inputs = getattr(input_task, inputs_attr)
            X = {"data": create_BERT_tensor(inputs[0], inputs[1])}
            Y = inputs[2]
            payloads.append(
                Payload.from_tensors(
                    f"Payload{i}_{suffix}",
                    X,
                    Y,
                    task_name,
                    split,
                    batch_size=batch_size,
                )
            )
    return payloads
def create_payloads(N, T, batch_size=1):
    """Build synthetic payloads for T parallel linear-boundary tasks.

    Draws T independent instance sets from the same uniform distribution on
    [-1, 1]^2; every payload carries labelsets for all T tasks (binary
    classification against parallel linear boundaries, labels in {1, 2}).

    Returns a list of Payload objects, one per (task, split) pair.
    """
    # Every labelset{t} supervises the matching task{t}.
    labels_to_tasks = {f"labelset{t}": f"task{t}" for t in range(T)}

    payloads = []
    for task_idx in range(T):
        # Uniform points in [-1, 1]^2.
        X = 2 * np.random.random((N, 2)) - 1

        # Two label columns: same boundary direction, different offsets.
        Y = np.zeros((N, 2))
        Y[:, 0] = (X[:, 0] > X[:, 1] + 0.5).astype(int) + 1
        Y[:, 1] = (X[:, 0] > X[:, 1] + 0.25).astype(int) + 1

        # Globally unique uids: task k owns the range [k*N, (k+1)*N).
        uids = list(range(task_idx * N, (task_idx + 1) * N))

        X = torch.tensor(X, dtype=torch.float)
        Y = torch.tensor(Y, dtype=torch.long)

        uid_lists, Xs, Ys = split_data(
            uids, X, Y, splits=[0.8, 0.1, 0.1], shuffle=True
        )

        for split_idx, split in enumerate(SPLITS):
            X_dict = {"data": Xs[split_idx], "uids": uid_lists[split_idx]}
            Y_dict = {f"labelset{j}": Ys[split_idx][:, j] for j in range(T)}
            dataset = MmtlDataset(X_dict, Y_dict)
            loader = MmtlDataLoader(dataset, batch_size=batch_size)
            payloads.append(
                Payload(f"payload{task_idx}_{split}", loader, labels_to_tasks, split)
            )
    return payloads
def create_payloads(
    task_name,
    uid_lists,
    Xs,
    Ys,
    batch_size=1,
    slice_funcs=None,
    SPLITS=("train", "valid", "test"),
    verbose=False,
):
    """Create one Payload per split for a single task, with optional slices.

    Fixes vs. the previous version: the mutable default arguments
    (``slice_funcs={}``, ``SPLITS=[...]``) are replaced with ``None`` / a
    tuple, and ``labels_to_tasks`` is rebuilt per split instead of one shared
    dict being mutated and handed to every Payload (same contents, no
    aliasing between payloads).

    Args:
        task_name: name of the task the gold labelset supervises.
        uid_lists: per-split example uids, index-aligned with SPLITS.
        Xs: per-split feature arrays, index-aligned with SPLITS.
        Ys: per-split gold labels, index-aligned with SPLITS.
        batch_size: batch size for each split's MmtlDataLoader.
        slice_funcs: optional mapping slice_name -> slicing function; when
            given, extra "ind"/"pred" labelsets are generated per slice.
        SPLITS: split names; callers passing their own list still work.
        verbose: if True, print the created payloads.

    Returns:
        list of Payload objects, one per split.
    """
    if slice_funcs is None:
        slice_funcs = {}

    payloads = []
    for i, split in enumerate(SPLITS):
        payload_name = f"payload_{split}"
        # Fresh mapping per payload so later mutation of one payload's
        # labels_to_tasks cannot leak into the others.
        labels_to_tasks = {"labelset_gold": task_name}

        # convert to torch tensors
        X_dict = {
            "data": torch.Tensor(Xs[i]),
            "uids": torch.Tensor(uid_lists[i]),
        }
        Y_dict = {"labelset_gold": torch.Tensor(Ys[i])}

        if slice_funcs:
            slice_labels = generate_slice_labels(Xs[i], Ys[i], slice_funcs)
            # labelset_name -> {"ind": [1,2,1,2,2], "pred": [0,1,0,2,2]}
            for slice_name, slice_label in slice_labels.items():
                # slice_type \in {"ind", "pred"}
                for slice_type, label in slice_label.items():
                    slice_task_name = f"{task_name}:{slice_name}:{slice_type}"
                    slice_labelset_name = f"labelset:{slice_name}:{slice_type}"
                    Y_dict[slice_labelset_name] = torch.tensor(label)
                    labels_to_tasks[slice_labelset_name] = slice_task_name

        dataset = MmtlDataset(X_dict, Y_dict)
        data_loader = MmtlDataLoader(dataset, batch_size=batch_size)
        payload = Payload(payload_name, data_loader, labels_to_tasks, split)
        payloads.append(payload)

    if verbose:
        print(f"Creating {len(payloads)} payloads...")
        for p in payloads:
            print(p)

    return payloads
# Fragment: builds the three per-split payloads for one task. Relies on names
# defined outside this view (`input_task`, `create_BERT_tensor`, `Payload`,
# `payload_*_name`, `task_name`, `batch_size`) — presumably the loop body of a
# payload-creation function; verify against the enclosing scope.
train_inputs = input_task.train_inputs
dev_inputs = input_task.dev_inputs
test_inputs = input_task.test_inputs
# Each inputs object is indexed [0]/[1] (fed to create_BERT_tensor) and
# [2] (labels) — assumed (tokens, segments, labels); TODO confirm.
train_X = {"data": create_BERT_tensor(train_inputs[0], train_inputs[1])}
dev_X = {"data": create_BERT_tensor(dev_inputs[0], dev_inputs[1])}
test_X = {"data": create_BERT_tensor(test_inputs[0], test_inputs[1])}
train_Y = train_inputs[2]
dev_Y = dev_inputs[2]
test_Y = test_inputs[2]
# NOTE: the "dev" payload is registered under the split name "valid".
payload_train = Payload.from_tensors(payload_train_name, train_X, train_Y, task_name, "train", batch_size=batch_size)
payload_dev = Payload.from_tensors(payload_dev_name, dev_X, dev_Y, task_name, "valid", batch_size=batch_size)
payload_test = Payload.from_tensors(payload_test_name, test_X, test_Y, task_name, "test", batch_size=batch_size)
def create_glue_tasks_payloads(task_names, skip_payloads=False, **kwargs):
    """Build MMTL Task and Payload objects for the requested GLUE task names.

    All tasks share a single BERT encoder (`input_module`) and a CLS-extraction
    middle module. For each primary task (one not listed as an auxiliary task
    in config["auxiliary_task_dict"]), per-split data loaders are wrapped into
    Payloads unless `skip_payloads` is set; slice tasks/labelsets and auxiliary
    labelsets are attached as configured.

    Args:
        task_names: non-empty list of GLUE/auxiliary task names (e.g. "COLA",
            "STSB", "SPACY_NER").
        skip_payloads: if True, build Task objects only, no Payloads.
        **kwargs: overrides merged into `task_defaults` via
            recursive_merge_dicts; keys used here include "seed",
            "encoder_type", "bert_model", "bert_kwargs", "dropout",
            "task_dl_kwargs", "dl_kwargs", "attention", "auxiliary_task_dict",
            "preprocessed", "splits", "max_len", "max_datapoints",
            "split_prop", "slice_dict", "auxiliary_loss_multiplier".

    Returns:
        (tasks, payloads): lists of Task objects (including slice tasks) and
        Payload objects (empty of payloads for auxiliary-only tasks or when
        skip_payloads is True).

    Raises:
        NotImplementedError: for any encoder_type other than "bert".
        Exception: for an unrecognized task name.
    """
    assert len(task_names) > 0
    config = recursive_merge_dicts(task_defaults, kwargs)

    # Seed is drawn randomly (and reported) when not supplied.
    if config["seed"] is None:
        config["seed"] = np.random.randint(1e6)
        print(f"Using random seed: {config['seed']}")
    set_seed(config["seed"])

    # share bert encoder for all tasks
    if config["encoder_type"] == "bert":
        bert_kwargs = config["bert_kwargs"]
        bert_model = BertRaw(config["bert_model"], **bert_kwargs)
        # Encoder output ("neck") width: 768 for *base* models, 1024 for *large*.
        # NOTE(review): any other model-name pattern leaves neck_dim unbound.
        if "base" in config["bert_model"]:
            neck_dim = 768
        elif "large" in config["bert_model"]:
            neck_dim = 1024
        input_module = bert_model
        pooler = bert_model.pooler if bert_kwargs["pooler"] else None
        cls_middle_module = BertExtractCls(pooler=pooler, dropout=config["dropout"])
    else:
        raise NotImplementedError

    # Create dict override dl_kwarg for specific task
    # e.g. {"STSB": {"batch_size": 2}}
    task_dl_kwargs = {}
    if config["task_dl_kwargs"]:
        # Spec string format: "TASK.key.value,TASK.key.value". (The
        # comprehension variable shadows `config`; the outer dict is read
        # before iteration starts, so this is safe but easy to misread.)
        task_configs_str = [
            tuple(config.split(".")) for config in config["task_dl_kwargs"].split(",")
        ]
        for (task_name, kwarg_key, kwarg_val) in task_configs_str:
            # Values arrive as strings; only batch_size is coerced to int here.
            if kwarg_key == "batch_size":
                kwarg_val = int(kwarg_val)
            task_dl_kwargs[task_name] = {kwarg_key: kwarg_val}

    tasks = []
    payloads = []
    for task_name in task_names:
        # If a flag is specified for attention, use it, otherwise use identity module
        if config["attention"]:
            print("Using soft attention head")
            attention_module = SoftAttentionModule(neck_dim)
        else:
            attention_module = IdentityModule()

        # Pull out names of auxiliary tasks to be dealt with in a second step
        # TODO: fix this logic for cases where auxiliary task for task_name has
        # its own payload
        has_payload = task_name not in config["auxiliary_task_dict"]

        # Note whether this task has auxiliary tasks that apply to it and require spacy
        run_spacy = False
        for aux_task, target_payloads in config["auxiliary_task_dict"].items():
            run_spacy = run_spacy or (
                task_name in target_payloads
                and aux_task in SPACY_TASKS
                and aux_task in task_names
            )

        # Override general dl kwargs with task-specific kwargs
        dl_kwargs = copy.deepcopy(config["dl_kwargs"])
        if task_name in task_dl_kwargs:
            dl_kwargs.update(task_dl_kwargs[task_name])

        # Each primary task has data_loaders to load
        if has_payload and not skip_payloads:
            if config["preprocessed"]:
                datasets = load_glue_datasets(
                    dataset_name=task_name,
                    splits=config["splits"],
                    bert_vocab=config["bert_model"],
                    max_len=config["max_len"],
                    max_datapoints=config["max_datapoints"],
                    run_spacy=run_spacy,
                    verbose=True,
                )
            else:
                datasets = create_glue_datasets(
                    dataset_name=task_name,
                    splits=config["splits"],
                    bert_vocab=config["bert_model"],
                    max_len=config["max_len"],
                    max_datapoints=config["max_datapoints"],
                    generate_uids=kwargs.get("generate_uids", False),
                    run_spacy=run_spacy,
                    verbose=True,
                )
            # Wrap datasets with DataLoader objects
            data_loaders = create_glue_dataloaders(
                datasets,
                dl_kwargs=dl_kwargs,
                split_prop=config["split_prop"],
                splits=config["splits"],
                seed=config["seed"],
            )

        # ---- Primary GLUE tasks -------------------------------------------
        if task_name == "COLA":
            scorer = Scorer(
                standard_metrics=["accuracy"],
                custom_metric_funcs={matthews_corr: ["matthews_corr"]},
            )
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=scorer,
            )
        elif task_name == "SST2":
            # NOTE: no explicit scorer here, unlike the other binary tasks —
            # presumably falls back to the ClassificationTask default; confirm.
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
            )
        elif task_name == "MNLI":
            # 3-way entailment classification.
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=MulticlassHead(neck_dim, 3),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )
        elif task_name == "SNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=MulticlassHead(neck_dim, 3),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )
        elif task_name == "RTE":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )
        elif task_name == "WNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )
        elif task_name == "QQP":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(
                    custom_metric_funcs={acc_f1: ["accuracy", "f1", "acc_f1"]}
                ),
            )
        elif task_name == "MRPC":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(
                    custom_metric_funcs={acc_f1: ["accuracy", "f1", "acc_f1"]}
                ),
            )
        elif task_name == "STSB":
            # Regression task scored by Pearson/Spearman correlation only.
            scorer = Scorer(
                standard_metrics=[],
                custom_metric_funcs={
                    pearson_spearman: [
                        "pearson_corr",
                        "spearman_corr",
                        "pearson_spearman",
                    ]
                },
            )
            task = RegressionTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=RegressionHead(neck_dim),
                scorer=scorer,
            )
        elif task_name == "QNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )
        # AUXILIARY TASKS
        elif task_name == "THIRD":
            # A toy task that predict which third of the sentence each token is in
            OUT_DIM = 3
            task = TokenClassificationTask(
                name="THIRD",
                input_module=input_module,
                attention_module=attention_module,
                head_module=BertTokenClassificationHead(neck_dim, OUT_DIM),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )
        elif task_name == "BLEU":
            # Regression squashed through a sigmoid on both the prediction
            # path and inside the MSE loss.
            task = RegressionTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=RegressionHead(neck_dim),
                output_hat_func=torch.sigmoid,
                loss_hat_func=(
                    lambda out, Y_gold: F.mse_loss(torch.sigmoid(out), Y_gold)
                ),
                scorer=Scorer(custom_metric_funcs={mse: ["mse"]}),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )
        elif task_name == "SPACY_NER":
            OUT_DIM = len(SPACY_TAGS["SPACY_NER"])
            task = TokenClassificationTask(
                name=task_name,
                input_module=input_module,
                attention_module=attention_module,
                head_module=BertTokenClassificationHead(neck_dim, OUT_DIM),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )
        elif task_name == "SPACY_POS":
            OUT_DIM = len(SPACY_TAGS["SPACY_POS"])
            task = TokenClassificationTask(
                name=task_name,
                input_module=input_module,
                attention_module=attention_module,
                head_module=BertTokenClassificationHead(neck_dim, OUT_DIM),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )
        else:
            msg = (
                f"Task name {task_name} was not recognized as a primary or "
                f"auxiliary task."
            )
            raise Exception(msg)

        tasks.append(task)

        # Gather slice names
        slice_names = (
            config["slice_dict"].get(task_name, []) if config["slice_dict"] else []
        )

        # Add a task for each slice
        for slice_name in slice_names:
            slice_task_name = f"{task_name}_slice:{slice_name}"
            slice_task = create_slice_task(task, slice_task_name)
            tasks.append(slice_task)

        if has_payload and not skip_payloads:
            # Create payloads (and add slices/auxiliary tasks as applicable)
            for split, data_loader in data_loaders.items():
                payload_name = f"{task_name}_{split}"
                labels_to_tasks = {f"{task_name}_gold": task_name}
                payload = Payload(payload_name, data_loader, labels_to_tasks, split)

                # Add auxiliary label sets if applicable
                auxiliary_task_dict = config["auxiliary_task_dict"]
                for aux_task_name, target_payloads in auxiliary_task_dict.items():
                    if aux_task_name in task_names and task_name in target_payloads:
                        # The auxiliary-task function wraps/augments the payload.
                        aux_task_func = auxiliary_task_functions[aux_task_name]
                        payload = aux_task_func(payload)

                # Add a labelset slice to each split
                dataset = payload.data_loader.dataset
                for slice_name in slice_names:
                    slice_task_name = f"{task_name}_slice:{slice_name}"
                    slice_labels = create_slice_labels(
                        dataset, base_task_name=task_name, slice_name=slice_name
                    )
                    labelset_slice_name = f"{task_name}_slice:{slice_name}"
                    payload.add_label_set(
                        slice_task_name, labelset_slice_name, slice_labels
                    )

                payloads.append(payload)

    return tasks, payloads
max_len=task_config["max_len"], max_datapoints=args.max_datapoints, ) data_loaders = create_glue_dataloaders( datasets, dl_kwargs=dl_kwargs, split_prop=None, splits=splits, seed=123, ) for i, (split, data_loader) in enumerate(data_loaders.items()): state = states[i] payload_name = f"{task.name}_{split}" payload = Payload(payload_name, data_loader, [task.name], split) Ys, Ys_probs, Ys_preds = model.predict_with_gold( payload, [task.name], return_preds=True) if args.eval_split == "dev": target_metrics = {task.name: None} metrics_dict = {} scorer = model.task_map[task.name].scorer print(model_path) task_metrics_dict = scorer.score( Ys[task.name], Ys_probs[task.name], Ys_preds[task.name], target_metrics=target_metrics[task.name], )