merged_preds[i] = merge_regression(prediction_dict[i])
        elif args.task_type == 'multiclass':
            merged_preds[i] = avg_probs_multiclass(
                np.array(prediction_dict[i]))
    return merged_preds


for predictor_params in grid:
    # One iteration per hyper-parameter combination in `grid`.
    print(predictor_params, flush=True)
    predictor = Classifier(**predictor_params).to(device)
    if n_gpu > 1:
        predictor = torch.nn.DataParallel(predictor)

    # Collect the (name, parameter) pairs that are grouped for the optimizer
    # further down. With freeze_bert set, `model`'s parameters are left out,
    # so only the predictor is included; otherwise `model` and the predictor
    # are included, plus the discriminator when use_adversary is set.
    if args.freeze_bert:
        if args.use_adversary:
            # The two flags are mutually exclusive.
            raise Exception(
                'No purpose in using an adversary if BERT layers are frozen')
        param_optimizer = list(predictor.named_parameters())
    else:
        param_optimizer = list(model.named_parameters())
        param_optimizer += list(predictor.named_parameters())
        if args.use_adversary:
            param_optimizer += list(discriminator.named_parameters())

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(
            nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(
            nd in n for nd in no_decay)], 'weight_decay': 0.0}
            merged_preds[i] = merge_regression(prediction_dict[i])
        elif args.task_type == 'multiclass':
            merged_preds[i] = avg_probs_multiclass(np.array(
                prediction_dict[i]))
    return merged_preds


for predictor_params in grid:
    # Build a fresh predictor for this hyper-parameter combination.
    print(predictor_params, flush=True)
    predictor = Classifier(**predictor_params).to(device)
    if n_gpu > 1:
        predictor = torch.nn.DataParallel(predictor)

    # Choose which named parameters are passed on to the optimizer groups:
    #   freeze_bert, no adversary -> predictor only
    #   freeze_bert, adversary    -> rejected (raises)
    #   otherwise                 -> model + predictor (+ discriminator when
    #                                use_adversary is set)
    if args.freeze_bert:
        if args.use_adversary:
            raise Exception(
                'No purpose in using an adversary if BERT layers are frozen')
        param_optimizer = list(predictor.named_parameters())
    else:
        param_optimizer = (list(model.named_parameters()) +
                           list(predictor.named_parameters()))
        if args.use_adversary:
            param_optimizer = param_optimizer + list(
                discriminator.named_parameters())

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':