Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", nargs="+")
    parser.add_argument("--nocache", action="store_true")
    parser.add_argument("--n_workers", type=int, default=12)
    parser.add_argument("--test", action="store_true")
    args = parser.parse_args()

    py_utils.add_stdout_logger()
    if args.test:
        ds = ImageNetAnimals10k("test", None)
    else:
        ds = ImageNetAnimals10k("dev", None)

    evaluator = BiasThresholdEvaluator([0.05, 0.1, 0.15, 0.2, 0.3, 0.8, 1.0])
    evaluator_name = "text-thresh-eval-v2"
    evaluator.preprocess([ds])

    models = extract_models(args.model)
    if sum(len(runs) for model_dir, runs in models.values()) == 0:
        print("No models selected")
        return

    get_evaluation(models,
                   args.nocache,
                   ds,
                   evaluator,
                   evaluator_name,
                   128,
                   sort=False,
                   n_workers=args.n_workers)
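The early-exit check above relies on extract_models returning a mapping from model name to a (model_dir, runs) pair, the same shape that Example #3 below unpacks with models.items(). A minimal sketch of that assumed structure (names and paths are illustrative):

# Sketch of the assumed extract_models output: model name -> (model_dir, run_dirs)
models = {
    "model-a": ("runs/model-a", ["runs/model-a/r0", "runs/model-a/r1"]),
    "model-b": ("runs/model-b", []),
}
total_runs = sum(len(runs) for _, runs in models.values())
print(total_runs)  # 2; a total of 0 triggers the "No models selected" early return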
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("output_dir", nargs="+")
    parser.add_argument("--n_processes", type=int, default=4)
    parser.add_argument("--fp16", action="store_true")
    parser.add_argument("--seed", type=int)
    parser.add_argument("--nruns", type=int, default=1)
    args = parser.parse_args()

    py_utils.add_stdout_logger()

    for output_dir in args.output_dir:
        for i in range(args.nruns):
            # Re-load the trainer and model configs each run so every run
            # starts from a fresh state
            trainer: Trainer = load_config.load_config(
                join(output_dir, "trainer.json"))
            subdir = train_utils.select_subdir(output_dir)
            logging.info(f"Starting run for directory {subdir}")
            model: Model = load_config.load_config(
                join(output_dir, "model.json"))
            trainer.training_run(model,
                                 subdir,
                                 args.seed,
                                 args.n_processes,
                                 fp16=args.fp16)
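train_utils.select_subdir is not shown in this example; a hypothetical stand-in that allocates the next unused run directory under output_dir illustrates what each iteration of the loop above is assumed to do:

import os
from os.path import join

def select_subdir_sketch(output_dir):
    # Hypothetical equivalent of train_utils.select_subdir: create and return
    # the first unused "rN" run directory under output_dir.
    i = 0
    while os.path.exists(join(output_dir, f"r{i}")):
        i += 1
    subdir = join(output_dir, f"r{i}")
    os.makedirs(subdir)
    return subdir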
Example #3
def main():
  # Re-use if we are evaluating multiple models
  load_word_vectors.GLOBAL_CACHE = {}

  parser = argparse.ArgumentParser()
  parser.add_argument("output_dir", nargs="+")
  parser.add_argument("--batch_size", type=int, default=128)
  parser.add_argument("--dataset", choices=["hans", "dev", "test"], default="dev")
  args = parser.parse_args()

  py_utils.add_stdout_logger()

  if args.dataset == "test":
    ds = MnliDevUnmatched()
  elif args.dataset == "dev":
    ds = MnliDevMatched()
  else:
    ds = Hans()
  ds.cache = True
  models = extract_models(args.output_dir)

  if len(models) == 0:
    print("No models found")
    return

  if args.dataset == "hans":
    evaluator = ClfEnsembleEvaluator(output_format="{output}-{metric}")
    evaluator_name = "ensemble-eval-v1"
  else:
    evaluator = ClfHardEasyEvaluator(prefix_format="{output}-{metric}/{split}")
    evaluator_name = "hard-easy-eval-v1"

  models, all_stats = get_cached_evaluations(models, ds, evaluator_name)

  if len(models) == 0:
    print("All models were cached")
    return
  elif len(all_stats) == 0:
    logging.info("No models were cached")
  else:
    logging.info(f"{len(all_stats)} models ({sum(len(x) for x in all_stats.items())}) were cached")

  evaluator.preprocess([ds])
  for name, (model_dir, runs) in models.items():
    logging.info(f"Evaluating model: {name} ({len(runs)} runs)")
    for run in runs:
      run_evaluation(run, ds, args.batch_size, evaluator, evaluator_name,
                     cache=True, cache_model_output=True, n_processes=4)
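The cached-statistics log line above assumes all_stats maps each model name to its already-cached per-run results; a small sketch of that assumed shape and the run count it reports (values are illustrative):

all_stats = {
    "model-a": [{"acc": 0.81}, {"acc": 0.83}],
    "model-b": [{"acc": 0.78}],
}
n_runs = sum(len(x) for x in all_stats.values())
print(f"{len(all_stats)} models ({n_runs} runs) were cached")  # 2 models (3 runs) were cached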
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Directory of models of evaluate")
    parser.add_argument(
        "--dataset",
        choices=list(datasets_fns.keys()),
        help="Dataset to evaluate on; if not set it will be inferred"
        " based on what the models were trained on")
    parser.add_argument("--nocache", action="store_true")
    parser.add_argument("--test",
                        action="store_true",
                        help="Evaluated on the test data")
    args = parser.parse_args()

    is_test = args.test

    py_utils.add_stdout_logger()
    models = py_utils.extract_models(args.model)
    if len(models) == 0:
        logging.info("No models found")
        return 0

    if args.dataset is not None:
        fn = datasets_fns[args.dataset]
    else:
        all_train_ds = None
        for model_dir, _ in models.values():
            with open(join(model_dir, "trainer.json")) as f:
                trainer = json.load(f)
            train_ds = trainer["train_dataset"]["name"]
            if all_train_ds is None:
                all_train_ds = train_ds
            elif all_train_ds != train_ds:
                raise ValueError("No dataset given, and unable to infer seems "
                                 "models were trained on different datasets")
        logging.info(
            f"All models were trained on {all_train_ds}, so testing on the same bias"
        )
        if all_train_ds == "MNISTPatches":
            fn = get_patch
        elif all_train_ds == "MNISTDependent":
            fn = get_split
        elif all_train_ds == "MNISTBackgroundColor":
            fn = get_background
        else:
            raise ValueError(f"Unknown training dataset {all_train_ds}")

    id_test, ood_test = fn(is_test, True), fn(is_test, False)

    evaluator = ClfEnsembleEvaluator()

    logging.info("Evaluating OOD Test")
    get_evaluation(models,
                   args.nocache,
                   ood_test,
                   evaluator,
                   "ensemble-eval-v1",
                   128,
                   sort=False,
                   progress_bar=False)

    logging.info("Evaluating ID Test")
    get_evaluation(models,
                   args.nocache,
                   id_test,
                   evaluator,
                   "ensemble-eval-v1",
                   128,
                   sort=False,
                   progress_bar=False)
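The dataset-inference branch reads trainer.json from each model directory and only looks at the train_dataset name; an illustrative file of that shape (the field names come from the json.load call above, the value is hypothetical):

import json
from os.path import join

model_dir = "."
with open(join(model_dir, "trainer.json"), "w") as f:
    json.dump({"train_dataset": {"name": "MNISTPatches"}}, f)

with open(join(model_dir, "trainer.json")) as f:
    train_ds = json.load(f)["train_dataset"]["name"]
assert train_ds == "MNISTPatches"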
Example #5
def main():
    py_utils.add_stdout_logger()
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset",
                        choices=["patch", "split", "background"],
                        required=True,
                        help="Bias to train on")
    add_train_args(parser,
                   entropy_w=False,
                   default_adv_penalty=None,
                   default_batch_size=1024,
                   default_epochs=100,
                   default_entropy_penalty=None,
                   lc_weight_default=None)

    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--nruns", type=int, default=1)

    args = parser.parse_args()

    dataset = args.dataset

    if dataset == "patch":
        ds = MNISTPatches
        n_classes = 10
        w = 30
    elif dataset == "background":
        ds = MNISTBackgroundColor
        w = 28
        n_classes = 10
    elif dataset == "split":
        ds = MNISTDependent
        n_classes = 4
        w = 30
    else:
        raise NotImplementedError(f"Unknown dataset {dataset}")

    p = 0.9
    n_per_class = 200
    train = ds(p, True, (0, n_per_class))

    opt = SGD(args.lr, momentum=0.9)

    eval_sets = [
        EvalDataset(ds(p, True, (1400, 2400)),
                    TorchDataIterator(SubsetSampler(None, args.batch_size)),
                    "id"),
        EvalDataset(ds(1. / n_classes, True, (1400, 2400)),
                    TorchDataIterator(SubsetSampler(None, args.batch_size)),
                    "od"),
    ]

    train.cache = True
    for eval_ds in eval_sets:
        eval_ds.cache = True

    def build_model():
        hc = get_high_capacity_model(w, n_classes)
        if args.mode == "none":
            # An ensemble with a Null predictor
            predictor = ClfArgminEnsemble(
                [
                    ClfHead(predictor=NullMNISTPredictor(n_classes),
                            head_name="bias"),
                    ClfHead(predictor=hc, head_name="debiased")
                ],
                n_classes,
            )
        elif args.mode == "adv":
            if args.adversary_loss is None:
                if dataset == "patch":
                    adv_loss = 0.01
                elif dataset == "background":
                    adv_loss = 0.08
                elif dataset == "split":
                    adv_loss = 0.01
                else:
                    raise RuntimeError()
            else:
                adv_loss = args.adversary_loss

            if args.lc_weight is None:
                # Default depends on the bias
                if dataset == "patch":
                    lc_w = 0.7
                elif dataset == "background":
                    lc_w = 0.05
                elif dataset == "split":
                    lc_w = 0.02
                else:
                    raise RuntimeError()
            else:
                lc_w = args.lc_weight

            predictor = ClfBiAdversary(hc,
                                       get_low_capacity_model(w, n_classes),
                                       n_classes,
                                       adv_w=adv_loss,
                                       bias_loss=lc_w,
                                       main_loss=0.0,
                                       joint_loss=1.0,
                                       use_y_values=True,
                                       joint_adv=False)
        elif args.mode == "oracle":
            # An ensemble with a gold bias-predictor
            bias = FromBiasFeaturePredictor(p, n_classes)
            predictor = ClfArgminEnsemble(
                [
                    ClfHead(predictor=bias, head_name="bias"),
                    ClfHead(predictor=hc, head_name="debiased")
                ],
                n_classes,
            )
        else:
            # Ensemble with a trained low-capacity bias head; the mode controls
            # how (or whether) each head's logits are rescaled before combining
            if args.mode.startswith("mce"):
                rescaler = lambda: ArgminTransformFunction(
                    AffineNLL(
                        n_classes,
                        n_classes,
                        NumpyOptimizer(),
                        residual=True,
                        penalty=L2NormPenalty(0.002),
                        fix_last_bias_to_zero=True,
                    ))
            elif args.mode == "noci":
                rescaler = lambda: None
            elif args.mode == "nobp":
                rescaler = lambda: ArgminTransformFunction(AffineNLL(
                    n_classes,
                    n_classes,
                    NumpyOptimizer(),
                    residual=True,
                    penalty=L2NormPenalty(0.002),
                    fix_last_bias_to_zero=True,
                ),
                                                           backprop_argmin=
                                                           False)
            else:
                raise ValueError("Unknown mode: " + args.mode)

            predictor = ClfArgminEnsemble([
                ClfHead(
                    predictor=get_low_capacity_model(w, n_classes),
                    head_name="bias",
                    rescaler=rescaler(),
                    nll_penalty=0.2
                    if args.lc_weight is None else args.lc_weight,
                ),
                ClfHead(
                    predictor=hc,
                    head_name="debiased",
                    rescaler=rescaler(),
                )
            ], n_classes)

        return ImageClfModel(predictor)

    evaluator = ClfEnsembleEvaluator()

    if args.mode in {"mce", "nobp"}:
        hook = FitRescaleParameters(1024, None, sort=False)
    else:
        hook = None

    trainer = Trainer(
        opt,
        train,
        eval_sets,
        train_eval_iterator=TorchDataIterator(
            SubsetSampler(None, args.batch_size)),
        train_iterator=TorchDataIterator(
            StratifiedSampler(args.batch_size, n_repeat=10)),
        num_train_epochs=args.epochs,
        evaluator=evaluator,
        pre_eval_hook=hook,
        tb_factor=args.batch_size / 256,
        save_each_epoch=False,
        progress_bar=True,
        eval_progress_bar=False,
        epoch_progress_bar=False,
        early_stopping_criteria=StoppingPoint("train", "nll/joint", 3e-4, 3),
        log_to_tb=False,
    )

    for r in range(args.nruns):
        if args.nruns > 1:
            print("")
            print("")
            print("*" * 10 + f" STARTING RUN {r+1}/{args.nruns} " + "*" * 10)

        # Build a model for each run to ensure it is fully reset
        model = build_model()

        if args.output_dir:
            if r == 0:
                train_utils.clear_if_nonempty(args.output_dir)
                train_utils.init_model_dir(args.output_dir, trainer, model)

            subdir = train_utils.select_subdir(args.output_dir)
        else:
            subdir = None

        if args.init_only:
            return

        if subdir is not None:
            logging.info(f"Start run for {subdir}")

        if args.time:
            t0 = perf_counter()
        else:
            t0 = None

        try:
            if subdir is not None:
                with open(join(subdir, "console.out"), "w") as f:
                    trainer.training_run(model,
                                         subdir,
                                         no_cuda=True,
                                         print_out=f)
            else:
                trainer.training_run(model, subdir, no_cuda=True)
        except Exception as e:
            if args.nruns == 1 or isinstance(e, KeyboardInterrupt):
                raise e
            logging.warning("Error during training: " + str(e))
            continue

        if args.time:
            logging.info(f"Training took {perf_counter() - t0:.3f} seconds")
Example #6
def main():
    parser = argparse.ArgumentParser()
    add_train_args(parser,
                   default_entropy_penalty=0.1,
                   default_adv_penalty=0.3,
                   default_batch_size=256,
                   default_epochs=3,
                   lc_weight_default=None)

    args = parser.parse_args()
    lc_weight = args.lc_weight
    if lc_weight is None:
        # Default depends on the mode
        if args.mode == "adv":
            lc_weight = 0.3
        else:
            lc_weight = 0.2

    dbg = args.debug
    py_utils.add_stdout_logger()

    main_model = FromBertPredictor(FromPooled(FullyConnected(768, 3, None)))

    if args.mode in {"mce", "noci", "nobp", "adv"}:
        lc_model = decatt_bias(150 if dbg else 400, 200)
    elif args.mode == "oracle":
        lc_model = None
    elif args.mode == "none":
        lc_model = FromEmbeddingPredictor(NullClfPredictor(3))
    else:
        raise NotImplementedError(args.mode)

    if args.mode == "adv":
        predictor = ClfBiAdversary(main_model,
                                   lc_model,
                                   3,
                                   args.adversary_loss,
                                   joint_loss=1.0,
                                   bias_loss=lc_weight,
                                   use_y_values=True,
                                   joint_adv=False)
    elif args.mode == "oracle":
        predictor = ClfBiasMixinEnsemble(ExtractPooled(), 3,
                                         MnliHypothesisOnlyBias(), 768,
                                         args.entropy_penalty)
    else:
        if args.mode in {"mce", "nobp"}:
            rescaler = lambda: ArgminTransformFunction(
                AffineNLL(
                    3,
                    3,
                    NumpyOptimizer(),
                    residual=True,
                    penalty=L2NormPenalty(0.002),
                    fix_last_bias_to_zero=True,
                ),
                backprop_argmin=args.mode == "mce",
            )
        elif args.mode in {"noci", "none"}:
            rescaler = lambda: None
        else:
            raise RuntimeError()
        predictor = ClfArgminEnsemble(
            [
                ClfHead(
                    lc_model,
                    head_name="bias",
                    rescaler=rescaler(),
                    nll_penalty=lc_weight,
                ),
                ClfHead(
                    main_model,
                    head_name="debiased",
                    rescaler=rescaler(),
                )
            ],
            n_classes=3,
            add_prior=False,  # prior is uniform
            no_rescale_on_first_step=True)

    bias_set = [
        ParameterSet(
            "predictor",
            "(encoder\..*)|(predictor\.heads\.0\..*)|(predictor\.(bias|main_to_bias).*)",
            dict(lr=1e-3, e=1e-8, weight_decay=0.0), ConstantLearningRate())
    ]
    enc = WordAndCharEncoder(
        "random" if dbg else "crawl-300d-2M",
        None,
        24,
        layers.Conv1D(24, 100, 5),
        MaxPooling(),
    )
    model = BertAndEmbedModel("bert-base-uncased", 128,
                              NltkAndPunctTokenizer(), enc, predictor)

    opt = Adam(
        lr=5e-5,
        e=1e-6,
        weight_decay=0.01,
        max_grad_norm=1.0,
        schedule=LinearTriangle(0.1),
        alternative_sets=bias_set + [
            ParameterSet("no-weight-decay", ".*(\.bias|LayerNorm\.weight)$",
                         dict(weight_decay=0.0))
        ])

    n_final_eval = 512 if dbg else 4096
    dev = MnliDevMatched(512 if dbg else None)
    train = MnliTrain(4096 if dbg else None)
    evaluator = ClfHardEasyEvaluator(prefix_format="{output}-{metric}/{split}")
    batch_size = args.batch_size
    trainer = Trainer(
        opt,
        train, [
            EvalDataset(dev, TorchDataIterator(SortedBatchSampler(batch_size)),
                        "dev"),
        ],
        pre_eval_hook=FitRescaleParameters(256, n_final_eval),
        train_eval_iterator=TorchDataIterator(
            SubsetSampler(None if dbg else 10000, batch_size, True)),
        train_iterator=TorchDataIterator(StratifiedSampler(batch_size)),
        num_train_epochs=args.epochs,
        evaluator=evaluator,
        tb_factor=batch_size / 256.,
        evals_to_print=[
            "bias-acc/ind", "bias-acc/ood", "debiased-acc/ind",
            "debiased-acc/ood", "joint-acc/ind", "joint-acc/ood"
        ])

    if args.init_only:
        train_utils.init_model_dir(args.output_dir, trainer, model)
    else:
        trainer.train(model,
                      args.output_dir,
                      args.seed,
                      args.n_processes,
                      fp16=args.fp16,
                      no_cuda=args.nocuda)
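The "predictor" ParameterSet above appears to route the encoder, the bias head (heads.0), and the bias/main_to_bias parameters to a constant 1e-3 learning rate while the main head stays on the default Adam schedule. The regex itself comes from the code; the parameter names below are purely illustrative:

import re

pat = re.compile(r"(encoder\..*)|(predictor\.heads\.0\..*)|(predictor\.(bias|main_to_bias).*)")
assert pat.match("encoder.conv.weight")
assert pat.match("predictor.heads.0.rescaler.bias")
assert not pat.match("predictor.heads.1.predictor.dense.weight")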
Example #7
def main(args=None, init_only=False):
  py_utils.add_stdout_logger()
  parser = argparse.ArgumentParser()
  add_train_args(parser, default_entropy_penalty=0.1, default_adv_penalty=0.005,
                 default_epochs=38, default_batch_size=512, lc_weight_default=None)
  parser.add_argument("--n_workers", type=int, default=12,
                      help="N workers to use when loading images")
  args = parser.parse_args(args)

  if args.lc_weight is None:
    if args.mode == "adv":
      lc_weight = 0.3
    else:
      lc_weight = 0.2
  else:
    lc_weight = args.lc_weight

  dbg = args.debug
  n_classes = 6

  if args.mode in {"mce", "noci"}:
    if args.mode == "noci":
      rescaler = lambda: None
    else:
      rescaler = lambda: ArgminTransformFunction(AffineNLL(
        n_classes, n_classes, NumpyOptimizer(),
        residual=True, penalty=L2NormPenalty(0.002),
        fix_last_bias_to_zero=True,
      ), backprop_argmin=args.mode != "nobp")

    predictor = ClfArgminEnsemble([
      ClfHead(
        get_low_capacity(), head_name="bias",
        rescaler=rescaler(), nll_penalty=lc_weight
      ),
      ClfHead(
        get_high_capacity(), head_name="debiased",
        rescaler=rescaler(),
      )
    ], n_classes, add_prior=False)

  elif args.mode == "none":
    predictor = ClfArgminEnsemble([
      ClfHead(NullResnetPredictor(6), head_name="bias", rescaler=None),
      ClfHead(get_high_capacity(), head_name="debiased", rescaler=None)
    ], n_classes, add_prior=False)
  elif args.mode == "oracle":
    predictor = ClfBiasMixinEnsemble(
      ExtractLastEmbeddings(),
      6,
      ResnetOracleBias(),
      512,
      args.entropy_penalty
    )
  elif args.mode == "adv":
    predictor = ClfBiAdversary(
      get_high_capacity(), get_low_capacity(), n_classes, args.adversary_loss,
      joint_loss=1.0,
      bias_loss=lc_weight, use_y_values=True,
      joint_adv=False,
    )
  else:
    raise RuntimeError()

  evaluator = ClfEnsembleEvaluator()

  model = ResNetModel(
    predictor, arch="resnet18", from_pretrained=False,
    resize=256,
    eval_transform=ImageNetEvalTransform(224, resize=False),
    train_transform=RandomCropTransform(224),
  )

  opt = SGD(0.02, momentum=0.9, schedule=PiecewiseLinear([args.epochs - 5], 0.3))

  num_workers = args.n_workers
  n_train_workers = num_workers
  test_batch_size = 512

  train = ImageNetAnimals10k("train", 300 if args.debug else None)
  dev = ImageNetAnimals10k("dev", 150 if args.debug else None)

  eval_sets = [
    EvalDataset(
      dev,
      TorchDataIterator(
        SubsetSampler(None if args.debug else 12000, test_batch_size),
        pin_memory=True, num_workers=num_workers), "dev"),
  ]

  trainer = Trainer(
    opt,
    train,
    eval_sets,
    train_eval_iterator=TorchDataIterator(SubsetSampler(None if args.debug else 8000, args.batch_size),
                                          num_workers=n_train_workers, pin_memory=True),
    train_iterator=TorchDataIterator(
      StratifiedSampler(test_batch_size, n_repeat=2), pin_memory=True, num_workers=n_train_workers),
    num_train_epochs=3 if dbg else args.epochs,
    evaluator=evaluator,
    tb_factor=args.batch_size/256,
    pre_eval_hook=FitRescaleParameters(test_batch_size, None if args.debug else 4096, sort=False),
    save_best_model=("dev", "acc/joint"),
    eval_on_epochs=2,
    split_then_collate=True
  )

  if init_only or args.init_only:
    train_utils.init_model_dir(args.output_dir, trainer, model)
  else:
    trainer.train(model, args.output_dir, args.seed, args.n_processes, fp16=args.fp16, no_cuda=args.nocuda)