Example #1
        # Collect each metric's scores across all folds.
        aggregated_metrics = {}
        for curr_fold_data in fold_cache.fold_info:
            for metric, metric_value in curr_fold_data["results"].items():
                aggregated_metrics.setdefault(metric, []).append(metric_value)

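        # Report each metric as mean +- standard deviation over all folds.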
        for metric, metric_values in aggregated_metrics.items():
            logging.info(f"- {metric}: mean={np.mean(metric_values): .4f} +- sd={np.std(metric_values): .4f}\n"
                         f"\t all fold scores: {metric_values}")
    else:
        logging.info(f"Using single train/dev/test split...")
        if args.fixed_split:
            logging.info("Using fixed dataset split")
            train_docs, dev_docs, test_docs = fixed_split(tgt_docs, args.target_dataset)
        else:
            train_docs, dev_docs, test_docs = split_into_sets(tgt_docs, train_prop=0.7, dev_prop=0.15, test_prop=0.15)

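        # Train on the union of source- and target-domain documents, restricting the
        # vocabulary to the top tokens of that combined set (IDs taken from all_tok2id).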
        combined_train = src_docs + train_docs
        curr_tok2id, _ = extract_vocab(combined_train, lowercase=True, top_n=args.max_vocab_size)
        curr_tok2id = {tok: all_tok2id[tok] for tok in curr_tok2id}

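        # Train the noncontextual model on the combined training set.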
        model = create_model_instance(args.model_name, tok2id=curr_tok2id)
        model.train(epochs=args.num_epochs, train_docs=combined_train, dev_docs=dev_docs)
        # Reload best checkpoint
        model = NoncontextualController.from_pretrained(model.path_model_dir)

        model.evaluate(test_docs)
        model.visualize()
        logging.info(f"Final all-in-one scores (over {OUTER_K} folds)")
        for metric, metric_values in aio_metrics.items():
            logging.info(
                f"- {metric}: mean={np.mean(metric_values): .4f} +- sd={np.std(metric_values): .4f}\n"
                f"\t all fold scores: {metric_values}")

        logging.info(f"Final each-in-own scores (over {OUTER_K} folds)")
        for metric, metric_values in eio_metrics.items():
            logging.info(
                f"- {metric}: mean={np.mean(metric_values): .4f} +- sd={np.std(metric_values): .4f}\n"
                f"\t all fold scores: {metric_values}")
    else:
        logging.info(f"Using single train/dev/test split...")
        if args.fixed_split:
            logging.info("Using fixed dataset split")
            train_docs, dev_docs, test_docs = fixed_split(
                documents, args.dataset)
        else:
            train_docs, dev_docs, test_docs = split_into_sets(documents,
                                                              train_prop=0.7,
                                                              dev_prop=0.15,
                                                              test_prop=0.15)

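        # Train the baseline model on the train split, validating on the dev split.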
        model = create_model_instance(args.model_name)
        model.train(epochs=args.num_epochs,
                    train_docs=train_docs,
                    dev_docs=dev_docs)
        # Reload best checkpoint
        model = BaselineController.from_pretrained(model.path_model_dir)

        model.evaluate(test_docs)
        model.visualize()