Example #1
    def _do_run(self, run: ModelClassificationRun,
                run_output_dir: Path) -> str:
        ds = self.dataset.load()
        X_train_valid, y_train_valid, X_test, y_test = maybe_limit(
            ds.X_train(), ds.y_train(), ds.X_test(), ds.y_test(),
            self.dataset_limit)

        assert_in("preprocess_func", run.preprocess_func, PREPROCESS_FUNCS)
        preprocess_func = PREPROCESS_FUNCS[run.preprocess_func]
        X_train_valid_preprocessed = preprocess_func(X_train_valid)
        X_test_preprocessed = preprocess_func(X_test)

        assert_valid_model(run.model_name)
        model_cls = getattr(gobbli.model, run.model_name)

        stdout_catcher = StdoutCatcher()
        with stdout_catcher:
            results = run_benchmark_experiment(
                f"{self.name}_{run.key}",
                X_train_valid_preprocessed,
                y_train_valid,
                model_cls,
                run.param_grid,
                test_dataset=(X_test_preprocessed, y_test),
                worker_log_level=logging.INFO,
                run_kwargs=run.run_kwargs,
            )
            # Sleep a few seconds to let logs from the worker catch up
            time.sleep(3)

        # Sample the test set observations if there are more than 1,000, since
        # saving very large charts can cause Selenium timeouts when they're
        # rendered to PNG
        sample_size = 1000
        chart = results.plot(sample_size=sample_size).properties(
            title=f"Predicted Probability (Sampled Test Set Observations, n={sample_size})"
        )
        plot_path = run_output_dir / "plot.png"
        # Longer driver timeout needed since these images can be very big
        chart.save(str(plot_path), driver_timeout=600)

        md = f"# Results: {run.key}\n"
        md += f"```\n{stdout_catcher.get_logs()}\n```\n"
        md += tabulate(pd.DataFrame(results.training_results),
                       tablefmt="pipe",
                       headers="keys")
        md += f"\n```\n{results.metrics_report()}\n```\n"
        md += f"\n![Results]({self.get_markdown_relative_path(plot_path)})\n---"

        return md
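
Example #1 embeds the captured worker logs in the markdown report via stdout_catcher.get_logs(). gobbli's actual StdoutCatcher implementation isn't shown above; a minimal sketch of such a context manager using only the standard library (the class name and details are illustrative, not gobbli's API) could look like this:

import contextlib
import io


class SimpleStdoutCatcher:
    """Illustrative stand-in: buffer everything written to stdout while active."""

    def __init__(self):
        self._buffer = io.StringIO()
        self._redirect = contextlib.redirect_stdout(self._buffer)

    def __enter__(self):
        self._redirect.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        return self._redirect.__exit__(exc_type, exc_value, traceback)

    def get_logs(self) -> str:
        return self._buffer.getvalue()


catcher = SimpleStdoutCatcher()
with catcher:
    print("captured line")
assert catcher.get_logs() == "captured line\n"

The real benchmark also routes worker log output to the parent process (worker_log_level=logging.INFO), which a plain redirect_stdout-based catcher would not capture on its own; the sketch only shows the get_logs() shape the report-building code relies on.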
Example #2
    def _do_run(self, run: ModelClassificationRun,
                run_output_dir: Path) -> str:
        ds = IMDBDataset.load()
        X_train_valid, y_train_valid, X_test, y_test = maybe_limit(
            ds.X_train(), ds.y_train(), ds.X_test(), ds.y_test(),
            self.dataset_limit)

        assert_in("preprocess_func", run.preprocess_func, PREPROCESS_FUNCS)
        preprocess_func = PREPROCESS_FUNCS[run.preprocess_func]
        X_train_valid_preprocessed = preprocess_func(X_train_valid)
        X_test_preprocessed = preprocess_func(X_test)

        assert_valid_model(run.model_name)
        model_cls = getattr(gobbli.model, run.model_name)

        all_results = []

        for window_len, pooling in self.params["window_len_poolings"]:

            if window_len is not None and pooling is not None:
                with tempfile.TemporaryDirectory() as tmpdir:
                    tokenizer_path = Path(tmpdir) / "tokenizer"

                    X_windowed, _, y_windowed = make_document_windows(
                        X_train_valid_preprocessed,
                        window_len=window_len,
                        y=y_train_valid,
                        tokenize_method=TokenizeMethod.SENTENCEPIECE,
                        vocab_size=self.params["vocab_size"],
                        model_path=tokenizer_path,
                    )
                    (
                        X_test_windowed,
                        X_test_windowed_indices,
                        y_test_windowed,
                    ) = make_document_windows(
                        X_test_preprocessed,
                        window_len=window_len,
                        y=y_test,
                        tokenize_method=TokenizeMethod.SENTENCEPIECE,
                        vocab_size=self.params["vocab_size"],
                        model_path=tokenizer_path,
                    )
            else:
                X_windowed, y_windowed = X_train_valid_preprocessed, y_train_valid
                X_test_windowed, y_test_windowed = X_test_preprocessed, y_test

            print(
                f"{dt.datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')} "
                f"Evaluating window: Length {window_len}, pooling {pooling} ({len(X_windowed)} obs)"
            )
            results = run_benchmark_experiment(
                f"{self.name}_{run.key}",
                X_windowed,
                y_windowed,
                model_cls,
                run.param_grid,
                test_dataset=(X_test_windowed, y_test_windowed),
                run_kwargs=run.run_kwargs,
            )

            if window_len is not None:
                pooled_output = PredictOutput(
                    y_pred_proba=results.y_pred_proba.copy())

                pool_document_windows(
                    pooled_output,
                    X_test_windowed_indices,
                    pooling=WindowPooling(pooling),
                )

            all_results.append(results.metrics())

        all_metrics = pd.DataFrame([{
            "Window Config": f"Length {window_len}, pooling {pooling}",
            **r
        } for (window_len, pooling), r in zip(self.params["window_len_poolings"],
                                              all_results)])

        fig = plt.figure(figsize=(10, 10))

        acc_ax = fig.add_subplot()
        all_metrics.plot(x="Window Config",
                         y="Accuracy",
                         ax=acc_ax,
                         kind="bar")

        plt.xlabel("Document Windowing")
        plt.title(
            f"Model Performance by Document Windowing - {model_cls.__name__}")
        plt.ylim(0, 1)

        plot_path = run_output_dir / "plot.png"
        fig.savefig(plot_path)

        md = f"# Results: {run.key}\n"
        md += tabulate(all_metrics, tablefmt="pipe", headers="keys")
        md += f"\n\n![Results]({self.get_markdown_relative_path(plot_path)})\n---"

        return md
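
Example #2 leans on make_document_windows to split long documents into fixed-length windows (repeating each document's label once per window) and on the returned indices to pool window-level predictions back to the document level. A rough sketch of that idea, using a plain whitespace tokenizer instead of gobbli's SentencePiece-based implementation (the function below is illustrative only):

from typing import List, Tuple


def toy_document_windows(X: List[str], window_len: int,
                         y: List[str]) -> Tuple[List[str], List[int], List[str]]:
    """Illustrative only: split each document into windows of at most
    window_len whitespace tokens, remembering which document each window
    came from so predictions can be pooled back together later."""
    X_windowed, indices, y_windowed = [], [], []
    for i, (doc, label) in enumerate(zip(X, y)):
        tokens = doc.split()
        for start in range(0, max(len(tokens), 1), window_len):
            X_windowed.append(" ".join(tokens[start:start + window_len]))
            indices.append(i)
            y_windowed.append(label)
    return X_windowed, indices, y_windowed


X_w, idx, y_w = toy_document_windows(["a b c d e", "f g"], window_len=3,
                                     y=["pos", "neg"])
# X_w == ["a b c", "d e", "f g"], idx == [0, 0, 1], y_w == ["pos", "pos", "neg"]

Pooling then reduces the window-level predicted probabilities that share an index back to one prediction per original document, e.g. by taking their mean or max, which is the role pool_document_windows plays with the WindowPooling strategy above.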
Example #3
    def _do_run(self, run: AugmentRun, run_output_dir: Path) -> str:
        ds = IMDBDataset.load()
        X_train_valid, y_train_valid, X_test, y_test = maybe_limit(
            ds.X_train(), ds.y_train(), ds.X_test(), ds.y_test(),
            self.dataset_limit)

        preprocess_func = PREPROCESS_FUNCS[self.params["preprocess_func"]]
        X_test_preprocessed = preprocess_func(X_test)

        model_cls = getattr(gobbli.model, self.params["model_name"])

        assert_valid_augment(run.augment_name)
        augment_cls = getattr(gobbli.augment, run.augment_name)

        model_run_params: Dict[str, Any] = {}
        if issubclass(augment_cls, BaseModel):
            # If the augment method is also a gobbli model (and will be mounting files
            # back and forth with Docker), we need to make sure it has the proper params
            # applied, e.g. to store data in the correct place and use GPU(s)
            model_run_params = get_model_run_params()

        augment_obj = augment_cls(**run.params, **model_run_params)

        # Some augmentation methods are also models, which need to be built
        # beforehand
        if isinstance(augment_obj, BaseModel):
            augment_obj.build()

        all_results = []

        for percent, multiplier in self.params["percent_multipliers"]:

            X_sampled, _, y_sampled, _ = train_test_split(X_train_valid,
                                                          y_train_valid,
                                                          train_size=percent,
                                                          random_state=1)

            if multiplier == 0:
                X_augmented = X_sampled
                y_augmented = y_sampled
            else:
                X_augmented = X_sampled + augment_obj.augment(
                    X_sampled,
                    times=multiplier,
                    p=self.params["augment_probability"])
                y_augmented = y_sampled + (y_sampled * multiplier)

            print(
                f"{dt.datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')} "
                f"Evaluating multiplier x{multiplier}, percent {percent} ({len(X_augmented)} obs)"
            )
            results = run_benchmark_experiment(
                f"{self.name}_{run.key}",
                preprocess_func(X_augmented),
                y_augmented,
                model_cls,
                self.params["param_grid"],
                test_dataset=(X_test_preprocessed, y_test),
            )
            all_results.append(results.metrics())

        all_metrics = pd.DataFrame([{
            "percent": p,
            "multiplier": m,
            **r
        } for (p, m), r in zip(self.params["percent_multipliers"], all_results)])

        fig, ax = plt.subplots(figsize=(10, 10))
        for key, grp in all_metrics.groupby("multiplier"):
            grp.plot(
                x="percent",
                y="Weighted F1 Score",
                kind="line",
                label=f"{key}x augmentation",
                ax=ax,
            )

        plt.xlabel("Proportion of Data Used")
        plt.ylabel("Weighted F1 Score")
        plt.title(
            f"Model Performance by Proportion of Data Used - {model_cls.__name__}"
        )
        plt.xlim(0, 1)
        plt.ylim(0, 1)

        plot_path = run_output_dir / "plot.png"
        fig.savefig(plot_path)

        md = f"# Results: {run.key}\n"
        md += tabulate(all_metrics, tablefmt="pipe", headers="keys")
        md += f"\n\n![Results]({self.get_markdown_relative_path(plot_path)})\n---"

        return md
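
In example #3 the augmented training set is built by plain list concatenation: augment_obj.augment(X_sampled, times=multiplier, ...) is expected to return multiplier generated variants for every original document, and the matching labels come from repeating y_sampled multiplier times. A tiny worked illustration of that label bookkeeping, with a stand-in augmenter (assuming, as the label arithmetic requires, that the augmenter returns whole passes over the input list in order):

def fake_augment(X, times):
    # Stand-in for a real gobbli augmenter: one full pass over X per "time"
    return [f"{doc} (variant {i})" for i in range(times) for doc in X]


X_sampled = ["good movie", "bad movie"]
y_sampled = ["pos", "neg"]
multiplier = 2

X_augmented = X_sampled + fake_augment(X_sampled, times=multiplier)
y_augmented = y_sampled + (y_sampled * multiplier)

assert len(X_augmented) == len(y_augmented) == len(X_sampled) * (1 + multiplier)
# y_augmented == ["pos", "neg", "pos", "neg", "pos", "neg"]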
Example #4
    def _do_run(self, run: ModelClassificationRun,
                run_output_dir: Path) -> str:
        ds = IMDBDataset.load()
        X_train_valid, y_train_valid, X_test, y_test = maybe_limit(
            ds.X_train(), ds.y_train(), ds.X_test(), ds.y_test(),
            self.dataset_limit)

        assert_in("preprocess_func", run.preprocess_func, PREPROCESS_FUNCS)
        preprocess_func = PREPROCESS_FUNCS[run.preprocess_func]
        X_train_valid_preprocessed = preprocess_func(X_train_valid)
        X_test_preprocessed = preprocess_func(X_test)

        assert_valid_model(run.model_name)
        model_cls = getattr(gobbli.model, run.model_name)

        all_results = []

        # Train and evaluate the model on each proportion of the training data
        for proportion in self.params["data_proportions"]:
            X_sampled, _, y_sampled, _ = train_test_split(
                X_train_valid_preprocessed,
                y_train_valid,
                train_size=proportion,
                random_state=1,
            )
            LOGGER.info(
                f"{dt.datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')} "
                f"Evaluating proportion {round(proportion, 3)} ({len(X_sampled)} obs)"
            )
            results = run_benchmark_experiment(
                f"{self.name}_{run.key}",
                X_sampled,
                y_sampled,
                model_cls,
                run.param_grid,
                test_dataset=(X_test_preprocessed, y_test),
                run_kwargs=run.run_kwargs,
            )
            all_results.append(results)

        all_metrics = pd.DataFrame([{
            "data_proportion": p,
            "num_documents": int(p * len(X_train_valid)),
            **r.metrics(),
        } for p, r in zip(self.params["data_proportions"], all_results)])

        # Plot both metrics on a single set of axes
        fig, ax = plt.subplots(figsize=(10, 10))
        all_metrics.plot(x="num_documents", y="Weighted F1 Score", ax=ax)
        all_metrics.plot(x="num_documents", y="Accuracy", ax=ax)

        plt.xlabel("Number of Documents Used for Training/Validation")
        plt.title(
            f"Model Performance by Number of Documents Used for Training/Validation - {model_cls.__name__}"
        )
        plt.xlim(0, int(all_metrics["num_documents"].max() * 1.1))
        plt.ylim(0, 1)
        plot_path = run_output_dir / "plot.png"
        fig.savefig(plot_path)

        md = f"# Results: {run.key}\n"
        md += tabulate(all_metrics, tablefmt="pipe", headers="keys")
        md += f"\n\n![Results]({self.get_markdown_relative_path(plot_path)})\n---"

        return md
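
Examples #3 and #4 both use sklearn's train_test_split purely as a reproducible way to subsample a fraction of the training data; the "test" half of the split is discarded (the underscore variables). A small standalone illustration of that pattern:

from sklearn.model_selection import train_test_split

X = [f"doc {i}" for i in range(10)]
y = ["pos", "neg"] * 5

# Keep half the data, drop the rest; random_state makes the sample reproducible
X_sampled, _, y_sampled, _ = train_test_split(X, y, train_size=0.5, random_state=1)
print(len(X_sampled))  # 5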
Example #5
    def _do_run(self, run: ModelClassificationRun,
                run_output_dir: Path) -> str:
        ds = IMDBDataset.load()
        X_train_valid, y_train_valid, X_test, y_test = maybe_limit(
            ds.X_train(), ds.y_train(), ds.X_test(), ds.y_test(),
            self.dataset_limit)

        assert_in("preprocess_func", run.preprocess_func, PREPROCESS_FUNCS)
        preprocess_func = PREPROCESS_FUNCS[run.preprocess_func]
        X_train_valid_preprocessed = preprocess_func(X_train_valid)
        X_test_preprocessed = preprocess_func(X_test)

        assert_valid_model(run.model_name)
        model_cls = getattr(gobbli.model, run.model_name)

        all_results = []

        majority, minority = ClassImbalanceScenario.find_majority_minority_classes(
            y_test)
        majority_df, minority_df = ClassImbalanceScenario.split_dataset(
            X_train_valid_preprocessed, y_train_valid, majority, minority)

        for proportion in self.params["imbalance_proportions"]:
            # Keep the majority class whole and downsample the minority class so it
            # makes up the desired proportion of the final dataset.  Since both
            # classes start out the same size, sampling the minority class at a
            # fraction of proportion / (1 - proportion) of its original size gives
            # minority / (minority + majority) == proportion.
            downsample_proportion = proportion / (1 - proportion)
            minority_sample = minority_df.sample(
                frac=downsample_proportion).reset_index()
            sampled_df = pd.concat([majority_df, minority_sample])

            X = sampled_df["X"].tolist()
            y = sampled_df["y"].tolist()

            LOGGER.info(
                f"{dt.datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')} "
                f"Evaluating proportion {round(proportion, 3)} ({len(X)} obs)")

            results = run_benchmark_experiment(
                f"{self.name}_{run.key}",
                X,
                y,
                model_cls,
                run.param_grid,
                test_dataset=(X_test_preprocessed, y_test),
                run_kwargs=run.run_kwargs,
            )
            all_results.append(results)

        minority_f1_scores = []
        majority_f1_scores = []
        for result in all_results:
            majority_f1, minority_f1 = f1_score(
                result.y_true,
                pred_prob_to_pred_label(result.y_pred_proba),
                average=None,
                labels=[majority, minority],
            )
            minority_f1_scores.append(minority_f1)
            majority_f1_scores.append(majority_f1)

        all_metrics = pd.DataFrame([{
            "imbalance_proportion": p,
            **r.metrics()
        } for p, r in zip(self.params["imbalance_proportions"], all_results)])

        all_metrics["Minority Class F1 Score"] = minority_f1_scores
        all_metrics["Majority Class F1 Score"] = majority_f1_scores

        # Plot both per-class F1 scores on a single set of axes
        fig, ax = plt.subplots(figsize=(10, 10))
        all_metrics.plot(x="imbalance_proportion",
                         y="Minority Class F1 Score",
                         ax=ax)
        all_metrics.plot(x="imbalance_proportion",
                         y="Majority Class F1 Score",
                         ax=ax)

        plt.xlabel("Prevalence of Minority Class")
        plt.title(
            f"Model Performance by Prevalence of Minority Class - {model_cls.__name__}"
        )
        plt.xlim(0, 0.5)
        plt.ylim(0, 1)

        plot_path = run_output_dir / "plot.png"
        fig.savefig(plot_path)

        md = f"# Results: {run.key}\n"
        md += tabulate(all_metrics, tablefmt="pipe", headers="keys")
        md += f"\n\n![Results]({self.get_markdown_relative_path(plot_path)})\n---"

        return md
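
The downsampling fraction in example #5 comes from requiring minority / (minority + majority) == proportion when the two classes start out the same size, which works out to sampling the minority class at proportion / (1 - proportion) of its original size. A quick numeric check of that relationship (class sizes are made up):

majority_len = 10_000
minority_len = 10_000  # assumes the classes start out balanced, as in IMDB

for proportion in (0.1, 0.25, 0.4):
    frac = proportion / (1 - proportion)
    sampled_minority = minority_len * frac
    achieved = sampled_minority / (sampled_minority + majority_len)
    print(f"target {proportion:.2f} -> achieved {achieved:.2f}")
# target 0.10 -> achieved 0.10, and so on for each proportion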