Exemplo n.º 1
0
    def test_run(self):
        method = LogisticRegression()
        dataset = RepertoireDataset()
        dataset.encoded_data = EncodedData(examples=np.array([[1, 2, 3],
                                                              [2, 3, 4],
                                                              [1, 2, 3],
                                                              [2, 3, 4],
                                                              [1, 2, 3],
                                                              [2, 3, 4]]),
                                           labels={
                                               "l1": [1, 0, 1, 0, 1, 0],
                                               "l2": [0, 1, 0, 1, 0, 1]
                                           },
                                           feature_names=["f1", "f2", "f3"])

        path = EnvironmentSettings.root_path + "test/tmp/mlmethodtrainer/"

        method = MLMethodTrainer.run(
            MLMethodTrainerParams(
                result_path=path,
                dataset=dataset,
                label="l1",
                method=method,
                model_selection_n_folds=2,
                model_selection_cv=True,
                cores_for_training=1,
                train_predictions_path=f"{path}predictions.csv",
                ml_details_path=f"{path}details.yaml",
                optimization_metric="balanced_accuracy"))

        method.predict(EncodedData(np.array([1, 2, 3]).reshape(1, -1)), "l1")
        self.assertTrue(os.path.isfile(f"{path}predictions.csv"))
        self.assertTrue(os.path.isfile(f"{path}details.yaml"))

        shutil.rmtree(path)
Exemplo n.º 2
0
    def test_run(self):
        path = EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/"
        PathBuilder.build(path)
        dataset = RepertoireDataset(repertoires=RepertoireBuilder.build([["AA"], ["CC"], ["AA"], ["CC"], ["AA"], ["CC"], ["AA"], ["CC"], ["AA"], ["CC"], ["AA"], ["CC"]], path)[0])
        dataset.encoded_data = EncodedData(
            examples=np.array([[1, 1], [1, 1], [3, 3], [1, 1], [1, 1], [3, 3], [1, 1], [1, 1], [3, 3], [1, 1], [1, 1], [3, 3]]),
            labels={"l1": [1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3], "l2": [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]}
        )

        label_config = LabelConfiguration()
        label_config.add_label("l1", [1, 3])

        method1 = LogisticRegression()
        method1.fit(dataset.encoded_data, label_name='l1')

        res = MLMethodAssessment.run(MLMethodAssessmentParams(
            dataset=dataset,
            method=method1,
            metrics={Metric.ACCURACY, Metric.BALANCED_ACCURACY, Metric.F1_MACRO},
            optimization_metric=Metric.LOG_LOSS,
            predictions_path=EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/predictions.csv",
            label="l1",
            ml_score_path=EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/ml_score.csv",
            split_index=1,
            path=EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/"
        ))

        self.assertTrue(isinstance(res, dict))
        self.assertTrue(res[Metric.LOG_LOSS.name.lower()] <= 0.1)

        self.assertTrue(os.path.isfile(EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/ml_score.csv"))

        df = pd.read_csv(EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/ml_score.csv")
        self.assertTrue(df.shape[0] == 1)

        df = pd.read_csv(EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/predictions.csv")
        self.assertEqual(12, df.shape[0])

        shutil.rmtree(EnvironmentSettings.root_path + "test/tmp/mlmethodassessment/")