def test_run(self):
        """Run an ``MLApplicationInstruction`` end to end and check its predictions file.

        A random repertoire dataset (generated with a count of 50) is encoded
        with a k-mer frequency encoder and a logistic regression is fitted on
        label ``l1``.  The instruction is then run on the same dataset, and the
        test passes when ``result/instr1/predictions.csv`` exists and holds
        50 rows.
        """
        path = EnvironmentSettings.tmp_test_path / "mlapplicationtest/"
        PathBuilder.build(path)
        # Registered immediately so the directory is removed even if a later step raises or an assertion fails.
        self.addCleanup(shutil.rmtree, path, ignore_errors=True)

        result_path = path / 'result/'
        instruction_path = result_path / 'instr1/'

        dataset = RandomDatasetGenerator.generate_repertoire_dataset(50, {5: 1}, {5: 1}, {"l1": {1: 0.5, 2: 0.5}}, path / 'dataset/')
        ml_method = LogisticRegression()
        encoder = KmerFreqRepertoireEncoder(NormalizationType.RELATIVE_FREQUENCY, ReadsType.UNIQUE, SequenceEncodingType.CONTINUOUS_KMER, 3,
                                            scale_to_zero_mean=True, scale_to_unit_variance=True)
        label_config = LabelConfiguration([Label("l1", [1, 2])])

        enc_dataset = encoder.encode(dataset, EncoderParams(result_path=path, label_config=label_config, filename="tmp_enc_dataset.pickle", pool_size=4))
        ml_method.fit(enc_dataset.encoded_data, 'l1')

        encoder_params = {"normalization_type": "relative_frequency", "reads": "unique", "sequence_encoding": "continuous_kmer",
                          "k": 3, "scale_to_zero_mean": True, "scale_to_unit_variance": True}
        hp_setting = HPSetting(encoder, encoder_params, ml_method, {}, [], 'enc1', 'ml1')

        # Copy the pickled vectorizer and scaler into the instruction's result folder
        # (presumably where the instruction looks for them when re-encoding -- confirm against the instruction code).
        PathBuilder.build(instruction_path)
        for pickle_name in ('dict_vectorizer.pickle', 'scaler.pickle'):
            shutil.copy(path / pickle_name, instruction_path / pickle_name)

        ml_app = MLApplicationInstruction(dataset, label_config, hp_setting, 4, "instr1", False)
        ml_app.run(result_path)

        predictions_path = instruction_path / "predictions.csv"
        self.assertTrue(os.path.isfile(predictions_path))

        df = pd.read_csv(predictions_path)
        self.assertEqual(50, df.shape[0])
Пример #2
0
    def test_run(self):
        """Assess a fitted logistic regression with ``MLMethodAssessment`` and check its outputs.

        The dataset carries hand-written encoded data (12 examples, labels
        ``l1`` and ``l2``).  The test passes when the returned value is a dict
        whose log-loss entry is at most 0.1, ``ml_score.csv`` exists with a
        single row, and ``predictions.csv`` has 12 rows.
        """
        path = EnvironmentSettings.root_path / "test/tmp/mlmethodassessment/"
        PathBuilder.build(path)
        # Registered immediately so the directory is removed even if a later step raises or an assertion fails.
        self.addCleanup(shutil.rmtree, path, ignore_errors=True)

        predictions_path = path / "predictions.csv"
        ml_score_path = path / "ml_score.csv"

        # Twelve single-sequence repertoires alternating "AA" / "CC".
        sequences = [[seq] for seq in ("AA", "CC") * 6]
        dataset = RepertoireDataset(repertoires=RepertoireBuilder.build(sequences, path)[0])
        dataset.encoded_data = EncodedData(
            examples=np.array([[1, 1], [1, 1], [3, 3]] * 4),
            labels={
                "l1": [1, 1, 3] * 4,
                "l2": [1, 2, 3] * 4
            })

        # NOTE(review): the encoded "l1" labels above are 1 and 3, while this Label
        # declares values [1, 2] -- confirm that this is intended.
        label = Label(name='l1', values=[1, 2])

        method1 = LogisticRegression()
        method1.fit(dataset.encoded_data, label=label)

        res = MLMethodAssessment.run(
            MLMethodAssessmentParams(
                dataset=dataset,
                method=method1,
                metrics={
                    Metric.ACCURACY, Metric.BALANCED_ACCURACY, Metric.F1_MACRO
                },
                optimization_metric=Metric.LOG_LOSS,
                predictions_path=predictions_path,
                label=label,
                ml_score_path=ml_score_path,
                split_index=1,
                path=path))

        self.assertIsInstance(res, dict)
        self.assertLessEqual(res[Metric.LOG_LOSS.name.lower()], 0.1)

        self.assertTrue(os.path.isfile(ml_score_path))

        df = pd.read_csv(ml_score_path)
        self.assertEqual(1, df.shape[0])

        df = pd.read_csv(predictions_path)
        self.assertEqual(12, df.shape[0])
Пример #3
0
    def test_predict(self):
        """Fit on label ``test2`` and check the predictions made for two new examples.

        The test passes when two predictions are returned under the key
        ``test2`` and the second one is one of the classes seen in training
        (0, 1 or 2).
        """
        train_x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        train_labels = {"test1": [1, 0, 2, 0], "test2": [1, 0, 2, 0]}

        lr = LogisticRegression()
        lr.fit(EncodedData(train_x, train_labels), Label("test2"))

        test_x = np.array([[0, 1, 0], [1, 0, 0]])
        predictions = lr.predict(EncodedData(test_x), Label("test2"))

        # Dedicated assert methods report the actual values on failure.
        self.assertEqual(2, len(predictions["test2"]))
        self.assertIn(predictions["test2"][1], [0, 1, 2])
Пример #4
0
    def test_store(self):
        """Store a fitted model and check that the pickle holds the underlying sklearn estimator.

        The test passes when ``logistic_regression.pickle`` exists in the
        target directory after ``store`` and unpickles to a
        ``SklearnLogisticRegression`` instance.
        """
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array([1, 0, 2, 0])}

        lr = LogisticRegression()
        lr.fit(EncodedData(x, y), Label("default"))

        path = EnvironmentSettings.root_path / "test/tmp/lr/"
        # Registered before anything is written so the directory is removed even if an assertion below fails.
        self.addCleanup(shutil.rmtree, path, ignore_errors=True)

        lr.store(path, ["f1", "f2", "f3"])
        model_file = path / "logistic_regression.pickle"
        self.assertTrue(os.path.isfile(model_file))

        # The pickle was written by this test a moment ago, so loading it is safe.
        with open(model_file, "rb") as file:
            lr2 = pickle.load(file)

        self.assertIsInstance(lr2, SklearnLogisticRegression)
Пример #5
0
    def test_load(self):
        """Load a pickled sklearn estimator into a fresh ``LogisticRegression`` wrapper.

        The fitted wrapper's ``model`` attribute is pickled by hand as
        ``logistic_regression.pickle``.  The test passes when a new wrapper,
        after ``load`` on that directory, exposes a
        ``SklearnLogisticRegression`` instance as ``model``.
        """
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"default": np.array([1, 0, 2, 0])}

        lr = LogisticRegression()
        lr.fit(EncodedData(x, y), Label("default"))

        path = EnvironmentSettings.root_path / "test/tmp/lr2/"
        PathBuilder.build(path)
        # Registered immediately so the directory is removed even if a later step raises or an assertion fails.
        self.addCleanup(shutil.rmtree, path, ignore_errors=True)

        with open(path / "logistic_regression.pickle", "wb") as file:
            pickle.dump(lr.model, file)

        lr2 = LogisticRegression()
        lr2.load(path)

        self.assertIsInstance(lr2.model, SklearnLogisticRegression)
Пример #6
0
    def test_fit(self):
        """Fit a ``LogisticRegression`` on four 3-feature examples labelled under ``test``.

        No assertion is visible in this part of the file; as shown, the test
        only checks that ``fit`` completes without raising.
        """
        x = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1]])
        y = {"test": np.array([1, 0, 2, 0])}

        lr = LogisticRegression()
        lr.fit(EncodedData(x, y), Label("test"))