Python MemorySourceConfig示例，dffml.source.memory.MemorySourceConfig Python示例

示例#1

0

显示文件

文件： test_anomalydetection.py 项目： up1512001/dffml

    def setUpClass(cls):
        """Build the anomaly model plus in-memory training and test sources.

        Draws 1800 two-feature samples from a normal distribution (mean 2,
        standard deviation 1), labels each one, and splits the resulting
        records 1400/400 into a training source and a test source.
        """
        # The trained model is persisted in a throwaway directory
        cls.model_dir = tempfile.TemporaryDirectory()
        input_features = Features(
            Feature("A", int, 1),
            Feature("B", int, 2),
        )
        cls.model = AnomalyModel(
            features=input_features,
            predict=Feature("Y", int, 1),
            directory=cls.model_dir.name,
        )

        # Row 0 of the sample matrix feeds "A", row 1 feeds "B"
        sample_count = 1800
        samples = np.random.normal(2, 1, size=(2, sample_count))
        records = []
        for idx in range(sample_count):
            a_value = samples[0][idx]
            b_value = samples[1][idx]
            records.append(
                Record(
                    "x" + str(random.random()),
                    data={
                        "features": {
                            "A": float(a_value),
                            "B": float(b_value),
                            # Label is 1 when A > 1 - B, otherwise 0
                            "Y": (a_value > 1 - b_value).astype(int),
                        }
                    },
                ))
        cls.records = records

        split_at = 1400
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=records[:split_at])))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=records[split_at:])))

示例#2

0

显示文件

文件： test_regressor_model.py 项目： sakshamarora1/dffml

    def setUpClass(cls):
        """Create the XGBoost regressor and its training/test sources.

        The synthetic target follows f(x1, x2) = 2*x1 + 3*x2 over 2000 random
        samples; the first 1800 records go to the training source and the
        remaining 200 to the test source.
        """
        # The trained model is written to a throwaway directory
        cls.model_dir = tempfile.TemporaryDirectory()
        model_config = XGBRegressorModelConfig(
            features=Features(
                Feature("Feature1", float, 1),
                Feature("Feature2"),
            ),
            predict=Feature("Target", float, 1),
            directory=cls.model_dir.name,
        )
        cls.model = XGBRegressorModel(model_config)

        sample_count = 2000
        samples = np.random.rand(2, sample_count)
        records = []
        for idx in range(sample_count):
            x1 = samples[0][idx]
            x2 = samples[1][idx]
            records.append(
                Record(
                    "x" + str(random.random()),
                    data={
                        "features": {
                            "Feature1": float(x1),
                            "Feature2": float(x2),
                            "Target": 2 * x1 + 3 * x2,
                        }
                    },
                ))
        cls.records = records

        split_at = 1800
        cls.trainingsource = Sources(
            MemorySource(MemorySourceConfig(records=records[:split_at])))
        cls.testsource = Sources(
            MemorySource(MemorySourceConfig(records=records[split_at:])))

示例#3

0

显示文件

    def setUpClass(cls):
        """Prepare question-answering records, sources and a BERT QA model.

        ``TRAIN_DATA`` and ``TEST_DATA`` are each transposed into six columns
        and turned into records keyed by row index, which are then wrapped in
        memory sources. The model stores its output and logs in a temporary
        directory.
        """

        def _qa_records(columns):
            # Column order: title, context, question, answer text,
            # answer start position, impossibility flag
            (titles, contexts, questions, answer_texts, start_chars,
             impossibles) = columns
            return [
                Record(
                    str(idx),
                    data={
                        "features": {
                            "title": titles[idx],
                            "context": contexts[idx],
                            "question": questions[idx],
                            "answer_text": answer_text,
                            "start_pos_char": start_chars[idx],
                            "is_impossible": impossibles[idx],
                            "answers": [],
                        }
                    },
                ) for idx, answer_text in enumerate(answer_texts)
            ]

        train_columns = list(zip(*TRAIN_DATA))
        test_columns = list(zip(*TEST_DATA))

        cls.train_records = _qa_records(train_columns)
        cls.test_records = _qa_records(test_columns)

        cls.train_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.train_records)))
        cls.test_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.test_records)))

        cls.model_dir = tempfile.TemporaryDirectory()
        output_dir = cls.model_dir.name
        qa_config = QAModelConfig(
            model_name_or_path="bert-base-cased",
            cache_dir=CACHE_DIR,
            directory=output_dir,
            log_dir=output_dir,
            model_type="bert",
            no_cuda=True,
        )
        cls.model = QAModel(qa_config)

示例#4

0

显示文件

    def setUpClass(cls):
        """Set up NER training/prediction sources and the BERT NER model.

        Training rows carry a sentence id, the words and the NER tag;
        prediction rows carry only the sentence id and the words.
        """
        sentence_ids, word_lists, tags = list(zip(*TRAIN_DATA))
        predict_ids, predict_words = list(zip(*PREDICT_DATA))

        train_records = []
        for idx, tag in enumerate(tags):
            train_features = {
                "sentence_id": sentence_ids[idx],
                "words": word_lists[idx],
                "ner_tag": tag,
            }
            train_records.append(
                Record(str(idx), data={"features": train_features})
            )
        cls.train_records = train_records
        cls.train_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.train_records))
        )

        predict_records = []
        for idx, sentence_id in enumerate(predict_ids):
            predict_features = {
                "sentence_id": sentence_id,
                "words": predict_words[idx],
            }
            predict_records.append(
                Record(str(idx), data={"features": predict_features})
            )
        cls.predict_records = predict_records
        cls.predict_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.predict_records))
        )

        # Model output goes to a temporary directory
        cls.model_dir = tempfile.TemporaryDirectory()
        ner_config = NERModelConfig(
            sid=Feature("sentence_id", int, 1),
            words=Feature("words", str, 1),
            predict=Feature("ner_tag", str, 1),
            output_dir=cls.model_dir.name,
            model_architecture_type="bert",
            model_name_or_path="bert-base-cased",
            no_cuda=True,
        )
        cls.model = NERModel(ner_config)

示例#5

0

显示文件

 def setUpClass(cls):
     """Create the Misc model and 2000 labelled repos in a memory source.

     The first 1000 repos have the feature set to 1 and classification 'a';
     the next 1000 have it set to 0 and classification 'not a'.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.model = Misc(ModelConfig(directory=cls.model_dir.name))
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.classifications = ['a', 'not a']

     def _labelled_repos(prefix, value, label):
         # 1000 repos with randomised keys sharing one feature value and label
         batch = []
         for _ in range(1000):
             batch.append(
                 Repo(prefix + str(random.random()),
                      data={
                          'features': {
                              cls.feature.NAME: value
                          },
                          'classification': label
                      }))
         return batch

     positives = _labelled_repos('a', 1, 'a')
     negatives = _labelled_repos('b', 0, 'not a')
     cls.repos = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))

示例#6

0

显示文件

文件： test_dnnc.py 项目： oliverob/dffml

 def setUpClass(cls):
     """Create 2000 labelled records and the DNN classifier under test.

     The first 1000 records have ``starts_with_a`` set to 1 and the string
     "a"; the next 1000 have it set to 0 and the string "not a".
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature = Feature("starts_with_a", int, 1)
     cls.features = Features(cls.feature)

     def _labelled_records(prefix, value, label):
         # 1000 records with randomised keys sharing one value and label
         batch = []
         for _ in range(1000):
             batch.append(
                 Record(
                     prefix + str(random.random()),
                     data={"features": {
                         cls.feature.name: value,
                         "string": label
                     }},
                 ))
         return batch

     positives = _labelled_records("a", 1, "a")
     negatives = _labelled_records("b", 0, "not a")
     cls.records = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(records=cls.records)))

     classifier_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=Feature("string", str, 1),
         classifications=["a", "not a"],
         clstype=str,
         features=cls.features,
     )
     cls.model = DNNClassifierModel(classifier_config)

示例#7

0

显示文件

 async def test_02_predict(self):
     """Predict the target for one repo and check the relative error.

     The expected target is 2*x1 + 3*x2 for the two feature values; the
     test passes when the prediction's relative error is below 0.3.
     """
     test_feature_val = [
         0,
         1.5,
         2,
     ]  # inserting zero so that its 1-indexable
     test_target = 2 * test_feature_val[1] + 3 * test_feature_val[2]
     # should be same function used in TestDNN.setupclass
     a = Repo(
         "a",
         data={
             "features": {
                 self.feature1.NAME: test_feature_val[1],
                 self.feature2.NAME: test_feature_val[2],
             }
         },
     )
     async with Sources(MemorySource(MemorySourceConfig(
             repos=[a]))) as sources, self.model as model:
         target_name = model.config.predict.NAME
         async with sources() as sctx, model() as mctx:
             res = [repo async for repo in mctx.predict(sctx.repos())]
             self.assertEqual(len(res), 1)
         self.assertEqual(res[0].key, a.key)
         # The epsilon belongs in the denominator, where it guards the
         # division; added to the quotient it only skewed the error.
         test_error_norm = abs(
             (test_target - res[0].prediction(target_name).value) /
             (test_target + 1e-6))
         error_threshold = 0.3
         self.assertLess(test_error_norm, error_threshold)

示例#8

0

显示文件

 def setUpClass(cls):
     """Create the DNN regression model and 2000 synthetic repos.

     Each repo's "TARGET" follows f(x1, x2) = 2*x1 + 3*x2 for two randomly
     generated feature values.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.feature1 = Feature_1()
     cls.feature2 = Feature_2()
     cls.features = Features(cls.feature1, cls.feature2)
     regression_config = DNNRegressionModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=40,
         hidden=[50, 20, 10],
         predict=DefFeature("TARGET", float, 1),
         features=cls.features,
     )
     cls.model = DNNRegressionModel(regression_config)

     sample_count = 2000
     samples = np.random.rand(2, sample_count)
     repos = []
     for idx in range(sample_count):
         x1 = samples[0][idx]
         x2 = samples[1][idx]
         repos.append(
             Repo(
                 "x" + str(random.random()),
                 data={
                     "features": {
                         cls.feature1.NAME: float(x1),
                         cls.feature2.NAME: float(x2),
                         "TARGET": 2 * x1 + 3 * x2,
                     }
                 },
             ))
     cls.repos = repos
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))

示例#9

0

显示文件

 def setUpClass(cls):
     """Build features, repos, a memory source and the model under test.

     ``cls.MODEL_TYPE`` selects the dataset: "CLASSIFICATION" uses nine
     float features (A-I) and "REGRESSION" uses three (A-C). In both cases
     column "X" is stored alongside the features and is the value to predict.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     cls.features = Features()
     # Compare with ``==``: ``is`` checks object identity, which only works
     # for string literals by accident of interning and emits a
     # SyntaxWarning on Python 3.8+.
     if cls.MODEL_TYPE == "CLASSIFICATION":
         for name in "ABCDEFGHI":
             cls.features.append(DefFeature(name, float, 1))
         A, B, C, D, E, F, G, H, I, X = list(
             zip(*FEATURE_DATA_CLASSIFICATION))
         cls.repos = [
             Repo(
                 str(i),
                 data={
                     "features": {
                         "A": A[i],
                         "B": B[i],
                         "C": C[i],
                         "D": D[i],
                         "E": E[i],
                         "F": F[i],
                         "G": G[i],
                         "H": H[i],
                         "I": I[i],
                         "X": X[i],
                     }
                 },
             ) for i in range(0, len(A))
         ]
     elif cls.MODEL_TYPE == "REGRESSION":
         for name in "ABC":
             cls.features.append(DefFeature(name, float, 1))
         A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
         cls.repos = [
             Repo(
                 str(i),
                 data={
                     "features": {
                         "A": A[i],
                         "B": B[i],
                         "C": C[i],
                         "X": X[i],
                     }
                 },
             ) for i in range(0, len(A))
         ]
     cls.sources = Sources(MemorySource(
         MemorySourceConfig(repos=cls.repos)))
     cls.model = cls.MODEL(
         cls.MODEL_CONFIG(
             directory=cls.model_dir.name,
             predict="X",
             features=cls.features,
         ))

示例#10

0

显示文件

 def setUpClass(cls):
     """Create the MiscModel and 2000 labelled repos in a memory source.

     The first 1000 repos have the feature set to 1 and the string "a";
     the next 1000 have it set to 0 and the string "not a".
     """
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)
     cls.model_dir = tempfile.TemporaryDirectory()
     misc_config = MiscModelConfig(
         directory=cls.model_dir.name,
         classifications=["not a", "a"],
         features=cls.features,
     )
     cls.model = MiscModel(misc_config)

     def _labelled_repos(prefix, value, label):
         # 1000 repos with randomised keys sharing one value and label
         batch = []
         for _ in range(1000):
             batch.append(
                 Repo(
                     prefix + str(random.random()),
                     data={"features": {
                         cls.feature.NAME: value,
                         "string": label
                     }},
                 ))
         return batch

     positives = _labelled_repos("a", 1, "a")
     negatives = _labelled_repos("b", 0, "not a")
     cls.repos = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))

示例#11

0

显示文件

 def setUpClass(cls):
     """Create the DNN classifier and 2000 labelled repos in a memory source.

     The first 1000 repos have the feature set to 1 and the string "a";
     the next 1000 have it set to 0 and the string "not a".
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     classifier_config = DNNClassifierModelConfig(
         directory=cls.model_dir.name,
         steps=1000,
         epochs=30,
         hidden=[10, 20, 10],
         classification="string",
         classifications=["a", "not a"],
         clstype=str,
     )
     cls.model = DNNClassifierModel(classifier_config)
     cls.feature = StartsWithA()
     cls.features = Features(cls.feature)

     def _labelled_repos(prefix, value, label):
         # 1000 repos with randomised keys sharing one value and label
         batch = []
         for _ in range(1000):
             batch.append(
                 Repo(
                     prefix + str(random.random()),
                     data={"features": {
                         cls.feature.NAME: value,
                         "string": label
                     }},
                 ))
         return batch

     positives = _labelled_repos("a", 1, "a")
     negatives = _labelled_repos("b", 0, "not a")
     cls.repos = positives + negatives
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos)))

示例#12

0

显示文件

文件： test_model.py 项目： jankeromnes/dffml

 def setUpClass(cls):
     """Build text records from ``DATA`` and the text classification model.

     ``DATA`` is transposed into a text column ("A") and a label column
     ("X"); one record is created per row, keyed by the row index.
     """
     cls.features = Features()
     cls.features.append(Feature("A", str, 1))
     texts, labels = list(zip(*DATA))
     records = []
     for idx, label in enumerate(labels):
         records.append(
             Record(str(idx), data={"features": {
                 "A": texts[idx],
                 "X": label
             }}))
     cls.records = records
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(records=cls.records)))
     cls.model_dir = tempfile.TemporaryDirectory()

     # NOTE(review): the second layer passes ``relu`` unquoted while the
     # others quote their activation - confirm this is intentional.
     extra_layers = [
         "Dense(units = 120, activation='relu')",
         "Dense(units = 64, activation=relu)",
         "Dense(units = 2, activation='softmax')",
     ]
     hub_model_url = (
         "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim-with-oov/1"
     )
     classifier_config = TextClassifierConfig(
         directory=cls.model_dir.name,
         classifications=[0, 1],
         features=cls.features,
         predict=Feature("X", int, 1),
         add_layers=True,
         layers=extra_layers,
         model_path=hub_model_url,
         epochs=30,
     )
     cls.model = TextClassificationModel(classifier_config)

示例#13

0

显示文件

文件： test_vw.py 项目： programmer290399/dffml

    def setUpClass(cls):
        """Build features, records, a memory source, the VW model and scorer.

        ``DATA`` is transposed into nine columns: features A-F (float),
        G and H (int), and the target X. One record is created per row,
        keyed by the row index.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        feature_specs = (
            ("A", float),
            ("B", float),
            ("C", float),
            ("D", float),
            ("E", float),
            ("F", float),
            ("G", int),
            ("H", int),
        )
        for feature_name, feature_type in feature_specs:
            cls.features.append(Feature(feature_name, feature_type, 1))

        col_a, col_b, col_c, col_d, col_e, col_f, col_g, col_h, col_x = list(
            zip(*DATA)
        )
        column_names = ("A", "B", "C", "D", "E", "F", "G", "H", "X")
        rows = zip(
            col_a, col_b, col_c, col_d, col_e, col_f, col_g, col_h, col_x
        )
        cls.records = [
            Record(str(idx), data={"features": dict(zip(column_names, row))})
            for idx, row in enumerate(rows)
        ]

        cls.sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.records))
        )

        vw_options = [
            "l2",
            "0.1",
            "loss_function",
            "squared",
            "passes",
            "10",
        ]
        vw_config = VWConfig(
            location=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", float, 1),
            # A and B will be namespace n1
            # A and C will be in namespace n2
            namespace=["n1_A_B", "n2_A_C"],
            importance=Feature("H", int, 1),
            tag=Feature("G", int, 1),
            task="regression",
            vwcmd=vw_options,
        )
        cls.model = VWModel(vw_config)
        cls.scorer = MeanSquaredErrorAccuracy()

示例#14

0

显示文件

文件： test_dnnc.py 项目： shabnam99/dffml

 async def test_02_predict(self):
     """Predict on one repo with the feature set and check the result.

     Expects exactly one prediction, for the same ``src_url`` as the input
     repo, whose predicted value is truthy.
     """
     sample = Repo("a", data={"features": {self.feature.NAME: 1}})
     memory_source = MemorySource(MemorySourceConfig(repos=[sample]))
     async with Sources(memory_source) as sources:
         async with self.model as model:
             async with sources() as sctx:
                 async with model() as mctx:
                     res = []
                     async for repo in mctx.predict(sctx.repos()):
                         res.append(repo)
                     self.assertEqual(len(res), 1)
             self.assertEqual(res[0].src_url, sample.src_url)
             self.assertTrue(res[0].prediction().value)

示例#15

0

显示文件

 async def test_02_predict(self):
     """Predict on one record with the feature set and check the result.

     Expects exactly one prediction, for the same key as the input record,
     whose predicted value for the model's target is truthy.
     """
     sample = Record("a", data={"features": {self.feature.NAME: 1}})
     memory_source = MemorySource(MemorySourceConfig(records=[sample]))
     async with Sources(memory_source) as sources:
         async with self.model as model:
             target_name = model.config.predict.NAME
             async with sources() as sctx:
                 async with model() as mctx:
                     res = []
                     async for record in mctx.predict(sctx.records()):
                         res.append(record)
                     self.assertEqual(len(res), 1)
             self.assertEqual(res[0].key, sample.key)
             self.assertTrue(res[0].prediction(target_name).value)

示例#16

0

显示文件

    def setUpClass(cls):
        """Set up spaCy NER train/test sources, the model and its scorer.

        ``TRAIN_DATA`` and ``TEST_DATA`` are each transposed into a sentence
        column and an entities column; one record is built per row, keyed by
        the row index.
        """
        train_sentences, train_entities = list(zip(*TRAIN_DATA))
        test_sentences, test_entities = list(zip(*TEST_DATA))

        def _ner_records(sentences, entities):
            # One record per row, keyed by the row's position
            built = []
            for idx, entity in enumerate(entities):
                built.append(
                    Record(
                        str(idx),
                        data={
                            "features": {
                                "sentence": sentences[idx],
                                "entities": entity,
                            }
                        },
                    )
                )
            return built

        cls.train_records = _ner_records(train_sentences, train_entities)
        cls.test_records = _ner_records(test_sentences, test_entities)

        cls.train_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.train_records))
        )
        cls.test_sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.test_records))
        )
        cls.model_dir = tempfile.TemporaryDirectory()
        ner_config = SpacyNERModelConfig(
            model_name="en_core_web_sm",
            location=cls.model_dir.name,
            n_iter=10,
            dropout=0.4,
        )
        cls.model = SpacyNERModel(ner_config)
        cls.scorer = SpacyNerAccuracy()

示例#17

0

显示文件

 async def _add_memory_source(self):
     """Register a memory source and its context with the CLI app.

     Builds ``self.num_repos`` repos whose "by_ten" feature is ten times
     their index, opens a memory source over them, and stores the source
     and its context both on ``self`` and in ``self.cli.app`` under
     ``self.slabel``. Yields once while both are open.
     """
     repos = []
     for number in range(0, self.num_repos):
         repos.append(
             Repo(str(number), data={"features": {
                 "by_ten": number * 10
             }}))
     async with MemorySource(MemorySourceConfig(repos=repos)) as source:
         self.source = source
         self.cli.app["sources"][self.slabel] = source
         async with source() as sctx:
             self.sctx = sctx
             self.cli.app["source_contexts"][self.slabel] = sctx
             yield

示例#18

0

显示文件

 async def test_02_predict(self):
     """Predict on one repo with the feature set and check the result.

     Expects exactly one result whose first element has the input repo's
     ``src_url`` and whose second element is truthy.
     """
     sample = Repo('a', data={'features': {self.feature.NAME: 1}})
     memory_source = MemorySource(MemorySourceConfig(repos=[sample]))
     async with Sources(memory_source) as sources:
         async with self.features as features:
             async with self.model as model:
                 async with sources() as sctx:
                     async with model() as mctx:
                         res = []
                         predictions = mctx.predict(
                             sctx.repos(), features, self.classifications)
                         async for repo in predictions:
                             res.append(repo)
                         self.assertEqual(len(res), 1)
                 self.assertEqual(res[0][0].src_url, sample.src_url)
                 self.assertTrue(res[0][1])

示例#19

0

显示文件

 async def test_02_predict(self):
     """Predict on two repos and check each prediction equals the repo key.

     The repos are keyed "a" (feature 1) and "not a" (feature 0); exactly
     two predictions are expected.
     """
     positive = Repo("a", data={"features": {self.feature.NAME: 1}})
     negative = Repo("not a", data={"features": {self.feature.NAME: 0}})
     memory_source = MemorySource(
         MemorySourceConfig(repos=[positive, negative]))
     async with Sources(memory_source) as sources:
         async with self.model as model:
             async with sources() as sctx:
                 async with model() as mctx:
                     seen = 0
                     predictions = mctx.predict(sctx.repos())
                     async for repo, prediction, confidence in predictions:
                         with self.subTest(repo=repo):
                             self.assertEqual(prediction, repo.key)
                         seen += 1
                     self.assertEqual(seen, 2)

示例#20

0

显示文件

文件： test_slr.py 项目： pradeepbhadani/dffml

 def setUpClass(cls):
     """Create the SLR model and a memory source of (X, Y) repos.

     ``FEATURE_DATA`` is transposed into an X column and a Y column; one
     repo is created per row, keyed by the row index.
     """
     cls.model_dir = tempfile.TemporaryDirectory()
     slr_config = SLRConfig(directory=cls.model_dir.name, predict="Y")
     cls.model = SLR(slr_config)
     cls.feature = DefFeature("X", float, 1)
     cls.features = Features(cls.feature)
     x_values, y_values = list(zip(*FEATURE_DATA))
     repos = []
     for idx, y_value in enumerate(y_values):
         pair = {"X": x_values[idx], "Y": y_value}
         repos.append(Repo(str(idx), data={"features": pair}))
     cls.repos = repos
     cls.sources = Sources(
         MemorySource(MemorySourceConfig(repos=cls.repos))
     )

示例#21

0

显示文件

文件： test_df.py 项目： oliverob/dffml

 def setUpClass(self):
     """Create four records and wrap them in a memory source.

     Record ``i`` takes its "Years", "Expertise", "Trust" and "Salary"
     values from position ``i`` of the externally defined sequences
     ``A``, ``B``, ``C`` and ``D`` respectively.
     """
     records = []
     for idx in range(4):
         row = {
             "Years": A[idx],
             "Expertise": B[idx],
             "Trust": C[idx],
             "Salary": D[idx],
         }
         records.append(Record(str(idx), data={"features": row}))
     self.records = records
     self.source = Sources(
         MemorySource(MemorySourceConfig(records=self.records)))

示例#22

0

显示文件

    def setUpClass(cls):
        """Build text records from ``DATA`` and the HF classification model.

        ``DATA`` is transposed into a text column ("A") and a label column
        ("X"); one record is created per row, keyed by the row index. The
        model's cache, logging and output directories all point at one
        temporary directory.
        """
        cls.features = Features()
        cls.features.append(Feature("A", str, 1))
        texts, labels = list(zip(*DATA))
        records = []
        for idx, label in enumerate(labels):
            records.append(
                Record(str(idx), data={"features": {
                    "A": texts[idx],
                    "X": label
                }}))
        cls.records = records

        cls.sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.records)))
        cls.model_dir = tempfile.TemporaryDirectory()
        hf_config = HFClassificationModelConfig(
            model_name_or_path="bert-base-cased",
            cache_dir=cls.model_dir.name,
            logging_dir=cls.model_dir.name,
            output_dir=cls.model_dir.name,
            features=cls.features,
            predict=Feature("X", int, 1),
            label_list=["0", "1"],
        )
        cls.model = HFClassificationModel(hf_config)

示例#23

0

显示文件

    def setUpClass(cls):
        """Build features, records, a memory source, the model and the scorer.

        ``cls.MODEL_TYPE`` selects the classification, regression or
        clustering dataset. Model types containing "MULTI_" predict both
        "X" and "Y"; otherwise only "X" is predicted.
        """
        cls.is_multi = "MULTI_" in cls.MODEL_TYPE
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()

        def _add_float_features(names):
            # Register one single-valued float feature per name
            for name in names:
                cls.features.append(Feature(name, float, 1))

        def _records_from(names, columns):
            # One record per row, keyed by the row's position
            return [
                Record(str(idx), data={"features": dict(zip(names, row))})
                for idx, row in enumerate(zip(*columns))
            ]

        if cls.MODEL_TYPE in classifier_types:
            a, b, c, d, e, f, g, h, x, y = list(
                zip(*FEATURE_DATA_CLASSIFICATION)
            )
            _add_float_features(("A", "B", "C", "D", "E", "F", "G", "H"))
            if cls.MODEL_TYPE == "CLASSIFICATION":
                cls.features.append(Feature("X", float, 1))
            cls.records = _records_from(
                ("A", "B", "C", "D", "E", "F", "G", "H", "X", "Y"),
                (a, b, c, d, e, f, g, h, x, y),
            )

        elif cls.MODEL_TYPE in regressor_types:
            _add_float_features(("A", "B", "C", "D"))
            if cls.MODEL_TYPE == "REGRESSION":
                cls.features.append(Feature("X", float, 1))
            a, b, c, d, x, y = list(zip(*FEATURE_DATA_REGRESSION))
            cls.records = _records_from(
                ("A", "B", "C", "D", "X", "Y"),
                (a, b, c, d, x, y),
            )
        elif cls.MODEL_TYPE == "CLUSTERING":
            _add_float_features(("A", "B", "C", "D"))
            a, b, c, d, x = list(zip(*FEATURE_DATA_CLUSTERING))
            cls.records = _records_from(
                ("A", "B", "C", "D", "X"),
                (a, b, c, d, x),
            )

        cls.sources = Sources(
            MemorySource(MemorySourceConfig(records=cls.records))
        )
        model_kwargs = {
            "location": cls.model_dir.name,
            "features": cls.features,
        }
        estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
        if estimator_type in supervised_estimators:
            if cls.is_multi:
                predict = Features(
                    Feature("X", float, 1), Feature("Y", float, 1)
                )
            else:
                predict = Feature("X", float, 1)
            model_kwargs["predict"] = predict
        elif estimator_type in unsupervised_estimators:
            # TODO If cls.TRUE_CLSTR_PRESENT then we want to use the
            # mutual_info_score scikit accuracy scorer. In this case we might
            # want to change tcluster to a boolean config property.
            # For more info see commit e4f523976bf37d3457cda140ceab7899420ae2c7
            model_kwargs["predict"] = Feature("X", float, 1)
        cls.model = cls.MODEL(cls.MODEL_CONFIG(**model_kwargs))
        cls.scorer = cls.SCORER()

示例#24

0

显示文件

    def setUpClass(cls):
        """Build features, repos, a memory source and the model under test.

        ``cls.MODEL_TYPE`` selects the dataset: "CLASSIFICATION" uses nine
        float features (A-I), "REGRESSION" three (A-C) and "CLUSTERING"
        four (A-D). Column "X" is stored alongside the features in every
        case. Supervised estimators predict "X"; unsupervised ones receive
        "X" as ``tcluster`` only when ``cls.TRUE_CLSTR_PRESENT`` is set.
        """
        cls.model_dir = tempfile.TemporaryDirectory()
        cls.features = Features()
        # Compare with ``==``: ``is`` checks object identity, which only
        # works for string literals by accident of interning and emits a
        # SyntaxWarning on Python 3.8+.
        if cls.MODEL_TYPE == "CLASSIFICATION":
            for name in "ABCDEFGHI":
                cls.features.append(DefFeature(name, float, 1))
            A, B, C, D, E, F, G, H, I, X = list(
                zip(*FEATURE_DATA_CLASSIFICATION))
            cls.repos = [
                Repo(
                    str(i),
                    data={
                        "features": {
                            "A": A[i],
                            "B": B[i],
                            "C": C[i],
                            "D": D[i],
                            "E": E[i],
                            "F": F[i],
                            "G": G[i],
                            "H": H[i],
                            "I": I[i],
                            "X": X[i],
                        }
                    },
                ) for i in range(0, len(A))
            ]
        elif cls.MODEL_TYPE == "REGRESSION":
            for name in "ABC":
                cls.features.append(DefFeature(name, float, 1))
            A, B, C, X = list(zip(*FEATURE_DATA_REGRESSION))
            cls.repos = [
                Repo(
                    str(i),
                    data={
                        "features": {
                            "A": A[i],
                            "B": B[i],
                            "C": C[i],
                            "X": X[i],
                        }
                    },
                ) for i in range(0, len(A))
            ]
        elif cls.MODEL_TYPE == "CLUSTERING":
            for name in "ABCD":
                cls.features.append(DefFeature(name, float, 1))
            A, B, C, D, X = list(zip(*FEATURE_DATA_CLUSTERING))
            cls.repos = [
                Repo(
                    str(i),
                    data={
                        "features": {
                            "A": A[i],
                            "B": B[i],
                            "C": C[i],
                            "D": D[i],
                            "X": X[i],
                        }
                    },
                ) for i in range(0, len(A))
            ]

        cls.sources = Sources(MemorySource(
            MemorySourceConfig(repos=cls.repos)))
        properties = {
            "directory": cls.model_dir.name,
            "features": cls.features,
        }
        config_fields = dict()
        estimator_type = cls.MODEL.SCIKIT_MODEL._estimator_type
        if estimator_type in supervised_estimators:
            config_fields["predict"] = DefFeature("X", float, 1)
        elif estimator_type in unsupervised_estimators:
            # The true cluster column is only passed when the dataset has one
            if cls.TRUE_CLSTR_PRESENT:
                config_fields["tcluster"] = DefFeature("X", float, 1)
        cls.model = cls.MODEL(
            cls.MODEL_CONFIG(**{
                **properties,
                **config_fields
            }))