示例#1
0
    def test_fit_and_predict_kfold(self):
        """Train with shuffled 5-fold validation and check out-of-fold output.

        Checks that the object returned by ``get_out_of_folds()`` has unique
        index values equal to the training index and one row per training
        row, that the ``workclass`` column of the training data still
        contains "Private" after training and after predicting, and that the
        logloss computed on the training data is below 0.6.
        """
        self.assertIn(
            "Private", list(self.data["train"]["X"]["workclass"]))

        early_stop = EarlyStopping({"metric": {"name": "logloss"}})
        metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})

        # deep copy so the shared train_params are not modified by this test
        params = copy.deepcopy(self.train_params)
        params["validation"] = {
            "validation_type": "kfold",
            "k_folds": 5,
            "shuffle": True,
        }
        il = IterativeLearner(params, callbacks=[early_stop, metric_logger])
        il.train(self.data)
        oof = il.get_out_of_folds()

        self.assertEqual(len(np.unique(oof.index)), oof.shape[0])
        self.assertTrue(
            np.array_equal(oof.index, self.data["train"]["X"].index))
        # This used to be assertTrue(a, b), which takes b as the failure
        # message and passes for any non-zero row count; compare the counts.
        self.assertEqual(oof.shape[0], self.data["train"]["X"].shape[0])

        self.assertIn(
            "Private", list(self.data["train"]["X"]["workclass"]))

        y_predicted = il.predict(self.data["train"]["X"])
        self.assertIn(
            "Private", list(self.data["train"]["X"]["workclass"]))

        metric = Metric({"name": "logloss"})
        loss = metric(self.data["train"]["y"], y_predicted)
        self.assertLess(loss, 0.6)
示例#2
0
    def test_save_and_load(self):
        """Round-trip a trained learner through its JSON description.

        Trains a learner with early stopping and metric logging, serializes
        it with ``to_json()``, restores it into a second learner with
        ``from_json()``, and checks that the uid, the training logloss and
        the uids of the inner learners carry over.
        """
        workclass_values = list(self.data["train"]["X"]["workclass"])
        self.assertTrue("Private" in workclass_values)

        callbacks = [
            EarlyStopping({"metric": {"name": "logloss"}}),
            MetricLogger({"metric_names": ["logloss", "auc"]}),
        ]
        original = IterativeLearner(self.train_params, callbacks=callbacks)
        original.train(self.data)

        predictions = original.predict(self.data["train"]["X"])
        logloss = Metric({"name": "logloss"})
        loss_before = logloss(self.data["train"]["y"], predictions)

        serialized = original.to_json()

        # a freshly built learner starts with its own uid ...
        restored = IterativeLearner(self.train_params, callbacks=[])
        self.assertTrue(original.uid != restored.uid)
        # ... and has the original's uid once the JSON has been loaded
        restored.from_json(serialized)
        self.assertTrue(original.uid == restored.uid)

        restored_predictions = restored.predict(self.data["train"]["X"])
        loss_after = logloss(self.data["train"]["y"], restored_predictions)
        assert_almost_equal(loss_before, loss_after)

        original_uids = [learner.uid for learner in original.learners]
        restored_uids = [learner.uid for learner in restored.learners]
        for uid in original_uids:
            self.assertTrue(uid in restored_uids)
示例#3
0
    def run(self):
        """Train the model named by the job parameters and record the result.

        Sets the ``MLModel`` row to "started", reads the training dataframe
        of its parent experiment through ``DataServe``, trains an
        ``IterativeLearner`` on the configured input/target columns, saves
        it, and finally stores the save details and the full training
        parameters on the row with status "done".
        """
        # flag the model as started before any work is done
        model_row = MLModel.objects.get(pk=self.job_params.get("db_id"))
        model_row.status = "started"
        model_row.save()

        experiment = MLExperiment.objects.get(
            pk=model_row.parent_experiment_id)
        print("mlexperiment", experiment.id)
        print(experiment.parent_columns_usage)

        # collect the experiment settings
        columns_usage = experiment.parent_columns_usage.columns_usage
        print("cols", columns_usage)
        training_dataframe = experiment.parent_training_dataframe
        print("training", training_dataframe.absolute_path)
        experiment_params = experiment.params
        # NOTE(review): metric_params is read here but not used below
        metric_params = experiment_params.get("metric")
        validation_params = experiment_params.get("validation")
        preprocessing_params = experiment_params.get("preprocessing")

        # load the data and pick the input / target columns
        frame = DataServe.get(training_dataframe.absolute_path)
        features = frame[columns_usage.get("input")]
        target = frame[columns_usage.get("target")]
        training_data = {"train": {"X": features, "y": target}}

        # defaults first, then the model's own params override them
        learner_params = {
            "learner_type": model_row.model_type,
            "max_iters": 3,
            "max_depth": 1,
        }
        learner_params.update(model_row.params)

        train_params = {
            "preprocessing": preprocessing_params,
            "validation": validation_params,
            "learner": learner_params,
        }
        print(train_params)

        # callbacks, training and saving
        callbacks = [
            EarlyStopping({"metric": {"name": "logloss"}}),
            MetricLogger({"metric_names": ["logloss", "auc"]}),
        ]
        learner = IterativeLearner(train_params, callbacks=callbacks)
        learner.train(training_data)
        save_details = learner.save()
        logger.info(save_details)

        # store the outcome in the platform database
        model_row.status = "done"
        model_row.save_details = save_details
        # all parameters will be needed for models loading
        model_row.all_params = train_params
        model_row.save()
    def test_fit_and_predict(self):
        """Train without callbacks and require logloss on ``self.X`` < 0.4."""
        learner = IterativeLearner(self.train_params, callbacks=[])
        learner.train(self.data)

        predictions = learner.predict(self.X)
        logloss = Metric({"name": "logloss"})
        below_threshold = logloss(self.y, predictions) < 0.4
        self.assertTrue(below_threshold)
示例#5
0
 def train_model(self, params, X, y):
     """Build a learner for ``params`` and train it on ``X`` / ``y``.

     Returns the trained ``IterativeLearner``, or ``None`` when the
     learner's params key has already been recorded or when
     ``should_train_next`` rejects the learner's name.
     """
     callbacks = [
         EarlyStopping({"metric": {"name": "logloss"}}),
         TimeConstraint({"train_seconds_time_limit": self._time_limit}),
     ]
     learner = IterativeLearner(params, callbacks=callbacks)

     params_key = learner.get_params_key()
     if params_key in self._models_params_keys:
         return None
     # record the key so the same params key is rejected on a later call
     self._models_params_keys += [params_key]

     if not self.should_train_next(learner.get_name()):
         return None
     learner.train({"train": {"X": X, "y": y}})
     return learner
    def test_fit_and_predict(self):
        """Train with early stopping and metric logging; logloss must be < 0.4."""
        callbacks = [
            EarlyStopping({"metric": {"name": "logloss"}}),
            MetricLogger({"metric_names": ["logloss", "auc"]}),
        ]
        learner = IterativeLearner(self.train_params, callbacks=callbacks)
        learner.train(self.data)

        predictions = learner.predict(self.X)
        logloss = Metric({"name": "logloss"})
        below_threshold = logloss(self.y, predictions) < 0.4
        self.assertTrue(below_threshold)
    def test_fit_and_predict_split(self):
        """Train with ``self.train_params`` and check data and loss.

        The ``workclass`` column of the training data must contain "Private"
        before training, after training and after predicting, and the
        logloss on the training data must be below 0.6.
        """

        def has_private_workclass():
            # re-read the column from self.data on every call so each check
            # looks at whatever the data holds at that moment
            return "Private" in list(self.data["train"]["X"]["workclass"])

        self.assertTrue(has_private_workclass())

        callbacks = [
            EarlyStopping({"metric": {"name": "logloss"}}),
            MetricLogger({"metric_names": ["logloss", "auc"]}),
        ]
        learner = IterativeLearner(self.train_params, callbacks=callbacks)
        learner.train(self.data)
        self.assertTrue(has_private_workclass())

        predictions = learner.predict(self.data["train"]["X"])
        self.assertTrue(has_private_workclass())

        logloss = Metric({"name": "logloss"})
        below_threshold = logloss(self.data["train"]["y"], predictions) < 0.6
        self.assertTrue(below_threshold)
示例#8
0
 def test_fit_and_predict(self):
     """Check that ``MaxItersConstraint`` limits the logged iterations.

     With ``max_steps`` set to 100 and a 5-iteration constraint, each of
     the first ``self.kfolds`` learners must have exactly 5 logged train
     logloss values, and not 100.
     """
     step_limit = 100
     additional["max_steps"] = step_limit
     allowed_iters = 5

     iters_constraint = MaxItersConstraint({"max_iters": allowed_iters})
     metric_logger = MetricLogger({"metric_names": ["logloss"]})
     learner = IterativeLearner(
         self.train_params, callbacks=[iters_constraint, metric_logger]
     )
     learner.train(self.data)

     logs_by_uid = learner.get_metric_logs()
     for fold in range(self.kfolds):
         fold_uid = learner.learners[fold].uid
         logged_count = len(logs_by_uid[fold_uid]["train"]["logloss"])
         self.assertEqual(logged_count, allowed_iters)
         self.assertNotEqual(logged_count, step_limit)
示例#9
0
 def train_model(self, params, X, y):
     """Build a learner for ``params``, train it if allowed, report progress.

     Returns the trained ``IterativeLearner``, or ``None`` when the
     learner's params key has already been recorded or when
     ``should_train_next`` rejects the learner's name. The progress bar is
     advanced by one just before every return.
     """
     metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})
     early_stop = EarlyStopping({"metric": {"name": self._optimize_metric}})
     time_constraint = TimeConstraint({"train_seconds_time_limit": self._time_limit})
     learner = IterativeLearner(
         params, callbacks=[early_stop, time_constraint, metric_logger]
     )

     trained = None
     params_key = learner.get_params_key()
     if params_key not in self._models_params_keys:
         # record the key so the same params key is rejected on a later call
         self._models_params_keys += [params_key]
         if self.should_train_next(learner.get_name()):
             learner.train({"train": {"X": X, "y": y}})
             trained = learner

     self._progress_bar.update(1)
     return trained
 def test_fit_and_predict(self):
     """Check the lengths of the metric logs of the first learner.

     With ``max_steps`` set to 10, the train logloss log must be as long
     as the train auc log and the ``iters`` log, and hold 10 entries.
     """
     step_limit = 10
     additional["max_steps"] = step_limit

     learner = IterativeLearner(
         self.train_params,
         callbacks=[MetricLogger({"metric_names": ["logloss", "auc"]})],
     )
     learner.train(self.data)

     first_logs = learner.get_metric_logs()[learner.learners[0].uid]
     logloss_count = len(first_logs["train"]["logloss"])
     self.assertEqual(logloss_count, len(first_logs["train"]["auc"]))
     self.assertEqual(logloss_count, len(first_logs["iters"]))
     self.assertEqual(logloss_count, step_limit)
    def test_save_and_load(self):
        """Round-trip a trained learner through its JSON description.

        The second learner is built from the ``"params"`` entry of the JSON
        description and then loaded with ``from_json()``; afterwards its
        uid, its logloss on ``self.X`` / ``self.y`` and the uids of its
        inner learners must match the original's.
        """
        original = IterativeLearner(self.train_params, callbacks=[])
        original.train(self.data)

        logloss = Metric({"name": "logloss"})
        loss_before = logloss(self.y, original.predict(self.X))

        serialized = original.to_json()
        restored = IterativeLearner(serialized.get("params"), callbacks=[])
        # a freshly built learner starts with its own uid ...
        self.assertTrue(original.uid != restored.uid)

        # ... and has the original's uid once the JSON has been loaded
        restored.from_json(serialized)
        self.assertTrue(original.uid == restored.uid)
        loss_after = logloss(self.y, restored.predict(self.X))
        assert_almost_equal(loss_before, loss_after)

        original_uids = [learner.uid for learner in original.learners]
        restored_uids = [learner.uid for learner in restored.learners]
        for uid in original_uids:
            self.assertTrue(uid in restored_uids)