def test_fit_and_predict_kfold(self):
    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))
    early_stop = EarlyStopping({"metric": {"name": "logloss"}})
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})

    params = copy.deepcopy(self.train_params)
    params["validation"] = {
        "validation_type": "kfold",
        "k_folds": 5,
        "shuffle": True,
    }
    il = IterativeLearner(params, callbacks=[early_stop, metric_logger])
    il.train(self.data)

    oof = il.get_out_of_folds()
    self.assertEqual(len(np.unique(oof.index)), oof.shape[0])
    self.assertTrue(np.array_equal(oof.index, self.data["train"]["X"].index))
    # use assertEqual here: assertTrue(a, b) treats b as the failure message
    # and never compares the two values
    self.assertEqual(oof.shape[0], self.data["train"]["X"].shape[0])

    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))
    y_predicted = il.predict(self.data["train"]["X"])
    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))

    metric = Metric({"name": "logloss"})
    loss = metric(self.data["train"]["y"], y_predicted)
    self.assertTrue(loss < 0.6)
def test_save_and_load(self):
    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))
    early_stop = EarlyStopping({"metric": {"name": "logloss"}})
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})
    il = IterativeLearner(self.train_params, callbacks=[early_stop, metric_logger])
    il.train(self.data)

    y_predicted = il.predict(self.data["train"]["X"])
    metric = Metric({"name": "logloss"})
    loss_1 = metric(self.data["train"]["y"], y_predicted)

    json_desc = il.to_json()
    il2 = IterativeLearner(self.train_params, callbacks=[])
    self.assertTrue(il.uid != il2.uid)
    il2.from_json(json_desc)
    self.assertTrue(il.uid == il2.uid)

    y_predicted_2 = il2.predict(self.data["train"]["X"])
    loss_2 = metric(self.data["train"]["y"], y_predicted_2)
    assert_almost_equal(loss_1, loss_2)

    uids = [i.uid for i in il.learners]
    uids2 = [i.uid for i in il2.learners]
    for u in uids:
        self.assertTrue(u in uids2)
def run(self):
    # update status
    mlmodel = MLModel.objects.get(pk=self.job_params.get("db_id"))
    mlmodel.status = "started"
    mlmodel.save()

    mlexperiment = MLExperiment.objects.get(pk=mlmodel.parent_experiment_id)
    print("mlexperiment", mlexperiment.id)
    print(mlexperiment.parent_columns_usage)

    # prepare data
    columns_usage = mlexperiment.parent_columns_usage.columns_usage
    print("cols", columns_usage)
    training_dataframe = mlexperiment.parent_training_dataframe
    print("training", training_dataframe.absolute_path)

    metric_params = mlexperiment.params.get("metric")
    validation_params = mlexperiment.params.get("validation")
    preprocessing_params = mlexperiment.params.get("preprocessing")

    df_train = DataServe.get(training_dataframe.absolute_path)
    training_data = {
        "train": {
            "X": df_train[columns_usage.get("input")],
            "y": df_train[columns_usage.get("target")],
        }
    }

    # prepare model hyper parameters
    learner_params = {
        "learner_type": mlmodel.model_type,
        "max_iters": 3,
        "max_depth": 1,
    }
    for k, v in mlmodel.params.items():
        learner_params[k] = v

    train_params = {
        "preprocessing": preprocessing_params,
        "validation": validation_params,
        "learner": learner_params,
    }
    print(train_params)

    # prepare needed callbacks
    early_stop = EarlyStopping({"metric": {"name": "logloss"}})
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})

    # run the training
    il = IterativeLearner(train_params, callbacks=[early_stop, metric_logger])
    il.train(training_data)

    # save the model
    save_details = il.save()
    logger.info(save_details)

    # store model details in platform database
    mlmodel.status = "done"
    mlmodel.save_details = save_details
    mlmodel.all_params = train_params  # all parameters will be needed for models loading
    mlmodel.save()
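# Illustrative sketch only (not part of the job code): assuming an experiment
# configured with 5-fold validation, as in the tests below, and an Xgboost
# model type, the train_params dict assembled in run() would look roughly like
# this. The concrete values are assumptions, not read from the platform database.
example_train_params = {
    "preprocessing": {},
    "validation": {"validation_type": "kfold", "k_folds": 5, "shuffle": True},
    "learner": {"learner_type": "Xgboost", "max_iters": 3, "max_depth": 1},
}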
def test_fit_and_predict(self):
    il = IterativeLearner(self.train_params, callbacks=[])
    il.train(self.data)

    y_predicted = il.predict(self.X)
    metric = Metric({"name": "logloss"})
    loss = metric(self.y, y_predicted)
    self.assertTrue(loss < 0.4)
def train_model(self, params, X, y):
    early_stop = EarlyStopping({"metric": {"name": "logloss"}})
    time_constraint = TimeConstraint({"train_seconds_time_limit": self._time_limit})
    il = IterativeLearner(params, callbacks=[early_stop, time_constraint])

    il_key = il.get_params_key()
    if il_key in self._models_params_keys:
        # this parameter set was already evaluated, skip it
        return None
    self._models_params_keys += [il_key]

    if self.should_train_next(il.get_name()):
        il.train({"train": {"X": X, "y": y}})
        return il
    return None
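# Hypothetical usage sketch, not part of the tuner: shows how train_model is
# expected to be driven. `fit_candidates` and `candidate_params` are invented
# names for illustration; duplicate or skipped parameter sets come back as None
# and are filtered out.
def fit_candidates(self, candidate_params, X, y):
    trained_models = []
    for params in candidate_params:
        il = self.train_model(params, X, y)
        if il is not None:
            trained_models.append(il)
    return trained_models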
def test_fit_and_predict(self):
    early_stop = EarlyStopping({"metric": {"name": "logloss"}})
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})
    il = IterativeLearner(self.train_params, callbacks=[early_stop, metric_logger])
    il.train(self.data)

    y_predicted = il.predict(self.X)
    metric = Metric({"name": "logloss"})
    loss = metric(self.y, y_predicted)
    self.assertTrue(loss < 0.4)
def test_fit_and_predict_split(self):
    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))
    early_stop = EarlyStopping({"metric": {"name": "logloss"}})
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})
    il = IterativeLearner(self.train_params, callbacks=[early_stop, metric_logger])
    il.train(self.data)
    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))

    y_predicted = il.predict(self.data["train"]["X"])
    self.assertTrue("Private" in list(self.data["train"]["X"]["workclass"]))

    metric = Metric({"name": "logloss"})
    loss = metric(self.data["train"]["y"], y_predicted)
    self.assertTrue(loss < 0.6)
def test_fit_and_predict(self):
    MAX_STEPS = 100
    # `additional` is assumed to be the learner's module-level settings dict,
    # imported elsewhere in this test module
    additional["max_steps"] = MAX_STEPS
    iters_cnt = 5
    max_iters = MaxItersConstraint({"max_iters": iters_cnt})
    metric_logger = MetricLogger({"metric_names": ["logloss"]})
    il = IterativeLearner(self.train_params, callbacks=[max_iters, metric_logger])
    il.train(self.data)

    metric_logs = il.get_metric_logs()
    for k in range(self.kfolds):
        # MaxItersConstraint should stop training after iters_cnt iterations,
        # well before the learner's MAX_STEPS limit
        self.assertEqual(
            len(metric_logs[il.learners[k].uid]["train"]["logloss"]), iters_cnt
        )
        self.assertNotEqual(
            len(metric_logs[il.learners[k].uid]["train"]["logloss"]), MAX_STEPS
        )
def train_model(self, params, X, y):
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})
    early_stop = EarlyStopping({"metric": {"name": self._optimize_metric}})
    time_constraint = TimeConstraint({"train_seconds_time_limit": self._time_limit})
    il = IterativeLearner(
        params, callbacks=[early_stop, time_constraint, metric_logger]
    )

    il_key = il.get_params_key()
    if il_key in self._models_params_keys:
        # this parameter set was already evaluated, skip it
        self._progress_bar.update(1)
        return None
    self._models_params_keys += [il_key]

    if self.should_train_next(il.get_name()):
        il.train({"train": {"X": X, "y": y}})
        self._progress_bar.update(1)
        return il

    self._progress_bar.update(1)
    return None
def test_fit_and_predict(self):
    MAX_STEPS = 10
    additional["max_steps"] = MAX_STEPS
    metric_logger = MetricLogger({"metric_names": ["logloss", "auc"]})
    il = IterativeLearner(self.train_params, callbacks=[metric_logger])
    il.train(self.data)

    metric_logs = il.get_metric_logs()
    self.assertEqual(
        len(metric_logs[il.learners[0].uid]["train"]["logloss"]),
        len(metric_logs[il.learners[0].uid]["train"]["auc"]),
    )
    self.assertEqual(
        len(metric_logs[il.learners[0].uid]["train"]["logloss"]),
        len(metric_logs[il.learners[0].uid]["iters"]),
    )
    self.assertEqual(
        len(metric_logs[il.learners[0].uid]["train"]["logloss"]), MAX_STEPS
    )
def test_save_and_load(self):
    il = IterativeLearner(self.train_params, callbacks=[])
    il.train(self.data)
    metric = Metric({"name": "logloss"})
    loss = metric(self.y, il.predict(self.X))

    json_desc = il.to_json()
    il2 = IterativeLearner(json_desc.get("params"), callbacks=[])
    self.assertTrue(il.uid != il2.uid)
    il2.from_json(json_desc)
    self.assertTrue(il.uid == il2.uid)

    loss2 = metric(self.y, il2.predict(self.X))
    assert_almost_equal(loss, loss2)

    uids = [i.uid for i in il.learners]
    uids2 = [i.uid for i in il2.learners]
    for u in uids:
        self.assertTrue(u in uids2)