def model(dataset):
    # Train a decision tree ('dt') model on the dataset's training data.
    train_path, _ = download_data(dataset.train_path)
    model = Model(method='dt', params=DT_PARAMS,
                  judgment_metric='roc_auc',
                  class_column=dataset.class_column)
    model.train_test(train_path=train_path)
    return model
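# A minimal sketch of what the DT_PARAMS constant referenced above might look
# like. Its actual contents are not shown in this code, so these decision tree
# hyperparameter values are purely illustrative assumptions.
DT_PARAMS = {
    'criterion': 'gini',
    'max_depth': 3,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
}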
def test_classifier(self, method, params):
    """
    Given a set of fully-qualified hyperparameters, create and test a
    classifier model.

    Returns: Model object and metrics dictionary
    """
    model = Model(method=method, params=params,
                  judgment_metric=self.datarun.metric,
                  class_column=self.dataset.class_column,
                  verbose_metrics=self.verbose_metrics)
    train_path, test_path = download_data(self.dataset.train_path,
                                          self.dataset.test_path,
                                          self.aws_config)
    metrics = model.train_test(train_path=train_path, test_path=test_path)
    target = self.datarun.score_target

    def metric_string(model):
        if 'cv' in target or 'mu_sigma' in target:
            return '%.3f +- %.3f' % (model.cv_judgment_metric,
                                     2 * model.cv_judgment_metric_stdev)
        else:
            return '%.3f' % model.test_judgment_metric

    logger.info('Judgment metric (%s, %s): %s' %
                (self.datarun.metric, target[:-len('_judgment_metric')],
                 metric_string(model)))

    old_best = self.db.get_best_classifier(datarun_id=self.datarun.id,
                                           score_target=target)
    if old_best is not None:
        if getattr(model, target) > getattr(old_best, target):
            logger.info('New best score! Previous best (classifier %s): %s' %
                        (old_best.id, metric_string(old_best)))
        else:
            logger.info('Best so far (classifier %s): %s' %
                        (old_best.id, metric_string(old_best)))

    return model, metrics
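# Hypothetical usage sketch: `worker` (an instance of the class that defines
# test_classifier above) and `proposed_params` (a fully-qualified hyperparameter
# dict chosen by the tuner) are assumed here; neither is defined in this code.
model, metrics = worker.test_classifier(method='dt', params=proposed_params)
print(metrics)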
def test_classifier(self, method, params):
    """
    Given a set of fully-qualified hyperparameters, create and test a
    classification model.

    Returns: Model object and performance dictionary
    """
    model = Model(code=method, params=params,
                  judgment_metric=self.datarun.metric,
                  label_column=self.dataset.label_column)
    train_path, test_path = download_data(self.dataset.train_path,
                                          self.dataset.test_path,
                                          self.aws_config)
    performance = model.train_test(train_path=train_path, test_path=test_path)

    old_best = self.db.get_best_classifier(datarun_id=self.datarun.id)
    if old_best is not None:
        old_val = old_best.cv_judgment_metric
        old_err = 2 * old_best.cv_judgment_metric_stdev

    new_val = model.cv_judgment_metric
    new_err = 2 * model.cv_judgment_metric_stdev

    _log('Judgment metric (%s): %.3f +- %.3f' %
         (self.datarun.metric, new_val, new_err))

    if old_best is not None:
        # The right-hand side of this comparison was missing in the original;
        # comparing the lower error bounds of the new and old scores is an
        # assumed reconstruction.
        if (new_val - new_err) > (old_val - old_err):
            _log('New best score! Previous best (classifier %s): %.3f +- %.3f' %
                 (old_best.id, old_val, old_err))
        else:
            _log('Best so far (classifier %s): %.3f +- %.3f' %
                 (old_best.id, old_val, old_err))

    return model, performance