def cv_on_dataset(dataset, feature_param, model_param, ax, result_store,
                  contentid_groups, logger=None, aggregate_method=np.mean):
    """Extract features for a dataset and k-fold cross-validate one model on it.

    The assets are read from ``dataset``, the folds are built from
    ``contentid_groups``, the features listed in ``feature_param.feature_dict``
    are assembled, and the model type named by ``model_param.model_type`` is
    cross-validated on the resulting feature results. A short summary is
    printed to stdout; when ``ax`` is given, a scatter plot is drawn on it.

    :param dataset: dataset object; must expose ``dataset_name`` when plotting.
    :param feature_param: object exposing ``feature_dict``.
    :param model_param: object exposing ``model_type`` and ``model_param_dict``.
    :param ax: plotting axes, or None to skip plotting.
    :param result_store: passed through to the FeatureAssembler.
    :param contentid_groups: passed to ``construct_kfold_list`` with the assets.
    :param logger: optional logger forwarded to the assembler and the CV run.
    :param aggregate_method: callable set on every feature result as its score
        aggregation method (defaults to ``np.mean``).
    :return: tuple ``(assets, cv_output)``.
    """
    assets = read_dataset(dataset)
    folds = construct_kfold_list(assets, contentid_groups)

    # NOTE: an earlier variant of this call used parallelize=False,
    # fifo_mode=False (it was annotated "VQM").
    assembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
    )
    assembler.run()
    feature_results = assembler.results

    for feature_result in feature_results:
        feature_result.set_score_aggregate_method(aggregate_method)

    model_cls = TrainTestModel.find_subclass(model_param.model_type)

    # Cross-validate the selected model class over the prepared folds.
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_cls,
        model_param.model_param_dict,
        feature_results,
        folds,
        logger=logger,
    )

    print('Feature parameters: {}'.format(feature_param.feature_dict))
    print('Model type: {}'.format(model_param.model_type))
    print('Model parameters: {}'.format(model_param.model_param_dict))
    stats_line = model_cls.format_stats_for_print(cv_output['aggr_stats'])
    print('Stats: {}'.format(stats_line))

    # Nothing to draw on: return the results right away.
    if ax is None:
        return assets, cv_output

    model_cls.plot_scatter(ax, cv_output['aggr_stats'],
                           content_ids=cv_output['contentids'])
    ax.set_xlabel('True Score')
    ax.set_ylabel("Predicted Score")
    ax.grid()
    title = "Dataset: {dataset}, Model: {model},\n{stats}".format(
        dataset=dataset.dataset_name,
        model=model_param.model_type,
        stats=model_cls.format_stats_for_plot(cv_output['aggr_stats']),
    )
    ax.set_title(title)

    return assets, cv_output
def test_run_kfold_cross_validation_with_list_input(self):
    """Cross-validate with folds given as explicit lists of indices.

    Uses SklearnRandomForestTrainTestModel on ``self.features`` and compares
    the aggregated statistics with recorded reference values.
    """
    folds = [[0, 3, 8], [2, 1, 5], [4, 6, 7]]
    params = {
        'norm_type': 'normalize',
        'n_estimators': 10,
        'random_state': 0,
    }

    cv_result = ModelCrossValidation.run_kfold_cross_validation(
        SklearnRandomForestTrainTestModel, params, self.features, folds)
    stats = cv_result['aggr_stats']

    # (metric, reference value, decimal places), checked in this order.
    expectations = (
        ('SRCC', 0.18333333333333335, 4),
        ('PCC', 0.35513638509959689, 4),
        ('KENDALL', 0.1111111111111111, 3),
        ('RMSE', 1.2740400878438387, 3),
    )
    for metric, reference, places in expectations:
        self.assertAlmostEqual(stats[metric], reference, places=places)
def test_run_kfold_cross_validation_libsvmnusvr(self):
    """Run 3-fold cross validation with LibsvmNusvrTrainTestModel.

    The aggregated SRCC, PCC, KENDALL and RMSE values are compared with
    recorded reference values to four decimal places.
    """
    model_cls = LibsvmNusvrTrainTestModel
    params = {'norm_type': 'normalize'}
    n_folds = 3

    cv_result = ModelCrossValidation.run_kfold_cross_validation(
        model_cls, params, self.features, n_folds)
    stats = cv_result['aggr_stats']

    self.assertAlmostEqual(stats['SRCC'], 0.31666666666666665, places=4)
    self.assertAlmostEqual(stats['PCC'], 0.33103132578536021, places=4)
    self.assertAlmostEqual(stats['KENDALL'], 0.27777777777777779, places=4)
    self.assertAlmostEqual(stats['RMSE'], 1.2855099934718619, places=4)
def test_run_kfold_cross_validation_extratrees(self):
    """Run 3-fold cross validation with SklearnExtraTreesTrainTestModel.

    The aggregated statistics are compared with recorded reference values
    to four decimal places.
    """
    params = {
        'norm_type': 'normalize',
        'random_state': 0,
        'n_estimators': 10,
    }
    cv_result = ModelCrossValidation.run_kfold_cross_validation(
        SklearnExtraTreesTrainTestModel, params, self.features, 3)
    stats = cv_result['aggr_stats']

    # (metric, reference value) pairs, checked in this order.
    references = (
        ('SRCC', 0.17320508075688773),
        ('PCC', 0.33023719320146966),
        ('KENDALL', 0.14907119849998599),
        ('RMSE', 1.3279056191361394),
    )
    for metric, reference in references:
        self.assertAlmostEqual(stats[metric], reference, places=4)
def cv_on_dataset(dataset, feature_param, model_param, ax, result_store,
                  contentid_groups, logger=None, aggregate_method=np.mean):
    """Assemble features for a dataset and run k-fold cross validation on it.

    Assets are read from ``dataset`` and grouped into folds using
    ``contentid_groups``. The features in ``feature_param.feature_dict`` are
    assembled, every feature result gets ``aggregate_method`` as its score
    aggregation method, and the model class looked up from
    ``model_param.model_type`` is cross-validated. A summary is printed to
    stdout and, if ``ax`` is not None, a scatter plot is drawn on it.

    :param dataset: dataset object; ``dataset_name`` is read when plotting.
    :param feature_param: object exposing ``feature_dict``.
    :param model_param: object exposing ``model_type`` and ``model_param_dict``.
    :param ax: plotting axes, or None to skip plotting.
    :param result_store: forwarded to the FeatureAssembler.
    :param contentid_groups: forwarded to ``construct_kfold_list``.
    :param logger: optional logger forwarded to the assembler and the CV run.
    :param aggregate_method: score aggregation callable (default ``np.mean``).
    :return: tuple ``(assets, cv_output)``.
    """
    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    # NOTE: the original carried a commented-out alternative for this call,
    # parallelize=False, fifo_mode=False, annotated "VQM".
    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
    )
    fassembler.run()
    results = fassembler.results

    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)

    # Run k-fold cross validation for the selected model class.
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class,
        model_param.model_param_dict,
        results,
        kfold,
        logger=logger,
    )

    # print(...) with one parenthesised argument emits the same text under
    # Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
    print('Feature parameters: {}'.format(feature_param.feature_dict))
    print('Model type: {}'.format(model_param.model_type))
    print('Model parameters: {}'.format(model_param.model_param_dict))
    print('Stats: {}'.format(model_class.format_stats(cv_output['aggr_stats'])))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output['aggr_stats'],
                                 cv_output['contentids'])
        ax.set_xlabel('True Score')
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title("Dataset: {dataset}, Model: {model},\n{stats}".format(
            dataset=dataset.dataset_name,
            model=model_param.model_type,
            stats=model_class.format_stats(cv_output['aggr_stats']),
        ))

    return assets, cv_output
def test_run_kfold_cross_validation_libsvmnusvr(self):
    """Run 3-fold cross validation with LibsvmNusvrTrainTestModel.

    The aggregated SRCC, PCC, KENDALL and RMSE values are compared with
    recorded reference values to four decimal places.
    """
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("test k-fold cross validation on libsvmnusvr...")

    train_test_model_class = LibsvmNusvrTrainTestModel
    model_param = {'norm_type': 'normalize'}

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)
    aggr_stats = output['aggr_stats']

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.31666666666666665, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.33103132578536021, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.27777777777777779, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.2855099934718619, places=4)
def test_run_kfold_cross_validation_extratrees(self):
    """Run 3-fold cross validation with SklearnExtraTreesTrainTestModel.

    The aggregated SRCC, PCC, KENDALL and RMSE values are compared with
    recorded reference values to four decimal places.
    """
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("test k-fold cross validation on extra trees...")

    train_test_model_class = SklearnExtraTreesTrainTestModel
    # n_estimators is pinned because scikit-learn changed its default from 10
    # to 100 in 0.22; the reference values below presumably come from the old
    # default of 10 -- TODO confirm against the model class.
    model_param = {
        'norm_type': 'normalize',
        'random_state': 0,
        'n_estimators': 10,
    }

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)
    aggr_stats = output['aggr_stats']

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.17320508075688773, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.33023719320146966, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.14907119849998599, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.3279056191361394, places=4)
def test_run_kfold_cross_validation_randomforest(self):
    """Run 3-fold cross validation with SklearnRandomForestTrainTestModel.

    The aggregated SRCC, PCC, KENDALL and RMSE values are compared with
    recorded reference values to four decimal places.
    """
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("test k-fold cross validation on random forest...")

    train_test_model_class = SklearnRandomForestTrainTestModel
    # n_estimators is pinned because scikit-learn changed its default from 10
    # to 100 in 0.22; the reference values below presumably come from the old
    # default of 10 -- TODO confirm against the model class.
    model_param = {
        'norm_type': 'normalize',
        'n_estimators': 10,
        'random_state': 0,
    }

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)
    aggr_stats = output['aggr_stats']

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.28452131897694583, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.1689046198483892, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.084515425472851652, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.344683833136588, places=4)
def test_run_kfold_cross_validation_randomforest(self):
    """Cross-validate SklearnRandomForestTrainTestModel over three folds.

    Checks the aggregated statistics of the run against recorded reference
    values, each to four decimal places.
    """
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print("test k-fold cross validation on random forest...")

    train_test_model_class = SklearnRandomForestTrainTestModel
    # scikit-learn raised the default n_estimators from 10 to 100 in 0.22.
    # Pin 10, presumably the value the reference numbers were produced
    # with -- TODO confirm against the model class.
    model_param = {
        'norm_type': 'normalize',
        'n_estimators': 10,
        'random_state': 0,
    }

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)
    aggr_stats = output['aggr_stats']

    # The assertAlmostEquals alias is deprecated and gone in Python 3.12;
    # assertAlmostEqual is the supported name.
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.28452131897694583, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.1689046198483892, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.084515425472851652, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.344683833136588, places=4)
def test_run_kfold_cross_validation_with_list_input(self):
    """Cross-validate with folds given as explicit lists of indices.

    Uses SklearnRandomForestTrainTestModel on ``self.features`` and compares
    the aggregated statistics with recorded reference values (SRCC and PCC
    to four decimal places, KENDALL and RMSE to three).
    """
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("test k-fold cross validation with list input...")

    train_test_model_class = SklearnRandomForestTrainTestModel
    # n_estimators is pinned because scikit-learn changed its default from 10
    # to 100 in 0.22; the reference values below presumably come from the old
    # default of 10 -- TODO confirm against the model class.
    model_param = {
        'norm_type': 'normalize',
        'n_estimators': 10,
        'random_state': 0,
    }

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features,
        [[0, 3, 8], [2, 1, 5], [4, 6, 7]])
    aggr_stats = output['aggr_stats']

    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which was removed from unittest in Python 3.12.
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.18333333333333335, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.35513638509959689, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.1111111111111111, places=3)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.2740400878438387, places=3)