def test_run_kfold_cross_validation_libsvmnusvr(self):
    # Cross-validates LibsvmnusvrTrainTestModel on the sample feature
    # extraction results (kfold argument 6) and compares the aggregate
    # SRCC / PCC / KENDALL / RMSE with recorded values to 4 decimal places.
    print("test k-fold cross validation on libsvmnusvr...")

    train_test_model_class = LibsvmnusvrTrainTestModel
    model_param = {'norm_type': 'normalize'}

    feature_df_file = config.ROOT + \
        "/python/test/resource/sample_feature_extraction_results.json"
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(feature_df_file, "r") as feature_file:
        feature_text = feature_file.read()
    # NOTE(review): eval() executes whatever the file contains. The path
    # points into the test resource directory, so it is presumably trusted;
    # consider json.loads / ast.literal_eval if the format allows -- confirm.
    feature_df = pd.DataFrame.from_dict(eval(feature_text))

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, feature_df, 6)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.92387451180595015, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.92481147926825724, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.75416215405673581, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 0.42231775639097513, places=4)
def test_run_kfold_cross_validation_with_list_input(self):
    # Cross-validates RandomForestTrainTestModel on the first 200 rows of the
    # sample feature extraction results, passing the folds explicitly as a
    # list of three index ranges, and compares the aggregate stats with
    # recorded values (default assertAlmostEqual precision).
    print("test k-fold cross validation with list input...")

    train_test_model_class = RandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    feature_df_file = config.ROOT + \
        "/python/test/resource/sample_feature_extraction_results.json"
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(feature_df_file, "r") as feature_file:
        feature_text = feature_file.read()
    # NOTE(review): eval() executes whatever the file contains. The path
    # points into the test resource directory, so it is presumably trusted;
    # consider json.loads / ast.literal_eval if the format allows -- confirm.
    feature_df = pd.DataFrame.from_dict(eval(feature_text))
    feature_df = feature_df[:200]

    # NOTE(review): under Python 2 each range() below is a list; under
    # Python 3 it would be a range object -- confirm the callee accepts both.
    kfold = [range(0, 50), range(130, 200), range(50, 130)]
    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, feature_df, kfold)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.90636761259756715)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.90819953685397914)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.72937284548325965)
    self.assertAlmostEqual(aggr_stats['RMSE'], 0.49899297305829415)
def test_run_kfold_cross_validation_randomforest(self):
    # Cross-validates RandomForestTrainTestModel (random_state fixed to 0) on
    # the sample feature extraction results (kfold argument 6) and compares
    # the aggregate SRCC / PCC / KENDALL / RMSE with recorded values to
    # 4 decimal places.
    print("test k-fold cross validation on random forest...")

    train_test_model_class = RandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    feature_df_file = config.ROOT + \
        "/python/test/resource/sample_feature_extraction_results.json"
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(feature_df_file, "r") as feature_file:
        feature_text = feature_file.read()
    # NOTE(review): eval() executes whatever the file contains. The path
    # points into the test resource directory, so it is presumably trusted;
    # consider json.loads / ast.literal_eval if the format allows -- confirm.
    feature_df = pd.DataFrame.from_dict(eval(feature_text))

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, feature_df, 6)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.92695443548602008, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.93189074441713937, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.76031309571294092, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 0.40381451586590256, places=4)
def cv_on_dataset(dataset, feature_param, model_param, ax, result_store,
                  contentid_groups, logger=None, aggregate_method=np.mean):
    """Extract features for a dataset and cross-validate a model on them.

    Reads the assets of ``dataset``, builds the fold list from the assets and
    ``contentid_groups``, runs a FeatureAssembler over the assets, sets
    ``aggregate_method`` on every result, and hands the results to
    ModelCrossValidation.run_kfold_cross_validation together with the model
    class looked up from ``model_param.model_type``.

    The feature parameters, model type, model parameters and formatted
    aggregate stats are printed. When ``ax`` is not None, the model class's
    plot_scatter is called on it and axis labels, grid and title are set.

    Returns:
        The tuple ``(assets, cv_output)``.
    """
    assets = read_dataset(dataset)
    fold_list = construct_kfold_list(assets, contentid_groups)

    assembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
        # parallelize=False, fifo_mode=False,  # VQM
    )
    assembler.run()

    feature_results = assembler.results
    for feature_result in feature_results:
        feature_result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)

    # Run k-fold cross validation with the folds constructed above.
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class,
        model_param.model_param_dict,
        feature_results,
        fold_list,
        logger=logger,
    )

    print('Feature parameters: {}'.format(feature_param.feature_dict))
    print('Model type: {}'.format(model_param.model_type))
    print('Model parameters: {}'.format(model_param.model_param_dict))
    print('Stats: {}'.format(model_class.format_stats(cv_output['aggr_stats'])))

    # Nothing to draw on: return the results right away.
    if ax is None:
        return assets, cv_output

    model_class.plot_scatter(ax, cv_output['aggr_stats'],
                             cv_output['contentids'])
    ax.set_xlabel('True Score')
    ax.set_ylabel("Predicted Score")
    ax.grid()
    title = "Dataset: {dataset}, Model: {model},\n{stats}".format(
        dataset=dataset.dataset_name,
        model=model_param.model_type,
        stats=model_class.format_stats(cv_output['aggr_stats']),
    )
    ax.set_title(title)

    return assets, cv_output
def cv_on_dataset(
    dataset,
    feature_param,
    model_param,
    ax,
    result_store,
    contentid_groups,
    logger=None,
    aggregate_method=np.mean,
):
    """Run feature extraction and k-fold cross validation for one dataset.

    Steps, in order: read the dataset's assets; construct the fold list from
    the assets and ``contentid_groups``; run a FeatureAssembler; set
    ``aggregate_method`` on each assembled result; look up the model class
    from ``model_param.model_type``; call
    ModelCrossValidation.run_kfold_cross_validation; print the parameters and
    formatted aggregate stats; and, if ``ax`` is not None, plot a scatter of
    the aggregate stats on ``ax`` with labels, grid and title.

    Returns:
        The tuple ``(assets, cv_output)``.
    """
    assets = read_dataset(dataset)
    folds = construct_kfold_list(assets, contentid_groups)

    # Alternative kept from the original source (tagged "VQM"):
    # parallelize=False, fifo_mode=False
    assembler_kwargs = dict(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
    )
    feature_assembler = FeatureAssembler(**assembler_kwargs)
    feature_assembler.run()

    assembled = feature_assembler.results
    for item in assembled:
        item.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)

    # Run k-fold cross validation with the folds constructed above.
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param.model_param_dict, assembled, folds,
        logger=logger)

    print("Feature parameters: {}".format(feature_param.feature_dict))
    print("Model type: {}".format(model_param.model_type))
    print("Model parameters: {}".format(model_param.model_param_dict))
    stats_line = "Stats: {}".format(
        model_class.format_stats(cv_output["aggr_stats"]))
    print(stats_line)

    if ax is not None:
        model_class.plot_scatter(
            ax, cv_output["aggr_stats"], cv_output["contentids"])
        ax.set_xlabel("True Score")
        ax.set_ylabel("Predicted Score")
        ax.grid()
        title_template = "Dataset: {dataset}, Model: {model},\n{stats}"
        ax.set_title(title_template.format(
            dataset=dataset.dataset_name,
            model=model_param.model_type,
            stats=model_class.format_stats(cv_output["aggr_stats"]),
        ))

    return assets, cv_output
def test_run_kfold_cross_validation_libsvmnusvr(self):
    # Cross-validates LibsvmNusvrTrainTestModel on self.features (kfold
    # argument 3) and compares the aggregate SRCC / PCC / KENDALL / RMSE with
    # recorded values to 4 decimal places.
    print("test k-fold cross validation on libsvmnusvr...")

    train_test_model_class = LibsvmNusvrTrainTestModel
    model_param = {'norm_type': 'normalize'}

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.31666666666666665, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.33103132578536021, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.27777777777777779, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.2855099934718619, places=4)
def test_run_kfold_cross_validation_extratrees(self):
    # Cross-validates SklearnExtraTreesTrainTestModel (random_state fixed to
    # 0) on self.features (kfold argument 3) and compares the aggregate
    # SRCC / PCC / KENDALL / RMSE with recorded values to 4 decimal places.
    print("test k-fold cross validation on extra trees...")

    train_test_model_class = SklearnExtraTreesTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.17320508075688773, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.33023719320146966, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.14907119849998599, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.3279056191361394, places=4)
def test_run_kfold_cross_validation_randomforest(self):
    # Cross-validates SklearnRandomForestTrainTestModel (random_state fixed
    # to 0) on self.features (kfold argument 3) and compares the aggregate
    # SRCC / PCC / KENDALL / RMSE with recorded values to 4 decimal places.
    print("test k-fold cross validation on random forest...")

    train_test_model_class = SklearnRandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, 3)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.28452131897694583, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.1689046198483892, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.084515425472851652, places=4)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.344683833136588, places=4)
def test_run_kfold_cross_validation_libsvmnusvr(self):
    # Runs ModelCrossValidation.run_kfold_cross_validation with
    # LibsvmNusvrTrainTestModel, self.features and kfold argument 3, then
    # checks each aggregate statistic against its recorded value
    # (4 decimal places).
    print("test k-fold cross validation on libsvmnusvr...")

    model_class = LibsvmNusvrTrainTestModel
    model_param = {'norm_type': 'normalize'}

    output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param, self.features, 3)

    # Use assertAlmostEqual: the "assertAlmostEquals" spelling is a
    # deprecated alias of the same check.
    stats = output['aggr_stats']
    self.assertAlmostEqual(stats['SRCC'], 0.31666666666666665, places=4)
    self.assertAlmostEqual(stats['PCC'], 0.33103132578536021, places=4)
    self.assertAlmostEqual(stats['KENDALL'], 0.27777777777777779, places=4)
    self.assertAlmostEqual(stats['RMSE'], 1.2855099934718619, places=4)
def test_run_kfold_cross_validation_extratrees(self):
    # Runs ModelCrossValidation.run_kfold_cross_validation with
    # SklearnExtraTreesTrainTestModel (random_state 0), self.features and
    # kfold argument 3, then checks each aggregate statistic against its
    # recorded value (4 decimal places).
    print("test k-fold cross validation on extra trees...")

    model_class = SklearnExtraTreesTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param, self.features, 3)

    # Use assertAlmostEqual: the "assertAlmostEquals" spelling is a
    # deprecated alias of the same check.
    stats = output['aggr_stats']
    self.assertAlmostEqual(stats['SRCC'], 0.17320508075688773, places=4)
    self.assertAlmostEqual(stats['PCC'], 0.33023719320146966, places=4)
    self.assertAlmostEqual(stats['KENDALL'], 0.14907119849998599, places=4)
    self.assertAlmostEqual(stats['RMSE'], 1.3279056191361394, places=4)
def test_run_kfold_cross_validation_randomforest(self):
    # Runs ModelCrossValidation.run_kfold_cross_validation with
    # SklearnRandomForestTrainTestModel (random_state 0), self.features and
    # kfold argument 3, then checks each aggregate statistic against its
    # recorded value (4 decimal places).
    print("test k-fold cross validation on random forest...")

    model_class = SklearnRandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param, self.features, 3)

    # Use assertAlmostEqual: the "assertAlmostEquals" spelling is a
    # deprecated alias of the same check.
    stats = output['aggr_stats']
    self.assertAlmostEqual(stats['SRCC'], 0.28452131897694583, places=4)
    self.assertAlmostEqual(stats['PCC'], 0.1689046198483892, places=4)
    self.assertAlmostEqual(stats['KENDALL'], 0.084515425472851652, places=4)
    self.assertAlmostEqual(stats['RMSE'], 1.344683833136588, places=4)
def test_run_kfold_cross_validation_with_list_input(self):
    # Cross-validates SklearnRandomForestTrainTestModel (random_state fixed
    # to 0) on self.features, passing the folds explicitly as three index
    # lists, and compares the aggregate stats with recorded values
    # (SRCC / PCC to 4 places, KENDALL / RMSE to 3 places).
    print("test k-fold cross validation with list input...")

    train_test_model_class = SklearnRandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    kfold = [[0, 3, 8], [2, 1, 5], [4, 6, 7]]
    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, self.features, kfold)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.18333333333333335, places=4)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.35513638509959689, places=4)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.1111111111111111, places=3)
    self.assertAlmostEqual(aggr_stats['RMSE'], 1.2740400878438387, places=3)
def test_run_kfold_cross_validation_with_list_input(self):
    # Runs ModelCrossValidation.run_kfold_cross_validation with
    # SklearnRandomForestTrainTestModel (random_state 0) and self.features,
    # supplying the folds as a list of three index lists, then checks each
    # aggregate statistic against its recorded value (SRCC / PCC to 4
    # places, KENDALL / RMSE to 3 places).
    print("test k-fold cross validation with list input...")

    model_class = SklearnRandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}
    fold_indices = [[0, 3, 8], [2, 1, 5], [4, 6, 7]]

    output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param, self.features, fold_indices)

    # Use assertAlmostEqual: the "assertAlmostEquals" spelling is a
    # deprecated alias of the same check.
    stats = output['aggr_stats']
    self.assertAlmostEqual(stats['SRCC'], 0.18333333333333335, places=4)
    self.assertAlmostEqual(stats['PCC'], 0.35513638509959689, places=4)
    self.assertAlmostEqual(stats['KENDALL'], 0.1111111111111111, places=3)
    self.assertAlmostEqual(stats['RMSE'], 1.2740400878438387, places=3)
def test_run_kfold_cross_validation_libsvmnusvr(self):
    # Cross-validates LibsvmnusvrTrainTestModel on the sample feature
    # extraction results (kfold argument 6) and compares the aggregate
    # SRCC / PCC / KENDALL / RMSE with recorded values (default
    # assertAlmostEqual precision).
    print("test k-fold cross validation on libsvmnusvr...")

    train_test_model_class = LibsvmnusvrTrainTestModel
    model_param = {'norm_type': 'normalize'}

    feature_df_file = config.ROOT + \
        "/python/test/resource/sample_feature_extraction_results.json"
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(feature_df_file, "r") as feature_file:
        feature_text = feature_file.read()
    # NOTE(review): eval() executes whatever the file contains. The path
    # points into the test resource directory, so it is presumably trusted;
    # consider json.loads / ast.literal_eval if the format allows -- confirm.
    feature_df = pd.DataFrame.from_dict(eval(feature_text))

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, feature_df, 6)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.92387451180595015)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.92481147926825724)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.75416215405673581)
    self.assertAlmostEqual(aggr_stats['RMSE'], 0.42231775639097513)
def test_run_kfold_cross_validation_randomforest(self):
    # Cross-validates RandomForestTrainTestModel (random_state fixed to 0) on
    # the sample feature extraction results (kfold argument 6) and compares
    # the aggregate SRCC / PCC / KENDALL / RMSE with recorded values (default
    # assertAlmostEqual precision).
    print("test k-fold cross validation on random forest...")

    train_test_model_class = RandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    feature_df_file = config.ROOT + \
        "/python/test/resource/sample_feature_extraction_results.json"
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(feature_df_file, "r") as feature_file:
        feature_text = feature_file.read()
    # NOTE(review): eval() executes whatever the file contains. The path
    # points into the test resource directory, so it is presumably trusted;
    # consider json.loads / ast.literal_eval if the format allows -- confirm.
    feature_df = pd.DataFrame.from_dict(eval(feature_text))

    output = ModelCrossValidation.run_kfold_cross_validation(
        train_test_model_class, model_param, feature_df, 6)

    # assertAlmostEqual is the canonical name; the "Equals" spelling is a
    # deprecated alias.
    aggr_stats = output['aggr_stats']
    self.assertAlmostEqual(aggr_stats['SRCC'], 0.92695443548602008)
    self.assertAlmostEqual(aggr_stats['PCC'], 0.93189074441713937)
    self.assertAlmostEqual(aggr_stats['KENDALL'], 0.76031309571294092)
    self.assertAlmostEqual(aggr_stats['RMSE'], 0.40381451586590256)
def test_run_kfold_cross_validation_with_list_input(self):
    # Runs ModelCrossValidation.run_kfold_cross_validation with
    # RandomForestTrainTestModel (random_state 0) on the first 200 rows of
    # the sample feature extraction results, supplying the folds as a list
    # of three index ranges, then checks each aggregate statistic against
    # its recorded value (default assertAlmostEqual precision).
    print("test k-fold cross validation with list input...")

    model_class = RandomForestTrainTestModel
    model_param = {'norm_type': 'normalize', 'random_state': 0}

    feature_df_file = config.ROOT + \
        "/python/test/resource/sample_feature_extraction_results.json"
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(feature_df_file, "r") as feature_file:
        raw_features = feature_file.read()
    # NOTE(review): eval() executes whatever the file contains. The path
    # points into the test resource directory, so it is presumably trusted;
    # consider json.loads / ast.literal_eval if the format allows -- confirm.
    feature_df = pd.DataFrame.from_dict(eval(raw_features))
    feature_df = feature_df[:200]

    # NOTE(review): under Python 2 each range() below is a list; under
    # Python 3 it would be a range object -- confirm the callee accepts both.
    fold_indices = [range(0, 50), range(130, 200), range(50, 130)]
    output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param, feature_df, fold_indices)

    # Use assertAlmostEqual: the "assertAlmostEquals" spelling is a
    # deprecated alias of the same check.
    stats = output['aggr_stats']
    self.assertAlmostEqual(stats['SRCC'], 0.90636761259756715)
    self.assertAlmostEqual(stats['PCC'], 0.90819953685397914)
    self.assertAlmostEqual(stats['KENDALL'], 0.72937284548325965)
    self.assertAlmostEqual(stats['RMSE'], 0.49899297305829415)