Example #1
    def test_run_kfold_cross_validation_libsvmnusvr(self):

        print "test k-fold cross validation on libsvmnusvr..."

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(
            eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'],
                                0.92387451180595015,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'],
                                0.92481147926825724,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'],
                                0.75416215405673581,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'],
                                0.42231775639097513,
                                places=4)
Example #2
    def test_run_kfold_cross_validation_with_list_input(self):

        print "test k-fold cross validation with list input..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(
            eval(open(feature_df_file, "r").read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df,
            [range(0, 50), range(130, 200),
             range(50, 130)])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'],
                                0.90636761259756715)
        self.assertAlmostEquals(output['aggr_stats']['PCC'],
                                0.90819953685397914)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'],
                                0.72937284548325965)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'],
                                0.49899297305829415)
Example #3
    def test_run_kfold_cross_validation_randomforest(self):

        print "test k-fold cross validation on random forest..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(
            eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'],
                                0.92695443548602008,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'],
                                0.93189074441713937,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'],
                                0.76031309571294092,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'],
                                0.40381451586590256,
                                places=4)
Example #4
def cv_on_dataset(dataset,
                  feature_param,
                  model_param,
                  ax,
                  result_store,
                  contentid_groups,
                  logger=None,
                  aggregate_method=np.mean):

    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
        # parallelize=False, fifo_mode=False, # VQM
    )
    fassembler.run()
    results = fassembler.results

    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)
    # run nested kfold cv for each combination
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class,
        model_param.model_param_dict,
        results,
        kfold,
        logger=logger,
    )

    print 'Feature parameters: {}'.format(feature_param.feature_dict)
    print 'Model type: {}'.format(model_param.model_type)
    print 'Model parameters: {}'.format(model_param.model_param_dict)
    print 'Stats: {}'.format(model_class.format_stats(cv_output['aggr_stats']))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output['aggr_stats'],
                                 cv_output['contentids'])
        ax.set_xlabel('True Score')
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title("Dataset: {dataset}, Model: {model},\n{stats}".format(
            dataset=dataset.dataset_name,
            model=model_param.model_type,
            stats=model_class.format_stats(cv_output['aggr_stats'])))

    return assets, cv_output
Example #5
def cv_on_dataset(
    dataset, feature_param, model_param, ax, result_store, contentid_groups, logger=None, aggregate_method=np.mean
):

    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
        # parallelize=False, fifo_mode=False, # VQM
    )
    fassembler.run()
    results = fassembler.results

    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)
    # run nested kfold cv for each combination
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param.model_param_dict, results, kfold, logger=logger
    )

    print "Feature parameters: {}".format(feature_param.feature_dict)
    print "Model type: {}".format(model_param.model_type)
    print "Model parameters: {}".format(model_param.model_param_dict)
    print "Stats: {}".format(model_class.format_stats(cv_output["aggr_stats"]))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output["aggr_stats"], cv_output["contentids"])
        ax.set_xlabel("True Score")
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title(
            "Dataset: {dataset}, Model: {model},\n{stats}".format(
                dataset=dataset.dataset_name,
                model=model_param.model_type,
                stats=model_class.format_stats(cv_output["aggr_stats"]),
            )
        )

    return assets, cv_output
Example #6
    def test_run_kfold_cross_validation_libsvmnusvr(self):

        print "test k-fold cross validation on libsvmnusvr..."

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.31666666666666665, places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.33103132578536021, places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.27777777777777779, places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.2855099934718619, places=4)
Example #7
    def test_run_kfold_cross_validation_extratrees(self):

        print "test k-fold cross validation on extra trees..."

        train_test_model_class = SklearnExtraTreesTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.17320508075688773, places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.33023719320146966, places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.14907119849998599, places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.3279056191361394, places=4)
Example #8
    def test_run_kfold_cross_validation_randomforest(self):

        print "test k-fold cross validation on random forest..."

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.28452131897694583, places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.1689046198483892, places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.084515425472851652, places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.344683833136588, places=4)
Example #9
    def test_run_kfold_cross_validation_with_list_input(self):

        print "test k-fold cross validation with list input..."

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features,
            [[0, 3, 8], [2, 1, 5], [4, 6, 7]])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.18333333333333335, places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.35513638509959689, places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.1111111111111111, places=3)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.2740400878438387, places=3)
Example #10
    def test_run_kfold_cross_validation_libsvmnusvr(self):

        print "test k-fold cross validation on libsvmnusvr..."

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92387451180595015)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.92481147926825724)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.75416215405673581)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.42231775639097513)
Example #11
    def test_run_kfold_cross_validation_randomforest(self):

        print "test k-fold cross validation on random forest..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92695443548602008)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.93189074441713937)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76031309571294092)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.40381451586590256)
Example #12
    def test_run_kfold_cross_validation_with_list_input(self):

        print "test k-fold cross validation with list input..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df,
            [range(0,50), range(130, 200), range(50, 130)])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.90636761259756715)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.90819953685397914)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.72937284548325965)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.49899297305829415)
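
All of the examples above follow the same call pattern: pick a TrainTestModel subclass, build a model_param dict, assemble the features (a pandas DataFrame built from the sample JSON file, or the list of per-asset results produced by FeatureAssembler), and pass them to ModelCrossValidation.run_kfold_cross_validation together with either an integer fold count or an explicit list of index groups. The sketch below condenses that pattern into a standalone script; it is a minimal sketch assuming the module import paths shown in the comments, which are not part of the original snippets and may need adjusting to your checkout.

# Assumed import paths -- the snippets above only show the class and module
# names in use (config.ROOT, ModelCrossValidation, the model classes), not
# where they are imported from; adjust these to match your VMAF checkout.
import config
from core.cross_validation import ModelCrossValidation
from core.train_test_model import SklearnRandomForestTrainTestModel

import pandas as pd

# Load the sample feature table the same way the tests above do.
feature_df_file = config.ROOT + \
    "/python/test/resource/sample_feature_extraction_results.json"
with open(feature_df_file, "r") as f:
    feature_df = pd.DataFrame.from_dict(eval(f.read()))

model_param = {'norm_type': 'normalize', 'random_state': 0}

# Either let run_kfold_cross_validation split the data into 6 folds itself...
output = ModelCrossValidation.run_kfold_cross_validation(
    SklearnRandomForestTrainTestModel, model_param, feature_df, 6)

# ...or pass an explicit list of row-index groups, one list per fold.
output_listed = ModelCrossValidation.run_kfold_cross_validation(
    SklearnRandomForestTrainTestModel, model_param, feature_df[:200],
    [list(range(0, 50)), list(range(130, 200)), list(range(50, 130))])

# Aggregate statistics (SRCC, PCC, KENDALL, RMSE), as asserted in the tests.
print(output['aggr_stats'])
print(output_listed['aggr_stats'])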