Example #1
    def test_get_xs_ys(self):
        xs = TrainTestModel.get_xs_from_results(self.features, [0, 1, 2])

        self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 3)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']),
                               128.26146851380497,
                               places=4)
        self.assertEqual(len(xs['Moment_noref_feature_var_score']), 3)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']),
                               1569.2395085695462,
                               places=4)

        xs = TrainTestModel.get_xs_from_results(self.features)
        self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 9)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']),
                               111.59099599173773,
                               places=4)
        self.assertEqual(len(xs['Moment_noref_feature_var_score']), 9)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']),
                               1806.8620377229011,
                               places=4)

        ys = TrainTestModel.get_ys_from_results(self.features, [0, 1, 2])
        expected_ys = {
            'label': np.array([2.5, 3.9, 5.0]),
            'content_id': np.array([0, 1, 2])
        }
        self.assertTrue(all(ys['label'] == expected_ys['label']))
        self.assertTrue(all(ys['content_id'] == expected_ys['content_id']))
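
The test above implies the shape of the data returned by get_xs_from_results and get_ys_from_results: plain dicts mapping feature or label names to 1-D arrays. The sketch below is not part of the test; it assembles such dicts by hand. The feature values are made-up placeholders, and merging xs and ys into a single xys dict for training is an assumption based on how get_xys_from_results is used elsewhere in this project.

import numpy as np

# Hand-built feature dict; the numeric values are illustrative only.
xs = {
    'Moment_noref_feature_1st_score': np.array([110.0, 125.0, 150.0]),
    'Moment_noref_feature_var_score': np.array([1500.0, 1600.0, 1700.0]),
}
# Labels and content ids, matching the expected_ys used in the test.
ys = {
    'label': np.array([2.5, 3.9, 5.0]),
    'content_id': np.array([0, 1, 2]),
}
# Assumed: an xys dict for model.train() carries both features and labels.
xys = dict(xs)
xys.update(ys)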
Example #2
    def run_cross_validation(train_test_model_class,
                             model_param,
                             results_or_df,
                             train_indices,
                             test_indices):
        """
        Simple cross validation.
        :param train_test_model_class: subclass of TrainTestModel to instantiate
        :param model_param: model parameter dict passed to the model constructor
        :param results_or_df: list of BasicResult, or pandas.DataFrame
        :param train_indices: indices of the entries used for training
        :param test_indices: indices of the entries used for testing
        :return: dict with keys 'stats', 'model' and 'contentids'
        """
        xys_train = TrainTestModel.get_xys_from_results(results_or_df, train_indices)
        xs_test = TrainTestModel.get_xs_from_results(results_or_df, test_indices)
        ys_test = TrainTestModel.get_ys_from_results(results_or_df, test_indices)

        model = train_test_model_class(model_param, None)
        model.train(xys_train)
        stats = model.evaluate(xs_test, ys_test)

        output = {}
        output['stats'] = stats
        output['model'] = model
        output['contentids'] = ys_test['content_id']  # for plotting purposes

        return output
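
run_cross_validation evaluates a single train/test split, so a caller is expected to loop over folds itself. Below is a minimal k-fold driver sketch; the model type string 'LIBSVMNUSVR' and the contents of model_param are assumptions for illustration only, and results stands for the list of BasicResult mentioned in the docstring.

import random

# results: list of BasicResult (or a pandas.DataFrame), as in the docstring above
indices = list(range(len(results)))
random.shuffle(indices)

n_folds = 4
folds = [indices[i::n_folds] for i in range(n_folds)]   # roughly equal folds

model_class = TrainTestModel.find_subclass('LIBSVMNUSVR')   # assumed model type string
model_param = {'norm_type': 'clip_0to1'}                    # assumed model parameters

fold_stats = []
for k in range(n_folds):
    test_indices = folds[k]
    train_indices = [i for j, fold in enumerate(folds) if j != k for i in fold]
    output = run_cross_validation(model_class, model_param, results,
                                  train_indices, test_indices)
    fold_stats.append(output['stats'])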
Example #3
    def test_get_xs_ys(self):
        xs = TrainTestModel.get_xs_from_results(self.features, [0, 1, 2])

        self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 3)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']), 128.26146851380497, places=4)
        self.assertEqual(len(xs['Moment_noref_feature_var_score']), 3)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']), 1569.2395085695462, places=4)

        xs = TrainTestModel.get_xs_from_results(self.features)
        self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 9)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']), 111.59099599173773, places=4)
        self.assertEqual(len(xs['Moment_noref_feature_var_score']), 9)
        self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']), 1806.8620377229011, places=4)

        ys = TrainTestModel.get_ys_from_results(self.features, [0, 1, 2])
        expected_ys = {'label': np.array([2.5, 3.9, 5.0]),
                       'content_id': np.array([0, 1, 2])}
        self.assertTrue(all(ys['label'] == expected_ys['label']))
        self.assertTrue(all(ys['content_id'] == expected_ys['content_id']))
Example #4
File: routine.py  Project: haiqianw/vmaf
def train_test_on_dataset(train_dataset, test_dataset,
                          feature_param, model_param,
                          train_ax, test_ax, result_store,
                          parallelize=True, logger=None, fifo_mode=True,
                          output_model_filepath=None):

    train_assets = read_dataset(train_dataset)
    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    train_xys = TrainTestModel.get_xys_from_results(train_features)
    train_xs = TrainTestModel.get_xs_from_results(train_features)
    train_ys = TrainTestModel.get_ys_from_results(train_features)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict

    model_class = TrainTestModel.find_subclass(model_type)
    model = model_class(model_param_dict, logger)

    model.train(train_xys)

    # append additional information to the model before saving, so that
    # VmafQualityRunner can read and apply it
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])

    train_ys_pred = model.predict(train_xs)

    # apply instructions indicated in the appended info
    train_ys_pred = VmafQualityRunner.clip_score(model, train_ys_pred)

    train_stats = TrainTestModel.get_stats(train_ys['label'], train_ys_pred)

    if logger:
        logger.info('Stats on training data: {}'.format(
            TrainTestModel.format_stats(train_stats)))

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        train_content_ids = map(lambda asset: asset.content_id, train_assets)
        TrainTestModel.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('DMOS')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
            dataset=train_dataset.dataset_name,
            model=model.model_id,
            stats=TrainTestModel.format_stats(train_stats)
        ))

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset)
        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            parallelize=True,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        test_xs = TrainTestModel.get_xs_from_results(test_features)
        test_ys = TrainTestModel.get_ys_from_results(test_features)

        test_ys_pred = model.predict(test_xs)

        # apply instructions indicated in the appended info
        test_ys_pred = VmafQualityRunner.clip_score(model, test_ys_pred)

        test_stats = TrainTestModel.get_stats(test_ys['label'], test_ys_pred)

        if logger:
            logger.info('Stats on testing data: {}'.format(
                TrainTestModel.format_stats(test_stats)))

        if test_ax is not None:
            test_content_ids = map(lambda asset: asset.content_id, test_assets)
            TrainTestModel.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('DMOS')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=test_dataset.dataset_name,
                model=model.model_id,
                stats=TrainTestModel.format_stats(test_stats)
            ))

    return train_fassembler, train_assets, train_stats, \
           test_fassembler, test_assets, test_stats
Example #5
def train_test_on_dataset(train_dataset,
                          test_dataset,
                          feature_param,
                          model_param,
                          train_ax,
                          test_ax,
                          result_store,
                          parallelize=True,
                          logger=None,
                          fifo_mode=True,
                          output_model_filepath=None):

    train_assets = read_dataset(train_dataset)
    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    train_xys = TrainTestModel.get_xys_from_results(train_features)
    train_xs = TrainTestModel.get_xs_from_results(train_features)
    train_ys = TrainTestModel.get_ys_from_results(train_features)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict

    model_class = TrainTestModel.find_subclass(model_type)
    model = model_class(model_param_dict, logger)

    model.train(train_xys)

    # append additional information to the model before saving, so that
    # VmafQualityRunner can read and apply it
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])

    train_ys_pred = model.predict(train_xs)

    # apply instructions indicated in the appended info
    train_ys_pred = VmafQualityRunner.clip_score(model, train_ys_pred)

    train_stats = TrainTestModel.get_stats(train_ys['label'], train_ys_pred)

    if logger:
        logger.info('Stats on training data: {}'.format(
            TrainTestModel.format_stats(train_stats)))

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        train_content_ids = map(lambda asset: asset.content_id, train_assets)
        TrainTestModel.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('DMOS')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title(
            "Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=train_dataset.dataset_name,
                model=model.model_id,
                stats=TrainTestModel.format_stats(train_stats)))

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset)
        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            parallelize=True,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        test_xs = TrainTestModel.get_xs_from_results(test_features)
        test_ys = TrainTestModel.get_ys_from_results(test_features)

        test_ys_pred = model.predict(test_xs)

        # apply instructions indicated in the appended info
        test_ys_pred = VmafQualityRunner.clip_score(model, test_ys_pred)

        test_stats = TrainTestModel.get_stats(test_ys['label'], test_ys_pred)

        if logger:
            logger.info('Stats on testing data: {}'.format(
                TrainTestModel.format_stats(test_stats)))

        if test_ax is not None:
            test_content_ids = map(lambda asset: asset.content_id, test_assets)
            TrainTestModel.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('DMOS')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title(
                "Dataset: {dataset}, Model: {model}\n{stats}".format(
                    dataset=test_dataset.dataset_name,
                    model=model.model_id,
                    stats=TrainTestModel.format_stats(test_stats)))

    return train_fassembler, train_assets, train_stats, \
           test_fassembler, test_assets, test_stats
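
Both versions of train_test_on_dataset return the assemblers, assets and stats so the caller can plot and inspect them. A hedged calling sketch follows; it assumes train_dataset, test_dataset, feature_param and model_param are already-loaded dataset/parameter objects, that FileSystemResultStore is this project's result-store class, and that matplotlib is available. The output model path is a placeholder.

import matplotlib.pyplot as plt

fig, (train_ax, test_ax) = plt.subplots(nrows=1, ncols=2, figsize=(11, 5))

result_store = FileSystemResultStore()   # assumed result-store class in this project

(train_fassembler, train_assets, train_stats,
 test_fassembler, test_assets, test_stats) = train_test_on_dataset(
    train_dataset, test_dataset,
    feature_param, model_param,
    train_ax, test_ax, result_store,
    parallelize=True,
    output_model_filepath='workspace/model/example_model.pkl',  # placeholder path
)

plt.tight_layout()
plt.show()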