Python DataFrame примеры, a2ml.api.utils.dataframe.DataFrame Python примеры использования

Пример #1

0

Показать файл

    def test_process_prediction_proba(self):
        """Check ``process_prediction`` when only class probabilities are given.

        The target column is dropped from the iris fixture, probabilities for
        six rows are passed in (with ``results`` set to ``None``), and the
        resulting dataset is expected to carry the fixture's columns followed
        by one ``proba_*`` column per class, with target values matching the
        fixture's own target column.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        csv_path = os.path.join(fixture_dir, "iris_test.csv")
        options = fsclient.read_json_file(
            os.path.join(fixture_dir, "options.json"))
        target = options['targetFeature']
        labels = ["setosa", "versicolor", "virginica"]

        ds = DataFrame.create_dataframe(csv_path)
        ds.drop([target])

        # No hard predictions are supplied; only per-class probabilities.
        probabilities = np.array([
            [0.8, 0.1, 0.1],
            [0.4, 0.6, 0.1],
            [0.1, 0.2, 0.7],
            [0.7, 0.2, 0.1],
            [0.3, 0.7, 0.1],
            [0.1, 0.3, 0.6],
        ])
        class_ids = [0, 1, 2]

        ModelHelper.process_prediction(
            ds, None, probabilities, class_ids, 0.5, None, target, labels)

        ds_test = DataFrame.create_dataframe(csv_path)
        expected_columns = ds_test.columns + [
            "proba_setosa", "proba_versicolor", "proba_virginica"
        ]
        self.assertEqual(ds.columns, expected_columns)
        self.assertEqual(ds.df[target].values.tolist(),
                         ds_test.df[target].values.tolist())

Пример #2

0

Показать файл

Файл: model_review.py Проект: chrinide/a2ml

    def build_review_data(self, data_path=None, output=None):
        """Merge stored actuals into the training data and save the result.

        Args:
            data_path: Path of the training data; falls back to
                ``self.options['data_path']`` when falsy.
            output: Destination file; when falsy a name is derived from
                ``data_path`` plus a ``_review_<uid>.feather.zstd`` suffix.

        Returns:
            The path the combined dataset was saved to.
        """
        data_path = data_path or self.options['data_path']
        ds_train = DataFrame.create_dataframe(data_path)

        actuals_pattern = os.path.join(
            self.model_path, "predictions/*_actuals.feather.zstd")
        actuals_files = fsclient.list_folder(actuals_pattern,
                                             wild=True,
                                             remove_folder_name=False,
                                             meta_info=True)
        # Most recently modified files are processed first.
        actuals_files.sort(key=lambda info: info['last_modified'],
                           reverse=True)

        for _, ds_actuals in DataFrame.load_from_files(actuals_files):
            if ds_actuals.df.empty:
                continue

            ds_actuals.drop(['prediction_id', 'prediction_group_id'])

            # Append only the columns the training set already has.
            combined = [ds_train.df, ds_actuals.df[ds_train.columns]]
            ds_train.df = pd.concat(combined, ignore_index=True)
            ds_train.drop_duplicates()

        if not output:
            base_name = os.path.splitext(data_path)[0]
            output = base_name + "_review_%s.feather.zstd" % (get_uid())

        ds_train.saveToFile(output)
        return output

Пример #3

0

Показать файл

    def test_process_prediction(self):
        """Check ``process_prediction`` when only predicted labels are given.

        The target column is dropped from the iris fixture and six label
        predictions are supplied (no probabilities); afterwards the dataset
        is expected to have the same dtypes and values as the untouched
        fixture.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        csv_path = os.path.join(fixture_dir, "iris_test.csv")
        options = fsclient.read_json_file(
            os.path.join(fixture_dir, "options.json"))
        target = options['targetFeature']
        labels = ["setosa", "versicolor", "virginica"]

        ds = DataFrame.create_dataframe(csv_path)
        ds.drop([target])

        # Two passes over the three classes: six predicted labels in total.
        predicted_labels = labels * 2

        ModelHelper.process_prediction(
            ds, predicted_labels, None, None, None,
            options.get('minority_target_class'), target, labels)

        ds_test = DataFrame.create_dataframe(csv_path)
        self.assertEqual(ds.dtypes, ds_test.dtypes)
        self.assertEqual(ds.df.values.tolist(), ds_test.df.values.tolist())

Пример #4

0

Показать файл

Файл: test_model_review.py Проект: chrinide/a2ml

def test_score_actuals_for_candidate_prediction():
    """Score actuals keyed by primary prediction ids against a candidate.

    Actuals are submitted with the primary model's prediction ids; the
    stored actuals file is expected to contain the candidate's prediction
    ids, the candidate group id and the ``species`` values listed below.
    """
    # Prediction data:
    # { 'prediction_id':'bef9be07-5534-434e-ab7c-c379d8fcfe77', 'species':'versicolor' },
    # { 'prediction_id':'f61b1bbc-6f7b-4e7e-9a3b-6acb6e1462cd', 'species':'virginica' }
    candidate_path = 'tests/fixtures/test_score_actuals/pr_can/candidate'
    candidate_group_id = '272B088D17A7490'

    # Primary prediction data:
    # { 'prediction_id':'09aaa96b-5d9c-4c45-ab04-726da868624b', 'species':'virginica' },
    # { 'prediction_id':'5e5ad22b-6789-47c6-9a4d-a3a998065127', 'species':'virginica' }
    primary_path = 'tests/fixtures/test_score_actuals/pr_can/primary'
    primary_group_id = 'A4FD5B64FEE5434'

    actuals_glob = candidate_path + '/predictions/*_actuals.feather.zstd'

    # Start from a clean slate: remove actuals files left by earlier runs.
    for stale_file in glob.glob(actuals_glob):
        os.remove(stale_file)

    actuals = [
        {
            'prediction_id': '09aaa96b-5d9c-4c45-ab04-726da868624b',
            'actual': 'versicolor'
        },
        {
            'prediction_id': '5e5ad22b-6789-47c6-9a4d-a3a998065127',
            'actual': 'virginica'
        },
    ]

    reviewer = ModelReview({'model_path': candidate_path})
    res = reviewer.add_actuals(
        actual_records=actuals,
        prediction_group_id=candidate_group_id,
        primary_prediction_group_id=primary_group_id,
        primary_model_path=primary_path,
        calc_score=True)

    assert type(res) == dict
    assert res['accuracy'] == 1.0

    actual_files = glob.glob(actuals_glob)
    assert len(actual_files) == 1
    actual_file = actual_files[0]
    assert str(datetime.date.today()) in actual_file

    stored = DataFrame({})
    stored.loadFromFeatherFile(actual_file)
    assert 'prediction_group_id' in stored.columns

    ordered = stored.df.sort_values(by=['prediction_id'])
    records = json.loads(ordered.to_json(orient='records'))

    expected = [
        ('bef9be07-5534-434e-ab7c-c379d8fcfe77', 'versicolor'),
        ('f61b1bbc-6f7b-4e7e-9a3b-6acb6e1462cd', 'virginica'),
    ]
    for position, (prediction_id, species) in enumerate(expected):
        record = records[position]
        assert record['prediction_id'] == prediction_id
        assert record['prediction_group_id'] == candidate_group_id
        assert record['species'] == species

Пример #5

0

Показать файл

Файл: predict.py Проект: chrinide/a2ml

    def _predict_locally(self, filename_arg, model_id, threshold, data,
                         columns, output):
        """Run a prediction with a locally available model.

        When ``filename_arg`` is falsy, ``data``/``columns`` are written to a
        CSV under ``.augerml`` and used as input, and the prediction is
        returned via ``ModelHelper.save_prediction_result``. Otherwise the
        path of the predicted file is returned, moved to ``output`` if given.

        Raises:
            AugerException: If the model is not loaded locally.
        """
        deployer = ModelDeploy(self.ctx, None)
        is_model_loaded, model_path, model_name = \
            deployer.verify_local_model(model_id)

        if not is_model_loaded:
            raise AugerException(
                "Model isn't loaded locally. "
                "Please use a2ml deploy command to download model.")

        model_path, model_existed = self._extract_model(model_name)
        options_path = os.path.join(model_path, "model", "options.json")
        model_options = fsclient.read_json_file(options_path)

        filename = filename_arg
        if not filename:
            # No input file: persist the in-memory records to a CSV first.
            ds = DataFrame.create_dataframe(filename, data, columns)
            filename = os.path.join(self.ctx.config.get_path(), '.augerml',
                                    'predict_data.csv')
            ds.saveToCsvFile(filename, compression=None)

        try:
            predicted = self._docker_run_predict(
                filename, threshold, model_path)
        finally:
            # Remove the unzipped model only if this call unzipped it.
            if not model_existed:
                shutil.rmtree(model_path, ignore_errors=True)
                model_path = None

        if filename_arg:
            if output:
                fsclient.move_file(predicted, output)
                predicted = output
            return predicted

        ds_result = DataFrame.create_dataframe(predicted)
        ds_result.options['data_path'] = None
        ds_result.loaded_columns = columns

        # model_path is None when the extracted model was cleaned up above.
        if model_path:
            supports_review = model_options.get("support_review_model")
        else:
            supports_review = False

        return ModelHelper.save_prediction_result(
            ds_result,
            prediction_id=None,
            support_review_model=supports_review,
            json_result=False,
            count_in_result=False,
            prediction_date=None,
            model_path=model_path,
            model_id=model_id,
            output=output)

Пример #6

0

Показать файл

Файл: predict.py Проект: gitter-badger/a2ml

    def _predict_on_cloud(self, filename, model_id, threshold=None):
        """Predict via the pipeline API and save the result next to the input.

        Returns:
            Path of the written file: the input path with its extension
            replaced by ``_predicted.csv``.
        """
        target = self.ctx.config.get('target', None)
        records, features = DataFrame.load_records(filename, target)

        api = AugerPipelineApi(self.ctx, None, model_id)
        predictions = api.predict(records, features, threshold)

        base_name = os.path.splitext(filename)[0]
        predicted = base_name + "_predicted.csv"
        DataFrame.save(predicted, predictions)

        return predicted

Пример #7

0

Показать файл

Файл: model_review.py Проект: chrinide/a2ml

    def count_actuals_by_prediction_id(self):
        """Count actuals per prediction group across all stored actuals files.

        Each ``*_actuals.feather.zstd`` file under ``<model_path>/predictions``
        is loaded with the ``prediction_group_id``, ``prediction_id`` and
        target columns. Per file, the number of distinct
        ``(prediction_group_id, prediction_id)`` pairs is counted for each
        group, and the counts are summed over files.

        Returns:
            dict mapping ``prediction_group_id`` to the accumulated count.
        """
        res = {}
        features = [
            'prediction_group_id', 'prediction_id', self.target_feature
        ]
        counter = ProbabilisticCounter()

        all_files = fsclient.list_folder(os.path.join(
            self.model_path, "predictions/*_actuals.feather.zstd"),
                                         wild=True,
                                         remove_folder_name=False,
                                         meta_info=False)

        for (file, df) in DataFrame.load_from_files(all_files, features):
            # NOTE(review): presumably drops rows whose prediction_id was
            # already seen in earlier files (tracked by ``counter``) - confirm.
            ModelReview._remove_duplicates_by(df, 'prediction_id', counter)

            agg = df.df.groupby(['prediction_group_id',
                                 'prediction_id']).count()
            # exclude duplication prediction_id's inside groups
            agg[self.target_feature] = 1
            agg = agg.groupby('prediction_group_id').count()

            for prediction_group_id, row in agg.iterrows():
                # Explicit positional access: ``row[0]`` on a label-indexed
                # Series relies on a deprecated pandas fallback.
                count = row.iloc[0]
                res[prediction_group_id] = res.get(prediction_group_id,
                                                   0) + count

        return res

Пример #8

0

Показать файл

Файл: model.py Проект: chrinide/a2ml

    def predict(self, filename, model_id,
        threshold=None, locally=False, data=None, columns=None, output = None,
        json_result=False, count_in_result=False, prediction_date=None, prediction_id=None):
        """Run a prediction, post-process it and store the outcome.

        The input dataset is built from ``filename`` or ``data``/``columns``,
        predicted locally or remotely depending on ``locally``, passed through
        ``ModelHelper.process_prediction`` and saved with
        ``ModelHelper.save_prediction``.

        Returns:
            dict with a single ``'predicted'`` key holding whatever
            ``ModelHelper.save_prediction`` returned.
        """
        ds = DataFrame.create_dataframe(filename, data, columns)
        model_path = self.ctx.config.get_model_path(model_id)
        options = fsclient.read_json_file(
            os.path.join(model_path, "options.json"))

        if locally:
            outcome = self._predict_locally(ds.df, model_id, threshold)
        else:
            outcome = self._predict_remotely(ds.df, model_id, threshold)
        results, results_proba, proba_classes, target_categories = outcome

        # Binary targets may arrive as the strings "True"/"False";
        # convert them to real booleans in place.
        if target_categories and len(target_categories) == 2:
            for position, category in enumerate(target_categories):
                if category == "False":
                    target_categories[position] = False
                elif category == "True":
                    target_categories[position] = True

        # Model options take precedence; the context config is the fallback.
        minority_class = options.get(
            'minority_target_class',
            self.ctx.config.get('minority_target_class'))
        target_feature = options.get(
            'targetFeature', self.ctx.config.get('target', None))

        ModelHelper.process_prediction(
            ds, results, results_proba, proba_classes, threshold,
            minority_class, target_feature, target_categories)

        predicted = ModelHelper.save_prediction(
            ds, prediction_id, options.get('support_review_model', True),
            json_result, count_in_result, prediction_date,
            model_path, model_id, output)

        if filename:
            self.ctx.log('Predictions stored in %s' % predicted)

        return {'predicted': predicted}

Пример #9

0

Показать файл

Файл: model_helper.py Проект: chrinide/a2ml

    def preprocess_target(model_path,
                          data_path=None,
                          records=None,
                          features=None):
        """Build a dataset from a path or records and preprocess its target.

        Thin wrapper: creates the ``DataFrame`` and forwards it, together
        with ``model_path``, to ``ModelHelper.preprocess_target_ds``.
        """
        dataset = DataFrame.create_dataframe(data_path, records, features)
        return ModelHelper.preprocess_target_ds(model_path, dataset)

Пример #10

0

Показать файл

    def predict(self, filename, model_id, threshold, locally):
        """Predict with an Azure experiment model and save the predictions.

        Returns:
            dict with a single ``'predicted'`` key holding the value returned
            by ``self._save_predictions``.

        Raises:
            AzureException: If no experiment name is configured.
        """
        ws = AzureProject(self.ctx)._get_ws()
        experiment_name = self.ctx.config.get('experiment/name', None)
        if experiment_name is None:
            raise AzureException('Please specify Experiment name...')
        experiment = Experiment(ws, experiment_name)

        target = self.ctx.config.get('target', None)
        predict_data = DataFrame.load(filename, target)

        if locally:
            prediction = self._predict_locally(
                experiment, predict_data, model_id, threshold)
        else:
            prediction = self._predict_remotely(
                ws, experiment, predict_data, model_id, threshold)
        y_pred, y_proba, proba_classes = prediction

        predict_data[target] = y_pred

        # One ``proba_<class>`` column per class when probabilities exist.
        if y_proba is not None:
            for column_idx, class_name in enumerate(proba_classes):
                column = 'proba_' + str(class_name)
                predict_data[column] = list(y_proba[:, column_idx])

        return {'predicted': self._save_predictions(predict_data, filename)}

Пример #11

0

Показать файл

Файл: model_review.py Проект: chrinide/a2ml

    def score_model_performance_daily(self, date_from, date_to):
        """Compute the configured score for each day that has actuals.

        For every day yielded by ``_prediction_files_by_day`` the actuals
        files are concatenated, the target column is renamed to
        ``a2ml_actual`` and the frame is scored via ``_process_actuals``.

        Returns:
            dict mapping ``str(date)`` to the score named by
            ``self.options['score_name']``; days without rows are omitted.
        """
        columns = ['prediction_id', self.target_feature]
        daily_scores = {}

        days = ModelReview._prediction_files_by_day(
            self.model_path, date_from, date_to, "_*_actuals.feather.zstd")

        for curr_date, files in days:
            day_actuals = DataFrame({})
            for _, loaded in DataFrame.load_from_files(files, columns):
                day_actuals.df = pd.concat([day_actuals.df, loaded.df])

            if day_actuals.count() > 0:
                day_actuals.df.rename(
                    columns={self.target_feature: 'a2ml_actual'},
                    inplace=True)
                scores = self._process_actuals(ds_actuals=day_actuals,
                                               calc_score=True)
                score_name = self.options.get('score_name')
                daily_scores[str(curr_date)] = scores[score_name]

        return daily_scores

Пример #12

0

Показать файл

Файл: predict.py Проект: ANN-KOREA/a2ml

    def _predict_on_cloud(self, filename, model_id, threshold, data, columns,
                          output):
        """Predict through the pipeline API and save the returned records.

        The input is built from ``filename`` or ``data``/``columns``; the API
        response's ``'data'`` and ``'columns'`` entries become the result
        dataset, which is handed to ``ModelHelper.save_prediction_result``.
        """
        source = DataFrame.create_dataframe(filename, data, columns)

        api = AugerPipelineApi(self.ctx, None, model_id)
        predictions = api.predict(source.get_records(), source.columns,
                                  threshold)

        ds_result = DataFrame.create_dataframe(
            None,
            records=predictions['data'],
            features=predictions['columns'])
        ds_result.options['data_path'] = filename

        save_options = {
            'prediction_id': None,
            'support_review_model': False,
            'json_result': False,
            'count_in_result': False,
            'prediction_date': None,
            'model_path': None,
            'model_id': model_id,
            'output': output,
        }
        return ModelHelper.save_prediction_result(ds_result, **save_options)

Пример #13

0

Показать файл

Файл: model_review.py Проект: chrinide/a2ml

    def add_actuals(self,
                    actuals_path=None,
                    actual_records=None,
                    prediction_group_id=None,
                    primary_prediction_group_id=None,
                    primary_model_path=None,
                    actual_date=None,
                    actuals_id=None,
                    calc_score=True):
        """Match actual values to stored predictions and persist them.

        Actuals come either from ``actuals_path`` or from ``actual_records``.
        When a path is given, or the records are lists (rather than e.g.
        dicts), the columns are named ``prediction_id`` and ``actual``.

        The actuals are processed by ``_process_actuals`` (with
        ``raise_not_found=True``), then written to
        ``<model_path>/predictions/<date>_<actuals_id>_actuals.feather.zstd``
        where ``<date>`` is ``actual_date`` or today and ``<actuals_id>``
        defaults to a fresh uid.

        Returns:
            Whatever ``_process_actuals`` returned (the scores when
            ``calc_score`` is true).
        """
        features = None
        # isinstance (not ``type(...) == list``) so list subclasses are
        # also recognised as positional records.
        if actuals_path or (actual_records
                            and isinstance(actual_records[0], list)):
            features = ['prediction_id', 'actual']

        ds_actuals = DataFrame.create_dataframe(actuals_path,
                                                actual_records,
                                                features=features)

        result = self._process_actuals(ds_actuals,
                                       prediction_group_id,
                                       primary_prediction_group_id,
                                       primary_model_path,
                                       actual_date,
                                       actuals_id,
                                       calc_score,
                                       raise_not_found=True)

        # Store the actual value under the target column name: drop the
        # existing target column and rename 'a2ml_actual' to take its place.
        ds_actuals.drop(self.target_feature)
        ds_actuals.df = ds_actuals.df.rename(
            columns={'a2ml_actual': self.target_feature})

        if not actuals_id:
            actuals_id = get_uid()

        file_name = str(
            actual_date or
            datetime.date.today()) + '_' + actuals_id + "_actuals.feather.zstd"
        ds_actuals.saveToFeatherFile(
            os.path.join(self.model_path, "predictions", file_name))

        return result

Пример #14

0

Показать файл

    def test_save_prediction(self):
        """Exercise ``ModelHelper.save_prediction`` in four output modes.

        1. File input with review support: returns the predicted CSV path and
           writes both the predicted and the results files.
        2. ``json_result=True``: returns a JSON string with columns and data.
        3. No ``data_path``: returns a list of record dicts.
        4. No ``data_path`` but ``loaded_columns`` set: returns a dict with
           ``columns`` and list-shaped ``data`` rows.
        """
        model_path = 'tests/fixtures/test_predict_by_model/iris'
        options = fsclient.read_json_file(
            os.path.join(model_path, "options.json"))

        prediction_id = "123"
        prediction_date = "today"
        predictions_dir = os.path.join(model_path, "predictions")
        results_file_path = os.path.join(
            predictions_dir,
            prediction_date + '_' + prediction_id + "_results.feather.zstd")
        predicted_file_path = os.path.join(
            predictions_dir, "iris_test_" + prediction_id + "_" +
            options.get('uid') + "_predicted.csv")

        def fresh_dataset():
            # Reload the fixture and make sure no output files are left over.
            dataset = DataFrame.create_dataframe(
                os.path.join(model_path, "iris_test.csv"))
            for stale_path in (results_file_path, predicted_file_path):
                fsclient.remove_file(stale_path)
                self.assertFalse(fsclient.is_file_exists(stale_path))
            return dataset

        def save(dataset, support_review_model, json_result):
            # Only the review flag and JSON flag vary between scenarios.
            return ModelHelper.save_prediction(
                dataset,
                prediction_id,
                support_review_model=support_review_model,
                json_result=json_result,
                count_in_result=False,
                prediction_date=prediction_date,
                model_path=model_path,
                model_id=options.get('uid'))

        # Scenario 1: file-based result with review support.
        ds = fresh_dataset()
        res = save(ds, support_review_model=True, json_result=False)
        self.assertEqual(res, predicted_file_path)
        self.assertTrue(fsclient.is_file_exists(predicted_file_path))
        self.assertTrue(fsclient.is_file_exists(results_file_path))

        # Scenario 2: JSON result.
        ds = fresh_dataset()
        res = save(ds, support_review_model=True, json_result=True)
        res = json.loads(res)
        self.assertEqual(res['columns'], ds.columns)
        self.assertEqual(len(res['data']), 6)

        # Scenario 3: no data path, records returned as dicts.
        ds = fresh_dataset()
        ds.options['data_path'] = None
        res = save(ds, support_review_model=False, json_result=False)
        self.assertEqual(type(res[0]), dict)
        self.assertEqual(res[0][options['targetFeature']], 'setosa')

        # Scenario 4: no data path, loaded_columns set -> columns/data dict.
        ds = fresh_dataset()
        ds.options['data_path'] = None
        ds.loaded_columns = ds.columns
        res = save(ds, support_review_model=False, json_result=False)
        self.assertEqual(res['columns'], ds.columns)
        self.assertEqual(len(res['data']), 6)
        self.assertEqual(type(res['data'][0]), list)
Пример #15

0

Показать файл

Файл: model_review.py Проект: chrinide/a2ml

    def _distribution_stats(self,
                            date_from,
                            date_to,
                            path_suffix,
                            features,
                            categoricalFeatures=None,
                            feature_mapper=None):
        """Collect per-day distribution statistics for the given features.

        For every day yielded by ``_prediction_files_by_day`` the matching
        files are loaded and, per feature, a count plus either a value
        distribution (categorical/string/object columns, or features listed
        in ``categoricalFeatures``) or a sum, an average and a sum of squared
        deviations (other columns) are accumulated. The per-day result is
        produced by ``_calc_stddev_for_features``.

        Args:
            date_from: Start of the date range (passed through).
            date_to: End of the date range (passed through).
            path_suffix: File-name suffix selecting the prediction files.
            features: Column names to analyse.
            categoricalFeatures: Features to treat as categorical regardless
                of dtype. Defaults to none.
            feature_mapper: Passed to ``_calc_stddev_for_features``.
                Defaults to an empty mapping.

        Returns:
            dict mapping ``str(date)`` to the statistics for that day; days
            without files are omitted.
        """
        # Avoid mutable default arguments shared between calls.
        if categoricalFeatures is None:
            categoricalFeatures = []
        if feature_mapper is None:
            feature_mapper = {}

        res = {}
        feature_importances = self._get_feature_importances()
        counter = ProbabilisticCounter()
        second_pass_counter = ProbabilisticCounter()

        for (curr_date, files) in ModelReview._prediction_files_by_day(
                self.model_path, date_from, date_to, path_suffix):
            stats = {}

            for feature in features:
                stats[feature] = {
                    'count': 0,
                    'sum': 0,
                    'sq_sum': 0,
                    'dist': None,
                    'imp': feature_importances.get(feature, 0)
                }

            df_list = []
            for (file, df) in DataFrame.load_from_files(files, features):
                # NOTE(review): presumably removes rows whose prediction_id
                # was already seen (tracked by ``counter``) - confirm.
                ModelReview._remove_duplicates_by(df, 'prediction_id', counter)

                df_list.append(df)

            # First pass: calc sum and count in each column for average
            for df in df_list:
                for feature in features:
                    stats[feature]['count'] += df.df[feature].count()

                    if df.df[feature].dtype.name in [
                            'category', 'string', 'object'
                    ] or feature in categoricalFeatures:
                        stats[feature]['dist'] = merge_dicts(
                            stats[feature]['dist'] or {},
                            dict(df.df[feature].value_counts()),
                            lambda v, ov: v + ov)
                    else:
                        stats[feature]['sum'] += df.df[feature].sum()

            # Calc average (only for features without a value distribution)
            for feature in features:
                if stats[feature]['count'] > 0 and stats[feature][
                        'dist'] is None:
                    stats[feature]['average'] = stats[feature]['sum'] / stats[
                        feature]['count']

            # Second pass: sum of squares of value and average for std dev
            for df in df_list:
                ModelReview._remove_duplicates_by(df, 'prediction_id',
                                                  second_pass_counter)

                for feature in features:
                    if 'average' in stats[feature]:
                        avg = stats[feature]['average']
                        stats[feature]['sq_sum'] += ((df.df[feature] -
                                                      avg)**2).sum()

            # Calc std dev
            if len(files) > 0:
                res[str(curr_date)] = ModelReview._calc_stddev_for_features(
                    stats, features, feature_mapper)

        return res

Пример #16

0

Показать файл

Файл: model_review.py Проект: chrinide/a2ml

    def _process_actuals(self,
                         ds_actuals,
                         prediction_group_id=None,
                         primary_prediction_group_id=None,
                         primary_model_path=None,
                         actual_date=None,
                         actuals_id=None,
                         calc_score=False,
                         raise_not_found=False):
        """Join actual values with stored prediction results, optionally score.

        ``ds_actuals`` is modified in place: its ``actual`` column is renamed
        to ``a2ml_actual``, it is indexed by ``prediction_id`` and combined
        with the matching rows of the prediction result files, and rows
        lacking either the target or ``a2ml_actual`` are dropped.

        When ``primary_prediction_group_id`` is given, the prediction ids in
        ``ds_actuals`` are first mapped from the primary model's ids to this
        model's ids via ``_map_primary_prediction_id_to_candidate``.

        Args:
            ds_actuals: Dataset with ``prediction_id`` and ``actual`` (or
                ``a2ml_actual``) columns.
            prediction_group_id: Selects the prediction files to search.
            primary_prediction_group_id: Prediction group of the primary model.
            primary_model_path: Model path of the primary model.
            actual_date: Not used in this method.
            actuals_id: Not used in this method.
            calc_score: When true, scores are computed and returned.
            raise_not_found: When true, raise if no actual matched any
                prediction (and no primary dataset is used).

        Returns:
            The scores from ``ModelHelper.calculate_scores`` when
            ``calc_score`` is true, otherwise ``True``.

        Raises:
            Exception: If ``raise_not_found`` is set and nothing matched,
                including the case where no prediction files exist.
        """

        ds_actuals.df.rename(columns={"actual": 'a2ml_actual'}, inplace=True)

        actuals_count = ds_actuals.count()

        primary_ds = None
        if primary_prediction_group_id:
            files = ModelReview._get_prediction_files(
                primary_model_path, primary_prediction_group_id)
            for (_,
                 df) in DataFrame.load_from_files(files,
                                                  features=['prediction_id']):
                primary_ds = df
                # should be only one file
                break

        origin_dtypes = []
        origin_columns = []
        prediction_files = ModelReview._get_prediction_files(
            self.model_path, prediction_group_id)
        actual_index = False
        # Initialised up front so the not-found check below works (instead
        # of failing on an unbound name) when there are no prediction files.
        match_count = 0

        for (file, df_prediction_results
             ) in DataFrame.load_from_files(prediction_files):
            origin_dtypes = df_prediction_results.df.dtypes
            origin_columns = df_prediction_results.df.columns

            if primary_ds is not None:
                ds_actuals.df[
                    'prediction_id'] = ModelReview._map_primary_prediction_id_to_candidate(
                        ds_actuals.df['prediction_id'],
                        primary_ds.df['prediction_id'],
                        df_prediction_results.df['prediction_id'])

            # Index the actuals by prediction_id once, on the first file.
            if not actual_index:
                ds_actuals.df.set_index('prediction_id', inplace=True)
                actual_index = True

            underscore_split = os.path.basename(file['path']).split('_')

            if len(underscore_split
                   ) == 3:  # date_group-id_suffix (new file name with date)
                prediction_group_id = underscore_split[1]
            else:  # group-id_suffix (old file name without date)
                prediction_group_id = underscore_split[0]

            df_prediction_results.df[
                'prediction_group_id'] = prediction_group_id

            # Non in-place set_index: the filtered frame is a derived object,
            # and mutating it in place can trigger SettingWithCopyWarning.
            matched_scope = df_prediction_results.df[
                df_prediction_results.df['prediction_id'].isin(
                    ds_actuals.df.index)].set_index('prediction_id')
            ds_actuals.df = ds_actuals.df.combine_first(matched_scope)

            match_count = ds_actuals.df.count()[self.target_feature]
            # Stop once every actual is matched; with a primary dataset only
            # the first prediction file is considered.
            if actuals_count == match_count or primary_ds is not None:
                break

        if raise_not_found and match_count == 0 and primary_ds is None:
            raise Exception(
                "Actual Prediction IDs not found in model predictions.")

        ds_actuals.df.reset_index(inplace=True)
        ds_actuals.dropna(columns=[self.target_feature, 'a2ml_actual'])

        # combine_first changes original non float64 types to float64 when NaN values appear during merging tables
        # Good explanations https://stackoverflow.com/a/15353297/898680
        # Fix: store original dtypes and force them after merging
        for col in origin_columns:
            if col != 'prediction_id':
                ds_actuals.df[col] = ds_actuals.df[col].astype(
                    origin_dtypes[col], copy=False)

        ds_actuals.df['a2ml_actual'] = ds_actuals.df['a2ml_actual'].astype(
            origin_dtypes[self.target_feature], copy=False)

        result = True
        if calc_score:
            # Ground truth: the actual values presented under the target name.
            ds_true = DataFrame({})
            ds_true.df = ds_actuals.df[[
                'a2ml_actual'
            ]].rename(columns={'a2ml_actual': self.target_feature})

            y_pred, _ = ModelHelper.preprocess_target_ds(
                self.model_path, ds_actuals)
            y_true, _ = ModelHelper.preprocess_target_ds(
                self.model_path, ds_true)

            result = ModelHelper.calculate_scores(self.options,
                                                  y_test=y_true,
                                                  y_pred=y_pred)

        return result

Пример #17

0

Показать файл

Файл: test_model_review.py Проект: chrinide/a2ml

def test_score_actuals_with_not_full_actuals():
    """Store actuals for a subset of predictions and verify the saved file.

    Three actuals are submitted with yesterday's date; the stored file is
    expected to carry that date in its name and to contain exactly those
    three records with their prediction group id, ``feature1`` and
    ``income`` values.
    """
    model_path = 'tests/fixtures/test_score_actuals'
    actuals_glob = model_path + '/predictions/*_actuals.feather.zstd'

    # Remove actuals files left behind by earlier runs.
    for stale_file in glob.glob(actuals_glob):
        os.remove(stale_file)

    actuals = [
        {
            'prediction_id': '5c93079c-00c9-497a-8967-53fa0dd02054',
            'actual': False
        },
        {
            'prediction_id': 'b1bf9ebf-0277-4771-9bc5-236690a21194',
            'actual': False
        },
        {
            'prediction_id': 'f61b1bbc-6f7b-4e7e-9a3b-6acb6e1462cd',
            'actual': True
        },
    ]

    actual_date = datetime.date.today() - datetime.timedelta(days=1)

    reviewer = ModelReview({'model_path': model_path})
    res = reviewer.add_actuals(actuals_path=None,
                               actual_records=actuals,
                               actual_date=actual_date)

    actual_files = glob.glob(actuals_glob)
    assert len(actual_files) > 0
    assert str(actual_date) in actual_files[0]

    stored = DataFrame({})
    stored.loadFromFeatherFile(actual_files[0])
    assert 'prediction_group_id' in stored.columns

    ordered = stored.df.sort_values(by=['prediction_id'])
    records = json.loads(ordered.to_json(orient='records'))

    assert len(records) == len(actuals)  #+ 1

    # (prediction_id, prediction_group_id, feature1, income), in id order.
    expected = [
        ('5c93079c-00c9-497a-8967-53fa0dd02054',
         '2ab1e430-6082-4465-b057-3408d36de144', 1, False),
        ('b1bf9ebf-0277-4771-9bc5-236690a21194',
         '2ab1e430-6082-4465-b057-3408d36de144', 1.1, False),
        ('f61b1bbc-6f7b-4e7e-9a3b-6acb6e1462cd',
         '03016c26-f69a-416f-817f-4c58cd69d675', 1.3, True),
    ]
    for position, (prediction_id, group_id, feature1,
                   income) in enumerate(expected):
        record = records[position]
        assert record['prediction_id'] == prediction_id
        assert record['prediction_group_id'] == group_id
        assert record['feature1'] == feature1
        assert record['income'] == income

Python DataFrame примеры использования