Example #1
0
    def score_model_performance_daily(self, date_from, date_to):
        """Compute the model score for each day in [date_from, date_to].

        For every day that has an "*_actuals.feather.zstd" file under
        self.model_path, loads the actuals (prediction_id + target column),
        merges them with the stored predictions via _process_actuals and
        calculates the configured score.

        Args:
            date_from: start of the date range (passed through to
                ModelReview._prediction_files_by_day).
            date_to: end of the date range.

        Returns:
            dict mapping str(day) -> score value for the metric named by
            self.options['score_name']; days with no actuals are omitted.
        """
        features = ['prediction_id', self.target_feature]
        res = {}

        for (curr_date, files) in ModelReview._prediction_files_by_day(
                self.model_path, date_from, date_to,
                "_*_actuals.feather.zstd"):
            df_actuals = DataFrame({})
            # Collect all per-file frames first and concatenate once:
            # repeated pairwise pd.concat inside the loop is quadratic
            # in the total number of rows.
            frames = [df.df for (_file, df) in DataFrame.load_from_files(
                files, features)]
            if frames:
                df_actuals.df = pd.concat([df_actuals.df] + frames)

            if df_actuals.count() > 0:
                # _process_actuals expects the target column under the
                # 'a2ml_actual' name.
                df_actuals.df.rename(
                    columns={self.target_feature: 'a2ml_actual'}, inplace=True)
                scores = self._process_actuals(ds_actuals=df_actuals,
                                               calc_score=True)
                res[str(curr_date)] = scores[self.options.get('score_name')]

        return res
Example #2
0
    def _process_actuals(self,
                         ds_actuals,
                         prediction_group_id=None,
                         primary_prediction_group_id=None,
                         primary_model_path=None,
                         actual_date=None,
                         actuals_id=None,
                         calc_score=False,
                         raise_not_found=False):
        """Join actual target values onto stored prediction results.

        Matches rows of ds_actuals (must contain a 'prediction_id' column and
        the actual values, either as 'actual' or already as 'a2ml_actual')
        against the model's saved prediction files, merging the prediction
        columns into ds_actuals in place.

        Args:
            ds_actuals: project DataFrame wrapper holding the actuals;
                mutated in place.
            prediction_group_id: optional group id used to narrow the set of
                prediction files to search.
            primary_prediction_group_id / primary_model_path: when set, the
                actuals' prediction ids are first remapped from a primary
                model's predictions to this (candidate) model's predictions.
            actual_date, actuals_id: unused in this method; presumably kept
                for interface compatibility with callers — TODO confirm.
            calc_score: when True, compute and return the score dict instead
                of a bare success flag.
            raise_not_found: when True, raise if no actual prediction id
                matched any stored prediction (and no primary model is used).

        Returns:
            dict of scores (from ModelHelper.calculate_scores) when
            calc_score is True, otherwise True.

        Raises:
            Exception: when raise_not_found is True and nothing matched.
        """
        # Normalize the actuals column name used throughout the merge.
        ds_actuals.df.rename(columns={"actual": 'a2ml_actual'}, inplace=True)

        actuals_count = ds_actuals.count()

        primary_ds = None
        if primary_prediction_group_id:
            files = ModelReview._get_prediction_files(
                primary_model_path, primary_prediction_group_id)
            for (_,
                 df) in DataFrame.load_from_files(files,
                                                  features=['prediction_id']):
                primary_ds = df
                # should be only one file
                break

        origin_dtypes = []
        origin_columns = []
        prediction_files = ModelReview._get_prediction_files(
            self.model_path, prediction_group_id)
        actual_index = False
        # Initialize before the loop: if there are no prediction files the
        # loop body never runs and the raise_not_found check below would
        # otherwise hit a NameError instead of reporting "not found".
        match_count = 0

        for (file, df_prediction_results
             ) in DataFrame.load_from_files(prediction_files):
            origin_dtypes = df_prediction_results.df.dtypes
            origin_columns = df_prediction_results.df.columns

            if primary_ds is not None:
                # Translate primary-model prediction ids to this model's ids
                # so the actuals can be matched against candidate predictions.
                ds_actuals.df[
                    'prediction_id'] = ModelReview._map_primary_prediction_id_to_candidate(
                        ds_actuals.df['prediction_id'],
                        primary_ds.df['prediction_id'],
                        df_prediction_results.df['prediction_id'])

            if not actual_index:
                # Index by prediction_id once; must happen after any id
                # remapping above.
                ds_actuals.df.set_index('prediction_id', inplace=True)
                actual_index = True

            underscore_split = os.path.basename(file['path']).split('_')

            if len(underscore_split
                   ) == 3:  # date_group-id_suffix (new file name with date)
                prediction_group_id = underscore_split[1]
            else:  # group-id_suffix (old file name without date)
                prediction_group_id = underscore_split[0]

            df_prediction_results.df[
                'prediction_group_id'] = prediction_group_id

            # Only merge the prediction rows whose ids appear in the actuals.
            matched_scope = df_prediction_results.df[
                df_prediction_results.df['prediction_id'].isin(
                    ds_actuals.df.index)]
            matched_scope.set_index('prediction_id', inplace=True)
            ds_actuals.df = ds_actuals.df.combine_first(matched_scope)

            match_count = ds_actuals.df.count()[self.target_feature]
            # Stop early once every actual matched; with a primary model only
            # the first prediction file is considered.
            if actuals_count == match_count or primary_ds is not None:
                break

        if raise_not_found and match_count == 0 and primary_ds is None:
            raise Exception(
                "Actual Prediction IDs not found in model predictions.")

        ds_actuals.df.reset_index(inplace=True)
        # Wrapper-level dropna; drops rows missing the target or the actual.
        ds_actuals.dropna(columns=[self.target_feature, 'a2ml_actual'])

        # combine_first changes orginal non float64 types to float64 when NaN values appear during merging tables
        # Good explanations https://stackoverflow.com/a/15353297/898680
        # Fix: store original datypes and force them after merging
        for col in origin_columns:
            if col != 'prediction_id':
                ds_actuals.df[col] = ds_actuals.df[col].astype(
                    origin_dtypes[col], copy=False)

        ds_actuals.df['a2ml_actual'] = ds_actuals.df['a2ml_actual'].astype(
            origin_dtypes[self.target_feature], copy=False)

        result = True
        if calc_score:
            # Build a frame where the actuals stand in for the target column,
            # so both y_true and y_pred go through the same preprocessing.
            ds_true = DataFrame({})
            ds_true.df = ds_actuals.df[[
                'a2ml_actual'
            ]].rename(columns={'a2ml_actual': self.target_feature})

            y_pred, _ = ModelHelper.preprocess_target_ds(
                self.model_path, ds_actuals)
            y_true, _ = ModelHelper.preprocess_target_ds(
                self.model_path, ds_true)

            result = ModelHelper.calculate_scores(self.options,
                                                  y_test=y_true,
                                                  y_pred=y_pred)

        return result