Python DataSource示例

编程语言: Python

命名空间/包名称: datacode

类/类型: DataSource

hotexamples.com的示例: 10

Python DataSource - 已找到10个示例。以下是从开源项目中提取的、评价最高的datacode.DataSource真实Python代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

DataSource(8)

df(2)

touch(1)

示例#1

显示文件

    def test_auto_run_pipeline_by_load_source_with_no_location(self):
        # Reading ``df`` from a source backed by a generator pipeline is
        # expected to produce the generated frame.
        pipeline = self.create_generator_pipeline()
        source = DataSource(pipeline=pipeline, location=self.csv_path_output)

        # even with last_modified set, should still load from pipeline
        source.touch()

        assert_frame_equal(source.df, EXPECT_GENERATED_DF)
        self.assert_all_pipeline_operations_have_pipeline(pipeline)

示例#2

显示文件

文件： base.py 项目： nickderobertis/transforms-fin

 def create_source(self, **kwargs) -> DataSource:
     """
     Build a DataSource from the fixture's test frame and CSV path.

     :param kwargs: extra DataSource arguments; any ``df`` or ``location``
         passed here replaces the fixture default
     :return: the constructed DataSource
     """
     # Defaults come first so that caller-supplied keys take precedence.
     options = {
         'df': self.test_df,
         'location': self.csv_path,
         **kwargs,
     }
     return DataSource(**options)

示例#3

显示文件

文件： test_data_transformation.py 项目： nickderobertis/data-code

    def test_auto_run_pipeline_by_load_source_with_no_location(self):
        # Reading ``df`` from a source backed by a transformation pipeline is
        # expected to produce the transformed frame.
        pipeline = self.create_transformation_pipeline()
        source = DataSource(pipeline=pipeline, location=self.csv_path_output)

        assert_frame_equal(source.df, self.expect_func_df)
        self.assert_all_pipeline_operations_have_pipeline(pipeline)

示例#4

显示文件

文件： test_data_combine.py 项目： nickderobertis/data-code

    def test_auto_run_pipeline_by_load_source_with_no_location(self):
        # Reading ``df`` from a source backed by a combine pipeline is
        # expected to produce the combined rows.
        pipeline = self.create_combine_pipeline()
        source = DataSource(pipeline=pipeline, location=self.csv_path_output)

        assert_frame_equal(source.df, self.expect_combined_rows_1_2)
        self.assert_all_pipeline_operations_have_pipeline(pipeline)

示例#5

显示文件

    def test_graph(self):
        # Smoke test: build a source on top of a merge pipeline, read its
        # data, then request both graphs and rely on no exception being raised.
        pipeline = self.create_merge_pipeline()
        source = DataSource(pipeline=pipeline, location=self.csv_path_output)

        # The frame itself is not inspected; it is only accessed before graphing.
        _ = source.df

        # TODO [#80]: better tests for graph
        #
        # Currently just checking to make sure they can be generated with no errors.
        # Should also check the contents of the graphs. Also see TestCreateSource.test_graph
        source.graph
        pipeline.graph

示例#6

显示文件

def portfolio_data_func(col: dc.Column, variable: dc.Variable, source: dc.DataSource, **kwargs) -> dc.DataSource:
    """
    Replace a variable's column in ``source.df`` with its portfolio values.

    The frame is passed through ``portfolio``; the original column for
    ``variable`` is then dropped and the resulting ``'portfolio'`` column is
    renamed to take its place. Column order and, when the source has index
    variables, the index are restored afterwards.

    :param col: column being transformed; the variables of its indices are
        used as the default ``byvars``
    :param variable: variable whose column is converted
    :param source: data source whose ``df`` is modified
    :param kwargs: forwarded to ``portfolio``; ``portvar`` is not allowed
    :return: the same ``source`` object, with an updated ``df``
    :raises ValueError: if ``portvar`` is passed in ``kwargs``
    """
    if 'portvar' in kwargs:
        raise ValueError('cannot pass portvar as variable will be transformed into portvar')

    if 'byvars' not in kwargs and source.index_vars:
        # No explicit grouping given: group by the variables of every
        # non-empty index attached to the column.
        by_var_names = []
        for index in col.indices:
            if not index:
                continue
            by_var_names.extend(var.name for var in index.variables)
        if by_var_names:
            kwargs['byvars'] = by_var_names

    # TODO [#1]: remove portfolio column reordering once pd_utils.portfolio retains order
    orig_columns = list(source.df.columns)

    # TODO [#2]: remove portfolio index handling once pd_utils.portfolio supports using index
    if source.index_vars:
        # Move the index into regular columns, remembering its names so it
        # can be rebuilt at the end.
        orig_index_names = source.df.index.names
        source.df.reset_index(inplace=True)
        orig_columns = list(source.df.columns)

    source.df = portfolio(source.df, variable.name, **kwargs)

    # Swap the raw variable column for the generated portfolio column, then
    # put the columns back in their original order.
    source.df.drop([variable.name], axis=1, inplace=True)
    source.df.rename(columns={'portfolio': variable.name}, inplace=True)
    source.df = source.df[orig_columns]

    if source.index_vars:
        source.df.set_index(orig_index_names, inplace=True)
    return source

示例#7

显示文件

文件： test_data_transformation.py 项目： nickderobertis/data-code

    def test_auto_run_pipeline_by_load_source_with_no_location_and_shared_columns(
            self):
        # Same flow as the plain transformation test, but the transform's
        # output source and the final source are given the same columns.
        self.create_csv()
        shared_columns = self.create_columns()

        def transform_func(source: DataSource) -> DataSource:
            # Re-wrap the incoming frame in a new source using the shared columns.
            return DataSource(df=source.df, columns=shared_columns)

        pipeline = self.create_transformation_pipeline(func=transform_func)
        output_source = DataSource(
            pipeline=pipeline,
            location=self.csv_path_output,
            columns=shared_columns,
        )

        assert_frame_equal(output_source.df, self.expect_loaded_df_rename_only)
        self.assert_all_pipeline_operations_have_pipeline(pipeline)

示例#8

显示文件

文件： test_data_transformation.py 项目： nickderobertis/data-code

 def transform_func(source: DataSource) -> DataSource:
     """Return a new DataSource built from ``source.df`` and ``all_cols``."""
     # ``all_cols`` is taken from the enclosing scope.
     return DataSource(df=source.df, columns=all_cols)

示例#9

显示文件

def winsorize_data_func(col: dc.Column, variable: dc.Variable,
                        source: dc.DataSource, *args,
                        **kwargs) -> dc.DataSource:
    """
    Winsorize one variable's column of ``source.df``.

    :param col: unused here; part of the signature
    :param variable: variable whose name is passed as ``subset``
    :param source: data source whose ``df`` is replaced with the result
    :param args: extra positional arguments forwarded to ``winsorize``
    :param kwargs: extra keyword arguments forwarded to ``winsorize``
    :return: the same ``source`` object, with an updated ``df``
    """
    winsorized = winsorize(source.df, *args, subset=variable.name, **kwargs)
    source.df = winsorized
    return source

示例#10

显示文件

文件： base.py 项目： nickderobertis/data-code

def ds_generator_func(columns: Sequence[Column]) -> DataSource:
    """Build a DataSource over ``EXPECT_GENERATED_DF`` with the given columns."""
    return DataSource(df=EXPECT_GENERATED_DF, columns=columns)