Python DataSource示例

编程语言: Python

命名空间/包名称: biome.data.sources

类/类型: DataSource

hotexamples.com的示例: 13

Python DataSource - 共找到13个示例。以下是从开源项目中提取的、评价最高的 biome.data.sources.DataSource 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

DataSource(13)

to_dataframe(7)

add_supported_format(1)

to_mapped_dataframe(1)

示例#1

显示文件

文件： test_json_source.py 项目： recognai/biome-data

    def test_flatten_json(self):
        """Check that a flattened JSON source exposes the nested person columns."""
        source = DataSource(
            format="json",
            flatten=True,
            path=os.path.join(FILES_PATH, "to-be-flattened.jsonl"),
        )
        columns = source.to_dataframe().compute().columns

        # Both nested fields must show up under their flattened names.
        for c in ("persons.*.lastName", "persons.*.name"):
            self.assertIn(c, columns, f"Expected {c} as column name")

示例#2

显示文件

    def test_read_parquet(self):
        """Read a parquet file and check that the expected columns are present."""
        file_path = os.path.join(FILES_PATH, "test.parquet")
        ds = DataSource(format="parquet", path=file_path)

        df = ds.to_dataframe().compute()
        # assertIn reports the missing name and the actual columns on failure,
        # whereas assertTrue(x in y) only reports "False is not true".
        self.assertIn("reviewerID", df.columns)
        self.assertIn("path", df.columns)

示例#3

显示文件

 def test_reader_csv_with_leading_and_trailing_spaces_in_examples(self):
     """Read a ';'-separated CSV file and check that a "name" column is present."""
     csv_path = os.path.join(TEST_RESOURCES, "trailing_coma_in_headers.csv")
     reader_options = {"sep": ";"}

     source = DataSource(format="csv", source=csv_path, attributes=reader_options)
     columns = source.to_dataframe().compute().columns

     self.assertIn("name", columns)

示例#4

显示文件

    def test_read_csv(self):
        """Read a CSV file and check it yields rows and a "path" column."""
        file_path = os.path.join(TEST_RESOURCES, "dataset_source.csv")

        datasource = DataSource(format="csv", path=file_path)
        data_frame = datasource.to_dataframe().compute()

        # Use TestCase assertions rather than a bare ``assert``: bare asserts
        # are removed when Python runs with -O, which would silently skip the
        # non-empty check, and they give no diagnostic on failure.
        self.assertGreater(len(data_frame), 0)
        self.assertIn("path", data_frame.columns)

示例#5

显示文件

文件： test_json_source.py 项目： recognai/biome-data

    def test_read_json(self):
        """Read a JSON-lines file and check it yields rows and a "path" column."""
        file_path = os.path.join(FILES_PATH, "dataset_source.jsonl")

        datasource = DataSource(format="json", path=file_path)
        data_frame = datasource.to_dataframe().compute()

        # Use TestCase assertions rather than a bare ``assert``: bare asserts
        # are removed when Python runs with -O, which would silently skip the
        # non-empty check, and they give no diagnostic on failure.
        self.assertGreater(len(data_frame), 0)
        self.assertIn("path", data_frame.columns)

示例#6

显示文件

文件： test_excel_source.py 项目： recognai/biome-data

    def test_read_excel(self):
        """Read an xlsx file and check it yields rows and a "path" column."""
        file_path = os.path.join(FILES_PATH, "test.xlsx")

        datasource = DataSource(format="xlsx", path=file_path)
        data_frame = datasource.to_dataframe().compute()

        # Use TestCase assertions rather than a bare ``assert``: bare asserts
        # are removed when Python runs with -O, which would silently skip the
        # non-empty check, and they give no diagnostic on failure.
        self.assertGreater(len(data_frame), 0)
        self.assertIn("path", data_frame.columns)

示例#7

显示文件

文件： test_json_source.py 项目： recognai/biome-data

    def test_flatten_nested_list(self):
        """Check that flattening a nested-list JSON source yields the nested origin columns."""
        nested_path = os.path.join(FILES_PATH, "nested-list.jsonl")
        source = DataSource(format="json", flatten=True, path=nested_path)
        columns = source.to_dataframe().compute().columns

        expected_columns = (
            "classification.*.origin.*.key",
            "classification.*.origin.*.source",
        )
        # Each doubly nested field must appear under its flattened name.
        for c in expected_columns:
            self.assertIn(c, columns, f"Expected {c} as data column")

示例#8

显示文件