Python read_pydict示例

编程语言: Python

命名空间/包名称: cjwkernel.parquet

方法/功能: read_pydict

hotexamples.com的示例: 8

Python read_pydict - 共找到 8 个示例。以下是从开源项目中提取的、评价最高的 cjwkernel.parquet.read_pydict 真实 Python 示例。您可以对示例进行评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： io.py 项目： afcarl/cjworkbench

def read_cached_render_result_pydict(crr: CachedRenderResult,
                                     only_columns: range,
                                     only_rows: range) -> Dict[str, List[Any]]:
    """
    Return a dict mapping column name to data (Python objects).

    Python data consumes RAM, so you must specify columns and rows.

    `retval.keys()` is in table-column order (not `only_columns` order).

    Missing rows and columns are ignored.

    `NaN` is returned as float("nan").

    A result whose metadata lists no columns short-circuits to `{}` without
    touching the cache.

    Raise CorruptCacheError if the cached data does not match `crr`. That can
    mean:

        * The cached Parquet file is corrupt
        * The cached Parquet file is missing
        * `crr` is stale -- the cached result is for a different delta. This
          could be detected by a `Workflow.cooperative_lock()`, too, should the
          caller want to distinguish this error from the others.

    The CorruptCacheError is chained to the underlying `pyarrow.ArrowIOError`
    or `FileNotFoundError` (available as `__cause__`).
    """
    if not crr.table_metadata.columns:
        # Zero-column tables aren't written to cache
        return {}

    try:
        with downloaded_parquet_file(crr) as parquet_path:
            return parquet.read_pydict(parquet_path, only_columns, only_rows)
    except (pyarrow.ArrowIOError, FileNotFoundError) as err:  # FIXME unit-test
        # Chain explicitly: the traceback then reports the I/O failure as the
        # direct cause instead of as an error raised while handling another.
        raise CorruptCacheError from err

示例#2

显示文件

 def test_pydict_zero_rows(self):
     """Check that a four-column file with no rows reads back as empty lists."""
     with tempfile_context() as path:
         # One empty column per type: string, dictionary, timestamp, float.
         col_a = pyarrow.array([], type=pyarrow.string())
         col_b = pyarrow.DictionaryArray.from_arrays(
             pyarrow.array([], type=pyarrow.int32()),
             pyarrow.array([], type=pyarrow.string()),
         )
         col_c = pyarrow.array([], type=pyarrow.timestamp("ns"))
         col_d = pyarrow.array([], type=pyarrow.float64())
         empty_table = pyarrow.table(
             {"A": col_a, "B": col_b, "C": col_c, "D": col_d}
         )

         # ensure at least 1 row group
         parquet.write(path, empty_table)

         actual = parquet.read_pydict(path, range(4), range(0))
         expected = {"A": [], "B": [], "C": [], "D": []}
         self.assertEqual(actual, expected)

示例#3

显示文件

 def test_pydict_zero_row_groups(self):
     """Check that a table built from zero batches reads back as one empty column."""
     string_schema = pyarrow.schema([("A", pyarrow.string())])
     # No record batches at all: only the schema describes the table.
     table = pyarrow.Table.from_batches([], schema=string_schema)
     with parquet_file(table) as path:
         actual = parquet.read_pydict(path, range(1), range(0))
         expected = {"A": []}
         self.assertEqual(actual, expected)

示例#4

显示文件

 def test_pydict_nan(self):
     """Check that both NaN and null float64 cells read back as float NaN."""
     with parquet_file({
             "A":
             pyarrow.array([1.1, float("nan"), None],
                           type=pyarrow.float64())
     }) as path:
         result = parquet.read_pydict(path, range(1), range(3))
         self.assertEqual(result["A"][0], 1.1)
         # NaN never compares equal to itself, so test with math.isnan().
         # assertTrue replaces the deprecated ``assert_`` alias, which was
         # removed from unittest in Python 3.12.
         self.assertTrue(math.isnan(result["A"][1]))
         self.assertTrue(math.isnan(result["A"][2]))

示例#5

显示文件

 def test_pydict_lots_of_types(self):
     """Round-trip string, dictionary-encoded, datetime, integer and float columns."""
     first_stamp = datetime.now()
     second_stamp = datetime.now()
     categories = pyarrow.array(["x", "y", None, "x"]).dictionary_encode()

     # NOTE(review): 2**31 is one past the int32 maximum, so despite its name
     # the "int32" column presumably is not stored as int32 -- confirm.
     source_columns = {
         "str": ["x", "y", None, "z"],
         "cat": categories,
         "dt": [first_stamp, None, second_stamp, None],
         "int32": [1, 2, 3, 2**31],
         "float": [1.1, 2.2, 3.3, 4.4],
     }
     # Same data as plain Python objects; the dictionary column is expected
     # to come back as ordinary strings.
     expected = {
         "str": ["x", "y", None, "z"],
         "cat": ["x", "y", None, "x"],
         "dt": [first_stamp, None, second_stamp, None],
         "int32": [1, 2, 3, 2**31],
         "float": [1.1, 2.2, 3.3, 4.4],
     }

     with parquet_file(source_columns) as path:
         actual = parquet.read_pydict(path, range(5), range(4))
         self.assertEqual(actual, expected)

示例#6

显示文件

 def test_pydict_ignore_missing_rows(self):
     """Check that a row range running past the end of the table is clipped."""
     four_rows = {"A": [0, 1, 2, 3]}
     with parquet_file(four_rows) as path:
         # Rows 2..4 are requested, but only rows 2 and 3 exist.
         actual = parquet.read_pydict(path, range(1), range(2, 5))
         self.assertEqual(actual, {"A": [2, 3]})

示例#7

显示文件

 def test_pydict_only_rows(self):
     """Check that only the requested slice of rows is returned."""
     eight_rows = {"A": [0, 1, 2, 3, 4, 5, 6, 7]}
     with parquet_file(eight_rows) as path:
         # range(2, 5) selects rows 2, 3 and 4 from the middle of the table.
         actual = parquet.read_pydict(path, range(1), range(2, 5))
         self.assertEqual(actual, {"A": [2, 3, 4]})

示例#8

显示文件

 def test_pydict_ignore_missing_columns(self):
     """Check that column indexes beyond the table's columns are skipped."""
     single_column = {"A": [1]}
     with parquet_file(single_column) as path:
         # Three column indexes are requested, but the table has only "A".
         actual = parquet.read_pydict(path, range(3), range(1))
         self.assertEqual(actual, {"A": [1]})