Exemplo n.º 1
0
    def test_render_with_input_columns(self):
        """Check the ``input_columns`` keyword argument handed to ``render``.

        The ``render`` callback below accepts a keyword-only
        ``input_columns`` argument and asserts it is a dict keyed by column
        name, holding one ``ptypes.RenderColumn`` per input column: text for
        "A", number (with its format string) for "B", datetime for "C".
        """

        def render(*args, input_columns):
            expected_input_columns = {
                "A": ptypes.RenderColumn("A", "text", None),
                "B": ptypes.RenderColumn("B", "number", "{:,.3f}"),
                "C": ptypes.RenderColumn("C", "datetime", None),
            }
            self.assertEqual(input_columns, expected_input_columns)

        # One row per column; the datetime value itself is never asserted on.
        table_data = {
            "A": ["x"],
            "B": [1],
            "C": pa.array([datetime.now()], pa.timestamp("ns")),
        }
        table_columns = [
            Column("A", ColumnType.Text()),
            Column("B", ColumnType.Number("{:,.3f}")),
            Column("C", ColumnType.Datetime()),
        ]

        with arrow_table_context(
            table_data, columns=table_columns, dir=self.basedir
        ) as input_table:
            self._test_render(render, arrow_table=input_table)
Exemplo n.º 2
0
    def test_metadata_comes_from_db_columns(self):
        """Check ``table_metadata`` is still correct once the stored file is gone.

        A one-row, three-column result is cached, its stored object is
        removed from the bucket, and a freshly loaded ``WfModule`` must still
        report ``TableMetadata(1, columns)``.
        """
        columns = [
            Column("A", ColumnType.Number(format="{:,.2f}")),
            Column("B", ColumnType.Datetime()),
            Column("C", ColumnType.Text()),
        ]
        table = arrow_table(
            {"A": [1], "B": [datetime.datetime.now()], "C": ["x"]},
            columns=columns,
        )
        result = RenderResult(table)
        cache_render_result(
            self.workflow, self.wf_module, self.delta.id, result
        )

        # Delete from disk entirely, to prove we did not read.
        parquet_key = crr_parquet_key(self.wf_module.cached_render_result)
        minio.remove(BUCKET, parquet_key)

        # Load _new_ CachedRenderResult -- from DB columns, not memory
        reloaded_wf_module = WfModule.objects.get(id=self.wf_module.id)
        reloaded_metadata = (
            reloaded_wf_module.cached_render_result.table_metadata
        )

        self.assertEqual(reloaded_metadata, TableMetadata(1, columns))
Exemplo n.º 3
0
def _arrow_column_to_column(name: str, column: pyarrow.ChunkedArray) -> Column:
    """Build a ``Column`` named *name* from the Arrow type of *column*.

    Mapping, checked in this order:

    * floating-point or integer -> ``ColumnType.Number("{:,}")``
    * timestamp                 -> ``ColumnType.Datetime()``
    * string or dictionary      -> ``ColumnType.Text()``

    Raises:
        RuntimeError: if the Arrow type matches none of the above.
    """
    arrow_type = column.type
    type_checks = pyarrow.types

    if type_checks.is_floating(arrow_type) or type_checks.is_integer(arrow_type):
        return Column(name, ColumnType.Number("{:,}"))

    if type_checks.is_timestamp(arrow_type):
        return Column(name, ColumnType.Datetime())

    if type_checks.is_string(arrow_type) or type_checks.is_dictionary(arrow_type):
        return Column(name, ColumnType.Text())

    raise RuntimeError("Unknown column type %r" % arrow_type)
Exemplo n.º 4
0
 def test_read_cached_render_result_slice_as_text_datetime(self):
     """Check the "csv" text produced for a cached datetime column.

     Caches a single datetime column "A" holding one nanosecond timestamp
     and one null, reads a slice of it back as "csv" text, and compares
     the text with the exact expected string.
     """
     timestamps = pa.array(
         [2134213412341232967, None], pa.timestamp("ns")
     )
     table = arrow_table(
         {"A": timestamps},
         columns=[Column("A", ColumnType.Datetime())],
     )
     cache_render_result(
         self.workflow, self.wf_module, self.delta.id, RenderResult(table)
     )

     cached = self.wf_module.cached_render_result
     csv_text = read_cached_render_result_slice_as_text(
         cached, "csv", range(2), range(3)
     )

     self.assertEqual(csv_text, "A\n2037-08-18T13:03:32.341232967Z\n")
Exemplo n.º 5
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        """Check that non-text columns in a text-only multicolumn param prompt.

        Cleaning the value "A,B" against a multicolumn schema restricted to
        "text" columns, where "A" is a number and "B" is a datetime, must
        raise ``PromptingError`` carrying one ``WrongColumnType`` error that
        names both columns.
        """
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        table_shape = TableMetadata(
            3,
            [
                Column("A", ColumnType.Number()),
                Column("B", ColumnType.Datetime()),
                Column("C", ColumnType.Text()),
            ],
        )

        with self.assertRaises(PromptingError) as raised:
            text_only_schema = ParamDType.Multicolumn(
                column_types=frozenset({"text"})
            )
            clean_value(text_only_schema, "A,B", table_shape)

        expected_errors = [
            PromptingError.WrongColumnType(
                ["A", "B"], None, frozenset({"text"})
            )
        ]
        self.assertEqual(raised.exception.errors, expected_errors)
Exemplo n.º 6
0
    def test_render_arrow_table_infer_output_column_formats_from_input(self):
        """Check how output column metadata is derived from the input columns.

        The input has three number columns (A-C, format "{:,.3f}"), three
        datetime columns (D-F) and three text columns (G-I). ``render``
        writes an output table in which every group of three holds one
        number, one timestamp and one string column. The expected metadata
        keeps the input column's type where the output kind matches it
        (A, E, I) and uses ``Number("{:,}")``, ``Datetime()`` or ``Text()``
        for the others.
        """
        number_format = "{:,.3f}"
        input_columns = (
            [Column(name, ColumnType.Number(number_format)) for name in "ABC"]
            + [Column(name, ColumnType.Datetime()) for name in "DEF"]
            + [Column(name, ColumnType.Text()) for name in "GHI"]
        )

        def timestamp_array():
            # A fresh one-element nanosecond timestamp array per column.
            return pa.array([datetime(2020, 3, 8)], pa.timestamp("ns"))

        # The param name "arrow_table" is a special case
        def render(arrow_table, params, output_path, *, columns, **kwargs):
            # Test the "columns" kwarg
            self.assertEqual(columns, input_columns)

            # Within each group of three names: number, timestamp, string.
            output_data = {}
            for number_name, datetime_name, text_name in ("ABC", "DEF", "GHI"):
                output_data[number_name] = [1]
                output_data[datetime_name] = timestamp_array()
                output_data[text_name] = ["a"]
            table = pa.table(output_data)

            with pa.ipc.RecordBatchFileWriter(output_path, table.schema) as sink:
                sink.write_table(table)
            return []

        input_data = {
            **{name: [1] for name in "ABC"},
            **{name: timestamp_array() for name in "DEF"},
            **{name: ["a"] for name in "GHI"},
        }

        with arrow_table_context(
            input_data, columns=input_columns, dir=self.basedir
        ) as input_table:
            result = self._test_render(render, arrow_table=input_table)
            expected_columns = [
                Column("A", ColumnType.Number("{:,.3f}")),  # recalled
                Column("B", ColumnType.Datetime()),  # inferred
                Column("C", ColumnType.Text()),  # inferred
                Column("D", ColumnType.Number("{:,}")),  # inferred
                Column("E", ColumnType.Datetime()),  # recalled
                Column("F", ColumnType.Text()),  # inferred
                Column("G", ColumnType.Number("{:,}")),  # inferred
                Column("H", ColumnType.Datetime()),  # inferred
                Column("I", ColumnType.Text()),  # recalled
            ]
            self.assertEqual(result.table.metadata.columns, expected_columns)
Exemplo n.º 7
0
def Datetime(name: str) -> Column:
    """Return a ``Column`` called *name* whose type is ``ColumnType.Datetime()``."""
    datetime_type = ColumnType.Datetime()
    return Column(name, datetime_type)