示例#1
0
    def test_render_using_tab_output(self):
        """Check how a Tab param is presented to a ``render(table, params)``.

        The nested ``render`` asserts that ``params["tabparam"]`` exposes the
        tab's name, a dict of ``ptypes.RenderColumn`` and a pandas DataFrame.
        """

        def render(table, params):
            tab_param = params["tabparam"]
            self.assertEqual(tab_param.name, "Tab 1")
            expected_columns = {
                "X": ptypes.RenderColumn("X", "number", "{:,d}"),
                "Y": ptypes.RenderColumn("Y", "text", None),
            }
            self.assertEqual(tab_param.columns, expected_columns)
            expected_frame = pd.DataFrame({"X": [1], "Y": ["y"]})
            assert_frame_equal(tab_param.dataframe, expected_frame)

        schema = ParamSchema.Dict({"tabparam": ParamSchema.Tab()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            # The table context is pointed at env.basedir; only the yielded
            # path's file name is handed over through TabOutput.
            tab_table = arrow_table_context(
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
                dir=env.basedir,
            )
            with tab_table as (path, _):
                outputs = {
                    "tab-1": TabOutput(tab_name="Tab 1", table_filename=path.name),
                }
                env.call_render(
                    make_table(),
                    params={"tabparam": "tab-1"},
                    tab_outputs=outputs,
                )
示例#2
0
 def test_clean_file_safe_filename(self):
     """An unsafe upload name is reported with a sanitized local filename.

     The expected ``UploadedFile`` keeps the original name but its filename
     has every ``/`` and ``$`` replaced by ``-`` and is prefixed by the UUID.
     """
     file_uuid = "6e00511a-8ac4-4b72-9acc-9d069992b5cf"
     unsafe_name = "/etc/passwd.$/etc/passwd"

     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().steps.create(
         module_id_name="uploadfile", order=0, slug="step-1"
     )
     # "$" is a literal character in a Python f-string; only {...} interpolates.
     key = f"wf-${workflow.id}/wfm-${step.id}/{file_uuid}"
     s3.put_bytes(s3.UserFilesBucket, key, b"1234")
     model = UploadedFileModel.objects.create(
         step=step,
         name=unsafe_name,
         size=4,
         uuid=file_uuid,
         key=key,
     )

     schema = ParamSchema.Dict({"file": ParamSchema.File()})
     with ExitStack() as inner_stack:
         result = self._call_prep_params(
             schema,
             {"file": file_uuid},
             step_id=step.id,
             exit_stack=inner_stack,
         )
         expected = UploadedFile(
             unsafe_name,
             f"{file_uuid}_-etc-passwd.--etc-passwd",
             model.created_at,
         )
         self.assertEqual(result.uploaded_files[file_uuid], expected)
    def test_render_uploaded_files(self):
        """``render_arrow_v1`` receives ``uploaded_files`` keyed by file UUID.

        The nested renderer asserts the param value is the UUID and that the
        matching entry carries the upload's name and timestamp.
        """
        file_uuid = "406b5e37-f217-4e87-b6b2-eede3bec6492"
        uploaded_at = datetime(2021, 4, 21, 12, 4, 5)

        def render_arrow_v1(table, params, *, uploaded_files, **kwargs):
            self.assertEqual(params["file"], file_uuid)
            uploaded_file = uploaded_files[params["file"]]
            self.assertEqual(uploaded_file.name, "x.data")
            self.assertEqual(uploaded_file.uploaded_at, uploaded_at)
            return ArrowRenderResult(make_table())

        schema = ParamSchema.Dict({"file": ParamSchema.File()})
        with ModuleTestEnv(
            param_schema=schema, render_arrow_v1=render_arrow_v1
        ) as env:
            # Put the "uploaded" bytes into the env's basedir.
            temp_path = env.basedir / f"{file_uuid}_x.data"
            temp_path.write_bytes(b"hello, world!")
            upload = UploadedFile(
                name="x.data",
                filename=temp_path.name,
                uploaded_at=uploaded_at,
            )
            env.call_render(
                make_table(),
                params={"file": file_uuid},
                uploaded_files={file_uuid: upload},
            )
示例#4
0
 def test_clean_condition_empty_column_is_none(self):
     """A condition whose ``column`` is empty is expected to clean to None.

     Checked twice: for the bare condition, and for the same condition
     wrapped in single-element "and" / "or" groups.
     """
     columns = [NUMBER("A")]

     bare = {
         "operation": "text_is",
         "column": "",
         "value": "",
         "isCaseSensitive": False,
         "isRegex": False,
     }
     cleaned_bare = self._call_clean_value(
         ParamSchema.Condition(), bare, input_table_columns=columns
     )
     self.assertIsNone(cleaned_bare)

     # And test it in the context of a broader and/or
     nested = {
         "operation": "and",
         "conditions": [
             {
                 "operation": "or",
                 "conditions": [
                     {
                         "operation": "text_is",
                         "column": "",
                         "value": "",
                         "isCaseSensitive": False,
                         "isRegex": False,
                     }
                 ],
             }
         ],
     }
     cleaned_nested = self._call_clean_value(
         ParamSchema.Condition(), nested, input_table_columns=[NUMBER("A")]
     )
     self.assertIsNone(cleaned_nested)
示例#5
0
    def test_render_empty_file_param(self):
        """A File param set to ``None`` is expected to reach ``render`` as None."""

        def render(arrow_table, params, output_path, *args, **kwargs):
            file_param = params["file"]
            self.assertIsNone(file_param)

        test_env = ModuleTestEnv(
            param_schema=ParamSchema.Dict({"file": ParamSchema.File()}),
            render=render,
        )
        with test_env as env:
            empty_params = {"file": None}
            env.call_render(make_table(), empty_params)
示例#6
0
def test_param_schema_explicit():
    """A spec's explicit ``param_schema`` entry is what ``param_schema`` holds.

    The expected value is the ``ParamSchema`` equivalent of the JSON-like
    ``param_schema`` dict passed to ``load_spec``.
    """
    explicit_schema = {
        "id_name": {
            "type": "dict",
            "properties": {
                "x": {"type": "integer"},
                "y": {"type": "string", "default": "X"},
            },
        }
    }
    spec = load_spec(
        {
            "id_name": "x",
            "name": "x",
            "category": "Clean",
            "parameters": [{"id_name": "whee", "type": "custom"}],
            "param_schema": explicit_schema,
        }
    )

    inner = ParamSchema.Dict(
        {"x": ParamSchema.Integer(), "y": ParamSchema.String(default="X")}
    )
    expected = ParamSchema.Dict({"id_name": inner})
    assert spec.param_schema == expected
示例#7
0
def test_param_schema_implicit():
    """Without an explicit ``param_schema``, one is derived from ``parameters``.

    The expected schema has entries for the string and menu parameters only.
    """
    string_param = {"id_name": "foo", "type": "string", "default": "X"}
    secret_param = {
        "id_name": "bar",
        "type": "secret",
        "secret_logic": {"provider": "oauth2", "service": "google"},
    }
    menu_param = {
        "id_name": "baz",
        "type": "menu",
        "options": [
            {"value": "a", "label": "A"},
            "separator",
            {"value": "c", "label": "C"},
        ],
        "default": "c",
    }
    spec = load_spec(
        {
            "id_name": "googlesheets",
            "name": "x",
            "category": "Clean",
            "parameters": [string_param, secret_param, menu_param],
        }
    )

    expected = ParamSchema.Dict(
        {
            "foo": ParamSchema.String(default="X"),
            # The secret parameter ("bar") is not in param_schema.
            "baz": ParamSchema.Enum(choices=frozenset({"a", "c"}), default="c"),
        }
    )
    assert spec.param_schema == expected
示例#8
0
def test_list_recurse():
    """``parse`` turns a "list" spec with ``inner_dtype`` into a ``List``."""
    list_spec = {"type": "list", "inner_dtype": {"type": "string"}}
    parsed = parse(list_spec)
    assert parsed == ParamSchema.List(ParamSchema.String())
示例#9
0
    def test_dict_prompting_error_concatenate_different_types(self):
        """Two Dict properties with different wrong column types give two errors.

        Both properties want a number column; "A" is text and "B" is a
        timestamp, so one ``WrongColumnType`` per actual type is expected.
        """
        wanted = frozenset({"number"})
        schema = ParamSchema.Dict(
            {
                "x": ParamSchema.Column(column_types=frozenset({"number"})),
                "y": ParamSchema.Column(column_types=frozenset({"number"})),
            }
        )
        raw_params = {"x": "A", "y": "B"}

        with self.assertRaises(PromptingError) as cm:
            self._call_clean_value(
                schema,
                raw_params,
                input_table_columns=[TEXT("A"), TIMESTAMP("B")],
            )

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", wanted),
            PromptingError.WrongColumnType(["B"], "timestamp", wanted),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
示例#10
0
def test_map_recurse():
    """``parse`` turns a "map" spec with ``value_dtype`` into a ``Map``."""
    map_spec = {"type": "map", "value_dtype": {"type": "string"}}
    parsed = parse(map_spec)
    assert parsed == ParamSchema.Map(value_schema=ParamSchema.String())
示例#11
0
    def test_render_tab_outputs(self):
        """``render_arrow_v1`` receives ``tab_outputs`` keyed by tab slug.

        The nested renderer asserts that the Tab param is the slug and that
        the matching output carries the tab name and the expected table.
        """

        def render_arrow_v1(table, params, *, tab_outputs, **kwargs):
            self.assertEqual(params["tab"], "tab-x")
            self.assertEqual(tab_outputs["tab-x"].tab_name, "Tab X")
            expected_table = make_table(
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
            )
            assert_arrow_table_equals(tab_outputs["tab-x"].table, expected_table)
            return ArrowRenderResult(make_table())

        schema = ParamSchema.Dict({"tab": ParamSchema.Tab()})
        with ModuleTestEnv(
            param_schema=schema, render_arrow_v1=render_arrow_v1
        ) as env:
            # The table context is pointed at env.basedir; only the yielded
            # path's file name is handed over through TabOutput.
            tab_table = arrow_table_context(
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
                dir=env.basedir,
            )
            with tab_table as (path, _):
                outputs = {
                    "tab-x": TabOutput(tab_name="Tab X", table_filename=path.name),
                }
                env.call_render(
                    make_table(),
                    params={"tab": "tab-x"},
                    tab_outputs=outputs,
                )
示例#12
0
def test_dict_recurse():
    """``parse`` turns a "dict" spec with ``properties`` into a ``Dict``."""
    dict_spec = {"type": "dict", "properties": {"x": {"type": "string"}}}
    parsed = parse(dict_spec)
    assert parsed == ParamSchema.Dict(properties={"x": ParamSchema.String()})
示例#13
0
 def test_clean_normal_dict(self):
     """A Dict of String and Integer values is expected to clean unchanged."""
     properties = {
         "str": ParamSchema.String(),
         "int": ParamSchema.Integer(),
     }
     raw_params = {"str": "foo", "int": 3}
     # Copy taken before the call, so the comparison is against the input as given.
     untouched = raw_params.copy()
     cleaned = self._call_clean_value(ParamSchema.Dict(properties), raw_params)
     self.assertEqual(cleaned, untouched)
示例#14
0
    def _(self, schema: ParamSchema.Multitab, value: List[str]) -> List[str]:
        """Clean a Multitab value into tab slugs ordered like ``self.tabs``.

        The slugs are first cleaned as a list of Tab params; entries that
        clean to ``None`` are dropped. The survivors are returned in the
        iteration order of ``self.tabs.keys()``, not in the order given.
        """
        # Recurse -- the same way we clean a list.
        tab_list_schema = ParamSchema.List(inner_schema=ParamSchema.Tab())
        cleaned = self.clean_value_list(tab_list_schema, value)
        wanted = frozenset(slug for slug in cleaned if slug is not None)

        # Order based on `self.tabs`.
        ordered = []
        for slug in self.tabs.keys():
            if slug in wanted:
                ordered.append(slug)
        return ordered
示例#15
0
 def test_clean_normal_dict(self):
     """``clean_value`` on a Dict of String and Integer is expected to be a no-op."""
     input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
     properties = {
         "str": ParamSchema.String(),
         "int": ParamSchema.Integer(),
     }
     raw_params = {"str": "foo", "int": 3}
     # Copy taken before the call, so the comparison is against the input as given.
     untouched = raw_params.copy()
     cleaned = clean_value(ParamSchema.Dict(properties), raw_params, input_shape)
     self.assertEqual(cleaned, untouched)
示例#16
0
    def test_render_with_no_kwargs(self):
        """A two-argument ``render(table, params)`` is called and its result used.

        The renderer multiplies the table by ``params["n"]``; the outcome's
        table is expected to hold the doubled value.
        """

        def render(table, params):
            return table * params["n"]

        schema = ParamSchema.Dict({"n": ParamSchema.Float()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            input_table = make_table(make_column("A", [1]))
            outcome = env.call_render(input_table, {"n": 2})
            rendered = outcome.read_table()
            assert_arrow_table_equals(rendered, make_table(make_column("A", [2])))
示例#17
0
 def test_clean_tab_omit_unused_tabs_from_tab_outputs(self):
     """Only the tab named by the param is expected in ``tab_outputs``.

     Three tab results are supplied, the param selects "tab-1", and the
     expected ``tab_outputs`` contains that single tab.
     """
     available = (
         ("tab-1", "Tab 1", "tab-1.arrow"),
         ("tab-2", "Tab 2", "tab-2.arrow"),
         ("tab-3", "Tab 3", "tab-3.arrow"),
     )
     tab_results = {
         Tab(slug, name): StepResult(Path(filename), [TEXT("A")])
         for slug, name, filename in available
     }
     schema = ParamSchema.Dict({"x": ParamSchema.Tab()})
     result = self._call_prep_params(
         schema, {"x": "tab-1"}, tab_results=tab_results
     )
     expected = {"tab-1": TabOutput("Tab 1", "tab-1.arrow")}
     self.assertEqual(result.tab_outputs, expected)
示例#18
0
    def test_list_prompting_error_concatenate_different_type_to_text(self):
        """Wrong-typed columns are merged into one error when text is wanted.

        "A" is a number and "B" is a timestamp; a single ``WrongColumnType``
        naming both columns, with ``None`` as the found type, is expected.
        """
        column_schema = ParamSchema.Column(column_types=frozenset({"text"}))
        schema = ParamSchema.List(inner_schema=column_schema)

        with self.assertRaises(PromptingError) as cm:
            self._call_clean_value(
                schema,
                ["A", "B"],
                input_table_columns=[NUMBER("A"), TIMESTAMP("B")],
            )

        merged = PromptingError.WrongColumnType(
            ["A", "B"], None, frozenset({"text"})
        )
        self.assertEqual(cm.exception.errors, [merged])
示例#19
0
    def test_list_prompting_error_concatenate_same_type(self):
        """Columns sharing the same wrong type are reported in one error.

        Both "A" and "B" are text while numbers are wanted; a single
        ``WrongColumnType`` naming both columns is expected.
        """
        column_schema = ParamSchema.Column(column_types=frozenset({"number"}))
        schema = ParamSchema.List(inner_schema=column_schema)

        with self.assertRaises(PromptingError) as cm:
            self._call_clean_value(
                schema,
                ["A", "B"],
                input_table_columns=[TEXT("A"), TEXT("B")],
            )

        merged = PromptingError.WrongColumnType(
            ["A", "B"], "text", frozenset({"number"})
        )
        self.assertEqual(cm.exception.errors, [merged])
示例#20
0
 def test_clean_condition_and_or_simplify(self):
     """Single-element and/or wrappers are expected to collapse to the leaf.

     The expected result is just the leaf's ``operation`` and ``column``.
     """
     leaf = {
         "operation": "cell_is_blank",
         "column": "A",
         "value": "",
         "isCaseSensitive": False,
         "isRegex": False,
     }
     wrapped = {
         "operation": "and",
         "conditions": [{"operation": "or", "conditions": [leaf]}],
     }
     cleaned = self._call_clean_value(
         ParamSchema.Condition(),
         wrapped,
         input_table_columns=[NUMBER("A")],
     )
     simplified = {"operation": "cell_is_blank", "column": "A"}
     self.assertEqual(cleaned, simplified)
示例#21
0
 def test_clean_file_no_s3_file(self):
     """Expect ``None`` when cleaning a File param whose S3 object is absent.

     Also checks that nothing is left behind in ``self.basedir`` afterwards.
     """
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.steps.create(module_id_name="uploadfile",
                             order=0,
                             slug="step-1")
     step2 = tab.steps.create(module_id_name="uploadfile",
                              order=1,
                              slug="step-2")
     # Named `file_uuid` rather than `id` so the builtin `id()` is not shadowed.
     file_uuid = str(uuid.uuid4())
     # "$" is a literal character in a Python f-string; only {...} interpolates.
     key = f"wf-${workflow.id}/wfm-${step.id}/${file_uuid}"
     # Oops -- let's _not_ put the file! No s3.put_bytes() call is made for
     # `key`, so the database row below points at a missing S3 object.
     # NOTE(review): the row is attached to `step2` while the value is cleaned
     # with step_id=step.id, so None may come from the step mismatch rather
     # than from the missing S3 object -- confirm which case is intended.
     UploadedFileModel.objects.create(step=step2,
                                      name="x.csv.gz",
                                      size=4,
                                      uuid=file_uuid,
                                      key=key)
     result = self._call_clean_value(ParamSchema.File(),
                                     file_uuid,
                                     step_id=step.id)
     self.assertIsNone(result)
     # Assert that if a temporary file was created to house the download, it
     # no longer exists.
     self.assertListEqual(list(self.basedir.iterdir()), [])
示例#22
0
 def test_clean_float_with_int_value(self):
     """An ``int`` given for a Float param is expected to come back as ``float``."""
     # ParamSchema.Float can have `int` values: values come from parsed JSON,
     # which only has Numbers, so it can give "3" instead of "3.0". We want to
     # pass that as `float` in the `params` dict.
     cleaned = clean_value(ParamSchema.Float(), 3, None)
     self.assertEqual(cleaned, 3.0)
     self.assertIsInstance(cleaned, float)
示例#23
0
 def test_validate_ok(self):
     """``validate`` is called on two column/color series; no exception expected."""
     series = [
         {"column": "A", "color": "#aaaaaa"},
         {"column": "B", "color": "#bbbbbb"},
     ]
     schema = S.Multichartseries()
     schema.validate(series)
示例#24
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        """Series on non-number columns are expected to raise ``PromptingError``.

        "A" is text and "B" is a timestamp; one ``WrongColumnType`` per
        column, each asking for a number, is expected.
        """
        series = [
            {"column": "A", "color": "#aaaaaa"},
            {"column": "B", "color": "#cccccc"},
        ]

        with self.assertRaises(PromptingError) as cm:
            self._call_clean_value(
                ParamSchema.Multichartseries(),
                series,
                input_table_columns=[TEXT("A"), TIMESTAMP("B")],
            )

        wanted = frozenset({"number"})
        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", wanted),
            PromptingError.WrongColumnType(["B"], "timestamp", wanted),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
示例#25
0
 def test_clean_tabs_tab_cycle(self):
     """Cleaning a Multitab that names a tab whose result is ``None`` must raise.

     The test supplies ``None`` as the result for "tab-1" and expects
     ``TabCycleError``.
     """
     tab_results = {Tab("tab-1", "Tab 1"): None}
     schema = ParamSchema.Multitab()
     with self.assertRaises(TabCycleError):
         self._call_clean_value(schema, ["tab-1"], tab_results=tab_results)
示例#26
0
 def test_clean_column_happy_path(self):
     """A number column chosen for a number-only Column param is kept as-is."""
     input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
     schema = ParamSchema.Column(column_types=frozenset({"number"}))
     cleaned = clean_value(schema, "A", input_shape)
     self.assertEqual(cleaned, "A")
示例#27
0
    def test_clean_file_happy_path(self):
        """An existing upload is made available locally while the stack is open.

        Expects the ``PrepParamsResult`` to list the upload, the local file
        ``<uuid>_<name>`` in ``self.basedir`` to hold the uploaded bytes, and
        that file to be gone after the ``ExitStack`` has closed.
        """
        file_uuid = "6e00511a-8ac4-4b72-9acc-9d069992b5cf"
        local_name = f"{file_uuid}_x.csv.gz"

        workflow = Workflow.create_and_init()
        step = workflow.tabs.first().steps.create(
            module_id_name="uploadfile", order=0, slug="step-1"
        )
        # "$" is a literal character in a Python f-string; only {...} interpolates.
        key = f"wf-${workflow.id}/wfm-${step.id}/{file_uuid}"
        s3.put_bytes(s3.UserFilesBucket, key, b"1234")
        model = UploadedFileModel.objects.create(
            step=step,
            name="x.csv.gz",
            size=4,
            uuid=file_uuid,
            key=key,
        )

        schema = ParamSchema.Dict({"file": ParamSchema.File()})
        with ExitStack() as inner_stack:
            result = self._call_prep_params(
                schema,
                {"file": file_uuid},
                step_id=step.id,
                exit_stack=inner_stack,
            )
            expected = PrepParamsResult(
                {"file": file_uuid},
                tab_outputs={},
                uploaded_files={
                    file_uuid: UploadedFile(
                        "x.csv.gz", local_name, model.created_at
                    ),
                },
            )
            self.assertEqual(result, expected)
            local_bytes = (self.basedir / local_name).read_bytes()
            self.assertEqual(local_bytes, b"1234")

        # Assert that once `exit_stack` goes out of scope, file is deleted
        still_there = (self.basedir / local_name).exists()
        self.assertFalse(still_there)
示例#28
0
    def test_render_file_param(self):
        """A File param is expected to reach ``render`` as a readable path.

        The nested ``render`` calls ``read_bytes()`` on ``params["file"]`` and
        expects the bytes written into the env's basedir.
        """

        def render(arrow_table, params, output_path, *args, **kwargs):
            self.assertEqual(params["file"].read_bytes(), b"hi")

        file_uuid = "839526fa-1adb-4eec-9d29-f5b4d2fbba30"
        schema = ParamSchema.Dict({"file": ParamSchema.File()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            filename = f"{file_uuid}_x.tar.gz"
            local_path = env.basedir / filename
            local_path.write_bytes(b"hi")
            upload = UploadedFile("x.tar.gz", filename, datetime.now())
            env.call_render(
                make_table(),
                {"file": file_uuid},
                uploaded_files={file_uuid: upload},
            )
示例#29
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Selected columns are expected back in the table's column order.

     The table lists "B" before "A"; the value ["A", "B"] should become
     ["B", "A"].
     """
     table_columns = [
         Column("B", ColumnType.Number()),
         Column("A", ColumnType.Number()),
     ]
     input_shape = TableMetadata(3, table_columns)
     cleaned = clean_value(ParamSchema.Multicolumn(), ["A", "B"], input_shape)
     self.assertEqual(cleaned, ["B", "A"])
示例#30
0
 def test_clean_multicolumn_missing_is_removed(self):
     """A selected column absent from the table is expected to be dropped.

     "X" is not among the table's columns, so ["A", "X", "B"] should become
     ["A", "B"].
     """
     table_columns = [
         Column("A", ColumnType.Number()),
         Column("B", ColumnType.Number()),
     ]
     input_shape = TableMetadata(3, table_columns)
     selected = ["A", "X", "B"]
     cleaned = clean_value(ParamSchema.Multicolumn(), selected, input_shape)
     self.assertEqual(cleaned, ["A", "B"])