Пример #1
0
def test_param_schema_explicit():
    """An explicit ``param_schema`` in the spec is parsed into ParamSchema objects."""
    explicit_schema = {
        "id_name": {
            "type": "dict",
            "properties": {
                "x": {"type": "integer"},
                "y": {"type": "string", "default": "X"},
            },
        }
    }
    spec = load_spec(
        {
            "id_name": "x",
            "name": "x",
            "category": "Clean",
            "parameters": [{"id_name": "whee", "type": "custom"}],
            "param_schema": explicit_schema,
        }
    )

    actual = spec.param_schema
    expected_inner = ParamSchema.Dict(
        {"x": ParamSchema.Integer(), "y": ParamSchema.String(default="X")}
    )
    assert actual == ParamSchema.Dict({"id_name": expected_inner})
Пример #2
0
    def test_render_uploaded_files(self):
        """``uploaded_files`` given to call_render() reach ``render_arrow_v1``."""
        file_id = "406b5e37-f217-4e87-b6b2-eede3bec6492"

        def render_arrow_v1(table, params, *, uploaded_files, **kwargs):
            self.assertEqual(params["file"], file_id)
            upload = uploaded_files[params["file"]]
            self.assertEqual(upload.name, "x.data")
            self.assertEqual(
                upload.uploaded_at, datetime(2021, 4, 21, 12, 4, 5)
            )
            return ArrowRenderResult(make_table())

        schema = ParamSchema.Dict({"file": ParamSchema.File()})
        test_env = ModuleTestEnv(
            param_schema=schema, render_arrow_v1=render_arrow_v1
        )
        with test_env as env:
            # The file must exist on disk inside the env's basedir.
            data_path = env.basedir / "406b5e37-f217-4e87-b6b2-eede3bec6492_x.data"
            data_path.write_bytes(b"hello, world!")
            input_table = make_table()
            upload = UploadedFile(
                name="x.data",
                filename=data_path.name,
                uploaded_at=datetime(2021, 4, 21, 12, 4, 5),
            )
            env.call_render(
                input_table,
                params={"file": file_id},
                uploaded_files={file_id: upload},
            )
Пример #3
0
    def test_render_using_tab_output(self):
        """A Tab param reaches ``render()`` with its name, columns and dataframe."""

        def render(table, params):
            tab = params["tabparam"]
            self.assertEqual(tab.name, "Tab 1")
            expected_columns = {
                "X": ptypes.RenderColumn("X", "number", "{:,d}"),
                "Y": ptypes.RenderColumn("Y", "text", None),
            }
            self.assertEqual(tab.columns, expected_columns)
            assert_frame_equal(
                tab.dataframe, pd.DataFrame({"X": [1], "Y": ["y"]})
            )

        schema = ParamSchema.Dict({"tabparam": ParamSchema.Tab()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            tab_columns = (
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
            )
            # Write the other tab's table into the env's basedir.
            with arrow_table_context(*tab_columns, dir=env.basedir) as (path, _):
                input_table = make_table()
                tab_output = TabOutput(
                    tab_name="Tab 1", table_filename=path.name
                )
                env.call_render(
                    input_table,
                    params={"tabparam": "tab-1"},
                    tab_outputs={"tab-1": tab_output},
                )
Пример #4
0
    def test_render_empty_file_param(self):
        """A File param set to ``None`` is handed to ``render()`` as ``None``."""

        def render(arrow_table, params, output_path, *args, **kwargs):
            self.assertIsNone(params["file"])

        schema = ParamSchema.Dict({"file": ParamSchema.File()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            input_table = make_table()
            env.call_render(input_table, {"file": None})
Пример #5
0
def test_param_schema_implicit():
    """With no explicit ``param_schema``, the schema is built from ``parameters``."""
    string_param = {"id_name": "foo", "type": "string", "default": "X"}
    secret_param = {
        "id_name": "bar",
        "type": "secret",
        "secret_logic": {"provider": "oauth2", "service": "google"},
    }
    menu_param = {
        "id_name": "baz",
        "type": "menu",
        "options": [
            {"value": "a", "label": "A"},
            "separator",
            {"value": "c", "label": "C"},
        ],
        "default": "c",
    }
    spec = load_spec(
        {
            "id_name": "googlesheets",
            "name": "x",
            "category": "Clean",
            "parameters": [string_param, secret_param, menu_param],
        }
    )

    actual = spec.param_schema
    expected = ParamSchema.Dict(
        {
            "foo": ParamSchema.String(default="X"),
            # The secret parameter ("bar") is not in param_schema.
            "baz": ParamSchema.Enum(choices=frozenset({"a", "c"}), default="c"),
        }
    )
    assert actual == expected
Пример #6
0
    def test_dict_prompting_error_concatenate_different_types(self):
        """Wrong-type errors from both Dict children are reported in one list."""
        schema = ParamSchema.Dict(
            {
                "x": ParamSchema.Column(column_types=frozenset({"number"})),
                "y": ParamSchema.Column(column_types=frozenset({"number"})),
            }
        )
        value = {"x": "A", "y": "B"}

        with self.assertRaises(PromptingError) as cm:
            self._call_clean_value(
                schema,
                value,
                input_table_columns=[TEXT("A"), TIMESTAMP("B")],
            )

        # One error per offending column, each naming its actual type.
        expected_errors = [
            PromptingError.WrongColumnType(
                ["A"], "text", frozenset({"number"})
            ),
            PromptingError.WrongColumnType(
                ["B"], "timestamp", frozenset({"number"})
            ),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Пример #7
0
    def test_render_tab_outputs(self):
        """``tab_outputs`` given to call_render() reach ``render_arrow_v1``."""

        def render_arrow_v1(table, params, *, tab_outputs, **kwargs):
            self.assertEqual(params["tab"], "tab-x")
            self.assertEqual(tab_outputs["tab-x"].tab_name, "Tab X")
            actual_table = tab_outputs["tab-x"].table
            expected_table = make_table(
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
            )
            assert_arrow_table_equals(actual_table, expected_table)
            return ArrowRenderResult(make_table())

        schema = ParamSchema.Dict({"tab": ParamSchema.Tab()})
        test_env = ModuleTestEnv(
            param_schema=schema, render_arrow_v1=render_arrow_v1
        )
        with test_env as env:
            tab_columns = (
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
            )
            # Write the other tab's table into the env's basedir.
            with arrow_table_context(*tab_columns, dir=env.basedir) as (path, _):
                input_table = make_table()
                tab_output = TabOutput(
                    tab_name="Tab X", table_filename=path.name
                )
                env.call_render(
                    input_table,
                    params={"tab": "tab-x"},
                    tab_outputs={"tab-x": tab_output},
                )
Пример #8
0
 def test_clean_file_safe_filename(self):
     """Unsafe characters in an uploaded file's name are replaced on disk.

     The expected UploadedFile keeps the user-supplied name verbatim, while
     its filename is the upload's UUID followed by "_" and the name with
     every "/" and "$" replaced by "-".
     """
     file_uuid = "6e00511a-8ac4-4b72-9acc-9d069992b5cf"
     unsafe_name = "/etc/passwd.$/etc/passwd"

     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.steps.create(
         module_id_name="uploadfile", order=0, slug="step-1"
     )
     # Python f-strings interpolate with "{...}"; the previous "${...}"
     # (JavaScript template syntax) left a literal "$" before each id.
     key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
     s3.put_bytes(s3.UserFilesBucket, key, b"1234")
     model = UploadedFileModel.objects.create(
         step=step,
         name=unsafe_name,
         size=4,
         uuid=file_uuid,
         key=key,
     )
     with ExitStack() as inner_stack:
         result = self._call_prep_params(
             ParamSchema.Dict({"file": ParamSchema.File()}),
             {"file": file_uuid},
             step_id=step.id,
             exit_stack=inner_stack,
         )
         self.assertEqual(
             result.uploaded_files[file_uuid],
             UploadedFile(
                 unsafe_name,
                 "6e00511a-8ac4-4b72-9acc-9d069992b5cf_-etc-passwd.--etc-passwd",
                 model.created_at,
             ),
         )
Пример #9
0
def test_dict_recurse():
    """parse() converts the ``properties`` of a ``dict`` schema recursively."""
    raw_schema = {"type": "dict", "properties": {"x": {"type": "string"}}}
    parsed = parse(raw_schema)
    expected = ParamSchema.Dict(properties={"x": ParamSchema.String()})
    assert parsed == expected
Пример #10
0
 def test_clean_normal_dict(self):
     """Cleaning a dict of plain string/integer values leaves it unchanged."""
     schema = ParamSchema.Dict(
         {"str": ParamSchema.String(), "int": ParamSchema.Integer()}
     )
     value = {"str": "foo", "int": 3}
     expected = value.copy()  # cleaning should be a no-op
     cleaned = self._call_clean_value(schema, value)
     self.assertEqual(cleaned, expected)
Пример #11
0
 def test_clean_normal_dict(self):
     """clean_value() returns an equal dict for plain string/integer values."""
     number_column = Column("A", ColumnType.Number())
     input_shape = TableMetadata(3, [number_column])
     schema = ParamSchema.Dict(
         {"str": ParamSchema.String(), "int": ParamSchema.Integer()}
     )
     value = {"str": "foo", "int": 3}
     expected = value.copy()  # cleaning should be a no-op
     cleaned = clean_value(schema, value, input_shape)
     self.assertEqual(cleaned, expected)
Пример #12
0
    def test_render_with_no_kwargs(self):
        """A ``render(table, params)`` that takes no extra kwargs can be called."""

        def render(table, params):
            factor = params["n"]
            return table * factor

        schema = ParamSchema.Dict({"n": ParamSchema.Float()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            input_table = make_table(make_column("A", [1]))
            outcome = env.call_render(input_table, {"n": 2})
            actual_table = outcome.read_table()
            expected_table = make_table(make_column("A", [2]))
            assert_arrow_table_equals(actual_table, expected_table)
Пример #13
0
 def test_clean_tab_omit_unused_tabs_from_tab_outputs(self):
     """Only the tab named by the Tab param appears in ``tab_outputs``."""
     schema = ParamSchema.Dict({"x": ParamSchema.Tab()})
     available_tabs = {
         Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
         Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [TEXT("A")]),
         Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [TEXT("A")]),
     }
     result = self._call_prep_params(
         schema, {"x": "tab-1"}, tab_results=available_tabs
     )
     expected_outputs = {"tab-1": TabOutput("Tab 1", "tab-1.arrow")}
     self.assertEqual(result.tab_outputs, expected_outputs)
Пример #14
0
    def test_clean_file_happy_path(self):
        """An uploaded file is available in ``basedir`` while the ExitStack is open.

        Checks three things: the PrepParamsResult describes the upload, the
        file in ``basedir`` holds the bytes that were put in S3, and the file
        is gone once the ExitStack has exited.
        """
        file_uuid = "6e00511a-8ac4-4b72-9acc-9d069992b5cf"
        local_filename = "6e00511a-8ac4-4b72-9acc-9d069992b5cf_x.csv.gz"

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            module_id_name="uploadfile", order=0, slug="step-1"
        )
        # Python f-strings interpolate with "{...}"; the previous "${...}"
        # (JavaScript template syntax) left a literal "$" before each id.
        key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
        s3.put_bytes(s3.UserFilesBucket, key, b"1234")
        model = UploadedFileModel.objects.create(
            step=step,
            name="x.csv.gz",
            size=4,
            uuid=file_uuid,
            key=key,
        )
        with ExitStack() as inner_stack:
            result = self._call_prep_params(
                ParamSchema.Dict({"file": ParamSchema.File()}),
                {"file": file_uuid},
                step_id=step.id,
                exit_stack=inner_stack,
            )
            self.assertEqual(
                result,
                PrepParamsResult(
                    {"file": file_uuid},
                    tab_outputs={},
                    uploaded_files={
                        file_uuid: UploadedFile(
                            "x.csv.gz", local_filename, model.created_at
                        )
                    },
                ),
            )
            self.assertEqual(
                (self.basedir / local_filename).read_bytes(), b"1234"
            )

        # Assert that once `inner_stack` has exited, the file is deleted
        self.assertFalse((self.basedir / local_filename).exists())
Пример #15
0
    def test_render_file_param(self):
        """A File param reaches ``render()`` as an object whose bytes can be read."""

        def render(arrow_table, params, output_path, *args, **kwargs):
            self.assertEqual(params["file"].read_bytes(), b"hi")

        file_id = "839526fa-1adb-4eec-9d29-f5b4d2fbba30"
        schema = ParamSchema.Dict({"file": ParamSchema.File()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            filename = "839526fa-1adb-4eec-9d29-f5b4d2fbba30_x.tar.gz"
            upload_path = env.basedir / filename
            upload_path.write_bytes(b"hi")
            input_table = make_table()
            upload = UploadedFile("x.tar.gz", filename, datetime.now())
            env.call_render(
                input_table,
                {"file": file_id},
                uploaded_files={file_id: upload},
            )
Пример #16
0
 def test_clean_tab_happy_path(self):
     """A Tab param keeps its slug and gets a matching ``tab_outputs`` entry."""
     schema = ParamSchema.Dict({"x": ParamSchema.Tab()})
     tab_result = StepResult(Path("tab-1.arrow"), [TEXT("A")])
     result = self._call_prep_params(
         schema,
         {"x": "tab-1"},
         tab_results={Tab("tab-1", "Tab 1"): tab_result},
     )
     expected = PrepParamsResult(
         {"x": "tab-1"},
         tab_outputs={"tab-1": TabOutput("Tab 1", "tab-1.arrow")},
         uploaded_files={},
     )
     self.assertEqual(result, expected)
Пример #17
0
 def test_clean_multicolumn_from_other_tab(self):
     """A Multicolumn bound to a Tab param keeps only that tab's columns."""
     schema = ParamSchema.Dict(
         {
             "tab": ParamSchema.Tab(),
             "columns": ParamSchema.Multicolumn(tab_parameter="tab"),
         }
     )
     params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
     other_tab_result = StepResult(
         Path("tab-2.arrow"), [NUMBER("A-from-tab-2")]
     )
     result = self._call_prep_params(
         schema,
         params,
         input_table_columns=[NUMBER("A-from-tab-1")],
         tab_results={Tab("tab-2", "Tab 2"): other_tab_result},
     )
     # The column from the input table (tab 1) is dropped.
     self.assertEqual(result.params["columns"], ["A-from-tab-2"])
Пример #18
0
 def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
     """A Multicolumn bound to a missing tab is cleaned to an empty list.

     The other tab would not exist if the user selected it and then
     deleted it.
     """
     schema = ParamSchema.Dict(
         {
             "tab": ParamSchema.Tab(),
             "columns": ParamSchema.Multicolumn(tab_parameter="tab"),
         }
     )
     params = {"tab": "tab-missing", "columns": ["A-from-tab-1"]}
     result = self._call_prep_params(
         schema=schema,
         params=params,
         input_table_columns=[NUMBER("A-from-tab-1")],
         tab_results={},
     )
     # result.params['tab'] is deliberately not checked by this test
     self.assertEqual(result.params["columns"], [])
Пример #19
0
 def test_clean_tabs_happy_path(self):
     """A Multitab param keeps its slugs and gets one TabOutput per tab."""
     schema = ParamSchema.Dict({"x": ParamSchema.Multitab()})
     tab_results = {
         Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
         Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
     }
     result = self._call_prep_params(
         schema, {"x": ["tab-2", "tab-3"]}, tab_results=tab_results
     )
     expected = PrepParamsResult(
         {"x": ["tab-2", "tab-3"]},
         {
             "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
             "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
         },
         uploaded_files={},
     )
     self.assertEqual(result, expected)
Пример #20
0
def test_param_schema_includes_empty_tuples():
    """Parameter types with field-less ParamSchema classes stay in the schema.

    Regression test for a bug seen on 2021-04-21: empty NamedTuple
    ParamSchema classes evaluate to False, but they should still be
    included in the param_schema.
    """
    parameters = [
        {"id_name": "timezone", "name": "timezone", "type": "timezone"},
        {"id_name": "tab", "name": "tab", "type": "tab"},
        {"id_name": "condition", "type": "condition"},
    ]
    spec = load_spec(
        {
            "id_name": "x",
            "name": "x",
            "category": "Clean",
            "parameters": parameters,
        }
    )

    actual = spec.param_schema
    expected = ParamSchema.Dict(
        {
            "timezone": ParamSchema.Timezone(),
            "tab": ParamSchema.Tab(),
            "condition": ParamSchema.Condition(),
        }
    )
    assert actual == expected
Пример #21
0
    def test_dict_prompting_error(self):
        """Wrong-type errors from both Dict children are reported in one list."""
        text_columns = [
            Column("A", ColumnType.Text()),
            Column("B", ColumnType.Text()),
        ]
        input_shape = TableMetadata(3, text_columns)
        schema = ParamSchema.Dict(
            {
                "col1": ParamSchema.Column(column_types=frozenset({"number"})),
                "col2": ParamSchema.Column(
                    column_types=frozenset({"timestamp"})
                ),
            }
        )

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, {"col1": "A", "col2": "B"}, input_shape)

        # Both columns are text, so each child yields its own error.
        expected_errors = [
            PromptingError.WrongColumnType(
                ["A"], "text", frozenset({"number"})
            ),
            PromptingError.WrongColumnType(
                ["B"], "text", frozenset({"timestamp"})
            ),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Пример #22
0
    def _(self, schema: ParamSchema.Multichartseries,
          value: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """Clean every chart series in ``value``, dropping those without a column.

        Each element is cleaned as a dict with a "color" string and a
        number-typed "column". Elements whose cleaned "column" is falsy are
        left out of the result. Errors from any PromptingError raised while
        cleaning an element are added to a PromptingErrorAggregator, whose
        ``raise_if_nonempty()`` is called once every element has been seen.
        """
        # Delegate to clean_value(ParamSchema.Column) so that missing
        # columns get cleared.
        series_schema = ParamSchema.Dict(
            {
                "color": ParamSchema.String(default="#000000"),
                "column": ParamSchema.Column(
                    column_types=frozenset(["number"])
                ),
            }
        )

        errors = PromptingErrorAggregator()
        kept = []

        for series in value:
            try:
                cleaned = self.clean_value(series_schema, series)
            except PromptingError as err:
                errors.extend(err.errors)
                continue
            if cleaned["column"]:  # keep only series with a valid column
                kept.append(cleaned)

        errors.raise_if_nonempty()
        return kept
Пример #23
0
 def test_clean_tabs_preserve_ordering(self):
     """Multitab slugs come back in ``tab_results`` order, not param order."""
     schema = ParamSchema.Dict({"x": ParamSchema.Multitab()})
     # "x" lists the tabs in the wrong order; renderprep should reorder
     # them to match tab_results (tab-3 first, then tab-2).
     tab_results = {
         Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
         Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
     }
     result = self._call_prep_params(
         schema, {"x": ["tab-2", "tab-3"]}, tab_results=tab_results
     )
     expected = PrepParamsResult(
         {"x": ["tab-3", "tab-2"]},
         {
             "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
             "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
         },
         uploaded_files={},
     )
     self.assertEqual(result, expected)
Пример #24
0
 def test_validate_invalid_child(self):
     """A non-string value under a String child fails with "not a string"."""
     schema = S.Dict({"foo": S.String()})
     bad_value = {"foo": 3}
     with pytest.raises(ValueError, match="not a string"):
         schema.validate(bad_value)
Пример #25
0
 def __init__(self,
              param_schema: ParamSchema = ParamSchema.Dict({}),
              **defs):
     """Store ``defs`` together with a "ModuleSpec" entry built from ``param_schema``.

     A "ModuleSpec" key supplied in ``defs`` overrides the mock entry.
     """
     merged = {"ModuleSpec": MockModuleSpec(param_schema)}
     merged.update(defs)
     self.defs = merged
Пример #26
0
 def test_default(self):
     """A Dict schema's default maps each key to its child's default."""
     schema = S.Dict(
         {"foo": S.String(default="FOO"), "bar": S.Integer(default=3)}
     )
     expected = {"foo": "FOO", "bar": 3}
     assert schema.default == expected
Пример #27
0
 def test_validate_ok(self):
     """validate() does not raise for a dict matching every child schema."""
     children = {"foo": S.String(default="FOO"), "bar": S.Integer(default=3)}
     schema = S.Dict(children)
     schema.validate({"foo": "FOO", "bar": 3})  # passes if nothing is raised
Пример #28
0
class MockModuleVersion(NamedTuple):
    """NamedTuple whose three fields all have defaults.

    NOTE(review): the name suggests a test stand-in for a real module
    version object; confirm against the callers.
    """

    # Defaults to "mod".
    id_name: str = "mod"
    # Defaults to "abc123".
    source_version_hash: str = "abc123"
    # Defaults to an empty ParamSchema.Dict; the default instance is created
    # once, when the class body runs.
    param_schema: ParamSchema.Dict = ParamSchema.Dict({})
Пример #29
0
 def test_validate_not_dict(self):
     """Validating a non-dict value (here a list) fails with "not a dict"."""
     schema = S.Dict({"foo": S.String()})
     not_a_dict = []
     with pytest.raises(ValueError, match="not a dict"):
         schema.validate(not_a_dict)
Пример #30
0
 def test_validate_extra_key(self):
     """A value with a key the schema lacks fails with "wrong keys"."""
     schema = S.Dict({"foo": S.String()})
     value_with_extra_key = {"foo": "x", "bar": "y"}
     with pytest.raises(ValueError, match="wrong keys"):
         schema.validate(value_with_extra_key)