示例#1
0
    def test_load_dynamic_is_cached(self):
        """Two loads of the same external module share one ``render_impl``.

        The second load runs while ``importlib.util.module_from_spec`` is
        patched to ``None``; the test then asserts that both LoadedModules
        expose the very same ``render_impl`` object.
        """
        source = b'def render(table, params):\n    return table * 2'
        minio.minio_client.put_object(
            minio.ExternalModulesBucket,
            'imported/abcdef/imported.py',
            io.BytesIO(source),
            len(source),
        )

        # First load: must emit a log record on the loaded_module logger.
        with self.assertLogs('server.models.loaded_module'):
            first_version = MockModuleVersion('imported', 'abcdef', 'now')
            first = LoadedModule.for_module_version_sync(first_version)

        # Second load, with module_from_spec replaced by None.
        with patch('importlib.util.module_from_spec', None):
            second_version = MockModuleVersion('imported', 'abcdef', 'now')
            second = LoadedModule.for_module_version_sync(second_version)

        self.assertIs(first.render_impl, second.render_impl)
示例#2
0
def _get_loaded_module(wf_module: WfModule) -> Optional[LoadedModule]:
    """
    Look up the LoadedModule for `wf_module`'s module version.

    Reading `wf_module.module_version` invokes a DB query. Its value is handed
    straight to `LoadedModule.for_module_version_sync()`, which allows null.
    """
    # The attribute access below is what triggers the DB query.
    return LoadedModule.for_module_version_sync(wf_module.module_version)
示例#3
0
    def test_load_dynamic(self):
        """Load an external module uploaded to minio and render with it."""
        source = b'def render(table, params):\n    return table * 2'
        minio.client.put_object(
            Bucket=minio.ExternalModulesBucket,
            Key='imported/abcdef/imported.py',
            Body=source,
            ContentLength=len(source),
        )

        with self.assertLogs('server.models.loaded_module'):
            version = MockModuleVersion('imported', 'abcdef',
                                        ParamDType.Dict({}), 'now')
            loaded = LoadedModule.for_module_version_sync(version)

        self.assertEqual(loaded.name, 'imported:abcdef')

        # The Python module loaded inside LoadedModule is not reachable from
        # here, so render_impl cannot be compared against a known function.
        # Execute it and inspect the output instead -- which makes this
        # something of an integration test.
        with self.assertLogs('server.models.loaded_module'):
            table_in = ProcessResult(pd.DataFrame({'A': [1, 2]}))
            result = loaded.render(
                table_in,
                {'col': 'A'},
                tab_name='x',
                fetch_result=ProcessResult(),
            )

        self.assertEqual(result.error, '')
        doubled = pd.DataFrame({'A': [2, 4]})
        assert_frame_equal(result.dataframe, doubled)
示例#4
0
def _execute_wfmodule_pre(
        workflow: Workflow, wf_module: WfModule, params: Params,
        input_table_shape: TableShape,
        tab_shapes: Dict[str, Optional[StepResultShape]]) -> Tuple:
    """
    Gather the inputs render() needs; first step of execute_wfmodule().

    The returned Tuple holds, in this order:
        * loaded_module: the result of
          LoadedModule.for_module_version_sync(), for dispatching render
        * fetch_result: optional ProcessResult for dispatching render
        * param_values: a dict for dispatching render

    Raise TabCycleError or TabOutputUnreachableError if the module depends on
    tabs with errors. (render() is not called in that case.)

    Raise UnneededExecution (it comes from locked_wf_module()).

    Everything here runs synchronously within a database lock. It is a
    separate function so that, once the caller is done awaiting it, execution
    can continue in a context that doesn't use a database thread.

    `tab_shapes.keys()` must be ordered as the Workflow's tabs are.
    """
    # locked_wf_module() raises UnneededExecution
    with locked_wf_module(workflow, wf_module) as safe_wf_module:
        version = safe_wf_module.module_version
        fetched = safe_wf_module.get_fetch_result()

        # RenderContext takes the raw params too ("ugh", per the original
        # author).
        context = renderprep.RenderContext(workflow.id, input_table_shape,
                                           tab_shapes, params)
        values = renderprep.get_param_values(params, context)

        loaded = LoadedModule.for_module_version_sync(version)
        return (loaded, fetched, values)
示例#5
0
    def test_load_dynamic(self):
        """Load an external module uploaded to minio and render with it."""
        source = b"def render(table, params):\n    return table * 2"
        upload = dict(
            Bucket=minio.ExternalModulesBucket,
            Key="imported/abcdef/imported.py",
            Body=source,
            ContentLength=len(source),
        )
        minio.client.put_object(**upload)

        with self.assertLogs("server.models.loaded_module"):
            version = MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
            loaded = LoadedModule.for_module_version_sync(version)

        self.assertEqual(loaded.name, "imported:abcdef")

        # There is no handle on the Python module that LoadedModule imported,
        # so render_impl cannot be checked by identity. Run it and check the
        # output instead; this is effectively an integration test.
        with self.assertLogs("server.models.loaded_module"):
            table_in = ProcessResult(pd.DataFrame({"A": [1, 2]}))
            result = loaded.render(
                table_in, {"col": "A"}, tab_name="x", fetch_result=ProcessResult()
            )

        self.assertEqual(result.error, "")
        doubled = pd.DataFrame({"A": [2, 4]})
        assert_frame_equal(result.dataframe, doubled)
示例#6
0
 def test_load_static(self):
     """A built-in module resolves to the function in server.modules."""
     # Exercise a _real_ static module; the test passes the placeholder
     # '(ignored)' and expects the name to end in ':internal'.
     version = MockModuleVersion('pastecsv', '(ignored)',
                                 ParamDType.Dict({}), 'now')
     loaded = LoadedModule.for_module_version_sync(version)

     self.assertEqual(loaded.name, 'pastecsv:internal')
     self.assertEqual(loaded.render_impl, server.modules.pastecsv.render)
示例#7
0
    def test_load_dynamic(self):
        """Load an external module copied onto disk and render with it."""
        module_root = os.path.join(settings.IMPORTED_MODULES_ROOT, 'imported')
        os.makedirs(module_root)

        # Copy the fixture that lives at
        # <grandparent dir of this file>/test_data/imported into the
        # 'abcdef' version directory.
        here = os.path.abspath(__file__)
        fixtures_base = os.path.dirname(os.path.dirname(here))
        fixture_dir = os.path.join(fixtures_base, 'test_data', 'imported')
        version_dir = os.path.join(module_root, 'abcdef')
        shutil.copytree(fixture_dir, version_dir)

        with self.assertLogs('server.models.loaded_module'):
            version = ModuleVersion(module=Module(id_name='imported'),
                                    source_version_hash='abcdef')
            loaded = LoadedModule.for_module_version_sync(version)

        self.assertEqual(loaded.name, 'imported:abcdef')
        self.assertEqual(loaded.is_external, True)

        # The Python module loaded inside LoadedModule is not reachable from
        # here, so render_impl cannot be compared against a known function.
        # Execute it and inspect the output instead -- which makes this
        # something of an integration test.
        with self.assertLogs('server.models.loaded_module'):
            render_params = MockParams(col='A')
            table_in = pd.DataFrame({'A': [1, 2]})
            result = loaded.render(render_params, table_in,
                                   fetch_result=ProcessResult())

        self.assertEqual(result.error, '')
        doubled = pd.DataFrame({'A': [2, 4]})
        assert_frame_equal(result.dataframe, doubled)
示例#8
0
    def test_load_dynamic_ignore_test_py(self):
        """Extra ``setup.py`` / ``test_*.py`` files don't break loading.

        Alongside the real module file, two empty .py files are uploaded;
        the module must still load and render correctly.
        """
        source = b'def render(table, params):\n    return table * 2'
        minio.minio_client.put_object(
            minio.ExternalModulesBucket,
            'imported/abcdef/imported.py',
            io.BytesIO(source),
            len(source),
        )

        # Other .py files that aren't module code and should be ignored.
        for ignored_key in ('imported/abcdef/setup.py',
                            'imported/abcdef/test_imported.py'):
            minio.minio_client.put_object(minio.ExternalModulesBucket,
                                          ignored_key,
                                          io.BytesIO(b''), 0)

        with self.assertLogs('server.models.loaded_module'):
            version = MockModuleVersion('imported', 'abcdef', 'now')
            loaded = LoadedModule.for_module_version_sync(version)

        # The Python module loaded inside LoadedModule is not reachable from
        # here, so render_impl cannot be compared against a known function.
        # Execute it and inspect the output instead -- which makes this
        # something of an integration test.
        with self.assertLogs('server.models.loaded_module'):
            table_in = ProcessResult(pd.DataFrame({'A': [1, 2]}))
            result = loaded.render(
                table_in,
                {'col': 'A'},
                tab_name='x',
                fetch_result=ProcessResult(),
            )

        self.assertEqual(result.error, '')
        doubled = pd.DataFrame({'A': [2, 4]})
        assert_frame_equal(result.dataframe, doubled)
示例#9
0
    def test_load_dynamic_is_cached(self):
        """Two loads of the same external module share one ``render_impl``.

        The second load runs while ``importlib.util.module_from_spec`` is
        patched to ``None``; the test then asserts that both LoadedModules
        expose the very same ``render_impl`` object.
        """
        source = b'def render(table, params):\n    return table * 2'
        minio.client.put_object(
            Bucket=minio.ExternalModulesBucket,
            Key='imported/abcdef/imported.py',
            Body=source,
            ContentLength=len(source),
        )

        # First load: must emit a log record on the loaded_module logger.
        with self.assertLogs('server.models.loaded_module'):
            first_version = MockModuleVersion('imported', 'abcdef',
                                              ParamDType.Dict({}), 'now')
            first = LoadedModule.for_module_version_sync(first_version)

        # Second load, with module_from_spec replaced by None.
        with patch('importlib.util.module_from_spec', None):
            second_version = MockModuleVersion('imported', 'abcdef',
                                               ParamDType.Dict({}), 'now')
            second = LoadedModule.for_module_version_sync(second_version)

        self.assertIs(first.render_impl, second.render_impl)
示例#10
0
 def test_load_static(self):
     """A built-in module loads as internal and maps to PasteCSV.render."""
     # Exercise a _real_ static module; the test passes the placeholder
     # '(ignored)' as source_version_hash.
     version = ModuleVersion(module=Module(id_name='pastecsv'),
                             source_version_hash='(ignored)')
     loaded = LoadedModule.for_module_version_sync(version)

     self.assertEqual(loaded.name, 'pastecsv:internal')
     self.assertEqual(loaded.is_external, False)
     expected_render = server.modules.pastecsv.PasteCSV.render
     self.assertEqual(loaded.render_impl, expected_render)
示例#11
0
    def test_load_dynamic_is_cached(self):
        """Two loads of the same external module share one ``render_impl``.

        The second load runs while ``importlib.util.module_from_spec`` is
        patched to ``None``; the test then asserts that both LoadedModules
        expose the very same ``render_impl`` object.
        """
        source = b"def render(table, params):\n    return table * 2"
        upload = dict(
            Bucket=minio.ExternalModulesBucket,
            Key="imported/abcdef/imported.py",
            Body=source,
            ContentLength=len(source),
        )
        minio.client.put_object(**upload)

        # First load: must emit a log record on the loaded_module logger.
        with self.assertLogs("server.models.loaded_module"):
            first_version = MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
            first = LoadedModule.for_module_version_sync(first_version)

        # Second load, with module_from_spec replaced by None.
        with patch("importlib.util.module_from_spec", None):
            second_version = MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
            second = LoadedModule.for_module_version_sync(second_version)

        self.assertIs(first.render_impl, second.render_impl)
示例#12
0
    def test_load_dynamic_is_cached(self):
        """Two loads of the same on-disk module share one ``render_impl``.

        The second load runs while ``importlib.util.module_from_spec`` is
        patched to ``None``; the test then asserts that both LoadedModules
        expose the very same ``render_impl`` object.
        """
        module_root = os.path.join(settings.IMPORTED_MODULES_ROOT, 'imported')
        os.makedirs(module_root)

        # Copy the fixture that lives at
        # <grandparent dir of this file>/test_data/imported into the
        # 'abcdef' version directory.
        here = os.path.abspath(__file__)
        fixtures_base = os.path.dirname(os.path.dirname(here))
        fixture_dir = os.path.join(fixtures_base, 'test_data', 'imported')
        version_dir = os.path.join(module_root, 'abcdef')
        shutil.copytree(fixture_dir, version_dir)

        # First load: must emit a log record on the loaded_module logger.
        with self.assertLogs('server.models.loaded_module'):
            first_version = ModuleVersion(module=Module(id_name='imported'),
                                          source_version_hash='abcdef')
            first = LoadedModule.for_module_version_sync(first_version)

        # Second load, with module_from_spec replaced by None.
        with patch('importlib.util.module_from_spec', None):
            second_version = ModuleVersion(module=Module(id_name='imported'),
                                           source_version_hash='abcdef')
            second = LoadedModule.for_module_version_sync(second_version)

        self.assertIs(first.render_impl, second.render_impl)
示例#13
0
def _execute_wfmodule_pre(
    workflow: Workflow,
    wf_module: WfModule,
    params: Dict[str, Any],
    input_table_shape: TableShape,
    tab_shapes: Dict[str, Optional[StepResultShape]],
) -> Tuple[Optional[LoadedModule], Optional[ProcessResult], Dict[str, Any]]:
    """
    Gather the inputs render() needs; first step of execute_wfmodule().

    The returned Tuple holds, in this order:
        * loaded_module: a LoadedModule for dispatching render, or None when
          the module was deleted
        * fetch_result: optional ProcessResult for dispatching render
        * param_values: a dict for dispatching render

    When loaded_module is None, the tuple is `(None, None, {})` and nothing
    else is fetched.

    Raise TabCycleError or TabOutputUnreachableError if the module depends on
    tabs with errors. (render() is not called in that case.)

    Raise PromptingError if the module parameters are invalid. (render() is
    skipped and the user is prompted with quickfixes in that case.)

    Raise UnneededExecution (it comes from locked_wf_module()).

    Everything here runs synchronously within a database lock. It is a
    separate function so that, once the caller is done awaiting it, execution
    can continue in a context that doesn't use a database thread.

    `tab_shapes.keys()` must be ordered as the Workflow's tabs are.
    """
    # locked_wf_module() raises UnneededExecution
    with locked_wf_module(workflow, wf_module) as safe_wf_module:
        version = safe_wf_module.module_version
        loaded = LoadedModule.for_module_version_sync(version)
        if loaded is None:
            # module was deleted. Skip other fetches.
            return (None, None, {})

        fetched = safe_wf_module.get_fetch_result()

        # RenderContext takes the raw params too ("ugh", per the original
        # author).
        context = renderprep.RenderContext(workflow.id, wf_module.id,
                                           input_table_shape, tab_shapes,
                                           params)

        # Without a module version there is no schema; use an empty Dict.
        schema = (
            ParamDType.Dict({}) if version is None
            else version.param_schema
        )
        values = renderprep.get_param_values(schema, params, context)

        return (loaded, fetched, values)
示例#14
0
    def test_load_dynamic_from_none(self):
        """A LoadedModule built from None reports 'module was deleted'.

        Both render() and fetch are expected to log and to return a
        ProcessResult carrying the matching error message.
        """
        deleted = LoadedModule.for_module_version_sync(None)

        with self.assertLogs('server.models.loaded_module'):
            table_in = ProcessResult(pd.DataFrame({'A': [1]}))
            rendered = deleted.render(
                table_in,
                {},
                tab_name='x',
                fetch_result=ProcessResult(),
            )
        render_error = ProcessResult(
            error='Cannot render: module was deleted')
        self.assertEqual(rendered, render_error)

        with self.assertLogs('server.models.loaded_module'):
            fetched = call_fetch(deleted, {})
        fetch_error = ProcessResult(
            error='Cannot fetch: module was deleted')
        self.assertEqual(fetched, fetch_error)
    def test_load_and_dispatch(self):
        """Import a module from a directory, then render through it.

        The fixture module is expected to double ``test_column`` and triple
        every column in ``test_multicolumn`` (per the expected table below).
        The external-module cache is cleared afterwards, pass or fail.
        """
        try:
            module_dir = self._test_module_path("importable")
            with self.assertLogs():
                module_version = import_module_from_directory(
                    "123456", Path(module_dir)
                )

            # Build a workflow with one step that uses the imported module.
            step_params = {
                **module_version.default_params,
                "test_column": "M",  # double this
                "test_multicolumn": ["F", "Other"],  # triple these
            }
            workflow = Workflow.objects.create()
            tab = workflow.tabs.create(position=0)
            step = tab.wf_modules.create(
                order=0,
                slug="step-1",
                module_id_name=module_version.id_name,
                params=step_params,
            )

            # Does it render right?
            classes = ["math", "english", "history", "economics"]
            table_in = pd.DataFrame({
                "Class": classes,
                "M": [10, np.nan, 11, 20],
                "F": [12, 7, 13, 20],
                "Other": [100, 100, 13, 20],
            })
            table_out = pd.DataFrame({
                "Class": list(classes),
                "M": [20, np.nan, 22, 40],
                "F": [36, 21, 39, 60],
                "Other": [300, 300, 39, 60],
            })

            with self.assertLogs():
                loaded = LoadedModule.for_module_version_sync(module_version)
                result = loaded.render(
                    ProcessResult(table_in), step.get_params(), "x", None
                )
            self.assertEqual(result.error, "")
            assert_frame_equal(result.dataframe, table_out)
        finally:
            # Don't let the loaded module leak into other tests.
            server.models.loaded_module.load_external_module.cache_clear()
    def test_load_and_dispatch(self):
        """Import a module from a directory, then render through it.

        The fixture module is expected to double ``test_column`` and triple
        every column in ``test_multicolumn`` (per the expected table below).
        The external-module cache is cleared afterwards, pass or fail.
        """
        try:
            module_dir = self._test_module_path('importable')
            with self.assertLogs():
                module_version = import_module_from_directory(
                    '123456', Path(module_dir))

            # Build a workflow with one step that uses the imported module.
            step_params = {
                **module_version.default_params,
                'test_column': 'M',  # double this
                'test_multicolumn': ['F', 'Other'],  # triple these
            }
            workflow = Workflow.objects.create()
            tab = workflow.tabs.create(position=0)
            step = tab.wf_modules.create(
                order=0,
                module_id_name=module_version.id_name,
                params=step_params,
            )

            # Does it render right?
            classes = ['math', 'english', 'history', 'economics']
            table_in = pd.DataFrame({
                'Class': classes,
                'M': [10, np.nan, 11, 20],
                'F': [12, 7, 13, 20],
                'Other': [100, 100, 13, 20],
            })
            table_out = pd.DataFrame({
                'Class': list(classes),
                'M': [20, np.nan, 22, 40],
                'F': [36, 21, 39, 60],
                'Other': [300, 300, 39, 60],
            })

            with self.assertLogs():
                loaded = LoadedModule.for_module_version_sync(module_version)
                result = loaded.render(ProcessResult(table_in),
                                       step.get_params(), 'x', None)
            self.assertEqual(result.error, '')
            assert_frame_equal(result.dataframe, table_out)
        finally:
            # Don't let the loaded module leak into other tests.
            server.models.loaded_module.load_external_module.cache_clear()
示例#17
0
    def test_load_and_dispatch(self):
        """Import a module from a fake GitHub clone, then render through it.

        The test sets ``test_column`` to ``M`` and ``test_multicolumn`` to
        ``F,Other`` and expects the rendered table to have ``M`` doubled and
        ``F``/``Other`` tripled. The external-module cache is cleared
        afterwards, pass or fail.
        """
        try:
            test_dir = self.fake_github_clone()

            import_module_from_directory('https://github.com/account/reponame',
                                         'reponame', '123456', test_dir)

            # Module and ModuleVersion should have loaded -- these will raise
            # exception if they don't exist
            module = Module.objects.get(id_name=self.importable_id_name)
            module_version = ModuleVersion.objects.get(module=module)

            # Create a test workflow that uses this imported module
            workflow = add_new_workflow('Dynamic Dispatch Test Workflow')
            wfm = add_new_wf_module(workflow, module_version, order=1)

            # These will fail if we haven't correctly loaded the json
            # describing the parameters
            colparam = get_param_by_id_name('test_column', wf_module=wfm)
            multicolparam = get_param_by_id_name('test_multicolumn',
                                                 wf_module=wfm)

            # Does it render right?
            #
            # The 'english' row previously read '7\,200': an invalid escape
            # sequence that put a literal backslash into the F column, so the
            # column parsed as strings instead of numbers.
            test_csv = (
                'Class,M,F,Other\n'
                'math,10,12,100\n'
                'english,,7,200\n'
                'history,11,13,\n'
                'economics,20,20,20'
            )
            test_table = pd.read_csv(io.StringIO(test_csv), header=0,
                                     skipinitialspace=True)
            test_table_out = test_table.copy()
            test_table_out['M'] *= 2
            test_table_out[['F', 'Other']] *= 3

            colparam.set_value('M')  # double this
            multicolparam.set_value('F,Other')  # triple these
            with self.assertLogs():
                lm = LoadedModule.for_module_version_sync(module_version)
                result = lm.render(wfm.get_params(), test_table, None)
            self.assertEqual(result, ProcessResult(test_table_out))
        finally:
            # Don't let the loaded module leak into other tests.
            server.models.loaded_module.load_external_module.cache_clear()
    def test_load_and_dispatch(self):
        """Import a module from a directory, then render through it.

        The test sets ``test_column`` to ``M`` and ``test_multicolumn`` to
        ``F,Other`` and expects the rendered table to have ``M`` doubled and
        ``F``/``Other`` tripled. The external-module cache is cleared
        afterwards, pass or fail.
        """
        try:
            module_dir = self._test_module_path('importable')
            with self.assertLogs():
                module_version = import_module_from_directory(
                    '123456', Path(module_dir))

            # Build a workflow with one step that uses the imported module.
            workflow = Workflow.objects.create()
            tab = workflow.tabs.create(position=0)
            step = tab.wf_modules.create(
                order=0,
                module_id_name=module_version.id_name,
                params=module_version.default_params,
            )

            # Does it render right?
            csv_text = (
                'Class,M,F,Other\n'
                'math,10,12,100\n'
                'english,,7,200\n'
                'history,11,13,\n'
                'economics,20,20,20'
            )
            table_in = pd.read_csv(io.StringIO(csv_text), header=0,
                                   skipinitialspace=True)
            table_out = table_in.copy()
            table_out['M'] *= 2
            table_out[['F', 'Other']] *= 3

            # Overlay the two params under test onto the defaults and
            # persist only that field.
            step.params = dict(
                step.params,
                test_column='M',  # double this
                test_multicolumn='F,Other',  # triple these
            )
            step.save(update_fields=['params'])

            with self.assertLogs():
                loaded = LoadedModule.for_module_version_sync(module_version)
                result = loaded.render(ProcessResult(table_in),
                                       step.get_params(), 'x', None)
            self.assertEqual(result, ProcessResult(table_out))
        finally:
            # Don't let the loaded module leak into other tests.
            server.models.loaded_module.load_external_module.cache_clear()
示例#19
0
def _execute_wfmodule_pre(wf_module: WfModule) -> Tuple:
    """
    Decide whether render() is needed; first step of execute_wfmodule().

    The returned 5-Tuple holds, in this order:
        * cached_render_result: if non-None, the quick return value of
          execute_wfmodule(). The other four items are None in that case.
        * loaded_module: the result of
          LoadedModule.for_module_version_sync(), for dispatching render
        * params: Params for dispatching render
        * fetch_result: optional ProcessResult for dispatching render
        * old_result: if notifications are set and a stale cached result
          exists, that previous result, to compare against after render.

    Everything here runs synchronously within a database lock. It is a
    separate function so that, once the caller is done awaiting it, execution
    can continue in a context that doesn't use a database thread.
    """
    with locked_wf_module(wf_module) as safe_wf_module:
        # NOTE(review): the cached result, last_relevant_delta_id and
        # module_version are read from `wf_module`, while notifications,
        # params and fetch result come from `safe_wf_module` -- confirm the
        # mix is intended.
        cached = wf_module.get_cached_render_result()

        # A cache stamped with the current delta is still good: skip
        # everything. No old_result is needed because render() won't run, so
        # the output can't change.
        if cached and cached.delta_id == wf_module.last_relevant_delta_id:
            return (cached, None, None, None, None)

        # A stale cache is only worth keeping when notifications are on.
        if cached and safe_wf_module.notifications:
            previous = cached.result
        else:
            previous = None

        version = wf_module.module_version
        render_params = safe_wf_module.get_params()
        fetched = safe_wf_module.get_fetch_result()

        loaded = LoadedModule.for_module_version_sync(version)

        return (None, loaded, render_params, fetched, previous)
示例#20
0
 def test_load_dynamic_from_none(self):
     """Passing None as the module version yields None."""
     self.assertIsNone(LoadedModule.for_module_version_sync(None))