def test_render_exception(self):
    """An exception raised inside ``render()`` surfaces as ModuleExitedError.

    The compiled module raises ``RuntimeError('fail')`` from ``render``. The
    test checks the exit code and that the captured log names the exception.
    """
    module = self.kernel.compile(
        MockPath(
            ["foo.py"],
            b"import os\ndef render(table, params): raise RuntimeError('fail')",
        ),
        "foo",
    )

    with self.assertRaises(ModuleExitedError) as cm:
        with arrow_table_context({"A": [1]}, dir=self.basedir) as input_table:
            # NOTE(review): presumably loosens permissions so the module's
            # process can read the input file -- confirm.
            input_table.path.chmod(0o644)
            with self.chroot_context.tempfile_context(
                prefix="output-", dir=self.basedir
            ) as output_path:
                self.kernel.render(
                    module,
                    self.chroot_context,
                    self.basedir,
                    input_table,
                    types.Params({"m": 2.5, "s": "XX"}),
                    types.Tab("tab-1", "Tab 1"),
                    None,
                    output_filename=output_path.name,
                )

    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(cm.exception.exit_code, 1)  # Python exit code
    self.assertRegex(cm.exception.log, r"\bRuntimeError\b")
    self.assertRegex(cm.exception.log, r"\bfail\b")
    # Regression test: [2019-10-02], the "pyspawner_main()->spawn_child()"
    # process would raise _another_ exception while exiting. It would try to
    # close an already-closed socket.
    self.assertNotRegex(cm.exception.log, r"Bad file descriptor")
def test_fetch_happy_path(self):
    """``fetch()`` on a well-formed module yields a readable Parquet file.

    The module's ``fetch`` returns a one-column DataFrame built from
    ``params["a"]``; the test reads the result file back and compares it.
    """
    module = self.kernel.compile(
        MockPath(
            ["foo.py"],
            textwrap.dedent(
                """
                import pandas as pd

                def fetch(params):
                    return pd.DataFrame({"A": [params["a"]]})
                """
            ).encode("utf-8"),
        ),
        "foo",
    )

    with self.chroot_context.tempfile_context(
        prefix="output-", dir=self.basedir
    ) as output_path:
        result = self.kernel.fetch(
            module,
            self.chroot_context,
            self.basedir,
            types.Params({"a": 1}),
            {},
            None,
            None,
            output_filename=output_path.name,
        )

        # Assertions stay inside the tempfile context so the output file is
        # still present while it is read back.
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(result.errors, [])
        table = pyarrow.parquet.read_pandas(str(result.path))
        self.assertEqual(table.to_pydict(), {"A": [1]})
def test_compile_validate_works_with_dataclasses(self):
    """
    Test we can compile @dataclass

    @dataclass inspects `sys.modules`, so the module needs to be in
    `sys.modules` when @dataclass is run.
    """
    result = self.kernel.compile(
        MockPath(
            ["foo.py"],
            textwrap.dedent(
                """
                from __future__ import annotations
                from dataclasses import dataclass

                def render(table, params):
                    return table

                @dataclass
                class A:
                    y: int
                """
            ).encode("utf-8"),
        ),
        "foo",
    )
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(result.module_slug, "foo")
def test_migrate_params(self):
    """``migrate_params()`` returns whatever the module's function returns.

    The module wraps its input under the ``"nested"`` key, so the result
    must contain the original params one level down.
    """
    module = self.kernel.compile(
        MockPath(["foo.py"], b"def migrate_params(params): return {'nested': params}"),
        "foo",
    )
    result = self.kernel.migrate_params(module, {"foo": 123})
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(result, {"nested": {"foo": 123}})
def test_compile_validate_render_arrow_instead_of_render(self):
    """A module defining ``render_arrow`` (and no ``render``) compiles.

    Checks the compile result's slug and that the marshalled code object is
    ``bytes``.
    """
    result = self.kernel.compile(
        MockPath(
            ["foo.py"],
            b"from cjwkernel.types import RenderResult\ndef render_arrow(table, params, _1, _2, _3, output_path): return RenderResult()",
        ),
        "foo",
    )
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(result.module_slug, "foo")
    self.assertIsInstance(result.marshalled_code_object, bytes)
def test_compile_validate_bad_fetch_signature(self):
    """Compiling a module whose ``fetch`` takes two arguments must fail.

    The failure is expected as ModuleExitedError with exit code 1 and an
    AssertionError message in the captured log.
    """
    bad_source = b"def fetch(table, params): return table"
    bad_module_path = MockPath(["foo.py"], bad_source)

    with self.assertRaises(ModuleExitedError) as raised:
        # The child will print an assertion error to stderr.
        self.kernel.compile(bad_module_path, "foo")

    failure = raised.exception
    self.assertRegex(failure.log, r"AssertionError")
    self.assertRegex(failure.log, r"fetch must take one positional argument")
    self.assertEqual(failure.exit_code, 1)
def test_sandbox_no_open_file_descriptors(self):
    """Module code must not inherit any file descriptor in ``range(3, 100)``.

    The module source probes each descriptor with ``os.fstat`` at import
    time and asserts that every probe fails with ``EBADF``. The test passes
    when ``compile()`` returns without raising.
    """
    # NOTE(review): this file contains another definition with this same
    # name; if both live in one class, the later one shadows the earlier.
    self.kernel.compile(  # and validate!
        MockPath(
            ["foo.py"],
            # The embedded source needs real line breaks to be valid Python.
            textwrap.dedent(
                """
                import errno
                import os

                for i in range(3, 100):
                    try:
                        os.fstat(i)
                        assert False, f"We passed fd{i} which can be used to escape chroot"
                    except OSError as err:
                        assert err.errno == errno.EBADF, "we wanted EBADF; got %d" % err.errno
                """
            ).encode("utf-8"),
        ),
        "foo",
    )
def test_render_happy_path(self):
    """``render()`` applies the module's function to the input table.

    The module multiplies column ``A`` by ``params['m']`` and appends
    ``params['s']`` to column ``B``; the test compares the output table.
    """
    module = self.kernel.compile(
        MockPath(
            ["foo.py"],
            b"import pandas as pd\ndef render(table, params): return pd.DataFrame({'A': table['A'] * params['m'], 'B': table['B'] + params['s']})",
        ),
        "foo",
    )

    with arrow_table_context(
        {"A": [1, 2, 3], "B": ["a", "b", "c"]},
        columns=[
            types.Column("A", types.ColumnType.Number("{:,d}")),
            types.Column("B", types.ColumnType.Text()),
        ],
        dir=self.basedir,
    ) as input_table:
        # NOTE(review): presumably loosens permissions so the module's
        # process can read the input file -- confirm.
        input_table.path.chmod(0o644)
        with self.chroot_context.tempfile_context(
            prefix="output-", dir=self.basedir
        ) as output_path:
            result = self.kernel.render(
                module,
                self.chroot_context,
                self.basedir,
                input_table,
                types.Params({"m": 2.5, "s": "XX"}),
                types.Tab("tab-1", "Tab 1"),
                None,
                output_filename=output_path.name,
            )

            # Checked inside the tempfile context so the output file is
            # still present while the result table is inspected.
            # assertEqual replaces the assertEquals alias removed in 3.12.
            self.assertEqual(
                result.table.table.to_pydict(),
                {"A": [2.5, 5.0, 7.5], "B": ["aXX", "bXX", "cXX"]},
            )
def test_sandbox_no_open_file_descriptors(self):
    """Module code must not inherit any file descriptor in ``range(3, 100)``.

    The module source probes each descriptor with ``os.fstat`` at import
    time. ``EBADF`` is the expected outcome; any other ``OSError`` is
    re-raised and a successful ``fstat`` trips an assertion. The test
    passes when ``compile()`` returns without raising.
    """
    # NOTE(review): this file contains another definition with this same
    # name; if both live in one class, the later one shadows the earlier.
    self.kernel.compile(  # and validate!
        MockPath(
            ["foo.py"],
            # The embedded source needs real line breaks to be valid Python.
            textwrap.dedent(
                """
                import errno
                import os

                for i in range(3, 100):
                    try:
                        os.fstat(i)
                        assert False, f"We passed fd{i} which can be used to escape chroot"
                    except OSError as err:
                        if err.errno == errno.EBADF:
                            pass  # this is what we expect: no FDs
                        else:
                            raise  # what the heck happened?
                """
            ).encode("utf-8"),
        ),
        "foo",
    )
def test_render_killed_hard_out_of_memory(self):
    """A module process killed by SIGKILL surfaces as ModuleExitedError.

    The module sends signal 9 to its own pid inside ``render``; the test
    expects exit code -9 and an empty log.
    """
    # This is similar to out-of-memory kill (but with different exit_code).
    # Testing out-of-memory is slow because we have to force the kernel to,
    # er, run out of memory. On a typical dev machine, that means filling
    # swap space -- gumming up the whole system. Not practical.
    #
    # In case of out-of-memory, the Linux out-of-memory killer will find
    # and kill a process using SIGKILL.
    #
    # So let's simulate that SIGKILL.
    module = self.kernel.compile(
        MockPath(
            ["foo.py"],
            b"import os\nimport time\ndef render(table, params): os.kill(os.getpid(), 9); time.sleep(1)",
        ),
        "foo",
    )

    with self.assertRaises(ModuleExitedError) as cm:
        with arrow_table_context({"A": [1]}, dir=self.basedir) as input_table:
            # NOTE(review): presumably loosens permissions so the module's
            # process can read the input file -- confirm.
            input_table.path.chmod(0o644)
            with tempfile_context(prefix="output-", dir=self.basedir) as output_path:
                result = self.kernel.render(
                    module,
                    self.basedir,
                    input_table,
                    types.Params({"m": 2.5, "s": "XX"}),
                    types.Tab("tab-1", "Tab 1"),
                    None,
                    output_filename=output_path.name,
                )
                # Only reached if render() unexpectedly returns; the printed
                # result then helps explain the assertRaises failure.
                print(repr(result))

    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(cm.exception.exit_code, -9)  # SIGKILL
    self.assertEqual(cm.exception.log, "")
def test_render_kill_timeout(self):
    """A ``render()`` that outlasts ``render_timeout`` raises ModuleTimeoutError.

    The module's ``render`` calls ``time.sleep(2)`` while the kernel's
    ``render_timeout`` attribute is patched to 0.001.
    """
    sleepy_source = b"import time\ndef render(table, params):\n time.sleep(2)"
    module = self.kernel.compile(MockPath(["foo.py"], sleepy_source), "foo")

    # Same entry order as nesting the two context managers.
    with patch.object(self.kernel, "render_timeout", 0.001), self.assertRaises(
        ModuleTimeoutError
    ):
        with arrow_table_context({"A": [1]}, dir=self.basedir) as input_table:
            input_table.path.chmod(0o644)
            with tempfile_context(
                prefix="output-", dir=self.basedir
            ) as output_path:
                self.kernel.render(
                    module,
                    self.basedir,
                    input_table,
                    types.Params({}),
                    types.Tab("tab-1", "Tab 1"),
                    None,
                    output_filename=output_path.name,
                )
def _load(self, filename, data):
    """Load a ModuleSpec from ``data`` wrapped in a MockPath.

    The MockPath is built with the parts ``["root", filename]`` and passed
    to ``ModuleSpec.load_from_path``, whose return value is returned as-is.
    """
    return ModuleSpec.load_from_path(MockPath(["root", filename], data))
def test_render_error(self):
    """Rendering an empty uploaded file returns an error message string."""
    blank_table = pd.DataFrame()
    params = {"file": MockPath(["x.csv"], b""), "has_header": True}
    outcome = upload.render(blank_table, params)
    self.assertEqual(outcome, "This file is empty")
def test_render_success(self):
    """Rendering a one-row CSV with a header returns the parsed DataFrame."""
    blank_table = pd.DataFrame()
    params = {"file": MockPath(["x.csv"], b"A,B\na,b"), "has_header": True}
    outcome = upload.render(blank_table, params)
    expected = pd.DataFrame({"A": ["a"], "B": ["b"]})
    assert_frame_equal(outcome, expected)
def test_compile_validate_happy_path(self):
    """A minimal module with a valid ``render`` compiles successfully.

    Checks the compile result's slug and that the marshalled code object is
    ``bytes``.
    """
    result = self.kernel.compile(
        MockPath(["foo.py"], b"def render(table, params): return table"),
        "foo",
    )
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(result.module_slug, "foo")
    self.assertIsInstance(result.marshalled_code_object, bytes)
def test_migrate_params_retval_not_thrift_ready(self):
    """A ``migrate_params`` that returns ``range(2)`` raises ModuleExitedError."""
    source = b"def migrate_params(params): return range(2)"
    module = self.kernel.compile(MockPath(["foo.py"], source), "foo")
    old_params = {"foo": 123}

    with self.assertRaises(ModuleExitedError):
        self.kernel.migrate_params(module, old_params)
def test_compile_validate_exited_error(self):
    """Module code that fails at import time raises ModuleExitedError.

    The module body calls an undefined name, so the captured log is
    expected to mention NameError and the exit code to be 1.
    """
    broken_module_path = MockPath(["foo.py"], b"undefined()")

    with self.assertRaises(ModuleExitedError) as raised:
        # The child's error output ends up in the exception's log.
        self.kernel.compile(broken_module_path, "foo")

    failure = raised.exception
    self.assertRegex(failure.log, r"NameError")
    self.assertEqual(failure.exit_code, 1)
def test_compile_syntax_error(self):
    """Source that is not valid Python raises ModuleCompileError."""
    # "de" instead of "def" makes the source unparseable.
    invalid_source = b"de render(table, params): return table"
    invalid_module_path = MockPath(["foo.py"], invalid_source)

    with self.assertRaises(ModuleCompileError):
        self.kernel.compile(invalid_module_path, "foo")