def _test_fetch(
    self,
    fetch_fn,
    *,
    params=None,
    secrets=None,
    last_fetch_result=None,
    input_table_parquet_path=None,
    output_filename=None,
):
    """Call `module.fetch_thrift()` with `module.fetch` patched to `fetch_fn`.

    `params` and `secrets` default to empty dicts when omitted (or None).
    When `output_filename` is None, a temporary file is created under
    `self.basedir` for the duration of the call.

    Return the Thrift result converted by `thrift_fetch_result_to_arrow()`.
    """
    # Use None as the "not given" marker so that no dict object is shared
    # between calls through the function's defaults.
    params = {} if params is None else params
    secrets = {} if secrets is None else secrets

    # TODO simplify this logic and move it to ModuleTestEnv
    with ExitStack() as ctx:
        ctx.enter_context(patch.object(module, "fetch", fetch_fn))
        if output_filename is None:
            # Make a temporary output filename -- this will make `fetch()`
            # complete, but callers won't be able to see the data it
            # outputs because we'll delete the file too soon.
            output_filename = ctx.enter_context(
                tempfile_context(dir=self.basedir)
            ).name
        thrift_result = module.fetch_thrift(
            ttypes.FetchRequest(
                basedir=str(self.basedir),
                params=pydict_to_thrift_json_object(params),
                secrets=pydict_to_thrift_json_object(secrets),
                last_fetch_result=(
                    arrow_fetch_result_to_thrift(last_fetch_result)
                    if last_fetch_result is not None
                    else None
                ),
                input_table_parquet_filename=(
                    input_table_parquet_path.name
                    if input_table_parquet_path is not None
                    else None
                ),
                output_filename=output_filename,
            )
        )
        return thrift_fetch_result_to_arrow(thrift_result, self.basedir)
def fetch(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    params: Dict[str, Any],
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    input_parquet_filename: Optional[str],
    output_filename: str,
) -> FetchResult:
    """Invoke the module's `fetch_thrift()` and hand back what it produced.

    The request's `basedir` is `basedir` re-rooted at "/" relative to the
    chroot root. The call goes through `self._run_in_child()` while
    `chroot_context.writable_file(basedir / output_filename)` is active;
    `chroot_context.clear_unowned_edits()` always runs afterwards.

    Raise ModuleError if the module has a bug.
    """
    root = chroot_context.chroot.root
    module_basedir = Path("/") / basedir.relative_to(root)

    # Build each request field up front, in the order the request lists them.
    thrift_basedir = str(module_basedir)
    thrift_params = pydict_to_thrift_json_object(params)
    thrift_secrets = pydict_to_thrift_json_object(secrets)
    if last_fetch_result is None:
        thrift_last_result = None
    else:
        thrift_last_result = arrow_fetch_result_to_thrift(last_fetch_result)

    fetch_request = ttypes.FetchRequest(
        basedir=thrift_basedir,
        params=thrift_params,
        secrets=thrift_secrets,
        last_fetch_result=thrift_last_result,
        input_table_parquet_filename=input_parquet_filename,
        output_filename=output_filename,
    )

    try:
        with chroot_context.writable_file(basedir / output_filename):
            thrift_result = self._run_in_child(
                chroot_dir=root,
                network_config=pyspawner.NetworkConfig(),
                compiled_module=compiled_module,
                timeout=self.fetch_timeout,
                result=ttypes.FetchResult(),
                function="fetch_thrift",
                args=[fetch_request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    # A non-empty reported filename must be the one we asked for.
    reported_filename = thrift_result.filename
    if reported_filename and reported_filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    # TODO validate result isn't too large. If result is dataframe it makes
    # sense to truncate; but fetch results aren't necessarily data frames.
    # It's up to the module to enforce this logic ... but we need to set a
    # maximum file size.
    return thrift_fetch_result_to_arrow(thrift_result, basedir)
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_filename: str,
    params: Dict[str, Any],
    tab_name: str,
    fetch_result: Optional[FetchResult],
    tab_outputs: Dict[str, TabOutput],
    uploaded_files: Dict[str, UploadedFile],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    `tab_outputs` and `uploaded_files` are mappings: each value is converted
    to its Thrift form and sent under the same key. The request's `basedir`
    is `basedir` re-rooted at "/" relative to the chroot root.

    The call goes through `self._run_in_child()` while
    `chroot_context.writable_file(basedir / output_filename)` is active;
    `chroot_context.clear_unowned_edits()` always runs afterwards.

    Raise ModuleError if the module has a bug.
    """
    chroot_dir = chroot_context.chroot.root
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.RenderRequest(
        basedir=str(basedir_seen_by_module),
        params=pydict_to_thrift_json_object(params),
        tab_name=tab_name,
        tab_outputs={
            k: arrow_tab_output_to_thrift(v) for k, v in tab_outputs.items()
        },
        uploaded_files={
            k: arrow_uploaded_file_to_thrift(v) for k, v in uploaded_files.items()
        },
        fetch_result=(
            None
            if fetch_result is None
            else arrow_fetch_result_to_thrift(fetch_result)
        ),
        output_filename=output_filename,
        input_filename=input_filename,
    )

    # Only these module slugs are given a NetworkConfig; every other module
    # is run with network_config=None.
    if compiled_module.module_slug in {"pythoncode", "ACS2016"}:
        # TODO disallow networking; make network_config always None
        network_config = pyspawner.NetworkConfig()
    else:
        network_config = None

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=network_config,
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    return thrift_render_result_to_arrow(result)
def call_render(render: Callable, request: ttypes.RenderRequest) -> ttypes.RenderResult:
    """Translate a Thrift render request into a call to `render`.

    Loads the input table and every tab-output table from files under
    `request.basedir`, converts the remaining request fields to their
    Python forms, and calls `render`. The returned table is written to
    `request.output_filename` under the same directory; errors and JSON are
    returned in a `ttypes.RenderResult`.

    Raise ValueError if `render` returns anything but an ArrowRenderResult.
    """
    root = Path(request.basedir)
    input_table = load_trusted_arrow_file(root / request.input_filename)
    render_params = thrift_json_object_to_pydict(request.params)

    tab_outputs = {}
    for key, thrift_tab in request.tab_outputs.items():
        tab_outputs[key] = TabOutput(
            tab_name=thrift_tab.tab_name,
            table=load_trusted_arrow_file(root / thrift_tab.table_filename),
        )

    uploaded_files = {}
    for key, thrift_file in request.uploaded_files.items():
        uploaded_files[key] = UploadedFile(
            name=thrift_file.name,
            path=(root / thrift_file.filename),
            # The timestamp is divided by one million before conversion;
            # the result is a naive datetime in UTC.
            uploaded_at=datetime.datetime.utcfromtimestamp(
                thrift_file.uploaded_at_timestampus / 1000000.0
            ),
        )

    fetch_result = (
        thrift_fetch_result_to_arrow(request.fetch_result, root)
        if request.fetch_result is not None
        else None
    )

    outcome = render(
        input_table,
        render_params,
        settings=settings,
        tab_name=request.tab_name,
        tab_outputs=tab_outputs,
        uploaded_files=uploaded_files,
        fetch_result=fetch_result,
    )

    if not isinstance(outcome, ArrowRenderResult):
        # Crash. The module author wrote a buggy module.
        raise ValueError(
            "render_arrow_v1() must return a cjwmodule.arrow.types.ArrowRenderResult"
        )

    output_path = root / request.output_filename
    output_table = outcome.table
    with pa.ipc.RecordBatchFileWriter(
        output_path, schema=output_table.schema
    ) as writer:
        writer.write_table(output_table)

    thrift_errors = [arrow_render_error_to_thrift(e) for e in outcome.errors]
    thrift_json = pydict_to_thrift_json_object(outcome.json)
    return ttypes.RenderResult(errors=thrift_errors, json=thrift_json)
def migrate_params(
    self, compiled_module: CompiledModule, params: Dict[str, Any]
) -> Dict[str, Any]:
    """Call a module's migrate_params().

    `params` is converted to a Thrift JSON object and passed as the only
    argument to the module's `migrate_params_thrift` function through
    `self._run_in_child()`, using `READONLY_CHROOT_DIR` as the chroot and
    `network_config=None`.

    Return the `params` field of the response, converted back to a dict.
    """
    response = self._run_in_child(
        chroot_dir=READONLY_CHROOT_DIR,
        network_config=None,
        compiled_module=compiled_module,
        timeout=self.migrate_params_timeout,
        result=ttypes.MigrateParamsResult(),
        function="migrate_params_thrift",
        args=[pydict_to_thrift_json_object(params)],
    )
    return thrift_json_object_to_pydict(response.params)
def call_render(
    self,
    table: pa.Table,
    params: Dict[str, Any],
    tab_name: str = "Tab 1",
    tab_outputs: Optional[Dict[str, TabOutput]] = None,
    fetch_result: Optional[FetchResult] = None,
    uploaded_files: Optional[Dict[str, UploadedFile]] = None,
) -> RenderOutcome:
    """Conveniently call the module's `render_thrift()`.

    The calling convention is designed for ease of testing.

    `tab_outputs` and `uploaded_files` default to empty mappings when
    omitted (or None). The input table is written under `self.basedir`, and
    the process's working directory is switched to `self.basedir` for the
    duration of the call and then restored.

    Return a RenderOutcome holding the converted result and the output path.
    """
    # Use None as the "not given" marker so that no dict object is shared
    # between calls through the function's defaults.
    tab_outputs = {} if tab_outputs is None else tab_outputs
    uploaded_files = {} if uploaded_files is None else uploaded_files

    # tempfile will be deleted in __exit__().
    fd, output_filename = mkstemp(prefix="out-", suffix=".arrow", dir=self.basedir)
    os.close(fd)  # only the name is needed; the module writes the file itself
    output_path = Path(output_filename)

    with arrow_table_context(table, dir=self.basedir) as (input_path, _):
        old_cwd = os.getcwd()
        os.chdir(self.basedir)
        try:
            thrift_result = cjwkernel.pandas.module.render_thrift(
                ttypes.RenderRequest(
                    basedir=self.basedir,
                    input_filename=input_path.name,
                    params=pydict_to_thrift_json_object(params),
                    tab_name=tab_name,
                    tab_outputs={
                        k: arrow_tab_output_to_thrift(v)
                        for k, v in tab_outputs.items()
                    },
                    fetch_result=(
                        arrow_fetch_result_to_thrift(fetch_result)
                        if fetch_result is not None
                        else None
                    ),
                    uploaded_files={
                        k: arrow_uploaded_file_to_thrift(v)
                        for k, v in uploaded_files.items()
                    },
                    output_filename=output_path.name,
                )
            )
        finally:
            # Restore the working directory even if the module raised.
            os.chdir(old_cwd)

        arrow_result = thrift_render_result_to_arrow(thrift_result)
        return RenderOutcome(arrow_result, output_path)
def test_pydict_to_thrift_json_object(self):
    """Check conversion of a dict with one value of each JSON kind.

    Covers string, int, float, null, bool, and an array of objects.
    """
    actual = types.pydict_to_thrift_json_object(
        {
            "str": "s",
            "int": 2,
            "float": 1.2,
            "null": None,
            "bool": False,
            "arrayofobjects": [{"A": "a", "B": "b"}, {"C": "c", "D": "d"}],
        }
    )

    first_object = ttypes.Json(
        object_value={
            "A": ttypes.Json(string_value="a"),
            "B": ttypes.Json(string_value="b"),
        }
    )
    second_object = ttypes.Json(
        object_value={
            "C": ttypes.Json(string_value="c"),
            "D": ttypes.Json(string_value="d"),
        }
    )
    expected = {
        "str": ttypes.Json(string_value="s"),
        "int": ttypes.Json(int64_value=2),
        "float": ttypes.Json(number_value=1.2),
        "null": ttypes.Json(),
        "bool": ttypes.Json(boolean_value=False),
        "arrayofobjects": ttypes.Json(array_value=[first_object, second_object]),
    }

    self.assertEqual(actual, expected)
def migrate_params_thrift(thrift_params: Dict[str, ttypes.Json]):
    """Thrift-facing wrapper around `migrate_params()`.

    Converts the incoming Thrift JSON object to a dict, passes it to
    `migrate_params()`, and wraps the converted result in a
    `ttypes.MigrateParamsResult`.
    """
    old_params: Dict[str, Any] = thrift_json_object_to_pydict(thrift_params)
    new_params = migrate_params(old_params)
    thrift_new_params = pydict_to_thrift_json_object(new_params)
    return ttypes.MigrateParamsResult(thrift_new_params)