示例#1
0
 def test_default_returns_params(self):
     """
     Check that migrate_params_thrift() returns a value equal to its input.
     """

     def build_thrift_params():
         # Build a fresh thrift value on every call so the request and the
         # expected value never share state.
         return arrow_raw_params_to_thrift(RawParams({"A": [1], "B": "x"}))

     migrated = module.migrate_params_thrift(build_thrift_params())
     self.assertEqual(migrated, build_thrift_params())
示例#2
0
 def test_default_returns_params(self):
     """
     Round-trip params through migrate_params_thrift() and expect the same
     dict back after decoding the thrift response.
     """
     request = RawParams({"A": [1], "B": "x"}).to_thrift()
     response = module.migrate_params_thrift(request)
     decoded = RawParams.from_thrift(response)
     self.assertEqual(decoded.params, {"A": [1], "B": "x"})
示例#3
0
 def migrate_params(self, compiled_module: CompiledModule,
                    params: Dict[str, Any]) -> Dict[str, Any]:
     """
     Call the module's `migrate_params_thrift()` and return the migrated params.

     `params` is wrapped in a thrift RawParams request and handed to
     `self._run_in_child()` inside a `_chroot_dir_context()`. The thrift
     response is unwrapped and its `.params` value is returned.
     """
     request = RawParams(params).to_thrift()
     with _chroot_dir_context() as chroot:
         response = self._run_in_child(
             chroot=chroot,
             chroot_paths=DATA_PATHS,
             compiled_module=compiled_module,
             timeout=self.migrate_params_timeout,
             # Empty thrift struct passed as `result`; presumably filled in by
             # _run_in_child() with the child's reply -- confirm in that helper.
             result=ttypes.RawParams(),
             function="migrate_params_thrift",
             args=[request],
         )
     return RawParams.from_thrift(response).params
示例#4
0
 def _test_fetch(
     self,
     fetch_fn,
     *,
     params=None,
     secrets=None,
     last_fetch_result=None,
     input_table_parquet_path=None,
     output_filename=None,
 ):
     """
     Call `module.fetch_thrift()` with `fetch_fn` patched in as `module.fetch`.

     Builds a thrift FetchRequest rooted at `self.basedir` from the keyword
     arguments and returns the thrift result converted by
     `thrift_fetch_result_to_arrow()`.

     `params` and `secrets` default to empty dicts. If `output_filename` is
     None, a temporary file inside `self.basedir` is used; it is cleaned up
     before this helper returns, so callers cannot inspect the data written
     to it.
     """
     # Use None sentinels rather than `{}` defaults: a dict literal in the
     # signature is one object shared by every call, so a fetch function that
     # mutated it would leak state between tests.
     if params is None:
         params = {}
     if secrets is None:
         secrets = {}
     with ExitStack() as ctx:
         ctx.enter_context(patch.object(module, "fetch", fetch_fn))
         if output_filename is None:
             # Make a temporary output filename -- this will make `fetch()`
             # complete, but callers won't be able to see the data it
             # outputs because we'll delete the file too soon.
             output_filename = ctx.enter_context(
                 tempfile_context(dir=self.basedir)).name
         thrift_result = module.fetch_thrift(
             ttypes.FetchRequest(
                 basedir=str(self.basedir),
                 params=arrow_params_to_thrift(Params(params)),
                 secrets=arrow_raw_params_to_thrift(RawParams(secrets)),
                 last_fetch_result=(
                     arrow_fetch_result_to_thrift(last_fetch_result)
                     if last_fetch_result is not None else None),
                 # Only the file's name (not its full path) goes in the request.
                 input_table_parquet_filename=(input_table_parquet_path.name
                                               if input_table_parquet_path
                                               is not None else None),
                 output_filename=output_filename,
             ))
         return thrift_fetch_result_to_arrow(thrift_result, self.basedir)
示例#5
0
 def migrate_params(
     self, compiled_module: CompiledModule, params: Dict[str, Any]
 ) -> Dict[str, Any]:
     """
     Call a module's migrate_params().

     `params` is wrapped in a thrift RawParams request and passed to the
     module's `migrate_params_thrift` function through `self._run_in_child()`,
     using READONLY_CHROOT_DIR and no network config. The thrift response is
     unwrapped and its `.params` value is returned.
     """
     request = RawParams(params).to_thrift()
     response = self._run_in_child(
         chroot_dir=READONLY_CHROOT_DIR,
         network_config=None,
         compiled_module=compiled_module,
         timeout=self.migrate_params_timeout,
         # Empty thrift struct passed as `result`; presumably filled in by
         # _run_in_child() with the child's reply -- confirm in that helper.
         result=ttypes.RawParams(),
         function="migrate_params_thrift",
         args=[request],
     )
     return RawParams.from_thrift(response).params
示例#6
0
    def fetch(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        params: Params,
        secrets: Dict[str, Any],
        last_fetch_result: Optional[FetchResult],
        input_parquet_filename: Optional[str],
        output_filename: str,
    ) -> FetchResult:
        """
        Invoke the module's `fetch_thrift()` in a child and return what it fetched.

        The request carries `basedir` re-expressed relative to the chroot root,
        the thrift-encoded params, secrets and (optional) previous fetch
        result, and the input/output filenames.

        Raise ModuleError if the module has a bug. In particular, raise
        ModuleExitedError if the module reports a non-empty output filename
        other than `output_filename`.
        """
        chroot_root = chroot_context.chroot.root
        # Re-root basedir at "/": the path relative to the chroot directory.
        module_basedir = Path("/") / basedir.relative_to(chroot_root)

        # Encode each request field in turn, then assemble the request.
        thrift_basedir = str(module_basedir)
        thrift_params = arrow_params_to_thrift(params)
        thrift_secrets = arrow_raw_params_to_thrift(RawParams(secrets))
        if last_fetch_result is None:
            thrift_last_fetch_result = None
        else:
            thrift_last_fetch_result = arrow_fetch_result_to_thrift(
                last_fetch_result)
        request = ttypes.FetchRequest(
            thrift_basedir,
            thrift_params,
            thrift_secrets,
            thrift_last_fetch_result,
            input_parquet_filename,
            output_filename,
        )

        output_path = basedir / output_filename
        try:
            with chroot_context.writable_file(output_path):
                result = self._run_in_child(
                    chroot_dir=chroot_root,
                    network_config=pyspawner.NetworkConfig(),
                    compiled_module=compiled_module,
                    timeout=self.fetch_timeout,
                    result=ttypes.FetchResult(),
                    function="fetch_thrift",
                    args=[request],
                )
        finally:
            # Runs whether or not the child call succeeded.
            chroot_context.clear_unowned_edits()

        if result.filename and result.filename != output_filename:
            raise ModuleExitedError(0, "Module wrote to wrong output file")

        # TODO validate result isn't too large. If result is dataframe it makes
        # sense to truncate; but fetch results aren't necessarily data frames.
        # It's up to the module to enforce this logic ... but we need to set a
        # maximum file size.
        return thrift_fetch_result_to_arrow(result, basedir)
示例#7
0
 def fetch(
     self,
     compiled_module: CompiledModule,
     basedir: Path,
     params: Params,
     secrets: Dict[str, Any],
     last_fetch_result: Optional[FetchResult],
     input_parquet_filename: str,
     output_filename: str,
 ) -> FetchResult:
     """
     Invoke the module's `fetch_thrift()` in a child and return what it fetched.

     The request carries `basedir`, the thrift-encoded params, secrets and
     (optional) previous fetch result, and the input/output filenames.

     Raise ModuleExitedError if the module reports a non-empty output
     filename other than `output_filename`.
     """
     # Encode each request field in turn, then assemble the request.
     thrift_basedir = str(basedir)
     thrift_params = params.to_thrift()
     thrift_secrets = RawParams(secrets).to_thrift()
     if last_fetch_result is None:
         thrift_last_fetch_result = None
     else:
         thrift_last_fetch_result = last_fetch_result.to_thrift()
     request = ttypes.FetchRequest(
         thrift_basedir,
         thrift_params,
         thrift_secrets,
         thrift_last_fetch_result,
         input_parquet_filename,
         output_filename,
     )

     output_path = basedir / output_filename
     with _chroot_dir_context(
         provide_paths=[basedir], extract_paths=[output_path]
     ) as chroot:
         visible_paths = (
             [basedir] + DATA_PATHS + PARQUET_PATHS + NETWORKING_PATHS
         )
         result = self._run_in_child(
             chroot=chroot,
             chroot_paths=visible_paths,
             compiled_module=compiled_module,
             timeout=self.fetch_timeout,
             result=ttypes.FetchResult(),
             function="fetch_thrift",
             args=[request],
         )
         # Checked while still inside the chroot context.
         if result.filename and result.filename != output_filename:
             raise ModuleExitedError(0, "Module wrote to wrong output file")

     # TODO validate result isn't too large. If result is dataframe it makes
     # sense to truncate; but fetch results aren't necessarily data frames.
     # It's up to the module to enforce this logic ... but we need to set a
     # maximum file size.
     return FetchResult.from_thrift(result, basedir)
示例#8
0
 def _test(self, fn, params=None):
     """
     Call `module.migrate_params_thrift()` with `fn` patched in as
     `module.migrate_params`, and return the resulting params.

     `params` defaults to an empty dict. It is encoded with
     `arrow_raw_params_to_thrift()` and the thrift result is decoded with
     `thrift_raw_params_to_arrow()`.
     """
     # Use a None sentinel rather than a `{}` default: a dict literal in the
     # signature is one object shared by every call.
     if params is None:
         params = {}
     with patch.object(module, "migrate_params", fn):
         thrift_result = module.migrate_params_thrift(
             arrow_raw_params_to_thrift(RawParams(params)))
         return thrift_raw_params_to_arrow(thrift_result).params
示例#9
0
 def _test(self, fn, params=None):
     """
     Call `module.migrate_params_thrift()` with `fn` patched in as
     `module.migrate_params`, and return the resulting params.

     `params` defaults to an empty dict. It is encoded with
     `RawParams.to_thrift()` and the thrift result is decoded with
     `RawParams.from_thrift()`.
     """
     # Use a None sentinel rather than a `{}` default: a dict literal in the
     # signature is one object shared by every call.
     if params is None:
         params = {}
     with patch.object(module, "migrate_params", fn):
         thrift_result = module.migrate_params_thrift(RawParams(params).to_thrift())
         return RawParams.from_thrift(thrift_result).params