Example #1
 def setUp(self):
     super().setUp()
     self.ctx = contextlib.ExitStack()
     self.chroot_context = self.ctx.enter_context(
         EDITABLE_CHROOT.acquire_context())
     self.basedir = self.ctx.enter_context(
         self.chroot_context.tempdir_context(prefix="basedir-"))
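This pattern (an ExitStack opened in setUp, repeated in Examples #2, #3, and #5 below) implies a matching cleanup that the excerpts do not show; presumably the stack is closed in tearDown so the temporary directory and the chroot are released. A minimal sketch of the assumed counterpart:

 def tearDown(self):
     # Unwind everything registered on the stack: the temporary
     # directory first, then the chroot acquired from EDITABLE_CHROOT.
     # (Hypothetical counterpart -- the source omits tearDown.)
     self.ctx.close()
     super().tearDown()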
Example #2
 def setUp(self):
     super().setUp()
     self.ctx = contextlib.ExitStack()
     self.chroot_context = self.ctx.enter_context(EDITABLE_CHROOT.acquire_context())
     self.basedir = self.ctx.enter_context(self.chroot_context.tempdir_context())
     self.output_path = self.ctx.enter_context(
         self.chroot_context.tempfile_context(dir=self.basedir)
     )
Example #3
 def setUp(self):
     super().setUp()
     self.ctx = contextlib.ExitStack()
     self.chroot_context = self.ctx.enter_context(
         EDITABLE_CHROOT.acquire_context())
     basedir = self.ctx.enter_context(
         self.chroot_context.tempdir_context(prefix="test_wf_module-"))
     self.output_path = self.ctx.enter_context(
         self.chroot_context.tempfile_context(prefix="output-",
                                              dir=basedir))
Example #4
 def _execute(self, workflow, flow, tab_results, expect_log_level=logging.DEBUG):
     with EDITABLE_CHROOT.acquire_context() as chroot_context:
         with chroot_context.tempdir_context(prefix="test_tab") as tempdir:
             with chroot_context.tempfile_context(
                 prefix="execute-tab-output", suffix=".arrow", dir=tempdir
             ) as out_path:
                 with self.assertLogs("renderer.execute", level=expect_log_level):
                     result = self.run_with_async_db(
                         execute_tab_flow(
                             chroot_context, workflow, flow, tab_results, out_path
                         )
                     )
                     yield result
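`_execute` yields its result, so it is a generator function; presumably it is decorated with `@contextlib.contextmanager` in the enclosing test class, letting callers assert on the result while the temporary files still exist. A hedged usage sketch (the decorator and call site are assumptions, not in the excerpt):

 # Assumed call site, inside a test method; the @contextlib.contextmanager
 # decorator on _execute is not visible in the excerpt.
 with self._execute(workflow, flow, tab_results) as result:
     ...  # assert on `result` before the temporary .arrow file is removed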
Example #5
 def setUp(self):
     super().setUp()
     self.ctx = contextlib.ExitStack()
     self.chroot_context = self.ctx.enter_context(EDITABLE_CHROOT.acquire_context())
     basedir = self.ctx.enter_context(
         self.chroot_context.tempdir_context(prefix="test_step-")
     )
     self.empty_table_path = self.ctx.enter_context(
         self.chroot_context.tempfile_context(prefix="empty-table-", dir=basedir)
     )
     with pa.ipc.RecordBatchFileWriter(self.empty_table_path, pa.schema([])):
         pass
     self.output_path = self.ctx.enter_context(
         self.chroot_context.tempfile_context(prefix="output-", dir=basedir)
     )
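The `RecordBatchFileWriter` line above writes a valid zero-column Arrow IPC file into `self.empty_table_path`. A short sketch, using pyarrow's public API inside a test, of reading that fixture back to show what it contains:

 import pyarrow as pa

 # Open the Arrow IPC file the fixture wrote and confirm it holds an
 # empty table: zero columns, zero rows.
 reader = pa.ipc.open_file(self.empty_table_path)
 table = reader.read_all()
 assert table.num_columns == 0 and table.num_rows == 0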
Example #6
async def execute_workflow(workflow: Workflow, delta_id: int) -> None:
    """
    Ensure all `workflow.tabs[*].live_wf_modules` cache fresh render results.

    Raise UnneededExecution if the inputs become stale (at which point we don't
    care about results any more).

    WEBSOCKET NOTES: each wf_module is executed in turn. After each execution,
    we notify clients of its new columns and status.
    """
    # raises UnneededExecution
    pending_tab_flows = await _load_tab_flows(workflow, delta_id)

    # tab_results: keep track of outputs of each tab. (Outputs are used as
    # inputs into other tabs.) Before render begins, all outputs are `None`.
    # We'll execute tabs dependencies-first; if a WfModule depends on a tab
    # result we haven't rendered yet, that's because it _couldn't_ be
    # rendered first -- prompting a `TabCycleError`.
    #
    # `tab_results.keys()` returns tabs in the Workflow's tab order -- that
    # is, the order the user determines.
    tab_results: Dict[Tab, Optional[RenderResult]] = {
        flow.tab: None
        for flow in pending_tab_flows
    }

    # Execute one tab_flow at a time.
    #
    # We don't hold a DB lock throughout the loop: the loop can take a long
    # time; it might be run multiple times simultaneously (even on different
    # computers); and `await` doesn't work with locks.

    with EDITABLE_CHROOT.acquire_context() as chroot_context:
        with chroot_context.tempdir_context(prefix="render-") as basedir:

            async def execute_tab_flow_into_new_file(
                    tab_flow: TabFlow) -> RenderResult:
                output_path = basedir / ("tab-output-%s.arrow" %
                                         tab_flow.tab_slug.replace("/", "-"))
                return await execute_tab_flow(chroot_context, workflow,
                                              tab_flow, tab_results,
                                              output_path)

            while pending_tab_flows:
                ready_flows, dependent_flows = partition_ready_and_dependent(
                    pending_tab_flows)

                if not ready_flows:
                    # All flows are dependent -- meaning they all have cycles. Execute
                    # them last; they can detect their cycles through `tab_results`.
                    break

                for tab_flow in ready_flows:
                    result = await execute_tab_flow_into_new_file(tab_flow)
                    tab_results[tab_flow.tab] = result

                pending_tab_flows = dependent_flows  # iterate

            # Now, `pending_tab_flows` only contains flows with cycles. Execute
            # them. No need to update `tab_results`: if tab1 and tab2 depend on
            # each other, they should have the same error ("Cycle").
            for tab_flow in pending_tab_flows:
                await execute_tab_flow_into_new_file(tab_flow)
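The loop relies on `partition_ready_and_dependent`, which is not shown in the excerpt. A minimal sketch of its plausible behavior -- a flow is "ready" once none of its input tabs are still pending -- where `flow.input_tab_slugs` is an assumed accessor, not from the source:

def partition_ready_and_dependent(flows):
    # Hypothetical sketch, not the source implementation. Split `flows`
    # into (ready, dependent): a flow stays dependent while any tab it
    # reads from is itself still pending (which is how cycles surface).
    pending_slugs = {flow.tab_slug for flow in flows}
    ready, dependent = [], []
    for flow in flows:
        if pending_slugs & set(flow.input_tab_slugs):  # assumed attribute
            dependent.append(flow)
        else:
            ready.append(flow)
    return ready, dependent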
Example #7
async def fetch(*,
                workflow_id: int,
                wf_module_id: int,
                now: Optional[timezone.datetime] = None) -> None:
    # 1. Load database objects
    #    - missing WfModule? Return prematurely
    #    - database error? _exit(1)
    #    - module_zipfile missing/invalid? user-visible error
    #    - migrate_params() fails? user-visible error
    # 2. Calculate result
    #    2a. Build fetch kwargs
    #    2b. Call fetch (no errors possible -- LoadedModule catches them)
    # 3. Save result (and send delta)
    #    - database errors? _exit(1)
    #    - other error (bug in `save`)? Log exception and ignore
    # 4. Update WfModule last-fetch time
    #    - database errors? _exit(1)
    with crash_on_database_error():
        logger.info("begin fetch(workflow_id=%d, wf_module_id=%d)",
                    workflow_id, wf_module_id)

        try:
            (
                wf_module,
                module_zipfile,
                migrated_params,
                stored_object,
                input_crr,
            ) = await load_database_objects(workflow_id, wf_module_id)
        except (Workflow.DoesNotExist, WfModule.DoesNotExist):
            logger.info("Skipping fetch of deleted WfModule %d-%d",
                        workflow_id, wf_module_id)
            return

    # Prepare secrets -- mangle user values so modules have all they need.
    #
    # This can involve, e.g., HTTP request to OAuth2 token servers.
    #
    # TODO unit-test this code path
    if module_zipfile is None:
        secrets = {}
    else:
        module_spec = module_zipfile.get_spec()
        secrets = await fetcher.secrets.prepare_secrets(
            module_spec.param_fields, wf_module.secrets)

    if now is None:
        now = timezone.now()

    with contextlib.ExitStack() as ctx:
        chroot_context = ctx.enter_context(EDITABLE_CHROOT.acquire_context())
        basedir = ctx.enter_context(
            chroot_context.tempdir_context(prefix="fetch-"))
        output_path = ctx.enter_context(
            chroot_context.tempfile_context(prefix="fetch-result-",
                                            dir=basedir))
        # get last_fetch_result (This can't error.)
        last_fetch_result = _stored_object_to_fetch_result(
            ctx, stored_object, wf_module.fetch_errors, dir=basedir)
        result = await asyncio.get_event_loop().run_in_executor(
            None,
            fetch_or_wrap_error,
            ctx,
            chroot_context,
            basedir,
            wf_module.module_id_name,
            module_zipfile,
            migrated_params,
            secrets,
            last_fetch_result,
            input_crr,
            output_path,
        )

        try:
            with crash_on_database_error():
                if last_fetch_result is not None and versions.are_fetch_results_equal(
                        last_fetch_result, result):
                    await save.mark_result_unchanged(workflow_id, wf_module,
                                                     now)
                else:
                    await save.create_result(workflow_id, wf_module, result,
                                             now)
        except asyncio.CancelledError:
            raise
        except Exception:
            # Log exceptions but keep going.
            # TODO [adamhooper, 2019-09-12] really? I think we don't want this.
            # Make `fetch.save()` robust, then nix this handler
            logger.exception("Error fetching %s", wf_module)

    with crash_on_database_error():
        await update_next_update_time(workflow_id, wf_module, now)
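`crash_on_database_error` is used here as a context manager and, per the comments at the top of the function ("database error? _exit(1)"), treats any database failure as fatal. A minimal sketch consistent with those comments; the real implementation is not in the excerpt:

import contextlib
import logging
import os

from django.db import DatabaseError, InterfaceError

logger = logging.getLogger(__name__)

@contextlib.contextmanager
def crash_on_database_error():
    # Hypothetical sketch: a database-layer failure is unrecoverable
    # here, so log it and hard-exit; the process supervisor restarts us.
    try:
        yield
    except (DatabaseError, InterfaceError):
        logger.exception("Database error; exiting so we can be restarted")
        os._exit(1)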
Example #8
async def fetch(*,
                workflow_id: int,
                step_id: int,
                now: Optional[datetime.datetime] = None) -> None:
    # 1. Load database objects
    #    - missing Step? Return prematurely
    #    - database error? Raise
    #    - module_zipfile missing/invalid? user-visible error
    #    - migrate_params() fails? user-visible error
    # 2. Calculate result
    #    2a. Build fetch kwargs
    #    2b. Call fetch (no errors possible -- LoadedModule catches them)
    # 3. Save result (and create SetStepDataVersion => queueing a render)
    #    - database errors? Raise
    #    - rabbitmq errors? Raise
    #    - other error (bug in `save`)? Raise
    # 4. Update Step last-fetch time
    #    - database errors? Raise
    logger.info("begin fetch(workflow_id=%d, step_id=%d)", workflow_id,
                step_id)

    try:
        (
            step,
            module_zipfile,
            migrated_params,
            stored_object,
            input_crr,
        ) = await load_database_objects(workflow_id, step_id)
    except (Workflow.DoesNotExist, Step.DoesNotExist):
        logger.info("Skipping fetch of deleted Step %d-%d", workflow_id,
                    step_id)
        return

    # Prepare secrets -- mangle user values so modules have all they need.
    #
    # This can involve, e.g., HTTP request to OAuth2 token servers.
    #
    # TODO unit-test this code path
    if module_zipfile is None:
        secrets = {}
    else:
        module_spec = module_zipfile.get_spec()
        secrets = await fetcher.secrets.prepare_secrets(
            module_spec.param_fields, step.secrets)

    if now is None:
        now = datetime.datetime.now()

    with contextlib.ExitStack() as exit_stack:
        chroot_context = exit_stack.enter_context(
            EDITABLE_CHROOT.acquire_context())
        basedir = exit_stack.enter_context(
            chroot_context.tempdir_context(prefix="fetch-"))
        output_path = exit_stack.enter_context(
            chroot_context.tempfile_context(prefix="fetch-result-",
                                            dir=basedir))
        # get last_fetch_result (This can't error.)
        last_fetch_result = _stored_object_to_fetch_result(exit_stack,
                                                           stored_object,
                                                           step.fetch_errors,
                                                           dir=basedir)
        result = await asyncio.get_event_loop().run_in_executor(
            None,
            fetch_or_wrap_error,
            exit_stack,
            chroot_context,
            basedir,
            step.module_id_name,
            module_zipfile,
            migrated_params,
            secrets,
            last_fetch_result,
            input_crr,
            output_path,
        )

        if last_fetch_result is not None and versions.are_fetch_results_equal(
                last_fetch_result, result):
            await save.mark_result_unchanged(workflow_id, step, now)
        else:
            await save.create_result(workflow_id, step, result, now)

    await update_next_update_time(workflow_id, step, now)
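Example #8 reads as the later revision of Example #7's `fetch`: `WfModule` is renamed `Step`, and database and RabbitMQ errors now propagate to the caller instead of being swallowed or turned into `_exit(1)`. A minimal invocation sketch, with placeholder IDs:

import asyncio

# Hypothetical call site; 123 and 456 are placeholder IDs.
asyncio.run(fetch(workflow_id=123, step_id=456))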