Exemplo n.º 1
0
    def test_load_and_dispatch(self):
        # End-to-end check: import a module from a (fake) cloned repository,
        # attach it to a workflow, then render it through
        # module_dispatch_render() three times -- a normal render, a render
        # with invalid column parameters, and a render that crashes.
        test_dir = self.fake_github_clone()

        import_module_from_directory('https://github.com/account/reponame',
                                     'reponame', '123456', test_dir)

        # Module and ModuleVersion should have loaded -- these will raise
        # exception if they don't exist
        module = Module.objects.get(id_name=self.importable_id_name)
        module_version = ModuleVersion.objects.get(module=module)

        # Create a test workflow that uses this imported module
        workflow = add_new_workflow('Dynamic Dispatch Test Workflow')
        wfm = add_new_wf_module(workflow, module_version, order=1)

        # These will fail if we haven't correctly loaded the json describing
        # the parameters
        stringparam = get_param_by_id_name('test', wf_module=wfm)
        colparam = get_param_by_id_name('test_column', wf_module=wfm)
        multicolparam = get_param_by_id_name('test_multicolumn', wf_module=wfm)

        # Does it render right?
        # NOTE(review): the backslash in the 'english' row is kept verbatim
        # (now spelled `\\` so it is a valid escape sequence producing the
        # same string); confirm whether that backslash is intentional.
        test_csv = 'Class,M,F,Other\n' \
                   'math,10,12,100\n' \
                   'english,,7\\,200\n' \
                   'history,11,13,\n' \
                   'economics,20,20,20'
        test_table = pd.read_csv(io.StringIO(test_csv),
                                 header=0,
                                 skipinitialspace=True)
        test_table_out = test_table.copy()
        test_table_out['M'] *= 2
        test_table_out[['F', 'Other']] *= 3

        colparam.set_value('M')  # double this
        multicolparam.set_value('F,Other')  # triple these
        # This first render is expected to emit at least one log record on
        # the dynamicdispatch logger; assertLogs fails otherwise.
        with self.assertLogs(dynamicdispatch.__name__):
            result = module_dispatch_render(module_version, wfm.get_params(),
                                            test_table, None)
        self.assertEqual(result, ProcessResult(test_table_out))

        # Test that bad column parameter values are removed
        colparam.set_value('missing_column_name')
        multicolparam.set_value('Other,junk_column_name')
        test_table_out = test_table.copy()
        # multicolumn parameter has only one valid col
        test_table_out[['Other']] *= 3
        result = module_dispatch_render(module_version, wfm.get_params(),
                                        test_table, None)
        self.assertEqual(result, ProcessResult(test_table_out))

        # if the module crashes, we should get an error with a line number
        stringparam.set_value('crashme')
        result = module_dispatch_render(module_version, wfm.get_params(),
                                        test_table, None)
        self.assertEqual(
            result,
            ProcessResult(
                error='ValueError: we crashed! at line 7 of importable.py'))
Exemplo n.º 2
0
def find_output_deltas_to_notify_from_fetched_tables(
        wf_module: 'WfModule', old_result: Optional[ProcessResult],
        new_result: ProcessResult) -> List[OutputDelta]:
    """Build the list of OutputDeltas that should be emailed to the owner.

    `wf_module` is the fetch module whose output just changed from
    `old_result` to `new_result`. `old_result` may be `None`, in which case
    an empty `ProcessResult()` is used as the input to downstream renders.

    Every module after `wf_module`, up to and including the last module with
    `.notifications` set, is rendered twice (once from the old data, once
    from the new). Rendering stops at the first module whose two outputs are
    equal.

    Assumes `old_result` and `new_result` are different.

    Must be called within a workflow.cooperative_lock().

    TODO make this easier to unit-test, and then unit-test it.
    """
    # Import here, to prevent recursive import
    from server.dispatch import module_dispatch_render

    stack = list(wf_module.workflow.wf_modules.all())

    # Ignore everything after the last module that has notifications on
    keep = len(stack)
    while keep > 0 and not stack[keep - 1].notifications:
        keep -= 1
    stack = stack[:keep]

    # Locate `wf_module` and keep only what comes strictly after it. If it
    # is not found, `start` runs off the end and the slice is empty.
    start = 0
    while start < len(stack) and stack[start].id != wf_module.id:
        start += 1
    dependents = stack[start + 1:]

    deltas = []
    if wf_module.notifications:
        # Notify on wf_module itself
        deltas.append(OutputDelta(wf_module, old_result, new_result))

    if old_result is None:
        old_result = ProcessResult()

    # Walk the dependent modules, feeding each one the previous outputs
    for dependent in dependents:
        old_result = module_dispatch_render(dependent, old_result.dataframe)
        new_result = module_dispatch_render(dependent, new_result.dataframe)

        if old_result == new_result:
            # Outputs have converged; nothing downstream can differ, so
            # there is nothing more to notify about.
            break

        if dependent.notifications:
            deltas.append(OutputDelta(dependent, old_result, new_result))

    return deltas
Exemplo n.º 3
0
def execute_wfmodule(wf_module: WfModule) -> ProcessResult:
    """
    Render `wf_module`, rendering its predecessors first when needed.

    A cached render (as returned by `_get_render_cache`) short-circuits the
    work: its result is returned immediately. Otherwise the previous module
    in the stack is executed recursively to obtain the input table, this
    module is rendered, and the fresh result is cached and saved.

    You must call this within a workflow.cooperative_lock().
    """
    cached = _get_render_cache(wf_module)
    if cached:
        # Nothing to do: a usable render already exists
        return cached.result

    # Recursing here renders only as many upstream modules as necessary
    upstream = wf_module.previous_in_stack()
    input_result = execute_wfmodule(upstream) if upstream else ProcessResult()

    rendered = dispatch.module_dispatch_render(wf_module,
                                               input_result.dataframe)
    wf_module.cache_render_result(wf_module.last_relevant_delta_id, rendered)
    wf_module.save()
    return rendered
Exemplo n.º 4
0
def execute_wfmodule(wf_module: WfModule,
                     last_result: ProcessResult) -> CachedRenderResult:
    """
    Render a single WfModule; cache and return output.

    Args:
        wf_module: the module to render.
        last_result: the previous step's result; its `.dataframe` is passed
            to the module as input.

    Returns:
        The module's cached render result -- either the existing one, when
        its `delta_id` matches `wf_module.last_relevant_delta_id`, or the
        one freshly produced and stored by this call.

    CONCURRENCY NOTES: This function is reasonably concurrency-friendly:

    * It locks the workflow, so two renders won't happen on the same workflow
      at the same time.
    * It returns a valid cache result immediately.
    * It checks with the database that `wf_module` hasn't been deleted from
      its workflow.
    * It checks with the database that `wf_module` hasn't been deleted from
      the database entirely.
    * It checks with the database that `wf_module` hasn't been modified. (It
      is very common for a user to request a module's output -- kicking off a
      sequence of `execute_wfmodule` -- and then change a param in a prior
      module, making all those calls obsolete.)
    * It runs in a transaction (obviously -- FOR UPDATE and all), which will
      stall `models.Delta` as it tries to write last_relevant_delta_id,
      effectively stalling users' update HTTP requests until after the
      `wf_module`'s render is complete.

    These guarantees mean:

    * It's relatively cheap to render twice.
    * Users who modify a WfModule while it's rendering will be stalled -- for
      as short a duration as possible.
    * When a user changes a workflow significantly, all prior renders will end
      relatively cheaply.

    Raises `UnneededExecution` when the input WfModule should not be rendered.
    """
    # The locking/validation described above is presumably implemented by
    # `locked_wf_module` (not visible here) -- confirm against its source.
    with locked_wf_module(wf_module) as safe_wf_module:
        # NOTE(review): the cache check below reads from the caller's
        # `wf_module`, not from `safe_wf_module`. Confirm this is intended.
        cached_render_result = wf_module.get_cached_render_result()

        # If the cache is good, just return it -- skipping the render() call
        if (
            cached_render_result
            and (cached_render_result.delta_id
                 == wf_module.last_relevant_delta_id)
        ):
            return cached_render_result

        # Cache is missing or stale: render from the previous step's table
        result = dispatch.module_dispatch_render(safe_wf_module,
                                                 last_result.dataframe)
        cached_render_result = safe_wf_module.cache_render_result(
            safe_wf_module.last_relevant_delta_id,
            result
        )

        # Save safe_wf_module, not wf_module, because we know we've only
        # changed the cached_render_result columns. (We know because we
        # locked the row before fetching it.) `wf_module.save()` might
        # overwrite some newer values.
        safe_wf_module.save()

        return cached_render_result
Exemplo n.º 5
0
 def test_table_truncation(self):
     # Render a table one row over the configured limit and check that the
     # output is cut down to the limit and the module is flagged as ERROR.
     nrows = settings.MAX_ROWS_PER_TABLE + 1
     bigtable = pd.DataFrame(np.random.randint(0, 100, size=(nrows, 4)),
                             columns=list('ABCD'))
     wfm = load_and_add_module(
         'editcells')  # because it never changes row count
     out = module_dispatch_render(wfm, bigtable)
     # assertEqual (not assertTrue(a == b)) so a failure reports both values
     self.assertEqual(len(out), settings.MAX_ROWS_PER_TABLE)
     self.assertEqual(wfm.status, WfModule.ERROR)
Exemplo n.º 6
0
 def test_render_static_truncates_table(self):
     # A 3-row input is expected to come back as 2 rows plus a truncation
     # error. Presumably the row limit is overridden to 2 for this test;
     # the override is not visible here -- confirm.
     three_rows = pd.DataFrame({'a': [1, 2, 3]})
     # editcells never changes row count
     wfm = load_and_add_module('editcells')
     actual = module_dispatch_render(wfm, three_rows)
     expected = ProcessResult(
         dataframe=pd.DataFrame({'a': [1, 2]}),
         error='Truncated output from 3 rows to 2',
     )
     self.assertEqual(actual, expected)
     wfm.refresh_from_db()
Exemplo n.º 7
0
def execute_wfmodule(wfmodule):
    """Render the workflow's modules in order, stopping after `wfmodule`.

    Each module receives the previous module's output as its input table;
    the first module receives an empty DataFrame. Returns the final table,
    or an empty DataFrame if the final render produced `None`.
    """
    output = pd.DataFrame()
    for step in wfmodule.workflow.wf_modules.all():
        output = module_dispatch_render(step, output)
        if step == wfmodule:
            # Reached the requested module; later modules are not rendered
            break

    return pd.DataFrame() if output is None else output
Exemplo n.º 8
0
    def test_error_render(self):
        # Force an error, ensure that it's returned and the output is a NOP
        wfm = load_and_add_module('pythoncode', workflow=self.workflow)
        code_pval = get_param_by_id_name('code')
        code_pval.set_value('not python code')

        out = module_dispatch_render(wfm, self.test_table)
        # Reload so status/error_msg reflect what the render stored
        wfm.refresh_from_db()
        # assertEqual (not assertTrue(a == b)) so a failure reports both values
        self.assertEqual(wfm.status, WfModule.ERROR)
        self.assertEqual(wfm.error_msg,
                         'invalid syntax (<string>, line 2) at line 1')
        # On error the input table is expected to pass through unchanged
        self.assertTrue(out.equals(self.test_table))
Exemplo n.º 9
0
    def test_error_render(self):
        # Give the pythoncode module source that is not valid Python and
        # check that the render reports the syntax error in its result.
        wfm = load_and_add_module('pythoncode', workflow=self.workflow)
        get_param_by_id_name('code').set_value('not python code')

        actual = module_dispatch_render(wfm, self.test_table)
        expected = ProcessResult(
            error='Line 1: invalid syntax (user input, line 1)',
            json={'output': ''}  # not part of this test
        )
        self.assertEqual(actual, expected)
Exemplo n.º 10
0
def execute_wfmodule(wfmodule, nocache=False):
    """Return the output table of `wfmodule` at the workflow's revision.

    With `nocache=False` (the default), cached tables for the current
    revision are reused wherever they exist. With `nocache=True` no cache
    is read and every module up to `wfmodule` is re-rendered. In both cases
    each freshly rendered table is stored as the module's cached table.
    """
    workflow = wfmodule.workflow
    target_rev = workflow.revision()

    def cached_for(step):
        # Cache lookup is skipped entirely when the caller passed nocache
        if nocache:
            return None
        return get_render_cache(step, target_rev)

    # Fast path: the requested module is already rendered at this revision
    hit = cached_for(wfmodule)
    if hit:
        return hit.get_table()

    # Otherwise walk the stack from the top, taking cached tables where
    # available and rendering the rest. This assumes modules are always
    # rendered in order, so a stale cache is never followed by a fresh one.
    table = pd.DataFrame()
    for step in workflow.wf_modules.all():
        hit = cached_for(step)

        if hit is not None:
            table = hit.get_table()
        else:
            # Tables cached for earlier revisions are discarded before the
            # new render is stored (they can always be re-rendered).
            StoredObject.objects.filter(
                wf_module=step, type=StoredObject.CACHED_TABLE).delete()
            table = module_dispatch_render(step, table)
            StoredObject.create_table(step, StoredObject.CACHED_TABLE, table,
                                      metadata=target_rev)

        if step == wfmodule:
            # This is the module that was asked for; stop here
            break

    return table
Exemplo n.º 11
0
 def test_internal_render(self):
     # Rendering the fixture table must produce the expected M/F table
     rendered = module_dispatch_render(self.wfm, self.test_table)
     matches_expected = rendered.equals(self.test_table_MF)
     self.assertTrue(matches_expected)
Exemplo n.º 12
0
 def test_none_table_render(self):
     # An empty input table should render to an empty ProcessResult
     empty_input = pd.DataFrame()
     rendered = module_dispatch_render(self.wfm, empty_input)
     self.assertEqual(rendered, ProcessResult())
Exemplo n.º 13
0
 def test_missing_module(self):
     # A wf_module created with None in place of a module version should
     # render to an empty ProcessResult
     orphan_wfm = add_new_wf_module(add_new_workflow('Missing module'),
                                    None, 0)
     rendered = module_dispatch_render(orphan_wfm, mock_csv_table)
     self.assertEqual(rendered, ProcessResult())
Exemplo n.º 14
0
 def test_multicolumn_sanitize(self):
     # mock_csv_table has no M,F cols
     rendered = module_dispatch_render(self.wfm, mock_csv_table)
     expected = ProcessResult(pd.DataFrame([{}, {}]))
     self.assertEqual(rendered, expected)
Exemplo n.º 15
0
 def test_internal_render(self):
     # Rendering the fixture table must produce the expected M/F table
     expected = ProcessResult(self.test_table_MF)
     rendered = module_dispatch_render(self.wfm, self.test_table)
     self.assertEqual(rendered, expected)
Exemplo n.º 16
0
 def test_missing_module(self):
     # A wf_module created with None in place of a module version should
     # render to an empty table
     orphan_wfm = add_new_wf_module(add_new_workflow('Missing module'),
                                    None, 0)
     rendered = module_dispatch_render(orphan_wfm, mock_csv_table)
     self.assertTrue(rendered.empty)
Exemplo n.º 17
0
 def test_none_table_render(self):
     # An empty input table should render to an empty output table
     empty_input = pd.DataFrame()
     rendered = module_dispatch_render(self.wfm, empty_input)
     self.assertTrue(rendered.empty)
Exemplo n.º 18
0
 def test_multicolumn_sanitize(self):
     # mock_csv_table has no M,F cols, so the output should be empty
     rendered = module_dispatch_render(self.wfm, mock_csv_table)
     self.assertTrue(rendered.empty)