예제 #1
0
    def execute_script(self, args, context):
        """Execute an R script in the given context.

        Every dataset in the context is resolved to its datastore identifier
        and the resulting name-to-identifier mapping is handed, together with
        the script source, to ``mimir.evalR``. The ``stdout`` and ``stderr``
        entries of the response become module outputs, as does anything that
        is written to the redirected Python output streams in the meantime.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        ValueError
            If a dataset referenced by the context is not in the datastore
        """
        # Get R script from user arguments
        source = args.get_value(cmd.PARA_R_SOURCE)
        # Redirect standard output and standard error streams
        out = sys.stdout
        err = sys.stderr
        stream = list()
        sys.stdout = OutputStream(tag='out', stream=stream)
        sys.stderr = OutputStream(tag='err', stream=stream)
        outputs = ModuleOutputs()
        # Everything that runs while the streams are redirected is guarded by
        # the same finally clause. This includes the dataset lookup, which may
        # raise; without the guard a failed lookup would leave the process
        # with redirected output streams.
        try:
            mimir_table_names = dict()
            for ds_name_o in context.datasets:
                dataset_id = context.datasets[ds_name_o]
                dataset = context.datastore.get_dataset(dataset_id)
                if dataset is None:
                    raise ValueError('unknown dataset \'' + ds_name_o + '\'')
                mimir_table_names[ds_name_o] = dataset.identifier
            # Run the r code. Failures of the evaluation itself are reported
            # as module output rather than propagated to the caller.
            try:
                evalresp = mimir.evalR(mimir_table_names, source)
                ostd = evalresp['stdout']
                oerr = evalresp['stderr']
                if ostd != '':
                    outputs.stdout.append(HtmlOutput(ostd))
                if oerr != '':
                    outputs.stderr.append(TextOutput(oerr))
            except Exception as ex:
                outputs.error(ex)
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        # Set module outputs from whatever was written to the redirected
        # streams while the script was running
        for tag, text in stream:
            text = ''.join(text).strip()
            if tag == 'out':
                outputs.stdout.append(HtmlOutput(text))
            else:
                outputs.stderr.append(TextOutput(text))
        provenance = ModuleProvenance()
        # Return execution result. Any content on the error stream marks the
        # execution as failed.
        return ExecResult(
            is_success=(len(outputs.stderr) == 0),
            outputs=outputs,
            provenance=provenance
        )
예제 #2
0
    def execute_script(self, args, context):
        """Emit the Markdown source of a cell as the module output.

        The source is passed through unchanged (no validation is performed)
        and wrapped in a ``MarkdownOutput``. Text written to the redirected
        output streams during execution is appended to the module outputs.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
        """
        markdown = args.get_value(cmd.PARA_MARKDOWN_SOURCE)
        # Capture everything written to the standard streams while the cell
        # is being processed
        captured = list()
        saved_stdout, saved_stderr = sys.stdout, sys.stderr
        sys.stdout = OutputStream(tag='out', stream=captured)
        sys.stderr = OutputStream(tag='err', stream=captured)
        outputs = ModuleOutputs()
        try:
            # The Markdown is not validated yet, so there is never an error
            # message to report at this point
            rendered, problems = markdown, ''
            if rendered != '':
                outputs.stdout.append(MarkdownOutput(rendered))
            if problems != '':
                outputs.stderr.append(TextOutput(problems))
        except Exception as ex:
            outputs.error(ex)
        finally:
            # Always hand the original streams back
            sys.stdout, sys.stderr = saved_stdout, saved_stderr
        # Turn captured stream content into module outputs
        for tag, chunks in captured:
            content = ''.join(chunks).strip()
            if tag != 'out':
                outputs.stderr.append(TextOutput(content))
            else:
                outputs.stdout.append(MarkdownOutput(content))
        provenance = ModuleProvenance()
        # The execution succeeded if nothing ended up on the error stream
        succeeded = len(outputs.stderr) == 0
        return ExecResult(is_success=succeeded,
                          outputs=outputs,
                          provenance=provenance)
예제 #3
0
    def execute_script(self, args, context):
        """Execute a Python script in the given context.

        The script is run in-process with ``exec``. A ``VizierDBClient`` for
        the context's datastore and datasets is placed in the script's scope
        under ``VARS_DBCLIENT``. Output written to the standard streams is
        captured and returned as module output. The execution is considered
        failed if the script raises an exception or writes to standard error.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        RuntimeError
            If the read/write mappings collected by the client contain keys
            or values of unexpected types
        """
        # Get Python script from user arguments
        source = args.get_value(cmd.PYTHON_SOURCE)
        # Initialize the scope variables that are available to the executed
        # Python script. At this point this includes only the client to access
        # and manipulate datasets in the underlying datastore
        client = VizierDBClient(
            datastore=context.datastore,
            datasets=context.datasets
        )
        variables = {VARS_DBCLIENT: client}
        # Redirect standard output and standard error streams
        out = sys.stdout
        err = sys.stderr
        stream = list()
        sys.stdout = OutputStream(tag='out', stream=stream)
        sys.stderr = OutputStream(tag='err', stream=stream)
        # Keep track of exception that is thrown by the code
        exception = None
        # Run the Python code
        try:
            python_cell_preload(variables)
            # SECURITY: the user-provided source runs with the full
            # privileges of this process. This is the purpose of the cell,
            # but it must only be exposed to trusted users.
            exec(source, variables, variables)
        except Exception as ex:
            exception = ex
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        # Set module outputs
        outputs = ModuleOutputs()
        is_success = (exception is None)
        for tag, text in stream:
            text = ''.join(text).strip()
            if tag == 'out':
                outputs.stdout.append(HtmlOutput(text))
            else:
                # Anything written to standard error marks the run as failed,
                # even if no exception was raised
                outputs.stderr.append(TextOutput(text))
                is_success = False
        if is_success:
            # Create provenance information. Ensure that all dictionaries
            # contain elements of expected types, i.e, ensure that the user did
            # not attempt anything tricky.
            read = dict()
            for name in client.read:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                if name in context.datasets:
                    read[name] = context.datasets[name]
                    if not isinstance(read[name], str):
                        raise RuntimeError('invalid element in mapping dictionary')
                else:
                    read[name] = None
            write = dict()
            for name in client.write:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                ds_id = client.datasets[name]
                if ds_id is None:
                    write[name] = None
                elif not isinstance(ds_id, str):
                    raise RuntimeError('invalid value in mapping dictionary')
                elif ds_id in client.descriptors:
                    write[name] = client.descriptors[ds_id]
                else:
                    write[name] = client.datastore.get_descriptor(ds_id)
            provenance = ModuleProvenance(
                read=read,
                write=write,
                delete=client.delete
            )
        else:
            # A run can fail without an exception when the script only wrote
            # to standard error. In that case the captured text is already in
            # the outputs and there is no exception to report.
            if exception is not None:
                outputs.error(exception)
            provenance = ModuleProvenance()
        # Return execution result
        return ExecResult(
            is_success=is_success,
            outputs=outputs,
            provenance=provenance
        )
예제 #4
0
    def execute_script(self, args: ModuleArguments,
                       context: TaskContext) -> ExecResult:
        """Execute a Python script in the given context.

        Source code of Python artifacts exported by earlier cells, plus a
        small set of helper definitions (``show``, ``export``,
        ``return_type``), is prepended to the cell source. The combined
        source is then either posted to the sandbox service (when
        ``SANDBOX_PYTHON_EXECUTION`` equals ``"True"``) or run in-process
        with ``exec``. The execution is considered failed if an exception is
        raised or anything is written to standard error.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        RuntimeError
            If the read/write sets collected during execution reference
            unknown artifacts or contain elements of unexpected types
        """
        # Get Python script from user arguments.  It is the source for VizierDBClient
        cell_src = args.get_value(cmd.PYTHON_SOURCE)

        # prepend python objects exported in previous cells to the source
        exported_methods = [
            context.datastore.get_object(descriptor.identifier).decode()
            for name, descriptor in context.dataobjects.items()
            if descriptor.artifact_type == ARTIFACT_TYPE_PYTHON
        ]
        overrides = [
            "def show(x):", "  global vizierdb", "  vizierdb.show(x)",
            "def export(x):", "  global vizierdb",
            "  vizierdb.export_module(x)", "def return_type(dt):",
            "  def wrap(x):", "    return x", "  return wrap", "pass"
        ]

        injected_source = "\n".join(exported_methods + overrides)
        # Number of lines that precede the user's code in the executed
        # source. Used below to shift reported error positions back.
        injected_lines = injected_source.count('\n') + 1

        source = injected_source + '\n' + cell_src

        # Initialize the scope variables that are available to the executed
        # Python script. At this point this includes only the client to access
        # and manipulate datasets in the underlying datastore
        #
        # Use "any" type, since there's a (probably unnecessary) hack down
        # below that creates something that pretends to be a client.
        client: Any = VizierDBClient(datastore=context.datastore,
                                     datasets=context.datasets,
                                     source=cell_src,
                                     dataobjects=context.dataobjects,
                                     project_id=context.project_id,
                                     output_format=args.get_value(
                                         cmd.OUTPUT_FORMAT,
                                         default_value=OUTPUT_TEXT))
        variables = {VARS_DBCLIENT: client, VARS_OPEN: client.pycell_open}
        use_sandbox = (SANDBOX_PYTHON_EXECUTION == "True")
        # Redirect standard output and standard error streams
        out = sys.stdout
        err = sys.stderr
        stream: List[Tuple[str, str]] = list()
        sys.stdout = cast(TextIO, OutputStream(tag='out', stream=stream))
        sys.stderr = cast(TextIO, OutputStream(tag='err', stream=stream))
        # Keep track of exception that is thrown by the code
        exception = None
        resdata: Dict[str, Any] = dict()
        # Run the Python code
        try:
            python_cell_preload(variables, client=client)
            if use_sandbox:
                json_data = {
                    'source': source,
                    'datasets': context.datasets,
                    'dataobjects': context.dataobjects,
                    'datastore': context.datastore.__class__.__name__,
                    'basepath': context.datastore.base_path,
                    'project_id': context.project_id,
                    'output_format': client.output_format
                }
                res = requests.post(SANDBOX_PYTHON_URL, json=json_data)
                resdata = res.json()
                # Build the stand-in client completely before replacing the
                # real one. If the response is missing a field, the code
                # below can then still rely on the original client.
                sandbox_client = DotDict()
                for key, value in resdata['provenance'].items():
                    sandbox_client.setattr(key, value)
                sandbox_client.setattr('descriptors', {})
                sandbox_client.setattr('datastore', context.datastore)
                sandbox_client.setattr('datasets', resdata['datasets'])
                sandbox_client.setattr('dataobjects', resdata['dataobjects'])
                sandbox_client.setattr('output_format',
                                       resdata['output_format'])
                sandbox_client.setattr('stdout', [
                    OutputObject(type=item['type'], value=item['value'])
                    for item in resdata.get('explicit_stdout', [])
                ])
                client = sandbox_client
            else:
                # SECURITY: the user-provided source runs with the full
                # privileges of this process.
                exec(source, variables, variables)

        except Exception as ex:
            exception = ex
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        # Set module outputs
        outputs = ModuleOutputs()
        is_success = (exception is None)
        if use_sandbox:
            # The response may be empty or incomplete if the sandbox request
            # failed. Missing streams are treated as empty so that the
            # original exception is reported instead of a KeyError.
            for text in resdata.get('stdout', []):
                outputs.stdout.append(
                    OutputObject(value=text, type=client.output_format))
            for text in resdata.get('stderr', []):
                outputs.stderr.append(TextOutput(text))
                is_success = False
        else:
            for tag, text in stream:
                text = ''.join(text).strip()
                if tag == 'out':
                    outputs.stdout.append(
                        OutputObject(value=text, type=client.output_format))
                else:
                    # Anything written to standard error marks the run as
                    # failed, even if no exception was raised
                    outputs.stderr.append(TextOutput(text))
                    is_success = False
        for output in client.stdout:
            outputs.stdout.append(output)

        if is_success:
            # Create provenance information. Ensure that all dictionaries
            # contain elements of expected types, i.e, ensure that the user did
            # not attempt anything tricky.
            read = dict()
            for name in client.read:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                if name in context.datasets:
                    identifier = context.datasets[name].identifier
                elif name in context.dataobjects:
                    identifier = context.dataobjects[name].identifier
                else:
                    raise RuntimeError('Unknown read artifact {}'.format(name))
                if not isinstance(identifier, str):
                    raise RuntimeError(
                        'invalid element in read mapping dictionary: {} (expecting str)'
                        .format(identifier))
                read[name] = identifier
            write = dict()
            for name in client.write:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                if name in client.datasets:
                    write_descriptor = client.datasets[name]
                elif name in client.dataobjects:
                    write_descriptor = client.dataobjects[name]
                else:
                    raise RuntimeError(
                        'Unknown write artifact {}'.format(name))
                if not isinstance(write_descriptor, ArtifactDescriptor):
                    raise RuntimeError(
                        'invalid element in write mapping dictionary: {} (expecting ArtifactDescriptor)'
                        .format(name))
                write[name] = write_descriptor
            print("Pycell Execution Finished")
            print("     read: {}".format(read))
            print("     write: {}".format(write))
            provenance = ModuleProvenance(read=read,
                                          write=write,
                                          delete=client.delete)
        else:
            print("ERROR: {}".format(exception))
            # A run can fail without an exception when the script only wrote
            # to standard error; there is nothing further to report then.
            if exception is not None:
                # Shift reported positions so they refer to the user's cell
                # source rather than the source with the injected prefix
                outputs.error(exception, offset_lines=-injected_lines)
            provenance = ModuleProvenance()
        # Return execution result
        return ExecResult(is_success=is_success,
                          outputs=outputs,
                          provenance=provenance)