示例#1
0
    def execute_script(self, args, context):
        """Execute a R script in the given context.

        The script source is handed to ``mimir.evalR`` together with a mapping
        from the dataset names in the context to the identifiers of the
        corresponding datasets in the datastore. The 'stdout' entry of the
        response is reported as HTML output, the 'stderr' entry as text
        output. Everything that is written to ``sys.stdout``/``sys.stderr``
        while the method runs is captured and appended to the module outputs.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
            The result is flagged as successful only if the standard error
            stream of the module outputs is empty.

        Raises
        ------
        ValueError
            If the datastore returns no dataset for one of the datasets that
            are listed in the context.
        """
        # Get R script from user arguments
        source = args.get_value(cmd.PARA_R_SOURCE)
        # Redirect standard output and standard error streams
        out = sys.stdout
        err = sys.stderr
        stream = list()
        sys.stdout = OutputStream(tag='out', stream=stream)
        sys.stderr = OutputStream(tag='err', stream=stream)
        try:
            outputs = ModuleOutputs()
            # Map dataset names to datastore identifiers. This lookup may
            # raise, therefore it has to run under the 'finally' clause that
            # restores the output streams. The ValueError still propagates to
            # the caller.
            mimir_table_names = dict()
            for ds_name_o in context.datasets:
                dataset_id = context.datasets[ds_name_o]
                dataset = context.datastore.get_dataset(dataset_id)
                if dataset is None:
                    raise ValueError('unknown dataset \'' + ds_name_o + '\'')
                mimir_table_names[ds_name_o] = dataset.identifier
            # Run the r code. Errors during evaluation are reported as module
            # output instead of being raised.
            try:
                evalresp = mimir.evalR(mimir_table_names, source)
                ostd = evalresp['stdout']
                oerr = evalresp['stderr']
                if not ostd == '':
                    outputs.stdout.append(HtmlOutput(ostd))
                if not oerr == '':
                    outputs.stderr.append(TextOutput(oerr))
            except Exception as ex:
                outputs.error(ex)
        finally:
            # Make sure to reverse redirection of output streams, no matter
            # how the block above was left
            sys.stdout = out
            sys.stderr = err
        # Set module outputs from everything that was captured while the
        # streams were redirected
        for tag, text in stream:
            text = ''.join(text).strip()
            if tag == 'out':
                outputs.stdout.append(HtmlOutput(text))
            else:
                outputs.stderr.append(TextOutput(text))
        provenance = ModuleProvenance()
        # Return execution result
        return ExecResult(
            is_success=(len(outputs.stderr) == 0),
            outputs=outputs,
            provenance=provenance
        )
示例#2
0
    def compute_empty_dataset(self, args, context):
        """Execute empty dataset command.

        Creates a view from a ``SELECT`` statement that yields one literal
        value per default column, registers that view as a dataset in the
        datastore of the context and reports the new dataset in the
        provenance under the (lower-cased) name given by the user. Errors
        that occur while the view or the dataset are created are reported as
        module output.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        ValueError
            If a dataset with the given name exists in the context or if the
            name is rejected by ``is_valid_name``.
        """
        # Pairs of (SQL default value expression, column name)
        default_columns = [("''", "unnamed_column")]
        outputs = ModuleOutputs()
        ds_name = args.get_value(pckg.PARA_NAME).lower()
        # Validate the dataset name. The existence check comes first.
        if ds_name in context.datasets:
            raise ValueError("dataset '" + ds_name + "' exists")
        if not is_valid_name(ds_name):
            raise ValueError("invalid dataset name '" + ds_name + "'")
        try:
            # Build the SELECT clause that defines the view
            select_items = [
                value + " AS " + alias for value, alias in default_columns
            ]
            source = "SELECT {};".format(", ".join(select_items))
            view_name, _dependencies = mimir.createView(dict(), source)
            # Column identifiers are the positions in the default column list
            columns = [
                MimirDatasetColumn(identifier=position, name_in_dataset=alias)
                for position, (_, alias) in enumerate(default_columns)
            ]
            ds = context.datastore.register_dataset(
                table_name=view_name,
                columns=columns,
                row_counter=1
            )
            descriptor = DatasetDescriptor(
                identifier=ds.identifier,
                columns=ds.columns,
                row_count=ds.row_count
            )
            # The empty read dictionary explicitly declares that the module
            # has no dependencies.
            provenance = ModuleProvenance(
                write={ds_name: descriptor},
                read=dict()
            )
            message = "Empty dataset '{}' created".format(ds_name)
            outputs.stdout.append(TextOutput(message))
        except Exception as ex:
            provenance = ModuleProvenance()
            outputs.error(ex)
        return ExecResult(
            is_success=(len(outputs.stderr) == 0),
            outputs=outputs,
            provenance=provenance
        )
示例#3
0
    def execute_script(self, args, context):
        """Execute a Markdown script in the given context.

        The Markdown source is not evaluated. A non-empty source is passed on
        unchanged as Markdown output of the module. Text that is written to
        ``sys.stdout``/``sys.stderr`` while the method runs is captured and
        added to the module outputs as well.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
            The result is flagged as successful only if the standard error
            stream of the module outputs is empty.
        """
        markdown_text = args.get_value(cmd.PARA_MARKDOWN_SOURCE)
        # Remember the current streams and replace them with capturing streams
        saved_stdout, saved_stderr = sys.stdout, sys.stderr
        captured = []
        sys.stdout = OutputStream(tag='out', stream=captured)
        sys.stderr = OutputStream(tag='err', stream=captured)
        outputs = ModuleOutputs()
        try:
            # TODO: validate the markdown here. Until then there is never an
            # error message to report.
            error_text = ''
            if markdown_text != '':
                outputs.stdout.append(MarkdownOutput(markdown_text))
            if error_text != '':
                outputs.stderr.append(TextOutput(error_text))
        except Exception as ex:
            outputs.error(ex)
        finally:
            # Always put the original streams back in place
            sys.stdout = saved_stdout
            sys.stderr = saved_stderr
        # Add the captured text to the matching module output stream
        for tag, chunks in captured:
            text = ''.join(chunks).strip()
            if tag != 'out':
                outputs.stderr.append(TextOutput(text))
                continue
            outputs.stdout.append(MarkdownOutput(text))
        return ExecResult(
            is_success=(len(outputs.stderr) == 0),
            outputs=outputs,
            provenance=ModuleProvenance()
        )
示例#4
0
    def execute_query(self, args: ModuleArguments,
                      context: TaskContext) -> ExecResult:
        """Execute a SQL query in the given context.

        The query is turned into a view by ``mimir.createView``. The first
        rows of the resulting dataset are fetched through the web service API
        and added to the module outputs. The provenance lists the new dataset
        as written and the datasets and functions the query depends on as
        read.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
            The result is flagged as failed if creating the view, fetching
            the preview or building the provenance raised an exception.

        Raises
        ------
        ValueError
            If the datastore returns no dataset for one of the datasets that
            are listed in the context.
        """
        # Get SQL source code that is in this cell. The query is passed to
        # the backend as is (no terminating semicolon is added).
        source = args.get_value(cmd.PARA_SQL_SOURCE)
        ds_name = args.get_value(cmd.PARA_OUTPUT_DATASET, raise_error=False)
        # Get mapping of datasets in the context to their respective table
        # name in the Mimir backend
        mimir_table_names = dict()
        for ds_name_o in context.datasets:
            dataset_id = context.datasets[ds_name_o].identifier
            dataset = context.datastore.get_dataset(dataset_id)
            if dataset is None:
                raise ValueError('unknown dataset \'' + ds_name_o + '\'')
            mimir_table_names[ds_name_o] = dataset.identifier
        # Module outputs
        outputs = ModuleOutputs()
        is_success = True
        # Python artifacts in the context are made available to the query as
        # functions (name -> artifact identifier)
        functions = {
            name: context.dataobjects[name].identifier
            for name in context.dataobjects
            if context.dataobjects[name].obj_type == ARTIFACT_TYPE_PYTHON
        }
        try:
            # Create the view from the SQL source
            view_name, dependencies, mimirSchema, properties, functionDeps = mimir.createView(
                datasets=mimir_table_names,
                query=source,
                functions=dict(functions))
            # The handle is created with the name as given by the user, i.e.,
            # before the default name is applied below.
            ds = MimirDatasetHandle.from_mimir_result(view_name, mimirSchema,
                                                      properties, ds_name)

            print(mimirSchema)

            if ds_name is None or ds_name == '':
                ds_name = "TEMPORARY_RESULT"

            # Imported here rather than at module level (presumably to avoid
            # a circular import -- TODO confirm)
            from vizier.api.webservice import server

            # Fetch the first ten rows as a preview of the result
            ds_output = server.api.datasets.get_dataset(
                project_id=context.project_id,
                dataset_id=ds.identifier,
                offset=0,
                limit=10)
            if ds_output is None:
                outputs.stderr.append(
                    TextOutput("Error displaying dataset {}".format(ds_name)))
            else:
                ds_output['name'] = ds_name
                outputs.stdout.append(DatasetOutput(ds_output))

            # Datasets the query reads. Names that are unknown to the context
            # are ignored.
            dependenciesDict: Dict[str, str] = {
                dep_name.lower(): get_artifact_id(dep)
                for dep_name, dep in [(
                    dep_name, context.datasets.get(dep_name.lower(), None))
                                      for dep_name in dependencies]
                if dep is not None
            }
            # Functions the query reads. The function dependencies returned
            # by the backend used to be ignored here, only 'dependencies' was
            # consulted. Both lists are used now, which yields a superset of
            # the previous result.
            # NOTE(review): presumably functionDeps is a list of function
            # names -- confirm against mimir.createView.
            function_dep_names = list(dependencies) + list(functionDeps or [])
            functionDepDict: Dict[str, str] = {
                dep_name.lower(): get_artifact_id(dep)
                for dep_name, dep in [(
                    dep_name, context.dataobjects.get(dep_name.lower(), None))
                                      for dep_name in function_dep_names]
                if dep is not None
            }

            provenance = ModuleProvenance(write={
                ds_name:
                DatasetDescriptor(identifier=ds.identifier,
                                  name=ds_name,
                                  columns=ds.columns)
            },
                                          read={
                                              **dependenciesDict,
                                              **functionDepDict
                                          })
        except Exception as ex:
            provenance = ModuleProvenance()
            outputs.error(ex)
            is_success = False
        # Return execution result
        return ExecResult(is_success=is_success,
                          outputs=outputs,
                          provenance=provenance)
示例#5
0
    def execute_query(self, args, context):
        """Execute a SQL query in the given context.

        The query (terminated by a semicolon) is turned into a view by the
        Mimir backend. The view is registered as a dataset in the datastore
        of the context and its first rows are added to the module outputs.
        The provenance lists the new dataset as written and the datasets the
        view depends on as read.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult
            The result is flagged as successful only if the standard error
            stream of the module outputs is empty.

        Raises
        ------
        ValueError
            If the datastore returns no dataset for one of the datasets that
            are listed in the context.
        """
        # SQL source of the cell; make sure it ends with a semicolon
        query = args.get_value(cmd.PARA_SQL_SOURCE)
        if not query.endswith(';'):
            query += ';'
        ds_name = args.get_value(cmd.PARA_OUTPUT_DATASET, raise_error=False)
        # Map each dataset name in the context to the name of its table in
        # the Mimir backend
        table_names = {}
        for name in context.datasets:
            dataset = context.datastore.get_dataset(context.datasets[name])
            if dataset is None:
                raise ValueError("unknown dataset '" + name + "'")
            table_names[name] = dataset.table_name
        outputs = ModuleOutputs()
        try:
            # Create the view and derive the dataset columns from its schema.
            # Column identifiers are the positions in the schema.
            view_name, dependencies = mimir.createView(table_names, query)
            schema = mimir.getSchema('SELECT * FROM ' + view_name)
            columns = [
                MimirDatasetColumn(identifier=position,
                                   name_in_dataset=column['name'])
                for position, column in enumerate(schema)
            ]
            row_count = mimir.countRows(view_name)

            # Fall back to a default name if the user gave none
            if ds_name in (None, ''):
                ds_name = "TEMPORARY_RESULT"

            ds = context.datastore.register_dataset(
                table_name=view_name,
                columns=columns,
                row_counter=row_count
            )
            # Fetch the first ten rows as a preview of the result
            preview = server.api.datasets.get_dataset(
                project_id=context.project_id,
                dataset_id=ds.identifier,
                offset=0,
                limit=10
            )
            preview['name'] = ds_name

            # Lower-cased dependency name -> entry in the context (None if
            # the context does not know the name)
            read_datasets = {
                dep_name.lower(): context.datasets.get(dep_name.lower(), None)
                for dep_name in dependencies
            }

            outputs.stdout.append(DatasetOutput(preview))
            descriptor = DatasetDescriptor(
                identifier=ds.identifier,
                columns=ds.columns,
                row_count=ds.row_count
            )
            provenance = ModuleProvenance(
                write={ds_name: descriptor},
                read=read_datasets
            )
        except Exception as ex:
            provenance = ModuleProvenance()
            outputs.error(ex)
        return ExecResult(
            is_success=(len(outputs.stderr) == 0),
            outputs=outputs,
            provenance=provenance
        )
示例#6
0
    def execute_script(self, args, context):
        """Execute a Python script in the given context.

        The script runs via ``exec`` in a scope that contains a
        ``VizierDBClient`` for the datasets in the context. Standard output
        and standard error are captured while the script runs. The execution
        counts as failed if the script raises an exception or writes to
        standard error. On success the provenance is derived from the
        datasets that were read, written and deleted through the client.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        RuntimeError
            If the read or write mappings of the client contain keys or
            values of unexpected types after a successful run.
        """
        # Get Python script from user arguments
        source = args.get_value(cmd.PYTHON_SOURCE)
        # Initialize the scope variables that are available to the executed
        # Python script. At this point this includes only the client to access
        # and manipulate datasets in the undelying datastore
        client = VizierDBClient(
            datastore=context.datastore,
            datasets=context.datasets
        )
        variables = {VARS_DBCLIENT: client}
        # Redirect standard output and standard error streams
        out = sys.stdout
        err = sys.stderr
        stream = list()
        sys.stdout = OutputStream(tag='out', stream=stream)
        sys.stderr = OutputStream(tag='err', stream=stream)
        # Keep track of exception that is thrown by the code
        exception = None
        # Run the Python code
        # NOTE(review): exec runs user-provided source inside this process.
        try:
            python_cell_preload(variables)
            exec(source, variables, variables)
        except Exception as ex:
            exception = ex
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        # Set module outputs
        outputs = ModuleOutputs()
        is_success = (exception is None)
        for tag, text in stream:
            text = ''.join(text).strip()
            if tag == 'out':
                outputs.stdout.append(HtmlOutput(text))
            else:
                # Anything on standard error marks the execution as failed
                outputs.stderr.append(TextOutput(text))
                is_success = False
        if is_success:
            # Create provenance information. Ensure that all dictionaries
            # contain elements of expected types, i.e, ensure that the user did
            # not attempt anything tricky.
            read = dict()
            for name in client.read:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                if name in context.datasets:
                    read[name] = context.datasets[name]
                    if not isinstance(read[name], str):
                        raise RuntimeError('invalid element in mapping dictionary')
                else:
                    read[name] = None
            write = dict()
            for name in client.write:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                ds_id = client.datasets[name]
                if ds_id is not None:
                    if not isinstance(ds_id, str):
                        raise RuntimeError('invalid value in mapping dictionary')
                    elif ds_id in client.descriptors:
                        write[name] = client.descriptors[ds_id]
                    else:
                        write[name] = client.datastore.get_descriptor(ds_id)
                else:
                    write[name] = None
            provenance = ModuleProvenance(
                read=read,
                write=write,
                delete=client.delete
            )
        else:
            # The execution may have failed only because the script wrote to
            # standard error. There is no exception to report in that case
            # and the captured text is already part of the outputs.
            if exception is not None:
                outputs.error(exception)
            provenance = ModuleProvenance()
        # Return execution result
        return ExecResult(
            is_success=is_success,
            outputs=outputs,
            provenance=provenance
        )
示例#7
0
    def execute_script(self, args: ModuleArguments,
                       context: TaskContext) -> ExecResult:
        """Execute a Python script in the given context.

        The source of all Python artifacts in the context and a small set of
        helper definitions (``show``, ``export``, ``return_type``) are
        prepended to the cell source. Depending on
        ``SANDBOX_PYTHON_EXECUTION`` the resulting script is either posted to
        the sandbox service at ``SANDBOX_PYTHON_URL`` or run via ``exec`` in
        this process. The execution counts as failed if an exception is
        raised or if anything is written to standard error. On success the
        provenance is derived from the artifacts that were read, written and
        deleted through the client.

        Parameters
        ----------
        args: vizier.viztrail.command.ModuleArguments
            User-provided command arguments
        context: vizier.engine.task.base.TaskContext
            Context in which a task is being executed

        Returns
        -------
        vizier.engine.task.processor.ExecResult

        Raises
        ------
        RuntimeError
            If, after a successful run, the read or write sets of the client
            contain names that are not strings, names that are unknown, or
            entries of unexpected types.
        """
        # Get Python script from user arguments.  It is the source for VizierDBClient
        cell_src = args.get_value(cmd.PYTHON_SOURCE)

        # prepend python objects exported in previous cells to the source
        exported_methods = [
            context.datastore.get_object(descriptor.identifier).decode()
            for name, descriptor in context.dataobjects.items()
            if descriptor.artifact_type == ARTIFACT_TYPE_PYTHON
        ]
        overrides = [
            "def show(x):", "  global vizierdb", "  vizierdb.show(x)",
            "def export(x):", "  global vizierdb",
            "  vizierdb.export_module(x)", "def return_type(dt):",
            "  def wrap(x):", "    return x", "  return wrap", "pass"
        ]

        injected_source = "\n".join(exported_methods + overrides)
        # Number of lines that precede the cell source in the executed
        # script. Used below to shift line numbers in error reports.
        injected_lines = injected_source.count('\n') + 1

        source = injected_source + '\n' + cell_src

        # Initialize the scope variables that are available to the executed
        # Python script. At this point this includes only the client to access
        # and manipulate datasets in the undelying datastore
        #
        # Use "any" type, since there's a (probably unnecessary) hack down
        # below that creates something that pretends to be a client.
        client: Any = VizierDBClient(datastore=context.datastore,
                                     datasets=context.datasets,
                                     source=cell_src,
                                     dataobjects=context.dataobjects,
                                     project_id=context.project_id,
                                     output_format=args.get_value(
                                         cmd.OUTPUT_FORMAT,
                                         default_value=OUTPUT_TEXT))
        variables = {VARS_DBCLIENT: client, VARS_OPEN: client.pycell_open}
        use_sandbox = (SANDBOX_PYTHON_EXECUTION == "True")
        # Redirect standard output and standard error streams
        out = sys.stdout
        err = sys.stderr
        stream: List[Tuple[str, str]] = list()
        sys.stdout = cast(TextIO, OutputStream(tag='out', stream=stream))
        sys.stderr = cast(TextIO, OutputStream(tag='err', stream=stream))
        # Keep track of exception that is thrown by the code
        exception = None
        resdata: Dict[str, Any] = dict()
        # Run the Python code
        try:
            python_cell_preload(variables, client=client)
            if use_sandbox:
                json_data = {
                    'source': source,
                    'datasets': context.datasets,
                    'dataobjects': context.dataobjects,
                    'datastore': context.datastore.__class__.__name__,
                    'basepath': context.datastore.base_path,
                    'project_id': context.project_id,
                    'output_format': client.output_format
                }
                res = requests.post(SANDBOX_PYTHON_URL, json=json_data)
                resdata = res.json()
                # Build the stand-in for the client from the response. The
                # real client is replaced only once the stand-in is complete,
                # so that the code below never sees a half-populated object
                # if the response lacks one of the expected entries.
                sandbox_client = DotDict()
                for key, value in resdata['provenance'].items():
                    sandbox_client.setattr(key, value)
                sandbox_client.setattr('descriptors', {})
                sandbox_client.setattr('datastore', context.datastore)
                sandbox_client.setattr('datasets', resdata['datasets'])
                sandbox_client.setattr('dataobjects', resdata['dataobjects'])
                sandbox_client.setattr('output_format',
                                       resdata['output_format'])
                sandbox_client.setattr('stdout', [
                    OutputObject(type=item['type'], value=item['value'])
                    for item in resdata.get('explicit_stdout', [])
                ])
                client = sandbox_client
            else:
                # NOTE(review): exec runs user-provided source inside this
                # process.
                exec(source, variables, variables)

        except Exception as ex:
            exception = ex
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        # Set module outputs
        outputs = ModuleOutputs()
        is_success = (exception is None)
        if use_sandbox:
            # resdata is empty if the request to the sandbox failed. Use
            # defaults so that the original exception is reported instead of
            # a KeyError.
            for text in resdata.get('stdout', []):
                outputs.stdout.append(
                    OutputObject(value=text, type=client.output_format))
            for text in resdata.get('stderr', []):
                outputs.stderr.append(TextOutput(text))
                is_success = False
        else:
            for tag, text in stream:
                text = ''.join(text).strip()
                if tag == 'out':
                    outputs.stdout.append(
                        OutputObject(value=text, type=client.output_format))
                else:
                    # Anything on standard error marks the execution as
                    # failed
                    outputs.stderr.append(TextOutput(text))
                    is_success = False
        # Outputs that were collected by the client itself
        for output in client.stdout:
            outputs.stdout.append(output)

        if is_success:
            # Create provenance information. Ensure that all dictionaries
            # contain elements of expected types, i.e, ensure that the user did
            # not attempt anything tricky.
            read = dict()
            for name in client.read:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                # Datasets take precedence over other data objects
                if name in context.datasets:
                    read[name] = context.datasets[name].identifier
                elif name in context.dataobjects:
                    read[name] = context.dataobjects[name].identifier
                else:
                    raise RuntimeError('Unknown read artifact {}'.format(name))
                if not isinstance(read[name], str):
                    raise RuntimeError(
                        'invalid element in read mapping dictionary: {} (expecting str)'
                        .format(read[name]))
            write = dict()
            for name in client.write:
                if not isinstance(name, str):
                    raise RuntimeError('invalid key for mapping dictionary')
                # Datasets take precedence over other data objects
                if name in client.datasets:
                    write_descriptor = client.datasets[name]
                elif name in client.dataobjects:
                    write_descriptor = client.dataobjects[name]
                else:
                    raise RuntimeError(
                        'Unknown write artifact {}'.format(name))
                if not isinstance(write_descriptor, ArtifactDescriptor):
                    raise RuntimeError(
                        'invalid element in write mapping dictionary: {} (expecting ArtifactDescriptor)'
                        .format(name))
                write[name] = write_descriptor
            print("Pycell Execution Finished")
            print("     read: {}".format(read))
            print("     write: {}".format(write))
            provenance = ModuleProvenance(read=read,
                                          write=write,
                                          delete=client.delete)
        else:
            print("ERROR: {}".format(exception))
            # The execution may have failed only because something was
            # written to standard error. There is no exception to report in
            # that case and the text is already part of the outputs.
            if exception is not None:
                outputs.error(exception, offset_lines=-injected_lines)
            provenance = ModuleProvenance()
        # Return execution result
        return ExecResult(is_success=is_success,
                          outputs=outputs,
                          provenance=provenance)