def execute_script(self, args, context):
    """Execute a R script in the given context.

    The script is evaluated through ``mimir.evalR``, which receives the
    script source together with a mapping from the dataset names in the
    context to the identifiers of the corresponding datastore datasets.

    While the method runs, ``sys.stdout`` and ``sys.stderr`` are replaced
    by in-memory ``OutputStream`` objects. The original streams are always
    restored before the method returns or raises.

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult
        The result is successful if nothing was written to the module's
        standard error output.

    Raises
    ------
    ValueError
        If a dataset listed in the context cannot be found in the
        datastore.
    """
    # Get R script from user arguments
    source = args.get_value(cmd.PARA_R_SOURCE)
    # Redirect standard output and standard error streams
    out = sys.stdout
    err = sys.stderr
    stream = list()
    sys.stdout = OutputStream(tag='out', stream=stream)
    sys.stderr = OutputStream(tag='err', stream=stream)
    try:
        outputs = ModuleOutputs()
        # Resolve the datasets while the redirection is active, but inside
        # the try/finally: an unknown dataset raises ValueError to the
        # caller, and the process-wide streams must not stay redirected
        # when that happens.
        mimir_table_names = dict()
        for ds_name_o in context.datasets:
            dataset_id = context.datasets[ds_name_o]
            dataset = context.datastore.get_dataset(dataset_id)
            if dataset is None:
                raise ValueError('unknown dataset \'' + ds_name_o + '\'')
            mimir_table_names[ds_name_o] = dataset.identifier
        # Run the r code. Errors raised by the evaluation are reported as
        # module output instead of being propagated.
        try:
            evalresp = mimir.evalR(mimir_table_names, source)
            ostd = evalresp['stdout']
            oerr = evalresp['stderr']
            if ostd != '':
                outputs.stdout.append(HtmlOutput(ostd))
            if oerr != '':
                outputs.stderr.append(TextOutput(oerr))
        except Exception as ex:
            outputs.error(ex)
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = out
        sys.stderr = err
    # Set module outputs from everything that was written to the
    # redirected streams
    for tag, text in stream:
        text = ''.join(text).strip()
        if tag == 'out':
            outputs.stdout.append(HtmlOutput(text))
        else:
            outputs.stderr.append(TextOutput(text))
    provenance = ModuleProvenance()
    # Return execution result
    return ExecResult(
        is_success=(len(outputs.stderr) == 0),
        outputs=outputs,
        provenance=provenance
    )
def compute_empty_dataset(self, args, context):
    """Execute empty dataset command.

    Creates a Mimir view that selects one default value per placeholder
    column, registers the view as a dataset in the datastore, and records
    the new dataset under the (lower-cased) user-provided name in the
    write provenance.

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult

    Raises
    ------
    ValueError
        If a dataset with the given name already exists in the context or
        if the name is not a valid dataset name.
    """
    outputs = ModuleOutputs()
    # Pairs of (SQL default value expression, column name)
    placeholder_columns = [("''", "unnamed_column")]
    ds_name = args.get_value(pckg.PARA_NAME).lower()
    # Name validation errors are raised to the caller rather than being
    # reported as module output.
    if ds_name in context.datasets:
        raise ValueError("dataset '{}' exists".format(ds_name))
    if not is_valid_name(ds_name):
        raise ValueError("invalid dataset name '{}'".format(ds_name))
    try:
        select_items = [
            "{} AS {}".format(default_val, col_name)
            for default_val, col_name in placeholder_columns
        ]
        source = "SELECT {};".format(", ".join(select_items))
        view_name, _ = mimir.createView(dict(), source)
        # Column identifiers are the positions in the placeholder list
        columns = [
            MimirDatasetColumn(identifier=col_id, name_in_dataset=col_name)
            for col_id, (_, col_name) in enumerate(placeholder_columns)
        ]
        ds = context.datastore.register_dataset(
            table_name=view_name,
            columns=columns,
            row_counter=1
        )
        descriptor = DatasetDescriptor(
            identifier=ds.identifier,
            columns=ds.columns,
            row_count=ds.row_count
        )
        provenance = ModuleProvenance(
            write={ds_name: descriptor},
            # Need to explicitly declare a lack of dependencies.
            read=dict()
        )
        message = "Empty dataset '{}' created".format(ds_name)
        outputs.stdout.append(TextOutput(message))
    except Exception as ex:
        provenance = ModuleProvenance()
        outputs.error(ex)
    succeeded = len(outputs.stderr) == 0
    return ExecResult(
        is_success=succeeded,
        outputs=outputs,
        provenance=provenance
    )
def execute_script(self, args, context):
    """Execute a Markdown script in the given context.

    The Markdown source is not interpreted here. Unless it is the empty
    string, it is emitted unchanged as a ``MarkdownOutput`` on the module's
    standard output. Anything written to ``sys.stdout`` / ``sys.stderr``
    while the method runs is captured and appended to the module outputs
    as well.

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult
        The result is successful if nothing was written to the module's
        standard error output.
    """
    # Get Markdown script from user arguments
    source = args.get_value(cmd.PARA_MARKDOWN_SOURCE)
    # Swap in capturing streams; remember the originals for restoration
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    captured = list()
    sys.stdout = OutputStream(tag='out', stream=captured)
    sys.stderr = OutputStream(tag='err', stream=captured)
    outputs = ModuleOutputs()
    try:
        # we should validate the markdown here
        markdown_text = source
        # Placeholder for validation messages; currently never filled
        error_text = ''
        if markdown_text != '':
            outputs.stdout.append(MarkdownOutput(markdown_text))
        if error_text != '':
            outputs.stderr.append(TextOutput(error_text))
    except Exception as ex:
        outputs.error(ex)
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = saved_stdout
        sys.stderr = saved_stderr
    # Turn everything that was captured into module outputs
    for tag, chunks in captured:
        content = ''.join(chunks).strip()
        if tag == 'out':
            outputs.stdout.append(MarkdownOutput(content))
        else:
            outputs.stderr.append(TextOutput(content))
    provenance = ModuleProvenance()
    succeeded = len(outputs.stderr) == 0
    return ExecResult(
        is_success=succeeded,
        outputs=outputs,
        provenance=provenance
    )
def execute_query(self, args: ModuleArguments, context: TaskContext) -> ExecResult:
    """Execute a SQL query in the given context.

    The query is turned into a Mimir view via ``mimir.createView``. The
    first rows of the resulting dataset are fetched through the web
    service API and emitted as a ``DatasetOutput``. The view is recorded in
    the write provenance under the user-provided output dataset name (or
    ``TEMPORARY_RESULT`` if none was given). Datasets and data objects that
    the backend reports as dependencies are recorded in the read
    provenance.

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult

    Raises
    ------
    ValueError
        If a dataset listed in the context cannot be found in the
        datastore.
    """
    # Get SQL source code that is in this cell and the global
    # variables. The source is passed to the backend as written; no
    # trailing semicolon is appended.
    source = args.get_value(cmd.PARA_SQL_SOURCE)
    ds_name = args.get_value(cmd.PARA_OUTPUT_DATASET, raise_error=False)
    # Get mapping of datasets in the context to their respective table
    # name in the Mimir backend
    mimir_table_names = dict()
    for ds_name_o in context.datasets:
        dataset_id = context.datasets[ds_name_o].identifier
        dataset = context.datastore.get_dataset(dataset_id)
        if dataset is None:
            raise ValueError('unknown dataset \'' + ds_name_o + '\'')
        mimir_table_names[ds_name_o] = dataset.identifier
    # Module outputs
    outputs = ModuleOutputs()
    is_success = True
    # Python artifacts in the context are offered to the query as functions
    functions = {
        name: context.dataobjects[name].identifier
        for name in context.dataobjects
        if context.dataobjects[name].obj_type == ARTIFACT_TYPE_PYTHON
    }
    try:
        # Create the view from the SQL source
        view_name, dependencies, mimirSchema, properties, functionDeps = mimir.createView(
            datasets=mimir_table_names,
            query=source,
            functions=dict(functions))
        ds = MimirDatasetHandle.from_mimir_result(
            view_name, mimirSchema, properties, ds_name)
        print(mimirSchema)
        if ds_name is None or ds_name == '':
            ds_name = "TEMPORARY_RESULT"
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from vizier.api.webservice import server
        ds_output = server.api.datasets.get_dataset(
            project_id=context.project_id,
            dataset_id=ds.identifier,
            offset=0,
            limit=10)
        if ds_output is None:
            # NOTE(review): this writes to stderr but leaves is_success
            # untouched - confirm that this is intended.
            outputs.stderr.append(
                TextOutput("Error displaying dataset {}".format(ds_name)))
        else:
            ds_output['name'] = ds_name
            outputs.stdout.append(DatasetOutput(ds_output))
        # Dataset dependencies: only names that are known in the context
        # are recorded.
        dependenciesDict: Dict[str, str] = {}
        for dep_name in dependencies:
            dep = context.datasets.get(dep_name.lower(), None)
            if dep is not None:
                dependenciesDict[dep_name.lower()] = get_artifact_id(dep)
        # Function dependencies: previously only `dependencies` was looked
        # up here and the `functionDeps` value returned by the backend was
        # ignored. Both are consulted now, so nothing that used to be
        # recorded is lost.
        # NOTE(review): assumes functionDeps is an iterable of names like
        # `dependencies` - confirm against mimir.createView.
        functionDepDict: Dict[str, str] = {}
        for dep_name in [*dependencies, *(functionDeps or ())]:
            dep = context.dataobjects.get(dep_name.lower(), None)
            if dep is not None:
                functionDepDict[dep_name.lower()] = get_artifact_id(dep)
        # print("---- SQL DATASETS ----\n{}\n{}".format(context.datasets, dependencies))
        provenance = ModuleProvenance(
            write={
                ds_name: DatasetDescriptor(identifier=ds.identifier,
                                           name=ds_name,
                                           columns=ds.columns)
            },
            read={
                **dependenciesDict,
                **functionDepDict
            })
    except Exception as ex:
        provenance = ModuleProvenance()
        outputs.error(ex)
        is_success = False
    # Return execution result
    return ExecResult(is_success=is_success,
                      outputs=outputs,
                      provenance=provenance)
def execute_query(self, args, context):
    """Execute a SQL query in the given context.

    The query (terminated with a semicolon if it is not already) is turned
    into a Mimir view. The view is registered as a dataset in the
    datastore, the first rows are fetched through the web service API and
    emitted as a ``DatasetOutput``, and the dataset is recorded in the
    write provenance under the user-provided output name (or
    ``TEMPORARY_RESULT`` if none was given).

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult
        The result is successful if nothing was written to the module's
        standard error output.

    Raises
    ------
    ValueError
        If a dataset listed in the context cannot be found in the
        datastore.
    """
    # Get SQL source code that is in this cell and the global
    # variables
    source = args.get_value(cmd.PARA_SQL_SOURCE)
    if not source.endswith(';'):
        source += ';'
    ds_name = args.get_value(cmd.PARA_OUTPUT_DATASET, raise_error=False)
    # Map every dataset name in the context to the name of its table in
    # the Mimir backend
    table_names = dict()
    for known_name in context.datasets:
        handle = context.datastore.get_dataset(context.datasets[known_name])
        if handle is None:
            raise ValueError("unknown dataset '" + known_name + "'")
        table_names[known_name] = handle.table_name
    # Module outputs
    outputs = ModuleOutputs()
    try:
        # Create the view from the SQL source
        view_name, dependencies = mimir.createView(table_names, source)
        schema = mimir.getSchema('SELECT * FROM ' + view_name)
        # Column identifiers are the positions in the view schema
        columns = [
            MimirDatasetColumn(
                identifier=position,
                name_in_dataset=schema_col['name']
            )
            for position, schema_col in enumerate(schema)
        ]
        row_count = mimir.countRows(view_name)
        if ds_name is None or ds_name == '':
            ds_name = "TEMPORARY_RESULT"
        ds = context.datastore.register_dataset(
            table_name=view_name,
            columns=columns,
            row_counter=row_count
        )
        ds_output = server.api.datasets.get_dataset(
            project_id=context.project_id,
            dataset_id=ds.identifier,
            offset=0,
            limit=10
        )
        ds_output['name'] = ds_name
        # Dependencies that are unknown in the context map to None
        dependencies = {
            dep_name.lower(): context.datasets.get(dep_name.lower(), None)
            for dep_name in dependencies
        }
        # print("---- SQL DATASETS ----\n{}\n{}".format(context.datasets, dependencies))
        outputs.stdout.append(DatasetOutput(ds_output))
        descriptor = DatasetDescriptor(
            identifier=ds.identifier,
            columns=ds.columns,
            row_count=ds.row_count
        )
        provenance = ModuleProvenance(
            write={ds_name: descriptor},
            read=dependencies
        )
    except Exception as ex:
        provenance = ModuleProvenance()
        outputs.error(ex)
    # Return execution result
    succeeded = len(outputs.stderr) == 0
    return ExecResult(
        is_success=succeeded,
        outputs=outputs,
        provenance=provenance
    )
def execute_script(self, args, context):
    """Execute a Python script in the given context.

    The script runs via ``exec`` in a scope that contains a
    ``VizierDBClient`` for the datastore and datasets of the context.
    Output written to ``sys.stdout`` / ``sys.stderr`` during execution is
    captured and converted into module outputs. The execution counts as
    failed if the script raised an exception or wrote anything to standard
    error.

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult

    Raises
    ------
    RuntimeError
        If the read/write bookkeeping of the client contains keys or
        values of unexpected types after a successful execution.
    """
    # Get Python script from user arguments
    source = args.get_value(cmd.PYTHON_SOURCE)
    # Initialize the scope variables that are available to the executed
    # Python script. At this point this includes only the client to access
    # and manipulate datasets in the underlying datastore
    client = VizierDBClient(
        datastore=context.datastore,
        datasets=context.datasets
    )
    scope = {VARS_DBCLIENT: client}
    # Swap in capturing streams; remember the originals for restoration
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    captured = list()
    sys.stdout = OutputStream(tag='out', stream=captured)
    sys.stderr = OutputStream(tag='err', stream=captured)
    # Keep track of exception that is thrown by the code
    failure = None
    try:
        python_cell_preload(scope)
        # The user's script is executed in this process by design.
        exec(source, scope, scope)
    except Exception as ex:
        failure = ex
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = saved_stdout
        sys.stderr = saved_stderr
    # Set module outputs
    outputs = ModuleOutputs()
    is_success = failure is None
    for tag, chunks in captured:
        content = ''.join(chunks).strip()
        if tag == 'out':
            outputs.stdout.append(HtmlOutput(content))
            continue
        # Any output on standard error marks the execution as failed
        outputs.stderr.append(TextOutput(content))
        is_success = False
    if not is_success:
        outputs.error(failure)
        provenance = ModuleProvenance()
    else:
        # Create provenance information. Ensure that all dictionaries
        # contain elements of expected types, i.e, ensure that the user did
        # not attempt anything tricky.
        read = dict()
        for name in client.read:
            if not isinstance(name, str):
                raise RuntimeError('invalid key for mapping dictionary')
            if name not in context.datasets:
                read[name] = None
                continue
            read[name] = context.datasets[name]
            if not isinstance(read[name], str):
                raise RuntimeError('invalid element in mapping dictionary')
        write = dict()
        for name in client.write:
            if not isinstance(name, str):
                raise RuntimeError('invalid key for mapping dictionary')
            ds_id = client.datasets[name]
            if ds_id is None:
                write[name] = None
            elif not isinstance(ds_id, str):
                raise RuntimeError('invalid value in mapping dictionary')
            elif ds_id in client.descriptors:
                write[name] = client.descriptors[ds_id]
            else:
                write[name] = client.datastore.get_descriptor(ds_id)
        provenance = ModuleProvenance(
            read=read,
            write=write,
            delete=client.delete
        )
    # Return execution result
    return ExecResult(
        is_success=is_success,
        outputs=outputs,
        provenance=provenance
    )
def execute_script(self, args: ModuleArguments, context: TaskContext) -> ExecResult:
    """Execute a Python script in the given context.

    The cell source is prefixed with the Python objects exported by
    previous cells and with a few helper definitions (``show``, ``export``,
    ``return_type``). The combined source is then either executed in this
    process via ``exec`` or, if ``SANDBOX_PYTHON_EXECUTION`` equals the
    string ``"True"``, posted as JSON to ``SANDBOX_PYTHON_URL``. In the
    latter case a ``DotDict`` built from the response takes the place of
    the client.

    Output written to ``sys.stdout`` / ``sys.stderr`` during execution is
    captured and converted into module outputs. The execution counts as
    failed if an exception was raised or anything was written to standard
    error. A script that only writes to standard error without raising
    yields a failed result carrying that output.

    Parameters
    ----------
    args: vizier.viztrail.command.ModuleArguments
        User-provided command arguments
    context: vizier.engine.task.base.TaskContext
        Context in which a task is being executed

    Returns
    -------
    vizier.engine.task.processor.ExecResult

    Raises
    ------
    RuntimeError
        If, after a successful execution, the read/write bookkeeping of the
        client refers to unknown artifacts or contains keys or values of
        unexpected types.
    """
    # Get Python script from user arguments. It is the source for VizierDBClient
    cell_src = args.get_value(cmd.PYTHON_SOURCE)
    # prepend python objects exported in previous cells to the source
    exported_methods = [
        context.datastore.get_object(descriptor.identifier).decode()
        for _, descriptor in context.dataobjects.items()
        if descriptor.artifact_type == ARTIFACT_TYPE_PYTHON
    ]
    overrides = [
        "def show(x):",
        "  global vizierdb",
        "  vizierdb.show(x)",
        "def export(x):",
        "  global vizierdb",
        "  vizierdb.export_module(x)",
        "def return_type(dt):",
        "  def wrap(x):",
        "    return x",
        "  return wrap",
        "pass"
    ]
    injected_source = "\n".join(exported_methods + overrides)
    # Number of lines placed in front of the cell source; passed as a
    # negative line offset when an exception is reported
    injected_lines = injected_source.count('\n') + 1
    source = injected_source + '\n' + cell_src
    # Initialize the scope variables that are available to the executed
    # Python script. At this point this includes only the client to access
    # and manipulate datasets in the underlying datastore
    #
    # Use "any" type, since there's a (probably unnecessary) hack down
    # below that creates something that pretends to be a client.
    client: Any = VizierDBClient(
        datastore=context.datastore,
        datasets=context.datasets,
        source=cell_src,
        dataobjects=context.dataobjects,
        project_id=context.project_id,
        output_format=args.get_value(cmd.OUTPUT_FORMAT,
                                     default_value=OUTPUT_TEXT))
    variables = {VARS_DBCLIENT: client, VARS_OPEN: client.pycell_open}
    # Redirect standard output and standard error streams
    out = sys.stdout
    err = sys.stderr
    stream: List[Tuple[str, str]] = list()
    sys.stdout = cast(TextIO, OutputStream(tag='out', stream=stream))
    sys.stderr = cast(TextIO, OutputStream(tag='err', stream=stream))
    # Keep track of exception that is thrown by the code
    exception = None
    resdata: Dict[str, Any] = dict()
    # Run the Python code
    try:
        python_cell_preload(variables, client=client)
        if SANDBOX_PYTHON_EXECUTION == "True":
            json_data = {
                'source': source,
                'datasets': context.datasets,
                'dataobjects': context.dataobjects,
                'datastore': context.datastore.__class__.__name__,
                'basepath': context.datastore.base_path,
                'project_id': context.project_id,
                'output_format': client.output_format
            }
            res = requests.post(SANDBOX_PYTHON_URL, json=json_data)
            resdata = res.json()
            # Build the stand-in client in a local and only replace the
            # real client once it is fully populated. If the response is
            # incomplete, `client` then still has every attribute that is
            # read further down.
            sandbox_client = DotDict()
            for key, value in resdata['provenance'].items():
                sandbox_client.setattr(key, value)
            sandbox_client.setattr('descriptors', {})
            sandbox_client.setattr('datastore', context.datastore)
            sandbox_client.setattr('datasets', resdata['datasets'])
            sandbox_client.setattr('dataobjects', resdata['dataobjects'])
            sandbox_client.setattr('output_format', resdata['output_format'])
            sandbox_client.setattr('stdout', [
                OutputObject(type=item['type'], value=item['value'])
                for item in resdata.get('explicit_stdout', [])
            ])
            client = sandbox_client
        else:
            # The user's script is executed in this process by design.
            exec(source, variables, variables)
    except Exception as ex:
        exception = ex
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = out
        sys.stderr = err
    # Set module outputs
    outputs = ModuleOutputs()
    is_success = (exception is None)
    if SANDBOX_PYTHON_EXECUTION == "True":
        # resdata is empty if the sandbox request itself failed; fall back
        # to no output so that the captured exception is reported below
        # instead of being masked by a KeyError.
        for text in resdata.get('stdout', []):
            outputs.stdout.append(
                OutputObject(value=text, type=client.output_format))
        for text in resdata.get('stderr', []):
            outputs.stderr.append(TextOutput(text))
            is_success = False
    else:
        for tag, text in stream:
            text = ''.join(text).strip()
            if tag == 'out':
                outputs.stdout.append(
                    OutputObject(value=text, type=client.output_format))
            else:
                # Any output on standard error marks the execution as failed
                outputs.stderr.append(TextOutput(text))
                is_success = False
    for output in client.stdout:
        outputs.stdout.append(output)
    if is_success:
        # Create provenance information. Ensure that all dictionaries
        # contain elements of expected types, i.e, ensure that the user did
        # not attempt anything tricky.
        read = dict()
        for name in client.read:
            if not isinstance(name, str):
                raise RuntimeError('invalid key for mapping dictionary')
            if name in context.datasets:
                read[name] = context.datasets[name].identifier
            elif name in context.dataobjects:
                read[name] = context.dataobjects[name].identifier
            else:
                raise RuntimeError('Unknown read artifact {}'.format(name))
            if not isinstance(read[name], str):
                raise RuntimeError(
                    'invalid element in read mapping dictionary: {} (expecting str)'
                    .format(read[name]))
        write = dict()
        for name in client.write:
            if not isinstance(name, str):
                raise RuntimeError('invalid key for mapping dictionary')
            if name in client.datasets:
                write_descriptor = client.datasets[name]
            elif name in client.dataobjects:
                write_descriptor = client.dataobjects[name]
            else:
                raise RuntimeError(
                    'Unknown write artifact {}'.format(name))
            if not isinstance(write_descriptor, ArtifactDescriptor):
                raise RuntimeError(
                    'invalid element in write mapping dictionary: {} (expecting ArtifactDescriptor)'
                    .format(name))
            write[name] = write_descriptor
        print("Pycell Execution Finished")
        print("     read: {}".format(read))
        print("     write: {}".format(write))
        provenance = ModuleProvenance(read=read,
                                      write=write,
                                      delete=client.delete)
    else:
        # The execution may have failed only because something was written
        # to standard error. In that case there is no exception to report,
        # and the stderr output already describes the failure.
        if exception is not None:
            print("ERROR: {}".format(exception))
            outputs.error(exception, offset_lines=-injected_lines)
        provenance = ModuleProvenance()
    # Return execution result
    return ExecResult(is_success=is_success,
                      outputs=outputs,
                      provenance=provenance)