def set_scenario_variables(self, **kwargs):
    """
    Define additional variables in this scenario run.

    :param kwargs: the variables to define, given as keyword arguments. They
        are JSON-encoded as one object and sent under the "variables" key.
    """
    payload = {"variables": json.dumps(kwargs)}
    backend_json_call(
        "scenarios/set-variables/",
        payload,
        err_msg="Failed to set scenario variables",
    )
def set_percentage(self, percentage, no_fail=True):
    """
    Report a percentage for the future identified by ``self.future_id``.

    :param percentage: the value sent under the "percentage" key
    :param no_fail: when True, any exception raised while reporting is
        swallowed and False is returned; when False, the exception is re-raised
    :return: True when the backend call went through, False when it failed
        and ``no_fail`` is set
    """
    try:
        backend_json_call(
            "futures/posttrain-set-percentage",
            data={
                "futureId": self.future_id,
                "percentage": percentage,
            },
        )
    except Exception as err:
        # best-effort reporting: only propagate when the caller asked for it
        if not no_fail:
            raise err
        return False
    return True
def wait_for_completion(self, step_future=None, poll_interval=5):
    """
    Await the termination of the step and return its result.

    :param step_future: optional future state already obtained by the caller,
        as a dict with at least a 'hasResult' entry (and 'result' when
        'hasResult' is true). When None, the backend is polled from scratch.
    :param poll_interval: number of seconds to sleep between two polls of the
        backend
    :return: the object returned by ``self.get_result()``
    :raises Exception: if the step was not started, or if ``self.fail_fatal``
        is set and the outcome is "ABORTED"
    :raises StepFailedException: if ``self.fail_fatal`` is set and the outcome
        is "FAILED"
    """
    if self.future_id is None:
        raise Exception("Step not started")

    if step_future is None:
        step_future = {'hasResult': False}

    while not step_future['hasResult']:
        # steps are expected to be long-running tasks, so poll at a slow pace
        time.sleep(poll_interval)
        step_future = backend_json_call(
            "futures/get-update", data={"futureId": self.future_id})

    # The loop above only exits once 'hasResult' is truthy, so the result is
    # always available at this point.
    self.result = step_future['result']
    ret = self.get_result()

    if self.fail_fatal:
        outcome = ret.get_outcome()
        if outcome == "ABORTED":
            raise Exception("Scenario step was aborted")
        if outcome == "FAILED":
            raise StepFailedException(
                "Scenario step failed: %s" % ret.get_error_message())
    return ret
def list(project_key=None):
    """
    List the names of datasets.

    :param project_key: key of the project to list datasets from. When None
        (or otherwise falsy), the current project key is used.
    :return: the response of the "datasets/list" backend call
    """
    if not project_key:
        project_key = default_project_key()
    request = {"projectKey": project_key}
    return intercom.backend_json_call("datasets/list", data=request)
def get_cluster_from_dss_cluster(dss_cluster_id):
    """
    Look up a DSS cluster by identifier and build the cluster helper from its
    resolved settings.

    :param dss_cluster_id: identifier of the cluster; it is compared to the
        'name' entry of the items returned by ``client.list_clusters()``
    :return: a tuple ``(cluster_data, clusters, dss_cluster_settings,
        dss_cluster_config)`` holding the plugin data of the cluster settings,
        the object built by ``get_cluster_from_connection_info``, the settings
        object, and the resolved config
    :raises Exception: if no listed cluster has this name
    """
    # public API client
    api = dataiku.api_client()

    # make sure the cluster is known to DSS before fetching it
    known_names = [entry['name'] for entry in api.list_clusters()]
    if dss_cluster_id not in known_names:
        raise Exception("DSS cluster %s doesn't exist" % dss_cluster_id)
    dss_cluster = api.get_cluster(dss_cluster_id)

    # raw settings still carry the unresolved preset setup
    dss_cluster_settings = dss_cluster.get_settings()
    unresolved_config = dss_cluster_settings.get_raw()['params']['config']

    # ask the backend to resolve the presets
    resolve_request = {
        'elementConfig': json.dumps(unresolved_config),
        'elementType': dss_cluster_settings.get_raw()['type'],
    }
    dss_cluster_config = backend_json_call(
        'plugins/get-resolved-settings', data=resolve_request)
    resolved_dump = json.dumps(dss_cluster_config)
    logging.info("Resolved cluster config : %s" % resolved_dump)

    # build the helper class from the cluster settings (the macro doesn't
    # have the params)
    element_info = dss_cluster_config['config']['connectionInfo']
    plugin_info = dss_cluster_config['pluginConfig']['connectionInfo']
    clusters = get_cluster_from_connection_info(element_info, plugin_info)

    cluster_data = dss_cluster_settings.get_plugin_data()
    return cluster_data, clusters, dss_cluster_settings, dss_cluster_config
def get_last_check_values(self):
    """
    Get the set of last values of the checks on this project.

    :return: the backend response wrapped in a
        :class:`dataiku.ComputedChecks` object
    """
    request = {"projectKey": self.project_key}
    raw_values = backend_json_call(
        "checks/projects/get-last-values", data=request)
    return metrics.ComputedChecks(raw_values)
def get_previous_steps_outputs(self):
    """
    Return the results of the steps previously executed in this scenario run.

    As an illustration, if a SQL step named 'the_sql' ran earlier in the
    scenario, the returned list will look like::

        [
            ...
            {
                'stepName': 'the_sql',
                'result': {
                    'success': True,
                    'hasResultset': True,
                    'columns': [
                        {'type': 'int8', 'name': 'a'},
                        {'type': 'varchar', 'name': 'b'}
                    ],
                    'totalRows': 2,
                    'rows': [
                        ['1000', 'min'],
                        ['2500', 'max']
                    ],
                    'log': '',
                    'endedOn': 0,
                    'totalRowsClipped': False
                }
            },
            ...
        ]

    Important note: the exact structure of the output of each type of step
    is not precisely defined, and may vary from one DSS release to another.

    :return: the response of the "scenarios/get-step-outputs/" backend call
    """
    step_outputs = backend_json_call(
        "scenarios/get-step-outputs/",
        err_msg="Failed to read step outputs",
    )
    return step_outputs
def get_info(self):
    """
    Return the info of this saved model, fetching it on first use.

    The "info" entry of the backend response is cached in ``self.info``;
    later calls return the cached value without calling the backend.

    :return: the cached or freshly fetched info
    """
    if self.info is not None:
        return self.info

    request = {
        "projectKey": self.project_key,
        "lookup": self.short_name,
    }
    response = backend_json_call("savedmodels/get-info", data=request)
    self.info = response["info"]
    return self.info
def list_models(project_key=None):
    """
    Retrieve the list of saved models.

    :param project_key: key of the project from which to list models. When
        None (or otherwise falsy), the default project key is used.
    :return: the response of the "savedmodels/list" backend call
    """
    if not project_key:
        project_key = default_project_key()
    request = {"projectKey": project_key}
    return backend_json_call("savedmodels/list", data=request)
def get_last_metric_values(self, partition=''):
    """
    Get the set of last values of the metrics on this folder.

    :param partition: accepted by the signature, but not included in the
        request built by this method
    :return: the backend response wrapped in a
        :class:`dataiku.ComputedMetrics` object
    """
    request = {
        "projectKey": self.project_key,
        "folderId": self.get_id(),
    }
    raw_values = backend_json_call(
        "metrics/managed-folders/get-last-values", data=request)
    return metrics.ComputedMetrics(raw_values)
def get_last_metric_values(self, partition=''):
    """
    Get the set of last values of the metrics on this dataset.

    :param partition: value sent under the "partition" key of the request
        (empty string by default)
    :return: the backend response wrapped in a
        :class:`dataiku.ComputedMetrics` object
    """
    request = {
        "projectKey": self.project_key,
        "datasetName": self.short_name,
        "partition": partition,
    }
    raw_values = intercom.backend_json_call(
        "metrics/datasets/get-last-values", data=request)
    return metrics.ComputedMetrics(raw_values)
def get_definition(self):
    """
    Return the definition of this saved model, fetching it on first use.

    The backend response is cached in ``self.definition``; later calls return
    the cached value without calling the backend.

    :return: the cached or freshly fetched definition
    """
    if self.definition is not None:
        return self.definition

    request = {
        "projectKey": self.project_key,
        "savedModelId": self.get_id(),
    }
    self.definition = backend_json_call("savedmodels/get", data=request)
    return self.definition
def get_state():
    """
    Poll the backend for the remote kernel and return its state.

    ``self`` is not a parameter of this function: it is resolved from the
    surrounding scope and provides the ``batch_id`` sent in the request.

    :return: the "state" entry of the backend response, or None when the
        response has no such entry
    """
    logging.info("poll state")
    request = {
        "contextProjectKey": remoterun.get_env_var("DKU_CURRENT_PROJECT_KEY"),
        "batchId": self.batch_id,
    }
    remote_kernel = backend_json_call(
        "jupyter/poll-remote-kernel", data=request)
    kernel_dump = json.dumps(remote_kernel)
    logging.info("Polled, got : %s" % kernel_dump)
    return remote_kernel.get("state")
def send(self, additional_variables=None, **kwargs):
    """
    Send a message through the "scenarios/send-message" backend call.

    The configuration sent is a copy of ``self.channel_params`` updated with
    the keyword arguments; ``self.channel_params`` itself is left untouched.

    :param additional_variables: optional dict, JSON-encoded and sent under
        the "variables" key. Defaults to an empty dict.
    :param kwargs: entries that override or complete ``self.channel_params``
        for this message only
    :return: the response of the backend call
    """
    # None stands for "no extra variables"; avoids a shared mutable default
    if additional_variables is None:
        additional_variables = {}

    final_params = self.channel_params.copy()
    final_params.update(kwargs)

    data = {
        "messaging": json.dumps({
            "type": self.type,
            "configuration": final_params,
        }),
        "variables": json.dumps(additional_variables),
    }
    return backend_json_call("scenarios/send-message", data)
def get_dataset_last_build(self, dataset_name, project_key=None):
    """
    Get information about the last build of a dataset.

    The result is a map of:

    - projectKey and id : the identifier of the dataset
    - jobProjectKey and jobId : the identifier of the job that built the dataset
    - buildEndTime : timestamp of when the build finished
    - buildSuccess : final status of the build

    :param dataset_name: sent under the "objectId" key of the request
    :param project_key: sent under the "projectKey" key; left out of the
        request when None
    :return: the response of the "scenarios/get-object-last-build" backend call
    """
    request = dict(objectId=dataset_name)
    if project_key is not None:
        request.update(projectKey=project_key)
    return backend_json_call("scenarios/get-object-last-build", request)
def save_external_check_values(self, values_dict):
    """
    Save checks on this project. The checks are saved with the type "external".

    :param values_dict: the values to save, as a dict. The keys of the dict
        are used as check names. The dict is JSON-encoded before being sent.
    :return: the response of the backend call
    """
    request = {
        "projectKey": self.project_key,
        "data": json.dumps(values_dict),
    }
    return backend_json_call(
        "checks/projects/save-external-values",
        data=request,
        err_msg="Failed to save external check values",
    )
def list_versions(self):
    """
    List the versions this saved model contains.

    The backend response is cached in ``self.versions``; later calls return
    the cached value without calling the backend.

    :return: the cached or freshly fetched versions
    """
    if self.versions is not None:
        return self.versions

    request = {
        "projectKey": self.project_key,
        "savedModelId": self.get_id(),
    }
    self.versions = backend_json_call(
        "savedmodels/list-versions", data=request)
    return self.versions
def get_check_history(self, check_lookup):
    """
    Get the set of all values a given check took on this project.

    :param check_lookup: check name or unique identifier. A string is sent
        as-is; any other value is JSON-encoded first.
    :return: the response of the backend call
    """
    request = {"projectKey": self.project_key}

    # The two isinstance tests are deliberately kept as a short-circuit `or`:
    # `unicode` is only looked up when the value is not a `str`.
    # NOTE(review): presumably `unicode` is aliased elsewhere for Python 3 - confirm.
    if isinstance(check_lookup, str) or isinstance(check_lookup, unicode):
        request["checkLookup"] = check_lookup
    else:
        request["checkLookup"] = json.dumps(check_lookup)

    return backend_json_call(
        "checks/projects/get-metric-history", data=request)
def start(self):
    """
    Launch the execution of the step.

    The step definition in ``self.step`` is JSON-encoded and sent to the
    backend; the 'jobId' of the response is stored in ``self.future_id``.

    :raises Exception: if ``self.future_id`` is already set, i.e. the step
        was already started
    """
    already_started = self.future_id is not None
    if already_started:
        raise Exception("Step already started")

    # reset the tracking state before launching
    self.future_id = None
    self.result = None

    payload = {"stepData": json.dumps(self.step)}
    launched = backend_json_call(
        "scenarios/run-step/",
        data=payload,
        err_msg="Step failed to start",
    )
    self.future_id = launched['jobId']
def get_metric_history(self, metric_lookup):
    """
    Get the set of all values a given metric took on this project.

    :param metric_lookup: metric name or unique identifier. A string is sent
        as-is; any other value is JSON-encoded first.
    :return: the response of the backend call
    """
    request = {"projectKey": self.project_key}

    # The two isinstance tests are deliberately kept as a short-circuit `or`:
    # `unicode` is only looked up when the value is not a `str`.
    # NOTE(review): presumably `unicode` is aliased elsewhere for Python 3 - confirm.
    if isinstance(metric_lookup, str) or isinstance(metric_lookup, unicode):
        request["metricLookup"] = metric_lookup
    else:
        request["metricLookup"] = json.dumps(metric_lookup)

    return backend_json_call(
        "metrics/projects/get-metric-history", data=request)
def save_external_metric_values(self, values_dict):
    """
    Save metrics on this folder. The metrics are saved with the type "external".

    :param values_dict: the values to save, as a dict. The keys of the dict
        are used as metric names. The dict is JSON-encoded before being sent.
    :return: the response of the backend call
    """
    request = {
        "projectKey": self.project_key,
        "folderId": self.get_id(),
        "data": json.dumps(values_dict),
    }
    return backend_json_call(
        "metrics/managed-folders/save-external-values",
        data=request,
        err_msg="Failed to save external metric values",
    )
def get_version_metrics(self, version_id):
    """
    Get the training metrics of a version of this model.

    :param version_id: the unique identifier of the version for which to
        retrieve metrics
    :return: a :class:`.SavedModelVersionMetrics` built on top of the
        ComputedMetrics wrapping the backend response
    """
    request = {
        "projectKey": self.project_key,
        "modelId": self.get_id(),
        "modelVersionId": version_id,
    }
    raw_values = backend_json_call(
        "metrics/saved-models/get-values-for-version", data=request)
    computed = metrics.ComputedMetrics(raw_values)
    return SavedModelVersionMetrics(computed)
def save_external_check_values(self, values_dict, partition=''):
    """
    Save checks on this dataset. The checks are saved with the type "external".

    :param values_dict: the values to save, as a dict. The keys of the dict
        are used as check names. The dict is JSON-encoded before being sent.
    :param partition: value sent under the "partitionId" key of the request
        (empty string by default)
    :return: the response of the backend call
    """
    request = {
        "projectKey": self.project_key,
        "datasetName": self.short_name,
        "partitionId": partition,
        "data": json.dumps(values_dict),
    }
    return intercom.backend_json_call(
        "checks/datasets/save-external-values",
        data=request,
        err_msg="Failed to save external check values",
    )
def save_external_metric_values(self, values_dict, partition=''):
    """
    Save metrics on this dataset. The metrics are saved with the type "external".

    :param values_dict: the values to save, as a dict. The keys of the dict
        are used as metric names. The dict is JSON-encoded before being sent.
    :param partition: optionally, the partition for which the values are to
        be saved; sent under the "partitionId" key of the request
    :return: the response of the backend call
    """
    request = {
        "projectKey": self.project_key,
        "datasetName": self.short_name,
        "partitionId": partition,
        "data": json.dumps(values_dict),
    }
    return intercom.backend_json_call(
        "metrics/datasets/save-external-values",
        data=request,
        err_msg="Failed to save external metric values",
    )
def is_done(self):
    """
    Check whether a running step is finished.

    When the future reports a result, it is stored in ``self.result``.

    :return: True if the future has a result, False otherwise
    :raises Exception: if the step was not started (``self.future_id`` is None)
    """
    if self.future_id is None:
        raise Exception("Step not started")

    update = backend_json_call(
        "futures/get-update",
        data={"futureId": self.future_id},
        err_msg="Failed to track step future",
    )
    if not update['hasResult']:
        return False

    self.result = update['result']
    return True
def get_model_folder(self, version_id=None):
    """
    Return the model folder of a version of the saved model.

    :param version_id: identifier of the version. When None, the first
        version flagged "active" in ``self.saved_model.list_versions()`` is
        used.
    :return: the "model_folder" entry of the backend response
    :raises IndexError: if ``version_id`` is None and no version is active
    """
    if version_id is None:
        # stop at the first active version instead of building a full list
        active_version = next(
            (v for v in self.saved_model.list_versions() if v["active"]),
            None)
        if active_version is None:
            # same exception type as indexing an empty list, but with a
            # message that says what is actually wrong
            raise IndexError(
                "Saved model has no active version; "
                "pass version_id explicitly")
        version_id = active_version["versionId"]

    res = backend_json_call("savedmodels/get-model-details", data={
        "projectKey": self.project_key,
        "smId": self.saved_model.get_id(),
        "versionId": version_id,
    })
    return res["model_folder"]
def get_metric_history(self, metric_lookup, partition=''):
    """
    Get the set of all values a given metric took on this folder.

    :param metric_lookup: metric name or unique identifier. A string is sent
        as-is; any other value is JSON-encoded first.
    :param partition: accepted by the signature, but not included in the
        request built by this method
    :return: the response of the backend call
    """
    request = {
        "projectKey": self.project_key,
        "folderId": self.get_id(),
    }

    # The two isinstance tests are deliberately kept as a short-circuit `or`:
    # `unicode` is only looked up when the value is not a `str`.
    # NOTE(review): presumably `unicode` is aliased elsewhere for Python 3 - confirm.
    if isinstance(metric_lookup, str) or isinstance(metric_lookup, unicode):
        request["metricLookup"] = metric_lookup
    else:
        request["metricLookup"] = json.dumps(metric_lookup)

    return backend_json_call(
        "metrics/managed-folders/get-metric-history", data=request)
def run(self, progress_callback):
    """
    Tag each project of ``self.project_keys`` with the connections used by
    its datasets and SQL notebooks.

    For every project, connection names are collected from the 'connection'
    param of each dataset definition and from the 'connection' entry of each
    SQL notebook. Notebook connections matching ``@virtual(...):<name>`` are
    recorded as ``hive-<name>``. One tag ``<prefix><connection>`` is then
    added to the project metadata per connection.

    Configuration read from ``self.config``:

    - "clobber" (default False): when true, existing tags starting with the
      prefix are removed before the new tags are added
    - "prefix": string prepended to each connection name to build the tag

    :param progress_callback: called after each project with the number of
        projects processed so far
    """
    clobber = self.config.get("clobber", False)
    prefix = self.config.get("prefix")
    # Raw string: same pattern as before, without relying on invalid string
    # escapes such as '\('.
    virtual_connection = re.compile(r'@virtual\(([^\)]+)\):(.*)')

    for done, project_key in enumerate(self.project_keys, 1):
        project = self.client.get_project(project_key)

        # Collected per project, so that a project is only tagged with the
        # connections it uses itself, not those of projects handled earlier.
        connections = set()

        for dataset_name in Dataset.list(project_key=project_key):
            dataset = project.get_dataset(dataset_name)
            connection_name = dataset.get_definition().get(
                'params', {}).get('connection', None)
            if connection_name is not None:
                connections.add(connection_name)

        sql_notebooks = intercom.backend_json_call(
            "sql-notebooks/list/", data={"projectKey": project_key})
        for sql_notebook in sql_notebooks:
            connection_name = sql_notebook.get('connection', None)
            if connection_name is None:
                continue
            match = virtual_connection.search(connection_name)
            if match is not None:
                connection_name = 'hive-%s' % match.group(2)
            connections.add(connection_name)

        meta = project.get_metadata()

        # Update tags list
        if clobber:
            tags = [x for x in meta["tags"] if not x.startswith(prefix)]
        else:
            tags = meta["tags"]
        # Sorted for a stable tag order; tags already present are skipped so
        # that running again without clobber does not pile up duplicates.
        for connection in sorted(connections):
            tag = "%s%s" % (prefix, connection)
            if tag not in tags:
                tags.append(tag)
        meta["tags"] = tags
        project.set_metadata(meta)

        progress_callback(done)
def __init__(self, connection, query, pre_queries=None, post_queries=None,
             find_connection_from_dataset=False, db_type='sql',
             extra_conf=None, timeOut=600000, script_steps=None,
             script_input_schema=None, script_output_schema=None):
    """
    Start a streaming SQL query on the backend and keep the resulting
    session in ``self.streamingSession``.

    The call blocks until the backend answers the "start-streaming" request.

    :param connection: sent under the "connection" key
    :param query: sent under the "query" key
    :param pre_queries: JSON-encoded and sent under "preQueries"
    :param post_queries: JSON-encoded and sent under "postQueries"
    :param find_connection_from_dataset: sent under
        "findConnectionFromDataset"
    :param db_type: sent under "dbType"
    :param extra_conf: optional dict, JSON-encoded and sent under
        "extraConf". Defaults to an empty dict.
    :param timeOut: sent as-is under "timeOut"
    :param script_steps: JSON-encoded and sent under "scriptSteps" when not
        None; otherwise None is sent
    :param script_input_schema: same handling, under "scriptInputSchema"
    :param script_output_schema: same handling, under "scriptOutputSchema"
    """
    # None stands for "no extra configuration"; avoids a shared mutable default
    if extra_conf is None:
        extra_conf = {}

    def _dumps_or_none(value):
        # optional script settings stay None instead of becoming "null"
        return json.dumps(value) if value is not None else None

    data = {
        "connection": connection,
        "query": query,
        "preQueries": json.dumps(pre_queries),
        "postQueries": json.dumps(post_queries),
        "findConnectionFromDataset": find_connection_from_dataset,
        "timeOut": timeOut,
        "dbType": db_type,
        "extraConf": json.dumps(extra_conf),
        "scriptSteps": _dumps_or_none(script_steps),
        "scriptInputSchema": _dumps_or_none(script_input_schema),
        "scriptOutputSchema": _dumps_or_none(script_output_schema),
    }

    logging.info("Starting SQL query reader")
    # initiate the streaming (blocks until the database says it's ready to
    # return values)
    self.streamingSession = backend_json_call(
        "sql-queries/start-streaming", data=data)
    logging.info("Got initial SQL query response")
def get_metric_history(self, metric_lookup, partition=''):
    """
    Get the set of all values a given metric took on this dataset.

    :param metric_lookup: metric name or unique identifier. A string is sent
        as-is; any other value is JSON-encoded first.
    :param partition: optionally, the partition for which the values are to
        be fetched; sent under the "partition" key of the request
    :return: the response of the backend call
    """
    request = {
        "projectKey": self.project_key,
        "datasetName": self.short_name,
        "partition": partition,
    }

    # The two isinstance tests are deliberately kept as a short-circuit `or`:
    # `unicode` is only looked up when the value is not a `str`.
    # NOTE(review): presumably `unicode` is aliased elsewhere for Python 3 - confirm.
    if isinstance(metric_lookup, str) or isinstance(metric_lookup, unicode):
        request["metricLookup"] = metric_lookup
    else:
        request["metricLookup"] = json.dumps(metric_lookup)

    return intercom.backend_json_call(
        "metrics/datasets/get-metric-history",
        data=request,
        err_msg="Failed to get metric history",
    )