def create(self, comment, lifetime_seconds=7776000):
    """Create a new personal access token.

    Parameters
    ----------
    comment : str
        Comment attached to the token being created.
    lifetime_seconds : int, optional, default=7776000 (90 days)
        Lifetime of the token in seconds.

    Returns
    -------
    dict
        Mapping with the raw token under 'token_value' and a
        TokenInfo object under 'token_info'.
    """
    payload = {'lifetime_seconds': lifetime_seconds, 'comment': comment}
    resp = self._rest_call['POST']('/token/create', data=payload)
    body = resp.json()
    if resp.status_code != 200:
        raise choose_exception(resp)
    return {
        'token_value': body.get('token_value'),
        'token_info': TokenInfo(**body.get('token_info')),
    }
def revoke(self, token_id):
    """Revoke (delete) a personal access token.

    Parameters
    ----------
    token_id : str
        The ID of the token to be deleted.

    Returns
    -------
    str
        The ``token_id`` that was revoked, if deletion succeeded.
    """
    resp = self._rest_call['POST']('/token/delete', data={'token_id': token_id})
    if resp.status_code != 200:
        raise choose_exception(resp)
    return token_id
def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name, cluster_id):
    """Private helper: resolve a cluster id and send it to an endpoint.

    Parameters
    ----------
    method : str
        HTTP method key ('POST' or 'GET').
    api_path : str
        API path the request is sent to.
    cluster_name : str, optional
        Name of the target cluster; resolved to an id when no id is given.
    cluster_id : str, optional
        Id of the target cluster.

    Returns
    -------
    dict or str
        The decoded JSON body for GET requests, otherwise the cluster id.

    Raises
    ------
    ValueError
        When neither cluster_name nor cluster_id is supplied.
    ResourceDoesNotExist
        When no cluster with the given name can be found.
    """
    if not (cluster_name or cluster_id):
        raise ValueError(
            "Either cluster_id or cluster_name must be specified")

    # Resolve the name to an id only when the caller did not supply one.
    if cluster_name and not cluster_id:
        try:
            cluster_id = self.get_cluster_id(cluster_name)
        except ResourceDoesNotExist:
            raise ResourceDoesNotExist(
                "No cluster named '{0}' was found".format(cluster_name))

    resp = self._rest_call[method](api_path, data={"cluster_id": cluster_id})
    if resp.status_code != 200:
        raise choose_exception(resp)
    # GET endpoints return a payload; mutating endpoints just confirm the id.
    return resp.json() if method == 'GET' else cluster_id
def list(self):
    """List all clusters visible to the workspace.

    Returns
    -------
    list of dict
        One dict per cluster; empty list when none exist.
    """
    resp = self._rest_call['GET']('clusters/list')
    if resp.status_code != 200:
        raise choose_exception(resp)
    return resp.json().get('clusters', [])
def __read(self, path, offset, length=MB_BYTES):
    """Private helper: read a chunk of a DBFS file.

    Parameters
    ----------
    path : str
        Absolute DBFS path of the file to read.
    offset : int
        Byte offset to start reading from.
    length : int, optional, default=MB_BYTES
        Maximum number of bytes to read.

    Returns
    -------
    FileReadInfo
        Named tuple built from the service response.
    """
    payload = {"path": path, "offset": offset, "length": length}
    resp = self._rest_call['GET']('/dbfs/read', data=payload)
    if resp.status_code != 200:
        raise choose_exception(resp)
    return FileReadInfo(**resp.json())
def spark_versions(self):
    """Fetch the Spark versions supported by the service.

    Returns
    -------
    dict
        Maps each Spark version key to its human-readable name.
    """
    resp = self._rest_call['GET']('clusters/spark-versions')
    if resp.status_code != 200:
        raise choose_exception(resp)
    versions = resp.json()['versions']
    return {entry['key']: entry['name'] for entry in versions}
def move(self, source_path, destination_path):
    """Move a file or directory within DBFS.

    If the source path is a directory, all of its files are moved
    recursively.

    Parameters
    ----------
    source_path : str
        Absolute DBFS path of the source file or directory. Required.
    destination_path : str
        Absolute DBFS path of the destination. Required.

    Returns
    -------
    str
        ``destination_path`` if the move succeeded.

    Raises
    ------
    ResourceDoesNotExist
        If the source file does not exist.
    ResourceAlreadyExists
        If a file already exists at the destination path.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    payload = {
        "source_path": source_path,
        "destination_path": destination_path
    }
    resp = self._rest_call['POST']('/dbfs/move', data=payload)
    if resp.status_code != 200:
        raise choose_exception(resp)
    return destination_path
def __put(self, path, data, overwrite=False):
    """Private helper: upload file contents to DBFS in a single request.

    Parameters
    ----------
    path : str
        Absolute DBFS path to write to.
    data : bytes
        File contents; decoded as UTF-8 for the JSON payload.
    overwrite : bool, optional, default=False
        Whether to overwrite an existing file at ``path``.

    Returns
    -------
    str
        ``path`` if the upload succeeded.
    """
    payload = {
        "path": path,
        "contents": data.decode('utf-8'),
        "overwrite": overwrite
    }
    resp = self._rest_call['POST']('/dbfs/put', data=payload)
    if resp.status_code != 200:
        raise choose_exception(resp)
    return path
def add_block(self, handle, data_block):
    """Append a block of data to an open DBFS stream.

    Parameters
    ----------
    handle : int
        The handle on an open stream. Required.
    data_block : bytes
        Base64-encoded data to append; limited to 1 MB. Required.

    Returns
    -------
    int
        ``handle`` if the block was appended.

    Raises
    ------
    MaxBlockSizeExceeded
        If the block of data sent is greater than 1 MB.
    ResourceDoesNotExist
        If the handle does not exist.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    payload = {"handle": handle, "data": data_block.decode('utf-8')}
    resp = self._rest_call['POST']('/dbfs/add-block', data=payload)
    if resp.status_code != 200:
        raise choose_exception(resp)
    return handle
def delete(self, path, recursive=False, not_exists_ok=False):
    """Delete a DBFS file or directory.

    Parameters
    ----------
    path : str
        Absolute DBFS path of the file or directory to delete. Required.
    recursive : bool
        Whether to recursively delete a directory's contents. Empty
        directories can be deleted without this flag.
    not_exists_ok : bool
        When True, suppress the error raised for a missing path.

    Returns
    -------
    str
        ``path`` if the deletion succeeded (or was suppressed).

    Raises
    ------
    IOError
        If the path is a non-empty directory and recursive is False,
        or on other similar errors.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    payload = {"path": path, "recursive": recursive}
    resp = self._rest_call['POST']('/dbfs/delete', data=payload)
    if resp.status_code == 200:
        return path
    err = choose_exception(resp)
    # Optionally treat "already gone" as success.
    if not_exists_ok and isinstance(err, ResourceDoesNotExist):
        return path
    raise err
def create(self, path, overwrite=False):
    """Open a new DBFS streaming-upload handle.

    Parameters
    ----------
    path : str
        Absolute DBFS path of the new file (e.g. "/mnt/foo.txt"). Required.
    overwrite : bool, optional
        Whether to overwrite existing file(s) at ``path``.

    Returns
    -------
    int
        The stream handle if creation succeeded.

    Raises
    ------
    MaxBlockSizeExceeded
        If the block size sent is greater than 1 MB.
    ResourceDoesNotExist
        If the handle does not exist.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    payload = {"path": path, "overwrite": overwrite}
    resp = self._rest_call['POST']('/dbfs/create', data=payload)
    if resp.status_code != 200:
        raise choose_exception(resp)
    return resp.json().get('handle')
def list_node_types(self):
    """List details on all possible Databricks node types.

    Not all node types will be available for the given subscription.

    Returns
    -------
    list of dict
        Information on every possible node type.
    """
    resp = self._rest_call['GET']('clusters/list-node-types')
    if resp.status_code != 200:
        raise choose_exception(resp)
    return resp.json()['node_types']
def get_status(self, path):
    """Get the file information of a DBFS file or directory.

    Parameters
    ----------
    path : str
        Absolute DBFS path of the file or directory. Required.

    Returns
    -------
    FileInfo
        Named tuple with path, is_dir and file_size.

    Raises
    ------
    ResourceDoesNotExist
        If the file or directory does not exist.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    resp = self._rest_call['GET']('/dbfs/get-status', data={"path": path})
    if resp.status_code != 200:
        raise choose_exception(resp)
    return FileInfo(**resp.json())
def mkdirs(self, path):
    """Create a DBFS directory and any missing parent directories.

    Note: if this operation fails it may still have created some of the
    necessary parent directories.

    Parameters
    ----------
    path : str
        Absolute DBFS path of the new directory. Required.

    Returns
    -------
    str
        ``path`` if the directory was created.

    Raises
    ------
    ResourceAlreadyExists
        If a file (not a directory) exists at any prefix of the path.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    resp = self._rest_call['POST']('/dbfs/mkdirs', data={"path": path})
    if resp.status_code != 200:
        raise choose_exception(resp)
    return path
def close(self, handle):
    """Close an open DBFS stream handle.

    Parameters
    ----------
    handle : int
        The handle on an open stream. Required.

    Returns
    -------
    int
        ``handle`` if it was closed.

    Raises
    ------
    ResourceDoesNotExist
        If the handle does not exist.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    resp = self._rest_call['POST']('/dbfs/close', data={"handle": handle})
    if resp.status_code != 200:
        raise choose_exception(resp)
    return handle
def list(self, path):
    """List a DBFS directory's contents, or details of a single file.

    Parameters
    ----------
    path : str
        Absolute DBFS path of the file or directory. Required.

    Returns
    -------
    list of FileInfo
        Named tuples with path, is_dir and file_size.

    Raises
    ------
    ResourceDoesNotExist
        If the file or directory does not exist.
    AuthorizationError
        If the service returns a 403 status code.
    APIError
        For any other non-200 status code not captured above.
    """
    resp = self._rest_call['GET']('/dbfs/list', data={"path": path})
    if resp.status_code != 200:
        raise choose_exception(resp)
    return [FileInfo(**entry) for entry in resp.json().get('files')]
def create(self, cluster_name, num_workers, spark_version, node_type_id,
           python_version=3, autotermination_minutes=60,
           custom_spark_version=False, **kwargs):
    """Create a new cluster.

    Parameters
    ----------
    cluster_name : str
        The display name of the cluster being created.
    num_workers : int or dict
        Fixed number of worker nodes, or an autoscale specification dict.
    spark_version : str
        Spark version key; validated against spark_versions() unless
        ``custom_spark_version`` is True.
    node_type_id : str
        VM type for the workers; validated against
        list_available_node_type_names().
    python_version : int, optional, default=3
        When 3, PYSPARK_PYTHON is pointed at the python3 binary.
    autotermination_minutes : int, optional, default=60
        Automatically terminates the cluster after it is inactive for this
        many minutes. Between 10 and 10000 if specified; 0 explicitly
        disables automatic termination.
    custom_spark_version : bool, optional, default=False
        When True, skip the supported-Spark-version check.
    kwargs : optional
        Other keyword arguments are passed to the API in the JSON payload.
        See: https://docs.azuredatabricks.net/api/latest/clusters.html#create

    Returns
    -------
    str
        The cluster_id of the newly created cluster.

    Raises
    ------
    ValueError
        If the Spark version or node type is not recognized.
    """
    METHOD = 'POST'
    API_PATH = 'clusters/create'

    # Check custom_spark_version first: the original evaluated
    # spark_versions() (a service call) even when validation was disabled.
    if not custom_spark_version and spark_version not in self.spark_versions():
        raise ValueError(
            "'{0}' is not a recognized spark_version. Please see the ".
            format(spark_version) +
            "spark_versions() method for available Spark Versions. ")

    # Validate both the worker node type and, if given, the driver type.
    available_vms = self.list_available_node_type_names()
    driver_vm_id = kwargs.get('driver_node_type_id')
    if node_type_id not in available_vms or (
            driver_vm_id and driver_vm_id not in available_vms):
        # Reconstructed message: the source literal was broken across a
        # raw newline, which is a SyntaxError as written.
        raise ValueError(
            "'{0}' is not an available VM type. Please see the ".format(
                node_type_id) +
            "list_available_node_type_names() method for available node types")

    cluster_config = {
        'cluster_name': cluster_name,
        'spark_version': spark_version,
        'node_type_id': node_type_id
    }

    # If python_version is 3, set the PYSPARK_PYTHON environment variable.
    # Copy spark_env_vars so we never mutate the caller's dict in place.
    if python_version == 3:
        env_vars = dict(kwargs.get('spark_env_vars') or {})
        env_vars['PYSPARK_PYTHON'] = '/databricks/python3/bin/python3'
        kwargs['spark_env_vars'] = env_vars

    # Default autotermination is 60 minutes; 0/None leaves it unset.
    if autotermination_minutes:
        kwargs['autotermination_minutes'] = autotermination_minutes

    # num_workers may be a fixed size (int) or an autoscale spec (dict).
    if isinstance(num_workers, dict):
        cluster_config['autoscale'] = num_workers
    else:
        cluster_config['num_workers'] = int(num_workers)

    # Merge kwargs into the required config (required keys win).
    cluster_config = dict_update(kwargs, cluster_config)

    resp = self._rest_call[METHOD](API_PATH, data=cluster_config)
    if resp.status_code != 200:
        raise choose_exception(resp)
    return resp.json()['cluster_id']