def export(self, dbx_path, file_path, file_format='DBC'):
    """
    Exports the Databricks path to a file on the local PC.

    Parameters
    ----------
    dbx_path : str
        The path, in the Databricks workspace, to export
    file_path : str
        The path, on the local PC, where the file should be created
    file_format : str, optional
        The format of the file to be saved. Defaults to DBC. Matched
        case-insensitively against EXPORT_FORMATS (the error message lists
        DBC, SOURCE, HTML and JUPYTER).

    Returns
    -------
    file_path if successful

    Raises
    ------
    UnknownFormat:
        If file_format is not one of the supported export formats
    AuthorizationError:
        If the services returns a 403 status code
    MaxNotebookSizeExceeded:
        If the service reports the MAX_NOTEBOOK_SIZE_EXCEEDED error code
    ResourceDoesNotExist:
        If the given Databricks path does not exist
    APIError:
        If the status code returned by the service is anything except 200
        and is not captured above
    """
    METHOD = 'GET'
    API_PATH = '/workspace/export'

    # Validate and send the same normalised value, so that e.g. 'dbc' is not
    # accepted locally and then forwarded to the service in lower case.
    export_format = file_format.upper()
    if export_format not in EXPORT_FORMATS:
        raise UnknownFormat(
            '{0} is not a supported format type. '
            'Please use DBC, SOURCE, HTML, or JUPYTER'.format(file_format))

    data = {'path': dbx_path, 'format': export_format, 'direct_download': True}
    resp = self._rest_call[METHOD](API_PATH, data=data)

    if resp.status_code == 200:
        # NOTE(review): a requests-style response exposes the raw body as
        # ``.content`` and has no ``.get``; the mapping-style accessor used
        # originally is kept as a fallback in case _rest_call returns a
        # custom wrapper - confirm and simplify.
        content = getattr(resp, 'content', None)
        if content is None:
            content = resp.get('content')

        # The payload is resolved before the file is opened so a failure
        # above does not leave an empty/truncated file behind.
        with open(file_path, 'wb+') as fo:
            fo.write(content)
        return file_path
    elif resp.status_code == 403:
        raise AuthorizationError("User is not authorized or token is incorrect.")

    error = resp.json()
    error_code = error.get('error_code')
    if error_code == "MAX_NOTEBOOK_SIZE_EXCEEDED":
        raise MaxNotebookSizeExceeded(error.get('message'))
    elif error_code == "RESOURCE_DOES_NOT_EXIST":
        raise ResourceDoesNotExist(error.get('message'))
    else:
        raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                           error_code,
                                                           error.get('message')))
def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name, cluster_id):
    """
    Private helper that sends a request carrying only a cluster id.

    Parameters
    ----------
    method : str
        Key into the REST call table ('GET' or 'POST')
    api_path : str
        API path that the request is sent to
    cluster_name : str, optional
        The name of the cluster. Only used to look up the id when
        cluster_id is not supplied.
    cluster_id : str, optional
        The id of the cluster the request refers to.

    Returns
    -------
    The parsed JSON body when method is 'GET' and the service answers 200;
    the cluster id for any other method that answers 200.

    Raises
    ------
    ValueError
        When neither cluster_name or cluster_id are passed
    ResourceDoesNotExist
        When no cluster with the given name is found
    Exception
        Whatever choose_exception builds from a non-200 response
    """
    if not cluster_name and not cluster_id:
        raise ValueError(
            "Either cluster_id or cluster_name must be specified")

    # Past the guard at least one identifier is set, so a missing id means
    # the name has to be resolved.
    if not cluster_id:
        try:
            cluster_id = self.get_cluster_id(cluster_name)
        except ResourceDoesNotExist:
            raise ResourceDoesNotExist(
                "No cluster named '{0}' was found".format(cluster_name))

    send = self._rest_call[method]
    response = send(api_path, data={"cluster_id": cluster_id})

    if response.status_code != 200:
        raise choose_exception(response)

    if method == 'GET':
        return response.json()
    return cluster_id
def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name, cluster_id):
    """
    Private method to send only a cluster id to a given endpoint

    Parameters
    ----------
    method : str
        HTTP POST or GET method (key into the REST call table)
    api_path : str
        API path that the request is sent to
    cluster_name : str, optional
        The name of the cluster. Used to look up the id when cluster_id is
        not supplied.
    cluster_id : str, optional
        The id of the cluster the request refers to.

    Returns
    -------
    The parsed JSON body when method is 'GET' and the service answers 200;
    the cluster id for any other method that answers 200.

    Raises
    ------
    ValueError
        When neither cluster_name or cluster_id are passed
    AuthorizationError
        If the service returns a 403 status code
    ResourceDoesNotExist
        When the service answers 400 with a "Cluster <id> does not exist"
        message for the requested id (get_cluster_id may also raise it when
        resolving cluster_name)
    APIError
        For any other non-200 response
    """
    if not (cluster_name or cluster_id):
        raise ValueError("Either cluster_id or cluster_name must be specified")

    if cluster_name and not cluster_id:
        cluster_id = self.get_cluster_id(cluster_name)

    data = {"cluster_id": cluster_id}
    resp = self._rest_call[method](api_path, data=data)

    if resp.status_code == 200:
        return resp.json() if method == 'GET' else cluster_id
    if resp.status_code == 403:
        raise AuthorizationError("User is not authorized or token is incorrect.")

    error = resp.json()
    message = error.get('message')

    # The template must be filled in with the actual id: comparing against
    # the raw "{id}" placeholder can never match a real service message.
    # NOTE(review): assumes the service reports exactly this wording - confirm.
    not_found_message = "Cluster {id} does not exist".format(id=cluster_id)
    if resp.status_code == 400 and message == not_found_message:
        raise ResourceDoesNotExist(message)

    raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                       error.get('error_code'),
                                                       message))
def delete(self, path, recursive=False, not_exists_ok=False):
    """
    Remove an object from the Databricks workspace.

    Parameters
    ----------
    path : str
        The path, in the Databricks workspace, to delete
    recursive : bool, optional
        Forwarded to the service as the 'recursive' flag
    not_exists_ok : bool, optional
        When True, a missing path is treated as success instead of an error

    Returns
    -------
    path, both when the service confirms the deletion and when the path was
    already missing and not_exists_ok is True

    Raises
    ------
    AuthorizationError:
        If the services returns a 403 status code
    ResourceDoesNotExist:
        If not_exists_ok is False and the service reports that the path
        does not exist
    APIError:
        For any other response that is not a 200
    """
    response = self._rest_call['POST'](
        '/workspace/delete',
        data={'path': path, 'recursive': recursive})
    status = response.status_code

    if status == 200:
        return path
    if status == 403:
        raise AuthorizationError("User is not authorized or token is incorrect.")

    # Every remaining outcome needs the error payload.
    body = response.json()
    if status == 400 and body.get("error_code") == "RESOURCE_DOES_NOT_EXIST":
        if not_exists_ok:
            return path
        raise ResourceDoesNotExist(body.get('message'))

    raise APIError("Response code {0}: {1} {2}".format(status,
                                                       body.get('error_code'),
                                                       body.get('message')))
def get_cluster_id(self, cluster_name):
    """
    Resolve a cluster display name to a cluster ID.

    Parameters
    ----------
    cluster_name : str
        Display name of the cluster

    Returns
    -------
    The ID of a cluster whose name matches cluster_name. Matching clusters
    are ordered by ascending start_time; the first one in state RUNNING is
    returned, and if none is running the first matching cluster is returned.

    Raises
    ------
    ResourceDoesNotExist
        When no cluster with the given name is found
    """
    ClusterInfo = collections.namedtuple('ClusterInfo', ['id', 'state', 'start_time'])

    # Collect every cluster carrying the requested display name.
    candidates = []
    for entry in self.list():
        if entry['cluster_name'] != cluster_name:
            continue
        candidates.append(ClusterInfo(id=entry['cluster_id'],
                                      state=entry['state'],
                                      start_time=entry['start_time']))

    if not candidates:
        raise ResourceDoesNotExist(
            "No cluster named '{0}' was found".format(cluster_name))

    candidates.sort(key=lambda info: info.start_time)

    # Prefer a running cluster; otherwise fall back to the first match.
    for info in candidates:
        if info.state == 'RUNNING':
            return info.id
    return candidates[0].id
def list(self, path):
    """
    Lists the contents of the given directory.

    Parameters
    ----------
    path : str
        The path, in the Databricks workspace, whose contents should be
        listed

    Returns
    -------
    List of WorkspaceObjectInfo, one per entry under 'objects' in the
    response; an empty list when the response has no (or empty) 'objects'

    Raises
    ------
    AuthorizationError:
        If the services returns a 403 status code
    ResourceDoesNotExist:
        If the service reports that the path does not exist
    APIError:
        For any other response that is not a 200
    """
    response = self._rest_call['GET']('/workspace/list', data={'path': path})
    status = response.status_code

    if status == 200:
        objects = response.json().get('objects')
        if not objects:
            return []
        return [WorkspaceObjectInfo(**obj) for obj in objects]
    if status == 403:
        raise AuthorizationError("User is not authorized or token is incorrect.")

    # Every remaining outcome needs the error payload.
    body = response.json()
    if status == 400 and body.get("error_code") == "RESOURCE_DOES_NOT_EXIST":
        raise ResourceDoesNotExist(body.get('message'))

    raise APIError("Response code {0}: {1} {2}".format(status,
                                                       body.get('error_code'),
                                                       body.get('message')))
def get_status(self, path):
    """
    Gets the status of a given Databricks path.

    Parameters
    ----------
    path : str
        The path, in the Databricks workspace, to get the status of

    Returns
    -------
    WorkspaceObjectInfo built from the JSON body of the response

    Raises
    ------
    AuthorizationError:
        If the services returns a 403 status code
    ResourceDoesNotExist:
        If the service reports that the path does not exist
    APIError:
        For any other response that is not a 200
    """
    response = self._rest_call['GET']('/workspace/get-status', data={'path': path})
    status = response.status_code

    if status == 200:
        return WorkspaceObjectInfo(**response.json())
    if status == 403:
        raise AuthorizationError("User is not authorized or token is incorrect.")

    # Every remaining outcome needs the error payload.
    body = response.json()
    if status == 400 and body.get("error_code") == "RESOURCE_DOES_NOT_EXIST":
        raise ResourceDoesNotExist(body.get('message'))

    raise APIError("Response code {0}: {1} {2}".format(status,
                                                       body.get('error_code'),
                                                       body.get('message')))