def export(self, dbx_path, file_path, file_format='DBC'):
        """Export a Databricks workspace path to a file on the local PC.

        Parameters
        ----------
        dbx_path : str
            The path, in the Databricks workspace, to export

        file_path : str
            The path, on the local PC, where the file should be created

        file_format : str, optional
            The format of the file to be saved. Defaults to DBC. Compared
            case-insensitively against EXPORT_FORMATS (the error message
            lists DBC, SOURCE, HTML and JUPYTER).

        Returns
        -------
        file_path if successful

        Raises
        ------
        UnknownFormat:
            If file_format is not one of the supported export formats

        AuthorizationError:
            If the services returns a 403 status code

        MaxNotebookSizeExceeded:
            If the service reports the MAX_NOTEBOOK_SIZE_EXCEEDED error code

        ResourceDoesNotExist:
            If the service reports the RESOURCE_DOES_NOT_EXIST error code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        METHOD = 'GET'
        API_PATH = '/workspace/export'

        # Normalise once so the value that is validated is also the value
        # that is sent to the service.
        normalized_format = file_format.upper()
        if normalized_format not in EXPORT_FORMATS:
            raise UnknownFormat(
                '{0} is not a supported format type. '
                'Please use DBC, SOURCE, HTML, or JUPYTER'.format(file_format))

        data = {'path': dbx_path,
                'format': normalized_format,
                'direct_download': True}

        resp = self._rest_call[METHOD](API_PATH, data=data)

        if resp.status_code == 200:
            # Fetch the payload before touching the filesystem so a failure
            # here does not leave an empty file behind.
            # NOTE(review): a plain requests.Response has no .get(); this
            # presumably relies on a wrapper returned by _rest_call. If it is
            # a requests.Response, this should read resp.content - confirm.
            content = resp.get('content')
            with open(file_path, 'wb+') as fo:
                fo.write(content)

            return file_path

        elif resp.status_code == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        elif resp.json().get("error_code") == "MAX_NOTEBOOK_SIZE_EXCEEDED":
            raise MaxNotebookSizeExceeded(resp.json().get('message'))

        elif resp.json().get("error_code") == "RESOURCE_DOES_NOT_EXIST":
            raise ResourceDoesNotExist(resp.json().get('message'))

        else:
            raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                               resp.json().get('error_code'),
                                                               resp.json().get('message')))
    def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name,
                                      cluster_id):
        """
        Send a request whose only payload is a cluster id to an endpoint.

        Parameters
        ----------
        method : str
            Key used to pick the HTTP caller from ``self._rest_call``
            (e.g. 'POST' or 'GET')
        api_path : str
            API path the request is sent to

        cluster_name : str, optional
            The name of the cluster. Only used to look up the id when
            cluster_id is not given.

        cluster_id : str, optional
            The id of the cluster the request is about.

        Returns
        -------
            The decoded JSON body when method is 'GET' and the service
            answers 200; otherwise, on a 200, the cluster id that was sent

        Raises
        ------
        ValueError
            When neither cluster_name or cluster_id are passed
        ResourceDoesNotExist
            When no cluster with the given name is found
        Exception chosen by ``choose_exception``
            When the service answers with anything other than 200
        """
        if not cluster_name and not cluster_id:
            raise ValueError(
                "Either cluster_id or cluster_name must be specified")

        if not cluster_id:
            # Past the guard above, a missing id implies a name was given.
            try:
                cluster_id = self.get_cluster_id(cluster_name)
            except ResourceDoesNotExist:
                raise ResourceDoesNotExist(
                    "No cluster named '{0}' was found".format(cluster_name))

        response = self._rest_call[method](api_path,
                                           data={"cluster_id": cluster_id})

        if response.status_code != 200:
            raise choose_exception(response)

        if method == 'GET':
            return response.json()

        return cluster_id
# Example #3
# 0
    def __send_cluster_id_to_endpoint(self, method, api_path, cluster_name, cluster_id):
        """
        Private method to send only a cluster id to a given endpoint

        Parameters
        ----------
        method : str
            Key used to pick the HTTP caller from ``self._rest_call``
            (e.g. 'POST' or 'GET')
        api_path : str
            API path that the request is sent to

        cluster_name : str, optional
            The name of the cluster. Only used to look up the id when
            cluster_id is not given.

        cluster_id : str, optional
            The id of the cluster the request is about.

        Returns
        -------
            The decoded JSON body when method is 'GET' and the service
            answers 200; otherwise, on a 200, the cluster id that was sent

        Raises
        ------
        ValueError
            When neither cluster_name or cluster_id are passed
        AuthorizationError
            If the service returns a 403 status code
        ResourceDoesNotExist
            When the service answers 400 and reports that the cluster id
            does not exist (get_cluster_id may also raise it for an unknown
            name)
        APIError
            For any other non-200 response
        """
        if not (cluster_name or cluster_id):
            raise ValueError("Either cluster_id or cluster_name must be specified")

        if cluster_name and not cluster_id:
            cluster_id = self.get_cluster_id(cluster_name)

        data = {"cluster_id": cluster_id}

        resp = self._rest_call[method](api_path, data=data)

        if resp.status_code == 200:
            return resp.json() if method == 'GET' else cluster_id

        if resp.status_code == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        # Every remaining branch needs the error body, so decode it once.
        body = resp.json()
        message = body.get('message')

        # The placeholder must be filled with the real id; comparing against
        # the literal "{id}" template can never match a service message.
        not_found_message = "Cluster {id} does not exist".format(id=cluster_id)
        if resp.status_code == 400 and message == not_found_message:
            raise ResourceDoesNotExist(message)

        raise APIError("Response code {0}: {1} {2}".format(resp.status_code,
                                                           body.get('error_code'),
                                                           message))
    def delete(self, path, recursive=False, not_exists_ok=False):
        """
        Delete a path from the Databricks workspace.

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, to delete

        recursive : bool, optional
            Sent to the service as the ``recursive`` flag of the request

        not_exists_ok : bool, optional
            When True, a RESOURCE_DOES_NOT_EXIST answer is treated as
            success instead of raising

        Returns
        -------
        path if it was deleted, or if it was missing and not_exists_ok is True

        Raises
        ------
        ResourceDoesNotExist:
            If not_exists_ok is set to False and the given path does not exist

        AuthorizationError:
            If the services returns a 403 status code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        payload = {'path': path, 'recursive': recursive}
        response = self._rest_call['POST']('/workspace/delete', data=payload)
        status = response.status_code

        if status == 200:
            return path

        if status == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        # The error body is only inspected for a 400 answer.
        path_missing = (status == 400
                        and response.json().get("error_code") == "RESOURCE_DOES_NOT_EXIST")
        if path_missing and not_exists_ok:
            return path
        if path_missing:
            raise ResourceDoesNotExist(response.json().get('message'))

        raise APIError("Response code {0}: {1} {2}".format(response.status_code,
                                                           response.json().get('error_code'),
                                                           response.json().get('message')))
    def get_cluster_id(self, cluster_name):
        """
        Resolve a cluster display name to a cluster ID.

        Parameters
        ----------
        cluster_name : str
            Display name of the cluster

        Returns
        -------
            The clusters returned by ``self.list()`` whose name matches are
            ordered by ascending start_time. The ID of the first one in
            state RUNNING is returned; if none is running, the ID of the
            first one in that ordering is returned.

        Raises
        ------
        ResourceDoesNotExist
            When no cluster with the given name is found
        """
        ClusterInfo = collections.namedtuple('ClusterInfo',
                                             ['id', 'state', 'start_time'])

        matches = []
        for entry in self.list():
            if entry['cluster_name'] != cluster_name:
                continue
            matches.append(ClusterInfo(id=entry['cluster_id'],
                                       state=entry['state'],
                                       start_time=entry['start_time']))

        if len(matches) == 0:
            raise ResourceDoesNotExist(
                "No cluster named '{0}' was found".format(cluster_name))

        # list.sort is stable, so ties keep the order the service returned.
        matches.sort(key=lambda info: info.start_time)

        for info in matches:
            if info.state == 'RUNNING':
                return info.id

        # Nothing is running: fall back to the first match in sorted order.
        return matches[0].id
    def list(self, path):
        """List the contents of the given directory.

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, whose contents should be listed

        Returns
        -------
        List of WorkspaceObjectInfo, one per entry under ``objects`` in the
        response; an empty list when the response has no objects

        Raises
        ------
        AuthorizationError:
            If the services returns a 403 status code

        ResourceDoesNotExist:
            If the service answers 400 with the RESOURCE_DOES_NOT_EXIST error code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        response = self._rest_call['GET']('/workspace/list', data={'path': path})
        status = response.status_code

        if status == 200:
            # A missing or empty "objects" entry means there is nothing to list.
            if not response.json().get('objects'):
                return []
            return [WorkspaceObjectInfo(**entry) for entry in response.json().get('objects')]

        if status == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        if status == 400 and response.json().get("error_code") == "RESOURCE_DOES_NOT_EXIST":
            raise ResourceDoesNotExist(response.json().get('message'))

        raise APIError("Response code {0}: {1} {2}".format(response.status_code,
                                                           response.json().get('error_code'),
                                                           response.json().get('message')))
    def get_status(self, path):
        """Get the status of a given Databricks path.

        Parameters
        ----------
        path : str
            The path, in the Databricks workspace, to get the status of

        Returns
        -------
            WorkspaceObjectInfo built from the fields of the JSON response

        Raises
        ------
        AuthorizationError:
            If the services returns a 403 status code

        ResourceDoesNotExist:
            If the service answers 400 with the RESOURCE_DOES_NOT_EXIST error code

        APIError:
            If the status code returned by the service is anything except 200 and is not captured above
        """
        response = self._rest_call['GET']('/workspace/get-status', data={'path': path})
        status = response.status_code

        if status == 200:
            return WorkspaceObjectInfo(**response.json())

        if status == 403:
            raise AuthorizationError("User is not authorized or token is incorrect.")

        if status == 400 and response.json().get("error_code") == "RESOURCE_DOES_NOT_EXIST":
            raise ResourceDoesNotExist(response.json().get('message'))

        raise APIError("Response code {0}: {1} {2}".format(response.status_code,
                                                           response.json().get('error_code'),
                                                           response.json().get('message')))