def get_job_status(self, job_id):
    """
    Looks up a batch job through Livy's batches endpoint and reports
    a trimmed-down view of its status

    Keyword arguments:
        job_id {str} -- Job identifier

    Returns:
        response {dict} -- Dictionary with the "id", "state" and "appId"
                           values taken from the endpoint's JSON reply
                           (a value is None when the reply lacks the key).

    Raises:
        ObjectNotFoundError -- The endpoint answered with status 404
        ServiceError -- The endpoint answered with any other non-ok status
    """
    status_url = "{0}/batches/{1}".format(self.url, job_id)
    http_client = RestUtil.request_with_retry()
    livy_reply = http_client.get(url=status_url, auth=self.auth)

    if livy_reply.ok:
        payload = livy_reply.json()
        # Expose only the fields callers are promised, in a fixed order.
        return {key: payload.get(key) for key in ("id", "state", "appId")}

    if livy_reply.status_code == 404:
        raise ObjectNotFoundError("Job with id {} not found.".format(job_id))
    raise ServiceError("Failed to get jobs state. " + livy_reply.text)
def get_job_logs(self, job_id, size):
    """
    Retrieves the logs of a batch job from Livy's batches logs endpoint

    Keyword arguments:
        job_id {str} -- Job identifier
        size {int} -- Number of log lines to be returned; the "size" query
                      parameter is only sent when this is a positive number

    Returns:
        response -- The response object of the GET call, returned as-is

    Raises:
        ObjectNotFoundError -- The endpoint answered with status 404
        ServiceError -- The endpoint answered with any other non-ok status
    """
    logs_url = "{0}/batches/{1}/logs".format(self.url, job_id)

    # None, zero and negative sizes all mean "do not send a size".
    limit_requested = size is not None and size > 0
    if limit_requested:
        logs_url += "?size={}".format(size)

    logs_reply = RestUtil.request_with_retry().get(url=logs_url,
                                                   auth=self.auth)
    if logs_reply.ok:
        return logs_reply

    if logs_reply.status_code == 404:
        raise ObjectNotFoundError("Job with id {} not found.".format(job_id))
    raise ServiceError("Failed to get job logs. " + logs_reply.text)
def download_file(self, file_name_with_path):
    """
    Downloads a file from HDFS location identified by the path

    The path is first resolved with _get_actual_download_file_path, then
    requested with "?op=OPEN" without following redirects. A 307 reply is
    expected; the URL in its "Location" header is fetched to obtain the
    file content.

    Keyword arguments:
        file_name_with_path {str} -- Name of the file identified with a path

    Returns:
        response -- Default Flask response object with file content and
                    appropriate headers set: the download reply's headers,
                    with Content-Type forced to application/octet-stream
                    and a Content-Disposition attachment header carrying
                    the last path segment as file name

    Raises:
        ObjectNotFoundError -- The open request answered with status 404
        ServiceError -- The open request answered with anything other than
                        307 (or 404), the 307 reply carried no Location
                        header, or the download request itself failed
    """
    file_name_with_path = self._get_actual_download_file_path(
        file_name_with_path)
    open_file_url = self.url + file_name_with_path + "?op=OPEN"

    # Redirects are deliberately not followed: the redirect target is read
    # from the Location header and fetched explicitly below.
    response = RestUtil.request_with_retry().get(open_file_url,
                                                 auth=self.auth,
                                                 allow_redirects=False)
    if response.status_code != 307:
        if response.status_code == 404:
            raise ObjectNotFoundError(
                "File {} not found.".format(file_name_with_path))
        raise ServiceError(
            "Attempt to open file {0} failed with {1} and {2}.".format(
                file_name_with_path, response.status_code,
                response.reason))

    file_download_url = None
    if response.headers is not None:
        file_download_url = response.headers.get("Location")
    if not file_download_url:
        # Without a redirect target there is nothing to download; fail
        # with a service error instead of a bare KeyError or of handing
        # the redirect reply back to the caller.
        raise ServiceError(
            "Attempt to download file {0} failed: redirect response "
            "has no Location header.".format(file_name_with_path))

    res = RestUtil.request_with_retry().get(file_download_url,
                                            auth=self.auth,
                                            stream=True)
    # Validate the download reply itself (res), not the earlier 307
    # redirect reply, and report the download's own status and reason.
    if not res.ok:
        raise ServiceError(
            "Attempt to download file {0} failed with {1} and {2}.".format(
                file_name_with_path, res.status_code, res.reason))

    response = Response(res.content, headers=dict(res.headers))
    response.headers['Content-Type'] = 'application/octet-stream'
    response.headers['Content-Disposition'] = (
        'attachment;filename="{}"'.format(
            file_name_with_path.split("/")[-1]))
    return response
def _get_actual_download_file_path(self, file_name_with_path):
    """
    Resolves the given path to the path of the single file to download

    Requests "?op=LISTSTATUS" for the path and inspects the
    "FileStatuses" / "FileStatus" list of the JSON reply:
        - one entry with an empty "pathSuffix": the given path is returned
        - one entry of type "FILE": path + "/" + pathSuffix is returned
        - one entry of type "DIRECTORY": the lookup recurses into
          path + "/" + pathSuffix

    Keyword arguments:
        file_name_with_path {str} -- Path of a file, or of a folder that
                                     holds a single file

    Returns:
        download_file_path {str} -- Resolved file path. None when the
                                    reply has no "FileStatuses" or
                                    "FileStatus" entry, or when the single
                                    entry is neither a FILE nor a DIRECTORY

    Raises:
        ObjectNotFoundError -- The listing answered with status 404, or the
                               listing is empty (nothing to download)
        ServiceError -- The listing answered with any other non-ok status
        BadRequestError -- The path holds more than one entry
    """
    list_status_url = self.url + file_name_with_path + "?op=LISTSTATUS"
    response = RestUtil.request_with_retry().get(list_status_url,
                                                 auth=self.auth)
    if not response.ok:
        if response.status_code == 404:
            raise ObjectNotFoundError(
                "File {} not found.".format(file_name_with_path))
        raise ServiceError(
            "Attempt to open file {0} failed with {1} and {2}.".format(
                file_name_with_path, response.status_code,
                response.reason))

    list_status_response = json.loads(response.text)
    if list_status_response is None:
        return None
    files_statuses = list_status_response.get("FileStatuses")
    if files_statuses is None:
        return None
    file_status_list = files_statuses.get("FileStatus")
    if file_status_list is None:
        return None

    if not file_status_list:
        # An empty listing (e.g. an empty folder) has no file to serve;
        # report it as not found instead of failing with IndexError below.
        raise ObjectNotFoundError(
            "File {} not found.".format(file_name_with_path))
    if len(file_status_list) > 1:
        raise BadRequestError(
            "Specified path is a directory containing multiple files. "
            "Supported only if single part file is inside folder.")

    only_entry = file_status_list[0]
    path_suffix = only_entry["pathSuffix"]
    if len(path_suffix) == 0:
        # An empty suffix means the listing describes the path itself.
        return file_name_with_path

    child_path = file_name_with_path + "/" + path_suffix
    if only_entry["type"] == "DIRECTORY":
        return self._get_actual_download_file_path(child_path)
    if only_entry["type"] == "FILE":
        return child_path
    # Any other entry type is left unresolved, as before.
    return None