def runNotebookJob(notebookId: str, runStatusId: int = None, runType: str = "Scheduled"):
    """
    Celery task to run a zeppelin notebook

    Records the outcome on a RunStatus row: a new row is created when no
    runStatusId is given, otherwise the existing row is loaded and its
    startTimestamp is reset to the current time.

    :param notebookId: ID of the zeppelin notebook which to run
    :param runStatusId: ID of genie.runStatus model; a new RunStatus is created when falsy
    :param runType: Run type stored on a newly created RunStatus
    """
    if not runStatusId:
        runStatus = RunStatus.objects.create(
            notebookId=notebookId, status=NOTEBOOK_STATUS_RUNNING, runType=runType)
    else:
        runStatus = RunStatus.objects.get(id=runStatusId)
        # NOTE(review): assumed to apply only to a pre-existing RunStatus - confirm
        runStatus.startTimestamp = dt.datetime.now()
        runStatus.save()

    # Bound up front so the outer handler can still notify when the very first
    # Zeppelin call fails; previously that path hit an unbound name and the
    # error was neither stored nor notified.
    notebookName = "Undefined"
    try:
        # Check if notebook is already running
        isRunning, notebookName = checkIfNotebookRunning(notebookId)
        if isRunning:
            runStatus.status = NOTEBOOK_STATUS_ERROR
            runStatus.message = "Notebook already running"
            runStatus.save()
        else:
            # Clear notebook results
            Zeppelin.clearNotebookResults(notebookId)
            response = Zeppelin.runNotebookJob(notebookId)
            if response:
                try:
                    # Poll every 3 seconds, for at most an hour, until the
                    # notebook is no longer reported as running
                    polling.poll(
                        lambda: checkIfNotebookRunningAndStoreLogs(
                            notebookId, runStatus) != True,
                        step=3,
                        timeout=3600)
                except Exception as ex:
                    runStatus.status = NOTEBOOK_STATUS_ERROR
                    runStatus.message = str(ex)
                    runStatus.save()
                    NotificationServices.notify(
                        notebookName=notebookName, isSuccess=False, message=str(ex))
            else:
                runStatus.status = NOTEBOOK_STATUS_ERROR
                runStatus.message = "Failed running notebook"
                runStatus.save()
    except Exception as ex:
        runStatus.status = NOTEBOOK_STATUS_ERROR
        runStatus.message = str(ex)
        runStatus.save()
        NotificationServices.notify(
            notebookName=notebookName, isSuccess=False, message=str(ex))
def addNotebook(payload):
    """
    Service to render a notebook from a template and add it to Zeppelin

    :param payload: Dict holding "notebookTemplateId" and the template variables.
        Optional keys "sourceConnection" / "targetConnection" (Connection ids)
        and "destinationTableS3Path" add derived variables to the render context.
        The dict itself is used as the render context and is extended in place.
    :return: ApiResponse, updated to success when Zeppelin accepts the notebook
    """
    res = ApiResponse(message="Error adding notebook")
    notebookTemplate = NotebookTemplate.objects.get(
        id=payload.get("notebookTemplateId", 0))
    context = payload  # Storing payload in context variable so that it can be used for rendering

    # Handling connection variables. Each role reads its own connection id and
    # writes its own "<role>_<param>" keys; the target branch previously reused
    # the source connection id and overwrote the source keys.
    for role in ("sourceConnection", "targetConnection"):
        if payload.get(role, False):
            connection = Connection.objects.get(id=payload[role])
            for cp in connection.cpvc.all():
                context[role + "_" + cp.connectionParam.name] = cp.value

    # Handling S3 path - Splitting it to get the table name
    if payload.get("destinationTableS3Path", False):
        pathParts = payload["destinationTableS3Path"].rsplit('/', 1)
        context["destinationTableName"] = pathParts[1]
        context["warehouseLocation"] = pathParts[0]

    # Adding a temp table name to the context
    context["tempTableName"] = "tempTable_" + str(round(time.time() * 1000))

    notebook = Template(notebookTemplate.template).render(Context(context))
    response = Zeppelin.addNotebook(notebook)
    if response:
        res.update(True, "Notebook added successfully")
    return res
def checkIfNotebookRunningAndStoreLogs(notebookId, runStatus):
    """
    Fetch the notebook details from Zeppelin, store them on the run status and
    report whether the notebook is still running

    :param notebookId: ID of the zeppelin notebook to inspect
    :param runStatus: Object whose logs attribute receives the JSON-dumped details
    :return: Value of info.isRunning in the details, False when it is absent
    """
    details = Zeppelin.getNotebookDetails(notebookId)

    # Persist the raw details on every call
    runStatus.logs = json.dumps(details)
    runStatus.save()

    info = details.get("info", {})
    stillRunning = info.get("isRunning", False)
    if stillRunning:
        return stillRunning

    # Notebook is no longer running: let setNotebookStatus record the outcome
    setNotebookStatus(details, runStatus)
    return stillRunning
def deleteNotebook(notebookId: str):
    """
    Service to delete a notebook from Zeppelin

    :param notebookId: ID of the zeppelin notebook to delete
    :return: ApiResponse, updated to success when Zeppelin reports the deletion
    """
    # The failure message previously said "cloning", a leftover from the clone service
    res = ApiResponse(message="Error in deleting notebook")
    response = Zeppelin.deleteNotebook(notebookId)
    if response:
        res.update(True, "Notebook deleted successfully", None)
    return res
def clearNotebookResults(notebookId: str):
    """
    Service to clear the results of a zeppelin notebook

    :param notebookId: ID of the zeppelin notebook whose results are cleared
    :return: ApiResponse, updated to success when Zeppelin returns a truthy response
    """
    result = ApiResponse(message="Error in clearing notebook")
    if Zeppelin.clearNotebookResults(notebookId):
        result.update(True, "Notebook cleared successfully", None)
    return result
def cloneNotebook(notebookId: str, payload: dict):
    """
    Service to clone a zeppelin notebook

    :param notebookId: ID of the zeppelin notebook to clone
    :param payload: Dict sent to Zeppelin as a JSON string
    :return: ApiResponse, updated to success when Zeppelin returns a truthy response
    """
    result = ApiResponse(message="Error in cloning notebook")
    serializedPayload = json.dumps(payload)
    cloned = Zeppelin.cloneNotebook(notebookId, serializedPayload)
    if not cloned:
        return result
    result.update(True, "Notebook cloned successfully", None)
    return result
def unarchiveNotebook(notebookId: str, notebookName: str):
    """
    Service to unarchive notebook

    Unarchiving is done by renaming the notebook in Zeppelin.

    :param notebookId: ID of the zeppelin notebook to unarchive
    :param notebookName: Name passed to Zeppelin.renameNotebook
    :return: ApiResponse, updated to success when the rename returns a truthy response
    """
    # The failure message previously said "archiving" although this is the unarchive service
    res = ApiResponse(message="Error in unarchiving notebook")
    response = Zeppelin.renameNotebook(notebookId, notebookName)
    if response:
        res.update(True, "Notebook unarchived successfully", None)
    return res
def stopNotebookJob(notebookId: str):
    """
    Service to stop a running notebook job

    :param notebookId: ID of the zeppelin notebook to stop
    :return: ApiResponse, updated to success when Zeppelin returns a truthy response
    """
    result = ApiResponse(message="Error in stopping notebook")
    # NOTE(review): only the Zeppelin stop call happens here; no run status is updated
    stopped = Zeppelin.stopNotebookJob(notebookId)
    if not stopped:
        return result
    result.update(True, "Notebook stopped successfully", None)
    return result
def archivedNotebooks():
    """
    Service to fetch the notebooks Zeppelin lists under "~Trash"

    :return: ApiResponse carrying the notebooks on success; left in its error
        state when Zeppelin returns nothing
    """
    result = ApiResponse(message="Error retrieving archived notebooks")
    trashed = Zeppelin.getAllNotebooks("~Trash")
    if not trashed:
        return result
    result.update(True, "Archived notebooks retrieved successfully", trashed)
    return result
def runNotebookJob(notebookId: str, runType: str = "Scheduled"):
    """
    Celery task to run a zeppelin notebook

    Creates a RunStatus row in "RUNNING" state and switches it to "ERROR",
    with a message, when the run cannot be started or polling fails.

    :param notebookId: ID of the zeppelin notebook which to run
    :param runType: Run type stored on the created RunStatus
    """
    runStatus = RunStatus.objects.create(
        notebookId=notebookId, status="RUNNING", runType=runType)

    # Bound up front so the outer handler can still notify when the very first
    # Zeppelin call fails; previously that path hit an unbound name and the
    # error was neither stored nor notified.
    notebookName = "Undefined"
    try:
        # Check if notebook is already running
        isRunning, notebookName = checkIfNotebookRunning(notebookId)
        if isRunning:
            runStatus.status = "ERROR"
            runStatus.message = "Notebook already running"
            runStatus.save()
        else:
            # Clear notebook results
            Zeppelin.clearNotebookResults(notebookId)
            response = Zeppelin.runNotebookJob(notebookId)
            if response:
                try:
                    # Poll every 3 seconds, for at most an hour, until the
                    # notebook is no longer reported as running
                    polling.poll(
                        lambda: checkIfNotebookRunningAndStoreLogs(
                            notebookId, runStatus) != True,
                        step=3,
                        timeout=3600)
                except Exception as ex:
                    runStatus.status = "ERROR"
                    runStatus.message = str(ex)
                    runStatus.save()
                    NotificationServices.notify(
                        notebookName=notebookName, isSuccess=False, message=str(ex))
            else:
                runStatus.status = "ERROR"
                runStatus.message = "Failed running notebook"
                runStatus.save()
    except Exception as ex:
        runStatus.status = "ERROR"
        runStatus.message = str(ex)
        runStatus.save()
        NotificationServices.notify(
            notebookName=notebookName, isSuccess=False, message=str(ex))
def deleteNotebook(notebookId: str):
    """
    Service to delete a notebook from Zeppelin together with its NotebookObject rows

    :param notebookId: ID of the zeppelin notebook to delete
    :return: ApiResponse, updated to success when Zeppelin returns a truthy response
    """
    result = ApiResponse(message="Error in deleting notebook")
    if not Zeppelin.deleteNotebook(notebookId):
        return result

    # Local rows are removed only after Zeppelin accepted the deletion
    NotebookObject.objects.filter(notebookZeppelinId=notebookId).delete()
    result.update(True, "Notebook deleted successfully", None)
    return result
async def _fetchNotebookStatuses(notebooks: list):
    """
    Async method to fetch notebook status details for multiple notebooks

    Returns a dict keyed by the "id" field of each fetched status.

    :param notebooks: List of notebook describing dicts each containing the 'id' field
    """
    pending = [Zeppelin.getNotebookStatus(entry["id"]) for entry in notebooks]
    statusById = {}
    # as_completed hands results back in completion order, not request order
    for finished in asyncio.as_completed(pending):
        fetched = await finished
        statusById[fetched["id"]] = fetched
    return statusById
def editNotebook(notebookObjId: int, payload: dict):
    """
    Service to update a template based notebook

    :param notebookObjId: ID of the NotebookObject to be edited
    :param payload: Dict containing notebook template info
    :return: ApiResponse, updated to success when Zeppelin accepted the new paragraphs
    """
    result = ApiResponse(message="Error updating notebook")

    # Copy taken before the payload is handed to _prepareNotebookJson
    originalPayload = payload.copy()

    target = NotebookObject.objects.get(id=notebookObjId)
    notebookJson, connection = NotebookJobServices._prepareNotebookJson(
        target.notebookTemplate, payload)

    if not Zeppelin.updateNotebookParagraphs(target.notebookZeppelinId, notebookJson):
        return result

    # Rename in Zeppelin only when the payload carries a name
    newName = originalPayload.get("name")
    if newName:
        Zeppelin.renameNotebook(target.notebookZeppelinId, newName)

    target.defaultPayload = originalPayload
    target.connection = connection
    target.save()
    result.update(True, "Notebook updated successfully")
    return result
def addNotebook(payload: dict):
    """
    Service to create and add a template based notebook

    :param payload: Dict containing notebook template info; "notebookTemplateId"
        selects the NotebookTemplate (0 is looked up when the key is missing)
    :return: ApiResponse, updated to success when Zeppelin returns a notebook id
    """
    result = ApiResponse(message="Error adding notebook")

    # Copy taken before the payload is handed to _prepareNotebookJson
    originalPayload = payload.copy()

    template = NotebookTemplate.objects.get(id=payload.get("notebookTemplateId", 0))
    notebookJson, connection = NotebookJobServices._prepareNotebookJson(template, payload)

    zeppelinId = Zeppelin.addNotebook(notebookJson)
    if not zeppelinId:
        return result

    NotebookObject.objects.create(
        notebookZeppelinId=zeppelinId,
        connection=connection,
        notebookTemplate=template,
        defaultPayload=originalPayload)
    result.update(True, "Notebook added successfully")
    return result
def getNotebooks(offset: int = 0):
    """
    Service to fetch notebooks from Zeppelin and attach schedule and last-run info

    One page of at most GET_NOTEBOOKJOBS_LIMIT notebooks is returned, starting
    at offset.

    :param offset: Offset into the list of notebooks returned by Zeppelin
    :return: ApiResponse; on success its data is a dict with the page under
        "notebooks" and the total number of notebooks under "count". The
        response stays in its error state when Zeppelin returns no notebooks.
    """
    res = ApiResponse(message="Error retrieving notebooks")
    notebooks = Zeppelin.getAllNotebooks()
    if notebooks:
        notebookCount = len(notebooks)
        notebooks = notebooks[offset:offset + GET_NOTEBOOKJOBS_LIMIT]
        notebookIds = [notebook["id"] for notebook in notebooks]

        # Index jobs by the same field the query filters on; the first job per
        # notebook wins. Matching used to compare notebookJob.name instead.
        jobsByNotebookId = {}
        for notebookJob in NotebookJob.objects.filter(notebookId__in=notebookIds):
            jobsByNotebookId.setdefault(notebookJob.notebookId, notebookJob)

        for notebook in notebooks:
            notebook["name"] = notebook["path"]
            notebookJob = jobsByNotebookId.get(notebook["id"])
            if notebookJob:
                notebook["isScheduled"] = True
                notebook["schedule"] = str(notebookJob.crontab)
                notebook["isActive"] = notebookJob.enabled
                notebook["notebookJobId"] = notebookJob.id
            else:
                notebook["isScheduled"] = False

            # Most recent run, if the notebook has ever been run
            notebookRunStatus = RunStatus.objects.filter(
                notebookId=notebook["id"]).order_by("-startTimestamp").first()
            if notebookRunStatus:
                notebook["lastRun"] = RunStatusSerializer(notebookRunStatus).data

        res.update(True, "NotebookJobs retrieved successfully", {
            "notebooks": notebooks,
            "count": notebookCount
        })
    return res
def getNotebooksLight():
    """
    Gets concise notebook data

    :return: ApiResponse marked successful, carrying whatever Zeppelin.getAllNotebooks returns
    """
    result = ApiResponse(message="Error retrieving notebooks")
    # No truthiness check here: the response is marked successful unconditionally
    result.update(True, "Notebooks retrieved successfully", Zeppelin.getAllNotebooks())
    return result
def checkIfNotebookRunning(notebookId: str):
    """
    Look up a notebook in Zeppelin and report its running flag and name

    :param notebookId: ID of the zeppelin notebook to inspect
    :return: Tuple of (info.isRunning, name) from the notebook details; False
        and "Undefined" are used when the respective field is absent
    """
    details = Zeppelin.getNotebookDetails(notebookId)
    running = details.get("info", {}).get("isRunning", False)
    return running, details.get("name", "Undefined")
def getNotebooks(offset: int = 0, limit: int = None, searchQuery: str = None, sorter: dict = None, _filter: dict = None):
    """
    Service to fetch notebooks from Zeppelin and attach NotebookObject, schedule,
    workflow and last-run info

    One page of at most GET_NOTEBOOKOJECTS_LIMIT notebooks is returned,
    starting at offset.

    :param offset: Offset into the (searched and sorted) list of notebooks
    :param limit: Currently unused; the page size is GET_NOTEBOOKOJECTS_LIMIT
    :param searchQuery: When given, notebooks are narrowed through NotebookJobServices.search on "path"
    :param sorter: Optional dict; sorting is applied only when it has a truthy "order"
    :param _filter: Passed through to NotebookJobServices.sortingOnNotebook
    :return: ApiResponse; data is a dict with "notebooks" and "count" when there
        are notebooks, otherwise an empty list
    """
    res = ApiResponse(message="Error retrieving notebooks")
    notebooks = Zeppelin.getAllNotebooks()
    if searchQuery:
        notebooks = NotebookJobServices.search(notebooks, "path", searchQuery)
    # sorter defaults to None, so it must be checked before it is dereferenced
    if sorter and sorter.get('order', False):
        notebooks = NotebookJobServices.sortingOnNotebook(notebooks, sorter, _filter)

    if notebooks:
        notebookCount = len(notebooks)
        notebooks = notebooks[offset:offset + GET_NOTEBOOKOJECTS_LIMIT]
        notebookIds = [notebook["id"] for notebook in notebooks]

        # One-pass indexes replace a linear scan per notebook; the first row
        # per notebook id wins, as with the scan.
        objectsByZeppelinId = {}
        for notebookObj in NotebookObject.objects.filter(notebookZeppelinId__in=notebookIds):
            objectsByZeppelinId.setdefault(notebookObj.notebookZeppelinId, notebookObj)
        jobsByNotebookId = {}
        for notebookJob in NotebookJob.objects.filter(notebookId__in=notebookIds):
            jobsByNotebookId.setdefault(notebookJob.notebookId, notebookJob)

        for notebook in notebooks:
            notebook["name"] = notebook["path"]

            notebookObj = objectsByZeppelinId.get(notebook["id"])
            if notebookObj:
                notebook["notebookObjId"] = notebookObj.id

            notebookJob = jobsByNotebookId.get(notebook["id"])
            if notebookJob:
                notebook["isScheduled"] = True
                notebook["schedule"] = str(notebookJob.crontab.customschedule.name)
                notebook["isActive"] = notebookJob.enabled
                notebook["notebookJobId"] = notebookJob.id
            else:
                notebook["isScheduled"] = False

            # Names of the workflows this notebook is mapped to
            assignedWorkflowId = WorkflowNotebookMap.objects.filter(
                notebookId=notebook["id"]).values_list("workflow_id", flat=True)
            names = Workflow.objects.filter(
                id__in=assignedWorkflowId).values_list('name', flat=True)
            notebook["assignedWorkflow"] = list(names)

            # Most recent run, if the notebook has ever been run
            notebookRunLogs = NotebookRunLogs.objects.filter(
                notebookId=notebook["id"]).order_by("-startTimestamp").first()
            if notebookRunLogs:
                notebook["notebookStatus"] = notebookRunLogs.status or None
                notebook["lastRun"] = NotebookRunLogsSerializer(notebookRunLogs).data

        res.update(True, "NotebookObjects retrieved successfully", {
            "notebooks": notebooks,
            "count": notebookCount
        })
    else:
        res.update(True, "NotebookObjects retrieved successfully", [])
    return res