def get_session(cls, session_id, request, serializer):
    """
    Reads (GET) or updates (PUT) a session through the given serializer

    :param session_id: Id of the session
    :param request: REST request; its method selects the operation and, for
           PUT, its body is parsed as JSON
    :param serializer: Serializer class, instantiated with the session
    :rtype A [status, json] list where json holds a single 'contents' entry
           with the serialized session or the error description. Nothing is
           returned for any other HTTP verb.
    """
    try:
        session = Session.objects.get(id=session_id)
    except Session.DoesNotExist as e:
        log.error(e)
        return [http_status.HTTP_404_NOT_FOUND,
                json.dumps({'contents': str(e)})]

    verb = request.method
    if verb == consts.REST_VERB_GET:
        reader = serializer(session)
        body = json.dumps({'contents': str(reader.data)})
        return [http_status.HTTP_200_OK, body]

    if verb != consts.REST_VERB_PUT:
        # Unsupported verbs fall through without a result
        return None

    payload = JSONParser().parse(request)
    writer = serializer(session, data=payload)
    if not writer.is_valid():
        log.error(writer.errors)
        body = json.dumps({'contents': str(writer.errors)})
        return [http_status.HTTP_400_BAD_REQUEST, body]

    writer.save()
    body = json.dumps({'contents': str(writer.data)})
    return [http_status.HTTP_200_OK, body]
def update(cls, params):
    """
    Overwrites every attribute of an existing rendering resource config

    :param params new config for the rendering resource; 'id' selects the
           record (lower-cased before the lookup), every other key listed
           below is copied as is. A missing key is not handled here.
    :rtype [HTTP_200_OK, ''] on success, [HTTP_404_NOT_FOUND, message] when
           no config with that id exists
    """
    # Copied in this order, one attribute per key
    copied_keys = (
        'command_line',
        'environment_variables',
        'modules',
        'process_rest_parameters_format',
        'scheduler_rest_parameters_format',
        'project',
        'queue',
        'exclusive',
        'nb_nodes',
        'nb_cpus',
        'nb_gpus',
        'graceful_exit',
        'wait_until_running',
        'name',
        'description',
    )
    try:
        record = RenderingResourceSettings.objects.get(
            id=params['id'].lower())
        for key in copied_keys:
            setattr(record, key, params[key])
        with transaction.atomic():
            record.save()
        return [http_status.HTTP_200_OK, '']
    except RenderingResourceSettings.DoesNotExist as e:
        log.error(str(e))
        return [http_status.HTTP_404_NOT_FOUND, str(e)]
def create(cls, params):
    """
    Inserts a new rendering resource config

    :param params Settings for the new rendering resource; 'id' is
           lower-cased, the textual settings are converted with str() and
           the remaining ones are stored as given
    :rtype [HTTP_201_CREATED, json] on success, [HTTP_409_CONFLICT, json]
           when the insert raises an IntegrityError
    """
    text_keys = (
        'command_line',
        'environment_variables',
        'modules',
        'process_rest_parameters_format',
        'scheduler_rest_parameters_format',
        'project',
        'queue',
    )
    raw_keys = (
        'exclusive',
        'nb_nodes',
        'nb_cpus',
        'nb_gpus',
        'graceful_exit',
        'wait_until_running',
        'name',
        'description',
    )
    try:
        settings_id = params['id'].lower()
        values = {'id': settings_id}
        for key in text_keys:
            values[key] = str(params[key])
        for key in raw_keys:
            values[key] = params[key]
        record = RenderingResourceSettings(**values)
        # force_insert makes an already used id fail instead of updating it
        with transaction.atomic():
            record.save(force_insert=True)
        msg = 'Rendering Resource ' + settings_id + ' successfully configured'
        return [http_status.HTTP_201_CREATED, json.dumps({'contents': msg})]
    except IntegrityError as e:
        log.error(str(e))
        return [http_status.HTTP_409_CONFLICT,
                json.dumps({'contents': str(e)})]
def stop(session_info):
    """
    Asks a local process to exit, sends it SIGTERM, waits for 2 seconds and
    then delegates to the private kill helper

    :param session_info: Session information containing the PID of the
           process, the renderer id and the HTTP host/port of the renderer
    :return: [404, message] when the session has no process (PID -1),
             [400, message] when an OSError is raised, otherwise whatever
             the kill helper returns
    """
    pid = session_info.process_pid
    if pid == -1:
        return [404, 'Process does not exist']

    try:
        rr_settings = manager.RenderingResourceSettings.objects.get(
            id=session_info.renderer_id)
        if rr_settings.graceful_exit:
            endpoint = session_info.http_host + ':' + \
                str(session_info.http_port)
            exit_url = 'http://' + endpoint + '/' + 'EXIT'
            try:
                urllib2.urlopen(urllib2.Request(url=exit_url)).read()
            # pylint: disable=W0702
            except urllib2.URLError as e:
                # Best effort only: the process is terminated below anyway
                log.error('Cannot gracefully exit.' + str(e))

        log.info(1, 'Terminating process ' + str(pid))
        os.kill(pid, signal.SIGTERM)
        time.sleep(2.0)
        return ProcessManager.__kill(session_info)
    except OSError as e:
        log.error(str(e))
        return [400, str(e)]
def delete_session(cls, session_id):
    """
    Stops the process and/or job attached to a session and deletes the
    session

    :param session_id: Id of the session to delete
    :rtype A [status, json] list: HTTP_200_OK when the session was destroyed
           or is already flagged as stopping, HTTP_404_NOT_FOUND when it
           does not exist, 500 for any other error
    """
    try:
        session = Session.objects.get(id=session_id)
        if not session.status == SESSION_STATUS_STOPPING:
            log.info(1, 'Removing session ' + str(session_id))
            # Persist the stopping flag first: a call that finds it set
            # skips the teardown and reports 'currently being destroyed'
            session.status = SESSION_STATUS_STOPPING
            session.save()
            if session.process_pid != -1:
                process_manager.ProcessManager.stop(session)
            if session.job_id is not None and session.job_id != '':
                jm = job_manager.JobManager()
                jm.stop(session)
            session.delete()
            msg = 'Session successfully destroyed'
            log.info(1, msg)
            response = json.dumps({'contents': str(msg)})
            return [http_status.HTTP_200_OK, response]
    except Session.DoesNotExist as e:
        log.error(str(e))
        response = json.dumps({'contents': str(e)})
        return [http_status.HTTP_404_NOT_FOUND, response]
    except Exception as e:
        log.error(str(e))
        response = json.dumps({'contents': str(e)})
        # NOTE(review): http_status looks like rest_framework.status, which
        # names this constant HTTP_500_INTERNAL_SERVER_ERROR. The former
        # HTTP_500_INTERNAL_ERROR lookup would raise AttributeError there
        # and hide the real error - confirm the import.
        internal_error = getattr(
            http_status, 'HTTP_500_INTERNAL_SERVER_ERROR', 500)
        return [internal_error, response]

    msg = 'Session is currently being destroyed'
    response = json.dumps({'contents': msg})
    log.info(1, msg)
    return [http_status.HTTP_200_OK, response]
def _query(session, attribute=None):
    """
    Queries Slurm ('scontrol show job') for information about the job of a
    session

    :param session: Current user session; job_id and cluster_node are used
    :param attribute: Attribute to be queried. When None the raw command
           output is returned.
    :return: The raw output, the value found after '<attribute>=' in the
             output, or an empty string when the session has no job, the
             attribute is not in the output or an OSError is raised
    """
    value = ''
    if session.job_id is None:
        return value

    try:
        command_line = SLURM_SSH_COMMAND + session.cluster_node + \
            ' scontrol show job ' + str(session.job_id)
        process = subprocess.Popen(
            [command_line],
            shell=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        output = process.communicate()[0]

        if attribute is None:
            return output

        match = re.search(attribute + r'=(\w+)', output)
        if match is None:
            # The output does not mention the attribute; report "no value"
            # instead of failing on the missing match
            log.error(attribute + ' not found for job ' +
                      str(session.job_id))
            return value

        value = match.group(1)
        log.info(1, attribute + ' = ' + value)
    except OSError as e:
        log.error(str(e))
    return value
def create_session(cls, session_id, owner, renderer_id):
    """
    Inserts a new user session, provided that session creation is enabled
    in the system global settings

    :param session_id: Id for the new session
    :param owner: Session owner
    :param renderer_id: Id of the renderer associated to the session
    :rtype A [status, json] list: HTTP_201_CREATED on success,
           HTTP_409_CONFLICT when the insert raises an IntegrityError,
           HTTP_403_FORBIDDEN when session creation is switched off
    """
    sgs = SystemGlobalSettings.objects.get()
    if not sgs.session_creation:
        msg = 'Session creation is currently suspended'
        log.error(msg)
        return [http_status.HTTP_403_FORBIDDEN,
                json.dumps({'contents': str(msg)})]

    try:
        created_at = datetime.datetime.utcnow()
        keep_alive = datetime.timedelta(
            seconds=sgs.session_keep_alive_timeout)
        expires_at = datetime.datetime.now() + keep_alive
        session = Session(
            id=session_id,
            owner=owner,
            renderer_id=renderer_id,
            created=created_at,
            valid_until=expires_at)
        with transaction.atomic():
            session.save(force_insert=True)
    except IntegrityError as e:
        log.error(e)
        return [http_status.HTTP_409_CONFLICT,
                json.dumps({'contents': str(e)})]

    msg = 'Session successfully created'
    log.debug(1, msg)
    return [http_status.HTTP_201_CREATED, json.dumps({'contents': msg})]
def __do_request(self, method, uri):
    """
    Sends one HTTP request to the '/route' endpoint of the image streaming
    service, identifying the session through a cookie

    :param method Method to be used by the HTTP request (GET, POST, DELETE,
           etc)
    :param uri JSON formatted URI sent as the body of the HTTP request
    :return [status code, response text] as answered by the service, or
            [400, message] when the request raises a requests exception
    """
    try:
        url = settings.IMAGE_STREAMING_SERVICE_URL + '/route'
        log.info(1, '__do_request(' + method + ', ' + url + ')')
        cookie = COOKIE_ID + '=' + str(self._session_id)
        reply = requests.request(
            method=method,
            timeout=settings.REQUEST_TIMEOUT,
            url=url,
            headers={'Content-Type': 'application/json', 'Cookie': cookie},
            data=uri)
        log.info(1, 'Response: ' + reply.text)
        reply.close()
        return [reply.status_code, reply.text]
    except requests.exceptions.RequestException as e:
        # HTTPError derives from RequestException, so this single handler
        # covers both cases
        log.error(str(e))
        return [400, str(e)]
def kill(session):
    """
    Cancels the job of a session with 'scancel'. This method should only be
    used if the stop method failed.

    :param session: Current user session; job_id and cluster_node are used
    :return: [200, json] once the command has been run, [400, json] when an
             OSError is raised, [500, 'Unexpected error'] when the session
             has no job id
    """
    result = [500, 'Unexpected error']
    if session.job_id is None:
        return result

    # Coerce once: a non-string job id would otherwise break the
    # concatenations below
    job_id = str(session.job_id)
    try:
        command_line = SLURM_SSH_COMMAND + session.cluster_node + \
            ' scancel ' + job_id
        log.info(1, 'Stopping job ' + job_id)
        process = subprocess.Popen(
            [command_line],
            shell=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE)
        output = process.communicate()[0]
        log.info(1, output)
        # NOTE(review): the exit status of scancel is not inspected, so
        # success is reported whenever the command could be launched
        msg = 'Job successfully cancelled'
        log.info(1, msg)
        result = [200, json.dumps({'contents': msg})]
    except OSError as e:
        msg = str(e)
        log.error(msg)
        result = [400, json.dumps({'contents': msg})]
    return result
def update(cls, params):
    """
    Overwrites every attribute of an existing rendering resource config

    :param params new config for the rendering resource; 'id' selects the
           record (lower-cased before the lookup), every other key listed
           below is copied as is. A missing key is not handled here.
    :rtype [HTTP_200_OK, ''] on success, [HTTP_404_NOT_FOUND, message] when
           no config with that id exists
    """
    # Copied in this order, one attribute per key
    copied_keys = (
        'command_line',
        'environment_variables',
        'modules',
        'process_rest_parameters_format',
        'scheduler_rest_parameters_format',
        'project',
        'queue',
        'exclusive',
        'nb_nodes',
        'nb_cpus',
        'nb_gpus',
        'memory',
        'graceful_exit',
        'wait_until_running',
        'name',
        'description',
    )
    try:
        record = RenderingResourceSettings.objects.get(
            id=params['id'].lower())
        for key in copied_keys:
            setattr(record, key, params[key])
        with transaction.atomic():
            record.save()
        return [http_status.HTTP_200_OK, '']
    except RenderingResourceSettings.DoesNotExist as e:
        log.error(str(e))
        return [http_status.HTTP_404_NOT_FOUND, str(e)]
def __open_process(cls, session, request):
    """
    Starts a local rendering resource process for a session that has none

    :param : session: Session holding the rendering resource
    :param : request: HTTP request whose DATA may hold 'params' and
             'environment' entries; both default to an empty string
    :rtype : An HTTP response built from the status returned by the process
             manager, or a 401 response when the session already has a
             process
    """
    try:
        parameters = request.DATA['params']
    except KeyError:
        parameters = ''
        log.debug(1, 'No parameters specified')

    try:
        environment = request.DATA['environment']
    except KeyError:
        environment = ''
        log.debug(1, 'No environment specified')

    log.debug(1, 'Executing command <Open> parameters=' + str(parameters) +
              ' environment=' + str(environment))

    if session.process_pid != -1:
        msg = 'process is already started'
        log.error(msg)
        return HttpResponse(
            status=401, content=json.dumps({'contents': str(msg)}))

    session.http_host = consts.DEFAULT_RENDERER_HOST
    # The port is the default one shifted by a random offset in [0, 1000]
    port_offset = random.randint(0, 1000)
    session.http_port = consts.DEFAULT_RENDERER_HTTP_PORT + port_offset
    outcome = process_manager.ProcessManager.start(
        session, parameters, environment)
    session.save()
    return HttpResponse(status=outcome[0], content=outcome[1])
def execute(cls, request, command):
    """
    Executes a command on the rendering resource of the session identified
    by the request

    :param : request: The REST request
    :param : command: Command to be executed. 'schedule', 'open', 'status',
             'log', 'err', 'job' and 'imagefeed' are handled here; anything
             else is forwarded to the rendering resource.
    :rtype : An HTTP response. 404 when the session id cannot be read from
             the request (KeyError/TypeError) or the session does not
             exist, 500 with the traceback for any other error.
    """
    # pylint: disable=R0912
    try:
        session_id = session_manager.SessionManager(
        ).get_session_id_from_request(request)
        log.debug(1, 'Processing command <' + command +
                  '> for session ' + str(session_id))
        session = Session.objects.get(id=session_id)

        # These two handlers build their own HTTP response
        if command == 'schedule':
            return cls.__schedule_job(session, request)
        if command == 'open':
            return cls.__open_process(session, request)

        # The following handlers return a [status, content] pair
        if command == 'status':
            status = cls.__session_status(session)
        elif command == 'log':
            status = cls.__rendering_resource_out_log(session)
        elif command == 'err':
            status = cls.__rendering_resource_err_log(session)
        elif command == 'job':
            status = cls.__job_information(session)
        elif command == 'imagefeed':
            status = cls.__image_feed(session_id)
        else:
            # Unknown command: forward what follows the session prefix
            url = request.get_full_path()
            prefix = settings.BASE_URL_PREFIX + '/session/'
            cmd = url[url.find(prefix) + len(prefix) + 1:len(url)]
            return cls.__forward_request(session, cmd, request)
        return HttpResponse(status=status[0], content=status[1])
    except (KeyError, TypeError):
        # format_exc() takes a depth limit, not the exception; it always
        # formats the exception currently being handled
        log.debug(1, str(traceback.format_exc()))
        response = json.dumps({'contents': 'Cookie is missing'})
        return HttpResponse(status=404, content=response)
    except Session.DoesNotExist:
        log.debug(1, str(traceback.format_exc()))
        response = json.dumps({'contents': 'Session does not exist'})
        return HttpResponse(status=404, content=response)
    except Exception:
        msg = traceback.format_exc()
        log.error(str(msg))
        response = json.dumps({'contents': str(msg)})
        return HttpResponse(status=500, content=response)
def allocate(self, session, job_information):
    """
    Allocates a job according to rendering resource configuration, trying
    each host of global_settings.SLURM_HOSTS in turn until one grants the
    allocation. If the allocation is successful, the session job_id is
    populated and the session status is set to SESSION_STATUS_SCHEDULED;
    otherwise the status is set to SESSION_STATUS_FAILED.

    :param session: Current user session
    :param job_information: Information about the job; its cluster_node is
           set to the host being tried
    :return: [200, json] when a job was granted, [400, json] with the last
             error otherwise, None when SLURM_HOSTS is empty
    """
    status = None
    for cluster_node in global_settings.SLURM_HOSTS:
        # The context manager releases the lock on every exit path,
        # including the break below, and only if this call acquired it
        with self._mutex:
            try:
                session.status = SESSION_STATUS_SCHEDULING
                session.cluster_node = cluster_node
                session.save()

                log.info(1, 'Scheduling job for session ' + session.id)
                job_information.cluster_node = cluster_node
                command_line = self._build_allocation_command(
                    session, job_information)
                process = subprocess.Popen(
                    [command_line],
                    shell=True,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)
                # The outcome of the allocation is read from stderr
                error = process.communicate()[1]

                if re.search('Granted', error) is not None:
                    # The first number of the message is the job id
                    session.job_id = re.findall('\\d+', error)[0]
                    log.info(1, 'Allocated job ' + str(session.job_id) +
                             ' on cluster node ' + cluster_node)
                    session.status = SESSION_STATUS_SCHEDULED
                    session.save()
                    response = json.dumps(
                        {'message': 'Job scheduled',
                         'jobId': session.job_id})
                    status = [200, response]
                    break

                session.status = SESSION_STATUS_FAILED
                session.save()
                log.error(error)
                response = json.dumps({'contents': error})
                status = [400, response]
            except OSError as e:
                log.error(str(e))
                response = json.dumps({'contents': str(e)})
                status = [400, response]
    return status
def execute(cls, request, command):
    """
    Executes a command on the rendering resource of the session identified
    by the request

    :param : request: The REST request
    :param : command: Command to be executed. 'schedule', 'open', 'status',
             'log', 'err', 'job' and 'imagefeed' are handled here; anything
             else is forwarded to the rendering resource.
    :rtype : An HTTP response. 404 when a KeyError is raised (reported as a
             missing cookie) or the session does not exist, 500 with the
             traceback for any other error.
    """
    # pylint: disable=R0912
    try:
        session_id = \
            session_manager.SessionManager().get_session_id_from_request(
                request)
        log.debug(1, 'Processing command <' + command +
                  '> for session ' + str(session_id))
        session = Session.objects.get(id=session_id)

        # These two handlers build their own HTTP response
        if command == 'schedule':
            return cls.__schedule_job(session, request)
        if command == 'open':
            return cls.__open_process(session, request)

        # The following handlers return a [status, content] pair
        if command == 'status':
            status = cls.__session_status(session)
        elif command == 'log':
            status = cls.__rendering_resource_out_log(session)
        elif command == 'err':
            status = cls.__rendering_resource_err_log(session)
        elif command == 'job':
            status = cls.__job_information(session)
        elif command == 'imagefeed':
            status = cls.__image_feed(session_id)
        else:
            # Unknown command: forward what follows the session prefix
            url = request.get_full_path()
            prefix = settings.BASE_URL_PREFIX + '/session/'
            cmd = url[url.find(prefix) + len(prefix) + 1: len(url)]
            return cls.__forward_request(session, cmd, request)
        return HttpResponse(status=status[0], content=status[1])
    except KeyError as e:
        # format_exc() takes a depth limit, not the exception; it always
        # formats the exception currently being handled
        log.debug(1, str(traceback.format_exc()))
        response = json.dumps(
            {'contents': 'Cookie ' + str(e) + ' is missing'})
        return HttpResponse(status=404, content=response)
    except Session.DoesNotExist:
        log.debug(1, str(traceback.format_exc()))
        response = json.dumps({'contents': 'Session does not exist'})
        return HttpResponse(status=404, content=response)
    except Exception:
        msg = traceback.format_exc()
        log.error(str(msg))
        response = json.dumps({'contents': str(msg)})
        return HttpResponse(status=500, content=response)
def delete(cls, settings_id):
    """
    Deletes the rendering resource config with the given id

    :param settings_id Identifier of the Rendering resource config to remove
    :rtype [HTTP_200_OK, message] on success, [HTTP_404_NOT_FOUND, message]
           when no config with that id exists
    """
    # NOTE(review): create() and update() lower-case the id before using
    # it, this lookup does not - confirm callers pass a lower-case id
    try:
        record = RenderingResourceSettings.objects.get(id=settings_id)
    except RenderingResourceSettings.DoesNotExist as e:
        log.error(str(e))
        return [http_status.HTTP_404_NOT_FOUND, str(e)]

    with transaction.atomic():
        record.delete()
    return [http_status.HTTP_200_OK, 'Settings successfully deleted']
def start(session_info, params, environment):
    """
    Launches the local process of a rendering resource and stores its PID
    and the SESSION_STATUS_STARTING status in the session information. The
    session is not saved here.

    :param session_info: Session information; renderer_id, id, http_host
           and http_port are read, process_pid and status are written
    :param params: Extra command line parameters, split on whitespace
    :param environment: Comma separated list of NAME=VALUE pairs added to
           the environment variables of the rendering resource config
    :return: [200, json] holding the process id, or [404, json] when the
             rendering resource config does not exist
    """
    try:
        settings = manager.RenderingResourceSettingsManager.get_by_id(
            session_info.renderer_id.lower())
        default_parameters = \
            manager.RenderingResourceSettingsManager.format_rest_parameters(
                str(settings.process_rest_parameters_format),
                str(session_info.http_host),
                str(session_info.http_port),
                'rest' + str(settings.id + session_info.id))

        command_line = [str(settings.command_line)] + \
            default_parameters.split()
        try:
            command_line += str(params).split()
        except (KeyError, TypeError):
            log.debug(1, 'No parameters specified')

        environment_variables = settings.environment_variables.split() + \
            environment.split(',')
        process_env = os.environ.copy()
        for environment_variable in environment_variables:
            if environment_variable != '':
                # Split on the first '=' only, so that values which contain
                # '=' themselves are kept whole
                variable = environment_variable.split('=', 1)
                process_env[variable[0]] = variable[1]

        log.info(1, 'Launching ' + settings.id + ' with ' +
                 str(command_line))
        # NOTE(review): stdout is piped but never read in this function;
        # confirm that something drains it for long-running processes
        process = subprocess.Popen(
            command_line,
            env=process_env,
            shell=False,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE)
        session_info.process_pid = process.pid
        session_info.status = SESSION_STATUS_STARTING
        response = json.dumps(
            {'message': 'Process started',
             'processId': str(session_info.process_pid)})
        return [200, response]
    except RenderingResourceSettings.DoesNotExist as e:
        log.error(str(e))
        response = json.dumps({'contents': str(e)})
        return [404, response]
def destroy_session(cls, request):
    """
    Removes the image feed route of the session identified by the request,
    then deletes the session through the session manager

    :param : request: The REST request
    :rtype : An HTTP response built from the status returned by the session
             deletion. A failure to remove the route is only logged.
    """
    sessions = session_manager.SessionManager()
    session_id = sessions.get_session_id_from_request(request)

    log.info(1, 'Remove image feed route if it exists')
    route_outcome = image_feed_manager.ImageFeedManager(
        session_id).remove_route()
    if route_outcome[0] != 200:
        log.error(route_outcome[1])

    log.info(1, 'Remove session from db')
    deletion = sessions.delete_session(session_id)
    log.info(1, 'Session deleted ' + str(session_id))
    return HttpResponse(status=deletion[0], content=deletion[1])
def keep_alive_session(cls, session_id):
    """
    Pushes the expiration timestamp of a session to now plus the keep alive
    timeout of the system global settings (record with id 0)

    :param session_id: Id of the session to update
    :rtype [HTTP_200_OK, message] on success, [HTTP_404_NOT_FOUND, message]
           when the session does not exist
    """
    log.debug(1, 'Session ' + str(session_id) + ' is being updated')
    try:
        sgs = SystemGlobalSettings.objects.get(id=0)
        session = Session.objects.get(id=session_id)
    except Session.DoesNotExist as e:
        log.error(str(e))
        return [http_status.HTTP_404_NOT_FOUND, str(e)]
    else:
        now = datetime.datetime.now()
        session.valid_until = now + datetime.timedelta(
            seconds=sgs.session_keep_alive_timeout)
        session.save()
        return [http_status.HTTP_200_OK,
                'Session ' + str(session_id) + ' successfully updated']
def __verify_hostname(cls, session):
    """
    Makes sure the session knows the host of its job, asking the global job
    manager for it when the session has a job id but no host yet

    :param : session: Session holding the rendering resource
    :rtype : [200, json] when the host is known or was resolved, [404, json]
             when the job is not running yet or has failed (in which case
             the session is deleted)
    """
    log.info(1, 'Verifying hostname ' + session.http_host +
             ' for session ' + str(session.id))

    lookup_needed = (
        not session.status == SESSION_STATUS_GETTING_HOSTNAME and
        session.job_id and
        session.http_host == '')
    if not lookup_needed:
        text = str('Job is running on host ' + session.http_host)
        return [200, json.dumps({'contents': text})]

    session.status = SESSION_STATUS_GETTING_HOSTNAME
    session.save()
    log.info(1, 'Querying JOB hostname for job id: ' + str(session.job_id))
    hostname = job_manager.globalJobManager.hostname(session)

    if hostname == '':
        msg = 'Job scheduled but ' + session.renderer_id + \
            ' is not yet running'
        log.error(msg)
        # Back to scheduled, so that a later call queries the host again
        session.status = SESSION_STATUS_SCHEDULED
        session.save()
        return [404, json.dumps({'contents': str(msg)})]

    if hostname == 'FAILED':
        sm = session_manager.SessionManager()
        sm.delete_session(session.id)
        msg = 'Job as been cancelled'
        log.error(msg)
        return [404, json.dumps({'contents': str(msg)})]

    session.http_host = hostname
    session.save()
    msg = 'Resolved hostname for job ' + str(session.job_id) + ' to ' + \
        str(session.http_host)
    log.info(1, msg)
    return [200, json.dumps({'contents': str(msg)})]
def get_route(self):
    """
    Queries the image streaming service for the route of the current
    session, creating the route when the service answers 404

    :return The [status, text] answer of the service when the route exists
            or was created, [400, message] when the creation failed or the
            service answered with any other status
    """
    log.info(1, 'Check if route already exists')
    status = self.__do_request('GET', '')
    code = status[0]

    if code == 200:
        log.info(1, 'Route exists: ' + str(status[1]))
        return status

    if code != 404:
        response = 'Image streaming service (' + \
            settings.IMAGE_STREAMING_SERVICE_URL + \
            ') is unreachable: ' + str(status[1])
        log.error(response)
        return [400, response]

    # Create new route
    log.error('Route does not exist for session ' + str(self._session_id) +
              ', creating it with ' + str(self.__get_uri()))
    status = self.add_route()
    if status[0] == 201:
        return self.__do_request('GET', '')

    response = 'Image streaming service (' + \
        settings.IMAGE_STREAMING_SERVICE_URL + \
        ') failed to create new route: ' + str(status[1])
    log.error(response)
    return [400, response]
def __open_process(cls, session, request):
    """
    Starts a local rendering resource process for a session that has none

    :param : session: Session holding the rendering resource
    :param : request: HTTP request whose DATA may hold 'params' and
             'environment' entries; both default to an empty string
    :rtype : An HTTP response built from the status returned by the process
             manager, or a 401 response when the session already has a
             process
    """
    try:
        parameters = request.DATA['params']
    except KeyError:
        parameters = ''
        log.debug(1, 'No parameters specified')

    try:
        environment = request.DATA['environment']
    except KeyError:
        environment = ''
        log.debug(1, 'No environment specified')

    log.debug(1, 'Executing command <Open> parameters=' + str(parameters) +
              ' environment=' + str(environment))

    if session.process_pid != -1:
        msg = 'process is already started'
        log.error(msg)
        return HttpResponse(
            status=401, content=json.dumps({'contents': str(msg)}))

    session.http_host = consts.DEFAULT_RENDERER_HOST
    # The port is the default one shifted by a random offset in [0, 1000]
    port_offset = random.randint(0, 1000)
    session.http_port = consts.DEFAULT_RENDERER_HTTP_PORT + port_offset
    outcome = process_manager.ProcessManager.start(
        session, parameters, environment)
    session.save()
    return HttpResponse(status=outcome[0], content=outcome[1])
def __verify_hostname(cls, session):
    """
    Makes sure the session knows the host of its job, asking the global job
    manager for it when the session has a job id but no host yet

    :param : session: Session holding the rendering resource
    :rtype : [200, json] when the host is known or was resolved, [404, json]
             when the job is not running yet or has failed (in which case
             the session is deleted)
    """
    log.info(1, 'Verifying hostname ' + session.http_host +
             ' for session ' + str(session.id))

    lookup_needed = (
        not session.status == SESSION_STATUS_GETTING_HOSTNAME and
        session.job_id and
        session.http_host == '')
    if not lookup_needed:
        text = str('Job is running on host ' + session.http_host)
        return [200, json.dumps({'contents': text})]

    session.status = SESSION_STATUS_GETTING_HOSTNAME
    session.save()
    log.info(1, 'Querying JOB hostname for job id: ' + str(session.job_id))
    hostname = job_manager.globalJobManager.hostname(session.job_id)

    if hostname == '':
        msg = 'Job scheduled but ' + session.renderer_id + \
            ' is not yet running'
        log.error(msg)
        # Back to scheduled, so that a later call queries the host again
        session.status = SESSION_STATUS_SCHEDULED
        session.save()
        return [404, json.dumps({'contents': str(msg)})]

    if hostname == 'FAILED':
        sm = session_manager.SessionManager()
        sm.delete_session(session.id)
        msg = 'Job as been cancelled'
        log.error(msg)
        return [404, json.dumps({'contents': str(msg)})]

    session.http_host = hostname
    session.save()
    msg = 'Resolved hostname for job ' + str(session.job_id) + ' to ' + \
        str(session.http_host)
    log.info(1, msg)
    return [200, json.dumps({'contents': str(msg)})]
def stop(self, session):
    """
    Stops the job of a session: when the rendering resource config asks for
    a graceful exit, the exit command is first sent to the rendering
    resource over HTTP (best effort), then the job is cancelled with kill()

    :param session: Current user session
    :return: The result of kill(), or [400, json] when an OSError is raised
    """
    result = [500, 'Unexpected error']
    # The context manager releases the lock on every exit path and only if
    # this call acquired it
    with self._mutex:
        try:
            # pylint: disable=E1101
            setting = manager.RenderingResourceSettings.objects.get(
                id=session.renderer_id)
            if setting.graceful_exit:
                log.info(1, 'Gracefully exiting rendering resource')
                try:
                    url = 'http://' + session.http_host + \
                        ':' + str(session.http_port) + '/' + \
                        settings.RR_SPECIFIC_COMMAND_EXIT
                    log.info(1, url)
                    r = requests.put(
                        url=url,
                        timeout=global_settings.REQUEST_TIMEOUT)
                    r.close()
                # pylint: disable=W0702
                except requests.exceptions.RequestException:
                    # format_exc() takes a depth limit, not the exception;
                    # it formats the exception currently being handled
                    log.error(traceback.format_exc())
            # The job is cancelled whether or not the graceful exit worked
            result = self.kill(session)
        except OSError as e:
            msg = str(e)
            log.error(msg)
            response = json.dumps({'contents': msg})
            result = [400, response]
    return result
def __do_request(self, method, uri):
    """
    Sends one HTTP request to the '/route' endpoint of the image streaming
    service, identifying the session through a cookie

    :param method Method to be used by the HTTP request (GET, POST, DELETE,
           etc)
    :param uri JSON formatted URI sent as the body of the HTTP request
    :return [status code, response text] as answered by the service, or
            [400, message] when the request raises a requests exception
    """
    try:
        url = settings.IMAGE_STREAMING_SERVICE_URL + '/route'
        log.info(1, '__do_request(' + method + ', ' + url + ')')
        cookie = COOKIE_ID + '=' + str(self._session_id)
        reply = requests.request(
            method=method,
            timeout=settings.REQUEST_TIMEOUT,
            url=url,
            headers={'Content-Type': 'application/json', 'Cookie': cookie},
            data=uri)
        log.info(1, 'Response: ' + reply.text)
        reply.close()
        return [reply.status_code, reply.text]
    except requests.exceptions.RequestException as e:
        # HTTPError derives from RequestException, so this single handler
        # covers both cases
        log.error(str(e))
        return [400, str(e)]
def create(cls, params):
    """
    Inserts a new rendering resource config

    :param params Settings for the new rendering resource; 'id' is
           lower-cased, the textual settings are converted with str() and
           the remaining ones are stored as given
    :rtype [HTTP_201_CREATED, json] on success, [HTTP_409_CONFLICT, json]
           when the insert raises an IntegrityError
    """
    text_keys = (
        'command_line',
        'environment_variables',
        'modules',
        'process_rest_parameters_format',
        'scheduler_rest_parameters_format',
        'project',
        'queue',
    )
    raw_keys = (
        'exclusive',
        'nb_nodes',
        'nb_cpus',
        'nb_gpus',
        'memory',
        'graceful_exit',
        'wait_until_running',
        'name',
        'description',
    )
    try:
        settings_id = params['id'].lower()
        values = {'id': settings_id}
        for key in text_keys:
            values[key] = str(params[key])
        for key in raw_keys:
            values[key] = params[key]
        record = RenderingResourceSettings(**values)
        # force_insert makes an already used id fail instead of updating it
        with transaction.atomic():
            record.save(force_insert=True)
        msg = 'Rendering Resource ' + settings_id + ' successfully configured'
        return [http_status.HTTP_201_CREATED, json.dumps({'contents': msg})]
    except IntegrityError as e:
        log.error(str(e))
        return [http_status.HTTP_409_CONFLICT,
                json.dumps({'contents': str(e)})]
def query(session_info):
    """
    Checks whether the process of a session is alive and logs the outcome.
    Nothing is returned.

    :param session_info: Session information containing the PID of the
           process and, for a live process, the timestamp that is logged
    """
    pid = session_info.process_pid
    if pid == -1:
        # The pid is now part of the message; the former literal had
        # mismatched quotes and logged the expression text instead
        log.error('Invalid Process Id (' + str(pid) + ')')
        return

    response = "Process " + str(pid) + ": "
    try:
        # Signal 0 delivers nothing: it only checks that the process can be
        # signalled
        os.kill(pid, 0)
    except OSError as e:
        # str(e) carries the error text; e.message is empty or missing
        log.error(response + str(e))
    else:
        log.error(str(response + 'is running since ' +
                      session_info.timestamp.strftime(
                          '%Y-%m-%d %H:%M:%S')))
def start(self, session, job_information):
    """
    Starts the rendering resource on the host stored in the session
    (session.http_host) by piping a small shell script to an ssh process.
    The session status is set to SESSION_STATUS_STARTING.

    The script purges and loads the configured modules, then runs the
    configured command line in the background, prefixed by the environment
    variables and followed by the REST and job parameters, with its output
    redirected to the Slurm out/err files of the session.

    :param session: Current user session
    :param job_information: Information about the job; its environment and
           params strings are split on whitespace
    :return: [200, json] once the script has been sent, [400, json] when an
             OSError is raised
    """
    # The context manager releases the lock on every exit path and only if
    # this call acquired it
    with self._mutex:
        try:
            session.status = SESSION_STATUS_STARTING
            session.save()

            rr_settings = \
                manager.RenderingResourceSettingsManager.get_by_id(
                    session.renderer_id.lower())

            # Modules
            full_command = 'module purge\n'
            for module in rr_settings.modules.split():
                full_command += 'module load ' + module.strip() + '\n'

            # Environment variables, written in front of the command line
            values = rr_settings.environment_variables.split()
            values += job_information.environment.split()
            for variable in values:
                full_command += variable + ' '

            # Command lines parameters
            rest_parameters = \
                manager.RenderingResourceSettingsManager.format_rest_parameters(
                    str(rr_settings.scheduler_rest_parameters_format),
                    str(session.http_host),
                    str(session.http_port),
                    'rest' + str(rr_settings.id + session.id))
            values = rest_parameters.split()
            values += job_information.params.split()
            full_command += rr_settings.command_line
            for parameter in values:
                full_command += ' ' + parameter

            # Output redirection
            full_command += ' > ' + \
                self._file_name(session, settings.SLURM_OUT_FILE)
            full_command += ' 2> ' + \
                self._file_name(session, settings.SLURM_ERR_FILE)
            full_command += ' &\n'

            # Start Process on cluster
            command_line = '/usr/bin/ssh -i ' + \
                global_settings.SLURM_SSH_KEY + ' ' + \
                global_settings.SLURM_USERNAME + '@' + \
                session.http_host
            log.info(1, 'Connect to cluster machine: ' + command_line)
            process = subprocess.Popen(
                [command_line],
                shell=True,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)

            log.info(1, 'Full command:\n' + full_command)
            # communicate() writes the script, closes stdin and drains the
            # output pipes in one go
            output = process.communicate(full_command)[0]
            log.info(1, output)

            session.status = SESSION_STATUS_STARTING
            session.save()
            response = json.dumps(
                {'message': session.renderer_id + ' successfully started'})
            return [200, response]
        except OSError as e:
            log.error(str(e))
            response = json.dumps({'contents': str(e)})
            return [400, response]
def query_status(session_id):
    """
    Look up a session, advance its status where the current state allows it
    and report the result.

    Status handling:
    - Scheduling: reported as scheduled
    - Scheduled / getting hostname: moved to Starting once the session has a
      host name, reported as scheduled otherwise
    - Starting: moved to Running, either directly when the rendering resource
      settings do not ask to wait, or once the vocabulary request answers
      with HTTP 200
    - Running: valid_until is pushed forward when it has expired
    - Stopping, Stopped, Failed: reported with a matching description

    :param session_id: Id of the session to be queried
    :return: [HTTP_200_OK, JSON string holding session, code, description,
             hostname and port] when the session exists,
             [HTTP_404_NOT_FOUND, error message] when it does not
    """
    try:
        session = Session.objects.get(id=session_id)
        current = session.status
        log.info(1, 'Current session status is: ' + str(current))

        renderer = session.renderer_id
        description = 'Undefined'

        if current == SESSION_STATUS_SCHEDULING:
            description = str(renderer + ' is scheduled')

        elif current in (SESSION_STATUS_SCHEDULED,
                         SESSION_STATUS_GETTING_HOSTNAME):
            if session.http_host == '':
                description = str(renderer + ' is scheduled')
            else:
                # A host name is known: the resource is being started
                description = renderer + ' is starting'
                log.info(1, description)
                session.status = SESSION_STATUS_STARTING
                session.save()

        elif current == SESSION_STATUS_STARTING:
            # The rendering resource might be running without being able to
            # serve REST requests yet. When the settings ask for it, the
            # vocabulary is requested to find out whether it is ready.
            rr_settings = \
                manager.RenderingResourceSettingsManager.get_by_id(renderer.lower())
            if rr_settings.wait_until_running:
                log.info(1, 'Requesting rendering resource vocabulary')
                vocabulary = SessionManager.request_vocabulary(session_id)
                ready = vocabulary[0] == http_status.HTTP_200_OK
            else:
                ready = True

            if ready:
                description = renderer + ' is up and running'
                log.info(1, description)
                session.status = SESSION_STATUS_RUNNING
                session.save()
            else:
                description = renderer + \
                    ' is starting but the HTTP interface is not yet available'

        elif current == SESSION_STATUS_RUNNING:
            description = renderer + ' is up and running'
            # Renew the timestamp if the current value is expired
            sgs = SystemGlobalSettings.objects.get()
            if datetime.datetime.now() > session.valid_until:
                session.valid_until = datetime.datetime.now() + \
                    datetime.timedelta(seconds=sgs.session_keep_alive_timeout)
                session.save()

        elif current == SESSION_STATUS_STOPPING:
            description = str(renderer + ' is terminating...')
            # NOTE(review): save() right after delete() looks contradictory;
            # presumably only one of the two is intended. Confirm and fix.
            session.delete()
            session.save()

        elif current == SESSION_STATUS_STOPPED:
            description = str(renderer + ' is not active')

        elif current == SESSION_STATUS_FAILED:
            description = str('Job allocation failed for ' + renderer)

        # session.status is read again so that a transition made above is
        # reflected in the reported code
        payload = {
            'session': str(session_id),
            'code': session.status,
            'description': description,
            'hostname': session.http_host,
            'port': str(session.http_port),
        }
        return [http_status.HTTP_200_OK, json.dumps(payload)]
    except Session.DoesNotExist as e:
        # Requested session does not exist
        log.error(str(e))
        return [http_status.HTTP_404_NOT_FOUND, str(e)]
def query_status(session_id):
    """
    Look up a session, advance its status where the current state allows it
    and report the result.

    Status handling:
    - Scheduling: reported as scheduled
    - Scheduled / getting hostname: moved to Starting once the session has a
      host name, reported as scheduled otherwise
    - Starting: moved to Running, either directly when the rendering resource
      settings do not ask to wait, or once the vocabulary request answers
      with HTTP 200
    - Running: valid_until is pushed forward when it has expired
    - Stopping, Stopped, Failed: reported with a matching description

    :param session_id: Id of the session to be queried
    :return: [HTTP_200_OK, JSON string holding session, code, description,
             hostname and port] when the session exists,
             [HTTP_404_NOT_FOUND, error message] when it does not
    """
    try:
        session = Session.objects.get(id=session_id)
        current = session.status
        log.info(1, 'Current session status is: ' + str(current))

        renderer = session.renderer_id
        description = 'Undefined'

        if current == SESSION_STATUS_SCHEDULING:
            description = str(renderer + ' is scheduled')

        elif current in (SESSION_STATUS_SCHEDULED,
                         SESSION_STATUS_GETTING_HOSTNAME):
            if session.http_host == '':
                description = str(renderer + ' is scheduled')
            else:
                # A host name is known: the resource is being started
                description = renderer + ' is starting'
                log.info(1, description)
                session.status = SESSION_STATUS_STARTING
                session.save()

        elif current == SESSION_STATUS_STARTING:
            # The rendering resource might be running without being able to
            # serve REST requests yet. When the settings ask for it, the
            # vocabulary is requested to find out whether it is ready.
            rr_settings = \
                manager.RenderingResourceSettingsManager.get_by_id(renderer.lower())
            if rr_settings.wait_until_running:
                log.info(1, 'Requesting rendering resource vocabulary')
                vocabulary = SessionManager.request_vocabulary(session_id)
                ready = vocabulary[0] == http_status.HTTP_200_OK
            else:
                ready = True

            if ready:
                description = renderer + ' is up and running'
                log.info(1, description)
                session.status = SESSION_STATUS_RUNNING
                session.save()
            else:
                description = renderer + \
                    ' is starting but the HTTP interface is not yet available'

        elif current == SESSION_STATUS_RUNNING:
            description = renderer + ' is up and running'
            # Renew the timestamp if the current value is expired
            sgs = SystemGlobalSettings.objects.get()
            if datetime.datetime.now() > session.valid_until:
                session.valid_until = datetime.datetime.now() + \
                    datetime.timedelta(seconds=sgs.session_keep_alive_timeout)
                session.save()

        elif current == SESSION_STATUS_STOPPING:
            description = str(renderer + ' is terminating...')
            # NOTE(review): save() right after delete() looks contradictory;
            # presumably only one of the two is intended. Confirm and fix.
            session.delete()
            session.save()

        elif current == SESSION_STATUS_STOPPED:
            description = str(renderer + ' is not active')

        elif current == SESSION_STATUS_FAILED:
            description = str('Job allocation failed for ' + renderer)

        # session.status is read again so that a transition made above is
        # reflected in the reported code
        payload = {
            'session': str(session_id),
            'code': session.status,
            'description': description,
            'hostname': session.http_host,
            'port': str(session.http_port),
        }
        return [http_status.HTTP_200_OK, json.dumps(payload)]
    except Session.DoesNotExist as e:
        # Requested session does not exist
        log.error(str(e))
        return [http_status.HTTP_404_NOT_FOUND, str(e)]