Example #1
    def _run_cluster_job(operation_identifier, user_name_label, adapter_instance):
        """
        Threaded Popen.
        Called by the ClusterSchedulerClient from a Thread; starts a new process.
        """
        # Load operation so we can estimate the execution time
        operation = dao.get_operation_by_id(operation_identifier)
        kwargs = parse_json_parameters(operation.parameters)
        time_estimate = int(adapter_instance.get_execution_time_approximation(**kwargs))
        hours = time_estimate // 3600
        minutes = (time_estimate % 3600) // 60
        seconds = time_estimate % 60
        # Anything lower than 2 hours just use default walltime
        if hours < 2:
            walltime = "02:00:00"
        elif hours > 23:
            walltime = "23:59:59"
        else:
            walltime = datetime.time(hours, minutes, seconds)
            walltime = walltime.strftime("%H:%M:%S")

        call_arg = config.CLUSTER_SCHEDULE_COMMAND % (walltime, operation_identifier, user_name_label)
        LOGGER.info(call_arg)
        process_ = Popen([call_arg], stdout=PIPE, shell=True)
        job_id = process_.stdout.read().replace('\n', '').split('OAR_JOB_ID=')[-1]
        LOGGER.debug("Got jobIdentifier = %s for CLUSTER operationID = %s" % (operation_identifier, job_id))
        operation_identifier = model.OperationProcessIdentifier(operation_identifier, job_id=job_id)
        dao.store_entity(operation_identifier)
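The branch above clamps the runtime estimate into the scheduler's accepted walltime window. A minimal standalone sketch of the same clamping, assuming the two-hour floor and 23:59:59 cap from this example (the helper name format_walltime is illustrative, not part of TVB):

    import datetime

    def format_walltime(time_estimate):
        """Clamp a runtime estimate (in seconds) into an HH:MM:SS walltime string."""
        hours = time_estimate // 3600
        minutes = (time_estimate % 3600) // 60
        seconds = time_estimate % 60
        if hours < 2:
            return "02:00:00"    # below two hours: use the default walltime
        if hours > 23:
            return "23:59:59"    # above 23 hours: cap the request
        return datetime.time(hours, minutes, seconds).strftime("%H:%M:%S")

    assert format_walltime(60) == "02:00:00"
    assert format_walltime(5 * 3600 + 90) == "05:01:30"
    assert format_walltime(30 * 3600) == "23:59:59"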
Example #2
    def _run_cluster_job(operation_identifier, user_name_label, adapter_instance):
        """
        Threaded Popen.
        Called by the ClusterSchedulerClient from a Thread; starts a new process.
        """
        # Load operation so we can estimate the execution time
        operation = dao.get_operation_by_id(operation_identifier)
        kwargs = parse_json_parameters(operation.parameters)
        kwargs = adapter_instance.prepare_ui_inputs(kwargs)
        time_estimate = int(adapter_instance.get_execution_time_approximation(**kwargs))
        hours = time_estimate // 3600
        minutes = (time_estimate % 3600) // 60
        seconds = time_estimate % 60
        # Anything lower than 5 hours just use default walltime
        if hours < 5:
            walltime = "05:00:00"
        else:
            walltime = "%02d:%02d:%02d" % (hours, minutes, seconds)

        call_arg = TvbProfile.current.cluster.SCHEDULE_COMMAND % (walltime, operation_identifier, user_name_label)
        LOGGER.info(call_arg)
        process_ = Popen([call_arg], stdout=PIPE, shell=True)
        job_id = process_.stdout.read().replace('\n', '').split('OAR_JOB_ID=')[-1]
        LOGGER.debug("Got jobIdentifier = %s for CLUSTER operationID = %s" % (operation_identifier, job_id))
        operation_identifier = model.OperationProcessIdentifier(operation_identifier, job_id=job_id)
        dao.store_entity(operation_identifier)
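As the docstring says, this function is meant to run on a worker thread so the scheduler call does not block the caller. A minimal sketch of how a client such as ClusterSchedulerClient might dispatch it; operation.id, user.username and adapter are placeholder values, not the actual TVB API:

    from threading import Thread

    # Fire-and-forget: the thread submits the job and records the OAR job id.
    Thread(target=_run_cluster_job,
           args=(operation.id, user.username, adapter)).start()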
Example #3
    def run(self):
        """
        Get the required data from the operation queue and launch the operation.
        """
        # Try to get a free slot before launching our own operation.
        LOCKS_QUEUE.get(True)
        operation_id = self.operation_id
        run_params = [TvbProfile.current.PYTHON_INTERPRETER_PATH, '-m', 'tvb.core.operation_async_launcher',
                      str(operation_id), TvbProfile.CURRENT_PROFILE_NAME]

        # In the exceptional case where the user pressed stop while the thread was
        # starting up, we should no longer launch the operation.
        if not self.stopped():

            env = os.environ.copy()
            env['PYTHONPATH'] = os.pathsep.join(sys.path)
            # anything that was already in $PYTHONPATH should have been reproduced in sys.path

            launched_process = Popen(run_params, stdout=PIPE, stderr=PIPE, env=env)

            LOGGER.debug("Storing pid=%s for operation id=%s launched on local machine." % (operation_id,
                                                                                            launched_process.pid))
            op_ident = model.OperationProcessIdentifier(operation_id, pid=launched_process.pid)
            dao.store_entity(op_ident)

            if self.stopped():
                # The user pressed stop while the process was being launched, and
                # stop_operation may already be querying the OperationProcessIdentifier,
                # so kill the freshly launched process here.
                self.stop_pid(launched_process.pid)

            subprocess_result = launched_process.communicate()
            LOGGER.info("Finished with launch of operation %s" % operation_id)
            returned = launched_process.returncode  # communicate() already waited for exit

            if returned != 0 and not self.stopped():
                # The process did not end as expected (e.g. a segmentation fault).
                workflow_service = WorkflowService()
                operation = dao.get_operation_by_id(self.operation_id)
                LOGGER.error("Operation suffered fatal failure! Exit code: %s Exit message: %s" % (returned,
                                                                                                   subprocess_result))

                workflow_service.persist_operation_state(operation, model.STATUS_ERROR,
                                                         "Operation failed unexpectedly! Please check the log files.")

                burst_entity = dao.get_burst_for_operation_id(self.operation_id)
                if burst_entity:
                    message = "Error in operation process! Possibly segmentation fault."
                    workflow_service.mark_burst_finished(burst_entity, error_message=message)

            del launched_process

        # Give the slot back now that the operation has finished.
        CURRENT_ACTIVE_THREADS.remove(self)
        LOCKS_QUEUE.put(1)
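The LOCKS_QUEUE.get(True) / LOCKS_QUEUE.put(1) pair implements a counting semaphore on top of a blocking Queue: the queue is pre-filled with one token per allowed concurrent launch, get() blocks while every slot is busy, and put() returns the slot. A minimal sketch of the pattern, assuming an illustrative limit of 4 parallel operations (not a TVB constant):

    from queue import Queue   # the module is named 'Queue' in Python 2

    MAX_PARALLEL_OPERATIONS = 4        # illustrative limit
    LOCKS_QUEUE = Queue()
    for _ in range(MAX_PARALLEL_OPERATIONS):
        LOCKS_QUEUE.put(1)             # one token per allowed concurrent launch

    LOCKS_QUEUE.get(True)              # blocks until a token is free
    try:
        pass                           # launch and wait on the subprocess here
    finally:
        LOCKS_QUEUE.put(1)             # return the token when done

Note that run() above returns its token without a try/finally, so an unexpected exception before LOCKS_QUEUE.put(1) would leave one slot permanently occupied.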
Example #4
    def _run_cluster_job(operation_identifier, user_name_label):
        """
        Threaded Popen.
        Called by the ClusterSchedulerClient from a Thread; starts a new process.
        """
        call_arg = config.CLUSTER_SCHEDULE_COMMAND % (operation_identifier, user_name_label)
        process_ = Popen([call_arg], stdout=PIPE, shell=True)
        job_id = process_.stdout.read().replace('\n', '').split('OAR_JOB_ID=')[-1]
        LOGGER.debug(
            "Storing job identifier=%s for operation id=%s launched on cluster." % (job_id, operation_identifier))
        op_ident = model.OperationProcessIdentifier(operation_identifier, job_id=job_id)
        dao.store_entity(op_ident)
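The job identifier is recovered by string-splitting the submission command's output: OAR prints a line of the form OAR_JOB_ID=&lt;number&gt;. A minimal sketch of the same parse on made-up output (under Python 3 the stdout bytes would also need a .decode() first):

    sample_output = "Submitting job...\nOAR_JOB_ID=183247\n"   # made-up scheduler output
    job_id = sample_output.replace('\n', '').split('OAR_JOB_ID=')[-1]
    assert job_id == "183247"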
Example #5
    def run(self):
        """
        Get the required data from the operation queue and launch the operation.
        """
        # Try to get a free slot before launching our own operation.
        LOCKS_QUEUE.get(True)
        operation_id = self.operation_id
        user_name_label = self.user_name_label
        run_params = [config().get_python_path(), '-m', 'tvb.core.cluster_launcher', str(operation_id), user_name_label]

        if tvb_profile.CURRENT_SELECTED_PROFILE is not None:
            run_params.extend([tvb_profile.SUBPARAM_PROFILE, tvb_profile.CURRENT_SELECTED_PROFILE])

        # In the exceptional case where the user pressed stop while the thread was
        # starting up, we should no longer launch the operation.
        if not self.stopped():
            launched_process = Popen(run_params, stdout=PIPE, stderr=PIPE)
            LOGGER.debug("Storing pid=%s for operation id=%s launched on local machine." % (operation_id,
                                                                                            launched_process.pid))
            op_ident = model.OperationProcessIdentifier(operation_id, pid=launched_process.pid)
            dao.store_entity(op_ident)

            if self.stopped():
                # The user pressed stop while the process was being launched, and
                # stop_operation may already be querying the OperationProcessIdentifier,
                # so kill the freshly launched process here.
                self.stop_pid(launched_process.pid)

            launched_process.communicate()
            LOGGER.info("Finished with launch of operation %s" % operation_id)
            returned = launched_process.returncode  # communicate() already waited for exit

            if returned != 0 and not self.stopped():
                # The process did not end as expected (e.g. a segmentation fault).
                operation = dao.get_operation_by_id(self.operation_id)
                LOGGER.error("Operation suffered fatal failure with exit code: %s" % returned)

                operation.mark_complete(model.STATUS_ERROR,
                                        "Operation failed unexpectedly! Probably segmentation fault.")
                dao.store_entity(operation)

                burst_entity = dao.get_burst_for_operation_id(self.operation_id)
                if burst_entity:
                    message = "Error on burst operation! Probably segmentation fault."
                    WorkflowService().mark_burst_finished(burst_entity, error=True, error_message=message)

            del launched_process

        # Give the slot back now that the operation has finished.
        CURRENT_ACTIVE_THREADS.remove(self)
        LOCKS_QUEUE.put(1)
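The stopped() / stop_pid() pair above is a cooperative-cancellation pattern: the thread polls a flag at safe points instead of being killed outright. A minimal sketch of how such a stoppable thread is commonly built on threading.Event; the class and method names here are assumptions, not the actual TVB implementation:

    import threading

    class StoppableThread(threading.Thread):
        """Thread with a stop() / stopped() pair backed by a threading.Event."""

        def __init__(self, *args, **kwargs):
            super(StoppableThread, self).__init__(*args, **kwargs)
            self._stop_event = threading.Event()

        def stop(self):
            self._stop_event.set()       # request cooperative cancellation

        def stopped(self):
            return self._stop_event.is_set()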