class Slave(object):
    """
    Master-side representation of one slave service.

    Keeps track of how many of the slave's executors are claimed, which build the slave is assigned to, and whether
    the slave is alive or in shutdown mode. It also wraps the HTTP requests that are sent to the slave's API.
    """

    API_VERSION = 'v1'
    # Class-level counter: every instance draws its id from the same counter object.
    _slave_id_counter = Counter()

    def __init__(self, slave_url, num_executors):
        """
        :param slave_url: The url of the slave service this object talks to
        :type slave_url: str
        :param num_executors: The maximum number of executors that may be claimed on this slave
        :type num_executors: int
        """
        self.url = slave_url
        self.num_executors = num_executors
        self.id = self._slave_id_counter.increment()
        self._num_executors_in_use = Counter()
        self._network = Network(min_connection_poolsize=num_executors)
        self.current_build_id = None
        self._is_alive = True
        self._is_in_shutdown_mode = False
        self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
        self._logger = log.get_logger(__name__)

    def api_representation(self):
        """
        Gets a dict representing this slave which can be returned in an API response.

        :rtype: dict
        """
        return {
            'url': self.url,
            'id': self.id,
            'num_executors': self.num_executors,
            'num_executors_in_use': self.num_executors_in_use(),
            'current_build_id': self.current_build_id,
            'is_alive': self.is_alive(),
            'is_in_shutdown_mode': self._is_in_shutdown_mode,
        }

    def mark_as_idle(self):
        """
        Do bookkeeping when this slave becomes idle. Error if the slave cannot be idle.
        If the slave is in shutdown mode, clear the build_id, kill the slave, and raise an error.

        :raises Exception: if any executors are still in use
        :raises SlaveMarkedForShutdownError: if the slave is in shutdown mode (raised after the slave was killed)
        """
        executors_in_use = self._num_executors_in_use.value()
        if executors_in_use != 0:
            # The message must be formatted explicitly; Exception() does not interpolate extra arguments.
            raise Exception('Trying to mark slave idle while {} executors still in use.'.format(executors_in_use))

        self.current_build_id = None

        if self._is_in_shutdown_mode:
            self.kill()
            raise SlaveMarkedForShutdownError

    def setup(self, build, executor_start_index):
        """
        Execute a setup command on the slave for the specified build. The setup process executes asynchronously on
        the slave and the slave will alert the master when setup is complete and it is ready to start working on
        subjobs.

        :param build: The build to set up this slave to work on
        :type build: Build
        :param executor_start_index: The index the slave should number its executors from for this build
        :type executor_start_index: int
        """
        # Copy the build parameters so the slave-specific overrides do not modify the build request's own dict.
        slave_project_type_params = build.build_request.build_parameters().copy()
        slave_project_type_params.update(build.project_type.slave_param_overrides())

        setup_url = self._slave_api.url('build', build.build_id(), 'setup')
        post_data = {
            'project_type_params': slave_project_type_params,
            'build_executor_start_index': executor_start_index,
        }

        self.current_build_id = build.build_id()
        self._network.post_with_digest(setup_url, post_data, Secret.get())

    def teardown(self):
        """
        Tell the slave to run the build teardown. Nothing is sent if the slave is not alive.
        """
        if self.is_alive():
            teardown_url = self._slave_api.url('build', self.current_build_id, 'teardown')
            self._network.post(teardown_url)
        else:
            self._logger.notice('Teardown request to slave {} was not sent since slave is disconnected.', self.url)

    def start_subjob(self, subjob):
        """
        Send a subjob to the slave for execution. The request itself is made on a separate thread.

        :type subjob: Subjob
        :raises DeadSlaveError: if the slave is not alive
        :raises SlaveMarkedForShutdownError: if the slave is in shutdown mode
        """
        if not self.is_alive():
            raise DeadSlaveError('Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id))

        if self._is_in_shutdown_mode:
            raise SlaveMarkedForShutdownError(
                'Tried to start a subjob on a slave in shutdown mode. ({}, id: {})'.format(self.url, self.id))

        SafeThread(target=self._async_start_subjob, args=(subjob,)).start()

    def _async_start_subjob(self, subjob):
        """
        Post the subjob's atomic commands to the slave and record an analytics event for the triggered subjob.

        :type subjob: Subjob
        """
        execution_url = self._slave_api.url('build', subjob.build_id(), 'subjob', subjob.subjob_id())
        post_data = {'atomic_commands': subjob.atomic_commands()}
        response = self._network.post_with_digest(execution_url, post_data, Secret.get(), error_on_failure=True)

        subjob_executor_id = response.json().get('executor_id')
        analytics.record_event(analytics.MASTER_TRIGGERED_SUBJOB, executor_id=subjob_executor_id,
                               build_id=subjob.build_id(), subjob_id=subjob.subjob_id(), slave_id=self.id)

    def num_executors_in_use(self):
        """
        :return: The current value of the executors-in-use counter
        """
        return self._num_executors_in_use.value()

    def claim_executor(self):
        """
        Increment the executors-in-use counter.

        :return: The counter value after the increment
        :raises Exception: if the new count exceeds num_executors (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.increment()
        if new_count > self.num_executors:
            raise Exception('Cannot claim executor on slave {}. No executors left.'.format(self.url))
        return new_count

    def free_executor(self):
        """
        Decrement the executors-in-use counter.

        :return: The counter value after the decrement
        :raises Exception: if the new count is negative (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.decrement()
        if new_count < 0:
            raise Exception('Cannot free executor on slave {}. All are free.'.format(self.url))
        return new_count

    def is_alive(self, use_cached=True):
        """
        Is the slave API responsive?

        :param use_cached: Should we use the last returned value of the network check to the slave? If True,
            will return cached value. If False, this method will perform an actual network call to the slave.
        :type use_cached: bool
        :rtype: bool
        """
        if use_cached:
            return self._is_alive

        try:
            response = self._network.get(self._slave_api.url())

            if not response.ok:
                self._is_alive = False
            else:
                response_data = response.json()

                # Any malformed response is treated the same as a dead slave.
                if 'slave' not in response_data or 'is_alive' not in response_data['slave']:
                    self._logger.warning('{}\'s API is missing key slave[\'is_alive\'].', self.url)
                    self._is_alive = False
                elif not isinstance(response_data['slave']['is_alive'], bool):
                    self._logger.warning('{}\'s API key \'is_alive\' is not a boolean.', self.url)
                    self._is_alive = False
                else:
                    self._is_alive = response_data['slave']['is_alive']
        except requests.exceptions.ConnectionError:
            self._logger.warning('Slave with url {} is offline.', self.url)
            self._is_alive = False

        return self._is_alive

    def set_is_alive(self, value):
        """
        Setter for the self._is_alive attribute.

        :type value: bool
        """
        self._is_alive = value

    def set_shutdown_mode(self):
        """
        Mark this slave as being in shutdown mode. Slaves in shutdown mode will not get new subjobs and will be
        killed when they finish teardown, or killed immediately if they are not processing a build.
        """
        self._is_in_shutdown_mode = True
        if self.current_build_id is None:
            self.kill()

    def is_shutdown(self):
        """
        Whether the slave is in shutdown mode.

        :rtype: bool
        """
        return self._is_in_shutdown_mode

    def kill(self):
        """
        Instructs the slave process to kill itself, then marks this slave as dead.
        """
        kill_url = self._slave_api.url('kill')
        self._network.post_with_digest(kill_url, {}, Secret.get())
        self.mark_dead()

    def mark_dead(self):
        """
        Marks the slave dead and clears its current build id.
        """
        self.set_is_alive(False)
        self.current_build_id = None
class ClusterSlave(object):
    """
    The slave service: owns a fixed pool of subjob executors, runs build setup/teardown and subjobs on background
    threads, and reports state changes and subjob results back to the master.
    """

    API_VERSION = 'v1'

    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self.is_alive = True
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        # Every executor starts out idle; the queue holds the executors that are currently free.
        self._idle_executors = Queue(maxsize=num_executors)
        self.executors_by_id = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors_by_id[executor_id] = executor

        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first
        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None
        self._build_teardown_coin = None
        self._base_executor_index = None  # assigned for real at the start of each build's async setup

    def api_representation(self):
        """
        Gets a dict representing this resource which can be returned in an API response.

        :rtype: dict [str, mixed]
        """
        executors_representation = [executor.api_representation() for executor in self.executors_by_id.values()]
        return {
            'is_alive': self.is_alive,
            'master_url': self._master_url,
            'current_build_id': self._current_build_id,
            'slave_id': self._slave_id,
            'executors': executors_representation,
        }

    def get_status(self):
        """
        Returns a simple human-readable status message that includes the port of this service.

        :rtype: str
        """
        return 'Slave service is up. <Port: {}>'.format(self.port)

    def setup_build(self, build_id, project_type_params, build_executor_start_index):
        """
        Usually called once per build to do build-specific setup. The actual setup is performed on another thread,
        which notifies the master of the outcome once it finishes.

        :param build_id: The id of the build to run setup on
        :type build_id: int
        :param project_type_params: The parameters that define the project_type this build will execute in
        :type project_type_params: dict
        :param build_executor_start_index: How many executors have already been allocated on other slaves for
            this build
        :type build_executor_start_index: int
        :raises RuntimeError: if not all executors are idle
        """
        self._logger.info('Executing setup for build {} (type: {}).', build_id, project_type_params.get('type'))
        self._current_build_id = build_id
        self._build_teardown_coin = SingleUseCoin()  # protects against build_teardown being executed multiple times

        # create an project_type instance for build-level operations
        self._project_type = util.create_project_type(project_type_params)

        # verify all executors are idle
        if not self._idle_executors.full():
            raise RuntimeError('Slave tried to setup build but not all executors are idle. ({}/{} executors idle.)'
                               .format(self._idle_executors.qsize(), self._num_executors))

        # Collect all the executors so that the async setup can configure a new project_type for each of them
        # (for subjob-level operations). The executors stay in the idle queue; this is only a snapshot.
        executors = list(self._idle_executors.queue)
        SafeThread(
            target=self._async_setup_build,
            name='Bld{}-Setup'.format(build_id),
            args=(executors, project_type_params, build_executor_start_index)
        ).start()

    def _async_setup_build(self, executors, project_type_params, build_executor_start_index):
        """
        Called from setup_build(). Do asynchronous setup for the build so that we can make the call to setup_build()
        non-blocking. Notifies the master whether setup completed or failed.

        :type executors: list[SubjobExecutor]
        :type project_type_params: dict
        :type build_executor_start_index: int
        """
        self._base_executor_index = build_executor_start_index
        try:
            self._project_type.fetch_project()
            for executor in executors:
                executor.configure_project_type(project_type_params)
            self._project_type.run_job_config_setup()

        except SetupFailureError as ex:
            self._logger.error(ex)
            self._logger.info('Notifying master that build setup has failed for build {}.', self._current_build_id)
            self._notify_master_of_state_change(SlaveState.SETUP_FAILED)

        else:
            self._logger.info('Notifying master that build setup is complete for build {}.', self._current_build_id)
            self._notify_master_of_state_change(SlaveState.SETUP_COMPLETED)

    def teardown_build(self, build_id=None):
        """
        Called at the end of each build on each slave before it reports back to the master that it is idle again.

        :param build_id: The build id to teardown -- this parameter is used solely for correctness checking of the
            master, to make sure that the master is not erroneously sending teardown commands for other builds.
        :type build_id: int | None
        :raises BadRequestError: if no build is active, or if build_id does not match the active build
        """
        if self._current_build_id is None:
            raise BadRequestError('Tried to teardown a build but no build is active on this slave.')

        if build_id is not None and build_id != self._current_build_id:
            raise BadRequestError('Tried to teardown build {}, '
                                  'but slave is running build {}!'.format(build_id, self._current_build_id))

        # Name the thread after the active build: build_id is optional, and when supplied it has just been
        # checked to equal the active build id.
        SafeThread(
            target=self._async_teardown_build,
            name='Bld{}-Teardwn'.format(self._current_build_id)
        ).start()

    def _async_teardown_build(self):
        """
        Called from teardown_build(). Do asynchronous teardown for the build so that we can make the call to
        teardown_build() non-blocking. Also take care of posting back to the master when teardown is complete.
        """
        self._do_build_teardown_and_reset()
        # Poll until every executor has been returned to the idle queue before telling the master we are idle.
        while not self._idle_executors.full():
            time.sleep(1)
        self._send_master_idle_notification()

    def _do_build_teardown_and_reset(self, timeout=None):
        """
        Kill any currently running subjobs. Run the teardown_build commands for the current build (with an optional
        timeout). Clear attributes related to the currently running build.

        :param timeout: A maximum time in seconds to allow the teardown process to run before killing
        :type timeout: int | None
        """
        # Kill all subjob executors' processes. This only has an effect if we are tearing down before a build
        # completes.
        for executor in self.executors_by_id.values():
            executor.kill()

        # Order matters! Spend the coin if it has been initialized.
        if not self._build_teardown_coin or not self._build_teardown_coin.spend() or not self._project_type:
            return  # There is no build to tear down or teardown is already in progress.

        self._logger.info('Executing teardown for build {}.', self._current_build_id)
        # todo: Catch exceptions raised during teardown_build so we don't skip notifying master of idle/disconnect.
        self._project_type.teardown_build(timeout=timeout)
        self._logger.info('Build teardown complete for build {}.', self._current_build_id)
        self._current_build_id = None
        self._project_type = None

    def _send_master_idle_notification(self):
        """
        Tell the master that this slave is idle, unless the master does not respond to a ping.
        """
        if not self._is_master_responsive():
            self._logger.notice('Could not post idle notification to master because master is unresponsive.')
            return

        # Notify master that this slave is finished with teardown and ready for a new build.
        self._logger.info('Notifying master that this slave is ready for new builds.')
        self._notify_master_of_state_change(SlaveState.IDLE)

    def _disconnect_from_master(self):
        """
        Perform internal bookkeeping, as well as notify the master, that this slave is disconnecting itself from the
        slave pool.
        """
        self.is_alive = False

        if not self._is_master_responsive():
            self._logger.notice('Could not post disconnect notification to master because master is unresponsive.')
            return

        # Notify master that this slave is shutting down and should not receive new builds.
        self._logger.info('Notifying master that this slave is disconnecting.')
        self._notify_master_of_state_change(SlaveState.DISCONNECTED)

    def connect_to_master(self, master_url=None):
        """
        Notify the master that this slave exists.

        :param master_url: The URL of the master service. If none specified, defaults to localhost:43000.
        :type master_url: str | None
        """
        self.is_alive = True
        self._master_url = master_url or 'localhost:43000'
        self._master_api = UrlBuilder(self._master_url)
        connect_url = self._master_api.url('slave')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'num_executors': self._num_executors,
        }
        response = self._network.post(connect_url, data=data)
        self._slave_id = int(response.json().get('slave_id'))
        self._logger.info('Slave {}:{} connected to master on {}.', self.host, self.port, self._master_url)

        # We disconnect from the master before build_teardown so that the master stops sending subjobs. (Teardown
        # callbacks are executed in the reverse order that they're added, so we add the build_teardown callback
        # first.)
        UnhandledExceptionHandler.singleton().add_teardown_callback(self._do_build_teardown_and_reset, timeout=30)
        UnhandledExceptionHandler.singleton().add_teardown_callback(self._disconnect_from_master)

    def _is_master_responsive(self):
        """
        Ping the master to check if it is still alive. Code using this method should treat the return value as a
        *probable* truth since the state of the master can change at any time. This method is not a replacement for
        error handling.

        :return: Whether the master is responsive or not (False if this slave never connected to a master)
        :rtype: bool
        """
        # todo: This method repeats some logic we have in the deployment code (checking a service). We should DRY
        # todo: it up.
        if self._master_api is None:
            # No master api has been set (connect_to_master() assigns it), so there is nothing to ping.
            return False

        is_responsive = True
        try:
            self._network.get(self._master_api.url())
        except requests.ConnectionError:
            is_responsive = False

        return is_responsive

    def start_working_on_subjob(self, build_id, subjob_id, subjob_artifact_dir, atomic_commands):
        """
        Begin working on a subjob with the given build id and subjob id. This just starts the subjob execution
        asynchronously on a separate thread.

        :type build_id: int
        :type subjob_id: int
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        :return: The text to return in the API response.
        :rtype: dict[str, int]
        :raises BadRequestError: if build_id is not the build this slave is currently set up for
        """
        if build_id != self._current_build_id:
            raise BadRequestError('Attempted to start subjob {} for build {}, '
                                  'but current build id is {}.'.format(subjob_id, build_id, self._current_build_id))

        # get idle executor from queue to claim it as in-use (or block until one is available)
        executor = self._idle_executors.get()

        # Start a thread to execute the job
        SafeThread(
            target=self._execute_subjob,
            args=(build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands),
            name='Bld{}-Sub{}'.format(build_id, subjob_id),
        ).start()

        self._logger.info('Slave ({}:{}) has received subjob. (Build {}, Subjob {})', self.host, self.port, build_id,
                          subjob_id)
        return {'executor_id': executor.id}

    def _execute_subjob(self, build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands):
        """
        This is the method for executing a subjob asynchronously. This performs the work required by executing the
        specified command, then does a post back to the master results endpoint to signal that the work is done.

        :type build_id: int
        :type subjob_id: int
        :type executor: SubjobExecutor
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        """
        subjob_event_data = {'build_id': build_id, 'subjob_id': subjob_id, 'executor_id': executor.id}

        analytics.record_event(analytics.SUBJOB_EXECUTION_START, **subjob_event_data)
        results_file = executor.execute_subjob(build_id, subjob_id, subjob_artifact_dir, atomic_commands,
                                               self._base_executor_index)
        analytics.record_event(analytics.SUBJOB_EXECUTION_FINISH, **subjob_event_data)

        results_url = self._master_api.url('build', build_id, 'subjob', subjob_id, 'result')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'metric_data': {'executor_id': executor.id},
        }

        # The context manager closes the results file even if the post to the master raises.
        with open(results_file, 'rb') as results_payload:
            files = {'file': ('payload', results_payload, 'application/x-compressed')}

            self._idle_executors.put(executor)  # work is done; mark executor as idle
            self._network.post(results_url, data=data, files=files)  # todo: check return code

        self._logger.info('Build {}, Subjob {} completed and sent results to master.', build_id, subjob_id)

    def _notify_master_of_state_change(self, new_state):
        """
        Send a state notification to the master. This is used to notify the master of events occurring on the slave
        related to build execution progress.

        :type new_state: SlaveState
        """
        state_url = self._master_api.url('slave', self._slave_id)
        self._network.put_with_digest(state_url, request_params={'slave': {'state': new_state}},
                                      secret=Secret.get(), error_on_failure=True)

    def kill(self):
        """
        Exits without error.
        """
        sys.exit(0)
class ClusterSlave(object):
    """
    The slave service: owns a fixed pool of subjob executors, runs build setup/teardown and subjobs on background
    threads, and posts subjob results and idle/disconnect notifications back to the master.
    """

    API_VERSION = 'v1'

    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        # Every executor starts out idle; the queue holds the executors that are currently free.
        self._idle_executors = Queue(maxsize=num_executors)
        self.executors = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors[executor_id] = executor

        self._setup_complete_event = Event()
        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first
        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None

        # Registered at construction time, so this callback may run before connect_to_master() was ever called.
        UnhandledExceptionHandler.singleton().add_teardown_callback(self._async_teardown_build,
                                                                    should_disconnect_from_master=True)

    def api_representation(self):
        """
        Gets a dict representing this resource which can be returned in an API response.

        :rtype: dict [str, mixed]
        """
        executors_representation = [executor.api_representation() for executor in self.executors.values()]
        return {
            'connected': str(self._is_connected()),
            'master_url': self._master_url,
            'setup_complete': str(self._setup_complete_event.isSet()),
            'slave_id': self._slave_id,
            'executors': executors_representation,
        }

    def _is_connected(self):
        """
        :return: Whether a master url has been set (it is assigned in connect_to_master())
        :rtype: bool
        """
        return self._master_url is not None

    def get_status(self):
        """
        Returns a simple human-readable status message that includes the port of this service.

        :rtype: str
        """
        return 'Slave service is up. <Port: {}>'.format(self.port)

    def setup_build(self, build_id, project_type_params):
        """
        Usually called once per build to do build-specific setup. Will block any subjobs from executing until setup
        completes. The actual setup is performed on another thread and will unblock subjobs (via an Event) once it
        finishes.

        :param build_id: The id of the build to run setup on
        :type build_id: int
        :param project_type_params: The parameters that define the project_type this build will execute in
        :type project_type_params: dict
        :raises RuntimeError: if not all executors are idle
        """
        self._logger.info('Executing setup for build {} (type: {}).', build_id, project_type_params.get('type'))
        self._setup_complete_event.clear()
        self._current_build_id = build_id

        # create an project_type instance for build-level operations
        self._project_type = util.create_project_type(project_type_params)

        # verify all executors are idle
        if not self._idle_executors.full():
            raise RuntimeError('Slave tried to setup build but not all executors are idle. ({}/{} executors idle.)'
                               .format(self._idle_executors.qsize(), self._num_executors))

        # Collect all the executors to pass to project_type.setup_build(). This will create a new project_type for
        # each executor (for subjob-level operations).
        executors = list(self._idle_executors.queue)
        SafeThread(target=self._async_setup_build, args=(executors, project_type_params)).start()

    def _async_setup_build(self, executors, project_type_params):
        """
        Called from setup_build(). Do asynchronous setup for the build so that we can make the call to setup_build()
        non-blocking.

        :type executors: list[SubjobExecutor]
        :type project_type_params: dict
        """
        # todo(joey): It's strange that the project_type is setting up the executors, which in turn set up projects.
        # todo(joey): I think this can be untangled a bit -- we should call executor.configure_project_type() here.
        self._project_type.setup_build(executors, project_type_params)

        self._logger.info('Build setup complete for build {}.', self._current_build_id)
        self._setup_complete_event.set()  # free any subjob threads that are waiting for setup to complete

    def teardown_build(self, build_id=None):
        """
        Called at the end of each build on each slave before it reports back to the master that it is idle again.

        :param build_id: The build id to teardown -- this parameter is used solely for correctness checking of the
            master, to make sure that the master is not erroneously sending teardown commands for other builds.
        :type build_id: int | None
        :raises BadRequestError: if no build is active, or if build_id does not match the active build
        """
        if self._current_build_id is None:
            raise BadRequestError('Tried to teardown a build but no build is active on this slave.')

        if build_id is not None and build_id != self._current_build_id:
            raise BadRequestError('Tried to teardown build {}, '
                                  'but slave is running build {}!'.format(build_id, self._current_build_id))

        self._logger.info('Executing teardown for build {}.', self._current_build_id)

        SafeThread(target=self._async_teardown_build).start()

    def _async_teardown_build(self, should_disconnect_from_master=False):
        """
        Called from teardown_build(). Do asynchronous teardown for the build so that we can make the call to
        teardown_build() non-blocking. Also take care of posting back to the master when teardown is complete.

        :param should_disconnect_from_master: If False, tell the master this slave is idle. If True, tell the
            master to disconnect this slave instead, but only if the master answers a ping.
        :type should_disconnect_from_master: bool
        :raises RuntimeError: if the idle notification is not answered with an OK status
        """
        if self._project_type:
            self._project_type.teardown_build()
            self._logger.info('Build teardown complete for build {}.', self._current_build_id)
            self._current_build_id = None
            self._project_type = None

        if not should_disconnect_from_master:
            # report back to master that this slave is finished with teardown and ready for a new build
            self._logger.info('Notifying master that this slave is ready for new builds.')
            idle_url = self._master_api.url('slave', self._slave_id, 'idle')
            response = self._network.post(idle_url)
            if response.status_code != http.client.OK:
                raise RuntimeError("Could not post teardown completion to master at {}".format(idle_url))

        elif self._is_master_responsive():
            # report back to master that this slave is shutting down and should not receive new builds
            self._logger.info('Notifying master to disconnect this slave.')
            disconnect_url = self._master_api.url('slave', self._slave_id, 'disconnect')
            response = self._network.post(disconnect_url)
            if response.status_code != http.client.OK:
                self._logger.error('Could not post disconnect notification to master at {}'.format(disconnect_url))

    def connect_to_master(self, master_url=None):
        """
        Notify the master that this slave exists.

        :param master_url: The URL of the master service. If none specified, defaults to localhost:43000.
        :type master_url: str | None
        """
        self._master_url = master_url or 'localhost:43000'
        self._master_api = UrlBuilder(self._master_url)
        connect_url = self._master_api.url('slave')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'num_executors': self._num_executors,
        }
        response = self._network.post(connect_url, data)
        self._slave_id = int(response.json().get('slave_id'))
        self._logger.info('Slave {}:{} connected to master on {}.', self.host, self.port, self._master_url)

    def _is_master_responsive(self):
        """
        Ping the master to check if it is still alive. Code using this method should treat the return value as a
        *probable* truth since the state of the master can change at any time. This method is not a replacement for
        error handling.

        :return: Whether the master is responsive or not (False if this slave never connected to a master)
        :rtype: bool
        """
        # todo: This method repeats some logic we have in the deployment code (checking a service). We should DRY
        # todo: it up.
        if self._master_api is None:
            # The teardown callback registered in __init__ can get here before connect_to_master() has set the
            # master api; there is no master to ping in that case.
            return False

        is_responsive = True
        try:
            self._network.get(self._master_api.url())
        except requests.ConnectionError:
            is_responsive = False

        return is_responsive

    def start_working_on_subjob(self, build_id, subjob_id, subjob_artifact_dir, atomic_commands):
        """
        Begin working on a subjob with the given build id and subjob id. This just starts the subjob execution
        asynchronously on a separate thread.

        :type build_id: int
        :type subjob_id: int
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        :return: The text to return in the API response.
        :rtype: dict[str, int]
        :raises BadRequestError: if build_id is not the build this slave is currently set up for
        """
        if build_id != self._current_build_id:
            raise BadRequestError('Attempted to start subjob {} for build {}, '
                                  'but current build id is {}.'.format(subjob_id, build_id, self._current_build_id))

        # get idle executor from queue to claim it as in-use (or block until one is available)
        executor = self._idle_executors.get()

        # Start a thread to execute the job (after waiting for setup to complete)
        SafeThread(
            target=self._execute_subjob,
            args=(build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands),
            name='Build{}-Sub{}'.format(build_id, subjob_id),
        ).start()

        self._logger.info('Slave ({}:{}) has received subjob. (Build {}, Subjob {})', self.host, self.port, build_id,
                          subjob_id)
        return {'executor_id': executor.id}

    def _execute_subjob(self, build_id, subjob_id, executor, subjob_artifact_dir, atomic_commands):
        """
        This is the method for executing a subjob asynchronously. This performs the work required by executing the
        specified command, then does a post back to the master results endpoint to signal that the work is done.

        :type build_id: int
        :type subjob_id: int
        :type executor: SubjobExecutor
        :type subjob_artifact_dir: str
        :type atomic_commands: list[str]
        """
        self._logger.debug('Waiting for setup to complete (Build {}, Subjob {})...', build_id, subjob_id)
        self._setup_complete_event.wait()  # block until setup completes

        subjob_event_data = {'build_id': build_id, 'subjob_id': subjob_id, 'executor_id': executor.id}

        analytics.record_event(analytics.SUBJOB_EXECUTION_START, **subjob_event_data)
        results_file = executor.execute_subjob(build_id, subjob_id, subjob_artifact_dir, atomic_commands)
        analytics.record_event(analytics.SUBJOB_EXECUTION_FINISH, **subjob_event_data)

        results_url = self._master_api.url('build', build_id, 'subjob', subjob_id, 'result')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'metric_data': {'executor_id': executor.id},
        }

        # The context manager closes the results file even if the post to the master raises.
        with open(results_file, 'rb') as results_payload:
            files = {'file': ('payload', results_payload, 'application/x-compressed')}

            self._idle_executors.put(executor)  # work is done; mark executor as idle
            self._network.post(results_url, data=data, files=files)  # todo: check return code

        self._logger.info('Build {}, Subjob {} completed and sent results to master.', build_id, subjob_id)

    def kill(self):
        """
        Exits the process with status 0.
        """
        # TODO(dtran): Kill the threads and this server more gracefully
        sys.exit(0)
class Slave(object):
    """
    Master-side representation of one slave service.

    Keeps track of how many of the slave's executors are claimed and which build the slave is working on, and
    wraps the HTTP requests that are sent to the slave's API.
    """

    # Class-level counter: every instance draws its id from the same counter object.
    _slave_id_counter = Counter()

    def __init__(self, slave_url, num_executors):
        """
        :param slave_url: The url of the slave service this object talks to
        :type slave_url: str
        :param num_executors: The maximum number of executors that may be claimed on this slave
        :type num_executors: int
        """
        self.url = slave_url
        self.num_executors = num_executors
        self.id = self._slave_id_counter.increment()
        self._num_executors_in_use = Counter()
        self._network = Network(min_connection_poolsize=num_executors)
        self.current_build_id = None
        self.is_alive = True
        self._slave_api = UrlBuilder(slave_url, app.master.cluster_master.ClusterMaster.API_VERSION)

    def api_representation(self):
        """
        Gets a dict representing this slave which can be returned in an API response.

        :rtype: dict
        """
        return {
            'url': self.url,
            'id': self.id,
            'num_executors': self.num_executors,
            'num_executors_in_use': self.num_executors_in_use(),
            'current_build_id': self.current_build(),
        }

    def mark_as_idle(self):
        """
        Do bookkeeping when this slave becomes idle. Error if the slave cannot be idle.

        :raises Exception: if any executors are still in use
        """
        executors_in_use = self._num_executors_in_use.value()
        if executors_in_use != 0:
            # The message must be formatted explicitly; Exception() does not interpolate extra arguments.
            raise Exception('Trying to mark slave idle while {} executors still in use.'.format(executors_in_use))

        self.current_build_id = None

    def setup(self, build_id, project_type_params):
        """
        Execute a setup command on the slave for the specified build. The command is executed asynchronously from
        the perspective of this method, but any subjobs will block until the slave finishes executing the setup
        command.

        :param build_id: The build id that this setup command is for.
        :type build_id: int
        :param project_type_params: The parameters that define the project type this build will execute in
        :type project_type_params: dict
        """
        setup_url = self._slave_api.url('build', build_id, 'setup')
        post_data = {
            'project_type_params': project_type_params,
        }
        self._network.post_with_digest(setup_url, post_data, Secret.get())

    def teardown(self):
        """
        Tell the slave to run the build teardown
        """
        teardown_url = self._slave_api.url('build', self.current_build_id, 'teardown')
        self._network.post(teardown_url)

    def start_subjob(self, subjob):
        """
        Send a subjob to the slave for execution (the request is made on a separate thread) and record the
        subjob's build as this slave's current build.

        :type subjob: Subjob
        :raises RuntimeError: if the slave is marked as not alive
        """
        if not self.is_alive:
            raise RuntimeError('Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id))

        SafeThread(target=self._async_start_subjob, args=(subjob,)).start()
        self.current_build_id = subjob.build_id()

    def _async_start_subjob(self, subjob):
        """
        Post the subjob's artifact dir and atomic commands to the slave and record an analytics event for the
        triggered subjob.

        :type subjob: Subjob
        """
        execution_url = self._slave_api.url('build', subjob.build_id(), 'subjob', subjob.subjob_id())
        post_data = {
            'subjob_artifact_dir': subjob.artifact_dir(),
            'atomic_commands': subjob.atomic_commands(),
        }
        response = self._network.post_with_digest(execution_url, post_data, Secret.get(), error_on_failure=True)

        subjob_executor_id = response.json().get('executor_id')
        analytics.record_event(analytics.MASTER_TRIGGERED_SUBJOB, executor_id=subjob_executor_id,
                               build_id=subjob.build_id(), subjob_id=subjob.subjob_id(), slave_url=self.url)

    def num_executors_in_use(self):
        """
        :return: The current value of the executors-in-use counter
        """
        return self._num_executors_in_use.value()

    def claim_executor(self):
        """
        Increment the executors-in-use counter.

        :return: The counter value after the increment
        :raises Exception: if the new count exceeds num_executors (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.increment()
        if new_count > self.num_executors:
            raise Exception('Cannot claim executor on slave {}. No executors left.'.format(self.url))
        return new_count

    def free_executor(self):
        """
        Decrement the executors-in-use counter.

        :return: The counter value after the decrement
        :raises Exception: if the new count is negative (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.decrement()
        if new_count < 0:
            raise Exception('Cannot free executor on slave {}. All are free.'.format(self.url))
        return new_count

    def current_build(self):
        """
        :return: The id of the build recorded as current for this slave, or None if there is none
        :rtype: int|None
        """
        return self.current_build_id
class Slave(object):
    """
    Master-side handle for a single slave service.

    Tracks the slave's url, id, session id, executor usage, current build id, liveness and shutdown mode,
    and sends setup, teardown, subjob and kill requests to the slave's API.
    """

    API_VERSION = 'v1'
    # Shared by all instances so that each new Slave receives a distinct id.
    _slave_id_counter = Counter()

    def __init__(self, slave_url, num_executors, slave_session_id=None):
        """
        :type slave_url: str
        :type num_executors: int
        :type slave_session_id: str
        """
        self.url = slave_url
        self.num_executors = num_executors
        self.id = self._slave_id_counter.increment()
        self._num_executors_in_use = Counter()
        self._network = Network(min_connection_poolsize=num_executors)
        self.current_build_id = None
        self._is_alive = True
        self._is_in_shutdown_mode = False
        self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
        self._session_id = slave_session_id
        self._logger = log.get_logger(__name__)

    def __str__(self):
        return '<slave #{} - {}>'.format(self.id, self.url)

    def api_representation(self):
        """
        Return a dictionary describing this slave's identity, executor usage, current build and state flags.

        :rtype: dict
        """
        return {
            'url': self.url,
            'id': self.id,
            'session_id': self._session_id,
            'num_executors': self.num_executors,
            'num_executors_in_use': self.num_executors_in_use(),
            'current_build_id': self.current_build_id,
            'is_alive': self.is_alive(),
            'is_in_shutdown_mode': self._is_in_shutdown_mode,
        }

    def mark_as_idle(self):
        """
        Do bookkeeping when this slave becomes idle. Error if the slave cannot be idle.
        If the slave is in shutdown mode, clear the build_id, kill the slave, and raise an error.

        :raises Exception: if any executors on this slave are still in use
        :raises SlaveMarkedForShutdownError: if the slave is in shutdown mode (raised after killing it)
        """
        executors_in_use = self._num_executors_in_use.value()
        if executors_in_use != 0:
            # The count must be interpolated with format(); Exception does not format its arguments.
            raise Exception(
                'Trying to mark slave idle while {} executors still in use.'.format(executors_in_use))

        self.current_build_id = None

        if self._is_in_shutdown_mode:
            self.kill()
            raise SlaveMarkedForShutdownError

    def setup(self, build: Build, executor_start_index: int) -> bool:
        """
        Execute a setup command on the slave for the specified build. The setup process executes asynchronously on
        the slave and the slave will alert the master when setup is complete and it is ready to start working on
        subjobs.

        :param build: The build to set up this slave to work on
        :param executor_start_index: The index the slave should number its executors from for this build
        :return: Whether or not the call to start setup on the slave was successful
        """
        # Copy so that the slave-specific overrides do not modify the build request's own parameters.
        slave_project_type_params = build.build_request.build_parameters().copy()
        slave_project_type_params.update(build.project_type.slave_param_overrides())

        setup_url = self._slave_api.url('build', build.build_id(), 'setup')
        post_data = {
            'project_type_params': slave_project_type_params,
            'build_executor_start_index': executor_start_index,
        }

        self.current_build_id = build.build_id()
        try:
            self._network.post_with_digest(setup_url, post_data, Secret.get())
        except (requests.ConnectionError, requests.Timeout) as ex:
            self._logger.warning('Setup call to {} failed with {}: {}.', self, ex.__class__.__name__, str(ex))
            self.mark_dead()
            return False
        return True

    def teardown(self):
        """
        Tell the slave to run the build teardown. The request is skipped if the slave is not alive, and the
        slave is marked dead if the request fails with a connection error or timeout.
        """
        if not self.is_alive():
            self._logger.notice('Teardown request to slave {} was not sent since slave is disconnected.', self.url)
            return

        teardown_url = self._slave_api.url('build', self.current_build_id, 'teardown')
        try:
            self._network.post(teardown_url)
        except (requests.ConnectionError, requests.Timeout):
            self._logger.warning('Teardown request to slave failed because slave is unresponsive.')
            self.mark_dead()

    def start_subjob(self, subjob):
        """
        Start the given subjob on this slave. The request to the slave is sent from a separate SafeThread.

        :type subjob: Subjob
        :raises DeadSlaveError: if this slave is not alive
        :raises SlaveMarkedForShutdownError: if this slave is in shutdown mode
        """
        if not self.is_alive():
            raise DeadSlaveError('Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id))

        if self._is_in_shutdown_mode:
            raise SlaveMarkedForShutdownError(
                'Tried to start a subjob on a slave in shutdown mode. ({}, id: {})'.format(self.url, self.id))

        # todo: This should not be a SafeThread. https://github.com/box/ClusterRunner/issues/337
        SafeThread(target=self._async_start_subjob, args=(subjob,)).start()

    def _async_start_subjob(self, subjob):
        """
        Post the subjob's atomic commands to the slave, then record an analytics event containing the
        executor id taken from the slave's response.

        :type subjob: Subjob
        """
        execution_url = self._slave_api.url('build', subjob.build_id(), 'subjob', subjob.subjob_id())
        post_data = {'atomic_commands': subjob.atomic_commands()}
        response = self._network.post_with_digest(execution_url, post_data, Secret.get(), error_on_failure=True)

        subjob_executor_id = response.json().get('executor_id')
        analytics.record_event(analytics.MASTER_TRIGGERED_SUBJOB, executor_id=subjob_executor_id,
                               build_id=subjob.build_id(), subjob_id=subjob.subjob_id(), slave_id=self.id)

    def num_executors_in_use(self):
        """
        Return the current value of the executors-in-use counter.
        """
        return self._num_executors_in_use.value()

    def claim_executor(self):
        """
        Increment the executors-in-use counter and return its new value.

        :raises Exception: if the new count exceeds num_executors (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.increment()
        if new_count > self.num_executors:
            raise Exception('Cannot claim executor on slave {}. No executors left.'.format(self.url))
        return new_count

    def free_executor(self):
        """
        Decrement the executors-in-use counter and return its new value.

        :raises Exception: if the new count is negative (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.decrement()
        if new_count < 0:
            raise Exception('Cannot free executor on slave {}. All are free.'.format(self.url))
        return new_count

    def is_alive(self, use_cached: bool = True) -> bool:
        """
        Is the slave API responsive?

        Note that if the slave API responds but its session id does not match the one we've stored in this
        instance, then this method will still return false.

        :param use_cached: Should we use the last returned value of the network check to the slave? If True,
            will return cached value. If False, this method will perform an actual network call to the slave.
        :return: Whether or not the slave is alive
        """
        if use_cached:
            return self._is_alive

        try:
            response = self._network.get(self._slave_api.url(), headers=self._expected_session_header())

            if not response.ok:
                self.mark_dead()
            else:
                response_data = response.json()

                # A response that is missing the flag or has a non-boolean flag is treated as a dead slave.
                if 'slave' not in response_data or 'is_alive' not in response_data['slave']:
                    self._logger.warning('{}\'s API is missing key slave[\'is_alive\'].', self.url)
                    self.mark_dead()
                elif not isinstance(response_data['slave']['is_alive'], bool):
                    self._logger.warning('{}\'s API key \'is_alive\' is not a boolean.', self.url)
                    self.mark_dead()
                else:
                    self._is_alive = response_data['slave']['is_alive']
        except (requests.ConnectionError, requests.Timeout):
            self.mark_dead()

        return self._is_alive

    def set_is_alive(self, value):
        """
        Setter for the self._is_alive attribute.

        :type value: bool
        """
        self._is_alive = value

    def set_shutdown_mode(self):
        """
        Mark this slave as being in shutdown mode. Slaves in shutdown mode will not get new subjobs and will be
        killed when they finish teardown, or killed immediately if they are not processing a build.
        """
        self._is_in_shutdown_mode = True
        if self.current_build_id is None:
            self.kill()

    def is_shutdown(self):
        """
        Whether the slave is in shutdown mode.
        """
        return self._is_in_shutdown_mode

    def kill(self):
        """
        Instruct the slave process to kill itself.
        """
        self._logger.notice('Killing {}', self)
        kill_url = self._slave_api.url('kill')
        try:
            self._network.post_with_digest(kill_url, {}, Secret.get())
        except (requests.ConnectionError, requests.Timeout):
            # Best effort: the slave is marked dead below whether or not the kill request got through.
            pass
        self.mark_dead()

    def mark_dead(self):
        """
        Mark the slave dead.
        """
        self._logger.warning('{} has gone offline. Last build: {}', self, self.current_build_id)
        self._is_alive = False
        self.current_build_id = None
        self._network.reset_session()  # Close any pooled connections for this slave.

    def _expected_session_header(self):
        """
        Return headers that should be sent with slave requests to verify that the master is still talking to
        the same slave service that it originally connected to.

        Note that adding these headers to existing requests may add new failure cases (e.g., slave API would
        start returning a 412) so we should make sure all potential 412 errors are handled appropriately when
        adding these headers to existing requests.

        :rtype: dict
        """
        headers = {}
        if self._session_id:
            headers[SessionId.EXPECTED_SESSION_HEADER_KEY] = self._session_id

        return headers
class Slave:
    """
    Master-side handle for a single slave service.

    Tracks the slave's url, id, session id, executor usage, current build id, last heartbeat time, liveness
    and shutdown mode, and sends setup, teardown, subjob and kill requests to the slave's API.
    """

    API_VERSION = 'v1'
    # Shared by all instances so that each new Slave receives a distinct id.
    _slave_id_counter = Counter()

    def __init__(self, slave_url, num_executors, slave_session_id=None):
        """
        :type slave_url: str
        :type num_executors: int
        :type slave_session_id: str
        """
        self.url = slave_url
        self.num_executors = num_executors
        self.id = self._slave_id_counter.increment()
        self._num_executors_in_use = Counter()
        self._network = Network(min_connection_poolsize=num_executors)
        self.current_build_id = None
        self._last_heartbeat_time = datetime.now()
        self._is_alive = True
        self._is_in_shutdown_mode = False
        self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
        self._session_id = slave_session_id
        self._logger = log.get_logger(__name__)

    def __str__(self):
        return '<slave #{} - {}>'.format(self.id, self.url)

    def api_representation(self):
        """
        Return a dictionary describing this slave's identity, executor usage, current build and state flags.

        :rtype: dict
        """
        return {
            'url': self.url,
            'id': self.id,
            'session_id': self._session_id,
            'num_executors': self.num_executors,
            'num_executors_in_use': self.num_executors_in_use(),
            'current_build_id': self.current_build_id,
            'is_alive': self.is_alive(),
            'is_in_shutdown_mode': self._is_in_shutdown_mode,
        }

    def mark_as_idle(self):
        """
        Do bookkeeping when this slave becomes idle. Error if the slave cannot be idle.
        If the slave is in shutdown mode, clear the build_id, kill the slave, and raise an error.

        :raises Exception: if any executors on this slave are still in use
        :raises SlaveMarkedForShutdownError: if the slave is in shutdown mode (raised after killing it and
            removing it from the slave registry)
        """
        executors_in_use = self._num_executors_in_use.value()
        if executors_in_use != 0:
            # The count must be interpolated with format(); Exception does not format its arguments.
            raise Exception(
                'Trying to mark slave idle while {} executors still in use.'.format(executors_in_use))

        self.current_build_id = None

        if self._is_in_shutdown_mode:
            self.kill()
            self._remove_slave_from_registry()
            raise SlaveMarkedForShutdownError

    def setup(self, build: Build, executor_start_index: int) -> bool:
        """
        Execute a setup command on the slave for the specified build. The setup process executes asynchronously on
        the slave and the slave will alert the master when setup is complete and it is ready to start working on
        subjobs.

        :param build: The build to set up this slave to work on
        :param executor_start_index: The index the slave should number its executors from for this build
        :return: Whether or not the call to start setup on the slave was successful
        """
        # Copy so that the slave-specific overrides do not modify the build request's own parameters.
        slave_project_type_params = build.build_request.build_parameters().copy()
        slave_project_type_params.update(build.project_type.slave_param_overrides())

        setup_url = self._slave_api.url('build', build.build_id(), 'setup')
        post_data = {
            'project_type_params': slave_project_type_params,
            'build_executor_start_index': executor_start_index,
        }

        self.current_build_id = build.build_id()
        try:
            self._network.post_with_digest(setup_url, post_data, Secret.get())
        except (requests.ConnectionError, requests.Timeout) as ex:
            self._logger.warning('Setup call to {} failed with {}: {}.', self, ex.__class__.__name__, str(ex))
            self.mark_dead()
            return False
        return True

    def teardown(self):
        """
        Tell the slave to run the build teardown. The request is skipped if the slave is not alive, and the
        slave is marked dead if the request fails with a connection error or timeout.
        """
        if not self.is_alive():
            self._logger.notice('Teardown request to slave {} was not sent since slave is disconnected.', self.url)
            return

        teardown_url = self._slave_api.url('build', self.current_build_id, 'teardown')
        try:
            self._network.post(teardown_url)
        except (requests.ConnectionError, requests.Timeout):
            self._logger.warning('Teardown request to slave failed because slave is unresponsive.')
            self.mark_dead()

    def start_subjob(self, subjob: Subjob):
        """
        Send a subjob of a build to this slave. The slave must have already run setup for the corresponding build.

        :param subjob: The subjob to send to this slave
        :raises DeadSlaveError: if this slave is not alive
        :raises SlaveMarkedForShutdownError: if this slave is in shutdown mode
        :raises SlaveCommunicationError: if the request to the slave service fails
        """
        if not self.is_alive():
            raise DeadSlaveError('Tried to start a subjob on a dead slave.')
        if self._is_in_shutdown_mode:
            raise SlaveMarkedForShutdownError('Tried to start a subjob on a slave in shutdown mode.')

        execution_url = self._slave_api.url('build', subjob.build_id(), 'subjob', subjob.subjob_id())
        post_data = {'atomic_commands': subjob.atomic_commands()}
        try:
            response = self._network.post_with_digest(execution_url, post_data, Secret.get(), error_on_failure=True)
        except (requests.ConnectionError, requests.Timeout, RequestFailedError) as ex:
            # Wrap the failure in a single exception type while keeping the original as the cause.
            raise SlaveCommunicationError('Call to slave service failed: {}.'.format(repr(ex))) from ex

        subjob_executor_id = response.json().get('executor_id')
        analytics.record_event(analytics.MASTER_TRIGGERED_SUBJOB, executor_id=subjob_executor_id,
                               build_id=subjob.build_id(), subjob_id=subjob.subjob_id(), slave_id=self.id)

    def num_executors_in_use(self):
        """
        Return the current value of the executors-in-use counter.
        """
        return self._num_executors_in_use.value()

    def claim_executor(self):
        """
        Increment the executors-in-use counter and return its new value.

        :raises Exception: if the new count exceeds num_executors (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.increment()
        if new_count > self.num_executors:
            raise Exception('Cannot claim executor on slave {}. No executors left.'.format(self.url))
        return new_count

    def free_executor(self):
        """
        Decrement the executors-in-use counter and return its new value.

        :raises Exception: if the new count is negative (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.decrement()
        if new_count < 0:
            raise Exception('Cannot free executor on slave {}. All are free.'.format(self.url))
        return new_count

    def is_alive(self, use_cached: bool=True) -> bool:
        """
        Is the slave API responsive?

        Note that if the slave API responds but its session id does not match the one we've stored in this
        instance, then this method will still return false.

        :param use_cached: Should we use the last returned value of the network check to the slave? If True,
            will return cached value. If False, this method will perform an actual network call to the slave.
        :return: Whether or not the slave is alive
        """
        if use_cached:
            return self._is_alive

        try:
            response = self._network.get(self._slave_api.url(), headers=self._expected_session_header())

            if not response.ok:
                self.mark_dead()
            else:
                response_data = response.json()

                # A response that is missing the flag or has a non-boolean flag is treated as a dead slave.
                if 'slave' not in response_data or 'is_alive' not in response_data['slave']:
                    self._logger.warning('{}\'s API is missing key slave[\'is_alive\'].', self.url)
                    self.mark_dead()
                elif not isinstance(response_data['slave']['is_alive'], bool):
                    self._logger.warning('{}\'s API key \'is_alive\' is not a boolean.', self.url)
                    self.mark_dead()
                else:
                    self._is_alive = response_data['slave']['is_alive']
        except (requests.ConnectionError, requests.Timeout):
            self.mark_dead()

        return self._is_alive

    def set_is_alive(self, value):
        """
        Setter for the self._is_alive attribute.

        :type value: bool
        """
        self._is_alive = value

    def set_shutdown_mode(self):
        """
        Mark this slave as being in shutdown mode. Slaves in shutdown mode will not get new subjobs and will be
        killed and removed from slave registry when they finish teardown, or killed and removed from slave
        registry immediately if they are not processing a build.
        """
        self._is_in_shutdown_mode = True
        if self.current_build_id is None:
            self.kill()
            self._remove_slave_from_registry()

    def is_shutdown(self):
        """
        Whether the slave is in shutdown mode.
        """
        return self._is_in_shutdown_mode

    def kill(self):
        """
        Instruct the slave process to kill itself.
        """
        self._logger.notice('Killing {}', self)
        kill_url = self._slave_api.url('kill')
        try:
            self._network.post_with_digest(kill_url, {}, Secret.get())
        except (requests.ConnectionError, requests.Timeout):
            # Best effort: the slave is marked dead below whether or not the kill request got through.
            pass
        self.mark_dead()

    def mark_dead(self):
        """
        Mark the slave dead.
        """
        self._logger.warning('{} has gone offline. Last build: {}', self, self.current_build_id)
        self._is_alive = False
        self.current_build_id = None
        self._network.reset_session()  # Close any pooled connections for this slave.

    def _expected_session_header(self):
        """
        Return headers that should be sent with slave requests to verify that the master is still talking to
        the same slave service that it originally connected to.

        Note that adding these headers to existing requests may add new failure cases (e.g., slave API would
        start returning a 412) so we should make sure all potential 412 errors are handled appropriately when
        adding these headers to existing requests.

        :rtype: dict
        """
        headers = {}
        if self._session_id:
            headers[SessionId.EXPECTED_SESSION_HEADER_KEY] = self._session_id

        return headers

    def update_last_heartbeat_time(self):
        """
        Set the last heartbeat time of this slave to the current time.
        """
        self._last_heartbeat_time = datetime.now()

    def get_last_heartbeat_time(self) -> datetime:
        """
        Return the time most recently stored by the constructor or update_last_heartbeat_time().
        """
        return self._last_heartbeat_time

    def _remove_slave_from_registry(self):
        """
        Remove shutdown-ed slave from SlaveRegistry.
        """
        self._logger.info('Removing slave (url={}; id={}) from Slave Registry.'.format(self.url, self.id))
        SlaveRegistry.singleton().remove_slave(slave_url=self.url)
class Slave(object):
    """
    Master-side handle for a single slave service.

    Tracks the slave's url, id, executor usage, current build id and liveness, and sends setup, teardown
    and subjob requests to the slave's API.
    """

    API_VERSION = 'v1'
    # Shared by all instances so that each new Slave receives a distinct id.
    _slave_id_counter = Counter()

    def __init__(self, slave_url, num_executors):
        """
        :type slave_url: str
        :type num_executors: int
        """
        self.url = slave_url
        self.num_executors = num_executors
        self.id = self._slave_id_counter.increment()
        self._num_executors_in_use = Counter()
        self._network = Network(min_connection_poolsize=num_executors)
        self.current_build_id = None
        self._is_alive = True
        self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
        self._logger = log.get_logger(__name__)

    def api_representation(self):
        """
        Return a dictionary describing this slave (url, id, executor counts, current build id and liveness).

        :rtype: dict
        """
        return {
            'url': self.url,
            'id': self.id,
            'num_executors': self.num_executors,
            'num_executors_in_use': self.num_executors_in_use(),
            'current_build_id': self.current_build_id,
            'is_alive': self.is_alive(),
        }

    def mark_as_idle(self):
        """
        Do bookkeeping when this slave becomes idle. Error if the slave cannot be idle.

        :raises Exception: if any executors on this slave are still in use
        """
        executors_in_use = self._num_executors_in_use.value()
        if executors_in_use != 0:
            # The count must be interpolated with format(); Exception does not format its arguments.
            raise Exception(
                'Trying to mark slave idle while {} executors still in use.'.format(executors_in_use))

        self.current_build_id = None

    def setup(self, build):
        """
        Execute a setup command on the slave for the specified build. The setup process executes asynchronously on
        the slave and the slave will alert the master when setup is complete and it is ready to start working on
        subjobs.

        :param build: The build to set up this slave to work on
        :type build: Build
        """
        # Copy so that the slave-specific overrides do not modify the build request's own parameters.
        slave_project_type_params = build.build_request.build_parameters().copy()
        slave_project_type_params.update(build.project_type.slave_param_overrides())

        setup_url = self._slave_api.url('build', build.build_id(), 'setup')
        post_data = {
            'project_type_params': slave_project_type_params,
            'build_executor_start_index': build.num_executors_allocated,
        }
        self._network.post_with_digest(setup_url, post_data, Secret.get())
        self.current_build_id = build.build_id()

    def teardown(self):
        """
        Tell the slave to run the build teardown. The request is not sent if the slave is not alive.
        """
        if self.is_alive():
            teardown_url = self._slave_api.url('build', self.current_build_id, 'teardown')
            self._network.post(teardown_url)
        else:
            self._logger.notice('Teardown request to slave {} was not sent since slave is disconnected.', self.url)

    def start_subjob(self, subjob):
        """
        Start the given subjob on this slave. The request to the slave is sent from a separate SafeThread.

        :type subjob: Subjob
        :raises RuntimeError: if this slave is not alive
        """
        if not self.is_alive():
            raise RuntimeError('Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id))

        SafeThread(target=self._async_start_subjob, args=(subjob,)).start()

    def _async_start_subjob(self, subjob):
        """
        Post the subjob's artifact dir and atomic commands to the slave, then record an analytics event
        containing the executor id taken from the slave's response.

        :type subjob: Subjob
        """
        execution_url = self._slave_api.url('build', subjob.build_id(), 'subjob', subjob.subjob_id())
        post_data = {
            'subjob_artifact_dir': subjob.artifact_dir(),
            'atomic_commands': subjob.atomic_commands(),
        }
        response = self._network.post_with_digest(execution_url, post_data, Secret.get(), error_on_failure=True)

        subjob_executor_id = response.json().get('executor_id')
        analytics.record_event(analytics.MASTER_TRIGGERED_SUBJOB, executor_id=subjob_executor_id,
                               build_id=subjob.build_id(), subjob_id=subjob.subjob_id(), slave_id=self.id)

    def num_executors_in_use(self):
        """
        Return the current value of the executors-in-use counter.
        """
        return self._num_executors_in_use.value()

    def claim_executor(self):
        """
        Increment the executors-in-use counter and return its new value.

        :raises Exception: if the new count exceeds num_executors (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.increment()
        if new_count > self.num_executors:
            raise Exception('Cannot claim executor on slave {}. No executors left.'.format(self.url))
        return new_count

    def free_executor(self):
        """
        Decrement the executors-in-use counter and return its new value.

        :raises Exception: if the new count is negative (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.decrement()
        if new_count < 0:
            raise Exception('Cannot free executor on slave {}. All are free.'.format(self.url))
        return new_count

    def is_alive(self, use_cached=True):
        """
        Is the slave API responsive?

        :param use_cached: Should we use the last returned value of the network check to the slave? If True,
            will return cached value. If False, this method will perform an actual network call to the slave.
        :type use_cached: bool
        :rtype: bool
        """
        if use_cached:
            return self._is_alive

        try:
            response = self._network.get(self._slave_api.url())

            if not response.ok:
                self._is_alive = False
            else:
                response_data = response.json()

                # A response that is missing the flag or has a non-boolean flag is treated as a dead slave.
                if 'slave' not in response_data or 'is_alive' not in response_data['slave']:
                    self._logger.warning('{}\'s API is missing key slave[\'is_alive\'].', self.url)
                    self._is_alive = False
                elif not isinstance(response_data['slave']['is_alive'], bool):
                    self._logger.warning('{}\'s API key \'is_alive\' is not a boolean.', self.url)
                    self._is_alive = False
                else:
                    self._is_alive = response_data['slave']['is_alive']
        except requests.exceptions.ConnectionError:
            self._logger.warning('Slave with url {} is offline.', self.url)
            self._is_alive = False

        return self._is_alive

    def set_is_alive(self, value):
        """
        Setter for the self._is_alive attribute.

        :type value: bool
        """
        self._is_alive = value
class Slave(object):
    """
    Master-side handle for a single slave service.

    Tracks the slave's url, id, executor usage, current build id and liveness, and sends setup, teardown
    and subjob requests to the slave's API.
    """

    API_VERSION = 'v1'
    # Shared by all instances so that each new Slave receives a distinct id.
    _slave_id_counter = Counter()

    def __init__(self, slave_url, num_executors):
        """
        :type slave_url: str
        :type num_executors: int
        """
        self.url = slave_url
        self.num_executors = num_executors
        self.id = self._slave_id_counter.increment()
        self._num_executors_in_use = Counter()
        self._network = Network(min_connection_poolsize=num_executors)
        self.current_build_id = None
        self._is_alive = True
        self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
        self._logger = log.get_logger(__name__)

    def api_representation(self):
        """
        Return a dictionary describing this slave (url, id, executor counts and current build id).

        :rtype: dict
        """
        return {
            'url': self.url,
            'id': self.id,
            'num_executors': self.num_executors,
            'num_executors_in_use': self.num_executors_in_use(),
            'current_build_id': self.current_build_id,
        }

    def mark_as_idle(self):
        """
        Do bookkeeping when this slave becomes idle. Error if the slave cannot be idle.

        :raises Exception: if any executors on this slave are still in use
        """
        executors_in_use = self._num_executors_in_use.value()
        if executors_in_use != 0:
            # The count must be interpolated with format(); Exception does not format its arguments.
            raise Exception(
                'Trying to mark slave idle while {} executors still in use.'.format(executors_in_use))

        self.current_build_id = None

    def setup(self, build_id, project_type_params):
        """
        Execute a setup command on the slave for the specified build. The command is executed asynchronously from
        the perspective of this method, but any subjobs will block until the slave finishes executing the setup
        command.

        :param build_id: The build id that this setup command is for.
        :type build_id: int
        :param project_type_params: The parameters that define the project type this build will execute in
        :type project_type_params: dict
        """
        setup_url = self._slave_api.url('build', build_id, 'setup')
        slave_project_type_params = util.project_type_params_for_slave(project_type_params)
        post_data = {
            'project_type_params': slave_project_type_params,
        }
        self._network.post_with_digest(setup_url, post_data, Secret.get())
        self.current_build_id = build_id

    def teardown(self):
        """
        Tell the slave to run the build teardown. The request is not sent if the slave is not alive.
        """
        if self.is_alive():
            teardown_url = self._slave_api.url('build', self.current_build_id, 'teardown')
            self._network.post(teardown_url)
        else:
            self._logger.notice('Teardown request to slave {} was not sent since slave is disconnected.', self.url)

    def start_subjob(self, subjob):
        """
        Start the given subjob on this slave. The request to the slave is sent from a separate SafeThread.

        :type subjob: Subjob
        :raises RuntimeError: if this slave is not alive
        """
        if not self.is_alive():
            raise RuntimeError('Tried to start a subjob on a dead slave! ({}, id: {})'.format(self.url, self.id))

        SafeThread(target=self._async_start_subjob, args=(subjob,)).start()

    def _async_start_subjob(self, subjob):
        """
        Post the subjob's artifact dir and atomic commands to the slave, then record an analytics event
        containing the executor id taken from the slave's response.

        :type subjob: Subjob
        """
        execution_url = self._slave_api.url('build', subjob.build_id(), 'subjob', subjob.subjob_id())
        post_data = {
            'subjob_artifact_dir': subjob.artifact_dir(),
            'atomic_commands': subjob.atomic_commands(),
        }
        response = self._network.post_with_digest(execution_url, post_data, Secret.get(), error_on_failure=True)

        subjob_executor_id = response.json().get('executor_id')
        analytics.record_event(analytics.MASTER_TRIGGERED_SUBJOB, executor_id=subjob_executor_id,
                               build_id=subjob.build_id(), subjob_id=subjob.subjob_id(), slave_id=self.id)

    def num_executors_in_use(self):
        """
        Return the current value of the executors-in-use counter.
        """
        return self._num_executors_in_use.value()

    def claim_executor(self):
        """
        Increment the executors-in-use counter and return its new value.

        :raises Exception: if the new count exceeds num_executors (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.increment()
        if new_count > self.num_executors:
            raise Exception('Cannot claim executor on slave {}. No executors left.'.format(self.url))
        return new_count

    def free_executor(self):
        """
        Decrement the executors-in-use counter and return its new value.

        :raises Exception: if the new count is negative (the counter is not rolled back)
        """
        new_count = self._num_executors_in_use.decrement()
        if new_count < 0:
            raise Exception('Cannot free executor on slave {}. All are free.'.format(self.url))
        return new_count

    def is_alive(self, use_cached=True):
        """
        Is the slave API responsive?

        :param use_cached: Should we use the last returned value of the network check to the slave? If True,
            will return cached value. If False, this method will perform an actual network call to the slave.
        :type use_cached: bool
        :rtype: bool
        """
        if use_cached:
            return self._is_alive

        try:
            response = self._network.get(self._slave_api.url())

            if not response.ok:
                self._is_alive = False
            else:
                response_data = response.json()

                # A response that is missing the flag or has a non-boolean flag is treated as a dead slave.
                if 'slave' not in response_data or 'is_alive' not in response_data['slave']:
                    self._logger.warning('{}\'s API is missing key slave[\'is_alive\'].', self.url)
                    self._is_alive = False
                elif not isinstance(response_data['slave']['is_alive'], bool):
                    self._logger.warning('{}\'s API key \'is_alive\' is not a boolean.', self.url)
                    self._is_alive = False
                else:
                    self._is_alive = response_data['slave']['is_alive']
        # NOTE(review): if this name resolves to the builtin ConnectionError rather than the one from the
        # requests library, connection failures raised by requests would not be caught here -- confirm
        # which one this file imports.
        except ConnectionError:
            self._logger.warning('Slave with url {} is offline.', self.url)
            self._is_alive = False

        return self._is_alive

    def set_is_alive(self, value):
        """
        Setter for the self._is_alive attribute.

        :type value: bool
        """
        self._is_alive = value