def connect_to_master(self, master_url=None):
    """
    Register this slave with the master service and schedule the teardown callbacks.

    Posts this slave's host:port and executor count to the master's 'slave' url and stores the 'slave_id' from
    the JSON response as an int.

    :param master_url: The URL of the master service. If none specified, defaults to localhost:43000.
    :type master_url: str | None
    """
    self.is_alive = True
    self._master_url = master_url or 'localhost:43000'
    self._master_api = UrlBuilder(self._master_url)

    registration_url = self._master_api.url('slave')
    registration_payload = {
        'slave': '{}:{}'.format(self.host, self.port),
        'num_executors': self._num_executors,
    }
    registration_response = self._network.post(registration_url, data=registration_payload)
    assigned_slave_id = registration_response.json().get('slave_id')
    self._slave_id = int(assigned_slave_id)
    self._logger.info('Slave {}:{} connected to master on {}.', self.host, self.port, self._master_url)

    # Teardown callbacks run in the reverse order of registration. Registering the build teardown first means
    # the disconnect runs first at teardown time, so the master stops sending subjobs before the build is torn
    # down.
    exception_handler = UnhandledExceptionHandler.singleton()
    exception_handler.add_teardown_callback(self._do_build_teardown_and_reset, timeout=30)
    exception_handler.add_teardown_callback(self._disconnect_from_master)
def _validate_successful_deployment(self, master_service_url, slaves_to_validate):
    """
    Poll the master's slave endpoint until either timeout or until all of the slaves have registered with the
    master. Returns normally once registration is complete.

    Registration is judged by count only: polling succeeds as soon as the number of registered slave hostnames
    equals the number of slaves to validate.

    :param master_service_url: the hostname:port for the running master service
    :type master_service_url: str
    :param slaves_to_validate: the list of slave hostnames (no ports) to deploy to
    :type slaves_to_validate: list[str]
    :raises SystemExit: with status 1 if the slaves did not register before the timeout, or if the master
        could not be contacted while collecting the list of missing slaves
    """
    master_api = UrlBuilder(master_service_url, BuildRunner.API_VERSION)
    slave_api_url = master_api.url('slave')
    network = Network()

    def all_slaves_registered():
        return len(self._registered_slave_hostnames(slave_api_url, network)) == len(slaves_to_validate)

    registration_completed = wait_for(
        boolean_predicate=all_slaves_registered,
        timeout_seconds=self._SLAVE_REGISTRY_TIMEOUT_SEC,
        poll_period=1,
        exceptions_to_swallow=(requests.RequestException, requests.ConnectionError),
    )
    if registration_completed:
        return

    # Timed out: query the master one more time, only to report which slaves are missing.
    try:
        registered_slaves = self._registered_slave_hostnames(slave_api_url, network)
        non_registered_slaves = self._non_registered_slaves(registered_slaves, slaves_to_validate)
    except (requests.RequestException, ConnectionError):
        # Catch the same requests errors that were swallowed while polling. The builtin ConnectionError alone
        # does not cover requests' own exception classes, so a master that is still unreachable would
        # otherwise surface as a raw traceback instead of this message.
        self._logger.error('Error contacting {} on the master.'.format(slave_api_url))
        raise SystemExit(1)

    self._logger.error('Slave registration timed out after {} sec, with slaves {} missing.'.format(
        self._SLAVE_REGISTRY_TIMEOUT_SEC, ','.join(non_registered_slaves)))
    raise SystemExit(1)
def __init__(self, base_api_url):
    """
    Create the logger, network client and url builder used by this object.

    :param base_api_url: The base API url of the service (e.g., 'http://localhost:43000')
    :type base_api_url: str
    """
    self._logger = log.get_logger(__name__)
    self._network = Network()
    # All urls produced through self._api are built from the given base url.
    self._api = UrlBuilder(base_api_url)
def test_url_should_generate_correct_url(self):
    """Check that UrlBuilder.url() joins the host, the 'v1' version prefix and every path segment."""
    host = 'master:9000'
    path_segments = ('first', 'second', 'third')

    actual_url = UrlBuilder(host).url(*path_segments)

    expected_url = 'http://{}/v1/{}/{}/{}'.format(host, *path_segments)
    self.assertEqual(expected_url, actual_url, 'Url generated did not match expectation')
def __init__(self, slave_url, num_executors):
    """
    Initialise the state tracked for a single slave service.

    :param slave_url: url of the slave service; also used to build the slave's API urls
    :type slave_url: str
    :param num_executors: number of executors on the slave; also used as the minimum connection pool size
    :type num_executors: int
    """
    self._logger = log.get_logger(__name__)

    # Identity: each new instance takes the next value from the class-level id counter.
    self.id = self._slave_id_counter.increment()
    self.url = slave_url
    self.num_executors = num_executors

    # Communication helpers for talking to the slave's API.
    self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
    self._network = Network(min_connection_poolsize=num_executors)

    # Runtime state: starts alive, with no build assigned and a fresh in-use counter.
    self._is_alive = True
    self.current_build_id = None
    self._num_executors_in_use = Counter()
def __init__(self, master_url, request_params, secret):
    """
    Store the build request details and create the helpers used to talk to the master.

    :param master_url: The url of the master which the build will be executed on
    :type master_url: str
    :param request_params: A dict of request params that will be json-encoded and sent in the build request
    :type request_params: dict
    :type secret: str
    """
    self._logger = get_logger(__name__)
    self._network = Network()

    # Request details supplied by the caller.
    self._request_params = request_params
    self._secret = secret

    # Nothing is known about the build until it has been requested.
    self._build_id = None
    self._last_build_status_details = None

    # NOTE(review): only self._master_url holds the scheme-normalized url; the two API helpers below are built
    # from master_url exactly as it was passed in. Confirm that this is intentional.
    self._master_url = self._ensure_url_has_scheme(master_url)
    self._master_api = UrlBuilder(master_url, self.API_VERSION)
    self._cluster_master_api_client = ClusterMasterAPIClient(master_url)
def __init__(self, slave_url, num_executors, slave_session_id=None):
    """
    Initialise the state tracked for a single slave service.

    :param slave_url: url of the slave service; also used to build the slave's API urls
    :type slave_url: str
    :param num_executors: number of executors on the slave; also used as the minimum connection pool size
    :type num_executors: int
    :param slave_session_id: session id stored for this slave; defaults to None
    :type slave_session_id: str
    """
    self._logger = log.get_logger(__name__)

    # Identity: each new instance takes the next value from the class-level id counter.
    self.id = self._slave_id_counter.increment()
    self.url = slave_url
    self.num_executors = num_executors
    self._session_id = slave_session_id

    # Communication helpers for talking to the slave's API.
    self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
    self._network = Network(min_connection_poolsize=num_executors)

    # Runtime state: starts alive, not shutting down, with no build assigned.
    self._is_alive = True
    self._is_in_shutdown_mode = False
    self.current_build_id = None
    self._num_executors_in_use = Counter()
    # The heartbeat clock starts at construction time.
    self._last_heartbeat_time = datetime.now()
def get(self, build_id, subjob_id, atom_id):
    """
    Write the console output of an atom, or redirect to the slave assigned to its subjob.

    Reads the optional 'max_lines' (default 50) and 'offset_line' query arguments. If the master does not have
    the output and the subjob has no slave, the original ItemNotFoundError is re-raised.

    :type build_id: int
    :type subjob_id: int
    :type atom_id: int
    """
    max_lines = int(self.get_query_argument('max_lines', 50))
    raw_offset_line = self.get_query_argument('offset_line', None)
    offset_line = None if raw_offset_line is None else int(raw_offset_line)

    try:
        console_output = self._cluster_master.get_console_output(
            build_id, subjob_id, atom_id, Configuration['results_directory'], max_lines, offset_line)
        self.write(console_output)
        return
    except ItemNotFoundError as not_found_error:
        # If the master doesn't have the atom's console output, it's possible it's currently being worked on,
        # in which case the slave that is working on it may be able to provide the in-progress console output.
        subjob = self._cluster_master.get_build(int(build_id)).subjob(int(subjob_id))
        assigned_slave = subjob.slave
        if assigned_slave is None:
            raise not_found_error

        slave_console_url = UrlBuilder(assigned_slave.url).url(
            'build', build_id, 'subjob', subjob_id, 'atom', atom_id, 'console')

        # Forward the same paging arguments to the slave; offset_line is only sent when the caller gave one.
        query_pairs = [('max_lines', max_lines)]
        if offset_line is not None:
            query_pairs.append(('offset_line', offset_line))
        self.redirect('{}?{}'.format(slave_console_url, urllib.parse.urlencode(query_pairs)))
def block_until_build_queue_empty(self, timeout=60):
    """
    Block until the master's build queue is empty, checking every half second.

    The queue is read from the master's 'queue' url. It counts as empty when the response is ok and its JSON
    body contains a 'queue' entry of length zero.

    :param timeout: The maximum number of seconds to block before raising an exception.
    :type timeout: int
    :raises Exception: if the queue is not empty before the timeout
    """
    queue_url = UrlBuilder(self._master_url).url('queue')

    def is_queue_empty():
        queue_resp = requests.get(queue_url)
        if not (queue_resp and queue_resp.ok):
            return False
        queue_data = queue_resp.json()
        return 'queue' in queue_data and len(queue_data['queue']) == 0

    if poll.wait_for(is_queue_empty, timeout, 0.5):
        return
    raise Exception('Master service did not become idle before timeout.')