def _add_hosts_and_components(self, cluster_spec, servers, ambari_info, name):
    """Register each server as a cluster host and attach its node-group
    components via the Ambari REST API.

    Components belonging to AMBARI itself are skipped — they are managed
    by the Ambari server, not installed on cluster hosts.

    :raises ex.HadoopProvisionError: if any host or host-component POST
        does not return 201.
    """
    add_host_url = 'http://{0}/api/v1/clusters/{1}/hosts/{2}'
    add_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                              '/hosts/{2}/host_components/{3}')
    for host in servers:
        hostname = host.instance.fqdn().lower()
        result = self._post(
            add_host_url.format(ambari_info.get_address(), name, hostname),
            ambari_info)
        if result.status_code != 201:
            # lazy %-args keep interpolation in the logging framework,
            # matching the convention used elsewhere in this file
            LOG.error(_LE('Create host command failed. %s'), result.text)
            raise ex.HadoopProvisionError(
                _('Failed to add host: %s') % result.text)

        node_group_name = host.node_group.name
        # TODO(jspeidel): ensure that node group exists
        node_group = cluster_spec.node_groups[node_group_name]
        for component in node_group.components:
            # don't add any AMBARI components
            # (startswith is the idiomatic form of find(...) != 0)
            if not component.startswith('AMBARI'):
                result = self._post(
                    add_host_component_url.format(
                        ambari_info.get_address(), name, hostname,
                        component), ambari_info)
                if result.status_code != 201:
                    LOG.error(
                        _LE('Create host_component command failed. %s'),
                        result.text)
                    raise ex.HadoopProvisionError(
                        _('Failed to add host component: %s')
                        % result.text)
def _install_services(self, cluster_name, ambari_info):
    """Ask Ambari to install every service currently in the INIT state.

    Issues a PUT that transitions ServiceInfo/state from INIT to
    INSTALLED, then polls the resulting asynchronous request until it
    finishes, finalizing the Ambari cluster state on success.

    :raises ex.HadoopProvisionError: if the install request is rejected
        or the async request does not complete successfully.
    """
    LOG.info(_LI('Installing required Hadoop services ...'))

    ambari_address = ambari_info.get_address()
    install_url = ('http://{0}/api/v1/clusters/{'
                   '1}/services?ServiceInfo/state=INIT'.format(
                       ambari_address, cluster_name))
    body = ('{"RequestInfo" : { "context" : "Install all services" },'
            '"Body" : {"ServiceInfo": {"state" : "INSTALLED"}}}')

    result = self._put(install_url, ambari_info, data=body)

    if result.status_code == 202:
        # 202 Accepted: Ambari queued an async request; poll it to
        # completion before declaring the install done.
        json_result = json.loads(result.text)
        request_id = json_result['Requests']['id']
        success = self._wait_for_async_request(
            self._get_async_request_uri(ambari_info, cluster_name,
                                        request_id), ambari_info)
        if success:
            LOG.info(_LI("Install of Hadoop stack successful."))
            self._finalize_ambari_state(ambari_info)
        else:
            LOG.critical(_LC('Install command failed.'))
            raise ex.HadoopProvisionError(
                _('Installation of Hadoop stack failed.'))
    elif result.status_code != 200:
        # 200 means nothing needed installing; any other status is an
        # outright rejection of the request.
        LOG.error(_LE('Install command failed. {0}').format(result.text))
        raise ex.HadoopProvisionError(
            _('Installation of Hadoop stack failed.'))
def _exec_ambari_command(self, ambari_info, body, cmd_uri):
    """Issue a PUT command to Ambari and block until the resulting
    asynchronous request finishes.

    :raises ex.HadoopProvisionError: if the PUT is rejected or the
        async request ends unsuccessfully.
    """
    LOG.debug('PUT URI: {0}'.format(cmd_uri))
    response = self._put(cmd_uri, ambari_info, data=body)

    # Anything other than 202 Accepted means Ambari rejected the command.
    if response.status_code != 202:
        LOG.error(
            _LE('Command failed. Status: %(status)s, response: '
                '%(response)s'), {'status': response.status_code,
                                  'response': response.text})
        raise ex.HadoopProvisionError(_('Hadoop/Ambari command failed.'))

    LOG.debug('PUT response: {0}'.format(response.text))
    tasks_href = (json.loads(response.text)['href'] +
                  '/tasks?fields=Tasks/status')
    if self._wait_for_async_request(tasks_href, ambari_info):
        LOG.info(
            _LI("Successfully changed state of Hadoop components "))
    else:
        LOG.critical(
            _LC('Failed to change state of Hadoop '
                'components'))
        raise ex.HadoopProvisionError(
            _('Failed to change state of Hadoop components'))
def start_services(self, cluster_name, cluster_spec, ambari_info):
    """Transition all INSTALLED services to STARTED and wait for the
    resulting Ambari async request to complete.

    Consistency fix: sibling methods in this file wrap log messages in
    oslo translation markers (_LI/_LE/_LC) and use lazy %-args; this
    method now follows the same convention.

    :raises ex.HadoopProvisionError: if the start request fails.
    """
    LOG.info(_LI('Starting Hadoop services ...'))
    LOG.info(_LI('Cluster name: {0}, Ambari server address: {1}').format(
        cluster_name, ambari_info.get_address()))
    start_url = ('http://{0}/api/v1/clusters/{1}/services?ServiceInfo/'
                 'state=INSTALLED'.format(ambari_info.get_address(),
                                          cluster_name))
    body = ('{"RequestInfo" : { "context" : "Start all services" },'
            '"Body" : {"ServiceInfo": {"state" : "STARTED"}}}')

    self._fire_service_start_notifications(
        cluster_name, cluster_spec, ambari_info)
    result = self._put(start_url, ambari_info, data=body)
    if result.status_code == 202:
        # 202 Accepted: poll the queued async request to completion.
        json_result = json.loads(result.text)
        request_id = json_result['Requests']['id']
        success = self._wait_for_async_request(
            self._get_async_request_uri(ambari_info, cluster_name,
                                        request_id), ambari_info)
        if success:
            LOG.info(_LI("Successfully started Hadoop cluster "
                         "'{0}'.").format(cluster_name))
        else:
            LOG.critical(_LC('Failed to start Hadoop cluster.'))
            raise ex.HadoopProvisionError(
                _('Start of Hadoop services failed.'))
    elif result.status_code != 200:
        LOG.error(
            _LE('Start command failed. Status: %(status)s, response: '
                '%(response)s'),
            {'status': result.status_code, 'response': result.text})
        raise ex.HadoopProvisionError(_('Start of Hadoop services failed.'))
def _start_cloudera_manager(cluster):
    """Start the Cloudera Manager daemon (and its database) on the
    manager node, then wait until the CM API port accepts connections.

    :raises ex.HadoopProvisionError: if the port is still closed after
        the timeout elapses.
    """
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : "
              % {'timeout': timeout})
    s_time = timeutils.utcnow()
    started = False
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            # probe the API port; success means the manager is up
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            started = True
            break
        except IOError:
            context.sleep(2)

    if not started:
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout / 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name}
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
def _set_ambari_credentials(self, cluster_spec, ambari_info, version):
    """Configure Ambari credentials from the cluster spec.

    If the spec supplies an 'admin' user, the stock admin account's
    password is updated in place.  Otherwise every spec user is created,
    and the first one in the 'admin' group replaces the stock 'admin'
    account, which is then deleted.  ``ambari_info`` is updated with the
    credentials all subsequent REST calls must use.

    :raises ex.HadoopProvisionError: if no admin-capable user is
        configured.
    """
    services = cluster_spec.services
    ambari_client = (self.version_factory.get_version_handler(
        version).get_ambari_client())
    for service in services:
        if service.name == 'AMBARI':
            is_admin_provided = False
            admin_user = ambari_info.user
            admin_password = ambari_info.password
            for u in service.users:
                if u.name == 'admin':
                    ambari_client.update_ambari_admin_user(
                        u.password, ambari_info)
                    is_admin_provided = True
                    # BUG FIX: record the real user name ('admin'), not
                    # a masked '******' placeholder — later REST calls
                    # authenticate with these credentials.
                    ambari_info.user = u.name
                    ambari_info.password = u.password
                else:
                    ambari_client.add_ambari_user(u, ambari_info)
                    if 'admin' in u.groups:
                        admin_user = u.name
                        admin_password = u.password

            if not is_admin_provided:
                if admin_user is None:
                    raise ex.HadoopProvisionError("An Ambari user in the "
                                                  "admin group must be "
                                                  "configured.")
                ambari_info.user = admin_user
                ambari_info.password = admin_password
                ambari_client.delete_ambari_user('admin', ambari_info)
            # only one AMBARI service entry is expected
            break
def validate_cluster_creating(cluster):
    """Validate node-process counts and inter-service dependencies for a
    CDH cluster before provisioning.

    :raises ex.HadoopProvisionError: if the cm_api client library is
        missing.
    :raises ex.InvalidComponentCountException: for wrong process counts.
    :raises ex.RequiredServiceMissingException: for missing dependencies.
    """
    if not cmu.have_cm_api_libs():
        # BUG FIX: the adjacent string literals previously concatenated
        # to "plugin install'cm_api'" — a space was missing.
        LOG.error(
            _LE("For provisioning cluster with CDH plugin install "
                "'cm_api' package version 6.0.2 or later."))
        raise ex.HadoopProvisionError(_("'cm_api' is not installed."))

    # exactly one manager, namenode and secondary namenode are required
    mng_count = _get_inst_count(cluster, 'MANAGER')
    if mng_count != 1:
        raise ex.InvalidComponentCountException('MANAGER', 1, mng_count)

    nn_count = _get_inst_count(cluster, 'NAMENODE')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('NAMENODE', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'SECONDARYNAMENODE')
    if snn_count != 1:
        raise ex.InvalidComponentCountException('SECONDARYNAMENODE', 1,
                                                snn_count)

    # YARN is optional, but its pieces must be consistent
    rm_count = _get_inst_count(cluster, 'RESOURCEMANAGER')
    if rm_count not in [0, 1]:
        raise ex.InvalidComponentCountException('RESOURCEMANAGER', '0 or 1',
                                                rm_count)

    hs_count = _get_inst_count(cluster, 'JOBHISTORY')
    if hs_count not in [0, 1]:
        raise ex.InvalidComponentCountException('JOBHISTORY', '0 or 1',
                                                hs_count)

    if rm_count > 0 and hs_count < 1:
        raise ex.RequiredServiceMissingException('JOBHISTORY',
                                                 required_by='RESOURCEMANAGER')

    nm_count = _get_inst_count(cluster, 'NODEMANAGER')
    if rm_count == 0:
        if nm_count > 0:
            raise ex.RequiredServiceMissingException('RESOURCEMANAGER',
                                                     required_by='NODEMANAGER')

    # Oozie needs HDFS datanodes, YARN nodemanagers and a history server
    oo_count = _get_inst_count(cluster, 'OOZIE_SERVER')
    dn_count = _get_inst_count(cluster, 'DATANODE')
    if oo_count not in [0, 1]:
        raise ex.InvalidComponentCountException('OOZIE_SERVER', '0 or 1',
                                                oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'DATANODE', required_by='OOZIE_SERVER')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'NODEMANAGER', required_by='OOZIE_SERVER')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'JOBHISTORY', required_by='OOZIE_SERVER')
def delete_ambari_user(self, user_name, ambari_info):
    """Remove the named user via the Ambari REST API.

    :raises ex.HadoopProvisionError: if the DELETE does not return 200.
    """
    user_url = 'http://{0}/api/v1/users/{1}'.format(
        ambari_info.get_address(), user_name)

    response = self._delete(user_url, ambari_info)

    if response.status_code != 200:
        raise ex.HadoopProvisionError('Unable to delete Ambari user: {0}'
                                      ' : {1}'.format(user_name,
                                                      response.text))
def start_instance(instance):
    """Start every Hadoop/YARN process assigned to this instance's
    node group.

    :raises ex.HadoopProvisionError: for an unrecognized process name.
    """
    hdfs_processes = ('namenode', 'datanode')
    yarn_processes = ('resourcemanager', 'nodemanager')
    for process in instance.node_group.node_processes:
        if process in hdfs_processes:
            start_hadoop_process(instance, process)
        elif process in yarn_processes:
            start_yarn_process(instance, process)
        else:
            raise ex.HadoopProvisionError("Process %s is not supported"
                                          % process)
def _add_cluster(self, ambari_info, name):
    """Create the named cluster on the Ambari server, pinned to the
    handler's HDP stack version.

    :raises ex.HadoopProvisionError: if Ambari does not return 201.
    """
    add_cluster_url = 'http://{0}/api/v1/clusters/{1}'.format(
        ambari_info.get_address(), name)
    result = self._post(add_cluster_url, ambari_info,
                        data='{"Clusters": {"version" : "HDP-' +
                        self.handler.get_version() + '"}}')

    if result.status_code != 201:
        # lazy %-args: let the logging framework do the interpolation
        # instead of eager '%' formatting in the call
        LOG.error('Create cluster command failed. %s', result.text)
        raise ex.HadoopProvisionError(
            'Failed to add cluster: %s' % result.text)
def add_ambari_user(self, user, ambari_info):
    """Create an Ambari user with the given password and role list.

    :raises ex.HadoopProvisionError: if Ambari does not return 201.
    """
    user_url = 'http://{0}/api/v1/users/{1}'.format(
        ambari_info.get_address(), user.name)

    # BUG FIX: the password placeholder was a literal '******' mask, so
    # every user was created with that string as the password while the
    # real user.password argument passed to format() went unused.
    create_body = ('{{"Users":{{"password":"{0}","roles":"{1}"}} }}'.
                   format(user.password,
                          '%s' % ','.join(map(str, user.groups))))

    result = self._post(user_url, ambari_info, data=create_body)
    if result.status_code != 201:
        raise ex.HadoopProvisionError(
            'Unable to create Ambari user: {0}'.format(result.text))
def install_packages(remote, packages, timeout=1800):
    """Install OS packages on a remote node (Ubuntu or CentOS image).

    :param remote: remote handle to the instance
    :param packages: iterable of package names to install
    :param timeout: command timeout in seconds
    :raises ex.HadoopProvisionError: if the image OS is unsupported
    """
    distrib = _get_os_distrib(remote)
    if distrib == 'ubuntu':
        cmd = 'RUNLEVEL=1 apt-get install -y %s'
    elif distrib == 'centos':
        # BUG FIX: without -y yum prompts for confirmation, which hangs
        # the non-interactive remote command
        cmd = 'yum install -y %s'
    else:
        raise ex.HadoopProvisionError(
            _("OS on image is not supported by CDH plugin"))

    cmd = cmd % ' '.join(packages)
    _root(remote, cmd, timeout=timeout)
def update_ambari_admin_user(self, password, ambari_info):
    """Change the password of the stock Ambari 'admin' account.

    :param password: the new admin password
    :raises ex.HadoopProvisionError: if Ambari does not return 200.
    """
    old_pwd = ambari_info.password
    user_url = 'http://{0}/api/v1/users/admin'.format(
        ambari_info.get_address())

    # BUG FIX: both password fields contained literal '******' masks,
    # so the new and old passwords passed to format() were never used
    # and the credential update could not succeed.
    update_body = ('{{"Users":{{"roles":"admin","password":"{0}",'
                   '"old_password":"{1}"}} }}'.format(password, old_pwd))

    result = self._put(user_url, ambari_info, data=update_body)
    if result.status_code != 200:
        raise ex.HadoopProvisionError('Unable to update Ambari admin user'
                                      ' credentials: {0}'.format(
                                          result.text))
def _add_services_to_cluster(self, cluster_spec, ambari_info, name):
    """Register every deployed, non-AMBARI service with the cluster.

    :raises ex.HadoopProvisionError: if any POST returns a status other
        than 201 (created) or 409 (already exists).
    """
    add_service_url = 'http://{0}/api/v1/clusters/{1}/services/{2}'
    deployed_services = (s for s in cluster_spec.services
                         if s.deployed and s.name != 'AMBARI')
    for service in deployed_services:
        url = add_service_url.format(
            ambari_info.get_address(), name, service.name)
        response = self._post(url, ambari_info)
        # 409 Conflict just means the service already exists
        if response.status_code not in [201, 409]:
            LOG.error('Create service command failed. {0}'.format(
                response.text))
            raise ex.HadoopProvisionError(
                'Failed to add services to cluster: %s' % response.text)
def _start_processes(instance, processes):
    """Start the given Hadoop/YARN daemons on the instance over SSH.

    :raises ex.HadoopProvisionError: for an unrecognized process name.
    """
    hdfs_daemons = ('namenode', 'datanode')
    yarn_daemons = ('resourcemanager', 'nodemanager')
    with instance.remote() as r:
        for process in processes:
            if process in hdfs_daemons:
                command = ('sudo su - -c "hadoop-daemon.sh start %s" hadoop'
                           % process)
            elif process in yarn_daemons:
                command = ('sudo su - -c "yarn-daemon.sh start %s" hadoop'
                           % process)
            else:
                raise ex.HadoopProvisionError(
                    _("Process %s is not supported") % process)
            r.execute_command(command)
def _add_configurations_to_cluster(
        self, cluster_spec, ambari_info, name):
    """Push each deployed configuration type to the cluster.

    Configuration types that already exist on the cluster are skipped,
    except 'core-site' and 'global', which are re-posted with a bumped
    version tag so the desired values take effect.

    :raises ex.HadoopProvisionError: if any configuration PUT fails.
    """
    existing_config_url = 'http://{0}/api/v1/clusters/{1}?fields=' \
                          'Clusters/desired_configs'.format(
                              ambari_info.get_address(), name)

    result = self._get(existing_config_url, ambari_info)

    json_result = json.loads(result.text)
    existing_configs = json_result['Clusters']['desired_configs']

    configs = cluster_spec.get_deployed_configurations()
    # 'ambari' is not a cluster configuration type; drop it if present
    if 'ambari' in configs:
        configs.remove('ambari')
    if len(configs) == len(existing_configs):
        # nothing to do
        return

    config_url = 'http://{0}/api/v1/clusters/{1}'.format(
        ambari_info.get_address(), name)

    body = {}
    clusters = {}
    version = 1
    body['Clusters'] = clusters
    for config_name in configs:
        if config_name in existing_configs:
            if config_name == 'core-site' or config_name == 'global':
                # re-post these types with the next version; tags look
                # like 'v<N>', so strip the prefix and increment
                existing_version = existing_configs[config_name]['tag']\
                    .lstrip('v')
                version = int(existing_version) + 1
            else:
                continue

        # one PUT per configuration type; 'body' is reused with the
        # desired_config entry swapped out each iteration
        config_body = {}
        clusters['desired_config'] = config_body
        config_body['type'] = config_name
        config_body['tag'] = 'v%s' % version
        config_body['properties'] = \
            cluster_spec.configurations[config_name]
        result = self._put(config_url, ambari_info, data=json.dumps(body))
        if result.status_code != 200:
            LOG.error(
                'Set configuration command failed. {0}'.format(
                    result.text))
            raise ex.HadoopProvisionError(
                'Failed to set configurations on cluster: %s'
                % result.text)
def _finalize_ambari_state(self, ambari_info):
    """Persist a CLUSTER_STARTED marker so the Ambari UI treats the
    cluster as fully provisioned.

    :raises ex.HadoopProvisionError: if the persist POST fails.
    """
    LOG.info('Finalizing Ambari cluster state.')

    persist_state_uri = 'http://{0}/api/v1/persist'.format(
        ambari_info.get_address())
    # this post data has non-standard format because persist
    # resource doesn't comply with Ambari API standards
    persist_data = ('{ "CLUSTER_CURRENT_STATUS":'
                    '"{\\"clusterState\\":\\"CLUSTER_STARTED_5\\"}" }')

    response = self._post(persist_state_uri, ambari_info,
                          data=persist_data)
    if response.status_code not in (201, 202):
        LOG.warning('Finalizing of Ambari cluster state failed. {0}'.
                    format(response.text))
        raise ex.HadoopProvisionError('Unable to finalize Ambari state.')
def _add_components_to_services(self, cluster_spec, ambari_info, name):
    """Register each deployed, non-AMBARI service's components with the
    cluster.

    A 409 (Conflict) response means the component already exists and is
    tolerated.  Log/error strings are now wrapped in translation
    markers for consistency with the sibling implementation of this
    method elsewhere in the file.

    :raises ex.HadoopProvisionError: on any other non-201 status.
    """
    add_component_url = ('http://{0}/api/v1/clusters/{1}/services/{'
                         '2}/components/{3}')
    for service in cluster_spec.services:
        if service.deployed and service.name != 'AMBARI':
            for component in service.components:
                result = self._post(
                    add_component_url.format(ambari_info.get_address(),
                                             name, service.name,
                                             component.name),
                    ambari_info)
                if result.status_code not in [201, 409]:
                    LOG.error(
                        _LE('Create component command failed. {0}').format(
                            result.text))
                    raise ex.HadoopProvisionError(
                        _('Failed to add components to services: %s')
                        % result.text)
def _add_components_to_services(self, cluster_spec, ambari_info, name):
    """Register each Ambari-managed, deployed service's components with
    the cluster.

    A 409 (Conflict) response means the component already exists and is
    tolerated.

    :raises ex.HadoopProvisionError: on any other non-201 status.
    """
    add_component_url = ('http://{0}/api/v1/clusters/{1}/services/{'
                         '2}/components/{3}')
    for service in cluster_spec.services:
        # Make sure the service is deployed and is managed by Ambari
        if service.deployed and service.ambari_managed:
            for component in service.components:
                result = self._post(
                    add_component_url.format(ambari_info.get_address(),
                                             name, service.name,
                                             component.name),
                    ambari_info)
                if result.status_code not in [201, 409]:
                    LOG.error(
                        _LE('Create component command failed. {0}').format(
                            result.text))
                    raise ex.HadoopProvisionError(
                        _('Failed to add components to services: %s')
                        % result.text)
def install_rpms(self, r):
    """Install the Ambari repo definition and the EPEL release rpm,
    falling back to a check for a pre-installed local copy.

    :raises ex.HadoopProvisionError: if neither the repo download nor a
        local install is available.
    """
    LOG.info(
        _LI("{0}: Installing rpm's ...").format(self.instance.hostname()))

    # TODO(jspeidel): based on image type, use correct command
    curl_cmd = ('curl -f -s -o /etc/yum.repos.d/ambari.repo %s' %
                self.ambari_rpm)
    ret_code, stdout = r.execute_command(curl_cmd, run_as_root=True,
                                         raise_when_error=False)
    if ret_code != 0:
        # repo download failed; the image may already carry the rpms
        LOG.info(
            _LI("{0}: Unable to install rpm's from repo, "
                "checking for local install.").format(
                    self.instance.hostname()))
        if not self.rpms_installed():
            raise ex.HadoopProvisionError(
                _('Failed to install Hortonworks Ambari'))
        return

    yum_cmd = 'yum -y install %s' % EPEL_RELEASE_PACKAGE_NAME
    r.execute_command(yum_cmd, run_as_root=True)
def _start_components(self, ambari_info, auth, cluster_name, servers,
                      cluster_spec):
    """Start all non-client host components that are in the INSTALLED
    state on the given servers.

    :raises ex.HadoopProvisionError: if the installed components cannot
        be queried.
    """
    # query for all the host components in the INSTALLED state,
    # then get a list of the client services in the list
    installed_uri = ('http://{0}/api/v1/clusters/{'
                     '1}/host_components?HostRoles/state=INSTALLED&'
                     'HostRoles/host_name.in({2})'.format(
                         ambari_info.get_address(), cluster_name,
                         self._get_host_list(servers)))
    result = self._get(installed_uri, ambari_info)
    if result.status_code == 200:
        LOG.debug('GET response: {0}'.format(result.text))
        json_result = json.loads(result.text)
        items = json_result['items']

        client_set = cluster_spec.get_components_for_type('CLIENT')
        inclusion_list = list(
            set([x['HostRoles']['component_name']
                 for x in items
                 if x['HostRoles']['component_name'] not in client_set]))

        # query and start all non-client components on the given set of
        # hosts
        # TODO(jspeidel): Provide request context
        body = '{"HostRoles": {"state" : "STARTED"}}'
        start_uri = ('http://{0}/api/v1/clusters/{'
                     '1}/host_components?HostRoles/state=INSTALLED&'
                     'HostRoles/host_name.in({2})'
                     '&HostRoles/component_name.in({3})'.format(
                         ambari_info.get_address(), cluster_name,
                         self._get_host_list(servers),
                         ",".join(inclusion_list)))
        self._exec_ambari_command(ambari_info, body, start_uri)
    else:
        # BUG FIX: requests' Response exposes 'status_code', not
        # 'status'; the original raised AttributeError on this path
        # instead of the intended provisioning error.
        raise ex.HadoopProvisionError(
            'Unable to determine installed service '
            'components in scaled instances. status'
            ' code returned = {0}'.format(result.status_code))
def _await_agents(instances):
    """Block until every instance's agent has registered with Cloudera
    Manager, or raise once the timeout elapses.

    :raises ex.HadoopProvisionError: if any agent never connects.
    """
    api = cu.get_api_client(instances[0].node_group.cluster)
    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for agent connected to manager"
              % {'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        hostnames = [i.fqdn() for i in instances]
        hostnames_to_manager = [h.hostname
                                for h in api.get_all_hosts('full')]
        missing = [h for h in hostnames
                   if h not in hostnames_to_manager]
        if not missing:
            # every agent has checked in
            return
        context.sleep(5)
    raise ex.HadoopProvisionError(
        _("Cloudera agents failed to connect to"
          " Cloudera Manager"))
def install_swift_integration(self, r):
    """Install the Hadoop-Swift integration rpm, preferring the remote
    package and falling back to a copy baked into the image.

    :raises ex.HadoopProvisionError: if neither rpm can be installed.
    """
    LOG.info(
        _LI("{0}: Installing swift integration ...").format(
            self.instance.hostname()))

    base_rpm_cmd = 'rpm -U --quiet '
    ret_code, stdout = r.execute_command(base_rpm_cmd + HADOOP_SWIFT_RPM,
                                         run_as_root=True,
                                         raise_when_error=False)
    if ret_code == 0:
        return

    # the remote rpm failed; look for a local copy on the image
    LOG.info(
        _LI("{0}: Unable to install swift integration from "
            "source, checking for local rpm.").format(
                self.instance.hostname()))
    ret_code, stdout = r.execute_command('ls ' + HADOOP_SWIFT_LOCAL_RPM,
                                         run_as_root=True,
                                         raise_when_error=False)
    if ret_code != 0:
        raise ex.HadoopProvisionError(
            _('Failed to install Hadoop Swift integration'))
    r.execute_command(base_rpm_cmd + HADOOP_SWIFT_LOCAL_RPM,
                      run_as_root=True)
def wrapper(*args, **kwargs):
    """Run each command yielded by the wrapped generator, failing fast
    on the first unsuccessful result."""
    for command in f(*args, **kwargs):
        outcome = command.wait()
        if not outcome.success:
            raise ex.HadoopProvisionError(outcome.resultMessage)