def main(args):
    image = '{}/{}/topology_nodebase:{}'.format(args.registry,
                                                args.namespace or DEFAULT_NAMESPACE,
                                                args.operating_system or DEFAULT_OPERATING_SYSTEM)

    if args.node_disks:
        node_disks = yaml.load(args.node_disks)
        logger.debug('Parsed node disks: %s.', node_disks)

    cluster = Cluster(*[Node(hostname=hostname,
                             group='nodes',
                             image=image,
                             devices=node_disks.get(hostname) if args.node_disks else None)
                        for hostname in args.nodes])
    cluster.start(args.network)
def main(args):
    quiet = not args.verbose

    node_image = '{}/{}/topology_apache_pulsar:pulsar-{}'.format(args.registry,
                                                                 args.namespace or DEFAULT_NAMESPACE,
                                                                 args.pulsar_version)
    ports = [{WEB_SERVICE_PORT: WEB_SERVICE_PORT} if args.predictable else WEB_SERVICE_PORT,
             {WEB_SERVICE_TLS_PORT: WEB_SERVICE_TLS_PORT} if args.predictable else WEB_SERVICE_TLS_PORT,
             {BROKER_SERVICE_PORT: BROKER_SERVICE_PORT} if args.predictable else BROKER_SERVICE_PORT,
             {BROKER_SERVICE_TLS_PORT: BROKER_SERVICE_TLS_PORT} if args.predictable else BROKER_SERVICE_TLS_PORT]

    clusterdock_config_host_dir = os.path.realpath(os.path.expanduser(args.clusterdock_config_directory))
    volumes = [{clusterdock_config_host_dir: CLUSTERDOCK_CLIENT_CONTAINER_DIR}]

    proxy_node = Node(hostname=args.proxy_node_name, group='proxy', image=node_image,
                      ports=ports, volumes=volumes)
    broker_nodes = [Node(hostname=hostname, group='broker', image=node_image, volumes=volumes)
                    for hostname in args.broker_nodes]
    zk_nodes = [Node(hostname=hostname, group='zookeeper', image=node_image, volumes=volumes)
                for hostname in args.zookeeper_nodes]
    nodes = [proxy_node] + broker_nodes + zk_nodes

    cluster = Cluster(*nodes)
    cluster.start(args.network)

    logger.info('Starting pulsar cluster (%s) version %s ...', args.pulsar_cluster_name, args.pulsar_version)

    # ZooKeeper
    for idx, node in enumerate(zk_nodes, start=1):
        zookeeper_conf = node.get_file(ZOOKEEPER_CONF)
        zookeeper_properties = PropertiesFile.loads(zookeeper_conf)
        for srvidx, srvnode in enumerate(zk_nodes, start=1):
            zookeeper_properties['server.{}'.format(srvidx)] = '{}.{}:2888:3888'.format(srvnode.hostname,
                                                                                        cluster.network)
        node.put_file(ZOOKEEPER_CONF, PropertiesFile.dumps(zookeeper_properties))

        zookeeper_commands = [
            'mkdir -p {}/data/zookeeper'.format(PULSAR_HOME),
            'echo {} > {}/data/zookeeper/myid'.format(idx, PULSAR_HOME),
            '{}/bin/pulsar-daemon start zookeeper'.format(PULSAR_HOME)
        ]
        execute_node_command(node, ' && '.join(zookeeper_commands), quiet, 'ZooKeeper start failed')

    web_service_url = 'http://{}.{}:{}'.format(proxy_node.hostname, cluster.network, WEB_SERVICE_PORT)
    web_service_url_tls = 'https://{}.{}:{}'.format(proxy_node.hostname, cluster.network, WEB_SERVICE_TLS_PORT)
    broker_service_url = 'pulsar://{}.{}:{}'.format(proxy_node.hostname, cluster.network, BROKER_SERVICE_PORT)
    broker_service_url_tls = 'pulsar+ssl://{}.{}:{}'.format(proxy_node.hostname, cluster.network,
                                                            BROKER_SERVICE_TLS_PORT)

    init_cluster_cmd = ('{home}/bin/pulsar initialize-cluster-metadata'
                        ' --cluster {cluster_name}'
                        ' --zookeeper {zkhostname}.{network}:2181'
                        ' --configuration-store {zkhostname}.{network}:2181'
                        ' --web-service-url {web_service_url}'
                        ' --web-service-url-tls {web_service_url_tls}'
                        ' --broker-service-url {broker_service_url}'
                        ' --broker-service-url-tls {broker_service_url_tls}'
                        .format(home=PULSAR_HOME,
                                cluster_name=args.pulsar_cluster_name,
                                zkhostname=zk_nodes[0].hostname,
                                hostname=proxy_node.hostname,
                                network=cluster.network,
                                web_service_url=web_service_url,
                                web_service_url_tls=web_service_url_tls,
                                broker_service_url=broker_service_url,
                                broker_service_url_tls=broker_service_url_tls))
    execute_node_command(zk_nodes[0], init_cluster_cmd, quiet, 'Cluster initialization failed')

    zk_servers_conf = ','.join(['{}.{}:2181'.format(node.hostname, cluster.network) for node in zk_nodes])

    # BookKeeper bookies
    for node in broker_nodes:
        bookkeeper_conf = node.get_file(BOOKKEEPER_CONF)
        bookkeeper_properties = PropertiesFile.loads(bookkeeper_conf)
        bookkeeper_properties['zkServers'] = zk_servers_conf
        node.put_file(BOOKKEEPER_CONF, PropertiesFile.dumps(bookkeeper_properties))
        execute_node_command(node, '{}/bin/pulsar-daemon start bookie'.format(PULSAR_HOME),
                             quiet, 'BookKeeper start failed')
        execute_node_command(node, '{}/bin/bookkeeper shell bookiesanity'.format(PULSAR_HOME),
                             quiet, 'BookKeeper sanity check failed')

    # brokers
    for node in broker_nodes:
        broker_conf = node.get_file(BROKER_CONF)
        broker_properties = PropertiesFile.loads(broker_conf)
        broker_properties.update({'zookeeperServers': zk_servers_conf,
                                  'configurationStoreServers': zk_servers_conf,
                                  'clusterName': args.pulsar_cluster_name})
        node.put_file(BROKER_CONF, PropertiesFile.dumps(broker_properties))

    # proxy
    proxy_conf = proxy_node.get_file(PROXY_CONF)
    proxy_properties = PropertiesFile.loads(proxy_conf)
    proxy_properties.update({'zookeeperServers': zk_servers_conf,
                             'configurationStoreServers': zk_servers_conf,
                             'httpNumThreads': '8'})
    proxy_node.put_file(PROXY_CONF, PropertiesFile.dumps(proxy_properties))

    # TLS
    execute_node_command(proxy_node, 'rm -rf {}'.format(TLS_DIR), quiet=quiet)
    if args.tls:
        setup_commands = [
            'mkdir -p {}'.format(TLS_CLIENT_DIR),
            'wget -P {} {}'.format(TLS_DIR, TLS_CONF_URL),
            'mkdir -p {dir}/certs {dir}/crl {dir}/newcerts {dir}/private'.format(dir=TLS_DIR),
            'chmod 700 {}/private'.format(TLS_DIR),
            'touch {}/index.txt'.format(TLS_DIR),
            'echo "unique_subject = no" > {}/index.txt.attr'.format(TLS_DIR),
            'echo 1000 > {}/serial'.format(TLS_DIR),
        ]
        execute_node_command(proxy_node, ' && '.join(setup_commands), quiet, 'TLS system setup failed')

        ca_auth_commands = [
            'export CA_HOME={}'.format(TLS_DIR),
            'openssl genrsa -out {dir}/private/ca.key.pem 4096'.format(dir=TLS_DIR),
            'chmod 400 {}/private/ca.key.pem'.format(TLS_DIR),
            ('openssl req -config {dir}/openssl.cnf -key {dir}/private/ca.key.pem'
             ' -new -x509 -days 7300 -sha256 -extensions v3_ca -out {dir}/certs/ca.cert.pem'
             ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=*"').format(dir=TLS_DIR),
            'chmod 444 {}/certs/ca.cert.pem'.format(TLS_DIR),
            'cp {}/certs/ca.cert.pem {}'.format(TLS_DIR, TLS_CLIENT_DIR)
        ]
        execute_node_command(proxy_node, ' && '.join(ca_auth_commands), quiet,
                             'Certificate authority creation failed')

        server_cert_commands = [
            'export CA_HOME={}'.format(TLS_DIR),
            'openssl genrsa -out {}/broker.key.pem 2048'.format(TLS_DIR),
            ('openssl pkcs8 -topk8 -inform PEM -outform PEM -in {dir}/broker.key.pem'
             ' -out {dir}/broker.key-pk8.pem -nocrypt').format(dir=TLS_DIR),
            # The common name (CN) needs to be *.<nw> so that hosts on network <nw> can access the Pulsar cluster.
            ('openssl req -config {dir}/openssl.cnf -key {dir}/broker.key.pem -new -sha256 -out {dir}/broker.csr.pem'
             ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=*.{nw}"').format(dir=TLS_DIR,
                                                                                       nw=cluster.network),
            ('openssl ca -batch -config {dir}/openssl.cnf -extensions server_cert -days 1000 -notext -md sha256'
             ' -in {dir}/broker.csr.pem -out {dir}/broker.cert.pem').format(dir=TLS_DIR)
        ]
        execute_node_command(proxy_node, ' && '.join(server_cert_commands), quiet,
                             'Broker certificate creation failed')

        for node in broker_nodes:
            broker_conf = node.get_file(BROKER_CONF)
            broker_properties = PropertiesFile.loads(broker_conf)
            broker_properties.update({'brokerServicePortTls': '6651',
                                      'tlsEnabled': 'true',
                                      'tlsCertificateFilePath': '{}/broker.cert.pem'.format(TLS_DIR),
                                      'tlsKeyFilePath': '{}/broker.key-pk8.pem'.format(TLS_DIR),
                                      'tlsTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR),
                                      'webServicePortTls': '8443'})
            node.put_file(BROKER_CONF, PropertiesFile.dumps(broker_properties))

        proxy_conf = proxy_node.get_file(PROXY_CONF)
        proxy_properties = PropertiesFile.loads(proxy_conf)
        proxy_properties.update({'servicePortTls': '6651',
                                 'tlsEnabledInProxy': 'true',
                                 'tlsCertificateFilePath': '{}/broker.cert.pem'.format(TLS_DIR),
                                 'tlsKeyFilePath': '{}/broker.key-pk8.pem'.format(TLS_DIR),
                                 'tlsTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR),
                                 'tlsEnabledWithBroker': 'true',
                                 'brokerClientTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR),
                                 'webServicePortTls': '8443'})
        proxy_node.put_file(PROXY_CONF, PropertiesFile.dumps(proxy_properties))

        for node in nodes:
            client_conf = node.get_file(CLIENT_CONF)
            client_properties = PropertiesFile.loads(client_conf)
            client_properties.update({'webServiceUrl': web_service_url_tls,
                                      'brokerServiceUrl': broker_service_url_tls,
                                      'useTls': 'true',
                                      'tlsAllowInsecureConnection': 'false',
                                      'tlsTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR)})
            node.put_file(CLIENT_CONF, PropertiesFile.dumps(client_properties))

    # TLS auth
    if args.tls == 'authentication':
        client_cert_commands = [
            'export CA_HOME={}'.format(TLS_DIR),
            'openssl genrsa -out {}/admin.key.pem 2048'.format(TLS_DIR),
            ('openssl pkcs8 -topk8 -inform PEM -outform PEM -in {dir}/admin.key.pem'
             ' -out {dir}/admin.key-pk8.pem -nocrypt').format(dir=TLS_DIR),
            # The common name (CN) needs to be admin - the same as the user principal in Pulsar.
            ('openssl req -config {dir}/openssl.cnf -key {dir}/admin.key.pem -new -sha256 -out {dir}/admin.csr.pem'
             ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=admin"').format(dir=TLS_DIR),
            ('openssl ca -batch -config {dir}/openssl.cnf -extensions usr_cert -days 1000 -notext -md sha256'
             ' -in {dir}/admin.csr.pem -out {dir}/admin.cert.pem').format(dir=TLS_DIR),
            'mv {}/admin.* {}'.format(TLS_DIR, TLS_CLIENT_DIR)
        ]
        execute_node_command(proxy_node, ' && '.join(client_cert_commands), quiet,
                             'Client certificate creation failed')

        proxy_cert_commands = [
            'export CA_HOME={}'.format(TLS_DIR),
            'openssl genrsa -out {}/proxy.key.pem 2048'.format(TLS_DIR),
            ('openssl pkcs8 -topk8 -inform PEM -outform PEM -in {dir}/proxy.key.pem'
             ' -out {dir}/proxy.key-pk8.pem -nocrypt').format(dir=TLS_DIR),
            # The common name (CN) needs to be proxyadmin - the same as the proxy principal in Pulsar.
            ('openssl req -config {dir}/openssl.cnf -key {dir}/proxy.key.pem -new -sha256 -out {dir}/proxy.csr.pem'
             ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=proxyadmin"').format(dir=TLS_DIR),
            ('openssl ca -batch -config {dir}/openssl.cnf -extensions usr_cert -days 1000 -notext -md sha256'
             ' -in {dir}/proxy.csr.pem -out {dir}/proxy.cert.pem').format(dir=TLS_DIR)
        ]
        execute_node_command(proxy_node, ' && '.join(proxy_cert_commands), quiet,
                             'Proxy certificate creation failed')

        for node in broker_nodes:
            broker_conf = node.get_file(BROKER_CONF)
            broker_properties = PropertiesFile.loads(broker_conf)
            broker_properties.update({
                'authenticationEnabled': 'true',
                'authenticationProviders': 'org.apache.pulsar.broker.authentication.AuthenticationProviderTls',
                'proxyRoles': 'proxyadmin',
                'superUserRoles': 'proxyadmin,admin'})
            node.put_file(BROKER_CONF, PropertiesFile.dumps(broker_properties))

        proxy_conf = proxy_node.get_file(PROXY_CONF)
        proxy_properties = PropertiesFile.loads(proxy_conf)
        proxy_properties.update({
            'authenticationEnabled': 'true',
            'authenticationProviders': 'org.apache.pulsar.broker.authentication.AuthenticationProviderTls',
            'brokerClientAuthenticationPlugin': 'org.apache.pulsar.client.impl.auth.AuthenticationTls',
            'brokerClientAuthenticationParameters': ('tlsCertFile:{dir}/proxy.cert.pem,'
                                                     'tlsKeyFile:{dir}/proxy.key-pk8.pem').format(dir=TLS_DIR),
            'superUserRoles': 'admin'})
        proxy_node.put_file(PROXY_CONF, PropertiesFile.dumps(proxy_properties))

        for node in nodes:
            client_conf = node.get_file(CLIENT_CONF)
            client_properties = PropertiesFile.loads(client_conf)
            client_properties.update({'authPlugin': 'org.apache.pulsar.client.impl.auth.AuthenticationTls',
                                      'authParams': ('tlsCertFile:{dir}/admin.cert.pem,tlsKeyFile:'
                                                     '{dir}/admin.key-pk8.pem').format(dir=TLS_CLIENT_DIR)})
            node.put_file(CLIENT_CONF, PropertiesFile.dumps(client_properties))

    # Start broker nodes and the proxy node.
    for node in broker_nodes:
        execute_node_command(node, '{}/bin/pulsar-daemon start broker'.format(PULSAR_HOME),
                             quiet, 'Broker start failed')

    out_file = '{}/logs/pulsar-proxy-{}.{}.out'.format(PULSAR_HOME, proxy_node.hostname, cluster.network)
    execute_node_command(proxy_node, 'mkdir -p {}/logs'.format(PULSAR_HOME), quiet)
    execute_node_command(proxy_node,
                         'nohup {}/bin/pulsar proxy > "{}" 2>&1 < /dev/null &'.format(PULSAR_HOME, out_file),
                         quiet, 'Proxy start failed')

    logger.info('Performing health check on Pulsar cluster (%s) ...', args.pulsar_cluster_name)

    def condition(node, cluster_name, command):
        command_status = node.execute(command, quiet=True)
        return (command_status.exit_code == 0
                and command_status.output.splitlines()[-1].strip().strip('"') == cluster_name)
    wait_for_condition(condition=condition,
                       condition_args=[proxy_node, args.pulsar_cluster_name,
                                       '{}/bin/pulsar-admin clusters list'.format(PULSAR_HOME)])

    logger.info('Pulsar cluster (%s) can be reached on docker network (%s):\n%s \n%s',
                args.pulsar_cluster_name, cluster.network,
                textwrap.indent('Web service URL: {}'.format(web_service_url), prefix=' '),
                textwrap.indent('Broker service URL: {}'.format(broker_service_url), prefix=' '))
    logger.log(logging.INFO if args.tls else -1,
               'Pulsar cluster (%s) can be reached securely on docker network (%s):\n%s \n%s',
               args.pulsar_cluster_name, cluster.network,
               textwrap.indent('Secure web service URL: {}'.format(web_service_url_tls), prefix=' '),
               textwrap.indent('Secure broker service URL: {}'.format(broker_service_url_tls), prefix=' '))
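
# NOTE: execute_node_command is used throughout the Pulsar topology above but is not part of
# this snippet. A minimal sketch of what it is assumed to do (run a command on a node and fail
# loudly with the given message); the real helper may differ.
def execute_node_command(node, command, quiet, error_message=None):
    # Run through bash so that '&&'-joined command strings behave as expected.
    command_status = node.execute("bash -c '{}'".format(command), quiet=quiet)
    if command_status.exit_code != 0:
        raise Exception('{} (exit code {}). Output: {}'.format(error_message or 'Command failed',
                                                               command_status.exit_code,
                                                               command_status.output))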
def main(args):
    kerberos_volume_dir = os.path.expanduser(args.kerberos_config_directory)

    image = '{}/{}/topology_nodebase:{}'.format(args.registry,
                                                args.namespace or DEFAULT_NAMESPACE,
                                                args.operating_system or DEFAULT_OPERATING_SYSTEM)
    nodes = [Node(hostname=hostname, group='nodes', image=image,
                  volumes=[{kerberos_volume_dir: KERBEROS_VOLUME_DIR}])
             for hostname in args.nodes]

    kdc_image = '{}/{}/topology_nodebase_kerberos:{}'.format(args.registry,
                                                             args.namespace or DEFAULT_NAMESPACE,
                                                             args.operating_system or DEFAULT_OPERATING_SYSTEM)
    kdc_hostname = args.kdc_node[0]
    kdc_node = Node(hostname=kdc_hostname, group='kdc', image=kdc_image,
                    volumes=[{kerberos_volume_dir: KERBEROS_VOLUME_DIR}])

    cluster = Cluster(kdc_node, *nodes)
    cluster.start(args.network)

    logger.info('Updating KDC configurations ...')
    realm = cluster.network.upper()

    krb5_conf_data = kdc_node.get_file(KDC_KRB5_CONF_FILENAME)
    kdc_node.put_file(KDC_KRB5_CONF_FILENAME,
                      re.sub(r'EXAMPLE.COM', realm,
                             re.sub(r'example.com', cluster.network,
                                    re.sub(r'kerberos.example.com',
                                           r'{}.{}'.format(kdc_hostname, cluster.network),
                                           krb5_conf_data))))

    kdc_conf_data = kdc_node.get_file(KDC_CONF_FILENAME)
    kdc_node.put_file(KDC_CONF_FILENAME,
                      re.sub(r'EXAMPLE.COM', realm,
                             re.sub(r'\[kdcdefaults\]',
                                    r'[kdcdefaults]\n max_renewablelife = 7d\n max_life = 1d',
                                    kdc_conf_data)))

    acl_data = kdc_node.get_file(KDC_ACL_FILENAME)
    kdc_node.put_file(KDC_ACL_FILENAME, re.sub(r'EXAMPLE.COM', realm, acl_data))

    logger.info('Starting KDC ...')
    kdc_commands = [
        'kdb5_util create -s -r {realm} -P kdcadmin'.format(realm=realm),
        'kadmin.local -q "addprinc -pw {admin_pw} admin/admin@{realm}"'.format(admin_pw='acladmin',
                                                                               realm=realm)
    ]
    # Add the following commands before starting the krb5kdc and kadmin daemons.
    if args.kerberos_principals:
        principal_list = ['{}@{}'.format(principal, realm)
                          for principal in args.kerberos_principals.split(',')]
        create_principals_cmds = ['kadmin.local -q "addprinc -randkey {}"'.format(principal)
                                  for principal in principal_list]
        kdc_commands.extend(create_principals_cmds)

        kdc_commands.append('rm -f {}'.format(KDC_KEYTAB_FILENAME))
        create_keytab_cmd = 'kadmin.local -q "xst -norandkey -k {} {}" '.format(KDC_KEYTAB_FILENAME,
                                                                                ' '.join(principal_list))
        kdc_commands.append(create_keytab_cmd)
    kdc_commands.extend(['krb5kdc', 'kadmind', 'authconfig --enablekrb5 --update'])
    kdc_commands.append('cp -f {} {}'.format(KDC_KRB5_CONF_FILENAME, KERBEROS_VOLUME_DIR))
    if args.kerberos_principals:
        kdc_commands.append('chmod 644 {}'.format(KDC_KEYTAB_FILENAME))

    kdc_node.execute(command="bash -c '{}'".format('; '.join(kdc_commands)), quiet=not args.verbose)

    logger.info('Validating service health ...')
    _validate_service_health(node=kdc_node, services=['krb5kdc', 'kadmin'], quiet=not args.verbose)
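
# NOTE: _validate_service_health is called above but not defined in this snippet. A minimal
# sketch, assuming it just polls `service <name> status` on the node until every listed service
# reports success and that clusterdock's wait_for_condition utility is importable here; the
# real helper may differ.
def _validate_service_health(node, services, quiet=True):
    def condition(node, services):
        return all(node.execute('service {} status'.format(service), quiet=quiet).exit_code == 0
                   for service in services)
    wait_for_condition(condition=condition, condition_args=[node, services],
                       time_between_checks=3, timeout=60)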
def main(args):
    quiet = not args.verbose
    print_topology_meta(args.topology)

    if args.include_services and args.exclude_services:
        raise ValueError('Cannot pass both --include-services and --exclude-services.')

    image_prefix = '{}/{}/topology_hdp:hdp{}_ambari{}'.format(args.registry,
                                                              args.namespace or DEFAULT_NAMESPACE,
                                                              args.hdp_version, args.ambari_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    clusterdock_config_host_dir = os.path.realpath(os.path.expanduser(args.clusterdock_config_directory))
    volumes = [{clusterdock_config_host_dir: CLUSTERDOCK_CLIENT_CONTAINER_DIR}]

    primary_node = Node(hostname=args.primary_node[0], group='primary', volumes=volumes,
                        image=primary_node_image,
                        ports=[{AMBARI_PORT: AMBARI_PORT} if args.predictable else AMBARI_PORT])
    secondary_nodes = [Node(hostname=hostname, group='secondary', volumes=volumes,
                            image=secondary_node_image)
                       for hostname in args.secondary_nodes]

    cluster = Cluster(primary_node, *secondary_nodes)
    cluster.primary_node = primary_node
    cluster.secondary_nodes = secondary_nodes
    for node in cluster.nodes:
        node.volumes.append({'/sys/fs/cgroup': '/sys/fs/cgroup'})
        # Do not use tempfile.mkdtemp, as systemd won't be able to bring services up when the
        # temp directory ends up being created in /var/tmp/.
        node.volumes.append(['/run', '/run/lock'])
    cluster.start(args.network)

    hdp_version_tuple = version_tuple(args.hdp_version)

    logger.debug('Starting PostgreSQL for Ambari server ...')

    # Need this as the init system in Docker initially misreports on postgres start.
    # Check https://github.com/docker-library/postgres/issues/146 for more.
    def condition():
        primary_node.execute('service postgresql restart', quiet=quiet)
        if '1 row' in primary_node.execute('PGPASSWORD=bigdata psql ambari '
                                           '-U ambari -h localhost -c "select 1"',
                                           quiet=quiet).output:
            return True
    wait_for_condition(condition=condition, time_between_checks=2)

    def condition():
        if 'running' in primary_node.execute('service postgresql status', quiet=quiet).output:
            return True
    wait_for_condition(condition=condition)

    # If images are set to start Ambari server/agents, give them some time to recover the right status.
    time.sleep(10)

    _update_node_names(cluster, quiet=quiet)

    # The HDP topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than 2 nodes is started, some modifications need to be done.
    if len(secondary_nodes) > 1:
        _remove_files(nodes=secondary_nodes[1:], files=['/hadoop/hdfs/data/current/*'], quiet=quiet)

    logger.info('Starting Ambari server ...')
    primary_node.execute('ambari-server start', quiet=quiet)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>`` so
    # use that instead of the hostname if the host name is ``moby``.
    hostname = ('localhost' if client.info().get('Name') == 'moby'
                else socket.getaddrinfo(socket.gethostname(), 0, flags=socket.AI_CANONNAME)[0][3])
    port = cluster.primary_node.host_ports.get(AMBARI_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Ambari server is now reachable at %s', server_url)

    logger.info('Starting Ambari agents ...')
    for node in cluster:
        logger.debug('Starting Ambari agent on %s ...', node.fqdn)
        node.execute('ambari-agent start', quiet=quiet)

    ambari = Ambari(server_url, username='******', password='******')

    def condition(ambari, cluster):
        cluster_hosts = {node.fqdn for node in cluster}
        ambari_hosts = {host.host_name for host in ambari.hosts}
        logger.debug('Cluster hosts: %s; Ambari hosts: %s', cluster_hosts, ambari_hosts)
        return cluster_hosts == ambari_hosts
    wait_for_condition(condition=condition, condition_args=[ambari, cluster])

    service_types_to_leave = (args.include_services.upper().split(',')
                              if args.include_services else [])
    service_types_to_remove = (args.exclude_services.upper().split(',')
                               if args.exclude_services else [])
    if service_types_to_leave or service_types_to_remove:
        for service in list(ambari.clusters(DEFAULT_CLUSTER_NAME).services):
            service_name = service.service_name.upper()
            if (service_name in service_types_to_remove
                    or (service_types_to_leave and service_name not in service_types_to_leave)):
                logger.info('Removing cluster service (name = %s) ...', service_name)
                service.delete()

    for node in secondary_nodes[1:]:
        logger.info('Adding %s to cluster ...', node.fqdn)
        ambari.clusters(DEFAULT_CLUSTER_NAME).hosts.create(node.fqdn)
        secondary_node = ambari.clusters(DEFAULT_CLUSTER_NAME).hosts(secondary_nodes[0].fqdn)
        for component in secondary_node.components:
            logger.debug('Adding component (%s) to cluster on host (%s) ...',
                         component.component_name, node.fqdn)
            host_components = ambari.clusters(DEFAULT_CLUSTER_NAME).hosts(node.fqdn).components
            host_components.create(component.component_name).wait()
        logger.debug('Installing all registered components on host (%s) ...', node.fqdn)
        ambari.clusters(DEFAULT_CLUSTER_NAME).hosts(node.fqdn).components.install().wait()

    logger.info('Waiting for all hosts to reach healthy state ...')

    def condition(ambari):
        health_report = ambari.clusters(DEFAULT_CLUSTER_NAME).health_report
        logger.debug('Ambari cluster health report: %s ...', health_report)
        return health_report.get('Host/host_state/HEALTHY') == len(list(ambari.hosts))
    wait_for_condition(condition=condition, condition_args=[ambari])

    service_names = [service['service_name']
                     for service in ambari.clusters(DEFAULT_CLUSTER_NAME).services.to_dict()]

    if 'ATLAS' in service_names:
        logger.info('Configuring Atlas required properties ...')
        _configure_atlas(ambari, args.hdp_version, atlas_server_host=cluster.primary_node.fqdn)

    if 'HIVE' in service_names:
        primary_node.execute('touch /etc/hive/sys.db.created', quiet=quiet)

    logger.info('Waiting for components to be ready ...')

    def condition(ambari):
        comps = ambari.clusters(DEFAULT_CLUSTER_NAME).cluster.host_components.refresh()
        for comp in comps:
            if comp.state.upper() == 'UNKNOWN':
                logger.debug('Not ready with component `%s` ...', comp.component_name)
                return False
        else:
            return True
    wait_for_condition(condition=condition, condition_args=[ambari])

    if not args.dont_start_cluster:
        logger.info('Starting cluster services ...')
        ambari.clusters(DEFAULT_CLUSTER_NAME).services.start().wait(timeout=3600)

        if 'HBASE' in service_names:
            logger.info('Starting Thrift server ...')
            if hdp_version_tuple <= (2, 0, 13, 0):
                hbase_daemon_path = '/usr/lib/hbase/bin/hbase-daemon.sh'
            else:
                hbase_daemon_path = '/usr/hdp/current/hbase-master/bin/hbase-daemon.sh'
            primary_node.execute('{} start thrift -p {} '
                                 '--infoport {}'.format(hbase_daemon_path,
                                                        HBASE_THRIFT_SERVER_PORT,
                                                        HBASE_THRIFT_SERVER_INFO_PORT),
                                 quiet=quiet)
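
# NOTE: version_tuple is referenced near the top of the preceding function but not shown in
# this snippet. It is assumed to turn a dotted version string into a tuple of ints so that
# versions compare numerically (e.g. '2.0.13.0' -> (2, 0, 13, 0)); the real helper may differ.
def version_tuple(version):
    return tuple(int(part) for part in version.split('.'))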
def main(args):
    kerberos_volume_dir = os.path.expanduser(args.kerberos_config_directory
                                             or args.clusterdock_config_directory)

    # KDC node.
    kdc_image = '{}/clusterdock/topology_nodebase_kerberos:centos6.8'.format(args.registry)
    kdc_hostname = args.kdc_node[0]
    kdc_node = Node(hostname=kdc_hostname, group='kdc', image=kdc_image,
                    volumes=[{kerberos_volume_dir: KERBEROS_VOLUME_DIR}])

    # Webserver node. This is the reverse proxy that exposes the URLs.
    webserver_image = '{}/{}/topology_http_kerberos:webserver'.format(args.registry,
                                                                      args.namespace or DEFAULT_NAMESPACE)
    webserver_hostname = args.webserver_node[0]
    webserver_node = Node(hostname=webserver_hostname, group='webserver', image=webserver_image,
                          volumes=[{kerberos_volume_dir: KERBEROS_VOLUME_DIR}],
                          ports={80: 80, 443: 443})

    # Service node. The actual service (in our case, pretenders, which allows us to create mock HTTP URLs).
    service_hostname = args.service_node[0]
    service_node = Node(hostname=service_hostname, group='service',
                        image='pretenders/pretenders:1.4', ports={8000: 8000})

    cluster = Cluster(kdc_node, webserver_node, service_node)
    cluster.start(args.network)

    logger.info('Updating KDC configurations ...')
    realm = cluster.network.upper()

    krb5_conf_data = kdc_node.get_file(KDC_KRB5_CONF_FILENAME)
    kdc_node.put_file(KDC_KRB5_CONF_FILENAME,
                      re.sub(r'EXAMPLE.COM', realm,
                             re.sub(r'example.com', cluster.network,
                                    re.sub(r'kerberos.example.com',
                                           r'{}.{}'.format(kdc_hostname, cluster.network),
                                           krb5_conf_data))))

    kdc_conf_data = kdc_node.get_file(KDC_CONF_FILENAME)
    kdc_node.put_file(KDC_CONF_FILENAME,
                      re.sub(r'EXAMPLE.COM', realm,
                             re.sub(r'\[kdcdefaults\]',
                                    r'[kdcdefaults]\n max_renewablelife = 7d\n max_life = 1d',
                                    kdc_conf_data)))

    acl_data = kdc_node.get_file(KDC_ACL_FILENAME)
    kdc_node.put_file(KDC_ACL_FILENAME, re.sub(r'EXAMPLE.COM', realm, acl_data))

    logger.info('Starting KDC ...')
    kdc_commands = [
        'kdb5_util create -s -r {realm} -P kdcadmin'.format(realm=realm),
        'kadmin.local -q "addprinc -pw {admin_pw} admin/admin@{realm}"'.format(admin_pw='acladmin',
                                                                               realm=realm)
    ]

    # Add principals: two for the HTTP services and one for a client.
    principals = [{'principal': 'HTTP/webserver.{}@{}'.format(cluster.network, realm),
                   'keytab': SERVICE_KEYTAB_FILENAME},
                  {'principal': 'HTTP/sdcwebserver.{}@{}'.format(cluster.network, realm),
                   'keytab': '/etc/clusterdock/kerberos/sdcwebserver.keytab'},
                  {'principal': 'browser@{0}'.format(realm),
                   'keytab': CLIENT_KEYTAB_FILENAME}]
    create_principals_cmds = ['kadmin.local -q "addprinc -randkey {}"'.format(principal['principal'])
                              for principal in principals]
    kdc_commands.extend(create_principals_cmds)

    # Delete any existing keytab files.
    kdc_commands.append('rm -f {}/*.keytab'.format(KERBEROS_VOLUME_DIR))
    create_keytab_cmds = ['kadmin.local -q "xst -norandkey -k {} {}"'.format(principal['keytab'],
                                                                             principal['principal'])
                          for principal in principals]
    kdc_commands.extend(create_keytab_cmds)
    kdc_commands.extend(['krb5kdc', 'kadmind', 'authconfig --enablekrb5 --update'])
    kdc_commands.append('cp -f {} {}'.format(KDC_KRB5_CONF_FILENAME, KERBEROS_VOLUME_DIR))
    kdc_commands.extend(['chmod 644 {}'.format(principal['keytab']) for principal in principals])

    kdc_node.execute(command="bash -c '{}'".format('; '.join(kdc_commands)), quiet=not args.verbose)

    logger.info('Validating kerberos service health ...')
    _validate_service_health(node=kdc_node, services=['krb5kdc', 'kadmin'], quiet=not args.verbose)

    # Copy the self-signed certificate and private key from the image to the clusterdock config
    # location. Any consumer can then import the certificate as a trusted certificate.
    webserver_node.execute('cp /etc/ssl/certs/selfsigned.crt {ssl_cert_dir}/selfsigned.crt '
                           '&& cp /etc/ssl/private/private.key {ssl_cert_dir}/private.key'.format(
                               ssl_cert_dir=KERBEROS_VOLUME_DIR),
                           quiet=not args.verbose)

    # Copy the krb5.conf file from the shared location to /etc on the webserver node and start the webserver.
    webserver_node.execute('cp -p {}/krb5.conf {}'.format(KERBEROS_VOLUME_DIR, KDC_KRB5_CONF_FILENAME),
                           quiet=not args.verbose)
    webserver_node.execute('service httpd start', quiet=not args.verbose)

    logger.info('Validating web server health ...')
    _validate_service_health(node=webserver_node, services=['httpd'], quiet=not args.verbose)
def main(args):
    quiet = not args.verbose

    # Image name
    image = '{}/{}/topology_apache_kafka:kafka-{}-{}'.format(args.registry,
                                                             args.namespace or DEFAULT_NAMESPACE,
                                                             args.kafka_version, args.scala_version)

    # Nodes in the Kafka cluster
    nodes = [Node(hostname=hostname, group='brokers',
                  ports=[ZOOKEEPER_PORT, BROKER_PORT], image=image)
             for hostname in args.brokers]
    cluster = Cluster(*nodes)
    cluster.start(args.network, pull_images=args.always_pull)

    # Create distributed ZooKeeper configuration
    zookeeper_config = ('tickTime=2000\n'
                        'dataDir=/zookeeper\n'
                        'clientPort=2181\n'
                        'initLimit=5\n'
                        'syncLimit=2\n')
    for idx, node in enumerate(cluster):
        zookeeper_config += 'server.{}={}:2888:3888\n'.format(idx, node.hostname)

    # Start ZooKeeper on all nodes
    for idx, node in enumerate(cluster):
        logger.info('Starting ZooKeeper on node {}'.format(node.hostname))
        node.execute('mkdir -p /zookeeper')
        node.put_file('/zookeeper/myid', str(idx))
        node.put_file('/zookeeper.properties', zookeeper_config)
        node.execute('/start_zookeeper &', detach=True)

    # Validate that ZooKeeper is alive from each node
    for node in cluster:
        logger.info('Validating ZooKeeper on node %s', node.hostname)
        wait_for_condition(condition=validate_zookeeper, condition_args=[node, quiet],
                           time_between_checks=3, timeout=60, success=success, failure=failure)

    # Start all brokers
    for idx, node in enumerate(cluster):
        logger.info('Starting Kafka on node {}'.format(node.hostname))
        kafka_config = node.get_file('/kafka/config/server.properties')
        kafka_config = kafka_config.replace('broker.id=0', 'broker.id={}'.format(idx))
        node.put_file('/kafka.properties', kafka_config)
        node.execute('/start_kafka &', detach=True)

    # Verify that all Kafka brokers are up
    logger.info('Waiting on all brokers to register in ZooKeeper')
    wait_for_condition(condition=validate_kafka, condition_args=[nodes[0], len(nodes), quiet],
                       time_between_checks=3, timeout=60, success=success, failure=failure)

    # Automatically create topics
    for topic in args.topics.split(','):
        logger.info('Creating topic %s', topic)
        nodes[0].execute('/create_topic {}'.format(topic), quiet=quiet)
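
# NOTE: validate_zookeeper and validate_kafka are passed to wait_for_condition above but are
# not part of this snippet. Minimal sketches under the assumption that the image ships the
# standard ZooKeeper/Kafka tooling (nc and /kafka/bin/zookeeper-shell.sh); the real helpers
# may differ.
def validate_zookeeper(node, quiet):
    # 'ruok' returns 'imok' once the ZooKeeper server on this node is serving requests.
    return 'imok' in node.execute("bash -c 'echo ruok | nc localhost 2181'", quiet=quiet).output

def validate_kafka(node, expected_broker_count, quiet):
    # Brokers register ephemeral znodes under /brokers/ids once they join the cluster; the
    # last line of the shell output looks like '[0, 1, 2]'.
    output = node.execute('/kafka/bin/zookeeper-shell.sh localhost:2181 ls /brokers/ids',
                          quiet=quiet).output
    last_line = output.splitlines()[-1] if output else ''
    broker_ids = [id_ for id_ in last_line.strip('[] \n').split(',') if id_.strip()]
    return len(broker_ids) >= expected_broker_count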
def main(args):
    primary_node_image = "{0}/{1}/{2}:cdh-cm-primary-{3}".format(args.registry, args.clusterdock_namespace,
                                                                 args.image_name, args.version_string)
    secondary_node_image = "{0}/{1}/{2}:cdh-cm-secondary-{3}".format(args.registry, args.clusterdock_namespace,
                                                                     args.image_name, args.version_string)
    edge_node_image = "{0}/{1}/{2}:cdh-cm-edge-{3}".format(args.registry, args.clusterdock_namespace,
                                                           args.image_name, args.version_string)

    # Docker's API for healthcheck uses units of nanoseconds. Define a constant
    # to make this more readable.
    SECONDS = 1000000000
    cm_server_healthcheck = {
        'test': 'curl --silent --output /dev/null 127.0.0.1:{}'.format(CM_PORT),
        'interval': 1 * SECONDS,
        'timeout': 1 * SECONDS,
        'retries': 1,
        'start_period': 30 * SECONDS
    }
    primary_node = Node(hostname=args.primary_node[0], group='primary', image=primary_node_image,
                        ports=[{CM_PORT: CM_PORT}], healthcheck=cm_server_healthcheck)
    secondary_nodes = [Node(hostname=hostname, group='secondary', image=secondary_node_image)
                       for hostname in args.secondary_nodes]
    edge_nodes = [Node(hostname=hostname, group='edge', image=edge_node_image)
                  for hostname in args.edge_nodes]

    all_nodes = [primary_node] + secondary_nodes + edge_nodes
    cluster = Cluster(*all_nodes)
    cluster.primary_node = primary_node

    secondary_node_group = NodeGroup(secondary_nodes)
    edge_node_group = NodeGroup(edge_nodes)

    cluster.start(args.network)

    filesystem_fix_commands = ['cp {0} {0}.1; umount {0}; mv -f {0}.1 {0}'.format(file_)
                               for file_ in ['/etc/hosts', '/etc/resolv.conf',
                                             '/etc/hostname', '/etc/localtime']]
    cluster.execute("bash -c '{}'".format('; '.join(filesystem_fix_commands)))

    # Use bsdtar instead of tar because it works better with Docker.
    cluster.execute("ln -fs /usr/bin/bsdtar /bin/tar")

    _configure_cm_agents(cluster)

    if args.change_hostfile:
        update_hosts_file(cluster)

    # The CDH topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than 2 nodes is started, some modifications need to be done to the nodes to
    # prevent duplicate heartbeats and things like that.
    if len(secondary_nodes) > 1:
        _remove_files(nodes=secondary_nodes[1:],
                      files=['/var/lib/cloudera-scm-agent/uuid', '/dfs*/dn/current/*'])

    logger.info('Configuring Kerberos ...')
    cluster.primary_node.execute('/root/configure-kerberos.sh', quiet=True)
    cluster.primary_node.execute('service krb5kdc start', quiet=True)
    cluster.primary_node.execute('service kadmin start', quiet=True)

    logger.info('Restarting Cloudera Manager agents ...')
    # _restart_cm_agents(cluster)

    logger.info('Waiting for Cloudera Manager server to come online ...')
    _wait_for_cm_server(primary_node)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>`` so
    # use that instead of the hostname if the host name is ``moby``.
    hostname = 'localhost' if client.info().get('Name') == 'moby' else socket.gethostname()
    port = primary_node.host_ports.get(CM_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Cloudera Manager server is now reachable at %s', server_url)

    # The work we need to do through CM itself begins here...
    deployment = ClouderaManagerDeployment(server_url)
    deployment.stop_cm_service()
    time.sleep(10)

    logger.info('Starting krb5kdc and kadmin ...')
    cluster.primary_node.execute('service krb5kdc start', quiet=True)
    cluster.primary_node.execute('service kadmin start', quiet=True)

    logger.info('Regenerating keytabs ...')
    regenerate_keytabs(cluster, primary_node, deployment)

    logger.info('Adding hosts to cluster ...')
    # Add all CM hosts to the cluster (i.e. only new hosts that weren't part of the original images).
    all_host_ids = {}
    for host in deployment.get_all_hosts():
        all_host_ids[host['hostId']] = host['hostname']
        for node in cluster:
            if node.fqdn == host['hostname']:
                node.host_id = host['hostId']
                break
        else:
            raise Exception('Could not find CM host with hostname {}.'.format(node.fqdn))
    cluster_host_ids = {host['hostId']
                        for host in deployment.get_cluster_hosts(cluster_name=DEFAULT_CLUSTER_NAME)}
    host_ids_to_add = set(all_host_ids.keys()) - cluster_host_ids
    if host_ids_to_add:
        logger.debug('Adding %s to cluster %s ...',
                     'host{} ({})'.format('s' if len(host_ids_to_add) > 1 else '',
                                          ', '.join(all_host_ids[host_id]
                                                    for host_id in host_ids_to_add)),
                     DEFAULT_CLUSTER_NAME)
        deployment.add_cluster_hosts(cluster_name=DEFAULT_CLUSTER_NAME, host_ids=host_ids_to_add)

    _wait_for_activated_cdh_parcel(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME)

    # Create and apply host templates.
    deployment.create_host_template(cluster_name='cluster', host_template_name='secondary',
                                    role_config_group_names=['hdfs-DATANODE-BASE',
                                                             'hbase-REGIONSERVER-BASE',
                                                             'yarn-NODEMANAGER-BASE'])
    deployment.create_host_template(cluster_name='cluster', host_template_name='edgenode',
                                    role_config_group_names=['hive-GATEWAY-BASE',
                                                             'hbase-GATEWAY-BASE',
                                                             'hdfs-GATEWAY-BASE',
                                                             'spark_on_yarn-GATEWAY-BASE'])
    deployment.apply_host_template(cluster_name=DEFAULT_CLUSTER_NAME, host_template_name='secondary',
                                   start_roles=False, host_ids=host_ids_to_add)
    deployment.apply_host_template(cluster_name=DEFAULT_CLUSTER_NAME, host_template_name='edgenode',
                                   start_roles=False, host_ids=host_ids_to_add)

    logger.info('Updating database configurations ...')
    _update_database_configs(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                             primary_node=primary_node)
    # deployment.update_database_configs()
    # deployment.update_hive_metastore_namenodes()

    logger.info('Updating KDC config ...')
    deployment.update_cm_config({'SECURITY_REALM': 'CLOUDERA',
                                 'KDC_HOST': 'node-1.cluster',
                                 'KRB_MANAGE_KRB5_CONF': 'true'})

    deployment.update_service_config(service_name='hbase', cluster_name=DEFAULT_CLUSTER_NAME,
                                     configs={'hbase_superuser': '******'})
    deployment.update_service_role_config_group_config(service_name='hive',
                                                       cluster_name=DEFAULT_CLUSTER_NAME,
                                                       role_config_group_name='hive-HIVESERVER2-BASE',
                                                       configs={'hiveserver2_webui_port': '10009'})

    logger.info('Importing credentials ...')
    cluster.primary_node.execute("curl -XPOST -u admin:admin "
                                 "http://{0}:{1}/api/v14/cm/commands/importAdminCredentials"
                                 "?username=cloudera-scm/admin@CLOUDERA&password=cloudera"
                                 .format(primary_node.fqdn, CM_PORT),
                                 quiet=True)

    logger.info('Deploying cluster client config ...')
    deployment.deploy_cluster_client_config(cluster_name=DEFAULT_CLUSTER_NAME)

    logger.info('Configuring for Kerberos ...')
    cluster.primary_node.execute("curl -XPOST -u admin:admin "
                                 "http://{0}:{1}/api/v14/cm/commands/configureForKerberos "
                                 "--data 'clustername={2}'"
                                 .format(primary_node.fqdn, CM_PORT, DEFAULT_CLUSTER_NAME),
                                 quiet=True)

    logger.info('Creating keytab files ...')
    cluster.execute('/root/create-keytab.sh', quiet=True)

    logger.info('Deploying client config ...')
    _deploy_client_config(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME)

    if not args.dont_start_cluster:
        logger.info('Starting cluster services ...')
        _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                               service_name="zookeeper", command="start")
        _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                               service_name="hdfs", command="start")
        if not args.skip_accumulo:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16", command="CreateHdfsDirCommand")
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16", command="CreateAccumuloUserDirCommand")
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16", command="AccumuloInitServiceCommand")
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16", command="start")
        if not args.skip_yarn:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="yarn", command="start")
        if not args.skip_hbase:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="hbase", command="start")
        if not args.skip_flume:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="flume", command="start")
        if not args.skip_spark:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="spark_on_yarn", command="start")
        if not args.skip_sqoop:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="sqoop", command="start")
        if not args.skip_hive:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="hive", command="start")
        if not args.skip_oozie:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="oozie", command="start")
        if not args.skip_hue:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="hue", command="start")

        logger.info('Starting CM services ...')
        _start_cm_service(deployment=deployment)

    logger.info('Setting up HDFS home directory ...')
    cluster.primary_node.execute('kinit -kt /var/run/cloudera-scm-agent/process/*-hdfs-NAMENODE/hdfs.keytab '
                                 'hdfs/node-1.cluster@CLOUDERA', quiet=True)
    cluster.primary_node.execute('hadoop fs -mkdir /user/cloudera-scm', quiet=True)
    cluster.primary_node.execute('hadoop fs -chown cloudera-scm:cloudera-scm /user/cloudera-scm', quiet=True)

    logger.info('Kinit cloudera-scm/admin ...')
    cluster.execute('kinit -kt /root/cloudera-scm.keytab cloudera-scm/admin', quiet=True)

    logger.info('Executing post run script ...')
    secondary_node_group.execute('/root/post_run.sh')
    edge_node_group.execute('/root/post_run.sh')
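
# NOTE: _wait_for_cm_server is referenced above but not shown here. A minimal sketch, assuming
# it simply polls the Cloudera Manager web port until it accepts connections (mirroring the
# container healthcheck defined at the top of main) and that clusterdock's wait_for_condition
# utility is available; the real helper may inspect the CM API instead.
def _wait_for_cm_server(primary_node):
    def condition(node):
        return node.execute('curl --silent --output /dev/null 127.0.0.1:{}'.format(CM_PORT),
                            quiet=True).exit_code == 0
    wait_for_condition(condition=condition, condition_args=[primary_node],
                       time_between_checks=3, timeout=300)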
def main(args):
    node_image = '{}/{}/clusterdock:greenplum{}'.format(args.registry,
                                                        args.namespace or DEFAULT_NAMESPACE,
                                                        args.greenplum_version)
    volumes = [{'/sys/fs/cgroup': '/sys/fs/cgroup'}, {'/run': '/run/lock'}]

    if args.predictable:
        ports = [{GREENPLUM_SQL_CLIENT_CONNECTION_PORT: GREENPLUM_SQL_CLIENT_CONNECTION_PORT},
                 {GREENPLUM_SSH_HOST_PORT: GREENPLUM_SSH_CONTAINER_PORT},
                 {GREENPLUM_GPSS_LISTENER_PORT: GREENPLUM_GPSS_LISTENER_PORT},
                 {GREENPLUM_GPFDIST_SERVICE_PORT: GREENPLUM_GPFDIST_SERVICE_PORT}]
    else:
        ports = [GREENPLUM_SQL_CLIENT_CONNECTION_PORT,
                 GREENPLUM_SSH_CONTAINER_PORT,
                 GREENPLUM_GPSS_LISTENER_PORT,
                 GREENPLUM_GPFDIST_SERVICE_PORT]

    primary_node = Node(hostname=args.primary_node[0], group='primary', image=node_image,
                        name='greenplum_{}'.format(args.greenplum_version),
                        ports=ports, volumes=volumes)
    secondary_nodes = [Node(hostname=hostname, group='secondary', image=node_image, volumes=volumes)
                       for hostname in args.secondary_nodes]
    nodes = [primary_node] + secondary_nodes

    cluster = Cluster(*nodes)
    cluster.primary_node = primary_node
    cluster.start(args.network, pull_images=args.always_pull)

    primary_node.put_file(HOST_FILE_PATH, '\n'.join(args.secondary_nodes))
    primary_node.put_file(CONFIG_JSON_FILE_PATH, CONFIG_JSON)

    commands = [
        'source /usr/local/greenplum-db/greenplum_path.sh',
        'chmod 755 /home/gpadmin/prepare.sh',
        # Create segment hosts with 1 primary segment in each segment host.
        '/home/gpadmin/prepare.sh -s {} -n 1'.format(len(args.secondary_nodes)),
        # Initialize Greenplum Database system using gpinitsystem_config file.
        'gpinitsystem -a -c /home/gpadmin/gpinitsystem_config'
    ]
    primary_node.execute(' && '.join(commands), user='******')

    commands = [
        'source /usr/local/greenplum-db/greenplum_path.sh',
        # To allow access to Greenplum Database from every host, change pg_hba.conf file.
        "echo 'host all all 0.0.0.0/0 trust' >> /home/gpadmin/master/gpseg-1/pg_hba.conf",
        'export MASTER_DATA_DIRECTORY=/home/gpadmin/master/gpseg-1',
        # Following will make sure the changes in pg_hba.conf take effect.
        '/usr/local/greenplum-db/bin/gpstop -u',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libpq.so.5 /usr/lib64/libpq.so.5',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libssl.so.1.0.0 /usr/lib64/libssl.so.1.0.0',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libcrypto.so.1.0.0 /usr/lib64/libcrypto.so.1.0.0',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libcom_err.so.3 /usr/lib64/libcom_err.so.3',
        # Create db and extension in it.
        '/usr/local/greenplum-db/bin/createdb some_db',
        "/usr/local/greenplum-db/bin/psql -d some_db -c 'CREATE EXTENSION gpss;'"
    ]
    primary_node.execute(' && '.join(commands), user='******')

    # Start Greenplum Stream Server in detached mode since it waits indefinitely for client job requests.
    primary_node.execute('/usr/local/greenplum-db/bin/gpss /home/gpadmin/config.json',
                         user='******', detach=True)
def main(args):
    if args.license_url and not args.license_credentials:
        raise Exception('--license-credentials is a required argument if --license-url is provided.')

    image_prefix = '{}/{}/clusterdock:mapr{}'.format(args.registry,
                                                     args.namespace or DEFAULT_NAMESPACE,
                                                     args.mapr_version)
    if args.mep_version:
        image_prefix = '{}_mep{}'.format(image_prefix, args.mep_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    node_disks = yaml.load(args.node_disks)

    # MapR-FS needs each fileserver node to have a disk allocated for it, so fail fast if the
    # node disks map is missing any nodes.
    if set(args.primary_node + args.secondary_nodes) != set(node_disks):
        raise Exception('Not all nodes are accounted for in the --node-disks dictionary')

    primary_node = Node(hostname=args.primary_node[0], group='primary', image=primary_node_image,
                        ports=[{MCS_SERVER_PORT: MCS_SERVER_PORT}
                               if args.predictable else MCS_SERVER_PORT],
                        devices=node_disks.get(args.primary_node[0]),
                        # A secure cluster needs the ticket to execute the rest of the commands
                        # after cluster start.
                        environment=['MAPR_TICKETFILE_LOCATION=/opt/mapr/conf/mapruserticket']
                        if args.secure else [])
    secondary_nodes = [Node(hostname=hostname, group='secondary', image=secondary_node_image,
                            devices=node_disks.get(hostname))
                       for hostname in args.secondary_nodes]

    cluster = Cluster(primary_node, *secondary_nodes)

    if args.secure:
        secure_config_host_dir = os.path.expanduser(args.secure_config_directory)
        volumes = [{secure_config_host_dir: SECURE_CONFIG_CONTAINER_DIR}]
        for node in cluster.nodes:
            node.volumes.extend(volumes)

    # MapR versions 6.0.0 onwards use CentOS 7, which needs the following settings.
    mapr_version_tuple = tuple(int(i) for i in args.mapr_version.split('.'))
    if mapr_version_tuple >= EARLIEST_MAPR_VERSION_WITH_LICENSE_AND_CENTOS_7:
        for node in cluster.nodes:
            node.volumes.append({'/sys/fs/cgroup': '/sys/fs/cgroup'})
            temp_dir_name = tempfile.mkdtemp()
            logger.debug('Created temporary directory %s', temp_dir_name)
            node.volumes.append({temp_dir_name: '/run'})
    cluster.primary_node = primary_node
    cluster.start(args.network, pull_images=args.always_pull)

    logger.info('Generating new UUIDs ...')
    cluster.execute('/opt/mapr/server/mruuidgen > /opt/mapr/hostid')

    if not args.secure:
        logger.info('Configuring the cluster ...')
        for node in cluster:
            configure_command = ('/opt/mapr/server/configure.sh -C {0} -Z {0} -RM {0} -HS {0} '
                                 '-u mapr -g mapr -D {1}'.format(primary_node.fqdn,
                                                                 ','.join(node_disks.get(node.hostname))))
            node.execute("bash -c '{}'".format(configure_command))
    else:
        logger.info('Configuring native security for the cluster ...')
        configure_command = ('/opt/mapr/server/configure.sh -secure -genkeys -C {0} -Z {0} -RM {0} -HS {0} '
                             '-u mapr -g mapr -D {1}'.format(primary_node.fqdn,
                                                             ','.join(node_disks.get(primary_node.hostname))))
        source_files = ['{}/{}'.format(MAPR_CONFIG_DIR, file) for file in SECURE_FILES]
        commands = [configure_command,
                    'chmod 600 {}/{}'.format(MAPR_CONFIG_DIR, SSL_KEYSTORE_FILE),
                    'cp -f {src} {dest_dir}'.format(src=' '.join(source_files),
                                                    dest_dir=SECURE_CONFIG_CONTAINER_DIR)]
        primary_node.execute(' && '.join(commands))

        for node in secondary_nodes:
            source_files = ['{}/{}'.format(SECURE_CONFIG_CONTAINER_DIR, file) for file in SECURE_FILES]
            configure_command = ('/opt/mapr/server/configure.sh -secure -C {0} -Z {0} -RM {0} -HS {0} '
                                 '-u mapr -g mapr -D {1}'.format(primary_node.fqdn,
                                                                 ','.join(node_disks.get(node.hostname))))
            commands = ['cp -f {src} {dest_dir}'.format(src=' '.join(source_files),
                                                        dest_dir=MAPR_CONFIG_DIR),
                        configure_command]
            node.execute(' && '.join(commands))

    logger.info('Waiting for MapR Control System server to come online ...')

    def condition(address, port):
        return socket().connect_ex((address, port)) == 0

    def success(time):
        logger.info('MapR Control System server is online after %s seconds.', time)

    def failure(timeout):
        raise TimeoutError('Timed out after {} seconds waiting '
                           'for MapR Control System server to come online.'.format(timeout))
    wait_for_condition(condition=condition,
                       condition_args=[primary_node.ip_address, MCS_SERVER_PORT],
                       time_between_checks=3, timeout=180, success=success, failure=failure)
    mcs_server_host_port = primary_node.host_ports.get(MCS_SERVER_PORT)

    logger.info('Creating /apps/spark directory on %s ...', primary_node.hostname)
    spark_directory_command = ['hadoop fs -mkdir -p /apps/spark',
                               'hadoop fs -chmod 777 /apps/spark']
    primary_node.execute("bash -c '{}'".format('; '.join(spark_directory_command)))

    logger.info('Creating MapR sample Stream named /sample-stream on %s ...', primary_node.hostname)
    primary_node.execute('maprcli stream create -path /sample-stream '
                         '-produceperm p -consumeperm p -topicperm p')

    if mapr_version_tuple >= EARLIEST_MAPR_VERSION_WITH_LICENSE_AND_CENTOS_7 and args.license_url:
        license_commands = ['curl --user {} {} > /tmp/lic'.format(args.license_credentials,
                                                                  args.license_url),
                            '/opt/mapr/bin/maprcli license add -license /tmp/lic -is_file true',
                            'rm -rf /tmp/lic']
        logger.info('Applying license ...')
        primary_node.execute(' && '.join(license_commands))

    if not args.dont_register_gateway:
        logger.info('Registering gateway with the cluster ...')
        register_gateway_commands = ["cat /opt/mapr/conf/mapr-clusters.conf | egrep -o '^[^ ]* '"
                                     ' > /tmp/cluster-name',
                                     'maprcli cluster gateway set -dstcluster $(cat '
                                     '/tmp/cluster-name) -gateways {}'.format(primary_node.fqdn),
                                     'rm /tmp/cluster-name']
        primary_node.execute(' && '.join(register_gateway_commands))

    logger.info('MapR Control System server is now accessible at https://%s:%s',
                getfqdn(), mcs_server_host_port)
def main(args):
    image_prefix = '{}/{}/topology_hdp:hdp{}_ambari{}'.format(args.registry,
                                                              args.namespace or DEFAULT_NAMESPACE,
                                                              args.hdp_version, args.ambari_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    primary_node = Node(hostname=args.primary_node[0], group='primary', image=primary_node_image,
                        ports=[{AMBARI_PORT: AMBARI_PORT} if args.predictable else AMBARI_PORT])
    secondary_nodes = [Node(hostname=hostname, group='secondary', image=secondary_node_image)
                       for hostname in args.secondary_nodes]

    cluster = Cluster(primary_node, *secondary_nodes)
    cluster.primary_node = primary_node
    cluster.secondary_nodes = secondary_nodes
    cluster.start(args.network)

    logger.debug('Starting PostgreSQL for Ambari server ...')
    primary_node.execute('service postgresql start', quiet=not args.verbose)

    _update_node_names(cluster, quiet=not args.verbose)

    # The HDP topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than 2 nodes is started, some modifications need to be done.
    if len(secondary_nodes) > 1:
        _remove_files(nodes=secondary_nodes[1:], files=['/hadoop/hdfs/data/current/*'])

    logger.info('Starting Ambari server ...')
    primary_node.execute('ambari-server start', quiet=not args.verbose)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>`` so
    # use that instead of the hostname if the host name is ``moby``.
    hostname = 'localhost' if client.info().get('Name') == 'moby' else socket.gethostname()
    port = cluster.primary_node.host_ports.get(AMBARI_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Ambari server is now reachable at %s', server_url)

    logger.info('Starting Ambari agents ...')
    for node in cluster:
        logger.debug('Starting Ambari agent on %s ...', node.fqdn)
        node.execute('ambari-agent start', quiet=not args.verbose)

    ambari = Ambari(server_url, username='******', password='******')

    def condition(ambari, cluster):
        cluster_hosts = {node.fqdn for node in cluster}
        ambari_hosts = {host.host_name for host in ambari.hosts}
        logger.debug('Cluster hosts: %s; Ambari hosts: %s', cluster_hosts, ambari_hosts)
        return cluster_hosts == ambari_hosts
    wait_for_condition(condition=condition, condition_args=[ambari, cluster])

    for node in secondary_nodes[1:]:
        logger.info('Adding %s to cluster ...', node.fqdn)
        ambari.clusters('cluster').hosts.create(node.fqdn)
        for component in ambari.clusters('cluster').hosts(secondary_nodes[0].fqdn).components:
            logger.debug('Adding component (%s) to cluster on host (%s) ...',
                         component.component_name, node.fqdn)
            host_components = ambari.clusters('cluster').hosts(node.fqdn).components
            host_components.create(component.component_name).wait()
        logger.debug('Installing all registered components on host (%s) ...', node.fqdn)
        ambari.clusters('cluster').hosts(node.fqdn).components.install().wait()

    if not args.dont_start_cluster:
        logger.debug('Waiting for all hosts to reach healthy state before starting cluster ...')

        def condition(ambari):
            health_report = ambari.clusters('cluster').health_report
            logger.debug('Ambari cluster health report: %s ...', health_report)
            return health_report.get('Host/host_state/HEALTHY') == len(list(ambari.hosts))
        wait_for_condition(condition=condition, condition_args=[ambari])

        logger.info('Starting cluster services ...')
        ambari.clusters('cluster').services.start().wait()
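
# NOTE: _remove_files is used by several of the topologies above but is not included in this
# snippet. A plausible sketch, assuming it simply deletes the given glob patterns on each node;
# the real helper may differ.
def _remove_files(nodes, files, quiet=True):
    command = 'rm -rf {}'.format(' '.join(files))
    for node in nodes:
        logger.debug('Removing files (%s) on node (%s) ...', ', '.join(files), node.fqdn)
        node.execute(command, quiet=quiet)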
def main(args):
    quiet = not args.verbose

    # Image name
    image = '{}/{}/topology_confluent_schema_registry:schema_registry-{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE, args.confluent_version)

    # Nodes in the Kafka cluster
    nodes = [Node(hostname=hostname, group='brokers', ports=[{REST_PORT: REST_PORT}], image=image)
             for hostname in args.nodes]
    cluster = Cluster(*nodes)
    cluster.start(args.network, pull_images=args.always_pull)

    # Create distributed ZooKeeper configuration
    zookeeper_config = ['tickTime=2000', 'dataDir=/zookeeper', 'clientPort=2181',
                        'initLimit=5', 'syncLimit=2']
    for idx, node in enumerate(cluster):
        zookeeper_config.append('server.{}={}:2888:3888'.format(idx, node.hostname))

    # Start ZooKeeper on all nodes
    for idx, node in enumerate(cluster):
        logger.info('Starting ZooKeeper on node {}'.format(node.hostname))
        node.execute('mkdir -p /zookeeper')
        node.put_file('/zookeeper/myid', str(idx))
        node.put_file('/zookeeper.properties', '\n'.join(zookeeper_config))
        node.execute('/start_zookeeper &', detach=True)

    # Validate that ZooKeeper is alive from each node
    for node in cluster:
        logger.info('Validating ZooKeeper on node %s', node.hostname)
        wait_for_condition(condition=validate_zookeeper, condition_args=[node, quiet],
                           time_between_checks=3, timeout=60, success=success, failure=failure)

    # Start all brokers
    for idx, node in enumerate(cluster):
        logger.info('Starting Kafka on node {}'.format(node.hostname))
        kafka_config = node.get_file('/confluent/etc/kafka/server.properties')
        kafka_config = kafka_config.replace('broker.id=0', 'broker.id={}'.format(idx))
        node.put_file('/kafka.properties', kafka_config)
        node.execute('/start_kafka &', detach=True)

    # Verify that all Kafka brokers are up
    logger.info('Waiting on all brokers to register in ZooKeeper')
    wait_for_condition(condition=validate_kafka, condition_args=[nodes[0], len(nodes), quiet],
                       time_between_checks=3, timeout=60, success=success, failure=failure)

    # Start the Schema Registry on all nodes
    for idx, node in enumerate(cluster):
        logger.info('Starting Schema Registry on node {}'.format(node.hostname))
        node.execute('/start_schema_registry &', detach=True)
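
# NOTE: the success and failure callbacks passed to wait_for_condition in the Kafka and Schema
# Registry topologies above are not part of this snippet. Minimal sketches, mirroring the
# callbacks defined inline in the MapR topology; the real helpers may log different messages.
def success(time):
    logger.info('Validation succeeded after %s seconds.', time)

def failure(timeout):
    raise TimeoutError('Timed out after {} seconds waiting for validation.'.format(timeout))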