def set_up_cluster():
    """Provision a CDH cluster end to end through the Cloudera Manager API.

    The steps, in order: start the CM trial license, create the management
    service, install the hosts, create the cluster and add the hosts to it,
    download/distribute/activate the newest matching CDH parcel, inspect the
    hosts, create and auto-configure the services, apply the Hive metastore
    and Reports Manager database settings, and run the cluster's first-run
    command.

    All connection details, host lists, service names and database settings
    are read from names defined outside this function (``cm_host``,
    ``host_list``, ``service_types_and_names``, ...).

    The process exits with status 1 as soon as any step fails.
    """
    import re  # local import: only needed to order parcel versions numerically

    poll_seconds = 5  # pause between status polls so the API is not hammered

    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)
    cm = ClouderaManager(api)

    # activate the CM trial license
    cm.begin_trial()

    # create the management service
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
    cm.create_mgmt_service(service_setup)

    # install hosts on this CM instance
    cmd = cm.host_install(host_username, host_list, password=host_password,
                          cm_repo_url=cm_repo_url)
    print("Installing hosts. This might take a while.")
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
    if not cmd.success:
        print("cm_host_install failed: %s" % cmd.resultMessage)
        exit(1)
    print("cm_host_install succeeded")

    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()

    # create a cluster on that instance and add all our hosts to it
    cluster = create_cluster(api, cluster_name, cdh_version)
    cluster.add_hosts(host_list)
    # re-read the cluster under the same name it was created with
    cluster = api.get_cluster(cluster_name)

    # get and list all available parcels, keeping the matching CDH ones
    parcels_list = []
    print("Available parcels:")
    for p in cluster.get_all_parcels():
        print('\t' + p.product + ' ' + p.version)
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)
    if not parcels_list:
        print("No " + cdh_version + " parcel found!")
        exit(1)

    # Pick the newest parcel. Versions are compared by their numeric
    # components because plain string comparison ranks "5.4.8" above "5.4.10".
    cdh_parcel = max(
        parcels_list,
        key=lambda parcel: [int(n) for n in re.findall(r"\d+", parcel.version)])

    # download the parcel
    print("Starting parcel download. This might take a while.")
    cmd = cdh_parcel.start_download()
    if not cmd.success:
        print("Parcel download failed!")
        exit(1)
    # make sure the download finishes
    while cdh_parcel.stage != 'DOWNLOADED':
        sleep(poll_seconds)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded")

    # distribute the parcel
    print("Starting parcel distribution. This might take a while.")
    cmd = cdh_parcel.start_distribution()
    if not cmd.success:
        print("Parcel distribution failed!")
        exit(1)
    # make sure the distribution finishes
    while cdh_parcel.stage != "DISTRIBUTED":
        sleep(poll_seconds)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " distributed")

    # activate the parcel
    cmd = cdh_parcel.activate()
    if not cmd.success:
        print("Parcel activation failed!")
        exit(1)
    # make sure the activation finishes
    while cdh_parcel.stage != "ACTIVATED":
        sleep(poll_seconds)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " activated")

    # inspect hosts and print the result
    print("Inspecting hosts. This might take a few minutes.")
    cmd = cm.inspect_hosts()
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
    if not cmd.success:
        print("Host inpsection failed!")
        exit(1)
    print("Hosts successfully inspected: \n%s" % cmd.resultMessage)

    # create all the services we want to add; we will only create one instance
    # of each (the mapping is service type -> service name)
    for service_type, service_name in service_types_and_names.items():
        cluster.create_service(service_name, service_type)

    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()

    # this will set up the Hive and the reports manager databases because we
    # can't auto-configure those two things
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = {
        "hive_metastore_database_host": hive_metastore_host,
        "hive_metastore_database_name": hive_metastore_name,
        "hive_metastore_database_password": hive_metastore_password,
        "hive_metastore_database_port": hive_metastore_database_port,
        "hive_metastore_database_type": hive_metastore_database_type,
    }
    hive.update_config(hive_config)

    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()

    # this will set the Reports Manager database password
    # first we find the correct role (the last match wins, if there are several)
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r
    if rm_role is None:
        print("No REPORTSMANAGER role found!")
        exit(1)

    # then we get the corresponding role config group -- even though there is
    # only one instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type,
                                   rm_role_group.roleConfigGroupName, None)

    # update the appropriate fields in the config
    rm_rcg_config = {
        "headlamp_database_host": reports_manager_host,
        "headlamp_database_name": reports_manager_name,
        "headlamp_database_user": reports_manager_username,
        "headlamp_database_password": reports_manager_password,
        "headlamp_database_type": reports_manager_database_type,
    }
    rm_rcg.update_config(rm_rcg_config)

    # restart the management service with new configs
    cm_service.restart().wait()

    # execute the first run command
    print("Excuting first run command. This might take a while.")
    cmd = cluster.first_run()
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
    if not cmd.success:
        # resultMessage is an attribute, not a method
        print("The first run command failed: %s" % cmd.resultMessage)
        exit(1)
    print("First run successfully executed. Your cluster has been set up!")
def set_up_cluster():
    """Provision a CDH cluster through the Cloudera Manager API.

    If Cloudera Manager already reports a license owned by "Trial License",
    the cluster is assumed to be set up and the process exits with status 0
    without doing anything else. Otherwise the trial is started and the
    function goes on to: create the management service if it is missing,
    install the hosts, create the cluster and add the hosts to it,
    download/distribute/activate the newest matching CDH parcel, inspect the
    hosts, create and auto-configure the services, apply the Reports Manager
    database settings, and run the cluster's first-run command.

    All connection details, host lists, service names and database settings
    are read from names defined outside this function (``cm_host``,
    ``host_list``, ``service_types_and_names``, ...).

    The process exits with status 1 as soon as any step fails.
    """
    import re  # local import: only needed to order parcel versions numerically

    poll_seconds = 5  # pause between status polls so the API is not hammered

    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=19)
    cm = ClouderaManager(api)

    print("*************************************")
    print(" Starting Auto Deployment of Cluster ")
    print("*************************************")

    # The license object exposes 'owner', 'uuid' and 'expiration'.
    # NOTE(review): get_license() presumably raises when no license is
    # installed yet -- confirm; any failure is treated as "no license".
    try:
        license_info = cm.get_license()
    except Exception:
        license_info = None

    if license_info is None:
        cm.begin_trial()
    else:
        print(license_info)
        if license_info.owner == "Trial License":
            print("Trial License is already set - will NOT continue now.")
            print("Assuming Cluster is already setup")
            exit(0)
        print("Setting up `Trial License`.")
        cm.begin_trial()

    # create the management service unless one already exists; a failed
    # lookup is treated as "does not exist"
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
    try:
        mgmt_exists = bool(cm.get_service().name)
    except Exception:
        mgmt_exists = False
    if mgmt_exists:
        print("Service already exist.")
    else:
        cm.create_mgmt_service(service_setup)

    # install hosts on this CM instance
    cmd = cm.host_install(host_username, host_list, password=host_password,
                          cm_repo_url=cm_repo_url, unlimited_jce=True)
    print("Installing hosts. This might take a while.")
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
        print(cmd)
    if not cmd.success:
        print("cm_host_install failed: %s" % cmd.resultMessage)
        exit(1)
    print("cm_host_install succeeded")

    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()

    # create a cluster on that instance and add all our hosts to it
    cluster = create_cluster(api, cluster_name, cdh_version)
    cluster.add_hosts(host_list)
    cluster = api.get_cluster(cluster_name)

    # get and list all available parcels, keeping the matching CDH ones
    parcels_list = []
    print("Available parcels:")
    for p in cluster.get_all_parcels():
        print('\t' + p.product + ' ' + p.version)
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)
    if not parcels_list:
        print("No " + cdh_version + " parcel found!")
        exit(1)

    # Pick the newest parcel. Versions are compared by their numeric
    # components because plain string comparison ranks "5.4.8" above "5.4.10".
    cdh_parcel = max(
        parcels_list,
        key=lambda parcel: [int(n) for n in re.findall(r"\d+", parcel.version)])

    # download the parcel
    print("Starting parcel download. This might take a while.")
    cmd = cdh_parcel.start_download()
    if not cmd.success:
        print("Parcel download failed!")
        exit(1)
    # make sure the download finishes
    while cdh_parcel.stage != 'DOWNLOADED':
        sleep(poll_seconds)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded")

    # distribute the parcel
    print("Starting parcel distribution. This might take a while.")
    cmd = cdh_parcel.start_distribution()
    if not cmd.success:
        print("Parcel distribution failed!")
        exit(1)
    # make sure the distribution finishes
    while cdh_parcel.stage != "DISTRIBUTED":
        sleep(poll_seconds)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " distributed")

    # activate the parcel
    cmd = cdh_parcel.activate()
    if not cmd.success:
        print("Parcel activation failed!")
        exit(1)
    # make sure the activation finishes
    while cdh_parcel.stage != "ACTIVATED":
        sleep(poll_seconds)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)
    print(cdh_parcel.product + ' ' + cdh_parcel.version + " activated")

    # inspect hosts and print the result
    print("Inspecting hosts. This might take a few minutes.")
    cmd = cm.inspect_hosts()
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
    if not cmd.success:
        print("Host inpsection failed!")
        exit(1)
    print("Hosts successfully inspected: \n%s" % cmd.resultMessage)

    # create all the services we want to add; we will only create one instance
    # of each (the mapping is service type -> service name)
    for service_type, service_name in service_types_and_names.items():
        cluster.create_service(service_name, service_type)

    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()

    # NOTE: the Hive metastore database settings are not applied here; only
    # the Reports Manager database is configured explicitly below.

    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()

    # this will set the Reports Manager database password
    # first we find the correct role (the last match wins, if there are several)
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r
    if rm_role is None:
        print("No REPORTSMANAGER role found!")
        exit(1)

    # then we get the corresponding role config group -- even though there is
    # only one instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type,
                                   rm_role_group.roleConfigGroupName, None)

    # update the appropriate fields in the config
    rm_rcg_config = {
        "headlamp_database_host": reports_manager_host,
        "headlamp_database_name": reports_manager_name,
        "headlamp_database_user": reports_manager_username,
        "headlamp_database_password": reports_manager_password,
        "headlamp_database_type": reports_manager_database_type,
    }
    rm_rcg.update_config(rm_rcg_config)

    # restart the management service with new configs
    cm_service.restart().wait()

    # execute the first run command
    print("Excuting first run command. This might take a while.")
    cmd = cluster.first_run()
    while cmd.success is None:
        sleep(poll_seconds)
        cmd = cmd.fetch()
    if not cmd.success:
        # resultMessage is an attribute, not a method
        print("The first run command failed: %s" % cmd.resultMessage)
        exit(1)
    print("First run successfully executed. Your cluster has been set up!")
def set_up_cluster(cm_host, host_list):
    """Set up a CDH cluster, then install the Anaconda parcel on it.

    Several steps are skipped when their result is already in place: an
    existing cluster is not re-created, each parcel stage is only triggered
    from the stage that precedes it, and services that can already be looked
    up are not created again.

    Args:
        cm_host: Host running Cloudera Manager. It also receives every
            master role and is used as the Hive metastore and Hue database
            host.
        host_list: Hosts to install and add to the cluster. Names containing
            'slave' get the worker roles; names containing 'edge' get the
            gateway roles and an extra Hue server.

    Every other setting (credentials, cluster name, service names, database
    settings, Anaconda repo/version) is read from names defined outside this
    function.

    The process exits with a non-zero status when a step fails: 1 for the
    management service, 2 for host installation, 8 for host inspection and
    11 for the first-run command.
    """
    def add_per_host_roles(service, name_format, role_type, hosts):
        # Create one role of `role_type` per host; the host's position in
        # `hosts` is substituted into `name_format` to build the role name.
        for index, host in enumerate(hosts):
            service.create_role(name_format % index, role_type, host)

    print("Setting up CDH cluster...")
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)
    cm = ClouderaManager(api)

    print("Creating mgmg service.")
    try:
        service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
        cm.create_mgmt_service(service_setup)
    except ApiException as exc:
        # NOTE(review): a 400 is presumably what CM answers when the service
        # already exists -- confirm. Any other code is fatal.
        if exc.code != 400:
            # str() is required: a str cannot be concatenated with an exception
            print("create MGMT service failed: " + str(exc))
            exit(1)

    print("Installing hosts. This might take a while.")
    cmd = cm.host_install(host_username, host_list,
                          password=host_password).wait()
    if not cmd.success:
        print("cm_host_install failed: %s" % cmd.resultMessage)
        exit(2)

    print("Auto-assign roles and auto-configure the CM service")
    if not is_cluster_installed(api):
        cm.auto_assign_roles()
        cm.auto_configure()

    print("Creating cluster.")
    if not is_cluster_installed(api):
        cluster = create_cluster(api, cluster_name, cdh_version)
        cluster.add_hosts(host_list)
    cluster = api.get_cluster(cluster_name)

    cdh_parcel = get_cdh_parcel(cluster)

    print("Downloading CDH parcel. This might take a while.")
    if cdh_parcel.stage == "AVAILABLE_REMOTELY":
        cdh_parcel = wait_for_parcel(cdh_parcel.start_download(), api,
                                     cdh_parcel, cluster_name, 'DOWNLOADED')

    print("Distributing CDH parcel. This might take a while.")
    if cdh_parcel.stage == "DOWNLOADED":
        cdh_parcel = wait_for_parcel(cdh_parcel.start_distribution(), api,
                                     cdh_parcel, cluster_name, 'DISTRIBUTED')

    print("Activating CDH parcel. This might take a while.")
    if cdh_parcel.stage == "DISTRIBUTED":
        cdh_parcel = wait_for_parcel(cdh_parcel.activate(), api,
                                     cdh_parcel, cluster_name, 'ACTIVATED')

    print("Inspecting hosts. This might take a few minutes.")
    # wait() blocks like the other commands here, instead of a sleepless
    # fetch() loop
    cmd = cm.inspect_hosts().wait()
    if not cmd.success:
        print("Host inpsection failed!")
        exit(8)
    print("Hosts successfully inspected: \n%s" % cmd.resultMessage)

    print("Creating specified services.")
    # the mapping is service type -> service name
    for service_type, service_name in service_types_and_names.items():
        try:
            cluster.get_service(service_name)
        except ApiException:
            # the lookup failed, so the service is taken not to exist yet
            print("Creating service: " + service_name)
            cluster.create_service(service_name, service_type)

    slaves = [host for host in host_list if 'slave' in host]
    edges = [host for host in host_list if 'edge' in host]

    # assign master roles to the master node, worker roles to the slaves and
    # gateway roles to the edge nodes
    for service in cluster.get_all_services():
        if service.name == 'HDFS-1':
            service.create_role('NAMENODE-1', 'NAMENODE', cm_host)
            service.create_role('SECONDARYNAMENODE', 'SECONDARYNAMENODE',
                                cm_host)
            service.create_role('BALANCER-1', 'BALANCER', cm_host)
            service.create_role('HTTPFS-1', 'HTTPFS', cm_host)
            service.create_role('HDFS-GW_MASTER1', 'GATEWAY', cm_host)
            add_per_host_roles(service, 'HDFS-GW_EDGE%s', 'GATEWAY', edges)
            add_per_host_roles(service, 'DATANODE-%s', 'DATANODE', slaves)
        elif service.name == 'ZOOKEEPER-1':
            service.create_role('ZOOKEEPERSERVER-1', 'SERVER', cm_host)
        elif service.name == 'HBASE-1':
            service.create_role('MASTER-1', 'MASTER', cm_host)
            service.create_role('HBASETHRIFTSERVER-1', 'HBASETHRIFTSERVER',
                                cm_host)
            add_per_host_roles(service, 'HBASE-RS-%s', 'REGIONSERVER', slaves)
        elif service.name == 'HUE-1':
            service.create_role('HUE-MASTER1', 'HUE_SERVER', cm_host)
            service.create_role('HUE-LB_MASTER1', 'HUE_LOAD_BALANCER',
                                cm_host)
            add_per_host_roles(service, 'HUE-EDGE%s', 'HUE_SERVER', edges)
        elif service.name == 'HIVE-1':
            service.create_role('HIVEMETASTORE-1', 'HIVEMETASTORE', cm_host)
            service.create_role('HIVESERVER-1', 'HIVESERVER2', cm_host)
            service.create_role('HIVE-GW_MASTER1', 'GATEWAY', cm_host)
            add_per_host_roles(service, 'HIVE-GW_EDGE%s', 'GATEWAY', edges)
            add_per_host_roles(service, 'HIVE-GW_SLAVE%s', 'GATEWAY', slaves)
        elif service.name == 'IMPALA-1':
            service.create_role('STATESTORE-1', 'STATESTORE', cm_host)
            service.create_role('CATALOGSERVER-1', 'CATALOGSERVER', cm_host)
            add_per_host_roles(service, 'IMPALAD-%s', 'IMPALAD', slaves)
        elif service.name == 'OOZIE-1':
            service.create_role('OOZIE_SERVER-1', 'OOZIE_SERVER', cm_host)
        elif service.name == 'SPARK_ON_YARN-1':
            service.create_role('SPARK_YARN_HISTORY_SERVER-1',
                                'SPARK_YARN_HISTORY_SERVER', cm_host)
            # Fixed name, matching the other *-GW_MASTER1 gateway roles; no
            # loop index is in scope at this point.
            service.create_role('SPARK_ON_YARN-GW_MASTER1', 'GATEWAY',
                                cm_host)
            add_per_host_roles(service, 'SPARK_ON_YARN-GW_EDGE%s', 'GATEWAY',
                               edges)
            add_per_host_roles(service, 'SPARK_ON_YARN-GW_SLAVE%s', 'GATEWAY',
                               slaves)
        elif service.name == 'SQOOP-1':
            service.create_role('SQOOP_SERVER-1', 'SQOOP_SERVER', cm_host)
        elif service.name == 'YARN-1':
            service.create_role('RESOURCEMANAGER-1', 'RESOURCEMANAGER',
                                cm_host)
            service.create_role('JOBHISTORY-1', 'JOBHISTORY', cm_host)
            add_per_host_roles(service, 'NODEMANAGER-%s', 'NODEMANAGER',
                               slaves)

    # roles were assigned explicitly above, so only auto-configure here
    cluster.auto_configure()

    print("Updating Hive config.")
    hive_metastore_host = cm_host  # let's assume that
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = {
        "hive_metastore_database_host": hive_metastore_host,
        "hive_metastore_database_name": hive_metastore_name,
        "hive_metastore_database_user": hive_metastore_user,
        "hive_metastore_database_password": hive_metastore_password,
        "hive_metastore_database_port": hive_metastore_database_port,
        "hive_metastore_database_type": hive_metastore_database_type,
    }
    hive.update_config(hive_config)

    print("Updating Hue config.")
    hue_db_host = cm_host  # let's assume that
    hue = cluster.get_service(service_types_and_names["HUE"])
    hue_config = {
        "database_host": hue_db_host,
        "database_name": hue_db_name,
        "database_user": hue_db_user,
        "database_password": hue_db_password,
        "database_port": hue_db_port,
        "database_type": hue_db_type,
    }
    hue.update_config(hue_config)

    # Set Java version to OpenJDK
    cm.update_all_hosts_config({'java_home': '/usr/lib/jvm/java-openjdk'})

    print("Starting management service.")
    cm_service = cm.get_service()
    cm_service.start().wait()

    print("Excuting first run command. This might take a while.")
    cmd = cluster.first_run().wait()
    if not cmd.success:
        print("The first run command failed: %s" % cmd.resultMessage)
        exit(11)
    print("First run successfully executed. Your cluster has been set up!")

    # Register the Anaconda parcel repository, unless it is already listed,
    # so that repeated runs do not keep appending the same URL.
    config = cm.get_config(view='full')
    repolist = config['REMOTE_PARCEL_REPO_URLS']
    value = repolist.value or repolist.default
    if anaconda_repo not in value.split(','):
        value += ',' + anaconda_repo
        cm.update_config({'REMOTE_PARCEL_REPO_URLS': value})
        # NOTE(review): presumably gives CM time to pick up the new repo
        # before the parcel is looked up -- confirm.
        sleep(10)

    cluster = api.get_cluster(cluster_name)
    parcel = cluster.get_parcel('Anaconda', anaconda_parcel_version)

    print("Downloading Anaconda parcel. This might take a while.")
    if parcel.stage == "AVAILABLE_REMOTELY":
        parcel = wait_for_parcel(parcel.start_download(), api, parcel,
                                 cluster_name, 'DOWNLOADED')

    print("Distributing Anaconda parcel. This might take a while.")
    if parcel.stage == "DOWNLOADED":
        parcel = wait_for_parcel(parcel.start_distribution(), api, parcel,
                                 cluster_name, 'DISTRIBUTED')

    print("Activating Anaconda parcel. This might take a while.")
    if parcel.stage == "DISTRIBUTED":
        parcel = wait_for_parcel(parcel.activate(), api, parcel,
                                 cluster_name, 'ACTIVATED')

    print("Anaconda is now installed.")