def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    Reads C{size}, C{name} and C{cdh_version} from the C{[cluster]} section
    of the configuration, builds a template with the HDFS, YARN and
    SPARK_ON_YARN services spread over three instance groups (one master,
    one gateway and C{size} workers), submits it and prints the clusters
    reported by the API for the deployment.

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    @return: the name of the cluster template that was submitted
    @raise HTTPError: any API error other than the 302 response, which is
        reported as a warning instead
    """
    # Configuration is read in the same order as the template fields so a
    # missing option fails before any virtual instance is built.
    cluster_size = config.getint("cluster", "size")
    cluster_name = config.get("cluster", "name")
    cdh_version = config.get("cluster", "cdh_version")

    masters = VirtualInstanceGroup(
        name="masters",
        minCount=1,
        serviceTypeToRoleTypes={
            "HDFS": ["NAMENODE", "SECONDARYNAMENODE"],
            "YARN": ["RESOURCEMANAGER", "JOBHISTORY"],
            "SPARK_ON_YARN": ["SPARK_YARN_HISTORY_SERVER"],
        },
        virtualInstances=[
            create_virtual_instance_with_random_id(config, "master"),
        ],
    )

    gateways = VirtualInstanceGroup(
        name="gateways",
        minCount=1,
        serviceTypeToRoleTypes={
            "SPARK_ON_YARN": ["GATEWAY"],
            "HDFS": ["GATEWAY"],
            "YARN": ["GATEWAY"],
        },
        virtualInstances=[
            create_virtual_instance_with_random_id(config, "gateway"),
        ],
    )

    workers = VirtualInstanceGroup(
        name="workers",
        # Every requested worker must come up for the group to be valid.
        minCount=cluster_size,
        serviceTypeToRoleTypes={
            "HDFS": ["DATANODE"],
            "YARN": ["NODEMANAGER"],
            "SPARK_ON_YARN": ["GATEWAY"],
        },
        # NOTE(review): NODEMANAGER is a YARN role in serviceTypeToRoleTypes
        # above but is configured under the "HDFS" key here -- confirm this
        # nesting is what the API expects.
        roleTypesConfigs={
            "HDFS": {
                "DATANODE": {"dfs_datanode_handler_count": "10"},
                "NODEMANAGER": {"nodemanager_webserver_port": "8047"},
            }
        },
        virtualInstances=[
            create_virtual_instance_with_random_id(config, "worker")
            for _ in range(cluster_size)
        ],
    )

    template = ClusterTemplate(
        name=cluster_name,
        productVersions={"CDH": cdh_version},
        services=["HDFS", "YARN", "SPARK_ON_YARN"],
        virtualInstanceGroups={
            "masters": masters,
            "gateways": gateways,
            "workers": workers,
        },
    )

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)
    except HTTPError as e:
        # Only the 302 response is treated as "cluster already exists".
        if e.code != 302:
            # Bare raise keeps the original traceback (``raise e`` would
            # restart it from this line on Python 2).
            raise
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Warning: a cluster with the same name already exists")

    print("Clusters: %s" % api.list(environment_name, deployment_name))
    return template.name
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    Reads C{num_workers}, C{name} and C{cdh_version} from the C{[cluster]}
    section of the configuration, builds a template with the HDFS and YARN
    services spread over a single master and C{num_workers} workers, submits
    it and prints the clusters reported by the API for the deployment.

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    @return: the name of the cluster template that was submitted
    @raise ApiException: any API error other than the 409 response, which is
        reported as a warning instead
    """
    # Configuration is read in the same order as the template fields so a
    # missing option fails before any virtual instance is built.
    num_workers = config.getint("cluster", "num_workers")
    cluster_name = config.get('cluster', 'name')
    cdh_version = config.get('cluster', 'cdh_version')

    masters = VirtualInstanceGroup(
        name='masters',
        min_count=1,
        service_type_to_role_types={
            'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
            'YARN': ['RESOURCEMANAGER', 'JOBHISTORY'],
        },
        role_types_configs={},
        virtual_instances=[create_virtual_instance(config, 'master')],
    )

    workers = VirtualInstanceGroup(
        name='workers',
        # Every requested worker must come up for the group to be valid.
        min_count=num_workers,
        service_type_to_role_types={
            'HDFS': ['DATANODE'],
            'YARN': ['NODEMANAGER'],
        },
        # optional role configurations, if desired or needed
        role_types_configs={
            # 'HDFS': {
            #     'DATANODE': {
            #         'dfs_datanode_handler_count': '10'
            #     },
            #     'NODEMANAGER': {
            #         'nodemanager_webserver_port': '8047'
            #     }
            # }
        },
        virtual_instances=[
            create_virtual_instance(config, 'worker')
            for _ in range(num_workers)
        ],
    )

    template = ClusterTemplate(
        name=cluster_name,
        product_versions={'CDH': cdh_version},
        services=['HDFS', 'YARN'],
        services_configs={},
        virtual_instance_groups={
            'masters': masters,
            'workers': workers,
        },
    )

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)
    except ApiException as exc:
        # Only the 409 response is treated as "cluster already exists".
        if exc.status != 409:
            # Bare raise keeps the original traceback (``raise exc`` would
            # restart it from this line on Python 2).
            raise
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Warning: a cluster with the same name already exists')

    print("Clusters: %s" % api.list(environment_name, deployment_name))
    return template.name
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    Reads C{size}, C{name} and C{cdh_version} from the C{[cluster]} section
    of the configuration, builds a template with the HDFS and YARN services
    spread over a single master and C{size} workers, submits it and prints
    the clusters reported by the API for the deployment.

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    @return: the name of the cluster template that was submitted
    @raise HTTPError: any API error other than the 302 response, which is
        reported as a warning instead
    """
    # Configuration is read in the same order as the template fields so a
    # missing option fails before any virtual instance is built.
    cluster_size = config.getint("cluster", "size")
    cluster_name = config.get('cluster', 'name')
    cdh_version = config.get('cluster', 'cdh_version')

    master_group = VirtualInstanceGroup(
        name='masters',
        minCount=1,
        serviceTypeToRoleTypes={
            'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
            'YARN': ['RESOURCEMANAGER', 'JOBHISTORY'],
        },
        virtualInstances=[
            create_virtual_instance_with_random_id(config, 'master'),
        ],
    )

    worker_group = VirtualInstanceGroup(
        name='workers',
        # Every requested worker must come up for the group to be valid.
        minCount=cluster_size,
        serviceTypeToRoleTypes={
            'HDFS': ['DATANODE'],
            'YARN': ['NODEMANAGER'],
        },
        # NOTE(review): NODEMANAGER is a YARN role in serviceTypeToRoleTypes
        # above but is configured under the 'HDFS' key here -- confirm this
        # nesting is what the API expects.
        roleTypesConfigs={
            'HDFS': {
                'DATANODE': {'dfs_datanode_handler_count': '10'},
                'NODEMANAGER': {'nodemanager_webserver_port': '8047'},
            }
        },
        virtualInstances=[
            create_virtual_instance_with_random_id(config, 'worker')
            for _ in range(cluster_size)
        ],
    )

    template = ClusterTemplate(
        name=cluster_name,
        productVersions={'CDH': cdh_version},
        services=['HDFS', 'YARN'],
        virtualInstanceGroups={
            'masters': master_group,
            'workers': worker_group,
        },
    )

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)
    except HTTPError as e:
        # Only the 302 response is treated as "cluster already exists".
        if e.code != 302:
            # Bare raise keeps the original traceback (``raise e`` would
            # restart it from this line on Python 2).
            raise
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Warning: a cluster with the same name already exists')

    print("Clusters: %s" % api.list(environment_name, deployment_name))
    return template.name
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    The C{[cluster]} section of the configuration supplies C{size} (number
    of workers), C{name} and C{cdh_version}. The submitted template runs
    HDFS and YARN on one master plus C{size} workers. After the create call
    the clusters reported by the API for the deployment are printed.

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    @return: the name of the cluster template that was submitted
    @raise HTTPError: any API error other than the 302 response, which is
        reported as a warning instead
    """
    # Configuration is read in the same order as the template fields so a
    # missing option fails before any virtual instance is built.
    cluster_size = config.getint("cluster", "size")
    cluster_name = config.get('cluster', 'name')
    product_versions = {'CDH': config.get('cluster', 'cdh_version')}

    # Single master hosting the HDFS and YARN coordination roles.
    masters = VirtualInstanceGroup(
        name='masters',
        minCount=1,
        serviceTypeToRoleTypes={
            'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
            'YARN': ['RESOURCEMANAGER', 'JOBHISTORY'],
        },
        virtualInstances=[
            create_virtual_instance_with_random_id(config, 'master'),
        ],
    )

    # One virtual instance per requested worker; minCount equals the full
    # size, so every worker is required.
    worker_instances = [
        create_virtual_instance_with_random_id(config, 'worker')
        for _ in range(cluster_size)
    ]
    workers = VirtualInstanceGroup(
        name='workers',
        minCount=cluster_size,
        serviceTypeToRoleTypes={
            'HDFS': ['DATANODE'],
            'YARN': ['NODEMANAGER'],
        },
        # NOTE(review): NODEMANAGER is a YARN role in serviceTypeToRoleTypes
        # above but is configured under the 'HDFS' key here -- confirm this
        # nesting is what the API expects.
        roleTypesConfigs={
            'HDFS': {
                'DATANODE': {'dfs_datanode_handler_count': '10'},
                'NODEMANAGER': {'nodemanager_webserver_port': '8047'},
            }
        },
        virtualInstances=worker_instances,
    )

    template = ClusterTemplate(
        name=cluster_name,
        productVersions=product_versions,
        services=['HDFS', 'YARN'],
        virtualInstanceGroups={'masters': masters, 'workers': workers},
    )

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)
    except HTTPError as e:
        # Only the 302 response is treated as "cluster already exists".
        if e.code != 302:
            # Bare raise keeps the original traceback (``raise e`` would
            # restart it from this line on Python 2).
            raise
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Warning: a cluster with the same name already exists')

    print("Clusters: %s" % api.list(environment_name, deployment_name))
    return template.name