def create_cluster(self, cluster_create_properties: ClusterCreateProperties, cluster_name): """ Creates an HDInsight cluster This operation simply starts the deployment, which happens asynchronously in azure. You can call :meth:`~get_cluster_state` for polling on its provisioning. .. note:: This operation is idempotent. If the cluster already exists, this call will simple ignore that fact. So this can be used like a "create if not exists" call. :param cluster_create_properties: the ClusterCreateProperties representing the HDI cluster spec. You can explore some sample specs `here <https://github.com/Azure-Samples/hdinsight-python-sdk-samples>`_. This python object follows the same structure as the `HDInsight arm template <https://docs.microsoft.com/en-us/azure/templates/microsoft.hdinsight/2018-06-01-preview/clusters>`_. :download:`Example ClusterCreateProperties<../../examples/azure_hdi_cluster_conn.py>` :type cluster_create_properties: ClusterCreateProperties :param cluster_name: The full cluster name. This is the unique deployment identifier of an HDI cluster in Azure, and will be used for fetching its state or submitting jobs to it HDI cluster names have the following `restrictions <https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#cluster-name>`_. :type cluster_name: string """ cluster_deployment: AzureOperationPoller = self.client.clusters.create( cluster_name=cluster_name, resource_group_name=self.resource_group_name, parameters=ClusterCreateParametersExtended( location=self.resource_group_location, tags={}, properties=cluster_create_properties)) # cluster_deployment.wait() - this blocks self.log.info("Cluster deployment started %s: %s", cluster_name, cluster_deployment.status())
def create_cluster(cmd, client, cluster_name, resource_group_name, cluster_type, location=None, tags=None, no_wait=False, cluster_version='default', cluster_tier=None, cluster_configurations=None, component_version=None, headnode_size='large', workernode_size='large', zookeepernode_size=None, edgenode_size=None, workernode_count=3, workernode_data_disks_per_node=None, workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None, http_username=None, http_password=None, ssh_username='******', ssh_password=None, ssh_public_key=None, storage_account=None, storage_account_key=None, storage_default_container=None, storage_default_filesystem=None, storage_account_managed_identity=None, vnet_name=None, subnet=None, domain=None, ldaps_urls=None, cluster_admin_account=None, cluster_admin_password=None, cluster_users_group_dns=None, assign_identity=None, encryption_vault_uri=None, encryption_key_name=None, encryption_key_version=None, encryption_algorithm='RSA-OAEP', esp=False, no_validation_timeout=False): from .util import build_identities_info, build_virtual_network_profile, parse_domain_name, \ get_storage_account_endpoint, validate_esp_cluster_create_params from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \ ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \ StorageProfile, StorageAccount, DataDisksGroups, SecurityProfile, \ DirectoryType, DiskEncryptionProperties, Tier, SshProfile, SshPublicKey validate_esp_cluster_create_params(esp, cluster_name, resource_group_name, cluster_type, subnet, domain, cluster_admin_account, assign_identity, ldaps_urls, cluster_admin_password, cluster_users_group_dns) if esp: if cluster_tier == Tier.standard: raise CLIError('Cluster tier cannot be {} when --esp is specified. ' 'Please use default value or specify {} explicitly.'.format(Tier.standard, Tier.premium)) if not cluster_tier: cluster_tier = Tier.premium # Update optional parameters with defaults location = location or _get_rg_location(cmd.cli_ctx, resource_group_name) # Format dictionary/free-form arguments if cluster_configurations: import json try: cluster_configurations = json.loads(cluster_configurations) except ValueError as ex: raise CLIError('The cluster_configurations argument must be valid JSON. Error: {}'.format(str(ex))) else: cluster_configurations = dict() if component_version: # See validator component_version = {c: v for c, v in [version.split('=') for version in component_version]} # Validate whether HTTP credentials were provided if 'gateway' in cluster_configurations: gateway_config = cluster_configurations['gateway'] else: gateway_config = dict() if http_username and 'restAuthCredential.username' in gateway_config: raise CLIError('An HTTP username must be specified either as a command-line parameter ' 'or in the cluster configuration, but not both.') if not http_username: http_username = '******' # Implement default logic here, in case a user specifies the username in configurations if not http_password: try: http_password = prompt_pass('HTTP password for the cluster:', confirm=True) except NoTTYException: raise CLIError('Please specify --http-password in non-interactive mode.') # Update the cluster config with the HTTP credentials gateway_config['restAuthCredential.isEnabled'] = 'true' # HTTP credentials are required http_username = http_username or gateway_config['restAuthCredential.username'] gateway_config['restAuthCredential.username'] = http_username gateway_config['restAuthCredential.password'] = http_password cluster_configurations['gateway'] = gateway_config # Validate whether SSH credentials were provided if not (ssh_password or ssh_public_key): logger.warning("SSH credentials not specified. Using the HTTP password as the SSH password.") ssh_password = http_password # Validate storage arguments from the user if storage_default_container and storage_default_filesystem: raise CLIError('Either the default container or the default filesystem can be specified, but not both.') # Retrieve primary blob service endpoint is_wasb = not storage_account_managed_identity storage_account_endpoint = None if storage_account: storage_account_endpoint = get_storage_account_endpoint(cmd, storage_account, is_wasb) # Attempt to infer the storage account key from the endpoint if not storage_account_key and storage_account and is_wasb: from .util import get_key_for_storage_account logger.info('Storage account key not specified. Attempting to retrieve key...') key = get_key_for_storage_account(cmd, storage_account) if not key: raise CLIError('Storage account key could not be inferred from storage account.') storage_account_key = key # Attempt to provide a default container for WASB storage accounts if not storage_default_container and is_wasb: storage_default_container = cluster_name.lower() logger.warning('Default WASB container not specified, using "%s".', storage_default_container) elif not storage_default_filesystem and not is_wasb: storage_default_filesystem = cluster_name.lower() logger.warning('Default ADLS file system not specified, using "%s".', storage_default_filesystem) # Validate storage info parameters if is_wasb and not _all_or_none(storage_account, storage_account_key, storage_default_container): raise CLIError('If storage details are specified, the storage account, storage account key, ' 'and the default container must be specified.') if not is_wasb and not _all_or_none(storage_account, storage_default_filesystem): raise CLIError('If storage details are specified, the storage account, ' 'and the default filesystem must be specified.') # Validate disk encryption parameters if not _all_or_none(encryption_vault_uri, encryption_key_name, encryption_key_version): raise CLIError('Either the encryption vault URI, key name and key version should be specified, ' 'or none of them should be.') # Specify virtual network profile only when network arguments are provided virtual_network_profile = subnet and build_virtual_network_profile(subnet) # Validate data disk parameters if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type: raise CLIError("Cannot define data disk storage account type unless disks per node is defined.") if not workernode_data_disks_per_node and workernode_data_disk_size: raise CLIError("Cannot define data disk size unless disks per node is defined.") # Specify data disk groups only when disk arguments are provided workernode_data_disk_groups = workernode_data_disks_per_node and [ DataDisksGroups( disks_per_node=workernode_data_disks_per_node, storage_account_type=workernode_data_disk_storage_account_type, disk_size_gb=workernode_data_disk_size ) ] os_profile = OsProfile( linux_operating_system_profile=LinuxOperatingSystemProfile( username=ssh_username, password=ssh_password, ssh_profile=ssh_public_key and SshProfile( public_keys=[SshPublicKey( certificate_data=ssh_public_key )] ) ) ) roles = [ # Required roles Role( name="headnode", target_instance_count=2, hardware_profile=HardwareProfile(vm_size=headnode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile ), Role( name="workernode", target_instance_count=workernode_count, hardware_profile=HardwareProfile(vm_size=workernode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile, data_disks_groups=workernode_data_disk_groups ) ] if zookeepernode_size: roles.append( Role( name="zookeepernode", target_instance_count=3, hardware_profile=HardwareProfile(vm_size=zookeepernode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile )) if edgenode_size: roles.append( Role( name="edgenode", target_instance_count=1, hardware_profile=HardwareProfile(vm_size=edgenode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile )) storage_accounts = [] if storage_account: # Specify storage account details only when storage arguments are provided storage_accounts.append( StorageAccount( name=storage_account_endpoint, key=storage_account_key, container=storage_default_container, file_system=storage_default_filesystem, resource_id=None if is_wasb else storage_account, msi_resource_id=storage_account_managed_identity, is_default=True ) ) additional_storage_accounts = [] # TODO: Add support for additional storage accounts if additional_storage_accounts: storage_accounts += [ StorageAccount( name=s.storage_account_endpoint, key=s.storage_account_key, container=s.container, is_default=False ) for s in additional_storage_accounts ] assign_identities = [] if assign_identity: assign_identities.append(assign_identity) if storage_account_managed_identity: assign_identities.append(storage_account_managed_identity) cluster_identity = build_identities_info(assign_identities) if assign_identities else None domain_name = domain and parse_domain_name(domain) if not ldaps_urls and domain_name: ldaps_urls = ['ldaps://{}:636'.format(domain_name)] security_profile = domain and SecurityProfile( directory_type=DirectoryType.active_directory, domain=domain_name, ldaps_urls=ldaps_urls, domain_username=cluster_admin_account, domain_user_password=cluster_admin_password, cluster_users_group_dns=cluster_users_group_dns, aadds_resource_id=domain, msi_resource_id=assign_identity ) disk_encryption_properties = encryption_vault_uri and DiskEncryptionProperties( vault_uri=encryption_vault_uri, key_name=encryption_key_name, key_version=encryption_key_version, encryption_algorithm=encryption_algorithm, msi_resource_id=assign_identity ) create_params = ClusterCreateParametersExtended( location=location, tags=tags, properties=ClusterCreateProperties( cluster_version=cluster_version, os_type=OSType.linux, tier=cluster_tier, cluster_definition=ClusterDefinition( kind=cluster_type, configurations=cluster_configurations, component_version=component_version ), compute_profile=ComputeProfile( roles=roles ), storage_profile=StorageProfile( storageaccounts=storage_accounts ), security_profile=security_profile, disk_encryption_properties=disk_encryption_properties ), identity=cluster_identity ) if no_wait: return sdk_no_wait(no_wait, client.create, resource_group_name, cluster_name, create_params) return client.create(resource_group_name, cluster_name, create_params)
def create_cluster(cmd, client, cluster_name, resource_group_name, cluster_type, location=None, tags=None, no_wait=False, cluster_version='default', cluster_tier=None, cluster_configurations=None, component_version=None, headnode_size=None, workernode_size=None, zookeepernode_size=None, edgenode_size=None, kafka_management_node_size=None, kafka_management_node_count=2, kafka_client_group_id=None, kafka_client_group_name=None, workernode_count=3, workernode_data_disks_per_node=None, workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None, http_username=None, http_password=None, ssh_username='******', ssh_password=None, ssh_public_key=None, storage_account=None, storage_account_key=None, storage_default_container=None, storage_default_filesystem=None, storage_account_managed_identity=None, vnet_name=None, subnet=None, domain=None, ldaps_urls=None, cluster_admin_account=None, cluster_admin_password=None, cluster_users_group_dns=None, assign_identity=None, minimal_tls_version=None, encryption_vault_uri=None, encryption_key_name=None, encryption_key_version=None, encryption_algorithm='RSA-OAEP', encryption_in_transit=None, autoscale_type=None, autoscale_min_workernode_count=None, autoscale_max_workernode_count=None, timezone=None, days=None, time=None, autoscale_workernode_count=None, encryption_at_host=None, esp=False, idbroker=False, resource_provider_connection=None, enable_private_link=None, enable_compute_isolation=None, host_sku=None, no_validation_timeout=False): from .util import build_identities_info, build_virtual_network_profile, parse_domain_name, \ get_storage_account_endpoint, validate_esp_cluster_create_params, set_vm_size from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \ ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \ StorageProfile, StorageAccount, DataDisksGroups, SecurityProfile, \ DirectoryType, DiskEncryptionProperties, Tier, SshProfile, SshPublicKey, \ KafkaRestProperties, ClientGroupInfo, EncryptionInTransitProperties, \ Autoscale, AutoscaleCapacity, AutoscaleRecurrence, AutoscaleSchedule, AutoscaleTimeAndCapacity, \ NetworkProperties, PrivateLink, ComputeIsolationProperties validate_esp_cluster_create_params(esp, cluster_name, resource_group_name, cluster_type, subnet, domain, cluster_admin_account, assign_identity, ldaps_urls, cluster_admin_password, cluster_users_group_dns) if esp: if cluster_tier == Tier.standard: raise CLIError( 'Cluster tier cannot be {} when --esp is specified. ' 'Please use default value or specify {} explicitly.'.format( Tier.standard, Tier.premium)) if not cluster_tier: cluster_tier = Tier.premium # Update optional parameters with defaults location = location or _get_rg_location(cmd.cli_ctx, resource_group_name) # Format dictionary/free-form arguments if not cluster_configurations: cluster_configurations = dict() if component_version: # See validator component_version = { c: v for c, v in [version.split('=') for version in component_version] } # Validate whether HTTP credentials were provided if 'gateway' in cluster_configurations: gateway_config = cluster_configurations['gateway'] else: gateway_config = dict() if http_username and 'restAuthCredential.username' in gateway_config: raise CLIError( 'An HTTP username must be specified either as a command-line parameter ' 'or in the cluster configuration, but not both.') if not http_username: http_username = '******' # Implement default logic here, in case a user specifies the username in configurations if not http_password: try: http_password = prompt_pass('HTTP password for the cluster:', confirm=True) except NoTTYException: raise CLIError( 'Please specify --http-password in non-interactive mode.') # Update the cluster config with the HTTP credentials gateway_config[ 'restAuthCredential.isEnabled'] = 'true' # HTTP credentials are required http_username = http_username or gateway_config[ 'restAuthCredential.username'] gateway_config['restAuthCredential.username'] = http_username gateway_config['restAuthCredential.password'] = http_password cluster_configurations['gateway'] = gateway_config # Validate whether SSH credentials were provided if not (ssh_password or ssh_public_key): logger.warning( "SSH credentials not specified. Using the HTTP password as the SSH password." ) ssh_password = http_password # Validate storage arguments from the user if storage_default_container and storage_default_filesystem: raise CLIError( 'Either the default container or the default filesystem can be specified, but not both.' ) # Retrieve primary blob service endpoint is_wasb = not storage_account_managed_identity storage_account_endpoint = None if storage_account: storage_account_endpoint = get_storage_account_endpoint( cmd, storage_account, is_wasb) # Attempt to infer the storage account key from the endpoint if not storage_account_key and storage_account and is_wasb: from .util import get_key_for_storage_account logger.info( 'Storage account key not specified. Attempting to retrieve key...') key = get_key_for_storage_account(cmd, storage_account) if not key: raise CLIError( 'Storage account key could not be inferred from storage account.' ) storage_account_key = key # Attempt to provide a default container for WASB storage accounts if not storage_default_container and storage_account and is_wasb: storage_default_container = cluster_name.lower() logger.warning('Default WASB container not specified, using "%s".', storage_default_container) elif not storage_default_filesystem and not is_wasb: storage_default_filesystem = cluster_name.lower() logger.warning('Default ADLS file system not specified, using "%s".', storage_default_filesystem) # Validate storage info parameters if is_wasb and not _all_or_none(storage_account, storage_account_key, storage_default_container): raise CLIError( 'If storage details are specified, the storage account, storage account key, ' 'and the default container must be specified.') if not is_wasb and not _all_or_none(storage_account, storage_default_filesystem): raise CLIError( 'If storage details are specified, the storage account, ' 'and the default filesystem must be specified.') # Validate disk encryption parameters if not _all_or_none(encryption_vault_uri, encryption_key_name, encryption_key_version): raise CLIError( 'Either the encryption vault URI, key name and key version should be specified, ' 'or none of them should be.') # Validate kafka rest proxy parameters if not _all_or_none(kafka_client_group_id, kafka_client_group_name): raise CLIError( 'Either the kafka client group id and kafka client group name should be specified, ' 'or none of them should be') # Validate and initialize autoscale setting autoscale_configuration = None load_based_type = "Load" schedule_based_type = "Schedule" if autoscale_type and autoscale_type.lower() == load_based_type.lower(): if not all( [autoscale_min_workernode_count, autoscale_max_workernode_count]): raise CLIError( 'When the --autoscale-type is Load, ' 'both --autoscale-min-workernode-count and --autoscale-max-workernode-count should be specified.' ) autoscale_configuration = Autoscale(capacity=AutoscaleCapacity( min_instance_count=autoscale_min_workernode_count, max_instance_count=autoscale_max_workernode_count)) elif autoscale_type and autoscale_type.lower( ) == schedule_based_type.lower(): if not all([timezone, days, time, autoscale_workernode_count]): raise CLIError( 'When the --autoscale-type is Schedule, all of the --timezone, --days, --time, ' '--autoscale-workernode-count should be specified.') autoscale_configuration = Autoscale(recurrence=AutoscaleRecurrence( time_zone=timezone, schedule=[ AutoscaleSchedule( days=days, time_and_capacity=AutoscaleTimeAndCapacity( time=time, min_instance_count=autoscale_workernode_count, max_instance_count=autoscale_workernode_count)) ])) # Specify virtual network profile only when network arguments are provided virtual_network_profile = subnet and build_virtual_network_profile(subnet) # Validate data disk parameters if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type: raise CLIError( "Cannot define data disk storage account type unless disks per node is defined." ) if not workernode_data_disks_per_node and workernode_data_disk_size: raise CLIError( "Cannot define data disk size unless disks per node is defined.") # Specify data disk groups only when disk arguments are provided workernode_data_disk_groups = workernode_data_disks_per_node and [ DataDisksGroups( disks_per_node=workernode_data_disks_per_node, storage_account_type=workernode_data_disk_storage_account_type, disk_size_gb=workernode_data_disk_size) ] # call get default vm size api to set vm size if customer does not provide the value if not (workernode_size and headnode_size): headnode_size, workernode_size = set_vm_size(cmd.cli_ctx, location, cluster_type, headnode_size, workernode_size) if not headnode_size: raise RequiredArgumentMissingError( 'Please specify --headnode-size explicitly.') if not workernode_size: raise RequiredArgumentMissingError( 'Please specify --workernode-size explicitly.') os_profile = OsProfile( linux_operating_system_profile=LinuxOperatingSystemProfile( username=ssh_username, password=ssh_password, ssh_profile=ssh_public_key and SshProfile( public_keys=[SshPublicKey(certificate_data=ssh_public_key)]))) roles = [ # Required roles Role(name="headnode", target_instance_count=2, hardware_profile=HardwareProfile(vm_size=headnode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile), Role(name="workernode", target_instance_count=workernode_count, hardware_profile=HardwareProfile(vm_size=workernode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile, data_disks_groups=workernode_data_disk_groups, autoscale_configuration=autoscale_configuration) ] if zookeepernode_size: roles.append( Role(name="zookeepernode", target_instance_count=3, hardware_profile=HardwareProfile(vm_size=zookeepernode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile)) if edgenode_size: roles.append( Role(name="edgenode", target_instance_count=1, hardware_profile=HardwareProfile(vm_size=edgenode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile)) if kafka_management_node_size: # generate kafkaRestProperties roles.append( Role(name="kafkamanagementnode", target_instance_count=kafka_management_node_count, hardware_profile=HardwareProfile( vm_size=kafka_management_node_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile)) if esp and idbroker: roles.append( Role(name="idbrokernode", target_instance_count=2, virtual_network_profile=virtual_network_profile)) storage_accounts = [] if storage_account: # Specify storage account details only when storage arguments are provided storage_accounts.append( StorageAccount(name=storage_account_endpoint, key=storage_account_key, container=storage_default_container, file_system=storage_default_filesystem, resource_id=storage_account, msi_resource_id=storage_account_managed_identity, is_default=True)) additional_storage_accounts = [ ] # TODO: Add support for additional storage accounts if additional_storage_accounts: storage_accounts += [ StorageAccount(name=s.storage_account_endpoint, key=s.storage_account_key, container=s.container, is_default=False) for s in additional_storage_accounts ] assign_identities = [] if assign_identity: assign_identities.append(assign_identity) if storage_account_managed_identity: assign_identities.append(storage_account_managed_identity) cluster_identity = build_identities_info( assign_identities) if assign_identities else None domain_name = domain and parse_domain_name(domain) if not ldaps_urls and domain_name: ldaps_urls = ['ldaps://{}:636'.format(domain_name)] security_profile = domain and SecurityProfile( directory_type=DirectoryType.active_directory, domain=domain_name, ldaps_urls=ldaps_urls, domain_username=cluster_admin_account, domain_user_password=cluster_admin_password, cluster_users_group_dns=cluster_users_group_dns, aadds_resource_id=domain, msi_resource_id=assign_identity) disk_encryption_properties = encryption_vault_uri and DiskEncryptionProperties( vault_uri=encryption_vault_uri, key_name=encryption_key_name, key_version=encryption_key_version, encryption_algorithm=encryption_algorithm, msi_resource_id=assign_identity) if encryption_at_host: if disk_encryption_properties: disk_encryption_properties.encryption_at_host = encryption_at_host else: disk_encryption_properties = DiskEncryptionProperties( encryption_at_host=encryption_at_host) kafka_rest_properties = ( kafka_client_group_id and kafka_client_group_name ) and KafkaRestProperties(client_group_info=ClientGroupInfo( group_id=kafka_client_group_id, group_name=kafka_client_group_name)) encryption_in_transit_properties = encryption_in_transit and EncryptionInTransitProperties( is_encryption_in_transit_enabled=encryption_in_transit) # relay outbound and private link network_properties = ( resource_provider_connection or enable_private_link) and NetworkProperties( resource_provider_connection=resource_provider_connection, private_link=PrivateLink.enabled if enable_private_link is True else PrivateLink.disabled) # compute isolation compute_isolation_properties = enable_compute_isolation and ComputeIsolationProperties( enable_compute_isolation=enable_compute_isolation, host_sku=host_sku) create_params = ClusterCreateParametersExtended( location=location, tags=tags, properties=ClusterCreateProperties( cluster_version=cluster_version, os_type=OSType.linux, tier=cluster_tier, cluster_definition=ClusterDefinition( kind=cluster_type, configurations=cluster_configurations, component_version=component_version), compute_profile=ComputeProfile(roles=roles), storage_profile=StorageProfile(storageaccounts=storage_accounts), security_profile=security_profile, disk_encryption_properties=disk_encryption_properties, kafka_rest_properties=kafka_rest_properties, min_supported_tls_version=minimal_tls_version, encryption_in_transit_properties=encryption_in_transit_properties, network_properties=network_properties, compute_isolation_properties=compute_isolation_properties), identity=cluster_identity) if no_wait: return sdk_no_wait(no_wait, client.create, resource_group_name, cluster_name, create_params) return client.create(resource_group_name, cluster_name, create_params)
def get_cluster_create_parameters(resource_group_location: str, tags: dict, cluster_params: ClusterCreateProperties): return ClusterCreateParametersExtended( location=resource_group_location, tags=tags, properties=cluster_params)
def create_cluster(cmd, client, cluster_name, resource_group_name, location=None, tags=None, no_wait=False, cluster_version='default', cluster_type='spark', cluster_tier=None, cluster_configurations=None, component_version=None, headnode_size='large', workernode_size='large', zookeepernode_size=None, edgenode_size=None, workernode_count=3, workernode_data_disks_per_node=None, workernode_data_disk_storage_account_type=None, workernode_data_disk_size=None, http_username=None, http_password=None, ssh_username='******', ssh_password=None, ssh_public_key=None, storage_account=None, storage_account_key=None, storage_default_container=None, storage_default_filesystem=None, virtual_network=None, subnet_name=None): from azure.mgmt.hdinsight.models import ClusterCreateParametersExtended, ClusterCreateProperties, OSType, \ ClusterDefinition, ComputeProfile, HardwareProfile, Role, OsProfile, LinuxOperatingSystemProfile, \ StorageProfile, StorageAccount, VirtualNetworkProfile, DataDisksGroups # Update optional parameters with defaults additional_storage_accounts = [ ] # TODO: Add support for additional storage accounts location = location or _get_rg_location(cmd.cli_ctx, resource_group_name) # Format dictionary/free-form arguments if cluster_configurations: import json try: cluster_configurations = json.loads(cluster_configurations) except ValueError as ex: raise CLIError( 'The cluster_configurations argument must be valid JSON. Error: {}' .format(str(ex))) else: cluster_configurations = dict() if component_version: # See validator component_version = { c: v for c, v in [version.split('=') for version in component_version] } # Validate whether HTTP credentials were provided if 'gateway' in cluster_configurations: gateway_config = cluster_configurations['gateway'] else: gateway_config = dict() if http_username and 'restAuthCredential.username' in gateway_config: raise CLIError( 'An HTTP username must be specified either as a command-line parameter ' 'or in the cluster configuration, but not both.') else: http_username = '******' # Implement default logic here, in case a user specifies the username in configurations is_password_in_cluster_config = 'restAuthCredential.password' in gateway_config if http_password and is_password_in_cluster_config: raise CLIError( 'An HTTP password must be specified either as a command-line parameter ' 'or in the cluster configuration, but not both.') if not (http_password or is_password_in_cluster_config): raise CLIError('An HTTP password is required.') # Update the cluster config with the HTTP credentials gateway_config[ 'restAuthCredential.isEnabled'] = 'true' # HTTP credentials are required http_username = http_username or gateway_config[ 'restAuthCredential.username'] gateway_config['restAuthCredential.username'] = http_username http_password = http_password or gateway_config[ 'restAuthCredential.password'] gateway_config['restAuthCredential.password'] = http_password cluster_configurations['gateway'] = gateway_config # Validate whether SSH credentials were provided if not (ssh_password or ssh_public_key): logger.warning( "SSH credentials not specified. Using the HTTP password as the SSH password." ) ssh_password = http_password # Validate storage arguments from the user if storage_default_container and storage_default_filesystem: raise CLIError( 'Either the default container or the default filesystem can be specified, but not both.' ) # Attempt to infer the storage account key from the endpoint if not storage_account_key and storage_account: from .util import get_key_for_storage_account logger.info( 'Storage account key not specified. Attempting to retrieve key...') key = get_key_for_storage_account(cmd, storage_account, resource_group_name) if not key: logger.warning( 'Storage account key could not be inferred from storage account.' ) else: storage_account_key = key # Attempt to provide a default container for WASB storage accounts if not storage_default_container and storage_account and _is_wasb_endpoint( storage_account): storage_default_container = cluster_name logger.warning('Default WASB container not specified, using "%s".', storage_default_container) # Validate storage info parameters if not _all_or_none( storage_account, storage_account_key, (storage_default_container or storage_default_filesystem)): raise CLIError( 'If storage details are specified, the storage account, storage account key, ' 'and either the default container or default filesystem must be specified.' ) # Validate network profile parameters if not _all_or_none(virtual_network, subnet_name): raise CLIError( 'Either both the virtual network and subnet should be specified, or neither should be.' ) # Specify virtual network profile only when network arguments are provided virtual_network_profile = virtual_network and VirtualNetworkProfile( id=virtual_network, subnet=subnet_name) # Validate data disk parameters if not workernode_data_disks_per_node and workernode_data_disk_storage_account_type: raise CLIError( "Cannot define data disk storage account type unless disks per node is defined." ) if not workernode_data_disks_per_node and workernode_data_disk_size: raise CLIError( "Cannot define data disk size unless disks per node is defined.") # Specify data disk groups only when disk arguments are provided workernode_data_disk_groups = workernode_data_disks_per_node and [ DataDisksGroups( disks_per_node=workernode_data_disks_per_node, storage_account_type=workernode_data_disk_storage_account_type, disk_size_gb=workernode_data_disk_size) ] os_profile = OsProfile( linux_operating_system_profile=LinuxOperatingSystemProfile( username=ssh_username, password=ssh_password, ssh_public_key=ssh_public_key)) roles = [ # Required roles Role(name="headnode", target_instance_count=2, hardware_profile=HardwareProfile(vm_size=headnode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile), Role(name="workernode", target_instance_count=workernode_count, hardware_profile=HardwareProfile(vm_size=workernode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile, data_disks_groups=workernode_data_disk_groups) ] if zookeepernode_size: roles.append( Role(name="zookeepernode", target_instance_count=3, hardware_profile=HardwareProfile(vm_size=zookeepernode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile)) if edgenode_size: roles.append( Role(name="edgenode", target_instance_count=1, hardware_profile=HardwareProfile(vm_size=edgenode_size), os_profile=os_profile, virtual_network_profile=virtual_network_profile)) storage_accounts = [] if storage_account: # Specify storage account details only when storage arguments are provided storage_accounts.append( StorageAccount(name=storage_account, key=storage_account_key, container=storage_default_container, file_system=storage_default_filesystem, is_default=True)) if additional_storage_accounts: storage_accounts += [ StorageAccount(name=s.storage_account, key=s.storage_account_key, container=s.container, is_default=False) for s in additional_storage_accounts ] create_params = ClusterCreateParametersExtended( location=location, tags=tags, properties=ClusterCreateProperties( cluster_version=cluster_version, os_type=OSType.linux, tier=cluster_tier, cluster_definition=ClusterDefinition( kind=cluster_type, configurations=cluster_configurations, component_version=component_version), compute_profile=ComputeProfile(roles=roles), storage_profile=StorageProfile(storageaccounts=storage_accounts))) if no_wait: return sdk_no_wait(no_wait, client.create, resource_group_name, cluster_name, create_params) return client.create(resource_group_name, cluster_name, create_params)
def start(self): """ Make the cluster operational in DSS, creating an actual cluster if necessary. :returns: a tuple of : * the settings needed to access hadoop/hive/impala/spark on the cluster. If not specified, then the corresponding element (hadoop/hive/impala/spark) is not overriden * an dict of data to pass to to other methods when handling the cluster created """ logging.info("Init cluster for HDI") create_params = ClusterCreateParametersExtended( location=self.location, tags={}, properties=ClusterCreateProperties( #TODO: parametrize this correctly cluster_version="3.6", os_type=OSType.linux, tier=Tier.standard, cluster_definition=ClusterDefinition( kind="spark", configurations={ "gateway": { "restAuthCredential.enabled_credential": "True", "restAuthCredential.username": self.gateway_username, "restAuthCredential.password": self.gateway_password } }), compute_profile=ComputeProfile(roles=[ Role(name="headnode", target_instance_count=2, hardware_profile=HardwareProfile( vm_size=self.headnode_size), os_profile=OsProfile(linux_operating_system_profile= LinuxOperatingSystemProfile( username=self.ssh_username, password=self.ssh_password)), virtual_network_profile=self.vnet_profile), Role(name="workernode", target_instance_count=self.worker_count, hardware_profile=HardwareProfile( vm_size=self.worker_size), os_profile=OsProfile(linux_operating_system_profile= LinuxOperatingSystemProfile( username=self.ssh_username, password=self.ssh_password)), virtual_network_profile=self.vnet_profile) ]), storage_profile=StorageProfile(storageaccounts=[ StorageAccount(name=self.storage_account_name, key=self.storage_account_key, container=self.storage_account_container, is_default=True) ]))) logging.info('Creating Cluster ....') create_poller = self.hdi_client.clusters.create( self.resource_group_name, self.hdi_cluster_name, create_params) logging.info('Waiting for result poller...') try: cluster = create_poller.result() except: logging.error( 'Cluster creation failed, deleting what was provisioned') try: self.hdi_client.clusters.delete(self.resource_group_name, self.hdi_cluster_name) except: logging.error('Could not delete provisioned resources') pass raise logging.info('Poller resturned {}'.format(pformat(cluster))) try: dss_cluster_config = dku_hdi.make_cluster_keys_and_data( self.aad_client_credentials, self.subscription_id, self.hdi_cluster_name, self.resource_group_name) except: logging.error('Could not attach to created cluster, deleting') try: self.hdi_client.clusters.delete(self.resource_group_name, self.hdi_cluster_name) except: logging.error('Could not delete created cluster') pass raise return dss_cluster_config