def main():
    SUBSCRIPTION_ID = os.environ.get("SUBSCRIPTION_ID", None)
    TIME = str(time.time()).replace('.', '')
    GROUP_NAME = "testhdinsight" + TIME

    # Create clients
    # For other authentication approaches, please see: https://pypi.org/project/azure-identity/
    resource_client = ResourceManagementClient(
        credential=DefaultAzureCredential(),
        subscription_id=SUBSCRIPTION_ID
    )
    hdinsight_client = HDInsightManagementClient(
        credential=DefaultAzureCredential(),
        subscription_id=SUBSCRIPTION_ID
    )

    # Create resource group
    resource_client.resource_groups.create_or_update(
        GROUP_NAME,
        {"location": "eastus"}
    )

    # List HDInsight operations
    print("operations:\n")
    for operation in hdinsight_client.operations.list():
        print(operation)

    # Delete resource group
    resource_client.resource_groups.begin_delete(GROUP_NAME).result()
def delete_cluster(self):
    client = HDInsightManagementClient(self.get_credential(), self.subscription_id)
    delete_poller = client.clusters.delete(self.resource_group_name,
                                           cluster_name=self.cluster_name)
    delete_poller.wait()
    return delete_poller.result()
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)
    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Parse ADLS Gen2 storage account name from resource id
    adls_gen2_account_name = ADLS_GEN2_RESOURCE_ID.split('/')[-1]

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="Hadoop",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=adls_gen2_account_name + DFS_ENDPOINT_SUFFIX,
                               is_default=True,
                               file_system=ADLS_GEN2_FILE_SYSTEM_NAME.lower(),
                               resource_id=ADLS_GEN2_RESOURCE_ID,
                               msi_resource_id=MANAGED_IDENTITY_RESOURCE_ID)
            ])),
        identity=ClusterIdentity(
            type=ResourceIdentityType.user_assigned,
            user_assigned_identities={MANAGED_IDENTITY_RESOURCE_ID: {}}))

    print('Starting to create HDInsight Hadoop cluster {} with Azure Data Lake Storage Gen2'
          .format(CLUSTER_NAME))
    client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
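# Note: in this SDK version clusters.create returns a long-running-operation
# poller, so the sample above returns before provisioning finishes. A
# hypothetical helper sketch (not part of the original sample) that blocks
# until the cluster is ready, mirroring the Enterprise Security Package
# sample further below:
def create_and_wait(client, create_params):
    create_poller = client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
    cluster = create_poller.result()  # blocks until the cluster reaches a terminal state
    print('Cluster {} is {}'.format(cluster.name, cluster.properties.cluster_state))
    return cluster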
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)
    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="Kafka",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)),
                     data_disks_groups=[DataDisksGroups(disks_per_node=2)]),
                Role(name="zookeepernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Small"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=STORAGE_ACCOUNT_NAME + BLOB_ENDPOINT_SUFFIX,
                               key=STORAGE_ACCOUNT_KEY,
                               container=CONTAINER_NAME.lower(),
                               is_default=True)
            ])))

    print('Starting to create HDInsight Kafka cluster {}'.format(CLUSTER_NAME))
    client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
def get_conn(self):
    """
    Return an HDInsight management client.

    This hook requires a service principal in order to work. After creating
    the service principal (Azure Active Directory / App Registrations), fill in
    the client_id (Application ID) as login, the generated password as
    password, and the tenantId and subscriptionId in the extras field as JSON.

    References
    https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal
    https://docs.microsoft.com/en-us/python/api/overview/azure/key-vault?toc=%2Fpython%2Fazure%2FTOC.json&view=azure-python

    :return: HDInsight management client
    :rtype: HDInsightManagementClient
    """
    conn = self.get_connection(self.conn_id)
    extra_options = conn.extra_dejson

    key_path = extra_options.get('key_path', False)
    self.resource_group_name = str(extra_options.get("resource_group_name"))
    self.resource_group_location = str(extra_options.get("resource_group_location"))

    if key_path:
        if key_path.endswith('.json'):
            self.log.info('Getting connection using a JSON key file.')
            return get_client_from_auth_file(HDInsightManagementClient, key_path)
        else:
            raise AirflowException('Unrecognised extension for key file.')

    if os.environ.get('AZURE_AUTH_LOCATION'):
        key_path = os.environ.get('AZURE_AUTH_LOCATION')
        if key_path.endswith('.json'):
            self.log.info('Getting connection using a JSON key file.')
            return get_client_from_auth_file(HDInsightManagementClient, key_path)
        else:
            raise AirflowException('Unrecognised extension for key file.')

    credentials = ServicePrincipalCredentials(
        client_id=conn.login,
        secret=conn.password,
        tenant=conn.extra_dejson['tenantId'])
    subscription_id = conn.extra_dejson['subscriptionId']
    return HDInsightManagementClient(credentials, str(subscription_id))
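# Illustrative sketch (not part of the hook): the shape of the Airflow
# connection that get_conn() above expects. Only the field names come from the
# code; every value is a placeholder.
EXAMPLE_HDINSIGHT_CONNECTION = {
    "login": "<service-principal-application-id>",
    "password": "<service-principal-client-secret>",
    "extra": (
        '{"tenantId": "<tenant-id>", '
        '"subscriptionId": "<subscription-id>", '
        '"resource_group_name": "<resource-group>", '
        '"resource_group_location": "<location>"}'
    ),
}
# Alternatively, the extras may carry "key_path" pointing to a .json SDK auth
# file, or the AZURE_AUTH_LOCATION environment variable may be set instead.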
def make_cluster_keys_and_data(aad_credentials, subscription_id, hdi_cluster_name, hdi_cluster_rg):
    # aad_credentials of type azure.common.credentials.InteractiveCredentials,
    # ServicePrincipalCredentials or UserPassCredentials
    hdi_client = HDInsightManagementClient(aad_credentials, subscription_id)
    cluster = hdi_client.clusters.get(hdi_cluster_rg, hdi_cluster_name)

    cluster_core_info = hdi_client.configurations.get(hdi_cluster_rg, hdi_cluster_name, 'core-site')
    logging.info('HDI client retrieved core info {}'.format(pformat(cluster_core_info)))

    cluster_gateway = hdi_client.configurations.get(hdi_cluster_rg, hdi_cluster_name, 'gateway')
    try:
        ambari_user = cluster_gateway['restAuthCredential.username']
        ambari_pwd = cluster_gateway['restAuthCredential.password']
    except KeyError:
        logging.error('Could not retrieve Ambari gateway credentials')
        raise

    cluster_endpoints = cluster.properties.connectivity_endpoints
    ambari_host = ['https://' + e.location
                   for e in cluster_endpoints
                   if e.port == 443 and e.name == 'HTTPS'][0]
    ambari_client = HdiAmbariClient(ambari_host, ambari_user, ambari_pwd)

    conf_tags = ambari_client.set_desired_configs_tags(hdi_cluster_name)
    logging.info('Updated config tags:\n {}'.format(pformat(conf_tags)))

    dss_config_builder = AbstractDSSConfigBuilder(hdi_cluster_name, ambari_client)
    storage_info = dss_config_builder.make_storage_from_hdi_core_info(cluster_core_info)
    dss_config = dss_config_builder.make_dss_config(storage_info)
    logging.info('Returning DSS cluster config {}'.format(pformat(dss_config)))

    return [
        dss_config,
        {
            'hdiClusterId': hdi_cluster_name,
            'subscriptionId': subscription_id,
            'resourceGroupName': hdi_cluster_rg
        }
    ]
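# Hypothetical usage sketch (not part of the plugin): the identifiers below are
# placeholders, and the import assumes the legacy azure.common credentials used
# elsewhere in this document.
def example_make_cluster_keys_and_data():
    from azure.common.credentials import ServicePrincipalCredentials
    creds = ServicePrincipalCredentials(client_id='<app-id>',
                                        secret='<client-secret>',
                                        tenant='<tenant-id>')
    dss_config, cluster_data = make_cluster_keys_and_data(
        creds, '<subscription-id>', 'my-hdi-cluster', 'my-resource-group')
    return dss_config, cluster_data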
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)
    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Parse AAD-DS DNS domain name from resource id
    aadds_dns_domain_name = AADDS_RESOURCE_ID.split('/')[-1]

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.premium,
            cluster_definition=ClusterDefinition(
                kind="Spark",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    },
                    "hive-site": {
                        "javax.jdo.option.ConnectionDriverName": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
                        "javax.jdo.option.ConnectionURL":
                            "jdbc:sqlserver://{};database={};encrypt=true;trustServerCertificate=true;create=false;loginTimeout=300"
                            .format(METASTORE_SQL_SERVER, METASTORE_SQL_DATABASE),
                        "javax.jdo.option.ConnectionUserName": METASTORE_SQL_USERNAME,
                        "javax.jdo.option.ConnectionPassword": METASTORE_SQL_PASSWORD,
                    },
                    "hive-env": {
                        "hive_database": "Existing MSSQL Server database with SQL authentication",
                        "hive_database_name": METASTORE_SQL_DATABASE,
                        "hive_database_type": "mssql",
                        "hive_existing_mssql_server_database": METASTORE_SQL_DATABASE,
                        "hive_existing_mssql_server_host": METASTORE_SQL_SERVER,
                        "hive_hostname": METASTORE_SQL_SERVER,
                    },
                    "ambari-conf": {
                        "database-server": METASTORE_SQL_SERVER,
                        "database-name": AMBARI_SQL_DATABASE,
                        "database-user-name": AMBARI_SQL_USERNAME,
                        "database-user-password": AMBARI_SQL_PASSWORD,
                    },
                    "admin-properties": {
                        "audit_db_name": METASTORE_SQL_DATABASE,
                        "audit_db_user": METASTORE_SQL_USERNAME,
                        "audit_db_password": METASTORE_SQL_PASSWORD,
                        "db_name": METASTORE_SQL_DATABASE,
                        "db_user": METASTORE_SQL_USERNAME,
                        "db_password": METASTORE_SQL_PASSWORD,
                        "db_host": METASTORE_SQL_SERVER,
                        "db_root_user": "",
                        "db_root_password": ""
                    },
                    "ranger-admin-site": {
                        "ranger.jpa.jdbc.url":
                            "jdbc:sqlserver://{};databaseName={}".format(
                                METASTORE_SQL_SERVER, METASTORE_SQL_DATABASE)
                    },
                    "ranger-env": {
                        "ranger_privelege_user_jdbc_url":
                            "jdbc:sqlserver://{};databaseName={}".format(
                                METASTORE_SQL_SERVER, METASTORE_SQL_DATABASE)
                    },
                    "ranger-hive-security": {
                        "ranger.plugin.hive.service.name": RANGER_HIVE_PLUGIN_SERVICE_NAME
                    },
                    "ranger-yarn-security": {
                        "ranger.plugin.yarn.service.name": RANGER_HIVE_PLUGIN_SERVICE_NAME
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=STORAGE_ACCOUNT_NAME + BLOB_ENDPOINT_SUFFIX,
                               key=STORAGE_ACCOUNT_KEY,
                               container=CONTAINER_NAME.lower(),
                               is_default=True)
            ]),
            security_profile=SecurityProfile(
                directory_type=DirectoryType.active_directory,
                ldaps_urls=[LDAPS_URL],
                domain_username=DOMAIN_USER_NAME,
                domain=aadds_dns_domain_name,
                cluster_users_group_dns=[CLUSTER_ACCESS_GROUP],
                aadds_resource_id=AADDS_RESOURCE_ID,
                msi_resource_id=MANAGED_IDENTITY_RESOURCE_ID),
            disk_encryption_properties=DiskEncryptionProperties(
                vault_uri=ENCRYPTION_VAULT_URI,
                key_name=ENCRYPTION_KEY_NAME,
                key_version=ENCRYPTION_KEY_VERSION,
                encryption_algorithm=ENCRYPTION_ALGORITHM,
                msi_resource_id=ASSIGN_IDENTITY)),
        identity=ClusterIdentity(
            type=ResourceIdentityType.user_assigned,
            user_assigned_identities={MANAGED_IDENTITY_RESOURCE_ID: {}}))

    print('Starting to create HDInsight Spark cluster {} with Enterprise Security Package'
          .format(CLUSTER_NAME))
    create_poller = client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
    cluster_response = create_poller.result()

    # Basic validation of the provisioned cluster
    if (CLUSTER_NAME == cluster_response.name
            and cluster_response.id.endswith(CLUSTER_NAME)
            and "Running" == cluster_response.properties.cluster_state
            and "Microsoft.HDInsight/clusters" == cluster_response.type):
        return 0
    return 1
def main():
    # Authentication
    credentials = ServicePrincipalCredentials(client_id=CLIENT_ID,
                                              secret=CLIENT_SECRET,
                                              tenant=TENANT_ID)
    client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

    # Parse AAD-DS DNS domain name from resource id
    aadds_dns_domain_name = AADDS_RESOURCE_ID.split('/')[-1]

    # Prepare cluster create parameters
    create_params = ClusterCreateParametersExtended(
        location=LOCATION,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version="3.6",
            os_type=OSType.linux,
            tier=Tier.premium,
            cluster_definition=ClusterDefinition(
                kind="Spark",
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
                        "restAuthCredential.password": PASSWORD
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME))),
                Role(name="workernode",
                     target_instance_count=3,
                     hardware_profile=HardwareProfile(vm_size="Large"),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=SSH_USER_NAME,
                             password=PASSWORD)),
                     virtual_network_profile=VirtualNetworkProfile(
                         id=VIRTUAL_NETWORK_RESOURCE_ID,
                         subnet='{}/subnets/{}'.format(VIRTUAL_NETWORK_RESOURCE_ID, SUBNET_NAME)))
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=STORAGE_ACCOUNT_NAME + BLOB_ENDPOINT_SUFFIX,
                               key=STORAGE_ACCOUNT_KEY,
                               container=CONTAINER_NAME.lower(),
                               is_default=True)
            ]),
            security_profile=SecurityProfile(
                directory_type=DirectoryType.active_directory,
                ldaps_urls=[LDAPS_URL],
                domain_username=DOMAIN_USER_NAME,
                domain=aadds_dns_domain_name,
                cluster_users_group_dns=[CLUSTER_ACCESS_GROUP],
                aadds_resource_id=AADDS_RESOURCE_ID,
                msi_resource_id=MANAGED_IDENTITY_RESOURCE_ID)),
        identity=ClusterIdentity(
            type=ResourceIdentityType.user_assigned,
            user_assigned_identities={MANAGED_IDENTITY_RESOURCE_ID: {}}))

    print('Starting to create HDInsight Spark cluster {} with Enterprise Security Package'
          .format(CLUSTER_NAME))
    client.clusters.create(RESOURCE_GROUP_NAME, CLUSTER_NAME, create_params)
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.hdinsight import HDInsightManagementClient
from azure.mgmt.hdinsight.models import *

import config

SUBSCRIPTION_ID = config.azurerba['subscriptionId']

credentials = ServicePrincipalCredentials(
    client_id=config.azurerba['client_id'],
    secret=config.azurerba['client_secret'],
    tenant=config.azurerba['tenant']
)

client = HDInsightManagementClient(credentials, SUBSCRIPTION_ID)

# The name for the cluster you are creating
cluster_name = "biglakehdinsightauto"
# The name of your existing resource group
resource_group_name = "bdap-poc-playground"
# Choose a username
username = config.azure['username']
# Choose a password
password = config.azure['password']
# Replace with the DFS endpoint of your storage account
storage_account = "biglakestorageccountgen2.dfs.core.windows.net"  # biglakestorageaccount.dfs.core.windows.net
# Storage account key you obtained above
storage_account_key = config.azurerba['storage_account_key']
# Choose a region
location = "West Europe"
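# Hedged sketch (not in the original script): a quick check that the
# service-principal client above is authorised, by listing the clusters that
# already exist in the target resource group before creating a new one.
for existing_cluster in client.clusters.list_by_resource_group(resource_group_name):
    print(existing_cluster.name, existing_cluster.properties.cluster_state)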
def __init__(self, cluster_id, cluster_name, config, plugin_config):
    """
    :param cluster_id: the DSS identifier for this instance of cluster
    :param cluster_name: the name given by the user to this cluster
    :param config: the dict of the configuration of the object
    :param plugin_config: contains the plugin settings
    """
    self.cluster_id = cluster_id
    self.dss_cluster_name = cluster_name
    self.config = config
    self.plugin_config = plugin_config
    self.aad_client_credentials = None

    # TODO: check when credentials are not the right way or incorrect
    if config['aadAuth'] == "user_pass":
        print("Using User Password authentication")
        self.aad_username = config['aad_username']
        self.aad_password = config['aad_password']
        self.aad_client_credentials = UserPassCredentials(
            username=self.aad_username, password=self.aad_password)
    elif config['aadAuth'] == "service_principal":
        print('Using Service Principal for authentication')
        self.client_id = config['client_id']
        self.client_secret = config['client_secret']
        self.tenant_id = config['tenant_id']
        self.aad_client_credentials = ServicePrincipalCredentials(
            self.client_id, self.client_secret, tenant=self.tenant_id)
    else:
        raise ValueError('Unsupported authentication method')

    # params
    self.subscription_id = config['subscription_id']
    self.cluster_version = config['cluster_version']
    self.hdi_cluster_name = config['basename']
    self.resource_group_name = config['resource_group_name']
    self.location = config['location']
    # TODO: should retrieve available formats for output in case of error?
    self.headnode_size = config['headnode_size']
    self.worker_size = config['worker_size']
    self.worker_count = int(config['worker_count'])
    self.gateway_username = config['gateway_username']
    self.gateway_password = config['gateway_password']
    self.ssh_username = config['ssh_username']
    # TODO: implement ssh with uploaded key
    self.ssh_password = config['ssh_password']
    self.storage_account_name = '{}.blob.core.windows.net'.format(config['storage_account'])
    self.storage_account_key = config['storage_account_key']
    self.storage_account_container = config['storage_account_container']
    self.vnet_name = config['vnet_name']
    self.subnet_name = config['subnet_name']
    self.vnet_id = ('/subscriptions/{subsId}/resourceGroups/{rgName}'
                    '/providers/Microsoft.Network/virtualNetworks/{vnetName}').format(
                        subsId=self.subscription_id,
                        rgName=self.resource_group_name,
                        vnetName=self.vnet_name)
    self.subnet_id = ('/subscriptions/{subsId}/resourceGroups/{rgName}'
                      '/providers/Microsoft.Network/virtualNetworks/{vnetName}'
                      '/subnets/{subnetName}').format(
                          subsId=self.subscription_id,
                          rgName=self.resource_group_name,
                          vnetName=self.vnet_name,
                          subnetName=self.subnet_name)
    self.vnet_profile = VirtualNetworkProfile(id=self.vnet_id, subnet=self.subnet_id)
    # TODO: better test the subscription_id here?
    self.hdi_client = HDInsightManagementClient(
        self.aad_client_credentials, self.subscription_id)
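# Hedged sketch (not part of the original plugin): a possible start method
# built from the attributes initialised above, following the
# ClusterCreateParametersExtended pattern used in the samples earlier in this
# document. It assumes the azure.mgmt.hdinsight.models classes are imported in
# this module; the method name, the "Spark" kind, and the headnode count are
# illustrative assumptions, not the plugin's actual code.
def start_cluster_sketch(self):
    create_params = ClusterCreateParametersExtended(
        location=self.location,
        tags={},
        properties=ClusterCreateProperties(
            cluster_version=self.cluster_version,
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="Spark",  # assumption: the plugin targets Spark clusters
                configurations={
                    "gateway": {
                        "restAuthCredential.isEnabled": "true",
                        "restAuthCredential.username": self.gateway_username,
                        "restAuthCredential.password": self.gateway_password
                    }
                }),
            compute_profile=ComputeProfile(roles=[
                Role(name="headnode",
                     target_instance_count=2,
                     hardware_profile=HardwareProfile(vm_size=self.headnode_size),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=self.ssh_username,
                             password=self.ssh_password)),
                     virtual_network_profile=self.vnet_profile),
                Role(name="workernode",
                     target_instance_count=self.worker_count,
                     hardware_profile=HardwareProfile(vm_size=self.worker_size),
                     os_profile=OsProfile(
                         linux_operating_system_profile=LinuxOperatingSystemProfile(
                             username=self.ssh_username,
                             password=self.ssh_password)),
                     virtual_network_profile=self.vnet_profile)
            ]),
            storage_profile=StorageProfile(storageaccounts=[
                StorageAccount(name=self.storage_account_name,
                               key=self.storage_account_key,
                               container=self.storage_account_container,
                               is_default=True)
            ])))
    # clusters.create returns a long-running-operation poller in this SDK version
    poller = self.hdi_client.clusters.create(self.resource_group_name,
                                             self.hdi_cluster_name,
                                             create_params)
    return poller.result()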