Example #1
    def start(self):
        credentials, subscription_id = self._get_credentials()

        # Cluster name
        cluster_name = self.config.get("cluster", None)
        if _is_none_or_blank(cluster_name):
            cluster_name = self.cluster_name
            logging.info("Using same cluster name as DSS: {}".format(cluster_name))

        # Resource group
        resource_group = self.config.get('resourceGroup', None)
        if _is_none_or_blank(resource_group):
            metadata = get_instance_metadata()
            resource_group = metadata["compute"]["resourceGroupName"]
            logging.info("Using same resource group as DSS: {}".format(resource_group))

        clusters_client = ContainerServiceClient(credentials, subscription_id)

        # Get kubeconfig 
        logging.info("Fetching kubeconfig for cluster %s in %s", cluster_name, resource_group)
        def do_fetch():
            return clusters_client.managed_clusters.list_cluster_admin_credentials(resource_group, cluster_name)
        get_credentials_result = run_and_process_cloud_error(do_fetch)
        kube_config_content = get_credentials_result.kubeconfigs[0].value.decode('utf8')
        kube_config_path = os.path.join(os.getcwd(), 'kube_config')
        with open(kube_config_path, 'w') as f:
            f.write(kube_config_content)
        overrides = make_overrides(self.config, yaml.safe_load(kube_config_content), kube_config_path)
        
        # Get other cluster infos
        def do_inspect():
            return clusters_client.managed_clusters.get(resource_group, cluster_name)
        get_cluster_result = run_and_process_cloud_error(do_inspect)

        return [overrides, {'kube_config_path': kube_config_path, 'cluster': get_cluster_result.as_dict()}]
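These snippets lean on two plugin helpers that are not shown on this page. A minimal sketch of plausible implementations, assuming _is_none_or_blank treats None, whitespace-only strings and empty dicts as blank, and run_and_process_cloud_error merely wraps an Azure SDK call to rethrow CloudError with a readable message (the names come from the call sites above; the bodies are assumptions):

from msrestazure.azure_exceptions import CloudError

def _is_none_or_blank(x):
    # assumed: None, empty/whitespace strings and empty dicts all count as blank
    if x is None:
        return True
    if isinstance(x, str):
        return len(x.strip()) == 0
    if isinstance(x, dict):
        return len(x) == 0
    return False

def run_and_process_cloud_error(fn):
    # assumed: execute the SDK call, surfacing CloudError as a plain readable exception
    try:
        return fn()
    except CloudError as e:
        raise Exception("Azure API call failed: %s" % e.message)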
Example #2
    def run(self, progress_callback):
        cluster_data, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        kube_config_path = dss_cluster_settings.get_raw()['containerSettings'][
            'executionConfigsGenericOverrides']['kubeConfigPath']

        env = os.environ.copy()
        env['KUBECONFIG'] = kube_config_path
        cmd = ['kubectl'] + self.config.get('args', [])
        if not _is_none_or_blank(self.config.get("namespace", "")):
            cmd = cmd + ["--namespace", self.config.get("namespace", "")]
        if not _is_none_or_blank(self.config.get(
                "format", "")) and self.config.get("format", "") != 'none':
            cmd = cmd + ["-o", self.config.get("format", "")]
        logging.info("Run : %s" % json.dumps(cmd))
        try:
            out, err = run_with_timeout(cmd, env=env, timeout=20)
            rv = 0
        except KubeCommandException as e:
            rv = e.rv
            out = e.out
            err = e.err

        out_html = '<div class="alert alert-info"><div>Output</div><pre class="debug" style="max-width: 100%%; max-height: 100%%;">%s</pre></div>' % out
        err_html = '<div class="alert alert-danger"><div>Error</div><pre class="debug" style="max-width: 100%%; max-height: 100%%;">%s</pre></div>' % err
        if rv == 0 or _is_none_or_blank(err):
            return out_html
        else:
            return ('<div class="alert alert-danger">Failed with code %s</div>'
                    % rv) + err_html + out_html
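run_with_timeout and KubeCommandException are also plugin helpers; their shapes can be inferred from the call site above (out/err on success, rv/out/err on the exception). A hedged sketch built on subprocess:

import subprocess

class KubeCommandException(Exception):
    # assumed shape: carries the return code plus captured stdout/stderr
    def __init__(self, rv, out, err):
        super(KubeCommandException, self).__init__("Command failed with code %s" % rv)
        self.rv = rv
        self.out = out
        self.err = err

def run_with_timeout(cmd, env=None, timeout=20):
    # run the command, kill it past the timeout, raise when the exit code is non-zero
    p = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        out, err = p.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        p.kill()
        out, err = p.communicate()
        raise KubeCommandException(-1, out.decode('utf8'), err.decode('utf8'))
    if p.returncode != 0:
        raise KubeCommandException(p.returncode, out.decode('utf8'), err.decode('utf8'))
    return out.decode('utf8'), err.decode('utf8')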
Example #3
 def __init__(self, project_id, zone, credentials=None):
     logging.info("Connect using project_id=%s zone=%s credentials=%s" %
                  (project_id, zone, credentials))
     instance_info = get_instance_info()
     if _is_none_or_blank(project_id):
         default_project = instance_info["project"]
         logging.info("No project specified, using {} as default".format(
             default_project))
         self.project_id = default_project
     else:
         self.project_id = project_id
     if _is_none_or_blank(zone):
         default_zone = instance_info["zone"]
         logging.info(
             "No zone specified, using {} as default".format(default_zone))
         self.zone = default_zone
     else:
         self.zone = zone
     self.service = discovery.build('container',
                                    'v1',
                                    credentials=credentials,
                                    cache_discovery=False)
     self.compute = discovery.build('compute',
                                    'v1',
                                    credentials=credentials,
                                    cache_discovery=False)
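get_instance_info is not shown either; on GCE the project and zone defaults would typically come from the metadata server, roughly like this (a sketch under that assumption, using the documented Metadata-Flavor header):

import requests

METADATA_URL = "http://metadata.google.internal/computeMetadata/v1"

def get_instance_info():
    # query the GCE metadata server for the host's project and zone
    headers = {"Metadata-Flavor": "Google"}
    project = requests.get(METADATA_URL + "/project/project-id", headers=headers).text
    # the zone is returned as "projects/<num>/zones/<zone>"; keep the last segment
    zone = requests.get(METADATA_URL + "/instance/zone", headers=headers).text.split("/")[-1]
    return {"project": project, "zone": zone}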
Example #4
def get_credentials_from_connection_info(connection_info, connection_info_secret):
    client_id = connection_info.get('clientId', None)
    tenant_id = connection_info.get('tenantId', None)
    password = connection_info.get('password', None)
    if _is_none_or_blank(client_id) or _is_none_or_blank(password) or _is_none_or_blank(tenant_id):
        raise Exception('Client, password and tenant must all be defined')

    credentials = ServicePrincipalCredentials(client_id = client_id, secret = password, tenant = tenant_id)
    return credentials
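For illustration, a hypothetical call with placeholder values (a real deployment would read these from the plugin preset):

creds = get_credentials_from_connection_info(
    {"clientId": "<app-id>", "tenantId": "<tenant-id>", "password": "<client-secret>"},
    {})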
Example #5
 def _get_credentials(self):
     connection_info = self.config.get("connectionInfo", None)
     connection_info_secret = self.plugin_config.get("connectionInfo", None)
     if not _is_none_or_blank(connection_info) or not _is_none_or_blank(connection_info_secret):
         logging.warn("Using legacy authentication fields. Clear them to use the new ones.")
         credentials = get_credentials_from_connection_info(connection_info, connection_info_secret)
         subscription_id = connection_info.get('subscriptionId', None)
     else:
         connection_info_v2 = self.config.get("connectionInfoV2",{"identityType":"default"})
         credentials, _ = get_credentials_from_connection_infoV2(connection_info_v2)
         subscription_id = get_subscription_id(connection_info_v2)
     return credentials, subscription_id
Example #6
def create_admin_binding(user_name=None, kube_config_path=None):
    if _is_none_or_blank(user_name):
        user_name = get_account()
    
    env = os.environ.copy()
    if not _is_none_or_blank(kube_config_path):
        env['KUBECONFIG'] = kube_config_path
    out = subprocess.check_output(["kubectl", "get", "clusterrolebinding", "cluster-admin-binding", "--ignore-not-found"], env=env)
    if not _is_none_or_blank(out):
        logging.info("Clusterrolebinding already exist")
    else:
        subprocess.check_call(["kubectl", "create", "clusterrolebinding", "cluster-admin-binding", "--clusterrole", "cluster-admin", "--user", user_name], env=env)
Example #7
    def start(self):
        connection_info = self.config.get("connectionInfo", {})
        connection_info_secret = self.plugin_config.get("connectionInfo", {})
        subscription_id = connection_info.get('subscriptionId', None)
        if _is_none_or_blank(subscription_id):
            raise Exception('Subscription must be defined')

        credentials = get_credentials_from_connection_info(
            connection_info, connection_info_secret)
        clusters_client = ContainerServiceClient(credentials, subscription_id)

        resource_group_name = self.config.get('resourceGroup', None)
        if _is_none_or_blank(resource_group_name):
            raise Exception(
                "A resource group to put the cluster in is required")

        cluster_name = self.config.get('cluster', self.cluster_name)

        logging.info("Fetching kubeconfig for cluster %s in %s" %
                     (cluster_name, resource_group_name))

        def do_fetch():
            return clusters_client.managed_clusters.list_cluster_admin_credentials(
                resource_group_name, cluster_name)

        get_credentials_result = run_and_process_cloud_error(do_fetch)

        kube_config_content = get_credentials_result.kubeconfigs[
            0].value.decode('utf8')

        kube_config_path = os.path.join(os.getcwd(), 'kube_config')
        with open(kube_config_path, 'w') as f:
            f.write(kube_config_content)

        overrides = make_overrides(self.config,
                                   yaml.safe_load(kube_config_content),
                                   kube_config_path)

        def do_inspect():
            return clusters_client.managed_clusters.get(
                resource_group_name, cluster_name)

        get_cluster_result = run_and_process_cloud_error(do_inspect)

        return [
            overrides, {
                'kube_config_path': kube_config_path,
                'cluster': get_cluster_result.as_dict()
            }
        ]
Example #8
    def build(self):
        node_pool = {'config': {}}
        node_pool['name'] = self.name if self.name is not None else 'node-pool'
        node_pool['initialNodeCount'] = self.node_count if self.node_count is not None else 3
        if self.machine_type is not None:
            node_pool['config']['machineType'] = self.machine_type
        if self.disk_type is not None:
            node_pool['config']['diskType'] = self.disk_type
        # Add optional GPU accelerator:
        if self.enable_gpu:
            logging.info("GPU option enabled.")
            node_pool['config']['accelerators'] = [{
                'acceleratorCount': self.gpu_count,
                'acceleratorType': self.gpu_type
            }]
        if self.disk_size_gb is not None and self.disk_size_gb > 0:
            node_pool['config']['diskSizeGb'] = self.disk_size_gb
        node_pool['config']['oauthScopes'] = self.oauth_scopes

        if not _is_none_or_blank(self.service_account):
            node_pool['config']['serviceAccount'] = self.service_account

        node_pool["management"] = {"autoUpgrade": True, "autoRepair": True}
        if self.enable_autoscaling:
            node_pool['autoscaling'] = {
                "enabled": True,
                "minNodeCount": self.min_node_count if self.min_node_count is not None else node_pool['initialNodeCount'],
                "maxNodeCount": self.max_node_count if self.max_node_count is not None else node_pool['initialNodeCount']
            }
        node_pool["config"]["labels"] = self.nodepool_labels

        if not _is_none_or_blank(self.settings_valve):
            valve = json.loads(self.settings_valve)
            node_pool = _merge_objects(node_pool, valve)

        if isinstance(self.cluster_builder, ClusterBuilder):
            self.cluster_builder.with_node_pool(node_pool)
        elif isinstance(self.cluster_builder, NodePool):
            return self.cluster_builder.create(node_pool)
        else:
            raise Exception("Unreachable")
Example #9
def get_subscription_id(connection_info):
    identity_type = connection_info.get('identityType', None)
    subscription_id = connection_info.get('subscriptionId', None)
    if identity_type in ('default', 'service-principal') and not _is_none_or_blank(subscription_id):
        return subscription_id
    else:
        return get_instance_metadata()["compute"]["subscriptionId"]
Example #10
    def get_kube_config(self, cluster_id=None):
        response = self.get_info()

        if _is_none_or_blank(cluster_id):
            cluster_id = self.name

        logging.info("Response=%s" % json.dumps(response, indent=2))

        legacy_auth = response.get("legacyAbac", {}).get("enabled", False)
        master_auth = response["masterAuth"]
        endpoint = response["endpoint"]

        user = {"name": "user-%s" % cluster_id, "user": {}}
        if legacy_auth:
            user["user"] = {
                "client-certificate-data": master_auth["clientCertificate"],
                "client-key-data": master_auth["clientKey"]
            }
        else:
            user["user"] = {
                "auth-provider": {
                    "name": "gcp",
                    "config": {
                        "cmd-args": "config config-helper --format=json",
                        "cmd-path": os.path.join(get_sdk_root(), "bin",
                                                 "gcloud"),
                        "expiry-key": "{.credential.token_expiry}",
                        "token-key": "{.credential.access_token}"
                    }
                }
            }

        cluster = {
            "name": "cluster-%s" % cluster_id,
            "cluster": {
                "certificate-authority-data": master_auth["clusterCaCertificate"],
                "server": "https://%s" % endpoint
            }
        }
        context = {
            "name": "context-%s" % cluster_id,
            "context": {
                "cluster": cluster["name"],
                "user": user["name"]
            }
        }

        config = {
            "apiVersion": "v1",
            "kind": "Config",
            "preferences": {},
            "clusters": [cluster],
            "contexts": [context],
            "users": [user],
            "current-context": context["name"]
        }
        return config
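The returned dict is a complete kubeconfig, so persisting it for kubectl only takes a yaml dump (usage sketch; cluster stands for an instance of the surrounding class):

import yaml

config = cluster.get_kube_config()
with open("kube_config", "w") as f:
    yaml.safe_dump(config, f, default_flow_style=False)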
Example #11
 def with_oauth_scopes(self, oauth_scopes):
     if isinstance(oauth_scopes, text_type):
         return self.with_oauth_scopes(oauth_scopes.split(','))
     if oauth_scopes is not None:
         for oauth_scope in oauth_scopes:
             if _is_none_or_blank(oauth_scope):
                 continue
             self.add_oauth_scope(oauth_scope.strip())
     return self
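Because a comma-separated string is split and fed back through the same method, and each scope is stripped, these two calls are equivalent (usage sketch with the standard GKE scopes):

builder.with_oauth_scopes("https://www.googleapis.com/auth/devstorage.read_only, https://www.googleapis.com/auth/logging.write")
builder.with_oauth_scopes(["https://www.googleapis.com/auth/devstorage.read_only",
                           "https://www.googleapis.com/auth/logging.write"])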
Example #12
    def stop(self, data):
        connection_info = self.config.get("connectionInfo", {})
        connection_info_secret = self.plugin_config.get("connectionInfo", {})
        subscription_id = connection_info.get('subscriptionId', None)
        if _is_none_or_blank(subscription_id):
            raise Exception('Subscription must be defined')

        credentials = get_credentials_from_connection_info(
            connection_info, connection_info_secret)
        clusters_client = ContainerServiceClient(credentials, subscription_id)

        resource_group_name = self.config.get('resourceGroup', None)
        if _is_none_or_blank(resource_group_name):
            raise Exception(
                "A resource group to put the cluster in is required")

        logging.info("Fetching kubeconfig for cluster %s in %s" %
                     (self.cluster_name, resource_group_name))

        def do_delete():
            return clusters_client.managed_clusters.delete(
                resource_group_name, self.cluster_name)

        delete_result = run_and_process_cloud_error(do_delete)

        # delete returns void, so we poll until the cluster is really gone
        gone = False
        while not gone:
            time.sleep(5)
            try:
                cluster = clusters_client.managed_clusters.get(
                    resource_group_name, self.cluster_name)
                if cluster.provisioning_state.lower() != 'deleting':
                    logging.info(
                        "Cluster is not deleting anymore, must be deleted now (state = %s)"
                        % cluster.provisioning_state)
                    gone = True
            except Exception as e:
                logging.info("Could not get cluster, should be gone (%s)" %
                             str(e))
                gone = True
Example #13
def get_cluster_from_connection_info(config, plugin_config):
    """
    Return a ContainerServiceClient after authenticating using the connection info.
    """
    connection_info = config.get("connectionInfo", None)
    connection_info_secret = plugin_config.get("connectionInfo", None)
    if not _is_none_or_blank(connection_info) or not _is_none_or_blank(
            connection_info_secret):
        logging.warn(
            "Using legacy authentication fields. Clear them to use the new ones."
        )
        credentials = get_credentials_from_connection_info(
            connection_info, connection_info_secret)
        subscription_id = connection_info.get('subscriptionId', None)
    else:
        connection_info_v2 = config.get("connectionInfoV2",
                                        {"identityType": "default"})
        credentials, _ = get_credentials_from_connection_infoV2(
            connection_info_v2)
        subscription_id = get_subscription_id(connection_info_v2)
    clusters_client = ContainerServiceClient(credentials, subscription_id)
    return clusters_client
Example #14
def make_html(command_outputs):
    divs = []
    for command_output in command_outputs:
        cmd_html = '<div>Run: %s</div>' % json.dumps(command_output[0])
        rv_html = '<div>Returned %s</div>' % command_output[1]
        out_html = '<div class="alert alert-info"><div>Output</div><pre class="debug" style="max-width: 100%%; max-height: 100%%;">%s</pre></div>' % command_output[2]
        err_html = '<div class="alert alert-danger"><div>Error</div><pre class="debug" style="max-width: 100%%; max-height: 100%%;">%s</pre></div>' % command_output[3]
        divs.append(cmd_html)
        divs.append(rv_html)
        divs.append(out_html)
        if command_output[1] != 0 and not _is_none_or_blank(command_output[3]):
            divs.append(err_html)
    # '\n'.join(divs) is already text; decoding it would fail under Python 3
    return '\n'.join(divs)
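The indexing implies each command_output entry is a 4-tuple of (command, return code, stdout, stderr); for example, with placeholder output:

html = make_html([
    (["kubectl", "get", "nodes"], 0, "<stdout of the command>", ""),
    (["kubectl", "get", "unknown"], 1, "", "<stderr of the command>"),
])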
Example #15
    def run(self, progress_callback):
        cluster_data, clusters, dss_cluster_settings, dss_cluster_config = get_cluster_from_dss_cluster(
            self.config['clusterId'])

        # retrieve the actual name in the cluster's data
        if cluster_data is None:
            raise Exception("No cluster data (not started?)")
        cluster_def = cluster_data.get("cluster", None)
        if cluster_def is None:
            raise Exception("No cluster definition (starting failed?)")
        cluster_name = cluster_def["name"]

        resource_group_name = dss_cluster_config['config']['resourceGroup']
        # get the object for the cluster, AKS side
        cluster = clusters.managed_clusters.get(resource_group_name,
                                                cluster_name)

        node_pool_id = self.config.get('nodePoolId', None)
        node_pool = None
        for profile in cluster.agent_pool_profiles:
            if profile.name == node_pool_id or (
                    _is_none_or_blank(node_pool_id)
                    and len(cluster.agent_pool_profiles) == 1):
                node_pool = profile
        if node_pool is None:
            raise Exception("Unable to find node pool '%s'" % (node_pool_id))

        # see aks_scale() in azure-cli code
        cluster.service_principal_profile = None
        cluster.aad_profile = None

        desired_count = self.config['numNodes']
        logging.info("Resize to %s" % desired_count)
        if desired_count == 0:
            raise Exception("Can't delete node pool '%s'" % (node_pool_id))
        else:
            node_pool.count = desired_count
            logging.info("Waiting for cluster resize")

        def do_update():
            cluster_update_op = clusters.managed_clusters.create_or_update(
                resource_group_name, cluster_name, cluster)
            return cluster_update_op.result()

        update_result = run_and_process_cloud_error(do_update)
        logging.info("Cluster updated")
        return '<pre class="debug">%s</pre>' % json.dumps(
            update_result.as_dict(), indent=2)
Example #16
def create_installer_daemonset(kube_config_path=None):
    """
    Launch a pod on each node that will install the NVIDIA drivers.
    """

    env = os.environ.copy()
    if not _is_none_or_blank(kube_config_path):
        logging.info(
            "Setting KUBECONFIG env variable from kube_config_path...")
        env["KUBECONFIG"] = kube_config_path
        logging.info("Using KUBECONFIG={}".format(env["KUBECONFIG"]))
    logging.info(
        "Creating NVIDIA driver daemonset (only GPU-tainted nodes will be affected)"
    )
    subprocess.check_call(["kubectl", "apply", "-f", DAEMONSET_MANIFEST_URL],
                          env=env)
Example #17
def get_cluster_from_connection_info(config, plugin_config):
    """
    Return a ContainerServiceClient after authenticating using the connection info.
    """
    
    connection_info = config.get("connectionInfo", {})
    connection_info_secret = plugin_config.get("connectionInfo", {})
    subscription_id = connection_info.get('subscriptionId', None)
    if _is_none_or_blank(subscription_id):
        raise Exception('Subscription must be defined')

    credentials = get_credentials_from_connection_info(connection_info, connection_info_secret)
    clusters_client = ContainerServiceClient(credentials, subscription_id)
            
    # credit this cluster to Dataiku
    # clusters_client.config.add_user_agent('pid-fd3813c7-273c-5eec-9221-77323f62a148')

    return clusters_client
Example #18
    def with_service_account(self, service_account_type,
                             custom_service_account_name):
        """
        Change default service account on cluster nodes.
        Requires the iam.serviceAccountUser IAM permission.
        """

        if service_account_type == "fromDSSHost":
            logging.info(
                "Cluster nodes will inherit the DSS host Service Account")
            self.service_account = get_instance_service_account()
        if service_account_type == "custom":
            if _is_none_or_blank(custom_service_account_name):
                logging.info(
                    "Cluster nodes will have the default Compute Engine Service Account"
                )
                self.service_account = ""
            else:
                logging.info(
                    "Cluster nodes will have the custom Service Account: {}".
                    format(custom_service_account_name))
                self.service_account = custom_service_account_name
        return self
Example #19
    def start(self):
        connection_info = self.config.get('connectionInfo', {})
        networking_settings = self.config["networkingSettings"]

        args = ['create', 'cluster']
        args = args + ['-v', '4']

        if not self.config.get('advanced'):
            args = args + ['--name', self.cluster_id]

            if _has_not_blank_property(connection_info, 'region'):
                args = args + ['--region', connection_info['region']]
            elif 'AWS_DEFAULT_REGION' in os.environ:
                args = args + ['--region', os.environ['AWS_DEFAULT_REGION']]

            args = args + ['--full-ecr-access']

            subnets = networking_settings.get('subnets', [])
            if networking_settings.get('privateNetworking', False):
                args = args + ['--node-private-networking']
                private_subnets = networking_settings.get('privateSubnets', [])
                if len(private_subnets) > 0:
                    args = args + [
                        '--vpc-private-subnets', ','.join(private_subnets)
                    ]
            if len(subnets) > 0:
                args = args + ['--vpc-public-subnets', ','.join(subnets)]

            security_groups = networking_settings.get('securityGroups', [])
            if len(security_groups) > 0:
                args = args + [
                    '--node-security-groups', ','.join(security_groups)
                ]

            node_pool = self.config.get('nodePool', {})
            if 'machineType' in node_pool:
                args = args + ['--node-type', node_pool['machineType']]
            if 'diskType' in node_pool:
                args = args + ['--node-volume-type', node_pool['diskType']]
            if 'diskSizeGb' in node_pool and node_pool['diskSizeGb'] > 0:
                args = args + [
                    '--node-volume-size',
                    str(node_pool['diskSizeGb'])
                ]

            args = args + ['--nodes', str(node_pool.get('numNodes', 3))]
            if node_pool.get('numNodesAutoscaling', False):
                args = args + ['--asg-access']
                args = args + [
                    '--nodes-min',
                    str(node_pool.get('minNumNodes', 2))
                ]
                args = args + [
                    '--nodes-max',
                    str(node_pool.get('maxNumNodes', 5))
                ]

            k8s_version = self.config.get("k8sVersion", None)
            if not _is_none_or_blank(k8s_version):
                args = args + ['--version', k8s_version.strip()]
        else:
            yaml_dict = yaml.safe_load(self.config.get("advancedYaml"))
            yaml_loc = os.path.join(os.getcwd(),
                                    self.cluster_id + '_advanced.yaml')
            with open(yaml_loc, 'w') as outfile:
                yaml.dump(yaml_dict, outfile, default_flow_style=False)

            args = args + ['-f', yaml_loc]

        # we don't add the context to the main config file, to not end up with an oversized config,
        # and because 2 different clusters could be concurrently editing the config file
        kube_config_path = os.path.join(os.getcwd(), 'kube_config')
        args = args + ['--kubeconfig', kube_config_path]

        c = EksctlCommand(args, connection_info)
        if c.run_and_log() != 0:
            raise Exception("Failed to start cluster")

        args = ['get', 'cluster']
        args = args + ['--name', self.cluster_id]

        if _has_not_blank_property(connection_info, 'region'):
            args = args + ['--region', connection_info['region']]
        elif 'AWS_DEFAULT_REGION' in os.environ:
            args = args + ['--region', os.environ['AWS_DEFAULT_REGION']]
        args = args + ['-o', 'json']

        if _has_not_blank_property(connection_info,
                                   'accessKey') and _has_not_blank_property(
                                       connection_info, 'secretKey'):
            creds_in_env = {
                'AWS_ACCESS_KEY_ID': connection_info['accessKey'],
                'AWS_SECRET_ACCESS_KEY': connection_info['secretKey']
            }
            add_authenticator_env(kube_config_path, creds_in_env)

        if not self.config.get('advanced'):
            if node_pool.get('numNodesAutoscaling', False):
                logging.info("Nodegroup is autoscaling, ensuring autoscaler")
                add_autoscaler_if_needed(self.cluster_id, kube_config_path)
        elif self.config.get('clusterAutoScaling'):
            logging.info("Nodegroup is autoscaling, ensuring autoscaler")
            add_autoscaler_if_needed(self.cluster_id, kube_config_path)

        c = EksctlCommand(args, connection_info)
        cluster_info = json.loads(c.run_and_get_output())[0]

        with open(kube_config_path, "r") as f:
            kube_config = yaml.safe_load(f)

        # collect and prepare the overrides so that DSS can know where and how to use the cluster
        overrides = make_overrides(self.config, kube_config, kube_config_path)
        return [
            overrides, {
                'kube_config_path': kube_config_path,
                'cluster': cluster_info
            }
        ]
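_has_not_blank_property is presumably a thin wrapper around _is_none_or_blank; a sketch under that assumption:

def _has_not_blank_property(d, k):
    # true when the key exists and its value is neither None nor blank
    return k in d and not _is_none_or_blank(d[k])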
Example #20
    def stop(self, data):
        credentials, _, _ = self._get_credentials()

        # Do NOT use the conf but the actual values from the cluster here
        cluster_resource_id = data["cluster"]["id"]
        _, _, subscription_id, _, resource_group, _, _, _, cluster_name = cluster_resource_id.split(
            "/")
        clusters_client = ContainerServiceClient(credentials, subscription_id)

        # Try to detach from ACR if required. It is not mandatory but if not done, it would pollute
        # the ACR with multiple invalid role attachments and consume attachment quotas
        node_resource_group = data["cluster"]["node_resource_group"]
        acr_attachment = data.get("acr_attachment", None)
        if not _is_none_or_blank(acr_attachment):
            logging.info(
                "Cluster has an ACR attachment, check managed identity")
            cluster_identity_profile = data["cluster"]["identity_profile"]
            kubelet_mi_resource_id = cluster_identity_profile[
                "kubeletidentity"].get("resource_id", None)
            if kubelet_mi_resource_id is not None:
                _, _, mi_subscription_id, _, mi_resource_group, _, _, _, mi_name = kubelet_mi_resource_id.split(
                    "/")
                if mi_resource_group == node_resource_group:
                    logging.info(
                        "Cluster has an AKS managed kubelet identity, try to detach"
                    )
                    authorization_client = AuthorizationManagementClient(
                        credentials, acr_attachment["subscription_id"])
                    try:
                        authorization_client.role_assignments.delete_by_id(
                            acr_attachment["role_assignment"]["id"])
                    except ResourceNotFoundError as e:
                        logging.warn(
                            "It looks like the ACR role assignment doesn't exist. Ignoring this step."
                        )

        # Detach Vnet like ACR
        vnet_attachment = data.get("vnet_attachment", None)
        if not _is_none_or_blank(vnet_attachment):
            logging.info(
                "Cluster has a Vnet attachment, check managed identity")
            if "role_assignment" in vnet_attachment:
                logging.info(
                    "Cluster has an AKS managed kubelet identity, try to detach"
                )
                authorization_client = AuthorizationManagementClient(
                    credentials, vnet_attachment["subscription_id"])
                try:
                    authorization_client.role_assignments.delete_by_id(
                        vnet_attachment["role_assignment"]["id"])
                except ResourceNotFoundError as e:
                    logging.warn(
                        "It looks like the Vnet role assignment doesn't exist. Ignoring this step."
                    )

        def do_delete():
            future = clusters_client.managed_clusters.begin_delete(
                resource_group, cluster_name)
            return future.result()

        delete_result = run_and_process_cloud_error(do_delete)

        # delete returns void, so we poll until the cluster is really gone
        gone = False
        while not gone:
            time.sleep(5)
            try:
                cluster = clusters_client.managed_clusters.get(
                    resource_group, cluster_name)
                if cluster.provisioning_state.lower() != 'deleting':
                    logging.info(
                        "Cluster is not deleting anymore, must be deleted now (state = %s)"
                        % cluster.provisioning_state)
                    gone = True
            # other exceptions should not be ignored
            except ResourceNotFoundError as e:
                logging.info(
                    "Cluster doesn't seem to exist anymore, considering it deleted"
                )
                gone = True
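The 9-way unpacking of cluster_resource_id above relies on the fixed layout of Azure resource IDs; a worked example of the split:

rid = ("/subscriptions/<sub-id>/resourceGroups/<rg>"
       "/providers/Microsoft.ContainerService/managedClusters/<name>")
parts = rid.split("/")
# parts == ['', 'subscriptions', '<sub-id>', 'resourceGroups', '<rg>',
#           'providers', 'Microsoft.ContainerService', 'managedClusters', '<name>']
# hence: _, _, subscription_id, _, resource_group, _, _, _, cluster_name = parts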
Example #21
    def start(self):
        """
        Build the create cluster request.
        """
        credentials, subscription_id, managed_identity_id = self._get_credentials(
        )

        # Fetch metadata about the instance
        metadata = get_instance_metadata()

        # Resource group
        resource_group = self.config.get('resourceGroup', None)
        dss_host_resource_group = metadata["compute"]["resourceGroupName"]
        if _is_none_or_blank(resource_group):
            resource_group = dss_host_resource_group
            logging.info(
                "Using same resource group as DSS: {}".format(resource_group))

        # Location
        location = self.config.get('location', None)
        if _is_none_or_blank(location):
            location = metadata["compute"]["location"]
            logging.info("Using same location as DSS: {}".format(location))

        # Consistency checks
        if _is_none_or_blank(resource_group):
            raise Exception(
                "A resource group to put the cluster in is required")
        if _is_none_or_blank(location):
            raise Exception("A location to put the cluster in is required")

        # AKS Client
        clusters_client = None

        # Credit the cluster to DATAIKU
        if os.environ.get("DISABLE_AZURE_USAGE_ATTRIBUTION", "0") == "1":
            logging.info("Azure usage attribution is disabled")
            clusters_client = ContainerServiceClient(credentials,
                                                     subscription_id)
        else:
            policy = UserAgentPolicy()
            policy.add_user_agent('pid-fd3813c7-273c-5eec-9221-77323f62a148')
            clusters_client = ContainerServiceClient(credentials,
                                                     subscription_id,
                                                     user_agent_policy=policy)

        # check that the cluster doesn't exist yet, otherwise azure will try to update it
        # and will almost always fail
        try:
            existing = clusters_client.managed_clusters.get(
                resource_group, self.cluster_name)
            if existing is not None:
                raise Exception(
                    "A cluster with name %s in resource group %s already exists"
                    % (self.cluster_name, resource_group))
        except CloudError as e:
            logging.info("Cluster doesn't seem to exist yet")
        except ResourceNotFoundError as e:
            logging.info("Cluster doesn't seem to exist yet")

        cluster_builder = ClusterBuilder(clusters_client)
        cluster_builder.with_name(self.cluster_name)
        cluster_builder.with_dns_prefix("{}-dns".format(self.cluster_name))
        cluster_builder.with_resource_group(resource_group)
        cluster_builder.with_location(location)
        cluster_builder.add_tags(self.config.get("tags", None))
        cluster_builder.with_linux_profile()  # default is None
        cluster_builder.with_network_profile(
            service_cidr=self.config.get("serviceCIDR", None),
            dns_service_ip=self.config.get("dnsServiceIP", None),
            load_balancer_sku=self.config.get("loadBalancerSku", None),
            outbound_type=self.config.get("outboundType", None),
            network_plugin=self.config.get("networkPlugin"),
            docker_bridge_cidr=self.config.get("dockerBridgeCidr"))

        if self.config.get("useCustomNodeResourceGroup", False):
            cluster_builder.with_node_resource_group(
                self.config.get("nodeResourceGroup"))

        # Cluster identity
        connection_info = self.config.get("connectionInfo", None)
        cluster_idendity_legacy_use_distinct_sp = self.config.get(
            "useDistinctSPForCluster", False)
        cluster_idendity_legacy_sp = self.config.get("clusterServicePrincipal",
                                                     None)
        cluster_identity_type = None
        cluster_identity = None
        if not _is_none_or_blank(
                connection_info) or cluster_idendity_legacy_use_distinct_sp:
            logging.warn(
                "Using legacy options to configure cluster identity. Clear them to use the new ones."
            )
            if not cluster_idendity_legacy_use_distinct_sp and not _is_none_or_blank(
                    connection_info):
                cluster_sp = connection_info
            elif cluster_idendity_legacy_use_distinct_sp and not _is_none_or_blank(
                    cluster_idendity_legacy_sp):
                cluster_sp = self.config.get("clusterServicePrincipal")
            else:
                raise Exception(
                    "Legacy options are not complete enough to determine cluster identity settings"
                )
            cluster_builder.with_cluster_sp_legacy(
                cluster_service_principal_connection_info=cluster_sp)
        else:
            cluster_identity = self.config.get(
                "clusterIdentity", {"identityType": "managed-identity"})
            cluster_identity_type = cluster_identity.get(
                "identityType", "managed-identity")
            if cluster_identity_type == "managed-identity":
                if cluster_identity.get("inheritDSSIdentity", True):
                    logging.info(
                        "Need to inspect Managed Identity infos from Azure")
                    if metadata is None:
                        metadata = get_instance_metadata()
                    vm_resource_group = metadata["compute"][
                        "resourceGroupName"]
                    vm_name = metadata["compute"]["name"]
                    compute_client = ComputeManagementClient(
                        credentials, subscription_id)
                    vm = compute_client.virtual_machines.get(
                        vm_resource_group, vm_name)
                    # No choice here but to use the first one
                    if managed_identity_id is None:
                        managed_identity_id = next(
                            iter(vm.identity.user_assigned_identities.keys()))
                    for managed_identity_resource_id, managed_identity_properties in vm.identity.user_assigned_identities.items(
                    ):
                        if managed_identity_id == managed_identity_resource_id or managed_identity_id == managed_identity_properties.client_id:
                            break
                    logging.info("Found managed identity id {}".format(
                        managed_identity_resource_id))
                    cluster_builder.with_managed_identity(
                        managed_identity_resource_id)
                    cluster_builder.with_kubelet_identity(
                        managed_identity_resource_id,
                        managed_identity_properties.client_id,
                        managed_identity_properties.principal_id)
                else:
                    control_plane_mi = None if cluster_identity.get(
                        "useAKSManagedIdentity", True
                    ) else cluster_identity["controlPlaneUserAssignedIdentity"]
                    cluster_builder.with_managed_identity(control_plane_mi)
                    if control_plane_mi is None:
                        logging.info(
                            "Configure cluster with system managed identity.")
                    else:
                        logging.info(
                            "Configure cluster with user assigned identity: {}"
                            .format(control_plane_mi))
                    if not cluster_identity.get("useAKSManagedKubeletIdentity",
                                                True):
                        kubelet_mi = cluster_identity[
                            "kubeletUserAssignedIdentity"]
                        _, _, mi_subscription_id, _, mi_resource_group, _, _, _, mi_name = kubelet_mi.split(
                            "/")
                        msiclient = ManagedServiceIdentityClient(
                            AzureIdentityCredentialAdapter(credentials),
                            mi_subscription_id)
                        mi = msiclient.user_assigned_identities.get(
                            mi_resource_group, mi_name)
                        cluster_builder.with_kubelet_identity(
                            kubelet_mi, mi.client_id, mi.principal_id)
                        logging.info(
                            "Configure kubelet identity with user assigned identity resourceId=\"{}\", clientId=\"{}\", objectId=\"{}\""
                            .format(kubelet_mi, mi.client_id, mi.principal_id))
            elif cluster_identity_type == "service-principal":
                cluster_builder.with_cluster_sp(cluster_identity["clientId"],
                                                cluster_identity["password"])
                logging.info("Configure cluster with service principal")
            else:
                raise Exception(
                    "Cluster identity type \"{}\" is unknown".format(
                        cluster_identity_type))

        # Fail fast for non existing ACRs to avoid drama in case of failure AFTER cluster is created
        acr_role_id = None
        authorization_client = None
        if cluster_identity_type is not None and cluster_identity is not None:
            if cluster_identity_type == "managed-identity" and cluster_identity.get(
                    "useAKSManagedKubeletIdentity",
                    True) and not cluster_identity.get("inheritDSSIdentity",
                                                       True):
                acr_name = cluster_identity.get("attachToACRName", None)
                if not _is_none_or_blank(acr_name):
                    # build acr scope
                    acr_identifier_splitted = acr_name.split('/')
                    acr_subscription_id = subscription_id
                    acr_resource_group = resource_group
                    if 9 == len(acr_identifier_splitted):
                        _, _, acr_subscription_id, _, acr_resource_group, _, _, _, acr_name = acr_identifier_splitted
                    elif 2 == len(acr_identifier_splitted):
                        acr_resource_group, acr_name = acr_identifier_splitted

                    authorization_client = AuthorizationManagementClient(
                        credentials, acr_subscription_id)
                    acr_scope = "/subscriptions/{acr_subscription_id}/resourceGroups/{acr_resource_group}/providers/Microsoft.ContainerRegistry/registries/{acr_name}".format(
                        **locals())
                    try:
                        acr_roles = list(
                            authorization_client.role_definitions.list(
                                acr_scope, "roleName eq 'AcrPull'"))
                    except ResourceNotFoundError as e:
                        raise Exception(
                            "ACR {} not found. Check it exists and you are Owner of it."
                            .format(acr_scope))
                    if 0 == len(acr_roles):
                        raise Exception(
                            "Could not find the AcrPull role on the ACR {}. Check you are Owner of it."
                            .format(acr_scope))
                    else:
                        acr_role_id = acr_roles[0].id
                        logging.info("ACR pull role id: %s", acr_role_id)

                    # Try to run a fake role assignment. Depending on the failure type we know if we are Owner or not
                    try:
                        fake_role_assignment = authorization_client.role_assignments.create(
                            scope=acr_scope,
                            role_assignment_name=str(uuid.uuid4()),
                            parameters={
                                "properties": {
                                    "role_definition_id":
                                    acr_role_id,
                                    "principal_id":
                                    "00000000-0000-0000-0000-000000000000",
                                },
                            },
                        )
                    except HttpResponseError as e:
                        if e.reason == "Forbidden" and "AuthorizationFailed" in str(
                                e.error):
                            raise Exception(
                                "Cannot create role assignments on ACR {}. Check that you are Owner of it or provide an existing Kubelet identity."
                                .format(acr_scope))
                        elif e.reason == "Bad Request" and "PrincipalNotFound" in str(
                                e.error):
                            logging.info(
                                "Fake role assignment on ACR looks ok. Identity should be allowed to assign roles in further steps."
                            )
                        else:
                            raise (e)
                    except Exception as e:
                        raise (e)

        # Sanity check for node pools
        node_pool_vnets = set()
        for idx, node_pool_conf in enumerate(self.config.get("nodePools", [])):
            node_pool_builder = cluster_builder.get_node_pool_builder()
            nodepool_vnet = node_pool_conf.get("vnet", None)
            nodepool_subnet = node_pool_conf.get("subnet", None)
            vnet, _ = node_pool_builder.resolve_network(
                inherit_from_host=node_pool_conf.get(
                    "useSameNetworkAsDSSHost"),
                cluster_vnet=nodepool_vnet,
                cluster_subnet=nodepool_subnet,
                connection_info=connection_info,
                credentials=credentials,
                resource_group=resource_group,
                dss_host_resource_group=dss_host_resource_group)
            node_pool_vnets.add(vnet)

        if 1 < len(node_pool_vnets):
            raise Exception(
                "Node pools must all share the same vnet. Current node pools configuration yields vnets {}."
                .format(",".join(node_pool_vnets)))
        elif 0 == len(node_pool_vnets):
            raise Exception(
                "You cannot deploy a cluster without any node pool.")

        # Check role assignments for vnet like on ACR for fail fast if not doable
        vnet_id = node_pool_vnets.pop()
        if not vnet_id.startswith("/"):
            vnet_name = vnet_id
            vnet_id = "/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Network/virtualNetworks/{vnet_name}".format(
                **locals())
        vnet_role_id = None
        if cluster_identity_type is not None and cluster_identity is not None:
            if cluster_identity_type == "managed-identity" and cluster_identity.get(
                    "useAKSManagedIdentity",
                    True) and not cluster_identity.get("inheritDSSIdentity",
                                                       True):
                authorization_client = AuthorizationManagementClient(
                    credentials, subscription_id)
                try:
                    vnet_roles = list(
                        authorization_client.role_definitions.list(
                            vnet_id, "roleName eq 'Contributor'"))
                except ResourceNotFoundError as e:
                    raise Exception(
                        "Vnet {} not found. Check it exists and you are Owner of it."
                        .format(vnet_id))
                if 0 == len(vnet_roles):
                    raise Exception(
                        "Could not find the Contributor role on the vnet {}. Check you are Owner of it."
                        .format(vnet_id))
                else:
                    vnet_role_id = vnet_roles[0].id
                    logging.info("Vnet contributor role id: %s", vnet_role_id)
                    # Try to run a fake role assignment. Depending on the failure type we know if we are Owner or not
                    try:
                        fake_role_assignment = authorization_client.role_assignments.create(
                            scope=vnet_id,
                            role_assignment_name=str(uuid.uuid4()),
                            parameters={
                                "properties": {
                                    "role_definition_id":
                                    vnet_role_id,
                                    "principal_id":
                                    "00000000-0000-0000-0000-000000000000",
                                },
                            },
                        )
                    except HttpResponseError as e:
                        if e.reason == "Forbidden" and "AuthorizationFailed" in str(
                                e.error):
                            raise Exception(
                                "Cannot create role assignments on Vnet {}. Check that you are Owner of it or provide an existing Control Plane identity."
                                .format(vnet_id))
                        elif e.reason == "Bad Request" and "PrincipalNotFound" in str(
                                e.error):
                            logging.info(
                                "Fake role assignment on Vnet looks ok. Identity should be allowed to assign roles in further steps."
                            )
                        else:
                            raise (e)
                    except Exception as e:
                        raise (e)

        # Access level
        if self.config.get("privateAccess"):
            cluster_builder.with_private_access(
                self.config.get("privateAccess"))

        cluster_builder.with_cluster_version(
            self.config.get("clusterVersion", None))

        # Node pools
        for idx, node_pool_conf in enumerate(self.config.get("nodePools", [])):
            node_pool_builder = cluster_builder.get_node_pool_builder()
            node_pool_builder.with_idx(idx)
            node_pool_builder.with_vm_size(node_pool_conf.get("vmSize", None))
            vnet = node_pool_conf.get("vnet", None)
            subnet = node_pool_conf.get("subnet", None)
            node_pool_builder.with_network(
                inherit_from_host=node_pool_conf.get(
                    "useSameNetworkAsDSSHost"),
                cluster_vnet=vnet,
                cluster_subnet=subnet,
                connection_info=connection_info,
                credentials=credentials,
                resource_group=resource_group,
                dss_host_resource_group=dss_host_resource_group)

            node_pool_builder.with_availability_zones(
                use_availability_zones=node_pool_conf.get(
                    "useAvailabilityZones", True))

            node_pool_builder.with_node_count(
                enable_autoscaling=node_pool_conf.get("autoScaling", False),
                num_nodes=node_pool_conf.get("numNodes", None),
                min_num_nodes=node_pool_conf.get("minNumNodes", None),
                max_num_nodes=node_pool_conf.get("maxNumNodes", None))

            node_pool_builder.with_mode(
                mode=node_pool_conf.get("mode", "Automatic"),
                system_pods_only=node_pool_conf.get("systemPodsOnly", True))

            node_pool_builder.with_disk_size_gb(
                disk_size_gb=node_pool_conf.get("osDiskSizeGb", 0))
            node_pool_builder.with_node_labels(
                node_pool_conf.get("labels", None))
            node_pool_builder.with_node_taints(
                node_pool_conf.get("taints", None))
            node_pool_builder.add_tags(self.config.get("tags", None))
            node_pool_builder.add_tags(node_pool_conf.get("tags", None))
            node_pool_builder.build()
            cluster_builder.with_node_pool(
                node_pool=node_pool_builder.agent_pool_profile)

        # Run creation
        logging.info("Start creation of cluster")

        def do_creation():
            cluster_create_op = cluster_builder.build()
            return cluster_create_op.result()

        create_result = run_and_process_cloud_error(do_creation)
        logging.info("Cluster creation finished")

        # Attach to ACR
        acr_attachment = {}
        if cluster_identity_type is not None and cluster_identity is not None:
            if cluster_identity_type == "managed-identity" and cluster_identity.get(
                    "useAKSManagedKubeletIdentity",
                    True) and not cluster_identity.get("inheritDSSIdentity",
                                                       True):
                kubelet_mi_object_id = create_result.identity_profile.get(
                    "kubeletidentity").object_id
                logging.info("Kubelet Managed Identity object id: %s",
                             kubelet_mi_object_id)
                if not _is_none_or_blank(acr_role_id):
                    logging.info("Assign ACR pull role id %s to %s",
                                 acr_role_id, kubelet_mi_object_id)
                    role_assignment = authorization_client.role_assignments.create(
                        scope=acr_scope,
                        role_assignment_name=str(uuid.uuid4()),
                        parameters={
                            "properties": {
                                "role_definition_id": acr_role_id,
                                "principal_id": kubelet_mi_object_id,
                            },
                        },
                    )
                    acr_attachment.update({
                        "name": acr_name,
                        "resource_group": acr_resource_group,
                        "subscription_id": acr_subscription_id,
                        "resource_id": acr_scope,
                        "role_assignment": role_assignment.as_dict(),
                    })

        # Attach to VNET to allow LoadBalancers creation
        vnet_attachment = {}
        if cluster_identity_type is not None and cluster_identity is not None:
            if cluster_identity_type == "managed-identity" and cluster_identity.get(
                    "useAKSManagedIdentity",
                    True) and not cluster_identity.get("inheritDSSIdentity",
                                                       True):
                # And here we are blocked because we cant get the principal id of a System Assigned Managed Id easily
                control_plane_object_id = create_result.identity.principal_id
                logging.info("Controle Plane Managed Identity object id: %s",
                             control_plane_object_id)
                if not _is_none_or_blank(vnet_role_id):
                    logging.info("Assign Vnet contributolr role id %s to %s",
                                 vnet_role_id, control_plane_object_id)
                    vnet_role_assignment = authorization_client.role_assignments.create(
                        scope=vnet_id,
                        role_assignment_name=str(uuid.uuid4()),
                        parameters={
                            "properties": {
                                "role_definition_id": vnet_role_id,
                                "principal_id": control_plane_object_id,
                            },
                        },
                    )
                    vnet_attachment.update({
                        "subscription_id": subscription_id,
                        "resource_id": vnet_id,
                        "role_assignment": vnet_role_assignment.as_dict(),
                    })

        logging.info("Fetching kubeconfig for cluster {} in {}...".format(
            self.cluster_name, resource_group))

        def do_fetch():
            return clusters_client.managed_clusters.list_cluster_admin_credentials(
                resource_group, self.cluster_name)

        get_credentials_result = run_and_process_cloud_error(do_fetch)
        kube_config_content = get_credentials_result.kubeconfigs[
            0].value.decode("utf8")
        logging.info("Writing kubeconfig file...")
        kube_config_path = os.path.join(os.getcwd(), "kube_config")
        with open(kube_config_path, 'w') as f:
            f.write(kube_config_content)

        overrides = make_overrides(
            self.config,
            yaml.safe_load(kube_config_content),
            kube_config_path,
            acr_name=None
            if _is_none_or_blank(acr_attachment) else acr_attachment["name"],
        )

        return [
            overrides, {
                "kube_config_path": kube_config_path,
                "cluster": create_result.as_dict(),
                "acr_attachment": acr_attachment,
                "vnet_attachment": vnet_attachment
            }
        ]
Example #22
    def start(self):
        """
        Build the create cluster request.
        """

        connection_info = self.config.get("connectionInfo", {})
        connection_info_secret = self.plugin_config.get("connectionInfo", {})
        credentials = get_credentials_from_connection_info(
            connection_info, connection_info_secret)
        subscription_id = connection_info.get('subscriptionId', None)
        resource_group = self.config.get('resourceGroup', None)

        clusters_client = ContainerServiceClient(credentials, subscription_id)

        # Attribute the cluster's Azure usage to Dataiku (partner usage attribution id)
        if os.environ.get("DISABLE_AZURE_USAGE_ATTRIBUTION", "0") == "1":
            logging.info("Azure usage attribution is disabled")
        else:
            clusters_client.config.add_user_agent(
                'pid-fd3813c7-273c-5eec-9221-77323f62a148')

        resource_group_name = self.config.get('resourceGroup', None)
        # TODO: Auto detection
        #if _is_none_or_blank(resource_group_name):
        #    resource_group_name = vm_infos.get('resource_group_name', None)
        if _is_none_or_blank(resource_group_name):
            raise Exception(
                "A resource group to put the cluster in is required")

        location = self.config.get('location', None)
        # TODO: Auto detection
        #if _is_none_or_blank(location):
        #    location = vm_infos.get('location', None)
        if _is_none_or_blank(location):
            raise Exception("A location to put the cluster in is required")

        # Check that the cluster doesn't exist yet; otherwise Azure will try to
        # update it and will almost always fail
        try:
            existing = clusters_client.managed_clusters.get(
                resource_group_name, self.cluster_name)
            if existing is not None:
                raise Exception(
                    "A cluster with name %s in resource group %s already exists"
                    % (self.cluster_name, resource_group_name))
        except CloudError:
            logging.info("Cluster doesn't seem to exist yet")

        cluster_builder = ClusterBuilder(clusters_client)
        cluster_builder.with_name(self.cluster_name)
        cluster_builder.with_dns_prefix("{}-dns".format(self.cluster_name))
        cluster_builder.with_resource_group(resource_group)
        cluster_builder.with_location(location)
        cluster_builder.with_linux_profile()  # default is None
        cluster_builder.with_network_profile(
            service_cidr=self.config.get("serviceCIDR", None),
            dns_service_ip=self.config.get("dnsServiceIP", None),
            load_balancer_sku=self.config.get("loadBalancerSku", None))

        if self.config.get("useDistinctSPForCluster", False):
            cluster_sp = self.config.get("clusterServicePrincipal")
        else:
            cluster_sp = connection_info
        cluster_builder.with_cluster_sp(
            cluster_service_principal_connection_info=cluster_sp)

        cluster_builder.with_cluster_version(
            self.config.get("clusterVersion", None))

        for idx, node_pool_conf in enumerate(self.config.get("nodePools", [])):
            node_pool_builder = cluster_builder.get_node_pool_builder()
            node_pool_builder.with_idx(idx)
            node_pool_builder.with_vm_size(node_pool_conf.get("vmSize", None))
            vnet = node_pool_conf.get("vnet", None)
            subnet = node_pool_conf.get("subnet", None)
            node_pool_builder.with_network(
                inherit_from_host=node_pool_conf.get(
                    "useSameNetworkAsDSSHost"),
                cluster_vnet=vnet,
                cluster_subnet=subnet,
                connection_info=connection_info,
                credentials=credentials,
                resource_group=resource_group)

            node_pool_builder.with_node_count(
                enable_autoscaling=node_pool_conf.get("autoScaling", False),
                num_nodes=node_pool_conf.get("numNodes", None),
                min_num_nodes=node_pool_conf.get("minNumNodes", None),
                max_num_nodes=node_pool_conf.get("maxNumNodes", None))

            node_pool_builder.with_disk_size_gb(
                disk_size_gb=node_pool_conf.get("osDiskSizeGb", 0))
            node_pool_builder.build()
            cluster_builder.with_node_pool(
                node_pool=node_pool_builder.agent_pool_profile)

        def do_creation():
            cluster_create_op = cluster_builder.build()
            return cluster_create_op.result()

        create_result = run_and_process_cloud_error(do_creation)

        logging.info("Fetching kubeconfig for cluster {} in {}...".format(
            self.cluster_name, resource_group))

        def do_fetch():
            return clusters_client.managed_clusters.list_cluster_admin_credentials(
                resource_group, self.cluster_name)

        get_credentials_result = run_and_process_cloud_error(do_fetch)
        kube_config_content = get_credentials_result.kubeconfigs[0].value.decode("utf8")
        logging.info("Writing kubeconfig file...")
        kube_config_path = os.path.join(os.getcwd(), "kube_config")
        with open(kube_config_path, 'w') as f:
            f.write(kube_config_content)

        overrides = make_overrides(self.config,
                                   yaml.safe_load(kube_config_content),
                                   kube_config_path)

        return [
            overrides,
            {
                "kube_config_path": kube_config_path,
                "cluster": create_result.as_dict()
            }
        ]
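
Both AKS examples route every Azure SDK call through run_and_process_cloud_error, whose definition is not shown here. A minimal sketch, assuming its job is simply to rewrap CloudError instances with a readable message for the plugin UI; the real implementation may log or preserve more detail:

from msrestazure.azure_exceptions import CloudError

def run_and_process_cloud_error(fn):
    # Hypothetical sketch: run the callable, surfacing Azure CloudErrors
    # as plain exceptions carrying the underlying error message
    try:
        return fn()
    except CloudError as e:
        raise Exception("Azure API call failed: %s" % e.message)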
Example #23
    def build(self):
        cluster_name = self.name
        cluster_version = self.version
        cluster_node_count = self.node_count
        cluster_network = self.network
        cluster_subnetwork = self.subnetwork
        cluster_labels = self.labels
        cluster_pod_ip_range = self.pod_ip_range
        cluster_svc_ip_range = self.svc_ip_range

        if _is_none_or_blank(cluster_name):
            cluster_name = self._auto_name()
        if cluster_node_count is None:
            cluster_node_count = 3

        create_cluster_request_body = {
            "cluster": {
                "name": cluster_name,
                "initialClusterVersion": cluster_version,
                "description": "Created from plugin",
                "network": cluster_network,
                "subnetwork": cluster_subnetwork,
                "resourceLabels": cluster_labels,
                "nodePools": []
            },
            "parent": self.clusters.get_location()
        }
        if self.is_vpc_native:
            ip_allocation_policy = {
                "createSubnetwork": False,
                "useIpAliases": True,
                "servicesIpv4CidrBlock": cluster_svc_ip_range,
                "clusterIpv4CidrBlock": cluster_pod_ip_range,
            }
            create_cluster_request_body["cluster"][
                "ipAllocationPolicy"] = ip_allocation_policy

        if self.legacy_auth:
            create_cluster_request_body["cluster"]["legacyAbac"] = {
                "enabled": True
            }

        # Starting with GKE 1.12, client certificates are no longer issued by
        # default, so explicitly request one for 1.12+ (and for the floating
        # "latest"/"-" version aliases)
        if cluster_version == "latest" or cluster_version == "-":
            need_issue_certificate = True
        else:
            version_chunks = cluster_version.split('.')
            major_version = int(version_chunks[0])
            minor_version = int(version_chunks[1])
            need_issue_certificate = major_version > 1 or (
                major_version == 1 and minor_version >= 12)

        if need_issue_certificate:
            create_cluster_request_body["cluster"]["masterAuth"] = {
                "clientCertificateConfig": {
                    "issueClientCertificate": True
                }
            }

        create_cluster_request_body["cluster"]["addonsConfig"] = {}
        if self.http_load_balancing:
            create_cluster_request_body["cluster"]["addonsConfig"][
                "httpLoadBalancing"] = {
                    "disabled": False
                }
        else:
            create_cluster_request_body["cluster"]["addonsConfig"][
                "httpLoadBalancing"] = {
                    "disabled": True
                }

        for node_pool in self.node_pools:
            create_cluster_request_body["cluster"]["nodePools"].append(node_pool)

        # The settings "valve" is free-form JSON that is deep-merged into the
        # request body, letting users override any field of the request
        if not _is_none_or_blank(self.settings_valve):
            valve = json.loads(self.settings_valve)
            create_cluster_request_body["cluster"] = _merge_objects(
                create_cluster_request_body["cluster"], valve)

        logging.info("Requesting cluster %s" %
                     json.dumps(create_cluster_request_body, indent=2))

        location_params = self.clusters.get_location_params()
        request = self.clusters.get_clusters_api().create(
            body=create_cluster_request_body, **location_params)

        try:
            response = request.execute()
            return Operation(response, self.clusters.get_operations_api(),
                             self.clusters.get_location_params())
        except HttpError as e:
            raise Exception("Failed to create cluster: %s" % str(e))