Example #1
def check_kube_connection(configuration):
    api_instance = kube_client.NetworkingV1Api(kube_client.ApiClient(configuration))
    try:
        api_instance.get_api_resources()
    except Exception as e:  # pylint: disable=broad-except
        logger.warning("Unable to verify connectivity to the Kubernetes cluster.")
        utils.kubernetes_exception_handler(e, consts.Kubernetes_Connectivity_FaultType, 'Unable to verify connectivity to the Kubernetes cluster',
                                           error_message="If you are using AAD Enabled cluster, verify that you are able to access the cluster. Learn more at https://aka.ms/arc/k8s/onboarding-aad-enabled-clusters")
Example #2
def get_server_version(configuration):
    api_instance = kube_client.VersionApi(kube_client.ApiClient(configuration))
    try:
        api_response = api_instance.get_code()
        return api_response.git_version
    except Exception as e:  # pylint: disable=broad-except
        logger.warning("Unable to fetch kubernetes version.")
        utils.kubernetes_exception_handler(e, consts.Get_Kubernetes_Version_Fault_Type, 'Unable to fetch kubernetes version',
                                           raise_error=False)
Example #3
def get_kubernetes_distro(configuration):
    api_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration))
    try:
        api_response = api_instance.list_node()
        if api_response.items:
            labels = api_response.items[0].metadata.labels
            if labels.get("node.openshift.io/os_id") == "rhcos" or labels.get("node.openshift.io/os_id") == "rhel":
                return "openshift"
        return "default"
    except Exception as e:  # pylint: disable=broad-except
        logger.warning("Error occured while trying to fetch kubernetes distribution.")
        utils.kubernetes_exception_handler(e, consts.Get_Kubernetes_Distro_Fault_Type, 'Unable to fetch kubernetes distribution',
                                           raise_error=False)
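Taken together, the two helpers above feed cluster metadata into telemetry. The snippet below is lifted from the create_connectedk8s flow in Example #6 and assumes the same configuration object and telemetry module are already in scope.

# Record the cluster version and distribution as extension telemetry properties.
kubernetes_version = get_server_version(configuration)
kubernetes_distro = get_kubernetes_distro(configuration)

kubernetes_properties = {
    'Context.Default.AzureCLI.KubernetesVersion': kubernetes_version,
    'Context.Default.AzureCLI.KubernetesDistro': kubernetes_distro
}
telemetry.add_extension_event('connectedk8s', kubernetes_properties)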
Example #4
def ensure_namespace_cleanup(configuration):
    api_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration))
    timeout = time.time() + 180
    while True:
        if time.time() > timeout:
            telemetry.set_user_fault()
            logger.warning("Namespace 'azure-arc' still in terminating state. Please ensure that you delete the 'azure-arc' namespace before onboarding the cluster again.")
            return
        try:
            api_response = api_instance.list_namespace(field_selector='metadata.name=azure-arc')
            if not api_response.items:
                return
            time.sleep(5)
        except Exception as e:  # pylint: disable=broad-except
            logger.warning("Error while retrieving namespace information.")
            utils.kubernetes_exception_handler(e, consts.Get_Kubernetes_Namespace_Fault_Type, 'Unable to fetch kubernetes namespace',
                                               raise_error=False)
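Reusing a configuration object built as in the sketch after Example #1, the helper above would presumably be invoked along these lines; the call site itself is an assumption, not shown in these excerpts.

# Hypothetical call site: block (for up to ~3 minutes) until the 'azure-arc'
# namespace finishes terminating, warning the user if it does not.
ensure_namespace_cleanup(configuration)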
Example #5
def delete_connectedk8s(cmd,
                        client,
                        resource_group_name,
                        cluster_name,
                        kube_config=None,
                        kube_context=None,
                        no_wait=False):
    logger.warning(
        "Ensure that you have the latest helm version installed before proceeding to avoid unexpected errors."
    )
    logger.warning("This operation might take a while ...\n")

    # Send cloud information to telemetry
    send_cloud_telemetry(cmd)

    # Setting kubeconfig
    kube_config = set_kube_config(kube_config)

    # Loading the kubeconfig file in kubernetes client configuration
    try:
        config.load_kube_config(config_file=kube_config, context=kube_context)
    except Exception as e:
        telemetry.set_user_fault()
        telemetry.set_exception(exception=e,
                                fault_type=consts.Load_Kubeconfig_Fault_Type,
                                summary='Problem loading the kubeconfig file')
        raise CLIError("Problem loading the kubeconfig file." + str(e))
    configuration = kube_client.Configuration()

    # Checking the connection to kubernetes cluster.
    # This check was added to avoid large timeouts when connecting to AAD Enabled
    # AKS clusters if the user had not logged in.
    check_kube_connection(configuration)

    # Checking helm installation
    check_helm_install(kube_config, kube_context)

    # Check helm version
    check_helm_version(kube_config, kube_context)

    # Check Release Existence
    release_namespace = get_release_namespace(kube_config, kube_context)
    if not release_namespace:
        delete_cc_resource(client, resource_group_name, cluster_name, no_wait)
        return

    # Loading config map
    api_instance = kube_client.CoreV1Api(kube_client.ApiClient(configuration))
    try:
        configmap = api_instance.read_namespaced_config_map(
            'azure-clusterconfig', 'azure-arc')
    except Exception as e:  # pylint: disable=broad-except
        utils.kubernetes_exception_handler(
            e,
            consts.Read_ConfigMap_Fault_Type,
            'Unable to read ConfigMap',
            error_message=
            "Unable to read ConfigMap 'azure-clusterconfig' in 'azure-arc' namespace: ",
            message_for_not_found=
            "The helm release 'azure-arc' is present but the azure-arc namespace/configmap is missing. Please run 'helm delete azure-arc --no-hooks' to cleanup the release before onboarding the cluster again."
        )

    if (configmap.data["AZURE_RESOURCE_GROUP"].lower()
            == resource_group_name.lower()
            and configmap.data["AZURE_RESOURCE_NAME"].lower()
            == cluster_name.lower()):
        delete_cc_resource(client, resource_group_name, cluster_name, no_wait)
    else:
        telemetry.set_user_fault()
        telemetry.set_exception(
            exception='Unable to delete connected cluster',
            fault_type=consts.Bad_DeleteRequest_Fault_Type,
            summary=
            'The resource cannot be deleted as kubernetes cluster is onboarded with some other resource id'
        )
        raise CLIError(
            "The current context in the kubeconfig file does not correspond " +
            "to the connected cluster resource specified. Agents installed on this cluster correspond "
            + "to the resource group name '{}' ".format(
                configmap.data["AZURE_RESOURCE_GROUP"]) +
            "and resource name '{}'.".format(
                configmap.data["AZURE_RESOURCE_NAME"]))

    # Deleting the azure-arc agents
    delete_arc_agents(release_namespace, kube_config, kube_context,
                      configuration)
Example #6
def create_connectedk8s(cmd,
                        client,
                        resource_group_name,
                        cluster_name,
                        https_proxy="",
                        http_proxy="",
                        no_proxy="",
                        location=None,
                        kube_config=None,
                        kube_context=None,
                        no_wait=False,
                        tags=None):
    logger.warning(
        "Ensure that you have the latest helm version installed before proceeding."
    )
    logger.warning("This operation might take a while...\n")

    # Setting subscription id
    subscription_id = get_subscription_id(cmd.cli_ctx)

    # Send cloud information to telemetry
    send_cloud_telemetry(cmd)

    # Fetching Tenant Id
    graph_client = _graph_client_factory(cmd.cli_ctx)
    onboarding_tenant_id = graph_client.config.tenant_id

    # Setting kubeconfig
    kube_config = set_kube_config(kube_config)

    # Escaping comma, forward slash present in https proxy urls, needed for helm params.
    https_proxy = escape_proxy_settings(https_proxy)

    # Escaping comma, forward slash present in http proxy urls, needed for helm params.
    http_proxy = escape_proxy_settings(http_proxy)

    # Escaping comma, forward slash present in no proxy urls, needed for helm params.
    no_proxy = escape_proxy_settings(no_proxy)

    # Checking whether optional extra values file has been provided.
    values_file_provided = False
    values_file = os.getenv('HELMVALUESPATH')
    if (values_file is not None) and (os.path.isfile(values_file)):
        values_file_provided = True
        logger.warning(
            "Values files detected. Reading additional helm parameters from same."
        )
        # trimming required for windows os
        if (values_file.startswith("'") or values_file.startswith('"')):
            values_file = values_file[1:]
        if (values_file.endswith("'") or values_file.endswith('"')):
            values_file = values_file[:-1]

    # Validate the helm environment file for Dogfood.
    dp_endpoint_dogfood = None
    release_train_dogfood = None
    if cmd.cli_ctx.cloud.endpoints.resource_manager == consts.Dogfood_RMEndpoint:
        dp_endpoint_dogfood, release_train_dogfood = validate_env_file_dogfood(
            values_file, values_file_provided)

    # Loading the kubeconfig file in kubernetes client configuration
    try:
        config.load_kube_config(config_file=kube_config, context=kube_context)
    except Exception as e:
        telemetry.set_user_fault()
        telemetry.set_exception(exception=e,
                                fault_type=consts.Load_Kubeconfig_Fault_Type,
                                summary='Problem loading the kubeconfig file')
        raise CLIError("Problem loading the kubeconfig file." + str(e))
    configuration = kube_client.Configuration()

    # Checking the connection to kubernetes cluster.
    # This check was added to avoid large timeouts when connecting to AAD Enabled AKS clusters
    # if the user had not logged in.
    check_kube_connection(configuration)

    # Get kubernetes cluster info for telemetry
    kubernetes_version = get_server_version(configuration)
    kubernetes_distro = get_kubernetes_distro(configuration)

    kubernetes_properties = {
        'Context.Default.AzureCLI.KubernetesVersion': kubernetes_version,
        'Context.Default.AzureCLI.KubernetesDistro': kubernetes_distro
    }
    telemetry.add_extension_event('connectedk8s', kubernetes_properties)

    # Checking helm installation
    check_helm_install(kube_config, kube_context)

    # Check helm version
    helm_version = check_helm_version(kube_config, kube_context)
    telemetry.add_extension_event(
        'connectedk8s', {'Context.Default.AzureCLI.HelmVersion': helm_version})

    # Validate location
    utils.validate_location(cmd, location)
    resourceClient = _resource_client_factory(cmd.cli_ctx,
                                              subscription_id=subscription_id)

    # Check Release Existence
    release_namespace = get_release_namespace(kube_config, kube_context)
    if release_namespace:
        # Loading config map
        api_instance = kube_client.CoreV1Api(
            kube_client.ApiClient(configuration))
        try:
            configmap = api_instance.read_namespaced_config_map(
                'azure-clusterconfig', 'azure-arc')
        except Exception as e:  # pylint: disable=broad-except
            utils.kubernetes_exception_handler(
                e,
                consts.Read_ConfigMap_Fault_Type,
                'Unable to read ConfigMap',
                error_message=
                "Unable to read ConfigMap 'azure-clusterconfig' in 'azure-arc' namespace: ",
                message_for_not_found=
                "The helm release 'azure-arc' is present but the azure-arc namespace/configmap is missing. Please run 'helm delete azure-arc --no-hooks' to cleanup the release before onboarding the cluster again."
            )
        configmap_rg_name = configmap.data["AZURE_RESOURCE_GROUP"]
        configmap_cluster_name = configmap.data["AZURE_RESOURCE_NAME"]
        if connected_cluster_exists(client, configmap_rg_name,
                                    configmap_cluster_name):
            if (configmap_rg_name.lower() == resource_group_name.lower() and
                    configmap_cluster_name.lower() == cluster_name.lower()):
                # Re-put connected cluster
                try:
                    public_key = client.get(
                        configmap_rg_name,
                        configmap_cluster_name).agent_public_key_certificate
                except Exception as e:  # pylint: disable=broad-except
                    utils.arm_exception_handler(
                        e, consts.Get_ConnectedCluster_Fault_Type,
                        'Failed to check if connected cluster resource already exists.'
                    )
                cc = generate_request_payload(configuration, location,
                                              public_key, tags)
                create_cc_resource(client, resource_group_name, cluster_name,
                                   cc, no_wait)
            else:
                telemetry.set_user_fault()
                telemetry.set_exception(
                    exception='The kubernetes cluster is already onboarded',
                    fault_type=consts.Cluster_Already_Onboarded_Fault_Type,
                    summary='Kubernetes cluster already onboarded')
                raise CLIError(
                    "The kubernetes cluster you are trying to onboard " +
                    "is already onboarded to the resource group" +
                    " '{}' with resource name '{}'.".format(
                        configmap_rg_name, configmap_cluster_name))
        else:
            # Cleanup agents and continue with put
            delete_arc_agents(release_namespace, kube_config, kube_context,
                              configuration)
    else:
        if connected_cluster_exists(client, resource_group_name, cluster_name):
            telemetry.set_user_fault()
            telemetry.set_exception(
                exception='The connected cluster resource already exists',
                fault_type=consts.Resource_Already_Exists_Fault_Type,
                summary='Connected cluster resource already exists')
            raise CLIError(
                "The connected cluster resource {} already exists ".format(
                    cluster_name) +
                "in the resource group {} ".format(resource_group_name) +
                "and corresponds to a different Kubernetes cluster. To onboard this Kubernetes cluster"
                +
                "to Azure, specify different resource name or resource group name."
            )

    # Resource group Creation
    if resource_group_exists(cmd.cli_ctx, resource_group_name,
                             subscription_id) is False:
        resource_group_params = {'location': location}
        try:
            resourceClient.resource_groups.create_or_update(
                resource_group_name, resource_group_params)
        except Exception as e:  # pylint: disable=broad-except
            utils.arm_exception_handler(e,
                                        consts.Create_ResourceGroup_Fault_Type,
                                        'Failed to create the resource group')

    # Adding helm repo
    if os.getenv('HELMREPONAME') and os.getenv('HELMREPOURL'):
        utils.add_helm_repo(kube_config, kube_context)

    # Retrieving Helm chart OCI Artifact location
    registry_path = os.getenv('HELMREGISTRY') if os.getenv(
        'HELMREGISTRY') else utils.get_helm_registry(
            cmd, location, dp_endpoint_dogfood, release_train_dogfood)

    # Get azure-arc agent version for telemetry
    azure_arc_agent_version = registry_path.split(':')[1]
    telemetry.add_extension_event(
        'connectedk8s',
        {'Context.Default.AzureCLI.AgentVersion': azure_arc_agent_version})

    # Get helm chart path
    chart_path = utils.get_chart_path(registry_path, kube_config, kube_context)

    # Generate public-private key pair
    try:
        key_pair = RSA.generate(4096)
    except Exception as e:
        telemetry.set_exception(
            exception=e,
            fault_type=consts.KeyPair_Generate_Fault_Type,
            summary='Failed to generate public-private key pair')
        raise CLIError("Failed to generate public-private key pair. " + str(e))
    try:
        public_key = get_public_key(key_pair)
    except Exception as e:
        telemetry.set_exception(exception=e,
                                fault_type=consts.PublicKey_Export_Fault_Type,
                                summary='Failed to export public key')
        raise CLIError("Failed to export public key." + str(e))
    try:
        private_key_pem = get_private_key(key_pair)
    except Exception as e:
        telemetry.set_exception(exception=e,
                                fault_type=consts.PrivateKey_Export_Fault_Type,
                                summary='Failed to export private key')
        raise CLIError("Failed to export private key." + str(e))

    # Generate request payload
    cc = generate_request_payload(configuration, location, public_key, tags)

    # Create connected cluster resource
    put_cc_response = create_cc_resource(client, resource_group_name,
                                         cluster_name, cc, no_wait)

    # Install azure-arc agents
    helm_install_release(chart_path, subscription_id, kubernetes_distro,
                         resource_group_name, cluster_name, location,
                         onboarding_tenant_id, http_proxy, https_proxy,
                         no_proxy, private_key_pem, kube_config, kube_context,
                         no_wait, values_file_provided, values_file)

    return put_cc_response