Example #1
    def create_cluster(
        self,
        cluster: Union[Dict, Cluster],
        project_id: Optional[str] = None,
        retry: Retry = DEFAULT,
        timeout: float = DEFAULT
    ) -> str:
        """
        Creates a cluster, consisting of the specified number and type of Google Compute
        Engine instances.

        :param cluster: A Cluster protobuf or dict. If dict is provided, it must
            be of the same form as the protobuf message
            :class:`google.cloud.container_v1.types.Cluster`
        :type cluster: dict or google.cloud.container_v1.types.Cluster
        :param project_id: Google Cloud Platform project ID
        :type project_id: str
        :param retry: A retry object (``google.api_core.retry.Retry``) used to
            retry requests.
            If None is specified, requests will not be retried.
        :type retry: google.api_core.retry.Retry
        :param timeout: The amount of time, in seconds, to wait for the request to
            complete. Note that if retry is specified, the timeout applies to each
            individual attempt.
        :type timeout: float
        :return: The full url to the new, or existing, cluster
        :raises:
            ParseError: On JSON parsing problems when trying to convert dict
            AirflowException: cluster is not dict type nor Cluster proto type
        """

        if isinstance(cluster, dict):
            cluster_proto = Cluster()
            cluster = ParseDict(cluster, cluster_proto)
        elif not isinstance(cluster, Cluster):
            raise AirflowException(
                "cluster is not instance of Cluster proto or python dict")

        self._append_label(cluster, 'airflow-version', 'v' + version.version)

        self.log.info(
            "Creating (project_id=%s, zone=%s, cluster_name=%s)",
            project_id, self.location, cluster.name
        )
        try:
            resource = self.get_conn().create_cluster(project_id=project_id,
                                                      zone=self.location,
                                                      cluster=cluster,
                                                      retry=retry,
                                                      timeout=timeout)
            resource = self.wait_for_operation(resource)

            return resource.target_link
        except AlreadyExists as error:
            self.log.info('Assuming Success: %s', error.message)
            return self.get_cluster(name=cluster.name).self_link
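The docstring above notes that cluster may be either a Cluster proto or a plain dict of the same shape. A minimal usage sketch, assuming the enclosing hook class is named GKEClusterHook and takes a connection id and location (those names and arguments are assumptions, not shown in the example):

    # Hypothetical hook class and constructor arguments; only the create_cluster
    # call itself mirrors the method above.
    hook = GKEClusterHook(gcp_conn_id='google_cloud_default', location='europe-west1-b')
    cluster_url = hook.create_cluster(
        cluster={'name': 'example-cluster', 'initial_node_count': 3},  # dict in Cluster proto form
        project_id='my-project',
    )
    print(cluster_url)  # full URL of the created (or already existing) cluster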
Example #2
    def test_create_cluster_proto(self, wait_mock, convert_mock, mock_project_id):
        mock_cluster_proto = Cluster()
        mock_cluster_proto.name = CLUSTER_NAME

        retry_mock, timeout_mock = mock.Mock(), mock.Mock()

        client_create = self.gke_hook._client.create_cluster = mock.Mock()

        self.gke_hook.create_cluster(cluster=mock_cluster_proto,
                                     project_id=TEST_GCP_PROJECT_ID,
                                     retry=retry_mock,
                                     timeout=timeout_mock)

        client_create.assert_called_once_with(project_id=TEST_GCP_PROJECT_ID,
                                              zone=GKE_ZONE,
                                              cluster=mock_cluster_proto,
                                              retry=retry_mock, timeout=timeout_mock)
        wait_mock.assert_called_once_with(client_create.return_value)
        convert_mock.assert_not_called()
Example #3
    def create_cluster(
        self,
        cluster: Union[Dict, Cluster, None],
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Union[Retry, _MethodDefault] = DEFAULT,
        timeout: Optional[float] = None,
    ) -> str:
        """
        Creates a cluster, consisting of the specified number and type of Google Compute
        Engine instances.

        :param cluster: A Cluster protobuf or dict. If dict is provided, it must
            be of the same form as the protobuf message
            :class:`google.cloud.container_v1.types.Cluster`
        :param project_id: Google Cloud project ID
        :param retry: A retry object (``google.api_core.retry.Retry``) used to
            retry requests.
            If None is specified, requests will not be retried.
        :param timeout: The amount of time, in seconds, to wait for the request to
            complete. Note that if retry is specified, the timeout applies to each
            individual attempt.
        :return: The full url to the new, or existing, cluster
        :raises:
            ParseError: On JSON parsing problems when trying to convert dict
            AirflowException: cluster is not dict type nor Cluster proto type
        """
        if isinstance(cluster, dict):
            cluster = Cluster.from_json(json.dumps(cluster))
        elif not isinstance(cluster, Cluster):
            raise AirflowException(
                "cluster is not instance of Cluster proto or python dict")

        self._append_label(cluster, 'airflow-version',
                           'v' + version.version)  # type: ignore

        self.log.info(
            "Creating (project_id=%s, location=%s, cluster_name=%s)",
            project_id,
            self.location,
            cluster.name,  # type: ignore
        )
        try:
            resource = self.get_cluster_manager_client().create_cluster(
                parent=f'projects/{project_id}/locations/{self.location}',
                cluster=cluster,  # type: ignore
                retry=retry,
                timeout=timeout,
            )
            resource = self.wait_for_operation(resource)

            return resource.target_link
        except AlreadyExists as error:
            self.log.info('Assuming Success: %s', error.message)
            return self.get_cluster(name=cluster.name,
                                    project_id=project_id)  # type: ignore
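Unlike Example #1, this variant converts a dict by round-tripping it through JSON, because google-cloud-container 2.x uses proto-plus messages. That conversion can be exercised on its own; a small sketch with illustrative field values:

    import json

    from google.cloud.container_v1.types import Cluster

    # Build a proto-plus Cluster from a plain dict, exactly as the hook does above.
    cluster = Cluster.from_json(json.dumps({'name': 'example-cluster', 'initial_node_count': 3}))
    assert cluster.name == 'example-cluster'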
Example #4
    def persist(context: "Context", task_instance, cluster: Union[Dict, Cluster, None]):
        if isinstance(cluster, dict):
            cluster = Cluster.from_json(json.dumps(cluster))

        task_instance.xcom_push(
            context=context,
            key=KubernetesEngineClusterLink.key,
            value={
                "location": task_instance.location,
                "cluster_name": cluster.name,  # type: ignore
                "project_id": task_instance.project_id,
            },
        )
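A hedged sketch of where persist is typically called from: an operator's execute method, with the link registered via operator_extra_links. Everything here other than the persist call itself (the operator class name and its body/location/project_id attributes) is an assumption for illustration:

    from airflow.models import BaseOperator

    class ExampleGKECreateClusterOperator(BaseOperator):
        """Hypothetical operator; only the persist() call mirrors the example above."""

        operator_extra_links = (KubernetesEngineClusterLink(),)

        def __init__(self, *, body, location, project_id, **kwargs):
            super().__init__(**kwargs)
            self.body = body
            self.location = location      # read by persist() via task_instance.location
            self.project_id = project_id  # read by persist() via task_instance.project_id

        def execute(self, context):
            # ... create the cluster with a hook, then record the link data in XCom ...
            KubernetesEngineClusterLink.persist(context=context, task_instance=self, cluster=self.body)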
Example #5
    def create_cluster(self, cluster, retry=DEFAULT, timeout=DEFAULT):
        """
        Creates a cluster, consisting of the specified number and type of Google Compute
        Engine instances.

        :param cluster: A Cluster protobuf or dict. If dict is provided, it must be of
            the same form as the protobuf message google.cloud.container_v1.types.Cluster
        :type cluster: dict or google.cloud.container_v1.types.Cluster
        :param retry: A retry object (google.api_core.retry.Retry) used to retry requests.
            If None is specified, requests will not be retried.
        :type retry: google.api_core.retry.Retry
        :param timeout: The amount of time, in seconds, to wait for the request to
            complete. Note that if retry is specified, the timeout applies to each
            individual attempt.
        :type timeout: float
        :return: The full url to the new, or existing, cluster
        :raises:
            ParseError: On JSON parsing problems when trying to convert dict
            AirflowException: cluster is not dict type nor Cluster proto type
        """

        if isinstance(cluster, dict):
            cluster_proto = Cluster()
            cluster = self._dict_to_proto(py_dict=cluster, proto=cluster_proto)
        elif not isinstance(cluster, Cluster):
            raise AirflowException(
                "cluster is not instance of Cluster proto or python dict")

        self._append_label(cluster, 'airflow-version', 'v' + version.version)

        self.log.info(
            "Creating (project_id={}, zone={}, cluster_name={})".format(
                self.project_id, self.location, cluster.name))
        try:
            op = self.client.create_cluster(project_id=self.project_id,
                                            zone=self.location,
                                            cluster=cluster,
                                            retry=retry,
                                            timeout=timeout)
            op = self.wait_for_operation(op)

            return op.target_link
        except AlreadyExists as error:
            self.log.info('Assuming Success: ' + error.message)
            return self.get_cluster(name=cluster.name).self_link
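The _dict_to_proto helper referenced above is not shown; Example #1 suggests it is a thin wrapper around google.protobuf.json_format.ParseDict, so a hypothetical sketch could be:

    from google.protobuf import json_format

    def _dict_to_proto(self, py_dict, proto):
        """Hypothetical helper: populate ``proto`` from a plain dict (raises ParseError on bad input)."""
        return json_format.ParseDict(py_dict, proto)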
Example #6
    def provisioning(self):
        logger.info("Starting provisioning Kubernetes clusters")
        cluster_manager_client = self._get_gke_client()
        project_id = self.configs['project_id']
        list_zones = [cluster['zone'] for cluster in self.configs['clusters']]

        logger.info("Checking the Kubernetes clusters exist or not")
        clusters_ok, clusters_ko = self._get_existed_clusters(project_id, list_zones, cluster_manager_client)

        for cluster in self.configs['clusters']:
            key = '%s:%s' % (cluster['zone'], cluster['cluster_name'])
            if key in clusters_ok:
                logger.info('Cluster "%s" in zone %s already exists and is running' %
                            (cluster['cluster_name'], cluster['zone']))
                self.clusters.append(clusters_ok[key])
            elif key in clusters_ko:
                logger.info('Cluster "%s" in zone %s already existed but not running' %
                            (cluster['cluster_name'], cluster['zone']))
            else:
                logger.info('Deploying K8s cluster "%s" with %s nodes in zone %s' %
                            (cluster['cluster_name'], cluster['n_nodes'], cluster['zone']))
                cluster_specs = Cluster(mapping={
                    'name': cluster['cluster_name'],
                    'locations': [cluster['zone']],
                    'initial_node_count': cluster['n_nodes'],
                    'ip_allocation_policy': {'use_ip_aliases': True}
                })
                cluster_manager_client.create_cluster(cluster=cluster_specs,
                                                      parent='projects/%s/locations/%s' % (project_id, cluster['zone']))

                sleep(40 * cluster['n_nodes'])  # rough initial wait before polling; scales with node count
                i = 0
                while i < 10:
                    c = cluster_manager_client.get_cluster(project_id=project_id,
                                                           zone=cluster['zone'],
                                                           cluster_id=cluster['cluster_name'])
                    if c.status == 2:  # Cluster.Status.RUNNING
                        self.clusters.append(c)
                        break
                    i += 1
                    # nodes take a while to boot up
                    sleep(20)
        logger.info("Finish provisioning Kubernetes clusters on GKE\n")
Example #7
    def test_create_cluster_dict(self, wait_mock, convert_mock, mock_project_id):
        mock_cluster_dict = {'name': CLUSTER_NAME}
        retry_mock, timeout_mock = mock.Mock(), mock.Mock()

        client_create = self.gke_hook._client.create_cluster = mock.Mock()
        proto_mock = convert_mock.return_value = mock.Mock()

        self.gke_hook.create_cluster(
            cluster=mock_cluster_dict, project_id=TEST_GCP_PROJECT_ID, retry=retry_mock, timeout=timeout_mock
        )

        client_create.assert_called_once_with(
            project_id=TEST_GCP_PROJECT_ID,
            zone=GKE_ZONE,
            cluster=proto_mock,
            retry=retry_mock,
            timeout=timeout_mock,
        )
        wait_mock.assert_called_once_with(client_create.return_value)
        convert_mock.assert_called_once_with({'name': 'test-cluster'}, Cluster())
Example #8
def node_pools(options: Dict[str, Any]) -> Iterator[List[str]]:
    credentials, project = google.auth.default()
    if options["project"] is None:
        options["project"] = project
    gke = ClusterManagerClient(credentials=credentials)
    # build node pool configurations
    pools = {}
    if options["generator"]:
        if options["load_balancer"] is None or options[
                "server_uri"] is not None:
            pools["generator"] = NodePool(initial_node_count=1)
            pools["generator"].config.machine_type = "n1-highcpu-2"
    if options["load_balancer"] is None:
        pools["server"] = NodePool(initial_node_count=4)
        pools["server"].config.machine_type = "n1-highcpu-2"
        if options["emulator"]:
            pools["emulator"] = NodePool(initial_node_count=1)
        else:
            # need pubsub permissions
            pools["server"].config.oauth_scopes.append(
                "https://www.googleapis.com/auth/pubsub")
    # add labels
    for name, pool in pools.items():
        pool.name = name
        pool.config.preemptible = options["preemptible"]
        pool.config.labels["name"] = name
        if options["location"][-2] == "-":
            # triple node count for single zone cluster
            pool.initial_node_count *= 3
    # create cluster
    if not pools:
        yield []  # nothing to create
    else:
        kwargs = {
            "cluster": Cluster(
                name=options["cluster"],
                logging_service=None,
                monitoring_service=None,
                node_pools=list(pools.values()),
            ),
            "parent": f"projects/{options['project']}/locations/{options['location']}",
        }
        name = f"{kwargs['parent']}/clusters/{options['cluster']}"
        try:
            operation = gke.create_cluster(**kwargs)
        except AlreadyExists:
            pass
        else:
            # wait for operation to complete
            request = GetOperationRequest(
                name=operation.self_link.split("projects").pop())
            while gke.get_operation(request).status <= Operation.Status.RUNNING:
                time.sleep(15)
        # set kube credentials
        cluster = gke.get_cluster(name=name)
        config = kube.Configuration()
        config.host = f"https://{cluster.endpoint}:443"
        config.verify_ssl = False
        config.api_key = {"authorization": f"Bearer {credentials.token}"}
        kube.Configuration.set_default(config)
        # delete cluster after test completes
        try:
            yield list(pools)
        finally:
            gke.delete_cluster(name=name)
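The generator above yields the pool names and deletes the cluster in its finally block, which is the shape pytest expects from a yield fixture. A possible wrapper (pytest and the options fixture are assumptions, not part of the example):

    import pytest

    @pytest.fixture(scope="session")
    def gke_node_pools(options):
        # Delegate setup and teardown to the node_pools generator above.
        yield from node_pools(options)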
Example #9
def cluster(options: Dict[str, Any]) -> Iterator[Optional[str]]:
    if options["cluster"] is None:
        load_kube_config()
        if options["project"] is None:
            options["project"] = "test"
        yield None
    else:
        credentials, project = google.auth.default()
        if options["project"] is None:
            options["project"] = project
        gke = ClusterManagerClient(credentials=credentials)
        # create cluster
        parent = f"projects/{options['project']}/locations/{options['location']}"
        name = f"{parent}/clusters/{options['cluster']}"
        try:
            operation = gke.create_cluster(
                cluster=Cluster(
                    name=options["cluster"],
                    logging_service=None,
                    monitoring_service=None,
                    node_pools=[
                        NodePool(
                            initial_node_count=1,
                            name="test",
                            config={
                                "preemptible": options["preemptible"],
                                "machine_type": "n1-highcpu-2",
                            },
                        )
                    ],
                ),
                parent=parent,
            )
        except AlreadyExists:
            pass
        else:
            # wait for operation to complete
            request = GetOperationRequest(
                name=operation.self_link.split("projects").pop())
            while gke.get_operation(request).status <= Operation.Status.RUNNING:
                time.sleep(15)
        # set kube credentials
        cluster = gke.get_cluster(name=name)
        config = kube.Configuration()
        config.host = f"https://{cluster.endpoint}:443"
        config.verify_ssl = False
        config.api_key = {"authorization": f"Bearer {credentials.token}"}
        kube.Configuration.set_default(config)
        # delete cluster after test completes
        try:
            yield options["cluster"]
        finally:
            try:
                # delete persistent volumes because gke cluster delete won't do it
                # https://cloud.google.com/kubernetes-engine/docs/how-to/deleting-a-cluster#overview
                api = kube.CoreV1Api()
                for pv in api.list_persistent_volume().items:
                    try:
                        pv.spec.persistent_volume_reclaim_policy = "Delete"
                        api.patch_persistent_volume(
                            name=pv.metadata.name,
                            body=pv,
                        )
                        api.delete_persistent_volume(
                            name=pv.metadata.name,
                            grace_period_seconds=0,
                            propagation_policy="Foreground",
                        )
                    except ApiException:
                        print_exc()
                # wait for pv deletes to complete
                for _ in range(60):
                    if not api.list_persistent_volume().items:
                        break
                    time.sleep(1)
                else:
                    print("FAILED TO CLEANUP PERSISTENT VOLUMES")
            finally:
                gke.delete_cluster(name=name)