def execute(self, context):
        """Create the cluster unless it is already there, then wait on it.

        A truthy result from ``self._get_cluster(service)`` is treated as
        "the cluster already exists"; in that case no create request is sent.

        :param context: not used by this method.
        :return: ``True`` when an existing cluster was found (either up front
            or after a failed create request); ``None`` when the create
            request itself went through.
        :raises HttpError: the create request failed and the cluster still
            cannot be found after a 10 second pause.
        """
        self.log.info('Creating cluster: %s', self.cluster_name)
        hook = DataProcHook(gcp_conn_id=self.gcp_conn_id,
                            delegate_to=self.delegate_to)
        service = hook.get_conn()

        if self._get_cluster(service):
            self.log.info('Cluster %s already exists... Checking status...',
                          self.cluster_name)
            self._wait_for_done(service)
            return True

        cluster_data = self._build_cluster_data()
        try:
            service.projects().regions().clusters().create(
                projectId=self.project_id,
                region=self.region,
                body=cluster_data).execute()
        except HttpError as e:
            # probably two cluster start commands at the same time
            time.sleep(10)
            if self._get_cluster(service):
                # The logger formats with ``msg % args``, so the placeholder
                # has to be %s; a str.format-style "{}" here makes the
                # logging call fail to render and the message is lost.
                self.log.info(
                    'Cluster %s already exists... Checking status...',
                    self.cluster_name)
                self._wait_for_done(service)
                return True
            else:
                raise e

        self._wait_for_done(service)
Пример #2
0
    def execute(self, context):
        """Send a cluster create request unless the cluster is already found.

        A truthy result from ``self._get_cluster(service)`` counts as "cluster
        exists". The request body comes from ``self._build_cluster_data()``.

        :param context: not used by this method.
        :return: ``True`` when an existing cluster was found (before the
            request or after it failed); ``None`` otherwise.
        :raises HttpError: the create request failed and the cluster is still
            not found after sleeping 10 seconds.
        """
        service = DataProcHook(
            gcp_conn_id=self.google_cloud_conn_id,
            delegate_to=self.delegate_to
        ).get_conn()

        exists_template = 'Cluster {} already exists... Checking status...'

        if self._get_cluster(service):
            logging.info(exists_template.format(self.cluster_name))
            self._wait_for_done(service)
            return True

        request_body = self._build_cluster_data()
        try:
            clusters_api = service.projects().regions().clusters()
            clusters_api.create(
                projectId=self.project_id,
                region=self.region,
                body=request_body
            ).execute()
        except HttpError as err:
            # Likely a concurrent start of the same cluster: give the other
            # request a moment, then look the cluster up again.
            time.sleep(10)
            if not self._get_cluster(service):
                raise err
            logging.info(exists_template.format(self.cluster_name))
            self._wait_for_done(service)
            return True

        self._wait_for_done(service)
    def execute(self, context):
        """Send a cluster delete request and wait on the returned operation.

        The ``name`` field of the delete response is logged and passed to
        ``self._wait_for_done`` (presumably blocking until the operation
        finishes -- confirm against that helper).

        :param context: not used by this method.
        """
        self.log.info('Deleting cluster: %s', self.cluster_name)
        service = DataProcHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to
        ).get_conn()

        clusters_api = service.projects().regions().clusters()
        delete_request = clusters_api.delete(
            projectId=self.project_id,
            region=self.region,
            clusterName=self.cluster_name
        )
        operation_name = delete_request.execute()['name']
        self.log.info("Cluster delete operation name: %s", operation_name)
        self._wait_for_done(service, operation_name)
Пример #4
0
    def execute(self, context):
        """Request deletion of the cluster and wait on the resulting operation.

        :param context: not used by this method.
        """
        connection_args = {
            'gcp_conn_id': self.google_cloud_conn_id,
            'delegate_to': self.delegate_to,
        }
        service = DataProcHook(**connection_args).get_conn()

        delete_call = service.projects().regions().clusters().delete(
            projectId=self.project_id,
            region=self.region,
            clusterName=self.cluster_name)
        result = delete_call.execute()

        # The response's "name" identifies the operation handed to the waiter.
        operation_name = result['name']
        message = "Cluster delete operation name: {}".format(operation_name)
        logging.info(message)
        self._wait_for_done(service, operation_name)
Пример #5
0
    def execute(self, context):
        """Issue a cluster delete call, log its operation name and wait on it.

        :param context: not used by this method.
        """
        service = DataProcHook(gcp_conn_id=self.google_cloud_conn_id,
                               delegate_to=self.delegate_to).get_conn()

        regional_clusters = service.projects().regions().clusters()
        operation_name = regional_clusters.delete(
            projectId=self.project_id,
            region=self.region,
            clusterName=self.cluster_name
        ).execute()['name']

        logging.info(
            "Cluster delete operation name: {}".format(operation_name)
        )
        # Waiting is delegated to the operator's own helper.
        self._wait_for_done(service, operation_name)
Пример #6
0
    def execute(self, context):
        """Delete the named cluster through the Dataproc service client.

        Logs the cluster name, sends the delete request, then passes the
        ``name`` of the returned operation to ``self._wait_for_done``.

        :param context: not used by this method.
        """
        self.log.info('Deleting cluster: %s', self.cluster_name)

        dataproc_hook = DataProcHook(gcp_conn_id=self.gcp_conn_id,
                                     delegate_to=self.delegate_to)
        service = dataproc_hook.get_conn()

        pending_delete = service.projects().regions().clusters().delete(
            projectId=self.project_id,
            region=self.region,
            clusterName=self.cluster_name)
        delete_response = pending_delete.execute()

        operation_name = delete_response['name']
        self.log.info("Cluster delete operation name: %s", operation_name)
        self._wait_for_done(service, operation_name)
    def get_dataproc_vars():
        """Log the config bucket and worker instance names of a cluster.

        The cluster name and region are read from the ``dataproc_cluster_name``
        and ``gce_region`` variables, and the cluster is looked up with
        ``_get_cluster``. If the result has no ``'status'`` key, ``'not ready'``
        is logged instead.
        """
        name = models.Variable.get('dataproc_cluster_name')
        # The project id is looked up but not used below. The lookup is kept
        # because dropping it could change behaviour (e.g. if a missing
        # variable is meant to fail here) -- TODO confirm whether it is needed.
        models.Variable.get('gcp_project')
        gce_region = models.Variable.get('gce_region')

        service = DataProcHook(gcp_conn_id='google_cloud_default',
                               delegate_to=None).get_conn()
        cluster = _get_cluster(service, name, gce_region)

        if 'status' not in cluster:
            logging.info('not ready')
            return

        cluster_config = cluster['config']
        logging.info(cluster_config['configBucket'])
        logging.info(cluster_config['workerConfig']['instanceNames'])
    def execute(self, context):
        """Build a cluster definition and send a create request for it.

        If ``self._get_cluster(service)`` is truthy, nothing is created. In
        that case, and after a failed create where the cluster is then found,
        the method waits via ``self._wait_for_done`` and returns ``True``.

        :param context: not used by this method.
        :return: ``True`` when an existing cluster was found, ``None``
            otherwise.
        :raises HttpError: the create request failed and the cluster is still
            not found after sleeping 10 seconds.
        """
        service = DataProcHook(
            gcp_conn_id=self.google_cloud_conn_id,
            delegate_to=self.delegate_to
        ).get_conn()

        exists_template = 'Cluster {} already exists... Checking status...'
        if self._get_cluster(service):
            logging.info(exists_template.format(self.cluster_name))
            self._wait_for_done(service)
            return True

        # Compute Engine resource URIs referenced by the cluster definition.
        zone_template = \
            'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'
        machine_type_template = \
            "https://www.googleapis.com/compute/v1/projects/{}/zones/{}/machineTypes/{}"
        zone_uri = zone_template.format(self.project_id, self.zone)
        master_type_uri = machine_type_template.format(
            self.project_id, self.zone, self.master_machine_type)
        worker_type_uri = machine_type_template.format(
            self.project_id, self.zone, self.worker_machine_type)

        # Sub-sections are kept in local names so the optional settings
        # below can be attached without repeating the nested key path.
        gce_cluster_config = {'zoneUri': zone_uri}
        software_config = {}
        config = {
            'gceClusterConfig': gce_cluster_config,
            'masterConfig': {
                'numInstances': 1,
                'machineTypeUri': master_type_uri,
                'diskConfig': {'bootDiskSizeGb': self.master_disk_size},
            },
            'workerConfig': {
                'numInstances': self.num_workers,
                'machineTypeUri': worker_type_uri,
                'diskConfig': {'bootDiskSizeGb': self.worker_disk_size},
            },
            'secondaryWorkerConfig': {},
            'softwareConfig': software_config,
        }
        cluster_data = {
            'projectId': self.project_id,
            'clusterName': self.cluster_name,
            'config': config,
        }

        if self.num_preemptible_workers > 0:
            config['secondaryWorkerConfig'] = {
                'numInstances': self.num_preemptible_workers,
                'machineTypeUri': worker_type_uri,
                'diskConfig': {'bootDiskSizeGb': self.worker_disk_size},
                'isPreemptible': True,
            }
        if self.labels:
            cluster_data['labels'] = self.labels
        if self.storage_bucket:
            config['configBucket'] = self.storage_bucket
        if self.metadata:
            gce_cluster_config['metadata'] = self.metadata
        if self.properties:
            software_config['properties'] = self.properties
        if self.init_actions_uris:
            config['initializationActions'] = [
                {'executableFile': uri} for uri in self.init_actions_uris
            ]

        try:
            clusters_api = service.projects().regions().clusters()
            clusters_api.create(
                projectId=self.project_id,
                region=self.region,
                body=cluster_data
            ).execute()
        except HttpError as err:
            # Likely a concurrent start of the same cluster: pause, then
            # check whether the cluster has appeared.
            time.sleep(10)
            if not self._get_cluster(service):
                raise err
            logging.info(exists_template.format(self.cluster_name))
            self._wait_for_done(service)
            return True

        self._wait_for_done(service)
Пример #9
0
    def execute(self, context):
        """Create the cluster from this operator's settings if none is found.

        The request body is assembled inline from the operator's attributes
        (zone, machine types, disk sizes, worker counts and the optional
        labels, bucket, metadata, properties and initialization actions).

        :param context: not used by this method.
        :return: ``True`` when ``self._get_cluster(service)`` found a cluster
            (before the request, or after it failed); ``None`` otherwise.
        :raises HttpError: the create request failed and no cluster is found
            after a 10 second sleep.
        """
        dataproc_hook = DataProcHook(gcp_conn_id=self.google_cloud_conn_id,
                                     delegate_to=self.delegate_to)
        service = dataproc_hook.get_conn()

        if self._get_cluster(service):
            message = 'Cluster {} already exists... Checking status...'.format(
                self.cluster_name)
            logging.info(message)
            self._wait_for_done(service)
            return True

        zone_uri = \
            'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
                self.project_id, self.zone)
        type_uri_format = \
            "https://www.googleapis.com/compute/v1/projects/{}/zones/{}/machineTypes/{}"
        master_type_uri = type_uri_format.format(
            self.project_id, self.zone, self.master_machine_type)
        worker_type_uri = type_uri_format.format(
            self.project_id, self.zone, self.worker_machine_type)

        master_section = {
            'numInstances': 1,
            'machineTypeUri': master_type_uri,
            'diskConfig': {'bootDiskSizeGb': self.master_disk_size},
        }
        worker_section = {
            'numInstances': self.num_workers,
            'machineTypeUri': worker_type_uri,
            'diskConfig': {'bootDiskSizeGb': self.worker_disk_size},
        }
        cluster_data = {
            'projectId': self.project_id,
            'clusterName': self.cluster_name,
            'config': {
                'gceClusterConfig': {'zoneUri': zone_uri},
                'masterConfig': master_section,
                'workerConfig': worker_section,
                'secondaryWorkerConfig': {},
                'softwareConfig': {},
            },
        }
        settings = cluster_data['config']

        # Optional parts are only filled in when the operator has them set.
        if self.num_preemptible_workers > 0:
            settings['secondaryWorkerConfig'] = {
                'numInstances': self.num_preemptible_workers,
                'machineTypeUri': worker_type_uri,
                'diskConfig': {'bootDiskSizeGb': self.worker_disk_size},
                'isPreemptible': True,
            }
        if self.labels:
            cluster_data['labels'] = self.labels
        if self.storage_bucket:
            settings['configBucket'] = self.storage_bucket
        if self.metadata:
            settings['gceClusterConfig']['metadata'] = self.metadata
        if self.properties:
            settings['softwareConfig']['properties'] = self.properties
        if self.init_actions_uris:
            init_actions = []
            for uri in self.init_actions_uris:
                init_actions.append({'executableFile': uri})
            settings['initializationActions'] = init_actions

        try:
            service.projects().regions().clusters().create(
                projectId=self.project_id,
                region=self.region,
                body=cluster_data).execute()
        except HttpError as http_error:
            # Likely two start commands for the same cluster at once.
            time.sleep(10)
            if self._get_cluster(service):
                message = \
                    'Cluster {} already exists... Checking status...'.format(
                        self.cluster_name)
                logging.info(message)
                self._wait_for_done(service)
                return True
            raise http_error

        self._wait_for_done(service)