def execute(self, context):
    """Create the Dataproc cluster, or wait on it if it already exists.

    Returns True early when a cluster with the same name already exists
    (after waiting for it to settle); otherwise submits the create
    request and blocks until the operation completes. A concurrent
    create racing with ours is tolerated by re-checking after an
    HttpError.
    """
    self.log.info('Creating cluster: %s', self.cluster_name)
    hook = DataProcHook(gcp_conn_id=self.gcp_conn_id,
                        delegate_to=self.delegate_to)
    service = hook.get_conn()

    if self._get_cluster(service):
        self.log.info('Cluster %s already exists... Checking status...',
                      self.cluster_name)
        self._wait_for_done(service)
        return True

    cluster_data = self._build_cluster_data()
    try:
        service.projects().regions().clusters().create(
            projectId=self.project_id,
            region=self.region,
            body=cluster_data).execute()
    except HttpError as e:
        # probably two cluster start commands at the same time
        time.sleep(10)
        if self._get_cluster(service):
            # BUG FIX: the original passed a '{}' str.format placeholder
            # with %-style lazy logging args, so the cluster name was
            # never interpolated into the message. Use '%s' as elsewhere.
            self.log.info('Cluster %s already exists... Checking status...',
                          self.cluster_name)
            self._wait_for_done(service)
            return True
        else:
            raise e

    self._wait_for_done(service)
def execute(self, context):
    """Create the Dataproc cluster, waiting on an existing one if present."""
    dataproc_hook = DataProcHook(
        gcp_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to
    )
    api = dataproc_hook.get_conn()

    # Fast path: the cluster is already there — just wait until it is done.
    if self._get_cluster(api):
        logging.info('Cluster {} already exists... Checking status...'.format(
            self.cluster_name
        ))
        self._wait_for_done(api)
        return True

    cluster_spec = self._build_cluster_data()
    try:
        create_request = api.projects().regions().clusters().create(
            projectId=self.project_id,
            region=self.region,
            body=cluster_spec
        )
        create_request.execute()
    except HttpError as http_err:
        # probably two cluster start commands at the same time
        time.sleep(10)
        if not self._get_cluster(api):
            raise http_err
        logging.info('Cluster {} already exists... Checking status...'.format(
            self.cluster_name
        ))
        self._wait_for_done(api)
        return True

    self._wait_for_done(api)
def execute(self, context):
    """Delete the Dataproc cluster and block until the operation finishes."""
    self.log.info('Deleting cluster: %s', self.cluster_name)
    dataproc_hook = DataProcHook(gcp_conn_id=self.gcp_conn_id,
                                 delegate_to=self.delegate_to)
    api = dataproc_hook.get_conn()
    delete_request = api.projects().regions().clusters().delete(
        projectId=self.project_id,
        region=self.region,
        clusterName=self.cluster_name)
    response = delete_request.execute()
    # The delete call returns a long-running operation; poll on its name.
    operation_name = response['name']
    self.log.info("Cluster delete operation name: %s", operation_name)
    self._wait_for_done(api, operation_name)
def execute(self, context):
    """Issue a Dataproc cluster delete and wait for the operation to finish."""
    api = DataProcHook(gcp_conn_id=self.google_cloud_conn_id,
                       delegate_to=self.delegate_to).get_conn()
    delete_result = api.projects().regions().clusters().delete(
        projectId=self.project_id,
        region=self.region,
        clusterName=self.cluster_name).execute()
    # The returned long-running operation name is what we poll on.
    op_name = delete_result['name']
    logging.info(
        "Cluster delete operation name: {}".format(op_name))
    self._wait_for_done(api, op_name)
def execute(self, context):
    """Delete the named Dataproc cluster and wait on the delete operation."""
    dataproc_hook = DataProcHook(
        gcp_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to
    )
    conn = dataproc_hook.get_conn()
    clusters_api = conn.projects().regions().clusters()
    delete_response = clusters_api.delete(
        projectId=self.project_id,
        region=self.region,
        clusterName=self.cluster_name
    ).execute()
    # Poll the long-running operation returned by the delete call.
    op_name = delete_response['name']
    logging.info("Cluster delete operation name: {}".format(op_name))
    self._wait_for_done(conn, op_name)
def execute(self, context):
    """Delete the Dataproc cluster and block until the operation completes."""
    self.log.info('Deleting cluster: %s', self.cluster_name)
    api = DataProcHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to
    ).get_conn()
    result = api.projects().regions().clusters().delete(
        projectId=self.project_id,
        region=self.region,
        clusterName=self.cluster_name
    ).execute()
    # Poll on the long-running operation name returned by the API.
    op_name = result['name']
    self.log.info("Cluster delete operation name: %s", op_name)
    self._wait_for_done(api, op_name)
def get_dataproc_vars():
    """Log the Dataproc cluster's config bucket and worker instance names.

    The cluster identity comes from Airflow Variables; logs 'not ready'
    when the cluster reports no status yet.
    """
    conn_id = 'google_cloud_default'
    cluster_name = models.Variable.get('dataproc_cluster_name')
    # NOTE(review): fetched but not passed anywhere below — confirm whether
    # _get_cluster should receive the project id, or this Variable read is
    # only meant to validate that the variable exists.
    project_id = models.Variable.get('gcp_project')
    region = models.Variable.get('gce_region')

    service = DataProcHook(gcp_conn_id=conn_id, delegate_to=None).get_conn()
    cluster = _get_cluster(service, cluster_name, region)

    if 'status' not in cluster:
        logging.info('not ready')
    else:
        logging.info(cluster['config']['configBucket'])
        logging.info(cluster['config']['workerConfig']['instanceNames'])
def execute(self, context):
    """Create the Dataproc cluster described by this operator's settings.

    Returns True early when a cluster with the same name already exists
    (after waiting for it to settle); otherwise submits the create
    request and waits for the operation to finish. A concurrent create
    racing with ours is tolerated by re-checking after an HttpError.

    Decomposed: the request-body construction is extracted into the
    nested _build_cluster_data helper so the API orchestration reads
    straight through.
    """

    def _build_cluster_data():
        # Assemble the clusters.create request body from operator settings.
        zone_uri = \
            'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
                self.project_id, self.zone
            )
        master_type_uri = \
            "https://www.googleapis.com/compute/v1/projects/{}/zones/{}/machineTypes/{}".format(
                self.project_id, self.zone, self.master_machine_type
            )
        worker_type_uri = \
            "https://www.googleapis.com/compute/v1/projects/{}/zones/{}/machineTypes/{}".format(
                self.project_id, self.zone, self.worker_machine_type
            )
        cluster_data = {
            'projectId': self.project_id,
            'clusterName': self.cluster_name,
            'config': {
                'gceClusterConfig': {
                    'zoneUri': zone_uri
                },
                'masterConfig': {
                    'numInstances': 1,
                    'machineTypeUri': master_type_uri,
                    'diskConfig': {
                        'bootDiskSizeGb': self.master_disk_size
                    }
                },
                'workerConfig': {
                    'numInstances': self.num_workers,
                    'machineTypeUri': worker_type_uri,
                    'diskConfig': {
                        'bootDiskSizeGb': self.worker_disk_size
                    }
                },
                'secondaryWorkerConfig': {},
                'softwareConfig': {}
            }
        }
        # Optional sections below are only attached when configured.
        if self.num_preemptible_workers > 0:
            cluster_data['config']['secondaryWorkerConfig'] = {
                'numInstances': self.num_preemptible_workers,
                'machineTypeUri': worker_type_uri,
                'diskConfig': {
                    'bootDiskSizeGb': self.worker_disk_size
                },
                'isPreemptible': True
            }
        if self.labels:
            cluster_data['labels'] = self.labels
        if self.storage_bucket:
            cluster_data['config']['configBucket'] = self.storage_bucket
        if self.metadata:
            cluster_data['config']['gceClusterConfig']['metadata'] = \
                self.metadata
        if self.properties:
            cluster_data['config']['softwareConfig']['properties'] = \
                self.properties
        if self.init_actions_uris:
            cluster_data['config']['initializationActions'] = [
                {'executableFile': uri} for uri in self.init_actions_uris
            ]
        return cluster_data

    hook = DataProcHook(gcp_conn_id=self.google_cloud_conn_id,
                        delegate_to=self.delegate_to)
    service = hook.get_conn()
    if self._get_cluster(service):
        logging.info(
            'Cluster {} already exists... Checking status...'.format(
                self.cluster_name))
        self._wait_for_done(service)
        return True

    cluster_data = _build_cluster_data()
    try:
        service.projects().regions().clusters().create(
            projectId=self.project_id,
            region=self.region,
            body=cluster_data).execute()
    except HttpError as e:
        # probably two cluster start commands at the same time
        time.sleep(10)
        if self._get_cluster(service):
            logging.info(
                'Cluster {} already exists... Checking status...'.format(
                    self.cluster_name))
            self._wait_for_done(service)
            return True
        else:
            raise e
    self._wait_for_done(service)
def execute(self, context):
    """Create the configured Dataproc cluster and wait until it is up.

    Returns True early when a cluster with the same name already exists,
    after waiting for it to reach a done state.
    """
    dataproc_hook = DataProcHook(
        gcp_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to
    )
    api = dataproc_hook.get_conn()

    if self._get_cluster(api):
        logging.info('Cluster {} already exists... Checking status...'.format(
            self.cluster_name
        ))
        self._wait_for_done(api)
        return True

    zone_uri = \
        'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format(
            self.project_id, self.zone
        )
    master_type_uri = \
        "https://www.googleapis.com/compute/v1/projects/{}/zones/{}/machineTypes/{}".format(
            self.project_id, self.zone, self.master_machine_type
        )
    worker_type_uri = \
        "https://www.googleapis.com/compute/v1/projects/{}/zones/{}/machineTypes/{}".format(
            self.project_id, self.zone, self.worker_machine_type
        )

    # Base request body; optional sections are filled in afterwards.
    cluster_spec = {
        'projectId': self.project_id,
        'clusterName': self.cluster_name,
        'config': {
            'gceClusterConfig': {'zoneUri': zone_uri},
            'masterConfig': {
                'numInstances': 1,
                'machineTypeUri': master_type_uri,
                'diskConfig': {'bootDiskSizeGb': self.master_disk_size},
            },
            'workerConfig': {
                'numInstances': self.num_workers,
                'machineTypeUri': worker_type_uri,
                'diskConfig': {'bootDiskSizeGb': self.worker_disk_size},
            },
            'secondaryWorkerConfig': {},
            'softwareConfig': {},
        },
    }
    config = cluster_spec['config']
    if self.num_preemptible_workers > 0:
        config['secondaryWorkerConfig'] = {
            'numInstances': self.num_preemptible_workers,
            'machineTypeUri': worker_type_uri,
            'diskConfig': {'bootDiskSizeGb': self.worker_disk_size},
            'isPreemptible': True,
        }
    if self.labels:
        cluster_spec['labels'] = self.labels
    if self.storage_bucket:
        config['configBucket'] = self.storage_bucket
    if self.metadata:
        config['gceClusterConfig']['metadata'] = self.metadata
    if self.properties:
        config['softwareConfig']['properties'] = self.properties
    if self.init_actions_uris:
        config['initializationActions'] = [
            {'executableFile': uri} for uri in self.init_actions_uris
        ]

    try:
        api.projects().regions().clusters().create(
            projectId=self.project_id,
            region=self.region,
            body=cluster_spec
        ).execute()
    except HttpError as http_err:
        # probably two cluster start commands at the same time
        time.sleep(10)
        if not self._get_cluster(api):
            raise http_err
        logging.info('Cluster {} already exists... Checking status...'.format(
            self.cluster_name
        ))
        self._wait_for_done(api)
        return True

    self._wait_for_done(api)