def execute(self, context: "Context") -> dict:
    """
    Create the Dataproc Metastore service, or fetch it when it already exists.

    :param context: task execution context, forwarded to the link persistence helper.
    :return: the created (or already existing) service converted with ``Service.to_dict``.
    :raises HttpError: re-raised when creation fails with a status other than 409.
    """
    hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    # Identify the service by its id, matching the messages logged below.
    self.log.info("Creating Dataproc Metastore service: %s", self.service_id)
    try:
        operation = hook.create_service(
            region=self.region,
            project_id=self.project_id,
            service=self.service,
            service_id=self.service_id,
            request_id=self.request_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        service = hook.wait_for_operation(self.timeout, operation)
        self.log.info("Service %s created successfully", self.service_id)
    except HttpError as err:
        # Only a 409 status is tolerated; it is compared as both int and str.
        if err.resp.status not in (409, '409'):
            raise
        self.log.info("Instance %s already exists", self.service_id)
        service = hook.get_service(
            region=self.region,
            project_id=self.project_id,
            service_id=self.service_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
    DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
    return Service.to_dict(service)
def execute(self, context: "Context"):
    """
    Export metadata from the service and return the resulting export as a dict.

    The export is started through the hook, then polled with
    ``_wait_for_export_metadata``. Afterwards the metastore link and the
    storage link for the export destination are persisted.

    :param context: task execution context, forwarded to the link persistence helpers.
    :return: the finished export converted with ``MetadataExport.to_dict``.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Exporting metadata from Dataproc Metastore service: %s", self.service_id)

    export_request = {
        "destination_gcs_folder": self.destination_gcs_folder,
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "request_id": self.request_id,
        "database_dump_type": self.database_dump_type,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    metastore_hook.export_metadata(**export_request)

    finished_export = self._wait_for_export_metadata(metastore_hook)
    self.log.info("Metadata from service %s exported successfully", self.service_id)

    DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_EXPORT_LINK)

    # The storage link is derived from the destination recorded on the export itself.
    destination = MetadataExport.to_dict(finished_export)["destination_gcs_uri"]
    storage_uri = self._get_uri_from_destination(destination)
    StorageLink.persist(context=context, task_instance=self, uri=storage_uri)

    return MetadataExport.to_dict(finished_export)
def execute(self, context: "Context"):
    """
    Restore the service from a backup and persist the service link.

    The restore is started through the hook and then polled with
    ``_wait_for_restore_service`` before the link is persisted.

    :param context: task execution context, forwarded to the link persistence helper.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info(
        "Restoring Dataproc Metastore service: %s from backup: %s",
        self.service_id,
        self.backup_id,
    )

    restore_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "backup_project_id": self.backup_project_id,
        "backup_region": self.backup_region,
        "backup_service_id": self.backup_service_id,
        "backup_id": self.backup_id,
        "restore_type": self.restore_type,
        "request_id": self.request_id,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    metastore_hook.restore_service(**restore_request)

    self._wait_for_restore_service(metastore_hook)
    self.log.info("Service %s restored from backup %s", self.service_id, self.backup_id)
    DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
def execute(self, context: dict) -> dict:
    """
    Create a backup of the service, or fetch it when it already exists.

    :param context: task execution context (not used by this method).
    :return: the created (or already existing) backup converted with ``Backup.to_dict``.
    :raises HttpError: re-raised when creation fails with a status other than 409.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Creating Dataproc Metastore backup: %s", self.backup_id)

    try:
        create_operation = metastore_hook.create_backup(
            project_id=self.project_id,
            region=self.region,
            service_id=self.service_id,
            backup=self.backup,
            backup_id=self.backup_id,
            request_id=self.request_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        result = metastore_hook.wait_for_operation(self.timeout, create_operation)
        self.log.info("Backup %s created successfully", self.backup_id)
    except HttpError as err:
        # Anything other than a 409 status (as int or str) is propagated unchanged.
        already_exists = err.resp.status in (409, '409')
        if not already_exists:
            raise
        self.log.info("Backup %s already exists", self.backup_id)
        result = metastore_hook.get_backup(
            project_id=self.project_id,
            region=self.region,
            service_id=self.service_id,
            backup_id=self.backup_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )

    return Backup.to_dict(result)
def execute(self, context: "Context"):
    """
    Create a metadata import, wait for it, and persist the detailed link.

    :param context: task execution context, forwarded to the link persistence helper.
    :return: the operation result converted with ``MetadataImport.to_dict``.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Creating Dataproc Metastore metadata import: %s", self.metadata_import_id)

    import_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "metadata_import": self.metadata_import,
        "metadata_import_id": self.metadata_import_id,
        "request_id": self.request_id,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    import_operation = metastore_hook.create_metadata_import(**import_request)
    created_import = metastore_hook.wait_for_operation(self.timeout, import_operation)
    self.log.info("Metadata import %s created successfully", self.metadata_import_id)

    DataprocMetastoreDetailedLink.persist(
        context=context,
        task_instance=self,
        url=METASTORE_IMPORT_LINK,
        resource=self.metadata_import_id,
    )
    return MetadataImport.to_dict(created_import)
def execute(self, context: 'Context') -> dict:
    """
    Fetch the details of a single Dataproc Metastore service.

    :param context: task execution context (not used by this method).
    :return: the service converted with ``Service.to_dict``.
    """
    hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    # The message is about a service, so log the service id rather than the project id.
    self.log.info("Gets the details of a single Dataproc Metastore service: %s", self.service_id)
    result = hook.get_service(
        region=self.region,
        project_id=self.project_id,
        service_id=self.service_id,
        retry=self.retry,
        timeout=self.timeout,
        metadata=self.metadata,
    )
    return Service.to_dict(result)
def execute(self, context: "Context"):
    """
    Delete a Dataproc Metastore service and wait for the operation to finish.

    :param context: task execution context (not used by this method).
    """
    hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    # Both messages describe the service, so they log the service id, not the project id.
    self.log.info("Deleting Dataproc Metastore service: %s", self.service_id)
    operation = hook.delete_service(
        region=self.region,
        project_id=self.project_id,
        service_id=self.service_id,
        retry=self.retry,
        timeout=self.timeout,
        metadata=self.metadata,
    )
    hook.wait_for_operation(self.timeout, operation)
    self.log.info("Service %s deleted successfully", self.service_id)
def execute(self, context: "Context") -> dict:
    """
    Fetch the details of a single Dataproc Metastore service and persist its link.

    :param context: task execution context, forwarded to the link persistence helper.
    :return: the service converted with ``Service.to_dict``.
    """
    hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    # The message is about a service, so log the service id rather than the project id.
    self.log.info("Gets the details of a single Dataproc Metastore service: %s", self.service_id)
    result = hook.get_service(
        region=self.region,
        project_id=self.project_id,
        service_id=self.service_id,
        retry=self.retry,
        timeout=self.timeout,
        metadata=self.metadata,
    )
    DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
    return Service.to_dict(result)
def execute(self, context: "Context") -> None:
    """
    Delete a backup of a Dataproc Metastore service and wait for the operation to finish.

    :param context: task execution context (not used by this method).
    """
    hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Deleting Dataproc Metastore backup: %s", self.backup_id)
    operation = hook.delete_backup(
        project_id=self.project_id,
        region=self.region,
        service_id=self.service_id,
        backup_id=self.backup_id,
        request_id=self.request_id,
        retry=self.retry,
        timeout=self.timeout,
        metadata=self.metadata,
    )
    hook.wait_for_operation(self.timeout, operation)
    # Report the backup that was deleted, matching the "Deleting" message above.
    self.log.info("Backup %s deleted successfully", self.backup_id)
def execute(self, context: 'Context') -> List[dict]:
    """
    List the backups of a Dataproc Metastore service.

    :param context: task execution context (not used by this method).
    :return: one ``Backup.to_dict`` result per backup returned by the hook.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Listing Dataproc Metastore backups: %s", self.service_id)

    listing_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "page_size": self.page_size,
        "page_token": self.page_token,
        "filter": self.filter,
        "order_by": self.order_by,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    found_backups = metastore_hook.list_backups(**listing_request)
    return list(map(Backup.to_dict, found_backups))
def execute(self, context: 'Context'):
    """
    Create a metadata import and wait for the operation to finish.

    :param context: task execution context (not used by this method).
    :return: the operation result converted with ``MetadataImport.to_dict``.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Creating Dataproc Metastore metadata import: %s", self.metadata_import_id)

    import_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "metadata_import": self.metadata_import,
        "metadata_import_id": self.metadata_import_id,
        "request_id": self.request_id,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    import_operation = metastore_hook.create_metadata_import(**import_request)
    created_import = metastore_hook.wait_for_operation(self.timeout, import_operation)

    self.log.info("Metadata import %s created successfully", self.metadata_import_id)
    return MetadataImport.to_dict(created_import)
def execute(self, context: 'Context'):
    """
    Export metadata from the service and return the resulting export as a dict.

    The export is started through the hook and then polled with
    ``_wait_for_export_metadata``.

    :param context: task execution context (not used by this method).
    :return: the finished export converted with ``MetadataExport.to_dict``.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Exporting metadata from Dataproc Metastore service: %s", self.service_id)

    export_request = {
        "destination_gcs_folder": self.destination_gcs_folder,
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "request_id": self.request_id,
        "database_dump_type": self.database_dump_type,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    metastore_hook.export_metadata(**export_request)

    finished_export = self._wait_for_export_metadata(metastore_hook)
    self.log.info("Metadata from service %s exported successfully", self.service_id)
    return MetadataExport.to_dict(finished_export)
def execute(self, context: 'Context'):
    """
    Update a Dataproc Metastore service and wait for the operation to finish.

    :param context: task execution context (not used by this method).
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Updating Dataproc Metastore service: %s", self.service.get("name"))

    update_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "service": self.service,
        "update_mask": self.update_mask,
        "request_id": self.request_id,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    update_operation = metastore_hook.update_service(**update_request)
    metastore_hook.wait_for_operation(self.timeout, update_operation)

    self.log.info("Service %s updated successfully", self.service.get("name"))
def execute(self, context: "Context") -> List[dict]:
    """
    List the backups of a Dataproc Metastore service and persist the backups link.

    :param context: task execution context, forwarded to the link persistence helper.
    :return: one ``Backup.to_dict`` result per backup returned by the hook.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Listing Dataproc Metastore backups: %s", self.service_id)

    listing_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "page_size": self.page_size,
        "page_token": self.page_token,
        "filter": self.filter,
        "order_by": self.order_by,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    found_backups = metastore_hook.list_backups(**listing_request)

    DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_BACKUPS_LINK)
    return list(map(Backup.to_dict, found_backups))
def execute(self, context: "Context"):
    """
    Update a Dataproc Metastore service, wait for completion, and persist the service link.

    :param context: task execution context, forwarded to the link persistence helper.
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info("Updating Dataproc Metastore service: %s", self.service.get("name"))

    update_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "service": self.service,
        "update_mask": self.update_mask,
        "request_id": self.request_id,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    update_operation = metastore_hook.update_service(**update_request)
    metastore_hook.wait_for_operation(self.timeout, update_operation)

    self.log.info("Service %s updated successfully", self.service.get("name"))
    DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
def execute(self, context: 'Context'):
    """
    Restore the service from a backup and wait until the restore completes.

    The restore is started through the hook and then polled with
    ``_wait_for_restore_service``.

    :param context: task execution context (not used by this method).
    """
    metastore_hook = DataprocMetastoreHook(
        gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
    )
    self.log.info(
        "Restoring Dataproc Metastore service: %s from backup: %s",
        self.service_id,
        self.backup_id,
    )

    restore_request = {
        "project_id": self.project_id,
        "region": self.region,
        "service_id": self.service_id,
        "backup_project_id": self.backup_project_id,
        "backup_region": self.backup_region,
        "backup_service_id": self.backup_service_id,
        "backup_id": self.backup_id,
        "restore_type": self.restore_type,
        "request_id": self.request_id,
        "retry": self.retry,
        "timeout": self.timeout,
        "metadata": self.metadata,
    }
    metastore_hook.restore_service(**restore_request)

    self._wait_for_restore_service(metastore_hook)
    self.log.info("Service %s restored from backup %s", self.service_id, self.backup_id)
def _wait_for_restore_service(self, hook: DataprocMetastoreHook):
    """
    Poll the service until its restore reaches a terminal state.

    Workaround to check that restore service was finished successfully.
    We discovered an issue to parse result to Restore inside the SDK.

    :param hook: hook used to re-fetch the service on every poll.
    :return: the ``Restore`` entry once its state is ``SUCCEEDED``.
    :raises AirflowException: when the restore state is ``FAILED`` or ``CANCELLED``.
    """
    # Sleep first, then poll; delays come from exponential_sleep_generator(initial=10, maximum=120).
    for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
        sleep(time_to_wait)
        service = hook.get_service(
            region=self.region,
            project_id=self.project_id,
            service_id=self.service_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        activities: MetadataManagementActivity = service.metadata_management_activity
        # NOTE(review): assumes the first entry is the restore started by this task - confirm ordering.
        restore_service: Restore = activities.restores[0]
        state = restore_service.state
        if state == Restore.State.SUCCEEDED:
            return restore_service
        if state == Restore.State.FAILED:
            raise AirflowException("Restoring service FAILED")
        if state == Restore.State.CANCELLED:
            # A cancelled restore never reaches SUCCEEDED or FAILED, so stop polling here.
            raise AirflowException("Restoring service CANCELLED")
def _wait_for_export_metadata(self, hook: DataprocMetastoreHook):
    """
    Poll the service until its metadata export reaches a terminal state.

    Workaround to check that export was created successfully.
    We discovered an issue to parse result to MetadataExport inside the SDK.

    :param hook: hook used to re-fetch the service on every poll.
    :return: the ``MetadataExport`` entry once its state is ``SUCCEEDED``.
    :raises AirflowException: when the export state is ``FAILED`` or ``CANCELLED``.
    """
    # Sleep first, then poll; delays come from exponential_sleep_generator(initial=10, maximum=120).
    for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
        sleep(time_to_wait)
        service = hook.get_service(
            region=self.region,
            project_id=self.project_id,
            service_id=self.service_id,
            retry=self.retry,
            timeout=self.timeout,
            metadata=self.metadata,
        )
        activities: MetadataManagementActivity = service.metadata_management_activity
        # NOTE(review): assumes the first entry is the export started by this task - confirm ordering.
        metadata_export: MetadataExport = activities.metadata_exports[0]
        state = metadata_export.state
        if state == MetadataExport.State.SUCCEEDED:
            return metadata_export
        if state == MetadataExport.State.FAILED:
            raise AirflowException(
                f"Exporting metadata from Dataproc Metastore {metadata_export.name} FAILED"
            )
        if state == MetadataExport.State.CANCELLED:
            # A cancelled export never reaches SUCCEEDED or FAILED, so stop polling here.
            raise AirflowException(
                f"Exporting metadata from Dataproc Metastore {metadata_export.name} CANCELLED"
            )