async def _direct_download_resources_async(
        loop: asyncio.BaseEventLoop,
        blob_client: azureblob.BlockBlobService,
        table_client: azuretable.TableService,
        nglobalresources: int) -> None:
    """Direct download resource logic

    Takes at most one resource off the direct download queue, acquires a
    blob lease for it, schedules the lease renewal callback and starts the
    download in a background thread.

    :param asyncio.BaseEventLoop loop: event loop
    :param azureblob.BlockBlobService blob_client: blob client
    :param azuretable.TableService table_client: table client
    :param int nglobalresources: number of global resources
    :raises NotImplementedError: if the resource is not a container resource
    """
    # ensure we are not downloading too many sources at once
    # NOTE(review): `>` lets the in-flight count reach limit + 1 before this
    # guard trips -- confirm whether `>=` was intended
    with _DIRECTDL_LOCK:
        if len(_DIRECTDL_DOWNLOADING) > _CONCURRENT_DOWNLOADS_ALLOWED:
            return
    # retrieve a resource from dl queue; `resource` stays None when nothing
    # suitable is found
    resource = None
    seen = set()
    while True:
        try:
            # non-blocking: a plain get() never raises queue.Empty and would
            # stall the event loop while the queue is empty
            candidate = _DIRECTDL_QUEUE.get_nowait()
        except queue.Empty:
            break
        if candidate in seen:
            # cycled through the whole queue without finding a free resource
            _DIRECTDL_QUEUE.put(candidate)
            break
        seen.add(candidate)
        with _DIRECTDL_LOCK:
            if candidate not in _DIRECTDL_DOWNLOADING:
                resource = candidate
                break
        # already being downloaded, requeue it and try the next one
        _DIRECTDL_QUEUE.put(candidate)
    del seen
    if resource is None:
        return
    # attempt to get a blob lease on one of the per-resource lease blobs
    lease_id = None
    blob_name = None
    resource_hash = compute_resource_hash(resource)
    for i in range(0, _CONCURRENT_DOWNLOADS_ALLOWED):
        blob_name = '{}.{}'.format(resource_hash, i)
        try:
            lease_id = blob_client.acquire_blob_lease(
                container_name=_STORAGE_CONTAINERS['blob_globalresources'],
                blob_name=blob_name,
                lease_duration=60,
            )
            break
        except azure.common.AzureConflictHttpError:
            # this blob is leased by someone else, try the next slot
            blob_name = None
    if lease_id is None:
        logger.debug('no available blobs to lease for resource: {}'.format(
            resource))
        _DIRECTDL_QUEUE.put(resource)
        return
    # create lease renew callback
    logger.debug('blob lease {} acquired for resource {}'.format(
        lease_id, resource))
    _BLOB_LEASES[resource] = lease_id
    _CBHANDLES[resource] = loop.call_later(
        15, _renew_blob_lease, loop, blob_client, 'blob_globalresources',
        resource, blob_name)
    # pull and save container image in thread
    if is_container_resource(resource):
        thr = ContainerImageSaveThread(
            blob_client, table_client, resource, blob_name, nglobalresources)
        thr.start()
    else:
        # TODO download via blob, explode uri to get container/blob
        # use download to path into /tmp and move to directory
        # NOTE(review): the lease and its renew callback are already in
        # place when this raises -- confirm whether they should be released
        raise NotImplementedError()
class AzureStorageCheckpointLeaseManager(AbstractCheckpointManager, AbstractLeaseManager):
    """
    Manages checkpoints and lease with azure storage blobs. In this implementation,
    checkpoints are data that's actually in the lease blob, so checkpoint operations
    turn into lease operations under the covers.
    """

    def __init__(self, storage_account_name, storage_account_key, lease_container_name,
                 storage_blob_prefix=None, lease_renew_interval=10, lease_duration=30):
        """
        Store the storage settings and validate them. No storage client is created
        here; that happens in initialize().

        :param storage_account_name: storage account name (required).
        :param storage_account_key: storage account key (required).
        :param lease_container_name: name of the container holding the lease blobs.
        :param storage_blob_prefix: optional prefix; spaces are stripped, None becomes "".
        :param lease_renew_interval: forwarded to AbstractLeaseManager.
        :param lease_duration: forwarded to AbstractLeaseManager.
        :raises ValueError: if the account name/key is missing or the container
            name does not follow the storage naming rules.
        """
        AbstractCheckpointManager.__init__(self)
        AbstractLeaseManager.__init__(self, lease_renew_interval, lease_duration)
        self.storage_account_name = storage_account_name
        self.storage_account_key = storage_account_key
        self.lease_container_name = lease_container_name
        self.storage_blob_prefix = storage_blob_prefix
        self.storage_client = None
        self.consumer_group_directory = None
        self.host = None
        self.storage_max_execution_time = 120
        self.request_session = requests.Session()
        self.request_session.mount(
            'https://',
            requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))

        # Validate storage inputs
        if not self.storage_account_name or not self.storage_account_key:
            raise ValueError("Need a valid storage account name and key")
        # 3-63 characters, lowercase letters, digits and hyphens only; must start
        # and end with a letter or digit; no consecutive hyphens.
        if not self.lease_container_name or not re.fullmatch(
                r"[a-z0-9](?:[a-z0-9]|-(?!-)){1,61}[a-z0-9]",
                self.lease_container_name):
            raise ValueError(
                "Azure Storage lease container name is invalid. "
                "Please check naming conventions at "
                "https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx")

        if self.storage_blob_prefix:
            # str.replace returns a new string, so the result must be assigned.
            self.storage_blob_prefix = self.storage_blob_prefix.replace(" ", "")
        else:
            self.storage_blob_prefix = ""  # Convert null prefix to empty string.

    def initialize(self, host):
        """
        The EventProcessorHost can't pass itself to the AzureStorageCheckpointLeaseManager
        constructor because it is still being constructed. Do other initialization
        here also because it might throw and hence we don't want it in the constructor.

        :param host: the owning host; its eh_config.consumer_group is read here.
        """
        self.host = host
        self.storage_client = BlockBlobService(account_name=self.storage_account_name,
                                               account_key=self.storage_account_key,
                                               request_session=self.request_session)
        self.consumer_group_directory = self.storage_blob_prefix + self.host.eh_config.consumer_group

    # Checkpoint Management Methods

    async def create_checkpoint_store_if_not_exists_async(self):
        """
        Create the checkpoint store if it doesn't exist. Do nothing if it does exist.
        Checkpoints live in the lease blobs, so this delegates to the lease store.

        (Returns) the result of create_lease_store_if_not_exists_async (True on
        success; failures propagate as exceptions).
        """
        return await self.create_lease_store_if_not_exists_async()

    async def get_checkpoint_async(self, partition_id):
        """
        Get the checkpoint data associated with the given partition.
        Could return None if no checkpoint has been created for that partition.

        (Returns) Given partition checkpoint info, or None if the lease could not
        be read or carries no offset.
        """
        lease = await self.get_lease_async(partition_id)
        if lease and lease.offset:
            return Checkpoint(partition_id, lease.offset, lease.sequence_number)
        return None

    async def create_checkpoint_if_not_exists_async(self, partition_id):
        """
        Create the given partition checkpoint if it doesn't exist. Do nothing if it
        does exist. A freshly-created checkpoint is built from the partition id only.

        (Returns) The checkpoint for the given partition, whether newly created or
        already existing.
        """
        checkpoint = await self.get_checkpoint_async(partition_id)
        if not checkpoint:
            await self.create_lease_if_not_exists_async(partition_id)
            checkpoint = Checkpoint(partition_id)
        return checkpoint

    async def update_checkpoint_async(self, lease, checkpoint):
        """
        Update the checkpoint in the store with the offset/sequenceNumber in the
        provided checkpoint.

        :param lease: the lease whose blob should be updated.
        :param checkpoint: offset/sequence number to update the store with.
        """
        # Work on a copy so the caller's lease object is not mutated.
        new_lease = AzureBlobLease()
        new_lease.with_source(lease)
        new_lease.offset = checkpoint.offset
        new_lease.sequence_number = checkpoint.sequence_number
        await self.update_lease_async(new_lease)

    async def delete_checkpoint_async(self, partition_id):
        """
        Delete the stored checkpoint for the given partition. If there is no stored
        checkpoint for the given partition, that is treated as success.
        """
        return  # Make this a no-op to avoid deleting leases by accident.

    # Lease Management Methods

    async def create_lease_store_if_not_exists_async(self):
        """
        Create the lease store (the storage container) if it does not exist, do
        nothing if it does exist.

        (Returns) True once the create call has returned without raising; any
        exception from the storage client is logged and re-raised.
        """
        try:
            self.storage_client.create_container(self.lease_container_name)
        except Exception as err:
            logging.error(repr(err))
            raise
        return True

    async def delete_lease_store_async(self):
        """
        Not used by EventProcessorHost, but a convenient function to have for testing.

        (Returns) the string "Not Supported in Python"; nothing is deleted.
        """
        return "Not Supported in Python"

    async def get_lease_async(self, partition_id):
        """
        Return the lease info for the specified partition.
        Can return None if no lease has been created in the store for the
        specified partition.

        (Returns) lease info for the partition, or None if reading it failed
        (the failure is logged, not raised).
        """
        try:
            blob = self.storage_client.get_blob_to_text(self.lease_container_name, partition_id)
            lease = AzureBlobLease()
            lease.with_blob(blob)

            def state():
                """
                Allow lease to curry storage_client to get state.
                Returns None (after logging) if the properties lookup fails.
                """
                try:
                    return self.storage_client.get_blob_properties(
                        self.lease_container_name,
                        partition_id).properties.lease.state
                except Exception as err:
                    logging.error("Failed to get lease state %s %s", err, partition_id)
                    return None

            lease.state = state
            return lease
        except Exception as err:
            logging.error("Failed to get lease %s %s", err, partition_id)
            return None

    async def get_all_leases(self):
        """
        Return the lease lookups for all partitions, one get_lease_async() call per
        partition id reported by the host's partition manager.

        (Returns) list of un-awaited get_lease_async coroutines; the caller is
        expected to await each one to obtain the lease info.
        """
        partition_ids = await self.host.partition_manager.get_partition_ids_async()
        return [self.get_lease_async(partition_id) for partition_id in partition_ids]

    async def create_lease_if_not_exists_async(self, partition_id):
        """
        Create in the store the lease info for the given partition, if it does not
        exist. Do nothing if it does exist in the store already.

        (Returns) the existing or newly-created lease info for the partition
        """
        return_lease = None
        try:
            return_lease = AzureBlobLease()
            return_lease.partition_id = partition_id
            json_lease = json.dumps(return_lease.serializable())
            logging.info("Creating Lease %s %s %s",
                         self.lease_container_name,
                         partition_id,
                         json_lease)
            self.storage_client.create_blob_from_text(self.lease_container_name,
                                                      partition_id,
                                                      json_lease)
        except Exception:
            # Creation failed; fall back to reading whatever is stored.
            try:
                return_lease = await self.get_lease_async(partition_id)
            except Exception as err:
                logging.error("Failed to create lease %s ", repr(err))
                raise
        return return_lease

    async def delete_lease_async(self, lease):
        """
        Delete the lease info for the given partition from the store.
        The blob is deleted using the lease's token as the lease id.
        """
        self.storage_client.delete_blob(self.lease_container_name,
                                        lease.partition_id,
                                        lease_id=lease.token)

    async def acquire_lease_async(self, lease):
        """
        Acquire the lease on the desired partition for this EventProcessorHost.
        Note that it is legal to acquire a lease that is already owned by another host.
        Lease-stealing is how partitions are redistributed when additional hosts are started.

        (Returns) True if the lease was acquired and the updated lease info was
        written to the store, False if not
        """
        retval = True
        new_lease_id = str(uuid.uuid4())
        partition_id = lease.partition_id
        try:
            if lease.state() == "leased":
                if not lease.token:
                    # We reach here in a race condition: when this instance of EventProcessorHost
                    # scanned the lease blobs, this partition was unowned (token is empty) but
                    # between then and now, another instance of EPH has established a lease
                    # (getLeaseState() is LEASED). We normally enforce that we only steal the lease
                    # if it is still owned by the instance which owned it when we scanned, but we
                    # can't do that when we don't know who owns it. The safest thing to do is just
                    # fail the acquisition. If that means that one EPH instance gets more partitions
                    # than it should, rebalancing will take care of that quickly enough.
                    retval = False
                else:
                    logging.info("ChangingLease %s %s", self.host.guid, lease.partition_id)
                    self.storage_client.change_blob_lease(self.lease_container_name,
                                                          partition_id,
                                                          lease.token,
                                                          new_lease_id)
                    lease.token = new_lease_id
            else:
                logging.info("AcquiringLease %s %s", self.host.guid, lease.partition_id)
                lease.token = self.storage_client.acquire_blob_lease(self.lease_container_name,
                                                                     partition_id,
                                                                     self.lease_duration,
                                                                     new_lease_id)
            lease.owner = self.host.host_name
            lease.increment_epoch()
            # Persist the new owner/epoch. The acquisition only counts as successful
            # if that write goes through (update_lease_async returns False when the
            # lease has no token, which also covers the race branch above).
            retval = await self.update_lease_async(lease)
        except Exception as err:
            logging.error("Failed to acquire lease %s %s %s", repr(err), partition_id, lease.token)
            return False

        return retval

    async def renew_lease_async(self, lease):
        """
        Renew a lease currently held by this host.
        If the lease has been stolen, or expired, or released, it is not possible to renew it.
        You will have to call getLease() and then acquireLease() again.

        (Returns) True if the lease was renewed successfully, False if not
        """
        try:
            self.storage_client.renew_blob_lease(self.lease_container_name,
                                                 lease.partition_id,
                                                 lease_id=lease.token,
                                                 timeout=self.lease_duration)
        except Exception as err:
            # A lease-id mismatch means another host took the lease; that is an
            # expected outcome, so it is logged at info level only.
            if "LeaseIdMismatchWithLeaseOperation" in str(err):
                logging.info("LeaseLost")
            else:
                logging.error("Failed to renew lease on partition %s with token %s %s",
                              lease.partition_id, lease.token, repr(err))
            return False
        return True

    async def release_lease_async(self, lease):
        """
        Give up a lease currently held by this host. If the lease has been stolen, or expired,
        releasing it is unnecessary, and will fail if attempted.

        (Returns) True if the lease was released successfully, False if not
        """
        # Read the token before the try block so the error handler below can
        # always reference it.
        lease_id = lease.token
        try:
            logging.info("Releasing lease %s %s", self.host.guid, lease.partition_id)
            # Write an ownerless copy of the lease to the blob first, then release
            # the blob lease itself.
            released_copy = AzureBlobLease()
            released_copy.with_lease(lease)
            released_copy.token = None
            released_copy.owner = None
            released_copy.state = None
            self.storage_client.create_blob_from_text(self.lease_container_name,
                                                      lease.partition_id,
                                                      json.dumps(released_copy.serializable()),
                                                      lease_id=lease_id)
            self.storage_client.release_blob_lease(self.lease_container_name,
                                                   lease.partition_id,
                                                   lease_id)
        except Exception as err:
            logging.error("Failed to release lease %s %s %s",
                          repr(err), lease.partition_id, lease_id)
            return False
        return True

    async def update_lease_async(self, lease):
        """
        Update the store with the information in the provided lease. It is necessary to currently
        hold a lease in order to update it. If the lease has been stolen, or expired, or released,
        it cannot be updated. Updating should renew the lease before performing the update to
        avoid lease expiration during the process.

        (Returns) True if the update was performed successfully, False if the lease
        is None, has no token, or could not be renewed. A failure while writing the
        blob is logged and re-raised.
        """
        if lease is None:
            return False

        if not lease.token:
            return False

        logging.debug("Updating lease %s %s", self.host.guid, lease.partition_id)

        # First, renew the lease to make sure the update will go through.
        if not await self.renew_lease_async(lease):
            return False

        try:
            self.storage_client.create_blob_from_text(self.lease_container_name,
                                                      lease.partition_id,
                                                      json.dumps(lease.serializable()),
                                                      lease_id=lease.token)
        except Exception as err:
            logging.error("Failed to update lease %s %s %s",
                          self.host.guid, lease.partition_id, repr(err))
            raise
        return True