def _inner_acquire(self, timeout, ephemeral=True):
    """Run one attempt at taking the lock, written as a generator.

    Every ``yield`` suspends on the value returned by a helper
    (presumably a future -- confirm against the decorator applied to this
    method). Success is signalled by raising ``gen.Return(True)``.

    :param timeout: Passed to ``self.wake_event.wait`` each time this
        contender waits for its predecessor to change.
    :param ephemeral: Forwarded to ``create`` when the contender node is
        made.
    :raises ForceRetryError: If ``self.is_acquired`` is already set, or
        if this contender's node is missing from the sorted children.
    :raises CancelledError: If ``self.cancelled`` is set at the top of a
        loop iteration.
    :raises LockTimeout: If the wait raises ``gen.TimeoutError``.
    """
    # wait until it's our chance to get it..
    if self.is_acquired:
        raise ForceRetryError()

    # make sure our election parent node exists
    if not self.assured_path:
        yield self._ensure_path()

    node = None
    if self.create_tried:
        # A previous attempt may already have created our node; look for
        # it instead of creating a second one.
        node = yield self._find_node()
    else:
        self.create_tried = True

    if not node:
        node = yield self.tornado_kazoo.create(
            self.create_path, self.data, ephemeral=ephemeral, sequence=True)
        # strip off path to node
        node = node[len(self.path) + 1:]

    self.node = node

    while True:
        self.wake_event.clear()

        # bail out with an exception if cancellation has been requested
        if self.cancelled:
            raise CancelledError()

        children = yield self._get_sorted_children()

        try:
            our_index = children.index(node)
        except ValueError:  # pragma: nocover
            # somehow we aren't in the children -- probably we are
            # recovering from a session failure and our ephemeral
            # node was removed
            raise ForceRetryError()

        predecessor = self.predecessor(children, our_index)
        if not predecessor:
            # Nothing is ahead of us: the lock is ours.
            raise gen.Return(True)

        # otherwise we are in the mix. watch predecessor and bide our time
        predecessor = self.path + "/" + predecessor
        self.client.add_listener(self._watch_session_listener)
        try:
            yield self.tornado_kazoo.get(predecessor, self._watch_predecessor)
        except NoNodeError:
            pass  # predecessor has already been deleted
        else:
            try:
                yield self.wake_event.wait(timeout)
            except gen.TimeoutError:
                raise LockTimeout("Failed to acquire lock on %s after "
                                  "%s seconds" % (self.path, timeout))
        finally:
            # The listener is removed on every exit path of this iteration.
            self.client.remove_listener(self._watch_session_listener)
def _inner_acquire(self, blocking, timeout):
    """Run one attempt at taking the lock.

    :param blocking: When false, return ``False`` instead of waiting
        whenever the lock cannot be taken immediately.
    :param timeout: Passed to ``self.wake_event.wait`` each time this
        contender waits on its predecessor.
    :returns: ``True`` once ``self.acquired_lock`` reports success,
        ``False`` when ``blocking`` is false and the lock is unavailable.
    :raises ForceRetryError: If ``self.is_acquired`` is already set (and
        ``blocking`` is true), or if this contender's node is missing
        from the sorted children.
    :raises CancelledError: If ``self.cancelled`` is set at the top of a
        loop iteration.
    :raises LockTimeout: If the wake event is still unset after waiting.
    """
    # wait until it's our chance to get it..
    if self.is_acquired:
        if not blocking:
            return False
        raise ForceRetryError()

    # make sure our election parent node exists
    if not self.assured_path:
        self._ensure_path()

    node = None
    if self.create_tried:
        # A previous attempt may already have created our node; look for
        # it instead of creating a second one.
        node = self._find_node()
    else:
        self.create_tried = True

    if not node:
        node = self.client.create(self.create_path, self.data,
                                  ephemeral=True, sequence=True)
        # strip off path to node
        node = node[len(self.path) + 1:]

    self.node = node

    while True:
        self.wake_event.clear()

        # bail out with an exception if cancellation has been requested
        if self.cancelled:
            raise CancelledError()

        children = self._get_sorted_children()

        try:
            our_index = children.index(node)
        except ValueError:  # pragma: nocover
            # somehow we aren't in the children -- probably we are
            # recovering from a session failure and our ephemeral
            # node was removed
            raise ForceRetryError()

        if self.acquired_lock(children, our_index):
            return True

        if not blocking:
            return False

        # otherwise we are in the mix. watch predecessor and bide our time
        predecessor = self.path + "/" + children[our_index - 1]
        self.client.add_listener(self._watch_session)
        try:
            # Only wait when the predecessor still exists; otherwise loop
            # straight back and re-read the children.
            if self.client.exists(predecessor, self._watch_predecessor):
                self.wake_event.wait(timeout)
                if not self.wake_event.isSet():
                    raise LockTimeout("Failed to acquire lock on %s after "
                                      "%s seconds" % (self.path, timeout))
        finally:
            # The listener is removed on every exit path of this iteration.
            self.client.remove_listener(self._watch_session)
def _inner_acquire(self):
    """Inner loop that runs from the top anytime a command hits a
    retryable Zookeeper exception.

    :returns: ``True`` when the lease node at ``self.create_path`` already
        exists, or after it has been created.
    :raises ForceRetryError: If ``self._session_expired`` is set while
        looping.
    :raises CancelledError: If ``self.cancelled`` is set while looping.
    """
    self._session_expired = False
    # NOTE(review): no matching remove_listener call is made in this
    # function -- confirm the listener is removed elsewhere.
    self.client.add_listener(self._watch_session)

    if not self.assured_path:
        self.client.ensure_path(self.path)

    # Do we already have a lease?
    if self.client.exists(self.create_path):
        return True

    # The lease check and the lease creation below both run while holding
    # the lock at self.lock_path.
    with self.client.Lock(self.lock_path, self.data):
        while True:
            self.wake_event.clear()

            if self._session_expired:
                raise ForceRetryError("Retry on session loss at top")

            if self.cancelled:
                raise CancelledError("Semaphore cancelled")

            # Is there a lease free?
            children = self.client.get_children(self.path,
                                                self._watch_lease_change)
            if len(children) < self.max_leases:
                self.client.create(self.create_path, self.data,
                                   ephemeral=True)
                return True
            else:
                # No timeout is passed: blocks until wake_event is set.
                self.wake_event.wait()
def update_state(self, state):
    """Write ``state`` into the job this worker currently owns.

    The job is read from the path returned by
    ``self.running_job_path.get()``. For ``Jobstate.SUCCESSFUL`` the
    updated job is copied to a new sequence node under ``self.done_path``,
    inserted into ``self.results``, and the running node is deleted. For
    any other state the running node's data is overwritten in place.

    :param state: New value for the job's ``"state"`` key.
    :raises ForceRetryError: If the running node no longer exists when it
        is deleted after a successful job.
    """
    job_path = self.running_job_path.get()
    raw_job = self.get_job_from_path(job_path)
    if not raw_job:
        logging.debug("Fail to update State, No Job in {path}".format(
            path=self.running_job_path))
        return

    # The priority is the second dash-separated field of the node path.
    priority = int(job_path.split("-")[1])
    job_object = json.loads(raw_job)
    job_object["state"] = state
    serialized = json.dumps(job_object)

    succeeded = state == Jobstate.SUCCESSFUL
    if not succeeded:
        self.client.retry(self.client.set, job_path, serialized)
        return

    finish_path = '{path}/{prefix}{priority:03d}-{dataset}:{groupid}-'.format(
        path=self.done_path,
        prefix=self.prefix,
        priority=priority,
        dataset=job_object.get("dataset"),
        groupid=job_object.get("groupid"))
    self.client.create(finish_path, serialized, sequence=True)

    # save in Mongo
    inserted = self.results.insert_one(job_object)
    job_mongo_id = inserted.inserted_id
    logging.debug(
        "finish saving job_id:{job_mongo_id} in mongo".format(
            job_mongo_id=job_mongo_id))

    try:
        self.client.delete(job_path)
    except NoNodeError:
        raise ForceRetryError()
def _get_lease(self, data=None): # Make sure the session is still valid if self._session_expired: raise ForceRetryError("Retry on session loss at top") # Make sure that the request hasn't been canceled if self.cancelled: raise CancelledError("Semaphore cancelled") # Get a list of the current potential lock holders. If they change, # notify our wake_event object. This is used to unblock a blocking # self._inner_acquire call. children = self.client.get_children(self.path, self._watch_lease_change) # If there are leases available, acquire one if len(children) < self.max_leases: self.client.create(self.create_path, self.data, ephemeral=True) # Check if our acquisition was successful or not. Update our state. if self.client.exists(self.create_path): self.is_acquired = True else: self.is_acquired = False # Return current state return self.is_acquired
def _inner_get(client, path): max_retries = 3 try: data, stat = client.get(path) except NoNodeError: # the first node has vanished in the meantime, try to # get another one raise ForceRetryError() try: client.delete(path) except NoNodeError: # we were able to get the data but someone else has removed # the node in the meantime. consider the item as processed # by the other process raise ForceRetryError() return data
def _inner_change(self, value): data, version = self._value() data = repr(data + value).encode('ascii') try: self.client.set(self.path, data, version=version) except BadVersionError: # pragma: nocover raise ForceRetryError()
def _get_predecessor(self, node):
    """Return `node`'s predecessor, or None if it has none.

    Note: This handles the case where the current lock is not a contender
    (e.g. rlock), and also edge cases where the lock's ephemeral node is
    gone.

    :param node: Child name of this lock's node under ``self.path``.
    :returns: The child name of the contender sorted immediately before
        `node`, or ``None`` when no contender sorts before it.
    :raises ForceRetryError: If `node` is not among the children of
        ``self.path``.
    """
    children = self.client.get_children(self.path)
    # False means "node not seen at all"; a seen node stores its regex
    # match, which is None when the node itself is not a contender.
    found_self = False
    # Filter out the contenders using the computed regex
    contender_matches = []
    for child in children:
        match = self._contenders_re.search(child)
        if match is not None:
            contender_matches.append(match)
        if child == node:
            # Remember the node's match object so we can short circuit
            # below.
            found_self = match

    if found_self is False:  # pragma: nocover
        # somehow we aren't in the childrens -- probably we are
        # recovering from a session failure and our ephemeral
        # node was removed.
        raise ForceRetryError()

    predecessor = None
    # Sort the contenders using the sequence number extracted by the regex,
    # then extract the original string.
    for match in sorted(contender_matches, key=lambda m: m.groups()):
        if match is found_self:
            break
        # Keeps the last contender seen before reaching our own match.
        predecessor = match.string

    return predecessor
def _resolve_deadlocks(self, children_list): """ Check if there are any concurrent cross-group locks. Args: children_list: A list of current transactions for each group. """ current_txid = int(self.data) for index, children in enumerate(children_list): our_index = children.index(self.nodes[index]) # Skip groups where this lock already has the earliest contender. if our_index == 0: continue # Get transaction IDs for earlier contenders. for child in children[:our_index - 1]: try: data, _ = self.client.get(self.paths[index] + '/' + child) except NoNodeError: continue # If data is not set, it doesn't belong to a cross-group # transaction. if not data: continue child_txid = int(data) # As an arbitrary rule, require later transactions to # resolve deadlocks. if current_txid > child_txid: # TODO: Implement a more graceful deadlock detection. self.client.retry(self._delete_nodes, self.nodes) raise ForceRetryError()
def _inner_get_for_update(self, path): try: data, stat = self.client.get(path) except NoNodeError: raise ForceRetryError() return data
def _inner_get(self, path): try: data, stat = self.client.get(path) except NoNodeError: # the first node has vanished in the meantime, try to # get another one raise ForceRetryError() try: self.client.delete(path) except NoNodeError: # we were able to get the data but someone else has removed # the node in the meantime. consider the item as processed # by the other process raise ForceRetryError() del self.unowned_job[:] return data
def _inner_get(self, children): if not children: return None name = children.pop(0) try: data, stat = self.client.get(self.path + "/" + name) except NoNodeError: # pragma: nocover # the first node has vanished in the meantime, try to # get another one raise ForceRetryError() try: self.client.delete(self.path + "/" + name) except NoNodeError: # pragma: nocover # we were able to get the data but someone else has removed # the node in the meantime. consider the item as processed # by the other process raise ForceRetryError() return data
def _inner_change(self, value): data, version = self._value() data = repr(data + value).encode('ascii') if int(data) > self.max_count: raise OverQuota() try: self.client.set(self.path, data, version=version) except kazoo.exceptions.BadVersionError: # pragma: nocover raise ForceRetryError()
def _inner_acquire(self, blocking, timeout, ephemeral=True):
    """Run one attempt at taking the lock.

    :param blocking: When false, return ``False`` instead of waiting
        whenever the lock cannot be taken immediately.
    :param timeout: Passed to ``self.wake_event.wait`` each time this
        contender waits on its predecessor.
    :param ephemeral: Forwarded to ``create`` when the contender node is
        made.
    :returns: ``True`` once ``self._get_predecessor`` reports no
        predecessor, ``False`` when ``blocking`` is false and the lock is
        unavailable.
    :raises ForceRetryError: If ``self.is_acquired`` is already set and
        ``blocking`` is true.
    :raises CancelledError: If ``self.cancelled`` is set at the top of a
        loop iteration.
    :raises LockTimeout: If the wake event is still unset after waiting.
    """
    # wait until it's our chance to get it..
    if self.is_acquired:
        if not blocking:
            return False
        raise ForceRetryError()

    # make sure our election parent node exists
    if not self.assured_path:
        self._ensure_path()

    node = None
    if self.create_tried:
        # A previous attempt may already have created our node; look for
        # it instead of creating a second one.
        node = self._find_node()
    else:
        self.create_tried = True

    if not node:
        node = self.client.create(self.create_path, self.data,
                                  ephemeral=ephemeral, sequence=True)
        # strip off path to node
        node = node[len(self.path) + 1:]

    self.node = node

    while True:
        self.wake_event.clear()

        # bail out with an exception if cancellation has been requested
        if self.cancelled:
            raise CancelledError()

        predecessor = self._get_predecessor(node)
        if predecessor is None:
            return True

        if not blocking:
            return False

        # otherwise we are in the mix. watch predecessor and bide our time
        predecessor = self.path + "/" + predecessor
        self.client.add_listener(self._watch_session)
        try:
            self.client.get(predecessor, self._watch_predecessor)
        except NoNodeError:
            pass  # predecessor has already been deleted
        else:
            self.wake_event.wait(timeout)
            if not self.wake_event.isSet():
                raise LockTimeout(
                    "Failed to acquire lock on %s after %s seconds"
                    % (self.path, timeout))
        finally:
            # The listener is removed on every exit path of this iteration.
            self.client.remove_listener(self._watch_session)
def _inner_change(self, value): self.pre_value, version = self._value() post_value = self.pre_value + value data = repr(post_value).encode('ascii') try: self.client.set(self.path, data, version=version) except BadVersionError: # pragma: nocover self.post_value = None raise ForceRetryError() self.post_value = post_value
def _connect_loop(self, retry):
    """Try each expanded host once, in order.

    :param retry: Passed through to ``self._connect_attempt``.
    :returns: ``STOP_CONNECTING`` when the client's stop flag is set or an
        attempt returns ``STOP_CONNECTING``.
    :raises ForceRetryError: If no host was resolved, or if every host was
        tried without ``STOP_CONNECTING`` coming back.
    """
    host_ports = self._expand_client_hosts()

    # Check for an empty hostlist, indicating none resolved
    if len(host_ports) == 0:
        raise ForceRetryError('No host resolved. Reconnecting')

    # Iterate through the hosts a full cycle before starting over
    for host, hostip, port in host_ports:
        if self.client._stopped.is_set():
            return STOP_CONNECTING
        outcome = self._connect_attempt(host, hostip, port, retry)
        if outcome is STOP_CONNECTING:
            return STOP_CONNECTING

    raise ForceRetryError('Reconnecting')
def _inner_change(self, value): data, version = self._value() # Decrement counter only if data(current count) is non zero. if data > 0 or value > 0: data += value # Dont raise OverQuota during delete if (data > self.max_count and value > 0): raise OverQuota() try: self.client.set( self.path, repr(data).encode('ascii'), version=version) except kazoo.exceptions.BadVersionError: # pragma: nocover raise ForceRetryError()
def _inner_change(self, value): self.pre_value, version = self._value() post_value = self.pre_value + value if self.support_curator: data = struct.pack(">i", post_value) else: data = repr(post_value).encode('ascii') try: self.client.set(self.path, data, version=version) except BadVersionError: # pragma: nocover self.post_value = None raise ForceRetryError() self.post_value = post_value
def _connect_loop(self, retry):
    """Try each host in ``self.client.hosts`` once, in order.

    :param retry: Passed through to ``self._connect_attempt``.
    :returns: ``STOP_CONNECTING`` when the client's stop flag is set or an
        attempt returns ``STOP_CONNECTING``.
    :raises ForceRetryError: If every host was tried without
        ``STOP_CONNECTING`` coming back (including an empty host list).
    """
    # Iterate through the hosts a full cycle before starting over
    for host, port in self.client.hosts:
        if self.client._stopped.is_set():
            return STOP_CONNECTING
        outcome = self._connect_attempt(host, port, retry)
        if outcome is STOP_CONNECTING:
            return STOP_CONNECTING

    raise ForceRetryError('Reconnecting')
def _connect_loop(self, hosts, retry):
    """Make up to ``len(self.client.hosts)`` connection attempts.

    :param hosts: Passed through to ``self._connect_attempt`` on every
        attempt.
    :param retry: Passed through to ``self._connect_attempt``.
    :returns: ``STOP_CONNECTING`` when the client's stop flag is set or an
        attempt returns ``STOP_CONNECTING``.
    :raises ForceRetryError: If all attempts were made without
        ``STOP_CONNECTING`` coming back.
    """
    # Iterate through the hosts a full cycle before starting over
    for _ in range(len(self.client.hosts)):
        if self.client._stopped.is_set():
            return STOP_CONNECTING
        outcome = self._connect_attempt(hosts, retry)
        if outcome is STOP_CONNECTING:
            return STOP_CONNECTING

    raise ForceRetryError('Reconnecting')
def _inner_change(self, value): """ Add a value to the counter. Args: value: An integer specifying how much to add. Returns: An integer indicating the new count after the change. """ data, version = self._value() new_value = data + value new_data = repr(new_value).encode('ascii') try: self.client.set(self.path, new_data, version=version) return new_value except BadVersionError: raise ForceRetryError()
def _inner_acquire(self):
    """Run one attempt at taking the lock, blocking until it is held.

    :returns: ``True`` once this contender's node is first among the
        sorted children.
    :raises CancelledError: If ``self.cancelled`` is set at the top of a
        loop iteration.
    :raises ForceRetryError: If this contender's node is missing from the
        sorted children.
    """
    # make sure our election parent node exists
    if not self.assured_path:
        self.client.ensure_path(self.path)

    node = None
    if self.create_tried:
        # A previous attempt may already have created our node; look for
        # it instead of creating a second one.
        node = self._find_node()
    else:
        self.create_tried = True

    if not node:
        node = self.client.create(self.create_path, self.data,
            ephemeral=True, sequence=True)
        # strip off path to node
        node = node[len(self.path)+1:]

    self.node = node

    while True:

        # bail out with an exception if cancellation has been requested
        if self.cancelled:
            raise CancelledError()

        children = self._get_sorted_children()

        try:
            our_index = children.index(node)
        except ValueError:
            # somehow we aren't in the children -- probably we are
            # recovering from a session failure and our ephemeral
            # node was removed
            raise ForceRetryError()

        #noinspection PySimplifyBooleanCheck
        if our_index == 0:
            # we have the lock
            return True

        # otherwise we are in the mix. watch predecessor and bide our time
        predecessor = self.path + "/" + children[our_index-1]
        with self.condition:
            # Only wait when the predecessor still exists; the wait has no
            # timeout.
            if self.client.exists(predecessor, self._watch_predecessor):
                self.condition.wait()
def _inner_get(self): if not self._children: self._children = self.client.retry(self.client.get_children, self.path) self._children = sorted(self._children) if not self._children: return None name = self._children[0] try: data, stat = self.client.get(self.path + "/" + name) self.client.delete(self.path + "/" + name) except NoNodeError: # pragma: nocover # the first node has vanished in the meantime, try to # get another one self._children = [] raise ForceRetryError() self._children.pop(0) return data
def _get_predecessor(self, node):
    """Return `node`'s predecessor, or None if it has none.

    Note: This handles the case where the current lock is not a contender
    (e.g. rlock), and also edge cases where the lock's ephemeral node is
    gone.

    :param node: Child name of this lock's node under ``self.path``.
    :returns: The child name of the last contender, in sorted order,
        whose sequence compares lower than `node`'s, or ``None`` when
        there is none.
    :raises ForceRetryError: If `node` is not among the children of
        ``self.path``.
    """
    # Everything after the prefix is treated as this node's sequence.
    node_sequence = node[len(self.prefix):]
    children = self.client.get_children(self.path)
    # False means "node not seen at all"; a seen node stores its regex
    # match, which is None when the node itself is not a contender.
    found_self = False
    # Filter out the contenders using the computed regex
    contender_matches = []
    for child in children:
        match = self._contenders_re.search(child)
        if match is not None:
            contender_sequence = match.group(1)
            # Only consider contenders with a smaller sequence number.
            # A contender with a smaller sequence number has a higher
            # priority.
            # NOTE(review): this is a string comparison; presumably it
            # relies on fixed-width, zero-padded sequences -- confirm.
            if contender_sequence < node_sequence:
                contender_matches.append(match)
        if child == node:
            # Remember the node's match object so we can short circuit
            # below.
            found_self = match

    if found_self is False:  # pragma: nocover
        # somehow we aren't in the childrens -- probably we are
        # recovering from a session failure and our ephemeral
        # node was removed.
        raise ForceRetryError()

    if not contender_matches:
        return None

    # Sort the contenders using the sequence number extracted by the regex
    # and return the original string of the predecessor.
    sorted_matches = sorted(contender_matches, key=lambda m: m.groups())
    return sorted_matches[-1].string
def _acquire_lock():
    """Take ``self._lock`` without blocking, or force a retry.

    ``self`` comes from the enclosing scope.

    :returns: ``True`` when the lock was taken.
    :raises ForceRetryError: If the lock is currently held.
    """
    # False requests a non-blocking acquire.
    if self._lock.acquire(False):
        return True
    raise ForceRetryError()
def inner():
    """Fail until it has been called ``times`` times, then succeed.

    ``scope`` and ``times`` come from the enclosing scope. Each failing
    call increments ``scope['times']`` before raising.

    :raises ForceRetryError: While ``scope['times']`` is below ``times``.
    """
    if not (scope['times'] >= times):
        scope['times'] += 1
        raise ForceRetryError('Failed!')
def _acquire_lock():
    """ Acquire a kazoo thread lock.

    The acquire is non-blocking; ``self`` comes from the enclosing scope.

    Returns:
      True when the lock was taken.
    Raises:
      ForceRetryError: If the lock is currently held.
    """
    acquired = self._lock.acquire(False)
    if acquired:
        return True
    raise ForceRetryError()
def _inner_acquire(self): """ Create contender node(s) and wait until the lock is acquired. """ # Make sure the group lock node exists. self._ensure_path() nodes = [None for _ in self.paths] if self.create_tried: nodes = self._find_nodes() else: self.create_tried = True for index, node in enumerate(nodes): if node is not None: continue # The entity group lock root may have been deleted, so try a few times. try_num = 0 while True: try: node = self.client.create(self.create_paths[index], self.data, sequence=True) break except NoNodeError: self.client.ensure_path(self.paths[index]) if try_num > 3: raise ForceRetryError() try_num += 1 # Strip off path to node. node = node[len(self.paths[index]) + 1:] nodes[index] = node self.nodes = nodes while True: self.wake_event.clear() # Bail out with an exception if cancellation has been requested. if self.cancelled: raise CancelledError() children_list = self._get_sorted_children() predecessors = [] for index, children in enumerate(children_list): try: our_index = children.index(nodes[index]) except ValueError: raise ForceRetryError() # If the lock for this group hasn't been acquired, get the predecessor. if our_index != 0: predecessors.append(self.paths[index] + "/" + children[our_index - 1]) if not predecessors: return True if len(nodes) > 1: self._resolve_deadlocks(children_list) # Wait for predecessor to be removed. # TODO: Listen for all at the same time. for index, predecessor in enumerate(predecessors): self.client.add_listener(self._watch_session) try: if self.client.exists(predecessor, self._watch_predecessor): self.wake_event.wait(LOCK_TIMEOUT) if not self.wake_event.isSet(): error = 'Failed to acquire lock on {} after {} '\ 'seconds'.format(self.paths, LOCK_TIMEOUT * (index + 1)) raise LockTimeout(error) finally: self.client.remove_listener(self._watch_session)