def mergeJoinDispatch(self, event, obj):
    """ Gathers the pending fan-in work packages for this context and returns them as FSMContexts.

    @param event: an event that is being merge joined (destination state must be a fan in)
    @param obj: the dispatch object; its retry count decides whether the idempotency guard is read
    @return: a list (possibly empty) of FSMContext instances; the list also carries .logger and
             .instanceName so fan-in actions can log through it
    """
    # same assertions as _queueDispatchFanIn - fan-out info must never appear in a fan-in context
    assert not self.get(constants.GEN_PARAM)
    assert not self.get(constants.FORK_PARAM)

    # the url of the Task/FSMContext carries the work package index
    index = self.get(constants.INDEX_PARAM)
    taskNameBase = self.getTaskName(event, fanIn=True)

    # see comment (***) in self._queueDispatchFanIn
    #
    # when the read lock cannot be acquired (a write lock was not released) the decision was to
    # keep retrying, so only raise while the retry limit is above the current retry count
    raiseOnFail = (self._getTaskRetryLimit() is not None and
                   self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM])
    ReadWriteLock(taskNameBase, self).acquireReadLock(index, raiseOnFail=raiseOnFail)

    class FSMContextList(list):
        """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """

        def __init__(self, context, contexts):
            """ setup a self.logger for fan-in actions """
            list.__init__(self, contexts)
            self.logger = Logger(context)
            self.instanceName = context.instanceName

    # see comment (A) in self._queueDispatchFanIn(...)
    time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

    # make sure a fan-in only ever operates once over a list of data: the run-once entity is
    # created in State.dispatch(...) _after_ all the actions have executed successfully
    hashedIndex = knuthHash(index)
    workIndex = '%s-%d' % (taskNameBase, hashedIndex)
    if obj[constants.RETRY_COUNT_PARAM] > 0 and \
       RunOnceSemaphore(workIndex, self).readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
        self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                         workIndex)
        # don't operate over the data again
        return FSMContextList(self, [])

    # fetch every work package of the current group, ordered by key
    workPackages = _FantasmFanIn.all()
    workPackages = workPackages.filter('workIndex =', workIndex)
    workPackages = workPackages.order('__key__')

    # one cloned FSMContext per stored work package
    return FSMContextList(self, [self.clone(data=package.context) for package in workPackages])
def mergeJoinDispatch(self, event, obj):
    """ Gathers the pending fan-in work packages for this context and returns them as FSMContexts.

    @param event: an event that is being merge joined (destination state must be a fan in)
    @param obj: the dispatch object; its retry count decides whether the idempotency guard is read
    @return: a list (possibly empty) of FSMContext instances; the list also carries .logger,
             .instanceName and .guarded (True only when the idempotency guard short-circuited)
    """
    # same assertions as _queueDispatchFanIn - fan-out info must never appear in a fan-in context
    assert not self.get(constants.GEN_PARAM)
    assert not self.get(constants.FORK_PARAM)

    # the url of the Task/FSMContext carries the work package index
    index = self.get(constants.INDEX_PARAM)
    self.logger.debug('Index: %s', index)
    taskNameBase = self.getTaskName(event, fanIn=True)

    # see comment (***) in self._queueDispatchFanIn
    #
    # when the read lock cannot be acquired (a write lock was not released) the decision was to
    # keep retrying, so only raise while the retry limit is above the current retry count
    raiseOnFail = (self._getTaskRetryLimit() is not None and
                   self._getTaskRetryLimit() > self.__obj[constants.RETRY_COUNT_PARAM])
    ReadWriteLock(taskNameBase, self).acquireReadLock(index, raiseOnFail=raiseOnFail)

    class FSMContextList(list):
        """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """

        def __init__(self, context, contexts, guarded=False):
            """ setup a self.logger for fan-in actions """
            list.__init__(self, contexts)
            self.logger = Logger(context)
            self.instanceName = context.instanceName
            self.guarded = guarded

    # see comment (A) in self._queueDispatchFanIn(...)
    time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)

    # make sure a fan-in only ever operates once over a list of data: the run-once entity is
    # created in State.dispatch(...) _after_ all the actions have executed successfully
    khash = knuthHash(index)
    self.logger.debug('knuthHash of index: %s', khash)
    workIndex = '%s-%d' % (taskNameBase, khash)
    if obj[constants.RETRY_COUNT_PARAM] > 0 and \
       RunOnceSemaphore(workIndex, self).readRunOnceSemaphore(payload=self.__obj[constants.TASK_NAME_PARAM]):
        self.logger.info("Fan-in idempotency guard for workIndex '%s', not processing any work items.",
                         workIndex)
        # don't operate over the data again
        return FSMContextList(self, [], guarded=True)

    # fetch every work package of the current group, ordered by key
    workPackages = _FantasmFanIn.all(namespace='')
    workPackages = workPackages.filter('workIndex =', workIndex)
    workPackages = workPackages.order('__key__')

    # one cloned FSMContext per stored work package
    return FSMContextList(self, [self.clone(replaceData=package.context) for package in workPackages])
def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
    """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
    datastore for processing by the queued .dispatch(nextEvent)

    The context is written as a _FantasmFanIn work package under a write lock, then a Task named
    after the fan-in task name and current index is added to the queue.

    @param nextEvent: a string event
    @param fanInPeriod: the period of time between fan in Tasks (added, in seconds, to the Task eta)
    @param retryOptions: passed through to the Task as retry_options
    @param queueName: the queue name to Queue into
    @return: a taskqueue.Task instance, or None (implicitly) when the Task already exists or is
             tombstoned
    """
    assert nextEvent is not None
    assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
    assert queueName

    # we pop this off here because we do not want the fan-out/continuation param as part of the
    # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
    self.pop(constants.GEN_PARAM, None)
    fork = self.pop(constants.FORK_PARAM, None)

    # transfer the fan-in-group into the context (under a fixed value key) so that states beyond
    # the fan-in get unique Task names
    # FIXME: this will likely change once we formalize what to do post fan-in
    transition = self.currentState.getTransition(nextEvent)
    if self.get(transition.target.fanInGroup) is not None:
        self[constants.FAN_IN_GROUP_PARAM] = self[transition.target.fanInGroup]

    taskNameBase = self.getTaskName(nextEvent, fanIn=True)
    rwlock = ReadWriteLock(taskNameBase, self)
    index = rwlock.currentIndex()

    # (***)
    #
    # grab the lock - memcache.incr()
    #
    # on Task retry, multiple incr() calls are possible. possible ways to handle:
    #
    # 1. release the lock in a 'finally' clause, but then risk missing a work
    #    package because acquiring the read lock will succeed even though the
    #    work package was not written yet.
    #
    # 2. allow the lock to get too high. the fan-in logic attempts to wait for
    #    work packages across multiple-retry attempts, so this seems like the
    #    best option. we basically trade a bit of latency in fan-in for reliability.
    #
    rwlock.acquireWriteLock(index, nextEvent=nextEvent)

    # insert the work package, which is simply a serialized FSMContext
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

    # on retry, we want to ensure we get the same work index for this task
    actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
    # NOTE(review): '+' binds tighter than 'or', so the left operand always starts with
    # 'workIndex-' and is never empty - the trailing 'or None' can not take effect here
    indexKeyName = 'workIndex-' + '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    semaphore = RunOnceSemaphore(indexKeyName, self)

    # check if the workIndex changed during retry
    semaphoreWritten = False
    if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
        # see comment (A) below - give the datastore time to make earlier writes visible
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
        payload = semaphore.readRunOnceSemaphore(payload=workIndex, transactional=False)
        if payload:
            semaphoreWritten = True
            if payload != workIndex:
                self.logger.info("Work index changed from '%s' to '%s' on retry.", payload, workIndex)
                # keep using the work index recorded by the earlier attempt
                workIndex = payload

    # update this here so it gets written down into the work package too
    self[constants.INDEX_PARAM] = index

    # write down two models, one actual work package, one idempotency package
    keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)

    # close enough to idempotent, but could still write only one of the entities
    # FIXME: could be made faster using a bulk put, but this interface is cleaner
    if not semaphoreWritten:
        semaphore.writeRunOnceSemaphore(payload=workIndex, transactional=False)

    # put the work item
    db.put(work)

    # (A) now the datastore is asynchronously writing the indices, so the work package may
    #     not show up in a query for a period of time. there is a corresponding time.sleep()
    #     in the fan-in of self.mergeJoinDispatch(...)

    # release the lock - memcache.decr()
    rwlock.releaseWriteLock(index)

    try:

        # insert a task to run in the future and process a bunch of work packages
        now = time.time()
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        # the Task name is built from taskNameBase and index only
        task = Task(name='%s-%d' % (taskNameBase, index),
                    method=self.method,
                    url=url,
                    params=params,
                    eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                    headers=self.headers,
                    retry_options=retryOptions)
        self.Queue(name=queueName).add(task)
        return task

    except (TaskAlreadyExistsError, TombstonedTaskError):
        # an existing/tombstoned Task with this name is deliberately ignored; falls through and
        # returns None
        pass # Fan-in magic
except HaltMachineError: raise # let it bubble up quietly except Exception, e: level = context.logger.error if e.__class__ in TRANSIENT_ERRORS: level = context.logger.warn level('Error processing action for state. (Machine %s, State %s, Action %s)', context.machineName, context.currentState.name, context.currentState.doAction.__class__) raise if transition.target.isFanIn: # this prevents fan-in from re-counting the data if there is an Exception # or DeadlineExceeded _after_ doAction.execute(...) succeeds index = context.get(constants.INDEX_PARAM) or contextOrContexts[0].get(constants.INDEX_PARAM) workIndex = '%s-%d' % (taskNameBase, knuthHash(index)) semaphore = RunOnceSemaphore(workIndex, context) semaphore.writeRunOnceSemaphore(payload=obj[constants.TASK_NAME_PARAM]) try: # at this point we have processed the work items, delete them task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup', url=constants.DEFAULT_CLEANUP_URL, params={constants.WORK_INDEX_PARAM: workIndex}) context.Queue(name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task) except (TaskAlreadyExistsError, TombstonedTaskError): context.logger.info("Fan-in cleanup Task already exists.") if context.get('UNITTEST_RAISE_AFTER_FAN_IN'): # only way to generate this failure if not contextOrContexts.guarded:
def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, retryOptions=None, queueName=None):
    """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
    datastore for processing by the queued .dispatch(nextEvent)

    The context is written as a _FantasmFanIn work package under a write lock, then a Task named
    after the fan-in task name and current index is added to the queue.

    @param nextEvent: a string event
    @param fanInPeriod: the period of time between fan in Tasks (added, in seconds, to the Task eta)
    @param retryOptions: passed through to the Task as retry_options
    @param queueName: the queue name to Queue into
    @return: a taskqueue.Task instance, or None (implicitly) when the Task already exists or is
             tombstoned
    """
    assert nextEvent is not None
    assert not self.get(
        constants.INDEX_PARAM)  # fan-in after fan-in is not allowed
    assert queueName

    # we pop this off here because we do not want the fan-out/continuation param as part of the
    # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
    self.pop(constants.GEN_PARAM, None)
    fork = self.pop(constants.FORK_PARAM, None)

    taskNameBase = self.getTaskName(nextEvent, fanIn=True)
    rwlock = ReadWriteLock(taskNameBase, self)
    index = rwlock.currentIndex()

    # (***)
    #
    # grab the lock - memcache.incr()
    #
    # on Task retry, multiple incr() calls are possible. possible ways to handle:
    #
    # 1. release the lock in a 'finally' clause, but then risk missing a work
    #    package because acquiring the read lock will succeed even though the
    #    work package was not written yet.
    #
    # 2. allow the lock to get too high. the fan-in logic attempts to wait for
    #    work packages across multiple-retry attempts, so this seems like the
    #    best option. we basically trade a bit of latency in fan-in for reliability.
    #
    rwlock.acquireWriteLock(index, nextEvent=nextEvent)

    # insert the work package, which is simply a serialized FSMContext
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))

    # on retry, we want to ensure we get the same work index for this task
    actualTaskName = self.__obj[constants.TASK_NAME_PARAM]
    # NOTE(review): '+' binds tighter than 'or', so the left operand always starts with
    # 'workIndex-' and is never empty - the trailing 'or None' can not take effect here
    indexKeyName = 'workIndex-' + '-'.join(
        [str(i) for i in [actualTaskName, fork] if i]) or None
    semaphore = RunOnceSemaphore(indexKeyName, self)

    # check if the workIndex changed during retry
    semaphoreWritten = False
    if self.__obj[constants.RETRY_COUNT_PARAM] > 0:
        # see comment (A) below - give the datastore time to make earlier writes visible
        time.sleep(constants.DATASTORE_ASYNCRONOUS_INDEX_WRITE_WAIT_TIME)
        payload = semaphore.readRunOnceSemaphore(payload=workIndex,
                                                 transactional=False)
        if payload:
            semaphoreWritten = True
            if payload != workIndex:
                self.logger.info(
                    "Work index changed from '%s' to '%s' on retry.",
                    payload, workIndex)
                # keep using the work index recorded by the earlier attempt
                workIndex = payload

    # write down two models, one actual work package, one idempotency package
    keyName = '-'.join([str(i) for i in [actualTaskName, fork] if i]) or None
    work = _FantasmFanIn(context=self, workIndex=workIndex, key_name=keyName)

    # close enough to idempotent, but could still write only one of the entities
    # FIXME: could be made faster using a bulk put, but this interface is cleaner
    if not semaphoreWritten:
        semaphore.writeRunOnceSemaphore(payload=workIndex,
                                        transactional=False)

    # put the work item
    db.put(work)

    # (A) now the datastore is asynchronously writing the indices, so the work package may
    #     not show up in a query for a period of time. there is a corresponding time.sleep()
    #     in the fan-in of self.mergeJoinDispatch(...)

    # release the lock - memcache.decr()
    rwlock.releaseWriteLock(index)

    try:

        # insert a task to run in the future and process a bunch of work packages
        now = time.time()
        # the index is stored on the context before the url/params are built (and after the
        # work package above was constructed)
        self[constants.INDEX_PARAM] = index
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        # the Task name is built from taskNameBase and index only
        task = Task(name='%s-%d' % (taskNameBase, index),
                    method=self.method,
                    url=url,
                    params=params,
                    eta=datetime.datetime.utcfromtimestamp(now) +
                    datetime.timedelta(seconds=fanInPeriod),
                    headers=self.headers,
                    retry_options=retryOptions)
        self.Queue(name=queueName).add(task)
        return task

    except (TaskAlreadyExistsError, TombstonedTaskError):
        # an existing/tombstoned Task with this name is deliberately ignored; falls through and
        # returns None
        pass  # Fan-in magic
def dispatch(self, context, event, obj):
    """ Fires the transition and executes the next State's entry, do and exit actions.

    Order of work: the current state's exit action, the fan-in merge join (when the transition
    target is a fan-in), the transition itself, then the new current state's entry action,
    continuation and do action. After a fan-in, a run-once semaphore is written and a cleanup
    Task for the processed work packages is queued.

    @param context: an FSMContext instance
    @param event: a string event to dispatch to the State
    @param obj: an object that the Transition can operate on
    @return: the event returned from the next state's main action, or None when the machine was
             flagged as terminated (fan-in with 0 contexts, or TERMINATED_PARAM set on obj)
    @raise InvalidEventNameRuntimeError: if the do action returns a non-str event or one that does
                                         not match constants.NAME_RE
    """
    transition = self.getTransition(event)

    if context.currentState.exitAction:
        try:
            context.currentAction = context.currentState.exitAction
            context.currentState.exitAction.execute(context, obj)
        except Exception:
            # this is the exit action failing, so say so in the log (the message used to claim
            # it was the entry action)
            context.logger.error(
                'Error processing exit action for state. (Machine %s, State %s, exitAction %s)',
                context.machineName, context.currentState.name,
                context.currentState.exitAction.__class__)
            raise

    # join the contexts of a fan-in
    contextOrContexts = context
    if transition.target.isFanIn:
        # taskNameBase is reused further down to rebuild the workIndex after the do action
        taskNameBase = context.getTaskName(event, fanIn=True)
        contextOrContexts = context.mergeJoinDispatch(event, obj)
        if not contextOrContexts:
            context.logger.info(
                'Fan-in resulted in 0 contexts. Terminating machine. (Machine %s, State %s)',
                context.machineName, context.currentState.name)
            obj[constants.TERMINATED_PARAM] = True

    transition.execute(context, obj)

    if context.currentState.entryAction:
        try:
            context.currentAction = context.currentState.entryAction
            context.currentState.entryAction.execute(
                contextOrContexts, obj)
        except Exception:
            context.logger.error(
                'Error processing entry action for state. (Machine %s, State %s, entryAction %s)',
                context.machineName, context.currentState.name,
                context.currentState.entryAction.__class__)
            raise

    if context.currentState.isContinuation:
        try:
            token = context.get(constants.CONTINUATION_PARAM, None)
            nextToken = context.currentState.doAction.continuation(
                contextOrContexts, obj, token=token)
            if nextToken:
                context.continuation(nextToken)
            context.pop(constants.CONTINUATION_PARAM,
                        None)  # pop this off because it is really long
        except Exception:
            context.logger.error(
                'Error processing continuation for state. (Machine %s, State %s, continuation %s)',
                context.machineName, context.currentState.name,
                context.currentState.doAction.__class__)
            raise

    # either a fan-in resulted in no contexts, or a continuation was completed
    if obj.get(constants.TERMINATED_PARAM):
        return None

    nextEvent = None
    if context.currentState.doAction:
        try:
            context.currentAction = context.currentState.doAction
            nextEvent = context.currentState.doAction.execute(
                contextOrContexts, obj)
        except Exception:
            context.logger.error(
                'Error processing action for state. (Machine %s, State %s, Action %s)',
                context.machineName, context.currentState.name,
                context.currentState.doAction.__class__)
            raise

    if transition.target.isFanIn:

        # this prevents fan-in from re-counting the data if there is an Exception
        # or DeadlineExceeded _after_ doAction.execute(...) succeeds
        index = context.get(constants.INDEX_PARAM)
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        semaphore = RunOnceSemaphore(workIndex, context)
        semaphore.writeRunOnceSemaphore(
            payload=obj[constants.TASK_NAME_PARAM])

        try:
            # at this point we have processed the work items, delete them
            task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup',
                        url=constants.DEFAULT_CLEANUP_URL,
                        params={constants.WORK_INDEX_PARAM: workIndex})
            context.Queue(
                name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task)

        except (TaskAlreadyExistsError, TombstonedTaskError):
            # the cleanup was already queued (e.g. by an earlier attempt); nothing more to do
            context.logger.info("Fan-in cleanup Task already exists.")

        if context.get('UNITTEST_RAISE_AFTER_FAN_IN'
                       ):  # only way to generate this failure
            raise Exception()

    if nextEvent:
        if not isinstance(nextEvent, str) or not constants.NAME_RE.match(nextEvent):
            raise InvalidEventNameRuntimeError(nextEvent, context.machineName,
                                               context.currentState.name,
                                               context.instanceName)

    return nextEvent
def dispatch(self, context, event, obj):
    """ Fires the transition and executes the next State's entry, do and exit actions.

    Order of work: the current state's exit action, the fan-in merge join (when the transition
    target is a fan-in), the transition itself, then the new current state's entry action,
    continuation and do action. After a fan-in, a run-once semaphore is written and a cleanup
    Task for the processed work packages is queued.

    @param context: an FSMContext instance
    @param event: a string event to dispatch to the State
    @param obj: an object that the Transition can operate on
    @return: the event returned from the next state's main action, or None when
             TERMINATED_PARAM is set on obj
    @raise FanInNoContextsAvailableRuntimeError: if a fan-in produced no contexts and the result
                                                 was not flagged as guarded
    @raise InvalidEventNameRuntimeError: if the do action returns a non-str event or one that does
                                         not match constants.NAME_RE
    """
    transition = self.getTransition(event)

    if context.currentState.exitAction:
        try:
            context.currentAction = context.currentState.exitAction
            context.currentState.exitAction.execute(context, obj)
        except Exception:
            # this is the exit action failing, so say so in the log (the message used to claim
            # it was the entry action)
            context.logger.error('Error processing exit action for state. (Machine %s, State %s, exitAction %s)',
                                 context.machineName,
                                 context.currentState.name,
                                 context.currentState.exitAction.__class__)
            raise

    # join the contexts of a fan-in
    contextOrContexts = context
    if transition.target.isFanIn:
        # taskNameBase is reused further down to rebuild the workIndex after the do action
        taskNameBase = context.getTaskName(event, fanIn=True)
        contextOrContexts = context.mergeJoinDispatch(event, obj)
        if not contextOrContexts and not contextOrContexts.guarded:
            # by implementation, EVERY fan-in should have at least one work package available to it, this
            # is likely caused by an index writing delay, and it is suitable to simply retry this task
            raise FanInNoContextsAvailableRuntimeError(event,
                                                       context.machineName,
                                                       context.currentState.name,
                                                       context.instanceName)

    transition.execute(context, obj)

    if context.currentState.entryAction:
        try:
            context.currentAction = context.currentState.entryAction
            context.currentState.entryAction.execute(contextOrContexts, obj)
        except Exception:
            context.logger.error('Error processing entry action for state. (Machine %s, State %s, entryAction %s)',
                                 context.machineName,
                                 context.currentState.name,
                                 context.currentState.entryAction.__class__)
            raise

    if context.currentState.isContinuation:
        try:
            token = context.get(constants.CONTINUATION_PARAM, None)
            nextToken = context.currentState.doAction.continuation(contextOrContexts, obj, token=token)
            if nextToken:
                context.continuation(nextToken)
            context.pop(constants.CONTINUATION_PARAM, None) # pop this off because it is really long
        except Exception:
            context.logger.error('Error processing continuation for state. (Machine %s, State %s, continuation %s)',
                                 context.machineName,
                                 context.currentState.name,
                                 context.currentState.doAction.__class__)
            raise

    # either a fan-in resulted in no contexts, or a continuation was completed
    if obj.get(constants.TERMINATED_PARAM):
        return None

    nextEvent = None
    if context.currentState.doAction:
        try:
            context.currentAction = context.currentState.doAction
            nextEvent = context.currentState.doAction.execute(contextOrContexts, obj)
        except Exception:
            context.logger.error('Error processing action for state. (Machine %s, State %s, Action %s)',
                                 context.machineName,
                                 context.currentState.name,
                                 context.currentState.doAction.__class__)
            raise

    if transition.target.isFanIn:

        # this prevents fan-in from re-counting the data if there is an Exception
        # or DeadlineExceeded _after_ doAction.execute(...) succeeds
        index = context.get(constants.INDEX_PARAM)
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        semaphore = RunOnceSemaphore(workIndex, context)
        semaphore.writeRunOnceSemaphore(payload=obj[constants.TASK_NAME_PARAM])

        try:
            # at this point we have processed the work items, delete them
            task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup',
                        url=constants.DEFAULT_CLEANUP_URL,
                        params={constants.WORK_INDEX_PARAM: workIndex})
            context.Queue(name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task)

        except (TaskAlreadyExistsError, TombstonedTaskError):
            # the cleanup was already queued (e.g. by an earlier attempt); nothing more to do
            context.logger.info("Fan-in cleanup Task already exists.")

        if context.get('UNITTEST_RAISE_AFTER_FAN_IN'): # only way to generate this failure
            # a guarded (idempotency short-circuited) result does not trigger the test failure
            if not contextOrContexts.guarded:
                raise Exception()

    if nextEvent:
        if not isinstance(nextEvent, str) or not constants.NAME_RE.match(nextEvent):
            raise InvalidEventNameRuntimeError(nextEvent, context.machineName, context.currentState.name,
                                               context.instanceName)

    return nextEvent
if e.__class__ in TRANSIENT_ERRORS: level = context.logger.warn level( 'Error processing action for state. (Machine %s, State %s, Action %s)', context.machineName, context.currentState.name, context.currentState.doAction.__class__) raise if transition.target.isFanIn: # this prevents fan-in from re-counting the data if there is an Exception # or DeadlineExceeded _after_ doAction.execute(...) succeeds index = context.get( constants.INDEX_PARAM) or contextOrContexts[0].get( constants.INDEX_PARAM) workIndex = '%s-%d' % (taskNameBase, knuthHash(index)) semaphore = RunOnceSemaphore(workIndex, context) semaphore.writeRunOnceSemaphore( payload=obj[constants.TASK_NAME_PARAM]) try: # at this point we have processed the work items, delete them task = Task(name=obj[constants.TASK_NAME_PARAM] + '-cleanup', url=constants.DEFAULT_CLEANUP_URL, params={constants.WORK_INDEX_PARAM: workIndex}) context.Queue( name=constants.DEFAULT_CLEANUP_QUEUE_NAME).add(task) except (TaskAlreadyExistsError, TombstonedTaskError): context.logger.info("Fan-in cleanup Task already exists.")
def mergeJoinDispatch(self, event, obj):
    """ Performs a merge join on the pending fan-in dispatches.

    Bumps the memcache index so writers move on, busy-waits for in-flight writers on the lock
    counter, takes a memcache read lock, then fetches, clones and deletes the work packages of
    the current group. The fetched entities are also stored on obj under
    constants.FAN_IN_RESULTS_PARAM.

    @param event: an event that is being merge joined (destination state must be a fan in)
    @param obj: the dispatch object; receives the fetched fan-in results
    @return: a list (possibly empty) of FSMContext instances; empty when this task did not win
             the read lock
    """
    # this assertion comes from _queueDispatchFanIn - we never want fan-out info in a fan-in context
    assert not self.get(constants.GEN_PARAM)
    assert not self.get(constants.FORK_PARAM)

    # the work package index is stored in the url of the Task/FSMContext
    index = self.get(constants.INDEX_PARAM)
    taskNameBase = self.getTaskName(event, fanIn=True)

    # tell writers to use another index
    memcache.incr('index-' + taskNameBase)

    lock = '%s-lock-%d' % (taskNameBase, index)
    memcache.decr(lock, 2**15) # tell writers they missed the boat

    # 20 iterations * 0.25s = 5s total wait time
    busyWaitIters = 20
    busyWaitIterSecs = 0.250

    # busy wait for writers
    for i in xrange(busyWaitIters):
        counter = memcache.get(lock)
        # counter is None --> ejected from memcache
        # int(counter) <= 2**15 --> writers have all called memcache.decr
        if counter is None or int(counter) <= 2**15:
            break
        time.sleep(busyWaitIterSecs)
        self.logger.debug("Tried to acquire lock '%s' %d times...", lock, i + 1)
    else:
        # only reached when every iteration ran without a break, i.e. the writers never
        # finished. (checking the loop index after the loop also fired when the lock was
        # acquired on the very last iteration, logging a false error.)
        # FIXME: is there anything else that can be done? will work packages be lost? maybe queue another task
        #        to sweep up later?
        self.logger.error("Gave up waiting for all fan-in work items.")

    # at this point we could have two tasks trying to process the same work packages. in the
    # happy path this will not likely happen because the tasks are sent off with different ETAs,
    # however in the unhappy path, it is possible for multiple tasks to be executing (retry on
    # 500 etc.). we solve this with a read lock using memcache.
    #
    # FIXME: would using a transaction on db.delete work if using ancestors? one task would win the
    #        race to delete the work based on a transaction error?
    readlock = '%s-readlock-%d' % (taskNameBase, index)
    haveReadLock = False
    try:
        # put the actual name of the winning task into the lock
        actualTaskName = self.get(constants.TASK_NAME_PARAM)
        added = memcache.add(readlock, actualTaskName, time=30) # FIXME: is 30s appropriate?
        lockValue = memcache.get(readlock)

        # and return the FSMContexts list
        class FSMContextList(list):
            """ A list that supports .logger.info(), .logger.warning() etc.for fan-in actions """
            def __init__(self, context, contexts):
                """ setup a self.logger for fan-in actions """
                super(FSMContextList, self).__init__(contexts)
                self.logger = Logger(context)
                self.instanceName = context.instanceName

        # if the lock value is not equal to the added value, it means this task lost the race
        if not added or lockValue != actualTaskName:
            return FSMContextList(self, [])
#                raise FanInReadLockFailureRuntimeError(event,
#                                                       self.machineName,
#                                                       self.currentState.name,
#                                                       self.instanceName)

        # flag used in finally block to decide whether or not to log an error message
        haveReadLock = True

        # fetch all the work packages in the current group for processing
        workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
        query = _FantasmFanIn.all() \
                             .filter('workIndex =', workIndex) \
                             .order('__key__')

        # iterate over the query to fetch results - this is done in 'small batches'
        fanInResults = list(query)

        # construct a list of FSMContexts
        contexts = [self.clone(data=r.context) for r in fanInResults]

        # hold the fanInResult around in case we need to re-put them (on an Exception)
        obj[constants.FAN_IN_RESULTS_PARAM] = fanInResults

        # and delete the work packages - bearing in mind appengine limits
        maxDeleteSize = 250 # appengine does not like to delete > 500 models at a time, 250 is a nice safe number
        if len(fanInResults) > maxDeleteSize:
            self.logger.warning("%d contexts in the current batch. Consider decreasing fan-in.", len(fanInResults))
        i = 0
        while fanInResults[i:i+maxDeleteSize]:
            db.delete(fanInResults[i:i+maxDeleteSize])
            i += maxDeleteSize

        return FSMContextList(self, contexts)

    finally:
        # NOTE(review): this delete also runs when this task lost the race above, which removes
        # a read lock that another task may be holding - confirm that is intended
        deleted = memcache.delete(readlock)

        # FIXME: is there anything else that can be done?
        if haveReadLock and deleted == memcache.DELETE_NETWORK_FAILURE:
            self.logger.error("Unable to release the fan in read lock.")
def _queueDispatchFanIn(self, nextEvent, fanInPeriod=0, queueName=None):
    """ Queues a call to .dispatch(nextEvent) in the task queue, or saves the context to the
    datastore for processing by the queued .dispatch(nextEvent)

    Takes a memcache-counter write lock for the current index, stores the context as a
    _FantasmFanIn work package, then adds a Task that will process the work packages later.

    @param nextEvent: a string event
    @param fanInPeriod: the period of time between fan in Tasks (added, in seconds, to the Task eta)
    @param queueName: the queue name to Queue into
    @return: a taskqueue.Task instance, or None (implicitly) when the Task already exists or is
             tombstoned
    @raise FanInWriteLockFailureRuntimeError: if the lock counter shows the write lock was missed
    """
    assert nextEvent is not None
    assert not self.get(constants.INDEX_PARAM) # fan-in after fan-in is not allowed
    assert queueName

    # we pop this off here because we do not want the fan-out/continuation param as part of the
    # task name, otherwise we lose the fan-in - each fan-in gets one work unit.
    self.pop(constants.GEN_PARAM, None)
    self.pop(constants.FORK_PARAM, None)

    taskNameBase = self.getTaskName(nextEvent, fanIn=True)
    index = memcache.get('index-' + taskNameBase)
    if index is None:
        # using 'random.randint' here instead of '1' helps when the index is ejected from memcache
        # instead of restarting at the same counter, we jump (likely) far way from existing task job
        # names.
        memcache.add('index-' + taskNameBase, random.randint(1, 2**32))
        # re-read rather than trust the value just added (add() does not overwrite an existing key)
        index = memcache.get('index-' + taskNameBase)

    # grab the lock
    lock = '%s-lock-%d' % (taskNameBase, index)
    writers = memcache.incr(lock, initial_value=2**16)
    # a value below the initial 2**16 means the counter was decremented by something other than
    # a writer's own release (mergeJoinDispatch subtracts 2**15 to tell writers they missed the boat)
    if writers < 2**16:
        memcache.decr(lock)
        # this will escape as a 500 error and the Task will be re-tried by appengine
        raise FanInWriteLockFailureRuntimeError(nextEvent,
                                                self.machineName,
                                                self.currentState.name,
                                                self.instanceName)

    # insert the work package, which is simply a serialized FSMContext
    # NOTE(review): this put happens before the try/finally below, so if it raises the lock
    # incremented above is not decremented here - confirm that is acceptable
    workIndex = '%s-%d' % (taskNameBase, knuthHash(index))
    work = _FantasmFanIn(context=self, workIndex=workIndex)
    work.put()

    # insert a task to run in the future and process a bunch of work packages
    now = time.time()
    try:
        self[constants.INDEX_PARAM] = index
        url = self.buildUrl(self.currentState, nextEvent)
        params = self.buildParams(self.currentState, nextEvent)
        # int(now / (fanInPeriod - 1 + 30)) included because it was in [2], but is less needed now that
        # we use random.randint in seeding memcache. for long fan in periods, and the case where random.randint
        # hits the same value twice, this may cause problems for up to fanInPeriod + 30s.
        # see: http://www.mail-archive.com/[email protected]/msg30408.html
        task = Task(name='%s-%d-%d' % (taskNameBase, int(now / (fanInPeriod - 1 + 30)), index),
                    method=self.method,
                    url=url,
                    params=params,
                    eta=datetime.datetime.utcfromtimestamp(now) + datetime.timedelta(seconds=fanInPeriod),
                    headers=self.headers)
        self.Queue(name=queueName).add(task)
        return task

    except (TaskAlreadyExistsError, TombstonedTaskError):
        # an existing/tombstoned Task with this name is deliberately ignored; falls through and
        # returns None
        pass # Fan-in magic

    finally:
        # release the write lock taken above, whether or not the Task was added
        memcache.decr(lock)