def __init__(self, doneCondition, albums, capsule):
    """Wire up a comments pipeline worker.

    doneCondition -- condition the caller waits on; notified when done
    albums        -- iterable of album records to pull comments for
    capsule       -- StateCapsule carrying the user/query context
    """
    TCommentsPipe.logger.debug('INIT')
    # Comments index lives under this user's per-account index location.
    self.__index = CommentsSearchAPI(capsule.user().getIndexLocation())
    self.__capsule = capsule
    self.__albums = albums
    self.__doneCondition = doneCondition
    self.__writeDAL = WriteDAL()
    super(TCommentsPipe, self).__init__()
def __init__(self, fetcher, index, capsule, doneCondition,
             incrementalCallback=None, incrementalArguments=None):
    """Wire up an albums pipeline worker.

    fetcher              -- source driven via Fetcher.incrementalFetch
    index                -- album search-index interface
    capsule              -- StateCapsule carrying the user/query context
    doneCondition        -- condition notified when the fetch completes
    incrementalCallback  -- optional fn(items, incrementalArguments) per batch
    incrementalArguments -- opaque value handed to incrementalCallback
    """
    self.__fetcher = fetcher
    self.__index = index
    self.__capsule = capsule
    self.__doneCondition = doneCondition
    self.__incrementalCallback = incrementalCallback
    self.__incrementalArguments = incrementalArguments
    self.__stillIndexingCondition = threading.Condition()
    self.__writeDAL = WriteDAL()
    super(TAlbumsPipe, self).__init__()
def __init__(self, **kwargs):
    """Set up cursor/search bookkeeping.

    Recognized kwargs: 'cursor' (client paging offset), 'searchid'
    (cache key for this search), 'callback' (result callback).
    """
    self.__searchId = kwargs.get('searchid')
    self.__curr_cursor = kwargs.get('cursor')
    self.__cb = kwargs.get('callback')
    self.__counter = 0
    self.__done = False
    self.__rdal = ReadDAL()
    self.__wdal = WriteDAL()
    self.__notificationBus = ResultNotifier()
def execute(**kwargs):
    """Entry point for one search request.

    Recognized kwargs: 'fbid' (user id), 'cursor' (result page offset),
    'access_token' (API token), 'callback' (fn(results, cursor, done_flag)
    invoked with result pages).

    NOTE(review): `user`, `rdal`, `search_id`, and `query` are read below but
    never assigned in this function — they presumably come from surrounding
    code lost in a bad merge/paste; confirm before relying on this function.
    """
    fbid = kwargs.get('fbid')
    cursor = kwargs.get('cursor')
    access_token = kwargs.get('access_token')
    callback = kwargs.get('callback')
    logging.info("QUERY REQUEST %s %s ", fbid, cursor)
    if user == None:
        # Create/save the user, since they do NOT exist yet.
        wdal = WriteDAL()
        user = User(fbid)
        user.setIndexTime(datetime.datetime.now().isoformat())
        user.setCreated(datetime.datetime.now().isoformat())
        user.setIndexing(1)
        user.setAccessToken(access_token)
        wdal.persistUser(user)
        # Prep user account (create index location, etc).
        user.prepFirstUse()
        # Handles getting search hits as they arrive and returning
        # partial results to the caller.
        qw = QueryWaiter(callback, cursor, search_id)
        qw.start()
        # Create the capsule tracking this query's state.
        capsule = StateCapsule(cursor_handler=CursorHandler(cursor=cursor, searchid=search_id, callback=callback), query=query, user=user)
        CapsuleManager().addCapsule(capsule)
        # Start the full fetch/index pipeline for the new user.
        fp = FetchPipeline(capsule)
        fp.start()
    else:
        # Returning user: check whether this search id is already cached
        # (memcache).
        search_result = rdal.getSearchID(search_id)
        if search_result != None:
            # Existing query still in the cache — return the next page.
            r = CursorHandler.getPageForSearchResults(search_result, cursor)
            if user.getIndexing() == 0:
                # Indexing finished: signal there are no more results
                # (final flag = 1).
                callback(r, cursor, 1)
            else:
                # Still indexing: wait for hits and return partial results.
                qw = QueryWaiter(callback, cursor, search_id)
                qw.start()
                # TODO(review): also need to kick off a query pipeline here
                # so we start getting results asap.
        else:
            qw = QueryWaiter(callback, cursor, search_id)
            qw.start()
            # Brand new query for an existing user.
            capsule = StateCapsule(cursor_handler=CursorHandler(cursor=cursor, searchid=search_id, callback=callback), query=query, user=user)
            CapsuleManager().addCapsule(capsule)
            qr = QueryPipeline(capsule)
            qr.start()
    # TODO: check the index time. If the latest time we've queried this user's
    # data is > THRESHOLD, kick off fetching + indexing from a certain point
    # in time.
class TCommentsPipe(threading.Thread):
    """Worker thread that fetches photo comments for a set of albums,
    persists each comment, and indexes it for search.

    doneCondition -- condition the caller waits on; notified when this
                     pipe has finished indexing
    albums        -- iterable of album dicts (each must carry an 'aid')
    capsule       -- StateCapsule holding the user/query context
    """

    # How many album ids get batched into one FQL IN (...) query.
    NUM_ALBUMS_PER_QUERY = 40

    logger = logging.getLogger()

    def __init__(self, doneCondition, albums, capsule):
        self.__doneCondition = doneCondition
        TCommentsPipe.logger.debug('INIT')
        # Comments index rooted at this user's index directory.
        self.__index = CommentsSearchAPI(capsule.user().getIndexLocation())
        self.__albums = albums
        self.__capsule = capsule
        self.__writeDAL = WriteDAL()
        super(TCommentsPipe, self).__init__()

    def run(self):
        # The index environment must be bound to this worker thread.
        self.__index.getEnv().attachCurrentThread()
        batch = []
        for album in self.__albums:
            batch.append(album['aid'])
            if len(batch) == TCommentsPipe.NUM_ALBUMS_PER_QUERY:
                # BUGFIX: the old flush-then-skip logic dropped the album
                # that landed exactly on a batch boundary (every 41st
                # album was never queried). Append-first guarantees every
                # album is queried exactly once.
                self.fetchCommentsForAlbums(','.join(batch))
                batch = []
        # Finish up what's left.
        if batch:
            self.fetchCommentsForAlbums(','.join(batch))
        self.__index.finish()
        self.__doneCondition.acquire()
        self.__doneCondition.notify()
        self.__doneCondition.release()
        TCommentsPipe.logger.debug('NOTIFIED DONE CONDITION')

    def fetchCommentsForAlbums(self, album_list):
        """Fetch all comments on photos in the comma-separated aid list
        (up to 3 attempts), then hand them to fetchCallback."""
        TCommentsPipe.logger.debug('getting comments for photos in these albums : ' + album_list)
        c = CommentsFetcher(self.__capsule.user().getAccessToken(),
                            object_ids='SELECT object_id FROM photo WHERE aid IN (%(list)s)' % {'list': album_list})
        num_retries = 0
        comments = []
        while num_retries < 3:
            try:
                comments = c.fetchAll()
            except Exception as e:
                num_retries += 1
                # Consistency fix: use the class logger like every other
                # call site in this class (same underlying logger object).
                TCommentsPipe.logger.warning(e)
                continue
            break
        self.fetchCallback(comments)

    def fetchCallback(self, comments):
        """Persist and index each fetched comment; failures are logged
        per-comment so one bad record does not abort the batch."""
        TCommentsPipe.logger.debug('indexing %(n)d' % {'n': len(comments)})
        for comment in comments:
            try:
                c = Comment()
                c.setFbArray(comment, self.__capsule.user().getId())
                self.__writeDAL.persistItem(c)
                # BUGFIX: an index() call used to sit here as well,
                # duplicating the one below and indexing every
                # successfully-persisted comment twice.
            except Exception as e:
                TCommentsPipe.logger.error('WARNING, problem persisting the following comment')
                TCommentsPipe.logger.error(e)
            try:
                self.__index.index(comment)
            except Exception as e:
                TCommentsPipe.logger.error('WARNING, problem indexing the following comment')
                TCommentsPipe.logger.error(e)
            try:
                # Let every capsule tracking this user refresh its state so
                # partial results can flow back to waiting queries.
                capsules = CapsuleManager().capsulesForUser(self.__capsule.user().getId())
                for capsule in capsules:
                    capsule.updateState()
            except Exception as e:
                TCommentsPipe.logger.error('WARNING, problem following up in the State Capsule')
                TCommentsPipe.logger.error(e)
class CursorHandler(object):
    """Collects raw search hits for one search id, caches the decoded
    result set, and hands out result pages keyed by a client cursor."""

    @staticmethod
    def orderByIndexTime(docs):
        """Sort docs in place by their 'indexed_at' field and return them.

        BUGFIX: the previous version called sorted() and discarded its
        return value, handing back the list unordered. (A stray debug
        `print` was removed as well.)
        """
        docs.sort(key=lambda doc: doc.get('indexed_at'))
        return docs

    def __init__(self, **kwargs):
        # Recognized kwargs: 'cursor', 'searchid', 'callback'.
        self.__curr_cursor = kwargs.get('cursor')
        self.__searchId = kwargs.get('searchid')
        self.__cb = kwargs.get('callback')
        self.__rdal = ReadDAL()
        self.__wdal = WriteDAL()
        self.__counter = 0
        self.__done = False
        self.__notificationBus = ResultNotifier()

    def addRawDocuments(self, docs):
        """Decode raw index hits, cache the full result set under this
        search id, and notify waiters once results extend past the
        current cursor."""
        c = []
        for doc in docs:
            item = json.loads(doc.get("full").encode('utf-8'))
            c.append({"type": doc.get("type"), "item": item})
        # Store the cached search result set.
        self.__wdal.cacheSearchResult(self.__searchId, c)
        if len(c) > self.__curr_cursor:
            # Enough results to satisfy the waiting cursor — notify.
            self.__notificationBus.notifyConditionWithResults(self.__searchId, c)

    def isDone(self):
        # True once the result threshold for this search has been hit.
        return self.__done

    @staticmethod
    def getPageForSearchResults(result, cursor):
        """Return the page of cached results for the given cursor.

        BUGFIX: the end bound used max(), which made end >= len(result)
        whenever the computed page end was smaller — so every call
        returned the entire tail instead of one page. min() clamps the
        page end to the available results as intended.

        NOTE(review): the start offset `cursor` and the page arithmetic
        `(cursor+1)*(page_size-1)` look inconsistent with each other —
        confirm the cursor's unit (page number vs item offset) with
        callers.
        """
        page_size = boobox.config.PAGE_SIZE
        end = min((cursor + 1) * (page_size - 1), len(result))
        return result[cursor:end]
class TAlbumsPipe(threading.Thread):
    """Worker thread that incrementally fetches a user's albums,
    persists and indexes each batch, and optionally forwards every batch
    to an incremental callback.

    fetcher              -- source driven via Fetcher.incrementalFetch
    index                -- album search-index interface
    capsule              -- StateCapsule holding the user/query context
    doneCondition        -- condition notified when the full fetch completes
    incrementalCallback  -- optional fn(items, incrementalArguments) per batch
    incrementalArguments -- opaque value handed to incrementalCallback
    """

    NUM_ALBUMS_PER_QUERY = 40

    logger = logging.getLogger("TAlbumsPipeLogger")

    def __init__(self, fetcher, index, capsule, doneCondition,
                 incrementalCallback=None, incrementalArguments=None):
        self.__capsule = capsule
        self.__fetcher = fetcher
        self.__incrementalCallback = incrementalCallback
        self.__incrementalArguments = incrementalArguments
        self.__index = index
        self.__doneCondition = doneCondition
        self.__stillIndexingCondition = threading.Condition()
        self.__writeDAL = WriteDAL()
        super(TAlbumsPipe, self).__init__()

    def _aggregate(self, items):
        """Persist + index one batch of album records, refresh capsule
        state, then forward the batch to the incremental callback."""
        TAlbumsPipe.logger.debug('indexing %(n)d' % {'n': len(items)})
        try:
            for item in items:
                # Persist and index independently so one failure does not
                # block the other.
                try:
                    album = Album()
                    album.setFbArray(item, self.__capsule.user().getId())
                    self.__writeDAL.persistItem(album)
                except Exception as e:
                    TAlbumsPipe.logger.error('WARNING, problem persisting album')
                    TAlbumsPipe.logger.error(e)
                try:
                    self.__index.index(item)
                except Exception as e:
                    TAlbumsPipe.logger.error('WARNING, problem indexing album')
                    TAlbumsPipe.logger.error(e)
                try:
                    # Let every capsule tracking this user refresh its state.
                    capsules = CapsuleManager().capsulesForUser(self.__capsule.user().getId())
                    for capsule in capsules:
                        capsule.updateState()
                except Exception as e:
                    TAlbumsPipe.logger.error('WARNING, problem following up in the State Capsule')
                    TAlbumsPipe.logger.error(e)
        except Exception:
            # BUGFIX: was a bare `except:`, which would also swallow
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            TAlbumsPipe.logger.error('WARNING, problem indexing the following album')
            TAlbumsPipe.logger.warning(sys.exc_info()[0])
        if self.__incrementalCallback is not None:
            self.__incrementalCallback(items, self.__incrementalArguments)

    def run(self):
        # Bind the index environment to this worker thread.
        self.__index.getEnv().attachCurrentThread()
        Fetcher.incrementalFetch(self.__fetcher, 10, self._aggregate)
        # Only notify when we're sure we've indexed and captured everything;
        # this signifies the thread is done running.
        self.__doneCondition.acquire()
        self.__doneCondition.notify()
        self.__doneCondition.release()