Example #1
0
 def __init__(self, doneCondition, albums, capsule):
     """Collect the pipe's collaborators and initialise the worker thread.

     doneCondition -- condition notified when this pipe finishes
     albums        -- album records whose comments will be fetched
     capsule       -- state capsule giving access to the current user
     """
     TCommentsPipe.logger.debug('INIT')
     self.__capsule = capsule
     self.__albums = albums
     self.__doneCondition = doneCondition
     # comments search index rooted at the user's index location
     self.__index = CommentsSearchAPI(capsule.user().getIndexLocation())
     self.__writeDAL = WriteDAL()
     super(TCommentsPipe, self).__init__()
Example #2
0
 def __init__(self, fetcher, index , capsule, doneCondition, incrementalCallback=None, incrementalArguments=None):
     """Wire up the album pipeline's collaborators and initialise the thread.

     incrementalCallback, if given, is invoked with
     (items, incrementalArguments) as batches are processed.
     """
     self.__fetcher = fetcher
     self.__index = index
     self.__capsule = capsule
     self.__doneCondition = doneCondition
     # optional per-batch hook and its opaque extra arguments
     self.__incrementalCallback = incrementalCallback
     self.__incrementalArguments = incrementalArguments
     # NOTE(review): name suggests this tracks in-progress indexing -- confirm
     self.__stillIndexingCondition = threading.Condition()
     self.__writeDAL = WriteDAL()
     super(TAlbumsPipe, self).__init__()
Example #3
0
 def __init__(self,**kwargs):
     """Build a cursor handler from keyword arguments.

     Recognised keys: 'cursor', 'searchid', 'callback'.
     """
     self.__cb = kwargs.get('callback')
     self.__searchId = kwargs.get('searchid')
     self.__curr_cursor = kwargs.get('cursor')
     # data-access layers used for the cached search results
     self.__rdal = ReadDAL()
     self.__wdal = WriteDAL()

     # progress tracking
     self.__counter = 0
     self.__done = False

     self.__notificationBus = ResultNotifier()
Example #4
0
File: request.py — Project: linares/fwiki
def execute(**kwargs):
    """Handle a search request for a user's indexed content.

    Expected kwargs: 'fbid', 'cursor', 'access_token', 'callback'.

    NOTE(review): this snippet references names never defined in the
    visible code -- 'user', 'rdal', 'search_id' and 'query' -- so the
    lines that load the user and build the search id were presumably
    lost; confirm against the full source before relying on this.
    """
    fbid = kwargs.get('fbid')
    cursor = kwargs.get('cursor')
    access_token = kwargs.get('access_token')
    callback = kwargs.get('callback')

    logging.info("QUERY REQUEST %s %s ", fbid,cursor)

    # NOTE(review): 'user' is undefined here -- presumably looked up via
    # the read DAL just before this point in the original file.
    if user == None :

        # Create/save the user if it does not exist yet
        wdal = WriteDAL()
        user = User(fbid)
        user.setIndexTime(datetime.datetime.now().isoformat())
        user.setCreated(datetime.datetime.now().isoformat())
        # mark the account as currently being indexed
        user.setIndexing(1)
        user.setAccessToken(access_token)
        wdal.persistUser(user)

        #Prep user account (create index location, etc)
        user.prepFirstUse()

        #handles getting search hits when we get them and returning with partial results
        # NOTE(review): 'search_id' is undefined in the visible code
        qw = QueryWaiter(callback, cursor, search_id)
        qw.start()

        #create capsule
        # NOTE(review): 'query' is undefined in the visible code
        capsule = StateCapsule(cursor_handler=CursorHandler(cursor=cursor,searchid=search_id,callback=callback), query=query, user=user)
        CapsuleManager().addCapsule(capsule)

        #start fetch pipeline (full crawl + index for a brand-new user)
        fp = FetchPipeline(capsule)
        #AsyncQueryHandler.logger.debug('FULL INDEXING CYCLE STARTED')
        fp.start()

    else :
        #this is a returning user
        #check to see if this user has already searched for this id (memcache)

        # NOTE(review): 'rdal' is undefined in the visible code
        search_result = rdal.getSearchID(search_id)

        if search_result != None :
            #This is an existing query from the user that we still have in the cache
            #return the next page of results
            r = CursorHandler.getPageForSearchResults(search_result, cursor)

            if user.getIndexing() == 0:
                #this signifies we really have no more results to return
                callback(r, cursor, 1)
            else :
                #handles getting search hits when we get them and returning with partial results
                qw = QueryWaiter(callback, cursor, search_id)
                qw.start()

                #also need to kick off a query pipeline here so we start getting results asap
        else :
            qw = QueryWaiter(callback, cursor, search_id)
            qw.start()

            #brand new query for an existing user
            capsule = StateCapsule(cursor_handler=CursorHandler(cursor=cursor,searchid=search_id,callback=callback), query=query, user=user)
            CapsuleManager().addCapsule(capsule)
            qr = QueryPipeline(capsule)
            qr.start()


        #check the index time.  if the latest time we've queried this user's data is > THRESHOLD , kick off fetching + indexing from a certain point in time
Example #5
0
class TCommentsPipe(threading.Thread):
    """Worker thread that fetches the comments on a user's photos, album
    by album, persists each comment and adds it to the comments index.

    doneCondition -- condition notified once all albums have been processed
    albums        -- iterable of album records (dicts carrying an 'aid' key)
    capsule       -- state capsule giving access to the current user
    """

    # Albums are batched into a single FQL IN(...) query of at most this
    # many album ids per request.
    NUM_ALBUMS_PER_QUERY = 40

    logger = logging.getLogger()

    def __init__(self, doneCondition, albums, capsule):
        self.__doneCondition = doneCondition
        TCommentsPipe.logger.debug('INIT')
        # comments search index rooted at the user's index location
        self.__index = CommentsSearchAPI(capsule.user().getIndexLocation())
        self.__albums = albums
        self.__capsule = capsule
        self.__writeDAL = WriteDAL()
        super(TCommentsPipe, self).__init__()

    def run(self):
        # bind the index environment to this worker thread before indexing
        self.__index.getEnv().attachCurrentThread()

        batch = []
        for album in self.__albums :
            batch.append(album['aid'])
            if len(batch) == TCommentsPipe.NUM_ALBUMS_PER_QUERY:
                # BUG FIX: the original flushed *instead of* appending, so
                # the album that triggered each flush was silently dropped.
                self.fetchCommentsForAlbums(','.join(batch))
                batch = []

        #finish up what's left
        if batch:
            self.fetchCommentsForAlbums(','.join(batch))

        self.__index.finish()

        # wake whoever is waiting for this pipe to complete
        self.__doneCondition.acquire()
        self.__doneCondition.notify()
        self.__doneCondition.release()

        TCommentsPipe.logger.debug('NOTIFIED DONE CONDITION')

    def fetchCommentsForAlbums(self, album_list):
        """Fetch every comment for the photos in the comma-separated album
        id list, retrying up to 3 times, then hand the results to
        fetchCallback."""
        TCommentsPipe.logger.debug('getting comments for photos in these albums : ' + album_list)
        c = CommentsFetcher(self.__capsule.user().getAccessToken(), object_ids='SELECT object_id FROM photo WHERE aid IN (%(list)s)' % {'list' : album_list })

        num_retries = 0
        comments = []
        while num_retries < 3 :
            try:
                comments = c.fetchAll()
            except Exception as e:
                num_retries += 1
                # use the class logger, consistent with the rest of the class
                TCommentsPipe.logger.warning(e)
                continue
            break

        self.fetchCallback(comments)

    def fetchCallback(self, comments):
        """Persist and index one batch of comments; a failure on a single
        comment is logged and never aborts the batch."""
        TCommentsPipe.logger.debug('indexing %(n)d' % {'n' : len(comments)})
        for comment in comments :
            try:
                c = Comment()
                c.setFbArray(comment,  self.__capsule.user().getId())
                self.__writeDAL.persistItem(c)
                # BUG FIX: the original also called self.__index.index(comment)
                # here, so every successfully persisted comment was indexed
                # twice (see the dedicated indexing step below).
            except Exception as e:
                TCommentsPipe.logger.error('WARNING, problem persisting the following comment')
                TCommentsPipe.logger.error(e)

            try:
                self.__index.index(comment)
            except Exception as e:
                TCommentsPipe.logger.error('WARNING, problem indexing the following comment')
                TCommentsPipe.logger.error(e)

            try:
                capsules = CapsuleManager().capsulesForUser(self.__capsule.user().getId())
                for capsule in capsules:
                    capsule.updateState()
            except Exception as e :
                TCommentsPipe.logger.error('WARNING, problem following up in the State Capsule')
                TCommentsPipe.logger.error(e)
Example #6
0
class CursorHandler(object):
    """Caches search results for one search id and pages them out to the
    caller via a cursor."""

    @staticmethod
    def orderByIndexTime(docs):
        """Sort docs in place by their 'indexed_at' field and return them.

        BUG FIX: the original called sorted() and discarded its return
        value, so the documents were never actually reordered.
        """
        docs.sort(key=lambda doc: doc.get('indexed_at'))
        print(docs)
        return docs

    def __init__(self,**kwargs):
        """Recognised kwargs: 'cursor', 'searchid', 'callback'."""
        self.__curr_cursor = kwargs.get('cursor')
        self.__searchId = kwargs.get('searchid')
        self.__cb = kwargs.get('callback')
        # data-access layers used for the cached search results
        self.__rdal = ReadDAL()
        self.__wdal = WriteDAL()

        # progress tracking
        self.__counter = 0
        self.__done = False

        self.__notificationBus = ResultNotifier()

    def addRawDocuments(self, docs):
        """Decode raw index documents, cache them under this handler's
        search id, and notify waiters once results beyond the current
        cursor exist."""
        cached = []
        for doc in docs:
            item = json.loads(doc.get("full").encode('utf-8'))
            doc_type = doc.get("type")  # renamed: 'type' shadowed the builtin
            cached.append({"type" : doc_type, "item" : item})

        #store the cached search
        self.__wdal.cacheSearchResult(self.__searchId, cached)
        if len(cached) > self.__curr_cursor :
            #notify of result
            self.__notificationBus.notifyConditionWithResults(self.__searchId, cached)

    def isDone(self):
        # whether the result threshold has been reached (see __counter)
        return self.__done

    @staticmethod
    def getPageForSearchResults(result, cursor):
        """Return one page of cached results for the given cursor.

        BUG FIX: the original used max(), which always produced
        end >= len(result) and therefore returned everything from the
        cursor onward; min() caps the slice at the end of the list.
        NOTE(review): the (page_size - 1) formula is preserved as-is --
        confirm the intended page arithmetic against the callers.
        """
        page_size = boobox.config.PAGE_SIZE

        end = min((cursor + 1) * (page_size - 1), len(result))
        return result[cursor:end]
Example #7
0
class TAlbumsPipe(threading.Thread):
    """Worker thread that incrementally fetches a user's albums, persists
    each batch and feeds it into the albums search index.

    fetcher             -- source of album records, consumed incrementally
    index               -- the albums index interface
    capsule             -- state capsule giving access to the current user
    doneCondition       -- condition notified when the pipe has finished
    incrementalCallback -- optional hook invoked with
                           (items, incrementalArguments) after each batch
    """

    NUM_ALBUMS_PER_QUERY = 40

    logger = logging.getLogger("TAlbumsPipeLogger")

    def __init__(self, fetcher, index , capsule, doneCondition, incrementalCallback=None, incrementalArguments=None):
        self.__capsule = capsule
        self.__fetcher = fetcher
        self.__incrementalCallback = incrementalCallback
        self.__incrementalArguments = incrementalArguments
        self.__index = index
        self.__doneCondition = doneCondition
        # NOTE(review): name suggests this tracks in-progress indexing -- confirm
        self.__stillIndexingCondition = threading.Condition()
        self.__writeDAL = WriteDAL()
        super(TAlbumsPipe, self).__init__()

    def _aggregate(self, items):
        """Persist and index one batch of album records; a failure on a
        single album is logged and never aborts the batch."""
        TAlbumsPipe.logger.debug('indexing %(n)d' % {'n' : len(items)})
        try:
            for item in items :
                try:
                    album = Album()
                    album.setFbArray(item, self.__capsule.user().getId())
                    self.__writeDAL.persistItem(album)
                except Exception as e :
                    TAlbumsPipe.logger.error('WARNING, problem persisting album')
                    TAlbumsPipe.logger.error(e)

                try:
                    self.__index.index(item)
                except Exception as e :
                    TAlbumsPipe.logger.error('WARNING, problem indexing album')
                    TAlbumsPipe.logger.error(e)

                try:
                    capsules = CapsuleManager().capsulesForUser(self.__capsule.user().getId())
                    for capsule in capsules:
                        capsule.updateState()
                except Exception as e :
                    TAlbumsPipe.logger.error('WARNING, problem following up in the State Capsule')
                    TAlbumsPipe.logger.error(e)
        except Exception as e:
            # BUG FIX: was a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; narrowed to Exception and log it.
            TAlbumsPipe.logger.error('WARNING, problem indexing the following album')
            TAlbumsPipe.logger.error(e)

        if self.__incrementalCallback is not None :
            self.__incrementalCallback(items, self.__incrementalArguments)

    def run(self):
        # bind the index environment to this worker thread before indexing
        self.__index.getEnv().attachCurrentThread()
        Fetcher.incrementalFetch(self.__fetcher, 10, self._aggregate)

        #only do this when we're sure we've indexed and captured everything
        self.__doneCondition.acquire()
        self.__doneCondition.notify()
        self.__doneCondition.release()
        #this signifies the thread is done running