Пример #1
0
def _require_session_owned(session_id):
    """
    Verify that the logged-in user owns the given session.

    Looks up the session record by id, then the search record referenced
    by its 'search_id', and compares the search's 'username' with the
    'username' entry of the module-level ``session`` mapping (presumably
    the web session of the current request -- confirm).

    Returns the tuple (search record, session record).
    Raises Exception when either record is missing or the usernames
    differ.
    """
    session_query = {'_id': bson.ObjectId(session_id)}
    owned_session = _session.find_one(session_query)
    if not owned_session:
        raise Exception('Session not found')

    search_query = {'_id': bson.ObjectId(owned_session['search_id'])}
    parent_search = _search.find_one(search_query)
    if not parent_search:
        raise Exception('Search not found')

    # A missing 'username' in the session mapping compares as ''.
    owner = parent_search['username']
    current_user = session.get('username', '')
    if owner != current_user:
        raise Exception('You do not have permission to access this snapshot')

    return (parent_search, owned_session)
Пример #2
0
def _require_session_owned(session_id):
    """
    Check that the session belongs to the logged-in user.

    Fetches the session record for ``session_id`` and the search record
    it points to through 'search_id'. Ownership is decided by comparing
    the search's 'username' with the 'username' entry of the module-level
    ``session`` mapping (an absent entry compares as '').

    Returns (search record, session record) on success.
    Raises Exception if a record cannot be found or the user does not
    match.
    """
    session_doc = _session.find_one({'_id': bson.ObjectId(session_id)})
    if not session_doc:
        raise Exception('Session not found')

    search_id = bson.ObjectId(session_doc['search_id'])
    search_doc = _search.find_one({'_id': search_id})
    if not search_doc:
        raise Exception('Search not found')

    if search_doc['username'] == session.get('username', ''):
        return (search_doc, session_doc)

    raise Exception('You do not have permission to access this snapshot')
Пример #3
0
def filter(session_id):
    """
    Get histogram and tweets for a session.

    Reads the repeated ``filter[]`` query argument from ``request.args``.
    Each element is classified by its prefix:
        starts with '#'    -> hashtag
        starts with 'http' -> URL
        anything else      -> stem
    Every non-empty group is added to the tweet query with MongoDB's
    ``$all`` operator.

    The result is whatever ``_jsonify`` builds from:
        search, session  -- the records looked up for ``session_id``
        stem_counts, hashtag_counts, url_counts
                         -- (value, count) pairs, most common first,
                            with the active filter elements left out
        tweets           -- de-duplicated tweets, in query order
                            ('dt' descending)
        retweets         -- number of tweets skipped as duplicates

    Any exception is printed with ``traceback`` and reported as
    ``_jsonify(error=str(e))`` instead of propagating.
    """
    try:
        _require_session_access(session_id)

        session_r = _session.find_one(
            {'_id': bson.ObjectId(session_id)})
        if not session_r:
            raise Exception('Session not found')

        search_r = _search.find_one(
            {'_id': bson.ObjectId(session_r['search_id'])})
        if not search_r:
            raise Exception('Search not found')

        # Find tweets
        params = {'session_id': session_id}

        # Named `requested` so the local does not shadow this function or
        # the builtin `filter`.
        requested = request.args.getlist('filter[]')
        filter_stems = []
        filter_hashtags = []
        filter_urls = []

        for element in requested:
            if element.startswith('#'):
                filter_hashtags.append(element)
            elif element.startswith('http'):
                filter_urls.append(element)
            else:
                filter_stems.append(element)

        if filter_urls:
            params['urls'] = {'$all': filter_urls}
        if filter_stems:
            params['stems'] = {'$all': filter_stems}
        if filter_hashtags:
            params['hashtags'] = {'$all': filter_hashtags}

        cursor = _tweets.find(params, {
                'embed': 1,
                'id_str': 1,
                'created_at': 1,
                'user.name': 1,
                'user.screen_name': 1,
                'retweeted_status.id_str': 1,
                'stems': 1,
                'hashtags': 1,
                'urls': 1
            }, sort=[('dt', pymongo.DESCENDING)])

        # Process tweets
        stem_counter = Counter()
        hashtag_counter = Counter()
        url_counter = Counter()

        tweets = []
        retweets = 0
        id_set = set()

        for tweet in cursor:
            # Counts include every matched tweet, even those that are
            # skipped as duplicates below.
            stem_counter.update(tweet['stems'])
            hashtag_counter.update(tweet['hashtags'])
            url_counter.update(tweet['urls'])

            if tweet['id_str'] in id_set:
                retweets += 1
                continue
            id_set.add(tweet['id_str'])

            # A retweet is listed only if neither it nor its original has
            # been seen yet; the original's id is then remembered too.
            if 'retweeted_status' in tweet:
                retweeted_id = tweet['retweeted_status']['id_str']
                if retweeted_id in id_set:
                    retweets += 1
                    continue
                id_set.add(retweeted_id)

            tweets.append({
                'text': tweet['embed'],
                'user_name': tweet['user']['name'],
                'user_screen_name': tweet['user']['screen_name'],
                'id_str': tweet['id_str'],
                'created_at': tweet['created_at']
            })

        # Sets give constant-time exclusion of the active filter terms.
        excluded_stems = set(filter_stems)
        excluded_hashtags = set(filter_hashtags)
        excluded_urls = set(filter_urls)

        stem_counts = [x for x in stem_counter.most_common()
                       if x[0] not in excluded_stems]
        hashtag_counts = [x for x in hashtag_counter.most_common()
                          if x[0] not in excluded_hashtags]
        url_counts = [x for x in url_counter.most_common()
                      if x[0] not in excluded_urls]

        return _jsonify(
            search=search_r,
            session=session_r,
            stem_counts=stem_counts,
            hashtag_counts=hashtag_counts,
            url_counts=url_counts,
            tweets=tweets,
            retweets=retweets
        )
    except Exception as e:
        # `as` form is valid on Python 2.6+ and Python 3.
        traceback.print_exc()
        return _jsonify(error=str(e))
Пример #4
0
def filter(session_id):
    """
    Get histogram and tweets for a session.

    Reads the repeated ``filter[]`` query argument from ``request.args``.
    Each element is classified by its prefix:
        starts with '#'    -> hashtag
        starts with 'http' -> URL
        anything else      -> stem
    Every non-empty group is added to the tweet query with MongoDB's
    ``$all`` operator.

    The result is whatever ``_jsonify`` builds from:
        search, session  -- the records looked up for ``session_id``
        stem_counts, hashtag_counts, url_counts
                         -- (value, count) pairs, most common first,
                            with the active filter elements left out
        tweets           -- de-duplicated tweets, in query order
                            ('dt' descending)
        retweets         -- number of tweets skipped as duplicates

    Any exception is printed with ``traceback`` and reported as
    ``_jsonify(error=str(e))`` instead of propagating.
    """
    try:
        _require_session_access(session_id)

        session_r = _session.find_one(
            {'_id': bson.ObjectId(session_id)})
        if not session_r:
            raise Exception('Session not found')

        search_r = _search.find_one(
            {'_id': bson.ObjectId(session_r['search_id'])})
        if not search_r:
            raise Exception('Search not found')

        # Find tweets
        params = {'session_id': session_id}

        # Named `requested` so the local does not shadow this function or
        # the builtin `filter`.
        requested = request.args.getlist('filter[]')
        filter_stems = []
        filter_hashtags = []
        filter_urls = []

        for element in requested:
            if element.startswith('#'):
                filter_hashtags.append(element)
            elif element.startswith('http'):
                filter_urls.append(element)
            else:
                filter_stems.append(element)

        if filter_urls:
            params['urls'] = {'$all': filter_urls}
        if filter_stems:
            params['stems'] = {'$all': filter_stems}
        if filter_hashtags:
            params['hashtags'] = {'$all': filter_hashtags}

        cursor = _tweets.find(params, {
                'embed': 1,
                'id_str': 1,
                'created_at': 1,
                'user.name': 1,
                'user.screen_name': 1,
                'retweeted_status.id_str': 1,
                'stems': 1,
                'hashtags': 1,
                'urls': 1
            }, sort=[('dt', pymongo.DESCENDING)])

        # Process tweets
        stem_counter = Counter()
        hashtag_counter = Counter()
        url_counter = Counter()

        tweets = []
        retweets = 0
        id_set = set()

        for tweet in cursor:
            # Counts include every matched tweet, even those that are
            # skipped as duplicates below.
            stem_counter.update(tweet['stems'])
            hashtag_counter.update(tweet['hashtags'])
            url_counter.update(tweet['urls'])

            if tweet['id_str'] in id_set:
                retweets += 1
                continue
            id_set.add(tweet['id_str'])

            # A retweet is listed only if neither it nor its original has
            # been seen yet; the original's id is then remembered too.
            if 'retweeted_status' in tweet:
                retweeted_id = tweet['retweeted_status']['id_str']
                if retweeted_id in id_set:
                    retweets += 1
                    continue
                id_set.add(retweeted_id)

            tweets.append({
                'text': tweet['embed'],
                'user_name': tweet['user']['name'],
                'user_screen_name': tweet['user']['screen_name'],
                'id_str': tweet['id_str'],
                'created_at': tweet['created_at']
            })

        # Sets give constant-time exclusion of the active filter terms.
        excluded_stems = set(filter_stems)
        excluded_hashtags = set(filter_hashtags)
        excluded_urls = set(filter_urls)

        stem_counts = [x for x in stem_counter.most_common()
                       if x[0] not in excluded_stems]
        hashtag_counts = [x for x in hashtag_counter.most_common()
                          if x[0] not in excluded_hashtags]
        url_counts = [x for x in url_counter.most_common()
                      if x[0] not in excluded_urls]

        return _jsonify(
            search=search_r,
            session=session_r,
            stem_counts=stem_counts,
            hashtag_counts=hashtag_counts,
            url_counts=url_counts,
            tweets=tweets,
            retweets=retweets
        )
    except Exception as e:
        # `as` form is valid on Python 2.6+ and Python 3.
        traceback.print_exc()
        return _jsonify(error=str(e))