def _require_session_owned(session_id):
    """
    Require that the session is owned by the logged in user.

    Looks up the session document by ``session_id``, then the search
    document referenced by its ``search_id``, and compares the search's
    ``username`` with the ``username`` entry of the ``session`` object
    (empty string when that entry is absent).

    Returns:
        A ``(search, session)`` tuple holding the two documents found.

    Raises:
        Exception: if the session or the search cannot be found, or if
            the usernames do not match.
    """
    session_doc = _session.find_one({'_id': bson.ObjectId(session_id)})
    if not session_doc:
        raise Exception('Session not found')

    search_oid = bson.ObjectId(session_doc['search_id'])
    search_doc = _search.find_one({'_id': search_oid})
    if not search_doc:
        raise Exception('Search not found')

    # Ownership is decided by the username stored on the search document.
    owner = search_doc['username']
    if owner == session.get('username', ''):
        return (search_doc, session_doc)

    raise Exception('You do not have permission to access this snapshot')
def filter(session_id):
    """
    Get histogram and tweets for a session, optionally narrowed by filters.

    Filter elements are read from the repeated ``filter[]`` request
    argument. Each element is classified by its prefix:

        - starts with '#'    -> hashtag
        - starts with 'http' -> URL
        - anything else      -> stem

    A tweet matches only if it contains every requested element of each
    kind (``$all``).

    Returns:
        The result of ``_jsonify(...)`` with the keys ``search``,
        ``session``, ``stem_counts``, ``hashtag_counts``, ``url_counts``
        (each a list of ``(value, count)`` pairs, most common first,
        excluding the elements being filtered on), ``tweets`` and
        ``retweets`` (number of tweets skipped because their id, or the id
        of the tweet they retweet, was already seen).

        On any exception the traceback is printed and
        ``_jsonify(error=str(e))`` is returned instead.
    """
    try:
        # Defined elsewhere in the project; presumably raises when access
        # is not allowed. Its return value is not used here.
        _require_session_access(session_id)

        session_r = _session.find_one({'_id': bson.ObjectId(session_id)})
        if not session_r:
            raise Exception('Session not found')

        search_r = _search.find_one(
            {'_id': bson.ObjectId(session_r['search_id'])})
        if not search_r:
            raise Exception('Search not found')

        # Classify the requested filter elements by prefix.
        filter_stems = []
        filter_hashtags = []
        filter_urls = []
        for element in request.args.getlist('filter[]'):
            if element.startswith('#'):
                filter_hashtags.append(element)
            elif element.startswith('http'):
                filter_urls.append(element)
            else:
                filter_stems.append(element)

        # Find tweets
        params = {'session_id': session_id}
        if filter_urls:
            params['urls'] = {'$all': filter_urls}
        if filter_stems:
            params['stems'] = {'$all': filter_stems}
        if filter_hashtags:
            params['hashtags'] = {'$all': filter_hashtags}

        cursor = _tweets.find(params, {
            'embed': 1,
            'id_str': 1,
            'created_at': 1,
            'user.name': 1,
            'user.screen_name': 1,
            'retweeted_status.id_str': 1,
            'stems': 1,
            'hashtags': 1,
            'urls': 1
        }, sort=[('dt', pymongo.DESCENDING)])

        # Process tweets
        stem_counter = Counter()
        hashtag_counter = Counter()
        url_counter = Counter()
        tweets = []
        retweets = 0
        id_set = set()

        for tweet in cursor:
            # Counters are updated before de-duplication, so tweets that
            # are skipped below still contribute to the counts.
            stem_counter.update(tweet['stems'])
            hashtag_counter.update(tweet['hashtags'])
            url_counter.update(tweet['urls'])

            if tweet['id_str'] in id_set:
                retweets += 1
                continue
            id_set.add(tweet['id_str'])

            if 'retweeted_status' in tweet:
                retweeted_id = tweet['retweeted_status']['id_str']
                if retweeted_id in id_set:
                    retweets += 1
                    continue
                id_set.add(retweeted_id)

            tweets.append({
                'text': tweet['embed'],
                'user_name': tweet['user']['name'],
                'user_screen_name': tweet['user']['screen_name'],
                'id_str': tweet['id_str'],
                'created_at': tweet['created_at']
            })

        # Leave out the elements already being filtered on; sets give
        # constant-time membership checks.
        excluded_stems = set(filter_stems)
        excluded_hashtags = set(filter_hashtags)
        excluded_urls = set(filter_urls)

        stem_counts = [x for x in stem_counter.most_common()
                       if x[0] not in excluded_stems]
        hashtag_counts = [x for x in hashtag_counter.most_common()
                          if x[0] not in excluded_hashtags]
        url_counts = [x for x in url_counter.most_common()
                      if x[0] not in excluded_urls]

        return _jsonify(
            search=search_r,
            session=session_r,
            stem_counts=stem_counts,
            hashtag_counts=hashtag_counts,
            url_counts=url_counts,
            tweets=tweets,
            retweets=retweets
        )
    except Exception as e:
        # Top-level boundary of the handler: report the failure to the
        # client rather than letting the exception propagate.
        traceback.print_exc()
        return _jsonify(error=str(e))