def getPages():
    """Upsert one page document for every like entry in the likes collection.

    Every document returned by the likes collection is expected to carry a
    ``'data'`` sequence of like entries; for each entry a ``{'_id': <id>}``
    document is upserted into the page collection.  A running tally is
    printed after each upsert and a summary line is printed at the end.

    The tally counts like entries processed, so an id that occurs in several
    likes documents is counted once per occurrence.
    """
    like_store = getLikesCollection()
    page_store = getPageCollection()

    # Lazily flatten every likes document into its individual like entries so
    # the collection is still streamed one document at a time.
    entries = (entry for doc in like_store.find() for entry in doc['data'])

    processed = 0
    for processed, entry in enumerate(entries, 1):
        # The same dict serves as both the match spec and the replacement.
        selector = {'_id': entry['id']}
        page_store.update(selector, selector, upsert=True)
        print(processed)

    print(' '.join(('Total', str(processed), 'pages fetched')))
"""Rebuild the pages-cluster-info collection from the pages and likes data.

For every page in the page collection this script stores one document with:

* ``_id``     -- the page id,
* ``people``  -- one entry per likes document that contains the page, holding
                 the likes document's ``id`` and, when available, the parsed
                 ``created_time`` of the like,
* ``count``   -- the number of entries in ``people``,
* ``cluster`` -- the ``cluster`` value of the cluster document listing the
                 page, or ``None`` when no cluster document lists it.

The target collection is dropped first, so the script always rebuilds it
from scratch.
"""
from pprint import pprint

import dateutil.parser as dateparser

from database import (
    getPageCollection,
    getLikesCollection,
    getPagesClusterInfoCollection,
    getClusterCollection,
)


def _like_entry(like_doc, page_id):
    """Return the ``people`` entry for one likes document that has *page_id*.

    The entry always carries the likes document's ``id``.  ``created_time``
    is added (parsed into a datetime) only when the first like matching
    *page_id* provides it.
    """
    entry = {'id': like_doc['id']}
    for liked in like_doc['data']:
        if liked['id'] == page_id:
            if 'created_time' in liked:
                entry['created_time'] = dateparser.parse(liked['created_time'])
            # Only the first matching like is considered.
            break
    return entry


allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

# Start from an empty target collection; everything is recomputed below.
fbpagesinfo.drop()

counter = 0
for page in allpages.find():
    page_id = page['_id']

    # A page that belongs to no cluster must not abort the rebuild (the
    # target collection has already been dropped); record it without one.
    cluster_doc = clusterinfo.find_one({'pages': page_id})
    cluster = cluster_doc['cluster'] if cluster_doc is not None else None

    matching_likes = alllikes.find({'data': {'$elemMatch': {'id': page_id}}})
    people = [_like_entry(like_doc, page_id) for like_doc in matching_likes]

    document = {
        '_id': page_id,
        'people': people,
        # Each matching likes document yields exactly one entry, so the list
        # length is the match count -- no separate count query is needed.
        'count': len(people),
        'cluster': cluster,
    }

    counter += 1
    print('document %d done' % counter)
    fbpagesinfo.insert(document)
import requests from database import getLikesCollection, getFriendsCollection from utilities import url, access_token from Queue import Queue import threading import json idQueue = Queue() likesCollection = getLikesCollection() friendsCollection = getFriendsCollection() for friend in friendsCollection.find(): idQueue.put(friend['id']) class getLikes(threading.Thread): def __init__(self): threading.Thread.__init__(self) self.queue = idQueue def run(self): while True: try: fbid = self.queue.get() rurl = url + '/v2.3/' + fbid response = requests.get(rurl, params={ 'access_token': access_token, 'fields': 'likes' })
# For every page in the page collection, assemble a document listing the likes
# documents ("people") that contain that page, together with the page's cluster.
# NOTE(review): the visible text ends inside the per-page loop -- `counter` is
# never advanced and `document` is never written anywhere in this view, so the
# script presumably continues beyond this point; confirm against the full file.
from database import getPageCollection, getLikesCollection, getPagesClusterInfoCollection, getClusterCollection
from pprint import pprint
import dateutil.parser as dateparser

allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

# The target collection is emptied before anything is computed.
fbpagesinfo.drop()
counter = 0
for pageId in allpages.find():
    # Likes documents whose 'data' array has an element with this page's id.
    cursor = alllikes.find({'data': {'$elemMatch': {'id': pageId['_id']}}})
    # Cluster document matching this page id on its 'pages' field.
    # NOTE(review): if find_one returns None when nothing matches (as PyMongo
    # does), the subscript on the next line raises TypeError -- confirm every
    # page is guaranteed to be in a cluster.
    cluster = clusterinfo.find_one({'pages': pageId['_id']})
    cluster = cluster["cluster"]
    document = {
        '_id': pageId['_id'],
        'people': [],
        'count': cursor.count(),
        'cluster': cluster
    }
    for c in cursor:
        # One entry per matching likes document, keyed by that document's 'id'.
        dd = {'id': c['id']}
        for pages in c['data']:
            if pages['id'] == pageId['_id']:
                # The like timestamp is optional; parse it only when present.
                if 'created_time' in pages:
                    dd['created_time'] = dateparser.parse(
                        pages['created_time'])
                # Stop at the first like that matches this page.
                break
        document['people'].append(dd)