def getPartialInfluenceScore():
    """Rebuild the per-friend, per-cluster partial influence scores.

    The cluster-influencer score collection is dropped and re-seeded with one
    document per friend: ``_id`` is the friend's ``id`` and ``cluster`` maps
    ``str(i)`` to an empty list for every ``i`` below the number of distinct
    ``cluster`` values found in the pages-cluster-info collection.

    Then, for every page document whose ``count`` is greater than 3, the
    page's ``people`` list is sorted by ``created_time``. Each person gets a
    score equal to the number of entries, from their own position in the
    sorted list onward, whose like time is no later than one week after
    theirs. The score is pushed onto ``cluster.<page cluster>`` of the
    document whose ``_id`` is that person's ``id``; ``upsert=False`` means
    ids without a seeded document are left alone.

    A page that raises while being processed (for example a person entry
    without ``created_time``) is reported by printing ``hmmm`` and skipped.
    Scores already pushed for that page before the failure are not undone.

    Returns:
        None. The result is written to the cluster-influencer collection.
    """
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    # Start from a clean slate, then seed an empty score list per cluster for
    # every friend so the later $push always has a target list.
    clusterInfluencerCollection.drop()
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        clusterInfluencerCollection.insert({
            '_id': friend['id'],
            # NOTE(review): assumes cluster ids are 0..clusterNumber-1 -- confirm.
            'cluster': {str(i): [] for i in range(clusterNumber)},
        })

    pagesCursor = clusterInfoCollection.find({"count": {"$gt": 3}})
    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # look-ahead window: one week, in seconds

    done = 0
    for page in pagesCursor:
        users = page['people']
        try:
            users.sort(key=lambda person: person['created_time'])
            cluster = page['cluster']
            # Like times as seconds since the epoch, in the same (sorted)
            # order as ``users`` so bisect can be used on them.
            liketime = [(person['created_time'] - epoch).total_seconds()
                        for person in users]

            done += 1
            print(done)
            field = 'cluster.' + str(cluster)
            for position, (user, likedAt) in enumerate(zip(users, liketime)):
                # Index just past the last like inside the window; subtracting
                # the user's own position leaves the likes from this user
                # onward (the user included).
                ahead = bisect.bisect_right(liketime, likedAt + dt)
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {field: ahead - position}},
                    upsert=False)
        except Exception:
            # Best effort: skip pages with malformed data, but let
            # KeyboardInterrupt / SystemExit stop the job.
            print("hmmm")
def getPartialInfluenceScore():
    """Rebuild the per-friend, per-cluster partial influence scores.

    Steps performed:

    1. Drop the cluster-influencer score collection and insert one document
       per friend, with ``_id`` set to the friend's ``id`` and ``cluster``
       mapping ``str(i)`` to ``[]`` for each ``i`` below the number of
       distinct ``cluster`` values in the pages-cluster-info collection.
    2. For each page document with ``count`` greater than 3, sort its
       ``people`` by ``created_time`` and, for every person, count the
       entries from that person's position onward whose like time falls
       within one week after the person's own like time.
    3. Push that count onto ``cluster.<page cluster>`` of the document whose
       ``_id`` equals the person's ``id`` (no upsert, so unknown ids are
       ignored).

    Pages that raise during processing (for example when a person entry has
    no ``created_time``) print ``hmmm`` and are skipped; pushes already made
    for such a page are not rolled back.

    Returns:
        None. All output goes to the cluster-influencer collection.
    """
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    # Recreate the score documents from scratch.
    clusterInfluencerCollection.drop()
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        # NOTE(review): assumes cluster ids are 0..clusterNumber-1 -- confirm.
        emptyScores = {str(i): [] for i in range(clusterNumber)}
        clusterInfluencerCollection.insert(
            {'_id': friend['id'], 'cluster': emptyScores})

    pagesCursor = clusterInfoCollection.find({"count": {"$gt": 3}})
    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # one week in seconds

    done = 0
    for page in pagesCursor:
        users = page['people']
        try:
            users.sort(key=lambda person: person['created_time'])
            cluster = page['cluster']
            # Seconds since the epoch for every like, ordered like ``users``.
            liketime = [(person['created_time'] - epoch).total_seconds()
                        for person in users]

            done += 1
            print(done)
            for back, user in enumerate(users):
                # ``back`` is the user's own index in the sorted list, so the
                # difference counts the likes from this user up to one week
                # later.
                ahead = bisect.bisect_right(liketime, liketime[back] + dt)
                score = ahead - back
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {'cluster.' + str(cluster): score}},
                    upsert=False)
        except Exception:
            # Keep going past malformed pages without trapping
            # KeyboardInterrupt / SystemExit the way a bare ``except`` would.
            print("hmmm")
from database import getPageCollection, getLikesCollection, getPagesClusterInfoCollection, getClusterCollection
from pprint import pprint
import dateutil.parser as dateparser

# Rebuild the pages-cluster-info collection: for every page, store who liked
# it (and when, if known), how many likers there are, and the page's cluster.
allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

fbpagesinfo.drop()
counter = 0
for pageId in allpages.find():
    # Like documents whose 'data' array contains an entry for this page.
    cursor = alllikes.find({'data': {'$elemMatch': {'id': pageId['_id']}}})
    # NOTE(review): assumes every page is listed in some cluster document;
    # if the lookup yields nothing the subscript below fails -- confirm.
    cluster = clusterinfo.find_one({'pages': pageId['_id']})
    cluster = cluster["cluster"]

    document = {'_id': pageId['_id'], 'people': [], 'cluster': cluster}
    for c in cursor:
        dd = {'id': c['id']}
        # Record the like timestamp from the first entry for this page that
        # carries one; entries without 'created_time' are passed over.
        for pages in c['data']:
            if pages['id'] == pageId['_id'] and 'created_time' in pages:
                dd['created_time'] = dateparser.parse(pages['created_time'])
                break
        document['people'].append(dd)
    # Derive the count from the list just built: one entry is appended per
    # matched like document, so this avoids a second server-side count query
    # and keeps 'count' consistent with 'people'.
    document['count'] = len(document['people'])
    counter += 1
    print('document %d done' % counter)
    fbpagesinfo.insert(document)
# Example no. 4
# 0
from database import getPageCollection, getLikesCollection, getPagesClusterInfoCollection, getClusterCollection
from pprint import pprint
import dateutil.parser as dateparser

# Build one pages-cluster-info document per page from the page, likes and
# cluster collections.
allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

# Discard any previous contents of the target collection before rebuilding.
fbpagesinfo.drop()
counter = 0
for pageId in allpages.find():
    # Like documents whose 'data' array contains an entry for this page.
    cursor = alllikes.find({'data': {'$elemMatch': {'id': pageId['_id']}}})
    # NOTE(review): assumes a cluster document listing this page exists;
    # presumably the subscript below fails otherwise -- confirm.
    cluster = clusterinfo.find_one({'pages': pageId['_id']})
    cluster = cluster["cluster"]

    document = {
        '_id': pageId['_id'],
        'people': [],
        'count': cursor.count(),
        'cluster': cluster
    }
    for c in cursor:
        # One entry per matched like document; 'created_time' is only set
        # when the matching page entry carries it.
        dd = {'id': c['id']}
        for pages in c['data']:
            if pages['id'] == pageId['_id']:
                if 'created_time' in pages:
                    dd['created_time'] = dateparser.parse(
                        pages['created_time'])
                    # Stop at the first matching entry that has a timestamp.
                    break
        document['people'].append(dd)