def getFullInfluenceScore():
    """Combine per-cluster mean scores with contribution weights per friend.

    For every document in the friends collection, the friend's per-cluster
    score lists and contribution weights are looked up by ``_id``. The mean
    of cluster ``str(i)``'s score list is multiplied by contribution weight
    ``i``; the resulting list is inserted into the full-influence-score
    collection as ``{'_id', 'name', 'score'}`` and echoed to stdout.

    Friends with no partial-score or contribution document are reported and
    skipped. Friends whose contribution list is empty produce no document.
    """
    partialScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    fbfriends = getFriendsCollection()
    fullScoreCollection = getFullInfluenceScoreCollection()

    # NOTE(review): unlike the sibling builders in this file, the output
    # collection is not dropped first; re-running presumably collides on
    # existing _id values -- confirm the intended behavior.
    for friend in fbfriends.find():
        fid = friend['id']
        partial = partialScoreCollection.find_one({'_id': fid})
        contribution = userClusterContributionCollection.find_one({'_id': fid})
        if partial is None or contribution is None:
            # find_one returns None when nothing matches; without both
            # inputs there is nothing to score for this friend.
            print('No partial score or contribution for %s, skipping' % (fid,))
            continue

        clusters = partial['cluster']
        score = []
        for i, weight in enumerate(contribution['contribution']):
            values = clusters.get(str(i)) or []
            # float() forces true division: the score lists hold integers,
            # and int / int truncates the mean under Python 2.
            avg = float(sum(values)) / len(values) if values else 0.0
            score.append(avg * weight)

        if score:
            fullScoreCollection.insert({
                '_id': fid,
                'name': friend['name'],
                'score': score,
            })
            print('%s %s' % (friend['name'], score))
def getFullInfluenceScore():
    """Combine per-cluster mean scores with contribution weights per friend.

    For every document in the friends collection, the friend's per-cluster
    score lists and contribution weights are looked up by ``_id``. The mean
    of cluster ``str(i)``'s score list is multiplied by contribution weight
    ``i``; the resulting list is inserted into the full-influence-score
    collection as ``{'_id', 'name', 'score'}`` and echoed to stdout.

    Friends with no partial-score or contribution document are reported and
    skipped. Friends whose contribution list is empty produce no document.
    """
    partialScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    fbfriends = getFriendsCollection()
    fullScoreCollection = getFullInfluenceScoreCollection()

    # NOTE(review): unlike the sibling builders in this file, the output
    # collection is not dropped first; re-running presumably collides on
    # existing _id values -- confirm the intended behavior.
    for friend in fbfriends.find():
        fid = friend['id']
        partial = partialScoreCollection.find_one({'_id': fid})
        contribution = userClusterContributionCollection.find_one({'_id': fid})
        if partial is None or contribution is None:
            # find_one returns None when nothing matches; without both
            # inputs there is nothing to score for this friend.
            print('No partial score or contribution for %s, skipping' % (fid,))
            continue

        clusters = partial['cluster']
        score = []
        for i, weight in enumerate(contribution['contribution']):
            values = clusters.get(str(i)) or []
            # float() forces true division: the score lists hold integers,
            # and int / int truncates the mean under Python 2.
            avg = float(sum(values)) / len(values) if values else 0.0
            score.append(avg * weight)

        if score:
            fullScoreCollection.insert({
                '_id': fid,
                'name': friend['name'],
                'score': score,
            })
            print('%s %s' % (friend['name'], score))
def getClusterContribution():
    """Rebuild each user's share of score entries per cluster.

    The user-cluster-contribution collection is dropped, then for every
    document in the cluster-influencer-score collection a document
    ``{'_id', 'contribution'}`` is inserted. ``contribution[i]`` is the
    number of score entries in the i-th cluster divided by the user's total
    number of entries; it is an empty list when the user has no entries.
    """
    clusterInfluencerScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    userClusterContributionCollection.drop()

    def clusterOrder(key):
        # Cluster keys are numeric strings ('0', '1', ..., '10'); a plain
        # string sort would put '10' before '2' and shift every later
        # weight away from the cluster index it belongs to. Non-numeric
        # keys, if any, sort after the numeric ones.
        try:
            return (0, int(key), '')
        except ValueError:
            return (1, 0, key)

    for scores in clusterInfluencerScoreCollection.find():
        clusters = scores['cluster']
        counts = [len(clusters[key])
                  for key in sorted(clusters, key=clusterOrder)]
        total = sum(counts)  # computed once instead of per element

        document = {
            '_id': scores['_id'],
            'contribution': (
                [count * 1.0 / total for count in counts] if total > 0 else []
            ),
        }
        inserted = userClusterContributionCollection.insert(document)
        print('Calculated for %s' % (inserted,))
def getPartialInfluenceScore():
    """Rebuild the per-cluster influence score lists for every friend.

    The cluster-influencer-score collection is dropped and re-seeded with
    one document per friend, holding an empty score list for each cluster
    index ``0 .. clusterNumber - 1``.

    Then, for every page whose ``count`` is greater than 3, the page's
    ``people`` are sorted by ``created_time``. Each person is scored as the
    number of entries, from their own position in that order onward, whose
    like time is no later than one week after theirs. The score is pushed
    onto ``cluster.<page cluster>`` of the document whose ``_id`` equals the
    person's id; with ``upsert=False`` no new documents are created.

    A page that fails during processing is reported and skipped.
    """
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    clusterInfluencerCollection.drop()
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        clusterInfluencerCollection.insert({
            '_id': friend['id'],
            'cluster': {str(i): [] for i in range(clusterNumber)},
        })

    pagesCursor = clusterInfoCollection.find({"count": {"$gt": 3}})
    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # one week, in seconds

    done = 0
    for page in pagesCursor:
        users = page['people']
        try:
            users.sort(key=lambda x: x['created_time'])
            cluster = page['cluster']
            # Like times as seconds since the epoch, in ascending order.
            liketime = [(user['created_time'] - epoch).total_seconds()
                        for user in users]

            done += 1
            print(done)
            for position, user in enumerate(users):
                # Index just past the last like within one week of this one.
                ahead = bisect.bisect_right(liketime, liketime[position] + dt)
                score = ahead - position
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {'cluster.' + str(cluster): score}},
                    upsert=False)
        except Exception as exc:
            # Best effort: keep going with the next page, but say why this
            # one failed instead of hiding the cause.
            print("hmmm %r" % (exc,))
def getPartialInfluenceScore():
    """Rebuild the per-cluster influence score lists for every friend.

    The cluster-influencer-score collection is dropped and re-seeded with
    one document per friend, holding an empty score list for each cluster
    index ``0 .. clusterNumber - 1``.

    Then, for every page whose ``count`` is greater than 3, the page's
    ``people`` are sorted by ``created_time``. Each person is scored as the
    number of entries, from their own position in that order onward, whose
    like time is no later than one week after theirs. The score is pushed
    onto ``cluster.<page cluster>`` of the document whose ``_id`` equals the
    person's id; with ``upsert=False`` no new documents are created.

    A page that fails during processing is reported and skipped.
    """
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    clusterInfluencerCollection.drop()
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        clusterInfluencerCollection.insert({
            '_id': friend['id'],
            'cluster': {str(i): [] for i in range(clusterNumber)},
        })

    pagesCursor = clusterInfoCollection.find({"count": {"$gt": 3}})
    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # one week, in seconds

    done = 0
    for page in pagesCursor:
        users = page['people']
        try:
            users.sort(key=lambda x: x['created_time'])
            cluster = page['cluster']
            # Like times as seconds since the epoch, in ascending order.
            liketime = [(user['created_time'] - epoch).total_seconds()
                        for user in users]

            done += 1
            print(done)
            for position, user in enumerate(users):
                # Index just past the last like within one week of this one.
                ahead = bisect.bisect_right(liketime, liketime[position] + dt)
                score = ahead - position
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {'cluster.' + str(cluster): score}},
                    upsert=False)
        except Exception as exc:
            # Best effort: keep going with the next page, but say why this
            # one failed instead of hiding the cause.
            print("hmmm %r" % (exc,))
# Example #6
# 0
def getClusterContribution():
    """Rebuild each user's share of score entries per cluster.

    The user-cluster-contribution collection is dropped, then for every
    document in the cluster-influencer-score collection a document
    ``{'_id', 'contribution'}`` is inserted. ``contribution[i]`` is the
    number of score entries in the i-th cluster divided by the user's total
    number of entries; it is an empty list when the user has no entries.
    """
    clusterInfluencerScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    userClusterContributionCollection.drop()

    def clusterOrder(key):
        # Cluster keys are numeric strings ('0', '1', ..., '10'); a plain
        # string sort would put '10' before '2' and shift every later
        # weight away from the cluster index it belongs to. Non-numeric
        # keys, if any, sort after the numeric ones.
        try:
            return (0, int(key), '')
        except ValueError:
            return (1, 0, key)

    for scores in clusterInfluencerScoreCollection.find():
        clusters = scores['cluster']
        counts = [len(clusters[key])
                  for key in sorted(clusters, key=clusterOrder)]
        total = sum(counts)  # computed once instead of per element

        document = {
            '_id': scores['_id'],
            'contribution': (
                [count * 1.0 / total for count in counts] if total > 0 else []
            ),
        }
        inserted = userClusterContributionCollection.insert(document)
        print('Calculated for %s' % (inserted,))