def clusterLevelResults():
    """Rebuild the cluster-level result collection from the influence scores.

    The collection returned by ``getclusterLevelResultCollection()`` is
    dropped and recreated with one document per cluster: ``_id`` is the
    cluster index as a string and ``users`` starts as an empty list.  The
    number of clusters is the length of the ``score`` list of the first
    document found in the collection returned by
    ``getFullInfluenceScoreCollection()``.

    Every score document is then pushed into each cluster's ``users`` array
    as ``{'id': ..., 'name': ..., 'score': ...}``, where ``score`` is the
    entry of the document's ``score`` list at that cluster's index.  The
    push keeps each array sorted by descending score and capped at the
    1000 highest-scoring entries.

    If there are no score documents, the result collection is dropped and
    nothing is inserted.
    """
    scoreCollection = getFullInfluenceScoreCollection()
    clusterResCollection = getclusterLevelResultCollection()

    # find_one() returns None when the collection holds no documents; treat
    # that as "zero clusters" instead of failing on None['score'].
    firstScores = scoreCollection.find_one()
    numCluster = len(firstScores['score']) if firstScores is not None else 0

    clusterResCollection.drop()
    for i in range(numCluster):
        clusterResCollection.insert({'_id': str(i), 'users': []})

    for scores in scoreCollection.find():
        fid = scores['_id']
        name = scores['name']
        for i, score in enumerate(scores['score']):
            doc = {'id': fid, 'name': name, 'score': score}
            cluster = str(i)
            # Formatted as one string so the line parses (and prints the
            # same text) on both Python 2 and Python 3.
            print("%s %s" % (i, doc))
            # The array is sorted by descending score, so the best entries
            # come first: a positive $slice keeps the first 1000 of them
            # (a negative one would keep the 1000 lowest scores instead).
            # upsert=False: only the cluster documents created above are
            # updated; an index with no matching document is ignored.
            clusterResCollection.update(
                {'_id': cluster},
                {
                    '$push': {
                        'users': {
                            '$each': [doc],
                            '$sort': {'score': -1},
                            '$slice': 1000,
                        }
                    }
                },
                upsert=False)
def clusterLevelResults():
    """Rebuild the per-cluster rankings, then dump one cluster's top users.

    Step 1 drops the collection returned by
    ``getclusterLevelResultCollection()`` and recreates one document per
    cluster (``_id`` is the cluster index as a string, ``users`` starts
    empty).  The number of clusters is the length of the ``score`` list of
    the first document in the collection returned by
    ``getFullInfluenceScoreCollection()``.  Every score document is then
    pushed into each cluster's ``users`` array as
    ``{'id': ..., 'name': ..., 'score': ...}``, kept sorted by descending
    score and capped at the 1000 highest-scoring entries.  With no score
    documents the collection is dropped and nothing is inserted.

    Step 2 finds the index of the cluster whose members include ``PID`` and
    writes a header line plus the first five entries of that cluster's
    ``users`` array (one ``name id`` line each) to ``tempfile.txt``.

    NOTE(review): ``clusters``, ``args``, ``PID`` and ``cleanName`` are not
    defined inside this function, so they are looked up in an enclosing
    scope -- confirm they exist at module level.
    """
    scoreCollection = getFullInfluenceScoreCollection()
    clusterResCollection = getclusterLevelResultCollection()

    # find_one() returns None when the collection holds no documents; treat
    # that as "zero clusters" instead of failing on None['score'].
    firstScores = scoreCollection.find_one()
    numCluster = len(firstScores['score']) if firstScores is not None else 0

    clusterResCollection.drop()
    for i in range(numCluster):
        clusterResCollection.insert({'_id': str(i), 'users': []})

    for scores in scoreCollection.find():
        fid = scores['_id']
        name = scores['name']
        for i, score in enumerate(scores['score']):
            doc = {'id': fid, 'name': name, 'score': score}
            cluster = str(i)
            # Formatted as one string so the line parses (and prints the
            # same text) on both Python 2 and Python 3.
            print("%s %s" % (i, doc))
            # The array is sorted by descending score, so the best entries
            # come first: a positive $slice keeps the first 1000 of them
            # (a negative one would keep the 1000 lowest scores instead).
            # upsert=False: only the cluster documents created above are
            # updated; an index with no matching document is ignored.
            clusterResCollection.update(
                {'_id': cluster},
                {
                    '$push': {
                        'users': {
                            '$each': [doc],
                            '$sort': {'score': -1},
                            '$slice': 1000,
                        }
                    }
                },
                upsert=False)

    # Each cluster is an iterable of indexes into ``args``; a member matches
    # when its cleaned name equals PID.  The ``break`` only leaves the member
    # loop, so if PID occurs in several clusters the last one wins.
    # NOTE(review): when PID is in no cluster, ``cluster`` still holds the
    # last value assigned by the rebuild loop above -- confirm whether that
    # fallback is intended.
    for j, c in enumerate(clusters):
        for i in c:
            if cleanName(args[i]) == PID:
                cluster = j
                break

    clusterCollection = getclusterLevelResultCollection()

    # Only the first five entries overall are written.
    remaining = 5
    # The with-block closes (and flushes) the file before 'done' is printed.
    with open('tempfile.txt', 'w') as f:
        f.write("Influencers for this cluster are\n")
        for influ in clusterCollection.find({'_id': str(cluster)}):
            for each in influ['users'][:remaining]:
                # write() accepts exactly one string, so name and id are
                # formatted into a single line.
                f.write("%s %s\n" % (each['name'], each['id']))
                remaining -= 1
    print('done')
Пример #4
0
    # Find the index of the cluster whose members include PID.  Each cluster
    # is an iterable of indexes into ``args``; a member matches when its
    # cleaned name equals PID.  The ``break`` only leaves the member loop, so
    # if PID occurs in several clusters the last one wins.
    # NOTE(review): ``cluster`` is assigned here only on a match -- confirm
    # that PID is always present in ``clusters``.
    for j, c in enumerate(clusters):
        array = []
        for i in c:
            array.append(args[i])
            if cleanName(args[i]) == PID:
                cluster = j
                break
        array = map(cleanName, array)
        doc = {'cluster': j, 'pages': array}

    clusterCollection = getclusterLevelResultCollection()
    # NOTE(review): ``array``, ``doc`` and ``count`` are not read anywhere in
    # the visible part of this function; they are kept in case code outside
    # this view relies on them.
    count = 0

    # Write a header plus the first five entries of the matching cluster's
    # ``users`` array, one "name id" line per entry.
    remaining = 5
    # The with-block closes (and flushes) the file before 'done' is printed.
    with open('tempfile.txt', 'w') as f:
        f.write("Influencers for this cluster are\n")
        for influ in clusterCollection.find({'_id': str(cluster)}):
            for each in influ['users'][:remaining]:
                # write() accepts exactly one string, so name and id are
                # formatted into a single line.
                f.write("%s %s\n" % (each['name'], each['id']))
                remaining -= 1
    print('done')