def iterateSpots():
     """Yield the KML for every group of mutually reachable nearby locations.

     Each location id produced by ``locationsForUsIterator`` is linked to every
     document that ``locationsCollection`` returns for a ``$within``/``$center``
     query around it.  Connected components of the resulting graph that contain
     at least ``minimumLocationsPerSpot`` nodes are passed to
     ``getKMLForCluster`` and the result is yielded.

     ``radiusInMiles``, ``minUniqueUsersCheckedInTheLocation`` and
     ``minimumLocationsPerSpot`` are read from the enclosing scope.
     """
     # NOTE(review): SOURCE defines a second ``iterateSpots`` right after this
     # one; if both live in the same module the later definition replaces this.
     spotGraph = nx.Graph()
     for lid in locationsForUsIterator(minUniqueUsersCheckedInTheLocation):
         # Circle around the location; the radius is converted by
         # convertMilesToRadians before being handed to the query.
         circle = [getLocationFromLid(lid), convertMilesToRadians(radiusInMiles)]
         for neighbour in locationsCollection.find({"l": {"$within": {"$center": circle}}}):
             spotGraph.add_edge(neighbour['_id'], lid)
     for component in nx.connected_components(spotGraph):
         if len(component) < minimumLocationsPerSpot:
             continue
         yield getKMLForCluster(component)
 def iterateSpots():
     """Yield the KML for every MCL cluster of close, frequently shared locations.

     Builds a graph from the documents in ``locationToLocationCollection``.
     A document's ``_id`` is split on whitespace: the first two tokens form
     one location id and the remaining tokens the other.  An edge is added
     when both ids are in the set produced by ``locationsForUsIterator`` and
     the document's ``d`` value is at most ``graphNodesDistanceInMiles``; the
     document's ``u`` value is stored as the edge attribute ``w``.

     Every connected component with at least ``minimumLocationsPerSpot`` nodes
     is clustered with ``clusterUsingMCLClustering`` (inflation 20), and each
     resulting cluster that is itself large enough is passed to
     ``getKMLForCluster`` and yielded.

     Side effect: prints "<nodes> <edges> <clusters>" for every component that
     is clustered.
     """
     locationsToCheck = set(locationsForUsIterator(minUniqueUsersCheckedInTheLocation))
     graph = nx.Graph()
     for e in locationToLocationCollection.find():
         tokens = e['_id'].split()
         l1, l2 = ' '.join(tokens[:2]), ' '.join(tokens[2:])
         if l1 not in locationsToCheck or l2 not in locationsToCheck:
             continue
         if e['d'] > graphNodesDistanceInMiles:
             continue
         # Node names carry '_' instead of ' '; the substitution is undone
         # before the cluster is handed to getKMLForCluster below.
         # The weight is passed as a keyword: the positional attribute dict is
         # only accepted by networkx 1.x, the keyword form by 1.x and 2.x.
         graph.add_edge(l1.replace(' ', '_'), l2.replace(' ', '_'), w=e['u'])
     for locations in nx.connected_components(graph):
         if len(locations) < minimumLocationsPerSpot:
             continue
         # Build the component subgraph once instead of once per use.
         component = graph.subgraph(locations)
         # Sizes are read before clustering so the report matches a fresh
         # subgraph even if the clustering helper touches its argument.
         nodeCount, edgeCount = component.number_of_nodes(), component.number_of_edges()
         clusters = clusterUsingMCLClustering(component, inflation=20)
         # Single pre-formatted argument: prints the same text under the
         # Python 2 print statement and the Python 3 print function.
         print("%s %s %s" % (nodeCount, edgeCount, len(clusters)))
         for cluster in clusters:
             if len(cluster) >= minimumLocationsPerSpot:
                 yield getKMLForCluster([c.replace('_', ' ') for c in cluster])
# Example no. 3
# 0
    return dict((u['user'], dict(sorted(u['locations'].iteritems(), key=itemgetter(1), reverse=True)[:10000])) for u in filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation,  fullRecord = True))

def getDayBlockMeansForClusters(users, userClusterMap):
    """Summarise the day-block values of each cluster by mean and deviation.

    Args:
        users: mapping ``user -> day -> dayBlock -> count``.  Every
            ``dayBlock`` key must be convertible with ``int()``; ``count`` is
            how many times that block is repeated in the user's sample.
        userClusterMap: mapping ``user -> cluster key`` for every user in
            ``users``.

    Returns:
        A list of ``(cluster key, mean, standard deviation)`` tuples, one per
        cluster that has at least one user, computed with ``np.mean`` and
        ``np.std`` over the pooled day-block values of the cluster's users.
    """
    blocksPerCluster = defaultdict(list)
    for user in users:
        userDays = users[user]
        userBlocks = []
        for day in userDays:
            blockCounts = userDays[day]
            for dayBlock in blockCounts:
                # Repeat the block once per recorded occurrence.
                for _ in range(blockCounts[dayBlock]):
                    userBlocks.append(int(dayBlock))
        blocksPerCluster[userClusterMap[user]].extend(userBlocks)
    summary = []
    for clusterKey, blocks in blocksPerCluster.items():
        summary.append((clusterKey, np.mean(blocks), np.std(blocks)))
    return summary

def getAverageDistanceBetweenClusters(meanDayblockValues):
    """Return the mean absolute difference over all unordered pairs of values.

    Args:
        meanDayblockValues: iterable of numbers, one per cluster.

    Returns:
        ``np.mean`` of ``|a - b|`` for every 2-combination of the input.
    """
    pairwiseGaps = []
    for first, second in combinations(meanDayblockValues, 2):
        pairwiseGaps.append(np.abs(first - second))
    return np.mean(pairwiseGaps)
    

# Module-level data loaded once at import time.
# userVectors is indexed as userVectors[user][location id] by clusterLocation,
# which uses the stored value as a repetition count.
userVectors = getUserVectors()
# Every id produced by locationsForUsIterator for the configured minimum,
# kept as a set.
locationsInUS = set(locationsForUsIterator(minUniqueUsersCheckedInTheLocation))

def clusterLocation(location):
    dimensions = defaultdict(int)
    for u in location['users']:
        for lid in userVectors[u]: dimensions[lid]+=1
    dimensions = [d for d in dimensions if dimensions[d]>=2]
    userVectorsToCluster = [(u, ' '.join([l.replace(' ', '_') for l in userVectors[u] if l in dimensions for j in range(userVectors[u][l])])) for u in location['users']]
    resultsForVaryingK = []
    for k in range(2,6):
        try:
            cluster = KMeansClustering(userVectorsToCluster, k).cluster()
            userClusterMap = dict((k1,v) for k1,v in zip(location['users'], cluster))
            dayBlockMeansForClusters = getDayBlockMeansForClusters(location['users'], userClusterMap)
            userClusterMap = dict([(str(k2), v) for k2, v in userClusterMap.iteritems()])
            resultsForVaryingK.append([k, userClusterMap, zip(*dayBlockMeansForClusters)[1:], getAverageDistanceBetweenClusters(zip(*dayBlockMeansForClusters)[1])])