示例#1
0
    def scatterPlot(clustering, location, fileName):
        '''Draw one normal curve per valid cluster from the hours at which the
        location's clustered users were seen, then save and clear the figure.

        clustering -- indexable; clustering[2]['clusters'] maps a cluster id to
                      its users and clustering[3] maps a cluster id to the
                      colour passed to plotNorm.
        location   -- dict providing 'users', 'name', 'location', 'categories'
                      and 'tags'.
        fileName   -- path handed to plt.savefig.
        '''
        locationUsers = location['users']
        # Cluster id for every clustered user that also appears at this location.
        clusterOfUser = {}
        for clusterId, members in clustering[2]['clusters'].iteritems():
            for member in members:
                if member in locationUsers: clusterOfUser[member] = clusterId
        colorOfCluster = clustering[3]
        # hourCounts[clusterId][hour] -> number of timestamps that fell in that hour.
        hourCounts = defaultdict(dict)
        for user, userVector in locationUsers.iteritems():
            if user not in clusterOfUser: continue
            clusterId = clusterOfUser[user]
            # userVector is a two-level mapping whose leaves are epoch lists
            # (presumably day -> day block; verify against the producer).
            for outerKey in userVector:
                for innerKey in userVector[outerKey]:
                    for epoch in userVector[outerKey][innerKey]:
                        # Local hour shifted back by 6 and wrapped to 0..23
                        # (NOTE(review): the 6 looks like a fixed offset - confirm).
                        hour = (datetime.datetime.fromtimestamp(epoch).hour - 6) % 24
                        # The bucket is only created once a timestamp exists,
                        # so clusters without events never get an entry.
                        bucket = hourCounts[clusterId]
                        bucket[hour] = bucket.get(hour, 0) + 1
        # Normaliser: every hour value counted once per occurrence, over all clusters.
        grandTotal = float(sum(hour * count
                               for _, counts in hourCounts.iteritems()
                               for hour, count in counts.iteritems()))
        for clusterId, counts in hourCounts.iteritems():
            if clusterId not in validClusters: continue
            # Expand the histogram back into one sample per occurrence.
            samples = []
            for hour, count in counts.iteritems():
                samples.extend([hour] * count)
            mean, std = np.mean(samples), np.std(samples)
            weight = sum(samples) / grandTotal
            # A zero deviation is replaced by a small random one.
            spread = std if std != 0 else random.uniform(0.1, 0.5)
            plotNorm(weight, mean, spread, color=colorOfCluster[clusterId])
        plt.title('%s (%s, %s, %s)'%(location['name'],location['location'], location['categories'], location['tags']))
        plt.legend()
        plt.xlim(xmin=0,xmax=24)
        plt.savefig(fileName)
        plt.clf()
示例#2
0
    def plotLocation(locationName, locationId, locationClustering, dayBlockMeans, dayBlockStandardDeviations, colorMap):
        '''Draw one normal curve per class of a location's clustering, then
        save and clear the figure.

        locationName, locationId   -- used only for the plot title.
        locationClustering         -- mapping whose values are handed to
                                      getDataDistribution.
        dayBlockMeans,
        dayBlockStandardDeviations -- sequences zipped with the class
                                      distribution, one entry per class.
        colorMap                   -- maps a class to its plot colour.

        NOTE(review): fileName and scale are neither parameters nor locals;
        they must come from an enclosing or module scope - confirm.
        '''
        classes, classDistribution = getDataDistribution(locationClustering.values())
        userCount = float(sum(classDistribution))
        classColors = [colorMap[c] for c in classes]
        for classSize, classMean, classStd, classColor in zip(classDistribution, dayBlockMeans, dayBlockStandardDeviations, classColors):
            # A zero deviation is replaced by a fixed small one.
            if classStd == 0: classStd = 0.15
            plotNorm(classSize/userCount, scale(classMean), scale(classStd), color=classColor)
        plt.title('%s (%s)'%(locationName,locationId))
        plt.xlim(xmin=0,xmax=24)
        plt.savefig(fileName)
        plt.clf()
示例#3
0
 def plotGaussianGraphsForClusters(place):
     '''Save one figure per location of a place, with one labelled normal
     curve per cluster.

     place -- dict; passed to Analysis.iterateLocationsWithClusterDetails and
              its 'name' is substituted into placesGaussianImagesFolder.

     Each location yielded must provide 'total', 'clustersInfo', 'name' and
     'location'; every clustersInfo value must provide 'mean', 'std',
     'clusterSum' and 'color'.
     '''
     for location in Analysis.iterateLocationsWithClusterDetails(place):
         # float() keeps clusterSum/total a true ratio: under Python 2 two
         # integers would floor-divide to 0.
         total = float(location['total'])
         for clusterId, data in location['clustersInfo'].iteritems():
             std = data['std']
             # A zero deviation is replaced by a small random one.
             if std == 0: std = random.uniform(0.1, 0.5)
             plotNorm(data['clusterSum']/total, data['mean'], std, color=data['color'], label=str(clusterId))
         # NOTE(review): the other plotting helpers in this file use xmax=24;
         # confirm 23 is intended here.
         plt.xlim(xmin=0, xmax=23)
         plt.legend()
         plt.title(location['name'])
         # <images folder for the place>/<location type>/<location id>.png,
         # with spaces and dots in the id replaced.
         fileName = '/'.join([placesGaussianImagesFolder%place['name'], getLocationType(location), location['location'].replace(' ', '_').replace('.', '+')+'.png'])
         print(fileName)
         FileIO.createDirectoryForFile(fileName)
         plt.savefig(fileName)
         plt.clf()
示例#4
0
def plotLocationDistribution():
    '''Show, for every location that has a clustering, one normal curve per
    user class.

    Types of locations seen:
        => Locations where different people have to be at the same time: e.g. office, pub
        => Locations that different people choose to go to at different times: cafe + party place
    A big cluster suggests most people who come to a location go to similar
    locations (implies similar people). Its mean suggests the most popular
    time to go to that location.

    Locations are read from locationClustersFile; each location['clustering']
    must provide the class assignment at index 1 and the per-class
    (means, deviations) pair at index 2. The venue name for the title is
    looked up in venuesCollection by 'lid'.
    '''
    # Linear map applied to means and deviations before plotting
    # (presumably onto the 0-24 x axis - confirm).
    def scale(val): return (val*4)+2
    for location in FileIO.iterateJsonFromFile(locationClustersFile):
        if 'clustering' not in location: continue
        clustering = location['clustering']
        _, classDistribution = getDataDistribution(clustering[1].values())
        means, deviations = clustering[2][0], clustering[2][1]
        totalUsers = float(sum(classDistribution))
        for dist, mu, sigma in zip(classDistribution, means, deviations):
            # A zero deviation is replaced by a fixed small one.
            if sigma == 0: sigma = 0.15
            print(dist/totalUsers)
            plotNorm(dist/totalUsers, scale(mu), scale(sigma))
        venue = venuesCollection.find_one({'lid':location['location']})
        # Fall back to an empty title when the venue is unknown.
        if venue is not None: title = unicode(venue['n']).encode("utf-8")
        else: title = ''
        plt.title('%s (%s)'%(title,location['location']))
        plt.xlim(xmin=0,xmax=24)
        plt.show()