def getLocationsCheckinDistribution(place):
    """Plot the distribution of per-location check-in counts on log-log axes.

    For every entry yielded by ``locationToUserMapIterator(place)`` the
    lengths of all epoch collections nested under ``users`` (three levels of
    dicts deep) are summed. The distribution of those totals, as computed by
    ``getDataDistribution``, is plotted and saved as
    ``locationsCheckinDistribution.png`` under
    ``placesAnalysisFolder % place['name']``.

    place -- mapping that provides at least ``place['name']``; it is also
             handed unchanged to ``locationToUserMapIterator``.
    """
    checkinsPerLocation = {}
    for locationEntry in locationToUserMapIterator(place):
        totalCheckins = 0
        # Nesting walked here: users -> (second level) -> (third level) -> epochs.
        # presumably the levels are user / day / day-block; verify against the iterator.
        for userVector in locationEntry['users'].itervalues():
            for dayVector in userVector.itervalues():
                for epochs in dayVector.itervalues():
                    totalCheckins += len(epochs)
        checkinsPerLocation[locationEntry['location']] = totalCheckins

    dataX, dataY = getDataDistribution(checkinsPerLocation.values())
    plt.loglog(dataX, dataY)

    outputFile = (placesAnalysisFolder % place['name']) + 'locationsCheckinDistribution.png'
    # Make sure the target directory exists before matplotlib writes into it.
    FileIO.createDirectoryForFile(outputFile)
    plt.savefig(outputFile)
def getLocationUserSpecificMads(locationVector):
    """Summarise the day-block usage of one location.

    ``locationVector['users']`` is walked as user -> day -> {dayBlock: count};
    each day block (converted with ``int``) is repeated ``count`` times to
    build a flat list per user and one for the whole location.

    Returns a 3-tuple:
      * ``getMAD`` of the per-user ``getMAD`` values,
      * ``getMAD`` of the location-wide list,
      * a dict mapping ``str(x)`` to ``y`` for the pairs returned by
        ``getDataDistribution`` on the location-wide list.
    """
    allDayBlocks = []
    perUserMads = []
    for userDays in locationVector['users'].itervalues():
        userDayBlocks = []
        for dayBlockCounts in userDays.itervalues():
            for dayBlock, count in dayBlockCounts.iteritems():
                # Expand the stored count back into individual observations.
                userDayBlocks.extend([int(dayBlock) for _ in range(count)])
        allDayBlocks.extend(userDayBlocks)
        perUserMads.append(getMAD(userDayBlocks))

    dataX, dataY = getDataDistribution(allDayBlocks)
    madOfUserMads = getMAD(perUserMads)
    madOfAllDayBlocks = getMAD(allDayBlocks)
    dayBlockDistribution = {str(x): y for x, y in zip(dataX, dataY)}
    return madOfUserMads, madOfAllDayBlocks, dayBlockDistribution
def plotLocation(locationName, locationId, locationClustering, dayBlockMeans, dayBlockStandardDeviations, colorMap):
    """Draw one curve per cluster class of a location, then save and clear the figure.

    locationName, locationId     -- used only for the plot title.
    locationClustering           -- mapping whose values are class labels; the
                                    distribution of those labels gives the
                                    classes and their sizes.
    dayBlockMeans,
    dayBlockStandardDeviations   -- sequences zipped against the class sizes.
    colorMap                     -- mapping from class label to plot colour.

    NOTE(review): ``scale`` and ``fileName`` are neither parameters nor locals
    of this function; presumably they are defined in an enclosing or module
    scope -- confirm.
    """
    classes, classDistribution = getDataDistribution(locationClustering.values())
    totalUsers = float(sum(classDistribution))
    classColors = [colorMap[c] for c in classes]

    clusterRows = zip(classDistribution, dayBlockMeans, dayBlockStandardDeviations, classColors)
    for classSize, classMean, classStd, classColor in clusterRows:
        # A deviation of exactly zero is replaced by 0.15 before scaling.
        if classStd == 0:
            classStd = 0.15
        plotNorm(classSize / totalUsers, scale(classMean), scale(classStd), color=classColor)

    plt.title('%s (%s)' % (locationName, locationId))
    plt.xlim(xmin=0, xmax=24)
    # plt.show()
    plt.savefig(fileName)
    plt.clf()
def plotLocationDistribution(file, **conf):
    """Show the distribution of ``location_db_mad`` values on a semilog-y plot.

    file -- path passed to ``FileIO.iterateJsonFromFile``; every record must
            carry a ``location_db_mad`` key.
    conf -- keyword options; ``conf['scale']`` is required and is used to
            relabel the x ticks as ``tick * 2 * scale + scale``.

    NOTE(review): a second function named ``plotLocationDistribution`` (taking
    no arguments) appears later in this file; if both live at module level the
    later definition rebinds this name -- confirm which one callers get.
    """
    binScale = conf['scale']
    madValues = [record['location_db_mad'] for record in FileIO.iterateJsonFromFile(file)]

    plt.xlabel('locations mad')
    plt.ylabel('# of locations')

    dataX, dataY = getDataDistribution(madValues)
    # Pre-formatted into one string so the output matches ``print dataX, dataY``.
    print('%s %s' % (dataX, dataY))
    plt.semilogy(dataX, dataY, marker='o', color='m')
    # plt.xlim(xmin=-0.1, xmax=2.6)
    # plt.ylim(ymax=10**6)

    tickPositions, _ = plt.xticks()
    tickLabels = [position * 2 * binScale + binScale for position in tickPositions]
    plt.xticks(tickPositions, tickLabels)
    plt.legend()
    plt.show()
def plotLocationToUserDistribution(file, **conf):
    """Plot, per ``location_db_mad`` value, the distribution of ``users_db_mad``.

    Records read from ``file`` are grouped by ``location_db_mad``; each group
    gets its own subplot (in sorted key order) showing the distribution of the
    group's ``users_db_mad`` values on a semilog-y axis.

    conf -- keyword options: ``scale`` (required; used for the legend label
            and tick relabelling as ``v * 2 * scale + scale``), and ``row`` /
            ``column`` (subplot grid, read once per group).
    """
    binScale = conf['scale']
    usersMadByLocationMad = defaultdict(list)
    for record in FileIO.iterateJsonFromFile(file):
        usersMadByLocationMad[record['location_db_mad']].append(record['users_db_mad'])

    plt.xlabel('users mad')
    plt.ylabel('# of locations')

    for subplotIndex, locationMad in enumerate(sorted(usersMadByLocationMad), 1):
        plt.subplot(conf['row'], conf['column'], subplotIndex)
        dataX, dataY = getDataDistribution(usersMadByLocationMad[locationMad])
        # Pre-formatted into one string so the output matches ``print i, dataX, dataY``.
        print('%s %s %s' % (locationMad, dataX, dataY))
        seriesLabel = '%s' % (locationMad * 2 * binScale + binScale)
        plt.semilogy(dataX, dataY, marker='o', label=seriesLabel, color='m')
        # plt.xlim(xmin=-0.1, xmax=1.1)
        # plt.ylim(ymax=10**6)
        tickPositions, _ = plt.xticks()
        print(tickPositions)
        plt.xticks(tickPositions, [position * 2 * binScale + binScale for position in tickPositions])
        plt.legend()
    plt.show()
def plotLocationDistribution():
    '''Show, one figure per clustered location, a curve for each user cluster.

    Types of locations seen:
        => Locations where different people have to be at same time:
           Example office, pub
        => Locations that different people choose to go at different times:
           cafe+party place
    Big cluster suggests most people who come to a location go to similar
    locations (implies similar people). Their mean suggests the most popular
    time to go to that location.

    Reads locations from ``locationClustersFile``, skips those without a
    ``clustering`` entry, and looks up the display name in
    ``venuesCollection`` by ``lid``.
    '''
    def scale(val):
        # Alternative left in the source: val*2*4+2
        return (val * 4) + 2

    for location in FileIO.iterateJsonFromFile(locationClustersFile):
        if 'clustering' not in location:
            continue

        clustering = location['clustering']
        classes, classDistribution = getDataDistribution(clustering[1].values())
        clusterMeans, clusterStds = clustering[2][0], clustering[2][1]
        totalUsers = float(sum(classDistribution))

        for classSize, classMean, classStd in zip(classDistribution, clusterMeans, clusterStds):
            # A deviation of exactly zero is replaced by 0.15 before scaling.
            if classStd == 0:
                classStd = 0.15
            print(classSize / totalUsers)
            plotNorm(classSize / totalUsers, scale(classMean), scale(classStd))

        venue = venuesCollection.find_one({'lid': location['location']})
        title = unicode(venue['n']).encode("utf-8") if venue is not None else ''
        plt.title('%s (%s)' % (title, location['location']))
        plt.xlim(xmin=0, xmax=24)
        print('comes here')
        plt.show()
def analyzeFrequentLocations(minUserLocations, minCalculatedSupport):
    """Plot the distribution of frequent-location itemset lengths (log-log).

    Itemsets come from ``Mahout.iterateFrequentLocationsFromFIMahout`` called
    with both arguments and ``yieldSupport=True``; only the itemset length is
    used here. The figure is saved as
    ``location_itemsets_distribution_<minUserLocations>.pdf``.
    """
    # Two other variants of this analysis were left commented out in this
    # function: a scatter of support against itemset length
    # ('sup_vs_itemset_length_%s.pdf') and a log-log distribution of the
    # support values ('sup_distribution_%s.pdf').
    frequentItemsets = Mahout.iterateFrequentLocationsFromFIMahout(
        minUserLocations, minCalculatedSupport, yieldSupport=True
    )
    itemsetLengths = [len(itemset) for itemset, _ in frequentItemsets]

    dataX, dataY = getDataDistribution(itemsetLengths)
    plt.loglog(dataX, dataY)
    plt.title("%s" % minUserLocations)
    plt.ylabel("Count")
    plt.xlabel("Location itemset length")
    plt.savefig("location_itemsets_distribution_%s.pdf" % minUserLocations)
def plotLocationGraphEdgeDistribution():
    """Plot the distribution of edge ``'w'`` values of the location graph.

    Edges are streamed from the JSON file named by ``locationGraph``; the
    distribution is drawn on log-log axes and saved as ``locationGraph.pdf``
    in the current working directory.
    """
    # Kept as a generator, exactly as the values were handed over before.
    edgeWeights = (edge['w'] for edge in FileIO.iterateJsonFromFile(locationGraph))
    dataX, dataY = getDataDistribution(edgeWeights)
    plt.loglog(dataX, dataY)
    outputName = 'locationGraph'
    plt.savefig('%s.pdf' % outputName)
def plotNoOfClusersPerLocationDistribution(place):
    """Plot how many clusters each location has and show the figure.

    The number of entries in ``location['clusters']`` is collected for every
    location yielded by ``locationClusterMeansIterator(place)``. The
    distribution of those counts is plotted, with the mean and standard
    deviation of the counts in the title.

    place -- passed unchanged to ``locationClusterMeansIterator``.
    """
    clustersPerLocation = [
        len(location['clusters']) for location in locationClusterMeansIterator(place)
    ]
    clusterCount, distribution = getDataDistribution(clustersPerLocation)
    plt.plot(clusterCount, distribution)

    # Raw string: ``\#``, ``\m`` and ``\s`` are not valid escape sequences, so
    # a plain literal only yields the intended backslashes by accident (and
    # warns on newer interpreters). The resulting text is unchanged.
    titleTemplate = r'\# of clusters / location (\mu=%0.2f \sigma=%0.2f)'
    titleText = titleTemplate % (np.mean(clustersPerLocation), np.std(clustersPerLocation))
    plt.title(getLatexForString(titleText))
    plt.show()