def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
    """Plot, on a world map, the mean hashtag-adoption time difference (in hours)
    between *lattice* (default New York) and each of its neighbor lattices,
    then save the figure to ../images/temporalDistanceExample.png.
    """
    distances = defaultdict(dict)
    # Scan the lattice graph file until the requested lattice is found.
    for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))):
        if latticeObject['id']==lattice:
            latticeHashtagsSet = set(latticeObject['hashtags'])
            for neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
                distances[neighborLattice] = {}
                # Drop neighbor hashtags whose temporal distance is outside 1.5*IQR.
                neighborHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(latticeObject['hashtags'], neighborHashtags, findLag=False)
                neighborHashtagsSet = set(neighborHashtags)
                # Jaccard similarity of hashtag sets.
                distances[neighborLattice]['similarity']=len(latticeHashtagsSet.intersection(neighborHashtagsSet))/float(len(latticeHashtagsSet.union(neighborHashtagsSet)))
                # Mean absolute difference of first-occurrence epochs, converted seconds -> hours.
                distances[neighborLattice]['temporalDistance']=np.mean([abs(latticeObject['hashtags'][k][0]-neighborHashtags[k][0]) for k in neighborHashtags if k in latticeObject['hashtags']])/(60.*60.)
                distances[neighborLattice]['geoDistance']=getHaversineDistanceForLids(latticeObject['id'].replace('_', ' '), neighborLattice.replace('_', ' '))
            # Only one lattice matches; stop scanning the file.
            break
    dataPoints = []
    ax = plt.subplot(111)
    for k, data in distances.iteritems():
        dataPoints.append((getLocationFromLid(k.replace('_', ' ')), data['temporalDistance']))
    # Sort by temporal distance so the colorbar gradient renders consistently.
    points, colors = zip(*sorted(dataPoints, key=itemgetter(1)))
    sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
    # Mark the source lattice in green.
    plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
    divider = make_axes_locatable(ax)
    plt.title('Average time difference from New York')
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(sc, cax=cax)
#    plt.show()
    plt.savefig('../images/temporalDistanceExample.png')
def iterateFrequentLocationsFromFIMahout(
    minLocationsTheUserHasCheckedin,
    minUniqueUsersCheckedInTheLocation,
    minCalculatedSupport,
    minLocationsInItemset=0,
    extraMinSupport=minSupport,  # NOTE: default captured from module-level minSupport at def time
    yieldSupport=False,
    lids=False,
):
    """Parse Mahout FIM output and yield frequent location itemsets.

    Each "Key:" line is parsed into (itemset, support).  With lids=False the
    itemset holds location tuples (via getLocationFromLid); with lids=True it
    holds raw lid strings.  Itemsets below extraMinSupport or shorter than
    minLocationsInItemset are skipped.  With yieldSupport=True the support is
    yielded alongside the (US-bounded) itemset.

    NOTE(review): with lids=True and yieldSupport=False, isWithinBoundingBox
    is applied to raw lid strings, not location tuples — verify that
    combination is ever used.
    """
    # for line in FileIO.iterateLinesFromFile(locationsFIMahoutOutputFile%(minUserLocations, minCalculatedSupport)):
    for line in FileIO.iterateLinesFromFile(
        locationsFIMahoutOutputFile % (minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, minCalculatedSupport)
    ):
        if line.startswith("Key:"):
            # Line shape: "Key: ... Value: ([lid lid ...],support)"
            data = line.split("Value: ")[1][1:-1].split(",")
            if not lids:
                locationItemset, support = (
                    [getLocationFromLid(i.replace("_", " ")) for i in data[0][1:-1].split()],
                    int(data[1]),
                )
            else:
                locationItemset, support = [i.replace("_", " ") for i in data[0][1:-1].split()], int(data[1])
            if support >= extraMinSupport and len(locationItemset) >= minLocationsInItemset:
                if not yieldSupport:
                    yield [location for location in locationItemset if isWithinBoundingBox(location, us_boundary)]
                else:
                    yield [
                        location
                        for location in locationItemset
                        if isWithinBoundingBox(getLocationFromLid(location), us_boundary)
                    ], support
def map_hashtag_object_to_tuo_norm_iid_and_interval_stats(self, hashtag, hashtag_object): def distance_from_overall_locality_stat(overall_stat, current_stat): return overall_stat-current_stat ltuo_iid_and_tuo_interval_and_lids = \ get_ltuo_iid_and_tuo_interval_and_lids(hashtag_object) peak_tuo_iid_and_tuo_interval_and_lids = \ max(ltuo_iid_and_tuo_interval_and_lids, key=lambda (_, (__, lids)): len(lids)) peak_iid = peak_tuo_iid_and_tuo_interval_and_lids[0] # total_occurrences = sum(len(data[1][1]) for data in peak_tuo_iid_and_tuo_interval_and_lids) # Overall locality stats overall_mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object) overall_points = [ getLocationFromLid(lid.replace('_', ' ')) for lid,_ in hashtag_object['ltuo_lid_and_s_interval']] overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False) overall_focus = focus(overall_mf_lid_to_occurrence_count)[1] overall_coverage = getRadiusOfGyration(overall_points) total_occurrences = sum(len(lids) for (iid, (interval, lids)) in ltuo_iid_and_tuo_interval_and_lids) for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids: mf_lid_to_occurrence_count = defaultdict(float) for lid in lids: mf_lid_to_occurrence_count[lid]+=1 points = [getLocationFromLid(lid.replace('_', ' ')) for lid in lids] current_entropy = entropy(mf_lid_to_occurrence_count, False) current_focus = focus(mf_lid_to_occurrence_count)[1] current_coverage = getRadiusOfGyration(points) yield iid-peak_iid, [len(lids)/total_occurrences, current_entropy, current_focus, current_coverage, distance_from_overall_locality_stat(overall_entropy, current_entropy), distance_from_overall_locality_stat(overall_focus, current_focus), distance_from_overall_locality_stat(overall_coverage, current_coverage),]
def getKMLForCluster(cluster):
    """Return [(location, venue-name)] pairs for a cluster of lids.

    Clusters of 3 or fewer lids are considered too small and yield an empty
    list.  Venue names are looked up in venuesCollection; lids without a venue
    record get an empty-string name.
    """
    clusterToYield = []
    if len(cluster)>3:
        for lid in cluster:
            title = venuesCollection.find_one({'lid':lid})
            # 'is not None' instead of '!= None': identity test is the
            # idiomatic (and __eq__-proof) way to check for a missing record.
            if title is not None:
                clusterToYield.append((getLocationFromLid(lid), unicode(title['n']).encode("utf-8")))
            else:
                clusterToYield.append((getLocationFromLid(lid), ''))
    return clusterToYield
def plotLocationGraphOnMap(graph):
    """Plot a location graph on a US map: nodes as magenta points, edges as
    great circles shaded by normalized edge weight (Purples colormap)."""
    points = map(lambda lid:getLocationFromLid(lid.replace('_', ' ')), graph.nodes())
    _, m =plotPointsOnUSMap(points, s=10, lw=0, c='m', returnBaseMapObject=True)
    print graph.number_of_edges(), graph.number_of_nodes()
    # +0.0 forces float so w/totalEdgeWeight below is true division in Python 2.
    totalEdgeWeight = max([d['w'] for _,_,d in graph.edges(data=True)])+0.0
    for u, v, data in graph.edges(data=True):
        # Rebind u, v from lid strings to (lat, lon) tuples.
        u, v, w = getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
        # drawgreatcircle takes (lon, lat) ordering.
        m.drawgreatcircle(u[1],u[0],v[1],v[0],color=cm.Purples(w/totalEdgeWeight), alpha=0.5)
    plt.show()
def plotLatticeTemporalDistanceInHoursOnMap(latticeGraphType, latticeObject):
    """Plot a lattice's neighbors colored by temporal-distance link value
    (autumn colormap); the lattice itself is marked green.  Returns the
    scatter handle so callers can attach a colorbar."""
    # Transform the lattice object with the graph-type-specific method.
    latticeObject = latticeGraphType['method'](latticeObject)
    # reverse=True: highest values plotted first so low values draw on top.
    points, colors = zip(*sorted([(getLocationFromLid(neighborId.replace('_', ' ')), val) for neighborId, val in latticeObject['links'].iteritems()], key=itemgetter(1), reverse=True))
    cm = matplotlib.cm.get_cmap('autumn')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0, vmin=0)
    plotPointsOnWorldMap([getLocationFromLid(latticeObject['id'].replace('_', ' '))], c='#00FF00', lw = 0)
    plt.xlabel(latticeGraphType['title'])
    plt.colorbar(sc)
    return sc
def getClusterForKML(cluster):
    """Return [(location, venue-name)] pairs for a cluster of lids.

    Clusters of 3 or fewer lids yield an empty list.  Venue names come from
    venuesCollection; lids without a venue record get an empty-string name.
    (Duplicate of getKMLForCluster elsewhere in this file.)
    """
    clusterToYield = []
    if len(cluster) > 3:
        for lid in cluster:
            title = venuesCollection.find_one({"lid": lid})
            # 'is not None' instead of '!= None': idiomatic missing-record check.
            if title is not None:
                clusterToYield.append((getLocationFromLid(lid), unicode(title["n"]).encode("utf-8")))
            else:
                clusterToYield.append((getLocationFromLid(lid), ""))
    return clusterToYield
def plotLatticeTemporalClosenessScoresOnMap(latticeGraphType, latticeObject):
    """Plot a lattice's neighbors colored by normalized temporal-closeness
    score (YlOrRd colormap); the lattice itself is marked green.  Returns the
    scatter handle so callers can attach a colorbar."""
    latticeObject = latticeGraphType['method'](latticeObject)
    # Normalize link scores in place before plotting.
    LatticeGraph.normalizeNode(latticeObject)
    points, colors = zip(*sorted([(getLocationFromLid(neighborId.replace('_', ' ')), val) for neighborId, val in latticeObject['links'].iteritems()], key=itemgetter(1)))
    cm = matplotlib.cm.get_cmap('YlOrRd')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0, vmin=0)
    plotPointsOnWorldMap([getLocationFromLid(latticeObject['id'].replace('_', ' '))], c='#00FF00', lw = 0)
    plt.xlabel(latticeGraphType['title'])
    plt.colorbar(sc)
    return sc
def plotLocationClustersOnMap(graph):
    """Cluster the location graph with affinity propagation and plot it on a
    US map, drawing only intra-cluster edges, each cluster in a random color."""
    noOfClusters, clusters = clusterUsingAffinityPropagation(graph)
    nodeToClusterIdMap = dict(clusters)
    colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)])
    # Regroup (node, clusterId) pairs into (clusterId, [members...]) lists.
    clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))]
    points, colors = zip(*map(lambda l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes()))
    # s=0: nodes are invisible; the call mainly creates the basemap object.
    _, m =plotPointsOnUSMap(points, s=0, lw=0, c=colors, returnBaseMapObject=True)
    for u, v, data in graph.edges(data=True):
        # Only draw edges whose endpoints share a cluster.
        if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]:
            color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
            m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=0.5)
    plt.show()
def nearbyLocations(lid, radiusInMiles):
    """Yield every location within radiusInMiles of the venue identified by lid.

    Uses a Mongo geospatial $within/$center query; the radius is converted
    from miles to radians as the query expects.
    """
    center = getLocationFromLid(lid)
    radiusInRadians = convertMilesToRadians(radiusInMiles)
    geoQuery = {"l": {"$within": {"$center": [center, radiusInRadians]}}}
    for location in locationsCollection.find(geoQuery):
        yield location
def load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary=[[-90,-180], [90, 180]], noOfInfluencers=None):
    """Return [(location, global influence score)] for locations inside *boundary*.

    The score is the larger of a location's mean incoming / mean outgoing
    influence; incoming-dominated locations get a negative score so that sign
    encodes direction of dominance.

    NOTE(review): the default *boundary* is a mutable list — it is only read
    here, but confirm no caller mutates it.
    """
    mf_location_to_global_influence_score = {}
    mf_location_to_mf_influence_type_to_influence_score = defaultdict(dict)
    mf_location_to_tuo_neighbor_location_and_locations_influencing_score = \
        dict(Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE))
    mf_location_to_tuo_neighbor_location_and_locations_influenced_score = \
        dict(Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers, InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE))
    # .keys()[:] copies the key list so entries can be deleted while iterating.
    for location in mf_location_to_tuo_neighbor_location_and_locations_influenced_score.keys()[:]:
        if not isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), boundary):
            if location in mf_location_to_tuo_neighbor_location_and_locations_influencing_score:
                del mf_location_to_tuo_neighbor_location_and_locations_influencing_score[location]
            del mf_location_to_tuo_neighbor_location_and_locations_influenced_score[location]
    no_of_locations = len(mf_location_to_tuo_neighbor_location_and_locations_influenced_score)
    # Mean incoming influence per location (sum of neighbor scores / #locations).
    for location, tuo_neighbor_location_and_locations_influencing_score in \
            mf_location_to_tuo_neighbor_location_and_locations_influencing_score.iteritems():
        mf_location_to_mf_influence_type_to_influence_score[location][InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE] \
            = sum(zip(*tuo_neighbor_location_and_locations_influencing_score)[1])/no_of_locations
    # Mean outgoing influence per location.
    for location, tuo_neighbor_location_and_locations_influenced_score in \
            mf_location_to_tuo_neighbor_location_and_locations_influenced_score.iteritems():
        mf_location_to_mf_influence_type_to_influence_score[location][InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE] \
            = sum(zip(*tuo_neighbor_location_and_locations_influenced_score)[1])/no_of_locations
    # Keep the dominant influence type; negate when incoming dominates.
    for location, mf_influence_type_to_influence_score in \
            mf_location_to_mf_influence_type_to_influence_score.iteritems():
        influence_type, influence_score = max(mf_influence_type_to_influence_score.iteritems(), key=itemgetter(1))
        if influence_type==InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE:
            mf_location_to_global_influence_score[location] = -influence_score
        else:
            mf_location_to_global_influence_score[location] = influence_score
    return mf_location_to_global_influence_score.items()
def probabilisticCoverageModelExample(hashtag, type):
    """Plot the coverage-model lattice-probability distribution for one hashtag.

    type=='5m' uses only the first 5-minute time unit of occurrences;
    anything else uses all occurrences in the highest active region.
    NOTE: parameter name 'type' shadows the builtin.
    """
    MINUTES, timeUnit = 5, 1
    print len(CoverageBasedLatticeSelectionModel.lattices)
    for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
        if hashtagObject['h']==hashtag:
            # Bucket occurrences into 5-minute epochs (gaps filled with empties).
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
            # Flatten the first timeUnit buckets / all buckets; keep only the point column.
            occsInTimeunit = zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
            allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
            if type=='5m':
                probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
            else:
                print getRadius(allOccurances)
                probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
            latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
            points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#            print points[0], colors[0]
            ax = plt.subplot(111)
            sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
            divider = make_axes_locatable(ax)
#            plt.title('Jaccard similarity with New York')
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(sc, cax=cax)
            plt.show()
#            plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
            plt.clf()
            # Hashtag found and plotted; stop scanning the file.
            break
def writeClusterKML():
    """Write all user-cluster features for the current place to clusters.kml.

    NOTE(review): relies on module-level globals `place` and `noOfFeatures` —
    confirm both are set before this is called.
    """
    kml = SpotsKML()
    # Chained %: first fills the folder template, then the place name.
    outputKMLFile='%s/clusters.kml'%placesAnalysisFolder%place['name']
    for data in FileIO.iterateJsonFromFile(placesUserClusterFeaturesFile%place['name']):
        clusterId, color, features = data
        # f = (lid, ?, title); only the top noOfFeatures features are written.
        kml.addLocationPointsWithTitles([(getLocationFromLid(f[0].replace('_', ' ')), f[2]) for f in features[:noOfFeatures]], color=color)
    FileIO.createDirectoryForFile(outputKMLFile)
    kml.write(outputKMLFile)
def locationsFromAllTransactionsIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation): observedLocations = set() i = 0 for d in filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation): print i i += 1 for k in filter(lambda l: l not in observedLocations, d): observedLocations.add(k) yield getLocationFromLid(k)
def writeUserClusterKMLs(place):
    """Write one KML per user cluster of *place*, containing each cluster's
    top-5 locations titled with their venue names."""
    clustering = getUserClustering(place, place.get('k'))
    # clustering[3] maps cluster id -> color; clustering[0] names the run.
    colorMap = clustering[3]
    # Iterate clusters in numeric cluster-id order.
    for clusterId, details in sorted(getUserClusteringDetails(place, clustering).iteritems(), key=lambda k: int(k[0])):
        kml = SpotsKML()
        kml.addLocationPointsWithTitles([(getLocationFromLid(lid), unicode(name).encode('utf-8')) for lid, name, _ in details['locations'][:5]], color=colorMap[clusterId])
        outputKMLFile=placesKMLsFolder%place['name']+'locations/userClusters/%s/%s.kml'%(str(clustering[0]), str(clusterId))
        FileIO.createDirectoryForFile(outputKMLFile)
        kml.write(outputKMLFile)
def addLine(self, points, description=None):
    """Add a width-3 line through *points* (lid strings) to the KML, with a
    point marker at each end and an optional description."""
    from simplekml import LineStyle
    # KML wants (lon, lat); getLocationFromLid returns (lat, lon), so reverse.
    coords = []
    for point in points:
        latLon = getLocationFromLid(point)
        coords.append(list(reversed(latLon)))
    lineString = self.kml.newlinestring(coords=coords)
    # Mark both endpoints of the line.
    self.kml.newpoint(coords=[coords[0]])
    self.kml.newpoint(coords=[coords[-1]])
    if description:
        lineString.description = description
    lineString.linestyle = LineStyle(width=3.0)
def plot_locations_influence_on_world_map(ltuo_model_id_and_hashtag_tag, noOfInfluencers=10, percentage_of_locations=0.15):
    """For each (model, hashtag_tag) and each hand-picked city, plot the top
    percentage_of_locations incoming-influence neighbors and the great circles
    connecting them to the city; saves one PNG per city.

    NOTE(review): the `noOfInfluencers` parameter is not forwarded — the
    loader below is called with noOfInfluencers=None.  Confirm intent.
    """
    # (lid, label) pairs for the cities to render.
    input_locations = [
        ('40.6000_-73.9500', 'new_york'),
        ('33.3500_-118.1750', 'los_angeles'),
        ('29.7250_-97.1500', 'austin'),
        ('30.4500_-95.7000', 'college_station'),
        ('-22.4750_-42.7750', 'rio'),
        ('51.4750_0.0000', 'london'),
        ('-23.2000_-46.4000', 'sao_paulo')
    ]
    for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
        tuo_location_and_tuo_neighbor_location_and_locations_influence_score = \
            Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers=None, influence_type=InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE)
        for input_location, label in input_locations:
            for location, tuo_neighbor_location_and_locations_influence_score in \
                    tuo_location_and_tuo_neighbor_location_and_locations_influence_score:
                if input_location==location:
                    # Rebind the lid string to its (lat, lon) point.
                    input_location = getLocationFromLid(input_location.replace('_', ' '))
                    output_file = fld_results%GeneralMethods.get_method_id() + '/%s_%s/%s.png'%(model_id, hashtag_tag, label)
                    # Keep the top fraction of neighbors (at least one bucket's worth).
                    number_of_outgoing_influences = int(len(tuo_neighbor_location_and_locations_influence_score)*percentage_of_locations)
                    if number_of_outgoing_influences==0:
                        number_of_outgoing_influences=len(tuo_neighbor_location_and_locations_influence_score)
                    locations = zip(*tuo_neighbor_location_and_locations_influence_score)[0][:number_of_outgoing_influences]
                    locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
#                    locations = filter(lambda location: isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY), locations)
                    if locations:
                        _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#000000', c='#FF00FF', returnBaseMapObject=True, lw = 0)
#                        _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#FF00FF', returnBaseMapObject=True, lw = 0)
                        for location in locations:
#                            if isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY):
                            m.drawgreatcircle(location[1], location[0], input_location[1], input_location[0], color='#FAA31B', lw=1., alpha=0.5)
#                        plotPointsOnWorldMap([input_location], blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                        plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#000000', c='#003CFF', s=40, lw = 0)
#                        plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                        FileIO.createDirectoryForFile(output_file)
                        print output_file
                        savefig(output_file)
                        plt.clf()
                    else:
                        GeneralMethods.runCommand('rm -rf %s'%output_file)
                    # Matched this city; move on to the next input location.
                    break
def plotSharingProbabilityAndTemporalClosenessScoresOnMap(timeRange, outputFolder):
    """For every lattice in the graph file, show a two-panel figure: sharing
    probability (top) and temporal closeness (bottom).

    NOTE(review): outputFile is computed and its directory created, but the
    figure is only shown (plt.show()), never saved — confirm whether a
    savefig call was intended.
    """
    i = 1
    for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
        latticePoint = getLocationFromLid(latticeObject['id'].replace('_', ' '))
        # Lattice id is rebuilt with (lon, lat) swapped into getLatticeLid.
        latticeId = getLatticeLid([latticePoint[1], latticePoint[0]], LATTICE_ACCURACY)
        plt.subplot(211)
        plt.title(latticeId)
        LatticeGraphPlots.plotLatticeSharingProbabilityOnMap(LatticeGraph.typeSharingProbability, latticeObject)
        plt.subplot(212)
        LatticeGraphPlots.plotLatticeTemporalClosenessScoresOnMap(LatticeGraph.typeTemporalCloseness, latticeObject)
        plt.show()
        outputFile = hashtagsImagesGraphAnalysisFolder%outputFolder+'%s_and_%s/%s.png'%(LatticeGraph.typeSharingProbability['id'], LatticeGraph.typeTemporalCloseness['id'], latticeId)
        FileIO.createDirectoryForFile(outputFile)
        print i, outputFile
        i+=1
def analyzeDataClusters():
    """Print, for 'cafe'-matching locations inside the US boundary, the venue
    name and the member ids of its top-5 check-in sequence clusters (only for
    runs with checkinsWindow == 10)."""
    regex = 'cafe'
    neighborLocationExtractionMethod = NeighborLocationsSelection.N_LOCATIONS
    inputFile = checkinSequenceLocationRegexAnalysisFolder+neighborLocationExtractionMethod+'/'+regex
    for line in FileIO.iterateJsonFromFile(inputFile):
        if line['parameters']['checkinsWindow']==10:
            for location, data in line['locations'].iteritems():
#                data = line['locations']['41.895 -87.623']
                if isWithinBoundingBox(getLocationFromLid(location), us_boundary):
                    print venuesCollection.find_one({'lid': location})['n'], location,'\n'
                    # Each cluster is (members, score); print member ids only.
                    for l, _ in data['clusters'][:5]:
                        print [i[0] for i in l]
                    print '\n ********** \n'
def influence_clusters(model_ids, min_cluster_size=15):
    """Cluster locations by incoming-influence similarity (affinity
    propagation) and plot the 10 largest clusters with intra-cluster edges.

    NOTE(review): min_cluster_size is currently unused — the size filter is
    commented out below and replaced by a hard-coded top-10.
    """
    influence_type = InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE
    for model_id in model_ids:
        digraph_of_location_and_location_similarity = nx.DiGraph()
        for line_count, (location, tuo_neighbor_location_and_mf_influence_type_and_similarity) in \
                enumerate(FileIO.iterateJsonFromFile(tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)):
#            print line_count
            for neighbor_location, mf_influence_type_to_similarity in tuo_neighbor_location_and_mf_influence_type_and_similarity:
                # Keep only edges with both endpoints inside the partial-world boundary.
                if isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY) and \
                        isWithinBoundingBox(getLocationFromLid(neighbor_location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY):
                    digraph_of_location_and_location_similarity.add_edge(location, neighbor_location, {'w': mf_influence_type_to_similarity[influence_type]})
        no_of_clusters, tuo_location_and_cluster_id = clusterUsingAffinityPropagation(digraph_of_location_and_location_similarity)
        # Regroup (location, clusterId) pairs into (clusterId, (locations...)).
        tuo_cluster_id_to_locations = [
            (cluster_id, zip(*ito_tuo_location_and_cluster_id)[0])
            for cluster_id, ito_tuo_location_and_cluster_id in groupby(
                sorted(tuo_location_and_cluster_id, key=itemgetter(1)), key=itemgetter(1)
            )
        ]
        mf_location_to_cluster_id = dict(tuo_location_and_cluster_id)
        mf_cluster_id_to_cluster_color = dict([(i, GeneralMethods.getRandomColor()) for i in range(no_of_clusters)])
        mf_valid_locations_to_color = {}
        # Keep only the 10 largest clusters.
        for cluster_id, locations in \
                sorted(tuo_cluster_id_to_locations, key=lambda (cluster_id, locations): len(locations))[-10:]:
#            if len(locations)>min_cluster_size:
            print cluster_id, len(locations)
            for location in locations:
                mf_valid_locations_to_color[location] \
                    = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[location]]
        locations, colors = zip(*mf_valid_locations_to_color.iteritems())
        locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
        # s=0 keeps the points invisible; the call provides the basemap object.
        _, m = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, s=0, returnBaseMapObject=True, lw = 0)
        for u, v, data in digraph_of_location_and_location_similarity.edges(data=True):
            # Draw only intra-cluster edges among the plotted locations.
            if u in mf_valid_locations_to_color and v in mf_valid_locations_to_color \
                    and mf_location_to_cluster_id[u]==mf_location_to_cluster_id[v]:
                color, u, v, w = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
                m.drawgreatcircle(u[1], u[0], v[1], v[0], color=color, alpha=0.6)
        plt.show()
def load_checkins_graph(checkins_graph_file): graph = nx.Graph() for data in iterateJsonFromFile(checkins_graph_file): (u, v) = data['e'].split('__') graph.add_edge(u , v, {'w': data['w']}) noOfClusters, clusters = clusterUsingAffinityPropagation(graph) # for cluster in clusters: # print len(cluster), cluster nodeToClusterIdMap = dict(clusters) colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)]) clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))] points, colors = zip(*map(lambda l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes())) _, m =plotPointsOnWorldMap(points[:1], s=0, lw=0, c=colors[:1], returnBaseMapObject=True) for u, v, data in graph.edges(data=True): if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]: color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w'] m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=1.5) # plt.title(title) plt.show() print noOfClusters print graph.number_of_edges() print graph.number_of_nodes()
def map_hashtag_object_to_tuo_hashtag_and_occurrence_count_and_entropy_and_focus_and_coverage_and_peak(self, hashtag, hashtag_object):
    """Yield (hashtag, [hashtag, occurrence count, entropy, focus,
    radius of gyration, peak interval id]) for one hashtag object."""
    mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object)
    points = [getLocationFromLid(lid.replace('_', ' ')) for lid,_ in hashtag_object['ltuo_lid_and_s_interval']]
    # Determine peak: the interval id with the highest occurrence count.
    ltuo_iid_and_tuo_interval_and_occurrence_count = get_ltuo_iid_and_tuo_interval_and_occurrence_count(hashtag_object)
    peak_tuo_iid_and_tuo_interval_and_occurrence_count = \
        max(ltuo_iid_and_tuo_interval_and_occurrence_count, key=lambda (_, (__, occurrence_count)): occurrence_count)
    peak_iid = peak_tuo_iid_and_tuo_interval_and_occurrence_count[0]
    yield hashtag_object['hashtag'], [hashtag_object['hashtag'],
                                      len(hashtag_object['ltuo_lid_and_s_interval']),
                                      entropy(mf_lid_to_occurrence_count, False),
                                      focus(mf_lid_to_occurrence_count),
                                      getRadiusOfGyration(points),
                                      peak_iid]
def writeLocationToUserMap(place):
    """Rebuild the per-place location->users JSON file: filter locations to
    the place boundary, enrich each with venue name/categories/tags and a
    check-in count, and write those above the place's minimum check-in count."""
    name, boundary = place['name'], place['boundary']
    # Start fresh: remove any previous output for this place.
    GeneralMethods.runCommand('rm -rf %s'%placesLocationToUserMapFile%name)
    for location in filteredLocationToUserAndTimeMapIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, inputFile=locationToUserAndExactTimeMapFile):
        lid=getLocationFromLid(location['location'])
        if isWithinBoundingBox(lid, boundary):
            location['categories'] = ''
            location['tags'] = ''
            location['name']=''
            title = venuesCollection.find_one({'lid':location['location']})
            if title:
                location['name'] = unicode(title['n']).encode("utf-8")
            meta = venuesMetaDataCollection.find_one({'_id':location['location']})
            if meta:
                location['categories'] = unicode(meta['c']).encode("utf-8")
                location['tags'] = unicode(meta['t']).encode("utf-8")
            # Normalize user keys to strings (JSON round-trip safety).
            for user in location['users'].keys()[:]:
                location['users'][str(user)]=location['users'][user]
                del location['users'][user]
            # Total check-ins = every epoch across user -> day -> db buckets.
            location['noOfCheckins']=sum([len(epochs) for user, userVector in location['users'].iteritems() for day, dayVector in userVector.iteritems() for db, epochs in dayVector.iteritems()])
            if location['noOfCheckins']>place.get('minLocationCheckins',0):
                FileIO.writeToFileAsJson(location, placesLocationToUserMapFile%name)
def plotHashtagSourcesOnMap(timeRange, outputFolder):
    """Plot, on a world map, how many hashtags originated at each source
    lattice (color = count, Paired colormap)."""
    i = 1
    distribution = defaultdict(int)
    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%(outputFolder,'%s_%s'%timeRange)):
        occuranesInHighestActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True)
        if occuranesInHighestActiveRegion:
            # Source = lattice where the hashtag first became active.
            source, count = getSourceLattice(occuranesInHighestActiveRegion)
            print i, source
            i+=1
            distribution[getLidFromLocation(source)]+=1
#            if i==10: break
    # Sort by count so higher-count points draw later (on top).
    points, colors = zip(*[(getLocationFromLid(k),v) for k, v in sorted(distribution.iteritems(), key=itemgetter(1))])
    cm = matplotlib.cm.get_cmap('Paired')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0)
    plt.colorbar(sc)
    plt.show()
def plot_global_influencers(ltuo_model_id_and_hashtag_tag):
    """Plot the 10 lowest-influence-score locations of the whole world on a
    map and save one PNG per (model, hashtag_tag).
    (Single-boundary variant of plot_local_influencers.)"""
    tuples_of_boundary_and_boundary_label = [
        ([[-90,-180], [90, 180]], 'World', 'm'),
    ]
    for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
        print model_id, hashtag_tag
        tuples_of_location_and_color = []
        for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
            tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary)
            # Lowest 10 scores (scores are negative for incoming-dominated locations).
            tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10]
            locations = zip(*tuo_location_and_influence_scores)[0]
            for location in locations:
                tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color])
        locations, colors = zip(*tuples_of_location_and_color)
        plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0, alpha=1.)
        # Dummy scatter points create legend entries for each boundary color.
        for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
            plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0)
#        plt.legend(loc=3, ncol=4, mode="expand",)
#        plt.show()
        savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))
def plot_geo_distribution_in_social_networks():
    """For each social network (Foursquare, Brightkite, Gowalla) plot the
    locations with more than 25 check-ins, colored by occurrence count."""
    total_checkins = 0.0  # NOTE(review): assigned but never updated/used here
    for social_network in [FOURSQUARE_ID, BRIGHTKITE_ID, GOWALLA_ID]:
        print social_network
        ax = plt.subplot(111)
        # Keep only lids that have >25 occurrences in this network.
        tuples_of_location_and_location_occurences_count = [(getLocationFromLid(data['key'].replace('_', ' ')), data['distribution'][social_network]) for i, data in enumerate(iterateJsonFromFile(lidsToDistributionInSocialNetworksMapFile%BOUNDARY_ID))\
                                                            if social_network in data['distribution'] and data['distribution'][social_network]>25]
        # Sort by count so high-count points draw on top.
        tuples_of_location_and_location_occurences_count = sorted(tuples_of_location_and_location_occurences_count, key=itemgetter(1))
        locations, colors = zip(*tuples_of_location_and_location_occurences_count)
        sc = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
        divider = make_axes_locatable(ax)
#        plt.title('Jaccard similarity with New York')
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(sc, cax=cax)
#        for k, v in tuples_of_location_and_location_occurences_count:
#            print social_network, k, v
#        print len(tuples_of_location_and_location_occurences_count)
        plt.show()
def plotDistributionGraphs(occurences, validTimeUnits, title, startingEpoch=None):
    """Two-panel world map for one hashtag: mention counts per lattice (top)
    and arrival time of the first mention per lattice (bottom).
    Returns the starting epoch so subsequent plots can share a time origin."""
    occurences = getOccurencesFilteredByDistributionInTimeUnits(occurences, validTimeUnits)
    # Group occurrences by lattice id; keep sorted occurrence times per lattice.
    occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), sorted(zip(*occs)[1])) for lid, occs in groupby(sorted([(getLatticeLid(l, ACCURACY), t) for l, t in occurences], key=itemgetter(0)), key=itemgetter(0))]
    plt.subplot(211)
    pointsForNumberOfOccurances, numberOfOccurancesList = zip(*sorted(occurancesGroupedByLattice, key=lambda t: len(t[1])))
    numberOfOccurancesList = [len(ocs) for ocs in numberOfOccurancesList]
    cm = matplotlib.cm.get_cmap('cool')
    sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=numberOfOccurancesList, cmap=cm, lw = 0, alpha=1.0)
    plt.colorbar(sc), plt.title(title), plt.xlabel('Number of mentions')
    plt.subplot(212)
    # reverse=True: earliest-arriving lattices draw last (on top).
    pointsForNumberOfOccurances, occuranceTime = zip(*sorted(occurancesGroupedByLattice, key=lambda t: min(t[1]), reverse=True))
    occuranceTime=[min(t) for t in occuranceTime]
    if not startingEpoch:
        # Default origin: earliest first-occurrence across all lattices.
        startingEpoch = occuranceTime[-1]
    occuranceTime=[(t-startingEpoch)/TIME_UNIT_IN_SECONDS for t in occuranceTime]
    cm = matplotlib.cm.get_cmap('autumn')
    sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=occuranceTime, cmap=cm, lw = 0, alpha=1.0)
    plt.colorbar(sc), plt.xlabel('Speed of hashtag arrival')
    return startingEpoch
def getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurances, accuracy):
    """Greedy search at the given lattice *accuracy* for the source lattice
    whose neighborhood covers ~50% of occurrences with minimum total distance.

    Returns {'occurances': occurrences within the winning lattice set,
             'sourceLattice': (lat, lon) of the winning source lattice}.
    """
    occurancesDistributionInHigherLattice, distanceMatrix = defaultdict(list), defaultdict(dict)
    for oc in occurances:
        occurancesDistributionInHigherLattice[getLatticeLid(oc, accuracy)].append(oc)
    # Most-populated lattices first.
    higherLattices = sorted(occurancesDistributionInHigherLattice.iteritems(), key=lambda t: len(t[1]), reverse=True)
    # Symmetric pairwise haversine distances between lattices.
    for hl1, hl2 in combinations(occurancesDistributionInHigherLattice, 2):
        distanceMatrix[hl1][hl2] = distanceMatrix[hl2][hl1] = getHaversineDistance(getLocationFromLid(hl1.replace('_', ' ')), getLocationFromLid(hl2.replace('_', ' ')))
    # Replace each row by its neighbors sorted nearest-first.
    for k,v in distanceMatrix.iteritems():
        distanceMatrix[k] = sorted(v.iteritems(), key=itemgetter(1))
    occurancesToReturn = []
    # float('inf') sentinel: the previous code used an empty tuple, which only
    # compared greater-than-any-number via a Python 2 mixed-type-comparison
    # quirk; inf expresses the same "no candidate yet" intent explicitly.
    currentHigherLatticeSet, totalOccurances = {'distance': float('inf')}, float(len(occurances))
    for hl, occs in higherLattices:
        higherLatticeSet = {'distance': 0, 'observedOccurances': len(occs), 'lattices': [hl], 'sourceLattice': hl}
        # Grow the set with nearest neighbors until it stops beating the best
        # set found so far or it already covers half the occurrences.
        while currentHigherLatticeSet['distance']>higherLatticeSet['distance'] and higherLatticeSet['observedOccurances']/totalOccurances<0.5:
            (l, d) = distanceMatrix[hl][0]
            distanceMatrix[hl]=distanceMatrix[hl][1:]
            higherLatticeSet['distance']+=d
            higherLatticeSet['lattices'].append(l)
            higherLatticeSet['observedOccurances']+=len(occurancesDistributionInHigherLattice[l])
        # (Dead `currentHigherLatticeSet==None` guard removed — the variable is
        # always a dict here.)
        if currentHigherLatticeSet['distance']>higherLatticeSet['distance']:
            currentHigherLatticeSet=higherLatticeSet
    for l in currentHigherLatticeSet['lattices']:
        occurancesToReturn+=occurancesDistributionInHigherLattice[l]
#    return {'distance': currentHigherLatticeSet['distance'], 'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
    return {'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
def getLocalityIndexAtK(occurances, kValue):
    ''' Locality index at k - for a hashtag is the minimum radius that covers k percentage of occurrances.
    A high locality index suggests hashtag was global with a small index suggests it was local.
    To find locality index at k, I must find a point that is closest to k percentage of occurances.
    Brute force requires nC2 complexity. Hence, use lattices of bigger size technique.

    Returns (radius, sourceLattice).
    '''
    def getLatticeThatGivesMinimumLocalityIndexAtK():
        # Coarse-to-fine search: refine the candidate source lattice at
        # successively finer accuracies down to ACCURACY.
        occurancesDict = {'occurances': occurances}
        for accuracy in [4, 2, 1, 0.5, ACCURACY]:
            occurancesDict = getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurancesDict['occurances'], accuracy)
        return occurancesDict['sourceLattice']
    def getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurances, accuracy):
        # Greedy selection of a lattice set covering ~50% of occurrences with
        # minimum total distance at the given accuracy.
        # NOTE(review): this nested helper duplicates the module-level function
        # of the same name — consider sharing one implementation.
        occurancesDistributionInHigherLattice, distanceMatrix = defaultdict(list), defaultdict(dict)
        for oc in occurances:
            occurancesDistributionInHigherLattice[getLatticeLid(oc, accuracy)].append(oc)
        higherLattices = sorted(occurancesDistributionInHigherLattice.iteritems(), key=lambda t: len(t[1]), reverse=True)
        for hl1, hl2 in combinations(occurancesDistributionInHigherLattice, 2):
            distanceMatrix[hl1][hl2] = distanceMatrix[hl2][hl1] = getHaversineDistance(getLocationFromLid(hl1.replace('_', ' ')), getLocationFromLid(hl2.replace('_', ' ')))
        for k,v in distanceMatrix.iteritems():
            distanceMatrix[k] = sorted(v.iteritems(), key=itemgetter(1))
        occurancesToReturn = []
        # NOTE: () as the initial 'distance' relies on Python 2 comparing any
        # tuple greater than any number, i.e. an "infinity" sentinel.
        currentHigherLatticeSet, totalOccurances = {'distance': ()}, float(len(occurances))
        for hl, occs in higherLattices:
            higherLatticeSet = {'distance': 0, 'observedOccurances': len(occs), 'lattices': [hl], 'sourceLattice': hl}
            while currentHigherLatticeSet['distance']>higherLatticeSet['distance'] and higherLatticeSet['observedOccurances']/totalOccurances<0.5:
                (l, d) = distanceMatrix[hl][0]
                distanceMatrix[hl]=distanceMatrix[hl][1:]
                higherLatticeSet['distance']+=d
                higherLatticeSet['lattices'].append(l)
                higherLatticeSet['observedOccurances']+=len(occurancesDistributionInHigherLattice[l])
            # NOTE: the ==None arm is dead — currentHigherLatticeSet is always a dict.
            if currentHigherLatticeSet==None or currentHigherLatticeSet['distance']>higherLatticeSet['distance']:
                currentHigherLatticeSet=higherLatticeSet
        for l in currentHigherLatticeSet['lattices']:
            occurancesToReturn+=occurancesDistributionInHigherLattice[l]
#        return {'distance': currentHigherLatticeSet['distance'], 'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
        return {'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
    # Count occurrences per fine-grained lattice.
    occurancesDistributionInHigherLattice = defaultdict(int)
    for oc in occurances:
        occurancesDistributionInHigherLattice[getLatticeLid(oc, ACCURACY)]+=1
    totalOccurances, distance, observedOccuraces = float(len(occurances)), 0, 0
    lattice = getLatticeThatGivesMinimumLocalityIndexAtK()
    # Lattices sorted by distance from the chosen source lattice.
    sortedLatticeObjects = sorted([(getLocationFromLid(k.replace('_', ' ')), getHaversineDistance(lattice, getLocationFromLid(k.replace('_', ' '))), v) for k, v in occurancesDistributionInHigherLattice.iteritems()], key=itemgetter(1))
    # Expand the radius until at least kValue fraction of occurrences is covered.
    for l, d, oc in sortedLatticeObjects:
        distance=d  # NOTE(review): 'distance' shadows loop var 'd'; return uses d directly.
        observedOccuraces+=oc
        if observedOccuraces/totalOccurances>=kValue:
            break
    return (d, lattice)
def plot_local_influencers(ltuo_model_id_and_hashtag_tag):
    """For each regional boundary (USA, Mexico, SE-Asia, Brazil), plot that
    region's 10 lowest-influence-score locations on one world map with a
    per-region color and legend; saves one PNG per (model, hashtag_tag)."""
    # (bounding box, legend label, random color) per region.
    tuples_of_boundary_and_boundary_label = [
        ([[24.527135,-127.792969], [49.61071,-59.765625]], 'USA', GeneralMethods.getRandomColor()),
        ([[10.107706,-118.660469], [26.40009,-93.699531]], 'Mexico', GeneralMethods.getRandomColor()),
        ([[-16.6695,88.409841], [30.115057,119.698904]], 'SE-Asia', GeneralMethods.getRandomColor()),
        ([[-29.565473,-58.191719], [7.327985,-30.418282]], 'Brazil', GeneralMethods.getRandomColor()),
    ]
    for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
        print model_id, hashtag_tag
        tuples_of_location_and_color = []
        for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
            tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary)
            # Lowest 10 scores (negative = incoming-influence dominated).
            tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10]
            locations = zip(*tuo_location_and_influence_scores)[0]
            for location in locations:
                tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color])
        locations, colors = zip(*tuples_of_location_and_color)
        plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0, alpha=1.)
        # Dummy scatter points create a legend entry per region color.
        for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
            plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0)
        plt.legend(loc=3, ncol=4, mode="expand",)
#        plt.show()
        savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))