Exemplo n.º 1
0
    def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
        distances = defaultdict(dict)
        for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))):
            if latticeObject['id']==lattice:
                latticeHashtagsSet = set(latticeObject['hashtags'])
                for neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
                    distances[neighborLattice] = {}
                    neighborHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(latticeObject['hashtags'], neighborHashtags, findLag=False)
                    neighborHashtagsSet = set(neighborHashtags)
                    distances[neighborLattice]['similarity']=len(latticeHashtagsSet.intersection(neighborHashtagsSet))/float(len(latticeHashtagsSet.union(neighborHashtagsSet)))
                    distances[neighborLattice]['temporalDistance']=np.mean([abs(latticeObject['hashtags'][k][0]-neighborHashtags[k][0]) for k in neighborHashtags if k in latticeObject['hashtags']])/(60.*60.)
                    distances[neighborLattice]['geoDistance']=getHaversineDistanceForLids(latticeObject['id'].replace('_', ' '), neighborLattice.replace('_', ' '))
                break
        dataPoints = []
        ax = plt.subplot(111)
        for k, data in distances.iteritems(): dataPoints.append((getLocationFromLid(k.replace('_', ' ')), data['temporalDistance']))
        points, colors = zip(*sorted(dataPoints, key=itemgetter(1)))
        sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
        plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
        divider = make_axes_locatable(ax)
        plt.title('Average time difference from New York')
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(sc, cax=cax)
#        plt.show()
        plt.savefig('../images/temporalDistanceExample.png')
 def iterateFrequentLocationsFromFIMahout(
     minLocationsTheUserHasCheckedin,
     minUniqueUsersCheckedInTheLocation,
     minCalculatedSupport,
     minLocationsInItemset=0,
     extraMinSupport=minSupport,
     yieldSupport=False,
     lids=False,
 ):
     #        for line in FileIO.iterateLinesFromFile(locationsFIMahoutOutputFile%(minUserLocations, minCalculatedSupport)):
     for line in FileIO.iterateLinesFromFile(
         locationsFIMahoutOutputFile
         % (minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, minCalculatedSupport)
     ):
         if line.startswith("Key:"):
             data = line.split("Value: ")[1][1:-1].split(",")
             if not lids:
                 locationItemset, support = (
                     [getLocationFromLid(i.replace("_", " ")) for i in data[0][1:-1].split()],
                     int(data[1]),
                 )
             else:
                 locationItemset, support = [i.replace("_", " ") for i in data[0][1:-1].split()], int(data[1])
             if support >= extraMinSupport and len(locationItemset) >= minLocationsInItemset:
                 if not yieldSupport:
                     yield [location for location in locationItemset if isWithinBoundingBox(location, us_boundary)]
                 else:
                     yield [
                         location
                         for location in locationItemset
                         if isWithinBoundingBox(getLocationFromLid(location), us_boundary)
                     ], support
Exemplo n.º 3
0
    def map_hashtag_object_to_tuo_norm_iid_and_interval_stats(self, hashtag, hashtag_object):
        def distance_from_overall_locality_stat(overall_stat, current_stat): return overall_stat-current_stat
        ltuo_iid_and_tuo_interval_and_lids = \
            get_ltuo_iid_and_tuo_interval_and_lids(hashtag_object)
        peak_tuo_iid_and_tuo_interval_and_lids = \
            max(ltuo_iid_and_tuo_interval_and_lids, key=lambda (_, (__, lids)): len(lids))
        peak_iid = peak_tuo_iid_and_tuo_interval_and_lids[0]
#        total_occurrences = sum(len(data[1][1]) for data in peak_tuo_iid_and_tuo_interval_and_lids)
        # Overall locality stats
        overall_mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object)
        overall_points = [ getLocationFromLid(lid.replace('_', ' ')) for lid,_ in hashtag_object['ltuo_lid_and_s_interval']]
        overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
        overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
        overall_coverage = getRadiusOfGyration(overall_points)
        total_occurrences = sum(len(lids) for (iid, (interval, lids)) in ltuo_iid_and_tuo_interval_and_lids)
        for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
            mf_lid_to_occurrence_count = defaultdict(float)
            for lid in lids: mf_lid_to_occurrence_count[lid]+=1
            points = [getLocationFromLid(lid.replace('_', ' ')) for lid in lids]
            
            current_entropy = entropy(mf_lid_to_occurrence_count, False)
            current_focus = focus(mf_lid_to_occurrence_count)[1]
            current_coverage = getRadiusOfGyration(points)
            
            yield iid-peak_iid, [len(lids)/total_occurrences, current_entropy, current_focus, current_coverage, 
                                    distance_from_overall_locality_stat(overall_entropy, current_entropy),
                                    distance_from_overall_locality_stat(overall_focus, current_focus),
                                    distance_from_overall_locality_stat(overall_coverage, current_coverage),]
def getKMLForCluster(cluster):
    clusterToYield = []
    if len(cluster)>3: 
        for lid in cluster:
            title = venuesCollection.find_one({'lid':lid})
            if title!=None: clusterToYield.append((getLocationFromLid(lid), unicode(title['n']).encode("utf-8")))
            else: clusterToYield.append((getLocationFromLid(lid), ''))
    return clusterToYield
Exemplo n.º 5
0
def plotLocationGraphOnMap(graph):
    points = map(lambda  lid:getLocationFromLid(lid.replace('_', ' ')), graph.nodes())
    _, m =plotPointsOnUSMap(points, s=10, lw=0, c='m', returnBaseMapObject=True)
    print graph.number_of_edges(), graph.number_of_nodes()
    totalEdgeWeight = max([d['w'] for _,_,d in graph.edges(data=True)])+0.0
    for u, v, data in graph.edges(data=True):
        u, v, w = getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
        m.drawgreatcircle(u[1],u[0],v[1],v[0],color=cm.Purples(w/totalEdgeWeight), alpha=0.5)
    plt.show()
Exemplo n.º 6
0
 def plotLatticeTemporalDistanceInHoursOnMap(latticeGraphType, latticeObject):
     latticeObject = latticeGraphType['method'](latticeObject)
     points, colors = zip(*sorted([(getLocationFromLid(neighborId.replace('_', ' ')), val) for neighborId, val in latticeObject['links'].iteritems()], key=itemgetter(1), reverse=True))
     cm = matplotlib.cm.get_cmap('autumn')
     sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0, vmin=0)
     plotPointsOnWorldMap([getLocationFromLid(latticeObject['id'].replace('_', ' '))], c='#00FF00', lw = 0)
     plt.xlabel(latticeGraphType['title'])
     plt.colorbar(sc)
     return sc
def getClusterForKML(cluster):
    clusterToYield = []
    if len(cluster) > 3:
        for lid in cluster:
            title = venuesCollection.find_one({"lid": lid})
            if title != None:
                clusterToYield.append((getLocationFromLid(lid), unicode(title["n"]).encode("utf-8")))
            else:
                clusterToYield.append((getLocationFromLid(lid), ""))
    return clusterToYield
Exemplo n.º 8
0
 def plotLatticeTemporalClosenessScoresOnMap(latticeGraphType, latticeObject):
     latticeObject = latticeGraphType['method'](latticeObject)
     LatticeGraph.normalizeNode(latticeObject)
     points, colors = zip(*sorted([(getLocationFromLid(neighborId.replace('_', ' ')), val) for neighborId, val in latticeObject['links'].iteritems()], key=itemgetter(1)))
     cm = matplotlib.cm.get_cmap('YlOrRd')
     sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0, vmin=0)
     plotPointsOnWorldMap([getLocationFromLid(latticeObject['id'].replace('_', ' '))], c='#00FF00', lw = 0)
     plt.xlabel(latticeGraphType['title'])
     plt.colorbar(sc)
     return sc
Exemplo n.º 9
0
def plotLocationClustersOnMap(graph):
    noOfClusters, clusters = clusterUsingAffinityPropagation(graph)
    nodeToClusterIdMap = dict(clusters)
    colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)])
    clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))]
    points, colors = zip(*map(lambda  l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes()))
    _, m =plotPointsOnUSMap(points, s=0, lw=0, c=colors, returnBaseMapObject=True)
    for u, v, data in graph.edges(data=True):
        if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]:
            color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
            m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=0.5)
    plt.show()
Exemplo n.º 10
0
 def nearbyLocations(lid, radiusInMiles):
     return (
         location
         for location in locationsCollection.find(
             {"l": {"$within": {"$center": [getLocationFromLid(lid), convertMilesToRadians(radiusInMiles)]}}}
         )
     )
Exemplo n.º 11
0
 def load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary=[[-90,-180], [90, 180]], noOfInfluencers=None):
     mf_location_to_global_influence_score = {}
     mf_location_to_mf_influence_type_to_influence_score = defaultdict(dict)
     mf_location_to_tuo_neighbor_location_and_locations_influencing_score = \
         dict(Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers, InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE))
     mf_location_to_tuo_neighbor_location_and_locations_influenced_score = \
         dict(Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers, InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE))
     for location in mf_location_to_tuo_neighbor_location_and_locations_influenced_score.keys()[:]:
         if not isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), boundary):
             if location in mf_location_to_tuo_neighbor_location_and_locations_influencing_score:
                 del mf_location_to_tuo_neighbor_location_and_locations_influencing_score[location]
             del mf_location_to_tuo_neighbor_location_and_locations_influenced_score[location]
     no_of_locations = len(mf_location_to_tuo_neighbor_location_and_locations_influenced_score)
     for location, tuo_neighbor_location_and_locations_influencing_score in \
             mf_location_to_tuo_neighbor_location_and_locations_influencing_score.iteritems():
         mf_location_to_mf_influence_type_to_influence_score[location][InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE] \
             = sum(zip(*tuo_neighbor_location_and_locations_influencing_score)[1])/no_of_locations
     for location, tuo_neighbor_location_and_locations_influenced_score in \
             mf_location_to_tuo_neighbor_location_and_locations_influenced_score.iteritems():
         mf_location_to_mf_influence_type_to_influence_score[location][InfluenceMeasuringModels.TYPE_OUTGOING_INFLUENCE] \
             = sum(zip(*tuo_neighbor_location_and_locations_influenced_score)[1])/no_of_locations
     for location, mf_influence_type_to_influence_score in \
             mf_location_to_mf_influence_type_to_influence_score.iteritems():
         influence_type, influence_score = max(mf_influence_type_to_influence_score.iteritems(), key=itemgetter(1))
         if influence_type==InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE: 
             mf_location_to_global_influence_score[location] = -influence_score
         else: mf_location_to_global_influence_score[location] = influence_score
     return mf_location_to_global_influence_score.items()
Exemplo n.º 12
0
    def probabilisticCoverageModelExample(hashtag, type):
        MINUTES, timeUnit = 5, 1
        print len(CoverageBasedLatticeSelectionModel.lattices)
        for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
            if hashtagObject['h']==hashtag:
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
                occsInTimeunit =  zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
                allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
                if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
                else: 
                    print getRadius(allOccurances)
                    probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
                latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
                points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#                print points[0], colors[0]
                ax = plt.subplot(111)
                sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
                divider = make_axes_locatable(ax)
#                plt.title('Jaccard similarity with New York')
                cax = divider.append_axes("right", size="5%", pad=0.05)
                plt.colorbar(sc, cax=cax)
                plt.show()
#                plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
                plt.clf()
                break
Exemplo n.º 13
0
 def writeClusterKML():
     kml = SpotsKML()
     outputKMLFile='%s/clusters.kml'%placesAnalysisFolder%place['name']
     for data in FileIO.iterateJsonFromFile(placesUserClusterFeaturesFile%place['name']):
         clusterId, color, features = data
         kml.addLocationPointsWithTitles([(getLocationFromLid(f[0].replace('_', ' ')), f[2]) for f in features[:noOfFeatures]], color=color)
         FileIO.createDirectoryForFile(outputKMLFile)
         kml.write(outputKMLFile)
Exemplo n.º 14
0
def locationsFromAllTransactionsIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation):
    observedLocations = set()
    i = 0
    for d in filteredUserIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation):
        print i
        i += 1
        for k in filter(lambda l: l not in observedLocations, d):
            observedLocations.add(k)
            yield getLocationFromLid(k)
Exemplo n.º 15
0
def writeUserClusterKMLs(place):
    clustering = getUserClustering(place, place.get('k'))
    colorMap = clustering[3]
    for clusterId, details in sorted(getUserClusteringDetails(place, clustering).iteritems(), key=lambda k: int(k[0])):
        kml = SpotsKML()
        kml.addLocationPointsWithTitles([(getLocationFromLid(lid), unicode(name).encode('utf-8')) for lid, name, _ in details['locations'][:5]], color=colorMap[clusterId])
        outputKMLFile=placesKMLsFolder%place['name']+'locations/userClusters/%s/%s.kml'%(str(clustering[0]), str(clusterId))
        FileIO.createDirectoryForFile(outputKMLFile)
        kml.write(outputKMLFile)
Exemplo n.º 16
0
    def addLine(self, points, description=None):
        from simplekml import LineStyle

        points = [list(reversed(getLocationFromLid(point))) for point in points]
        ls = self.kml.newlinestring(coords=points)
        self.kml.newpoint(coords=[points[0]]), self.kml.newpoint(coords=[points[-1]])
        if description:
            ls.description = description
        ls.linestyle = LineStyle(width=3.0)
Exemplo n.º 17
0
    def plot_locations_influence_on_world_map(ltuo_model_id_and_hashtag_tag, noOfInfluencers=10, percentage_of_locations=0.15):
        input_locations = [
                               ('40.6000_-73.9500', 'new_york'),
                               ('33.3500_-118.1750', 'los_angeles'),
                               ('29.7250_-97.1500', 'austin'),
                           ('30.4500_-95.7000', 'college_station'),
                            ('-22.4750_-42.7750', 'rio'),
                           ('51.4750_0.0000', 'london'),
                           ('-23.2000_-46.4000', 'sao_paulo')
                         ] 
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            tuo_location_and_tuo_neighbor_location_and_locations_influence_score = \
                    Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers=None, influence_type=InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE)
            for input_location, label in input_locations:
                for location, tuo_neighbor_location_and_locations_influence_score in \
                        tuo_location_and_tuo_neighbor_location_and_locations_influence_score:
                    if input_location==location:
                        input_location = getLocationFromLid(input_location.replace('_', ' '))
                        output_file = fld_results%GeneralMethods.get_method_id() + '/%s_%s/%s.png'%(model_id, hashtag_tag, label)
                        number_of_outgoing_influences = int(len(tuo_neighbor_location_and_locations_influence_score)*percentage_of_locations)
                        if number_of_outgoing_influences==0: number_of_outgoing_influences=len(tuo_neighbor_location_and_locations_influence_score)
                        locations = zip(*tuo_neighbor_location_and_locations_influence_score)[0][:number_of_outgoing_influences]
                        locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
#                        locations = filter(lambda location: isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY), locations)
                        if locations:
                            _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#000000', c='#FF00FF', returnBaseMapObject=True, lw = 0)
#                            _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#FF00FF', returnBaseMapObject=True, lw = 0)
                            for location in locations: 
    #                            if isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY): 
                                m.drawgreatcircle(location[1], location[0], input_location[1], input_location[0], color='#FAA31B', lw=1., alpha=0.5)
#                            plotPointsOnWorldMap([input_location], blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                            plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#000000', c='#003CFF', s=40, lw = 0)
#                            plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                            FileIO.createDirectoryForFile(output_file)
                            print output_file
                            savefig(output_file)
                            plt.clf()
                        else:
                            GeneralMethods.runCommand('rm -rf %s'%output_file)
                        break
Exemplo n.º 18
0
 def plotSharingProbabilityAndTemporalClosenessScoresOnMap(timeRange, outputFolder):
     i = 1
     for latticeObject in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)):
         latticePoint = getLocationFromLid(latticeObject['id'].replace('_', ' '))
         latticeId = getLatticeLid([latticePoint[1], latticePoint[0]], LATTICE_ACCURACY)
         plt.subplot(211)
         plt.title(latticeId)
         LatticeGraphPlots.plotLatticeSharingProbabilityOnMap(LatticeGraph.typeSharingProbability, latticeObject)
         plt.subplot(212)
         LatticeGraphPlots.plotLatticeTemporalClosenessScoresOnMap(LatticeGraph.typeTemporalCloseness, latticeObject)
         plt.show()
         outputFile = hashtagsImagesGraphAnalysisFolder%outputFolder+'%s_and_%s/%s.png'%(LatticeGraph.typeSharingProbability['id'], LatticeGraph.typeTemporalCloseness['id'], latticeId); FileIO.createDirectoryForFile(outputFile)
         print i, outputFile; i+=1
Exemplo n.º 19
0
 def analyzeDataClusters():
     regex = 'cafe'
     neighborLocationExtractionMethod = NeighborLocationsSelection.N_LOCATIONS
     inputFile = checkinSequenceLocationRegexAnalysisFolder+neighborLocationExtractionMethod+'/'+regex
     for line in FileIO.iterateJsonFromFile(inputFile):
         if line['parameters']['checkinsWindow']==10:
             for location, data in line['locations'].iteritems():
 #                data = line['locations']['41.895 -87.623']
                 if isWithinBoundingBox(getLocationFromLid(location), us_boundary):
                     print venuesCollection.find_one({'lid': location})['n'], location,'\n'
                     for l, _ in data['clusters'][:5]:
                         print [i[0] for i in l]
                     print '\n ********** \n'
Exemplo n.º 20
0
    def influence_clusters(model_ids, min_cluster_size=15):
        influence_type = InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE
        for model_id in model_ids:
            digraph_of_location_and_location_similarity = nx.DiGraph()
            for line_count, (location, tuo_neighbor_location_and_mf_influence_type_and_similarity) in \
                        enumerate(FileIO.iterateJsonFromFile(tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)):
#                print line_count
                for neighbor_location, mf_influence_type_to_similarity in tuo_neighbor_location_and_mf_influence_type_and_similarity: 
                    if isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY) and \
                            isWithinBoundingBox(getLocationFromLid(neighbor_location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY):
                        digraph_of_location_and_location_similarity.add_edge(location, neighbor_location, {'w': mf_influence_type_to_similarity[influence_type]})

            no_of_clusters, tuo_location_and_cluster_id = clusterUsingAffinityPropagation(digraph_of_location_and_location_similarity)
            tuo_cluster_id_to_locations = [ (cluster_id, zip(*ito_tuo_location_and_cluster_id)[0])
                                            for cluster_id, ito_tuo_location_and_cluster_id in 
                                            groupby(
                                                  sorted(tuo_location_and_cluster_id, key=itemgetter(1)),
                                                  key=itemgetter(1)
                                                  )
                                           ]
            mf_location_to_cluster_id = dict(tuo_location_and_cluster_id)
            mf_cluster_id_to_cluster_color = dict([(i, GeneralMethods.getRandomColor()) for i in range(no_of_clusters)])
            mf_valid_locations_to_color = {}
            for cluster_id, locations in \
                    sorted(tuo_cluster_id_to_locations, key=lambda (cluster_id, locations): len(locations))[-10:]:
#                if len(locations)>min_cluster_size:
                print cluster_id, len(locations)
                for location in locations: mf_valid_locations_to_color[location] \
                    = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[location]]
            locations, colors = zip(*mf_valid_locations_to_color.iteritems())
            locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
            _, m = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, s=0, returnBaseMapObject=True, lw = 0)
            for u, v, data in digraph_of_location_and_location_similarity.edges(data=True):
                if u in mf_valid_locations_to_color and v in mf_valid_locations_to_color \
                        and mf_location_to_cluster_id[u]==mf_location_to_cluster_id[v]:
                    color, u, v, w = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
                    m.drawgreatcircle(u[1], u[0], v[1], v[0], color=color, alpha=0.6)
            plt.show()
Exemplo n.º 21
0
    def load_checkins_graph(checkins_graph_file):
        graph = nx.Graph()
        for data in iterateJsonFromFile(checkins_graph_file):
            (u, v) = data['e'].split('__')
            graph.add_edge(u , v, {'w': data['w']})
        noOfClusters, clusters = clusterUsingAffinityPropagation(graph)
#        for cluster in clusters:
#            print len(cluster), cluster
            
        nodeToClusterIdMap = dict(clusters)
        colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)])
        clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))]
        points, colors = zip(*map(lambda  l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes()))
        _, m =plotPointsOnWorldMap(points[:1], s=0, lw=0, c=colors[:1], returnBaseMapObject=True)
        for u, v, data in graph.edges(data=True):
            if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]:
                color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
                m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=1.5)
#        plt.title(title)
        plt.show()
        print noOfClusters
        print graph.number_of_edges()
        print graph.number_of_nodes()
Exemplo n.º 22
0
 def map_hashtag_object_to_tuo_hashtag_and_occurrence_count_and_entropy_and_focus_and_coverage_and_peak(self, hashtag, hashtag_object):
     mf_lid_to_occurrence_count = get_mf_lid_to_occurrence_count(hashtag_object)
     points = [ getLocationFromLid(lid.replace('_', ' ')) for lid,_ in hashtag_object['ltuo_lid_and_s_interval']]
     # Determine peak
     ltuo_iid_and_tuo_interval_and_occurrence_count = get_ltuo_iid_and_tuo_interval_and_occurrence_count(hashtag_object)
     peak_tuo_iid_and_tuo_interval_and_occurrence_count = \
         max(ltuo_iid_and_tuo_interval_and_occurrence_count, key=lambda (_, (__, occurrence_count)): occurrence_count)
     peak_iid = peak_tuo_iid_and_tuo_interval_and_occurrence_count[0]
     yield hashtag_object['hashtag'], [hashtag_object['hashtag'], 
                                       len(hashtag_object['ltuo_lid_and_s_interval']), 
                                       entropy(mf_lid_to_occurrence_count, False), 
                                       focus(mf_lid_to_occurrence_count), 
                                       getRadiusOfGyration(points),
                                       peak_iid]
Exemplo n.º 23
0
 def writeLocationToUserMap(place):
     name, boundary = place['name'], place['boundary']
     GeneralMethods.runCommand('rm -rf %s'%placesLocationToUserMapFile%name)
     for location in filteredLocationToUserAndTimeMapIterator(minLocationsTheUserHasCheckedin, minUniqueUsersCheckedInTheLocation, inputFile=locationToUserAndExactTimeMapFile):
         lid=getLocationFromLid(location['location'])
         if isWithinBoundingBox(lid, boundary): 
             location['categories'] = ''; location['tags'] = ''; location['name']=''
             title = venuesCollection.find_one({'lid':location['location']})
             if title: location['name'] = unicode(title['n']).encode("utf-8")
             meta = venuesMetaDataCollection.find_one({'_id':location['location']})
             if meta: location['categories'] = unicode(meta['c']).encode("utf-8"); location['tags'] = unicode(meta['t']).encode("utf-8")
             for user in location['users'].keys()[:]: location['users'][str(user)]=location['users'][user]; del location['users'][user]
             location['noOfCheckins']=sum([len(epochs) for user, userVector in location['users'].iteritems() for day, dayVector in userVector.iteritems() for db, epochs in dayVector.iteritems()])
             if location['noOfCheckins']>place.get('minLocationCheckins',0): FileIO.writeToFileAsJson(location, placesLocationToUserMapFile%name)
Exemplo n.º 24
0
def plotHashtagSourcesOnMap(timeRange, outputFolder):
    i = 1
    distribution = defaultdict(int)
    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%(outputFolder,'%s_%s'%timeRange)):
        occuranesInHighestActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True)
        if occuranesInHighestActiveRegion:
            source, count = getSourceLattice(occuranesInHighestActiveRegion)
            print i, source;i+=1
            distribution[getLidFromLocation(source)]+=1
#        if i==10: break
    points, colors = zip(*[(getLocationFromLid(k),v) for k, v in sorted(distribution.iteritems(), key=itemgetter(1))])
    cm = matplotlib.cm.get_cmap('Paired')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0)
    plt.colorbar(sc)
    plt.show()
Exemplo n.º 25
0
    def plot_global_influencers(ltuo_model_id_and_hashtag_tag):
        tuples_of_boundary_and_boundary_label = [
                ([[-90,-180], [90, 180]], 'World', 'm'),
            ]
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            print model_id, hashtag_tag
            tuples_of_location_and_color = []
            for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
                tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary)
                tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10]
                locations = zip(*tuo_location_and_influence_scores)[0]
                for location in locations: tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color])
            locations, colors = zip(*tuples_of_location_and_color)
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors,  lw = 0, alpha=1.)
            for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0)
#            plt.legend(loc=3, ncol=4, mode="expand",)
#            plt.show()
            savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))
Exemplo n.º 26
0
    def plot_geo_distribution_in_social_networks():
        total_checkins = 0.0
        for social_network in [FOURSQUARE_ID, BRIGHTKITE_ID, GOWALLA_ID]:
            print social_network
            ax = plt.subplot(111)
            tuples_of_location_and_location_occurences_count = [(getLocationFromLid(data['key'].replace('_', ' ')), data['distribution'][social_network]) 
                                                         for i, data in enumerate(iterateJsonFromFile(lidsToDistributionInSocialNetworksMapFile%BOUNDARY_ID))\
                                                         if social_network in data['distribution'] and data['distribution'][social_network]>25]
            tuples_of_location_and_location_occurences_count = sorted(tuples_of_location_and_location_occurences_count, key=itemgetter(1))
            locations, colors = zip(*tuples_of_location_and_location_occurences_count)
            sc = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
            divider = make_axes_locatable(ax)
#            plt.title('Jaccard similarity with New York')
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(sc, cax=cax)
#            for k, v in tuples_of_location_and_location_occurences_count:
#                print social_network, k, v
#            print len(tuples_of_location_and_location_occurences_count)
            plt.show()
Exemplo n.º 27
0
def plotDistributionGraphs(occurences, validTimeUnits, title, startingEpoch=None):
        occurences = getOccurencesFilteredByDistributionInTimeUnits(occurences, validTimeUnits)
        occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), sorted(zip(*occs)[1])) for lid, occs in groupby(sorted([(getLatticeLid(l, ACCURACY), t) for l, t in occurences], key=itemgetter(0)), key=itemgetter(0))]
        plt.subplot(211)
        pointsForNumberOfOccurances, numberOfOccurancesList = zip(*sorted(occurancesGroupedByLattice, key=lambda t: len(t[1])))
        numberOfOccurancesList = [len(ocs) for ocs in numberOfOccurancesList]
        cm = matplotlib.cm.get_cmap('cool')
        sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=numberOfOccurancesList, cmap=cm, lw = 0, alpha=1.0)
        plt.colorbar(sc), plt.title(title), plt.xlabel('Number of mentions')
        
        plt.subplot(212)
        pointsForNumberOfOccurances, occuranceTime = zip(*sorted(occurancesGroupedByLattice, key=lambda t: min(t[1]), reverse=True))
        occuranceTime=[min(t) for t in occuranceTime]
        if not startingEpoch: startingEpoch = occuranceTime[-1]
        occuranceTime=[(t-startingEpoch)/TIME_UNIT_IN_SECONDS for t in occuranceTime]
        cm = matplotlib.cm.get_cmap('autumn')
        sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=occuranceTime, cmap=cm, lw = 0, alpha=1.0)
        plt.colorbar(sc), plt.xlabel('Speed of hashtag arrival')
        return startingEpoch
Exemplo n.º 28
0
 def getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurances, accuracy):
     occurancesDistributionInHigherLattice, distanceMatrix = defaultdict(list), defaultdict(dict)
     for oc in occurances: occurancesDistributionInHigherLattice[getLatticeLid(oc, accuracy)].append(oc)
     higherLattices = sorted(occurancesDistributionInHigherLattice.iteritems(), key=lambda t: len(t[1]), reverse=True)
     for hl1, hl2 in combinations(occurancesDistributionInHigherLattice, 2): distanceMatrix[hl1][hl2] = distanceMatrix[hl2][hl1] = getHaversineDistance(getLocationFromLid(hl1.replace('_', ' ')), getLocationFromLid(hl2.replace('_', ' ')))
     for k,v in distanceMatrix.iteritems(): distanceMatrix[k] = sorted(v.iteritems(), key=itemgetter(1))
     occurancesToReturn = []
     currentHigherLatticeSet, totalOccurances = {'distance': ()}, float(len(occurances))
     for hl, occs  in higherLattices: 
         higherLatticeSet = {'distance': 0, 'observedOccurances': len(occs), 'lattices': [hl], 'sourceLattice': hl}
         while currentHigherLatticeSet['distance']>higherLatticeSet['distance'] and higherLatticeSet['observedOccurances']/totalOccurances<0.5:
             (l, d) = distanceMatrix[hl][0]; 
             distanceMatrix[hl]=distanceMatrix[hl][1:]
             higherLatticeSet['distance']+=d
             higherLatticeSet['lattices'].append(l)
             higherLatticeSet['observedOccurances']+=len(occurancesDistributionInHigherLattice[l])
         if currentHigherLatticeSet==None or currentHigherLatticeSet['distance']>higherLatticeSet['distance']: currentHigherLatticeSet=higherLatticeSet
     for l in currentHigherLatticeSet['lattices']: occurancesToReturn+=occurancesDistributionInHigherLattice[l]
 #    return {'distance': currentHigherLatticeSet['distance'], 'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
     return {'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
Exemplo n.º 29
0
def getLocalityIndexAtK(occurances, kValue):
    ''' Locality index at k - for a hashtag is the minimum radius that covers k percentage of occurrances.
            A high locality index suggests hashtag was global with a small index suggests it was local.
        To find locality index at k, I must find a point that is closest to k percentage of occurances. 
            Brute force requires nC2 complexity. 
            Hence, use lattices of bigger size technique.
    '''
    def getLatticeThatGivesMinimumLocalityIndexAtK():
        occurancesDict = {'occurances': occurances}
        for accuracy in [4, 2, 1, 0.5, ACCURACY]: occurancesDict = getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurancesDict['occurances'], accuracy)
        return occurancesDict['sourceLattice']
    def getLatticeThatGivesMinimumLocalityIndexAtKForAccuracy(occurances, accuracy):
        occurancesDistributionInHigherLattice, distanceMatrix = defaultdict(list), defaultdict(dict)
        for oc in occurances: occurancesDistributionInHigherLattice[getLatticeLid(oc, accuracy)].append(oc)
        higherLattices = sorted(occurancesDistributionInHigherLattice.iteritems(), key=lambda t: len(t[1]), reverse=True)
        for hl1, hl2 in combinations(occurancesDistributionInHigherLattice, 2): distanceMatrix[hl1][hl2] = distanceMatrix[hl2][hl1] = getHaversineDistance(getLocationFromLid(hl1.replace('_', ' ')), getLocationFromLid(hl2.replace('_', ' ')))
        for k,v in distanceMatrix.iteritems(): distanceMatrix[k] = sorted(v.iteritems(), key=itemgetter(1))
        occurancesToReturn = []
        currentHigherLatticeSet, totalOccurances = {'distance': ()}, float(len(occurances))
        for hl, occs  in higherLattices: 
            higherLatticeSet = {'distance': 0, 'observedOccurances': len(occs), 'lattices': [hl], 'sourceLattice': hl}
            while currentHigherLatticeSet['distance']>higherLatticeSet['distance'] and higherLatticeSet['observedOccurances']/totalOccurances<0.5:
                (l, d) = distanceMatrix[hl][0]; 
                distanceMatrix[hl]=distanceMatrix[hl][1:]
                higherLatticeSet['distance']+=d
                higherLatticeSet['lattices'].append(l)
                higherLatticeSet['observedOccurances']+=len(occurancesDistributionInHigherLattice[l])
            if currentHigherLatticeSet==None or currentHigherLatticeSet['distance']>higherLatticeSet['distance']: currentHigherLatticeSet=higherLatticeSet
        for l in currentHigherLatticeSet['lattices']: occurancesToReturn+=occurancesDistributionInHigherLattice[l]
    #    return {'distance': currentHigherLatticeSet['distance'], 'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
        return {'occurances': occurancesToReturn, 'sourceLattice': getLocationFromLid(currentHigherLatticeSet['sourceLattice'].replace('_', ' '))}
    occurancesDistributionInHigherLattice = defaultdict(int)
    for oc in occurances: occurancesDistributionInHigherLattice[getLatticeLid(oc, ACCURACY)]+=1
    totalOccurances, distance, observedOccuraces = float(len(occurances)), 0, 0
    lattice = getLatticeThatGivesMinimumLocalityIndexAtK()
    sortedLatticeObjects = sorted([(getLocationFromLid(k.replace('_', ' ')), getHaversineDistance(lattice, getLocationFromLid(k.replace('_', ' '))), v) for k, v in occurancesDistributionInHigherLattice.iteritems()],
                 key=itemgetter(1))
    for l, d, oc in sortedLatticeObjects:
        distance=d; observedOccuraces+=oc
        if observedOccuraces/totalOccurances>=kValue: break
    return (d, lattice)
Exemplo n.º 30
0
    def plot_local_influencers(ltuo_model_id_and_hashtag_tag):
        tuples_of_boundary_and_boundary_label = [
                ([[24.527135,-127.792969], [49.61071,-59.765625]], 'USA', GeneralMethods.getRandomColor()),
                ([[10.107706,-118.660469], [26.40009,-93.699531]], 'Mexico', GeneralMethods.getRandomColor()),
                ([[-16.6695,88.409841], [30.115057,119.698904]], 'SE-Asia', GeneralMethods.getRandomColor()),
                ([[-29.565473,-58.191719], [7.327985,-30.418282]], 'Brazil', GeneralMethods.getRandomColor()),
            ]
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            print model_id, hashtag_tag
            tuples_of_location_and_color = []
            for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
                tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary)
                tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10]
                locations = zip(*tuo_location_and_influence_scores)[0]
                for location in locations: tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color])
            locations, colors = zip(*tuples_of_location_and_color)
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors,  lw = 0, alpha=1.)
            for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0)
            plt.legend(loc=3, ncol=4, mode="expand",)
#            plt.show()
            savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))