Exemplo n.º 1
0
 def significant_nei_utm_ids():
     output_folder = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'/%s.png'
     for i, data in enumerate(FileIO.iterateJsonFromFile(f_significant_nei_utm_ids, remove_params_dict=True)):
         utm_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm(data['utm_id'])
         nei_utm_lat_longs = map(
                           lambda nei_utm_id: UTMConverter.getLatLongUTMIdInLatLongForm(nei_utm_id),
                           data['nei_utm_ids']
                         )
         if nei_utm_lat_longs:
             output_file = output_folder%('%s_%s'%(utm_lat_long))
             plotPointsOnWorldMap(nei_utm_lat_longs,
                                  blueMarble=False,
                                  bkcolor='#CFCFCF',
                                  lw = 0,
                                  color = '#EA00FF',
                                  alpha=1.)
             _, m = plotPointsOnWorldMap([utm_lat_long],
                                  blueMarble=False,
                                  bkcolor='#CFCFCF',
                                  lw = 0,
                                  color = '#2BFF00',
                                  s = 40,
                                  returnBaseMapObject=True,
                                  alpha=1.)
             for nei_utm_lat_long in nei_utm_lat_longs:
                 m.drawgreatcircle(utm_lat_long[1],
                                   utm_lat_long[0],
                                   nei_utm_lat_long[1],
                                   nei_utm_lat_long[0],
                                   color='#FFA600',
                                   lw=1.5,
                                   alpha=1.0)
             print 'Saving %s'%(i+1)
             savefig(output_file)
Exemplo n.º 2
0
    def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
        """Plot the mean hashtag time difference between `lattice` and its neighbors.

        Scans the 'training_world' / '2_11' lattice graph file for the first
        object whose 'id' equals `lattice`. For every neighbor in its 'links',
        three values are computed: the ratio of shared hashtags to all hashtags
        ('similarity'), the mean absolute difference of the first element of
        each shared hashtag's value divided by 3600 ('temporalDistance'), and
        the haversine distance between the two lattice ids ('geoDistance').
        Only 'temporalDistance' is plotted. The figure is saved to
        '../images/temporalDistanceExample.png'.

        NOTE: the plot title is the fixed text 'Average time difference from
        New York', whatever `lattice` is passed.
        """
        distances = {}
        graph_file = hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))
        for latticeObject in FileIO.iterateJsonFromFile(graph_file):
            if latticeObject['id']!=lattice:
                continue
            own_hashtags = latticeObject['hashtags']
            own_hashtag_set = set(own_hashtags)
            for neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
                neighborHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(own_hashtags, neighborHashtags, findLag=False)
                neighbor_hashtag_set = set(neighborHashtags)
                shared = own_hashtag_set.intersection(neighbor_hashtag_set)
                combined = own_hashtag_set.union(neighbor_hashtag_set)
                time_gaps = [abs(own_hashtags[k][0]-neighborHashtags[k][0])
                             for k in neighborHashtags if k in own_hashtags]
                distances[neighborLattice] = {
                    'similarity': len(shared)/float(len(combined)),
                    'temporalDistance': np.mean(time_gaps)/(60.*60.),
                    'geoDistance': getHaversineDistanceForLids(latticeObject['id'].replace('_', ' '), neighborLattice.replace('_', ' ')),
                }
            # Only the first matching lattice object is used.
            break
        ax = plt.subplot(111)
        dataPoints = [(getLocationFromLid(k.replace('_', ' ')), data['temporalDistance'])
                      for k, data in distances.iteritems()]
        dataPoints.sort(key=itemgetter(1))
        points, colors = zip(*dataPoints)
        sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
        # Mark the reference lattice itself in a separate color.
        plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
        divider = make_axes_locatable(ax)
        plt.title('Average time difference from New York')
        # Colorbar goes in its own axes on the right of the map.
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(sc, cax=cax)
        plt.savefig('../images/temporalDistanceExample.png')
Exemplo n.º 3
0
 def plotLatticeTemporalDistanceInHoursOnMap(latticeGraphType, latticeObject):
     """Plot the link values of a lattice on a world map using the 'autumn' colormap.

     `latticeGraphType['method']` is applied to `latticeObject` first. Each
     entry of the resulting 'links' mapping is then plotted at the location of
     its neighbor id, colored by its value, with points passed in descending
     order of value. The lattice itself is plotted in '#00FF00'. The x-axis
     label is `latticeGraphType['title']`.

     Returns the object returned by plotPointsOnWorldMap for the neighbor
     points (also used for the colorbar).
     """
     latticeObject = latticeGraphType['method'](latticeObject)
     scored_points = [(getLocationFromLid(neighborId.replace('_', ' ')), val)
                      for neighborId, val in latticeObject['links'].iteritems()]
     scored_points.sort(key=itemgetter(1), reverse=True)
     points, colors = zip(*scored_points)
     color_map = matplotlib.cm.get_cmap('autumn')
     sc = plotPointsOnWorldMap(points, c=colors, cmap=color_map, lw = 0, vmin=0)
     center = getLocationFromLid(latticeObject['id'].replace('_', ' '))
     plotPointsOnWorldMap([center], c='#00FF00', lw = 0)
     plt.xlabel(latticeGraphType['title'])
     plt.colorbar(sc)
     return sc
Exemplo n.º 4
0
 def plotLatticeTemporalClosenessScoresOnMap(latticeGraphType, latticeObject):
     """Plot the normalized link values of a lattice using the 'YlOrRd' colormap.

     `latticeGraphType['method']` is applied to `latticeObject`, then
     LatticeGraph.normalizeNode is called on the result (its return value is
     not used). Each entry of 'links' is plotted at the location of its
     neighbor id, colored by its value, with points passed in ascending order
     of value. The lattice itself is plotted in '#00FF00'. The x-axis label is
     `latticeGraphType['title']`.

     Returns the object returned by plotPointsOnWorldMap for the neighbor
     points (also used for the colorbar).
     """
     latticeObject = latticeGraphType['method'](latticeObject)
     LatticeGraph.normalizeNode(latticeObject)
     scored_points = [(getLocationFromLid(neighborId.replace('_', ' ')), val)
                      for neighborId, val in latticeObject['links'].iteritems()]
     scored_points.sort(key=itemgetter(1))
     points, colors = zip(*scored_points)
     color_map = matplotlib.cm.get_cmap('YlOrRd')
     sc = plotPointsOnWorldMap(points, c=colors, cmap=color_map, lw = 0, vmin=0)
     center = getLocationFromLid(latticeObject['id'].replace('_', ' '))
     plotPointsOnWorldMap([center], c='#00FF00', lw = 0)
     plt.xlabel(latticeGraphType['title'])
     plt.colorbar(sc)
     return sc
Exemplo n.º 5
0
    def probabilisticCoverageModelExample(hashtag, type):
        MINUTES, timeUnit = 5, 1
        print len(CoverageBasedLatticeSelectionModel.lattices)
        for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
            if hashtagObject['h']==hashtag:
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
                occsInTimeunit =  zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
                allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
                if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
                else: 
                    print getRadius(allOccurances)
                    probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
                latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
                points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#                print points[0], colors[0]
                ax = plt.subplot(111)
                sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
                divider = make_axes_locatable(ax)
#                plt.title('Jaccard similarity with New York')
                cax = divider.append_axes("right", size="5%", pad=0.05)
                plt.colorbar(sc, cax=cax)
                plt.show()
#                plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
                plt.clf()
                break
Exemplo n.º 6
0
    def top_k_locations_on_world_map():
        """Plot the 200 locations with the highest 'occurrences_count' on a world map.

        Locations are read from f_dense_hashtag_distribution_in_locations,
        ranked by 'occurrences_count' in descending order, and the first 200
        are converted to lat/long form and plotted in color 'm'. The image is
        saved to the '.png' file derived from this method's id.
        """
        output_file = fld_data_analysis_results%GeneralMethods.get_method_id() + '.png'
        ltuo_location_and_occurrence_count = [
            [location_object['location'], location_object['occurrences_count']]
            for location_object in
            FileIO.iterateJsonFromFile(f_dense_hashtag_distribution_in_locations, remove_params_dict=True)
        ]
        ltuo_location_and_occurrence_count.sort(key=itemgetter(1), reverse=True)
        lids = zip(*ltuo_location_and_occurrence_count)[0][:200]
        points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
        plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c='m',  lw = 0, alpha=1.)
        savefig(output_file)
Exemplo n.º 7
0
    def plot_global_influencers(ltuo_model_id_and_hashtag_tag):
        tuples_of_boundary_and_boundary_label = [
                ([[-90,-180], [90, 180]], 'World', 'm'),
            ]
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            print model_id, hashtag_tag
            tuples_of_location_and_color = []
            for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
                tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary)
                tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10]
                locations = zip(*tuo_location_and_influence_scores)[0]
                for location in locations: tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color])
            locations, colors = zip(*tuples_of_location_and_color)
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors,  lw = 0, alpha=1.)
            for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0)
#            plt.legend(loc=3, ncol=4, mode="expand",)
#            plt.show()
            savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))
Exemplo n.º 8
0
def plotDistributionGraphs(occurences, validTimeUnits, title, startingEpoch=None):
    """Draw two stacked world maps: occurrence counts and first-occurrence times per lattice.

    `occurences` is first filtered with
    getOccurencesFilteredByDistributionInTimeUnits and then grouped by lattice
    id. The upper subplot (211) colors each lattice by its number of
    occurrences; the lower subplot (212) colors each lattice by its earliest
    occurrence time relative to `startingEpoch`, divided by
    TIME_UNIT_IN_SECONDS.

    When `startingEpoch` is falsy, the smallest first-occurrence time among
    the lattices is used. The starting epoch that was used is returned.
    """
    occurences = getOccurencesFilteredByDistributionInTimeUnits(occurences, validTimeUnits)
    lid_and_time_pairs = sorted([(getLatticeLid(l, ACCURACY), t) for l, t in occurences], key=itemgetter(0))
    occurancesGroupedByLattice = []
    for lid, occs in groupby(lid_and_time_pairs, key=itemgetter(0)):
        # (location, sorted occurrence times) for this lattice.
        occurancesGroupedByLattice.append((getLocationFromLid(lid.replace('_', ' ')), sorted(zip(*occs)[1])))

    plt.subplot(211)
    by_count = sorted(occurancesGroupedByLattice, key=lambda t: len(t[1]))
    pointsForNumberOfOccurances, timesPerLattice = zip(*by_count)
    numberOfOccurancesList = [len(ocs) for ocs in timesPerLattice]
    cm = matplotlib.cm.get_cmap('cool')
    sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=numberOfOccurancesList, cmap=cm, lw = 0, alpha=1.0)
    plt.colorbar(sc)
    plt.title(title)
    plt.xlabel('Number of mentions')

    plt.subplot(212)
    by_first_time = sorted(occurancesGroupedByLattice, key=lambda t: min(t[1]), reverse=True)
    pointsForNumberOfOccurances, timesPerLattice = zip(*by_first_time)
    occuranceTime = [min(t) for t in timesPerLattice]
    if not startingEpoch:
        # The list is in descending order, so the last entry is the earliest time.
        startingEpoch = occuranceTime[-1]
    occuranceTime = [(t-startingEpoch)/TIME_UNIT_IN_SECONDS for t in occuranceTime]
    cm = matplotlib.cm.get_cmap('autumn')
    sc = plotPointsOnWorldMap(pointsForNumberOfOccurances, c=occuranceTime, cmap=cm, lw = 0, alpha=1.0)
    plt.colorbar(sc)
    plt.xlabel('Speed of hashtag arrival')
    return startingEpoch
Exemplo n.º 9
0
 def utm_ids_on_map():
     """Plot UTM ids on a world map, colored by log(total_hashtag_count).

     Objects are read from f_hashtags_by_utm_id. Points are passed to the
     plot in ascending order of the logged count, using the 'cool' colormap.
     The image is saved to the '.png' file derived from this method's id.
     """
     output_file = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'.png'
     ltuo_point_and_total_hashtag_count = [
         (UTMConverter.getLatLongUTMIdInLatLongForm(utm_object['utm_id']),
          log(utm_object['total_hashtag_count']))
         for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, remove_params_dict=True)
     ]
     ltuo_point_and_total_hashtag_count.sort(key=itemgetter(1))
     points, total_hashtag_counts = zip(*ltuo_point_and_total_hashtag_count)
     plot_options = dict(blueMarble=False,
                         bkcolor='#CFCFCF',
                         cmap=matplotlib.cm.cool,
                         lw=0,
                         alpha=1.)
     plotPointsOnWorldMap(points, c=total_hashtag_counts, **plot_options)
     savefig(output_file)
Exemplo n.º 10
0
    def temp():
        hashtags, MINUTES = [], 60
        for hashtagObject in FileIO.iterateJsonFromFile('americanhorrorstory'):
            if hashtagObject['h']=='americanhorrorstory':
                print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject, timeUnit=60*60), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                totalOccurances = []
                for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                    occs = occsDistributionInTimeUnits[t]
                    if occs:
                        fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
#                        print interval, t, len(occs)
                        print fileName
                        occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occ in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occs], key=itemgetter(0)), key=itemgetter(0))]
                        occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                        points, colors = zip(*occurancesGroupedByLattice)
                        plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
#                        plt.show()
                        plt.savefig(fileName)
                        plt.clf()
                exit()
Exemplo n.º 11
0
    def plot_local_influencers(ltuo_model_id_and_hashtag_tag):
        tuples_of_boundary_and_boundary_label = [
                ([[24.527135,-127.792969], [49.61071,-59.765625]], 'USA', GeneralMethods.getRandomColor()),
                ([[10.107706,-118.660469], [26.40009,-93.699531]], 'Mexico', GeneralMethods.getRandomColor()),
                ([[-16.6695,88.409841], [30.115057,119.698904]], 'SE-Asia', GeneralMethods.getRandomColor()),
                ([[-29.565473,-58.191719], [7.327985,-30.418282]], 'Brazil', GeneralMethods.getRandomColor()),
            ]
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            print model_id, hashtag_tag
            tuples_of_location_and_color = []
            for boundary, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label:
                tuo_location_and_influence_scores = Experiments.load_tuo_location_and_boundary_influence_score(model_id, hashtag_tag, boundary)
                tuo_location_and_influence_scores = sorted(tuo_location_and_influence_scores, key=itemgetter(1))[:10]
                locations = zip(*tuo_location_and_influence_scores)[0]
                for location in locations: tuples_of_location_and_color.append([getLocationFromLid(location.replace('_', ' ')), boundary_color])
            locations, colors = zip(*tuples_of_location_and_color)
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors,  lw = 0, alpha=1.)
            for _, boundary_label, boundary_color in tuples_of_boundary_and_boundary_label: plt.scatter([0], [0], label=boundary_label, c=boundary_color, lw = 0)
            plt.legend(loc=3, ncol=4, mode="expand",)
#            plt.show()
            savefig(fld_results%(GeneralMethods.get_method_id()) +'%s_%s.png'%(model_id, hashtag_tag))
Exemplo n.º 12
0
    def plot_locations_influence_on_world_map(ltuo_model_id_and_hashtag_tag, noOfInfluencers=10, percentage_of_locations=0.15):
        input_locations = [
                               ('40.6000_-73.9500', 'new_york'),
                               ('33.3500_-118.1750', 'los_angeles'),
                               ('29.7250_-97.1500', 'austin'),
                           ('30.4500_-95.7000', 'college_station'),
                            ('-22.4750_-42.7750', 'rio'),
                           ('51.4750_0.0000', 'london'),
                           ('-23.2000_-46.4000', 'sao_paulo')
                         ] 
        for model_id, hashtag_tag in ltuo_model_id_and_hashtag_tag:
            tuo_location_and_tuo_neighbor_location_and_locations_influence_score = \
                    Experiments.load_tuo_location_and_tuo_neighbor_location_and_locations_influence_score(model_id, hashtag_tag, noOfInfluencers=None, influence_type=InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE)
            for input_location, label in input_locations:
                for location, tuo_neighbor_location_and_locations_influence_score in \
                        tuo_location_and_tuo_neighbor_location_and_locations_influence_score:
                    if input_location==location:
                        input_location = getLocationFromLid(input_location.replace('_', ' '))
                        output_file = fld_results%GeneralMethods.get_method_id() + '/%s_%s/%s.png'%(model_id, hashtag_tag, label)
                        number_of_outgoing_influences = int(len(tuo_neighbor_location_and_locations_influence_score)*percentage_of_locations)
                        if number_of_outgoing_influences==0: number_of_outgoing_influences=len(tuo_neighbor_location_and_locations_influence_score)
                        locations = zip(*tuo_neighbor_location_and_locations_influence_score)[0][:number_of_outgoing_influences]
                        locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
#                        locations = filter(lambda location: isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY), locations)
                        if locations:
                            _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#000000', c='#FF00FF', returnBaseMapObject=True, lw = 0)
#                            _, m = plotPointsOnWorldMap(locations, resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#FF00FF', returnBaseMapObject=True, lw = 0)
                            for location in locations: 
    #                            if isWithinBoundingBox(location, PARTIAL_WORLD_BOUNDARY): 
                                m.drawgreatcircle(location[1], location[0], input_location[1], input_location[0], color='#FAA31B', lw=1., alpha=0.5)
#                            plotPointsOnWorldMap([input_location], blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                            plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#000000', c='#003CFF', s=40, lw = 0)
#                            plotPointsOnWorldMap([input_location], resolution='c', blueMarble=False, bkcolor='#CFCFCF', c='#003CFF', s=40, lw = 0)
                            FileIO.createDirectoryForFile(output_file)
                            print output_file
                            savefig(output_file)
                            plt.clf()
                        else:
                            GeneralMethods.runCommand('rm -rf %s'%output_file)
                        break
Exemplo n.º 13
0
 def plotGraphsForHashtag(hashtag):
     for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
         MINUTES = 5
         if hashtagObject['h']==hashtag:
             print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
             occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
             totalOccurances = []
             for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                 occs = occsDistributionInTimeUnits[t]
                 totalOccurances+=occs
                 if occs:
                     fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
                     print fileName
                     occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in totalOccurances], key=itemgetter(0)), key=itemgetter(0))]
                     occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                     points, colors = zip(*occurancesGroupedByLattice)
                     plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
                     plt.show()
 #                    plt.savefig(fileName)
                     plt.clf()
                 if (interval+1)*MINUTES>=120: break
             break
Exemplo n.º 14
0
 def plot_maps_for_every_hour():
     """Save a sequence of two-panel world maps for 'ripstevejobs' and 'cnbcdebate'.

     Each hashtag is read from './data/<hashtag>.json' and its occurrences are
     bucketed into 15-minute time units (MINUTES = 15), so despite the function
     name one frame is produced per non-empty 15-minute unit, not per hour.
     Only time units up to 24 hours after the first unit are kept.

     The folder './images/plot_maps_for_every_hour/' is removed first, then one
     image '<epoch_lag>.png' is written per lag. Occurrences accumulate from
     frame to frame, so each frame shows every location seen so far.
     """
     MINUTES = 15
     hashtags = ['ripstevejobs', 'cnbcdebate']
     # Subplot position of each hashtag: upper (211) and lower (212) panel.
     map_from_hashtag_to_subplot = dict([('ripstevejobs', 211), ('cnbcdebate', 212)])
     # epoch_lag -> hashtag -> [(lattice id, occurrence time relative to the first time unit)]
     map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag = defaultdict(dict)
     for hashtag in hashtags:
         # NOTE(review): if a file yields several objects, later ones overwrite the
         # entries of earlier ones for the same epoch_lag - confirm one object per file.
         for hashtag_object in FileIO.iterateJsonFromFile('./data/%s.json'%hashtag):
             map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time =  getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtag_object), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
             tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time = sorted(map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time.iteritems(), key=itemgetter(0))
             # The earliest time unit is the reference point for all lags.
             epoch_starting_time_unit = tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time[0][0]
             epoch_ending_time_unit = epoch_starting_time_unit+24*60*60
             for epoch_time_unit, tuples_of_location_and_epoch_occurrence_time in tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time:
                 if epoch_time_unit<=epoch_ending_time_unit:
                     # Empty time units produce no frame.
                     if tuples_of_location_and_epoch_occurrence_time:
                         epoch_lag = epoch_time_unit - epoch_starting_time_unit
                         tuples_of_location_and_epoch_occurrence_time = sorted(tuples_of_location_and_epoch_occurrence_time, key=itemgetter(1))
                         map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag][hashtag] = [(getLatticeLid(location, 0.145), epoch_occurrence_time-epoch_starting_time_unit)for location, epoch_occurrence_time in tuples_of_location_and_epoch_occurrence_time]
     # hashtag -> every (lattice id, lag) seen up to the frame currently being drawn.
     map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag = defaultdict(list)
     # Start from an empty output folder.
     GeneralMethods.runCommand('rm -rf ./images/plot_maps_for_every_hour/')
     for epoch_lag in sorted(map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag):
         file_world_map_plot = './images/plot_maps_for_every_hour/%s.png'%(epoch_lag)
         print file_world_map_plot
         map_from_hashtag_to_tuples_of_location_and_epoch_lag = map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag]
         # Add this frame's occurrences to the running totals.
         for hashtag, tuples_of_location_and_epoch_lag in map_from_hashtag_to_tuples_of_location_and_epoch_lag.iteritems():
             map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag[hashtag]+=tuples_of_location_and_epoch_lag
         # Redraw every hashtag seen so far, even if it has no new occurrences in this frame.
         for hashtag, accumulated_tuples_of_location_and_epoch_lag in map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag.iteritems():
             plt.subplot(map_from_hashtag_to_subplot[hashtag])
             # Latest lag recorded at each lattice id.
             tuples_of_location_and_epoch_max_lag= [(location, max(zip(*iterator_of_tuples_of_location_and_epoch_lag)[1]))
                                 for location, iterator_of_tuples_of_location_and_epoch_lag in 
                                 groupby(sorted(accumulated_tuples_of_location_and_epoch_lag, key=itemgetter(0)), key=itemgetter(0))
                              ]
             # Color value = end of the current time unit minus the latest lag at that location.
             locations, colors = zip(*[(getLocationFromLid(location.replace('_', ' ')), (epoch_lag+MINUTES*60)-epoch_max_lag) for location, epoch_max_lag in sorted(tuples_of_location_and_epoch_max_lag, key=itemgetter(1), reverse=True)])
             plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap=matplotlib.cm.cool, lw = 0, vmax=epoch_lag+MINUTES*60)
             plt.title('%s      (%s hours)'%(hashtag, (epoch_lag+MINUTES*60)/(60.*60)))
 #        plt.show()
         FileIO.createDirectoryForFile(file_world_map_plot)
         plt.savefig(file_world_map_plot)
         # Clear the figure so the next frame starts empty.
         plt.clf()
Exemplo n.º 15
0
def plotHashtagSourcesOnMap(timeRange, outputFolder):
    i = 1
    distribution = defaultdict(int)
    for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%(outputFolder,'%s_%s'%timeRange)):
        occuranesInHighestActiveRegion, isFirstActiveRegion = getOccuranesInHighestActiveRegion(hashtagObject, True)
        if occuranesInHighestActiveRegion:
            source, count = getSourceLattice(occuranesInHighestActiveRegion)
            print i, source;i+=1
            distribution[getLidFromLocation(source)]+=1
#        if i==10: break
    points, colors = zip(*[(getLocationFromLid(k),v) for k, v in sorted(distribution.iteritems(), key=itemgetter(1))])
    cm = matplotlib.cm.get_cmap('Paired')
    sc = plotPointsOnWorldMap(points, c=colors, cmap=cm, lw = 0)
    plt.colorbar(sc)
    plt.show()
Exemplo n.º 16
0
    def plot_geo_distribution_in_social_networks():
        total_checkins = 0.0
        for social_network in [FOURSQUARE_ID, BRIGHTKITE_ID, GOWALLA_ID]:
            print social_network
            ax = plt.subplot(111)
            tuples_of_location_and_location_occurences_count = [(getLocationFromLid(data['key'].replace('_', ' ')), data['distribution'][social_network]) 
                                                         for i, data in enumerate(iterateJsonFromFile(lidsToDistributionInSocialNetworksMapFile%BOUNDARY_ID))\
                                                         if social_network in data['distribution'] and data['distribution'][social_network]>25]
            tuples_of_location_and_location_occurences_count = sorted(tuples_of_location_and_location_occurences_count, key=itemgetter(1))
            locations, colors = zip(*tuples_of_location_and_location_occurences_count)
            sc = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
            divider = make_axes_locatable(ax)
#            plt.title('Jaccard similarity with New York')
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(sc, cax=cax)
#            for k, v in tuples_of_location_and_location_occurences_count:
#                print social_network, k, v
#            print len(tuples_of_location_and_location_occurences_count)
            plt.show()
Exemplo n.º 17
0
    def influence_clusters(model_ids, min_cluster_size=15):
        influence_type = InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE
        for model_id in model_ids:
            digraph_of_location_and_location_similarity = nx.DiGraph()
            for line_count, (location, tuo_neighbor_location_and_mf_influence_type_and_similarity) in \
                        enumerate(FileIO.iterateJsonFromFile(tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)):
#                print line_count
                for neighbor_location, mf_influence_type_to_similarity in tuo_neighbor_location_and_mf_influence_type_and_similarity: 
                    if isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY) and \
                            isWithinBoundingBox(getLocationFromLid(neighbor_location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY):
                        digraph_of_location_and_location_similarity.add_edge(location, neighbor_location, {'w': mf_influence_type_to_similarity[influence_type]})

            no_of_clusters, tuo_location_and_cluster_id = clusterUsingAffinityPropagation(digraph_of_location_and_location_similarity)
            tuo_cluster_id_to_locations = [ (cluster_id, zip(*ito_tuo_location_and_cluster_id)[0])
                                            for cluster_id, ito_tuo_location_and_cluster_id in 
                                            groupby(
                                                  sorted(tuo_location_and_cluster_id, key=itemgetter(1)),
                                                  key=itemgetter(1)
                                                  )
                                           ]
            mf_location_to_cluster_id = dict(tuo_location_and_cluster_id)
            mf_cluster_id_to_cluster_color = dict([(i, GeneralMethods.getRandomColor()) for i in range(no_of_clusters)])
            mf_valid_locations_to_color = {}
            for cluster_id, locations in \
                    sorted(tuo_cluster_id_to_locations, key=lambda (cluster_id, locations): len(locations))[-10:]:
#                if len(locations)>min_cluster_size:
                print cluster_id, len(locations)
                for location in locations: mf_valid_locations_to_color[location] \
                    = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[location]]
            locations, colors = zip(*mf_valid_locations_to_color.iteritems())
            locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations]
            _, m = plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, s=0, returnBaseMapObject=True, lw = 0)
            for u, v, data in digraph_of_location_and_location_similarity.edges(data=True):
                if u in mf_valid_locations_to_color and v in mf_valid_locations_to_color \
                        and mf_location_to_cluster_id[u]==mf_location_to_cluster_id[v]:
                    color, u, v, w = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
                    m.drawgreatcircle(u[1], u[0], v[1], v[0], color=color, alpha=0.6)
            plt.show()
Exemplo n.º 18
0
    def load_checkins_graph(checkins_graph_file):
        graph = nx.Graph()
        for data in iterateJsonFromFile(checkins_graph_file):
            (u, v) = data['e'].split('__')
            graph.add_edge(u , v, {'w': data['w']})
        noOfClusters, clusters = clusterUsingAffinityPropagation(graph)
#        for cluster in clusters:
#            print len(cluster), cluster
            
        nodeToClusterIdMap = dict(clusters)
        colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)])
        clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))]
        points, colors = zip(*map(lambda  l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes()))
        _, m =plotPointsOnWorldMap(points[:1], s=0, lw=0, c=colors[:1], returnBaseMapObject=True)
        for u, v, data in graph.edges(data=True):
            if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]:
                color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w']
                m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=1.5)
#        plt.title(title)
        plt.show()
        print noOfClusters
        print graph.number_of_edges()
        print graph.number_of_nodes()
Exemplo n.º 19
0
 def plotLatticesOnMap(timeRange, outputFolder):
     """Show the location of every lattice in the lattice graph file in color 'm'.

     The file is selected by `outputFolder` and `timeRange` (a pair formatted
     as '<a>_<b>'). The figure is displayed with plt.show().
     """
     graph_file = hashtagsLatticeGraphFile%(outputFolder,'%s_%s'%timeRange)
     points = []
     for latticeObject in FileIO.iterateJsonFromFile(graph_file):
         points.append(getLocationFromLid(latticeObject['id'].replace('_', ' ')))
     plotPointsOnWorldMap(points, c='m', lw=0)
     plt.show()
Exemplo n.º 20
0
 def plotPoints(links, xlabel):
     """Plot the values of `links` on a world map with the 'cool' colormap on a 0-1 scale.

     `links` maps lattice ids to values; points are passed in ascending order
     of value. The location of `locationObject` is added in color 'k'
     (size 20). `locationObject` is not a parameter: it is taken from the
     enclosing scope. `xlabel` becomes the x-axis label and a colorbar is
     added.
     """
     color_map = matplotlib.cm.get_cmap('cool')
     scored_points = [(getLocationFromLid(k.replace('_', ' ')), v) for k, v in links.iteritems()]
     scored_points.sort(key=itemgetter(1))
     points, colors = zip(*scored_points)
     sc = plotPointsOnWorldMap(points, c=colors, cmap=color_map, lw=0, vmin=0, vmax=1)
     center = getLocationFromLid(locationObject['id'].replace('_', ' '))
     plotPointsOnWorldMap([center], c='k', s=20, lw=0)
     plt.xlabel(xlabel)
     plt.colorbar(sc)
Exemplo n.º 21
0
def plotHastagClasses(timeRange, folderType):
    """Save a three-panel summary figure for every hashtag the classifier accepts.

    Hashtags are read from the 'testing_world' hashtags file for the fixed
    range (2, 11); timeRange is currently not used. A hashtag is plotted when
    HashtagsClassifier.classify() returns something other than None. Panels:
      311 - world map of the occurrences returned by
            getOccuranesInHighestActiveRegion(), grouped by lattice and
            coloured by the number of occurrences in each lattice;
      312 - time series of those occurrences;
      313 - time series of all occurrences with the same region overlaid.
    The figure is written to
    hashtagsImagesHashtagsClassFolder%folderType + '<classId>/<hashtag>.png'.
    A figure that cannot be saved is reported on stdout and skipped; the
    canvas is cleared either way so the next hashtag starts from a blank one.
    """
    def plotTimeSeries(occurances, fmt, **kwargs):
        # Bucket the occurrences with the classifier's time unit and draw
        # them against calendar dates.
        timeUnits, timeSeries = getTimeUnitsAndTimeSeries(occurances, timeUnit=HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)
        plt.plot_date([datetime.datetime.fromtimestamp(t) for t in timeUnits], timeSeries, fmt, **kwargs)
    def plotActivityRegions(activityRegions, mainRangeId):
        # Draw one series per region; the region being mapped is always magenta.
        for subRangeId, (occurances, color) in enumerate(activityRegions):
            if subRangeId==mainRangeId: color='m'
            plotTimeSeries(occurances, '-o', c=color)
    hashtagObjects = FileIO.iterateJsonFromFile(hashtagsFile%('testing_world','%s_%s'%(2,11)))
    for count, hashtagObject in enumerate(hashtagObjects, 1):
        print(count)
        classId = HashtagsClassifier.classify(hashtagObject)
        if classId is not None:
            # NOTE(review): the id returned by the classifier is discarded and
            # every accepted hashtag is filed under class 1. Kept as found -
            # confirm whether this override is still wanted.
            classId = 1
            outputFile = hashtagsImagesHashtagsClassFolder%folderType+'%s/%s.png'%(classId, hashtagObject['h'])
            FileIO.createDirectoryForFile(outputFile)
            occurancesInActivityRegions = [[getOccuranesInHighestActiveRegion(hashtagObject), 'm']]

            currentMainRangeId = 0
            for occurances1, _ in occurancesInActivityRegions:
                print(outputFile)
                ax = plt.subplot(312)
                plotActivityRegions(occurancesInActivityRegions, currentMainRangeId)
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)

                ax = plt.subplot(313)
                plotTimeSeries(hashtagObject['oc'], '-')
                plotActivityRegions(occurancesInActivityRegions, currentMainRangeId)
                plt.setp(ax.get_xticklabels(), rotation=10, fontsize=7)

                plt.subplot(311)
                # Sorting the lattice ids first makes equal ids adjacent, which
                # groupby needs in order to count them; the result is then
                # ordered by that count.
                latticeIds = sorted(getLatticeLid(l, LATTICE_ACCURACY) for l, _ in occurances1)
                occurancesGroupedByLattice = sorted(
                                                    [(getLocationFromLid(lid.replace('_', ' ')), len(list(occs))) for lid, occs in groupby(latticeIds)],
                                                    key=itemgetter(1)
                                                    )
                points, colors = zip(*occurancesGroupedByLattice)
                if len(points)>1:
                    sc = plotPointsOnWorldMap(points, c=colors, cmap=matplotlib.cm.get_cmap('cool'), lw=0, alpha=1.0)
                    plt.colorbar(sc)
                else: plotPointsOnWorldMap(points, c='m', lw=0)
                plt.title(hashtagObject['h']+ '(%d)'%len(occurancesGroupedByLattice))
                try:
                    plt.savefig(outputFile)
                except Exception as e:
                    # Best effort: one unsaveable figure must not stop the run,
                    # but the failure should be visible.
                    print('Unable to save %s: %s'%(outputFile, e))
                finally:
                    # Always clear, otherwise a failed save would leave this
                    # figure's panels underneath the next hashtag's plot.
                    plt.clf()
                currentMainRangeId+=1