Exemplo n.º 1
0
    def probabilisticCoverageModelExample(hashtag, type):
        MINUTES, timeUnit = 5, 1
        print len(CoverageBasedLatticeSelectionModel.lattices)
        for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
            if hashtagObject['h']==hashtag:
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
                occsInTimeunit =  zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
                allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
                if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
                else: 
                    print getRadius(allOccurances)
                    probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
                latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
                points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#                print points[0], colors[0]
                ax = plt.subplot(111)
                sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
                divider = make_axes_locatable(ax)
#                plt.title('Jaccard similarity with New York')
                cax = divider.append_axes("right", size="5%", pad=0.05)
                plt.colorbar(sc, cax=cax)
                plt.show()
#                plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
                plt.clf()
                break
Exemplo n.º 2
0
    def coverageIndication():
        MINUTES = 5
        for timeUnit, color, shape in [(1, 'r', 'x'), (3, 'g', 'd'), (6, 'b', 's')]:
            print timeUnit
            data = defaultdict(int)
            for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))):
                try:
                    occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                    occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
                    occsInTimeunit =  zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
                    if len(occsInTimeunit)>10:
                        allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
                        timeUnitRadius, allRadius = getRadius(occsInTimeunit), getRadius(allOccurances)
                        data[int(abs(timeUnitRadius-allRadius))/50*50+50]+=1
#                        data[round(abs(timeUnitRadius-allRadius)/allRadius, 2)]+=1
                except IndexError as e: pass
            for k in data.keys()[:]: 
                if data[k]<3: del data[k]
            dataX, dataY = zip(*sorted(data.iteritems(), key=itemgetter(0)))
            plt.loglog(dataX, dataY, lw=2, label=str(timeUnit*MINUTES) + ' minutes', marker=shape)
#        plt.loglog([1],[1])
#        plt.title('Early indication of coverage'), 
        plt.xlabel('Coverage difference (miles)', fontsize=20), plt.ylabel('Number of hashtags', fontsize=20)
        plt.legend()
#        plt.show()
        plt.savefig('../images/coverageIndication.png')
Exemplo n.º 3
0
    def temp():
        hashtags, MINUTES = [], 60
        for hashtagObject in FileIO.iterateJsonFromFile('americanhorrorstory'):
            if hashtagObject['h']=='americanhorrorstory':
                print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject, timeUnit=60*60), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
                totalOccurances = []
                for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                    occs = occsDistributionInTimeUnits[t]
                    if occs:
                        fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
#                        print interval, t, len(occs)
                        print fileName
                        occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occ in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occs], key=itemgetter(0)), key=itemgetter(0))]
                        occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                        points, colors = zip(*occurancesGroupedByLattice)
                        plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
#                        plt.show()
                        plt.savefig(fileName)
                        plt.clf()
                exit()
Exemplo n.º 4
0
 def plotGraphsForHashtag(hashtag):
     for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
         MINUTES = 5
         if hashtagObject['h']==hashtag:
             print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc'])
             occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
             totalOccurances = []
             for interval, t in enumerate(sorted(occsDistributionInTimeUnits)):
                 occs = occsDistributionInTimeUnits[t]
                 totalOccurances+=occs
                 if occs:
                     fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName)
                     print fileName
                     occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in totalOccurances], key=itemgetter(0)), key=itemgetter(0))]
                     occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1))
                     points, colors = zip(*occurancesGroupedByLattice)
                     plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0)
                     plt.show()
 #                    plt.savefig(fileName)
                     plt.clf()
                 if (interval+1)*MINUTES>=120: break
             break
Exemplo n.º 5
0
 def plot_maps_for_every_minute():
     MINUTES = 1
     hashtags = ['ripstevejobs']
     map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag = defaultdict(dict)
     for hashtag in hashtags:
         for hashtag_object in FileIO.iterateJsonFromFile('./data/%s.json'%hashtag):
             map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time =  getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtag_object), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
             tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time = sorted(map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time.iteritems(), key=itemgetter(0))
             epoch_starting_time_unit = tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time[0][0]
             epoch_ending_time_unit = epoch_starting_time_unit+1*60*60
             for epoch_time_unit, tuples_of_location_and_epoch_occurrence_time in tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time:
                 if epoch_time_unit<=epoch_ending_time_unit:
                     if tuples_of_location_and_epoch_occurrence_time:
                         epoch_lag = epoch_time_unit - epoch_starting_time_unit
                         tuples_of_location_and_epoch_occurrence_time = sorted(tuples_of_location_and_epoch_occurrence_time, key=itemgetter(1))
                         map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag][hashtag] = [(getLatticeLid(location, 0.145), epoch_occurrence_time-epoch_starting_time_unit)for location, epoch_occurrence_time in tuples_of_location_and_epoch_occurrence_time]
     map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag = defaultdict(list)
     GeneralMethods.runCommand('rm -rf ./images/plot_maps_for_every_minute/')
     for epoch_lag in sorted(map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag):
         file_world_map_plot = './images/plot_maps_for_every_minute/%s.png'%(epoch_lag)
         print file_world_map_plot
         map_from_hashtag_to_tuples_of_location_and_epoch_lag = map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag]
         for hashtag, tuples_of_location_and_epoch_lag in map_from_hashtag_to_tuples_of_location_and_epoch_lag.iteritems():
             map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag[hashtag]+=tuples_of_location_and_epoch_lag
         for hashtag, accumulated_tuples_of_location_and_epoch_lag in map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag.iteritems():
             tuples_of_location_and_epoch_max_lag= [(location, max(zip(*iterator_of_tuples_of_location_and_epoch_lag)[1]))
                                for location, iterator_of_tuples_of_location_and_epoch_lag in 
                                groupby(sorted(accumulated_tuples_of_location_and_epoch_lag, key=itemgetter(0)), key=itemgetter(0))
                             ]
             locations, colors = zip(*[(getLocationFromLid(location.replace('_', ' ')), (epoch_lag+MINUTES*60)-epoch_max_lag) for location, epoch_max_lag in sorted(tuples_of_location_and_epoch_max_lag, key=itemgetter(1), reverse=True)])
             plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap=matplotlib.cm.cool, lw = 0, vmax=epoch_lag+MINUTES*60)
             plt.title('%s      (%s minutes)'%(hashtag, (epoch_lag+MINUTES*60)/(60.)))
 #        plt.show()
         FileIO.createDirectoryForFile(file_world_map_plot)
         plt.savefig(file_world_map_plot)
         plt.clf()