def probabilisticCoverageModelExample(hashtag, type):
    """Plot, for one hashtag, the lattice scores produced by the coverage-based
    lattice-selection model on a world map.

    hashtag -- hashtag string to look up in the data file.
    type    -- '5m' scores lattices from only the first time unit of
               occurrences; any other value uses all occurrences.
    Side effects: prints diagnostics and shows a matplotlib figure.
    """
    # MINUTES is the epoch bucket width (in minutes); timeUnit is how many
    # buckets make up the "early" observation window.
    MINUTES, timeUnit = 5, 1
    print len(CoverageBasedLatticeSelectionModel.lattices)
    for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'):
        if hashtagObject['h']==hashtag:
            # Bucket the occurrences from the hashtag's most active region into
            # MINUTES*60-second epochs (gaps filled with empty buckets).
            occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            # Occurrence lists ordered by epoch; each element is presumably a list
            # of (location, time) tuples -- TODO confirm against
            # getOccurranceDistributionInEpochs.
            occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1])
            # Flatten the first timeUnit buckets / all buckets and keep only the
            # location component (index 0 of each tuple).
            occsInTimeunit = zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0]
            allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0]
            if type=='5m': probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(occsInTimeunit)
            else:
                print getRadius(allOccurances)
                probabilityDistributionForObservedLattices = CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(allOccurances)
            # Spread the observed distribution over the model's full lattice set.
            latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(CoverageBasedLatticeSelectionModel.lattices, probabilityDistributionForObservedLattices)
            # Sort by score so higher-scoring lattices are drawn last (on top).
            points, colors = zip(*map(lambda t: (getLocationFromLid(t[0].replace('_', ' ')), t[1]), sorted(latticeScores.iteritems(), key=itemgetter(1))))
#            print points[0], colors[0]
            ax = plt.subplot(111)
            sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap='cool', lw = 0)
            # Attach the colorbar in its own axes to the right of the map.
            divider = make_axes_locatable(ax)
#            plt.title('Jaccard similarity with New York')
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(sc, cax=cax)
            plt.show()
#            plt.savefig('../images/coverage_examples/%s_%s.png'%(hashtag, type))
            plt.clf()
            # Only the first matching hashtag object is plotted.
            break
def coverageIndication(): MINUTES = 5 for timeUnit, color, shape in [(1, 'r', 'x'), (3, 'g', 'd'), (6, 'b', 's')]: print timeUnit data = defaultdict(int) for hashtagObject in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))): try: occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False) occurances = list(zip(*sorted(occsDistributionInTimeUnits.iteritems(), key=itemgetter(0)))[1]) occsInTimeunit = zip(*reduce(lambda aggList, l: aggList+l, occurances[:timeUnit], []))[0] if len(occsInTimeunit)>10: allOccurances = zip(*reduce(lambda aggList, l: aggList+l, occurances, []))[0] timeUnitRadius, allRadius = getRadius(occsInTimeunit), getRadius(allOccurances) data[int(abs(timeUnitRadius-allRadius))/50*50+50]+=1 # data[round(abs(timeUnitRadius-allRadius)/allRadius, 2)]+=1 except IndexError as e: pass for k in data.keys()[:]: if data[k]<3: del data[k] dataX, dataY = zip(*sorted(data.iteritems(), key=itemgetter(0))) plt.loglog(dataX, dataY, lw=2, label=str(timeUnit*MINUTES) + ' minutes', marker=shape) # plt.loglog([1],[1]) # plt.title('Early indication of coverage'), plt.xlabel('Coverage difference (miles)', fontsize=20), plt.ylabel('Number of hashtags', fontsize=20) plt.legend() # plt.show() plt.savefig('../images/coverageIndication.png')
def temp(hashtag='blackparentsquotes'): for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'): # print hashtagObject['h'] if hashtagObject['h']==hashtag: print hashtagObject['h'] occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(hashtagObject['oc'], timeUnit=5, fillInGaps=True, occurancesCount=False) # plt.plot_date() exit()
def temp(): hashtags, MINUTES = [], 60 for hashtagObject in FileIO.iterateJsonFromFile('americanhorrorstory'): if hashtagObject['h']=='americanhorrorstory': print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc']) occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject, timeUnit=60*60), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False) totalOccurances = [] for interval, t in enumerate(sorted(occsDistributionInTimeUnits)): occs = occsDistributionInTimeUnits[t] if occs: fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName) # print interval, t, len(occs) print fileName occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occ in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in occs], key=itemgetter(0)), key=itemgetter(0))] occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1)) points, colors = zip(*occurancesGroupedByLattice) plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0) # plt.show() plt.savefig(fileName) plt.clf() exit()
def plotGraphsForHashtag(hashtag): for hashtagObject in FileIO.iterateJsonFromFile('/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow'): MINUTES = 5 if hashtagObject['h']==hashtag: print unicode(hashtagObject['h']).encode('utf-8'), len(hashtagObject['oc']) occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtagObject), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False) totalOccurances = [] for interval, t in enumerate(sorted(occsDistributionInTimeUnits)): occs = occsDistributionInTimeUnits[t] totalOccurances+=occs if occs: fileName = '../images/plotsOnMap/%s/%s.png'%(hashtagObject['h'], (interval+1)*MINUTES); FileIO.createDirectoryForFile(fileName) print fileName occurancesGroupedByLattice = [(getLocationFromLid(lid.replace('_', ' ')), 'm') for lid, occs in groupby(sorted([(getLatticeLid(l, LATTICE_ACCURACY), t) for l, t in totalOccurances], key=itemgetter(0)), key=itemgetter(0))] occurancesGroupedByLattice = sorted(occurancesGroupedByLattice, key=itemgetter(1)) points, colors = zip(*occurancesGroupedByLattice) plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', c=colors, lw = 0) plt.show() # plt.savefig(fileName) plt.clf() if (interval+1)*MINUTES>=120: break break
def plot_maps_for_every_minute():
    """Render one world map per minute of lag for each tracked hashtag,
    coloring every lattice by how recently it was last active.

    Phase 1 buckets each hashtag's occurrences into 1-minute epochs and, for
    the first hour after its start, records per lag the (lattice id, lag of
    each occurrence) pairs.
    Phase 2 replays the lags in order, accumulating occurrences, and saves a
    frame ./images/plot_maps_for_every_minute/<lag>.png per lag.
    """
    MINUTES = 1
    hashtags = ['ripstevejobs']
    map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag = defaultdict(dict)
    for hashtag in hashtags:
        for hashtag_object in FileIO.iterateJsonFromFile('./data/%s.json'%hashtag):
            # Bucket occurrences from the most active region into 1-minute epochs.
            map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time = getOccurranceDistributionInEpochs(getOccuranesInHighestActiveRegion(hashtag_object), timeUnit=MINUTES*60, fillInGaps=True, occurancesCount=False)
            tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time = sorted(map_from_epoch_time_unit_to_tuples_of_location_and_epoch_occurrence_time.iteritems(), key=itemgetter(0))
            # Only the first hour after the earliest epoch is kept.
            epoch_starting_time_unit = tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time[0][0]
            epoch_ending_time_unit = epoch_starting_time_unit+1*60*60
            for epoch_time_unit, tuples_of_location_and_epoch_occurrence_time in tuples_of_epoch_time_unit_and_tuples_of_location_and_epoch_occurrence_time:
                if epoch_time_unit<=epoch_ending_time_unit:
                    if tuples_of_location_and_epoch_occurrence_time:
                        epoch_lag = epoch_time_unit - epoch_starting_time_unit
                        tuples_of_location_and_epoch_occurrence_time = sorted(tuples_of_location_and_epoch_occurrence_time, key=itemgetter(1))
                        # Store (lattice id at 0.145 accuracy, occurrence lag) per hashtag.
                        map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag][hashtag] = [(getLatticeLid(location, 0.145), epoch_occurrence_time-epoch_starting_time_unit)for location, epoch_occurrence_time in tuples_of_location_and_epoch_occurrence_time]
    map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag = defaultdict(list)
    # Start from a clean output directory; frames are regenerated below.
    GeneralMethods.runCommand('rm -rf ./images/plot_maps_for_every_minute/')
    for epoch_lag in sorted(map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag):
        file_world_map_plot = './images/plot_maps_for_every_minute/%s.png'%(epoch_lag)
        print file_world_map_plot
        map_from_hashtag_to_tuples_of_location_and_epoch_lag = map_from_epoch_lag_to_map_from_hashtag_to_tuples_of_location_and_epoch_lag[epoch_lag]
        # Accumulate this lag's occurrences into the running per-hashtag history.
        for hashtag, tuples_of_location_and_epoch_lag in map_from_hashtag_to_tuples_of_location_and_epoch_lag.iteritems():
            map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag[hashtag]+=tuples_of_location_and_epoch_lag
        for hashtag, accumulated_tuples_of_location_and_epoch_lag in map_from_hashtag_to_accumulated_tuples_of_location_and_epoch_lag.iteritems():
            # For each lattice keep the latest (max) lag seen so far.
            tuples_of_location_and_epoch_max_lag= [(location, max(zip(*iterator_of_tuples_of_location_and_epoch_lag)[1])) for location, iterator_of_tuples_of_location_and_epoch_lag in groupby(sorted(accumulated_tuples_of_location_and_epoch_lag, key=itemgetter(0)), key=itemgetter(0)) ]
            # Color = recency: smaller value means more recent activity; drawn
            # most-recent-last so fresh lattices render on top.
            locations, colors = zip(*[(getLocationFromLid(location.replace('_', ' ')), (epoch_lag+MINUTES*60)-epoch_max_lag) for location, epoch_max_lag in sorted(tuples_of_location_and_epoch_max_lag, key=itemgetter(1), reverse=True)])
            plotPointsOnWorldMap(locations, blueMarble=False, bkcolor='#CFCFCF', c=colors, cmap=matplotlib.cm.cool, lw = 0, vmax=epoch_lag+MINUTES*60)
            plt.title('%s (%s minutes)'%(hashtag, (epoch_lag+MINUTES*60)/(60.)))
#            plt.show()
            FileIO.createDirectoryForFile(file_world_map_plot)
            plt.savefig(file_world_map_plot)
            plt.clf()