def probabilisticCoverageModelExample(hashtag, type):
    """Show the lattice spread probabilities of one hashtag on a world map.

    Iterates the hashtag JSON file until it finds the first object whose
    ``'h'`` field equals ``hashtag``. For that object it builds a probability
    distribution over the observed lattices with
    ``CoverageBasedLatticeSelectionModel``, spreads it over all model lattices
    and displays the scores as a coloured scatter plot with a colour bar.
    If no object matches, nothing is plotted.

    Args:
        hashtag: Value compared against ``hashtagObject['h']``.
        type: ``'5m'`` restricts the observation to the first epoch bucket
            (buckets are built with ``timeUnit=MINUTES*60``, presumably
            seconds -- confirm against getOccurranceDistributionInEpochs).
            Any other value uses every occurrence and additionally prints
            ``getRadius`` of those occurrences. The parameter name shadows
            the builtin but is kept so keyword callers keep working.

    Raises:
        IndexError: If the selected window contains no occurrences.
    """
    # Local import so the block does not depend on the file's import header.
    from itertools import chain

    MINUTES, timeUnit = 5, 1
    print(len(CoverageBasedLatticeSelectionModel.lattices))
    hashtagObjects = FileIO.iterateJsonFromFile(
        '/mnt/chevron/kykamath/data/geo/hashtags/analysis/all_world/2_11/hashtagsWithoutEndingWindow')
    for hashtagObject in hashtagObjects:
        if hashtagObject['h'] != hashtag:
            continue
        occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(
            getOccuranesInHighestActiveRegion(hashtagObject),
            timeUnit=MINUTES * 60, fillInGaps=True, occurancesCount=False)
        # One list of occurrences per epoch, ordered by epoch key.
        occurances = [
            occs for _, occs in sorted(occsDistributionInTimeUnits.items(), key=itemgetter(0))
        ]
        # Only the window that is actually used gets flattened; chaining the
        # buckets is linear, unlike repeated list concatenation.
        selectedBuckets = occurances[:timeUnit] if type == '5m' else occurances
        # Keep the first component of every occurrence tuple (indexing an
        # empty result raises IndexError, as documented above).
        observedOccurances = list(zip(*chain.from_iterable(selectedBuckets)))[0]
        if type != '5m':
            print(getRadius(observedOccurances))
        probabilityDistributionForObservedLattices = (
            CoverageBasedLatticeSelectionModel.probabilityDistributionForLattices(observedOccurances))
        latticeScores = CoverageBasedLatticeSelectionModel.spreadProbability(
            CoverageBasedLatticeSelectionModel.lattices,
            probabilityDistributionForObservedLattices)
        # Points are handed to the plot in ascending score order.
        scoredLattices = sorted(latticeScores.items(), key=itemgetter(1))
        points, colors = zip(*[
            (getLocationFromLid(lid.replace('_', ' ')), score)
            for lid, score in scoredLattices
        ])
        ax = plt.subplot(111)
        sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF',
                                  c=colors, cmap='cool', lw=0)
        # Attach a narrow colour bar axis to the right of the map.
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(sc, cax=cax)
        # To write the figure to disk instead of showing it:
        # plt.savefig('../images/coverage_examples/%s_%s.png' % (hashtag, type))
        plt.show()
        plt.clf()
        # Only the first matching hashtag object is plotted.
        break
def coverageIndication():
    """Plot how far early coverage is from final coverage, per early window.

    For each early window (the first 1, 3 and 6 epoch buckets, labelled as
    5, 15 and 30 minutes) every hashtag in the training file is processed:
    if the window holds more than 10 occurrences, the absolute difference
    between ``getRadius`` of the window and ``getRadius`` of all occurrences
    is rounded up to the enclosing multiple of 50 and counted. Buckets seen
    fewer than 3 times are dropped, and the remaining (difference, count)
    pairs are drawn as one log-log curve per window. The figure is written
    to ``'../images/coverageIndication.png'``.
    """
    # Local import so the block does not depend on the file's import header.
    from itertools import chain

    MINUTES = 5
    # (leading buckets, colour, marker).
    # NOTE(review): the colour entry was never passed to the plot call in the
    # original either; left unused so the figure does not change -- confirm.
    for timeUnit, _color, shape in [(1, 'r', 'x'), (3, 'g', 'd'), (6, 'b', 's')]:
        print(timeUnit)
        data = defaultdict(int)
        for hashtagObject in FileIO.iterateJsonFromFile(
                hashtagsFile % ('training_world', '%s_%s' % (2, 11))):
            try:
                occsDistributionInTimeUnits = getOccurranceDistributionInEpochs(
                    getOccuranesInHighestActiveRegion(hashtagObject),
                    timeUnit=MINUTES * 60, fillInGaps=True, occurancesCount=False)
                # One list of occurrences per epoch, ordered by epoch key.
                occurances = [
                    occs for _, occs in
                    sorted(occsDistributionInTimeUnits.items(), key=itemgetter(0))
                ]
                # First component of every occurrence in the early window;
                # indexing an empty result raises IndexError (handled below).
                occsInTimeunit = list(zip(*chain.from_iterable(occurances[:timeUnit])))[0]
                if len(occsInTimeunit) > 10:
                    allOccurances = list(zip(*chain.from_iterable(occurances)))[0]
                    timeUnitRadius = getRadius(occsInTimeunit)
                    allRadius = getRadius(allOccurances)
                    # Floor division keeps the 50-wide bucketing independent
                    # of the interpreter's division semantics.
                    data[int(abs(timeUnitRadius - allRadius)) // 50 * 50 + 50] += 1
            except IndexError:
                # Deliberate best-effort: hashtags without usable occurrences
                # (e.g. an empty early window) are skipped, not fatal.
                continue
        # Keep only buckets observed at least 3 times, ordered by difference.
        frequentBuckets = sorted(
            ((difference, count) for difference, count in data.items() if count >= 3),
            key=itemgetter(0))
        if not frequentBuckets:
            # Nothing to draw for this window; unpacking an empty zip would
            # raise ValueError.
            continue
        dataX, dataY = zip(*frequentBuckets)
        plt.loglog(dataX, dataY, lw=2, label=str(timeUnit * MINUTES) + ' minutes', marker=shape)
    # NOTE(review): in the collapsed source the preceding plt.title(...) call
    # was commented out; the axis labels are treated as live code -- confirm.
    plt.xlabel('Coverage difference (miles)', fontsize=20)
    plt.ylabel('Number of hashtags', fontsize=20)
    plt.legend()
    plt.savefig('../images/coverageIndication.png')