def temporalLocalityTemporalDistanceExample(lattice=NEW_YORK):
    """Draw a world map of the mean hashtag time lag between `lattice` and its neighbours.

    Scans the 'training_world' lattice graph file for the record whose 'id'
    equals `lattice`. For every neighbour listed under that record's 'links',
    the neighbour's hashtags are first passed through
    filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance, then the mean
    absolute difference between the first element of each shared hashtag's
    entry on both sides is taken and divided by 3600 (presumably seconds to
    hours -- confirm against the data format).

    Neighbours are plotted coloured by that value ('RdPu' colormap), the
    lattice itself is plotted in green, a colour bar is added and the figure
    is written to '../images/temporalDistanceExample.png'.

    lattice -- id of the lattice to centre the example on.

    Raises ValueError if no neighbour yields a temporal distance (lattice not
    present in the graph file, no links, or no shared hashtags).
    """
    graphFile = hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))
    dataPoints = []
    for latticeObject in FileIO.iterateJsonFromFile(graphFile):
        if latticeObject['id'] != lattice:
            continue
        latticeHashtags = latticeObject['hashtags']
        for neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
            neighborHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(latticeHashtags, neighborHashtags, findLag=False)
            lags = [abs(latticeHashtags[k][0]-neighborHashtags[k][0]) for k in neighborHashtags if k in latticeHashtags]
            # The mean of an empty list is NaN, which has no sort order and no
            # meaningful colour, so neighbours without shared hashtags are skipped.
            if not lags:
                continue
            temporalDistance = np.mean(lags)/(60.*60.)
            dataPoints.append((getLocationFromLid(neighborLattice.replace('_', ' ')), temporalDistance))
        # Only the record for `lattice` is needed; stop reading the file.
        break
    if not dataPoints:
        # Fail with a clear message instead of the opaque unpacking error
        # that zip(*[]) would produce below.
        raise ValueError('No neighbor temporal distances found for lattice %r' % (lattice,))
    ax = plt.subplot(111)
    # Sort by temporal distance so the points are drawn in increasing order.
    points, colors = zip(*sorted(dataPoints, key=itemgetter(1)))
    sc = plotPointsOnWorldMap(points, blueMarble=False, bkcolor='#CFCFCF', cmap='RdPu', c=colors, lw = 0, alpha=1.0)
    plotPointsOnWorldMap([getLocationFromLid(lattice.replace('_', ' '))], blueMarble=False, bkcolor='#CFCFCF', c='#64FF1C', lw = 0)
    divider = make_axes_locatable(ax)
    # NOTE(review): the title is hard-coded and does not follow the `lattice` argument.
    plt.title('Average time difference from New York')
    # Narrow axes on the right of the map to hold the colour bar.
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(sc, cax=cax)
    plt.savefig('../images/temporalDistanceExample.png')
def _getDistances():
    """Collect similarity, temporal and geographic distances for linked lattice pairs.

    Reads every record of the 'training_world' lattice graph file and, for
    each (lattice, neighbour) link, stores one entry in the returned dict.
    The entry is keyed by the two lattice ids sorted and joined with '_', so
    a pair is only computed the first time it is met.

    Each entry holds:
      'similarity'       -- size of the intersection over size of the union of
                            the lattice's hashtags and the neighbour's
                            filtered hashtags.
      'temporalDistance' -- np.mean of the absolute differences between the
                            first element of each shared hashtag's entry.
      'geoDistance'      -- getHaversineDistanceForLids of the two ids, with
                            '_' replaced by ' '.

    NOTE(review): the source of this function was flattened onto one line;
    this layout assumes the whole per-pair computation sits under the
    "key not in distances" guard -- confirm.
    """
    graphFile = hashtagsLatticeGraphFile%('training_world','%s_%s'%(2,11))
    distances = {}
    for latticeObject in FileIO.iterateJsonFromFile(graphFile):
        ownHashtags = latticeObject['hashtags']
        ownTags = set(ownHashtags)
        for neighborId, linkedHashtags in latticeObject['links'].iteritems():
            latticeId = latticeObject['id']
            pairKey = '_'.join(sorted([latticeId, neighborId]))
            # Pair already handled from the other endpoint (or an earlier record).
            if pairKey in distances:
                continue
            keptHashtags = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(ownHashtags, linkedHashtags, findLag=False)
            keptTags = set(keptHashtags)
            sharedCount = len(ownTags & keptTags)
            totalCount = len(ownTags | keptTags)
            similarity = sharedCount/float(totalCount)
            lags = [abs(ownHashtags[tag][0]-keptHashtags[tag][0]) for tag in keptHashtags if tag in ownHashtags]
            temporalDistance = np.mean(lags)
            geoDistance = getHaversineDistanceForLids(latticeId.replace('_', ' '), neighborId.replace('_', ' '))
            distances[pairKey] = {
                'similarity': similarity,
                'temporalDistance': temporalDistance,
                'geoDistance': geoDistance,
            }
    return distances