Exemplo n.º 1
0
    def determineUpperRangeForTemporalDistances(timeRange, outputFolder):
        """Print the upper outlier bound of the values pooled over all lattice links.

        Streams lattice objects from the lattice-graph file for ``timeRange``.
        For every entry of a lattice's ``'links'`` mapping, the values of the
        mapping returned by
        ``filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance`` are pooled,
        and element ``[1]`` of ``getOutliersRangeUsingIRQ`` over the pool is
        printed (presumably the upper bound -- that element is unpacked as the
        upper range by other callers in this file).

        Args:
            timeRange: two-item tuple interpolated into the file name as
                ``'%s_%s'``.
            outputFolder: currently not read by this function; see the note
                below.
        """
        # NOTE(review): the path is built from ``mrOutputFolder``, which is not
        # defined in this function, rather than from the ``outputFolder``
        # parameter -- confirm which one is intended.
        latticeGraphFile = hashtagsLatticeGraphFile % (mrOutputFolder, '%s_%s' % timeRange)
        temporalDistancesForAllLattices = []
        for i, latticeObject in enumerate(FileIO.iterateJsonFromFile(latticeGraphFile), 1):
            # Progress trace: running count and lattice id.
            print('%s %s' % (i, latticeObject['id']))
            for _neighborLattice, neighborHashtags in latticeObject['links'].iteritems():
                filteredDistances = filterOutNeighborHashtagsOutside1_5IQROfTemporalDistance(
                    latticeObject['hashtags'], neighborHashtags)
                # Read the values directly instead of via zip(*items)[1], which
                # raises IndexError when the filter leaves no entries for a link.
                temporalDistancesForAllLattices.extend(filteredDistances.values())
        print(getOutliersRangeUsingIRQ(temporalDistancesForAllLattices)[1])
Exemplo n.º 2
0
 def estimateUpperRangeForTimePeriod(timeRange, outputFolder):
     """Print the outlier range of active-region spans and show their histogram.

     Reads hashtag objects from the file named by
     ``hashtagsWithoutEndingWindowFile`` for ``outputFolder`` and ``timeRange``.
     For each one, takes the result of ``getOccuranesInHighestActiveRegion``,
     subtracts field ``[1]`` of its first entry from field ``[1]`` of its last
     entry (presumably timestamps -- TODO confirm), and divides by
     ``TIME_UNIT_IN_SECONDS``.

     The result of ``getOutliersRangeUsingIRQ`` over those spans is printed,
     then a 10-bin histogram of the spans is displayed.

     Args:
         timeRange: two-item tuple interpolated into the file name as
             ``'%s_%s'``.
         outputFolder: folder component interpolated into the file path.
     """
     hashtagsFile = hashtagsWithoutEndingWindowFile % (outputFolder, '%s_%s' % timeRange)
     spans = []
     for position, hashtagObject in enumerate(FileIO.iterateJsonFromFile(hashtagsFile), 1):
         print(position)  # progress trace
         activeRegion = getOccuranesInHighestActiveRegion(hashtagObject)
         lastValue, firstValue = activeRegion[-1][1], activeRegion[0][1]
         spans.append((lastValue - firstValue) / TIME_UNIT_IN_SECONDS)
     print(getOutliersRangeUsingIRQ(spans))
     plt.hist(spans, bins=10)
     plt.show()
Exemplo n.º 3
0
    def plotTemporalLocality():
        """Plot mean temporal distance per geo-distance bucket and save the figure.

        Records from ``Locality._getDistances()`` are grouped into 100-wide
        buckets of ``int(geoDistance)``, keyed by the bucket's upper edge. Each
        record contributes ``temporalDistance / 3600`` (presumably seconds to
        hours, going by the axis label -- TODO confirm).

        Within a bucket, values at or above the upper bound from
        ``getOutliersRangeUsingIRQ`` are dropped. Buckets with more than 50
        remaining values contribute one point: (bucket, mean of the remaining
        values).

        The Pearson coefficient and p-value of those points are printed
        (rounded to two decimals), and the scatter plot is written to
        ``../images/temporalLocality.png``.
        """
        distances = Locality._getDistances()
        temporalByGeoBucket = defaultdict(list)
        plt.gca()
        for _key, record in distances.iteritems():
            # Integer floor division: 0-99 -> 100, 100-199 -> 200, ...
            bucket = int(record['geoDistance']) // 100 * 100 + 100
            temporalByGeoBucket[bucket].append(record['temporalDistance'] / (60 * 60))

        bucketsX, meansY = [], []
        for bucket in sorted(temporalByGeoBucket):
            bucketValues = temporalByGeoBucket[bucket]
            _, upperRange = getOutliersRangeUsingIRQ(bucketValues)
            keptValues = [value for value in bucketValues if value < upperRange]
            # Skip sparsely populated buckets.
            if len(keptValues) <= 50:
                continue
            bucketsX.append(bucket)
            meansY.append(np.mean(keptValues))

        pearsonCoeff, p_value = scipy.stats.pearsonr(bucketsX, meansY)
        print('%s %s' % (round(pearsonCoeff, 2), round(p_value, 2)))
        plt.scatter(bucketsX, meansY, c='r', lw=0)
        plt.xlabel('Spatial affinity (miles)', fontsize=20)
        plt.ylabel('Temporal affinity (hours)', fontsize=20)
        # The figure is only written to disk: no title is set and it is not
        # shown interactively.
        plt.savefig('../images/temporalLocality.png')
Exemplo n.º 4
0
    def timePeriods(timeRange, folderType):
        distribution = defaultdict(list)
#        i = 1
#        for h in FileIO.iterateJsonFromFile(hashtagsWithoutEndingWindowFile%(folderType,'%s_%s'%timeRange)):
##            if h['h']=='jartic':
#            classId = HashtagsClassifier.classify(h)
#            if classId:
#                print i, unicode(h['h']).encode('utf-8'), classId;i+=1
#                occs = getOccuranesInHighestActiveRegion(h)
#                distribution[classId].append((occs[-1][1]-occs[0][1])/TIME_UNIT_IN_SECONDS)
#        for k,v in distribution.iteritems():
#            FileIO.writeToFileAsJson({'id':k, 'dist': v}, '../data/hashtagsClassTimePeriods.txt')
        i = 1
        for data in FileIO.iterateJsonFromFile('../data/hashtagsClassTimePeriods.txt'):
#            print data.keys()
            plt.subplot(220+i);i+=1
            plt.hist(data['dist'], bins=100)
            boundary = getOutliersRangeUsingIRQ(data['dist'])[1]
            actualHashtags = filter(lambda t:t<=boundary, data['dist'])
            meanTimePeriod = np.mean(actualHashtags)
            print {data['id'] : {'meanTimePeriod': meanTimePeriod, 'outlierBoundary': boundary}}
            plt.title(data['id']+' %0.2f %0.2f %d'%(meanTimePeriod, boundary, len(actualHashtags)))
            plt.xlim(xmax=200)
        plt.show()
Exemplo n.º 5
0
def getRadius(locations):
    """Return the mean distance of ``locations`` from their center, ignoring outliers.

    The center comes from ``getCenterOfMass`` (called with
    ``accuracy=LATTICE_ACCURACY``). Distances from it to every location are
    computed with ``getHaversineDistance``. Distances above the upper bound
    reported by ``getOutliersRangeUsingIRQ`` are discarded before averaging.

    Args:
        locations: iterable of points accepted by ``getCenterOfMass`` and
            ``getHaversineDistance``; it is traversed more than once.

    Returns:
        ``np.mean`` of the retained distances.
    """
    center = getCenterOfMass(locations, accuracy=LATTICE_ACCURACY)
    distancesFromCenter = []
    for point in locations:
        distancesFromCenter.append(getHaversineDistance(center, point))
    _, upperBoundForDistance = getOutliersRangeUsingIRQ(distancesFromCenter)
    withinBound = [d for d in distancesFromCenter if d <= upperBoundForDistance]
    return np.mean(withinBound)