Пример #1
0
"""
Print the regional tokens found around a given location.

A token is printed when the distance between its median position and the
given point is below the search range and it occurs more than MIN_COUNT
times.

Usage:
python PrintRegionalTokens.py Signature Lon Lat [range]

Signature  path to the pickled token signature
Lon, Lat   coordinates of the point to search around
range      optional search range in km (default: 50)
"""
import cPickle as pickle
import os
import sys

from Wrapper import MySQLConnection
from Evaluation import EvaluationFunctions

# Tokens must occur more often than this to be printed.
MIN_COUNT = 20

if len(sys.argv) < 4:
    print("Usage: python PrintRegionalTokens.py Signature Lon Lat [range]")
    sys.exit(1)

# NOTE(review): unpickling can execute arbitrary code; only pass signature
# files that come from a trusted source.
with open(sys.argv[1], 'rb') as signature_file:
    signature = pickle.load(signature_file)

# Command-line arguments arrive as strings; convert them once so they can be
# compared with the numeric coordinates computed from the database.
lon = float(sys.argv[2])
lat = float(sys.argv[3])
rang = float(sys.argv[4]) if len(sys.argv) > 4 else 50  # km

token_db = MySQLConnection.MySQLConnectionWrapper(basedir=os.getcwd() + "/", corpus="TOKENDATA")

for tid, count, medx, medy, medz, varx, vary, varz in token_db.getTokenInfo(ids=None, columns="`id`, `count`, `median_x`, `median_y`, `median_z`, `variance_x`, `variance_y`, `variance_z`"):
    lon_, lat_ = EvaluationFunctions.convertCartesianToLatLong(medx, medy, medz)
    distance = EvaluationFunctions.getDistance(lon, lat, lon_, lat_)
    if distance < rang and count > MIN_COUNT:
        # Same layout as before: token , (variances) , count
        print("%s , %s , %s" % (signature.get(tid), (varx, vary, varz), count))

Пример #2
0
    def evaluateTweet(self, tokens, location, user):
        """Estimate the position of a tweet and measure the error.

        Every co-occurrence generated from ``tokens`` is looked up in
        ``self.token_data``. Tokens accepted by ``checkVarianceThreshold``
        are passed to ``self.evaluator``; the weighted midpoint of its
        output is the estimate, which is then compared with ``location``.
        When ``self.draw`` is set, the tokens, the true position and the
        estimate are plotted and the figure is saved under ``img/``.

        Args:
            tokens: tokens of the tweet, handed to
                ``EvaluationFunctions.getCoOccurrences``.
            location: indexable pair with the true position. Its two
                entries are used in the same order as the estimated
                (lon, lat) pair -- presumably (lon, lat); confirm with
                the caller.
            user: currently unused.

        Returns:
            ``None`` when no token passes the variance threshold,
            otherwise the tuple ``(lon_score, lat_score, location[0],
            location[1], distance)``.
        """
        # Local import: keeps the null-hypothesis draw independent of how
        # the surrounding module imports its random helpers.
        import random

        token_data_here = []

        if self.draw:
            basemap = MapFunctions.prepareMap()

        # Look up the data for each token in the tweet
        for token in EvaluationFunctions.getCoOccurrences(tokens):
            token_id = self.signature.add(token)
            if token_id not in self.token_data:
                continue

            data = self.token_data[token_id]
            accepted = self.checkVarianceThreshold(token_id)

            if accepted and self.null:
                # Null hypothesis: swap the token for one drawn uniformly
                # from all known tokens and use that token's record for
                # everything below. random.choice cannot run past the end
                # of the key list.
                token = random.choice(list(self.token_data))
                data = self.token_data[token]

            variance = data['variance']
            count = data['count']

            if self.draw:
                x, y, z = data["median"]
                lon, lat = EvaluationFunctions.convertCartesianToLatLong(
                    x, y, z)
                if accepted:
                    current_color = EvaluationFunctions.getColorForValue(
                        variance)
                    alpha = 0.7
                else:
                    # Rejected tokens are drawn faintly in gray.
                    current_color = 'gray'
                    alpha = 0.1
                basemap.plot(
                    lon,
                    lat,
                    'o',
                    latlon=True,
                    markeredgecolor=current_color,
                    color=current_color,
                    markersize=EvaluationFunctions.getSizeForValue(count),
                    alpha=alpha)

            if accepted:
                token_data_here.append((token, variance, count,
                                        data["median"], data["variances"]))

        # Without any accepted token there is nothing to evaluate.
        if not token_data_here:
            plt.clf()
            return None

        # Generate the data for the weighted midpoint
        coordinate_list, weight_list = self.evaluator.evaluate(token_data_here)

        # Calculate the midpoint
        lon_score, lat_score = EvaluationFunctions.getWeightedMidpointXYZ(
            coordinate_list, weight_list)

        distance = EvaluationFunctions.getDistance(lon_score, lat_score,
                                                   location[0], location[1])

        if self.draw:
            # '^' marks the true position, 'v' marks the estimate.
            basemap.plot(location[0],
                         location[1],
                         '^',
                         mfc='none',
                         markeredgecolor='black',
                         latlon=True,
                         alpha=1)
            basemap.plot(lon_score,
                         lat_score,
                         'v',
                         mfc='none',
                         markeredgecolor='black',
                         latlon=True,
                         alpha=1)

            plt.text(10000, 10000,
                     'Distance: ' + str(round(distance, 1)) + 'km')
            plt.text(10000, 80000,
                     'Threshold: ' + str(self.variance_threshold))
            plt.savefig('img/tweet_' + str(self.variance_threshold) + "_" +
                        str(self.i) + ".png",
                        format='png')
            plt.clf()

        return (lon_score, lat_score, location[0], location[1], distance)
Пример #3
0
    def evaluateTweet(self, tokens, location, user):
        """Estimate the position of a tweet and measure the error.

        Every co-occurrence generated from ``tokens`` is looked up in
        ``self.token_data``. Tokens accepted by ``checkVarianceThreshold``
        are passed to ``self.evaluator``; the weighted midpoint of its
        output is the estimate, which is then compared with ``location``.
        When ``self.draw`` is set, the tokens, the true position and the
        estimate are plotted and the figure is saved under ``img/``.

        Args:
            tokens: tokens of the tweet, handed to
                ``EvaluationFunctions.getCoOccurrences``.
            location: indexable pair with the true position. Its two
                entries are used in the same order as the estimated
                (lon, lat) pair -- presumably (lon, lat); confirm with
                the caller.
            user: currently unused.

        Returns:
            ``None`` when no token passes the variance threshold,
            otherwise the tuple ``(lon_score, lat_score, location[0],
            location[1], distance)``.
        """
        # Local import: keeps the null-hypothesis draw independent of how
        # the surrounding module imports its random helpers.
        import random

        token_data_here = []

        if self.draw:
            basemap = MapFunctions.prepareMap()

        # Look up the data for each token in the tweet
        for token in EvaluationFunctions.getCoOccurrences(tokens):
            token_id = self.signature.add(token)
            if token_id not in self.token_data:
                continue

            data = self.token_data[token_id]
            accepted = self.checkVarianceThreshold(token_id)

            if accepted and self.null:
                # Null hypothesis: swap the token for one drawn uniformly
                # from all known tokens and use that token's record for
                # everything below. random.choice cannot run past the end
                # of the key list.
                token = random.choice(list(self.token_data))
                data = self.token_data[token]

            variance = data['variance']
            count = data['count']

            if self.draw:
                x, y, z = data["median"]
                lon, lat = EvaluationFunctions.convertCartesianToLatLong(x, y, z)
                if accepted:
                    current_color = EvaluationFunctions.getColorForValue(variance)
                    alpha = 0.7
                else:
                    # Rejected tokens are drawn faintly in gray.
                    current_color = 'gray'
                    alpha = 0.1
                basemap.plot(lon, lat, 'o', latlon=True, markeredgecolor=current_color, color=current_color, markersize=EvaluationFunctions.getSizeForValue(count), alpha=alpha)

            if accepted:
                token_data_here.append((token, variance, count, data["median"], data["variances"]))

        # Without any accepted token there is nothing to evaluate.
        if not token_data_here:
            plt.clf()
            return None

        # Generate the data for the weighted midpoint
        coordinate_list, weight_list = self.evaluator.evaluate(token_data_here)

        # Calculate the midpoint
        lon_score, lat_score = EvaluationFunctions.getWeightedMidpointXYZ(coordinate_list, weight_list)

        distance = EvaluationFunctions.getDistance(lon_score, lat_score, location[0], location[1])

        if self.draw:
            # '^' marks the true position, 'v' marks the estimate.
            basemap.plot(location[0], location[1], '^', mfc='none', markeredgecolor='black', latlon=True, alpha=1)
            basemap.plot(lon_score, lat_score, 'v', mfc='none', markeredgecolor='black', latlon=True, alpha=1)

            plt.text(10000, 10000, 'Distance: ' + str(round(distance, 1)) + 'km')
            plt.text(10000, 80000, 'Threshold: ' + str(self.variance_threshold))
            plt.savefig('img/tweet_' + str(self.variance_threshold) + "_" + str(self.i) + ".png", format='png')
            plt.clf()

        return (lon_score, lat_score, location[0], location[1], distance)