from pysal.core.IOHandlers import wkt
# Rebind the imported ``wkt`` module name to a WKTParser instance.  From here
# on ``wkt(text)`` is used as a callable on WKT strings, and the module itself
# is no longer reachable under this name.
wkt = wkt.WKTParser()

# Path of the database file opened below; all queries and the final insert
# go through ``cur``.
# NOTE(review): ``spatialdb`` is not imported in the visible part of this
# file -- confirm where it comes from.
inDb = 'gooddbs/newdb2.db'
conn = spatialdb.Connection(inDb)
cur = conn.cursor()

# Load one row per county from ``countyfinal`` and report how many were read.
county_rows = cur.execute(
    'select intptlat10, intptlon10, geoid10 from countyfinal').fetchall()
print(len(county_rows))

# Each entry is [(intptlon10, intptlat10), geoid10]: the two coordinate columns
# are converted to float and swapped so the second selected column comes first
# (presumably x = longitude, y = latitude -- confirm against the schema).
countyCentroid = [
    [(float(second), float(first)), geoid]
    for first, second, geoid in county_rows
]

# Coordinate pairs as an array, plus a lookup from row position to geoid10 so
# an index into ``countyArray`` can be mapped back to its county.
countyArray = np.array([entry[0] for entry in countyCentroid])
countyDict = dict(enumerate(entry[1] for entry in countyCentroid))

# Load every tweet id together with its coordinates rendered as WKT text, and
# report how many were read.
tweet_rows = cur.execute(
    'select tweet_id, astext(coords) from status').fetchall()
print(len(tweet_rows))

# Each entry is [parsed point, tweet_id]; ``wkt`` is the WKTParser instance
# created at the top of the script.
tweetPoints = [[wkt(geom_text), tweet_id] for tweet_id, geom_text in tweet_rows]

# Parsed points as an array, plus a lookup from row position to tweet_id so an
# index into ``tweetArray`` can be mapped back to its tweet.
tweetArray = np.array([entry[0] for entry in tweetPoints])
tweetDict = dict(enumerate(entry[1] for entry in tweetPoints))

# Match every tweet to a county centroid.  Element 0 of the ``vq`` result is
# iterated as one county index per tweet, in the same order as ``tweetArray``
# (assuming ``vq`` is scipy.cluster.vq.vq, that index is the nearest code-book
# row -- confirm the import).
closest = vq(tweetArray, countyArray)
nearest_county = closest[0]

# (geoid10, tweet_id) pairs, translated back from array positions through the
# two lookup dictionaries.
myList = [
    (countyDict[county_pos], tweetDict[tweet_pos])
    for tweet_pos, county_pos in enumerate(nearest_county)
]

# Persist the pairs in a new table; this statement fails if ``countytweet``
# already exists.
cur.execute('create table countytweet (geoid10 text, tweet_id text)')
cur.executemany('insert into countytweet values(?,?)', myList)

conn.commit()
conn.close()
# Example no. 2
0
                        
# Hashtags that occur in more than 10 rows of ``hash_tweet``.
hashesQuery = """select distinct(hash) from hash_tweet
                        group by hash
                        having count(*) > 10;"""

# Coordinates (transformed to SRID 5070, rendered as WKT) of every status
# joined to a ``words`` row whose word equals the bound parameter.
# NOTE(review): hashtags are read from ``hash_tweet`` but matched against
# ``words.word`` -- confirm this is intended.
selectCoords = """select astext(transform(status.coords, 5070))
    from status join words 
    on status.tweet_id = words.tweet_id
    where words.word = ?"""

# Map each hashtag to an array of its parsed points.  The outer result is
# fully fetched before the loop starts, so reusing ``cur`` for the inner
# query does not disturb the iteration.
hashDict = {}
for hash_row in cur.execute(hashesQuery).fetchall():
    print(hash_row)
    # ``hash_row`` is the one-column result row; it doubles as the parameter
    # sequence for the single ``?`` placeholder.
    coord_rows = cur.execute(selectCoords, hash_row).fetchall()
    hashDict[hash_row[0]] = np.array(
        [wkt(coord_row[0]) for coord_row in coord_rows])
    
# A little idea: cluster the points of two hashtags into 5 groups each and
# split the results into separate x / y lists.


def _column(rows, index):
    """Return element ``index`` of every row in ``rows`` as a list."""
    return [row[index] for row in rows]


obama = hashDict['Obama']
romney = hashDict['Romney']

# Element 0 of each ``kmeans`` result is treated as the sequence of cluster
# centres (assuming scipy.cluster.vq.kmeans -- confirm the import).
b = kmeans(romney, 5)
b_x = _column(b[0], 0)
b_y = _column(b[0], 1)

# Raw Romney points, split the same way.
romney_x = _column(romney, 0)
romney_y = _column(romney, 1)

a = kmeans(obama, 5)
a_x = _column(a[0], 0)
a_y = _column(a[0], 1)