from pysal.core.IOHandlers import wkt

# Pair every tweet with the nearest county reference point and store the
# (geoid10, tweet_id) pairs in a new ``countytweet`` table.
#
# NOTE(review): ``spatialdb``, ``np`` and ``vq`` are not imported in the part
# of the file visible here; presumably they are a SpatiaLite DB-API module,
# numpy and scipy.cluster.vq.vq -- confirm against the imports above.

# Rebind the module name to a parser instance; ``wkt(text)`` is called below
# to turn WKT strings into point objects.
wkt = wkt.WKTParser()

inDb = 'gooddbs/newdb2.db'
conn = spatialdb.Connection(inDb)
try:
    cur = conn.cursor()

    county_rows = cur.execute(
        'select intptlat10, intptlon10, geoid10 from countyfinal').fetchall()
    print(len(county_rows))
    # Each entry is [(lon, lat), geoid10]. The query returns latitude first,
    # so the pair is flipped here -- presumably to match the x/y order of the
    # parsed tweet points (TODO confirm).
    countyCentroid = [[(float(lon), float(lat)), geoid]
                      for lat, lon, geoid in county_rows]
    # A list (not ``map``) is handed to numpy so the result is a real
    # coordinate array on Python 3 as well as Python 2.
    countyArray = np.array([entry[0] for entry in countyCentroid])
    # Row index in countyArray -> geoid10.
    countyDict = {index: entry[1]
                  for index, entry in enumerate(countyCentroid)}

    tweet_rows = cur.execute(
        'select tweet_id, astext(coords) from status').fetchall()
    print(len(tweet_rows))
    # Each entry is [parsed point, tweet_id].
    tweetPoints = [[wkt(point_text), tweet_id]
                   for tweet_id, point_text in tweet_rows]
    tweetArray = np.array([entry[0] for entry in tweetPoints])
    # Row index in tweetArray -> tweet_id.
    tweetDict = {index: entry[1] for index, entry in enumerate(tweetPoints)}

    # closest[0] holds, for each tweet row, the index of the county row it was
    # assigned to; those indices are the keys of countyDict.
    # NOTE(review): the match is made on raw coordinate values, i.e. plain
    # Euclidean distance if ``vq`` is scipy's -- confirm that is acceptable.
    closest = vq(tweetArray, countyArray)
    myList = [(countyDict[county_index], tweetDict[tweet_index])
              for tweet_index, county_index in enumerate(closest[0])]

    cur.execute('create table countytweet (geoid10 text, tweet_id text)')
    cur.executemany('insert into countytweet values(?,?)', myList)
    conn.commit()
finally:
    # Release the database handle even when a query, parse or vq step fails.
    conn.close()
# Build, for every hashtag that occurs more than ten times in ``hash_tweet``,
# an array of the matching tweets' coordinates transformed to SRID 5070, then
# run k-means with k=5 on the 'Obama' and 'Romney' point sets.
#
# NOTE(review): hashtags are read from ``hash_tweet`` but the coordinates are
# looked up through ``words.word`` -- confirm the two tables use the same
# spelling/case for a tag.

hashesQuery = """select distinct(hash) from hash_tweet group by hash having count(*) > 10;"""
selectCoords = """select astext(transform(status.coords, 5070)) from status join words on status.tweet_id = words.tweet_id where words.word = ?"""

# The connection opened earlier in this file is closed before this point, so
# a cursor from it can no longer run queries; open a fresh connection here
# and close it again once the coordinates have been read.
conn = spatialdb.Connection(inDb)
try:
    cur = conn.cursor()
    hashDict = {}
    # fetchall() materialises the hashtag rows first, which lets the same
    # cursor be reused for the per-hashtag query inside the loop.
    for hashtag in cur.execute(hashesQuery).fetchall():
        print(hashtag)
        # ``hashtag`` is a one-element row, so it doubles as the parameter
        # sequence for the single ``?`` placeholder.
        points = [wkt(row[0])
                  for row in cur.execute(selectCoords, hashtag).fetchall()]
        hashDict[hashtag[0]] = np.array(points)
finally:
    conn.close()

# a little idea
# Raises KeyError if either tag did not pass the "> 10" filter above.
obama = hashDict['Obama']
romney = hashDict['Romney']

# b[0] / a[0] are iterated as the cluster centres (the codebook, if ``kmeans``
# is scipy.cluster.vq.kmeans -- TODO confirm); split them into x and y lists.
b = kmeans(romney, 5)
b_x = [centre[0] for centre in b[0]]
b_y = [centre[1] for centre in b[0]]

romney_x = [pt[0] for pt in romney]
romney_y = [pt[1] for pt in romney]

a = kmeans(obama, 5)
a_x = [centre[0] for centre in a[0]]
a_y = [centre[1] for centre in a[0]]