def im_tweet(srcs):
    """Import tweets from line-delimited JSON files into the `sample` table.

    Every input line is parsed as one JSON tweet.  A tweet is inserted only
    when ``get_tokens`` returns a non-empty result for its text.  Rows
    rejected with an ``IntegrityError`` are reported as duplicates and
    skipped; any other ``StandardError`` while handling a line is reported
    together with the input line number and the import continues.

    A summary ("imported out of parsed") is written to the log at the end.

    Args:
        srcs: File name(s) passed to ``fileinput.input``; compressed files
            are opened through ``fileinput.hook_compressed``.
    """
    # Connect to MySQL database
    cur = CONN_POOL.get_cur(GEOTWEET)
    imported = 0  # rows actually inserted
    parsed = 0    # lines that yielded a complete tweet record
    for line in fileinput.input(srcs, openhook=fileinput.hook_compressed):
        try:
            tjson = json.loads(line)
            # First vertex of the place bounding box, stored as [lng, lat].
            lat = tjson['place']['bounding_box']['coordinates'][0][0][1]
            lng = tjson['place']['bounding_box']['coordinates'][0][0][0]
            # created_at looks like: Wed Apr 14 18:51:32 +0000 2010
            timestru = time.strptime(tjson['created_at'],
                                     '%a %b %d %H:%M:%S +0000 %Y')
            timex = time.strftime('%Y-%m-%d %H:%M:%S', timestru)
            # The WKT text is bound as a parameter instead of being
            # formatted into the SQL statement, so values coming from the
            # input file can never alter the query itself.
            # NOTE(review): the point is written latitude-first, exactly as
            # before -- confirm consumers of `geo` expect that order.
            item = (tjson['id'],
                    tjson['place']['id'],
                    tjson['user']['id'],
                    tjson['text'],
                    lat,
                    lng,
                    'POINT({0} {1})'.format(lat, lng),
                    timex)
            parsed += 1
            if len(get_tokens(tjson['text'])) > 0:
                cur.execute('INSERT INTO sample ('
                            'id, '
                            'place_id, '
                            'user_id, '
                            'text, '
                            'lat, '
                            'lng, '
                            'geo, '
                            'created_at) '
                            'VALUES(%s,%s,%s,%s,%s,%s,'
                            'GeomFromText(%s),%s)',
                            item)
                imported += 1
        except _mysql_exceptions.IntegrityError:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2.
            print('Import Tweets::Tweet ID {0} ignored for duplication.'
                  .format(tjson['id']))
        except StandardError:
            # Report the real input line; the parsed counter only advances
            # on success and would point at the wrong line.
            print('Fail at line {0}'.format(fileinput.lineno()))
    logging.info('Import Tweet::{0} out of {1} imported.'
                 .format(imported, parsed))
    logging.info('------------------------------------------')
def filter_tweet():
    """Copy tweets whose text yields at least one token into `sample`.

    Reads ``id`` and ``text`` of every row in `tweet`, runs ``get_tokens``
    on the text and, when the result is non-empty, copies the complete row
    into `sample` with an ``INSERT ... SELECT``.  Finally logs how many
    rows were transferred out of how many were scanned.

    NOTE(review): the previous summary read "get rid of square game text";
    presumably ``get_tokens`` returns nothing for such text -- confirm
    against ``get_tokens``.
    """
    # Separate cursors: one streams the source rows, one performs inserts.
    scur = CONN_POOL.get_cur(GEOTWEET)
    dcur = CONN_POOL.get_cur(GEOTWEET)
    scur.execute('select id, text from tweet')
    # Adjacent literals with explicit spaces: the statement no longer
    # depends on source indentation to keep its keywords apart.
    copy_sql = ('insert into `sample` '
                'select * from `tweet` '
                'where `tweet`.`id` = %s')
    scanned, transferred = 0, 0
    for tweet in scur:
        scanned += 1
        if len(get_tokens(tweet['text'])) > 0:
            # DB-API parameters must be a sequence, hence the 1-tuple.
            dcur.execute(copy_sql, (tweet['id'],))
            transferred += 1
    logging.info('{0} out of {1} tweets are transferred'
                 .format(transferred, scanned))
def filter_tweet():
    """Transfer every tweet with a non-empty token list to `sample`.

    All rows of `tweet` are scanned (``id`` and ``text`` only).  For each
    row whose text makes ``get_tokens`` return something non-empty, the
    full row is copied into `sample` via ``INSERT ... SELECT``.  The
    number of transferred and scanned rows is logged when the scan ends.

    NOTE(review): the earlier docstring said "get rid of square game
    text"; presumably that filtering happens inside ``get_tokens`` --
    verify there.
    """
    reader = CONN_POOL.get_cur(GEOTWEET)  # iterates over the source rows
    writer = CONN_POOL.get_cur(GEOTWEET)  # issues the per-row inserts
    reader.execute("select id, text from tweet")
    # Built from adjacent literals so that whitespace between SQL keywords
    # is explicit rather than borrowed from the source indentation.
    insert_stmt = (
        "insert into `sample` "
        "select * from `tweet` "
        "where `tweet`.`id` = %s"
    )
    total = 0
    kept = 0
    for row in reader:
        total += 1
        if len(get_tokens(row["text"])) == 0:
            continue
        # Query parameters are passed as a sequence, as the DB-API requires.
        writer.execute(insert_stmt, (row["id"],))
        kept += 1
    logging.info("{0} out of {1} tweets are transferred".format(kept, total))
def im_tweet(srcs):
    """Load tweets from line-delimited JSON input into the `sample` table.

    One JSON document is expected per line.  For each line the place
    bounding box, creation time, ids and text are extracted; the tweet is
    inserted when ``get_tokens`` returns a non-empty result for its text.
    An ``IntegrityError`` from the insert is reported as a duplicate, any
    other ``StandardError`` is reported with the input line number, and in
    both cases processing moves on to the next line.

    When the input is exhausted, the number of imported tweets out of the
    number of successfully parsed tweets is logged.

    Args:
        srcs: File name(s) forwarded to ``fileinput.input``; compressed
            inputs are handled by ``fileinput.hook_compressed``.
    """
    # Statement text is loop-invariant.  The geometry is a bound parameter
    # (GeomFromText(%s)) so file contents are never spliced into the SQL.
    insert_sql = (
        "INSERT INTO sample ("
        "id, "
        "place_id, "
        "user_id, "
        "text, "
        "lat, "
        "lng, "
        "geo, "
        "created_at) "
        "VALUES(%s,%s,%s,%s,%s,%s,"
        "GeomFromText(%s),%s)"
    )
    # Connect to MySQL database
    cur = CONN_POOL.get_cur(GEOTWEET)
    n_imported = 0
    n_parsed = 0
    for line in fileinput.input(srcs, openhook=fileinput.hook_compressed):
        try:
            tjson = json.loads(line)
            # The first bounding-box vertex is read as [lng, lat].
            lat = tjson["place"]["bounding_box"]["coordinates"][0][0][1]
            lng = tjson["place"]["bounding_box"]["coordinates"][0][0][0]
            # Example created_at value: Wed Apr 14 18:51:32 +0000 2010
            created = time.strptime(tjson["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
            timex = time.strftime("%Y-%m-%d %H:%M:%S", created)
            # NOTE(review): WKT keeps the original latitude-first order --
            # confirm that readers of `geo` rely on it.
            wkt = "POINT({0} {1})".format(lat, lng)
            item = (
                tjson["id"],
                tjson["place"]["id"],
                tjson["user"]["id"],
                tjson["text"],
                lat,
                lng,
                wkt,
                timex,
            )
            n_parsed += 1
            if len(get_tokens(tjson["text"])) > 0:
                cur.execute(insert_sql, item)
                n_imported += 1
        except _mysql_exceptions.IntegrityError:
            # print(...) with one argument is equivalent to the Python 2
            # print statement used previously.
            print("Import Tweets::Tweet ID {0} ignored for duplication.".format(tjson["id"]))
        except StandardError:
            # fileinput.lineno() is the actual position in the input; the
            # parsed counter lags behind after any earlier failure.
            print("Fail at line {0}".format(fileinput.lineno()))
    logging.info("Import Tweet::{0} out of {1} imported.".format(n_imported, n_parsed))
    logging.info("------------------------------------------")