示例#1
0
def im_tweet(srcs):
    """Import tweets from line-delimited JSON files into the `sample` table.

    Every input line is parsed as one JSON tweet. Latitude and longitude are
    read from the first vertex of the place bounding box, `created_at` is
    reformatted to `YYYY-MM-DD HH:MM:SS`, and the tweet is inserted only when
    `get_tokens` returns at least one token for its text.

    Args:
        srcs: File name(s) handed to `fileinput.input`; files are opened
            through `fileinput.hook_compressed`.

    Rows rejected with an IntegrityError are reported on stdout as duplicates
    and any other per-line failure is reported with its input line number;
    in both cases the line is skipped and processing continues.  A summary
    (inserted count out of successfully parsed count) is logged at the end.
    """

    # Connect to MySQL database
    cur = CONN_POOL.get_cur(GEOTWEET)

    # The WKT point is bound as a parameter rather than formatted into the
    # statement text, so values coming from the input file never become SQL.
    insert_sql = ('INSERT INTO sample ('
                  'id, '
                  'place_id, '
                  'user_id, '
                  'text, '
                  'lat, '
                  'lng, '
                  'geo, '
                  'created_at) '
                  'VALUES(%s,%s,%s,%s,%s,%s,'
                  'GeomFromText(%s),%s)')

    i = 0  # tweets actually inserted
    k = 0  # tweets whose fields were all parsed successfully
    for line in fileinput.input(srcs, openhook=fileinput.hook_compressed):
        try:
            tjson = json.loads(line)
            # Bounding-box vertices are indexed as [0] -> lng, [1] -> lat.
            corner = tjson['place']['bounding_box']['coordinates'][0][0]
            # float() rejects non-numeric coordinates before they reach the
            # database; a bad value is reported by the generic handler below.
            lat = float(corner[1])
            lng = float(corner[0])
            #Wed Apr 14 18:51:32 +0000 2010
            timestru = time.strptime(tjson['created_at'],
                                     '%a %b %d %H:%M:%S +0000 %Y')
            timex = time.strftime('%Y-%m-%d %H:%M:%S', timestru)
            # NOTE(review): WKT is conventionally POINT(x y), i.e. lng first;
            # the original "lat lng" order is kept so new rows match existing
            # data -- confirm which order the `geo` column expects.
            point_wkt = 'POINT({0} {1})'.format(lat, lng)
            item = (tjson['id'],
                    tjson['place']['id'],
                    tjson['user']['id'],
                    tjson['text'],
                    lat,
                    lng,
                    point_wkt,
                    timex)

            k += 1
            if len(get_tokens(tjson['text'])) > 0:
                cur.execute(insert_sql, item)
                # Count only rows that were really sent to the database, so
                # the "imported" figure in the summary is accurate.
                i += 1
        except _mysql_exceptions.IntegrityError:
            print('Import Tweets::Tweet ID {0} ignored for duplication.'
                  .format(tjson['id']))
        except StandardError:
            # fileinput.lineno() is the cumulative number of the line just
            # read, which is what a "line" in this message should refer to.
            print('Fail at line {0}'.format(fileinput.lineno()))
    logging.info('Import Tweet::{0} out of {1} imported.'.format(i, k))
    logging.info('------------------------------------------')
示例#2
0
def filter_tweet():
    """Copy every tweet whose text yields at least one token into `sample`.

    Reads `id` and `text` of all rows in `tweet` through one cursor and, for
    each row where `get_tokens(text)` is non-empty, copies the full row into
    `sample` through a second cursor.  The original note describes the
    purpose as getting rid of "square game" text; the decision itself is
    delegated entirely to `get_tokens`.

    Logs how many tweets were transferred out of how many were examined.
    """
    # Separate cursors: one is iterated for reading while the other writes.
    scur = CONN_POOL.get_cur(GEOTWEET)
    dcur = CONN_POOL.get_cur(GEOTWEET)

    scur.execute('select id, text from tweet')
    i, k = 0, 0  # i: rows examined, k: rows copied
    for tweet in scur:
        i += 1
        if len(get_tokens(tweet['text'])) > 0:
            # Parameters are passed as a 1-tuple: DB-API 2.0 requires a
            # sequence or mapping, and a bare scalar is rejected by newer
            # MySQLdb releases.
            dcur.execute(
                'insert into `sample` \
                    select * from `tweet`\
                    where `tweet`.`id` = %s', (tweet['id'],))
            k += 1
    logging.info('{0} out of {1} tweets are transferred'.format(k, i))
示例#3
0
def filter_tweet():
    """Copy every tweet whose text yields at least one token into `sample`.

    Reads `id` and `text` of all rows in `tweet` through one cursor and, for
    each row where `get_tokens(text)` is non-empty, copies the full row into
    `sample` through a second cursor.  The original note describes the
    purpose as getting rid of "square game" text; the decision itself is
    delegated entirely to `get_tokens`.

    Logs how many tweets were transferred out of how many were examined.
    """
    # Separate cursors: one is iterated for reading while the other writes.
    scur = CONN_POOL.get_cur(GEOTWEET)
    dcur = CONN_POOL.get_cur(GEOTWEET)

    scur.execute("select id, text from tweet")
    i, k = 0, 0  # i: rows examined, k: rows copied
    for tweet in scur:
        i += 1
        if len(get_tokens(tweet["text"])) > 0:
            # Parameters are passed as a 1-tuple: DB-API 2.0 requires a
            # sequence or mapping, and a bare scalar is rejected by newer
            # MySQLdb releases.
            dcur.execute(
                "insert into `sample` \
                    select * from `tweet`\
                    where `tweet`.`id` = %s",
                (tweet["id"],),
            )
            k += 1
    logging.info("{0} out of {1} tweets are transferred".format(k, i))
示例#4
0
def im_tweet(srcs):
    """Import tweets from line-delimited JSON files into the `sample` table.

    Every input line is parsed as one JSON tweet. Latitude and longitude are
    read from the first vertex of the place bounding box, `created_at` is
    reformatted to `YYYY-MM-DD HH:MM:SS`, and the tweet is inserted only when
    `get_tokens` returns at least one token for its text.

    Args:
        srcs: File name(s) handed to `fileinput.input`; files are opened
            through `fileinput.hook_compressed`.

    Rows rejected with an IntegrityError are reported on stdout as duplicates
    and any other per-line failure is reported with its input line number;
    in both cases the line is skipped and processing continues.  A summary
    (inserted count out of successfully parsed count) is logged at the end.
    """

    # Connect to MySQL database
    cur = CONN_POOL.get_cur(GEOTWEET)

    # The WKT point is bound as a parameter rather than formatted into the
    # statement text, so values coming from the input file never become SQL.
    insert_sql = (
        "INSERT INTO sample ("
        "id, "
        "place_id, "
        "user_id, "
        "text, "
        "lat, "
        "lng, "
        "geo, "
        "created_at) "
        "VALUES(%s,%s,%s,%s,%s,%s,"
        "GeomFromText(%s),%s)"
    )

    i = 0  # tweets actually inserted
    k = 0  # tweets whose fields were all parsed successfully
    for line in fileinput.input(srcs, openhook=fileinput.hook_compressed):
        try:
            tjson = json.loads(line)
            # Bounding-box vertices are indexed as [0] -> lng, [1] -> lat.
            corner = tjson["place"]["bounding_box"]["coordinates"][0][0]
            # float() rejects non-numeric coordinates before they reach the
            # database; a bad value is reported by the generic handler below.
            lat = float(corner[1])
            lng = float(corner[0])
            # Wed Apr 14 18:51:32 +0000 2010
            timestru = time.strptime(tjson["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
            timex = time.strftime("%Y-%m-%d %H:%M:%S", timestru)
            # NOTE(review): WKT is conventionally POINT(x y), i.e. lng first;
            # the original "lat lng" order is kept so new rows match existing
            # data -- confirm which order the `geo` column expects.
            point_wkt = "POINT({0} {1})".format(lat, lng)
            item = (
                tjson["id"],
                tjson["place"]["id"],
                tjson["user"]["id"],
                tjson["text"],
                lat,
                lng,
                point_wkt,
                timex,
            )

            k += 1
            if len(get_tokens(tjson["text"])) > 0:
                cur.execute(insert_sql, item)
                # Count only rows that were really sent to the database, so
                # the "imported" figure in the summary is accurate.
                i += 1
        except _mysql_exceptions.IntegrityError:
            print("Import Tweets::Tweet ID {0} ignored for duplication.".format(tjson["id"]))
        except StandardError:
            # fileinput.lineno() is the cumulative number of the line just
            # read, which is what a "line" in this message should refer to.
            print("Fail at line {0}".format(fileinput.lineno()))
    logging.info("Import Tweet::{0} out of {1} imported.".format(i, k))
    logging.info("------------------------------------------")