Пример #1
0
def botspy(dynamic):
    time_delta = request.args.get('time_delta')
    url = request.path
    str_time_range = stringtime(time_delta)

    today = str_today()

    most_active = db.session.query(User.user_scrname, User.user_cap_perc,\
    func.count(Post.post_id), User.user_id).\
    join(Post.user).\
    filter(User.user_cap_perc >= 43.0).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    group_by(User.user_id).order_by(func.count(Post.post_id).desc()).all()

    bot_hashtags = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\
    join(Post.user).join(Post.hashtags).\
    filter(User.user_cap_perc >= 43.0).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    group_by(Hashtag.hashtag).order_by(func.count(Hashtag.hashtag).desc()).all()

    most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \
    Post.retweet_count, Post.original_tweet_id, User.user_scrname, Post.tweet_html,\
    Post.text, Post.original_text).\
    join(Post.user).\
    filter(User.user_cap_perc >= 43.0).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    filter(Post.is_retweet == 0).\
    order_by(Post.retweet_count.desc()).all()

    most_active_districts = db.session.query(District.district_name,\
    func.count(District.district_name)).\
    join(Post.user).join(Post.districts).\
    filter(User.user_cap_perc >= 43.0).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    group_by(District.district_name).\
    order_by(func.count(District.district_name).desc()).all()

    most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)


    popular_bot = db.session.query(User.user_scrname, User.user_followers,\
    User.user_id).\
    filter(User.user_cap_perc >= 43.0).\
    group_by(User.user_id).order_by(User.user_followers.desc()).all()

    avg_bot_raw = db.session.query(func.avg(User.user_followers)).\
    filter(User.user_cap_perc >= 43.0).\
    first()

    avg_bot = int(avg_bot_raw[0])

    #Get botweather chart data from graph_functions modules
    botchart = gf.botweather_chart()

    return render_template('botspy.html', time_delta=time_delta,\
    most_active=most_active, bot_hashtags=bot_hashtags, \
    most_retweeted_tweets=most_retweeted_tweets, popular_bot=popular_bot,\
    avg_bot=avg_bot, botchart=botchart, get_tweet=get_tweet, \
    most_retweeted_tweet_list=most_retweeted_tweet_list,\
    most_active_districts=most_active_districts)
Пример #2
0
def fill_hash_activity(dist_group, time_delta, table, table_new, table_old):

    if dist_group == "allcong":
        dist_fig = 1
    if dist_group == "allsen":
        dist_fig = 2

    #CREATE CONNECTION FOR DIRECT DB ACCESS

    conn = db.engine.connect()
    conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old))
    if time_delta == 14:
        conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE hash_activity_{1}_1;'.format(table, dist_group))
    conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table))
    #conn.execute("DELETE FROM {0}".format(table_new))

    #GET ORDERED LIST OF HASHTAGS AND ACTIVITY COUNT

    str_time_range = stringtime(time_delta)

    if dist_group == "allcong" or dist_group == "allsen":
        all_hashes = db.session.query(Hashtag.hash_id, Hashtag.hashtag,\
        func.count(Hashtag.hash_id)).\
        join(Post.hashtags).join(Post.districts).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        filter(District.dist_type==dist_fig).\
        group_by(Hashtag.hash_id).order_by(func.count(Hashtag.hash_id).desc()).all()

    else:
        all_hashes = db.session.query(Hashtag.hash_id, Hashtag.hashtag,\
        func.count(Hashtag.hash_id)).\
        join(Post.hashtags).join(Post.districts).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(Hashtag.hash_id).order_by(func.count(Hashtag.hash_id).desc()).all()


    #INSERT TOP 100 ITEMS INTO SHADOW TABLE (TABLE_NEW)
    counter = 1
    for item in all_hashes[0:100]:

        conn.execute("INSERT INTO {0} VALUES ({1}, {2}, '{3}', {4});".\
                format(table_new, counter, item[0], item[1], item[2]))

        counter += 1



    #RENAME EXISTING (MAIN) TABLE TO OLD, RENAME SHADOW TO MAIN

    conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\
    format(table, table_old, table_new, table))

    conn.close()
Пример #3
0
def fill_retweeted_users(dist_group, time_delta, table, table_new, table_old):

    if dist_group == "allcong":
        dist_fig = 1
    if dist_group == "allsen":
        dist_fig = 2

    conn = db.engine.connect()
    conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old))
    if time_delta == 14:
        conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE retweeted_users_1;'.format(table))
    conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table))
    #conn.execute("DELETE FROM {0}".format(table_new))

    #GET ORDERED LIST OF HASHTAGS AND ACTIVITY COUNT

    str_time_range = stringtime(time_delta)

    if dist_group == "allcong" or dist_group == "allsen":
        retweeted_users = db.session.query(Post.original_author_scrname, \
        func.count(Post.original_author_scrname)).\
        join(Post.districts).\
        filter(Post.original_author_scrname != "").filter(District.dist_type==dist_fig).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(Post.original_author_scrname).order_by(func.count(Post.original_author_scrname).\
        desc()).all()

    else:
        retweeted_users = db.session.query(Post.original_author_scrname, \
        func.count(Post.original_author_scrname)).\
        filter(Post.original_author_scrname != "").\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(Post.original_author_scrname).order_by(func.count(Post.original_author_scrname).\
        desc()).all()

    counter = 1
    for item in retweeted_users[0:100]:

        conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', {3});".\
                                format(table_new, counter, item[0], item[1]))

        counter += 1



    conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\
    format(table, table_old, table_new, table))

    conn.close()
Пример #4
0
def sen_cache(time_delta):

    str_time_range = stringtime(time_delta)

    with open('app/comp_races_parsed_sen.csv', 'r') as f:
        reader = csv.reader(f)

        for row in reader:
            #row_split = row.split(',')
            dynamic = row[0][1:6]
            print("District is {}".format(dynamic))

            dist_hashes = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\
            join(Post.districts).join(Post.hashtags).\
            filter(District.district_name==dynamic).\
            filter(Post.created_at >= str_time_range).\
            group_by(Hashtag.hashtag).\
            order_by(func.count(Hashtag.hashtag).desc()).all()

            print(len(dist_hashes))

            top_tweeters = db.session.query(User.user_scrname, func.count(User.user_scrname),\
            User.user_cap_perc, User.user_id).\
            join(Post.user).join(Post.districts).\
            filter(District.district_name==dynamic).\
            filter(Post.created_at >= str_time_range).\
            group_by(User.user_id).\
            order_by(func.count(User.user_id).desc()).all()

            print(len(top_tweeters))

            most_retweeted = db.session.query(Post.original_author_scrname, \
            func.count(Post.original_author_scrname)).\
            join(Post.districts).\
            filter(District.district_name==dynamic).\
            filter(Post.created_at >= str_time_range).\
            filter(Post.original_author_scrname != "").\
            group_by(Post.original_author_scrname).\
            order_by(func.count(Post.original_author_scrname).desc()).all()

            print(len(most_retweeted))
Пример #5
0
def fill_retweeted_tweets(dist_group, time_delta, table, table_new, table_old):
    print("starting {}".format(table))
    if dist_group == "allcong":
        dist_fig = 1
    if dist_group == "allsen":
        dist_fig = 2

    conn = db.engine.connect()
    conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old))
    if time_delta == 14:
        conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE retweeted_tweets_1;'.format(table))
    conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table))
    #conn.execute("DELETE FROM {0}".format(table_new))

    #Get uncleaned list of top retweeted tweet IDS

    str_time_range = stringtime(time_delta)

    # if dist_group == "allcong" or dist_group == "allsen":
    #     most_retweeted_tweets = db.session.query(Post.post_id, Post.original_tweet_id, \
    #     Post.retweet_count, District.dist_type).\
    #     join(Post.districts).\
    #     filter(Post.created_at >= str_time_range).filter(District.dist_type==dist_fig).\
    #     order_by(Post.retweet_count.desc()).all()
    #
    # else:
    #     most_retweeted_tweets = db.session.query(Post.post_id, Post.original_tweet_id, \
    #     Post.retweet_count, District.dist_type).\
    #     join(Post.districts).\
    #     filter(Post.created_at >= str_time_range).\
    #     order_by(Post.retweet_count.desc()).all()




    if dist_group == "allcong" or dist_group == "allsen":
        most_retweeted_tweets_inperiod = db.session.query(Post.original_tweet_id,\
        func.count(Post.original_tweet_id)).\
        join(Post.districts).\
        filter(Post.is_retweet == 1).filter(District.dist_type==dist_fig).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(Post.original_tweet_id).\
        order_by(func.count(Post.original_tweet_id).desc()).all()

    else:
        most_retweeted_tweets_inperiod = db.session.query(Post.original_tweet_id,\
        func.count(Post.original_tweet_id)).\
        join(Post.districts).\
        filter(Post.is_retweet == 1).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(Post.original_tweet_id).\
        order_by(func.count(Post.original_tweet_id).desc()).all()


    # Get cleaned list of tweet ids (no dupes, w/ district relevance)
    # List returns 20 tweets, with lists of following attributes:
    # [Post.post_id, relevant screen name, retweet_count, tweet_html (if exists),
    # original_tweet_id]
    # tweet_list = get_tweet_list_ids(most_retweeted_tweets)
    # #print(tweet_list[0])
    #
    # # GET FULL LIST. RETURNS FOLLOWING ATTRIBUTE:
    # # [Post id, screen name, retweet count, botscore]
    # populated_list = populate_tweet_list(tweet_list)

    # Get list with attributes:
    # [post_id, user_scrname, post count, user_cap_perc, tweethtml]
    tweet_list_inperiod = get_tweet_list_inperiod(most_retweeted_tweets_inperiod)

    # pprint.pprint(populated_list[0])
    counter = 1
    for item in tweet_list_inperiod:

        x = '''INSERT INTO {table} VALUES ({counter}, '{post_id}', '{botscore}', '{name}', {retweets});'''.\
           format(table=table_new, counter=counter, post_id=item[0], botscore=item[3], name=item[1], retweets=item[2])

        print(x)

        conn.execute(x)

        counter += 1


    conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\
    format(table, table_old, table_new, table))

    conn.close()

    print("finished with table: {}".format(table))
Пример #6
0
def fill_dist_activity(dist_group, time_delta, table, table_new, table_old):


    if dist_group == "allcong":
        dist_fig = 1
    if dist_group == "allsen":
        dist_fig = 2

    #CREATE CONNECTION FOR DIRECT DB ACCESS
    conn = db.engine.connect()
    print("connected")
    conn.execute("DROP TABLE IF EXISTS {0}, {1};".format(table_new, table_old))
    print("dropped tables")
    if time_delta == 14:
        conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE dist_activity_{1}_1;'.format(table, dist_group))
    print("created base table")
    conn.execute('CREATE TABLE {0} LIKE {1};'.format(table_new, table))
    print("created table {}".format(table_new))

    #conn.execute("DELETE FROM {0};".format(table_new))

    #GET ORDERED LIST OF DISTRICTS AND ACTIVITY COUNT

    str_time_range = stringtime(time_delta)

    #If partial group congress or senate
    if dist_group == "allcong" or dist_group == "allsen":
        most_active = db.session.query(District.district_name,\
        func.count(District.district_name)).\
        join(Post.districts).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        filter(District.dist_type==dist_fig).\
        group_by(District.district_name).\
        order_by(func.count(District.district_name).desc()).all()

    #If all races
    else:

        most_active = db.session.query(District.district_name,\
        func.count(District.district_name)).\
        join(Post.districts).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(District.district_name).\
        order_by(func.count(District.district_name).desc()).all()



    counter = 1
    for item in most_active:

        conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', {3});".\
                                format(table_new, counter, item[0], item[1]))

        counter += 1




    conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\
    format(table, table_old, table_new, table))

    conn.close()
Пример #7
0
def fill_top_tweeters(dist_group, time_delta, table, table_new, table_old):

    if dist_group == "allcong":
        dist_fig = 1
    if dist_group == "allsen":
        dist_fig = 2

    conn = db.engine.connect()
    conn.execute("DROP TABLE IF EXISTS {0}, {1}".format(table_new, table_old))
    if time_delta == 14:
        conn.execute('CREATE TABLE IF NOT EXISTS {0} LIKE top_tweeters_1;'.format(table))
    conn.execute('CREATE TABLE {0} LIKE {1}'.format(table_new, table))
    #conn.execute("DELETE FROM {0}".format(table_new))

    #GET ORDERED LIST OF HASHTAGS AND ACTIVITY COUNT

    str_time_range = stringtime(time_delta)

    if dist_group == "allcong" or dist_group == "allsen":

        top_tweeters = db.session.query(User.user_id, User.user_scrname,\
        User.user_cap_perc, func.count(User.user_scrname)).\
        join(Post.user).join(Post.districts).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        filter(District.dist_type==dist_fig).\
        group_by(User.user_id).order_by(func.count(User.user_id).desc()).all()

    else:

        top_tweeters = db.session.query(User.user_id, User.user_scrname,\
        User.user_cap_perc, func.count(User.user_scrname)).\
        join(Post.user).\
        filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
        group_by(User.user_id).order_by(func.count(User.user_id).desc()).all()


    #INSERT TOP 100 ITEMS INTO SHADOW TABLE (TABLE_NEW)
    counter = 1
    for item in top_tweeters[0:100]:

        if item[2]:

            conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', '{3}', {4}, {5});".\
                                    format(table_new, counter, item[0], item[1],\
                                    item[2], item[3]))

            counter += 1

        else:
            conn.execute("INSERT INTO {0} VALUES ({1}, '{2}', '{3}', {4}, {5});".\
                                    format(table_new, counter, item[0], item[1],\
                                    '-1.0', item[3]))

            counter += 1

    #RENAME EXISTING (MAIN) TABLE TO OLD, RENAME SHADOW TO MAIN

    conn.execute('RENAME TABLE {0} TO {1}, {2} TO {3}'.\
    format(table, table_old, table_new, table))

    conn.close()
Пример #8
0
from app import app, db
from app.models import *

from app.tweepy_cred import *
import botometer
import time
from decimal import Decimal, ROUND_HALF_UP
from app.helpers import stringtime




from sqlalchemy import Column, Integer, String, Float, func

pass_list = ['immelza']
str_time_range = stringtime(14)

#GET ALL USER OBJECTS, in order of posting volume last 14 days
# users = db.session.query(User.user_scrname, func.count(User.user_scrname)).\
# join(Post.user).\
# filter(Post.created_at >= str_time_range).\
# group_by(User.user_scrname).\
# order_by(func.count(User.user_scrname).desc()).all()

# users = db.session.query(User.user_scrname, func.count(User.user_scrname)).\
# join(Post.user).join(Post.districts).\
# filter(Post.created_at >= str_time_range).\
# filter(District.district_name == "TXSen").\
# group_by(User.user_scrname).\
# order_by(func.count(User.user_scrname).desc()).all()
Пример #9
0
def district(dynamic):
    print('starting district {}'.format(dynamic))

    time_delta = request.args.get('time_delta')
    url = request.path

    str_time_range = stringtime(time_delta)
    today = str_today()

    #Set str_today within page call, so is correct (today)
    # NOTE: Possibly faster to do this w/i hash_pickled db lookup
    #today = datetime.combine(date.today(), datetime.min.time())  #datetime object for midnight
    #str_today = today.strftime("%Y-%m-%d %H:%M:%S")         # string version of midnight

    # Most frequently used hashtags column
    dist_hashes = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\
    join(Post.districts).join(Post.hashtags).\
    filter(District.district_name==dynamic).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    group_by(Hashtag.hashtag).\
    order_by(func.count(Hashtag.hashtag).desc()).all()

    print('got dist_hashes')

    # Most active tweeters column
    top_tweeters = db.session.query(User.user_scrname, func.count(User.user_scrname),\
    User.user_cap_perc, User.user_id).\
    join(Post.user).join(Post.districts).\
    filter(District.district_name==dynamic).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    group_by(User.user_id).\
    order_by(func.count(User.user_id).desc()).all()

    print('got top_tweeters')

    # Most frequently retweeted users column )
    most_retweeted = db.session.query(Post.original_author_scrname, \
    func.count(Post.original_author_scrname)).\
    join(Post.districts).\
    filter(District.district_name==dynamic).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    filter(Post.original_author_scrname != "").\
    group_by(Post.original_author_scrname).\
    order_by(func.count(Post.original_author_scrname).desc()).all()

    #Get botscore for top five most-retweeted users, create list of [name, retweet
    #numbers, botscore] to send to template
    most_retweeted_list = []

    for item in most_retweeted[0:5]:
        tweeter = []
        tweeter.append(item[0])
        tweeter.append(item[1])

        # Get botscore for original authors
        botscore = db.session.query(User.user_cap_perc).\
        filter(User.user_scrname==item[0]).first()

        if botscore:
            tweeter.append(botscore[0])
        else:
            tweeter.append("Not yet in database")
        most_retweeted_list.append(tweeter)

    print('got most_retweeted')

    # Most retweeted tweets column

    # Gets list of tweets in time period, ordered by most-retweeted (NOTE: many
    # or most of these retweets may be previous to this period)
    # most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \
    # Post.retweet_count, Post.original_tweet_id, User.user_scrname, Post.tweet_html,
    # Post.text, Post.original_text).\
    # join(Post.districts).join(Post.user).\
    # filter(District.district_name==dynamic).filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < str_today).\
    # order_by(Post.retweet_count.desc()).all()
    #
    # # Use helper function to Get botscore for top five most-retweeted tweets,
    # # create list of [post_id, scrname, retweet count, botscore, post_html]
    #
    # most_retweeted_tweet_list = get_tweet_list(most_retweeted_tweets, dynamic)

    #most_retweeted_tweet_list_dated = get_tweet_list_dated(db_search_object, time_delta)

    print('got most_retweeted_list')

    # Get tweets most retweeted in this time period (by counting actual apperances)
    most_retweeted_inperiod = db.session.query(Post.original_tweet_id,\
    func.count(Post.original_tweet_id)).\
    join(Post.districts).\
    filter(Post.is_retweet == 1).filter(District.district_name==dynamic).\
    filter(Post.created_at_dt >= str_time_range).filter(Post.created_at_dt < today).\
    group_by(Post.original_tweet_id, Post.original_author_scrname).\
    order_by(func.count(Post.original_tweet_id).desc()).all()

    most_retweeted_inperiod_list = get_tweet_list_inperiod(
        most_retweeted_inperiod)

    print('got most_retweeted_list_inperiod')

    #Get basic district object for district info
    dist_obj = db.session.query(District.state_fullname, District.district, \
    District.incumbent, District.incumbent_party, District.clinton_2016, \
    District.trump_2016, District.dem_candidate, District.rep_candidate).\
    filter(District.district_name==dynamic).first()

    print('got dist_obj')

    #Using 3-day index for top-row hashtags to spotlight
    #hash_table_rows = gf.get_hash_rows(dynamic)
    hash_pickled = db.session.query(District_graphs.chart_rows).\
    filter(District_graphs.reference_date==today).\
    filter(District_graphs.district_name==dynamic).first()

    if hash_pickled != None:
        hash_table_rows = pickle.loads(hash_pickled[0])
    else:
        hash_table_rows = gf.get_hash_rows(dynamic)

    print(hash_table_rows)

    print('got chart_rows')

    return render_template('district.html', dynamic=dynamic, time_delta=time_delta, \
    url=url, dist_hashes=dist_hashes, top_tweeters=top_tweeters, \
    most_retweeted=most_retweeted, \
    t_form=ChangeTimeForm(), get_tweet=get_tweet, dist_obj=dist_obj, \
    test_insert=test_insert, distlist=distlist, hash_table_rows=hash_table_rows,\
    most_retweeted_list=most_retweeted_list, \
    most_retweeted_inperiod_list=most_retweeted_inperiod_list)
Пример #10
0
def screen_name(dynamic):

    print('starting screen name {}'.format(dynamic))

    time_delta = request.args.get('time_delta')
    url = request.path
    str_time_range = stringtime(time_delta)

    today = str_today()

    # What hashtags are used most frequently by this screen name
    top_hashtags = db.session.query(Hashtag.hashtag, func.count(Hashtag.hashtag)).\
    join(Post.user).join(Post.hashtags).\
    filter(User.user_scrname == dynamic).\
    filter(Post.created_at_dt >= str_time_range).\
    filter(Post.created_at_dt < today).\
    group_by(Hashtag.hashtag).\
    order_by(func.count(Hashtag.hashtag).desc()).all()

    # Which districts are referenced most frequently by screen name
    top_districts = db.session.query(District.district_name, \
    func.count(District.district_name)).\
    join(Post.districts).join(Post.user).\
    filter(User.user_scrname == dynamic).\
    filter(Post.created_at_dt >= str_time_range).\
    filter(Post.created_at_dt < today).\
    group_by(District.district_name).\
    order_by(func.count(District.district_name).\
    desc()).all()

    user_obj = db.session.query(User).filter(
        User.user_scrname == dynamic).first()
    #Turn user_created into datetime obj for use with Moment
    if user_obj:
        user_created_date = get_tweet_datetime(user_obj.user_created)

    # Who has this user most frequently retweeted in this time period?

    retweeted_users_period = db.session.query(Post.original_author_scrname, \
    func.count(Post.original_author_scrname)).\
    join(Post.user).\
    filter(User.user_scrname == dynamic).\
    filter(Post.created_at_dt >= str_time_range).\
    filter(Post.created_at_dt < today).\
    filter(Post.original_author_scrname != "").\
    group_by(Post.original_author_scrname).\
    order_by(func.count(Post.original_author_scrname).desc()).all()

    # Who has this user most frequently retweeted overall (in accessible db)?

    retweeted_users_total = db.session.query(Post.original_author_scrname, \
    func.count(Post.original_author_scrname)).\
    join(Post.user).\
    filter(User.user_scrname == dynamic).\
    filter(Post.original_author_scrname != "").\
    group_by(Post.original_author_scrname).\
    order_by(func.count(Post.original_author_scrname).desc()).all()

    # Who has retweeted this user the most?

    who_retweets = db.session.query(User.user_scrname, \
    func.count(User.user_scrname)).\
    join(Post.user).\
    filter(Post.original_author_scrname == dynamic).\
    filter(Post.created_at_dt >= str_time_range).\
    filter(Post.created_at_dt < today).\
    group_by(User.user_scrname).\
    order_by(func.count(User.user_scrname).desc()).all()

    #Get top retweet (retweet count reflects original post_ NOTE: needs work on authors
    #idea: filtering by orig_author, only getting retweets, always with dynamic
    #screenname. Thus no User needed. Remove?)

    # All posts that are retweets, and have this user as original_author_scrname
    most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \
    Post.retweet_count, Post.original_tweet_id, Post.tweet_html,
    Post.text, Post.original_text).\
    filter(Post.original_author_scrname==dynamic).\
    filter(Post.created_at_dt >= str_time_range).\
    filter(Post.created_at_dt < today).\
    order_by(Post.retweet_count.desc()).all()

    # Use helper function to Get botscore for top five most-retweeted tweets,
    # create list of [post_id, name, retweet numbers, botscore]

    most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)

    # most_retweeted_inperiod = db.session.query(Post.original_tweet_id,\
    # func.count(Post.original_tweet_id)).\
    # join(Post.user).\
    # filter(Post.is_retweet == 1).filter(User.user_name==dynamic).\
    # filter(Post.created_at_dt >= str_time_range).\
    # group_by(Post.original_tweet_id).\
    # order_by(func.count(Post.original_tweet_id).desc()).all()
    #
    # most_retweeted_inperiod_list = get_tweet_list_inperiod(most_retweeted_inperiod)

    #Get data for double-7 chart
    scrname_chart = gf.scrname_chart(dynamic)

    hticks = [
        scrname_chart[0][0], scrname_chart[1][0], scrname_chart[2][0],
        scrname_chart[3][0], scrname_chart[4][0], scrname_chart[5][0],
        scrname_chart[6][0], scrname_chart[7][0], scrname_chart[8][0],
        scrname_chart[9][0]
    ]

    if user_obj:
        return render_template('screen_name.html', t_form=ChangeTimeForm(), \
        dynamic=dynamic, url=url, time_delta=time_delta, top_hashtags=top_hashtags, \
        top_districts=top_districts, user_obj=user_obj, \
        retweeted_users_period=retweeted_users_period, \
        retweeted_users_total=retweeted_users_total, who_retweets=who_retweets,\
        user_created_date=user_created_date, most_retweeted_tweets=most_retweeted_tweets,\
        scrname_chart=scrname_chart, hticks=hticks, get_tweet=get_tweet, \
        most_retweeted_tweet_list=most_retweeted_tweet_list)

    else:
        return render_template('doesnt_exist.html', dynamic=dynamic)
Пример #11
0
def overview(dynamic):

    print('starting district group {}'.format(dynamic))
    time_delta = request.args.get('time_delta')
    url = request.path
    str_time_range = stringtime(time_delta)

    today = datetime.combine(
        date.today(), datetime.min.time())  #datetime object for midnight
    str_today = today.strftime(
        "%Y-%m-%d %H:%M:%S")  # string version of midnight

    conn = db.engine.connect()

    # GET 10-day table data for hashtag chart from graph_functions module
    # RETURN THIS TO TEMPLATE

    # Get a desc-ordered list of all hashtags being used in all districts
    # Object returns (hashtag, count)
    all_hashes_result = conn.execute('SELECT hashtag, count FROM hash_activity_{0}_{1};'.\
    format(dynamic, time_delta)).fetchall()

    #RETURN THIS TO TEMPLATE
    all_hashes = []

    for item in all_hashes_result:
        all_hashes.append(item)

    print("got all_hashes")

    #Get a list of tophashtags without district names
    # RETURN THIS TO TEMPLATE
    hashes_no_dists = []
    counter = 0
    for item in all_hashes:
        if item[0] not in distlist:
            hashes_no_dists.append(item)
            counter += 1
        if counter == 20:
            break

    # RETURN THIS TO TEMPLATE
    all_tweets = db.session.query(func.count(Post.post_id)).\
    filter(Post.created_at_dt >= str_time_range).first()

    print("got all tweets")

    #Get list of most active districts

    active_dists_result = conn.execute('SELECT district_name, count FROM dist_activity_{0}_{1};'.\
    format(dynamic, time_delta)).fetchall()

    # RETURN THIS TO TEMPLATE
    most_active = []

    for item in active_dists_result:
        most_active.append(item)

    print("got most_active")

    # Get a desc-ordered list of top-volume Tweeters
    top_tweeters_result = conn.execute('SELECT user_scrname, count, user_cap_perc FROM top_tweeters_{0}_{1};'.\
    format(dynamic, time_delta)).fetchall()

    # RETURN THIS TO TEMPLATE
    top_tweeters = []

    for item in top_tweeters_result:
        new_row = []
        new_row.append(item[0])
        new_row.append(item[1])
        if item[2] == -1.0:
            new_row.append("Not yet in database")
        else:
            new_row.append(item[2])
        top_tweeters.append(new_row)

    print("got top_tweeters")

    retweeted_users_result = conn.execute('SELECT original_author_scrname, count FROM retweeted_users_{0}_{1};'.\
    format(dynamic, time_delta)).fetchall()

    # RETURN THIS TO TEMPLATE
    retweeted_users = []

    for item in retweeted_users_result:
        retweeted_users.append(item)

    print("got retweeted users")

    retweeted_tweets_result = conn.execute('SELECT post_id, original_poster, retweet_count, botscore FROM retweeted_tweets_{0}_{1};'.\
    format(dynamic, time_delta)).fetchall()

    # RETURN THIS TO TEMPLATE
    most_retweeted_tweet_list = []
    tweet_count = 0
    # Each item (tuple) is post_id, poster, count, botscore. Still need HTML
    for item in retweeted_tweets_result:

        # if original_author_scrname is in skip list, skip it
        if item[1].lower() in skip_list:
            continue

        #create list with attributes
        holding_list = []
        for attribute in item:
            holding_list.append(attribute)

        # Get Tweet HTML, add to list
        try:
            tweet_text = get_tweet(item[0])
            holding_list.append(tweet_text)
        except:
            holding_list.append("Can't retrieve tweet")

        most_retweeted_tweet_list.append(holding_list)
        tweet_count += 1
        if tweet_count == 5:
            break

    print("got most retweeted tweets")




    hash_pickled = db.session.query(District_graphs.chart_rows).\
    filter(District_graphs.reference_date==str_today).\
    filter(District_graphs.district_name==dynamic).first()

    if hash_pickled != None:
        hashtable_all = pickle.loads(hash_pickled[0])
    else:
        hashtable_all = gf.get_hashrows_overview(dynamic)

    print("got hashtable")

    conn.close()

    return render_template('overview.html', t_form=ChangeTimeForm(), \
    all_form=AllCongSearchForm(), dynamic=dynamic, time_delta=time_delta, \
    url=url, all_hashes=all_hashes,  hashes_no_dists=hashes_no_dists, \
    hashtable_all=hashtable_all, all_tweets=all_tweets,\
    most_retweeted_tweet_list=most_retweeted_tweet_list,\
    top_tweeters=top_tweeters, retweeted_users=retweeted_users,
    most_active=most_active\
    )
Пример #12
0
def hashtag(dynamic):

    print('starting hashtag {}'.format(dynamic))

    time_delta = request.args.get('time_delta')
    url = request.path

    str_time_range = stringtime(time_delta)

    #Where is this hashtag used column
    top_districts=db.session.query(District.district_id, District.district_name, \
    func.count(District.district_id), District.state_fullname, District.district).\
    join(Post.districts).join(Post.hashtags).\
    filter(Hashtag.hashtag == dynamic).filter(Post.created_at_dt >= str_time_range).\
    group_by(District.district_id).order_by(func.count(District.district_id).\
    desc()).all()

    print("got top districts")

    # "Who uses this hashtag" column
    top_users=db.session.query(User.user_scrname, func.count(User.user_scrname), \
    User.user_cap_perc, User.user_id).\
    join(Post.user).join(Post.hashtags).\
    filter(Hashtag.hashtag == dynamic).filter(Post.created_at_dt >= str_time_range).\
    group_by(User.user_id).\
    order_by(func.count(User.user_id).desc()).all()

    print("got top users")

    # Data for positive/negative chart
    valences = db.session.query(Post.polarity_val, func.count(Post.polarity_val)).\
    join(Post.hashtags).\
    filter(Hashtag.hashtag == dynamic).filter(Post.created_at_dt >= str_time_range).\
    group_by(Post.polarity_val).all()

    valences_datatable = [['Attitude', 'Number of Tweets']]
    for item in valences:
        valences_datatable.append([item[0], item[1]])

    print("got valences")

    # # Most retweeted tweets column
    # most_retweeted_tweets = db.session.query(Post.post_id, Post.original_author_scrname, \
    # Post.retweet_count, Post.original_tweet_id, User.user_scrname, Post.tweet_html,
    # Post.text, Post.original_text).\
    # join(Post.hashtags).join(Post.user).\
    # filter(Hashtag.hashtag==dynamic).filter(Post.created_at_dt >= str_time_range).\
    # order_by(Post.retweet_count.desc()).all()
    #
    # print("got most retweeted tweets")
    #
    # #Get botscore for top five most-retweeted tweets, create list of [post_id, name,
    # # retweet numbers, botscore] to send to template
    # most_retweeted_tweet_list = get_tweet_list_nodist(most_retweeted_tweets)
    #
    # print("got most retweeted tweets")

    # Get tweets most retweeted in this time period (by counting actual apperances)
    most_retweeted_inperiod = db.session.query(Post.original_tweet_id,\
    func.count(Post.original_tweet_id)).\
    join(Post.hashtags).\
    filter(Post.is_retweet == 1).filter(Hashtag.hashtag==dynamic).\
    filter(Post.created_at_dt >= str_time_range).\
    group_by(Post.original_tweet_id).\
    order_by(func.count(Post.original_tweet_id).desc()).all()

    most_retweeted_inperiod_list = get_tweet_list_inperiod(
        most_retweeted_inperiod)

    return render_template('hashtag.html', t_form=ChangeTimeForm(), url=url, \
    dynamic=dynamic, time_delta=time_delta, top_districts=top_districts, \
    top_users=top_users, valences=valences, valences_datatable=valences_datatable,\
    get_tweet=get_tweet, most_retweeted_inperiod_list=most_retweeted_inperiod_list
    )