def bot_participation():
    """Draw a box plot of the bot / non-bot participant ratio per post.

    Every file in ``RetweetNew/`` is loaded as a JSON dict of tweets. Each
    tweet's ``user`` is classified with ``bot.check_bot``; results equal to 1
    are counted as bots and results equal to 0 as non-bots. The per-post
    ratio ``bots / non-bots`` is collected and plotted to
    ``Image/bot_ratio_box.png``.

    Posts without a single non-bot user are skipped because the ratio is
    undefined for them (the previous code raised ZeroDivisionError).
    """
    Bot = bot.load_bot()

    dir_name = "RetweetNew/"
    bot_list = []
    for postid in os.listdir(dir_name):
        with open(dir_name + postid, 'r') as f:
            tweets = json.load(f)

        bots = [bot.check_bot(Bot, tweet['user']) for tweet in tweets.values()]
        human_count = bots.count(0)
        if human_count == 0:
            # No denominator: the ratio cannot be computed for this post.
            continue
        # float() keeps true division even if the file is run on Python 2.
        bot_list.append(float(bots.count(1)) / human_count)

    box = BoxPlot(1)
    box.set_data(bot_list, '')
    box.set_xticks('bot_ratio')
    box.save_image('Image/bot_ratio_box.png')
# Example #2
# 0
def time_to_depth_echo_chamber(filename):
    """Compare time/users needed to reach each depth for echo vs. non-echo cascades.

    A cascade (identified by its ``origin_tweet``) counts as an echo chamber
    cascade when at least one of its tweets was posted by a user listed in
    ``e_util.get_echo_chamber_users(filename)`` for that post. Per-depth
    values come from the third (``time_depth``) and fifth (``user_depth``)
    values returned by ``get_depth_time_series(None)`` and are bucketed for
    depths 1..19.

    Outputs: a box plot, two line plots via
    ``draw_time_to_depth_echo_chamber``, a CDF plot, and the JSON files
    ``Data/Figure/5_2_1.json`` and ``Data/Figure/5_2_time.json``.

    NOTE(review): ``foldername`` is not defined in this function; presumably
    a module-level variable -- confirm.
    NOTE(review): ``Bot`` and the per-veracity dicts ``e`` / ``n`` / ``r`` are
    filled (or created) here but never read in the visible body.

    Args:
        filename: passed unchanged to ``e_util.get_echo_chamber_users``.
    """
    _, _, time_depth, _, user_depth = get_depth_time_series(None)    
    print(len(time_depth))
    #with open('Data/time_series_data.json', 'w') as f:
    #    json.dump({'time_depth' : time_depth, 'user_depth' : user_depth}, f)
    #with open('Data/time_series_data.json', 'r') as f:
    #    data = json.load(f)

    #time_depth = data['time_depth']
    #user_depth = data['user_depth']

    print("time series data load done ")
    # metric name -> depth (1..19) -> list of values, one entry per cascade
    echo_chamber_values = {}
    non_echo_chamber_values = {} 
   
    for item in ['time_depth', 'user_depth']:
        echo_chamber_values[item] = {}
        non_echo_chamber_values[item] = {}

        for i in range(1,20):
            echo_chamber_values[item][i] = []
            non_echo_chamber_values[item][i] = []
    Bot = bot.load_bot()
    # origin_tweet id -> 1 for cascades with at least one echo chamber user
    echo_chamber_cascade_root = {}
    # origin_tweet id -> title-cased veracity of the post it belongs to
    cascade_veracity = {}
    echo_chamber_users = e_util.get_echo_chamber_users(filename)
   
    files = os.listdir('RetweetNew')
    #collect echo chamber user participate cascade 
    #for postid in echo_chamber_users.keys():
    for postid in files:
        v = veracity_type(postid).title()
        
        #get origin tweet of echo chamber user 
        with open('RetweetNew/%s'%postid, 'r') as f:
            tweets = json.load(f)

            for tweet in tweets.values():
                # The KeyError handler below also covers the otherwise unused
                # 'origin' / 'origin_tweet' lookups and a postid missing from
                # echo_chamber_users; any of these silently skips the tweet.
                try:
                    #if tweet['user'] in echo_chamber_users[postid].keys():
                    origin = tweet['origin']
                    otid = tweet['origin_tweet']
                    #if origin in echo_chamber_users[postid].keys():
                    if tweet['user'] in echo_chamber_users[postid].keys():
                        echo_chamber_cascade_root[tweet['origin_tweet']] = 1
                except KeyError :
                    pass

                # Outside the try block: a tweet without 'origin_tweet' raises here.
                cascade_veracity[tweet['origin_tweet']] = v
    
    print("echo chamber cascade extraction done")

    echo_chamber_cascades = echo_chamber_cascade_root.keys()

    print('echo chamber cascades')
    #print(echo_chamber_cascades)

    # veracity ('True'/'False'/'Mixed') -> metric -> depth -> list of values
    e = {};  n = {}; r = {}; #echo, non echo, ranked echo 
    for item in ['True', 'False', 'Mixed']:
        e[item] = {}
        n[item] = {}
        r[item] = {}
        
        for d_type in ['user_depth', 'time_depth']:
            e[item][d_type] = {}
            n[item][d_type] = {}
            r[item][d_type] = {}

            for i in range(1, 20):
                e[item][d_type][i] = []
                n[item][d_type][i] = []
                r[item][d_type][i] = []

    for key in time_depth.keys():
        # Raises KeyError if a cascade in time_depth was not seen in RetweetNew.
        v = cascade_veracity[key]
        # Every veracity other than 'True' / 'False' is grouped as 'Mixed'.
        if v !='True' and  v != 'False':
            v = 'Mixed'

        if key in echo_chamber_cascades:
            #for i in range(1, max(time_depth[key].keys())+1):
            for i in range(1, max(time_depth[key].keys())+1):
                # KeyError (depth outside 1..19 or missing in the series) is
                # ignored. Appends made before the failing lookup are kept, so
                # the time/user lists can end up with different lengths.
                try:
                    echo_chamber_values['time_depth'][i].append(time_depth[key][i])
                    echo_chamber_values['user_depth'][i].append(user_depth[key][i])
                    e[v]['time_depth'][i].append(time_depth[key][i])
                    e[v]['user_depth'][i].append(user_depth[key][i])

                except KeyError:
                    pass
        else:
            for i in range(1, max(time_depth[key].keys())+1):
                # Same best-effort bucketing as above, for non-echo cascades.
                try :
                    non_echo_chamber_values['time_depth'][i].append(time_depth[key][i])
                    non_echo_chamber_values['user_depth'][i].append(user_depth[key][i])
                    n[v]['time_depth'][i].append(time_depth[key][i])
                    n[v]['user_depth'][i].append(user_depth[key][i])

                except KeyError:
                    pass

    box = BoxPlot(1)
    box.set_multiple_data([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']])
    box.set_ylog()
    box.set_label('Depth', 'Minutes to Depth')
    # NOTE(review): other plots in this file save under 'Image/%s/...'; confirm this path.
    box.save_image('%s/time_depth_echo_chamber_box.png'%foldername)
    print(echo_chamber_values['time_depth'])    

    #draw time to depth, user to depth of cascade for echo chamber users participated or non echo chamber users participated 
    with open('Data/Figure/5_2_1.json', 'w') as f:
        json.dump([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], f)

    
    draw_time_to_depth_echo_chamber([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], ['echo chamber', 'no echo chamber'], 'median minutes', 'time_depth_echo_chamber_line')
    draw_time_to_depth_echo_chamber([echo_chamber_values['user_depth'], non_echo_chamber_values['user_depth']], ['echo chamber', 'no echo chamber'], 'median unique users', 'user_depth_echo_chamber_line')
    
    # Only the depth-1 values are exported and used for the CDF below.
    with open('Data/Figure/5_2_time.json', 'w') as f:
        json.dump({'e':echo_chamber_values['time_depth'][1], 'ne':non_echo_chamber_values['time_depth'][1]}, f)

    #draw cdf with top retweet 
    cdf = CDFPlot()
    cdf.set_label('Propagation Time', 'CDF')
    cdf.set_log(True)
    #cdf.set_ylog()
    cdf.set_data(echo_chamber_values['time_depth'][1], '')
    cdf.set_data(non_echo_chamber_values['time_depth'][1], '')
    cdf.save_image('Image/20181105/depth_propagation_time_cdf.png')

    """
def cascade_depth_distribution():
    """Plot cascade size, unique users and max breadth grouped by max depth.

    ``e_util.get_cascade_max_breadth()`` supplies three mappings keyed by the
    cascade's ``origin_tweet`` id: max breadth, max depth and unique users.
    Every file named in ``rumors`` (under ``dirname``) is scanned; each
    cascade is counted once, using the ``cascade`` value of the first tweet
    seen for it. Three box plots over depths 1..17 are saved under
    ``Image/<foldername>/``.

    NOTE(review): ``rumors``, ``dirname`` and ``foldername`` are not defined
    in this function; presumably module-level variables -- confirm.

    Raises:
        KeyError: if a cascade's max depth is outside 1..19 or the cascade is
            missing from one of the ``e_util`` mappings.
    """
    c_breadth, c_depth, c_unique_users = e_util.get_cascade_max_breadth()

    depth_cascade = {}
    depth_unique_users = {}
    depth_breadth = {}
    for i in range(1, 20):
        depth_cascade[i] = []
        depth_unique_users[i] = []
        depth_breadth[i] = []

    seen_cascades = set()
    # Pre-bound so the progress print below cannot fail when `rumors` is empty.
    last_index = None
    for last_index, postid in enumerate(rumors):
        with open(dirname + '/' + postid, 'r') as f:
            tweets = json.load(f)

        for tweet in tweets.values():
            otid = tweet['origin_tweet']
            if otid in seen_cascades:
                continue

            max_depth = c_depth[otid]
            users = c_unique_users[otid]
            max_breadth = c_breadth[otid]

            seen_cascades.add(otid)
            depth_cascade[max_depth].append(tweet['cascade'])
            depth_unique_users[max_depth].append(users)
            depth_breadth[max_depth].append(max_breadth)

    # Index of the last processed rumor file (None when there were none).
    print(last_index)

    # Only depths 1..17 are plotted even though buckets exist up to 19.
    plotted_depths = range(1, 18)
    cascade_sizes = [depth_cascade[i] for i in plotted_depths]
    user_counts = [depth_unique_users[i] for i in plotted_depths]
    breadths = [depth_breadth[i] for i in plotted_depths]

    box = BoxPlot(1)
    box.set_data(cascade_sizes, '')
    box.set_ylog()
    box.set_label('Depth', 'Cascade Size')
    box.save_image('Image/%s/depth_cascade_dist.png' % foldername)

    box = BoxPlot(1)
    box.set_data(user_counts, '')
    box.set_ylog()
    box.set_label('Depth', 'Number of Users')
    box.save_image('Image/%s/depth_user_dist.png' % foldername)

    box = BoxPlot(1)
    box.set_data(breadths, '')
    box.set_ylog()
    box.set_label('Depth', 'Breadth Size')
    box.save_image('Image/%s/depth_breadth_dist.png' % foldername)
def _append_user_diversity(users, timeline_dir, tweet_out, source_out):
    """Append the diversity scores of ``users`` to the two output lists.

    Users whose timeline file cannot be opened are skipped. ``None`` scores
    returned by ``get_diversity`` are not appended.
    """
    for user in users:
        try:
            with open(timeline_dir + user, 'r') as f:
                user_tweets = json.load(f)
        except IOError:
            # No timeline file for this user: best-effort skip.
            continue

        tweet_diversity, source_diversity = get_diversity(user_tweets)
        if tweet_diversity is not None:
            tweet_out.append(tweet_diversity)
        if source_diversity is not None:
            source_out.append(source_diversity)


def echo_chamber_diversity(filename):
    """Compare source diversity of echo chamber users and other users.

    For every post file in ``Retweet/`` the participating users are split
    into echo chamber users (from ``e_util.get_echo_chamber_users(filename)``)
    and the rest. For each user with a timeline file in ``../Timeline/`` the
    two values returned by ``get_diversity`` are collected, then plotted as
    two CDFs and two box plots under ``Image/20181002/``.

    Args:
        filename: echo chamber file passed to
            ``e_util.get_echo_chamber_users``; when ``None`` every post gets
            an empty echo chamber user set, so all users count as non-echo.
    """
    dirname = 'Retweet/'
    timeline_dir = '../Timeline/'
    files = os.listdir(dirname)

    if filename is None:
        echo_chamber_users = {postid: {} for postid in files}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)

    echo_tweet_diversity = []
    echo_source_diversity = []
    necho_tweet_diversity = []
    necho_source_diversity = []
    for postid in files:
        with open(dirname + postid) as f:
            tweets = json.load(f)

        echo_users = echo_chamber_users[postid]
        # A dict (not a set) keeps first-seen order and de-duplicates users.
        non_echo_users = {}
        for tweet in tweets.values():
            if tweet['user'] not in echo_users:
                non_echo_users[tweet['user']] = 1

        print(len(echo_users), len(non_echo_users))

        _append_user_diversity(echo_users, timeline_dir,
                               echo_tweet_diversity, echo_source_diversity)
        _append_user_diversity(non_echo_users, timeline_dir,
                               necho_tweet_diversity, necho_source_diversity)

    # CDF
    cdf = CDFPlot()
    cdf.set_label('Retweet Origin Diversity', 'CDF')
    cdf.set_data(echo_tweet_diversity, 'Echo Chamber')
    cdf.set_data(necho_tweet_diversity, 'Non Echo Chamber')
    # Legend spelling fixed ("CHamber") to match the series labels above.
    cdf.set_legends(['Echo Chamber', 'Non Echo Chamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_retweet_cdf.png')

    cdf = CDFPlot()
    cdf.set_label('Source News Diversity', 'CDF')
    cdf.set_data(echo_source_diversity, 'Echo Chamber')
    cdf.set_data(necho_source_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo Chamber', 'Non Echo Chamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_news_cdf.png')

    # BoxPlot -- two series, so exactly two tick labels (a stale third
    # label 'All' was passed before).
    box = BoxPlot(1)
    box.set_data([echo_tweet_diversity, necho_tweet_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.set_label('', 'Retweet Origin Diversity')
    box.save_image('Image/20181002/source_diversity_retweet.png')

    box = BoxPlot(1)
    box.set_data([echo_source_diversity, necho_source_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.set_label('', 'Source News Diversity')
    box.save_image('Image/20181002/source_diversity_news.png')
# Example #5
# 0
        "'2016-01-01' and '2016-12-31'", "'2017-01-01' and '2017-12-31'"
    ]
    for date in year:
        sharecount_cdf("between " + date, date[1:5])

    category_list = category_list()
    all_sharecount_list = []
    for item in category_list:
        #sharecount list of one category
        all_sharecount_list.append(sharecount_by_category(item))

    subplot_num = lambda x: int(math.sqrt(x)) if math.sqrt(x).is_integer(
    ) else int(math.sqrt(x)) + 1

    #Display BoxPlot and save at the path
    BoxPlt = BoxPlot(subplot_num(25))
    for i in range(25):
        BoxPlt.set_data(all_sharecount_list[i], '')
        BoxPlt.set_title(category_list[i])
        BoxPlt.set_ylim(1000)

    BoxPlt.save_image('./image/sharecount_box_plot.png')

    all_sharecount_list = []
    #category with source presence
    for item in category_list:
        all_sharecount_list.append([
            sharecount_by_category_with_source(item),
            sharecount_by_category_without_source(item)
        ])
def draw_propagation_time_to_group():
    """Save propagation-time box plots for echo chamber files 2, 3 and 4.

    For each echo chamber file, ``propagation_time_to_group`` returns four
    series; the first pair and the second pair are each plotted as
    "Echo Chamber" vs "Non Echo Chamber". Finally the echo series of all three
    files are plotted next to the series obtained with ``None`` ("All").
    All images are written below ``Image/<folder>/``.
    """

    def save_box(series, ticks, path_template):
        # One box plot with the shared 'Propagation Time' y label.
        plot = BoxPlot(1)
        plot.set_data(series, '')
        plot.set_xticks(ticks)
        plot.set_label('', 'Propagation Time')
        plot.save_image(path_template % folder)

    # (banner, echo chamber file, plain plot path, ranked plot path)
    variants = (
        ('echo chamber 2', 'Data/echo_chamber2.json',
         'Image/%s/propagation_time_to_group2.png',
         'Image/%s/propagation_time_to_group_r2.png'),
        ('echo chamber 3', 'Data/echo_chamber3.json',
         'Image/%s/propagation_time_to_group3.png',
         'Image/%s/propagation_time_to_group_r3.png'),
        ('echo chamber 4', 'Data/echo_chamber4.json',
         'Image/%s/propagation_time_to_group4.png',
         'Image/%s/propagation_time_to_group_r4.png'),
    )

    echo_series = []
    ranked_echo_series = []
    for banner, source, plain_path, ranked_path in variants:
        print(banner)
        echo, necho, recho, rnecho = propagation_time_to_group(source)
        save_box([echo, necho], ['Echo Chamber', 'Non Echo Chamber'],
                 plain_path)
        save_box([recho, rnecho], ['Echo Chamber', 'Non Echo Chamber'],
                 ranked_path)
        echo_series.append(echo)
        ranked_echo_series.append(recho)

    _, non_echo, _, rnon_echo = propagation_time_to_group(None)
    print(len(echo_series[0]), len(echo_series[1]), len(echo_series[2]),
          len(non_echo))

    save_box(echo_series + [non_echo],
             ['Echo Chamber2', 'Echo Chamber3', 'Echo Chamber4', 'All'],
             'Image/%s/propagation_time_to_group.png')
    save_box(ranked_echo_series + [rnon_echo],
             ['Echo Chamber2', 'Echo Chamber3', 'Echo Chamber4', 'All'],
             'Image/%s/propagation_time_to_group_r.png')
def draw_propagation_velocity():
    """Save two box plots built from ``rumor_propagation_velocity``.

    The first plot compares the first series returned for
    ``Data/echo_chamber2.json`` with the second series returned for ``None``.
    The second plot shows the third and fourth series of the echo chamber
    call on a log y axis. Images go below ``Image/<folder>/``.
    """
    echo_mean, _, echo_by_depth, non_echo_by_depth = rumor_propagation_velocity(
        'Data/echo_chamber2.json')
    _, all_mean, _, _ = rumor_propagation_velocity(None)

    mean_plot = BoxPlot(1)
    mean_plot.set_data([echo_mean, all_mean], '')
    mean_plot.set_xticks(['Echo Chamber2', 'All'])
    mean_plot.set_label('', 'Mean Propagation Time')
    mean_plot.save_image('Image/%s/propagation_time.png' % folder)

    depth_plot = BoxPlot(1)
    depth_plot.set_multiple_data([echo_by_depth, non_echo_by_depth])
    depth_plot.set_ylog()
    depth_plot.set_label('Depth', 'Propagation Time')
    depth_plot.save_image('Image/%s/child_all_time_propagation.png' % folder)
def propagation_parent_to_child():
    """Compare child counts and first-retweet delay for echo / non-echo users.

    For every post file in ``RetweetNew/`` the tweets are processed in
    chronological order. Tweets in cascades smaller than 2 and tweets by
    users flagged by ``bot.check_bot`` are ignored. For the rest:

    * the tweet's ``child`` count is bucketed by its ``depth``, separately
      for echo chamber users (``Data/echo_chamber2.json``) and other users;
    * for tweets deeper than depth 1, the delay in minutes between the
      parent tweet and this tweet is stored once per parent tweet (the
      first value seen in chronological order), bucketed by the parent's
      depth and split by whether the ``parent`` user is an echo chamber
      user.

    Two box plots are saved under ``Image/<folder>/`` and the raw data is
    written to ``Data/Figure/5_3_1.json``.

    Raises:
        KeyError: if a post has no entry in the echo chamber user mapping, a
            parent tweet is missing from the post file, or a depth is outside
            1..19.
    """
    Bot = bot.load_bot()
    dirname = 'RetweetNew/'
    files = os.listdir(dirname)
    echo_chamber_users = e_util.get_echo_chamber_users(
        'Data/echo_chamber2.json')

    e_child = {}
    ne_child = {}
    e_time = {}
    ne_time = {}
    for i in range(1, 20):
        e_child[i] = []
        ne_child[i] = []
        e_time[i] = {}   # parent tweet id -> delay of its first child
        ne_time[i] = {}

    print(len(echo_chamber_users.keys()))
    for postid in files:
        with open(dirname + postid, 'r') as f:
            tweets = json.load(f)

        # Parse every timestamp once; reused for sorting and for the delays.
        posted_at = {}
        for tid, record in tweets.items():
            posted_at[tid] = parser.parse(record['time'])
        sorted_ids = sorted(posted_at, key=posted_at.get)

        e_users = echo_chamber_users[postid]
        print(len(e_users))
        for tid in sorted_ids:
            record = tweets[tid]
            if record['cascade'] < 2:
                continue

            # bot filter
            userid = record['user']
            if bot.check_bot(Bot, userid) != 0:
                continue

            depth = record['depth']
            if userid in e_users:
                e_child[depth].append(record['child'])
            else:
                ne_child[depth].append(record['child'])

            if depth <= 1:
                continue

            ptid = record['parent_tweet']
            parent_depth = tweets[ptid]['depth']
            diff = (posted_at[tid] - posted_at[ptid]).total_seconds() / 60

            # Diagnostic: a later child with a smaller delay than the stored
            # one is unexpected when tweets are processed in time order.
            if e_time[parent_depth].get(ptid, -1) > diff:
                print(e_time[parent_depth][ptid], diff)

            bucket = e_time if record['parent'] in e_users else ne_time
            if bucket[parent_depth].get(ptid, -1) == -1:
                bucket[parent_depth][ptid] = diff

    # remove child 0 count
    for i in range(1, 20):
        e_child[i] = [x for x in e_child[i] if x != 0]
        ne_child[i] = [x for x in ne_child[i] if x != 0]

    box = BoxPlot(1)
    box.set_multiple_data([e_child, ne_child])
    box.set_ylog()
    box.set_label('Depth', 'Child Count')
    box.save_image('Image/%s/child_num_wo_propagation.png' % folder)

    # Materialize as lists: dict views are not JSON serializable on Python 3.
    for i in range(1, 20):
        e_time[i] = list(e_time[i].values())
        ne_time[i] = list(ne_time[i].values())

    box = BoxPlot(1)
    box.set_multiple_data([e_time, ne_time])
    box.set_ylog()
    box.set_label('Depth', 'Propagation Time')
    box.save_image('Image/%s/child_time_propagation.png' % folder)

    with open('Data/Figure/5_3_1.json', 'w') as f:
        json.dump(
            {
                'e_time': e_time,
                'ne_time': ne_time,
                'e_child': e_child,
                'ne_child': ne_child
            }, f)
def diversity(filename):
    """Compare polarity diversity of echo chambers with random user samples.

    ``filename`` is a JSON file mapping an echo chamber key (post ids joined
    by ``_``) to its users. For each echo chamber with at least two users,
    ``util.eta`` is computed over the users' polarity scores (scores equal to
    -999 are dropped). For every post id recorded along the way, the same
    number of users is drawn with ``get_random_user`` from the post's tweets
    and scored the same way. Results are written to
    ``Data/echo_chamber_diversity.json`` and a box plot image.

    NOTE(review): only the first two post ids of a key receive the user
    count; any further id keeps the placeholder value 1, and the first loop
    stops completely as soon as any recorded value equals 1. This is kept
    exactly as it was -- confirm whether it is intended.
    NOTE(review): ``dir_name`` is not defined here; presumably module-level.

    Args:
        filename: path of the echo chamber JSON file; the part after
            ``echo_chamber`` (without ``.json``) is used in the image name.
    """

    def known_polarities(user_ids):
        # Polarity scores of the given users, without the -999 marker.
        scores = []
        for uid in user_ids:
            value = get_polarity(uid)
            if value != -999:
                scores.append(value)
        return scores

    index = filename.replace(".json", "").split('echo_chamber')
    print(index)

    with open(filename) as f:
        echo_chambers = json.load(f)
    print('total ', len(echo_chambers))

    postid = {}
    echo_diversity = {}
    for count, key in enumerate(echo_chambers, 1):
        members = echo_chambers[key]

        # progress output
        if count % 100 == 0:
            print(count)

        if len(members) < 2:
            continue

        postids = key.split('_')
        for pid in postids:
            postid[pid] = 1

        polars = known_polarities(members)
        scored_users = len(polars)
        postid[postids[0]] = scored_users
        postid[postids[1]] = scored_users
        if 1 in postid.values():
            break
        echo_diversity[key] = util.eta(polars)

    random_diversity = {}
    for key, user_num in postid.items():
        with open(dir_name + key, 'r') as f:
            tweets = json.load(f)
        candidates = [tweet['user'] for tweet in tweets.values()]
        sampled = get_random_user(candidates, user_num)
        random_diversity[key] = util.eta(known_polarities(sampled))

    with open('Data/echo_chamber_diversity.json', 'w') as f:
        json.dump({'echo_chamber':echo_diversity, 'random':random_diversity}, f)

    plot = BoxPlot(1)
    plot.set_data([random_diversity.values(), echo_diversity.values()], '')
    plot.set_xticks(['Random', 'Echo chamber'])
    plot.save_image('Image/diversity_box_%s.png'%index[1])
def polarity_diversity():
    """Report polarity diversity per rumor, cascade and echo chamber.

    Steps:
      1. For every post file in ``dir_name`` collect each user's polarity,
         grouped per post and per cascade (``origin_tweet``), and plot the
         ``util.eta`` diversity of both groupings.
      2. Compute the diversity of every echo chamber in
         ``Data/echo_chamber2.json`` that has at least two users.
      3. Split cascades into those with at least one echo chamber user and
         the rest, then print and plot diversity and size of both groups.

    NOTE(review): ``dir_name`` and ``foldername`` are not defined here;
    presumably module-level variables -- confirm.
    NOTE(review): another function in this file treats a polarity of -999 as
    "unknown" and drops it; here all scores are passed to ``util.eta`` --
    confirm whether they should be filtered too.
    """
    files = os.listdir(dir_name)
    users_polarity = {}          # postid -> user -> polarity
    users_polarity_cascade = {}  # origin_tweet -> user -> polarity
    retweet_cache = {}           # postid -> tweets, reused in step 3
    for postid in files:
        users_polarity[postid] = {}
        with open(dir_name + '%s' % postid, 'r') as f:
            tweets = json.load(f)
        retweet_cache[postid] = tweets

        for tweet in tweets.values():
            p_score = get_polarity(tweet['user'])
            users_polarity[postid][tweet['user']] = p_score
            users_polarity_cascade.setdefault(
                tweet['origin_tweet'], {})[tweet['user']] = p_score

    r_diversity = [util.eta(list(scores.values()))
                   for scores in users_polarity.values()]
    print(pd.Series(r_diversity).describe())

    # Cascades with a single user have no diversity to measure.
    c_diversity = [util.eta(list(scores.values()))
                   for scores in users_polarity_cascade.values()
                   if len(scores) >= 2]
    print(pd.Series(c_diversity).describe())

    box = BoxPlot(1)
    box.set_data([r_diversity, c_diversity], '')
    box.set_xticks(['Rumor', 'Cascade'])
    box.save_image('Image/%s/diversity_box.png' % foldername)

    # check echo chamber users' polarity similarity
    e_diversity = []
    echo_chamber_users = {}  # postid -> {user: 1}
    with open('Data/echo_chamber2.json') as f:
        echo_chamber = json.load(f)

    for key in echo_chamber:
        users = echo_chamber[key]
        if len(users) < 2:
            continue

        e_diversity.append(util.eta([get_polarity(user) for user in users]))

        # get all echo chamber users for cascade characteristics
        for postid in key.split('_'):
            members = echo_chamber_users.setdefault(postid, {})
            for user in users:
                members[user] = 1

    print(pd.Series(e_diversity).describe())

    # Split cascades by whether an echo chamber user participated.
    echo_cascade = set()
    cascade_users = {}  # origin_tweet -> user -> polarity
    for postid in files:
        # Posts that belong to no echo chamber have no entry: treat them as
        # having no echo chamber users instead of raising KeyError.
        members = echo_chamber_users.get(postid, {})
        post_scores = users_polarity[postid]
        for tweet in retweet_cache[postid].values():
            user = tweet['user']
            origin = tweet['origin_tweet']
            if user in members:
                echo_cascade.add(origin)
            # Reuse the score computed in step 1 rather than recomputing it.
            cascade_users.setdefault(origin, {})[user] = post_scores[user]

    echo_cascade_diversity = []
    echo_cascade_size = []
    non_echo_cascade_diversity = []
    non_echo_cascade_size = []
    for cascade, scores in cascade_users.items():
        cascade_diversity = util.eta(list(scores.values()))
        if cascade in echo_cascade:
            # echo chamber user participated cascade
            echo_cascade_diversity.append(cascade_diversity)
            echo_cascade_size.append(len(scores))
        else:
            # non echo chamber user participated cascade
            non_echo_cascade_diversity.append(cascade_diversity)
            non_echo_cascade_size.append(len(scores))

    print('echo chamber cascade')
    print(pd.Series(echo_cascade_diversity).describe())
    print(pd.Series(echo_cascade_size).describe())
    print('non echo chamber cascade')
    print(pd.Series(non_echo_cascade_diversity).describe())
    print(pd.Series(non_echo_cascade_size).describe())

    box = BoxPlot(1)
    box.set_data([echo_cascade_diversity, non_echo_cascade_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.save_image('Image/20180927/diversity_echo_cascade_box.png')

    box = BoxPlot(1)
    box.set_data([echo_cascade_size, non_echo_cascade_size], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.save_image('Image/20180927/diversity_echo_cascade_size_box.png')
# Example #11
# 0
def draw_5_3_1_figures():
    """Redraw the 5.3.1 / 5.3.2 figures from ``Data/Figure/5_3_1_2.json``.

    The JSON file holds per-depth lists under ``e_child``, ``ne_child``,
    ``e_time``, ``ne_time`` and ``ne_time2`` with string depth keys. Depths
    1..10 are plotted as two box plots (child count, propagation time), and
    the depth-1 propagation times below 180 days (in minutes) are drawn as a
    CDF via ``draw_cdf_plot``.
    """
    with open('Data/Figure/5_3_1_2.json', 'r') as f:
        data = json.load(f)

    e_child = data['e_child']
    ne_child = data['ne_child']
    e_time = data['e_time']
    ne_time = data['ne_time']
    ne_time2 = data['ne_time2']

    # JSON keys are strings; re-key depths 1..10 with integers.
    e_c, e_t, ne_c, ne_t, ne_t2 = {}, {}, {}, {}, {}
    for depth in range(1, 11):
        label = str(depth)
        e_c[depth] = e_child[label]
        e_t[depth] = e_time[label]
        ne_c[depth] = ne_child[label]
        ne_t[depth] = ne_time[label]
        ne_t2[depth] = ne_time2[label]

    child_plot = BoxPlot(1)
    child_plot.set_multiple_data([e_c, ne_c])
    child_plot.set_ylog()
    child_plot.set_label('Depth', 'Child Count')
    child_plot.save_image('Image/Figure/5_3_1_2.png')

    print(e_t.keys())
    time_plot = BoxPlot(1)
    time_plot.set_multiple_data([e_t, ne_t])
    time_plot.set_ylog()
    time_plot.set_label('Depth', 'Propagation Time')
    time_plot.set_yticks(['0', '1 m', '5 m', '1 h', '1 day'],
                         index=[0, 1, 5, 60, 24 * 60])
    time_plot.save_image('Image/Figure/5_3_1_1.png')

    # Durations of six months or more are treated as wrong values and dropped.
    limit = 60 * 24 * 180
    for series in (e_time, ne_time):
        series['1'] = [value for value in series['1'] if value < limit]
    print(max(e_time['1']))
    print(max(ne_time['1']))
    for series in (e_time, ne_time):
        series['1'] = sorted(series['1'])

    draw_cdf_plot([e_time['1'], ne_time['1']], 'Propagation Time',
                  ['Echo chamber', 'Non-echo chamber'], '',
                  'Image/Figure/5_3_2.png')