def bot_participation():
    """Save a box plot of the per-post ratio of bot to non-bot retweeters.

    Every file in ``RetweetNew/`` is loaded as a JSON mapping of tweets. Each
    tweet's ``user`` is classified with ``bot.check_bot`` and the number of
    ``1`` results is divided by the number of ``0`` results (presumably
    bot vs. non-bot -- verify against ``bot.check_bot``). The ratios are drawn
    into ``Image/bot_ratio_box.png``.
    """
    Bot = bot.load_bot()
    dir_name = "RetweetNew/"
    bot_list = []
    for postid in os.listdir(dir_name):
        with open(dir_name + postid, 'r') as f:
            tweets = json.load(f)
        bots = [bot.check_bot(Bot, tweet['user']) for tweet in tweets.values()]
        non_bot_count = bots.count(0)
        if non_bot_count == 0:
            # The ratio is undefined without any 0-classified user; skip the
            # post instead of crashing with ZeroDivisionError.
            continue
        bot_list.append(bots.count(1) / non_bot_count)

    box = BoxPlot(1)
    box.set_data(bot_list, '')
    box.set_xticks('bot_ratio')
    box.save_image('Image/bot_ratio_box.png')
def draw_propagation_velocity():
    """Save two box plots built from ``rumor_propagation_velocity`` results.

    The helper is called once for ``Data/echo_chamber2.json`` and once with
    ``None``. The first image (``propagation_time.png``) compares the first
    result of the echo-chamber call with the second result of the ``None``
    call. The second image (``child_all_time_propagation.png``) plots the
    third and fourth results of the echo-chamber call on a log y-axis. Both
    images are written below ``Image/<folder>/``.
    """
    chamber_mean, _, chamber_by_depth, outside_by_depth = rumor_propagation_velocity(
        'Data/echo_chamber2.json')
    _, overall_mean, _, _ = rumor_propagation_velocity(None)

    # Mean propagation time: echo chamber vs. everything.
    mean_image = 'Image/%s/propagation_time.png'%folder
    plot = BoxPlot(1)
    plot.set_data([chamber_mean, overall_mean],'')
    plot.set_xticks(['Echo Chamber2', 'All'])
    plot.set_label('', 'Mean Propagation Time')
    plot.save_image(mean_image)

    # Propagation time per depth, both groups side by side.
    depth_image = 'Image/%s/child_all_time_propagation.png'%folder
    plot = BoxPlot(1)
    plot.set_multiple_data([chamber_by_depth, outside_by_depth])
    plot.set_ylog()
    plot.set_label('Depth', 'Propagation Time')
    plot.save_image(depth_image)
def cascade_depth_distribution():
    """Save box plots of cascade size, user count and breadth grouped by depth.

    Per-cascade maximum breadth, maximum depth and unique-user values come
    from ``e_util.get_cascade_max_breadth()``. Every post file listed in the
    module-level ``rumors`` is read from ``dirname``; each distinct
    ``origin_tweet`` is counted once and its values are bucketed by the
    cascade's maximum depth. Depths 1..17 are plotted on a log y-axis into
    ``Image/<foldername>/depth_{cascade,user,breadth}_dist.png``.
    """
    from collections import defaultdict

    c_breadth, c_depth, c_unique_users = e_util.get_cascade_max_breadth()

    # defaultdict keeps unexpected depths (0, >= 20) from raising KeyError;
    # only the plotted range below ends up in the images.
    depth_cascade = defaultdict(list)
    depth_unique_users = defaultdict(list)
    depth_breadth = defaultdict(list)
    seen_cascades = set()

    for ccc, postid in enumerate(rumors):
        with open(dirname + '/' + postid, 'r') as f:
            tweets = json.load(f)

        for tweet in tweets.values():
            otid = tweet['origin_tweet']
            if otid in seen_cascades:
                continue
            max_depth = c_depth[otid]
            users = c_unique_users[otid]
            max_breadth = c_breadth[otid]
            seen_cascades.add(otid)
            depth_cascade[max_depth].append(tweet['cascade'])
            depth_unique_users[max_depth].append(users)
            depth_breadth[max_depth].append(max_breadth)

        # Progress indicator: index of the post just processed.
        print(ccc)

    plotted_depths = range(1, 18)
    plots = (
        (depth_cascade, 'Cascade Size', 'Image/%s/depth_cascade_dist.png'),
        (depth_unique_users, 'Number of Users', 'Image/%s/depth_user_dist.png'),
        (depth_breadth, 'Breadth Size', 'Image/%s/depth_breadth_dist.png'),
    )
    for buckets, ylabel, path in plots:
        box = BoxPlot(1)
        box.set_data([buckets[depth] for depth in plotted_depths], '')
        box.set_ylog()
        box.set_label('Depth', ylabel)
        box.save_image(path % foldername)
def _timeline_diversities(users, timeline_dir):
    """Return (tweet_diversities, source_diversities) for users with a timeline file.

    Users whose timeline file cannot be opened are skipped (best effort), and
    ``None`` results from ``get_diversity`` are left out of the lists.
    """
    tweet_diversities = []
    source_diversities = []
    for user in users:
        try:
            with open(timeline_dir + user, 'r') as f:
                user_tweets = json.load(f)
        except IOError:
            # No timeline available for this user.
            continue
        tweet_diversity, source_diversity = get_diversity(user_tweets)
        if tweet_diversity is not None:
            tweet_diversities.append(tweet_diversity)
        if source_diversity is not None:
            source_diversities.append(source_diversity)
    return tweet_diversities, source_diversities


def echo_chamber_diversity(filename):
    """Compare timeline diversity of echo-chamber users and other retweeters.

    Args:
        filename: path handed to ``e_util.get_echo_chamber_users``; with
            ``None`` every post gets an empty echo-chamber user set, so all
            retweeters count as non-echo-chamber users.

    For each post file in ``Retweet/`` the users are split into echo-chamber
    members and the rest. ``get_diversity`` is evaluated on each user's
    timeline from ``../Timeline/``. The collected values are saved as CDF and
    box plots under ``Image/20181002/``.
    """
    dirname = 'Retweet/'
    timeline_dir = '../Timeline/'
    files = os.listdir(dirname)

    if filename is None:
        echo_chamber_users = {postid: {} for postid in files}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)

    echo_tweet_diversity = []
    echo_source_diversity = []
    necho_tweet_diversity = []
    necho_source_diversity = []

    for postid in files:
        with open(dirname + postid) as f:
            tweets = json.load(f)

        # A post may have no echo chamber at all; treat that as an empty set
        # instead of failing with KeyError.
        echo_users = echo_chamber_users.get(postid, {})
        non_echo_users = {}
        for tweet in tweets.values():
            user = tweet['user']
            if user not in echo_users:
                non_echo_users[user] = 1

        print(len(echo_users), len(non_echo_users))

        tweet_div, source_div = _timeline_diversities(echo_users, timeline_dir)
        echo_tweet_diversity.extend(tweet_div)
        echo_source_diversity.extend(source_div)

        tweet_div, source_div = _timeline_diversities(non_echo_users, timeline_dir)
        necho_tweet_diversity.extend(tweet_div)
        necho_source_diversity.extend(source_div)

    # CDF
    cdf = CDFPlot()
    cdf.set_label('Retweet Origin Diversity', 'CDF')
    cdf.set_data(echo_tweet_diversity, 'Echo Chamber')
    cdf.set_data(necho_tweet_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo CHamber', 'Non Echo CHamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_retweet_cdf.png')

    cdf = CDFPlot()
    cdf.set_label('Source News Diversity', 'CDF')
    cdf.set_data(echo_source_diversity, 'Echo Chamber')
    cdf.set_data(necho_source_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo CHamber', 'Non Echo CHamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_news_cdf.png')

    # BoxPlot -- one tick label per plotted series.
    box = BoxPlot(1)
    box.set_data([echo_tweet_diversity, necho_tweet_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.set_label('', 'Retweet Origin Diversity')
    box.save_image('Image/20181002/source_diversity_retweet.png')

    box = BoxPlot(1)
    box.set_data([echo_source_diversity, necho_source_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.set_label('', 'Source News Diversity')
    box.save_image('Image/20181002/source_diversity_news.png')
# Draw the share-count CDF for every entry of `year`.
for date in year:
    sharecount_cdf("between " + date, date[1:5])

category_list = category_list()

# Share counts of each category, in category_list order.
all_sharecount_list = [sharecount_by_category(item) for item in category_list]


def subplot_num(x):
    """Return sqrt(x) when it is a whole number, otherwise int(sqrt(x)) + 1."""
    root = math.sqrt(x)
    if root.is_integer():
        return int(root)
    return int(root) + 1


# Display BoxPlot and save at the path
BoxPlt = BoxPlot(subplot_num(25))
for i in range(25):
    BoxPlt.set_data(all_sharecount_list[i], '')
    BoxPlt.set_title(category_list[i])
    BoxPlt.set_ylim(1000)
BoxPlt.save_image('./image/sharecount_box_plot.png')

# Per category: share counts with a source next to those without one.
all_sharecount_list = [
    [
        sharecount_by_category_with_source(item),
        sharecount_by_category_without_source(item),
    ]
    for item in category_list
]

# Display BoxPlot and save at the path
BoxPlt = BoxPlot(subplot_num(25))
def _save_propagation_box(series, xticks, path):
    """Draw ``series`` as one box plot labelled 'Propagation Time' and save it."""
    box = BoxPlot(1)
    box.set_data(series, '')
    box.set_xticks(xticks)
    box.set_label('', 'Propagation Time')
    box.save_image(path)


def draw_propagation_time_to_group():
    """Save box plots of ``propagation_time_to_group`` for chamber files 2, 3 and 4.

    For each of ``Data/echo_chamber{2,3,4}.json`` two images are written: one
    comparing the first two results of ``propagation_time_to_group`` and one
    (suffix ``_r``) comparing the last two. Finally the first and third
    results of all three files are plotted against the second and fourth
    results of the ``None`` call. All images go below ``Image/<folder>/``.
    """
    pair_ticks = ['Echo Chamber', 'Non Echo Chamber']
    echo_series = []
    recho_series = []

    for size in (2, 3, 4):
        print('echo chamber %d' % size)
        echo_v, necho_v, recho_v, rnecho_v = propagation_time_to_group(
            'Data/echo_chamber%d.json' % size)
        _save_propagation_box(
            [echo_v, necho_v], pair_ticks,
            'Image/%s/propagation_time_to_group%d.png' % (folder, size))
        _save_propagation_box(
            [recho_v, rnecho_v], pair_ticks,
            'Image/%s/propagation_time_to_group_r%d.png' % (folder, size))
        echo_series.append(echo_v)
        recho_series.append(recho_v)

    _, non_echo, _, rnon_echo = propagation_time_to_group(None)
    print(*[len(values) for values in echo_series], len(non_echo))

    all_ticks = ['Echo Chamber2', 'Echo Chamber3', 'Echo Chamber4', 'All']
    _save_propagation_box(
        echo_series + [non_echo], all_ticks,
        'Image/%s/propagation_time_to_group.png' % folder)
    _save_propagation_box(
        recho_series + [rnon_echo], all_ticks,
        'Image/%s/propagation_time_to_group_r.png' % folder)
def _valid_polarities(users):
    """Return the polarity scores of ``users``, dropping the -999 sentinel."""
    scores = []
    for userid in users:
        score = get_polarity(userid)
        if score != -999:
            scores.append(score)
    return scores


def diversity(filename):
    """Compare polarity diversity of echo chambers with random user samples.

    Args:
        filename: path of an echo-chamber JSON file; its name is expected to
            contain ``echo_chamber`` because the part after it names the
            output image.

    For every echo chamber with at least two users, ``util.eta`` is computed
    over the users' polarity scores (scores equal to -999 are ignored). For
    every post seen in the chamber keys, the same number of users is drawn
    with ``get_random_user`` from the post's retweeters and ``util.eta`` is
    computed for that sample. Both result maps are written to
    ``Data/echo_chamber_diversity.json`` and drawn into
    ``Image/diversity_box_<suffix>.png``.
    """
    index = filename.replace(".json", "").split('echo_chamber')
    print(index)
    with open(filename) as f:
        echo_chambers = json.load(f)
    print('total ', len(echo_chambers))

    postid = {}  # post id -> number of chamber users with a valid polarity
    count = 0
    echo_diversity = {}
    for key in echo_chambers:
        users = echo_chambers[key]
        count += 1
        if count % 100 == 0:
            print(count)
        if len(users) < 2:
            continue

        postids = key.split('_')
        for k in postids:
            postid[k] = 1

        polars = _valid_polarities(users)
        user_count = len(polars)
        postid[postids[0]] = user_count
        postid[postids[1]] = user_count

        # NOTE(review): stops at the first chamber that leaves a post with a
        # user count of 1 (also hit by keys naming more than two posts).
        # Kept as in the original -- confirm whether this is intentional.
        if 1 in postid.values():
            break

        echo_diversity[key] = util.eta(polars)

    random_diversity = {}
    for key in postid.keys():
        user_num = postid[key]
        with open(dir_name + key, 'r') as f:
            tweets = json.load(f)
        users = [tweet['user'] for tweet in tweets.values()]
        users = get_random_user(users, user_num)
        random_diversity[key] = util.eta(_valid_polarities(users))

    with open('Data/echo_chamber_diversity.json', 'w') as f:
        json.dump({'echo_chamber': echo_diversity, 'random': random_diversity}, f)

    box = BoxPlot(1)
    # dict views are not sequences on Python 3; hand real lists to the plot.
    box.set_data([list(random_diversity.values()), list(echo_diversity.values())], '')
    box.set_xticks(['Random', 'Echo chamber'])
    box.save_image('Image/diversity_box_%s.png' % index[1])
def polarity_diversity():
    """Print and plot polarity diversity per rumor, cascade and echo chamber.

    Steps, all based on ``get_polarity`` scores and ``util.eta``:

    1. diversity of the users of each post file in ``dir_name`` ("rumor") and
       of each ``origin_tweet`` with at least two users ("cascade"), saved to
       ``Image/<foldername>/diversity_box.png``;
    2. diversity of each echo chamber in ``Data/echo_chamber2.json`` that has
       at least two users (summary statistics are printed only);
    3. diversity and size of cascades that contain at least one echo-chamber
       user versus cascades that contain none, saved under
       ``Image/20180927/``.

    NOTE(review): unlike ``diversity``, the -999 polarity value is not
    filtered out here -- confirm whether that is intended.
    """
    # check rumor polarity similarity
    # check cascade polarity similarity
    files = os.listdir(dir_name)
    users_polarity = {}
    users_polarity_cascade = {}
    retweet_cache = {}
    for postid in files:
        users_polarity[postid] = {}
        with open(dir_name + '%s' % postid, 'r') as f:
            tweets = json.load(f)
        retweet_cache[postid] = tweets
        for tweet in tweets.values():
            p_score = get_polarity(tweet['user'])
            users_polarity[postid][tweet['user']] = p_score
            users_polarity_cascade.setdefault(tweet['origin_tweet'], {})[tweet['user']] = p_score

    r_diversity = [util.eta(list(scores.values())) for scores in users_polarity.values()]
    print(pd.Series(r_diversity).describe())

    c_diversity = [
        util.eta(list(scores.values()))
        for scores in users_polarity_cascade.values()
        if len(scores) >= 2
    ]
    print(pd.Series(c_diversity).describe())

    box = BoxPlot(1)
    box.set_data([r_diversity, c_diversity], '')
    box.set_xticks(['Rumor', 'Cascade'])
    box.save_image('Image/%s/diversity_box.png' % foldername)

    # check echo chamber users' polarity similarity
    e_diversity = []
    echo_chamber_users = {}
    with open('Data/echo_chamber2.json') as f:
        echo_chamber = json.load(f)

    for key, users in echo_chamber.items():
        if len(users) < 2:
            continue
        e_diversity.append(util.eta([get_polarity(user) for user in users]))

        # get all echo chamber users for cascade characteristics
        for postid in key.split('_'):
            members = echo_chamber_users.setdefault(postid, {})
            for user in users:
                members[user] = 1

    print(pd.Series(e_diversity).describe())

    # check polarity similarity of cascades with and without echo chamber
    # user participation
    echo_cascade = set()
    cascade_users = {}
    for postid in files:
        tweets = retweet_cache[postid]
        # Posts without any qualifying echo chamber have no entry; treat
        # them as having no echo-chamber users instead of raising KeyError.
        members = echo_chamber_users.get(postid, {})
        for tweet in tweets.values():
            if tweet['user'] in members:
                echo_cascade.add(tweet['origin_tweet'])
            cascade_users.setdefault(tweet['origin_tweet'], {})[tweet['user']] = get_polarity(tweet['user'])

    echo_cascade_diversity = []
    echo_cascade_size = []
    non_echo_cascade_diversity = []
    non_echo_cascade_size = []
    for cascade, scores in cascade_users.items():
        cascade_diversity = util.eta(list(scores.values()))
        if cascade in echo_cascade:
            # echo chamber user participated cascade
            echo_cascade_diversity.append(cascade_diversity)
            echo_cascade_size.append(len(scores))
        else:
            # non echo chamber user participated cascade
            non_echo_cascade_diversity.append(cascade_diversity)
            non_echo_cascade_size.append(len(scores))

    print('echo chamber cascade')
    print(pd.Series(echo_cascade_diversity).describe())
    print(pd.Series(echo_cascade_size).describe())
    print('non echo chamber cascade')
    print(pd.Series(non_echo_cascade_diversity).describe())
    print(pd.Series(non_echo_cascade_size).describe())

    box = BoxPlot(1)
    box.set_data([echo_cascade_diversity, non_echo_cascade_diversity], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.save_image('Image/20180927/diversity_echo_cascade_box.png')

    box = BoxPlot(1)
    box.set_data([echo_cascade_size, non_echo_cascade_size], '')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber'])
    box.save_image('Image/20180927/diversity_echo_cascade_size_box.png')