def draw_graph():
    """Plot the CDF and CCDF of how many rumors each user participated in.

    Reads the module-level `user_participation` mapping (user -> rumors),
    saves both plots under Image/<foldername>/, then reports the top
    participants via top_participated_users().
    """
    # One entry per user: the number of rumors that user took part in.
    participation_counts = [len(user_rumors) for user_rumors in user_participation.values()]

    cumulative = CDFPlot()
    cumulative.set_label('Number of rumors', 'CDF')
    cumulative.set_data(participation_counts, 'CDF')
    cumulative.save_image('Image/%s/user_participation_cdf.png' % foldername)

    complementary = CCDFPlot()
    complementary.set_label('Number of rumors', 'CCDF')
    complementary.set_data(participation_counts)
    complementary.save_image('Image/%s/user_participation_ccdf.png' % foldername)

    top_participated_users(participation_counts)
def sharecount_cdf(date_condition, file_name):
    """Fetch share counts for posts matching a published_date condition,
    draw their CDF (log-scaled x axis), and return the raw count list.

    NOTE(review): `date_condition` is spliced directly into the SQL string.
    Callers pass trusted literals here, but this is unsafe for untrusted input.
    """
    sql = """ SELECT post_id, share_count FROM snopes_set WHERE published_date %s """ % date_condition
    cursor.execute(sql)
    rows = cursor.fetchall()

    # share_count may come back as text/decimal; coerce each to int.
    sharecount_list = [int(share_count) for _post_id, share_count in rows]

    # Display CDF and save at the path.
    plot = CDFPlot()
    plot.set_label('share_count', 'CDF')
    plot.set_log(True)
    plot.set_data(sharecount_list, "")
    plot.save_image('./image/sharecount_%s.png' % file_name)
    return sharecount_list
def depth_cdf():
    """Plot the CDF of every recorded depth value, split by rumor veracity."""

    def _all_depths(veracity):
        # get_depth(...) maps rumor -> {cascade -> depth values}; flatten
        # twice to a single flat list of depths for this veracity class.
        per_rumor = [item.values() for item in get_depth(veracity).values()]
        return list(itertools.chain(*list(itertools.chain(*per_rumor))))

    true_depths = _all_depths('True')
    false_depths = _all_depths('False')
    mixed_depths = _all_depths('Mixture,Mostly False,Mostly True')

    plot = CDFPlot()
    plot.set_label('Depth', 'CDF')
    plot.set_log(True)
    plot.set_ylog()
    plot.set_data(true_depths, 'True')
    plot.set_data(false_depths, 'False')
    plot.set_data(mixed_depths, 'Mixed')
    plot.set_legends(['True', 'False', 'Mixed'], '')
    plot.save_image('Image/depth_cdf.png')
def max_depth_per_rumor():
    """CDF of each cascade's maximum depth, split by veracity.

    Also prints, for depth levels 1-19, how many cascades peaked at
    exactly that depth in each veracity class.
    """

    def _max_depths(veracity):
        # One value per cascade: the deepest level it reached.
        return [max(levels)
                for rumor in get_depth(veracity).values()
                for levels in rumor.values()]

    true_max = _max_depths('True')
    false_max = _max_depths('False')
    mixed_max = _max_depths('Mixture,Mostly False,Mostly True')

    plot = CDFPlot()
    plot.set_label('Cascade Max Depth', 'CDF')
    plot.set_log(True)
    plot.set_ylog()
    plot.set_data(true_max, 'True')
    plot.set_data(false_max, 'False')
    plot.set_data(mixed_max, 'Mixed')
    plot.set_legends(['True', 'False', 'Mixed'], '')
    plot.save_image('Image/depth_per_cascade_cdf.png')

    # Per-level cascade counts (rows: depth, true, false, mixed).
    for level in range(1, 20):
        print(level, true_max.count(level), false_max.count(level), mixed_max.count(level))
def cascade_num():
    """Plot the CDF of the number of cascades per rumor, split by veracity."""
    counts_true = [len(rumor) for rumor in get_cascades('True').values()]
    counts_false = [len(rumor) for rumor in get_cascades('False').values()]
    counts_mixed = [len(rumor)
                    for rumor in get_cascades('Mixture,Mostly False,Mostly True').values()]
    print(len(counts_true), len(counts_false), len(counts_mixed))

    # NOTE(review): acts on a module-level `cdf_plot` object defined
    # elsewhere — confirm this global is still required here.
    cdf_plot.set_label_num(2)

    plot = CDFPlot()
    plot.set_log(True)
    plot.set_ylog()
    plot.set_data(counts_true, 'True')
    plot.set_data(counts_false, 'False')
    plot.set_data(counts_mixed, 'Mixed')
    plot.set_legends(['True', 'False', 'Mixed'], '')
    plot.save_image('Image/cascades_number_cdf.png')
def cascade_cdf():
    """Plot the CDF of cascade sizes, split by rumor veracity.

    get_cascades(...) maps rumor -> {cascade -> size}; per-rumor values are
    chained into one flat list for each veracity class before plotting.
    """
    # Fix: the original called get_cascades('True') once, discarded the
    # result, and immediately recomputed it below — the dead first call
    # is removed here.
    cascades = list(
        itertools.chain(
            *[item.values() for item in get_cascades('True').values()]))
    cascades2 = list(
        itertools.chain(
            *[item.values() for item in get_cascades('False').values()]))
    cascades3 = list(
        itertools.chain(*[
            item.values() for item in get_cascades(
                'Mixture,Mostly False,Mostly True').values()
        ]))
    cdf = CDFPlot()
    cdf.set_label('Cascade Size', 'CDF')
    cdf.set_log(True)
    cdf.set_ylog()
    cdf.set_data(cascades, 'True')
    cdf.set_data(cascades2, 'False')
    cdf.set_data(cascades3, 'Mixed')
    cdf.set_legends(['True', 'False', 'Mixed'], '')
    cdf.save_image('Image/cascades_cdf.png')
def rumor_statistics():
    """Build per-rumor cascade statistics and plot their distributions.

    Reads each rumor's tweet file from `dirname`, groups tweets by origin
    tweet id (one cascade per origin tweet), then plots CDF/CCDF of:
      - number of cascades per rumor,
      - mean cascade size per rumor,
      - all cascade sizes pooled across rumors.
    """
    # rumor_unique_cascade: postid -> {origin_tweet_id -> cascade size}
    rumor_unique_cascade = {}
    for ccc, postid in enumerate(rumors):
        rumor_unique_cascade[postid] = {}
        with open(dirname + '/' + postid, 'r') as f:
            tweets = json.load(f)
        for tweet in tweets.values():
            utid = tweet['origin_tweet']
            rumor_unique_cascade[postid][utid] = tweet['cascade']

    rumor_nums = [len(item) for item in rumor_unique_cascade.values()]
    # BUG FIX: np.mean(dict.values()) fails under Python 3 because a
    # dict_values view is not array-like; materialize it as a list first.
    rumor_size = [np.mean(list(item.values()))
                  for item in rumor_unique_cascade.values()]
    rumor_size_all = list(itertools.chain(
        *[list(item.values()) for item in rumor_unique_cascade.values()]))

    cdf = CDFPlot()
    cdf.set_label('Number of Cascade', 'CDF')
    cdf.set_title('Distribution of Mean Number of Cascades per Rumor')
    cdf.set_log(True)
    cdf.set_data(rumor_nums, '')
    cdf.save_image("Image/%s/cascade_num_per_rumor_cdf.png" % (foldername))

    cdf = CCDFPlot()
    cdf.set_log(True)
    cdf.set_data(rumor_nums)
    # Existing output path kept as-is ('urmor' typo) so downstream
    # consumers of the image files are unaffected.
    cdf.save_image("Image/%s/cascade_num_per_urmor_ccdf.png" % (foldername))

    cdf = CDFPlot()
    cdf.set_label('Cascades Size', 'CDF')
    cdf.set_title('Distribution of Mean Size of Cascades of per Rumor')
    cdf.set_log(True)
    cdf.set_data(rumor_size, '')
    cdf.save_image("Image/%s/cascade_size_per_rumor_cdf.png" % (foldername))

    cdf = CCDFPlot()
    cdf.set_log(True)
    cdf.set_data(rumor_size)
    cdf.save_image("Image/%s/cascade_size_per_urmor_ccdf.png" % (foldername))

    cdf = CDFPlot()
    cdf.set_label('Cascade Size', 'CDF')
    cdf.set_title('Distribution of Cascade Size')
    cdf.set_log(True)
    cdf.set_data(rumor_size_all, '')
    cdf.save_image("Image/%s/cascade_size_all_rumor_cdf.png" % (foldername))

    cdf = CCDFPlot()
    cdf.set_label('Size of Cascade', 'CCDF')
    cdf.set_log(True)
    cdf.set_data(rumor_size_all)
    cdf.save_image("Image/%s/cascade_size_all_rurmor_ccdf.png" % (foldername))
def draw_cdf_plot(datas, datatype, legend, legend_type, filename, log_scale=True):
    """Draw one CDF per series in `datas`, labelled from `legend`, and save
    the figure under Image/<foldername>/<filename>.png.

    x ticks are fixed at [-1, 0, 1]; the legend box is drawn only when more
    than one series is plotted.
    """
    plot = CDFPlot()
    plot.set_label(datatype, 'CDF')
    plot.set_log(log_scale)
    for idx, series in enumerate(datas):
        plot.set_data(series, legend[idx])
    plot.set_xticks([-1, 0, 1], index=[-1, 0, 1])
    if len(legend) > 1:
        plot.set_legends(legend, legend_type)
    plot.save_image('Image/%s/%s.png' % (foldername, filename))
def echo_chamber_diversity(filename):
    """Compare content diversity of echo-chamber vs non-echo-chamber users.

    For each rumor file under Retweet/, splits the tweeting users into
    echo-chamber members (loaded from `filename`, or an empty mapping per
    post when filename is None) and the rest, loads each user's timeline
    from ../Timeline/, and collects the per-user diversity scores returned
    by get_diversity(). Saves two CDFs and two box plots under
    Image/20181002/.
    """
    Bot = bot.load_bot()  # NOTE(review): loaded but not used in this function — confirm it is needed
    dirname = 'Retweet/'
    files = os.listdir(dirname)
    if filename == None:
        # No echo-chamber file supplied: treat every post as having no
        # echo-chamber users, so everyone falls into the non-echo group.
        echo_chamber_users = {}
        for postid in files:
            echo_chamber_users[postid] = {}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)
    # Accumulators across all posts, one pair of lists per user group.
    echo_tweet_diversity = []; echo_source_diversity = []; necho_tweet_diversity = []; necho_source_diversity = [];
    for postid in files:
        with open(dirname + postid) as f:
            tweets = json.load(f)
        non_echo_users = {}
        for tweet in tweets.values():
            user = tweet['user']
            # non echo chamber collect: any tweeting user not in the
            # post's echo-chamber set.
            if not user in echo_chamber_users[postid]:
                non_echo_users[user] = 1
        print(len(echo_chamber_users[postid]), len(non_echo_users))
        timeline_dir = '../Timeline/'
        # collect echo chamber users' source diversity
        err = 0; nerr = 0  # users whose timeline file is missing/unreadable
        for user in echo_chamber_users[postid]:
            try:
                with open(timeline_dir + user, 'r') as f:
                    user_tweets = json.load(f)
            except IOError as e:
                # Timeline was never crawled for this user; skip silently.
                err +=1
                continue
            tweet_diversity, source_diversity = get_diversity(user_tweets)
            # get_diversity may return None for either score; only record
            # scores that were actually computed.
            if tweet_diversity != None:
                echo_tweet_diversity.append(tweet_diversity)
            if source_diversity != None:
                echo_source_diversity.append(source_diversity)
        # Same collection pass for the non-echo-chamber users of this post.
        for user in non_echo_users:
            try:
                with open(timeline_dir + user, 'r') as f:
                    user_tweets = json.load(f)
            except IOError as e:
                nerr += 1
                continue
            tweet_diversity, source_diversity = get_diversity(user_tweets)
            if tweet_diversity != None:
                necho_tweet_diversity.append(tweet_diversity)
            if source_diversity != None:
                necho_source_diversity.append(source_diversity)
        #print(err, nerr)
        #break
    # CDF plots for both diversity measures.
    cdf = CDFPlot()
    cdf.set_label('Retweet Origin Diversity', 'CDF')
    #cdf.set_log(True)
    cdf.set_data(echo_tweet_diversity, 'Echo Chamber')
    cdf.set_data(necho_tweet_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo CHamber', 'Non Echo CHamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_retweet_cdf.png')
    cdf = CDFPlot()
    cdf.set_label('Source News Diversity', 'CDF')
    #cdf.set_log(True)
    cdf.set_data(echo_source_diversity, 'Echo Chamber')
    cdf.set_data(necho_source_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo CHamber', 'Non Echo CHamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_news_cdf.png')
    # BoxPlot versions of the same comparisons.
    box = BoxPlot(1)
    box.set_data([echo_tweet_diversity, necho_tweet_diversity],'')
    # NOTE(review): three tick labels are supplied for two data series —
    # confirm whether an 'All' series was intended here.
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber', 'All'])
    box.set_label('', 'Retweet Origin Diversity')
    box.save_image('Image/20181002/source_diversity_retweet.png')
    box = BoxPlot(1)
    box.set_data([echo_source_diversity, necho_source_diversity],'')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber', 'All'])
    box.set_label('', 'Source News Diversity')
    box.save_image('Image/20181002/source_diversity_news.png')
def draw_graph():
    """Plot time-vs-depth and users-vs-depth line charts for True vs False
    rumors, print per-depth cascade counts, and save a depth CDF.

    time_series(v) returns (depth_time, depth_user, unique_user_time,
    cascade_depth) for veracity class v — shapes assumed from usage below;
    TODO confirm against time_series().
    """
    depth_time1, depth_user1, unique_user_time1, cascade_depth1 = time_series('True')
    x_ticks1 = depth_time1.keys()
    # NOTE(review): np.mean over dict .values() only works on Python 2;
    # under Python 3 a dict_values view is not array-like — confirm the
    # intended runtime, or wrap in list(...).
    y_ticks1 = [np.mean(depth_time1[depth].values()) for depth in x_ticks1]
    depth_time2, depth_user2, unique_user_time2, cascade_depth2 = time_series('False')
    x_ticks2 = depth_time2.keys()
    # NOTE(review): this comprehension iterates x_ticks1 (the True keys),
    # not x_ticks2 — possibly deliberate so both series share one x axis,
    # but it raises KeyError if depth_time2 lacks a depth; verify intent.
    y_ticks2 = [np.mean(depth_time2[depth].values()) for depth in x_ticks1]
    # draw mean minutes - depth line plot
    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Mean Minutes')
    line.set_plot_data([y_ticks1, y_ticks2], x_ticks1)
    line.set_legends(['True', 'False'])
    line.save_image('Image/time_depth_line.png')
    x_ticks1 = unique_user_time1.keys()
    x_ticks2 = unique_user_time2.keys()
    x_ticks1 = sorted(x_ticks1)
    y_ticks1 = [np.mean(unique_user_time1[num].values()) for num in x_ticks1]
    # NOTE(review): y_ticks2 is built over x_ticks2 here, but the plot below
    # uses x_ticks1 as the shared x axis — series may be misaligned; verify.
    y_ticks2 = [np.mean(unique_user_time2[num].values()) for num in x_ticks2]
    # draw mean minutes - unique users line plot
    line = LinePlot()
    line.set_ylog()
    line.set_label('Unique Users', 'Mean Minutes')
    line.set_plot_data([y_ticks1, y_ticks2], x_ticks1)
    line.set_xticks(x_ticks1)
    line.set_legends(['True', 'False'])
    line.save_image('Image/time_users_line.png')
    # Expand each depth key once per observation so the flattened lists
    # contain one depth value per (rumor, observation).
    all_depth_true = [[key] * len(depth_time1[key]) for key in depth_time1.keys()] #True
    all_depth_false = [[key] * len(depth_time2[key]) for key in depth_time2.keys()] #False
    all_depth_sum_true = []
    all_depth_sum_false = []
    for item in all_depth_true:
        all_depth_sum_true.extend(item)
    for item in all_depth_false:
        all_depth_sum_false.extend(item)
    #Depth CDF, CCDF
    #cdf = CDFPlot()
    #cdf.set_data(all_depth_sum_true, 'True')
    #cdf.set_data(all_depth_sum_false, 'False')
    #cdf.set_legends(['True', 'False'], '')
    #cdf.save_image('Image/depth_cdf.png')
    # Pool every cascade's depth values across all posts per class.
    true_cascade = []
    false_cascade = []
    for postid in cascade_depth1.keys():
        for depth in cascade_depth1[postid].values(): #origin tweet : depth
            true_cascade.extend(depth)
    for postid in cascade_depth2.keys():
        for depth in cascade_depth2[postid].values(): #origin tweet : depth
            false_cascade.extend(depth)
    # Per-depth observation counts for depths 1..14.
    print('true')
    for i in range(1, 15):
        print(i, true_cascade.count(i))
    print('false')
    for i in range(1, 15):
        print(i, false_cascade.count(i))
    cdf = CDFPlot()
    cdf.set_legends(['True', 'False'], '')
    cdf.set_xlim(0, 11)
    #cdf.set_log(True)
    #cdf.set_ylog()
    cdf.set_label('Depth', 'CDF')
    cdf.set_data(true_cascade, 'True')
    cdf.set_data(false_cascade, 'False')
    cdf.save_image('Image/depth_cdf.png')
cursor.execute(sql) rs = cursor.fetchall() return [item[0] for item in rs] if __name__ == "__main__": #numberof articles total_count = 0 #sql connect conn, cursor, = sql_connect() total_list = sharecount_cdf("<= date(now())", "total") sub_total_list = sharecount_cdf("< '2018-03-01'", "2018_02") Cdf = CDFPlot() Cdf.set_label('share_count', 'CDF') Cdf.set_log(True) Cdf.set_data(total_list, "total") Cdf.set_data(sub_total_list, "< 2018.02") Cdf.save_image('./image/sharecount_%s.png' % "comparison") sharecount_cdf("between '2016-01-01' and '2018-03-01'", "2016_2018") #share count per year year = [ "'2010-01-01' and '2010-12-31'", "'2011-01-01' and '2011-12-31'", "'2012-01-01' and '2012-12-31'", "'2013-01-01' and '2013-12-31'", "'2014-01-01' and '2014-12-31'", "'2015-01-01' and '2015-12-31'", "'2016-01-01' and '2016-12-31'", "'2017-01-01' and '2017-12-31'" ]
BarPlt = BarPlot(1) BarPlt.set_data(np.arange(max_num), count_list, "") BarPlt.set_width(0.8) BarPlt.set_xticks(np.arange(max_num)) BarPlt.save_image("./image/source_num_bar.png") count_list = [source_count_list_from_2017.count(i) for i in range(max_num)] BarPlt = BarPlot(1) BarPlt.set_data(np.arange(max_num), count_list, "") BarPlt.set_width(0.8) BarPlt.set_xticks(np.arange(max_num)) BarPlt.save_image("./image/source_num_bar_2017.png") #number of source distribution - cdf Cdf = CDFPlot() Cdf.set_label('number of sources', 'CDF') Cdf.set_log(True) Cdf.set_data(source_count_list, "") Cdf.set_data(source_count_list_from_2017, "") Cdf.set_legends(["All", "year >=2017"]) Cdf.save_image('./image/source_num_distribution_cdf.png') #number of articles distirbution by source - cdf veracity_list = veracity_types() sources_json, veracities, postids = sources_count() veracity_dict = {} for v in veracity_list: veracity_dict[v.lower()] = []
def echo_chamber_group_homogeneity_size():
    """Relate echo-chamber group size to polarity homogeneity.

    For every echo-chamber group with at least two users, computes the
    pairwise polarity product ("similarity") over all user pairs, records
    the group's median similarity against its size, then draws a
    size-vs-polarity scatter plot and a CDF of all/median similarities.

    get_polarity() returns -999 as its "unknown" sentinel; such users are
    excluded from pairing.
    """
    filename = 'Data/echo_chamber2.json'
    # Context manager instead of manual open/close.
    with open(filename, 'r') as f:
        echo_chamber = json.load(f)
    d = []               # per-group records: {'size': ..., 'polarity': ...}
    all_similarity = []  # every pairwise similarity, rounded
    for ccc, key in enumerate(echo_chamber):
        users = echo_chamber[key]
        user_size = len(users)
        if ccc % 100 == 0:
            print(ccc)  # progress indicator
        if user_size < 2:
            continue  # no pairs possible
        similarities = []
        for i in range(len(users)):
            p1 = get_polarity(users[i])
            if p1 == -999:
                # p1 is loop-invariant for the inner loop; skipping here
                # avoids pointless get_polarity calls for every partner.
                continue
            for j in range(i + 1, len(users)):
                p2 = get_polarity(users[j])
                if p2 == -999:
                    continue
                similarity = p1 * p2
                similarities.append(similarity)
                all_similarity.append(round(similarity, 2))
        if not similarities:
            # No valid pair in this group; the original code would have
            # read a stale (or undefined) `similarity` scalar below.
            continue
        # BUG FIX: take the median of ALL pairwise similarities, not of the
        # last single `similarity` value (np.median(scalar) == scalar).
        d.append({'size': user_size, 'polarity': round(np.median(similarities), 2)})
    size_list = [item['size'] for item in d]
    polarity_list = [item['polarity'] for item in d]
    scatter = ScatterPlot()
    scatter.set_xlim(10000)
    scatter.set_ylim(-1, 1.2)
    scatter.set_data(size_list, polarity_list)
    scatter.save_image('Image/%s/echo_chamber_polarity_size.png' % foldername)
    cdf = CDFPlot()
    cdf.set_data(all_similarity, '')
    cdf.set_data(polarity_list, '')
    cdf.set_label('Polarity', 'CDF')
    cdf.set_legends(['All', 'Median'], '')
    cdf.save_image('Image/%s/echo_chamber_all_polarity_similarity_cdf.png' % foldername)
def depth_politics_cdf():
    """Compare depth distributions of false rumors across topics.

    Draws: politics vs other (CDF and CCDF), plus echo-chamber vs
    non-echo-chamber user depth CDFs within each topic group. All figures
    are saved under Image/20181002/.
    """
    pol_all, pol_echo, pol_necho = get_depth(politic=True, veracity='False', echo_chamber=True)
    oth_all, oth_echo, oth_necho = get_depth(politic=False, veracity='False', echo_chamber=True)

    # Each result is a mapping; only its values (depth collections) are plotted.
    pol_all, pol_echo, pol_necho = pol_all.values(), pol_echo.values(), pol_necho.values()
    oth_all, oth_echo, oth_necho = oth_all.values(), oth_echo.values(), oth_necho.values()

    # Politics vs other topics.
    plot = CDFPlot()
    plot.set_label('Depth', 'CDF')
    plot.set_data(pol_all, 'Politics')
    plot.set_data(oth_all, 'Other')
    plot.set_legends(['Politics', 'Other'], 'Category')
    plot.save_image('Image/20181002/depth_cdf.png')

    # Echo vs non-echo within politics.
    plot = CDFPlot()
    plot.set_label('Depth', 'CDF')
    plot.set_data(pol_echo, 'Echo Chamber')
    plot.set_data(pol_necho, 'Non Echo Chamber')
    plot.set_title('Politics')
    plot.set_legends(['Echo Chamber', 'Non Echo Chamber'], 'User Type')
    plot.save_image('Image/20181002/echo_depth_cdf.png')

    # Echo vs non-echo within non-politics.
    plot = CDFPlot()
    plot.set_label('Depth', 'CDF')
    plot.set_data(oth_echo, 'Echo Chamber')
    plot.set_data(oth_necho, 'Non Echo Chamber')
    plot.set_title('Non Politics')
    plot.set_legends(['Echo Chamber', 'Non Echo Chamber'], 'User Type')
    plot.save_image('Image/20181002/echo_depth_cdf2.png')

    # CCDF of politics vs other.
    complementary = CCDFPlot()
    complementary.set_label('Depth', 'CCDF')
    complementary.set_data(pol_all)
    complementary.set_data(oth_all)
    complementary.set_legends(['Politics', 'Other'], 'Category')
    complementary.save_image('Image/20181002/depth_ccdf.png')
def draw_cdf_plot(datas, datatype, legend, legend_type, filename, log_scale=True):
    """Draw one CDF per series in `datas`, labelled from `legend`, and save
    the figure under Image/<foldername>/<filename>.png.

    `log_scale` is a backward-compatible generalization: it defaults to
    True (the previously hard-coded behavior) and aligns this helper's
    signature with the other draw_cdf_plot variants in this codebase.
    """
    cdf = CDFPlot()
    cdf.set_label(datatype, 'CDF')
    cdf.set_log(log_scale)
    for i in range(len(datas)):
        cdf.set_data(datas[i], legend[i])
    cdf.set_legends(legend, legend_type)
    cdf.save_image('Image/%s/%s.png' % (foldername, filename))
def time_to_depth_echo_chamber(filename):
    """Compare propagation speed (time/users to reach each depth) between
    cascades rooted by echo-chamber users and all other cascades.

    Loads per-cascade time-to-depth and users-to-depth series, marks a
    cascade as "echo chamber" when any of its tweets came from a user in
    the echo-chamber set for that post, then draws a box plot, two line
    charts, a depth-1 propagation-time CDF, and dumps figure data as JSON.
    """
    # time_depth / user_depth: cascade root tweet id -> {depth -> value};
    # assumed from the indexing below — TODO confirm get_depth_time_series.
    _, _, time_depth, _, user_depth = get_depth_time_series(None)
    print(len(time_depth))
    #with open('Data/time_series_data.json', 'w') as f:
    #    json.dump({'time_depth' : time_depth, 'user_depth' : user_depth}, f)
    #with open('Data/time_series_data.json', 'r') as f:
    #    data = json.load(f)
    #time_depth = data['time_depth']
    #user_depth = data['user_depth']
    print("time series data load done ")
    # Accumulators: metric -> depth (1..19) -> list of observed values.
    echo_chamber_values = {}
    non_echo_chamber_values = {}
    for item in ['time_depth', 'user_depth']:
        echo_chamber_values[item] = {}
        non_echo_chamber_values[item] = {}
        for i in range(1,20):
            echo_chamber_values[item][i] = []
            non_echo_chamber_values[item][i] = []
    Bot = bot.load_bot()  # NOTE(review): loaded but not used in this function — confirm
    echo_chamber_cascade_root = {}  # cascade root tweet ids touched by echo-chamber users
    cascade_veracity = {}           # cascade root tweet id -> veracity label
    echo_chamber_users = e_util.get_echo_chamber_users(filename)
    files = os.listdir('RetweetNew')
    # collect echo chamber user participate cascade
    #for postid in echo_chamber_users.keys():
    for postid in files:
        v = veracity_type(postid).title()
        # get origin tweet of echo chamber user
        with open('RetweetNew/%s'%postid, 'r') as f:
            tweets = json.load(f)
        for tweet in tweets.values():
            try:
                origin = tweet['origin']
                otid = tweet['origin_tweet']
                # A cascade counts as "echo chamber" when any tweeting user
                # belongs to the post's echo-chamber set.
                if tweet['user'] in echo_chamber_users[postid].keys():
                    echo_chamber_cascade_root[tweet['origin_tweet']] = 1
            except KeyError :
                # Tweet lacks origin info, or post has no echo-chamber entry.
                pass
            cascade_veracity[tweet['origin_tweet']] = v
    print("echo chamber cascade extraction done")
    echo_chamber_cascades = echo_chamber_cascade_root.keys()
    print('echo chamber cascades')
    #print(echo_chamber_cascades)
    # e/n/r: echo, non-echo, ranked-echo accumulators keyed by
    # veracity -> metric -> depth. NOTE(review): `r` is initialized but
    # never populated in this function — confirm whether it is still needed.
    e = {}; n = {}; r = {}; #echo, non echo, ranked echo
    for item in ['True', 'False', 'Mixed']:
        e[item] = {}
        n[item] = {}
        r[item] = {}
        for d_type in ['user_depth', 'time_depth']:
            e[item][d_type] = {}
            n[item][d_type] = {}
            r[item][d_type] = {}
            for i in range(1, 20):
                e[item][d_type][i] = []
                n[item][d_type][i] = []
                r[item][d_type][i] = []
    # Bucket every cascade's per-depth values into echo / non-echo groups,
    # both overall and per veracity class.
    for key in time_depth.keys():
        v = cascade_veracity[key]
        if v !='True' and v != 'False':
            v = 'Mixed'
        if key in echo_chamber_cascades:
            for i in range(1, max(time_depth[key].keys())+1):
                try:
                    echo_chamber_values['time_depth'][i].append(time_depth[key][i])
                    echo_chamber_values['user_depth'][i].append(user_depth[key][i])
                    e[v]['time_depth'][i].append(time_depth[key][i])
                    e[v]['user_depth'][i].append(user_depth[key][i])
                except KeyError:
                    # Depth level missing for this cascade; skip that level.
                    pass
        else:
            for i in range(1, max(time_depth[key].keys())+1):
                try :
                    non_echo_chamber_values['time_depth'][i].append(time_depth[key][i])
                    non_echo_chamber_values['user_depth'][i].append(user_depth[key][i])
                    n[v]['time_depth'][i].append(time_depth[key][i])
                    n[v]['user_depth'][i].append(user_depth[key][i])
                except KeyError:
                    pass
    box = BoxPlot(1)
    box.set_multiple_data([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']])
    box.set_ylog()
    box.set_label('Depth', 'Minutes to Depth')
    # NOTE(review): path lacks the 'Image/' prefix used by other figures here — verify.
    box.save_image('%s/time_depth_echo_chamber_box.png'%foldername)
    print(echo_chamber_values['time_depth'])
    # draw time to depth, user to depth of cascade for echo chamber users
    # participated or non echo chamber users participated
    with open('Data/Figure/5_2_1.json', 'w') as f:
        json.dump([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], f)
    draw_time_to_depth_echo_chamber([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], ['echo chamber', 'no echo chamber'], 'median minutes', 'time_depth_echo_chamber_line')
    draw_time_to_depth_echo_chamber([echo_chamber_values['user_depth'], non_echo_chamber_values['user_depth']], ['echo chamber', 'no echo chamber'], 'median unique users', 'user_depth_echo_chamber_line')
    with open('Data/Figure/5_2_time.json', 'w') as f:
        json.dump({'e':echo_chamber_values['time_depth'][1], 'ne':non_echo_chamber_values['time_depth'][1]}, f)
    # draw cdf with top retweet
    cdf = CDFPlot()
    cdf.set_label('Propagation Time', 'CDF')
    cdf.set_log(True)
    #cdf.set_ylog()
    cdf.set_data(echo_chamber_values['time_depth'][1], '')
    cdf.set_data(non_echo_chamber_values['time_depth'][1], '')
    cdf.save_image('Image/20181105/depth_propagation_time_cdf.png')
    # NOTE(review): the triple-quote below opens a quoted-out region that is
    # not closed within this view — verify it is terminated later in the file.
    """
def draw_cdf_plot(datas, datatype, legend, legend_type, filename, log_scale=True): cdf = CDFPlot() cdf.set_label(datatype, 'CDF') cdf.set_log(log_scale) for i in range(len(datas)): cdf.set_data(datas[i], '') #ticks = np.arange(-1, 1.1, 0.1) #ticks = [round(item,1) for item in ticks] #print(ticks) #cdf.set_xticks(ticks, index=ticks) cdf.set_xticks([-1, 0, 1], index=[-1, 0, 1]) #cdf.set_xticks(['0', '1m', '5m', '1h', '1d', '30d', '6m'], index=[0,1,5,60, 24*60, 24*30*60, 24*30*6*60]) if len(legend) > 1: cdf.set_legends(legend, legend_type) cdf.save_image(filename)