def draw_graph():
    """Plot the per-user participation counts as a CDF and as a CCDF.

    Reads the module-level ``user_participation`` mapping and ``foldername``,
    saves both figures under ``Image/<foldername>/`` and finally passes the
    counts to ``top_participated_users``.
    """
    # One number per user: the length of that user's entry
    # (presumably the rumors the user took part in -- see the axis label).
    rumors_per_user = list(map(len, user_participation.values()))

    cumulative = CDFPlot()
    cumulative.set_label('Number of rumors', 'CDF')
    cumulative.set_data(rumors_per_user, 'CDF')
    cumulative.save_image('Image/%s/user_participation_cdf.png' % foldername)

    complementary = CCDFPlot()
    complementary.set_label('Number of rumors', 'CCDF')
    complementary.set_data(rumors_per_user)
    complementary.save_image('Image/%s/user_participation_ccdf.png' % foldername)

    top_participated_users(rumors_per_user)
Пример #2
0
def sharecount_cdf(date_condition, file_name):
    """Plot the CDF of share counts for posts selected by a date filter.

    Args:
        date_condition: SQL fragment placed directly after ``published_date``
            in the WHERE clause, e.g. ``"< '2018-03-01'"``.
        file_name: Suffix of the image written to ``./image/sharecount_<file_name>.png``.

    Returns:
        The share counts of the selected rows, converted to ``int``.
    """
    # NOTE(review): date_condition is spliced into the statement verbatim, so
    # it must only ever come from trusted, hard-coded call sites.
    query = """
        SELECT post_id, share_count
        FROM snopes_set
        WHERE published_date %s  
        """ % date_condition
    cursor.execute(query)
    rows = cursor.fetchall()

    # Every row is unpacked as a (post_id, share_count) pair; only the count is kept.
    sharecount_list = [int(share_count) for _post_id, share_count in rows]

    # Draw the distribution on a log axis and store it next to the other figures.
    plot = CDFPlot()
    plot.set_label('share_count', 'CDF')
    plot.set_log(True)
    plot.set_data(sharecount_list, "")
    plot.save_image('./image/sharecount_%s.png' % file_name)

    return sharecount_list
Пример #3
0
def depth_cdf():
    """Draw one CDF of depths for the True, False and Mixed veracity groups.

    The figure uses log scaling on both axes and is written to
    ``Image/depth_cdf.png``.
    """
    def flattened(veracity):
        # get_depth(...) is consumed as a two-level mapping whose innermost
        # values are iterables of depths; collapse both levels into one list.
        return [
            value
            for per_rumor in get_depth(veracity).values()
            for depths in per_rumor.values()
            for value in depths
        ]

    true_depths = flattened('True')
    false_depths = flattened('False')
    mixed_depths = flattened('Mixture,Mostly False,Mostly True')

    plot = CDFPlot()
    plot.set_label('Depth', 'CDF')
    plot.set_log(True)
    plot.set_ylog()
    plot.set_data(true_depths, 'True')
    plot.set_data(false_depths, 'False')
    plot.set_data(mixed_depths, 'Mixed')
    plot.set_legends(['True', 'False', 'Mixed'], '')
    plot.save_image('Image/depth_cdf.png')
Пример #4
0
def max_depth_per_rumor():
    """Draw the CDF of maximum depths per veracity group and print a histogram.

    For each veracity group the maximum of every innermost depth collection
    returned by ``get_depth`` is taken. The figure goes to
    ``Image/depth_per_cascade_cdf.png``; afterwards the number of occurrences
    of each depth from 1 to 19 is printed for the three groups.
    """
    def deepest(veracity):
        # One maximum per innermost entry of the two-level get_depth mapping.
        return [
            max(depths)
            for per_rumor in get_depth(veracity).values()
            for depths in per_rumor.values()
        ]

    true_max = deepest('True')
    false_max = deepest('False')
    mixed_max = deepest('Mixture,Mostly False,Mostly True')

    plot = CDFPlot()
    plot.set_label('Cascade Max Depth', 'CDF')
    plot.set_log(True)
    plot.set_ylog()
    plot.set_data(true_max, 'True')
    plot.set_data(false_max, 'False')
    plot.set_data(mixed_max, 'Mixed')
    plot.set_legends(['True', 'False', 'Mixed'], '')
    plot.save_image('Image/depth_per_cascade_cdf.png')

    # Console histogram: depth, then the True / False / Mixed counts.
    for level in range(1, 20):
        print(level, true_max.count(level), false_max.count(level),
              mixed_max.count(level))
Пример #5
0
def cascade_num():
    """Draw the CDF of the number of cascades per entry for each veracity group.

    Prints how many entries each group has, then writes the log-log figure
    to ``Image/cascades_number_cdf.png``.
    """
    def counts(veracity):
        # Length of every value in the mapping returned by get_cascades.
        return [len(entry) for entry in get_cascades(veracity).values()]

    true_counts = counts('True')
    false_counts = counts('False')
    mixed_counts = counts('Mixture,Mostly False,Mostly True')
    print(len(true_counts), len(false_counts), len(mixed_counts))

    # Module-level plot setting; must be applied before the plot is created.
    cdf_plot.set_label_num(2)
    plot = CDFPlot()
    plot.set_log(True)
    plot.set_ylog()
    plot.set_data(true_counts, 'True')
    plot.set_data(false_counts, 'False')
    plot.set_data(mixed_counts, 'Mixed')
    plot.set_legends(['True', 'False', 'Mixed'], '')
    plot.save_image('Image/cascades_number_cdf.png')
Пример #6
0
def cascade_cdf():
    """Draw the CDF of cascade sizes for the True, False and Mixed groups.

    ``get_cascades`` is consumed as a two-level mapping; the innermost values
    (the cascade sizes) of every outer entry are pooled per veracity group.
    The log-log figure is written to ``Image/cascades_cdf.png``.
    """
    def pooled_sizes(veracity):
        # Each veracity group is loaded exactly once (the previous version
        # fetched the 'True' group twice and threw the first result away).
        return list(itertools.chain.from_iterable(
            per_rumor.values()
            for per_rumor in get_cascades(veracity).values()))

    cascades = pooled_sizes('True')
    cascades2 = pooled_sizes('False')
    cascades3 = pooled_sizes('Mixture,Mostly False,Mostly True')

    cdf = CDFPlot()
    cdf.set_label('Cascade Size', 'CDF')
    cdf.set_log(True)
    cdf.set_ylog()
    cdf.set_data(cascades, 'True')
    cdf.set_data(cascades2, 'False')
    cdf.set_data(cascades3, 'Mixed')
    cdf.set_legends(['True', 'False', 'Mixed'], '')
    cdf.save_image('Image/cascades_cdf.png')
def rumor_statistics():
    """Plot cascade-count and cascade-size distributions over all rumors.

    For every post id in the module-level ``rumors`` the JSON file
    ``<dirname>/<postid>`` is loaded and reduced to a mapping
    ``origin_tweet -> cascade``. From these mappings three series are built:

    * the number of distinct cascades per rumor,
    * the mean cascade value per rumor,
    * all cascade values pooled together,

    and each series is saved as a CDF and a CCDF under
    ``Image/<foldername>/``.
    """
    rumor_unique_cascade = {}
    for postid in rumors:
        with open(dirname + '/' + postid, 'r') as f:
            tweets = json.load(f)

        # Tweets sharing an origin tweet collapse to one entry, so every
        # cascade is counted once per rumor.
        rumor_unique_cascade[postid] = {
            tweet['origin_tweet']: tweet['cascade']
            for tweet in tweets.values()
        }

    rumor_nums = [len(item) for item in rumor_unique_cascade.values()]
    # list(...) is required: on Python 3 ``values()`` is a view that
    # np.mean cannot average directly.
    rumor_size = [
        np.mean(list(item.values()))
        for item in rumor_unique_cascade.values()
    ]
    rumor_size_all = list(itertools.chain.from_iterable(
        item.values() for item in rumor_unique_cascade.values()))

    # Number of cascades per rumor.
    cdf = CDFPlot()
    cdf.set_label('Number of Cascade', 'CDF')
    cdf.set_title('Distribution of Mean Number of Cascades per Rumor')
    cdf.set_log(True)
    cdf.set_data(rumor_nums, '')
    cdf.save_image("Image/%s/cascade_num_per_rumor_cdf.png"%(foldername))

    # NOTE(review): the "urmor"/"rurmor" spellings in the CCDF file names are
    # kept as-is so that existing output paths do not change.
    cdf = CCDFPlot()
    cdf.set_log(True)
    cdf.set_data(rumor_nums)
    cdf.save_image("Image/%s/cascade_num_per_urmor_ccdf.png"%(foldername))

    # Mean cascade size per rumor.
    cdf = CDFPlot()
    cdf.set_label('Cascades Size', 'CDF')
    cdf.set_title('Distribution of Mean Size of Cascades of per Rumor')
    cdf.set_log(True)
    cdf.set_data(rumor_size, '')
    cdf.save_image("Image/%s/cascade_size_per_rumor_cdf.png"%(foldername))

    cdf = CCDFPlot()
    cdf.set_log(True)
    cdf.set_data(rumor_size)
    cdf.save_image("Image/%s/cascade_size_per_urmor_ccdf.png"%(foldername))

    # Size of every cascade, pooled over all rumors.
    cdf = CDFPlot()
    cdf.set_label('Cascade Size', 'CDF')
    cdf.set_title('Distribution of Cascade Size')
    cdf.set_log(True)
    cdf.set_data(rumor_size_all, '')
    cdf.save_image("Image/%s/cascade_size_all_rumor_cdf.png"%(foldername))

    cdf = CCDFPlot()
    cdf.set_label('Size of Cascade', 'CCDF')
    cdf.set_log(True)
    cdf.set_data(rumor_size_all)
    cdf.save_image("Image/%s/cascade_size_all_rurmor_ccdf.png"%(foldername))
Пример #8
0
def draw_cdf_plot(datas, datatype, legend, legend_type, filename, log_scale=True):
    """Draw several series into one CDF figure and save it.

    Args:
        datas: Indexable collection of series; one curve is drawn per entry.
        datatype: Label of the x axis.
        legend: Labels, indexed in parallel with ``datas``.
        legend_type: Second argument forwarded to ``set_legends``.
        filename: Image name (without extension) inside ``Image/<foldername>/``.
        log_scale: Value forwarded to ``set_log``.
    """
    plot = CDFPlot()
    plot.set_label(datatype, 'CDF')
    plot.set_log(log_scale)

    for position in range(len(datas)):
        series, label = datas[position], legend[position]
        plot.set_data(series, label)

    # Fixed ticks at -1, 0 and 1.
    plot.set_xticks([-1, 0, 1], index=[-1, 0, 1])

    # A legend box is only drawn when there is more than one label.
    if len(legend) > 1:
        plot.set_legends(legend, legend_type)

    plot.save_image('Image/%s/%s.png' % (foldername, filename))
def _collect_diversity(users, timeline_dir, tweet_diversities, source_diversities):
    """Append the diversity scores of every user whose timeline can be read.

    Args:
        users: Iterable of user ids; each id is also the timeline file name.
        timeline_dir: Directory prefix of the timeline JSON files.
        tweet_diversities: List extended in place with the first value
            returned by ``get_diversity`` (skipped when it is ``None``).
        source_diversities: List extended in place with the second value
            returned by ``get_diversity`` (skipped when it is ``None``).

    Returns:
        The number of users skipped because their timeline file could not
        be opened.
    """
    missing = 0
    for user in users:
        try:
            with open(timeline_dir + user, 'r') as f:
                user_tweets = json.load(f)
        except IOError:
            # No timeline on disk for this user: best effort, move on.
            missing += 1
            continue

        tweet_diversity, source_diversity = get_diversity(user_tweets)
        if tweet_diversity is not None:
            tweet_diversities.append(tweet_diversity)
        if source_diversity is not None:
            source_diversities.append(source_diversity)
    return missing


def echo_chamber_diversity(filename):
    """Compare diversity scores of echo-chamber users and all other users.

    Every file in ``Retweet/`` is one post. Users of a post that are listed
    for it in the echo-chamber mapping count as echo-chamber users, every
    other tweeting user as non-echo-chamber. For both groups the values
    returned by ``get_diversity`` on each user's timeline are collected and
    drawn as two CDFs and two box plots under ``Image/20181002/``.

    Args:
        filename: Path passed to ``e_util.get_echo_chamber_users``. With
            ``None`` no user is treated as an echo-chamber user.
    """
    # The return value is not needed here; the call is kept because loading
    # may have side effects -- TODO confirm and drop if it has none.
    bot.load_bot()
    dirname = 'Retweet/'
    timeline_dir = '../Timeline/'
    files = os.listdir(dirname)

    if filename is None:
        echo_chamber_users = {postid: {} for postid in files}
    else:
        echo_chamber_users = e_util.get_echo_chamber_users(filename)

    echo_tweet_diversity = []
    echo_source_diversity = []
    necho_tweet_diversity = []
    necho_source_diversity = []

    for postid in files:
        with open(dirname + postid) as f:
            tweets = json.load(f)

        # A post that is absent from the loaded mapping simply has no
        # echo-chamber users (previously this raised KeyError).
        echo_users = echo_chamber_users.get(postid, {})

        # Dict used as an insertion-ordered set of the remaining users.
        non_echo_users = {}
        for tweet in tweets.values():
            user = tweet['user']
            if user not in echo_users:
                non_echo_users[user] = 1

        print(len(echo_users), len(non_echo_users))

        _collect_diversity(echo_users, timeline_dir,
                           echo_tweet_diversity, echo_source_diversity)
        _collect_diversity(non_echo_users, timeline_dir,
                           necho_tweet_diversity, necho_source_diversity)

    # CDF
    cdf = CDFPlot()
    cdf.set_label('Retweet Origin Diversity', 'CDF')
    cdf.set_data(echo_tweet_diversity, 'Echo Chamber')
    cdf.set_data(necho_tweet_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo CHamber', 'Non Echo CHamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_retweet_cdf.png')

    cdf = CDFPlot()
    cdf.set_label('Source News Diversity', 'CDF')
    cdf.set_data(echo_source_diversity, 'Echo Chamber')
    cdf.set_data(necho_source_diversity, 'Non Echo Chamber')
    cdf.set_legends(['Echo CHamber', 'Non Echo CHamber'], 'User Type')
    cdf.save_image('Image/20181002/source_diversity_news_cdf.png')

    # BoxPlot
    # NOTE(review): three tick labels are supplied for two data sets; kept
    # unchanged because BoxPlot's handling of the extra label is not visible.
    box = BoxPlot(1)
    box.set_data([echo_tweet_diversity, necho_tweet_diversity],'')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber', 'All'])
    box.set_label('', 'Retweet Origin Diversity')
    box.save_image('Image/20181002/source_diversity_retweet.png')

    box = BoxPlot(1)
    box.set_data([echo_source_diversity, necho_source_diversity],'')
    box.set_xticks(['Echo Chamber', 'Non Echo Chamber', 'All'])
    box.set_label('', 'Source News Diversity')
    box.save_image('Image/20181002/source_diversity_news.png')
Пример #10
0
def draw_graph():
    """Draw the time-series figures comparing True and False rumors.

    Uses the four values returned by ``time_series`` for each veracity to
    produce:

    * ``Image/time_depth_line.png`` -- mean minutes per depth,
    * ``Image/time_users_line.png`` -- mean minutes per unique-user count,
    * ``Image/depth_cdf.png`` -- CDF of all cascade depths,

    and prints how often each depth from 1 to 14 occurs in both groups.
    """
    def mean_of_values(mapping):
        # list(...) is required: on Python 3 ``values()`` is a view that
        # np.mean cannot average directly.
        return np.mean(list(mapping.values()))

    depth_time1, _, unique_user_time1, cascade_depth1 = time_series('True')
    depth_time2, _, unique_user_time2, cascade_depth2 = time_series('False')

    # Both series share the depth axis of the True group, so the False series
    # is looked up with the same keys (a depth missing there raises KeyError).
    x_ticks1 = list(depth_time1)
    y_ticks1 = [mean_of_values(depth_time1[depth]) for depth in x_ticks1]
    y_ticks2 = [mean_of_values(depth_time2[depth]) for depth in x_ticks1]

    #draw mean minutes - depth line plot
    line = LinePlot()
    line.set_ylog()
    line.set_label('Depth', 'Mean Minutes')
    line.set_plot_data([y_ticks1, y_ticks2], x_ticks1)
    line.set_legends(['True', 'False'])
    line.save_image('Image/time_depth_line.png')

    # Both key sets are sorted so each series is in ascending user-count
    # order; previously only the True keys were sorted while the False
    # values were still drawn against that sorted axis.
    x_ticks1 = sorted(unique_user_time1)
    x_ticks2 = sorted(unique_user_time2)
    y_ticks1 = [mean_of_values(unique_user_time1[num]) for num in x_ticks1]
    y_ticks2 = [mean_of_values(unique_user_time2[num]) for num in x_ticks2]

    #draw mean minutes - unique users line plot
    line = LinePlot()
    line.set_ylog()
    line.set_label('Unique Users', 'Mean Minutes')
    line.set_plot_data([y_ticks1, y_ticks2], x_ticks1)
    line.set_xticks(x_ticks1)
    line.set_legends(['True', 'False'])
    line.save_image('Image/time_users_line.png')

    # cascade_depth*: post id -> origin tweet -> iterable of depths.
    true_cascade = [
        depth
        for per_post in cascade_depth1.values()
        for depths in per_post.values()
        for depth in depths
    ]
    false_cascade = [
        depth
        for per_post in cascade_depth2.values()
        for depths in per_post.values()
        for depth in depths
    ]

    print('true')
    for i in range(1, 15):
        print(i, true_cascade.count(i))
    print('false')
    for i in range(1, 15):
        print(i, false_cascade.count(i))

    cdf = CDFPlot()
    cdf.set_xlim(0, 11)
    cdf.set_label('Depth', 'CDF')
    cdf.set_data(true_cascade, 'True')
    cdf.set_data(false_cascade, 'False')
    # Legends are attached after the data, as in every other CDF of this file.
    cdf.set_legends(['True', 'False'], '')
    cdf.save_image('Image/depth_cdf.png')
Пример #11
0
    cursor.execute(sql)
    rs = cursor.fetchall()

    return [item[0] for item in rs]


# Script entry point: draws share-count CDFs for several date ranges.
# NOTE(review): this excerpt ends after the ``year`` list; the code that
# consumes it is not visible here.
if __name__ == "__main__":

    # number of articles (not updated anywhere in the visible part)
    total_count = 0

    # Open the database connection. sharecount_cdf reads a module-level
    # ``cursor`` -- this assignment at module scope is what provides it.
    conn, cursor, = sql_connect()

    # Each call saves its own figure and returns the share counts, which are
    # then overlaid in a single comparison figure.
    total_list = sharecount_cdf("<= date(now())", "total")
    sub_total_list = sharecount_cdf("< '2018-03-01'", "2018_02")
    Cdf = CDFPlot()
    Cdf.set_label('share_count', 'CDF')
    Cdf.set_log(True)
    Cdf.set_data(total_list, "total")
    Cdf.set_data(sub_total_list, "< 2018.02")
    Cdf.save_image('./image/sharecount_%s.png' % "comparison")

    sharecount_cdf("between '2016-01-01' and '2018-03-01'", "2016_2018")

    # share count per year: one "<start>' and '<end>" range per calendar
    # year from 2010 to 2017 (presumably combined with "between" by the
    # code that follows -- not visible here).
    year = [
        "'2010-01-01' and '2010-12-31'", "'2011-01-01' and '2011-12-31'",
        "'2012-01-01' and '2012-12-31'", "'2013-01-01' and '2013-12-31'",
        "'2014-01-01' and '2014-12-31'", "'2015-01-01' and '2015-12-31'",
        "'2016-01-01' and '2016-12-31'", "'2017-01-01' and '2017-12-31'"
    ]
Пример #12
0
    BarPlt = BarPlot(1)
    BarPlt.set_data(np.arange(max_num), count_list, "")
    BarPlt.set_width(0.8)
    BarPlt.set_xticks(np.arange(max_num))
    BarPlt.save_image("./image/source_num_bar.png")

    count_list = [source_count_list_from_2017.count(i) for i in range(max_num)]

    BarPlt = BarPlot(1)
    BarPlt.set_data(np.arange(max_num), count_list, "")
    BarPlt.set_width(0.8)
    BarPlt.set_xticks(np.arange(max_num))
    BarPlt.save_image("./image/source_num_bar_2017.png")

    #number of source distribution - cdf
    Cdf = CDFPlot()
    Cdf.set_label('number of sources', 'CDF')
    Cdf.set_log(True)
    Cdf.set_data(source_count_list, "")
    Cdf.set_data(source_count_list_from_2017, "")
    Cdf.set_legends(["All", "year >=2017"])
    Cdf.save_image('./image/source_num_distribution_cdf.png')

    #number of articles distirbution by source - cdf

    veracity_list = veracity_types()
    sources_json, veracities, postids = sources_count()

    veracity_dict = {}
    for v in veracity_list:
        veracity_dict[v.lower()] = []
Пример #13
0
def echo_chamber_group_homogeneity_size():
    """Relate echo-chamber size to the polarity similarity of its members.

    Loads ``Data/echo_chamber2.json`` (key -> list of users). For every
    echo chamber with at least two users, the product of the polarities of
    each user pair is taken as that pair's similarity. The function then
    saves

    * a scatter plot of chamber size against the median pair similarity, and
    * a CDF of all pair similarities together with the per-chamber medians,

    under ``Image/<foldername>/``.
    """
    filename = 'Data/echo_chamber2.json'
    with open(filename, 'r') as f:
        echo_chamber = json.load(f)

    d = []
    all_similarity = []
    for ccc, key in enumerate(echo_chamber):
        users = echo_chamber[key]
        user_size = len(users)

        # Progress indicator.
        if ccc % 100 == 0:
            print(ccc)

        if user_size < 2:
            continue

        # Look every user up once instead of once per pair. -999 is the
        # sentinel for which pairs are skipped (presumably "polarity
        # unknown" -- verify against get_polarity).
        polarities = [p for p in map(get_polarity, users) if p != -999]

        similarities = []
        for i, p1 in enumerate(polarities):
            for p2 in polarities[i + 1:]:
                similarity = p1 * p2
                similarities.append(similarity)
                all_similarity.append(round(similarity, 2))

        # Without a usable pair there is no median to report for this chamber.
        if not similarities:
            continue

        # Median over all pairs of the chamber; the previous version passed
        # only the last pair's value (or a stale one from an earlier chamber).
        d.append({'size' : user_size, 'polarity' : round(np.median(similarities), 2)})

    size_list = [item['size'] for item in d]
    polarity_list = [item['polarity'] for item in d]

    scatter = ScatterPlot()
    scatter.set_xlim(10000)
    scatter.set_ylim(-1, 1.2)
    scatter.set_data(size_list, polarity_list)
    scatter.save_image('Image/%s/echo_chamber_polarity_size.png'%foldername)

    cdf = CDFPlot()
    cdf.set_data(all_similarity,'')
    cdf.set_data(polarity_list,'')
    cdf.set_label('Polarity', 'CDF')
    cdf.set_legends(['All', 'Median'], '')
    cdf.save_image('Image/%s/echo_chamber_all_polarity_similarity_cdf.png'%foldername)
Пример #14
0
def depth_politics_cdf():
    """Draw depth CDFs/CCDF for political versus other false rumors.

    ``get_depth`` is called twice with ``veracity='False'`` and
    ``echo_chamber=True`` -- once with ``politic=True`` and once with
    ``politic=False`` -- and each call yields three mappings (all, echo
    chamber, non echo chamber). Their values are plotted into four figures
    under ``Image/20181002/``.
    """
    def save_pair_cdf(first, second, labels, legend_title, path, title=None):
        # Two-curve CDF; the optional title is set between data and legend.
        plot = CDFPlot()
        plot.set_label('Depth', 'CDF')
        plot.set_data(first, labels[0])
        plot.set_data(second, labels[1])
        if title is not None:
            plot.set_title(title)
        plot.set_legends(list(labels), legend_title)
        plot.save_image(path)

    politics, politics_echo, politics_necho = get_depth(
        politic=True, veracity='False', echo_chamber=True)
    other, other_echo, other_necho = get_depth(
        politic=False, veracity='False', echo_chamber=True)

    # Only the values of each mapping are plotted (an older variant that
    # flattened two nesting levels with itertools.chain is no longer used).
    politics = politics.values()
    politics_echo = politics_echo.values()
    politics_necho = politics_necho.values()

    other = other.values()
    other_echo = other_echo.values()
    other_necho = other_necho.values()

    user_types = ('Echo Chamber', 'Non Echo Chamber')

    save_pair_cdf(politics, other, ('Politics', 'Other'), 'Category',
                  'Image/20181002/depth_cdf.png')
    save_pair_cdf(politics_echo, politics_necho, user_types, 'User Type',
                  'Image/20181002/echo_depth_cdf.png', title='Politics')
    save_pair_cdf(other_echo, other_necho, user_types, 'User Type',
                  'Image/20181002/echo_depth_cdf2.png', title='Non Politics')

    complementary = CCDFPlot()
    complementary.set_label('Depth', 'CCDF')
    complementary.set_data(politics)
    complementary.set_data(other)
    complementary.set_legends(['Politics', 'Other'], 'Category')
    complementary.save_image('Image/20181002/depth_ccdf.png')
def draw_cdf_plot(datas, datatype, legend, legend_type, filename):
    """Draw several series into one log-scaled CDF figure and save it.

    Args:
        datas: Indexable collection of series; one curve is drawn per entry.
        datatype: Label of the x axis.
        legend: Labels, indexed in parallel with ``datas``.
        legend_type: Second argument forwarded to ``set_legends``.
        filename: Image name (without extension) inside ``Image/<foldername>/``.
    """
    plot = CDFPlot()
    plot.set_label(datatype, 'CDF')
    plot.set_log(True)

    for position in range(len(datas)):
        series, label = datas[position], legend[position]
        plot.set_data(series, label)

    plot.set_legends(legend, legend_type)
    plot.save_image('Image/%s/%s.png' % (foldername, filename))
Пример #16
0
def time_to_depth_echo_chamber(filename):
    """Compare time/users needed to reach each depth for two kinds of cascades.

    A cascade counts as an "echo chamber cascade" when at least one of its
    tweets was posted by a user listed for that post in the mapping returned
    by ``e_util.get_echo_chamber_users(filename)``. For depths 1..19 the
    ``time_depth`` and ``user_depth`` values from ``get_depth_time_series``
    are bucketed by cascade kind (and additionally by veracity), then a box
    plot, two line plots, two JSON dumps and a CDF are produced.

    NOTE(review): the visible part of this function stops at an opening
    triple quote, so anything after the CDF is not documented here.

    Args:
        filename: Path passed to ``e_util.get_echo_chamber_users``.
    """
    
    # Only the third and fifth return values are used.
    _, _, time_depth, _, user_depth = get_depth_time_series(None)    
    print(len(time_depth))
    #with open('Data/time_series_data.json', 'w') as f:
    #    json.dump({'time_depth' : time_depth, 'user_depth' : user_depth}, f)
    #with open('Data/time_series_data.json', 'r') as f:
    #    data = json.load(f)

    #time_depth = data['time_depth']
    #user_depth = data['user_depth']

    print("time series data load done ")
    # measure name -> depth (1..19) -> list of values, per cascade kind
    echo_chamber_values = {}
    non_echo_chamber_values = {} 
   
    for item in ['time_depth', 'user_depth']:
        echo_chamber_values[item] = {}
        non_echo_chamber_values[item] = {}

        for i in range(1,20):
            echo_chamber_values[item][i] = []
            non_echo_chamber_values[item][i] = []
    # NOTE(review): Bot is not used in the visible part of this function.
    Bot = bot.load_bot()
    echo_chamber_cascade_root = {}  # origin tweet id -> 1 (dict used as a set)
    cascade_veracity = {}           # origin tweet id -> veracity label of its post
    echo_chamber_users = e_util.get_echo_chamber_users(filename)
   
    files = os.listdir('RetweetNew')
    #collect echo chamber user participate cascade 
    #for postid in echo_chamber_users.keys():
    for postid in files:
        v = veracity_type(postid).title()
        
        #get origin tweet of echo chamber user 
        with open('RetweetNew/%s'%postid, 'r') as f:
            tweets = json.load(f)

            for tweet in tweets.values():
                # A KeyError (missing tweet field, or a post id absent from
                # echo_chamber_users) just means "not an echo chamber tweet".
                try:
                    #if tweet['user'] in echo_chamber_users[postid].keys():
                    origin = tweet['origin']
                    otid = tweet['origin_tweet']
                    #if origin in echo_chamber_users[postid].keys():
                    if tweet['user'] in echo_chamber_users[postid].keys():
                        echo_chamber_cascade_root[tweet['origin_tweet']] = 1
                except KeyError :
                    pass

                # Recorded for every tweet, outside the try block.
                cascade_veracity[tweet['origin_tweet']] = v
    
    print("echo chamber cascade extraction done")

    echo_chamber_cascades = echo_chamber_cascade_root.keys()

    print('echo chamber cascades')
    #print(echo_chamber_cascades)

    # veracity -> measure name -> depth (1..19) -> list of values
    e = {};  n = {}; r = {}; #echo, non echo, ranked echo 
    for item in ['True', 'False', 'Mixed']:
        e[item] = {}
        n[item] = {}
        r[item] = {}
        
        for d_type in ['user_depth', 'time_depth']:
            e[item][d_type] = {}
            n[item][d_type] = {}
            r[item][d_type] = {}

            for i in range(1, 20):
                e[item][d_type][i] = []
                n[item][d_type][i] = []
                r[item][d_type][i] = []

    for key in time_depth.keys():
        v = cascade_veracity[key]
        # Every label other than 'True' / 'False' is folded into 'Mixed'.
        if v !='True' and  v != 'False':
            v = 'Mixed'

        if key in echo_chamber_cascades:
            #for i in range(1, max(time_depth[key].keys())+1):
            for i in range(1, max(time_depth[key].keys())+1):
                # KeyError: depth i is missing for this cascade or lies
                # outside the prepared 1..19 buckets; such depths are skipped.
                try:
                    echo_chamber_values['time_depth'][i].append(time_depth[key][i])
                    echo_chamber_values['user_depth'][i].append(user_depth[key][i])
                    e[v]['time_depth'][i].append(time_depth[key][i])
                    e[v]['user_depth'][i].append(user_depth[key][i])

                except KeyError:
                    pass
        else:
            for i in range(1, max(time_depth[key].keys())+1):
                try :
                    non_echo_chamber_values['time_depth'][i].append(time_depth[key][i])
                    non_echo_chamber_values['user_depth'][i].append(user_depth[key][i])
                    n[v]['time_depth'][i].append(time_depth[key][i])
                    n[v]['user_depth'][i].append(user_depth[key][i])

                except KeyError:
                    pass

    box = BoxPlot(1)
    box.set_multiple_data([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']])
    box.set_ylog()
    box.set_label('Depth', 'Minutes to Depth')
    box.save_image('%s/time_depth_echo_chamber_box.png'%foldername)
    print(echo_chamber_values['time_depth'])    

    #draw time to depth, user to depth of cascade for echo chamber users participated or non echo chamer users participated 
    with open('Data/Figure/5_2_1.json', 'w') as f:
        json.dump([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], f)

    
    draw_time_to_depth_echo_chamber([echo_chamber_values['time_depth'], non_echo_chamber_values['time_depth']], ['echo chamber', 'no echo chamber'], 'median minutes', 'time_depth_echo_chamber_line')
    draw_time_to_depth_echo_chamber([echo_chamber_values['user_depth'], non_echo_chamber_values['user_depth']], ['echo chamber', 'no echo chamber'], 'median unique users', 'user_depth_echo_chamber_line')
    
    # Depth-1 values only, for both cascade kinds.
    with open('Data/Figure/5_2_time.json', 'w') as f:
        json.dump({'e':echo_chamber_values['time_depth'][1], 'ne':non_echo_chamber_values['time_depth'][1]}, f)

    #draw cdf with top retweet 
    cdf = CDFPlot()
    cdf.set_label('Propagation Time', 'CDF')
    cdf.set_log(True)
    #cdf.set_ylog()
    cdf.set_data(echo_chamber_values['time_depth'][1], '')
    cdf.set_data(non_echo_chamber_values['time_depth'][1], '')
    cdf.save_image('Image/20181105/depth_propagation_time_cdf.png')

    """
Пример #17
0
def draw_cdf_plot(datas,
                  datatype,
                  legend,
                  legend_type,
                  filename,
                  log_scale=True):
    """Draw several unlabeled series into one CDF figure and save it.

    Args:
        datas: Indexable collection of series; one curve is drawn per entry.
        datatype: Label of the x axis.
        legend: Legend labels; only used when there is more than one.
        legend_type: Second argument forwarded to ``set_legends``.
        filename: Full path of the image to write.
        log_scale: Value forwarded to ``set_log``.
    """
    plot = CDFPlot()
    plot.set_label(datatype, 'CDF')
    plot.set_log(log_scale)

    # Curves are added without individual labels.
    for position in range(len(datas)):
        plot.set_data(datas[position], '')

    # Fixed ticks at -1, 0 and 1.
    fixed_ticks = [-1, 0, 1]
    plot.set_xticks(fixed_ticks, index=[-1, 0, 1])

    if len(legend) > 1:
        plot.set_legends(legend, legend_type)

    plot.save_image(filename)