def activity_stat(path, usernames, ids, adj):
    """Count retweet, quote and reply interactions between the given accounts.

    For every name in ``usernames`` the tweets stored in
    ``path + name + '.json'`` are loaded with ``functions.load_json_list``.
    Each tweet that retweets, quotes or replies to an account whose id string
    is in ``ids`` increments the matching cell of the respective matrix.

    Args:
        path: Prefix prepended verbatim to ``name + '.json'``; it therefore
            has to end with a path separator already.
        usernames: Account names; the position of a name selects the row.
            Presumably parallel to ``ids`` -- verify against the caller.
        ids: Account id strings; the position of an id selects the column.
        adj: Unused; kept so existing callers keep working.

    Returns:
        ``(rt_c, qt_c, rep_c, node_w)``: three ``len(ids) x len(ids)`` count
        matrices (retweets, quotes, replies) with zeroed diagonals, and a
        vector of length ``len(ids)`` holding the ``nodes_stat`` value of
        every account that has at least one tweet (0 for the others).
    """
    size = len(ids)
    rt_c = np.zeros((size, size))
    qt_c = np.zeros((size, size))
    rep_c = np.zeros((size, size))
    node_w = np.zeros(size)

    # First-occurrence position maps: same result as list.index(), but
    # without rescanning the lists for every single tweet.
    id_pos = {}
    for pos, user_id in enumerate(ids):
        id_pos.setdefault(user_id, pos)
    name_pos = {}
    for pos, user_name in enumerate(usernames):
        name_pos.setdefault(user_name, pos)

    for name in usernames:
        tweets = functions.load_json_list(path + name + '.json')
        # Resolve the row up front so it is always defined, including for
        # accounts without tweets (the old code reused a stale/undefined idx).
        row = name_pos[name]

        for item in tweets:
            # retweet of an account from the friend net
            if 'retweeted_status' in item:
                col = id_pos.get(item['retweeted_status']['user']['id_str'])
                if col is not None:
                    rt_c[row][col] += 1

            # quoted tweet from an account in the friend net
            if item['is_quote_status'] == True and 'quoted_status' in item:
                col = id_pos.get(item['quoted_status']['user']['id_str'])
                if col is not None:
                    qt_c[row][col] += 1

            # reply (mention) to an account from the friend net
            reply_to = item['in_reply_to_user_id']
            if reply_to is not None:
                col = id_pos.get(str(reply_to))
                if col is not None:
                    rep_c[row][col] += 1

        # node_w starts at zero, so accounts without tweets need no update.
        if len(tweets) > 0:
            node_w[row] = nodes_stat(tweets[-1], tweets[0], len(tweets))

    # interactions of an account with itself are not counted
    np.fill_diagonal(rt_c, 0)
    np.fill_diagonal(qt_c, 0)
    np.fill_diagonal(rep_c, 0)

    return rt_c, qt_c, rep_c, node_w
def read_multiple_net(accounts, adr):
    """Load the stored networks of several accounts and accumulate them.

    For each account ``acc`` the files ``<adr>/<acc>/<acc>_complete.txt``,
    ``<acc>_usernames.txt`` and ``<acc>_ids.txt`` are read, run through
    ``prepare_data`` and handed to ``update_data`` together with the values
    accumulated so far. ``update_color`` is then called with the current
    colour list, the accumulated data length and the 1-based number of
    accounts processed.

    Args:
        accounts: Iterable of account names (also used as folder names).
        adr: Directory that contains one sub-folder per account.

    Returns:
        Tuple ``(data, ids, usernames, color)`` after the last account; four
        empty lists when ``accounts`` is empty.
    """
    merged_data = []
    merged_ids = []
    merged_names = []
    color = []

    for position, acc in enumerate(accounts, start=1):
        # every file of an account shares the "<adr>/<acc>/<acc>" prefix
        prefix = '/'.join((adr, acc, acc))

        # read data for the current user
        raw = functions.load_json_list(prefix + '_complete.txt')
        stored_names = functions.load(prefix + '_usernames.txt')
        stored_ids = functions.load(prefix + '_ids.txt')

        # substitute usernames with user ids for the current user
        acc_data, acc_ids, acc_names = prepare_data(raw, stored_ids, stored_names)

        merged_data, merged_ids, merged_names, color = update_data(
            merged_data, merged_ids, merged_names,
            acc_data, acc_ids, acc_names,
            color)
        color = update_color(color, len(merged_data), position)

    return merged_data, merged_ids, merged_names, color
# Example #3
# 0
# Script: report the share of tweets whose 'lang' field is 'en' across all
# tweet files found in input_dir.
username = '******'
save = False  # not read anywhere in the visible part of this script

input_dir = 'TWEETS DIRECTORY'
output_dir = 'RESULT DIRECTORY'

directory = os.fsencode(input_dir)
# List the directory once so the progress total and the loop agree.
files = os.listdir(directory)
num_of_files = len(files)

counter = 0
english_counter = 0
tweet_counter = 0

for file in files:

    file_name = os.fsdecode(file)
    # os.path.join inserts the separator that plain concatenation dropped
    # whenever input_dir does not end with one.
    path = os.path.join(input_dir, file_name)
    data = functions.load_json_list(path)
    tweet_counter += len(data)
    counter += 1

    english_counter += sum(1 for item in data if item['lang'] == 'en')

    # '\r' rewrites the same console line; flush so the progress is visible.
    sys.stdout.write('\r%d/%d' % (counter, num_of_files))
    sys.stdout.flush()

# Avoid ZeroDivisionError when the directory holds no tweets at all.
ratio = english_counter / tweet_counter if tweet_counter else 0.0

print("\n=========", username, "=========")
print("\nnumber of tweets;", tweet_counter)
# The counter holds English tweets, not hashtags; label it accordingly.
print("english tweets: ", english_counter)
print("final value:", round(ratio, 5))
# Example #4
# 0
# Script: load the stored network files of one account, prepare the data and
# build its adjacency matrix and graph via info_diff_func.
user = '******'

# Parameters and on/off switches. None of them is read in the visible part of
# this script -- presumably they drive later analysis steps (TODO confirm).
alfa = 0.4
extracting_weights = False
plot = True
disjoint = False
bc_analysis = False
cc_analysis = False
jsc = False
sdi = False
entropy = False
w = [0.2, 0.5, 0.3]  # per the original note: ret, qt, rep (presumably retweet/quote/reply weights)

# NOTE(review): the directory prefix and user are concatenated without a
# separator, so the prefix must already end with one -- confirm.
path = 'EGOS DIRECTORY' + user
raw_data = functions.load_json_list(path + '/' + user + '_complete.txt')
base_usernames = functions.load(path + '/' + user + '_usernames.txt')
base_ids = functions.load(path + '/' + user + '_ids.txt')

# prepare_data returns the (data, ids, usernames) triple used below
data, ids, usernames = info_diff_func.prepare_data(raw_data, base_ids,
                                                   base_usernames)

print("\n===========", user, "===========\n")

# creating adjacency matrix
adj = info_diff_func.adjacency(ids, data)

# a simple directed graph using networkx (per the original author's note;
# simple_graph itself is defined elsewhere)
G = info_diff_func.simple_graph(adj)

#extracting links