def activity_stat(path, usernames, ids, adj):
    """Count retweet, quote and reply interactions between the given accounts.

    For every name in ``usernames`` the file ``path + name + '.json'`` is
    loaded with ``functions.load_json_list`` and each tweet in it is checked
    for three kinds of interaction with an account listed in ``ids``:
    a retweet, a quote, or a reply.

    Args:
        path: Prefix put in front of ``<name>.json``. It is concatenated
            as-is, so it must already end with a path separator.
        usernames: Account names. The position of a name selects the matrix
            row (and the ``node_w`` entry) it is counted in.
        ids: User id strings. The position of an id selects the matrix
            column. NOTE(review): rows come from ``usernames`` and columns
            from ``ids`` while the diagonal is cleared afterwards, so the two
            lists are presumably index-aligned -- confirm with the caller.
        adj: Not used by this function; kept so existing callers keep working.

    Returns:
        Tuple ``(rt_c, qt_c, rep_c, node_w)``. The first three are
        ``len(ids) x len(ids)`` arrays of retweet, quote and reply counts
        with a zeroed diagonal. ``node_w`` has length ``len(ids)`` and holds
        the value of ``nodes_stat(last_tweet, first_tweet, tweet_count)`` for
        each user, or 0 for a user without tweets.
    """
    size = len(ids)
    rt_c = np.zeros((size, size))
    qt_c = np.zeros((size, size))
    rep_c = np.zeros((size, size))
    node_w = np.zeros(size)

    # Map every id to its first position (the same answer ``ids.index`` gives)
    # so each tweet costs a dict lookup instead of a scan of the whole list.
    column_of = {}
    for position, user_id in enumerate(ids):
        column_of.setdefault(user_id, position)

    for name in usernames:
        # Resolve the row before anything else. The previous code assigned it
        # only when the user had tweets, so the "no tweets" branch either
        # raised NameError or zeroed the weight of the previously processed
        # user.
        row = usernames.index(name)
        tweets = functions.load_json_list(path + name + '.json')

        for item in tweets:
            # Retweet of an account from the friend net.
            if 'retweeted_status' in item:
                col = column_of.get(item['retweeted_status']['user']['id_str'])
                if col is not None:
                    rt_c[row][col] += 1

            # Quoted tweet from an account in the friend net.
            if item['is_quote_status'] and 'quoted_status' in item:
                col = column_of.get(item['quoted_status']['user']['id_str'])
                if col is not None:
                    qt_c[row][col] += 1

            # Reply (mention) to an account from the friend net. The reply
            # target is converted with str() because ``ids`` holds strings.
            if item['in_reply_to_user_id'] is not None:
                col = column_of.get(str(item['in_reply_to_user_id']))
                if col is not None:
                    rep_c[row][col] += 1

        if len(tweets) > 0:
            node_w[row] = nodes_stat(tweets[-1], tweets[0], len(tweets))
        else:
            node_w[row] = 0

    # Interactions of an account with itself are not counted.
    np.fill_diagonal(rt_c, 0)
    np.fill_diagonal(qt_c, 0)
    np.fill_diagonal(rep_c, 0)
    return rt_c, qt_c, rep_c, node_w
def read_multiple_net(accounts, adr):
    """Load the networks of several accounts and merge them into one.

    For each account ``acc`` the files ``<acc>_complete.txt``,
    ``<acc>_usernames.txt`` and ``<acc>_ids.txt`` are read from the folder
    ``adr/<acc>``. The result is passed through ``prepare_data`` and folded
    into the running collections with ``update_data``; afterwards
    ``update_color`` is called with the current data length and the 1-based
    number of the account.

    Args:
        accounts: Iterable of account names (also used as folder names).
        adr: Directory that contains one sub-folder per account.

    Returns:
        Tuple ``(data, ids, usernames, color)`` accumulated over all
        accounts; four empty lists when ``accounts`` is empty.
    """
    data = []
    ids = []
    usernames = []
    color = []

    for order, acc in enumerate(accounts, start=1):
        # Every file of an account shares the prefix "<adr>/<acc>/<acc>".
        stem = adr + '/' + acc + '/' + acc

        # Read data for the current user.
        raw_data = functions.load_json_list(stem + '_complete.txt')
        base_usernames = functions.load(stem + '_usernames.txt')
        base_ids = functions.load(stem + '_ids.txt')

        # Substitute usernames with user ids for the current user.
        prepared = prepare_data(raw_data, base_ids, base_usernames)
        temp_data, temp_ids, temp_usernames = prepared

        merged = update_data(
            data, ids, usernames,
            temp_data, temp_ids, temp_usernames,
            color,
        )
        data, ids, usernames, color = merged

        color = update_color(color, len(data), order)

    return data, ids, usernames, color
# Report which share of the tweets stored in ``input_dir`` is marked as
# English (``lang == 'en'``). Every entry of the directory is loaded with
# ``functions.load_json_list``.
username = '******'
save = False  # not used in the visible part of this script
input_dir = 'TWEETS DIRECTORY'
output_dir = 'RESULT DIRECTORY'  # not used in the visible part of this script

directory = os.fsencode(input_dir)
# List the directory once so the progress total and the loop agree.
file_list = os.listdir(directory)
num_of_files = len(file_list)

counter = 0
english_counter = 0
tweet_counter = 0

for file in file_list:
    file_name = os.fsdecode(file)
    # Plain concatenation only worked when input_dir ended with a separator;
    # os.path.join gives the same path in that case and a valid one otherwise.
    path = os.path.join(input_dir, file_name)
    data = functions.load_json_list(path)
    tweet_counter += len(data)
    counter += 1
    for item in data:
        if item['lang'] == 'en':
            english_counter += 1
    # '\r' rewrites the same console line to show progress.
    sys.stdout.write('\r%d/%d' % (counter, num_of_files))

# Avoid ZeroDivisionError when the directory is empty or holds no tweets.
english_ratio = english_counter / tweet_counter if tweet_counter else 0.0

print("\n=========", username, "=========")
print("\nnumber of tweets;", tweet_counter)
# The label used to read "found hashtags", but the value counts English tweets.
print("english tweets: ", english_counter)
print("final value:", round(english_ratio, 5))
# Ego-network script: configuration, loading of one user's data and
# construction of the adjacency matrix and graph.
user = '******'

# Numeric parameters.
alfa = 0.4
w = [0.2, 0.5, 0.3]  # ret, qt, rep

# Switches; presumably read further down the script (not visible here).
extracting_weights = False
plot = True
disjoint = False
bc_analysis = False
cc_analysis = False
jsc = False
sdi = False
entropy = False

# All input files of a user live in "<path>/" and start with the user name.
path = 'EGOS DIRECTORY' + user
file_stem = path + '/' + user
raw_data = functions.load_json_list(file_stem + '_complete.txt')
base_usernames = functions.load(file_stem + '_usernames.txt')
base_ids = functions.load(file_stem + '_ids.txt')

prepared = info_diff_func.prepare_data(raw_data, base_ids, base_usernames)
data, ids, usernames = prepared

print("\n===========", user, "===========\n")

# Adjacency matrix built from the ids and the prepared data.
adj = info_diff_func.adjacency(ids, data)

# Graph built from the adjacency matrix (a simple directed networkx graph,
# according to the original note).
G = info_diff_func.simple_graph(adj)

#extracting links