def add_attributes(g, att_names, dbname, colname, db_field_names):
    '''
    Copy several database fields onto the vertices of g as attributes.

    g: graph whose vertices carry a "name" attribute; modified in place.
    att_names: vertex attribute names, parallel to db_field_names.
    dbname, colname: database and collection the documents are read from.
    db_field_names: document fields to copy; att_names[i] receives the
        value of db_field_names[i]. A dotted name ("a.b.c") is resolved by
        walking nested documents level by level.

    Every attribute in att_names is first set to 0.0 on all vertices. For
    each document whose str(id) names a vertex, every field value is stored
    under its paired attribute; a field that is absent from the document is
    stored as None. Documents without a matching vertex are skipped.

    Raises ValueError if att_names and db_field_names differ in length.
    Returns g.
    '''
    att_names = list(att_names)
    db_field_names = list(db_field_names)
    if len(att_names) != len(db_field_names):
        raise ValueError(
            'att_names and db_field_names must have the same length')

    db = dbt.db_connect_no_auth(dbname)
    com = db[colname]
    for att_name in att_names:
        g.vs[att_name] = 0.0

    # Pair each attribute with its own field path once, up front. The
    # previous code reused the loop variable left over from the
    # initialisation loop, so every field was written to the last attribute.
    targets = [(att_name, db_field_name.split('.'))
               for att_name, db_field_name in zip(att_names, db_field_names)]

    for x in com.find({}, ['id'] + db_field_names):
        try:
            v = g.vs.find(name=str(x['id']))
        except ValueError:
            # The except clause treats ValueError as "no vertex with this name".
            continue
        for att_name, levels in targets:
            t = x
            for level in levels:
                t = t.get(level)
                if t is None:
                    # Stop descending so a missing level never has .get
                    # called on None.
                    break
            v[att_name] = t
    return g
def load_network_subset(db_name, collection='None', filter={}):
    '''
    Friendship network: unweighted directed network built from follow records.
    Edge: follower ---------> user   (row['follower'] -> row['user'])

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name; the string 'None' (the
        default) means db_name is itself the collection.
    filter: query document handed to find(); the default {} matches every
        record. It is only read here, never modified.

    Returns a directed Graph whose vertices carry a "name" attribute (the
    str() of the ids) and whose edges all have "weight" 1. Repeated records
    for the same (follower, user) pair produce a single edge.
    '''
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges = {}, set()
    cursor = cols.find(filter, no_cursor_timeout=True)
    try:
        for row in cursor:
            n1 = str(row['follower'])
            n2 = str(row['user'])
            # setdefault evaluates len(name_map) before inserting, so a new
            # name receives the next free vertex index.
            n1id = name_map.setdefault(n1, len(name_map))
            n2id = name_map.setdefault(n2, len(name_map))
            edges.add((n1id, n2id))
    finally:
        # A cursor opened with no_cursor_timeout is not reaped by the
        # server on its own, so always release it explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=True)
    # Names ordered by their assigned index line up with vertex ids.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    g.add_edges(list(edges))
    g.es["weight"] = 1
    return g
def add_attribute(g, att_name, dbname, colname, db_field_name):
    '''
    Copy one database field onto the vertices of g as attribute att_name.

    All vertices first receive the sentinel -1000000000.0. Only documents
    in which db_field_name exists are read; for each one whose str(id)
    names a vertex, the field value replaces the sentinel. A dotted field
    name is resolved by walking nested documents level by level. Documents
    without a matching vertex are skipped.

    Returns g (modified in place).
    '''
    collection = dbt.db_connect_no_auth(dbname)[colname]
    sentinel = -1000000000.0
    g.vs[att_name] = sentinel

    query = {db_field_name: {'$exists': True}}
    for doc in collection.find(query, ['id', db_field_name]):
        doc_id = doc['id']
        try:
            vertex = g.vs.find(name=str(doc_id))
        except ValueError:
            # No vertex carries this name; nothing to annotate.
            continue

        if '.' not in db_field_name:
            vertex[att_name] = doc.get(db_field_name)
            continue

        # Nested field: descend one key at a time.
        path = db_field_name.split('.')
        value = doc.get(path[0])
        for key in path[1:]:
            value = value.get(key)
        vertex[att_name] = value
    return g
def load_user_hashtag_network(db_name, collection='None'):
    '''
    User-Hashtag network: weighted directed network
    Edge: user ---------> hashtag

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).

    Only records with at least one hashtag are read. Hashtag text is
    lower-cased and stripped of '_' and '-' before use. The weight of an
    edge is the number of records in which that user used that hashtag; a
    hashtag repeated inside one record counts once.

    Returns a directed Graph with a vertex "name" attribute (user id
    strings and normalised hashtags share one namespace) and an edge
    "weight" attribute.
    '''
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges = {}, {}
    cursor = cols.find({'$where': "this.entities.hashtags.length>0"},
                       no_cursor_timeout=True)
    try:
        for row in cursor:
            n1 = row['user']['id_str']
            hash_set = set()
            for tag in row['entities']['hashtags']:
                # NOTE(review): the original author remarked that the
                # .encode('utf-8') may be unnecessary. This byte-string chain
                # works on Python 2 str; on Python 3 bytes.replace() with
                # text arguments raises TypeError.
                hash_set.add(tag['text'].encode('utf-8').lower().replace(
                    '_', '').replace('-', ''))
            for n2 in hash_set:
                # A new name receives the next free vertex index.
                n1id = name_map.setdefault(n1, len(name_map))
                n2id = name_map.setdefault(n2, len(name_map))
                edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=True)
    # Names ordered by their assigned index line up with vertex ids.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Materialise one edge list and derive the weights from it, so both are
    # real sequences in the same order on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[edge] for edge in edge_list]
    return g
def add_attribute(g, att_name, dbname, colname, db_field_name):
    '''
    Copy one database field onto the vertices of g as attribute att_name.

    All vertices first receive the sentinel -1000000000.0. Only documents
    in which db_field_name exists are read; for each one whose str(id)
    names a vertex, the field value replaces the sentinel. A dotted field
    name is resolved by walking nested documents level by level. Documents
    without a matching vertex are skipped.

    Returns g (modified in place).
    '''
    collection = dbt.db_connect_no_auth(dbname)[colname]
    sentinel = -1000000000.0
    g.vs[att_name] = sentinel

    query = {db_field_name: {'$exists': True}}
    for doc in collection.find(query, ['id', db_field_name]):
        doc_id = doc['id']
        try:
            vertex = g.vs.find(name=str(doc_id))
        except ValueError:
            # No vertex carries this name; nothing to annotate.
            continue

        if '.' not in db_field_name:
            vertex[att_name] = doc.get(db_field_name)
            continue

        # Nested field: descend one key at a time.
        path = db_field_name.split('.')
        value = doc.get(path[0])
        for key in path[1:]:
            value = value.get(key)
        vertex[att_name] = value
    return g
def load_user_hashtag_network(db_name, collection='None'):
    '''
    User-Hashtag network: weighted directed network
    Edge: user ---------> hashtag

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).

    Only records with at least one hashtag are read. Hashtag text is
    lower-cased and stripped of '_' and '-' before use. The weight of an
    edge is the number of records in which that user used that hashtag; a
    hashtag repeated inside one record counts once.

    Returns a directed Graph with a vertex "name" attribute (user id
    strings and normalised hashtags share one namespace) and an edge
    "weight" attribute.
    '''
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges = {}, {}
    cursor = cols.find({'$where': "this.entities.hashtags.length>0"},
                       no_cursor_timeout=True)
    try:
        for row in cursor:
            n1 = row['user']['id_str']
            hash_set = set()
            for tag in row['entities']['hashtags']:
                # NOTE(review): the original author remarked that the
                # .encode('utf-8') may be unnecessary. This byte-string chain
                # works on Python 2 str; on Python 3 bytes.replace() with
                # text arguments raises TypeError.
                hash_set.add(tag['text'].encode('utf-8').lower().replace(
                    '_', '').replace('-', ''))
            for n2 in hash_set:
                # A new name receives the next free vertex index.
                n1id = name_map.setdefault(n1, len(name_map))
                n2id = name_map.setdefault(n2, len(name_map))
                edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=True)
    # Names ordered by their assigned index line up with vertex ids.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Materialise one edge list and derive the weights from it, so both are
    # real sequences in the same order on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[edge] for edge in edge_list]
    return g
def load_beh_network(db_name, collection='None', btype='communication'):
    '''
    All interactions of a user
    behavior network: directed weighted network
    Record type codes: Tweet: 0; Retweet: 1; Reply: 2; Direct mention: 3;
    Undirect mention: 4

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).
    btype: which record types to load - 'retweet' (1), 'reply' (2),
        'mention' (3) or 'communication' (2 and 3). Any other value raises
        KeyError.

    Every edge is built as row['id0'] -> row['id1']; records whose two ids
    are equal are ignored. The edge weight is the number of matching records
    for that ordered pair.
    NOTE(review): the original note says reply/mention edges run u0 -> u1
    and retweet edges u1 -> u0; nothing is swapped here, so presumably the
    stored id0/id1 already encode that direction - confirm against the
    collection.

    Returns a directed Graph with a vertex "name" attribute (str() of the
    ids) and an edge "weight" attribute.
    '''
    btype_dic = {
        'retweet': [1],
        'reply': [2],
        'mention': [3],
        'communication': [2, 3],
    }
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges = {}, {}
    cursor = cols.find({'type': {'$in': btype_dic[btype]}},
                       no_cursor_timeout=True)
    try:
        for row in cursor:
            n1 = str(row['id0'])
            n2 = str(row['id1'])
            if n1 == n2:
                continue  # ignore self-interactions
            # A new name receives the next free vertex index.
            n1id = name_map.setdefault(n1, len(name_map))
            n2id = name_map.setdefault(n2, len(name_map))
            edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=True)
    # Names ordered by their assigned index line up with vertex ids.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Materialise one edge list and derive the weights from it, so both are
    # real sequences in the same order on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[edge] for edge in edge_list]
    return g
def export_poi(dbname, colname, index='All'):
    '''
    Collect every document of the collection whose timeline_count is
    greater than 0.

    Unless index equals 0, each returned document is tagged with
    time_index = index (the default tag is the string 'All').

    Returns the documents as a list of dicts.
    '''
    collection = dbutil.db_connect_no_auth(dbname)[colname]
    records = []
    for doc in collection.find({'timeline_count': {'$gt': 0}}):
        if not index == 0:
            doc['time_index'] = index
        records.append(doc)
    return records
def load_hashtag_coocurrent_network_undir(db_name, collection='None', uids=[]):
    '''
    Hashtag Co-occurrence Network: weighted undirected network
    Edge: Hashtag --------- Hashtag

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).
    uids: optional user ids; when non-empty only records whose user.id is
        in this list are read. The default list is only read, never modified.

    Only records with at least one hashtag are read. Hashtag text is
    lower-cased and stripped of '_' and '-'; duplicates inside one record
    collapse to a single hashtag.

    Returns an undirected Graph with
      vertex "name":   normalised hashtag
      vertex "weight": number of records containing the hashtag
      vertex "user":   number of distinct row['user']['id'] values using it
      edge "weight":   number of records in which both hashtags appear
    '''
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges, node_weight = {}, {}, {}
    tag_user = {}
    query = {}
    if len(uids) > 0:
        query['user.id'] = {'$in': uids}
    query['$where'] = 'this.entities.hashtags.length>0'

    cursor = cols.find(query, no_cursor_timeout=True)
    try:
        for row in cursor:
            hash_set = set()
            for tag in row['entities']['hashtags']:
                # NOTE(review): the original author remarked that the
                # .encode('utf-8') may be unnecessary. This byte-string chain
                # works on Python 2 str; on Python 3 bytes.replace() with
                # text arguments raises TypeError.
                hash_set.add(tag['text'].encode('utf-8').lower().replace(
                    '_', '').replace('-', ''))

            # Give every hashtag of this record a vertex index; a new name
            # receives the next free index.
            tag_ids = []
            for text in hash_set:
                tag_ids.append(name_map.setdefault(text, len(name_map)))

            for tid in tag_ids:
                node_weight[tid] = node_weight.get(tid, 0) + 1
                tag_user.setdefault(tid, set()).add(row['user']['id'])

            # Count each unordered pair once, keyed as (smaller, larger) so
            # both orientations land on the same edge.
            for pos, first in enumerate(tag_ids):
                for second in tag_ids[pos + 1:]:
                    pair = (first, second) if first < second else (second, first)
                    edges[pair] = edges.get(pair, 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=False)
    # Names ordered by their assigned index line up with vertex ids.
    name_list = sorted(name_map, key=name_map.get)
    g.vs["name"] = name_list
    g.vs["weight"] = [node_weight[name_map[name]] for name in name_list]
    g.vs['user'] = [len(tag_user[name_map[name]]) for name in name_list]
    # Materialise one edge list and derive the weights from it, so both are
    # real sequences in the same order on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
def load_beh_network_subset(userlist, db_name, collection='None', btype='communication', tag=None):
    '''
    Only interactions among poi
    behavior network: directed weighted network
    Record type codes: Tweet: 0; Retweet: 1; Reply: 2; Direct mention: 3;
    Undirect mention: 4

    userlist: ids to keep; a record is read only when both its id0 and its
        id1 are in this list.
    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).
    btype: which record types to load - 'retweet' (1), 'reply' (2),
        'mention' (3), 'communication' (2 and 3) or 'all' (1, 2 and 3).
        Any other value raises KeyError.
    tag: optional list of values; when truthy, only records whose "tags"
        field matches one of them are read.

    Every edge is built as row['id0'] -> row['id1']; records whose two ids
    are equal are ignored. The edge weight is the number of matching records
    for that ordered pair.
    NOTE(review): the original note says reply/mention edges run u0 -> u1
    and retweet edges u1 -> u0; nothing is swapped here, so presumably the
    stored id0/id1 already encode that direction - confirm against the
    collection.

    Returns a directed Graph with a vertex "name" attribute (str() of the
    ids) and an edge "weight" attribute.
    '''
    btype_dic = {
        'retweet': [1],
        'reply': [2],
        'mention': [3],
        'communication': [2, 3],
        'all': [1, 2, 3],
    }
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges = {}, {}
    query = {
        'type': {'$in': btype_dic[btype]},
        'id0': {'$in': userlist},
        'id1': {'$in': userlist},
    }
    if tag:
        query['tags'] = {'$in': tag}

    cursor = cols.find(query, no_cursor_timeout=True)
    try:
        for row in cursor:
            n1 = str(row['id0'])
            n2 = str(row['id1'])
            if n1 == n2:
                continue  # ignore self-interactions
            # A new name receives the next free vertex index.
            n1id = name_map.setdefault(n1, len(name_map))
            n2id = name_map.setdefault(n2, len(name_map))
            edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=True)
    # Names ordered by their assigned index line up with vertex ids.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Materialise one edge list and derive the weights from it: this keeps
    # edges and weights aligned without relying on keys()/values() ordering
    # and hands the graph real sequences on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[edge] for edge in edge_list]
    return g
def load_beh_network(db_name, collection='None', btype='communication'):
    '''
    All interactions of a user
    behavior network: directed weighted network
    Record type codes: Tweet: 0; Retweet: 1; Reply: 2; Direct mention: 3;
    Undirect mention: 4

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).
    btype: which record types to load - 'retweet' (1), 'reply' (2),
        'mention' (3) or 'communication' (2 and 3). Any other value raises
        KeyError.

    Every edge is built as row['id0'] -> row['id1']; records whose two ids
    are equal are ignored. The edge weight is the number of matching records
    for that ordered pair.
    NOTE(review): the original note says reply/mention edges run u0 -> u1
    and retweet edges u1 -> u0; nothing is swapped here, so presumably the
    stored id0/id1 already encode that direction - confirm against the
    collection.

    Returns a directed Graph with a vertex "name" attribute (str() of the
    ids) and an edge "weight" attribute.
    '''
    btype_dic = {
        'retweet': [1],
        'reply': [2],
        'mention': [3],
        'communication': [2, 3],
    }
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges = {}, {}
    cursor = cols.find({'type': {'$in': btype_dic[btype]}},
                       no_cursor_timeout=True)
    try:
        for row in cursor:
            n1 = str(row['id0'])
            n2 = str(row['id1'])
            if n1 == n2:
                continue  # ignore self-interactions
            # A new name receives the next free vertex index.
            n1id = name_map.setdefault(n1, len(name_map))
            n2id = name_map.setdefault(n2, len(name_map))
            edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=True)
    # Names ordered by their assigned index line up with vertex ids.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Materialise one edge list and derive the weights from it, so both are
    # real sequences in the same order on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[edge] for edge in edge_list]
    return g
def export_net_agg(dbname, colname, file_name):
    '''
    Aggregate interaction records into one row per (id0, id1, type) and
    hand the rows to csv_output.

    Records of type 1, 2 or 3 are read; those whose id0 equals id1 are
    ignored. Each output row holds id0, id1, the type name ('retweet',
    'reply' or 'mention') and the number of records in that group.
    '''
    net = dbutil.db_connect_no_auth(dbname)[colname]
    fields = ['id0', 'id1', 'type', 'count']
    type_names = {1: 'retweet', 2: 'reply', 3: 'mention'}

    # Original note: "Only include poi users". Nothing in this function
    # filters by user, so presumably the collection is already restricted -
    # confirm against the caller.
    totals = {}
    for record in net.find({"type": {'$in': [1, 2, 3]}}):
        source, target, kind = record['id0'], record['id1'], record['type']
        if source == target:
            continue
        key = (source, target, kind)
        totals[key] = totals.get(key, 0) + 1

    rows = []
    for (source, target, kind), total in totals.items():
        rows.append({
            'id0': source,
            'id1': target,
            'type': type_names[kind],
            'count': total,
        })
    csv_output(fields, file_name, rows)
def load_hashtag_coocurrent_network_undir(db_name, collection='None', uids=[]):
    '''
    Hashtag Co-occurrence Network: weighted undirected network
    Edge: Hashtag --------- Hashtag
    excluding retweets

    db_name: database name, or, when collection is the string 'None', an
        already opened collection object that is queried directly.
    collection: collection name inside db_name ('None' = use db_name as is).
    uids: optional user ids; when non-empty only records whose user.id is
        in this list are read. The default list is only read, never modified.

    Only records that have at least one hashtag and no retweeted_status
    field are read. Hashtag text is lower-cased and stripped of '_' and
    '-'; duplicates inside one record collapse to a single hashtag.

    Returns an undirected Graph with
      vertex "name":   normalised hashtag
      vertex "weight": number of records containing the hashtag
      vertex "user":   number of distinct row['user']['id'] values using it
      edge "weight":   number of records in which both hashtags appear
    '''
    # Compare by value: identity against a string literal only works by
    # accident of interning.
    if collection == 'None':
        cols = db_name
    else:
        db = dbt.db_connect_no_auth(db_name)
        cols = db[collection]

    name_map, edges, node_weight = {}, {}, {}
    tag_user = {}
    query = {}
    if len(uids) > 0:
        query['user.id'] = {'$in': uids}
    query['$where'] = 'this.entities.hashtags.length>0'
    query['retweeted_status'] = {'$exists': False}

    cursor = cols.find(query, no_cursor_timeout=True)
    try:
        for row in cursor:
            hash_set = set()
            for tag in row['entities']['hashtags']:
                # NOTE(review): the original author remarked that the
                # .encode('utf-8') may be unnecessary. This byte-string chain
                # works on Python 2 str; on Python 3 bytes.replace() with
                # text arguments raises TypeError.
                hash_set.add(tag['text'].encode('utf-8').lower().replace(
                    '_', '').replace('-', ''))

            # Give every hashtag of this record a vertex index; a new name
            # receives the next free index.
            tag_ids = []
            for text in hash_set:
                tag_ids.append(name_map.setdefault(text, len(name_map)))

            for tid in tag_ids:
                node_weight[tid] = node_weight.get(tid, 0) + 1
                # Original note: row['user']['id'] is for the "norm" data;
                # the "ian" data would use row['from_user_id'] instead.
                tag_user.setdefault(tid, set()).add(row['user']['id'])

            # Count each unordered pair once, keyed as (smaller, larger) so
            # both orientations land on the same edge.
            for pos, first in enumerate(tag_ids):
                for second in tag_ids[pos + 1:]:
                    pair = (first, second) if first < second else (second, first)
                    edges[pair] = edges.get(pair, 0) + 1
    finally:
        # A cursor opened with no_cursor_timeout must be released explicitly.
        cursor.close()

    g = Graph(len(name_map), directed=False)
    # Names ordered by their assigned index line up with vertex ids.
    name_list = sorted(name_map, key=name_map.get)
    g.vs["name"] = name_list
    # Number of occurrences of each hashtag.
    g.vs["weight"] = [node_weight[name_map[name]] for name in name_list]
    # Number of distinct users of each hashtag.
    g.vs['user'] = [len(tag_user[name_map[name]]) for name in name_list]
    # Materialise one edge list and derive the co-occurrence counts from it,
    # so both are real sequences in the same order on any Python version.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g