def __init__(self):
    """Create an EgoNetwork instance and store it on ``self.ego``."""
    # NOTE(review): this def sits at module level in this view even though it
    # takes ``self``; presumably a fragment of a class constructor -- confirm.
    network = EgoNetwork()
    self.ego = network
class MatchUsers:
    """Score, rank and list users by topical similarity to a target user.

    The similarity itself is delegated to ``EgoNetwork.jaccard_similarity``;
    this class reads the user file, normalizes each user's topical value into
    a set of terms, and collects ``[target_ID, source_Id, score]`` rows.
    """

    # Default tab-separated user file read by match_user() when no explicit
    # path is given (kept for backward compatibility with existing callers).
    DEFAULT_DATA_PATH = '/Users/shengjing/Desktop/Thesis-Project/DATASET/twitter/twitter_en_filter_Alchemy.csv'

    def __init__(self):
        self.ego = EgoNetwork()

    # discover similar users for a specific user
    def match_user(self, target_ID, target_value, data_path=None):
        """Score every other user in the data file against the target user.

        Each line of the file is split on tabs; column 0 is used as the user
        id and column 3 as that user's topical value (a string).

        :param target_ID: id of the target user; its own row is skipped.
        :param target_value: the target's topical value, passed unchanged as
            the second argument of ``similarity_metric``.
        :param data_path: path of the tab-separated user file; defaults to
            ``DEFAULT_DATA_PATH``.
        :return: list of ``[target_ID, source_Id, score]`` lists, in file
            order.
        """
        if data_path is None:
            data_path = self.DEFAULT_DATA_PATH

        score = []
        # ``with`` guarantees the file is closed even if scoring raises.
        with open(data_path, 'r') as file_en_filter_entity:
            for line in file_en_filter_entity:
                # Remove the line terminator in one step so that a CRLF
                # ending does not leave a stray '\r' on the last column.
                tokens = line.rstrip('\r\n').split('\t')
                if len(tokens) < 4:
                    # Blank or truncated row: there is no topical column to
                    # compare, so skip it instead of raising IndexError.
                    continue
                source_Id = tokens[0]
                # source_value is a string data type, pay attention
                source_value = tokens[3]
                if source_Id == target_ID:
                    continue
                # compare similarity with the rest of users
                score_item = self.similarity_metric(source_value, target_value)
                score.append([target_ID, source_Id, score_item])
        return score

    # compare topical category
    def similarity_metric(self, v1, v2):
        """Return the similarity between a raw topical string and ``v2``.

        :param v1: raw topical value string; square brackets and surrounding
            whitespace are removed before it is normalized.
        :param v2: value handed as-is to ``self.ego.jaccard_similarity``
            (presumably an already-normalized set of terms -- confirm
            against callers).
        :return: ``0.0`` when ``v1`` is empty after cleaning, otherwise
            whatever ``self.ego.jaccard_similarity`` returns.
        """
        v = v1.replace(']', '').replace('[', '').strip()
        if not v:
            return 0.0
        normalize_V1 = self.normalize_value(v)
        return self.ego.jaccard_similarity(normalize_V1, v2)

    def normalize_value(self, v):
        """Split a topical value string into a set of individual terms.

        Single quotes are removed, the string is split on ``,`` and every
        piece is split again on ``/``. Each resulting term is stripped of
        surrounding whitespace, and empty terms (e.g. the one produced by a
        leading ``/``) are dropped so they cannot count as a shared topic.

        :param v: topical value string.
        :return: set of non-empty term strings.
        """
        item_set = set()
        v = v.replace('\'', '').strip()
        for token in v.split(','):
            for item in token.split('/'):
                item = item.strip()
                if item:
                    item_set.add(item)
        return item_set

    # rank the users based on similarity score of topical category in their
    # bio-description
    def rank(self, data):
        """Return ``data`` sorted in ascending order of each row's index 2.

        :param data: iterable of rows such as those built by ``match_user``.
        :return: a new sorted list; ``data`` itself is not modified.
        """
        return sorted(data, key=lambda tup: tup[2])

    # get top n users
    def top(self, n, data):
        """Print and return the last ``n`` rows of ``data``.

        With ``data`` sorted ascending by ``rank``, these are the rows with
        the highest scores.

        :param n: number of rows to take; values <= 0 yield an empty list
            (a plain ``data[-0:]`` slice would yield the whole list).
        :param data: sequence of rows.
        :return: the selected rows.
        """
        best = data[-n:] if n > 0 else []
        # Parenthesized single-argument form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(best)
        return best