def fill_up_clusters(id_list, clusters):
    """Assign every id in *id_list* to its most similar cluster, in place.

    For each id, ``theta.similarity`` is evaluated between the id's location
    record and the record of every existing cluster head. The id joins the
    best-scoring cluster when that score is at least ``ALPHA``; otherwise it
    becomes the head of a new cluster. When ``clusters`` is empty the id
    silently seeds the first cluster.

    Args:
        id_list: Iterable of location ids to place.
        clusters: List of ``Cluster`` objects; mutated and extended in place.

    Returns:
        The same ``clusters`` list that was passed in.
    """
    for i, place_id in enumerate(id_list):
        print(
            f'current loop: {i}\tcurrent clusters: {len(clusters)}\tcurrent id: {place_id}'
        )
        if not clusters:
            clusters.append(Cluster(head=place_id))
            continue

        # The record of the id being placed does not change while scanning
        # the clusters, so fetch it once instead of once per cluster.
        place_info = database_getter.get_location_info(place_id)

        max_index = -1
        max_score = -1e9
        for n, c in enumerate(clusters):
            s = theta.similarity(
                place_info, database_getter.get_location_info(c.head))
            # Strict comparison: the earliest cluster wins a tie.
            if s > max_score:
                max_score = s
                max_index = n

        if max_score < ALPHA:
            print("Add a new cluster for {}".format(place_info[0]))
            clusters.append(Cluster(head=place_id))
        else:
            print("{} belongs to the same cluster as {}".format(
                place_info[0],
                database_getter.get_location_info(clusters[max_index].head)[0]))
            clusters[max_index].add_member(place_id)
    return clusters
def update_ave_dist_to_center(self):
    """Recompute the mean absolute offset of the members from the centre.

    Stores a ``(lat_offset, long_offset)`` tuple in
    ``self.ave_dist_to_center``, where each component is the mean of
    ``|member coordinate - centre coordinate|`` over ``self.members``.
    Indices 2 and 3 of a location record are used as latitude and longitude.

    An empty cluster gets ``(0.0, 0.0)`` instead of raising
    ``ZeroDivisionError``.
    """
    member_count = len(self.members)
    if member_count == 0:
        # Nothing to average; avoids dividing by zero when this runs after
        # the last member has been removed.
        self.ave_dist_to_center = (0.0, 0.0)
        return

    total_lat_dist_to_center = total_long_dist_to_center = 0
    for member_id in self.members:
        # One lookup per member supplies both coordinates.
        member_info = database_getter.get_location_info(member_id)
        total_lat_dist_to_center += math.fabs(member_info[2] - self.center[0])
        total_long_dist_to_center += math.fabs(member_info[3] - self.center[1])

    self.ave_dist_to_center = (
        total_lat_dist_to_center / member_count,
        total_long_dist_to_center / member_count,
    )
def _non_address_types(info):
    """Return the types of a location record that are not address types."""
    return [t for t in info[4].split(',')
            if not database_getter.is_in_address_type(t)]


def build_new_cluster(uid, id, clusters) -> bool:
    """Try to start a new cluster around place *id* and append it to *clusters*.

    If the place's associated event (record index 7) is ``'school'``, the new
    cluster is initialised from the ids returned by
    ``database_getter.get_location_with_event_as_list(uid, 'school')``.

    Otherwise, for each non-address type of the place, nearby places are
    looked up with ``FindPlaces.get_nearby_places`` and narrowed to the ones
    the user has visited. A visited place that already sits in another
    cluster is taken over only when it shares more types with the new place
    than with that cluster's ``dominant_types``; otherwise it is excluded.
    The first type that yields at least ``theta.MIN_VISITED_PLACE`` places
    (including *id* itself) produces the cluster.

    Places are removed from their old cluster only once the new cluster is
    certain to be built, so a failed attempt no longer leaves them without
    any cluster.

    Args:
        uid: User id passed through to the database helpers.
        id: Id of the place that needs a cluster.
        clusters: List of existing clusters; may be mutated and extended.

    Returns:
        True if a cluster was created and appended, False otherwise.
    """
    print("Building a new cluster...")
    info = database_getter.get_location_info(uid, id)
    lat, long = info[2], info[3]
    types = _non_address_types(info)
    associate_event = info[7]

    if associate_event == 'school':
        new_cluster = Cluster(uid=uid)
        id_list = database_getter.get_location_with_event_as_list(
            uid, associate_event)
        new_cluster.init_cluster(set(id_list))
        clusters.append(new_cluster)
        return True

    own_types = set(types)
    for t in types:
        print(f"type = {t}")
        # Index 1 of each nearby-place result is used as the place id.
        id_list = [place[1] for place in FindPlaces.get_nearby_places(
            location=(lat, long), keyword=t,
            radius=theta.MAX_CLUSTER_RADIUS)]
        visited_ids = set(database_getter.contains_visited_places(
            uid, id_list))
        print("\tvisted places that have the same type:")

        exclude = set()
        pending_moves = []  # (old cluster, place id) pairs to apply on success
        for v in visited_ids:
            v_info = database_getter.get_location_info(uid, v)
            # The result is used as a 1-based cluster position; 0 means
            # the place is in no cluster.
            n = is_in_clusters(uid, v, clusters)
            if n != 0:
                print("\t\t(already in another cluster)")
                v_types = set(_non_address_types(v_info))
                v_id_overlap = len(v_types & own_types)
                v_cluster_overlap = len(
                    v_types & set(clusters[n - 1].dominant_types))
                if v_id_overlap > v_cluster_overlap:
                    pending_moves.append((clusters[n - 1], v))
                else:
                    exclude.add(v)
            print(f"\t\t{v} {v_info[0]}")

        if len(exclude) >= 3:
            print("warning: exclude length >= 3")
        visited_ids = visited_ids - exclude
        visited_ids.add(id)

        if len(visited_ids) >= theta.MIN_VISITED_PLACE:
            print("\t\thas enough to build a cluster!")
            # Only now detach the taken-over places from their old clusters.
            for old_cluster, moved_id in pending_moves:
                print("\t\t(removed from old cluster)")
                old_cluster.remove_member(moved_id)
            new_cluster = Cluster(uid=uid)
            new_cluster.init_cluster(visited_ids)
            clusters.append(new_cluster)
            return True
    return False
def update_center_after_adding(self, place_id, old_lat=None, old_long=None):
    """Shift the cluster centre to account for one additional member.

    The old centre is weighted by the current ``len(self.members)``, so this
    must run before *place_id* is inserted into ``self.members``.

    Args:
        place_id: Id of the place being added; indices 2 and 3 of its
            location record are used as latitude and longitude.
        old_lat: Ignored. Kept so existing positional calls still bind; the
            previous body overwrote it with ``self.center[0]`` as well.
        old_long: Ignored, for the same reason as ``old_lat``.

    Returns:
        The new ``(lat, long)`` centre, which is also stored in
        ``self.center``.
    """
    # NOTE(review): other methods call get_location_info(self.uid, id);
    # confirm which arity database_getter expects here.
    place_info = database_getter.get_location_info(place_id)
    lat, long = place_info[2], place_info[3]
    old_lat, old_long = self.center[0], self.center[1]

    member_count = len(self.members)
    new_lat = (member_count * old_lat + lat) / (member_count + 1)
    new_long = (member_count * old_long + long) / (member_count + 1)

    # Store the result, mirroring update_center_after_removing; callers that
    # used the return value still receive it.
    self.center = (new_lat, new_long)
    return self.center
def fits_into_clusters(uid, id, clusters) -> bool:
    """Add place *id* to the first cluster that accepts it.

    Args:
        uid: User id passed to ``database_getter.get_location_info``.
        id: Id of the place to place.
        clusters: Iterable of clusters, tried in order.

    Returns:
        True if some cluster's ``accept_id`` was truthy for the place's
        location record (the place is added to that cluster), else False.
    """
    record = database_getter.get_location_info(uid, id)
    # Stop at the first accepting cluster; later clusters are never asked.
    target = next(
        (candidate for candidate in clusters if candidate.accept_id(record)),
        None,
    )
    if target is None:
        return False
    target.add_member(id)
    return True
def update_center_after_removing(self, id):
    """Shift the cluster centre to account for the removal of member *id*.

    The formula uses the current ``len(self.members)`` as the old member
    count, so this must run before *id* is taken out of ``self.members``.

    When *id* is the only remaining member there is nothing left to average;
    ``self.center`` is left unchanged instead of raising
    ``ZeroDivisionError``.

    Args:
        id: Id of the place being removed; indices 2 and 3 of its location
            record are used as latitude and longitude.
    """
    member_count = len(self.members)
    remaining = member_count - 1
    if remaining <= 0:
        # Removing the last member: keep the last known centre.
        return

    id_info = database_getter.get_location_info(self.uid, id)
    lat, long = id_info[2], id_info[3]
    old_lat, old_long = self.center[0], self.center[1]
    new_lat = (member_count * old_lat - lat) / remaining
    new_long = (member_count * old_long - long) / remaining
    self.center = (new_lat, new_long)
def _init_center(self):
    """Set ``self.center`` to the mean coordinates of all current members.

    Indices 2 and 3 of each member's location record are averaged
    separately and stored as a ``(lat, long)`` tuple.
    """
    records = [
        database_getter.get_location_info(member_id)
        for member_id in self.members
    ]
    count = len(self.members)
    mean_lat = sum(record[2] for record in records) / count
    mean_long = sum(record[3] for record in records) / count
    self.center = (mean_lat, mean_long)
def remove_member(self, id):
    """Remove place *id* from the cluster and refresh every aggregate.

    The centre and staying-time updates run before the id leaves
    ``self.members`` because they use the pre-removal member count; the
    average distance, type counts and event counts are refreshed afterwards.

    Args:
        id: Id of the member to remove.

    Raises:
        KeyError: If *id* is not a member. The check happens before any
            aggregate is touched, so a failed call leaves the cluster
            unchanged.
    """
    if id not in self.members:
        # Same exception set.remove() would raise, but raised before the
        # frequency, centre and staying time have been modified.
        raise KeyError(id)

    self.frequency -= database_getter.get_location_info(self.uid, id)[6]
    self.update_center_after_removing(id)
    self.update_staying_time_after_removing(id)
    self.members.remove(id)
    self.update_ave_dist_to_center()
    self.remove_types(id)
    self.update_dominant_types()
    self.remove_event(id)
    self.update_event_type()
def add_member(self, id):
    """Add place *id* to the cluster and refresh every aggregate.

    The centre and staying-time updates run before the id enters
    ``self.members`` because they use the pre-insertion member count; the
    average distance, type counts and event counts are refreshed afterwards.

    Adding an id that is already a member is a no-op, so the frequency,
    centre and staying time are not counted twice for a place that
    ``self.members`` holds only once.

    Args:
        id: Id of the place to add.
    """
    if id in self.members:
        return

    self.frequency += database_getter.get_location_info(self.uid, id)[6]
    self.update_center_after_adding(id)
    self.update_staying_time_after_adding(id)
    self.members.add(id)
    self.update_ave_dist_to_center()
    self.add_types(id)
    self.update_dominant_types()
    self.add_event(id)
    self.update_event_type()
def classifier(uid, id, clusters):
    """Make sure place *id* belongs to a cluster, creating one if needed.

    Three steps are tried in order, stopping at the first that succeeds:
    the place is already in a cluster, it fits into an existing cluster, or
    a new cluster can be built around it.

    Args:
        uid: User id passed through to the helpers.
        id: Id of the place to classify.
        clusters: List of existing clusters; may be mutated by the helpers.

    Returns:
        True if the place ends up in a cluster, False if no cluster could
        be built for it.
    """
    place_name = database_getter.get_location_info(uid, id)[0]
    print("location name = {}".format(place_name))

    if is_in_clusters(uid, id, clusters):
        print("Exists in clusters")
        return True
    print("Does not exist in clusters")

    if fits_into_clusters(uid, id, clusters):
        print('Fits into one of the clusters')
        return True
    print("Does not fit into any cluster")

    return bool(build_new_cluster(uid, id, clusters))
def __str__(self):
    """Return a multi-line, human-readable summary of the cluster.

    One line per member (name, types, ``lat,long`` taken from record
    indices 0, 4, 2 and 3) is followed by the centre, the average distance
    to the centre, the dominant types, the average staying time and the
    frequency.

    The previous template had eight placeholders but was given ten values
    (``distance_to_home`` and ``distance_to_school`` came first), so every
    label was paired with the wrong value. Each label is now bound to the
    attribute it names.
    """
    member_lines = []
    for member_id in self.members:
        id_info = database_getter.get_location_info(member_id)
        member_lines.append(
            f"{id_info[0]} {id_info[4]} {id_info[2]},{id_info[3]}\n")
    members_str = "".join(member_lines)

    # NOTE(review): the line breaks of the original template are not visible
    # in the collapsed source; one field per line is assumed here.
    return (
        f"{members_str}center: ({self.center[0]},{self.center[1]})\n"
        f"average distance to center: "
        f"({self.ave_dist_to_center[0]},{self.ave_dist_to_center[1]})\n"
        f"dominant types: {self.dominant_types}\n"
        f"average staying time: {self.ave_staying_time}\n"
        f"min cluster frequency: {self.frequency}\n"
    )
def _init_ave_staying_time(self):
    """Set ``self.ave_staying_time`` to the mean staying time of the members.

    The staying time is read from index 5 of each member's location record.
    The mean is computed from scratch, so whatever value the attribute held
    before does not leak into the result and repeated calls give the same
    answer. An empty cluster gets 0 instead of raising
    ``ZeroDivisionError``.
    """
    if not self.members:
        self.ave_staying_time = 0
        return

    total_staying_time = sum(
        database_getter.get_location_info(self.uid, member)[5]
        for member in self.members
    )
    self.ave_staying_time = total_staying_time / len(self.members)
def _init_frequency(self):
    """Add every member's frequency to ``self.frequency``.

    The per-place value is read from index 6 of the member's location
    record. The attribute is incremented, not reset, so it must already
    hold the desired starting value.
    """
    lookup = database_getter.get_location_info
    for member_id in self.members:
        member_record = lookup(self.uid, member_id)
        self.frequency += member_record[6]
def update_staying_time_after_adding(self, id):
    """Fold the staying time of place *id* into the running average.

    The current ``len(self.members)`` is used as the weight of the old
    average, so this expects to run before *id* is inserted into
    ``self.members``.

    Args:
        id: Id of the place being added; index 5 of its location record is
            used as the staying time.
    """
    current_count = len(self.members)
    accumulated = self.ave_staying_time * current_count
    added_stay = database_getter.get_location_info(self.uid, id)[5]
    self.ave_staying_time = (accumulated + added_stay) / (current_count + 1)
def add_types(self, id):
    """Increment the type counter for every non-address type of place *id*.

    The types come from the comma-separated string at index 4 of the
    place's location record; entries for which
    ``database_getter.is_in_address_type`` is truthy are skipped.

    Args:
        id: Id of the place whose types are counted.
    """
    record = database_getter.get_location_info(id)
    for type_name in record[4].split(','):
        if database_getter.is_in_address_type(type_name):
            continue
        # NOTE(review): relies on _type_count supplying a default for unseen
        # keys (e.g. defaultdict/Counter) - confirm where it is created.
        self._type_count[type_name] += 1
def remove_event(self, place_id):
    """Decrement the event counter for the event associated with *place_id*.

    Args:
        place_id: Id of the place being removed; index 7 of its location
            record is used as the key into ``self._event_count``.
    """
    place_record = database_getter.get_location_info(self.uid, place_id)
    event_key = place_record[7]
    self._event_count[event_key] -= 1
# NOTE(review): the line below opens with the closing statements of a function
# whose header is not on this line (they read like the tail of
# build_new_cluster), followed by the module-level driver.
# Driver: load the location ids for user "1", shuffle them, run classifier()
# on each id against a shared `clusters` list, and collect every id for which
# classifier() returned a falsy value in `alienated_points`.
# Everything after the first '#' on the line is disabled "REVISIT" code.
new_cluster = Cluster(uid=uid) new_cluster.init_cluster(visited_ids) clusters.append(new_cluster) return True return False id_list = database_getter.get_location_index_as_list("1") random.shuffle(id_list) clusters = [] alienated_points = [] for n, id in enumerate(id_list): print("=======================================================") name = database_getter.get_location_info("1", id)[0] if classifier(uid="1",id=id,\ clusters=clusters): print(f'RESULT: {name} is in a cluster') else: print(f'RESULT: {name} did not build a cluster') alienated_points.append(id) # print("\n\n++++++++++++++++++++++++++++++++++++++++++++++++++++") # print("REVISIT") # revisit = [] # new_alienated_points = [] # for a in alienated_points: # types = [t for t in database_getter.get_location_info(\ # "1",a)[4].split(",") if not \ # database_getter.is_in_address_type(t)]