def fill_up_clusters(id_list, clusters):
    """Assign every place id in ``id_list`` to a cluster, mutating ``clusters``.

    For each id, ``theta.similarity`` is evaluated between the id's location
    info and the location info of every existing cluster head. The id joins
    the best-scoring cluster unless that best score is below ``ALPHA``, in
    which case it becomes the head of a new cluster. When ``clusters`` is
    empty the id simply becomes the head of the first cluster.

    Args:
        id_list: Iterable of place ids to distribute.
        clusters: List of ``Cluster`` objects; appended to and updated in
            place.

    Returns:
        The same ``clusters`` list that was passed in.
    """
    for i, place_id in enumerate(id_list):
        print(
            f'current loop: {i}\tcurrent clusters: {len(clusters)}\tcurrent id: {place_id}'
        )
        if not clusters:
            clusters.append(Cluster(head=place_id))
            continue

        # The candidate's record does not change while we scan the clusters,
        # so fetch it once instead of once per cluster (and again per log line).
        place_info = database_getter.get_location_info(place_id)

        max_index = -1
        max_score = -1e9
        for n, c in enumerate(clusters):
            s = theta.similarity(
                place_info, database_getter.get_location_info(c.head))
            # Strict comparison: on ties the earliest cluster wins.
            if s > max_score:
                max_score = s
                max_index = n

        if max_score < ALPHA:
            print("Add a new cluster for {}".format(place_info[0]))
            clusters.append(Cluster(head=place_id))
        else:
            print("{} belongs to the same cluster as {}".format(
                place_info[0],
                database_getter.get_location_info(clusters[max_index].head)[0]))
            clusters[max_index].add_member(place_id)
    return clusters
 def update_ave_dist_to_center(self):
     """Recompute ``self.ave_dist_to_center`` from the current members.

     The stored value is a ``(lat, long)`` tuple: the mean absolute
     difference between each member's latitude/longitude (fields 2 and 3 of
     its location info) and the corresponding component of ``self.center``.

     Raises:
         ZeroDivisionError: If ``self.members`` is empty.
     """
     total_lat_dist_to_center = total_long_dist_to_center = 0
     for member_id in self.members:
         # Both coordinates come from the same record, so look it up once.
         member_info = database_getter.get_location_info(member_id)
         total_lat_dist_to_center += math.fabs(member_info[2] - self.center[0])
         total_long_dist_to_center += math.fabs(member_info[3] - self.center[1])
     member_count = len(self.members)
     self.ave_dist_to_center = (
         total_lat_dist_to_center / member_count,
         total_long_dist_to_center / member_count,
     )
def build_new_cluster(uid, id, clusters) -> bool:
    """Try to start a new cluster around place ``id`` and append it to ``clusters``.

    If the place's associated event (field 7 of its location info) is
    ``'school'``, the cluster is built from every location of this user that
    has that event. Otherwise each non-address type of the place is tried in
    turn: nearby places of that type are fetched, narrowed down to the ones
    the user has visited, and a cluster is created as soon as one type yields
    at least ``theta.MIN_VISITED_PLACE`` candidates (the place itself
    included).

    A visited place that ``is_in_clusters`` reports as already clustered is
    either removed from its current cluster (when it shares more types with
    this place than with that cluster's dominant types) or kept out of the
    candidate set.

    Args:
        uid: User id forwarded to the database and cluster helpers.
        id: Id of the place that seeds the new cluster.
        clusters: List of clusters; may be appended to, and existing clusters
            may lose members.

    Returns:
        True if a cluster was created and appended, False otherwise.
    """
    print("Building a new cluster...")
    seed_info = database_getter.get_location_info(uid, id)
    lat = seed_info[2]
    long = seed_info[3]
    types = []
    for type_name in seed_info[4].split(','):
        if not database_getter.is_in_address_type(type_name):
            types.append(type_name)
    associate_event = seed_info[7]

    if associate_event == 'school':
        school_cluster = Cluster(uid=uid)
        school_ids = database_getter.get_location_with_event_as_list(
            uid, associate_event)
        school_cluster.init_cluster(set(school_ids))
        clusters.append(school_cluster)
        return True

    seed_types = set(types)
    for keyword in types:
        print(f"type = {keyword}")
        nearby_places = FindPlaces.get_nearby_places(
            location=(lat, long),
            keyword=keyword,
            radius=theta.MAX_CLUSTER_RADIUS,
        )
        nearby_ids = [place[1] for place in nearby_places]
        visited_ids = set(
            database_getter.contains_visited_places(uid, nearby_ids))

        print("\tvisted places that have the same type:")
        exclude = set()
        for visited in visited_ids:
            visited_info = database_getter.get_location_info(uid, visited)
            # A non-zero result is treated as a 1-based position in `clusters`.
            position = is_in_clusters(uid, visited, clusters)
            if position != 0:
                print("\t\t(already in another cluster)")
                visited_types = {
                    type_name
                    for type_name in visited_info[4].split(',')
                    if not database_getter.is_in_address_type(type_name)
                }
                owner = clusters[position - 1]
                shared_with_seed = len(visited_types & seed_types)
                shared_with_owner = len(
                    visited_types & set(owner.dominant_types))
                if shared_with_seed <= shared_with_owner:
                    exclude.add(visited)
                else:
                    # NOTE(review): the place leaves its old cluster here,
                    # before we know whether the new cluster will actually
                    # be built — confirm that this is intended.
                    print("\t\t(removed from old cluster)")
                    owner.remove_member(visited)
            print(f"\t\t{visited} {visited_info[0]}")

        if len(exclude) >= 3:
            print("warning: exclude length >= 3")

        candidates = visited_ids - exclude
        candidates.add(id)
        if len(candidates) >= theta.MIN_VISITED_PLACE:
            print("\t\thas enough to build a cluster!")
            new_cluster = Cluster(uid=uid)
            new_cluster.init_cluster(candidates)
            clusters.append(new_cluster)
            return True
    return False
 def update_center_after_adding(self, place_id):
     """Fold a new place into the running mean stored in ``self.center``.

     The current ``len(self.members)`` is used as the weight of the old
     center, so this assumes ``place_id`` has not been added to
     ``self.members`` yet.

     Args:
         place_id: Id of the place being added; fields 2 and 3 of its
             location info are read as latitude and longitude.

     Returns:
         The new ``(lat, long)`` center, which is also stored on
         ``self.center``.
     """
     # NOTE(review): this keeps the block's original one-argument lookup;
     # some other methods in this file pass ``self.uid`` first — confirm
     # which signature get_location_info expects.
     place_info = database_getter.get_location_info(place_id)
     lat, long = place_info[2], place_info[3]
     old_lat, old_long = self.center[0], self.center[1]
     member_count = len(self.members)
     new_lat = (member_count * old_lat + lat) / (member_count + 1)
     new_long = (member_count * old_long + long) / (member_count + 1)
     # Store the result: callers invoke this for its effect on the cluster.
     self.center = (new_lat, new_long)
     return new_lat, new_long
def fits_into_clusters(uid, id, clusters) -> bool:
    """Add place ``id`` to the first cluster that accepts it.

    Clusters are asked in list order via ``accept_id`` with the place's
    location info; only the first accepting cluster receives the member.

    Returns:
        True if some cluster accepted the place, False if none did.
    """
    place_info = database_getter.get_location_info(uid, id)
    # The generator is lazy, so clusters after the first match are not asked.
    accepting = next(
        (cluster for cluster in clusters if cluster.accept_id(place_info)),
        None,
    )
    if accepting is None:
        return False
    accepting.add_member(id)
    return True
示例#6
0
 def update_center_after_removing(self, id):
     """Take place ``id`` out of the running mean stored in ``self.center``.

     ``len(self.members)`` is used as the size before removal, so the place
     is expected to still be in ``self.members`` when this runs.

     Raises:
         ZeroDivisionError: If ``self.members`` holds exactly one element.
     """
     removed_info = database_getter.get_location_info(self.uid, id)
     removed_lat = removed_info[2]
     removed_long = removed_info[3]
     center_lat, center_long = self.center[0], self.center[1]
     size_before = len(self.members)
     size_after = size_before - 1
     self.center = (
         (size_before * center_lat - removed_lat) / size_after,
         (size_before * center_long - removed_long) / size_after,
     )
 def _init_center(self):
     """Set ``self.center`` to the mean latitude/longitude of all members.

     Latitude and longitude are fields 2 and 3 of each member's location
     info.

     Raises:
         ZeroDivisionError: If ``self.members`` is empty.
     """
     lat_sum = 0
     long_sum = 0
     for member_id in self.members:
         member_info = database_getter.get_location_info(member_id)
         lat_sum += member_info[2]
         long_sum += member_info[3]
     member_count = len(self.members)
     self.center = (lat_sum / member_count, long_sum / member_count)
示例#8
0
 def remove_member(self, id):
     """Remove place ``id`` from this cluster and refresh its statistics.

     The order of the calls matters: the center and staying-time helpers run
     while ``id`` is still in ``self.members``; the average distance to the
     center is recomputed only after the removal.
     """
     # Field 6 of the location record is the amount subtracted from the
     # cluster's frequency total.
     self.frequency -= database_getter.get_location_info(self.uid, id)[6]
     # Reads len(self.members) as the pre-removal size, so it must run
     # before members.remove().
     self.update_center_after_removing(id)
     # presumably also relies on the pre-removal size — TODO confirm
     self.update_staying_time_after_removing(id)
     self.members.remove(id)
     # Recomputed from the remaining members and the updated center.
     self.update_ave_dist_to_center()
     # Adjust the per-type bookkeeping first, then refresh what is derived
     # from it.
     self.remove_types(id)
     self.update_dominant_types()
     # Same pattern for the per-event bookkeeping.
     self.remove_event(id)
     self.update_event_type()
示例#9
0
 def add_member(self, id):
     """Add place ``id`` to this cluster and refresh its statistics.

     The order of the calls matters: the center and staying-time helpers run
     before ``id`` is inserted into ``self.members``; the average distance to
     the center is recomputed only after the insertion.
     """
     # Field 6 of the location record is the amount added to the cluster's
     # frequency total.
     self.frequency += database_getter.get_location_info(self.uid, id)[6]
     # Called while self.members still has its pre-insertion size.
     self.update_center_after_adding(id)
     # Weights the old average by len(self.members), so it must run before
     # members.add().
     self.update_staying_time_after_adding(id)
     self.members.add(id)
     # Recomputed from the enlarged member set and the updated center.
     self.update_ave_dist_to_center()
     # Adjust the per-type bookkeeping first, then refresh what is derived
     # from it.
     self.add_types(id)
     self.update_dominant_types()
     # Same pattern for the per-event bookkeeping.
     self.add_event(id)
     self.update_event_type()
def classifier(uid, id, clusters):
    """Make sure place ``id`` ends up in some cluster, logging each step.

    Three attempts are made in order, stopping at the first that succeeds:
    the place is already in a cluster (``is_in_clusters``), it fits into an
    existing one (``fits_into_clusters``), or a new cluster can be built
    around it (``build_new_cluster``).

    Returns:
        True if one of the attempts succeeded, False if none did.
    """
    location_name = database_getter.get_location_info(uid, id)[0]
    print("location name = {}".format(location_name))

    if is_in_clusters(uid, id, clusters):
        print("Exists in clusters")
        return True

    print("Does not exist in clusters")
    if fits_into_clusters(uid, id, clusters):
        print('Fits into one of the clusters')
        return True

    print("Does not fit into any cluster")
    if not build_new_cluster(uid, id, clusters):
        return False
    return True
    def __str__(self):
        """Return a multi-line, human-readable summary of the cluster.

        One line per member (fields 0, 4, 2 and 3 of its location info)
        is followed by the cluster's distances to home and school, its
        center, average distance to the center, dominant types, average
        staying time and frequency.
        """
        member_lines = []
        for member_id in self.members:
            info = database_getter.get_location_info(member_id)
            member_lines.append(
                f"{info[0]} {info[4]} {info[2]},{info[3]}\n")

        # Every value is interpolated next to its own label. The previous
        # positional template had fewer placeholders than arguments, which
        # shifted each value after the member list two labels down.
        summary = (
            f"distance to home: {self.distance_to_home}\n"
            f"distance to school: {self.distance_to_school}\n"
            f"center: ({self.center[0]},{self.center[1]})\n"
            f"average distance to center: "
            f"({self.ave_dist_to_center[0]},{self.ave_dist_to_center[1]})\n"
            f"dominant types: {self.dominant_types}\n"
            f"average staying time: {self.ave_staying_time} min\n"
            f"cluster frequency: {self.frequency}\n"
            # Trailing indentation kept as it was in the original template.
            "        "
        )
        return "".join(member_lines) + summary
 def _init_ave_staying_time(self):
     """Turn ``self.ave_staying_time`` into the mean staying time of the members.

     Field 5 of every member's location info is added onto the value
     ``self.ave_staying_time`` already holds, and the total is then divided
     by the number of members.

     Raises:
         ZeroDivisionError: If ``self.members`` is empty.
     """
     for member_id in self.members:
         member_info = database_getter.get_location_info(self.uid, member_id)
         self.ave_staying_time += member_info[5]
     self.ave_staying_time /= len(self.members)
示例#13
0
 def _init_frequency(self):
     """Add field 6 of every member's location info onto ``self.frequency``."""
     for member_id in self.members:
         member_info = database_getter.get_location_info(self.uid, member_id)
         self.frequency += member_info[6]
示例#14
0
 def update_staying_time_after_adding(self, id):
     """Fold place ``id`` into the running mean ``self.ave_staying_time``.

     The old average is weighted by the current ``len(self.members)``, so
     the place is expected not to be in ``self.members`` yet. The added
     value is field 5 of the place's location info.
     """
     member_count = len(self.members)
     weighted_total = self.ave_staying_time * member_count
     added_time = database_getter.get_location_info(self.uid, id)[5]
     self.ave_staying_time = (weighted_total + added_time) / (member_count + 1)
 def add_types(self, id):
     """Increment ``self._type_count`` for each non-address type of place ``id``.

     The types are the comma-separated entries of field 4 of the place's
     location info; entries for which
     ``database_getter.is_in_address_type`` is true are skipped.
     """
     place_info = database_getter.get_location_info(id)
     # Filter first, count afterwards, so all type checks finish before any
     # counter is touched.
     kept_types = []
     for type_name in place_info[4].split(','):
         if database_getter.is_in_address_type(type_name):
             continue
         kept_types.append(type_name)
     for type_name in kept_types:
         self._type_count[type_name] += 1
示例#16
0
 def remove_event(self, place_id):
     """Decrement ``self._event_count`` for the event of place ``place_id``.

     The event is field 7 of the place's location info.
     """
     place_info = database_getter.get_location_info(self.uid, place_id)
     event_name = place_info[7]
     self._event_count[event_name] -= 1
            new_cluster = Cluster(uid=uid)
            new_cluster.init_cluster(visited_ids)
            clusters.append(new_cluster)
            return True
    return False


# Driver: run every stored location of user "1" through the classifier in
# random order, collecting the ones that end up in no cluster.
id_list = database_getter.get_location_index_as_list("1")
random.shuffle(id_list)

clusters = []
alienated_points = []

for n, id in enumerate(id_list):
    print("=======================================================")
    name = database_getter.get_location_info("1", id)[0]
    clustered = classifier(uid="1", id=id, clusters=clusters)
    if not clustered:
        print(f'RESULT: {name} did not build a cluster')
        alienated_points.append(id)
    else:
        print(f'RESULT: {name} is in a cluster')

# print("\n\n++++++++++++++++++++++++++++++++++++++++++++++++++++")
# print("REVISIT")
# revisit = []
# new_alienated_points = []
# for a in alienated_points:
#     types = [t for t in database_getter.get_location_info(\
#         "1",a)[4].split(",") if not \
#         database_getter.is_in_address_type(t)]