def main():
    """Run LDA topic modeling over location-post tags and write the topics.

    Pipeline: load locations and per-user posts, keep only posts inside the
    global [FILTER_TIME_S, FILTER_TIME_E) window, attach users' tags to
    locations, build a tag corpus, fit LDA with TOPIC_NUM topics, and dump
    the tag-topic / location-topic tables to the configured output files.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    # Load raw data: locations plus every user's posts from one file.
    locations = clocation.open_locations()
    users = cuser.open_users_posts_afile(USER_POSTS_FILE)

    # Restrict each user's posts to the configured time window.
    print("Sampling users posts...")
    for user in users.values():
        user.posts = [
            p for p in user.posts
            if (p.time > FILTER_TIME_S) and (p.time < FILTER_TIME_E)
        ]

    # Attach the surviving users' tags to their locations, then vectorize.
    locations = clocation.fit_users_to_location(locations, users, "tags")
    corpus = ctag.get_location_posts_corpus(locations)
    vector, tag_name = clda.get_tag_vector(corpus)

    # Fit the topic model and persist both topic views.
    topic_word, doc_topic = clda.fit_lda(vector, tag_name, TOPIC_NUM)
    ccluster.output_topics(
        topic_word,
        doc_topic,
        tag_name,
        [x.lid for x in locations.values()],
        OUTPUT_TAG_TOPIC,
        OUTPUT_LOCATION_TOPIC,
    )

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
def main():
    """Fuzzy c-means clustering of locations using tag-intersection affinity.

    Loads locations and users, counts users per location, builds the pairwise
    tag-intersection matrix, then runs cfuzzy.cmeans_intersect with the
    "kthCluster_LocationFrequency" variant (top-k locations per cluster,
    weighted by per-location user counts).  Cluster labels are stored on each
    location and written to OUTPUT_MAP / OUTPUT_CLUSTER.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    # Load data and attach user tags/ids to locations; the raw user table is
    # no longer needed afterwards, so free it eagerly.
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(locations, users, "tags", "uid")
    del users

    set_location_tags(locations)
    set_location_user_count(locations)

    # Feature matrices: lat/lng coordinates, tag-intersection affinity, and
    # per-location visit frequency (user count).
    coordinate = numpy.array(
        [(float(x.lat), float(x.lng)) for x in locations.values()]
    )
    intersection = get_tag_intersection(locations.values())
    location_frequency = numpy.array([x.usercount for x in locations.values()])

    # Diagnostics before clustering.
    print("avg location_frequency:",
          sum(location_frequency) / len(location_frequency),
          " max:", max(location_frequency),
          " min:", min(location_frequency))
    print("location 1:",
          list(locations.values())[0].lname,
          list(locations.values())[0].lid)
    print("intersection.sum:",
          intersection.sum(axis=0)[0:6],
          intersection.sum(axis=1)[0:6])
    print("location_frequency:", location_frequency.shape)

    # Intersect clustering with the kth locations in each cluster and the
    # location frequency as weight.  (Plain / kthCluster-only variants of
    # cmeans_intersect exist as alternatives.)
    cntr1, u, u0, d1, d2, d, jm, p, fpc, membership = cfuzzy.cmeans_intersect(
        coordinate.T,
        intersection,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        w=WEIGHT,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency",
    )

    # Record each location's hard cluster assignment.
    for idx, loc in enumerate(locations.values()):
        loc.cluster = membership[idx]

    # Emit the map overlay and the per-cluster location listing.
    cpygmaps.output_clusters(
        [(float(x.lat), float(x.lng), str(x.cluster) + " >> " + x.lname)
         for x in locations.values()],
        membership,
        CLUSTER_NUM,
        OUTPUT_MAP,
    )
    cfuzzy.output_location_cluster(locations.values(), "cluster", OUTPUT_CLUSTER)

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
def main(*argv):
    """Cluster locations by visiting users after trajectory-based filtering.

    Optional positional args override the module-level time window:
    argv[0] -> FILTER_TIME_S, argv[1] -> FILTER_TIME_E (UNIX timestamps).
    Users with no valid day-split trajectory (after removing adjacent
    duplicates and too-short sequences) are dropped, remaining users are
    fitted to locations, and cfuzzy.cmeans_location assigns fuzzy clusters.

    Returns:
        (users, locations) after filtering and cluster assignment.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    # Optionally override the global UNIXTIME filter window from argv.
    global FILTER_TIME_S
    global FILTER_TIME_E
    if argv:
        FILTER_TIME_S = argv[0]
        FILTER_TIME_E = argv[1]

    # Load locations and per-user posts.
    locations = clocation.open_locations()
    users = cuser.open_users_posts_afile(USER_POSTS_FILE)

    # Preprocessing: drop users outside the time window, then drop users
    # whose posts yield no usable trajectory once split by day, deduplicated
    # by adjacent location, and pruned of short sequences.
    removes = filter_users_timeperiod(users, FILTER_TIME_S, FILTER_TIME_E)
    sequences = ctrajectory.split_trajectory_byday(
        [u.posts for u in users.values() if len(u.posts) != 0]
    )
    sequences = ctrajectory.remove_adjacent_location(sequences)
    sequences = ctrajectory.remove_short(sequences)
    surviving_uids = {seq[0].uid for seq in sequences}
    removes = list(set(removes) | (set(users.keys()) - surviving_uids))
    for uid in removes:
        del users[uid]
    print(" remain users #:", len(users.keys()))

    # Attach surviving users to locations and compute per-location weights.
    locations = clocation.fit_users_to_location(locations, users, "uid")
    set_location_user_count(locations)
    coordinate = numpy.array(
        [(float(x.lat), float(x.lng)) for x in locations.values()]
    )
    location_frequency = numpy.array([x.usercount for x in locations.values()])

    # Fuzzy c-means over coordinates, frequency-weighted, top-k per cluster.
    cntr, u, u0, d, jm, p, fpc, membership = cfuzzy.cmeans_location(
        coordinate.T,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        Y,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency",
    )
    locations = ccluster.fit_locations_membership(locations, u, locations.keys())
    locations = ccluster.fit_locations_cluster(locations, membership, locations.keys())

    # Map/cluster-file output is currently disabled; re-enable via
    # cpygmaps.output_clusters / output_representatives /
    # ccluster.output_location_cluster when needed.

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")
    return users, locations
def main():
    """Tag-intersection fuzzy clustering of locations (duplicate entry point).

    NOTE(review): this is a byte-for-byte duplicate of an earlier `main`
    definition in this file; since both share the name, only the last one
    defined is reachable.  Behavior: load locations/users, build coordinate,
    tag-intersection, and user-count arrays, run cfuzzy.cmeans_intersect
    ("kthCluster_LocationFrequency"), tag every location with its cluster,
    and write OUTPUT_MAP and OUTPUT_CLUSTER.
    """
    print("--------------------------------------")
    print("STARTTIME:", (datetime.datetime.now()))
    print("--------------------------------------")

    # Load and join data; drop the user table once folded into locations.
    locations = clocation.get_locations_list()
    users = cuser.get_users_posts_afile(USER_POSTS_FILE)
    locations = clocation.fit_users_to_location(locations, users, "tags", "uid")
    del users

    set_location_tags(locations)
    set_location_user_count(locations)

    # Clustering inputs.
    loc_list = locations.values()
    coordinate = numpy.array([(float(x.lat), float(x.lng)) for x in loc_list])
    intersection = get_tag_intersection(loc_list)
    location_frequency = numpy.array([x.usercount for x in loc_list])

    # Pre-clustering diagnostics.
    print("avg location_frequency:",
          sum(location_frequency) / len(location_frequency),
          " max:", max(location_frequency),
          " min:", min(location_frequency))
    first = list(locations.values())[0]
    print("location 1:", first.lname, first.lid)
    print("intersection.sum:",
          intersection.sum(axis=0)[0:6],
          intersection.sum(axis=1)[0:6])
    print("location_frequency:", location_frequency.shape)

    # kth-location, frequency-weighted intersect clustering (other
    # cmeans_intersect variants omit MAX_KTH and/or location_frequency).
    (cntr1, u, u0, d1, d2, d, jm, p, fpc,
     membership) = cfuzzy.cmeans_intersect(
        coordinate.T,
        intersection,
        CLUSTER_NUM,
        MAX_KTH,
        location_frequency,
        w=WEIGHT,
        e=ERROR,
        algorithm="kthCluster_LocationFrequency",
    )

    # Store the hard assignment on each location object.
    for i, key in enumerate(locations.keys()):
        locations[key].cluster = membership[i]

    # Write the annotated map and the cluster listing.
    cpygmaps.output_clusters(
        [(float(x.lat), float(x.lng), str(x.cluster) + " >> " + x.lname)
         for x in locations.values()],
        membership,
        CLUSTER_NUM,
        OUTPUT_MAP,
    )
    cfuzzy.output_location_cluster(locations.values(), "cluster", OUTPUT_CLUSTER)

    print("--------------------------------------")
    print("ENDTIME:", (datetime.datetime.now()))
    print("--------------------------------------")