def testRetrieveTripsToImproveWithClusters(self):
    """Check that two stored clusters yield two trips to improve.

    Seeds the route-cluster collection with one document for the test user
    holding two clusters (the first ten and the next ten section ids found
    in the section collection), then asserts that the pipeline reports
    exactly two trips for that user.
    """
    sectionList = list(get_section_db().find())
    get_routeCluster_db().insert({
        "user": self.testUUID,
        "clusters": {
            "cluster1": [s["_id"] for s in sectionList[0:10]],
            "cluster2": [s["_id"] for s in sectionList[10:20]],
        },
    })
    trip_list = list(self.pipeline.get_trips_to_improve(self.testUUID))
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(trip_list), 2)
def testRetrieveTripsToImproveWithClusters(self):
    """Verify that the pipeline returns one trip per stored cluster.

    A single route-cluster document with two clusters is inserted for the
    test user; each cluster lists ten section ids taken from the section
    collection. The pipeline is then expected to return two trips.
    """
    sectionList = list(get_section_db().find())
    first_ten = [s["_id"] for s in sectionList[0:10]]
    next_ten = [s["_id"] for s in sectionList[10:20]]
    get_routeCluster_db().insert({
        "user": self.testUUID,
        "clusters": {
            "cluster1": first_ten,
            "cluster2": next_ten,
        },
    })
    # Materialize the result before measuring it so the check also works if
    # the pipeline hands back a lazy iterable rather than a list.
    trip_list = list(self.pipeline.get_trips_to_improve(self.testUUID))
    # assertEquals is a deprecated alias that no longer exists in
    # Python 3.12+; assertEqual is available everywhere.
    self.assertEqual(len(trip_list), 2)
def cluster_route_match_score(segment, step1=100000, step2=100000, method='lcs',
                              radius1=2000, threshold=0.5):
    """Return the mode share of the stored route cluster closest to a segment.

    The user's cluster document for ``method`` is loaded, the route of
    ``segment`` is compared against the route of every cluster key with
    ``fullMatchDistance``, and the key with the smallest distance is
    selected. If that distance is at most ``threshold``, the mode share is
    computed over the cluster's members plus the key itself; otherwise it is
    computed over an empty list.

    Args:
        segment: mapping with at least ``'user_id'`` and ``'_id'``.
        step1, step2, radius1: forwarded unchanged to ``fullMatchDistance``.
        method: clustering method used to select the cluster document; also
            forwarded to ``fullMatchDistance``.
        threshold: largest distance still counted as a match.

    Returns:
        Whatever ``get_mode_share_by_count`` returns for the selected
        section ids (or for ``[]`` when nothing matches).
    """
    cluster_doc = get_routeCluster_db().find_one(
        {'$and': [{'user': segment['user_id']}, {'method': method}]})
    # A user without a cluster document is treated like a user with no
    # clusters instead of failing with a TypeError on None.
    userRouteClusters = cluster_doc['clusters'] if cluster_doc is not None else {}
    route_seg = getRoute(segment['_id'])

    dis = 999999
    # Default to the first key; next(iter(...)) works on both Python 2 and 3,
    # unlike indexing the result of dict.keys().
    choice = next(iter(userRouteClusters), None)
    for idx in userRouteClusters:
        route_idx = getRoute(idx)
        try:
            dis_new = fullMatchDistance(route_seg, route_idx, step1, step2,
                                        method, radius1)
        except RuntimeError:
            # A failed comparison counts as "infinitely far away".
            dis_new = 999999
        if dis_new < dis:
            dis = dis_new
            choice = idx

    if choice is not None and dis <= threshold:
        # Build a new list so the fetched document is not mutated.
        cluster = list(userRouteClusters[choice]) + [choice]
        ModePerc = get_mode_share_by_count(cluster)
    else:
        ModePerc = get_mode_share_by_count([])
    return ModePerc
def plot_each_route_cluster_for_user(user_id, method='lcs'):
    """Draw a user's route cluster onto a pygmaps map and save it as HTML.

    Loads the user's cluster document for ``method``, draws the section
    named by the cluster key and then every member section, each in a
    freshly drawn random colour, and writes the map to
    ``<user_id>_<method>_<index>.html``.

    NOTE(review): the loop ends with ``break``, so only the first cluster is
    plotted. The source was flattened onto one line, so the placement of
    that ``break`` (outer loop level) is reconstructed - please confirm.
    """
    def random_hex_color():
        # Three independent 0-255 draws rendered as '#RRGGBB'.
        return '#%02X%02X%02X' % (random.randint(0, 255),
                                  random.randint(0, 255),
                                  random.randint(0, 255))

    section_coll = get_section_db()
    cluster_doc = get_routeCluster_db().find_one(
        {'$and': [{'user': user_id}, {'method': method}]})
    file_index = 0
    # plot each cluster as a file.
    for idx in list(cluster_doc['clusters'].keys()):
        print(idx)
        # Fixed map arguments (presumably latitude, longitude, zoom).
        gmap = pygmaps.maps(37.8717, -122.2728, 14)
        drawSection(section_coll.find_one({'_id': idx}), 'path', gmap,
                    random_hex_color())
        members = cluster_doc['clusters'][idx]
        print(len(members))
        for position, idi in enumerate(members):
            member_section = section_coll.find_one({'_id': idi})
            if position == 0:
                # Only the first member is echoed, as a debugging aid.
                print(member_section)
            drawSection(member_section, 'path', gmap, random_hex_color())
        gmap.draw(str(user_id) + '_' + method + '_' + str(file_index) + '.html')
        file_index += 1
        break
def plot_each_route_cluster_for_user(user_id, method='lcs'):
    """Plot a user's route cluster on a pygmaps map and write it to HTML.

    The cluster document for ``user_id`` and ``method`` is fetched; the
    section identified by the cluster key and each member section are drawn
    with random colours, and the map is saved as
    ``<user_id>_<method>_<i>.html``.

    NOTE(review): the trailing ``break`` stops after the first cluster. Its
    indentation level was reconstructed from a flattened source line
    (assumed to belong to the outer loop) - please confirm.
    """
    i = 0
    Sections = get_section_db()
    user_route_clusters = get_routeCluster_db().find_one(
        {'$and': [{'user': user_id}, {'method': method}]})

    def r():
        # One random colour channel value.
        return random.randint(0, 255)

    # plot each cluster as a file.
    for idx in user_route_clusters['clusters'].keys():
        # print() with a single argument behaves the same on Python 2 and 3,
        # whereas the former print statements were Python 2 only.
        print(idx)
        # Fixed map arguments (presumably latitude, longitude, zoom).
        gmap = pygmaps.maps(37.8717, -122.2728, 14)
        section = Sections.find_one({'_id': idx})
        color = '#%02X%02X%02X' % (r(), r(), r())
        drawSection(section, 'path', gmap, color)
        print(len(user_route_clusters['clusters'][idx]))
        first = True
        for idi in user_route_clusters['clusters'][idx]:
            section = Sections.find_one({'_id': idi})
            if first:
                # Echo only the first member section for debugging.
                print(section)
                first = False
            color = '#%02X%02X%02X' % (r(), r(), r())
            drawSection(section, 'path', gmap, color)
        gmap.draw(str(user_id) + '_' + method + '_' + str(i) + '.html')
        i += 1
        break
def get_clusters_info(uid):
    """Collect per-cluster field distributions and a representative section.

    Looks up the first route-cluster document for ``uid`` that has a
    ``clusters`` field. For every non-empty cluster, each member section is
    fetched and five of its fields are gathered (via ``appendIfPresent``)
    into five parallel lists; the first fetched section of the cluster is
    kept as its representative.

    Returns:
        A list of ``(distribution_lists, representative_section)`` tuples,
        one per non-empty cluster, or ``[]`` when no document is found.
    """
    # Order matters: position k in the per-cluster result holds field k.
    field_keys = (
        "section_start_datetime",
        "section_end_datetime",
        "section_start_point",
        "section_end_point",
        "confirmed_mode",
    )

    c_db = get_routeCluster_db()
    s_db = get_section_db()
    cluster_doc = c_db.find_one({"clusters": {"$exists": True}, "user": uid})
    if cluster_doc is None:
        return []

    section_lists = cluster_doc["clusters"].values()
    logging.debug(
        "Number of section lists for user %s is %s" % (uid, len(section_lists)))

    results = []
    for section_ids in section_lists:
        logging.debug(
            "Number of sections in sectionList for user %s is %s"
            % (uid, len(section_ids)))
        if len(section_ids) == 0:
            # There's no point in returning this cluster, let's move on
            continue
        per_field = [[] for _ in field_keys]
        for position, section_id in enumerate(section_ids):
            section_json = s_db.find_one({"_id": section_id})
            if position == 0:
                representative_trip = section_json
            for bucket, key in zip(per_field, field_keys):
                appendIfPresent(bucket, section_json, key)
        results.append((per_field, representative_trip))
    return results
def all_user_clusters_to_kml(user, user_id):
    """
    Creates KML files for all of a given user's clusters.

    Fetches the user's "dtw" route-cluster document, announces how many
    clusters it contains, and passes every cluster together with its key to
    cluster_to_kml.
    """
    query = {'$and': [{'user': user_id}, {'method': "dtw"}]}
    clusters = get_routeCluster_db().find_one(query)['clusters']
    announcement = ("Writing " + str(len(clusters))
                    + " clusters to disk for " + user + ".")
    print(announcement)
    for idc in clusters:
        cluster_to_kml(user, clusters[idc], idc)
def update_user_routeClusters(user_id, clusters, method='lcs'):
    """Store ``clusters`` as the route clusters of a user for one method.

    The document identified by ``user_id`` and ``method`` is replaced with
    ``{'user', 'method', 'clusters'}``; if no such document exists yet, one
    is created.

    Args:
        user_id: value stored under (and matched against) ``'user'``.
        clusters: value stored under ``'clusters'``.
        method: value stored under (and matched against) ``'method'``.
    """
    selector = {'user': user_id, 'method': method}
    replacement = {'user': user_id, 'method': method, 'clusters': clusters}
    # A single upsert replaces the former find-then-insert/update sequence,
    # so two concurrent callers can no longer both observe "missing" and
    # insert duplicate documents.
    get_routeCluster_db().update(selector, replacement, upsert=True)
def all_user_clusters_to_kml(user, user_id):
    """
    Creates KML files for all of a given user's clusters.

    Only the cluster document stored for the "dtw" method is used; each
    (key, cluster) pair found in it is handed to cluster_to_kml after a
    one-line progress message has been printed.
    """
    cluster_doc = get_routeCluster_db().find_one(
        {'$and': [{'user': user_id}, {'method': "dtw"}]})
    cluster_pairs = list(cluster_doc['clusters'].items())
    num_clusters = len(cluster_pairs)
    print("Writing " + str(num_clusters) + " clusters to disk for " + user + ".")
    for idc, cluster in cluster_pairs:
        cluster_to_kml(user, cluster, idc)
def get_common_routes_for_user(user_id, method='lcs'):
    """Return the cluster keys that qualify as a user's common routes.

    A cluster key is kept when its cluster has at least three members and
    the section stored under that key has ``distance`` > 2000,
    ``duration`` > 600 and a route (from ``getRoute``) longer than ten
    entries.

    Args:
        user_id: user whose cluster document is read.
        method: clustering method selecting the cluster document.

    Returns:
        List of qualifying cluster keys, in the clusters' iteration order.
    """
    section_coll = get_section_db()
    clusters = get_routeCluster_db().find_one(
        {'$and': [{'user': user_id}, {'method': method}]})['clusters']

    frequent_keys = []
    for idx, members in clusters.items():
        if len(members) < 3:
            # Too few members to be considered "common".
            continue
        section = section_coll.find_one({'_id': idx})
        # Same checks, in the same short-circuit order, as before: distance,
        # then route length, then duration.
        is_substantial = (section['distance'] > 2000
                          and len(getRoute(idx)) > 10
                          and section['duration'] > 600)
        if is_substantial:
            frequent_keys.append(idx)
    return frequent_keys
def plot_each_route_cluster_for_user(user_id, method='lcs'):
    """Plot every route cluster of a user to its own HTML map file.

    For each cluster in the user's cluster document for ``method``, a
    pygmaps map is created from the first point of the cluster key's route,
    the key's section and all member sections are drawn as paths, and the
    map is written to ``<user_id>_<method>_<i>.html`` where ``i`` counts
    clusters from 0.

    Args:
        user_id: user whose clusters are plotted.
        method: clustering method selecting the cluster document.
    """
    Sections = get_section_db()
    cluster_doc = get_routeCluster_db().find_one(
        {'$and': [{'user': user_id}, {'method': method}]})
    # The clusters live under the 'clusters' key. Iterating the document
    # itself would treat '_id', 'user', 'method' and 'clusters' as section
    # ids.
    clusters = cluster_doc['clusters']

    # plot each cluster as a file.
    for i, idx in enumerate(clusters):
        # Fetch the route once and reuse its first point for both
        # coordinates.
        start_point = getRoute(idx)[0]
        gmap = pygmaps.maps(start_point[0], start_point[1], 14)
        section = Sections.find_one({'_id': idx})
        drawSection(section, 'path', gmap)
        for idi in clusters[idx]:
            section = Sections.find_one({'_id': idi})
            drawSection(section, 'path', gmap)
        gmap.draw(str(user_id) + '_' + method + '_' + str(i) + '.html')
def cluster_route_match_score(segment,
                              step1=100000,
                              step2=100000,
                              method='lcs',
                              radius1=2000,
                              threshold=0.5):
    """Score a segment against the user's route clusters.

    Finds the cluster key whose route has the smallest
    ``fullMatchDistance`` to the route of ``segment``. When that distance
    does not exceed ``threshold``, returns the mode share (from
    ``get_mode_share_by_count``) over the matched cluster's members plus
    the key; otherwise returns the mode share of an empty list.

    Args:
        segment: mapping providing ``'user_id'`` and ``'_id'``.
        step1, step2, radius1: passed through to ``fullMatchDistance``.
        method: selects the cluster document and is passed through to
            ``fullMatchDistance``.
        threshold: maximum distance accepted as a match.

    Returns:
        The result of ``get_mode_share_by_count``.
    """
    no_match_distance = 999999

    cluster_doc = get_routeCluster_db().find_one(
        {'$and': [{'user': segment['user_id']}, {'method': method}]})
    # No stored document means no clusters to match against; fall through
    # to the "no match" result instead of raising TypeError on None.
    userRouteClusters = {} if cluster_doc is None else cluster_doc['clusters']
    route_seg = getRoute(segment['_id'])

    dis = no_match_distance
    # list() makes the keys indexable on Python 3 as well as Python 2.
    medoid_ids = list(userRouteClusters.keys())
    choice = medoid_ids[0] if medoid_ids else None
    for idx in medoid_ids:
        route_idx = getRoute(idx)
        try:
            dis_new = fullMatchDistance(route_seg, route_idx, step1, step2,
                                        method, radius1)
        except RuntimeError:
            # Comparison failed: treat this cluster as not matching.
            dis_new = no_match_distance
        if dis_new < dis:
            dis = dis_new
            choice = idx

    if choice is None or dis > threshold:
        return get_mode_share_by_count([])
    # Copy before extending so the fetched cluster list stays untouched.
    cluster = list(userRouteClusters[choice])
    cluster.append(choice)
    return get_mode_share_by_count(cluster)
def tearDown(self):
    """Remove the data created for the test and verify the cleanup.

    Deletes the test user's sections, empties the modes collection and the
    route-cluster collection, then asserts that the modes collection holds
    no documents.
    """
    get_section_db().remove({"user_id": self.testUUID})
    self.ModesColl.remove()
    get_routeCluster_db().remove()
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(self.ModesColl.find().count(), 0)