def all_gold_dst():
    """Compute the distance between all gold regions and the query ones for
    all metrics.

    The query is always the geometry of the first 'paris' gold region of a
    district. For every (city, district, metric) combination, the distance
    to each candidate returned by get_gold_desc() is computed with
    generic_distance().

    Returns a dict mapping '<city>_<district>_<metric>' to the list of these
    distances. Combinations without any candidate are reported on stdout and
    left out of the dict; 'cluster' candidates with fewer than three weights
    are reported and skipped.

    Raises AssertionError if GROUND_TRUTH is empty or not loaded.
    """
    assert GROUND_TRUTH, 'load GROUND_TRUTH before calling'
    districts = list(GROUND_TRUTH.keys())
    # The city names are read from an arbitrary district. next(iter(...))
    # and list(...) behave the same on Python 2 lists and Python 3 dict
    # views, whereas `.items()[0]` and `.keys().remove()` need Python 2.
    any_district = next(iter(GROUND_TRUTH.values()))
    cities = list(any_district['gold'].keys())
    cities.remove('paris')
    metrics = ['cluster', 'emd', 'emd-lmnn', 'jsd']
    results = {}
    for city, district in i.product(cities, districts):
        geo = GROUND_TRUTH[district]['gold']['paris'][0]['geometry']
        for metric in metrics:
            name = '_'.join([city, district, metric])
            info = interpret_query('paris', city, geo, metric)
            # Only three of the six returned fields are needed here.
            _, target_city, target_desc, regions_distance, _, _ = info
            support = target_desc[1]
            candidates = get_gold_desc(target_city, district)
            if not candidates:
                print(name + ' is empty')
                continue
            current_dsts = []
            for region in candidates:
                features, _, weights, _ = region
                if metric == 'cluster' and weights.size < 3:
                    print("{}: can't make three clusters".format(name))
                    continue
                dst = generic_distance(metric, regions_distance, features,
                                       weights, support)
                # Not reached with the current `metrics` list; kept so that
                # 'leftover' can be added back to it without further change.
                if metric == 'leftover':
                    dst = emd_leftover.collect_matlab_output(1)
                    clean_tmp_mats()
                current_dsts.append(dst)
            results[name] = current_dsts
    return results
def batch_matching(query_city='paris', collect_results=False):
    """Match preselected regions of `query_city` into the other target cities.

    Districts and cities are read from 'static/ground_truth.json'. For every
    target city, district, metric and radius, the first result yielded by
    best_match() is requested, unless a file named after that combination
    already exists in the output directory. The loaded regions are finally
    dumped as a JavaScript assignment into 'static/cpresets.js'.

    query_city -- city the query regions are taken from; it must be one of
        the 'gold' cities of the ground truth (AssertionError otherwise).
    collect_results -- when true, a description of each match is appended to
        `regions[district][city]` before the dump. The default keeps the
        previous behaviour, in which matches were computed but never stored
        because a stray `continue` made that code unreachable.

    Side effects: rebinds the module globals OTMPDIR (a
    'www_comparaison_<query_city>' component is appended on every call) and
    QUERY_NAME (set before each call to best_match()).

    NOTE(review): a second definition of batch_matching appears later in
    this file and replaces this one at import time.
    """
    import errno
    import ujson
    global QUERY_NAME
    global OTMPDIR
    with open('static/ground_truth.json') as gt:
        regions = ujson.load(gt)
    districts = sorted(regions.keys())
    # City names are read from an arbitrary district; next(iter(...)) works
    # on Python 2 lists and Python 3 dict views alike, unlike `.values()[0]`.
    # The list is not called `cities` so that it cannot shadow the module
    # used below to convert coordinates.
    target_cities = sorted(next(iter(regions.values()))['gold'].keys())
    assert query_city in target_cities
    target_cities.remove(query_city)
    OTMPDIR = os.path.join(OTMPDIR, 'www_comparaison_' + query_city)
    try:
        os.mkdir(OTMPDIR)
    except OSError as err:
        # An existing directory is expected on reruns; any other failure is
        # reported instead of being silently dropped, but stays non fatal.
        if err.errno != errno.EEXIST:
            logging.warning('cannot create %s: %s', OTMPDIR, err)
    # Other metric names previously used here: 'jsd', 'emd', 'cluster',
    # 'emd-lmnn' and 'leftover'.
    metrics = ['emd-itml', 'emd-tsne']
    radii = np.linspace(200, 500, 5)
    for city in target_cities:
        print(city)
        for neighborhood in districts:
            print(neighborhood)
            possible_regions = regions[neighborhood]['gold'].get(query_city)
            rgeo = choose_query_region(possible_regions)
            if not rgeo:
                continue
            for metric in metrics:
                print(metric)
                for radius in radii:
                    print(radius)
                    QUERY_NAME = '{}_{}_{}_{}.my'.format(
                        city, neighborhood, int(radius), metric)
                    out_path = os.path.join(OTMPDIR, QUERY_NAME)
                    logging.info('will write: %s', out_path)
                    if os.path.isfile(out_path):
                        # This combination was handled by a previous run.
                        continue
                    matches = best_match(query_city, city, rgeo, radius,
                                         metric=metric)
                    res, _, _ = next(matches)
                    if not collect_results:
                        continue
                    # Separate name: the loop variable `radius` must not be
                    # overwritten by the radius of the matched region.
                    distance, r_vids, center, found_radius = res
                    print(distance)
                    if center is None:
                        result = {'dst': distance, 'metric': metric,
                                  'nb_venues': 0}
                    else:
                        # NOTE(review): assumes the project module providing
                        # euclidean_to_geo() is imported as `cities` at file
                        # level; the original code shadowed that name with
                        # the list of city names. Confirm before enabling.
                        center = cities.euclidean_to_geo(city, center)
                        result = {'geo': {'type': 'circle',
                                          'center': center,
                                          'radius': found_radius},
                                  'dst': distance, 'metric': metric,
                                  'nb_venues': len(r_vids)}
                    regions[neighborhood][city].append(result)
    with open('static/cpresets.js', 'w') as out:
        out.write('var PRESETS =' + ujson.dumps(regions) + ';')
def batch_matching(query_city='paris', collect_results=False):
    """Match preselected regions of `query_city` into the other target cities.

    Districts and cities are read from 'static/ground_truth.json'. For every
    target city, district, metric and radius, the first result yielded by
    best_match() is requested, unless a file named after that combination
    already exists in the output directory. The loaded regions are finally
    dumped as a JavaScript assignment into 'static/cpresets.js'.

    query_city -- city the query regions are taken from; it must be one of
        the 'gold' cities of the ground truth (AssertionError otherwise).
    collect_results -- when true, a description of each match is appended to
        `regions[district][city]` before the dump. The default keeps the
        previous behaviour, in which matches were computed but never stored
        because a stray `continue` made that code unreachable.

    Side effects: rebinds the module globals OTMPDIR (a
    'www_comparaison_<query_city>' component is appended on every call) and
    QUERY_NAME (set before each call to best_match()).

    NOTE(review): an earlier definition of batch_matching precedes this one
    in the file; being the later one, this definition is the one that stays
    bound to the name.
    """
    import errno
    import ujson
    global QUERY_NAME
    global OTMPDIR
    with open('static/ground_truth.json') as gt:
        regions = ujson.load(gt)
    districts = sorted(regions.keys())
    # City names are read from an arbitrary district; next(iter(...)) works
    # on Python 2 lists and Python 3 dict views alike, unlike `.values()[0]`.
    # The list is not called `cities` so that it cannot shadow the module
    # used below to convert coordinates.
    target_cities = sorted(next(iter(regions.values()))['gold'].keys())
    assert query_city in target_cities
    target_cities.remove(query_city)
    OTMPDIR = os.path.join(OTMPDIR, 'www_comparaison_' + query_city)
    try:
        os.mkdir(OTMPDIR)
    except OSError as err:
        # An existing directory is expected on reruns; any other failure is
        # reported instead of being silently dropped, but stays non fatal.
        if err.errno != errno.EEXIST:
            logging.warning('cannot create %s: %s', OTMPDIR, err)
    # Other metric names previously used here: 'jsd', 'emd', 'cluster',
    # 'emd-lmnn' and 'leftover'.
    metrics = ['emd-itml', 'emd-tsne']
    radii = np.linspace(200, 500, 5)
    for city in target_cities:
        print(city)
        for neighborhood in districts:
            print(neighborhood)
            possible_regions = regions[neighborhood]['gold'].get(query_city)
            rgeo = choose_query_region(possible_regions)
            if not rgeo:
                continue
            for metric in metrics:
                print(metric)
                for radius in radii:
                    print(radius)
                    QUERY_NAME = '{}_{}_{}_{}.my'.format(
                        city, neighborhood, int(radius), metric)
                    out_path = os.path.join(OTMPDIR, QUERY_NAME)
                    logging.info('will write: %s', out_path)
                    if os.path.isfile(out_path):
                        # This combination was handled by a previous run.
                        continue
                    matches = best_match(query_city, city, rgeo, radius,
                                         metric=metric)
                    res, _, _ = next(matches)
                    if not collect_results:
                        continue
                    # Separate name: the loop variable `radius` must not be
                    # overwritten by the radius of the matched region.
                    distance, r_vids, center, found_radius = res
                    print(distance)
                    if center is None:
                        result = {
                            'dst': distance,
                            'metric': metric,
                            'nb_venues': 0
                        }
                    else:
                        # NOTE(review): assumes the project module providing
                        # euclidean_to_geo() is imported as `cities` at file
                        # level; the original code shadowed that name with
                        # the list of city names. Confirm before enabling.
                        center = cities.euclidean_to_geo(city, center)
                        result = {
                            'geo': {
                                'type': 'circle',
                                'center': center,
                                'radius': found_radius
                            },
                            'dst': distance,
                            'metric': metric,
                            'nb_venues': len(r_vids)
                        }
                    regions[neighborhood][city].append(result)
    with open('static/cpresets.js', 'w') as out:
        out.write('var PRESETS =' + ujson.dumps(regions) + ';')