def load_data(city):
    """Load the venue data and ground truth used for `city`.

    Reads the feature matrix `<city>_fv.mat` through `cn.load_matrix`, the
    venue dump `<city>_svenues.my` through `p.load_var` and the JSON file
    `static/ground_truth.json`.

    Returns a 5-tuple `(vindex, venues_generator, venues, kdtree, gold_list)`:

    * `vindex`: the `'i'` entry of the feature matrix (venue ids).
    * `venues_generator`: a `WeightedRandomGenerator` built from column 4
      of the `'v'` entry, shifted by the absolute value of its minimum.
    * `venues`: a `(len(vindex), 2)` array whose row `k` holds the location
      of `vindex[k]`; rows whose id is absent from the venue dump stay at
      `(0, 0)`.
    * `kdtree`: a `cKDTree` built over `venues`.
    * `gold_list`: the decoded content of `static/ground_truth.json`.
    """
    features = cn.load_matrix(city + '_fv.mat')
    density = features['v'][:, 4]
    # Shifting by |min| makes every weight non-negative when the smallest
    # density is negative.
    weights = density + np.abs(density.min())
    venues_generator = WeightedRandomGenerator(weights)
    vids, _, locs = p.load_var(city + '_svenues.my').all()
    vindex = features['i']
    venues = np.zeros((len(vindex), 2))
    # Map venue id -> row position. A dict comprehension and the builtin zip
    # behave the same on Python 2.7 and 3, unlike itertools.imap/izip which
    # only exist on Python 2.
    index = {vid: pos for pos, vid in enumerate(vindex)}
    for vid, loc in zip(vids, locs):
        pos = index.get(vid)
        if pos is not None:
            venues[pos, :] = loc
    kdtree = cKDTree(venues)
    with open('static/ground_truth.json') as infile:
        gold_list = json.load(infile)
    return vindex, venues_generator, venues, kdtree, gold_list
def load_data(city):
    """Load the venue data and ground truth used for `city`.

    Reads the feature matrix `<city>_fv.mat` through `cn.load_matrix`, the
    venue dump `<city>_svenues.my` through `p.load_var` and the JSON file
    `static/ground_truth.json`.

    Returns a 5-tuple `(vindex, venues_generator, venues, kdtree, gold_list)`:

    * `vindex`: the `'i'` entry of the feature matrix (venue ids).
    * `venues_generator`: a `WeightedRandomGenerator` built from column 4
      of the `'v'` entry, shifted by the absolute value of its minimum.
    * `venues`: a `(len(vindex), 2)` array whose row `k` holds the location
      of `vindex[k]`; rows whose id is absent from the venue dump stay at
      `(0, 0)`.
    * `kdtree`: a `cKDTree` built over `venues`.
    * `gold_list`: the decoded content of `static/ground_truth.json`.
    """
    features = cn.load_matrix(city + '_fv.mat')
    density = features['v'][:, 4]
    # Shifting by |min| makes every weight non-negative when the smallest
    # density is negative.
    weights = density + np.abs(density.min())
    venues_generator = WeightedRandomGenerator(weights)
    vids, _, locs = p.load_var(city + '_svenues.my').all()
    vindex = features['i']
    venues = np.zeros((len(vindex), 2))
    # Venue id -> row position; written without itertools.imap/izip so the
    # function also runs on Python 3, where those helpers no longer exist.
    index = {vid: pos for pos, vid in enumerate(vindex)}
    for vid, loc in zip(vids, locs):
        pos = index.get(vid)
        if pos is not None:
            venues[pos, :] = loc
    kdtree = cKDTree(venues)
    with open('static/ground_truth.json') as infile:
        gold_list = json.load(infile)
    return vindex, venues_generator, venues, kdtree, gold_list
if __name__ == '__main__':  # pylint: disable=C0103
    # Exploratory clustering of the feature matrix of one city: connect to
    # the database, rescale the first three feature columns, score KMeans for
    # a range of k with the silhouette coefficient, plot the scores and
    # finally fetch the venue visits.
    import arguments
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, CLIENT = cm.connect_to_db('foursquare', args.host, args.port)

    # NOTE: earlier revisions built and immediately discarded several
    # alternative estimators here: KMeans(3, n_init=5),
    # MeanShift(min_bin_freq=3, cluster_all=False),
    # DBSCAN(eps=5, min_samples=8, metric='cityblock'),
    # AffinityPropagation(damping=.55, affinity='euclidean') and
    # SpectralClustering(3, affinity='cosine', n_init=3). They were never
    # used before being overwritten, so they are no longer instantiated.

    hel = cn.load_matrix(city)
    features = hel['v']
    scale = pp.MinMaxScaler(copy=False)
    # The return value is dropped on purpose: with copy=False the scaler is
    # expected to rescale the sliced columns in place.
    # TODO confirm this holds for the dtype/layout of `features`.
    scale.fit_transform(features[:, 0:3])

    # Silhouette score of KMeans for every candidate number of clusters.
    k_values = list(range(3, 16))
    scores = []
    for k in k_values:
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        print(Counter(labels))
    # Candidate k sorted from best to worst score (previously computed and
    # thrown away); kept under a name so it can be inspected.
    ranked_k = np.argsort(scores)[::-1] + k_values[0]
    ppl.plot(k_values, scores, '+')

    # Estimator retained after the exploration above.
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)
    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
import CommonMongo as cm
if __name__ == '__main__':  # pylint: disable=C0103
    # Exploratory clustering of the feature matrix of one city: connect to
    # the database, rescale the first three feature columns, score KMeans for
    # a range of k with the silhouette coefficient, plot the scores and
    # finally fetch the venue visits.
    import arguments
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, CLIENT = cm.connect_to_db('foursquare', args.host, args.port)

    # NOTE: earlier revisions built and immediately discarded several
    # alternative estimators here: KMeans(3, n_init=5),
    # MeanShift(min_bin_freq=3, cluster_all=False),
    # DBSCAN(eps=5, min_samples=8, metric='cityblock'),
    # AffinityPropagation(damping=.55, affinity='euclidean') and
    # SpectralClustering(3, affinity='cosine', n_init=3). They were never
    # used before being overwritten, so they are no longer instantiated.

    hel = cn.load_matrix(city)
    features = hel['v']
    scale = pp.MinMaxScaler(copy=False)
    # The return value is dropped on purpose: with copy=False the scaler is
    # expected to rescale the sliced columns in place.
    # TODO confirm this holds for the dtype/layout of `features`.
    scale.fit_transform(features[:, 0:3])

    # Silhouette score of KMeans for every candidate number of clusters.
    k_values = list(range(3, 16))
    scores = []
    for k in k_values:
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        print(Counter(labels))
    # Candidate k sorted from best to worst score (previously computed and
    # thrown away); kept under a name so it can be inspected.
    ranked_k = np.argsort(scores)[::-1] + k_values[0]
    ppl.plot(k_values, scores, '+')

    # Estimator retained after the exploration above.
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)
    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
if __name__ == '__main__':  # pylint: disable=C0103
    # Rewrite the raw ground truth: for every (district, city) pair whose
    # city has a feature matrix available, replace the raw areas with the
    # result of merge_regions, then dump everything to
    # static/ground_truth.json.
    client = pymongo.MongoClient()
    db = client.foursquare
    with open('static/raw_ground_truth.json') as inf:
        regions = json.load(inf)
    # city -> list of venue ids, or None when its matrix cannot be read;
    # filled lazily so each city is loaded at most once.
    cities_venues = {}
    for district, entry in regions.items():
        # a not very elegant indirection
        gold = entry['gold']
        # Iterate over a snapshot of the keys because entries of `gold` are
        # replaced inside the loop.
        for city in list(gold):
            print(city, district)
            if city not in cities_venues:
                try:
                    city_venues = list(cn.load_matrix(city)['i'])
                except IOError:
                    city_venues = None
                cities_venues[city] = city_venues
            if cities_venues[city]:
                ground_truth = merge_regions(city, district, db,
                                             cities_venues[city])
                msg = '{}, {}: merged into {} areas'
                print(msg.format(city, district, len(ground_truth)))
                gold[city] = ground_truth
            else:
                msg = '{}, {}: not merged'
                print(msg.format(city, district))
    with open('static/ground_truth.json', 'w') as out:
        # The original call was cut off after `out,`; the formatting options
        # below match the other copy of this script in the file.
        json.dump(regions, out, sort_keys=True, indent=2,
                  separators=(',', ': '))
if __name__ == '__main__':  # pylint: disable=C0103
    # Rewrite the raw ground truth: for every (district, city) pair whose
    # city has a feature matrix available, replace the raw areas with the
    # result of merge_regions, then dump everything to
    # static/ground_truth.json.
    client = pymongo.MongoClient()
    db = client.foursquare
    with open('static/raw_ground_truth.json') as inf:
        regions = json.load(inf)
    # city -> list of venue ids, or None when its matrix cannot be read;
    # filled lazily so each city is loaded at most once.
    cities_venues = {}
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for district, entry in regions.items():
        # a not very elegant indirection
        gold = entry['gold']
        # Iterate over a snapshot of the keys because entries of `gold` are
        # replaced inside the loop.
        for city in list(gold):
            print(city, district)
            if city not in cities_venues:
                try:
                    city_venues = list(cn.load_matrix(city)['i'])
                except IOError:
                    city_venues = None
                cities_venues[city] = city_venues
            if cities_venues[city]:
                ground_truth = merge_regions(city, district, db,
                                             cities_venues[city])
                msg = '{}, {}: merged into {} areas'
                print(msg.format(city, district, len(ground_truth)))
                # `gold` is regions[district]['gold'], so this updates the
                # structure that gets dumped below.
                gold[city] = ground_truth
            else:
                msg = '{}, {}: not merged'
                print(msg.format(city, district))
    with open('static/ground_truth.json', 'w') as out:
        json.dump(regions, out, sort_keys=True, indent=2,
                  separators=(',', ': '))