def plot_city(city, weekly=False, clusters=5):
    """Cluster the visit-time profiles of `city` venues and save the result.

    The visits of every venue of `city` are aggregated into a frequency
    profile, the profiles are clustered several times with `DO_CLUSTER`, and
    the run with the lowest distortion is written to disk with `sio.savemat`.
    The plotting calls are currently commented out, so nothing is drawn.

    Args:
        city: name of the city; also used to build the output file name.
        weekly: selects which element of the `xp.aggregate_visits` result is
            used (index `int(weekly)`) and whether the output name gets the
            '_weekly' or the '_daily' suffix.
        clusters: number of clusters requested from `DO_CLUSTER`.

    Returns:
        None. The selected centroids are saved under the key 't' in
        'times/<city>_weekly_time' or 'times/<city>_daily_time'
        (presumably with '.mat' appended by `savemat` -- confirm).
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits.
    # `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
    enough = {
        venue: xp.to_frequency(
            xp.aggregate_visits(visits, shift, chunk)[int(weekly)])
        for venue, visits in venue_visits.items()
        if len(visits) > 5
    }
    # On Python 3 `dict.values()` is a view; without `list()` NumPy would
    # wrap the view itself in a 0-d object array instead of building a matrix.
    sval = np.array(list(enough.values()))
    # Start from +inf so that the first run is always accepted, whatever the
    # scale of the distortion (a finite sentinel could leave `ak` unbound).
    min_disto = float('inf')
    for _ in range(7):
        # Several restarts; keep the centroids of the least distorted run.
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < min_disto:
            min_disto, ak = current_disto, tak
    # Row permutation taken from the last column of a double argsort
    # (presumably to store the clusters in a canonical order -- confirm).
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    # Plotting is disabled:
    # vf.draw_classes(ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    city = 'times/' + city
    city += '_weekly' if weekly else '_daily'
    sio.savemat(city + '_time', {'t': ak[std_ord, :]}, do_compression=True)
def plot_city(city, weekly=False, clusters=5):
    """Cluster the visit-time profiles of `city` venues and save the result.

    The visits of every venue of `city` are aggregated into a frequency
    profile, the profiles are clustered several times with `DO_CLUSTER`, and
    the run with the lowest distortion is written to disk with `sio.savemat`.
    The plotting calls are currently commented out, so nothing is drawn.

    Args:
        city: name of the city; also used to build the output file name.
        weekly: selects which element of the `xp.aggregate_visits` result is
            used (index `int(weekly)`) and whether the output name gets the
            "_weekly" or the "_daily" suffix.
        clusters: number of clusters requested from `DO_CLUSTER`.

    Returns:
        None. The selected centroids are saved under the key "t" in
        "times/<city>_weekly_time" or "times/<city>_daily_time"
        (presumably with ".mat" appended by `savemat` -- confirm).
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits.
    # `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
    enough = {
        venue: xp.to_frequency(
            xp.aggregate_visits(visits, shift, chunk)[int(weekly)])
        for venue, visits in venue_visits.items()
        if len(visits) > 5
    }
    # On Python 3 `dict.values()` is a view; without `list()` NumPy would
    # wrap the view itself in a 0-d object array instead of building a matrix.
    sval = np.array(list(enough.values()))
    # Start from +inf so that the first run is always accepted, whatever the
    # scale of the distortion (a finite sentinel could leave `ak` unbound).
    min_disto = float("inf")
    for _ in range(7):
        # Several restarts; keep the centroids of the least distorted run.
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < min_disto:
            min_disto, ak = current_disto, tak
    # Row permutation taken from the last column of a double argsort
    # (presumably to store the clusters in a canonical order -- confirm).
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    # Plotting is disabled:
    # vf.draw_classes(ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    city = "times/" + city
    city += "_weekly" if weekly else "_daily"
    sio.savemat(city + "_time", {"t": ak[std_ord, :]}, do_compression=True)
import matplotlib.colors as mcolor
import matplotlib as mpl
from scipy.stats import zscore
import random as rd
import persistent as p
import ir_evaluation as ir
from collections import namedtuple

# Record type with the three fields `path`, `dst` and `size`.
LOOP = namedtuple('Loop', 'path dst size')
# Human-readable feature names (presumably one per column of the feature
# matrix -- confirm). The list is completed in place by the statements below.
FEATURES = ['likes', 'users', 'checkins', 'publicness', 'density', 'category',
            'art', 'education', 'food', 'night', 'recreation', 'shop',
            'professional', 'residence', 'transport', 'focus',
            'photogenicity', 'weekend']
# Entries 6 to 14 ('art' through 'transport') get a ' surrounding' suffix.
for i in range(6, 15):
    FEATURES[i] += ' surrounding'
# One 'activity at <tick>' name per value returned by `vf.named_ticks`
# (NOTE(review): the meaning of the arguments 'day', 1, 4 is not visible here).
FEATURES.extend(['activity at ' + t for t in vf.named_ticks('day', 1, 4)])
FEATURES.append('opening')
# Same ticks again, this time for the 'surrounding activity at <tick>' names.
FEATURES.extend(['surrounding activity at ' + t for t in vf.named_ticks('day', 1, 4)])
# Array holding every index of FEATURES, i.e. 0 .. len(FEATURES) - 1.
RESTRICTED = np.array(range(len(FEATURES)))
# pylint: disable=E1101
# Starts empty; presumably filled elsewhere in the file -- confirm.
LCATS = {}


def load_matrix(city, hide_category=False):
    """Open `city` matrix or compute it.

    `city` is used as the file name as is when it already ends with '.mat';
    otherwise '_fv.mat' is appended to it. The file is read with
    `vf.sio.loadmat`.

    NOTE(review): the rest of this function lies outside the visible chunk,
    so the use of `hide_category`, the "compute it" path and the return value
    cannot be documented from here.
    """
    filename = city
    if not filename.endswith('.mat'):
        filename = city + '_fv.mat'
    mat = vf.sio.loadmat(filename)
    log_nb_users = []
    # pylint: disable=E1101
# Exploratory script: cluster the venue feature matrix of `city`, compare
# KMeans runs for k = 3..15 with the silhouette score, then describe the venues
# of two of the resulting clusters.
# Expects `city`, `CLIENT` and the modules `cl`, `cn`, `pp`, `mt`, `ppl`, `xp`,
# `vf`, `np` and `Counter` to be defined before this point.

# Alternative clusterers that were tried; each assignment replaces the previous
# one and none of these three is used before `clusterer` is rebound in the loop.
clusterer = cl.DBSCAN(eps=5, min_samples=8, metric='cityblock')
clusterer = cl.AffinityPropagation(damping=.55, affinity='euclidean')
clusterer = cl.SpectralClustering(3, affinity='cosine', n_init=3)
hel = cn.load_matrix(city)
features = hel['v']
scale = pp.MinMaxScaler(copy=False)
# The return value is discarded: this relies on `copy=False` rescaling the
# first three columns of `features` in place through the slice
# (NOTE(review): confirm that no copy is made for this input).
scale.fit_transform(features[:, 0:3])
# scores[j] is the silhouette score obtained with k = j + 3 clusters.
scores = []
# NOTE(review): the extent of the loop body was reconstructed from a source
# whose line breaks were lost; confirm that `print` belongs inside the loop.
for k in range(3, 16):
    clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
    labels = clusterer.fit_predict(features)
    scores.append(mt.silhouette_score(features, labels))
    # Number of samples assigned to each cluster for this k.
    print(Counter(labels))
# Values of k ordered from best to worst score. The result is not stored, so
# this only shows something in an interactive session.
np.argsort(scores)[::-1]+3
ppl.plot(range(3, 16), scores[0:], '+')
clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
# NOTE(review): this KMeans(6) is never fitted in the visible code; `labels`
# used below still holds the result of the last loop iteration (k = 15).
clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)
visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
visitors = xp.get_visitors(CLIENT, city)
density = vf.estimate_density(city)
# Information about the venues labelled 0 and 5, restricted to the ids of
# hel['i'] that are also present in `visits`.
c0, _ = vf.venues_info(
    [_ for _ in hel['i'][labels == 0].tolist() if _ in visits],
    visits, visitors, density, depth=2, tags_freq=False)
c5, v = vf.venues_info(
    [v for v in hel['i'][labels == 5].tolist() if v in visits],
    visits, visitors, density, depth=2, tags_freq=False)
c0.describe()
c5.describe()
# Exploratory script: compare KMeans runs for k = 3..15 on `features` with the
# silhouette score, then describe the venues of two of the resulting clusters.
# Expects `features`, `hel`, `city`, `CLIENT` and the modules `cl`, `pp`, `mt`,
# `ppl`, `xp`, `vf`, `np` and `Counter` to be defined before this point.
scale = pp.MinMaxScaler(copy=False)
# The return value is discarded: this relies on `copy=False` rescaling the
# first three columns of `features` in place through the slice
# (NOTE(review): confirm that no copy is made for this input).
scale.fit_transform(features[:, 0:3])
# scores[j] is the silhouette score obtained with k = j + 3 clusters.
scores = []
# NOTE(review): the extent of the loop body was reconstructed from a source
# whose line breaks were lost; confirm that `print` belongs inside the loop.
for k in range(3, 16):
    clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
    labels = clusterer.fit_predict(features)
    scores.append(mt.silhouette_score(features, labels))
    # Number of samples assigned to each cluster for this k.
    print(Counter(labels))
# Values of k ordered from best to worst score. The result is not stored, so
# this only shows something in an interactive session.
np.argsort(scores)[::-1] + 3
ppl.plot(range(3, 16), scores[0:], '+')
clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
# NOTE(review): this KMeans(6) is never fitted in the visible code; `labels`
# used below still holds the result of the last loop iteration (k = 15).
clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)
visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
visitors = xp.get_visitors(CLIENT, city)
density = vf.estimate_density(city)
# Information about the venues labelled 0 and 5, restricted to the ids of
# hel['i'] that are also present in `visits`.
c0, _ = vf.venues_info(
    [_ for _ in hel['i'][labels == 0].tolist() if _ in visits],
    visits, visitors, density, depth=2, tags_freq=False)
c5, v = vf.venues_info(
    [v for v in hel['i'][labels == 5].tolist() if v in visits],
    visits, visitors, density, depth=2, tags_freq=False)
c0.describe()