Пример #1
0
def plot_city(city, weekly=False, clusters=5):
    """Cluster the visit-time profiles of `city` venues and save them on disk.

    Every venue of `city` with more than 5 visits gets a frequency profile
    (`xp.aggregate_visits` followed by `xp.to_frequency`). The profiles are
    clustered 7 times with `DO_CLUSTER` and the run with the lowest
    distortion is written by `sio.savemat`, under the key 't', to
    'times/<city>_weekly_time' or 'times/<city>_daily_time'.

    Despite its name, this function does not draw anything: the plotting
    calls (`vf.draw_classes` and the matching title/ylim setup) are disabled.

    city -- name of the city, forwarded to `xp.get_visits`.
    weekly -- pick the second (True) or first (False) element returned by
        `xp.aggregate_visits`; it also selects the '_weekly' or '_daily'
        suffix of the output name.
    clusters -- number of clusters requested from `DO_CLUSTER`.
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits.
    # `.items()` is used instead of `.iteritems()` so that the code runs on
    # both Python 2 and Python 3.
    enough = {
        venue: xp.to_frequency(
            xp.aggregate_visits(visits, shift, chunk)[int(weekly)])
        for venue, visits in venue_visits.items() if len(visits) > 5
    }
    # On Python 3 `dict.values()` is a view, which NumPy would wrap into a
    # 0-d object array; turn it into a list to get one row per venue.
    sval = np.array(list(enough.values()))
    # Keep the lowest-distortion result among several runs (presumably
    # DO_CLUSTER starts from a random initialisation -- TODO confirm).
    ak, min_disto = None, float('inf')
    for _ in range(7):
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        # The first run is always accepted so that `ak` is defined even if
        # no distortion ever compares below the initial bound (e.g. NaN).
        if ak is None or current_disto < min_disto:
            min_disto, ak = current_disto, tak
    # Order the rows of `ak` by the column index of their largest value, so
    # that the saved rows come in an order that does not depend on the run.
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    out_name = 'times/' + city
    out_name += '_weekly' if weekly else '_daily'
    sio.savemat(out_name + '_time', {'t': ak[std_ord, :]},
                do_compression=True)
Пример #2
0
def plot_city(city, weekly=False, clusters=5):
    """Cluster the visit-time profiles of `city` venues and save them on disk.

    Every venue of `city` with more than 5 visits gets a frequency profile
    (`xp.aggregate_visits` followed by `xp.to_frequency`). The profiles are
    clustered 7 times with `DO_CLUSTER` and the run with the lowest
    distortion is written by `sio.savemat`, under the key "t", to
    "times/<city>_weekly_time" or "times/<city>_daily_time".

    Despite its name, this function does not draw anything: the plotting
    calls (`vf.draw_classes` and the matching title/ylim setup) are disabled.

    city -- name of the city, forwarded to `xp.get_visits`.
    weekly -- pick the second (True) or first (False) element returned by
        `xp.aggregate_visits`; it also selects the "_weekly" or "_daily"
        suffix of the output name.
    clusters -- number of clusters requested from `DO_CLUSTER`.
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits.
    # `.items()` is used instead of `.iteritems()` so that the code runs on
    # both Python 2 and Python 3.
    enough = {
        venue: xp.to_frequency(
            xp.aggregate_visits(visits, shift, chunk)[int(weekly)]
        )
        for venue, visits in venue_visits.items()
        if len(visits) > 5
    }
    # On Python 3 `dict.values()` is a view, which NumPy would wrap into a
    # 0-d object array; turn it into a list to get one row per venue.
    sval = np.array(list(enough.values()))
    # Keep the lowest-distortion result among several runs (presumably
    # DO_CLUSTER starts from a random initialisation -- TODO confirm).
    ak = None
    min_disto = float("inf")
    for _ in range(7):
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        # The first run is always accepted so that `ak` is defined even if
        # no distortion ever compares below the initial bound (e.g. NaN).
        if ak is None or current_disto < min_disto:
            min_disto, ak = current_disto, tak
    # Order the rows of `ak` by the column index of their largest value, so
    # that the saved rows come in an order that does not depend on the run.
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    out_name = "times/" + city
    out_name += "_weekly" if weekly else "_daily"
    sio.savemat(
        out_name + "_time", {"t": ak[std_ord, :]}, do_compression=True
    )
Пример #3
0
import matplotlib.colors as mcolor
import matplotlib as mpl
from scipy.stats import zscore
import random as rd
import persistent as p
import ir_evaluation as ir
from collections import namedtuple
# Lightweight record with three fields: path, dst and size.
LOOP = namedtuple('Loop', 'path dst size')

# Human-readable feature names, assembled group by group.
FEATURES = ['likes', 'users', 'checkins', 'publicness', 'density',
            'category']
# These category names all carry the ' surrounding' suffix.
FEATURES += [name + ' surrounding'
             for name in ('art', 'education', 'food', 'night', 'recreation',
                          'shop', 'professional', 'residence', 'transport')]
FEATURES += ['focus', 'photogenicity', 'weekend']
# One name per tick returned by vf.named_ticks, for the venue itself...
FEATURES += ['activity at ' + t for t in vf.named_ticks('day', 1, 4)]
FEATURES += ['opening']
# ...and once more for its surroundings.
FEATURES += ['surrounding activity at ' + t
             for t in vf.named_ticks('day', 1, 4)]
# Indices of all the features: 0, 1, ..., len(FEATURES) - 1.
RESTRICTED = np.arange(len(FEATURES))  # pylint: disable=E1101
LCATS = {}


def load_matrix(city, hide_category=False):
    """Open `city` matrix or compute it.

    `city` is either a city name, in which case the file '<city>_fv.mat' is
    read, or a filename already ending with '.mat', which is used as is.
    The file is loaded with `vf.sio.loadmat`.

    NOTE(review): only the beginning of this function is visible in this
    excerpt, so the role of `hide_category`, the "or compute it" part and
    the returned value could not be checked against the code.
    """
    filename = city
    # Accept both a bare city name and a full '.mat' filename.
    if not filename.endswith('.mat'):
        filename = city + '_fv.mat'
    mat = vf.sio.loadmat(filename)
    log_nb_users = []
    # pylint: disable=E1101
Пример #4
0
    # NOTE(review): these three assignments overwrite one another, and the
    # last one is itself replaced inside the loop below before being used.
    # They look like alternatives kept from interactive experiments.
    clusterer = cl.DBSCAN(eps=5, min_samples=8, metric='cityblock')
    clusterer = cl.AffinityPropagation(damping=.55, affinity='euclidean')
    clusterer = cl.SpectralClustering(3, affinity='cosine', n_init=3)

    hel = cn.load_matrix(city)
    features = hel['v']
    # The return value is discarded: with copy=False the scaler is presumably
    # expected to rescale the first three columns of `features` in place
    # (TODO confirm that the slice is modified rather than a copy of it).
    scale = pp.MinMaxScaler(copy=False)
    scale.fit_transform(features[:, 0:3])
    # Silhouette score of a KMeans clustering for every k from 3 to 15; the
    # size of each cluster is printed along the way.
    scores = []
    for k in range(3, 16):
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        print(Counter(labels))
    # Values of k by decreasing score (index 0 of `scores` is k=3). The result
    # is neither stored nor printed, so this line has no effect in a script.
    np.argsort(scores)[::-1]+3
    ppl.plot(range(3, 16), scores[0:], '+')
    # NOTE(review): neither of these two clusterers is fitted in the visible
    # code, so `labels` below is still the k=15 labelling computed by the
    # last iteration of the loop above.
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = xp.get_visitors(CLIENT, city)
    density = vf.estimate_density(city)
    # Gather information about the members of clusters 0 and 5 that are
    # present in `visits` (hel['i'] presumably holds venue ids -- TODO
    # confirm). Only the first element returned by vf.venues_info is used.
    c0, _ = vf.venues_info([_ for _ in hel['i'][labels == 0].tolist()
                            if _ in visits],
                           visits, visitors, density, depth=2, tags_freq=False)
    c5, v = vf.venues_info([v for v in hel['i'][labels == 5].tolist()
                            if v in visits],
                           visits, visitors, density, depth=2, tags_freq=False)
    # The results of describe() are discarded; these calls are only useful
    # in an interactive session that echoes them.
    c0.describe()
    c5.describe()
Пример #5
0
    # `features` and `hel` are defined before the start of this excerpt.
    # The return value is discarded: with copy=False the scaler is presumably
    # expected to rescale the first three columns of `features` in place
    # (TODO confirm that the slice is modified rather than a copy of it).
    scale = pp.MinMaxScaler(copy=False)
    scale.fit_transform(features[:, 0:3])
    # Silhouette score of a KMeans clustering for every k from 3 to 15; the
    # size of each cluster is printed along the way.
    scores = []
    for k in range(3, 16):
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        print(Counter(labels))
    # Values of k by decreasing score (index 0 of `scores` is k=3). The result
    # is neither stored nor printed, so this line has no effect in a script.
    np.argsort(scores)[::-1] + 3
    ppl.plot(range(3, 16), scores[0:], '+')
    # NOTE(review): neither of these two clusterers is fitted in the visible
    # code, so `labels` below is still the k=15 labelling computed by the
    # last iteration of the loop above.
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = xp.get_visitors(CLIENT, city)
    density = vf.estimate_density(city)
    # Gather information about the members of clusters 0 and 5 that are
    # present in `visits` (hel['i'] presumably holds venue ids -- TODO
    # confirm). Only the first element returned by vf.venues_info is used.
    c0, _ = vf.venues_info(
        [_ for _ in hel['i'][labels == 0].tolist() if _ in visits],
        visits,
        visitors,
        density,
        depth=2,
        tags_freq=False)
    c5, v = vf.venues_info(
        [v for v in hel['i'][labels == 5].tolist() if v in visits],
        visits,
        visitors,
        density,
        depth=2,
        tags_freq=False)
    # The result of describe() is discarded; the call is only useful in an
    # interactive session that echoes it.
    c0.describe()