Exemplo n.º 1
0
def describe_city(city):
    """Compute the feature matrix of the selected venues of `city` and save it.

    Venues are kept when they are returned by the database query (same city,
    not closed, with a category and `usersCount` > 1), have more than 4
    visits, more than 1 distinct visitor and are not events according to
    `is_event`. `venues_info` may further reduce that list.

    The resulting float32 matrix has one row per venue and 31 columns:

    ====== =====================================================
    column content
    ====== =====================================================
    0-4    `likes`, `users`, `checkins`, `H`, `Den` from `venues_info`
    5      numeric category code looked up in `CATS2`
    6-14   `cat` part returned by `full_surrounding`
    15     `focus` returned by `full_surrounding`
    16     `ratio` returned by `full_surrounding`
    17     result of `is_week_end_place` on the venue's own visits
    18-23  frequency of the venue's own aggregated visits
    24     `Ht` from `venues_info`
    25-30  frequency of the visits around the venue
    ====== =====================================================

    NaN and infinite entries are replaced by 0 before saving.

    Nothing is returned: the result is written through `sio.savemat` under
    the name `city + '_fv'` with keys `v` (matrix), `c` (output of
    `categories_repartition`), `i` (venue ids) and `stat` (a one element list
    holding the number of distinct visitors in the city).
    """
    CATS2 = p.load_var('cat_depth_2.my')
    # a few venues don't have level 2 categories (TODO add it manually?), so
    # every entry of CATS also gets a code, which is a multiple of 1e5
    CATS2.update({cat: int(idx*1e5) for idx, cat in enumerate(CATS)})
    info = global_info(city)
    lvenues, lcheckins, lphotos = info[:3]
    visits, visitors, density = info[3:6]
    # number of distinct visitors over all the places of the city
    nb_visitors = np.unique(np.array([v for place in visitors.itervalues()
                                      for v in place])).size
    svenues, scheckins, sphotos = info[6:]
    categories = categories_repartition(city, svenues, lvenues, RADIUS)
    venues = DB.venue.find({'city': city, 'closed': {'$ne': True},
                            'cat': {'$ne': None}, 'usersCount': {'$gt': 1}},
                           {'cat': 1})
    chosen = [v['_id'] for v in venues
              if len(visits.get(v['_id'], [])) > 4 and
              len(np.unique(visitors.get(v['_id'], []))) > 1 and
              not is_event(v['cat'])]
    print("Chosen {} venues in {}.".format(len(chosen), city))
    # from here on, `info` is the per venue table returned by venues_info
    info, _ = venues_info(chosen, visits, visitors, density, depth=2,
                          tags_freq=False)
    print("{} of them will be in the matrix.".format(len(info)))
    numeric = np.zeros((len(info), 31), dtype=np.float32)
    numeric[:, :5] = np.array([info['likes'], info['users'], info['checkins'],
                               info['H'], info['Den']]).T
    print('venues with no level 2 category:')
    # codes that are multiples of 1e5 are the ones added from CATS above
    print([info.index[i] for i, c in enumerate(info['cat'])
           if CATS2[c] % int(1e5) == 0])
    numeric[:, 5] = [CATS2[c] for c in info['cat']]
    numeric[:, 24] = np.array(info['Ht'])
    for idx, vid in enumerate(info.index):
        surrounding = full_surrounding(vid, lvenues, lphotos, lcheckins,
                                       svenues, scheckins, sphotos, city)
        cat, focus, ratio, around_visits = surrounding
        numeric[idx, 6:15] = cat
        numeric[idx, 15] = focus
        numeric[idx, 16] = ratio
        own_visits = visits[vid]
        numeric[idx, 17] = is_week_end_place(own_visits)
        daily_visits = xp.aggregate_visits(own_visits, 1, 4)[0]
        numeric[idx, 18:24] = xp.to_frequency(daily_visits)
        numeric[idx, 25:31] = xp.to_frequency(around_visits)
    # Use the boolean mask itself to reset the offending cells. Indexing with
    # the (N, 2) output of np.argwhere would instead treat every row AND
    # column number as a row index, wiping entire rows (or raising IndexError
    # when a column number exceeds the number of rows).
    weird = np.logical_or(np.isnan(numeric), np.isinf(numeric))
    numeric[weird] = 0.0
    sio.savemat(city+'_fv', {'v': numeric, 'c': categories,
                             'i': np.array(list(info.index)),
                             'stat': [nb_visitors]}, do_compression=True)
Exemplo n.º 2
0
def plot_city(city, weekly=False, clusters=5):
    """Cluster the visit time patterns of `city`'s venues and save the result.

    The time pattern of every venue with more than 5 visits is computed
    (element `int(weekly)` of what `xp.aggregate_visits` returns, turned into
    frequencies). `DO_CLUSTER` is then run 7 times with `clusters` clusters
    and the run with the smallest `vf.get_distorsion` value is kept. Its
    first output, with rows reordered, is written through `sio.savemat` under
    key `t` in `times/<city>_weekly_time` or `times/<city>_daily_time`.

    Despite its name, the function currently draws nothing: the plotting
    calls are commented out.
    """
    # original note on shift: start from 1am instead of midnight
    shift, chunk = 2, 4
    visits_by_venue = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Aggregated frequency of each venue having more than 5 visits
    patterns = {}
    for venue, timestamps in visits_by_venue.iteritems():
        if len(timestamps) > 5:
            aggregated = xp.aggregate_visits(timestamps, shift, chunk)
            patterns[venue] = xp.to_frequency(aggregated[int(weekly)])
    samples = np.array(patterns.values())
    # Keep the least distorted of 7 runs; `best` stays unbound if no run gets
    # below the 1e9 starting value.
    lowest = 1e9
    for _ in range(7):
        # presumably centroids and labels -- confirm against DO_CLUSTER
        centers, labels = DO_CLUSTER(samples, clusters)
        distortion = vf.get_distorsion(centers, labels, samples)
        if distortion < lowest:
            lowest = distortion
            best = centers
    ranks = np.argsort(best)
    row_order = np.argsort(ranks, 0)[:, -1]
    # vf.draw_classes(best[row_order, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(patterns)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    period = '_weekly' if weekly else '_daily'
    destination = 'times/' + city + period + '_time'
    sio.savemat(destination, {'t': best[row_order, :]}, do_compression=True)
Exemplo n.º 3
0
def plot_city(city, weekly=False, clusters=5):
    """Find the time clusters of `city`'s venues and store them on disk.

    Only venues with more than 5 visits take part. For each of them,
    `xp.aggregate_visits` is called and the element at index `int(weekly)` of
    its result is converted by `xp.to_frequency`. `DO_CLUSTER` runs 7 times
    asking for `clusters` clusters; the attempt with the lowest
    `vf.get_distorsion` wins and its first output (rows reordered) is handed
    to `sio.savemat` under key `t`, in `times/<city>_weekly_time` or
    `times/<city>_daily_time`.

    No figure is produced at the moment since the drawing code is commented
    out.
    """
    shift = 2  # original note: start from 1am instead of midnight
    chunk = 4
    all_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Frequency profile of the venues with more than 5 visits
    profiles = {}
    for vid, when in all_visits.iteritems():
        if len(when) <= 5:
            continue
        per_period = xp.aggregate_visits(when, shift, chunk)
        profiles[vid] = xp.to_frequency(per_period[int(weekly)])
    points = np.array(profiles.values())
    # `winner` is only bound once an attempt scores below 1e9
    smallest = 1e9
    attempt = 0
    while attempt < 7:
        attempt += 1
        # presumably (centroids, labels) -- to be confirmed in DO_CLUSTER
        trial, trial_labels = DO_CLUSTER(points, clusters)
        score = vf.get_distorsion(trial, trial_labels, points)
        if score < smallest:
            smallest, winner = score, trial
    ordering = np.argsort(np.argsort(winner), 0)[:, -1]
    # vf.draw_classes(winner[ordering, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(profiles)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    if weekly:
        stem = "times/" + city + "_weekly"
    else:
        stem = "times/" + city + "_daily"
    sio.savemat(stem + "_time", {"t": winner[ordering, :]},
                do_compression=True)
Exemplo n.º 4
0
def photos_around(id_, centroid, offset, daily, radius=200):
    """Compare the time pattern of the photos around `id_` with `centroid`.

    Photos are fetched with `xp.get_visits` in a ball of `radius` centred on
    the location of `id_`. Their aggregated visits, selected by `daily`, are
    turned into a frequency vector which is compared to every row of
    `centroid`.

    Return a 4-tuple: number of photos found, the frequency vector, the
    Euclidean distance from that vector to each row of `centroid`, and the
    index of the closest row.

    NOTE(review): `daily` is used directly as an index into the result of
    `xp.aggregate_visits`; confirm with callers which value selects the daily
    pattern.
    """
    origin = get_loc(id_)
    nearby = xp.get_visits(CLIENT, xp.Entity.photo, ball=(origin, radius))
    aggregated = xp.aggregate_visits(nearby.values(), offset)
    pattern = xp.to_frequency(aggregated[daily])
    how_many = centroid.shape[0]
    # one copy of the pattern per centroid row, so rows can be subtracted
    repeated = np.tile(pattern, (how_many, 1))
    # pylint: disable=E1101
    distances = np.linalg.norm(repeated - centroid, axis=1)
    closest = np.argmin(distances)
    return len(nearby), pattern, distances, closest