Пример #1
0
def load_data(city):
    """Load everything needed to work with the venues of `city`.

    Reads the feature matrix ``<city>_fv.mat``, the saved venues
    ``<city>_svenues.my`` and ``static/ground_truth.json``.

    Returns a 5-tuple ``(vindex, venues_generator, venues, kdtree,
    gold_list)`` where:

    * ``vindex`` is the ``'i'`` entry of the feature matrix (venue ids),
    * ``venues_generator`` is a ``WeightedRandomGenerator`` built from
      column 4 of the ``'v'`` entry,
    * ``venues`` is a ``len(vindex) x 2`` array holding one location per
      venue id, in the same order as ``vindex``,
    * ``kdtree`` is a ``cKDTree`` built on ``venues``,
    * ``gold_list`` is the decoded content of the ground truth JSON file.
    """
    matrix = cn.load_matrix(city + '_fv.mat')
    vindex = matrix['i']

    # Shift the column by the absolute value of its minimum before using
    # it as sampling weights (presumably to avoid negative weights).
    density = matrix['v'][:, 4]
    venues_generator = WeightedRandomGenerator(density +
                                               np.abs(density.min()))

    vids, _, locs = p.load_var(city + '_svenues.my').all()

    # Map each venue id to its row in the feature matrix.
    row_of = dict((vid, row) for row, vid in enumerate(vindex))

    # Venues that never show up in `vids` keep their initial (0, 0) row.
    venues = np.zeros((len(vindex), 2))
    for vid, loc in itertools.izip(vids, locs):
        row = row_of.get(vid)
        if row is None:
            continue
        venues[row, :] = loc
    kdtree = cKDTree(venues)

    with open('static/ground_truth.json') as infile:
        gold_list = json.load(infile)

    return vindex, venues_generator, venues, kdtree, gold_list
Пример #2
0
def load_data(city):
    """Gather the venue related data of `city`.

    Three resources are read: the feature matrix ``<city>_fv.mat``, the
    saved venues ``<city>_svenues.my`` and ``static/ground_truth.json``.

    The result is the tuple ``(vindex, venues_generator, venues, kdtree,
    gold_list)``: the venue ids of the feature matrix, a weighted random
    generator over them, their locations as a ``len(vindex) x 2`` array
    (rows aligned with ``vindex``), a ``cKDTree`` over these locations
    and the parsed ground truth.
    """
    fv = cn.load_matrix(city + '_fv.mat')

    # Weights come from column 4 of 'v', offset by |min| of that column.
    column = fv['v'][:, 4]
    offset = np.abs(column.min())
    venues_generator = WeightedRandomGenerator(column + offset)

    vids, _, locs = p.load_var(city+'_svenues.my').all()
    vindex = fv['i']
    venues = np.zeros((len(vindex), 2))

    # Position of every venue id inside `vindex`; a later duplicate
    # overrides an earlier one.
    position = {}
    for rank, known_id in enumerate(vindex):
        position[known_id] = rank

    # Only venues present in the feature matrix get a location; the
    # other rows of `venues` remain zero.
    for vid, loc in itertools.izip(vids, locs):
        if vid in position:
            venues[position[vid], :] = loc
    kdtree = cKDTree(venues)

    with open('static/ground_truth.json') as infile:
        gold_list = json.load(infile)

    return vindex, venues_generator, venues, kdtree, gold_list
Пример #3
0

# Exploratory script: cluster the feature matrix of a city with several
# algorithms and compare K-Means silhouette scores for k in [3, 15].
if __name__ == '__main__':
    # pylint: disable=C0103
    import arguments
    # The city and the database host/port come from the command line.
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, CLIENT = cm.connect_to_db('foursquare', args.host, args.port)

    # NOTE(review): every assignment below overwrites the previous one and
    # `clusterer` is rebound again in the loop further down, so none of
    # these five instances is used in the visible code. They look like a
    # list of alternatives to try interactively.
    clusterer = cl.KMeans(3, n_init=5)
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.DBSCAN(eps=5, min_samples=8, metric='cityblock')
    clusterer = cl.AffinityPropagation(damping=.55, affinity='euclidean')
    clusterer = cl.SpectralClustering(3, affinity='cosine', n_init=3)

    hel = cn.load_matrix(city)
    features = hel['v']
    # The scaled result is discarded: with copy=False this presumably
    # relies on the first three columns being rescaled in place.
    # TODO confirm that in-place scaling actually happens on this slice.
    scale = pp.MinMaxScaler(copy=False)
    scale.fit_transform(features[:, 0:3])
    # One silhouette score per number of clusters k = 3, ..., 15.
    scores = []
    for k in range(3, 16):
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        # Show how many points fall in each cluster.
        print(Counter(labels))
    # NOTE(review): the value of this expression is discarded, so it has
    # no effect when run as a script. It would give the values of k
    # ordered by decreasing score (+3 because scores[0] is for k=3).
    np.argsort(scores)[::-1]+3
    ppl.plot(range(3, 16), scores[0:], '+')
    # NOTE(review): the MeanShift instance is immediately replaced by the
    # K-Means one on the next line.
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
Пример #4
0
import CommonMongo as cm

# Script entry point: try several clustering algorithms on the features
# of a city and score K-Means for every k between 3 and 15.
if __name__ == '__main__':
    # pylint: disable=C0103
    import arguments
    # Command line provides the city name and the database host and port.
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, CLIENT = cm.connect_to_db('foursquare', args.host, args.port)

    # NOTE(review): these five estimators are created one after the other
    # under the same name; only the last one survives, and it is replaced
    # in turn inside the loop below before being fitted.
    clusterer = cl.KMeans(3, n_init=5)
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.DBSCAN(eps=5, min_samples=8, metric='cityblock')
    clusterer = cl.AffinityPropagation(damping=.55, affinity='euclidean')
    clusterer = cl.SpectralClustering(3, affinity='cosine', n_init=3)

    hel = cn.load_matrix(city)
    features = hel['v']
    # Return value ignored; presumably columns 0-2 of `features` are meant
    # to be rescaled in place thanks to copy=False -- TODO confirm.
    scale = pp.MinMaxScaler(copy=False)
    scale.fit_transform(features[:, 0:3])
    # scores[j] is the silhouette score obtained with k = j + 3 clusters.
    scores = []
    for k in range(3, 16):
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        # Cluster sizes for this k.
        print(Counter(labels))
    # NOTE(review): bare expression whose result is thrown away (values of
    # k sorted from highest to lowest score); useful only when typed in an
    # interactive session.
    np.argsort(scores)[::-1] + 3
    ppl.plot(range(3, 16), scores[0:], '+')
    # NOTE(review): only the second assignment is effective.
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
Пример #5
0
# Script entry point: for every (district, city) pair of the raw ground
# truth, replace the raw areas by the result of `merge_regions` when the
# venues of the city are available.
if __name__ == '__main__':
    # pylint: disable=C0103
    client = pymongo.MongoClient()
    db = client.foursquare
    with open('static/raw_ground_truth.json') as inf:
        regions = json.load(inf)
    # Cache: city -> list of its venue ids, or None when the matrix of the
    # city could not be loaded.
    cities_venues = {}
    for district, gold in regions.iteritems():
        # a not very elegant indirection
        gold = gold['gold']
        # NOTE(review): `areas` is not used in the loop body.
        for city, areas in gold.iteritems():
            print(city, district)
            if city not in cities_venues:
                try:
                    city_venues = list(cn.load_matrix(city)['i'])
                except IOError:
                    # No matrix for this city: remember the failure so the
                    # load is not attempted again.
                    city_venues = None
                cities_venues[city] = city_venues
            # Falsy for both None (load failed) and an empty venue list.
            if cities_venues[city]:
                ground_truth = merge_regions(city, district, db,
                                             cities_venues[city])
                msg = '{}, {}: merged into {} areas'
                print(msg.format(city, district, len(ground_truth)))
                # Overwrites the value of an existing key, so the dict
                # being iterated does not change size.
                regions[district]['gold'][city] = ground_truth
            else:
                msg = '{}, {}: not merged'
                print(msg.format(city, district))
    with open('static/ground_truth.json', 'w') as out:
        json.dump(regions,
                  out,
Пример #6
0
# Script entry point: read the raw ground truth, merge the areas of every
# (district, city) pair whose venues are known, and write the result to
# static/ground_truth.json.
if __name__ == '__main__':
    # pylint: disable=C0103
    client = pymongo.MongoClient()
    db = client.foursquare
    with open('static/raw_ground_truth.json') as inf:
        regions = json.load(inf)

    # city -> list of venue ids, or None if its matrix cannot be loaded.
    cities_venues = {}
    for district, entry in regions.iteritems():
        # The areas of each city are nested under the 'gold' key.
        gold = entry['gold']
        for city, _areas in gold.iteritems():
            print(city, district)

            # Load the venues of a city only the first time it is seen.
            if city not in cities_venues:
                try:
                    cities_venues[city] = list(cn.load_matrix(city)['i'])
                except IOError:
                    cities_venues[city] = None

            known_venues = cities_venues[city]
            if not known_venues:
                print('{}, {}: not merged'.format(city, district))
                continue

            ground_truth = merge_regions(city, district, db, known_venues)
            print('{}, {}: merged into {} areas'.format(city, district,
                                                        len(ground_truth)))
            # `gold` is the very dict stored in regions[district]['gold'];
            # only the value of an existing key is replaced.
            gold[city] = ground_truth

    with open('static/ground_truth.json', 'w') as out:
        json.dump(regions, out,
                  sort_keys=True,
                  indent=2,
                  separators=(',', ': '))