def tag_count(mongo, city, flickr=True, bbox=None, output='tags_count.dat'):
    """Return ordered {tags: count} dict from `flickr` photos (or Foursquare
    venues), in `city` and optionally within `bbox`.

    The counting is done server side by an aggregation pipeline: documents are
    matched on the city field (`hint` for Flickr photos, `city` for Foursquare
    venues), their `tags` array is unwound, and tags are grouped and summed.
    Results are sorted by decreasing count, ties broken by decreasing tag.

    The tag and count columns are also handed to `outplot` together with the
    `output` name (presumably written to that file -- confirm in `outplot`).
    An empty match yields an empty OrderedDict instead of raising IndexError.
    """
    # NOTE(review): another `tag_count` with the same signature is defined
    # later in this file; the later definition is the one that ends up bound
    # to the name. Consider removing one of the two.
    start = clock()
    city_field = 'hint' if flickr else 'city'
    collec = mongo.world.photos if flickr else mongo.foursquare.venue

    match = {city_field: city}
    if bbox:
        match["loc"] = inside_bbox(bbox)
    query = [
        {"$match": match},
        {"$unwind": "$tags"},
        {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
        {"$sort": SON([("count", -1), ("_id", -1)])},
    ]

    tags = collec.aggregate(query)
    # Legacy PyMongo replies with {'result': [...]}; newer releases return a
    # cursor over the same documents. Accept both shapes.
    results = tags['result'] if isinstance(tags, dict) else list(tags)
    t = 1000 * (clock() - start)

    # Show the most frequent tag, or None when nothing matched.
    top = results[0] if results else None
    print('aggregate in {:.3f}ms ({})'.format(t, top))

    # Real lists (not lazy `map` objects) so the columns survive being
    # consumed by `outplot` and can still be zipped afterwards.
    name = [doc['_id'] for doc in results]
    count = [doc['count'] for doc in results]
    outplot(output, ['tag', 'count'], name, count)
    return OrderedDict(zip(name, count))
def top_metrics(tags):
    """Compute `fixed_tag_metrics` for every tag and export the results.

    Each result is indexed as a 5-item sequence: items 0 and 1 are collected
    as `mus` and `sigmas`, items 2 and 3 are plotted under the column name
    'H' in 'e_grav.dat' and 'e_pair.dat' respectively, and item 4 is used as
    the tag label. (The exact meaning of each item is defined by
    `fixed_tag_metrics` -- confirm there.)

    Side effects: two `outplot` calls, a MATLAB file 'mu_sigma' holding an
    (n_tags, 2) matrix 'A' of [mu, sigma] rows, and a call to `tag_cloud`.
    Returns None.
    """
    # Materialise the results: they are traversed several times below, which
    # a lazy `map` object would not survive. Computed sequentially (an earlier
    # Pool(4) variant had been left commented out here).
    res = [fixed_tag_metrics(tag) for tag in tags]

    outplot('e_grav.dat', ['H', 'tags'],
            [v[2] for v in res], [v[4] for v in res])
    outplot('e_pair.dat', ['H', 'tags'],
            [v[3] for v in res], [v[4] for v in res])

    mus = np.array([v[0] for v in res])
    sigmas = np.array([v[1] for v in res])
    # Stack as two rows then transpose: one [mu, sigma] row per tag.
    sio.savemat('mu_sigma', {'A': np.vstack([mus, sigmas]).T})

    # Pass a concrete list of (mu, sigma) pairs rather than a one-shot zip.
    tag_cloud([v[4] for v in res], list(zip(mus, sigmas)), True)
def tag_count(mongo, city, flickr=True, bbox=None, output='tags_count.dat'):
    """Return ordered {tags: count} dict from `flickr` photos (or Foursquare
    venues), in `city` and optionally within `bbox`.

    Builds and runs an aggregation pipeline that matches documents on the
    city field (`hint` for Flickr photos, `city` for Foursquare venues),
    unwinds their `tags` array, then groups and counts identical tags. The
    result is ordered by decreasing count, ties broken by decreasing tag.

    The tag and count columns are also passed to `outplot` along with the
    `output` name (presumably written to that file -- confirm in `outplot`).
    When nothing matches, an empty OrderedDict is returned rather than
    failing with IndexError.
    """
    # NOTE(review): an earlier definition of `tag_count` with the same
    # signature exists in this file; this later one replaces it when the
    # module is loaded. Consider deleting the duplicate.
    start = clock()
    city_field = 'hint' if flickr else 'city'
    collec = mongo.world.photos if flickr else mongo.foursquare.venue

    criteria = {city_field: city}
    if bbox:
        criteria["loc"] = inside_bbox(bbox)
    query = [
        {"$match": criteria},
        {"$unwind": "$tags"},
        {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
        {"$sort": SON([("count", -1), ("_id", -1)])},
    ]

    tags = collec.aggregate(query)
    # Legacy PyMongo answers with {'result': [...]}; newer releases hand back
    # a cursor over the same documents. Support both shapes.
    rows = tags['result'] if isinstance(tags, dict) else list(tags)
    t = 1000 * (clock() - start)

    # Report the most frequent tag, or None for an empty result set.
    first = rows[0] if rows else None
    print('aggregate in {:.3f}ms ({})'.format(t, first))

    # Build real lists (not lazy `map` objects) so the columns can be used by
    # `outplot` and then zipped again for the return value.
    name = [row['_id'] for row in rows]
    count = [row['count'] for row in rows]
    outplot(output, ['tag', 'count'], name, count)
    return OrderedDict(zip(name, count))