示例#1
0
def prepare():
    """Annotate the reviews of each area's city businesses and upsert them.

    For every document of ``pre_metropolitan_area`` the businesses whose
    ``tile15`` appears in the area's ``city_tiles15`` are loaded, their reviews
    are read from ``review`` and each review receives the tile ids and the
    normalised categories of its business, plus the area ``_id`` under
    ``city_area``. The annotated reviews are upserted into ``pre_review``.
    """
    # Keys copied unchanged from a business document onto each of its reviews.
    inherited_keys = ('tile10', 'tile15', 'tile18', 'norm_categories')

    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area'):
        city_filter = {'tile15': {'$in': area['city_tiles15']}}
        business = mongo_functions.mongo_get(
            collection='pre_business',
            filter=city_filter,
            fields=dict.fromkeys(inherited_keys, 1))

        business_ids = [doc['_id'] for doc in business]
        reviews = mongo_functions.mongo_get(
            collection='review',
            filter={'business_id': {'$in': business_ids}})

        business_by_id = {doc['_id']: doc for doc in business}
        for review in reviews:
            owner = business_by_id[review['business_id']]
            for key in inherited_keys:
                review[key] = owner[key]
            review['city_area'] = area['_id']

        # The update template is passed as a string, exactly as the helper
        # receives it elsewhere in this code base.
        mongo_functions.batch_upsert(reviews,
                                     collection='pre_review',
                                     update="{'$set': item}")
def prepare():
    """Compute the city core of every metropolitan area and store it.

    Businesses of an area are grouped by ``tile15``; the tile holding the most
    businesses is used as the seed passed to ``_group`` together with the set
    of all populated tiles. The first element of the ``_group`` result is
    stored as ``city_tiles15``, the centre of the seed tile as ``city_center``
    and the number of businesses inside those tiles as ``city_businesses``.

    Raises:
        IndexError: if an area has no businesses (no tile to pick as seed).
    """
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area')

    for area in areas:
        area_filter = {'tile10': {'$in': area['tiles']}}
        businesses = mongo_functions.mongo_get(collection='pre_business',
                                               filter=area_filter)
        by_tile15 = _group_business_by_tile15(businesses)

        # Most populated tile first; ties keep the dictionary's own order.
        ranked_tiles = sorted(by_tile15.items(),
                              key=lambda pair: len(pair[1]),
                              reverse=True)
        seed_tile = ranked_tiles[0][0]

        city_area = _group(set(by_tile15), seed_tile)
        city_tiles = city_area[0]

        # Only the size of this list is stored; the businesses themselves are
        # not written anywhere by this function.
        city_business = [
            venue for tile in city_tiles for venue in by_tile15[tile]
        ]

        # Tile ids have the form "<x>_<y>".
        seed_parts = seed_tile.split('_')
        area['city_center'] = list(
            tile_functions.tile_center(int(seed_parts[0]),
                                       int(seed_parts[1]), 15))
        area['city_tiles15'] = city_tiles
        area['city_businesses'] = len(city_business)

    mongo_functions.batch_update(areas,
                                 collection='pre_metropolitan_area',
                                 update="{'$set': item}")
示例#3
0
def export_city_business():
    """Write one ``<area>_business_ratio.csv`` file per metropolitan area.

    Only businesses of the area that already carry a ``raw_ratio`` are
    exported. Rows are produced by ``_business_dict_build`` and joined with
    ``;``; the header consists of the fixed columns followed by one column per
    business category. Spaces in the area id are replaced with underscores in
    the file name.
    """
    categories = _business_categories_get()
    fixed_columns = [
        '_id', 'area_id', 'ratio', 'name', 'review_count', 'stars',
        'categories'
    ]
    csv_fields = fixed_columns + categories
    wanted_fields = ('categories', 'name', 'review_count', 'stars',
                     'norm_categories', 'raw_ratio')

    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area'):
        business_dict = {}
        query = {
            'tile10': {'$in': area['tiles']},
            'raw_ratio': {'$exists': True}
        }
        _business = mongo_functions.mongo_get(
            collection='pre_business',
            filter=query,
            fields=dict.fromkeys(wanted_fields, 1))

        # Fills ``business_dict`` in place with one list of cells per business.
        _business_dict_build(_business, business_dict, categories, area,
                             csv_fields)

        rows = [';'.join(cells) for cells in business_dict.values()]
        content = ';'.join(csv_fields) + '\n' + '\n'.join(rows)
        file_name = '{}_business_ratio.csv'.format(
            area['_id'].replace(' ', '_'))
        write_csv_file(file_name, content)
def main():
    """Build per-tile tourist/resident statistics and upsert them to ``pre_tile``.

    For every metropolitan area the reviews that carry a ``user_from`` value
    are loaded and grouped per tile at zoom levels 15 and 18. A review counts
    as a resident review when its ``user_from`` equals the area ``_id`` and as
    a tourist review otherwise. Each tile document stores the unique reviewer
    ids, the review and unique-reviewer counters, the signed dissimilarity
    ratio returned by ``tile_dissimilarity_ratio`` and its absolute value.
    """
    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        area_id = area['_id']
        reviews = mongo_functions.mongo_get(collection='pre_review',
                                            filter={
                                                'city_area': area_id,
                                                'user_from': {
                                                    '$exists': True
                                                }
                                            },
                                            fields={
                                                'tile_id': 1,
                                                'user_id': 1,
                                                'tile15': 1,
                                                'tile18': 1,
                                                'user_from': 1
                                            })
        # City-wide totals are the denominators of the per-tile ratio.
        total_tourist = len(city_unique_tourist_get(reviews, area))
        total_residents = len(city_unique_residents_get(reviews, area))

        for zoom in [15, 18]:
            tile_reviews_dict = tile_reviews_dict_group(reviews, zoom)
            pre_tile_list = []
            for tile, tile_reviews in tile_reviews_dict.items():
                # Split the tile's reviews in a single pass.
                tourist_reviews = []
                resident_reviews = []
                for review in tile_reviews:
                    if review['user_from'] == area_id:
                        resident_reviews.append(review['user_id'])
                    else:
                        tourist_reviews.append(review['user_id'])

                tourist_unique = list(set(tourist_reviews))
                resident_unique = list(set(resident_reviews))
                ratio_signed = tile_dissimilarity_ratio(
                    len(tourist_unique), len(resident_unique), total_tourist,
                    total_residents)

                pre_tile_list.append({
                    '_id': '{}_{}'.format(tile, zoom),
                    'area_id': area_id,
                    'tourist_unique': tourist_unique,
                    'resident_unique': resident_unique,
                    'tourist_review_counter': len(tourist_reviews),
                    'resident_review_counter': len(resident_reviews),
                    'tourist_unique_counter': len(tourist_unique),
                    'resident_unique_counter': len(resident_unique),
                    'ratio_signed': ratio_signed,
                    # abs() instead of a manual sign flip, which produced
                    # -0.0 for a ratio of exactly 0.0.
                    'ratio': abs(ratio_signed)
                })

            mongo_functions.batch_upsert(pre_tile_list,
                                         collection='pre_tile',
                                         update='{"$set": item}')
示例#5
0
def prepare():
    """Create one GeoJSON file per area and zoom level, then the areas index.

    The businesses of each metropolitan area are loaded once and turned into
    features for every zoom level in ``ZOOMS``. After all areas are processed
    ``areas_json_create`` is called with the full list of areas.
    """
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area')

    for area in areas:
        tile_filter = {'tile10': {'$in': area['tiles']}}
        area_businesses = mongo_functions.mongo_get(collection='pre_business',
                                                    filter=tile_filter)
        for zoom in ZOOMS:
            zoom_features = features_prepare(area_businesses, zoom)
            geojson_file_create(area['_id'], zoom, zoom_features)

    areas_json_create(areas)
def prepare():
    """Tag the reviews of every area with the origin of their author.

    For each metropolitan area the reviews of ``pre_review`` are loaded, the
    ``local`` value of their authors is read from ``pre_user`` (in chunks of
    100000 ids) and stored on the review as ``user_from``. Reviews whose
    author has no ``local`` value are left untouched.

    NOTE: when collecting the user ids or loading the users fails, the error
    is printed and the function returns, so the remaining areas are not
    processed.
    """
    chunk_size = 100000

    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        reviews = mongo_functions.mongo_get(collection='pre_review',
                                            filter={'city_area': area['_id']},
                                            fields={'user_id': 1})

        try:
            user_ids = list({review['user_id'] for review in reviews})
        except Exception as error:
            print(error)
            return

        try:
            user_type_dict = {}
            # Query the users chunk by chunk to keep each `$in` list bounded.
            for offset in range(0, len(user_ids), chunk_size):
                current_user_ids = user_ids[offset:offset + chunk_size]
                known_users = mongo_functions.mongo_get(
                    collection='pre_user',
                    filter={
                        '_id': {'$in': current_user_ids},
                        'local': {'$exists': True}
                    },
                    fields={'local': 1})
                for user in known_users:
                    user_type_dict[user['_id']] = user['local']
        except Exception as error:
            print(error)
            return

        tagged_reviews = []
        # Consume the list from the end so processed reviews are released.
        while reviews:
            review = reviews.pop()
            try:
                review['user_from'] = user_type_dict[review['user_id']]
                tagged_reviews.append(review)
            except KeyError:
                # Author without a known origin: the review is skipped.
                continue
            except Exception as error:
                print(error)

        mongo_functions.batch_update(
            tagged_reviews,
            collection='pre_review',
            update='{"$set": {"user_from": item["user_from"]}}')
示例#7
0
def yearly_dissimilarity_ratio():
    """Store a per-year dissimilarity ratio on every reviewed business.

    For each metropolitan area the reviews are split by year. Within a year
    the unique visitors and residents of the city are counted and, for every
    business reviewed that year, ``business_dissimilarity_ratio`` is evaluated
    and kept with nine decimal places under ``ratio_yearly[<year>]``. The
    collected ratios are written to ``pre_business``.
    """
    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        business_dict = {}

        for year, reviews in _reviews_per_year(area).items():
            total_visitors = len(city_unique_visitors_get(reviews, area))
            total_residents = len(city_unique_residents_get(reviews, area))
            grouped_reviews = business_reviews_dict_group(reviews)

            for business_id, business_reviews in grouped_reviews.items():
                entry = business_dict.setdefault(business_id, {
                    '_id': business_id,
                    'ratio_yearly': {}
                })
                ratio = business_dissimilarity_ratio(business_reviews,
                                                     total_visitors,
                                                     total_residents, area)
                # Round-trip through a string to keep nine decimal places.
                entry['ratio_yearly'][str(year)] = float(
                    '{0:.9f}'.format(ratio))

        mongo_functions.batch_update(
            list(business_dict.values()),
            collection='pre_business',
            update='{"$set": {"ratio_yearly": item["ratio_yearly"]}}')
def process():
    """Classify users with ``reviewer_classifier`` and store their location.

    Users of ``pre_user`` with a ``review_count`` greater than 2 are loaded
    and classified one by one while a progress line is written to stdout.
    Users for which a location is returned get it stored under ``local`` and
    are written back to ``pre_user``. A summary of how many users fell into
    each classifier code is printed at the end.

    NOTE(review): the query keeps ``review_count > 2``, whereas the previous
    comment here spoke of discarding users with fewer than 2 reviews, so
    confirm the intended threshold.
    """
    users = mongo_functions.mongo_get(
        collection='pre_user',
        filter={'review_count': {'$gt': 2}},
        fields={'reviews': 1, 'review_count': 1, 'grouped_reviews': 1},
        page_size=2000000
    )
    users_len = len(users)
    located_users = []
    classifier_codes = {}

    print('\n')
    for index, user in enumerate(users, 1):
        sys.stdout.write('\rProcessing user {}/{}...'.format(index, users_len))
        sys.stdout.flush()

        user_location, code = reviewer_classifier(user)
        if code is not None:
            classifier_codes[code] = classifier_codes.get(code, 0) + 1
        if user_location is not None:
            user['local'] = user_location
            located_users.append(user)

    # Report the codes in ascending order.
    ordered_classification_codes = sorted(classifier_codes.items(),
                                          key=lambda pair: pair[0])
    print('\nSummary:')
    for code, amount in ordered_classification_codes:
        print('{} users classified as: {}'.format(amount, classifier_code_map[code]))

    mongo_functions.batch_update(located_users, collection='pre_user', update='{"$set": item}')
def prepare():
    """Add tile ids at zoom 10, 15 and 18 to every business and upsert them.

    The tile of a business is obtained with ``tiles.deg2num`` from its
    ``latitude`` and ``longitude`` and stored as its components joined with
    ``_``. Businesses for which the conversion raises ``TypeError`` are
    skipped; any other error propagates.
    """
    tile_keys = (('tile10', 10), ('tile15', 15), ('tile18', 18))
    businesses = mongo_functions.mongo_get(collection='business')
    pre_businesses = []

    for business in businesses:
        try:
            for key, zoom in tile_keys:
                tile = tiles.deg2num(business['latitude'],
                                     business['longitude'], zoom)
                business[key] = '_'.join(str(part) for part in tile)
        except TypeError:
            # Presumably a business without usable coordinates; skip it.
            continue
        pre_businesses.append(business)

    mongo_functions.batch_upsert(pre_businesses,
                                 collection='pre_business',
                                 update='{"$set": item}')
def metropolitan_area_center(metropolitan_area):
    """Store the centre of the area's businesses under ``center``.

    The coordinates of all businesses inside the area's ``tiles`` are loaded
    and handed to ``center``; its two results are saved, in the order
    returned, as a two-element list. The dictionary is modified in place.
    """
    tile_filter = {'tile10': {'$in': metropolitan_area['tiles']}}
    coordinates = mongo_functions.mongo_get(
        collection='pre_business',
        filter=tile_filter,
        fields={'latitude': 1, 'longitude': 1}
    )
    latitude, longitude = center(coordinates)
    metropolitan_area['center'] = [latitude, longitude]
def prepare():
    """Detect metropolitan areas from business tiles and upsert them.

    The distinct ``tile10`` ids of ``pre_business`` are sorted by their first
    component, grouped with ``tiles_group`` and filtered with
    ``groups_filter``. Every resulting area gets a centre and a name before
    the whole list is upserted into ``pre_metropolitan_area``.

    NOTE(review): the sort key is the first component as a *string*, so the
    order is lexicographic ("100" sorts before "99"); confirm that
    ``tiles_group`` does not rely on numeric order.
    """
    tile_docs = mongo_functions.mongo_get(collection='pre_business', fields={'tile10': 1})
    distinct_tiles = {doc['tile10'] for doc in tile_docs}
    ordered_tiles = sorted(distinct_tiles, key=lambda tile_id: tile_id.split('_')[0])

    metropolitan_areas = groups_filter(tiles_group(ordered_tiles))
    for metropolitan_area in metropolitan_areas:
        metropolitan_area_center(metropolitan_area)
        metropolitan_area_name(metropolitan_area)

    mongo_functions.batch_upsert(metropolitan_areas, collection='pre_metropolitan_area', update="{'$set': item}")
示例#12
0
def prepare():
    """Create business-count GeoJSON files per area and zoom level.

    For every metropolitan area and every zoom in ``ZOOMS`` the businesses are
    counted per tile and each tile is turned into a polygon carrying its
    ``business`` value. The polygons are written with ``geojson_file_create``;
    finally ``areas_json_create`` is called with all areas.
    """
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area')

    for area in areas:
        tile_filter = {'tile10': {'$in': area['tiles']}}
        area_businesses = mongo_functions.mongo_get(collection='pre_business',
                                                    filter=tile_filter)
        for zoom in ZOOMS:
            counted_tiles = business_count_per_tile(area_businesses, zoom)
            polygons = [
                map_functions.geojson_polygon_formatter(
                    {'business': entry['business']}, entry['tile'], zoom)
                for entry in counted_tiles
            ]
            geojson_file_create(area['_id'], zoom, polygons)

    areas_json_create(areas)
示例#13
0
def city_unique_users_get():
    """Print the number of unique visitors and residents of every area.

    Only reviews of the area that carry a ``user_from`` value are considered.
    The sets of users are obtained from ``city_unique_visitors_get`` and
    ``city_unique_residents_get``; nothing is stored or returned.
    """
    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        query = {'city_area': area['_id'], 'user_from': {'$exists': True}}
        reviews = mongo_functions.mongo_get(collection='pre_review',
                                            filter=query,
                                            fields={
                                                'user_from': 1,
                                                'user_id': 1
                                            })
        total_visitors = len(city_unique_visitors_get(reviews, area))
        total_residents = len(city_unique_residents_get(reviews, area))
        print('{} unique visitors -> {}'.format(area['_id'], total_visitors))
        print('{} unique residents -> {}'.format(area['_id'], total_residents))
示例#14
0
def user_reviews_dict_create():
    """Group the reviews that belong to a city area by their author.

    Returns:
        A dictionary mapping each ``user_id`` to the list of its reviews from
        ``pre_review`` that have a ``city_area`` value, in the order the
        reviews were returned by the query.
    """
    located_reviews = mongo_functions.mongo_get(
        collection='pre_review',
        filter={'city_area': {'$exists': True}})

    user_reviews_dict = {}
    for review in located_reviews:
        user_reviews_dict.setdefault(review['user_id'], []).append(review)
    return user_reviews_dict
def _get_all_years():
    """Collect every year that appears in any business' ``ratio_yearly``.

    The result is stored in the module-level ``years`` list; nothing is
    returned. The list is built from a set, so its order is not defined.
    """
    global years
    found_years = set()
    business_years_reviews = mongo_functions.mongo_get(
        collection='pre_business',
        filter={'ratio_yearly': {
            '$exists': True
        }},
        fields={'ratio_yearly': 1})
    for business in business_years_reviews:
        # set.update replaces a list comprehension that was used only for its
        # side effect and built a throwaway list of None values.
        found_years.update(business['ratio_yearly'].keys())
    years = list(found_years)
示例#16
0
def main():
    """Print the total number of businesses in all areas and in all cities.

    Sums the ``businesses`` and ``city_businesses`` fields of every
    ``pre_metropolitan_area`` document. Both fields are business counters, so
    the printed labels say "businesses" (they previously said "tiles").
    """
    metropolitan_area_counter = 0
    city_counter = 0
    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={
                                              'businesses': 1,
                                              'city_businesses': 1
                                          }):
        metropolitan_area_counter += area['businesses']
        city_counter += area['city_businesses']
    print('businesses in metropolitan_area {}'.format(
        metropolitan_area_counter))
    print('businesses in cities {}'.format(city_counter))
示例#17
0
def prepare():
    """Compute ``norm_categories`` for the businesses of every area.

    The category dictionary is prepared once. For each metropolitan area the
    businesses inside its ``tiles`` are loaded with their ``name`` and
    ``categories``, processed in place by ``business_prepare`` and their
    ``norm_categories`` written back to ``pre_business``.
    """
    category_dict = category_dict_prepare()
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                      fields={
                                          'tiles': 1,
                                          'name': 1
                                      })
    for area in areas:
        print('processing business for ', area['_id'])
        area_businesses = mongo_functions.mongo_get(
            collection='pre_business',
            filter={"tile10": {'$in': area['tiles']}},
            fields={'name': 1, 'categories': 1})

        # Expected to set "norm_categories" on every business document.
        business_prepare(area_businesses, category_dict)

        mongo_functions.batch_update(
            area_businesses,
            collection='pre_business',
            update='{"$set": {"norm_categories": item["norm_categories"]}}')
def prepare():
    """Create yearly-ratio GeoJSON files per area and zoom level.

    Only businesses of the area that have a ``ratio_yearly`` value are used.
    For every zoom in ``ZOOMS`` their ratios are grouped with
    ``ratios_group``, converted to features and written with
    ``geojson_file_create``.
    """
    wanted_fields = ('ratio_yearly', 'tile15', 'tile18', 'norm_categories')
    areas = mongo_functions.mongo_get(collection='pre_metropolitan_area')

    for area in areas:
        query = {
            'tile10': {'$in': area['tiles']},
            'ratio_yearly': {'$exists': True}
        }
        businesses = mongo_functions.mongo_get(
            collection='pre_business',
            filter=query,
            fields=dict.fromkeys(wanted_fields, 1))

        for zoom in ZOOMS:
            grouped_ratios = ratios_group(businesses, zoom)
            zoom_features = features_prepare(grouped_ratios, zoom)
            geojson_file_create(area['_id'], zoom, zoom_features)
示例#19
0
def main():
    """Compute and store the ``raw_ratio`` of every reviewed business.

    For each metropolitan area the reviews that carry a ``user_from`` value
    are loaded, the unique visitors and residents of the city are counted and
    ``business_dissimilarity_ratio`` is evaluated for every business that has
    reviews. The ratios are written to ``pre_business`` as ``raw_ratio``.
    """
    wanted_fields = ('business_id', 'user_id', 'tile15', 'tile18',
                     'user_from')

    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area',
                                          fields={}):
        query = {'city_area': area['_id'], 'user_from': {'$exists': True}}
        reviews = mongo_functions.mongo_get(
            collection='pre_review',
            filter=query,
            fields=dict.fromkeys(wanted_fields, 1))

        total_visitors = len(city_unique_visitors_get(reviews, area))
        total_residents = len(city_unique_residents_get(reviews, area))
        grouped_reviews = business_reviews_dict_group(reviews)

        business_list = [{
            '_id': business_id,
            'raw_ratio': business_dissimilarity_ratio(business_reviews,
                                                      total_visitors,
                                                      total_residents, area)
        } for business_id, business_reviews in grouped_reviews.items()]

        mongo_functions.batch_update(
            business_list,
            collection='pre_business',
            update='{"$set": {"raw_ratio": item["raw_ratio"]}}')
def metropolitan_area_name(metropolitan_area):
    """Name the area after the most frequent ``city`` of its businesses.

    The chosen city is stored under both ``name`` and ``_id``; the dictionary
    is modified in place. When several cities share the highest count, the
    one encountered first wins.

    Raises:
        IndexError: if no business is found inside the area's tiles.
    """
    city_docs = mongo_functions.mongo_get(
        collection='pre_business',
        filter={'tile10': {'$in': metropolitan_area['tiles']}},
        fields={'city': 1}
    )

    occurrences = {}
    for doc in city_docs:
        city = doc['city']
        occurrences[city] = occurrences.get(city, 0) + 1

    # Stable sort: among equal counts the first city seen stays in front.
    ranking = sorted(occurrences.items(), key=lambda pair: pair[1], reverse=True)
    top_city = ranking[0][0]

    metropolitan_area['name'] = top_city
    metropolitan_area['_id'] = top_city
示例#21
0
def export():
    """Export all businesses with a ratio to ``business_category_ratio.csv``.

    For every area, category and sort direction the businesses with a
    ``raw_ratio`` are fetched through ``_business_sorted_get``. Each business
    is exported once (the first time it is seen) as a ``;`` separated row with
    the fixed columns followed by one column per category, holding ``1`` when
    the business belongs to that category and an empty string otherwise.
    """
    categories = _business_categories_get()
    csv_fields = [
        '_id', 'area_id', 'ratio', 'name', 'review_count', 'stars',
        'categories'
    ] + categories
    wanted_fields = ('categories', 'name', 'review_count', 'stars',
                     'norm_categories', 'raw_ratio')
    rows_by_id = {}

    for area in mongo_functions.mongo_get(collection='pre_metropolitan_area'):
        for category in categories:
            for order in [1, -1]:
                query = {
                    'tile10': {'$in': area['tiles']},
                    'norm_categories': category,
                    'raw_ratio': {'$exists': True}
                }
                venues = _business_sorted_get(
                    filter=query,
                    sort=('raw_ratio', order),
                    fields=dict.fromkeys(wanted_fields, 1))

                for venue in venues:
                    if venue['_id'] in rows_by_id:
                        # Already exported through another category or order.
                        continue
                    venue['area_id'] = area['_id']
                    venue['categories'] = ','.join(venue['categories'])
                    venue['ratio'] = venue['raw_ratio']
                    for _cat in categories:
                        member = _cat in venue['norm_categories']
                        venue[_cat] = '1' if member else ''
                    rows_by_id[venue['_id']] = [
                        str(venue[field]) for field in csv_fields
                    ]

    rows = [';'.join(cells) for cells in rows_by_id.values()]
    content = ';'.join(csv_fields) + '\n' + '\n'.join(rows)
    write_csv_file('business_category_ratio.csv', content)
    print('yey')
示例#22
0
def prepare():
    """Build ``pre_user`` from the users that have reviews in a city area.

    Every user of ``user`` that appears in the dictionary returned by
    ``user_reviews_dict_create`` gets its reviews attached under ``reviews``,
    is processed by ``_user_reviews_prepare`` and is upserted into
    ``pre_user``. Progress is written to stdout on a single line.
    """
    user_reviews_dict = user_reviews_dict_create()
    pending_users = mongo_functions.mongo_get(collection='user')
    users_len = len(pending_users)
    prepared_users = []
    processed = 0

    print('\n')
    # Users are taken from the end of the list so it shrinks while we work.
    while pending_users:
        processed += 1
        sys.stdout.write('\rProcessing user {}/{}...'.format(
            processed, users_len))
        sys.stdout.flush()

        candidate = pending_users.pop()
        if candidate['_id'] not in user_reviews_dict:
            continue
        candidate['reviews'] = user_reviews_dict[candidate['_id']]
        _user_reviews_prepare(candidate)
        prepared_users.append(candidate)

    mongo_functions.batch_upsert(prepared_users,
                                 collection='pre_user',
                                 update='{"$set": item}')
示例#23
0
def _reviews_per_year(area):
    """Group the area's reviews by the year of their ``date``.

    Only reviews of ``pre_review`` that belong to the area and carry a
    ``user_from`` value are loaded. Dates are parsed with the ``%Y-%m-%d``
    format.

    Args:
        area: metropolitan area document; only its ``_id`` is used.

    Returns:
        A dictionary mapping each year (``int``) to the list of its reviews.
    """
    wanted_fields = ('business_id', 'user_id', 'tile15', 'tile18',
                     'user_from', 'date')
    query = {'city_area': area['_id'], 'user_from': {'$exists': True}}
    reviews = mongo_functions.mongo_get(
        collection='pre_review',
        filter=query,
        fields=dict.fromkeys(wanted_fields, 1))

    reviews_per_year_dict = {}
    for review in reviews:
        written_on = datetime.datetime.strptime(review['date'], '%Y-%m-%d')
        reviews_per_year_dict.setdefault(written_on.year, []).append(review)
    return reviews_per_year_dict
def business_count(tiles):
    """Return how many ``pre_business`` documents lie in the given tiles.

    Args:
        tiles: ``tile10`` ids to match.

    Returns:
        The length of the query result.
    """
    tile_filter = {'tile10': {'$in': tiles}}
    matches = mongo_functions.mongo_get(collection='pre_business', filter=tile_filter, fields={})
    return len(matches)