Пример #1
0
def _get_region(french_regions_tsv: str, prefix_tsv: str, region_id: str,
                default: Dict[str, str]) -> Dict[str, str]:
    """Look up the record of one région, loading the régions table on first use.

    The table is read through cleaned_data.french_regions, converted to a dict
    keyed by the table's index and stored in the module-level _REGIONS list.
    Later calls reuse that stored dict, so the two file arguments are only
    used while the cache is still empty.

    Args:
        french_regions_tsv: path passed as `filename` to
            cleaned_data.french_regions.
        prefix_tsv: path passed as `prefix_filename` to
            cleaned_data.french_regions.
        region_id: key of the région to look up.
        default: value returned when region_id is not in the table.
    Returns:
        The cached entry for region_id, or `default` if there is none.
    """
    if _REGIONS:
        # Cache hit: the table was already loaded by a previous call.
        return _REGIONS[0].get(region_id, default)

    regions_frame = cleaned_data.french_regions(
        filename=french_regions_tsv,
        prefix_filename=prefix_tsv)
    _REGIONS.append(regions_frame.to_dict(orient='index'))
    return _REGIONS[0].get(region_id, default)
Пример #2
0
def make_dicts(french_regions_tsv, prefix_tsv):
    """Build one dict per French région, ready to be imported in MongoDB.

    Args:
        french_regions_tsv: path to a TSV file containing the main
            information about régions from INSEE.
        prefix_tsv: path to a TSV file containing the prefix for each
            région.
    Returns:
        A list of dicts, one per row of the régions table, each holding the
        keys '_id' (the row's index value), 'name' and 'prefix'.
    """

    regions_frame = cleaned_data.french_regions(
        filename=french_regions_tsv, prefix_filename=prefix_tsv)

    # Expose the index as a regular column so that it ends up in every record.
    regions_frame['_id'] = regions_frame.index

    exported = regions_frame[['_id', 'name', 'prefix']]
    return exported.to_dict(orient='records')
Пример #3
0
def prepare_cities(data_folder='data',
                   stats_filename=None,
                   urban_entities_filename=None,
                   transport_scores_filename=None):
    """Prepare cities for upload to Algolia.

    Args:
        data_folder: the root of the data folder.
        stats_filename: path to a file containing more stats about cities.
        urban_entities_filename: path to an excel file containing the
            description about French urban entities.
        transport_scores_filename: path to an html file containing the scores for public
            transportation in some French cities.

    Returns:
        A list of dict JSON-like objects each containing properties of a French
        city: always 'objectID', 'cityId', 'name', 'departementId',
        'departementName', 'departementPrefix', 'regionId' and 'regionName';
        plus 'zipCode' and 'population' when stats_filename is given, 'urban'
        when urban_entities_filename is given and 'transport' when
        transport_scores_filename is given.
    """

    cities = cleaned_data.french_cities(data_folder)

    useful_columns = [
        'objectID', 'cityId', 'name', 'departementId', 'departementName',
        'departementPrefix', 'regionId', 'regionName'
    ]

    # Keep only cities that are still cities on 2016-01-01 and arrondissements.
    # Work on an explicit copy so that the columns added below are written to
    # this frame and not to a temporary slice of the full table.
    cities = cities[cities.current | cities.arrondissement].copy()

    # Set city ID on objectID as this is what Algolia uses.
    cities['objectID'] = cities.index

    # Get département's names.
    cities['departementId'] = cities.departement_id
    departements = cleaned_data.french_departements(data_folder)
    cities['departementName'] = cities.departement_id.map(departements.name)
    cities['departementPrefix'] = cities.departement_id.map(
        departements.prefix)

    # Get région's names.
    cities['regionId'] = cities.region_id
    cities['regionName'] = cities.region_id.map(
        cleaned_data.french_regions(data_folder).name)

    if stats_filename:
        # The stats file has no header row: only columns 8, 10 and 14 are read.
        city_stats = pandas.read_csv(
            stats_filename,
            sep=',',
            header=None,
            usecols=[8, 10, 14],
            names=['zipCode', 'city_id', 'population'],
            dtype={
                'zipCode': str,
                'city_id': str,
                'population': int
            })
        city_stats = city_stats.set_index('city_id')
        cities = cities.join(city_stats)
        # Cities absent from the stats file get an empty zip code and a
        # population of 0. Assign the filled columns back explicitly: an
        # in-place fillna on `cities.zipCode` is a chained assignment and is
        # not guaranteed to modify the frame.
        cities['zipCode'] = cities.zipCode.fillna('')
        cities['population'] = cities.population.fillna(0)
        useful_columns.extend(['zipCode', 'population'])

    cities['cityId'] = cities['objectID']
    # Treat arrondissements specifically: remove the full cities and use the
    # full city ID for the arrondissements (presumably Marseille, Paris and
    # Lyon respectively).
    cities.loc[cities.objectID.str.startswith('132'), 'cityId'] = '13055'
    cities.loc[cities.objectID.str.startswith('751'), 'cityId'] = '75056'
    cities.loc[cities.objectID.str.startswith('6938'), 'cityId'] = '69123'
    cities.drop(['13055', '75056', '69123'], errors='ignore', inplace=True)

    # The urban score is 0 for rural, 1 for cities in urban areas between
    # 2k and 5k inhabitants, 2 for urban areas below 10k, 3 for 20k, 4 for
    # 50k, 5 for 100k, 6 for 200k, 7 for 2M, and 8 for Paris urban area.
    if urban_entities_filename:
        urban = cleaned_data.french_urban_entities(
            filename=urban_entities_filename)
        # Cities without an urban entity are rural (0). `astype` returns a new
        # Series and has no in-place mode, so the result must be assigned.
        cities['urban'] = cities.cityId.map(urban.urban).fillna(0).astype(int)
        useful_columns.append('urban')

    if transport_scores_filename:
        transport = cleaned_data.transport_scores(
            filename=transport_scores_filename)
        # Cities without a known transport score default to 0.
        cities['transport'] = cities.objectID.map(transport).fillna(0)
        useful_columns.append('transport')

    return cities[useful_columns].to_dict(orient='records')
Пример #4
0
def prepare_cities(
        data_folder: str = 'data',
        stats_filename: Optional[str] = None,
        urban_entities_filename: Optional[str] = None,
        transport_scores_filename: Optional[str] = None) \
        -> list[dict[str, Any]]:
    """Prepare cities for upload to Algolia.

    Args:
        data_folder: the root of the data folder.
        stats_filename: path to a file containing more stats about cities.
        urban_entities_filename: path to an excel file containing the
            description about French urban entities.
        transport_scores_filename: path to an html file containing the scores for public
            transportation in some French cities.

    Returns:
        A list of dict JSON-like objects each containing properties of a French
        city, ordered by city index: always 'objectID', 'cityId', 'name',
        'departementId', 'departementName', 'departementPrefix', 'regionId'
        and 'regionName'; plus 'zipCode', 'population', 'latitude' and
        'longitude' when stats_filename is given, 'urban' when
        urban_entities_filename is given and 'transport' when
        transport_scores_filename is given.
    """

    cities = cleaned_data.french_cities(data_folder)

    useful_columns = [
        'objectID', 'cityId', 'name', 'departementId', 'departementName',
        'departementPrefix', 'regionId', 'regionName']

    # Keep only cities that are still cities on 2016-01-01.
    # Work on an explicit copy so that the columns added below are written to
    # this frame and not to a temporary slice of the full table.
    cities = cities[cities.current].copy()

    # Set city ID on objectID as this is what Algolia uses.
    cities['objectID'] = cities.index

    # Get département's names.
    cities['departementId'] = cities.departement_id
    departements = cleaned_data.french_departements(data_folder)
    cities['departementName'] = cities.departement_id.map(departements.name)
    cities['departementPrefix'] = cities.departement_id.map(departements.prefix)

    # Get région's names.
    cities['regionId'] = cities.region_id
    cities['regionName'] = cities.region_id.map(
        cleaned_data.french_regions(data_folder).name)

    if stats_filename:
        # The stats file has no header row: only the listed columns are read.
        city_stats = pandas.read_csv(
            stats_filename,
            sep=',', header=None, usecols=[8, 10, 14, 19, 20],
            names=['zipCode', 'city_id', 'population', 'longitude', 'latitude'],
            dtype={
                'zipCode': str,
                'city_id': str,
                'population': int,
                'latitude': float,
                'longitude': float,
            })
        city_stats = city_stats.set_index('city_id')
        cities = cities.join(city_stats)
        # Cities absent from the stats file get an empty zip code and 0 for
        # the numeric stats. Assign the filled columns back explicitly: an
        # in-place fillna on `cities.zipCode` is a chained assignment and is
        # not guaranteed to modify the frame.
        cities['zipCode'] = cities.zipCode.fillna('')
        cities['population'] = cities.population.fillna(0)
        cities['latitude'] = cities.latitude.fillna(0)
        cities['longitude'] = cities.longitude.fillna(0)
        useful_columns.extend(['zipCode', 'population', 'latitude', 'longitude'])

    # cityId is the ID used throughout the app.
    cities['cityId'] = cities['objectID']

    # The urban score is 0 for rural, 1 for cities in urban areas between
    # 2k and 5k inhabitants, 2 for urban areas below 10k, 3 for 20k, 4 for
    # 50k, 5 for 100k, 6 for 200k, 7 for 2M, and 8 for Paris urban area.
    if urban_entities_filename:
        urban = cleaned_data.french_urban_entities(filename=urban_entities_filename)
        # Cities without an urban entity are rural (0). `astype` returns a new
        # Series, so its result must be assigned for the cast to take effect.
        cities['urban'] = cities.cityId.map(urban.urban).fillna(0).astype(int)
        useful_columns.append('urban')

    if transport_scores_filename:
        transport = cleaned_data.transport_scores(filename=transport_scores_filename)
        # Cities without a known transport score default to 0.
        cities['transport'] = cities.objectID.map(transport).fillna(0)
        useful_columns.append('transport')

    return typing.cast(
        list[dict[str, Any]], cities.sort_index()[useful_columns].to_dict(orient='records'))