Example #1
    def test_importer_filter_ids(self, mongo_mock):
        """Test of the filter_ids flag."""
        def richer_importer_func():
            """An importer with many outputs."""

            return list({
                '_id': 'foo-{:02d}'.format(i),
                'value': i
            } for i in range(20))

        mongo_mock.return_value = mock.MagicMock()
        mongo.importer_main(richer_importer_func,
                            'my-collection', ['foo', '--filter_ids', 'foo-.2'],
                            flag_values=gflags.FlagValues())

        import_in_collection = mongo_mock.return_value.import_in_collection
        self.assertTrue(import_in_collection.called)
        call_args = import_in_collection.call_args[0]
        self.assertEqual([{
            '_id': 'foo-02',
            'value': 2
        }, {
            '_id': 'foo-12',
            'value': 12
        }], call_args[0])
        self.assertEqual('my-collection', call_args[1])
Example #2
    def test_importer_main_no_args(self) -> None:
        """Test the importer_main without args."""

        with self.assertRaises(SystemExit):
            mongo.importer_main(_my_importer_func,
                                'my-collection', ['foo'],
                                out=self.output)
Example #3
    def test_fail_on_diff_when_no_diff(
            self, mock_requests: requests_mock.Mocker) -> None:
        """Test of the fail_on_diff flag when there are no diffs."""

        result = [{'dummy': 3, '_id': 'only-one'}]

        def import_func() -> list[dict[str, Any]]:
            """Foo."""

            return result

        mock_requests.post('https://slack.example.com/webhook')

        mongo.importer_main(import_func, 'my-collection', [], out=self.output)

        self.assertEqual(1, mock_requests.call_count)

        mongo.importer_main(import_func,
                            'my-collection', ['--fail_on_diff'],
                            out=self.output)

        self.assertEqual(2, mock_requests.call_count)
        self.assertIn(
            'The data is already up to date.',
            mock_requests.request_history[1].json()['attachments'][0]['text'])

        self.assertEqual(
            1, self.db_client.test['my-collection'].count_documents({}))
        value = self.db_client.test['my-collection'].find_one()
        assert value
        del value['_id']
        self.assertEqual({'dummy': 3}, value)
Example #4
    def test_importer_main_no_args(self) -> None:
        """Test the importer_main without args."""

        with self.assertRaises(argparse.ArgumentError):
            mongo.importer_main(_my_importer_func,
                                'my-collection', ['foo'],
                                out=self.output)
Example #5
    def test_importer_main_no_args_but_default(self) -> None:
        """Test the importer_main without args but with default value."""
        def import_func(arg1: str = 'default value') -> list[dict[str, Any]]:
            """Foo."""

            return [{'dummy': 2, 'arg1': arg1}]

        mongo.importer_main(import_func, 'my-collection', [], out=self.output)

        value = self.db_client.test['my-collection'].find_one()
        assert value
        del value['_id']
        self.assertEqual({'arg1': 'default value', 'dummy': 2}, value)
Example #6
    def test_importer_main(self, mongo_mock):
        """Test of basic usage of the importer_main function."""

        mongo_mock.return_value = mock.MagicMock()
        mongo.importer_main(_my_importer_func,
                            'my-collection',
                            ['foo', '--arg1', 'Value of arg1'],
                            flag_values=gflags.FlagValues())

        import_in_collection = mongo_mock.return_value.import_in_collection
        self.assertTrue(import_in_collection.called)
        call_args = import_in_collection.call_args[0]
        self.assertEqual([{'arg1': 'Value of arg1', 'dummy': 2}], call_args[0])
        self.assertEqual('my-collection', call_args[1])
Example #7
    def test_importer_main(self) -> None:
        """Test of basic usage of the importer_main function."""

        mongo.importer_main(_my_importer_func,
                            'my-collection', ['--arg1', 'Value of arg1'],
                            out=self.output)

        self.assertEqual(['meta', 'my-collection'],
                         sorted(self.db_client.test.list_collection_names()))
        self.assertEqual(
            1, self.db_client.test['my-collection'].count_documents({}))
        value = self.db_client.test['my-collection'].find_one()
        assert value
        del value['_id']
        self.assertEqual({'arg1': 'Value of arg1', 'dummy': 2}, value)
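For reference, a minimal sketch of what the `_my_importer_func` fixture used in these tests presumably looks like, inferred from the assertions above (its real definition is not shown here, so this is a hypothetical reconstruction):

from typing import Any


def _my_importer_func(arg1: str) -> list[dict[str, Any]]:
    """A basic importer function (hypothetical sketch inferred from the test assertions)."""

    return [{'dummy': 2, 'arg1': arg1}]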
Example #8
    def test_importer_main_no_args_but_default(self, mongo_mock):
        """Test the importer_main without args but with default value."""
        def import_func(arg1='default value'):
            """Foo."""

            return [{'dummy': 2, 'arg1': arg1}]

        mongo_mock.return_value = mock.MagicMock()
        mongo.importer_main(import_func,
                            'my-collection', ['foo'],
                            flag_values=gflags.FlagValues())
        import_in_collection = mongo_mock.return_value.import_in_collection
        self.assertTrue(import_in_collection.called)
        call_args = import_in_collection.call_args[0]
        self.assertEqual([{'arg1': 'default value', 'dummy': 2}], call_args[0])
Example #9
    def test_importer_main_with_input_file(self) -> None:
        """Test that the import_func doesn't get called with an input file."""
        def importer_func() -> list[dict[str, Any]]:  # pragma: no-cover
            """Foo."""

            self.fail('Should not be called')
            return []

        testdata_dir = path.join(path.dirname(__file__), 'testdata')
        json_path = path.join(testdata_dir, 'import_dummy_data.json')
        mongo.importer_main(importer_func,
                            'my_collection', ['--from_json', json_path],
                            out=self.output)
        self.assertEqual(1,
                         len(list(self.db_client.test.my_collection.find())))
Example #10
    def test_importer_collection_name(self) -> None:
        """Test the importer_main getting the collection name."""
        def import_func(collection_name: str) -> list[dict[str, Any]]:
            """Foo."""

            return [{'dummy': 2, 'collection_name': collection_name}]

        mongo.importer_main(import_func,
                            'my-collection',
                            ['--mongo_collection', 'cli-name'],
                            out=self.output)

        value = self.db_client.test['cli-name'].find_one()
        assert value
        del value['_id']
        self.assertEqual({'collection_name': 'cli-name', 'dummy': 2}, value)
Example #11
    def test_importer_main_with_output_file(self, mongo_mock):
        """Test that data gets written to file instead of DB when file given."""

        out_path = tempfile.mktemp()
        mongo.importer_main(
            _my_importer_func,
            'my-collection',
            ['', '--to_json', out_path, '--arg1', 'arg1 test value'],
            flag_values=gflags.FlagValues())
        import_in_collection = mongo_mock.return_value.import_in_collection
        self.assertFalse(import_in_collection.called)
        with open(out_path) as json_file:
            json_content = json_file.read()
            self.assertEqual([{
                'arg1': 'arg1 test value',
                'dummy': 2
            }], json.loads(json_content))
            self.assertTrue(json_content.endswith('\n'))
Example #12
    def test_importer_filter_ids(self) -> None:
        """Test of the filter_ids flag."""
        def richer_importer_func() -> list[dict[str, Any]]:
            """An importer with many outputs."""

            return list({'_id': f'foo-{i:02d}', 'value': i} for i in range(20))

        mongo.importer_main(richer_importer_func,
                            'my-collection', ['--filter_ids', 'foo-.2'],
                            out=self.output)

        self.assertEqual([{
            '_id': 'foo-02',
            'value': 2
        }, {
            '_id': 'foo-12',
            'value': 12
        }], list(self.db_client.test['my-collection'].find()))
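The value passed to --filter_ids is a regular expression; presumably importer_main keeps only the documents whose '_id' matches it, which is why only 'foo-02' and 'foo-12' survive. A rough sketch of that filtering step (an assumption about the flag's behaviour, not importer_main's actual code):

import re

_filter_ids = re.compile('foo-.2')
kept = [doc for doc in richer_importer_func() if _filter_ids.match(doc['_id'])]
# kept == [{'_id': 'foo-02', 'value': 2}, {'_id': 'foo-12', 'value': 12}]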
Example #13
    def test_importer_main_with_input_file(self, pymongo_mock):
        """Test that the import_func doesn't get called with an input file."""

        mock_importer_func = mock.MagicMock(spec=_my_importer_func)

        def importer_func():
            """Foo."""

            mock_importer_func()

        client = mongomock.MongoClient('mongodb://mongo-url/test')
        pymongo_mock.MongoClient.return_value = client
        testdata_dir = path.join(path.dirname(__file__), 'testdata')
        json_path = path.join(testdata_dir, 'import_dummy_data.json')
        mongo.importer_main(importer_func,
                            'my_collection', ['', '--from_json', json_path],
                            flag_values=gflags.FlagValues())
        self.assertFalse(mock_importer_func.called)
        self.assertEqual(1, len(list(client.test.my_collection.find())))
Example #14
    def test_importer_main_with_output_file(self) -> None:
        """Test that data gets written to file instead of DB when file given."""

        out_path = tempfile.mktemp()
        mongo.importer_main(
            _my_importer_func,
            'my-collection',
            ['--to_json', out_path, '--arg1', 'arg1 test value'],
            out=self.output)

        with open(out_path, encoding='utf-8') as json_file:
            json_content = json_file.read()
            self.assertEqual([{
                'arg1': 'arg1 test value',
                'dummy': 2
            }], json.loads(json_content))
            self.assertTrue(json_content.endswith('\n'))

        self.assertEqual(
            0, len(list(self.db_client.test['my-collection'].find())))
Example #15
    def test_fail_on_diff(self, mock_requests: requests_mock.Mocker) -> None:
        """Test of the fail_on_diff flag."""

        result = [{'dummy': 3, '_id': 'only-one'}]

        def import_func() -> list[dict[str, Any]]:
            """Foo."""

            return result

        mock_requests.post('https://slack.example.com/webhook')

        mongo.importer_main(import_func, 'my-collection', [], out=self.output)

        self.assertEqual(1, mock_requests.call_count)
        mock_requests.reset_mock()  # type: ignore

        result[0]['dummy'] = 4

        with self.assertRaises(ValueError):
            mongo.importer_main(import_func,
                                'my-collection', ['--fail_on_diff'],
                                out=self.output)

        self.assertEqual(1, mock_requests.call_count)
        self.assertIn(
            'There are some diffs to import.',
            mock_requests.request_history[0].json()['attachments'][0]['text'])

        self.assertEqual(
            1, self.db_client.test['my-collection'].count_documents({}))
        value = self.db_client.test['my-collection'].find_one()
        assert value
        del value['_id']
        self.assertEqual({'dummy': 3},
                         value,
                         msg='Values should not have been updated')
Example #16
def validate(values, proto_class):
    """Validate that the values have the right format.

    Args:
        values: an iterable of dict with the JSON values of proto. They may
            have an additional "_id" field that will be ignored.
        proto_class: the Python class of the proto that should be contained in
            the values.
    Returns:
        the input for chainability
    Raises:
        ValueError if one of the values doesn't have the right format.
    """

    for value in values:
        proto = proto_class()
        _id = value.pop('_id', None)
        # Enforce Proto schema.
        try:
            json_format.Parse(json.dumps(value), proto)
        except json_format.ParseError as error:
            raise ValueError('Error while parsing:\n{}\n{}'.format(
                json.dumps(value, indent=2), error)) from error
        if _id is not None:
            value['_id'] = _id
    return values


if __name__ == '__main__':
    mongo.importer_main(airtable2dicts, 'test')  # pragma: no-cover
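A minimal usage sketch of validate, using google.protobuf.struct_pb2.Struct as a stand-in proto class (a Struct accepts any JSON object); real importers would pass their own generated proto class instead:

from google.protobuf import struct_pb2

records = validate([{'_id': 'rec1', 'title': 'Hello'}], struct_pb2.Struct)
# records still contains the '_id' field; a value that cannot be parsed
# into the given proto raises ValueError instead.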
Example #17
    ]]
    samples.rename(columns={
        'target_job': 'codeOgr',
        'target_job_name': 'name',
        'target_job_masculine_name': 'masculineName',
        'target_job_feminine_name': 'feminineName',
    },
                   inplace=True)

    return {
        'jobGroup': {
            'romeId': jobs.target_job_group.iloc[0],
            'name': jobs.target_job_group_name.iloc[0],
            'samples': samples.to_dict('records'),
        }
    }


def _sample_jobs(num_samples):
    def _sampling(jobs):
        if len(jobs.index) > num_samples:
            jobs = jobs.sample(n=num_samples)
        jobs = jobs[['codeOgr', 'name', 'masculineName', 'feminineName']]
        return jobs.to_dict('records')

    return _sampling


if __name__ == '__main__':
    mongo.importer_main(csv2dicts, 'similar_jobs')  # pragma: no cover
Example #18
    departement_missions = missions[~missions.isAvailableEverywhere]\
        .groupby('departement').apply(_get_random_missions_picker(5))

    returned_missions = country_wide_missions + [{
        '_id': departement_id,
        'missions': missions
    } for departement_id, missions in departement_missions.items()]
    if not check_coverage(returned_missions):
        raise ValueError('The putative new data lacks coverage.')
    return returned_missions


def _get_random_missions_picker(num_missions: int) \
        -> Callable[[pd.DataFrame], List[Dict[str, Any]]]:
    def _pick_random_missions(missions: pd.DataFrame) -> List[Dict[str, Any]]:
        if len(missions) > num_missions:
            samples = missions.sample(num_missions)
        else:
            samples = missions
        return typing.cast(
            List[Dict[str, Any]],
            samples[['associationName', 'title', 'link',
                     'description']].to_dict('records'))

    return _pick_random_missions


if __name__ == '__main__':
    mongo.importer_main(get_missions_dicts, 'volunteering_missions')
Example #19
    with open(events_file_name) as json_data:
        salons = typing.cast(List[Dict[str, Any]], json.load(json_data))

    for salon in salons:
        salon['start_date'] = _isodate_from_string(salon['dateDebut'])
        salon['application_start_date'] = _isodate_from_string(
            salon['dateDebutCandidature'])
        salon['application_end_date'] = _isodate_from_string(
            salon['dateFinCandidature'], is_end_of_day=True)
        salon['locations'] = _get_city(
            french_regions_tsv, prefix_tsv,
            typing.cast(str, salon.get('localisation', '')))
        salon = _aggregate_rule_results(salon, rules)
        if not salon['locations']:
            logging.warning('Missing locations on salon\n%s', salon)
        # TODO(cyrille): Add test for not missing case.
        if not salon.get('jobGroupIds'):
            logging.warning('Missing job groups on salon\n%s', salon)
        for old, new in _FIELD_RENAMER.items():
            try:
                salon[new] = salon.pop(old)
            except KeyError:
                continue
        for field in _FIELDS_TO_DROP:
            salon.pop(field, None)
    return salons


if __name__ == '__main__':
    mongo.importer_main(json2dicts, 'online_salons')
Example #20
        stats_filename: path to a file containing stats about cities.
        urban_context_filename: path to a file containing urban context
            info for each city.

    Returns:
        A list of dict JSON-like object compatible with the geo_pb2.FrenchCity
        proto.
    """

    city_stats = pandas.read_csv(stats_filename,
                                 sep=',',
                                 header=None,
                                 usecols=[10, 19, 20],
                                 names=['_id', 'longitude', 'latitude'],
                                 dtype={
                                     '_id': str,
                                     'latitude': float,
                                     'longitude': float
                                 })
    # Drop cities with missing coordinates.
    city_stats.dropna(inplace=True)
    urban_contexts = cleaned_data.french_urban_areas(
        filename=urban_context_filename)
    city_stats['urbanContext'] = city_stats['_id'].map(urban_contexts.periurban)\
        .fillna(geo_pb2.UNKNOWN_URBAN_CONTEXT).astype(int)
    return typing.cast(List[Dict[str, Any]],
                       city_stats.to_dict(orient='records'))


if __name__ == '__main__':
    mongo.importer_main(csv2dicts, 'cities')
Example #21
    by_region['region_count'] = region_count
    city_count = by_region.reset_index()

    # Compute country counts for each city.
    country_count = recent_offers.groupby('rome_id').id_offre.count()
    by_country = city_count.set_index('rome_id')
    by_country['country_count'] = country_count
    city_count = by_country.reset_index()

    for row in city_count.itertuples():
        res.append({
            '_id': row.rome_id + ':c' + row.city_code,
            'city': {
                'cityId': row.city_code,
                'name': row.city_name,
                'departementId': row.departement_code,
                'departementName': row.departement_name,
                'regionId': row.region_code,
                'regionName': row.region_name,
            },
            'cityCount': int(row.city_count),
            'departementCount': int(row.departement_count),
            'regionCount': int(row.region_count),
            'countryCount': int(row.country_count),
        })
    return res


if __name__ == '__main__':
    mongo.importer_main(csv2dicts, 'job_offers')  # pragma: no cover
Example #22
            job_requirement.pop('_id'): job_requirement
            for job_requirement in job_requirements_list
        }
    job_groups['requirements'] = job_groups.index.map(job_requirements_dict)
    # Replace NaN by empty dicts.
    job_groups['requirements'] = job_groups.requirements.apply(
        lambda r: r if isinstance(r, dict) else {})

    # SkillsForFuture
    skills_for_future_by_rome = airtable_to_protos.load_items_from_prefix(
        'Skill', job_groups.index, skills_for_future_airtable,
        'soc_prefixes_us')
    if skills_for_future_by_rome:
        with translation.Translator() as translator:
            translated_skills_for_future_by_rome = {
                rome_id: [
                    skill | translator.ensure_translate_fields(
                        skill, locale='en', fields=_SKILL_18N_FIELDS)
                    for skill in skills
                ]
                for rome_id, skills in skills_for_future_by_rome.items()
            }
        job_groups['skillsForFuture'] = job_groups.index.map(
            translated_skills_for_future_by_rome)

    return typing.cast(list[dict[str, Any]], job_groups.to_dict('records'))


if __name__ == '__main__':
    mongo.importer_main(make_dicts, 'job_group_info')
Example #23
API_KEY = os.getenv('AIRTABLE_API_KEY')


def airtable2dicts(base_id: str,
                   table: str,
                   view: Optional[str] = None) -> List[Dict[str, Any]]:
    """Import the users email from Airtable.

    Args:
        base_id: the ID of your Airtable app.
        table: the name of the table to import.
        view: optional - the name of the view to import.
    Returns:
        an iterable of dict with the JSON values of the proto.
    """

    if not API_KEY:
        raise ValueError(
            'No API key found. Create an airtable API key at '
            'https://airtable.com/account and set it in the AIRTABLE_API_KEY '
            'env var.')
    client = airtable.Airtable(base_id, API_KEY)
    records = client.iterate(table, view=view)

    return [{'_id': r.get('fields', {}).get('email', '')} for r in records]


if __name__ == '__main__':
    mongo.importer_main(airtable2dicts, 'show_unverified_data_users')
Example #24
            },
        ]
    else:
        country_wide_missions = []

    # TODO(pascal): Add some missions per city as well.

    departement_missions = missions[~missions.isAvailableEverywhere]\
        .groupby('departement').apply(_get_random_missions_picker(5))
    return country_wide_missions + [{
        '_id': departement_id,
        'missions': missions
    } for departement_id, missions in departement_missions.items()]


def _get_random_missions_picker(num_missions):
    def _pick_random_missions(missions):
        if len(missions) > num_missions:
            samples = missions.sample(num_missions)
        else:
            samples = missions
        return samples[['associationName', 'title', 'link',
                        'description']].to_dict('records')

    return _pick_random_missions


if __name__ == '__main__':
    mongo.importer_main(get_missions_dicts,
                        'volunteering_missions')  # pragma: no-cover
Example #25
    return f'{year:04d}-{month:02d}-{day:02d}'


def _adie_event_to_proto(props: Dict[str, Any]) -> Dict[str, Any]:
    props['cityName'] = props['ville'].title()
    return {
        '_id': props['rdvGroupeId'],
        'cityName': props['cityName'],
        'description':
            '***Ça parle de quoi ?***\n\n'
            '{sousTitre}\n\n'
            '***Ça se passe où ?***\n\n'
            '{nomSite}\n'
            '{adresse1}, {adresse2}, {codePostal} {cityName}\n\n'
            '***Quand ?***\n\n'
            'le {date}\n'.format(**props),
        'latitude': props['latitude'],
        'longitude': props['longitude'],
        'timingText': f'le {" ".join(props["date"].split(" ")[1:3])}',
        'startDate': _parse_date(_drop_first_word(props['date'])),
        'title': props['titre'],
    }


def _drop_first_word(text: str) -> str:
    return ' '.join(text.split(' ')[1:])


if __name__ == '__main__':
    mongo.importer_main(adie_events2dicts, 'adie_events')
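The 'date' field presumably starts with a weekday, e.g. 'mardi 14 janvier 2020' (an assumed data shape); _drop_first_word strips it before parsing, and timingText keeps only the day and month:

assert _drop_first_word('mardi 14 janvier 2020') == '14 janvier 2020'
assert ' '.join('mardi 14 janvier 2020'.split(' ')[1:3]) == '14 janvier'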
Example #26
                                       grouping=True)
    to_salary = locale.format_string('%d',
                                     estimation['maxSalary'],
                                     grouping=True)
    estimation['shortText'] = f'{from_salary} - {to_salary}'
    estimation['unit'] = 'ANNUAL_GROSS_SALARY'
    return estimation


def _get_training_count(trainings_csv: str) -> pandas.Series:
    trainings = pandas.read_csv(trainings_csv,
                                dtype={'address.postalCode': str})
    # Fix short postal codes.
    short_postal_codes = trainings['address.postalCode'].str.len() == 4
    trainings.loc[short_postal_codes, 'address.postalCode'] = \
        '0' + trainings.loc[short_postal_codes, 'address.postalCode']
    # Extract departement ID.
    trainings['departement_id'] = trainings['address.postalCode'].str[:2]
    oversee_departement = trainings.departement_id == '97'
    trainings.loc[oversee_departement, 'departement_id'] = \
        trainings.loc[oversee_departement, 'address.postalCode'].str[:3]
    # Create local_id.
    trainings['local_id'] = trainings['departement_id'] + ':' +\
        trainings['formation.proximiteRomes.code']
    return trainings.dropna(subset=['local_id']).groupby('local_id').size()\
        .rename('trainingCount').reset_index()


if __name__ == '__main__':
    mongo.importer_main(csv2dicts, 'local_diagnosis')
Example #27
    fap_growth['growth_2012_2022'] = \
        fap_growth.num_job_creations_2012_2022.div(fap_growth.num_jobs_2012)
    rome_fap_mapping = cleaned_data.rome_fap_mapping(
        filename=rome_fap_crosswalk_txt)
    rome_fap_flat_mapping = pandas.melt(
        rome_fap_mapping.fap_codes.apply(lambda s: pandas.Series(list(s))).reset_index(),
        id_vars=['index']).set_index('index').value.dropna().to_frame('fap_qualified_code')
    rome_fap_flat_mapping['fap_code'] = rome_fap_flat_mapping.fap_qualified_code.str[:3]
    multi_fap_groups = {
        'D0Z-D3Z': {'D0Z', 'D3Z'},
        'F0Z-F1Z': {'F0Z', 'F1Z'},
        'F2Z-F3Z': {'F2Z', 'F3Z'},
    }
    for fap_codes, fap_codes_as_set in multi_fap_groups.items():
        rome_fap_flat_mapping.loc[
            rome_fap_flat_mapping.fap_code.isin(fap_codes_as_set), 'fap_code'] = fap_codes
    rome_fap_flat_mapping.drop(
        rome_fap_flat_mapping[rome_fap_flat_mapping.fap_code == 'K0Z'].index, inplace=True)
    rome_fap_flat_mapping['growth_2012_2022'] = \
        rome_fap_flat_mapping.fap_code.map(fap_growth.set_index('fap_codes').growth_2012_2022)
    rome_fap_flat_mapping['num_jobs_2012'] = \
        rome_fap_flat_mapping.fap_code.map(fap_growth.set_index('fap_codes').num_jobs_2012)

    return rome_fap_flat_mapping.groupby(level=0).apply(
        lambda faps: 0 if faps.num_jobs_2012.sum() == 0 else
        faps.growth_2012_2022.mul(faps.num_jobs_2012).sum() / faps.num_jobs_2012.sum())


if __name__ == '__main__':
    mongo.importer_main(make_dicts, 'job_group_info')  # pragma: no cover
Example #28
        'target_job': 'codeOgr',
        'target_job_name': 'name',
        'target_job_masculine_name': 'masculineName',
        'target_job_feminine_name': 'feminineName',
    },
                   inplace=True)

    return {
        'jobGroup': {
            'romeId': jobs.target_job_group.iloc[0],
            'name': jobs.target_job_group_name.iloc[0],
            'samples': samples.to_dict('records'),
        }
    }


def _sample_jobs(
        num_samples: int
) -> Callable[[pandas.DataFrame], list[dict[str, Any]]]:
    def _sampling(jobs: pandas.DataFrame) -> list[dict[str, Any]]:
        if len(jobs.index) > num_samples:
            jobs = jobs.sample(n=num_samples)
        jobs = jobs[['codeOgr', 'name', 'masculineName', 'feminineName']]
        return typing.cast(list[dict[str, Any]], jobs.to_dict('records'))

    return _sampling


if __name__ == '__main__':
    mongo.importer_main(csv2dicts, 'similar_jobs')
Example #29
    by_region['region_count'] = region_count
    city_count = by_region.reset_index()

    # Compute country counts for each city.
    country_count = recent_offers.groupby('rome_id').id_offre.count()
    by_country = city_count.set_index('rome_id')
    by_country['country_count'] = country_count
    city_count = by_country.reset_index()

    for row in city_count.itertuples():
        res.append({
            '_id': row.rome_id + ':c' + row.city_code,
            'city': {
                'cityId': row.city_code,
                'name': row.city_name,
                'departementId': row.departement_code,
                'departementName': row.departement_name,
                'regionId': row.region_code,
                'regionName': row.region_name,
            },
            'cityCount': int(row.city_count),
            'departementCount': int(row.departement_count),
            'regionCount': int(row.region_count),
            'countryCount': int(row.country_count),
        })
    return res


if __name__ == '__main__':
    mongo.importer_main(csv2dicts, 'job_offers')
Example #30
    Args:
        durations_csv: path to a CSV file containing one line for each job
            seeker, some of their properties and the duration of their last
            unemployment period. See the full doc in the
            `fhs_category_duration.py` script.

    Returns:
        A list of dict compatible with the JSON version of
        TODO: Add proto here
        with an additional unique "_id" field.
    """

    job_seekers = pandas.read_csv(durations_csv, dtype={'city_id': str})

    global_diagnoses = []
    for rome_id, group in job_seekers.groupby('code_rome'):
        estimation = importer_helpers.unemployment_estimation(group.duration)
        global_diagnoses.append({
            '_id': rome_id,
            'unemploymentTimeHistogram': _get_histogram(group.duration),
            'diagnosis': estimation,
        })
    return global_diagnoses


if __name__ == '__main__':
    mongo.importer_main(fhs2dicts, 'global_diagnosis')