Example #1
0
    def test_missing_several_keys(self) -> None:
        """Missing several translations on ensure_translate."""

        with self.assertRaises(KeyError) as raised:
            with translation.Translator() as translator:
                # 'country' is translatable; the other two keys are not.
                for key in ('country', 'sacrebleu', 'fully missing key'):
                    translator.ensure_translate(key, 'en_UK')

        # Both missing keys are reported in the single raised error.
        message = str(raised.exception)
        self.assertIn('sacrebleu', message)
        self.assertIn('fully missing key', message)
Example #2
0
    def test_translate_fieldss(self) -> None:
        """Translate fields of a mapping."""

        # NOTE(review): method name has a double 's' ("fieldss") — likely a
        # typo; confirm no sibling test already uses the corrected name
        # before renaming.
        record = {
            'a': 'country',
            'b': 'untranslated',
            'c': 'my language',
        }
        with translation.Translator() as translator:
            translated = translator.ensure_translate_fields(
                record, locale='en_UK', fields=('a', 'c', 'unknown'))
            # Only the requested fields that have a translation come back;
            # 'b' was not requested and 'unknown' is absent from the input.
            self.assertEqual(
                {'a': 'United Kingdom', 'c': 'English'}, translated)
Example #3
0
def make_dicts(
    *,
    soc_definitions_xls: str,
    hires_csv: str,
    job_seekers_csv: str,
    states_txt: str,
    application_mode_csv: str,
    soc_structure_xls: str,
    soc_fap_crosswalk_airtable: str,
    brookings_automation_risk_json: str,
    occupation_requirements_json: str,
    skills_for_future_airtable: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Prepare job info for MongoDB.

    Builds one record per US SOC 2010 job group, enriched with its domain,
    application modes, least-stressful district scores, automation risk,
    per-state market scores, occupation requirements and (optionally)
    skills for the future.

    Args:
        soc_definitions_xls: path to the SOC 2010 definitions spreadsheet.
        hires_csv: path to hires data used to compute market scores.
        job_seekers_csv: path to job-seekers data used to compute market
            scores.
        states_txt: path to a '|'-delimited file with STATE (numeric code)
            and STUSAB (state abbreviation) columns.
        application_mode_csv: path to the FAP application-modes data.
        soc_structure_xls: path to the SOC structure spreadsheet.
        soc_fap_crosswalk_airtable: Airtable reference for the SOC->FAP
            crosswalk, formatted as 'base:table' (it is split on ':').
        brookings_automation_risk_json: path to the Brookings automation
            risk data.
        occupation_requirements_json: path to a JSON list of requirement
            objects, each carrying an '_id' key.
        skills_for_future_airtable: optional Airtable reference for the
            skills-for-future data.

    Returns:
        A list of MongoDB-ready dicts, one per job group.
    """

    job_groups = usa_cleaned_data.us_soc2010_job_groups(
        filename=soc_definitions_xls)
    # MongoDB document ID is the job group's SOC code ('romeId').
    job_groups['_id'] = job_groups['romeId']

    # Domains
    # skiprows=11 skips the spreadsheet preamble — presumably title rows;
    # TODO(review): confirm against the actual SOC structure file layout.
    structure = pd.read_excel(soc_structure_xls, skiprows=11)\
        .dropna(subset=['Major Group'])\
        .drop(['Minor Group', 'Broad Group', 'Detailed Occupation'], axis='columns')
    # Keep only the 3-char major-group prefix so it can be matched against
    # the first 3 chars of each job group's SOC code.
    structure['Major Group'] = structure['Major Group'].str[:3]
    major_groups = structure.set_index('Major Group').squeeze()
    job_groups['domain'] = job_groups['romeId'].str[:3].map(major_groups)
    # Job groups with no matching major group get an empty domain.
    job_groups.domain.fillna('', inplace=True)

    local_stats = usa_cleaned_data.usa_compute_market_score(
        hires_csv=hires_csv,
        job_seekers_csv=job_seekers_csv,
        job_groups=job_groups)

    # Application modes.
    modes = cleaned_data.fap_application_modes(filename=application_mode_csv)
    # The crosswalk carries 3-char FAP prefixes, so index the full FAP
    # codes by their prefix to join them back.
    fap_prefixes = pd.DataFrame({
        'fap_code': modes.index,
        'fap_prefix': modes.index.str[:3]
    })
    # The Airtable reference is 'base:table'.
    soc_to_fap_prefix = _load_crosswalk_airtable(
        *soc_fap_crosswalk_airtable.split(':'))
    soc_to_fap = soc_to_fap_prefix.join(fap_prefixes.set_index('fap_prefix'),
                                        on='fap_prefix',
                                        how='inner')
    soc_to_fap['modes'] = soc_to_fap.fap_code.map(modes)
    # For each SOC group, keep the modes of every FAP that has some.
    job_groups['applicationModes'] = soc_to_fap.groupby('soc_2018').apply(
        lambda faps: {
            str(fap.fap_code): fap.modes
            for fap in faps.itertuples() if fap.modes
        })
    # Fill NaN with empty {}.
    job_groups['applicationModes'] = job_groups.applicationModes.apply(
        lambda s: s if isinstance(s, dict) else {})

    # Add best counties.
    # TODO(cyrille): Rename field to a more generic area name.
    job_groups[
        'departementScores'] = market_score_derivatives.get_less_stressful_districts(
            local_stats, max_districts=20)
    # Fill NaN with empty [].
    job_groups['departementScores'] = job_groups.departementScores.apply(
        lambda s: s if isinstance(s, list) else [])

    # Automation risk as an integer percentage.
    # NOTE(review): .astype(int) raises on NaN, so this assumes the
    # Brookings series itself has no missing values; the fillna below only
    # covers job groups absent from that series after index alignment —
    # confirm.
    job_groups['automationRisk'] = usa_cleaned_data.us_automation_brookings(
        filename=brookings_automation_risk_json,
        soc_filename=soc_definitions_xls).mul(100).round(0).astype(int)
    # Mark 0 values as 1, as 0 means undefined.
    job_groups.loc[job_groups['automationRisk'] == 0, 'automationRisk'] = 1
    job_groups['automationRisk'].fillna(0, inplace=True)

    # Data per state.
    states = pd.read_csv(states_txt, delimiter='|')
    # The state code is the thousands part of the district ID (presumably a
    # county FIPS code — TODO confirm), mapped to its abbreviation.
    local_stats['state'] = local_stats['district_id'].astype(int).div(1000).astype(int)\
        .map(states.set_index('STATE').STUSAB)
    # Median market score per (state, job group) pair.
    state_scores = local_stats.dropna(subset=['market_score'])\
        .groupby(['state', 'job_group']).market_score.median().reset_index()
    job_groups['admin1AreaScores'] = state_scores.groupby('job_group').apply(
        lambda stat_scores: [{
            'areaId': row.state,
            'localStats': {
                'imt': {
                    'yearlyAvgOffersPer10Candidates': round(row.market_score)
                }
            },
        } for row in stat_scores.itertuples()])
    # Fill NaN with empty [].
    job_groups['admin1AreaScores'] = job_groups.admin1AreaScores.apply(
        lambda s: s if isinstance(s, list) else [])
    job_groups['inDomain'] = 'in your industry'

    # Add occupation requirements from json file.
    with open(occupation_requirements_json,
              encoding='utf-8') as job_requirements_file:
        job_requirements_list = json.load(job_requirements_file)
        # Re-key the list on each requirement's '_id' (popped from the value).
        job_requirements_dict = {
            job_requirement.pop('_id'): job_requirement
            for job_requirement in job_requirements_list
        }
    job_groups['requirements'] = job_groups.index.map(job_requirements_dict)
    # Replace NaN by empty dicts.
    job_groups['requirements'] = job_groups.requirements.apply(
        lambda r: r if isinstance(r, dict) else {})

    # SkillsForFuture
    skills_for_future_by_rome = airtable_to_protos.load_items_from_prefix(
        'Skill', job_groups.index, skills_for_future_airtable,
        'soc_prefixes_us')
    if skills_for_future_by_rome:
        # Overlay each skill dict with translations of its i18n fields.
        # NOTE(review): locale here is 'en' while the UK pipeline uses
        # 'en_UK' — confirm this asymmetry is intended.
        with translation.Translator() as translator:
            translated_skills_for_future_by_rome = {
                rome_id: [
                    skill | translator.ensure_translate_fields(
                        skill, locale='en', fields=_SKILL_18N_FIELDS)
                    for skill in skills
                ]
                for rome_id, skills in skills_for_future_by_rome.items()
            }
        job_groups['skillsForFuture'] = job_groups.index.map(
            translated_skills_for_future_by_rome)

    return typing.cast(list[dict[str, Any]], job_groups.to_dict('records'))
Example #4
0
def make_dicts(
    *,
    postings_csv: str,
    occupations_csv: str,
    jobs_xls: str,
    soc2010_js: str,
    career_jumps_csv: str,
    automation_xls: str,
    info_by_prefix_airtable: str,
    occupation_requirements_json: str,
    skills_for_future_airtable: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Prepare job info for MongoDB.

    Builds one record per UK SOC 2010 job group, enriched with description
    and sample jobs, domain, automation risk, prefix-based info, related
    job groups, least-stressful district scores, occupation requirements
    and (optionally) skills for the future.

    Args:
        postings_csv: path to job postings data used for market scores.
        occupations_csv: path to occupations data used for market scores.
        jobs_xls: path to the UK SOC 2010 job groups spreadsheet.
        soc2010_js: path to the SOC 2010 group descriptions data.
        career_jumps_csv: path to a CSV of (job_group, target_job_group)
            career transitions.
        automation_xls: path to the automation risk spreadsheet.
        info_by_prefix_airtable: Airtable reference for per-prefix info.
        occupation_requirements_json: path to a JSON list of requirement
            objects, each carrying an '_id' key.
        skills_for_future_airtable: optional Airtable reference for the
            skills-for-future data.

    Returns:
        A list of MongoDB-ready dicts, one per job group.
    """

    job_groups = uk_cleaned_data.uk_soc2010_job_groups(filename=jobs_xls) \
        .reset_index().rename(columns={'Unit_Group': 'romeId'})
    # MongoDB document ID is the job group's SOC code ('romeId'); it also
    # becomes the frame's index so later index.map() calls key on it.
    job_groups['_id'] = job_groups['romeId']
    job_groups.set_index('_id', inplace=True)

    descriptions = uk_cleaned_data.uk_soc2010_group_descriptions(
        filename=soc2010_js)
    job_groups['description'] = descriptions['description']
    # Wrap each sample job name into the expected {'name': ...} shape.
    job_groups['samples'] = descriptions['jobs'].apply(
        lambda job_names: [{
            'name': name
        } for name in job_names])

    # The domain is the capitalized major-group name, matched on the first
    # character of the SOC code.
    domains = uk_cleaned_data.uk_soc2010_job_groups(filename=jobs_xls, level='Major_Group')\
        .squeeze().str.capitalize()
    job_groups['domain'] = job_groups.romeId.str[:1].map(domains)

    local_stats = local_diagnosis.compute_market_score(
        postings_csv=postings_csv, occupations_csv=occupations_csv)

    # Automation risk as a percentage.
    # NOTE(review): values stay floats here (no int cast after round) —
    # confirm downstream consumers accept that.
    job_groups['automationRisk'] = _get_automation_risk(automation_xls).mul(
        100).round(0)
    # Mark 0 values as 1, as 0 means undefined.
    job_groups.loc[job_groups.automationRisk == 0, 'automationRisk'] = 1
    job_groups.automationRisk.fillna(0, inplace=True)

    job_groups = job_groups.join(
        local_diagnosis.load_prefixed_info_from_airtable(
            job_groups.index, info_by_prefix_airtable))

    # Add related job groups.
    career_jumps = pd.read_csv(career_jumps_csv, dtype='str')
    # Drop career jumps whose target job group is marked COVID_RISKY.
    safe_career_jumps = career_jumps[career_jumps.target_job_group.map(
        job_groups.covidRisk) != 'COVID_RISKY']
    # Join in both the source ('_source' suffix) and target group columns,
    # then collect the target groups under each source group.
    job_groups['relatedJobGroups'] = safe_career_jumps\
        .join(job_groups, on='job_group')\
        .join(job_groups, on='target_job_group', lsuffix='_source')\
        .groupby(['romeId_source'])\
        .apply(lambda df: [
            {
                'jobGroup': target_job,
                'mobilityType': 'CLOSE',
            }
            for target_job in df[['romeId', 'name', 'automationRisk']].to_dict('records')])
    # Fill NaN with empty [].
    job_groups['relatedJobGroups'] = job_groups.relatedJobGroups.apply(
        lambda s: s if isinstance(s, list) else [])

    # Add best counties.
    # TODO(cyrille): Rename field to a more generic area name.
    job_groups[
        'departementScores'] = market_score_derivatives.get_less_stressful_districts(
            local_stats, max_districts=20)
    # Fill NaN with empty [].
    job_groups['departementScores'] = job_groups.departementScores.apply(
        lambda s: s if isinstance(s, list) else [])

    # Add occupation requirements from json file.
    with open(occupation_requirements_json,
              encoding='utf-8') as job_requirements_file:
        job_requirements_list = json.load(job_requirements_file)
        # Re-key the list on each requirement's '_id' (popped from the value).
        job_requirements_dict = {
            job_requirement.pop('_id'): job_requirement
            for job_requirement in job_requirements_list
        }
    job_groups['requirements'] = job_groups.index.map(job_requirements_dict)
    # Replace NaN by empty dicts.
    job_groups['requirements'] = job_groups.requirements.apply(
        lambda r: r if isinstance(r, dict) else {})

    # SkillsForFuture
    skills_for_future_by_rome = airtable_to_protos.load_items_from_prefix(
        'Skill', job_groups.index, skills_for_future_airtable,
        'soc_prefixes_uk')
    if skills_for_future_by_rome:
        # Overlay each skill dict with translations of its i18n fields.
        with translation.Translator() as translator:
            translated_skills_for_future_by_rome = {
                rome_id: [
                    skill | translator.ensure_translate_fields(
                        skill, locale='en_UK', fields=_SKILL_18N_FIELDS)
                    for skill in skills
                ]
                for rome_id, skills in skills_for_future_by_rome.items()
            }
        job_groups['skillsForFuture'] = job_groups.index.map(
            translated_skills_for_future_by_rome)

    return typing.cast(list[dict[str, Any]],
                       job_groups.reset_index().to_dict('records'))
Example #5
0
    def test_fallback(self) -> None:
        """Fallback locale on ensure_translate."""

        with translation.Translator() as translator:
            # 'my language' is presumably only available in a base locale,
            # so the 'en_UK' request should fall back to it.
            translated = translator.ensure_translate('my language', 'en_UK')
            self.assertEqual('English', translated)
Example #6
0
    def test_ensure_translate(self) -> None:
        """Basic usage of ensure_translate."""

        with translation.Translator() as translator:
            translated = translator.ensure_translate('country', 'en_UK')
            self.assertEqual('United Kingdom', translated)
Example #7
0
    def test_missing_key(self) -> None:
        """Missing translation on ensure_translate."""

        missing_key = 'sacrebleu'
        # NOTE: the KeyError may only surface when the Translator context
        # exits, so assertRaises must wrap the whole block.
        with self.assertRaises(KeyError):
            with translation.Translator() as translator:
                translator.ensure_translate(missing_key, 'en_UK')