def test_exactmatch_skill_extractor():
    """Check ExactMatchSkillExtractor metadata and per-document skill counts.

    The extractor is built from a two-competency framework and run over four
    descriptions. The expected counts are keyed by the lowercased competency
    name, and a description that only contains the words of a name out of
    order is expected to yield no skills.
    """
    framework = CompetencyFramework(
        name='test_competencies',
        description='Test competencies',
        competencies=[
            Competency(identifier='2.a.1.a', name='Reading Comprehension'),
            Competency(identifier='2.a.1.b', name='Active Listening'),
        ],
    )
    extractor = ExactMatchSkillExtractor(framework)

    # The extractor's name/description should embed the framework's own.
    assert framework.name in extractor.name
    assert framework.description in extractor.description

    descriptions = [
        'this is a job that needs active listening',
        'this is a reading comprehension job',
        'this is an active and reading listening job',
        'this is a reading comprehension and active listening job',
    ]
    observed_counts = []
    for text in descriptions:
        observed_counts.append(
            extractor.document_skill_counts({'description': text})
        )

    expected_counts = [
        Counter({'active listening': 1}),
        Counter({'reading comprehension': 1}),
        Counter(),
        Counter({'active listening': 1, 'reading comprehension': 1}),
    ]
    assert observed_counts == expected_counts
def sample_framework():
    """Return a small CompetencyFramework with two competencies.

    The framework holds 'Reading Comprehension' (identifier 'a') and
    'Active Listening' (identifier 'b').
    """
    competencies = [
        Competency(identifier='a', name='Reading Comprehension'),
        Competency(identifier='b', name='Active Listening'),
    ]
    framework = CompetencyFramework(
        name='sample_framework',
        description='A few basic competencies',
        competencies=competencies,
    )
    return framework
def sample_framework():
    """Return a small CompetencyFramework with three competencies.

    The framework holds 'Organization' (identifier 'a'),
    'Communication Skills' (identifier 'b') and 'Cooking' (identifier 'c').
    """
    competencies = [
        Competency(identifier='a', name='Organization'),
        Competency(identifier='b', name='Communication Skills'),
        Competency(identifier='c', name='Cooking'),
    ]
    framework = CompetencyFramework(
        name='Sample Framework',
        description='A few basic competencies',
        competencies=competencies,
    )
    return framework
def ontology_from_candidate_skills(candidate_skills: CandidateSkillYielder, skill_extractor_name: str='unknown') -> CompetencyOntology:
    """Create an ontology from a list of candidate skills

    Simply associate each candidate skill with its ONET occupation.

    Each candidate skill becomes a Competency whose identifier is the
    lowercased skill name and whose name is the skill name as given. A
    competency already seen for the same document id is not processed again.

    Args:
        candidate_skills (iterable of algorithms.skill_extractors.base.CandidateSkill objects)
        skill_extractor_name (str): Label interpolated into the ontology's
            name, competency name and competency description.
            Defaults to 'unknown'.

    Returns: (skills_ml.ontologies.base.CompetencyOntology)
    """
    ontology = CompetencyOntology(
        name=f'candidate_skill_{skill_extractor_name}',
        competency_name=f'candidate_skill_{skill_extractor_name}',
        competency_description=f'Constructed from CandidateSkill objects produced by the {skill_extractor_name} skill extractor'
    )
    # Competencies already handled, grouped by the document they came from.
    competencies_by_document_id = defaultdict(set)
    for candidate_skill in candidate_skills:
        competency = Competency(
            identifier=candidate_skill.skill_name.lower(),
            name=candidate_skill.skill_name
        )
        # Only act on the first occurrence of a competency within a document.
        if competency not in competencies_by_document_id[candidate_skill.document_id]:
            competencies_by_document_id[candidate_skill.document_id].add(competency)
            if competency not in ontology.competencies:
                ontology.add_competency(competency)
            # The occupation code is derived from the candidate skill's
            # source object (presumably the originating job posting --
            # TODO confirm against get_onet_occupation).
            occupation_code = get_onet_occupation(candidate_skill.source_object)
            occupation = Occupation(identifier=occupation_code)
            if occupation not in ontology.occupations:
                ontology.add_occupation(occupation)
            ontology.add_edge(occupation=occupation, competency=competency)
    return ontology
def sample_ontology():
    """Return a small CompetencyOntology with four competency/occupation edges.

    'Organization' is linked to occupations '11-1011.00' and '11-1012.00';
    'Communication Skills' and 'Cooking' are each linked to '11-1011.00'.
    """
    # (competency identifier, competency name, occupation identifier)
    edge_specs = [
        ('a', 'Organization', '11-1011.00'),
        ('a', 'Organization', '11-1012.00'),
        ('b', 'Communication Skills', '11-1011.00'),
        ('c', 'Cooking', '11-1011.00'),
    ]
    # Fresh Competency/Occupation objects are built for every edge.
    edges = [
        CompetencyOccupationEdge(
            competency=Competency(identifier=identifier, name=name),
            occupation=Occupation(identifier=soc_code),
        )
        for identifier, name, soc_code in edge_specs
    ]
    return CompetencyOntology(
        competency_name='Sample Framework',
        competency_description='A few basic competencies',
        edges=edges,
    )
def test_occupation_scoped_freetext_skill_extractor():
    """Check SocScopedExactMatchSkillExtractor counts across occupation codes.

    The ontology links two competencies to occupation '11-1011.00' only.
    The same four descriptions are run under three 'onet_soc_code' values:
    the linked code (matches expected), an unlinked code '11-1021.00'
    (empty counts expected) and None (empty counts expected).
    """
    ontology = CompetencyOntology(
        competency_name='Sample Framework',
        competency_description='A few basic competencies',
        edges=[
            CompetencyOccupationEdge(
                competency=Competency(identifier=identifier, name=name),
                occupation=Occupation(identifier='11-1011.00'),
            )
            for identifier, name in (
                ('2.a.1.a', 'Reading Comprehension'),
                ('2.a.1.b', 'Active Listening'),
            )
        ],
    )
    extractor = SocScopedExactMatchSkillExtractor(ontology)

    def job_posting(doc_id, soc_code, description, expected):
        # Each document carries its own expected result alongside the fields
        # handed to the extractor.
        return {
            'id': doc_id,
            '@type': 'JobPosting',
            'onet_soc_code': soc_code,
            'description': description,
            'expected_value': expected,
        }

    listening = 'this is a job that needs active listening'
    reading = 'this is a reading comprehension job'
    scrambled = 'this is an active and reading listening job'
    both = 'this is a reading comprehension and active listening job'

    documents = [
        # Occupation present in the ontology: full-name matches are counted.
        job_posting('1234', '11-1011.00', listening,
                    Counter({'active listening': 1})),
        job_posting('2234', '11-1011.00', reading,
                    Counter({'reading comprehension': 1})),
        job_posting('3234', '11-1011.00', scrambled, Counter()),
        job_posting('4234', '11-1011.00', both,
                    Counter({'active listening': 1,
                             'reading comprehension': 1})),
        # Occupation absent from the ontology: nothing is expected.
        job_posting('5234', '11-1021.00', listening, Counter()),
        job_posting('6234', '11-1021.00', reading, Counter()),
        job_posting('7234', '11-1021.00', scrambled, Counter()),
        job_posting('8234', '11-1021.00', both, Counter()),
        # No occupation code at all: nothing is expected.
        job_posting('9234', None, listening, Counter()),
        job_posting('1334', None, reading, Counter()),
        job_posting('1434', None, scrambled, Counter()),
        job_posting('1534', None, both, Counter()),
    ]

    for posting in documents:
        observed = extractor.document_skill_counts(posting)
        assert observed == posting['expected_value']