def ontology_from_candidate_skills(candidate_skills: CandidateSkillYielder, skill_extractor_name: str='unknown') -> CompetencyOntology:
    """Create an ontology from a list of candidate skills

    Each candidate skill becomes a competency (identified by its lowercased
    skill name) and is linked by an edge to the ONET occupation looked up
    from the candidate skill's source object.

    Args:
        candidate_skills (iterable of algorithms.skill_extractors.base.CandidateSkill objects)
        skill_extractor_name (str): Name of the skill extractor that produced
            the candidate skills. It is embedded in the ontology's name,
            competency name and competency description. Defaults to 'unknown'.

    Returns: (skills_ml.ontologies.base.CompetencyOntology)
    """
    ontology = CompetencyOntology(
        name=f'candidate_skill_{skill_extractor_name}',
        competency_name=f'candidate_skill_{skill_extractor_name}',
        competency_description=f'Constructed from CandidateSkill objects produced by the {skill_extractor_name} skill extractor'
    )
    for candidate_skill in candidate_skills:
        # The identifier is case-normalised so differently-cased mentions of
        # the same skill name share one identifier.
        competency = Competency(
            identifier=candidate_skill.skill_name.lower(),
            name=candidate_skill.skill_name
        )
        if competency not in ontology.competencies:
            ontology.add_competency(competency)

        occupation_code = get_onet_occupation(candidate_skill.source_object)
        occupation = Occupation(identifier=occupation_code)
        if occupation not in ontology.occupations:
            ontology.add_occupation(occupation)

        # An edge is added for every candidate skill, including repeats.
        ontology.add_edge(occupation=occupation, competency=competency)

    return ontology
 def eval(self, candidate_skills: CandidateSkillYielder,
          sample_len: int) -> float:
     """Return the ratio of distinct occupations found to the lookup size.

     Args:
         candidate_skills: Iterable of candidate skills; the occupation of
             each one is looked up from its ``source_object``.
         sample_len: Not used by this method.

     Returns:
         Number of distinct, truthy occupations seen in ``candidate_skills``
         divided by ``len(self.lookup)``, or ``0.0`` when the lookup is
         empty.
     """
     num_total_occupations = len(self.lookup)
     if num_total_occupations == 0:
         logging.warning(
             'Lookup has zero terms, cannot evaluate. Returning 0')
         # Return a float so the result type matches the annotation.
         return 0.0

     # A set already discards repeats, so no membership test is needed
     # before adding; falsy occupation values are skipped.
     found_occupations = set()
     for candidate_skill in candidate_skills:
         occupation = get_onet_occupation(candidate_skill.source_object)
         if occupation:
             found_occupations.add(occupation)

     num_found_occupations = len(found_occupations)
     logging.info('Found %s occupations out of %s total',
                  num_found_occupations, num_total_occupations)
     return num_found_occupations / num_total_occupations
# Example #3
0
 def transformer(self):
     """Build a callable that encodes the ONET occupation of a job posting.

     Returns:
         A one-argument callable. Given a job posting, it looks up the
         posting's occupation with ``get_onet_occupation`` and returns the
         result of ``self.encoder.transform`` applied to a single-element
         list containing that occupation.
     """
     def encode_occupation(job_posting):
         # Resolve the encoder method first, then look up the occupation.
         transform = self.encoder.transform
         occupation = get_onet_occupation(job_posting)
         return transform([occupation])

     return encode_occupation
# Example #4
0
 def extract_occupation_from_jobposting(self, job_posting):
     """Pair a job posting's ONET occupation with the posting's id.

     Args:
         job_posting: Object that is passed to ``get_onet_occupation`` and
             subscripted with the key ``'id'``.

     Returns:
         A 2-tuple ``(occupation, id)``.
     """
     occupation = get_onet_occupation(job_posting)
     posting_id = job_posting['id']
     return occupation, posting_id