Example #1
from collections import Counter

# import paths assumed from the skills_ml package layout
from skills_ml.algorithms.skill_extractors import ExactMatchSkillExtractor
from skills_ml.ontologies.base import Competency, CompetencyFramework


def test_exactmatch_skill_extractor():
    competency_framework = CompetencyFramework(
        name='test_competencies',
        description='Test competencies',
        competencies=[
            Competency(identifier='2.a.1.a', name='Reading Comprehension'),
            Competency(identifier='2.a.1.b', name='Active Listening'),
        ])
    extractor = ExactMatchSkillExtractor(competency_framework)
    assert competency_framework.name in extractor.name
    assert competency_framework.description in extractor.description

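    # matches must be exact phrases: the third document below contains both
    # words but neither phrase, so it should yield no counts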
    result = [
        extractor.document_skill_counts({'description': doc}) for doc in [
            'this is a job that needs active listening',
            'this is a reading comprehension job',
            'this is an active and reading listening job',
            'this is a reading comprehension and active listening job',
        ]
    ]

    assert result == [
        Counter({'active listening': 1}),
        Counter({'reading comprehension': 1}),
        Counter(),
        Counter({
            'active listening': 1,
            'reading comprehension': 1
        })
    ]
Example #2
def test_exactmatch_skill_extractor():
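    # a tab-delimited skill lookup: a header row plus two O*NET skill rows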
    content = [
        ['', 'O*NET-SOC Code', 'Element ID', 'ONET KSA', 'Description',
         'skill_uuid', 'nlp_a'],
        ['1', '11-1011.00', '2.a.1.a', 'reading comprehension', '...',
         '2c77c703bd66e104c78b1392c3203362', 'reading comprehension'],
        ['2', '11-1011.00', '2.a.1.b', 'active listening', '...',
         'a636cb69257dcec699bce4f023a05126', 'active listening'],
    ]
    with utils.makeNamedTemporaryCSV(content, '\t') as skills_filename:
        extractor = ExactMatchSkillExtractor(skill_lookup_path=skills_filename)
        result = [
            extractor.document_skill_counts(doc) for doc in [
                'this is a job that needs active listening',
                'this is a reading comprehension job',
                'this is an active and reading listening job',
                'this is a reading comprehension and active listening job',
            ]
        ]

        assert result == [
            Counter({'active listening': 1}),
            Counter({'reading comprehension': 1}),
            Counter(),
            Counter({
                'active listening': 1,
                'reading comprehension': 1
            })
        ]
Example #3
def test_exactmatch_skill_extractor_candidate_skills():
    extractor = ExactMatchSkillExtractor(sample_framework())
    candidate_skills = sorted(extractor.candidate_skills(sample_job_posting()),
                              key=lambda cs: cs.skill_name)

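    # exact matches report the sentence they were found in as context and a
    # confidence of 100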
    assert candidate_skills[0].skill_name == 'cooking'
    assert candidate_skills[0].context == 'One-two years cooking experience in a professional kitchen'
    assert candidate_skills[0].confidence == 100

    assert candidate_skills[1].skill_name == 'organization'
    assert candidate_skills[1].context == 'Organization, Cleanliness, Trainability, team player, good communication skillz, Motivation, a Sense of Responsibility and Pride in your Performance'
    assert candidate_skills[1].confidence == 100
Example #4
    def _compute_func_on_one(self):
        corpus_creator = SimpleCorpusCreator()
        skill_extractor = ExactMatchSkillExtractor(
            skill_lookup_path=self.skill_lookup_path)

        def func(job_posting):
            count_dict = skill_extractor.document_skill_counts(
                document=corpus_creator._transform(job_posting))
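            # repeat each matched skill name once per count, then flatten into
            # a single list of skill names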
            count_lists = [[k] * v for k, v in count_dict.items()]
            flattened = [
                count for countlist in count_lists for count in countlist
            ]
            return {self.property_name: flattened}

        return func
Example #5
    def setUp(self):
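        # assumes an S3 mock (e.g. moto) is active for this test class, so the
        # bucket below is created against a fake S3 rather than real AWS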
        s3_conn = boto.connect_s3()
        client = boto3.resource('s3')
        bucket = client.create_bucket(Bucket='test-bucket')
        storage = S3Store('s3://test-bucket/computed_properties')
        skill_extractor = ExactMatchSkillExtractor(utils.sample_framework())
        self.computed_property = SkillCounts(
            skill_extractor=skill_extractor,
            storage=storage,
        )
        self.job_postings = [
            utils.job_posting_factory(datePosted=self.datestring,
                                      description='reading comprehension')
        ]
        self.computed_property.compute_on_collection(self.job_postings)
Example #6
from skills_ml.ontologies.onet import Onet
from skills_ml.algorithms.skill_extractors import (  # import path assumed from the skills_ml package layout
    ExactMatchSkillExtractor, FuzzyMatchSkillExtractor,
    SocScopedExactMatchSkillExtractor, SectionExtractSkillExtractor,
    SkillEndingPatternExtractor, AbilityEndingPatternExtractor)
from skills_ml.evaluation.skill_extractors import candidate_skills_from_sample, metrics_for_candidate_skills
from skills_ml.evaluation.skill_extraction_metrics import TotalOccurrences, TotalVocabularySize, OntologyCompetencyRecall
from skills_ml.job_postings.common_schema import JobPostingCollectionSample
from tests.utils import sample_factory

sample = sample_factory(JobPostingCollectionSample())
print('Building ONET, may take a while to download')
full_onet = Onet()

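# the skill extractors under evaluation: three pattern-based extractors and
# three that match against the ONET competency framework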
skill_extractors = [
    SectionExtractSkillExtractor(),
    SkillEndingPatternExtractor(only_bulleted_lines=False),
    AbilityEndingPatternExtractor(only_bulleted_lines=False),
    FuzzyMatchSkillExtractor(full_onet.competency_framework),
    ExactMatchSkillExtractor(full_onet.competency_framework),
    SocScopedExactMatchSkillExtractor(full_onet)
]
print('Done building ONET! Now subsetting ONET into K,S,A')
metric_ontologies = [
    full_onet,
    full_onet.filter_by(lambda edge: 'Knowledge' in edge.competency.categories,
                        competency_name='onet_knowledge',
                        competency_description='ONET Knowledge'),
    full_onet.filter_by(lambda edge: 'Abilities' in edge.competency.categories,
                        competency_name='onet_ability',
                        competency_description='ONET Ability'),
    full_onet.filter_by(lambda edge: 'Skills' in edge.competency.categories,
                        competency_name='onet_skill',
                        competency_description='ONET Skill')
]
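
The script above builds the posting sample, the extractors, and the metric ontologies but stops before the evaluation itself. The sketch below shows how the imported helpers could plausibly be wired together; the exact signatures of candidate_skills_from_sample and metrics_for_candidate_skills are assumptions based on their names, not confirmed against the library, so treat it as an illustration only.

# a hedged sketch continuing the script above; the signatures of
# candidate_skills_from_sample and metrics_for_candidate_skills are assumed
for skill_extractor in skill_extractors:
    candidate_skills = candidate_skills_from_sample(sample, skill_extractor)
    metrics = metrics_for_candidate_skills(
        candidate_skills=candidate_skills,
        sample=sample,
        metrics=[TotalOccurrences(), TotalVocabularySize()] +
                [OntologyCompetencyRecall(ontology) for ontology in metric_ontologies],
    )
    print(type(skill_extractor).__name__, metrics)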