Exemplo n.º 1
0
def _extract_candidate_name(text, nlp):
    """Return the candidate name for a single text, with a regex fallback.

    The name is first requested from
    ``field_extraction.candidate_name_extractor``. Only when that returns the
    ``"NOT FOUND"`` sentinel is ``field_extraction.NAME_REGEX`` searched
    (case-insensitively) in the same text.

    :param text: Text of one observation.
    :param nlp: Object forwarded unchanged to the name extractor.
    :return: The extracted name, the full regex match, or ``"NOT FOUND"``
        when neither strategy yields a result.
    """
    name = field_extraction.candidate_name_extractor(text, nlp)
    if name == "NOT FOUND":
        match = re.search(field_extraction.NAME_REGEX, text, re.IGNORECASE)
        # re.search returns None on no match; keep the sentinel in that case
        # instead of failing on ``None[0]``.
        if match:
            name = match[0]
    return name


def transform(observations, nlp):
    """Add extracted candidate, contact and skill fields to ``observations``.

    Adds the columns ``candidate_name``, ``email`` and ``phone``, each computed
    row by row from the ``text`` column, then hands the result to
    ``field_extraction.extract_fields``.

    :param observations: Table-like object with a ``text`` column that supports
        ``.apply`` (presumably a pandas DataFrame -- TODO confirm).
    :param nlp: Object forwarded unchanged to the name extractor.
    :return: Tuple ``(observations, nlp)``.
    """
    logging.info('Begin transform')

    # Extract candidate name. The "NOT FOUND" fallback is resolved per row
    # inside the helper: testing a whole column in an ``if`` is ambiguous and
    # ``re.search`` only accepts a single string. Doing it in the helper also
    # avoids adding new names to the locals() archived below.
    observations['candidate_name'] = observations['text'].apply(
        lambda x: _extract_candidate_name(x, nlp))

    # Extract contact fields
    observations['email'] = observations['text'].apply(
        lambda x: lib.term_match(x, field_extraction.EMAIL_REGEX))
    observations['phone'] = observations['text'].apply(
        lambda x: lib.term_match(x, field_extraction.PHONE_REGEX))

    # Extract skills
    observations = field_extraction.extract_fields(observations)

    # Archive schema and return
    lib.archive_dataset_schemas('transform', locals(), globals())
    logging.info('End transform')
    return observations, nlp
Exemplo n.º 2
0
def transform(observations, nlp):
    """Add extracted candidate, contact, education and skill fields.

    Each new column is computed row by row from the ``text`` column of
    ``observations``. The columns written here are ``candidate_name``,
    ``email``, ``phone``, ``university`` and ``Major``. The result is then
    passed through ``field_extraction.extract_fields``.

    :param observations: Table-like object with a ``text`` column that supports
        ``.apply`` (presumably a pandas DataFrame -- TODO confirm).
    :param nlp: Object forwarded unchanged to the ``field_extraction``
        extractors (presumably a loaded NLP model -- TODO confirm).
    :return: Tuple ``(observations, nlp)``; ``observations`` is whatever
        ``field_extraction.extract_fields`` returned.
    """
    logging.info('Begin transform')

    # Extract candidate name
    observations['candidate_name'] = observations['text'].apply(
        lambda x: field_extraction.candidate_name_extractor(x, nlp))

    # Extract contact fields: each text is matched against the e-mail and
    # phone patterns via lib.term_match
    observations['email'] = observations['text'].apply(
        lambda x: lib.term_match(x, field_extraction.EMAIL_REGEX))
    observations['phone'] = observations['text'].apply(
        lambda x: lib.term_match(x, field_extraction.PHONE_REGEX))

    # Extract education data
    # NOTE(review): ``count`` is never read again in this function; its only
    # visible effect is appearing in the locals() passed to
    # lib.archive_dataset_schemas below -- confirm before removing.
    count = 0
    observations['university'] = observations['text'].apply(
        lambda x: field_extraction.university_extractor(x, nlp))
    # NOTE(review): 'Major' is capitalised, unlike every other column name
    # written here -- confirm downstream consumers expect this spelling.
    observations['Major'] = observations['text'].apply(
        lambda x: field_extraction.major_extractor(x, nlp))

    # Extract skills (rebinds ``observations`` to the helper's return value)
    observations = field_extraction.extract_fields(observations)

    # Archive schema and return; the helper receives this function's locals
    # and the module globals, so the set of local names here is observable.
    lib.archive_dataset_schemas('transform', locals(), globals())
    logging.info('End transform')
    return observations, nlp